/*
	linuxFsAttrsListOrder lists FsAttrs struct field names in the order their
	flags are rendered, mirroring e2fsprogs.

	The order follows flags_name in
	https://git.kernel.org/pub/scm/fs/ext2/e2fsprogs.git/tree/lib/e2p/pf.c
	(current as of e2fsprogs v1.47.1, Linux 6.12-rc7).

	Entries are field names (not flag letters) so they can be resolved
	directly through reflection.
*/
var linuxFsAttrsListOrder = []string{
	"SecureDelete",
	"UnDelete",
	"SyncUpdate",
	"DirSync",
	"Immutable",
	"AppendOnly",
	"NoDumpFile",
	"NoUpdateAtime",
	"CompressFile",
	"EncFile",
	"ReservedExt3",
	"HashIdxDir",
	"NoMergeTail",
	"DirTop",
	"Extents",
	"NoCOWFile",
	"DAX",
	"CaseInsensitive",
	"ReservedExt4c",
	"UseParentProjId",
	"VerityProtected",
	"NoCompress",
}
+*/ const ( SecureDelete fsAttr = 1 << iota // Secure deletion UnDelete // Undelete diff --git a/fsutils/funcs.go b/fsutils/funcs.go new file mode 100644 index 0000000..282dfee --- /dev/null +++ b/fsutils/funcs.go @@ -0,0 +1,16 @@ +package fsutils + +// invertMap returns some handy consts remapping for easier lookups. +func invertMap(origMap map[string]fsAttr) (newMap map[fsAttr]string) { + + if origMap == nil { + return + } + newMap = make(map[fsAttr]string) + + for k, v := range origMap { + newMap[v] = k + } + + return +} diff --git a/fsutils/funcs_fsattrs.go b/fsutils/funcs_fsattrs.go new file mode 100644 index 0000000..624405f --- /dev/null +++ b/fsutils/funcs_fsattrs.go @@ -0,0 +1,96 @@ +package fsutils + +import ( + `reflect` + `strings` +) + +/* + String returns a string representation (comparable to lsattr(1)) of an FsAttrs. + + Not all flags are represented, as this aims for compatibility with e2fsprogs/lsattr output. +*/ +func (f *FsAttrs) String() (s string) { + + // Flags have their short name printed if set, otherwise a '-' placeholder is used. + // https://git.kernel.org/pub/scm/fs/ext2/e2fsprogs.git/tree/lib/e2p/pf.c + + var refType reflect.Type + var refVal reflect.Value + var refField reflect.StructField + var fieldVal reflect.Value + var tagVal string + var sb strings.Builder + + if f == nil { + s = strings.Repeat("-", len(linuxFsAttrsListOrder)) + return + } + + refVal = reflect.ValueOf(*f) + refType = refVal.Type() + for _, fn := range linuxFsAttrsListOrder { + refField, _ = refType.FieldByName(fn) + tagVal = refField.Tag.Get("fsAttrShort") + if tagVal == "" || tagVal == "-" { + continue + } + fieldVal = refVal.FieldByName(fn) + if fieldVal.Bool() { + sb.WriteString(tagVal) + } else { + sb.WriteString("-") + } + } + + s = sb.String() + + return +} + +/* + StringLong returns a more extensive/"human-friendly" representation (comparable to lsattr(1) wiih -l) of an Fsattrs. 
+ + Not all flags are represented, as this aims for compatibility with e2fsprogs/lsattr output. +*/ +func (f *FsAttrs) StringLong() (s string) { + + // The long names are separated via a commma then a space. + // If no attrs are set, the string "---" is used. + // https://git.kernel.org/pub/scm/fs/ext2/e2fsprogs.git/tree/lib/e2p/pf.c + + var refType reflect.Type + var refVal reflect.Value + var refField reflect.StructField + var fieldVal reflect.Value + var tagVal string + var out []string + + if f == nil { + s = strings.Repeat("-", 3) + return + } + + refVal = reflect.ValueOf(*f) + refType = refVal.Type() + for _, fn := range linuxFsAttrsListOrder { + refField, _ = refType.FieldByName(fn) + tagVal = refField.Tag.Get("fsAttrLong") + if tagVal == "" || tagVal == "-" { + continue + } + fieldVal = refVal.FieldByName(fn) + if fieldVal.Bool() { + out = append(out, tagVal) + } + } + + if out == nil || len(out) == 0 { + s = strings.Repeat("-", 3) + return + } + + s = strings.Join(out, ", ") + + return +} diff --git a/fsutils/funcs_fsattrs_linux.go b/fsutils/funcs_fsattrs_linux.go index 5a2dfea..83f1cae 100644 --- a/fsutils/funcs_fsattrs_linux.go +++ b/fsutils/funcs_fsattrs_linux.go @@ -15,12 +15,14 @@ func (f *FsAttrs) Apply(path string) (err error) { var reflectVal reflect.Value var fieldVal reflect.Value - var myPath string = path - - if err = paths.RealPath(&myPath); err != nil { + if f == nil { return } - if file, err = os.Open(myPath); err != nil { + + if err = paths.RealPath(&path); err != nil { + return + } + if file, err = os.Open(path); err != nil { return } defer file.Close() diff --git a/fsutils/funcs_linux.go b/fsutils/funcs_linux.go index 2c53a52..473ed71 100644 --- a/fsutils/funcs_linux.go +++ b/fsutils/funcs_linux.go @@ -73,21 +73,6 @@ func getAttrs(f *os.File) (attrVal fsAttr, err error) { return } -// invertMap returns some handy consts remapping for easier lookups. 
/*
	FsAttrs is a struct representation of filesystem attributes on Linux.
	Up to date as of Linux 6.12-rc7.

	Each field is a boolean for one inode flag. The struct tags drive
	rendering and (de)serialization:

	  * fsAttrShort: the single-character form read by String. A value of "-"
	    means the flag is not rendered in the short listing.
	  * fsAttrLong: the long name read by StringLong. A value of "-" means the
	    flag is not rendered in the long listing.
	  * fsAttrKern: presumably the matching kernel constant name from
	    include/uapi/linux/fs.h -- NOTE(review): confirm against its consumer,
	    which is not visible here.
	  * json/toml/yaml/xml: key names for the respective encoders.

	Tag values are looked up via reflection by field name, so renaming a field
	requires updating linuxFsAttrsListOrder as well.
*/
type FsAttrs struct {
	SecureDelete       bool `fsAttrShort:"s" fsAttrLong:"Secure_Deletion" fsAttrKern:"FS_SECRM_FL" json:"secure_delete" toml:"SecureDelete" yaml:"Secure Delete" xml:"secureDelete,attr"`
	UnDelete           bool `fsAttrShort:"u" fsAttrLong:"Undelete" fsAttrKern:"FS_UNRM_FL" json:"undelete" toml:"Undelete" yaml:"Undelete" xml:"undelete,attr"`
	CompressFile       bool `fsAttrShort:"c" fsAttrLong:"Compression_Requested" fsAttrKern:"FS_COMPR_FL" json:"compress" toml:"Compress" yaml:"Compress" xml:"compress,attr"`
	SyncUpdate         bool `fsAttrShort:"S" fsAttrLong:"Synchronous_Updates" fsAttrKern:"FS_SYNC_FL" json:"sync" toml:"SyncUpdate" yaml:"Synchronized Update" xml:"syncUpdate,attr"`
	Immutable          bool `fsAttrShort:"i" fsAttrLong:"Immutable" fsAttrKern:"FS_IMMUTABLE_FL" json:"immutable" toml:"Immutable" yaml:"Immutable" xml:"immutable,attr"`
	AppendOnly         bool `fsAttrShort:"a" fsAttrLong:"Append_Only" fsAttrKern:"FS_APPEND_FL" json:"append_only" toml:"AppendOnly" yaml:"Append Only" xml:"appendOnly,attr"`
	NoDumpFile         bool `fsAttrShort:"d" fsAttrLong:"No_Dump" fsAttrKern:"FS_NODUMP_FL" json:"no_dump" toml:"NoDump" yaml:"Disable Dumping" xml:"noDump,attr"`
	NoUpdateAtime      bool `fsAttrShort:"A" fsAttrLong:"No_Atime" fsAttrKern:"FS_NOATIME_FL" json:"no_atime" toml:"DisableAtime" yaml:"Disable Atime Updating" xml:"noAtime,attr"`
	IsDirty            bool `fsAttrShort:"-" fsAttrLong:"-" fsAttrKern:"FS_DIRTY_FL" json:"dirty" toml:"Dirty" yaml:"Dirty" xml:"dirty,attr"`
	CompressedClusters bool `fsAttrShort:"-" fsAttrLong:"-" fsAttrKern:"FS_COMPRBLK_FL" json:"compress_clst" toml:"CompressedClusters" yaml:"Compressed Clusters" xml:"compressClst,attr"`
	NoCompress         bool `fsAttrShort:"m" fsAttrLong:"Dont_Compress" fsAttrKern:"FS_NOCOMP_FL" json:"no_compress" toml:"DisableCompression" yaml:"Disable Compression" xml:"noCompress,attr"`
	EncFile            bool `fsAttrShort:"E" fsAttrLong:"Encrypted" fsAttrKern:"FS_ENCRYPT_FL" json:"enc" toml:"Encrypted" yaml:"Encrypted" xml:"enc,attr"`
	BtreeFmt           bool `fsAttrShort:"-" fsAttrLong:"-" fsAttrKern:"FS_BTREE_FL" json:"btree" toml:"Btree" yaml:"Btree" xml:"btree,attr"`
	HashIdxDir         bool `fsAttrShort:"I" fsAttrLong:"Indexed_directory" fsAttrKern:"FS_INDEX_FL" json:"idx_dir" toml:"IdxDir" yaml:"Indexed Directory" xml:"idxDir,attr"`
	AfsDir             bool `fsAttrShort:"-" fsAttrLong:"-" fsAttrKern:"FS_IMAGIC_FL" json:"afs" toml:"AFS" yaml:"AFS" xml:"afs,attr"`
	ReservedExt3       bool `fsAttrShort:"j" fsAttrLong:"Journaled_Data" fsAttrKern:"FS_JOURNAL_DATA_FL" json:"res_ext3" toml:"ReservedExt3" yaml:"Reserved Ext3" xml:"resExt3,attr"`
	NoMergeTail        bool `fsAttrShort:"t" fsAttrLong:"No_Tailmerging" fsAttrKern:"FS_NOTAIL_FL" json:"no_merge_tail" toml:"DisableTailmerging" yaml:"Disable Tailmerging" xml:"noMergeTail,attr"`
	DirSync            bool `fsAttrShort:"D" fsAttrLong:"Synchronous_Directory_Updates" fsAttrKern:"FS_DIRSYNC_FL" json:"dir_sync" toml:"DirSync" yaml:"Synchronized Directory Updates" xml:"dirSync,attr"`
	DirTop             bool `fsAttrShort:"T" fsAttrLong:"Top_of_Directory_Hierarchies" fsAttrKern:"FS_TOPDIR_FL" json:"dir_top" toml:"DirTop" yaml:"Top of Directory Hierarchies" xml:"dirTop,attr"`
	ReservedExt4a      bool `fsAttrShort:"-" fsAttrLong:"-" fsAttrKern:"FS_HUGE_FILE_FL" json:"res_ext4a" toml:"ReservedExt4A" yaml:"Reserved Ext4 A" xml:"resExt4a,attr"`
	Extents            bool `fsAttrShort:"e" fsAttrLong:"Extents" fsAttrKern:"FS_EXTENT_FL" json:"extents" toml:"Extents" yaml:"Extents" xml:"extents,attr"`
	VerityProtected    bool `fsAttrShort:"V" fsAttrLong:"Verity" fsAttrKern:"FS_VERITY_FL" json:"verity" toml:"Verity" yaml:"Verity Protected" xml:"verity,attr"`
	LargeEaInode       bool `fsAttrShort:"-" fsAttrLong:"-" fsAttrKern:"FS_EA_INODE_FL" json:"ea" toml:"EAInode" yaml:"EA Inode" xml:"ea,attr"`
	ReservedExt4b      bool `fsAttrShort:"-" fsAttrLong:"-" fsAttrKern:"FS_EOFBLOCKS_FL" json:"res_ext4b" toml:"ReservedExt4B" yaml:"Reserved Ext4 B" xml:"resExt4b,attr"`
	NoCOWFile          bool `fsAttrShort:"C" fsAttrLong:"No_COW" fsAttrKern:"FS_NOCOW_FL" json:"no_cow" toml:"NoCOW" yaml:"Disable COW" xml:"noCOW,attr"`
	DAX                bool `fsAttrShort:"x" fsAttrLong:"DAX" fsAttrKern:"FS_DAX_FL" json:"dax" toml:"DAX" yaml:"DAX" xml:"DAX,attr"`
	ReservedExt4c      bool `fsAttrShort:"N" fsAttrLong:"Inline_Data" fsAttrKern:"FS_INLINE_DATA_FL" json:"res_ext4c" toml:"ReservedExt4C" yaml:"Reserved Ext4 C" xml:"resExt4c,attr"`
	UseParentProjId    bool `fsAttrShort:"P" fsAttrLong:"Project_Hierarchy" fsAttrKern:"FS_PROJINHERIT_FL" json:"parent_proj_id" toml:"ParentProjId" yaml:"Use Parent Project ID" xml:"parentProjId,attr"`
	CaseInsensitive    bool `fsAttrShort:"F" fsAttrLong:"Casefold" fsAttrKern:"FS_CASEFOLD_FL" json:"case_ins" toml:"CaseInsensitive" yaml:"Case Insensitive" xml:"caseIns,attr"`
	ReservedExt2       bool `fsAttrShort:"-" fsAttrLong:"-" fsAttrKern:"FS_RESERVED_FL" json:"res_ext2" toml:"ReservedExt2" yaml:"Reserved Ext2" xml:"resExt2,attr"`
}
if true, otherwise *newer*. + + now, if not nil, will be used to compare the age of files. (If nil, it will be populated at time of call.) */ func SearchFsPaths( root string, targetType fs.FileMode, noFiles bool, basePtrn, pathPtrn *regexp.Regexp, - age *time.Duration, ageType pathTimeType, olderThan bool, + age *time.Duration, ageType pathTimeType, olderThan bool, now *time.Time, ) (foundPaths []string, err error) { - var now time.Time = time.Now() + if age != nil { + if now == nil { + now = new(time.Time) + *now = time.Now() + } + } if err = RealPath(&root); err != nil { return @@ -329,52 +339,25 @@ func SearchFsPaths( root, func(path string, d fs.DirEntry, inErr error) (outErr error) { - var typeMode fs.FileMode - var fi fs.FileInfo - var tspec times.Timespec - var typeFilter *bitmask.MaskBit = bitmask.NewMaskBitExplicit(uint(targetType)) + var include bool if inErr != nil { outErr = inErr return } - // patterns - if pathPtrn != nil { - if !pathPtrn.MatchString(path) { - return - } - } - if basePtrn != nil { - if !basePtrn.MatchString(filepath.Base(path)) { - return - } - } - - // age - if age != nil { - if tspec, outErr = times.Stat(path); outErr != nil { - return - } - if !filterTimes(tspec, age, &ageType, olderThan, &now) { - return - } - } - - // fs object type (file, dir, etc.) - if fi, outErr = d.Info(); outErr != nil { - return - } - typeMode = fi.Mode().Type() - if typeMode == 0 && noFiles { - return - } - if !typeFilter.HasFlag(bitmask.MaskBit(typeMode)) { + if include, outErr = filterPath( + path, d, + targetType, noFiles, + basePtrn, pathPtrn, + age, ageType, olderThan, now, + ); outErr != nil { return } - // All filters passed at this point. 
- foundPaths = append(foundPaths, path) + if include { + foundPaths = append(foundPaths, path) + } return }, @@ -388,6 +371,170 @@ func SearchFsPaths( return } +/* + SearchFsPathsAsync is exactly like SearchFsPaths, but dispatches off concurrent + workers for the filtering logic instead of performing iteratively/recursively. + It may, in some cases, be *slightly more* performant and *slightly less* in others. + Additional options are documented below. + Note that unlike SearchFsPaths, the results written to foundPathsChan are not + guaranteed to be in any predictable order. + + All channels are expected to have been initialized by the caller ahead of time, + and all provided channels will be closed upon completion (so they are only safe + to READ from after invoking SearchFsPathsAsync). + + foundPathsChan is a channel to which matched filepaths will be written. + + sem/semCtx are optional; if not nil, they can be used to limit/"batch" concurrent tasks. + (semCtx is the context.Context used for sem when acquiring. It may be nil; + one will be locally created if so.) + The default will be to spawn all filtering logic concurrently. + For very large directories, you almost assuredly do not want that -- it + can cause a significant amount of I/O and CPU wait. + (See https://pkg.go.dev/golang.org/x/sync/semaphore for details.) + + wg *must not* be nil, and must be managed by the caller. + SearchFsPathsAsync will exit with no errors but no-op if wg is nil. + + errChan will receive any/all encountered errors. 
+*/ +func SearchFsPathsAsync( + root string, + targetType fs.FileMode, noFiles bool, + basePtrn, pathPtrn *regexp.Regexp, + age *time.Duration, ageType pathTimeType, olderThan bool, now *time.Time, + foundPathsChan chan string, + sem *semaphore.Weighted, semCtx context.Context, + wg *sync.WaitGroup, + errChan chan error, +) { + + var err error + var localWg sync.WaitGroup + + if wg == nil { + return + } + + if age != nil { + if now == nil { + now = new(time.Time) + *now = time.Now() + } + } + + if sem != nil && semCtx == nil { + semCtx = context.Background() + } + + if err = filepath.WalkDir( + root, + func(path string, de fs.DirEntry, inErr error) (outErr error) { + localWg.Add(1) + wg.Add(1) + if sem != nil { + if err = sem.Acquire(semCtx, 1); err != nil { + return + } + } + + go func(p string, d fs.DirEntry) { + var pErr error + var pInclude bool + + defer localWg.Done() + defer wg.Done() + + if sem != nil { + defer sem.Release(1) + } + + if pInclude, pErr = filterPath(p, d, targetType, noFiles, basePtrn, pathPtrn, age, ageType, olderThan, now); pErr != nil { + errChan <- pErr + return + } + + if pInclude { + foundPathsChan <- p + } + }(path, de) + + return + }, + ); err != nil { + errChan <- err + return + } + + go func() { + localWg.Wait() + close(foundPathsChan) + close(errChan) + }() + + return +} + +// filterPath applies the filter logic used by SearchFSPaths and SearchFsPathsAync. 
+func filterPath( + path string, d fs.DirEntry, + targetType fs.FileMode, noFiles bool, + basePtrn, pathPtrn *regexp.Regexp, + age *time.Duration, ageType pathTimeType, olderThan bool, now *time.Time, +) (include bool, err error) { + + var typeMode fs.FileMode + var fi fs.FileInfo + var tspec times.Timespec + var typeFilter *bitmask.MaskBit = bitmask.NewMaskBitExplicit(uint(targetType)) + + if age != nil { + if now == nil { + now = new(time.Time) + *now = time.Now() + } + } + + // patterns + if pathPtrn != nil { + if !pathPtrn.MatchString(path) { + return + } + } + if basePtrn != nil { + if !basePtrn.MatchString(filepath.Base(path)) { + return + } + } + + // age + if age != nil { + if tspec, err = times.Stat(path); err != nil { + return + } + if !filterTimes(tspec, age, &ageType, olderThan, now) { + return + } + } + + // fs object type (file, dir, etc.) + if fi, err = d.Info(); err != nil { + return + } + typeMode = fi.Mode().Type() + if typeMode == 0 && noFiles { + return + } else if typeMode != 0 { + if !typeFilter.HasFlag(bitmask.MaskBit(typeMode)) { + return + } + } + + include = true + + return +} + /* filterTimes checks a times.Timespec of a file using: * an age specified by the caller