From c6efc2d83cb91072cd8547e2065fc3c17afecfb9 Mon Sep 17 00:00:00 2001 From: brent saner Date: Mon, 18 Nov 2024 17:36:14 -0500 Subject: [PATCH] v1.12.0 FIXED: * paths: Async searching works correctly now, and is consolidated to a single struct for searching options for async and synchronous searches. --- fsutils/errs.go | 11 ++ paths/consts.go | 11 ++ paths/errs.go | 12 ++ paths/funcs.go | 284 +++++++++++--------------------- paths/funcs_fssearchcriteria.go | 125 ++++++++++++++ paths/types.go | 127 ++++++++++++++ 6 files changed, 383 insertions(+), 187 deletions(-) create mode 100644 fsutils/errs.go create mode 100644 paths/errs.go create mode 100644 paths/funcs_fssearchcriteria.go diff --git a/fsutils/errs.go b/fsutils/errs.go new file mode 100644 index 0000000..e40a963 --- /dev/null +++ b/fsutils/errs.go @@ -0,0 +1,11 @@ +package fsutils + +import ( + `syscall` +) + +var ( + // Yes, I know. "Why ENOTTY?" I don't know, ask Linus. + // If you see "inappropriate ioctl for device", it's this'un. + ErrFsAttrsUnsupported error = syscall.ENOTTY +) diff --git a/paths/consts.go b/paths/consts.go index f948b2c..7aaeb63 100644 --- a/paths/consts.go +++ b/paths/consts.go @@ -17,6 +17,17 @@ const ( modeAnyExceptRegular pathMode = modeDir | modeSymlink | modePipe | modeSocket | modeDev | modeCharDev | modeIrregular ) +// Miss reasons +const ( + MissNoMiss missReason = "" + MissNoMeta missReason = "Could not determine metadata" + MissBadBase missReason = "Base name does not match BasePtrn" + MissBadPath missReason = "Path does not match PathPtrn" + MissBadTime missReason = "Time(s) does not/do not match Age" + MissFile missReason = "Object is a file and NoFiles is set" + MissType missReason = "Object does not match TargetType" +) + // Times const TimeAny pathTimeType = 0 const ( diff --git a/paths/errs.go b/paths/errs.go new file mode 100644 index 0000000..e9bb296 --- /dev/null +++ b/paths/errs.go @@ -0,0 +1,12 @@ +package paths + +import ( + `errors` +) + +var ( + ErrNilErrChan error = errors.New("an initialized error channel is required") + ErrNilMatchChan error = errors.New("an initialized matches channel is required") + ErrNilMismatchChan error = errors.New("an initialized mismatches channel is required") + ErrNilWg error = errors.New("a non-nil sync.WaitGroup is required") +) diff --git a/paths/funcs.go b/paths/funcs.go index dd0f5ca..049f99d 100644 --- a/paths/funcs.go +++ b/paths/funcs.go @@ -26,8 +26,7 @@ import ( "os" "os/user" "path/filepath" - `regexp` - `slices` + `sort` "strings" `sync` `time` @@ -35,7 +34,6 @@ import ( // "syscall" `github.com/djherbis/times` - `golang.org/x/sync/semaphore` `r00t2.io/goutils/bitmask` ) @@ -277,86 +275,33 @@ func RealPathExistsStat(path *string) (exists bool, stat os.FileInfo, err error) return } -/* - SearchPaths gets a file/directory path list based on the provided criteria. +// SearchFsPaths gets a file/directory/etc. path list based on the provided criteria. +func SearchFsPaths(matcher FsSearchCriteria) (found, miss []*FsSearchResult, err error) { - targetType defines what should be included in the path list. - It can consist of one or more (io/)fs.FileMode types OR'd together - (ensure they are part of (io/)fs.ModeType). - (You can use 0 to match regular files explicitly, and/or noFiles = true to exclude them.) + var matched *FsSearchResult + var missed *FsSearchResult - noFiles, if true, will explicitly filter out regular files from the path results. - (Normally they are *always* included regardless of targetType.) - - basePtrn may be nil; if it isn't, it will be applied to *base names* - (that is, quux.txt rather than /foo/bar/baz/quux.txt). - - pathPtrn is like basePtrn except it applies to the *entire* path, - not just the basename, if not nil (e.g. /foo/bar/baz/quux.txt, - not just quux.txt). - - If age is not nil, it will be applied to the path object. - It will match older files/directories/etc. if olderThan is true, - otherwise it will match newer files/directories/etc. - (olderThan is not used otherwise.) - - ageType is one or more Time* constants OR'd together to describe which timestamp type to check. - (Note that TimeCreated may not match if specified as it is only available on certain OSes, - kernel versions, and filesystems. This may lead to files being excluded that may have otherwise - been included.) - (You can use TimeAny to specify any supported time.) - *Any* matching timestamp of all specified (and supported) timestamp types matches, - so be judicious with your selection. They are processed in order of: - - * btime (birth/creation time) (if supported) - * mtime (modification time -- contents have changed) - * ctime (OS-specific behavior; generally disk metadata has changed) (if supported) - * atime (access time) - - olderThan (as mentioned above) will find paths *older* than age if true, otherwise *newer*. - - now, if not nil, will be used to compare the age of files. (If nil, it will be populated at time of call.) -*/ -func SearchFsPaths( - root string, - targetType fs.FileMode, noFiles bool, - basePtrn, pathPtrn *regexp.Regexp, - age *time.Duration, ageType pathTimeType, olderThan bool, now *time.Time, -) (foundPaths []string, err error) { - - if age != nil { - if now == nil { - now = new(time.Time) - *now = time.Now() - } - } - - if err = RealPath(&root); err != nil { + if err = RealPath(&matcher.Root); err != nil { return } if err = filepath.WalkDir( - root, + matcher.Root, func(path string, d fs.DirEntry, inErr error) (outErr error) { - var include bool - if inErr != nil { outErr = inErr return } - if include, outErr = filterPath( - path, d, - targetType, noFiles, - basePtrn, pathPtrn, - age, ageType, olderThan, now, - ); outErr != nil { + if matched, missed, outErr = matcher.Match(path, d, nil); outErr != nil { return } - - if include { - foundPaths = append(foundPaths, path) + if matched != nil && !matcher.NoMatch { + found = append(found, matched) + } + if missed != nil && !matcher.NoMismatch { + miss = append(miss, missed) } return @@ -365,8 +310,18 @@ func SearchFsPaths( return } + if found == nil || len(found) == 0 { + return + } + // And sort them. - slices.Sort(foundPaths) + sort.Slice( + found, + func(i, j int) (isLess bool) { + isLess = found[i].Path < found[j].Path + return + }, + ) return } @@ -375,162 +330,107 @@ func SearchFsPaths( SearchFsPathsAsync is exactly like SearchFsPaths, but dispatches off concurrent workers for the filtering logic instead of performing iteratively/recursively. It may, in some cases, be *slightly more* performant and *slightly less* in others. - Additional options are documented below. - Note that unlike SearchFsPaths, the results written to foundPathsChan are not - guaranteed to be in any predictable order. + Note that unlike SearchFsPaths, the results written to the + FsSearchCriteriaAsync.ResChan are not guaranteed to be in any predictable order. - All channels are expected to have been initialized by the caller ahead of time, - and all provided channels will be closed upon completion (so they are only safe - to READ from after invoking SearchFsPathsAsync). - - foundPathsChan is a channel to which matched filepaths will be written. - - sem/semCtx are optional; if not nil, they can be used to limit/"batch" concurrent tasks. - (semCtx is the context.Context used for sem when acquiring. It may be nil; - one will be locally created if so.) - The default will be to spawn all filtering logic concurrently. - For very large directories, you almost assuredly do not want that -- it - can cause a significant amount of I/O and CPU wait. - (See https://pkg.go.dev/golang.org/x/sync/semaphore for details.) - - wg *must not* be nil, and must be managed by the caller. - SearchFsPathsAsync will exit with no errors but no-op if wg is nil. - - errChan will receive any/all encountered errors. + All channels are expected to have already been initialized by the caller. + They will not be closed by this function. */ -func SearchFsPathsAsync( - root string, - targetType fs.FileMode, noFiles bool, - basePtrn, pathPtrn *regexp.Regexp, - age *time.Duration, ageType pathTimeType, olderThan bool, now *time.Time, - foundPathsChan chan string, - sem *semaphore.Weighted, semCtx context.Context, - wg *sync.WaitGroup, - errChan chan error, -) { +func SearchFsPathsAsync(matcher FsSearchCriteriaAsync) { var err error - var localWg sync.WaitGroup + var wgLocal sync.WaitGroup + var doneChan chan bool = make(chan bool, 1) - if wg == nil { + if matcher.ErrChan == nil { + panic(ErrNilErrChan) return } - if age != nil { - if now == nil { - now = new(time.Time) - *now = time.Now() - } + if matcher.WG == nil { + matcher.ErrChan <- ErrNilWg + return } - if sem != nil && semCtx == nil { - semCtx = context.Background() + defer matcher.WG.Done() + + if matcher.ResChan == nil && !matcher.NoMatch { + matcher.ErrChan <- ErrNilMatchChan + return + } + if matcher.MismatchChan == nil && !matcher.NoMismatch { + matcher.ErrChan <- ErrNilMismatchChan + return + } + + if err = RealPath(&matcher.Root); err != nil { + matcher.ErrChan <- err + return + } + + if matcher.Semaphore != nil && matcher.SemaphoreCtx == nil { + matcher.SemaphoreCtx = context.Background() } if err = filepath.WalkDir( - root, + matcher.Root, func(path string, de fs.DirEntry, inErr error) (outErr error) { - localWg.Add(1) - wg.Add(1) - if sem != nil { - if err = sem.Acquire(semCtx, 1); err != nil { + + if inErr != nil { + inErr = filterNoFileDir(inErr) + if inErr != nil { + outErr = inErr + return + } + } + + wgLocal.Add(1) + if matcher.Semaphore != nil { + if err = matcher.Semaphore.Acquire(matcher.SemaphoreCtx, 1); err != nil { return } } go func(p string, d fs.DirEntry) { var pErr error - var pInclude bool + var pResMatch *FsSearchResult + var pResMiss *FsSearchResult - defer localWg.Done() - defer wg.Done() + defer wgLocal.Done() - if sem != nil { - defer sem.Release(1) + if matcher.Semaphore != nil { + defer matcher.Semaphore.Release(1) } - if pInclude, pErr = filterPath(p, d, targetType, noFiles, basePtrn, pathPtrn, age, ageType, olderThan, now); pErr != nil { - errChan <- pErr + if pResMatch, pResMiss, pErr = matcher.Match(p, d, nil); pErr != nil { + matcher.ErrChan <- pErr return } - if pInclude { - foundPathsChan <- p + if pResMatch != nil && !matcher.NoMatch { + matcher.ResChan <- pResMatch + } + if pResMiss != nil && !matcher.NoMismatch { + matcher.MismatchChan <- pResMiss } }(path, de) return }, ); err != nil { - errChan <- err - return + err = filterNoFileDir(err) + if err != nil { + matcher.ErrChan <- err + return + } } go func() { - localWg.Wait() - close(foundPathsChan) - close(errChan) + wgLocal.Wait() + doneChan <- true }() - return -} - -// filterPath applies the filter logic used by SearchFSPaths and SearchFsPathsAync. -func filterPath( - path string, d fs.DirEntry, - targetType fs.FileMode, noFiles bool, - basePtrn, pathPtrn *regexp.Regexp, - age *time.Duration, ageType pathTimeType, olderThan bool, now *time.Time, -) (include bool, err error) { - - var typeMode fs.FileMode - var fi fs.FileInfo - var tspec times.Timespec - var typeFilter *bitmask.MaskBit = bitmask.NewMaskBitExplicit(uint(targetType)) - - if age != nil { - if now == nil { - now = new(time.Time) - *now = time.Now() - } - } - - // patterns - if pathPtrn != nil { - if !pathPtrn.MatchString(path) { - return - } - } - if basePtrn != nil { - if !basePtrn.MatchString(filepath.Base(path)) { - return - } - } - - // age - if age != nil { - if tspec, err = times.Stat(path); err != nil { - return - } - if !filterTimes(tspec, age, &ageType, olderThan, now) { - return - } - } - - // fs object type (file, dir, etc.) - if fi, err = d.Info(); err != nil { - return - } - typeMode = fi.Mode().Type() - if typeMode == 0 && noFiles { - return - } else if typeMode != 0 { - if !typeFilter.HasFlag(bitmask.MaskBit(typeMode)) { - return - } - } - - include = true + <-doneChan return } @@ -597,3 +497,13 @@ func filterTimes(tspec times.Timespec, age *time.Duration, ageType *pathTimeType return } + +func filterNoFileDir(err error) (filtered error) { + + filtered = err + if errors.Is(err, fs.ErrNotExist) { + filtered = nil + } + + return +} diff --git a/paths/funcs_fssearchcriteria.go b/paths/funcs_fssearchcriteria.go new file mode 100644 index 0000000..8a51d2c --- /dev/null +++ b/paths/funcs_fssearchcriteria.go @@ -0,0 +1,125 @@ +package paths + +import ( + `io/fs` + `os` + `path/filepath` + `time` + + `github.com/djherbis/times` + `r00t2.io/goutils/bitmask` +) + +/* + Match returns match (a ptr to a FsSearchResult if the specified path matches, otherwise nil), + miss (ptr the specified path does not match, otherwise nil), and an fs.DirEntry and fs.FileInfo + for path. d and/or fi may be nil. + + If err is not nil, it represents an unexpected error and as such, both match and miss should be nil. + + Match, miss, and err will all be nil if the filesystem object/path does not exist. +*/ +func (f *FsSearchCriteria) Match(path string, d fs.DirEntry, fi fs.FileInfo) (match, miss *FsSearchResult, err error) { + + var typeMode fs.FileMode + var m FsSearchResult + var typeFilter *bitmask.MaskBit = bitmask.NewMaskBitExplicit(uint(f.TargetType)) + + m = FsSearchResult{ + Path: path, + DirEntry: d, + FileInfo: fi, + Criteria: f, + } + + if f == nil { + return + } + + // A DirEntry can be created from a FileInfo but not vice versa. + if m.FileInfo == nil { + if m.DirEntry != nil { + if m.FileInfo, err = m.DirEntry.Info(); err != nil { + err = filterNoFileDir(err) + if err != nil { + return + } + } + } else { + if f.FollowSymlinks { + if m.FileInfo, err = os.Stat(path); err != nil { + err = filterNoFileDir(err) + if err != nil { + return + } + } + } else { + if m.FileInfo, err = os.Lstat(path); err != nil { + err = filterNoFileDir(err) + if err != nil { + return + } + } + } + m.DirEntry = fs.FileInfoToDirEntry(m.FileInfo) + } + } + if m.DirEntry == nil { + m.DirEntry = fs.FileInfoToDirEntry(m.FileInfo) + } + if m.DirEntry == nil || m.FileInfo == nil { + m.MissReason = MissNoMeta + miss = &m + return + } + + if m.Times, err = times.Stat(path); err != nil { + err = filterNoFileDir(err) + if err != nil { + return + } + } + + if f.PathPtrn != nil && !f.PathPtrn.MatchString(path) { + m.MissReason = MissBadPath + miss = &m + return + } + if f.BasePtrn != nil && !f.BasePtrn.MatchString(filepath.Base(path)) { + m.MissReason = MissBadBase + miss = &m + return + } + + // age + if f.Age != nil { + if f.Now == nil { + f.Now = new(time.Time) + *f.Now = time.Now() + } + if !filterTimes(m.Times, f.Age, &f.AgeType, f.OlderThan, f.Now) { + m.MissReason = MissBadTime + miss = &m + return + } + } + + // fs object type (file, dir, etc.) + typeMode = m.FileInfo.Mode().Type() + if typeMode == 0 && f.NoFiles { + m.MissReason = MissFile + miss = &m + return + } else if typeMode != 0 { + if !typeFilter.HasFlag(bitmask.MaskBit(typeMode)) { + m.MissReason = MissType + miss = &m + return + } + } + + // If it gets to here, it matches. + match = &m + + return +} diff --git a/paths/types.go b/paths/types.go index 8e6dc58..b0367ac 100644 --- a/paths/types.go +++ b/paths/types.go @@ -1,9 +1,136 @@ package paths import ( + `context` + `io/fs` + `regexp` + `sync` + `time` + + `github.com/djherbis/times` + `golang.org/x/sync/semaphore` `r00t2.io/goutils/bitmask` ) +// FsSearchCriteria contains filter criteria for SearchFsPaths* functions. +type FsSearchCriteria struct { + // Root indicates the root to search. + Root string `json:"root" toml:"RootPath" yaml:"Root Path" xml:"root,attr" validate:"dir"` + // NoMatch, if true, will not return matches. If NoMatch and NoMismatch are both true, no results will be returned. + NoMatch bool `json:"no_match" toml:"NoMatch" yaml:"No Matches" xml:"noMatch,attr"` + // NoMismatch, if true, will not return mismatches. If NoMatch and NoMismatch are both true, no results will be returned. + NoMismatch bool `json:"no_miss" toml:"NoMismatch" yaml:"No Mismatches" xml:"noMiss,attr"` + /* + TargetType defines what types of filesystem objects should be matched. + It can consist of one or more (io/)fs.FileMode types OR'd together + (ensure they are part of (io/)fs.ModeType). + (You can use 0 to match regular files explicitly, and/or NoFiles = true to exclude them.) + */ + TargetType fs.FileMode `json:"type_tgt" toml:"TargetType" yaml:"Target Type" xml:"typeTgt,attr"` + // NoFiles excludes files from TargetType-matching (as there isn't a way to explicitly exclude files otherwise if a non-zero mode is given). + NoFiles bool `json:"no_file" toml:"ExcludeFiles" yaml:"Exclude Files" xml:"noFile,attr"` + // FollowSymlinks, if true and a path being tested is a symlink, will use metadata (age, etc.) of the symlink itself rather than the link target. + FollowSymlinks bool `json:"follow_sym" toml:"FollowSymlinks" yaml:"Follow Symlinks" xml:"followSym,attr"` + // BasePtrn, if specified, will apply to the *base name (that is, quux.txt rather than /foo/bar/baz/quux.txt). See also PathPtrn. + BasePtrn *regexp.Regexp `json:"ptrn_base,omitempty" toml:"BaseNamePattern,omitempty" yaml:"Base Name Pattern,omitempty" xml:"ptrnBase,attr,omitempty"` + // PathPtrn, if specified, will apply to the *full path* (e.g. /foo/bar/baz/quux.txt, not just quux.txt). See also BasePtrn. + PathPtrn *regexp.Regexp `json:"ptrn_path,omitempty" toml:"PathPattern,omitempty" yaml:"Path Pattern,omitempty" xml:"ptrnPath,attr,omitempty"` + /* + Age, if specified, indicates the comparison of Now againt the AgeType of filesystem objects. + Use OlderThan to indicate if it should be older or newer. + */ + Age *time.Duration `json:"age,omitempty" toml:"Age,omitempty" yaml:"Age,omitempty" xml:"age,attr,omitempty"` + /* + AgeType can be one (or more, OR'd together) of the Time* constants in this package (TimeAny, TimeAccessed, TimeCreated, + TimeChanged, TimeModified) to indicate what timestamp(s) to use for comparing Age. + + The zero-value is TimeAny. + + The first matching timestamp will pass all time comparisons. + Be mindful of timestamp type support/limitations per OS/filesystem of Root. + + Completely unused if Age is nil. + */ + AgeType pathTimeType `json:"type_age" toml:"AgeType" yaml:"Age Type" xml:"typeAge,attr"` + /* + OlderThan, if true (and Age is not nil), indicates that matching filesystem objects should have their + AgeType older than Now. If false, their AgeType should be *newer* than Now. + + Completely unused if Age is nil. + */ + OlderThan bool `json:"older" toml:"OlderThan" yaml:"Older Than" xml:"older,attr"` + /* + Now expresses a time to compare to Age via AgeType and OlderThan. + Note that it may be any valid time, not necessarily "now". + If Age is specified but Now is nil, it will be populated with time.Now() when the search is invoked. + + Completely unused if Age is nil. + */ + Now *time.Time `json:"now,omitempty" toml:"Now,omitempty" yaml:"Now,omitempty" xml:"now,attr,omitempty"` +} + +// FsSearchCriteriaAsync extends FsSearchCriteria for use in an asynchronous (goroutine) manner. +type FsSearchCriteriaAsync struct { + FsSearchCriteria + /* + WG should be a non-nil pointer to a sync.WaitGroup. + This is used to manage searching completion to the caller. + + .Done() will be called once within the search function, but no .Add() will be called; + .Add() should be done by the caller beforehand. + */ + WG *sync.WaitGroup + // ResChan must be a non-nil channel for (positive) match results to be sent to. + ResChan chan *FsSearchResult + // MismatchChan, if not nil, will have negative matches/"misses" sent to it. + MismatchChan chan *FsSearchResult + /* + ErrChan should be a non-nil error channel for any unexpected errors encountered. + + If nil, a panic will be raised. + */ + ErrChan chan error + /* + Semaphore is completely optional, but if non-nil + it will be used to limit concurrent filesystem + object processing. + + It is generally a Very Good Idea(TM) to use this, + as the default is to dispatch all processing concurrently. + This can lead to some heavy I/O and CPU wait. + + (See https://pkg.go.dev/golang.org/x/sync/semaphore for details.) + */ + Semaphore *semaphore.Weighted + /* + SemaphoreCtx is the context.Context to use for Semaphore. + If nil (but Sempaphore is not), one will be created locally/internally. + */ + SemaphoreCtx context.Context +} + +// FsSearchResult contains a match/miss result for FsSearchCriteria and FsSearchCriteriaAsync. +type FsSearchResult struct { + /* + Path is the path to the object on the filesystem. + It may or may not exist at the time of return, + but will not be an empty string. + */ + Path string `json:"path" toml:"Path" yaml:"Path" xml:"path,attr"` + // DirEntry is the fs.DirEntry for the Path; note that .Name() is the base name only. TODO: serialization? + DirEntry fs.DirEntry `json:"-" toml:"-" yaml:"-" xml:"-"` + // FileInfo is the fs.FileInfo for the Path; note that .Name() is the base name only. TODO: serialization? + FileInfo fs.FileInfo `json:"-" toml:"-" yaml:"-" xml:"-"` + // Criteria is the evaluated criteria specified that this FsSearchResult matched. + Criteria *FsSearchCriteria `json:"criteria" toml:"Criteria" yaml:"Criteria" xml:"criteria"` + // Times holds the mtime, ctime, etc. of the filesystem object (where supported). TODO: serialization? + Times times.Timespec `json:"-" toml:"-" yaml:"-" xml:"-"` + // MissReason contains the reason the result is a miss (MissNoMiss if a match); see the Miss* constants. + MissReason missReason `json:"miss_reason" toml:"MissReason" yaml:"Miss Reason" xml:"miss,attr"` +} + +type missReason string + type pathMode bitmask.MaskBit type pathTimeType bitmask.MaskBit