/* SysUtils - a library to assist with various system-related functions Copyright (C) 2020 Brent Saner This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ package paths import ( `context` "errors" "fmt" "io/fs" "os" "os/user" "path/filepath" `regexp` `slices` "strings" `sync` `time` // "syscall" `github.com/djherbis/times` `golang.org/x/sync/semaphore` `r00t2.io/goutils/bitmask` ) /* ExpandHome will take a tilde(~)-prefixed path and resolve it to the actual path in-place. "Nested" user paths (~someuser/somechroot/~someotheruser) are not supported as home directories are expected to be absolute paths. */ func ExpandHome(path *string) (err error) { var unameSplit []string var uname string var u *user.User // Props to this guy. // https://stackoverflow.com/a/43578461/733214 if len(*path) == 0 { err = errors.New("empty path") return } else if (*path)[0] != '~' { return } // E(ffective)UID (e.g. chown'd user for SUID) /* uid := strconv.Itoa(syscall.Geteuid()) u, err := user.LookupId(euid) */ // (Real)UID (invoking user) /* if u, err = user.Current(); err != nil { return } */ // K but do it smarter. unameSplit = strings.SplitN(*path, string(os.PathSeparator), 2) if len(unameSplit) != 2 { unameSplit = append(unameSplit, "") } uname = strings.TrimPrefix(unameSplit[0], "~") if uname == "" { if u, err = user.Current(); err != nil { return } } else { if u, err = user.Lookup(uname); err != nil { return } } *path = filepath.Join(u.HomeDir, unameSplit[1]) return } /* GetFirst is the file equivalent of envs.GetFirst. It iterates through paths, normalizing them along the way (so abstracted paths such as ~/foo/bar.txt and relative paths such as bar/baz.txt will still work), and returns the content of the first found existing file. If the first found path is a directory, content will be nil but isDir will be true (as will ok). If no path exists, ok will be false. As always, results are not guaranteed due to permissions, etc. potentially returning an inaccurate result. This is a thin wrapper around GetFirstWithRef. */ func GetFirst(paths []string) (content []byte, isDir, ok bool) { content, isDir, ok, _ = GetFirstWithRef(paths) return } /* GetFirstWithRef is the file equivalent of envs.GetFirstWithRef. It behaves exactly like GetFirst, but with an additional returned value, idx, which specifies the index in paths in which a path was found. As always, results are not guaranteed due to permissions, etc. potentially returning an inaccurate result. */ func GetFirstWithRef(paths []string) (content []byte, isDir, ok bool, idx int) { var locPaths []string var exists bool var stat os.FileInfo var err error idx = -1 // We have to be a little less cavalier about this. if paths == nil { return } locPaths = make([]string, len(paths)) locPaths = paths[:] // Create an explicit copy so we don't modify paths. for i, p := range locPaths { if exists, stat, err = RealPathExistsStat(&p); err != nil { err = nil continue } if !exists { continue } isDir = stat.IsDir() if !isDir { if content, err = os.ReadFile(p); err != nil { continue } } ok = true idx = i return } return } /* MakeDirIfNotExist will create a directory at a given path if it doesn't exist. See also the documentation for RealPath. This is a bit more sane option than os.MkdirAll as it will normalize paths a little better. */ func MakeDirIfNotExist(path string) (err error) { var stat os.FileInfo var exists bool var locPath string = path if exists, stat, err = RealPathExistsStat(&locPath); err != nil { if !exists { // This, at least as of golang 1.15, uses the user's umask. // It does not actually create a dir with 0777. // It's up to the caller to do an os.Chmod() on the path after, if desired. if err = os.MkdirAll(locPath, 0777); err != nil { return } err = nil return } else { return } } // So it exists, but it probably isn't a dir. if !stat.Mode().IsDir() { err = errors.New(fmt.Sprintf("path %v exists but is not a directory", locPath)) return } else { return } // This should probably never happen. Probably. err = errors.New("undefined") return } /* RealPath will transform a given path into the very best guess for an absolute path in-place. It is recommended to check err (if not nil) for an invalid path error. If this is true, the path syntax/string itself is not supported on the runtime OS. This can be done via: if errors.Is(err, fs.ErrInvalid) {...} */ func RealPath(path *string) (err error) { if err = ExpandHome(path); err != nil { return } if *path, err = filepath.Abs(*path); err != nil { return } return } /* RealPathExists is like RealPath, but will also return a boolean as to whether the path actually exists or not. Note that err *may* be os.ErrPermission/fs.ErrPermission, in which case the exists value cannot be trusted as a permission error occurred when trying to stat the path - if the calling user/process does not have read permission on e.g. a parent directory, then exists may be false but the path may actually exist. This condition can be checked via via: if errors.Is(err, fs.ErrPermission) {...} See also the documentation for RealPath. In those cases, it may be preferable to use RealPathExistsStat and checking stat for nil. */ func RealPathExists(path *string) (exists bool, err error) { if err = RealPath(path); err != nil { return } if _, err = os.Stat(*path); err != nil { if errors.Is(err, fs.ErrNotExist) { err = nil } return } exists = true return } /* RealPathExistsStat is like RealPathExists except it will also return the os.FileInfo for the path (assuming it exists). If stat is nil, it is highly recommended to check err via the methods suggested in the documentation for RealPath and RealPathExists. */ func RealPathExistsStat(path *string) (exists bool, stat os.FileInfo, err error) { if exists, err = RealPathExists(path); err != nil { return } if stat, err = os.Stat(*path); err != nil { return } return } /* SearchPaths gets a file/directory path list based on the provided criteria. targetType defines what should be included in the path list. It can consist of one or more (io/)fs.FileMode types OR'd together (ensure they are part of (io/)fs.ModeType). (You can use 0 to match regular files explicitly, and/or noFiles = true to exclude them.) noFiles, if true, will explicitly filter out regular files from the path results. (Normally they are *always* included regardless of targetType.) basePtrn may be nil; if it isn't, it will be applied to *base names* (that is, quux.txt rather than /foo/bar/baz/quux.txt). pathPtrn is like basePtrn except it applies to the *entire* path, not just the basename, if not nil (e.g. /foo/bar/baz/quux.txt, not just quux.txt). If age is not nil, it will be applied to the path object. It will match older files/directories/etc. if olderThan is true, otherwise it will match newer files/directories/etc. (olderThan is not used otherwise.) ageType is one or more Time* constants OR'd together to describe which timestamp type to check. (Note that TimeCreated may not match if specified as it is only available on certain OSes, kernel versions, and filesystems. This may lead to files being excluded that may have otherwise been included.) (You can use TimeAny to specify any supported time.) *Any* matching timestamp of all specified (and supported) timestamp types matches, so be judicious with your selection. They are processed in order of: * btime (birth/creation time) (if supported) * mtime (modification time -- contents have changed) * ctime (OS-specific behavior; generally disk metadata has changed) (if supported) * atime (access time) olderThan (as mentioned above) will find paths *older* than age if true, otherwise *newer*. now, if not nil, will be used to compare the age of files. (If nil, it will be populated at time of call.) */ func SearchFsPaths( root string, targetType fs.FileMode, noFiles bool, basePtrn, pathPtrn *regexp.Regexp, age *time.Duration, ageType pathTimeType, olderThan bool, now *time.Time, ) (foundPaths []string, err error) { if age != nil { if now == nil { now = new(time.Time) *now = time.Now() } } if err = RealPath(&root); err != nil { return } if err = filepath.WalkDir( root, func(path string, d fs.DirEntry, inErr error) (outErr error) { var include bool if inErr != nil { outErr = inErr return } if include, outErr = filterPath( path, d, targetType, noFiles, basePtrn, pathPtrn, age, ageType, olderThan, now, ); outErr != nil { return } if include { foundPaths = append(foundPaths, path) } return }, ); err != nil { return } // And sort them. slices.Sort(foundPaths) return } /* SearchFsPathsAsync is exactly like SearchFsPaths, but dispatches off concurrent workers for the filtering logic instead of performing iteratively/recursively. It may, in some cases, be *slightly more* performant and *slightly less* in others. Additional options are documented below. Note that unlike SearchFsPaths, the results written to foundPathsChan are not guaranteed to be in any predictable order. All channels are expected to have been initialized by the caller ahead of time, and all provided channels will be closed upon completion (so they are only safe to READ from after invoking SearchFsPathsAsync). foundPathsChan is a channel to which matched filepaths will be written. sem/semCtx are optional; if not nil, they can be used to limit/"batch" concurrent tasks. (semCtx is the context.Context used for sem when acquiring. It may be nil; one will be locally created if so.) The default will be to spawn all filtering logic concurrently. For very large directories, you almost assuredly do not want that -- it can cause a significant amount of I/O and CPU wait. (See https://pkg.go.dev/golang.org/x/sync/semaphore for details.) wg *must not* be nil, and must be managed by the caller. SearchFsPathsAsync will exit with no errors but no-op if wg is nil. errChan will receive any/all encountered errors. */ func SearchFsPathsAsync( root string, targetType fs.FileMode, noFiles bool, basePtrn, pathPtrn *regexp.Regexp, age *time.Duration, ageType pathTimeType, olderThan bool, now *time.Time, foundPathsChan chan string, sem *semaphore.Weighted, semCtx context.Context, wg *sync.WaitGroup, errChan chan error, ) { var err error var localWg sync.WaitGroup if wg == nil { return } if age != nil { if now == nil { now = new(time.Time) *now = time.Now() } } if sem != nil && semCtx == nil { semCtx = context.Background() } if err = filepath.WalkDir( root, func(path string, de fs.DirEntry, inErr error) (outErr error) { localWg.Add(1) wg.Add(1) if sem != nil { if err = sem.Acquire(semCtx, 1); err != nil { return } } go func(p string, d fs.DirEntry) { var pErr error var pInclude bool defer localWg.Done() defer wg.Done() if sem != nil { defer sem.Release(1) } if pInclude, pErr = filterPath(p, d, targetType, noFiles, basePtrn, pathPtrn, age, ageType, olderThan, now); pErr != nil { errChan <- pErr return } if pInclude { foundPathsChan <- p } }(path, de) return }, ); err != nil { errChan <- err return } go func() { localWg.Wait() close(foundPathsChan) close(errChan) }() return } // filterPath applies the filter logic used by SearchFSPaths and SearchFsPathsAync. func filterPath( path string, d fs.DirEntry, targetType fs.FileMode, noFiles bool, basePtrn, pathPtrn *regexp.Regexp, age *time.Duration, ageType pathTimeType, olderThan bool, now *time.Time, ) (include bool, err error) { var typeMode fs.FileMode var fi fs.FileInfo var tspec times.Timespec var typeFilter *bitmask.MaskBit = bitmask.NewMaskBitExplicit(uint(targetType)) if age != nil { if now == nil { now = new(time.Time) *now = time.Now() } } // patterns if pathPtrn != nil { if !pathPtrn.MatchString(path) { return } } if basePtrn != nil { if !basePtrn.MatchString(filepath.Base(path)) { return } } // age if age != nil { if tspec, err = times.Stat(path); err != nil { return } if !filterTimes(tspec, age, &ageType, olderThan, now) { return } } // fs object type (file, dir, etc.) if fi, err = d.Info(); err != nil { return } typeMode = fi.Mode().Type() if typeMode == 0 && noFiles { return } else if typeMode != 0 { if !typeFilter.HasFlag(bitmask.MaskBit(typeMode)) { return } } include = true return } /* filterTimes checks a times.Timespec of a file using: * an age specified by the caller * an ageType bitmask for types of times to compare * an olderThan bool (if false, the file must be younger than) * an optional "now" timestamp for the age derivation. */ func filterTimes(tspec times.Timespec, age *time.Duration, ageType *pathTimeType, olderThan bool, now *time.Time) (include bool) { var curAge time.Duration var mask *bitmask.MaskBit var tfunc func(t *time.Duration) (match bool) = func(t *time.Duration) (match bool) { if olderThan { match = *t > *age } else { match = *t < *age } return } if tspec == nil || age == nil || ageType == nil { return } mask = ageType.Mask() if now == nil { now = new(time.Time) *now = time.Now() } // BTIME (if supported) if tspec.HasBirthTime() && (mask.HasFlag(bitmask.MaskBit(TimeAny)) || mask.HasFlag(bitmask.MaskBit(TimeCreated))) { curAge = now.Sub(tspec.BirthTime()) if include = tfunc(&curAge); include { return } } // MTIME if mask.HasFlag(bitmask.MaskBit(TimeAny)) || mask.HasFlag(bitmask.MaskBit(TimeModified)) { curAge = now.Sub(tspec.ModTime()) if include = tfunc(&curAge); include { return } } // CTIME (if supported) if tspec.HasChangeTime() && (mask.HasFlag(bitmask.MaskBit(TimeAny)) || mask.HasFlag(bitmask.MaskBit(TimeChanged))) { curAge = now.Sub(tspec.ChangeTime()) if include = tfunc(&curAge); include { return } } // ATIME if mask.HasFlag(bitmask.MaskBit(TimeAny)) || mask.HasFlag(bitmask.MaskBit(TimeAccessed)) { curAge = now.Sub(tspec.AccessTime()) if include = tfunc(&curAge); include { return } } return }