eefe02afaf
ADDED: * fsutils: better/additional fsattrs functionality * paths: highly filterable filesystem searching
600 lines
15 KiB
Go
600 lines
15 KiB
Go
/*
	SysUtils - a library to assist with various system-related functions
	Copyright (C) 2020 Brent Saner

	This program is free software: you can redistribute it and/or modify
	it under the terms of the GNU General Public License as published by
	the Free Software Foundation, either version 3 of the License, or
	(at your option) any later version.

	This program is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	GNU General Public License for more details.

	You should have received a copy of the GNU General Public License
	along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
|
|
|
|
package paths
|
|
|
|
import (
|
|
`context`
|
|
"errors"
|
|
"fmt"
|
|
"io/fs"
|
|
"os"
|
|
"os/user"
|
|
"path/filepath"
|
|
`regexp`
|
|
`slices`
|
|
"strings"
|
|
`sync`
|
|
`time`
|
|
|
|
// "syscall"
|
|
|
|
`github.com/djherbis/times`
|
|
`golang.org/x/sync/semaphore`
|
|
`r00t2.io/goutils/bitmask`
|
|
)
|
|
|
|
/*
ExpandHome will take a tilde(~)-prefixed path and resolve it to the actual path in-place.
"Nested" user paths (~someuser/somechroot/~someotheruser) are not supported as home directories are expected to be absolute paths.

A bare leading "~" resolves to the home directory of the current user (user.Current);
"~name" resolves to the home directory of the named user (user.Lookup).
A path that does not start with a tilde is left untouched and err is nil.

err is non-nil if path is nil, if it points to an empty string, or if the user
lookup fails. path is only modified on success.
*/
func ExpandHome(path *string) (err error) {

	var uname string
	var remainder string
	var u *user.User

	// Props to this guy.
	// https://stackoverflow.com/a/43578461/733214
	if path == nil {
		// Guard against a nil pointer instead of panicking on the dereference below.
		err = errors.New("nil path")
		return
	}
	if len(*path) == 0 {
		err = errors.New("empty path")
		return
	}
	if (*path)[0] != '~' {
		return
	}

	// Split "~user/rest/of/path" into "~user" and "rest/of/path".
	// Without a separator the whole string is the user part and remainder is empty.
	// (The user is derived from the path itself rather than from the real or effective UID.)
	uname, remainder, _ = strings.Cut(*path, string(os.PathSeparator))
	uname = strings.TrimPrefix(uname, "~")

	if uname == "" {
		u, err = user.Current()
	} else {
		u, err = user.Lookup(uname)
	}
	if err != nil {
		return
	}

	*path = filepath.Join(u.HomeDir, remainder)

	return
}
|
|
|
|
/*
|
|
GetFirst is the file equivalent of envs.GetFirst.
|
|
|
|
It iterates through paths, normalizing them along the way
|
|
(so abstracted paths such as ~/foo/bar.txt and relative paths
|
|
such as bar/baz.txt will still work), and returns the content
|
|
of the first found existing file. If the first found path
|
|
is a directory, content will be nil but isDir will be true
|
|
(as will ok).
|
|
|
|
If no path exists, ok will be false.
|
|
|
|
As always, results are not guaranteed due to permissions, etc.
|
|
potentially returning an inaccurate result.
|
|
|
|
This is a thin wrapper around GetFirstWithRef.
|
|
*/
|
|
func GetFirst(paths []string) (content []byte, isDir, ok bool) {
|
|
|
|
content, isDir, ok, _ = GetFirstWithRef(paths)
|
|
|
|
return
|
|
}
|
|
|
|
/*
|
|
GetFirstWithRef is the file equivalent of envs.GetFirstWithRef.
|
|
|
|
It behaves exactly like GetFirst, but with an additional returned value, idx,
|
|
which specifies the index in paths in which a path was found.
|
|
|
|
As always, results are not guaranteed due to permissions, etc.
|
|
potentially returning an inaccurate result.
|
|
*/
|
|
func GetFirstWithRef(paths []string) (content []byte, isDir, ok bool, idx int) {
|
|
|
|
var locPaths []string
|
|
var exists bool
|
|
var stat os.FileInfo
|
|
var err error
|
|
|
|
idx = -1
|
|
// We have to be a little less cavalier about this.
|
|
if paths == nil {
|
|
return
|
|
}
|
|
locPaths = make([]string, len(paths))
|
|
locPaths = paths[:] // Create an explicit copy so we don't modify paths.
|
|
for i, p := range locPaths {
|
|
if exists, stat, err = RealPathExistsStat(&p); err != nil {
|
|
err = nil
|
|
continue
|
|
}
|
|
if !exists {
|
|
continue
|
|
}
|
|
isDir = stat.IsDir()
|
|
if !isDir {
|
|
if content, err = os.ReadFile(p); err != nil {
|
|
continue
|
|
}
|
|
}
|
|
ok = true
|
|
idx = i
|
|
return
|
|
}
|
|
|
|
return
|
|
}
|
|
|
|
/*
|
|
MakeDirIfNotExist will create a directory at a given path if it doesn't exist.
|
|
|
|
See also the documentation for RealPath.
|
|
|
|
This is a bit more sane option than os.MkdirAll as it will normalize paths a little better.
|
|
*/
|
|
func MakeDirIfNotExist(path string) (err error) {
|
|
|
|
var stat os.FileInfo
|
|
var exists bool
|
|
var locPath string = path
|
|
|
|
if exists, stat, err = RealPathExistsStat(&locPath); err != nil {
|
|
if !exists {
|
|
// This, at least as of golang 1.15, uses the user's umask.
|
|
// It does not actually create a dir with 0777.
|
|
// It's up to the caller to do an os.Chmod() on the path after, if desired.
|
|
if err = os.MkdirAll(locPath, 0777); err != nil {
|
|
return
|
|
}
|
|
err = nil
|
|
return
|
|
} else {
|
|
return
|
|
}
|
|
}
|
|
|
|
// So it exists, but it probably isn't a dir.
|
|
if !stat.Mode().IsDir() {
|
|
err = errors.New(fmt.Sprintf("path %v exists but is not a directory", locPath))
|
|
return
|
|
} else {
|
|
return
|
|
}
|
|
|
|
// This should probably never happen. Probably.
|
|
err = errors.New("undefined")
|
|
return
|
|
}
|
|
|
|
/*
|
|
RealPath will transform a given path into the very best guess for an absolute path in-place.
|
|
|
|
It is recommended to check err (if not nil) for an invalid path error. If this is true, the
|
|
path syntax/string itself is not supported on the runtime OS. This can be done via:
|
|
|
|
if errors.Is(err, fs.ErrInvalid) {...}
|
|
*/
|
|
func RealPath(path *string) (err error) {
|
|
|
|
if err = ExpandHome(path); err != nil {
|
|
return
|
|
}
|
|
|
|
if *path, err = filepath.Abs(*path); err != nil {
|
|
return
|
|
}
|
|
|
|
return
|
|
}
|
|
|
|
/*
|
|
RealPathExists is like RealPath, but will also return a boolean as to whether the path
|
|
actually exists or not.
|
|
|
|
Note that err *may* be os.ErrPermission/fs.ErrPermission, in which case the exists value
|
|
cannot be trusted as a permission error occurred when trying to stat the path - if the
|
|
calling user/process does not have read permission on e.g. a parent directory, then
|
|
exists may be false but the path may actually exist. This condition can be checked via
|
|
via:
|
|
|
|
if errors.Is(err, fs.ErrPermission) {...}
|
|
|
|
See also the documentation for RealPath.
|
|
|
|
In those cases, it may be preferable to use RealPathExistsStat and checking stat for nil.
|
|
*/
|
|
func RealPathExists(path *string) (exists bool, err error) {
|
|
|
|
if err = RealPath(path); err != nil {
|
|
return
|
|
}
|
|
|
|
if _, err = os.Stat(*path); err != nil {
|
|
if errors.Is(err, fs.ErrNotExist) {
|
|
err = nil
|
|
}
|
|
return
|
|
}
|
|
|
|
exists = true
|
|
|
|
return
|
|
}
|
|
|
|
/*
|
|
RealPathExistsStat is like RealPathExists except it will also return the os.FileInfo
|
|
for the path (assuming it exists).
|
|
|
|
If stat is nil, it is highly recommended to check err via the methods suggested
|
|
in the documentation for RealPath and RealPathExists.
|
|
*/
|
|
func RealPathExistsStat(path *string) (exists bool, stat os.FileInfo, err error) {
|
|
|
|
if exists, err = RealPathExists(path); err != nil {
|
|
return
|
|
}
|
|
|
|
if stat, err = os.Stat(*path); err != nil {
|
|
return
|
|
}
|
|
|
|
return
|
|
}
|
|
|
|
/*
|
|
SearchPaths gets a file/directory path list based on the provided criteria.
|
|
|
|
targetType defines what should be included in the path list.
|
|
It can consist of one or more (io/)fs.FileMode types OR'd together
|
|
(ensure they are part of (io/)fs.ModeType).
|
|
(You can use 0 to match regular files explicitly, and/or noFiles = true to exclude them.)
|
|
|
|
noFiles, if true, will explicitly filter out regular files from the path results.
|
|
(Normally they are *always* included regardless of targetType.)
|
|
|
|
basePtrn may be nil; if it isn't, it will be applied to *base names*
|
|
(that is, quux.txt rather than /foo/bar/baz/quux.txt).
|
|
|
|
pathPtrn is like basePtrn except it applies to the *entire* path,
|
|
not just the basename, if not nil (e.g. /foo/bar/baz/quux.txt,
|
|
not just quux.txt).
|
|
|
|
If age is not nil, it will be applied to the path object.
|
|
It will match older files/directories/etc. if olderThan is true,
|
|
otherwise it will match newer files/directories/etc.
|
|
(olderThan is not used otherwise.)
|
|
|
|
ageType is one or more Time* constants OR'd together to describe which timestamp type to check.
|
|
(Note that TimeCreated may not match if specified as it is only available on certain OSes,
|
|
kernel versions, and filesystems. This may lead to files being excluded that may have otherwise
|
|
been included.)
|
|
(You can use TimeAny to specify any supported time.)
|
|
*Any* matching timestamp of all specified (and supported) timestamp types matches,
|
|
so be judicious with your selection. They are processed in order of:
|
|
|
|
* btime (birth/creation time) (if supported)
|
|
* mtime (modification time -- contents have changed)
|
|
* ctime (OS-specific behavior; generally disk metadata has changed) (if supported)
|
|
* atime (access time)
|
|
|
|
olderThan (as mentioned above) will find paths *older* than age if true, otherwise *newer*.
|
|
|
|
now, if not nil, will be used to compare the age of files. (If nil, it will be populated at time of call.)
|
|
*/
|
|
func SearchFsPaths(
|
|
root string,
|
|
targetType fs.FileMode, noFiles bool,
|
|
basePtrn, pathPtrn *regexp.Regexp,
|
|
age *time.Duration, ageType pathTimeType, olderThan bool, now *time.Time,
|
|
) (foundPaths []string, err error) {
|
|
|
|
if age != nil {
|
|
if now == nil {
|
|
now = new(time.Time)
|
|
*now = time.Now()
|
|
}
|
|
}
|
|
|
|
if err = RealPath(&root); err != nil {
|
|
return
|
|
}
|
|
|
|
if err = filepath.WalkDir(
|
|
root,
|
|
func(path string, d fs.DirEntry, inErr error) (outErr error) {
|
|
|
|
var include bool
|
|
|
|
if inErr != nil {
|
|
outErr = inErr
|
|
return
|
|
}
|
|
|
|
if include, outErr = filterPath(
|
|
path, d,
|
|
targetType, noFiles,
|
|
basePtrn, pathPtrn,
|
|
age, ageType, olderThan, now,
|
|
); outErr != nil {
|
|
return
|
|
}
|
|
|
|
if include {
|
|
foundPaths = append(foundPaths, path)
|
|
}
|
|
|
|
return
|
|
},
|
|
); err != nil {
|
|
return
|
|
}
|
|
|
|
// And sort them.
|
|
slices.Sort(foundPaths)
|
|
|
|
return
|
|
}
|
|
|
|
/*
|
|
SearchFsPathsAsync is exactly like SearchFsPaths, but dispatches off concurrent
|
|
workers for the filtering logic instead of performing iteratively/recursively.
|
|
It may, in some cases, be *slightly more* performant and *slightly less* in others.
|
|
Additional options are documented below.
|
|
Note that unlike SearchFsPaths, the results written to foundPathsChan are not
|
|
guaranteed to be in any predictable order.
|
|
|
|
All channels are expected to have been initialized by the caller ahead of time,
|
|
and all provided channels will be closed upon completion (so they are only safe
|
|
to READ from after invoking SearchFsPathsAsync).
|
|
|
|
foundPathsChan is a channel to which matched filepaths will be written.
|
|
|
|
sem/semCtx are optional; if not nil, they can be used to limit/"batch" concurrent tasks.
|
|
(semCtx is the context.Context used for sem when acquiring. It may be nil;
|
|
one will be locally created if so.)
|
|
The default will be to spawn all filtering logic concurrently.
|
|
For very large directories, you almost assuredly do not want that -- it
|
|
can cause a significant amount of I/O and CPU wait.
|
|
(See https://pkg.go.dev/golang.org/x/sync/semaphore for details.)
|
|
|
|
wg *must not* be nil, and must be managed by the caller.
|
|
SearchFsPathsAsync will exit with no errors but no-op if wg is nil.
|
|
|
|
errChan will receive any/all encountered errors.
|
|
*/
|
|
func SearchFsPathsAsync(
|
|
root string,
|
|
targetType fs.FileMode, noFiles bool,
|
|
basePtrn, pathPtrn *regexp.Regexp,
|
|
age *time.Duration, ageType pathTimeType, olderThan bool, now *time.Time,
|
|
foundPathsChan chan string,
|
|
sem *semaphore.Weighted, semCtx context.Context,
|
|
wg *sync.WaitGroup,
|
|
errChan chan error,
|
|
) {
|
|
|
|
var err error
|
|
var localWg sync.WaitGroup
|
|
|
|
if wg == nil {
|
|
return
|
|
}
|
|
|
|
if age != nil {
|
|
if now == nil {
|
|
now = new(time.Time)
|
|
*now = time.Now()
|
|
}
|
|
}
|
|
|
|
if sem != nil && semCtx == nil {
|
|
semCtx = context.Background()
|
|
}
|
|
|
|
if err = filepath.WalkDir(
|
|
root,
|
|
func(path string, de fs.DirEntry, inErr error) (outErr error) {
|
|
localWg.Add(1)
|
|
wg.Add(1)
|
|
if sem != nil {
|
|
if err = sem.Acquire(semCtx, 1); err != nil {
|
|
return
|
|
}
|
|
}
|
|
|
|
go func(p string, d fs.DirEntry) {
|
|
var pErr error
|
|
var pInclude bool
|
|
|
|
defer localWg.Done()
|
|
defer wg.Done()
|
|
|
|
if sem != nil {
|
|
defer sem.Release(1)
|
|
}
|
|
|
|
if pInclude, pErr = filterPath(p, d, targetType, noFiles, basePtrn, pathPtrn, age, ageType, olderThan, now); pErr != nil {
|
|
errChan <- pErr
|
|
return
|
|
}
|
|
|
|
if pInclude {
|
|
foundPathsChan <- p
|
|
}
|
|
}(path, de)
|
|
|
|
return
|
|
},
|
|
); err != nil {
|
|
errChan <- err
|
|
return
|
|
}
|
|
|
|
go func() {
|
|
localWg.Wait()
|
|
close(foundPathsChan)
|
|
close(errChan)
|
|
}()
|
|
|
|
return
|
|
}
|
|
|
|
// filterPath applies the filter logic used by SearchFSPaths and SearchFsPathsAync.
|
|
func filterPath(
|
|
path string, d fs.DirEntry,
|
|
targetType fs.FileMode, noFiles bool,
|
|
basePtrn, pathPtrn *regexp.Regexp,
|
|
age *time.Duration, ageType pathTimeType, olderThan bool, now *time.Time,
|
|
) (include bool, err error) {
|
|
|
|
var typeMode fs.FileMode
|
|
var fi fs.FileInfo
|
|
var tspec times.Timespec
|
|
var typeFilter *bitmask.MaskBit = bitmask.NewMaskBitExplicit(uint(targetType))
|
|
|
|
if age != nil {
|
|
if now == nil {
|
|
now = new(time.Time)
|
|
*now = time.Now()
|
|
}
|
|
}
|
|
|
|
// patterns
|
|
if pathPtrn != nil {
|
|
if !pathPtrn.MatchString(path) {
|
|
return
|
|
}
|
|
}
|
|
if basePtrn != nil {
|
|
if !basePtrn.MatchString(filepath.Base(path)) {
|
|
return
|
|
}
|
|
}
|
|
|
|
// age
|
|
if age != nil {
|
|
if tspec, err = times.Stat(path); err != nil {
|
|
return
|
|
}
|
|
if !filterTimes(tspec, age, &ageType, olderThan, now) {
|
|
return
|
|
}
|
|
}
|
|
|
|
// fs object type (file, dir, etc.)
|
|
if fi, err = d.Info(); err != nil {
|
|
return
|
|
}
|
|
typeMode = fi.Mode().Type()
|
|
if typeMode == 0 && noFiles {
|
|
return
|
|
} else if typeMode != 0 {
|
|
if !typeFilter.HasFlag(bitmask.MaskBit(typeMode)) {
|
|
return
|
|
}
|
|
}
|
|
|
|
include = true
|
|
|
|
return
|
|
}
|
|
|
|
/*
|
|
filterTimes checks a times.Timespec of a file using:
|
|
* an age specified by the caller
|
|
* an ageType bitmask for types of times to compare
|
|
* an olderThan bool (if false, the file must be younger than)
|
|
* an optional "now" timestamp for the age derivation.
|
|
*/
|
|
func filterTimes(tspec times.Timespec, age *time.Duration, ageType *pathTimeType, olderThan bool, now *time.Time) (include bool) {
|
|
|
|
var curAge time.Duration
|
|
var mask *bitmask.MaskBit
|
|
var tfunc func(t *time.Duration) (match bool) = func(t *time.Duration) (match bool) {
|
|
if olderThan {
|
|
match = *t > *age
|
|
} else {
|
|
match = *t < *age
|
|
}
|
|
return
|
|
}
|
|
|
|
if tspec == nil || age == nil || ageType == nil {
|
|
return
|
|
}
|
|
|
|
mask = ageType.Mask()
|
|
|
|
if now == nil {
|
|
now = new(time.Time)
|
|
*now = time.Now()
|
|
}
|
|
|
|
// BTIME (if supported)
|
|
if tspec.HasBirthTime() && (mask.HasFlag(bitmask.MaskBit(TimeAny)) || mask.HasFlag(bitmask.MaskBit(TimeCreated))) {
|
|
curAge = now.Sub(tspec.BirthTime())
|
|
if include = tfunc(&curAge); include {
|
|
return
|
|
}
|
|
}
|
|
// MTIME
|
|
if mask.HasFlag(bitmask.MaskBit(TimeAny)) || mask.HasFlag(bitmask.MaskBit(TimeModified)) {
|
|
curAge = now.Sub(tspec.ModTime())
|
|
if include = tfunc(&curAge); include {
|
|
return
|
|
}
|
|
}
|
|
// CTIME (if supported)
|
|
if tspec.HasChangeTime() && (mask.HasFlag(bitmask.MaskBit(TimeAny)) || mask.HasFlag(bitmask.MaskBit(TimeChanged))) {
|
|
curAge = now.Sub(tspec.ChangeTime())
|
|
if include = tfunc(&curAge); include {
|
|
return
|
|
}
|
|
}
|
|
// ATIME
|
|
if mask.HasFlag(bitmask.MaskBit(TimeAny)) || mask.HasFlag(bitmask.MaskBit(TimeAccessed)) {
|
|
curAge = now.Sub(tspec.AccessTime())
|
|
if include = tfunc(&curAge); include {
|
|
return
|
|
}
|
|
}
|
|
|
|
return
|
|
}
|