go_sysutils/paths/funcs.go

600 lines
15 KiB
Go
Raw Permalink Normal View History

2020-09-26 03:20:17 -04:00
/*
SysUtils - a library to assist with various system-related functions
Copyright (C) 2020 Brent Saner
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
2020-09-06 03:23:14 -04:00
package paths
import (
`context`
2020-09-06 03:23:14 -04:00
"errors"
2022-08-26 02:41:13 -04:00
"fmt"
"io/fs"
2020-09-06 03:23:14 -04:00
"os"
"os/user"
"path/filepath"
`regexp`
`slices`
2022-08-26 02:41:13 -04:00
"strings"
`sync`
`time`
2021-02-19 03:20:50 -05:00
// "syscall"
`github.com/djherbis/times`
`golang.org/x/sync/semaphore`
`r00t2.io/goutils/bitmask`
2020-09-06 03:23:14 -04:00
)
/*
ExpandHome resolves a leading tilde (~) in path to a home directory and
writes the result back through the pointer.

A bare "~" prefix refers to the current user's home directory, while
"~name" refers to the home directory of the user called name.
Paths that do not begin with a tilde are left untouched.
An empty path is rejected with an error.

"Nested" user paths (~someuser/somechroot/~someotheruser) are not supported,
as home directories are expected to be absolute paths.
*/
func ExpandHome(path *string) (err error) {

	var account *user.User

	switch {
	case len(*path) == 0:
		err = errors.New("empty path")
		return
	case (*path)[0] != '~':
		// Nothing to expand.
		return
	}

	// The owner is taken from the path itself (the text between the tilde and
	// the first separator) rather than from the process's real/effective UID.
	// Inspired by https://stackoverflow.com/a/43578461/733214
	prefix, remainder, _ := strings.Cut(*path, string(os.PathSeparator))

	if login := strings.TrimPrefix(prefix, "~"); login != "" {
		account, err = user.Lookup(login)
	} else {
		account, err = user.Current()
	}
	if err != nil {
		return
	}

	*path = filepath.Join(account.HomeDir, remainder)

	return
}
/*
GetFirst is the file equivalent of envs.GetFirst.

Each entry of paths is tried in order and the content of the first one
that exists is returned. If that first existing path is a directory,
content is nil while both isDir and ok are true.
If none of the paths exist, ok is false.

Results are best-effort: permissions and similar conditions can
lead to an inaccurate answer.

This simply calls GetFirstWithRef and drops the returned index.
*/
func GetFirst(paths []string) (content []byte, isDir, ok bool) {

	data, dir, found, _ := GetFirstWithRef(paths)

	return data, dir, found
}
/*
GetFirstWithRef is the file equivalent of envs.GetFirstWithRef.

It behaves exactly like GetFirst, but with an additional returned value, idx,
which is the index in paths of the entry that was found.
idx is -1 (and ok is false) if nothing was found.

Entries that cannot be resolved, do not exist, or (for non-directories)
cannot be read are skipped.
The caller's paths slice is never modified.

As always, results are not guaranteed due to permissions, etc.
potentially returning an inaccurate result.
*/
func GetFirstWithRef(paths []string) (content []byte, isDir, ok bool, idx int) {

	var exists bool
	var stat os.FileInfo
	var data []byte
	var err error

	idx = -1

	// Ranging over a nil/empty slice is a no-op, so no explicit nil check is needed.
	// p is a per-iteration copy; normalizing it in-place via the pointer below
	// does not write back into the caller's slice.
	for i, p := range paths {
		if exists, stat, err = RealPathExistsStat(&p); err != nil || !exists || stat == nil {
			continue
		}
		if !stat.IsDir() {
			// Read into a scratch variable so a failed/partial read can never
			// leak into the returned content of a later (or no) match.
			if data, err = os.ReadFile(p); err != nil {
				continue
			}
			content = data
		}
		isDir = stat.IsDir()
		ok = true
		idx = i
		return
	}

	return
}
/*
2022-08-26 02:41:13 -04:00
MakeDirIfNotExist will create a directory at a given path if it doesn't exist.
2022-08-26 02:41:13 -04:00
See also the documentation for RealPath.
This is a bit more sane option than os.MkdirAll as it will normalize paths a little better.
*/
func MakeDirIfNotExist(path string) (err error) {
var stat os.FileInfo
var exists bool
var locPath string = path
if exists, stat, err = RealPathExistsStat(&locPath); err != nil {
2020-09-06 03:23:14 -04:00
if !exists {
// This, at least as of golang 1.15, uses the user's umask.
// It does not actually create a dir with 0777.
// It's up to the caller to do an os.Chmod() on the path after, if desired.
if err = os.MkdirAll(locPath, 0777); err != nil {
return
}
err = nil
return
2020-09-06 03:23:14 -04:00
} else {
return
2020-09-06 03:23:14 -04:00
}
}
2020-09-06 03:23:14 -04:00
// So it exists, but it probably isn't a dir.
if !stat.Mode().IsDir() {
err = errors.New(fmt.Sprintf("path %v exists but is not a directory", locPath))
return
2022-08-26 02:41:13 -04:00
} else {
return
2020-09-06 03:23:14 -04:00
}
2020-09-06 03:23:14 -04:00
// This should probably never happen. Probably.
err = errors.New("undefined")
return
2020-09-06 03:23:14 -04:00
}
/*
2022-08-26 02:41:13 -04:00
RealPath will transform a given path into the very best guess for an absolute path in-place.
2022-08-26 02:41:13 -04:00
It is recommended to check err (if not nil) for an invalid path error. If this is true, the
path syntax/string itself is not supported on the runtime OS. This can be done via:
2022-08-26 02:41:13 -04:00
if errors.Is(err, fs.ErrInvalid) {...}
*/
func RealPath(path *string) (err error) {
if err = ExpandHome(path); err != nil {
return
2020-09-06 03:23:14 -04:00
}
if *path, err = filepath.Abs(*path); err != nil {
return
2020-09-06 03:23:14 -04:00
}
return
2020-09-06 03:23:14 -04:00
}
/*
2022-08-26 02:41:13 -04:00
RealPathExists is like RealPath, but will also return a boolean as to whether the path
actually exists or not.
2022-08-26 02:41:13 -04:00
Note that err *may* be os.ErrPermission/fs.ErrPermission, in which case the exists value
cannot be trusted as a permission error occurred when trying to stat the path - if the
calling user/process does not have read permission on e.g. a parent directory, then
exists may be false but the path may actually exist. This condition can be checked via
via:
2022-08-26 02:41:13 -04:00
if errors.Is(err, fs.ErrPermission) {...}
2022-08-26 02:41:13 -04:00
See also the documentation for RealPath.
2022-08-26 02:41:13 -04:00
In those cases, it may be preferable to use RealPathExistsStat and checking stat for nil.
*/
func RealPathExists(path *string) (exists bool, err error) {
if err = RealPath(path); err != nil {
return
2020-09-06 03:23:14 -04:00
}
if _, err = os.Stat(*path); err != nil {
if errors.Is(err, fs.ErrNotExist) {
err = nil
}
return
2020-09-06 03:23:14 -04:00
}
exists = true
return
2020-09-06 03:23:14 -04:00
}
/*
2022-08-26 02:41:13 -04:00
RealPathExistsStat is like RealPathExists except it will also return the os.FileInfo
for the path (assuming it exists).
2022-08-26 02:41:13 -04:00
If stat is nil, it is highly recommended to check err via the methods suggested
in the documentation for RealPath and RealPathExists.
*/
func RealPathExistsStat(path *string) (exists bool, stat os.FileInfo, err error) {
if exists, err = RealPathExists(path); err != nil {
return
2020-09-06 03:23:14 -04:00
}
if stat, err = os.Stat(*path); err != nil {
return
2020-09-06 03:23:14 -04:00
}
return
2020-09-06 03:23:14 -04:00
}
/*
SearchFsPaths walks the tree under root and returns a sorted list of the
file/directory paths that satisfy the provided criteria.

root is normalized first (see RealPath).

targetType selects which kinds of filesystem objects are included.
It can consist of one or more (io/)fs.FileMode types OR'd together
(ensure they are part of (io/)fs.ModeType).
(You can use 0 to match regular files explicitly, and/or noFiles = true to exclude them.)

noFiles, if true, explicitly filters regular files out of the results.
(Normally they are *always* included regardless of targetType.)

basePtrn may be nil; if it isn't, it is matched against *base names*
(that is, quux.txt rather than /foo/bar/baz/quux.txt).

pathPtrn is like basePtrn except that, if not nil, it is matched against
the *entire* path (e.g. /foo/bar/baz/quux.txt, not just quux.txt).

If age is not nil, it is applied to the path object's timestamps.
Paths older than age match if olderThan is true, otherwise paths newer
than age match. (olderThan is not used if age is nil.)

ageType is one or more Time* constants OR'd together to describe which timestamp type(s) to check.
(Note that TimeCreated may not match if specified as it is only available on certain OSes,
kernel versions, and filesystems. This may lead to files being excluded that may have otherwise
been included.)
(You can use TimeAny to specify any supported time.)
*Any* matching timestamp of all specified (and supported) timestamp types matches,
so be judicious with your selection. They are processed in order of:

  - btime (birth/creation time) (if supported)
  - mtime (modification time -- contents have changed)
  - ctime (OS-specific behavior; generally disk metadata has changed) (if supported)
  - atime (access time)

now, if not nil, is the reference point used to compute ages.
(If nil and age is set, it is populated at time of call.)

The walk stops at, and returns, the first error encountered.
*/
func SearchFsPaths(
	root string,
	targetType fs.FileMode, noFiles bool,
	basePtrn, pathPtrn *regexp.Regexp,
	age *time.Duration, ageType pathTimeType, olderThan bool, now *time.Time,
) (foundPaths []string, err error) {

	// Pin a single reference time so every path is compared against the same "now".
	if age != nil && now == nil {
		ts := time.Now()
		now = &ts
	}

	if err = RealPath(&root); err != nil {
		return
	}

	visit := func(path string, d fs.DirEntry, inErr error) (outErr error) {
		if inErr != nil {
			return inErr
		}

		matched, fErr := filterPath(
			path, d,
			targetType, noFiles,
			basePtrn, pathPtrn,
			age, ageType, olderThan, now,
		)
		if fErr != nil {
			return fErr
		}
		if matched {
			foundPaths = append(foundPaths, path)
		}

		return nil
	}

	if err = filepath.WalkDir(root, visit); err != nil {
		return
	}

	// Hand the results back in sorted order.
	slices.Sort(foundPaths)

	return
}
/*
SearchFsPathsAsync is exactly like SearchFsPaths, but dispatches off concurrent
workers for the filtering logic instead of performing iteratively/recursively.
It may, in some cases, be *slightly more* performant and *slightly less* in others.
Additional options are documented below.
Note that unlike SearchFsPaths, the results written to foundPathsChan are not
guaranteed to be in any predictable order.

As with SearchFsPaths, root is normalized first (see RealPath).

All channels are expected to have been initialized by the caller ahead of time,
and all provided channels will be closed upon completion -- including when the
walk ends early because of an error. Callers must only ever READ from them,
and should drain both until they are closed.

foundPathsChan is a channel to which matched filepaths will be written.

sem/semCtx are optional; if not nil, they can be used to limit/"batch" concurrent tasks.
(semCtx is the context.Context used for sem when acquiring. It may be nil;
one will be locally created if so.)
The default will be to spawn all filtering logic concurrently.
For very large directories, you almost assuredly do not want that -- it
can cause a significant amount of I/O and CPU wait.
(See https://pkg.go.dev/golang.org/x/sync/semaphore for details.)
If acquiring from sem fails (e.g. semCtx is cancelled), the walk stops and
the error is sent to errChan.

wg *must not* be nil, and must be managed by the caller.
It is incremented once per dispatched worker and decremented when that worker finishes.
SearchFsPathsAsync will exit with no errors but no-op if wg is nil
(in which case the channels are NOT closed).

errChan will receive any/all encountered errors: errors from individual
filter workers, and the error (if any) that terminated the walk itself.
*/
func SearchFsPathsAsync(
	root string,
	targetType fs.FileMode, noFiles bool,
	basePtrn, pathPtrn *regexp.Regexp,
	age *time.Duration, ageType pathTimeType, olderThan bool, now *time.Time,
	foundPathsChan chan string,
	sem *semaphore.Weighted, semCtx context.Context,
	wg *sync.WaitGroup,
	errChan chan error,
) {

	var err error
	var localWg sync.WaitGroup

	if wg == nil {
		return
	}

	// Pin a single reference time so every worker compares against the same "now".
	if age != nil && now == nil {
		now = new(time.Time)
		*now = time.Now()
	}

	if sem != nil && semCtx == nil {
		semCtx = context.Background()
	}

	if err = RealPath(&root); err == nil {
		err = filepath.WalkDir(
			root,
			func(path string, de fs.DirEntry, inErr error) (outErr error) {

				// On a walk error the DirEntry may be nil; never hand that to a worker.
				if inErr != nil {
					outErr = inErr
					return
				}

				if sem != nil {
					if outErr = sem.Acquire(semCtx, 1); outErr != nil {
						return
					}
				}

				// Only count a worker once it is certain to be started, so that
				// every Add is always paired with a Done.
				localWg.Add(1)
				wg.Add(1)

				go func(p string, d fs.DirEntry) {
					var pErr error
					var pInclude bool

					defer localWg.Done()
					defer wg.Done()
					if sem != nil {
						defer sem.Release(1)
					}

					if pInclude, pErr = filterPath(p, d, targetType, noFiles, basePtrn, pathPtrn, age, ageType, olderThan, now); pErr != nil {
						errChan <- pErr
						return
					}

					if pInclude {
						foundPathsChan <- p
					}
				}(path, de)

				return
			},
		)
	}

	// Always finish by closing the channels, whether or not the walk succeeded.
	// The walk error is sent from here (before the close) so that it can neither
	// race with the close nor block this function from returning.
	go func(walkErr error) {
		localWg.Wait()
		if walkErr != nil {
			errChan <- walkErr
		}
		close(foundPathsChan)
		close(errChan)
	}(err)

	return
}
/*
filterPath applies the filter logic shared by SearchFsPaths and SearchFsPathsAsync
to a single path and reports whether it should be included in the results.

Checks are applied in this order, stopping at the first one that rejects the path:

  - pathPtrn against the full path (if not nil)
  - basePtrn against the base name (if not nil)
  - age/ageType/olderThan against the path's timestamps (if age is not nil)
  - the filesystem object type: regular files are included unless noFiles is
    true; anything else must be selected by targetType.

err is set if the path's timestamps or file info cannot be retrieved.
*/
func filterPath(
	path string, d fs.DirEntry,
	targetType fs.FileMode, noFiles bool,
	basePtrn, pathPtrn *regexp.Regexp,
	age *time.Duration, ageType pathTimeType, olderThan bool, now *time.Time,
) (include bool, err error) {

	var info fs.FileInfo
	var stamps times.Timespec
	var wanted *bitmask.MaskBit = bitmask.NewMaskBitExplicit(uint(targetType))

	if age != nil && now == nil {
		ts := time.Now()
		now = &ts
	}

	// Name patterns.
	if pathPtrn != nil && !pathPtrn.MatchString(path) {
		return
	}
	if basePtrn != nil && !basePtrn.MatchString(filepath.Base(path)) {
		return
	}

	// Timestamps.
	if age != nil {
		if stamps, err = times.Stat(path); err != nil {
			return
		}
		if !filterTimes(stamps, age, &ageType, olderThan, now) {
			return
		}
	}

	// Filesystem object type (file, dir, etc.).
	if info, err = d.Info(); err != nil {
		return
	}
	if kind := info.Mode().Type(); kind == 0 {
		// No type bits set: a regular file.
		include = !noFiles
	} else {
		include = wanted.HasFlag(bitmask.MaskBit(kind))
	}

	return
}
/*
filterTimes reports whether any selected timestamp in a file's times.Timespec
satisfies an age constraint. It uses:

  - an age specified by the caller
  - an ageType bitmask selecting which timestamps to compare
  - an olderThan bool (if true the timestamp's age must be greater than age,
    if false it must be less than age)
  - an optional "now" timestamp from which ages are derived
    (the current time is used if it is nil).

Timestamps are tried in the order birth, modification, change, access; birth
and change times are only considered when tspec reports supporting them.
The first selected timestamp that satisfies the constraint makes include true.

include is always false if tspec, age, or ageType is nil.
*/
func filterTimes(tspec times.Timespec, age *time.Duration, ageType *pathTimeType, olderThan bool, now *time.Time) (include bool) {

	if tspec == nil || age == nil || ageType == nil {
		return
	}

	mask := ageType.Mask()

	if now == nil {
		ts := time.Now()
		now = &ts
	}

	// wants reports whether the given timestamp type was selected by the caller.
	wants := func(flag bitmask.MaskBit) bool {
		return mask.HasFlag(bitmask.MaskBit(TimeAny)) || mask.HasFlag(flag)
	}

	// matches reports whether a timestamp falls on the requested side of age.
	matches := func(ts time.Time) bool {
		elapsed := now.Sub(ts)
		if olderThan {
			return elapsed > *age
		}
		return elapsed < *age
	}

	// Cases are evaluated top to bottom and short-circuit, so a timestamp is
	// only fetched if it is supported and selected.
	switch {
	case tspec.HasBirthTime() && wants(bitmask.MaskBit(TimeCreated)) && matches(tspec.BirthTime()):
		// BTIME (if supported)
	case wants(bitmask.MaskBit(TimeModified)) && matches(tspec.ModTime()):
		// MTIME
	case tspec.HasChangeTime() && wants(bitmask.MaskBit(TimeChanged)) && matches(tspec.ChangeTime()):
		// CTIME (if supported)
	case wants(bitmask.MaskBit(TimeAccessed)) && matches(tspec.AccessTime()):
		// ATIME
	default:
		return
	}

	include = true

	return
}