ADDED:
* fsutils: better/additional fsattrs functionality
* paths: highly filterable filesystem searching
This commit is contained in:
brent saner 2024-11-16 01:28:24 -05:00
parent b82f0c02ed
commit eefe02afaf
Signed by: bts
GPG Key ID: 8C004C2F93481F6B
11 changed files with 384 additions and 92 deletions

3
fsutils/TODO Normal file
View File

@ -0,0 +1,3 @@
- XATTRS
(see FS_XFLAG_* in fs.h, FS_IOC_FSGETXATTR/FS_IOC_FSSETXATTR)
- fs label, UUID? (fs.h)

36
fsutils/consts.go Normal file
View File

@ -0,0 +1,36 @@
package fsutils

var (
/*
linuxFsAttrsListOrder defines the order the attributes are printed in per e2fsprogs.

See flags_name at https://git.kernel.org/pub/scm/fs/ext2/e2fsprogs.git/tree/lib/e2p/pf.c for order.
Up to date as of e2fsprogs v1.47.1, Linux 6.12-rc7.

The below are the struct field names for easier reflection.
*/
linuxFsAttrsListOrder []string = []string{
"SecureDelete",
"UnDelete",
"SyncUpdate",
"DirSync",
"Immutable",
"AppendOnly",
"NoDumpFile",
"NoUpdateAtime",
"CompressFile",
"EncFile",
"ReservedExt3",
"HashIdxDir",
"NoMergeTail",
"DirTop",
"Extents",
"NoCOWFile",
"DAX",
"CaseInsensitive",
"ReservedExt4c",
"UseParentProjId",
"VerityProtected",
"NoCompress",
}
)

View File

@ -1,8 +1,9 @@
//go:build linux

package fsutils

// https://github.com/torvalds/linux/blob/master/include/uapi/linux/fs.h "Inode flags (FS_IOC_GETFLAGS / FS_IOC_SETFLAGS)"
/*
https://github.com/torvalds/linux/blob/master/include/uapi/linux/fs.h "Inode flags (FS_IOC_GETFLAGS / FS_IOC_SETFLAGS)"
Up to date as of Linux 6.12-rc7.
*/
const (
SecureDelete fsAttr = 1 << iota // Secure deletion
UnDelete // Undelete

16
fsutils/funcs.go Normal file
View File

@ -0,0 +1,16 @@
package fsutils

// invertMap returns some handy consts remapping for easier lookups.
func invertMap(origMap map[string]fsAttr) (newMap map[fsAttr]string) {

if origMap == nil {
return
}
newMap = make(map[fsAttr]string)

for k, v := range origMap {
newMap[v] = k
}

return
}

96
fsutils/funcs_fsattrs.go Normal file
View File

@ -0,0 +1,96 @@
package fsutils

import (
`reflect`
`strings`
)

/*
String returns a string representation (comparable to lsattr(1)) of an FsAttrs.

Not all flags are represented, as this aims for compatibility with e2fsprogs/lsattr output.
*/
func (f *FsAttrs) String() (s string) {

// Flags have their short name printed if set, otherwise a '-' placeholder is used.
// https://git.kernel.org/pub/scm/fs/ext2/e2fsprogs.git/tree/lib/e2p/pf.c

var refType reflect.Type
var refVal reflect.Value
var refField reflect.StructField
var fieldVal reflect.Value
var tagVal string
var sb strings.Builder

if f == nil {
s = strings.Repeat("-", len(linuxFsAttrsListOrder))
return
}

refVal = reflect.ValueOf(*f)
refType = refVal.Type()
for _, fn := range linuxFsAttrsListOrder {
refField, _ = refType.FieldByName(fn)
tagVal = refField.Tag.Get("fsAttrShort")
if tagVal == "" || tagVal == "-" {
continue
}
fieldVal = refVal.FieldByName(fn)
if fieldVal.Bool() {
sb.WriteString(tagVal)
} else {
sb.WriteString("-")
}
}

s = sb.String()

return
}

/*
StringLong returns a more extensive/"human-friendly" representation (comparable to lsattr(1) wiih -l) of an Fsattrs.

Not all flags are represented, as this aims for compatibility with e2fsprogs/lsattr output.
*/
func (f *FsAttrs) StringLong() (s string) {

// The long names are separated via a commma then a space.
// If no attrs are set, the string "---" is used.
// https://git.kernel.org/pub/scm/fs/ext2/e2fsprogs.git/tree/lib/e2p/pf.c

var refType reflect.Type
var refVal reflect.Value
var refField reflect.StructField
var fieldVal reflect.Value
var tagVal string
var out []string

if f == nil {
s = strings.Repeat("-", 3)
return
}

refVal = reflect.ValueOf(*f)
refType = refVal.Type()
for _, fn := range linuxFsAttrsListOrder {
refField, _ = refType.FieldByName(fn)
tagVal = refField.Tag.Get("fsAttrLong")
if tagVal == "" || tagVal == "-" {
continue
}
fieldVal = refVal.FieldByName(fn)
if fieldVal.Bool() {
out = append(out, tagVal)
}
}

if out == nil || len(out) == 0 {
s = strings.Repeat("-", 3)
return
}

s = strings.Join(out, ", ")

return
}

View File

@ -15,12 +15,14 @@ func (f *FsAttrs) Apply(path string) (err error) {
var reflectVal reflect.Value
var fieldVal reflect.Value

var myPath string = path

if err = paths.RealPath(&myPath); err != nil {
if f == nil {
return
}
if file, err = os.Open(myPath); err != nil {

if err = paths.RealPath(&path); err != nil {
return
}
if file, err = os.Open(path); err != nil {
return
}
defer file.Close()

View File

@ -73,21 +73,6 @@ func getAttrs(f *os.File) (attrVal fsAttr, err error) {
return
}

// invertMap returns some handy consts remapping for easier lookups.
func invertMap(origMap map[string]fsAttr) (newMap map[fsAttr]string) {

if origMap == nil {
return
}
newMap = make(map[fsAttr]string)

for k, v := range origMap {
newMap[v] = k
}

return
}

// setAttrs is the unexported low-level syscall to set attributes. attrs may be OR'd.
func setAttrs(f *os.File, attrs fsAttr) (err error) {


View File

@ -6,36 +6,39 @@ import (

type fsAttr bitmask.MaskBit

// FsAttrs is a struct representation of filesystem attributes on Linux.
/*
FsAttrs is a struct representation of filesystem attributes on Linux.
Up to date as of Linux 6.12-rc7.
*/
type FsAttrs struct {
SecureDelete bool
UnDelete bool
CompressFile bool
SyncUpdate bool
Immutable bool
AppendOnly bool
NoDumpFile bool
NoUpdateAtime bool
IsDirty bool
CompressedClusters bool
NoCompress bool
EncFile bool
BtreeFmt bool
HashIdxDir bool
AfsDir bool
ReservedExt3 bool
NoMergeTail bool
DirSync bool
DirTop bool
ReservedExt4a bool
Extents bool
VerityProtected bool
LargeEaInode bool
ReservedExt4b bool
NoCOWFile bool
DAX bool
ReservedExt4c bool
UseParentProjId bool
CaseInsensitive bool
ReservedExt2 bool
SecureDelete bool `fsAttrShort:"s" fsAttrLong:"Secure_Deletion" fsAttrKern:"FS_SECRM_FL" json:"secure_delete" toml:"SecureDelete" yaml:"Secure Delete" xml:"secureDelete,attr"`
UnDelete bool `fsAttrShort:"u" fsAttrLong:"Undelete" fsAttrKern:"FS_UNRM_FL" json:"undelete" toml:"Undelete" yaml:"Undelete" xml:"undelete,attr"`
CompressFile bool `fsAttrShort:"c" fsAttrLong:"Compression_Requested" fsAttrKern:"FS_COMPR_FL" json:"compress" toml:"Compress" yaml:"Compress" xml:"compress,attr"`
SyncUpdate bool `fsAttrShort:"S" fsAttrLong:"Synchronous_Updates" fsAttrKern:"FS_SYNC_FL" json:"sync" toml:"SyncUpdate" yaml:"Synchronized Update" xml:"syncUpdate,attr"`
Immutable bool `fsAttrShort:"i" fsAttrLong:"Immutable" fsAttrKern:"FS_IMMUTABLE_FL" json:"immutable" toml:"Immutable" yaml:"Immutable" xml:"immutable,attr"`
AppendOnly bool `fsAttrShort:"a" fsAttrLong:"Append_Only" fsAttrKern:"FS_APPEND_FL" json:"append_only" toml:"AppendOnly" yaml:"Append Only" xml:"appendOnly,attr"`
NoDumpFile bool `fsAttrShort:"d" fsAttrLong:"No_Dump" fsAttrKern:"FS_NODUMP_FL" json:"no_dump" toml:"NoDump" yaml:"Disable Dumping" xml:"noDump,attr"`
NoUpdateAtime bool `fsAttrShort:"A" fsAttrLong:"No_Atime" fsAttrKern:"FS_NOATIME_FL" json:"no_atime" toml:"DisableAtime" yaml:"Disable Atime Updating" xml:"noAtime,attr"`
IsDirty bool `fsAttrShort:"-" fsAttrLong:"-" fsAttrKern:"FS_DIRTY_FL" json:"dirty" toml:"Dirty" yaml:"Dirty" xml:"dirty,attr"`
CompressedClusters bool `fsAttrShort:"-" fsAttrLong:"-" fsAttrKern:"FS_COMPRBLK_FL" json:"compress_clst" toml:"CompressedClusters" yaml:"Compressed Clusters" xml:"compressClst,attr"`
NoCompress bool `fsAttrShort:"m" fsAttrLong:"Dont_Compress" fsAttrKern:"FS_NOCOMP_FL" json:"no_compress" toml:"DisableCompression" yaml:"Disable Compression" xml:"noCompress,attr"`
EncFile bool `fsAttrShort:"E" fsAttrLong:"Encrypted" fsAttrKern:"FS_ENCRYPT_FL" json:"enc" toml:"Encrypted" yaml:"Encrypted" xml:"enc,attr"`
BtreeFmt bool `fsAttrShort:"-" fsAttrLong:"-" fsAttrKern:"FS_BTREE_FL" json:"btree" toml:"Btree" yaml:"Btree" xml:"btree,attr"`
HashIdxDir bool `fsAttrShort:"I" fsAttrLong:"Indexed_directory" fsAttrKern:"FS_INDEX_FL" json:"idx_dir" toml:"IdxDir" yaml:"Indexed Directory" xml:"idxDir,attr"`
AfsDir bool `fsAttrShort:"-" fsAttrLong:"-" fsAttrKern:"FS_IMAGIC_FL" json:"afs" toml:"AFS" yaml:"AFS" xml:"afs,attr"`
ReservedExt3 bool `fsAttrShort:"j" fsAttrLong:"Journaled_Data" fsAttrKern:"FS_JOURNAL_DATA_FL" json:"res_ext3" toml:"ReservedExt3" yaml:"Reserved Ext3" xml:"resExt3,attr"`
NoMergeTail bool `fsAttrShort:"t" fsAttrLong:"No_Tailmerging" fsAttrKern:"FS_NOTAIL_FL" json:"no_merge_tail" toml:"DisableTailmerging" yaml:"Disable Tailmerging" xml:"noMergeTail,attr"`
DirSync bool `fsAttrShort:"D" fsAttrLong:"Synchronous_Directory_Updates" fsAttrKern:"FS_DIRSYNC_FL" json:"dir_sync" toml:"DirSync" yaml:"Synchronized Directory Updates" xml:"dirSync,attr"`
DirTop bool `fsAttrShort:"T" fsAttrLong:"Top_of_Directory_Hierarchies" fsAttrKern:"FS_TOPDIR_FL" json:"dir_top" toml:"DirTop" yaml:"Top of Directory Hierarchies" xml:"dirTop,attr"`
ReservedExt4a bool `fsAttrShort:"-" fsAttrLong:"-" fsAttrKern:"FS_HUGE_FILE_FL" json:"res_ext4a" toml:"ReservedExt4A" yaml:"Reserved Ext4 A" xml:"resExt4a,attr"`
Extents bool `fsAttrShort:"e" fsAttrLong:"Extents" fsAttrKern:"FS_EXTENT_FL" json:"extents" toml:"Extents" yaml:"Extents" xml:"extents,attr"`
VerityProtected bool `fsAttrShort:"V" fsAttrLong:"Verity" fsAttrKern:"FS_VERITY_FL" json:"verity" toml:"Verity" yaml:"Verity Protected" xml:"verity,attr"`
LargeEaInode bool `fsAttrShort:"-" fsAttrLong:"-" fsAttrKern:"FS_EA_INODE_FL" json:"ea" toml:"EAInode" yaml:"EA Inode" xml:"ea,attr"`
ReservedExt4b bool `fsAttrShort:"-" fsAttrLong:"-" fsAttrKern:"FS_EOFBLOCKS_FL" json:"res_ext4b" toml:"ReservedExt4B" yaml:"Reserved Ext4 B" xml:"resExt4b,attr"`
NoCOWFile bool `fsAttrShort:"C" fsAttrLong:"No_COW" fsAttrKern:"FS_NOCOW_FL" json:"no_cow" toml:"NoCOW" yaml:"Disable COW" xml:"noCOW,attr"`
DAX bool `fsAttrShort:"x" fsAttrLong:"DAX" fsAttrKern:"FS_DAX_FL" json:"dax" toml:"DAX" yaml:"DAX" xml:"DAX,attr"`
ReservedExt4c bool `fsAttrShort:"N" fsAttrLong:"Inline_Data" fsAttrKern:"FS_INLINE_DATA_FL" json:"res_ext4c" toml:"ReservedExt4C" yaml:"Reserved Ext4 C" xml:"resExt4c,attr"`
UseParentProjId bool `fsAttrShort:"P" fsAttrLong:"Project_Hierarchy" fsAttrKern:"FS_PROJINHERIT_FL" json:"parent_proj_id" toml:"ParentProjId" yaml:"Use Parent Project ID" xml:"parentProjId,attr"`
CaseInsensitive bool `fsAttrShort:"F" fsAttrLong:"Casefold" fsAttrKern:"FS_CASEFOLD_FL" json:"case_ins" toml:"CaseInsensitive" yaml:"Case Insensitive" xml:"caseIns,attr"`
ReservedExt2 bool `fsAttrShort:"-" fsAttrLong:"-" fsAttrKern:"FS_RESERVED_FL" json:"res_ext2" toml:"ReservedExt2" yaml:"Reserved Ext2" xml:"resExt2,attr"`
}

1
go.mod
View File

@ -6,6 +6,7 @@ require (
github.com/davecgh/go-spew v1.1.1
github.com/djherbis/times v1.6.0
github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510
golang.org/x/sync v0.9.0
golang.org/x/sys v0.26.0
honnef.co/go/augeas v0.0.0-20161110001225-ca62e35ed6b8
r00t2.io/goutils v1.7.1

2
go.sum
View File

@ -6,6 +6,8 @@ github.com/djherbis/times v1.6.0/go.mod h1:gOHeRAz2h+VJNZ5Gmc/o7iD9k4wW7NMVqieYC
github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 h1:El6M4kTTCOh6aBiKaUGG7oYTSPP8MxqL4YI3kZKwcP4=
github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510/go.mod h1:pupxD2MaaD3pAXIBCelhxNneeOaAeabZDe5s4K6zSpQ=
github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
golang.org/x/sync v0.9.0 h1:fEo0HyrW1GIgZdpbhCRO0PkJajUS5H9IFUztCgEo2jQ=
golang.org/x/sync v0.9.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
golang.org/x/sys v0.0.0-20211216021012-1d35b9e2eb4e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220615213510-4f61da869c0c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.26.0 h1:KHjCJyddX0LoSTb3J+vWpupP9p0oznkqVk/IfjymZbo=

View File

@ -19,6 +19,7 @@
package paths

import (
`context`
"errors"
"fmt"
"io/fs"
@ -28,11 +29,13 @@ import (
`regexp`
`slices`
"strings"
`sync`
`time`

// "syscall"

`github.com/djherbis/times`
`golang.org/x/sync/semaphore`
`r00t2.io/goutils/bitmask`
)

@ -311,15 +314,22 @@ func RealPathExistsStat(path *string) (exists bool, stat os.FileInfo, err error)
* atime (access time)

olderThan (as mentioned above) will find paths *older* than age if true, otherwise *newer*.

now, if not nil, will be used to compare the age of files. (If nil, it will be populated at time of call.)
*/
func SearchFsPaths(
root string,
targetType fs.FileMode, noFiles bool,
basePtrn, pathPtrn *regexp.Regexp,
age *time.Duration, ageType pathTimeType, olderThan bool,
age *time.Duration, ageType pathTimeType, olderThan bool, now *time.Time,
) (foundPaths []string, err error) {

var now time.Time = time.Now()
if age != nil {
if now == nil {
now = new(time.Time)
*now = time.Now()
}
}

if err = RealPath(&root); err != nil {
return
@ -329,52 +339,25 @@ func SearchFsPaths(
root,
func(path string, d fs.DirEntry, inErr error) (outErr error) {

var typeMode fs.FileMode
var fi fs.FileInfo
var tspec times.Timespec
var typeFilter *bitmask.MaskBit = bitmask.NewMaskBitExplicit(uint(targetType))
var include bool

if inErr != nil {
outErr = inErr
return
}

// patterns
if pathPtrn != nil {
if !pathPtrn.MatchString(path) {
return
}
}
if basePtrn != nil {
if !basePtrn.MatchString(filepath.Base(path)) {
return
}
}

// age
if age != nil {
if tspec, outErr = times.Stat(path); outErr != nil {
return
}
if !filterTimes(tspec, age, &ageType, olderThan, &now) {
return
}
}

// fs object type (file, dir, etc.)
if fi, outErr = d.Info(); outErr != nil {
return
}
typeMode = fi.Mode().Type()
if typeMode == 0 && noFiles {
return
}
if !typeFilter.HasFlag(bitmask.MaskBit(typeMode)) {
if include, outErr = filterPath(
path, d,
targetType, noFiles,
basePtrn, pathPtrn,
age, ageType, olderThan, now,
); outErr != nil {
return
}

// All filters passed at this point.
foundPaths = append(foundPaths, path)
if include {
foundPaths = append(foundPaths, path)
}

return
},
@ -388,6 +371,170 @@ func SearchFsPaths(
return
}

/*
SearchFsPathsAsync is exactly like SearchFsPaths, but dispatches off concurrent
workers for the filtering logic instead of performing iteratively/recursively.
It may, in some cases, be *slightly more* performant and *slightly less* in others.
Additional options are documented below.
Note that unlike SearchFsPaths, the results written to foundPathsChan are not
guaranteed to be in any predictable order.

All channels are expected to have been initialized by the caller ahead of time,
and all provided channels will be closed upon completion (so they are only safe
to READ from after invoking SearchFsPathsAsync).

foundPathsChan is a channel to which matched filepaths will be written.

sem/semCtx are optional; if not nil, they can be used to limit/"batch" concurrent tasks.
(semCtx is the context.Context used for sem when acquiring. It may be nil;
one will be locally created if so.)
The default will be to spawn all filtering logic concurrently.
For very large directories, you almost assuredly do not want that -- it
can cause a significant amount of I/O and CPU wait.
(See https://pkg.go.dev/golang.org/x/sync/semaphore for details.)

wg *must not* be nil, and must be managed by the caller.
SearchFsPathsAsync will exit with no errors but no-op if wg is nil.

errChan will receive any/all encountered errors.
*/
func SearchFsPathsAsync(
root string,
targetType fs.FileMode, noFiles bool,
basePtrn, pathPtrn *regexp.Regexp,
age *time.Duration, ageType pathTimeType, olderThan bool, now *time.Time,
foundPathsChan chan string,
sem *semaphore.Weighted, semCtx context.Context,
wg *sync.WaitGroup,
errChan chan error,
) {

var err error
var localWg sync.WaitGroup

if wg == nil {
return
}

if age != nil {
if now == nil {
now = new(time.Time)
*now = time.Now()
}
}

if sem != nil && semCtx == nil {
semCtx = context.Background()
}

if err = filepath.WalkDir(
root,
func(path string, de fs.DirEntry, inErr error) (outErr error) {
localWg.Add(1)
wg.Add(1)
if sem != nil {
if err = sem.Acquire(semCtx, 1); err != nil {
return
}
}

go func(p string, d fs.DirEntry) {
var pErr error
var pInclude bool

defer localWg.Done()
defer wg.Done()

if sem != nil {
defer sem.Release(1)
}

if pInclude, pErr = filterPath(p, d, targetType, noFiles, basePtrn, pathPtrn, age, ageType, olderThan, now); pErr != nil {
errChan <- pErr
return
}

if pInclude {
foundPathsChan <- p
}
}(path, de)

return
},
); err != nil {
errChan <- err
return
}

go func() {
localWg.Wait()
close(foundPathsChan)
close(errChan)
}()

return
}

// filterPath applies the filter logic used by SearchFSPaths and SearchFsPathsAync.
func filterPath(
path string, d fs.DirEntry,
targetType fs.FileMode, noFiles bool,
basePtrn, pathPtrn *regexp.Regexp,
age *time.Duration, ageType pathTimeType, olderThan bool, now *time.Time,
) (include bool, err error) {

var typeMode fs.FileMode
var fi fs.FileInfo
var tspec times.Timespec
var typeFilter *bitmask.MaskBit = bitmask.NewMaskBitExplicit(uint(targetType))

if age != nil {
if now == nil {
now = new(time.Time)
*now = time.Now()
}
}

// patterns
if pathPtrn != nil {
if !pathPtrn.MatchString(path) {
return
}
}
if basePtrn != nil {
if !basePtrn.MatchString(filepath.Base(path)) {
return
}
}

// age
if age != nil {
if tspec, err = times.Stat(path); err != nil {
return
}
if !filterTimes(tspec, age, &ageType, olderThan, now) {
return
}
}

// fs object type (file, dir, etc.)
if fi, err = d.Info(); err != nil {
return
}
typeMode = fi.Mode().Type()
if typeMode == 0 && noFiles {
return
} else if typeMode != 0 {
if !typeFilter.HasFlag(bitmask.MaskBit(typeMode)) {
return
}
}

include = true

return
}

/*
filterTimes checks a times.Timespec of a file using:
* an age specified by the caller