v1.14.1

FIXED: * `envs/funcs.go:78:3: unknown field IgnoreWhiteSpace in struct literal of type EnvErrNoVal, but does have IgnoreWhitespace` * `envs/funcs_enverrnoval.go:15:8: sb.WasFound undefined (type *strings.Builder has no field or method WasFound)`
2025-08-13 14:54:49 -04:00
parent 8260e4fa93
commit e797a14911
16 changed files with 790 additions and 28 deletions
--- a/pdsh/consts.go
+++ b/pdsh/consts.go
@@ -0,0 +1,18 @@
+package pdsh
+
+import (
+	"regexp"
+
+	"r00t2.io/goutils/remap"
+)
+
+const (
+	// dshGrpPathEnv is the name of the environment variable holding a path-list of DSH group directories.
+	dshGrpPathEnv string = "DSHGROUP_PATH"
+)
+
+// DSH Groups
+var (
+	// dshGrpDefGrpDir is the default group directory, used when DSHGROUP_PATH is not set.
+	dshGrpDefGrpDir    string       = "/etc/dsh/group"
+	// dshGrpInclPtrn matches an `#include <PATH|GROUP>` line; the include target is captured in the "incl" group.
+	dshGrpInclPtrn     *remap.ReMap = &remap.ReMap{Regexp: regexp.MustCompile(`^\s*#include\s+(?P<incl>.+)$`)}
+	/*
+		dshGrpSubTokenPtrn matches a single range specifier (e.g. `3`, `03`, `3-5`, `01-10`).
+		Leading zeroes are captured in "start_pad"/"end_pad" and the numeric values in "start"/"end".
+		Because "start"/"end" must begin with 1-9, a bare zero value (e.g. `0` or `00`) is captured
+		entirely by the corresponding pad group.
+	*/
+	dshGrpSubTokenPtrn *remap.ReMap = &remap.ReMap{Regexp: regexp.MustCompile(`^(?P<start_pad>0*)(?P<start>[1-9]+[0-9]*)?(?:-(?P<end_pad>0*)(?P<end>[1-9]+[0-9]*))?$`)}
+)
--- a/pdsh/docs.go
+++ b/pdsh/docs.go
@@ -0,0 +1,16 @@
+/*
+Package pdsh (!! WIP !!) provides PDSH-compatible functionality for parsing group files.
+
+Note that this library will *only* source and parse PDSH-compatible host/group files,
+it will not actually connect to anything.
+It simply provides ways of returning lists of hosts using generation rules/patterns.
+
+Currently, the only supported PDSH module is `misc/dshgroup` but additional/all other
+host list modules are planned.
+
+For details, see:
+
+  - https://github.com/chaos/pdsh/
+  - https://github.com/chaos/pdsh/blob/master/doc/pdsh.1.in
+*/
+package pdsh
--- a/pdsh/errs.go
+++ b/pdsh/errs.go
@@ -0,0 +1,10 @@
+package pdsh
+
+import (
+	"errors"
+)
+
+var (
+	// ErrInvalidDshGrpSyntax indicates structurally invalid input (e.g. unbalanced/nested brackets or a malformed `#include`).
+	ErrInvalidDshGrpSyntax error = errors.New("invalid dsh group file syntax")
+	// ErrInvalidDshGrpPtrn indicates an invalid host pattern (e.g. a disallowed hostname character or a malformed range specifier).
+	ErrInvalidDshGrpPtrn   error = errors.New("invalid dsh group pattern syntax")
+)
--- a/pdsh/funcs_dshgrouplister.go
+++ b/pdsh/funcs_dshgrouplister.go
@@ -0,0 +1,174 @@
+package pdsh
+
+import (
+	"io/fs"
+	"os"
+	"path/filepath"
+	"strings"
+
+	"r00t2.io/sysutils/envs"
+	"r00t2.io/sysutils/paths"
+)
+
+/*
+Evaluate returns a list of directories and files that would be searched/read with
+the given call and DshGroupLister configuration, in order of parsing.
+
+The behavior is the same as DshGroupLister.GroupedHosts, including searchPaths.
+If DshGroupLister.ForceLegacy is false, include files will also be parsed in.
+(This may incur slightly additional processing time.)
+
+Only existing dirs/files are returned. Symlinks are evaluated to their target.
+
+If dedupe is true, deduplication is performed on the returned files (dirs are never deduplicated).
+This adds some cycles, but may be desired if you make heavy use of symlinks.
+
+If an error is returned, dirs and files may be partially populated.
+*/
+func (d *DshGroupLister) Evaluate(dedupe bool, searchPaths ...string) (dirs, files []string, err error) {
+
+	var exists bool
+	var spl []string
+	var dPath string
+	var fPath string
+	var incls []string
+	var de fs.DirEntry
+	var stat fs.FileInfo
+	var entries []fs.DirEntry
+	var fpathMap map[string]bool = make(map[string]bool)
+
+	// TODO: Does/how does pdsh resolve relative symlinks?
+
+	// Dirs first. (Ranging over nil/empty searchPaths is a no-op.)
+	for _, dPath = range searchPaths {
+		if _, exists, _, _, stat, err = paths.RealPathExistsStatTarget(&dPath, "."); err != nil {
+			return
+		}
+		// exists is checked first so stat is never dereferenced for a missing path.
+		if !exists || !stat.IsDir() {
+			continue
+		}
+		dirs = append(dirs, dPath)
+	}
+	if !d.NoHome && envs.HasEnv("HOME") {
+		// So pdsh actually checks $HOME, it doesn't pull the homedir for the user.
+		dPath = filepath.Join(os.Getenv("HOME"), ".dsh", "group")
+		if _, exists, _, _, stat, err = paths.RealPathExistsStatTarget(&dPath, "."); err != nil {
+			return
+		}
+		if exists && stat.IsDir() {
+			dirs = append(dirs, dPath)
+		}
+	}
+	if !d.NoEnv && envs.HasEnv(dshGrpPathEnv) {
+		spl = strings.Split(os.Getenv(dshGrpPathEnv), string(os.PathListSeparator))
+		for _, dPath = range spl {
+			// Skip empty path-list elements (e.g. "a::b" or a trailing separator).
+			if strings.TrimSpace(dPath) == "" {
+				continue
+			}
+			if _, exists, _, _, stat, err = paths.RealPathExistsStatTarget(&dPath, "."); err != nil {
+				return
+			}
+			if !exists || !stat.IsDir() {
+				continue
+			}
+			dirs = append(dirs, dPath)
+		}
+	}
+	// The default dir is only considered when DSHGROUP_PATH is not defined at all.
+	if !d.NoDefault && !envs.HasEnv(dshGrpPathEnv) {
+		dPath = dshGrpDefGrpDir
+		if _, exists, _, _, stat, err = paths.RealPathExistsStatTarget(&dPath, "."); err != nil {
+			return
+		}
+		if exists && stat.IsDir() {
+			dirs = append(dirs, dPath)
+		}
+	}
+
+	// Then files. Do *not* walk the dirs; only first-level is parsed by pdsh so this does the same.
+	for _, dPath = range dirs {
+		if entries, err = os.ReadDir(dPath); err != nil {
+			return
+		}
+		for _, de = range entries {
+			fPath = filepath.Join(dPath, de.Name())
+			// NORMALLY, os.Stat calls stat(2), which follows symlinks. (os.Lstat()/lstat(2) does not.)
+			// But the stat for an fs.DirEntry? Uses lstat.
+			// Whatever, we want to resolve symlinks anyways.
+			if _, exists, _, _, stat, err = paths.RealPathExistsStatTarget(&fPath, "."); err != nil {
+				return
+			}
+			if !exists || !stat.Mode().IsRegular() {
+				continue
+			}
+			if dedupe {
+				if _, exists = fpathMap[fPath]; !exists {
+					fpathMap[fPath] = true
+					files = append(files, fPath)
+				}
+			} else {
+				files = append(files, fPath)
+			}
+			if d.ForceLegacy {
+				// Legacy DSH group files treat `#include ...` as a plain comment.
+				continue
+			}
+			if incls, err = getDshGrpIncludes(fPath); err != nil {
+				return
+			}
+			if dedupe {
+				for _, i := range incls {
+					if _, exists = fpathMap[i]; !exists {
+						fpathMap[i] = true
+						files = append(files, i)
+					}
+				}
+			} else {
+				files = append(files, incls...)
+			}
+		}
+	}
+
+	return
+}
+
+/*
+GroupedHosts returns a map of `map[<GROUP>][]string{<HOST>[, <HOST>, ...]}`.
+
+NOTE: this method is currently an unimplemented stub; it always returns a nil map and a nil error.
+The lookup order below is the documented (presumably planned) behavior.
+
+Additional search paths may be specified via searchPaths.
+
+If there are any conflicting group names, the first found group name is used.
+For example, assuming the group name `<GROUP>`, the following files will be checked in this order:
+
+ 0. IF searchPaths is not nil:
+    a. searchPaths[0]/<GROUP>
+    b. searchPaths[1]/<GROUP>
+    c. searchPaths[2]/<GROUP>
+    d. ( ... )
+ 1. IF DshGroupLister.NoHome is false:
+    a. `~/.dsh/group/<GROUP>`
+ 2. IF $DSHGROUP_PATH is defined AND DshGroupLister.NoEnv is false:
+    a. `strings.Split(os.Getenv("DSHGROUP_PATH"), string(os.PathListSeparator))[0]/<GROUP>`
+    b. `strings.Split(os.Getenv("DSHGROUP_PATH"), string(os.PathListSeparator))[1]/<GROUP>`
+    c. `strings.Split(os.Getenv("DSHGROUP_PATH"), string(os.PathListSeparator))[2]/<GROUP>`
+    d. ( ... )
+ 3. IF $DSHGROUP_PATH is NOT defined AND DshGroupLister.NoDefault is false:
+    a. `/etc/dsh/group/<GROUP>`
+*/
+func (d *DshGroupLister) GroupedHosts(dedupe bool, searchPaths ...string) (groupedHosts map[string][]string, err error) {
+
+	return
+}
--- a/pdsh/funcs_dshgrp.go
+++ b/pdsh/funcs_dshgrp.go
@@ -0,0 +1,298 @@
+package pdsh
+
+import (
+	"bufio"
+	"bytes"
+	"os"
+	"strconv"
+	"strings"
+
+	"r00t2.io/sysutils/paths"
+)
+
+/*
+ParseDshPtrn parses ptrn as a DSH group host pattern according to `HOSTLIST EXPRESSIONS` in pdsh(1).
+Empty patterns and lines starting with `#` (including `#include` directives) are explicitly skipped;
+this only parses actual generation pattern strings.
+
+Parsing stops at the first whitespace or `#` found outside of a `[...]` range token;
+anything after it is ignored.
+
+Errors found while scanning are returned as a *PtrnParseErr; its position is the byte offset
+into the whitespace-trimmed pattern. Unbalanced brackets return ErrInvalidDshGrpSyntax directly.
+
+NOTE: host generation from the parsed pattern is not implemented yet (WIP); hostList is currently always nil.
+*/
+func ParseDshPtrn(ptrn string) (hostList []string, err error) {
+
+	var r rune
+	var pos int
+	var s string
+	var inToken bool
+	var tok dshGrpToken
+	var strBuf *bytes.Buffer = new(bytes.Buffer)
+	var tokBuf *bytes.Buffer = new(bytes.Buffer)
+	var parser *dshGrpGenerator = &dshGrpGenerator{
+		tokens:    make([]dshGrpToken, 0),
+		tokenized: make([]string, 0),
+		text:      ptrn,
+	}
+	// fail sets err to a PtrnParseErr wrapping cause at the current scan position/rune.
+	var fail func(cause error) = func(cause error) {
+		err = &PtrnParseErr{
+			pos:  uint(pos),
+			ptrn: ptrn,
+			r:    r,
+			err:  cause,
+		}
+	}
+
+	s = strings.TrimSpace(ptrn)
+	if s == "" {
+		return
+	}
+	if strings.HasPrefix(s, "#") {
+		return
+	}
+	// A quick sanity check. The end-state from the state machine below will catch any weird bracket issues beyond this.
+	if strings.Count(s, "[") != strings.Count(s, "]") {
+		err = ErrInvalidDshGrpSyntax
+		return
+	}
+
+	// Now the hacky bits. We read until we get to a start-token ('['), end-token (']'),
+	// or the end of the pattern (whitespace or '#') *outside* a range token.
+scan:
+	for pos, r = range s {
+		switch {
+		case r == '[':
+			if inToken {
+				// Nested [...[
+				fail(ErrInvalidDshGrpSyntax)
+				return
+			}
+			// The literal segment is appended even if empty so tokenized[i] always precedes tokens[i].
+			parser.tokenized = append(parser.tokenized, strBuf.String())
+			strBuf.Reset()
+			inToken = true
+		case r == ']':
+			if !inToken {
+				// Nested ]...]
+				fail(ErrInvalidDshGrpSyntax)
+				return
+			}
+			if tok, err = parseDshGrpToken(tokBuf.String()); err != nil {
+				fail(err)
+				return
+			}
+			parser.tokens = append(parser.tokens, tok)
+			tokBuf.Reset()
+			inToken = false
+		case inToken:
+			// Only digits, '-', and ',' may appear inside a range token.
+			// (The actual syntax is validated in parseDshGrpToken and parseDshGrpSubtoken.)
+			if !(('0' <= r && r <= '9') || r == '-' || r == ',') {
+				fail(ErrInvalidDshGrpSyntax)
+				return
+			}
+			tokBuf.WriteRune(r)
+		case r == '#' || strings.TrimSpace(string(r)) == "":
+			// Whitespace is "invalid" (treat it as the end of the pattern).
+			// Same for end-of-line octothorpes.
+			// A labelled break is required; a bare break would only leave the switch.
+			break scan
+		case ('0' <= r && r <= '9') ||
+			r == '-' ||
+			r == '.' ||
+			('A' <= r && r <= 'Z') ||
+			('a' <= r && r <= 'z'):
+			// (Probably) valid(-ish) DNS char, so add it.
+			strBuf.WriteRune(r)
+		default:
+			fail(ErrInvalidDshGrpPtrn)
+			return
+		}
+	}
+
+	// If the token never closed, it's also invalid.
+	if inToken {
+		err = ErrInvalidDshGrpSyntax
+		return
+	}
+
+	// Flush the trailing literal segment (the text after the last token, or the whole pattern if there were none).
+	if strBuf.Len() > 0 {
+		parser.tokenized = append(parser.tokenized, strBuf.String())
+	}
+
+	// TODO: generate hostList from parser.tokenized/parser.tokens.
+
+	return
+}
+
+/*
+parseDshGrpToken converts the contents of a single `[...]` range token (without the brackets)
+into a dshGrpToken.
+
+tokenStr is split on commas and each non-blank piece is parsed via parseDshGrpSubtoken.
+The first subtoken parse error aborts parsing and is returned as-is.
+*/
+func parseDshGrpToken(tokenStr string) (token dshGrpToken, err error) {
+
+	var parsed dshGrpSubtoken
+	var pieces []string = strings.Split(strings.TrimSpace(tokenStr), ",")
+
+	token.token = tokenStr
+	token.subtokens = make([]dshGrpSubtoken, 0, len(pieces))
+
+	for _, piece := range pieces {
+		// Blank pieces (e.g. from "1,,3" or a trailing comma) are ignored.
+		if strings.TrimSpace(piece) == "" {
+			continue
+		}
+		parsed, err = parseDshGrpSubtoken(piece)
+		if err != nil {
+			return
+		}
+		token.subtokens = append(token.subtokens, parsed)
+	}
+
+	return
+}
+
+/*
+parseDshGrpSubtoken parses a subtoken string (a single range specifier such as `3`, `03`, or `01-10`)
+into a dshGrpSubtoken.
+
+ErrInvalidDshGrpPtrn is returned if subTokenStr yields no matches from dshGrpSubTokenPtrn.
+A numeric value that does not fit in a uint returns the strconv error instead of being silently truncated.
+On error, the returned subtoken is the zero value.
+*/
+func parseDshGrpSubtoken(subTokenStr string) (subtoken dshGrpSubtoken, err error) {
+
+	var u64 uint64
+	var vals []string
+	var endPad string
+	var startPad string
+	var st dshGrpSubtoken
+	var matches map[string][]string
+
+	if matches = dshGrpSubTokenPtrn.MapString(subTokenStr, false, false, true); len(matches) == 0 {
+		err = ErrInvalidDshGrpPtrn
+		return
+	}
+	if vals = matches["start_pad"]; len(vals) == 1 {
+		startPad = vals[0]
+	}
+	/*
+		Due to a... particular quirk in the regex,
+		the start_pad may be e.g. "0" (or "00", etc.) and start may be "" if the range starts *at* 0
+		(or 00, 000, etc.).
+	*/
+	if vals = matches["start"]; len(vals) == 1 {
+		// bitSize 0 means "must fit in a uint", so the conversion below cannot truncate on 32-bit platforms.
+		if u64, err = strconv.ParseUint(vals[0], 10, 0); err != nil {
+			return
+		}
+		st.start = uint(u64)
+	} else if startPad != "" {
+		// The last "0" of the pad is actually the value itself; st.start is already (implicitly) 0.
+		startPad = startPad[:len(startPad)-1]
+	}
+	if vals = matches["end_pad"]; len(vals) == 1 {
+		endPad = vals[0]
+	}
+	if vals = matches["end"]; len(vals) == 1 {
+		if u64, err = strconv.ParseUint(vals[0], 10, 0); err != nil {
+			return
+		}
+		st.end = uint(u64)
+	}
+
+	// We set the pad to the largest of the two; endPad wins ties (and is used if startPad is empty).
+	st.pad = endPad
+	if len(startPad) > len(endPad) {
+		st.pad = startPad
+	}
+
+	subtoken = st
+
+	return
+}
+
+/*
+getDshGrpIncludes parses fpath for `#include ...` directives, recursing into each included file.
+It skips any include target `p` that consists of a single path segment (i.e. a bare group name
+rather than a path), as these are inherently included by the dir read.
+
+Include targets that do not exist are skipped. Include cycles are broken: a file that is already
+being processed further up the include chain is not descended into again.
+
+It is assumed that fpath is a cleaned, absolute filepath.
+*/
+func getDshGrpIncludes(fpath string) (includes []string, err error) {
+
+	includes, err = collectDshGrpIncludes(fpath, map[string]struct{}{fpath: {}})
+
+	return
+}
+
+/*
+collectDshGrpIncludes does the actual work for getDshGrpIncludes.
+active holds the files on the current include chain (fpath and its ancestors)
+and is used to avoid unbounded recursion on cyclic includes.
+*/
+func collectDshGrpIncludes(fpath string, active map[string]struct{}) (includes []string, err error) {
+
+	var f *os.File
+	var line string
+	var exists bool
+	var onChain bool
+	var inclpath string
+	var subIncl []string
+	var segs []string
+	var scanner *bufio.Scanner
+	var matches map[string][]string
+
+	if f, err = os.Open(fpath); err != nil {
+		return
+	}
+	defer f.Close()
+
+	scanner = bufio.NewScanner(f)
+	for scanner.Scan() {
+		line = strings.TrimSpace(scanner.Text())
+		if line == "" {
+			continue
+		}
+		if !dshGrpInclPtrn.MatchString(line) {
+			continue
+		}
+		matches = dshGrpInclPtrn.MapString(line, false, false, true)
+		if matches == nil {
+			err = ErrInvalidDshGrpSyntax
+			return
+		}
+		if len(matches["incl"]) == 0 {
+			err = ErrInvalidDshGrpSyntax
+			return
+		}
+		inclpath = matches["incl"][0]
+		// A single-segment target is a group name, not a path; the directory read already covers it.
+		segs = paths.SegmentSys(inclpath, false, false)
+		if len(segs) == 0 || (len(segs) == 1 && segs[0] == inclpath) {
+			continue
+		}
+
+		// NOTE(review): RealPathExists takes a pointer, so it presumably canonicalizes inclpath in place -- confirm.
+		if exists, err = paths.RealPathExists(&inclpath); err != nil {
+			return
+		}
+		if !exists {
+			continue
+		}
+		// Already being processed higher up the chain (or is fpath itself): an include cycle.
+		if _, onChain = active[inclpath]; onChain {
+			continue
+		}
+		includes = append(includes, inclpath)
+
+		// Only the *current* chain is tracked, so the same file may still be listed
+		// more than once if it is included from several unrelated places.
+		active[inclpath] = struct{}{}
+		subIncl, err = collectDshGrpIncludes(inclpath, active)
+		delete(active, inclpath)
+		if err != nil {
+			return
+		}
+		includes = append(includes, subIncl...)
+	}
+	// Scan() returning false may mean a read error (or an over-long line) rather than EOF.
+	if err = scanner.Err(); err != nil {
+		return
+	}
+
+	return
+}
--- a/pdsh/funcs_ptrnparseerr.go
+++ b/pdsh/funcs_ptrnparseerr.go
@@ -0,0 +1,16 @@
+package pdsh
+
+import (
+	"fmt"
+)
+
+// Error conforms a PtrnParseErr to the error interface.
+// The message includes the pattern, the position and rune at which parsing failed, and the underlying error.
+func (p *PtrnParseErr) Error() (errStr string) {
+
+	errStr = fmt.Sprintf(
+		"Parse error in pattern '%s', position %d rune '%s': %v",
+		p.ptrn, p.pos, string(p.r), p.err,
+	)
+
+	return
+}
+
+// Unwrap returns the underlying error so that errors.Is and errors.As
+// can match the wrapped cause (e.g. ErrInvalidDshGrpSyntax or ErrInvalidDshGrpPtrn).
+func (p *PtrnParseErr) Unwrap() (err error) {
+
+	err = p.err
+
+	return
+}
--- a/pdsh/types.go
+++ b/pdsh/types.go
@@ -0,0 +1,86 @@
+package pdsh
+
+// TODO: This... doesn't really have much usefulness, does it?
+/*
+type (
+	HostLister interface {
+		// Hosts returns ALL hosts (where applicable) that are considered/generated for a Lister.
+		Hosts() (hosts []string, err error)
+	}
+)
+*/
+
+type (
+	/*
+		DshGroupLister behaves like the host list generator
+		for pdsh(1)'s "dshgroup module options" (the `misc/dshgroup`
+		module for pdsh).
+
+		The zero value enables every search location and `#include` handling.
+	*/
+	DshGroupLister struct {
+		/*
+			NoEnv, if true, will *not* use DSHGROUP_PATH.
+
+			Note that the default path `/etc/dsh/group/` is only added when DSHGROUP_PATH
+			is not defined at all, regardless of NoEnv (see NoDefault).
+		*/
+		NoEnv bool
+		/*
+			NoDefault, if true, will *not* add the default path `/etc/dsh/group/`
+			to the search paths.
+
+			If NoDefault is false, this path is only added if DSHGROUP_PATH is not defined.
+		*/
+		NoDefault bool
+		// NoHome, if true, will *not* add the `~/.dsh/group/` path (resolved via $HOME) to the search paths.
+		NoHome bool
+		/*
+			ForceLegacy, if true, will disable the PDSH `#include <PATH|GROUP>` modification --
+			treating the source as a traditional DSH group file instead (e.g. `#include ...`
+			is treated as just a comment).
+		*/
+		ForceLegacy bool
+	}
+)
+
+type (
+	// dshGrpGenerator holds the intermediate state of a host pattern parsed by ParseDshPtrn.
+	dshGrpGenerator struct {
+		/*
+			tokens are interleaved with tokenized and indexed *after*;
+			in other words, str = <substr0><token0><substr1><token1>...
+		*/
+		tokens []dshGrpToken
+		// tokenized holds the split original text with tokens removed and split where the tokens occur.
+		tokenized []string
+		// text holds the original pattern.
+		text string
+	}
+	// dshGrpToken is a single parsed `[...]` range token from a host pattern.
+	dshGrpToken struct {
+		/*
+			token contains the original range specifier.
+			Tokens may be e.g.:
+
+				* 3: str3
+				* 3-5: str3, str4, str5
+				* 3,5: str3, str5
+		*/
+		token string
+		// subtokens hold a split of the individual range specifiers.
+		subtokens []dshGrpSubtoken
+	}
+	// dshGrpSubtoken is a single comma-separated range specifier within a dshGrpToken.
+	dshGrpSubtoken struct {
+		// start indicates either the single value or the start of the range.
+		start uint
+		// end, if 0 or less than start, indicates a single-value range.
+		end uint
+		// pad, if non-empty, is a string to add to the beginning of each of the generated substrings for this subtoken.
+		pad string
+	}
+)
+
+type (
+	// PtrnParseErr is returned by ParseDshPtrn when a host pattern fails to parse at a specific position.
+	PtrnParseErr struct {
+		// pos is the byte offset (into the whitespace-trimmed pattern) at which the error was detected.
+		pos  uint
+		// ptrn is the pattern as originally passed to the parser.
+		ptrn string
+		// r is the rune at pos.
+		r    rune
+		// err is the underlying error.
+		err  error
+	}
+)