checking in some WIP

* added some netx funcs * added netx/dnsx * currently updating docs and adding *x funcs to sprigx
2026-02-24 17:41:57 -05:00
parent 4770052b52
commit c6fc692f5e
14 changed files with 2773 additions and 646 deletions
--- a/stringsx/funcs.go
+++ b/stringsx/funcs.go
@@ -1,11 +1,165 @@
 package stringsx

 import (
+	`bytes`
+	`errors`
 	`fmt`
+	`io`
+	`slices`
 	`strings`
 	`unicode`
 )

+/*
+IsAscii reports whether every character in string s is ASCII.
+
+allowCtl and allowExt are passed through unchanged; printable characters and
+the whitespace control characters are always permitted, and no additional
+inclusions or exclusions are applied.
+
+It is shorthand for [IsAsciiSpecial]:
+
+	isAscii, err = IsAsciiSpecial(s, allowCtl, true, allowExt, true, nil, nil)
+
+isAscii and err are returned exactly as [IsAsciiSpecial] produced them.
+*/
+func IsAscii(s string, allowCtl, allowExt bool) (isAscii bool, err error) {
+	return IsAsciiSpecial(s, allowCtl, true, allowExt, true, nil, nil)
+}
+
+/*
+IsAsciiBuf reports whether everything read from r is valid ASCII.
+
+Note that r is consumed/read by this function.
+
+allowCtl and allowExt are passed through unchanged; printable characters and
+the whitespace control characters are always permitted, and no additional
+inclusions or exclusions are applied.
+
+It is shorthand for [IsAsciiBufSpecial]:
+
+	isAscii, err = IsAsciiBufSpecial(r, allowCtl, true, allowExt, true, nil, nil)
+
+isAscii and err are returned exactly as [IsAsciiBufSpecial] produced them.
+*/
+func IsAsciiBuf(r io.RuneReader, allowCtl, allowExt bool) (isAscii bool, err error) {
+	return IsAsciiBufSpecial(r, allowCtl, true, allowExt, true, nil, nil)
+}
+
+/*
+IsAsciiSpecial allows for specifying specific ASCII ranges.
+
+allowCtl, if true, will allow control characters (0x00 to 0x1f inclusive).
+
+allowPrint, if true, will allow printable characters (what most people think of
+when they say "ASCII") (0x20 to 0x7f inclusive).
+
+allowExt, if true, will allow for "extended ASCII" - some later dialects expand
+to a full 8-bit ASCII range (0x80 to 0xff inclusive).
+
+wsCtl, if true, "shifts" the "whitespace control characters" (\t, \n, \r) to the "printable" space
+(such that allowPrint controls their validation). Thus:
+
+	IsAsciiSpecial(s, false, true, false, true, nil, nil)
+
+has the same effect as specifying:
+
+	IsAsciiSpecial(s, false, true, false, (-), []byte("\t\n\r"), nil)
+
+incl, if non-nil and non-empty, allows *additional* characters to be specified as included
+that would normally *not* be allowed.
+
+excl, if non-nil and non-empty, invalidates on additional characters that would normally be allowed.
+
+excl, if specified, takes precedence over incl if specified.
+
+An [AsciiInvalidError] will be returned on the first encountered invalid character.
+*/
+func IsAsciiSpecial(s string, allowCtl, allowPrint, allowExt, allowWs bool, incl, excl []byte) (isAscii bool, err error) {
+
+	var buf *bytes.Buffer = bytes.NewBufferString(s)
+
+	if isAscii, err = IsAsciiBufSpecial(buf, allowCtl, allowPrint, allowExt, allowWs, incl, excl); err != nil {
+		return
+	}
+
+	return
+}
+
+/*
+IsAsciiBufSpecial is the same as [IsAsciiSpecial] but operates on an [io.RuneReader].
+
+Note that the buffer will be consumed/read by this function.
+
+It will not return an [io.EOF] if encountered, but any other errors encountered will be returned.
+It is expected that r will return an [io.EOF] when exhausted; reaching it
+without having rejected a character is what yields isAscii == true.
+
+An [AsciiInvalidError] will be returned on the first encountered invalid character.
+In that case, and for any non-EOF read error, isAscii is false.
+*/
+func IsAsciiBufSpecial(r io.RuneReader, allowCtl, allowPrint, allowExt, allowWs bool, incl, excl []byte) (isAscii bool, err error) {
+
+	var b rune
+	var bLen int
+	var nextNewline bool
+	// tmpErr doubles as the running position tracker; it is only ever
+	// assigned to err when a character is actually rejected.
+	var tmpErr *AsciiInvalidError = new(AsciiInvalidError)
+	// This is essentially a lookup table, built once up front so that each
+	// character costs a single array index. Keeps it speedy.
+	var allowed [256]bool = getAsciiCharMap(allowCtl, allowPrint, allowExt, allowWs, incl, excl)
+
+	// Every exit from this loop is a return, so nothing follows it.
+	for {
+		if b, bLen, err = r.ReadRune(); err != nil {
+			if errors.Is(err, io.EOF) {
+				// Exhausted r without rejecting anything: success.
+				err = nil
+				isAscii = true
+			}
+			return
+		}
+
+		// Character/line counters are advanced *before* validation so they
+		// already account for the character being checked.
+		//
+		// NOTE(review): LineChar is incremented for the first character of
+		// the first line but is reset to 0 (and not incremented) for the first
+		// character of every later line, so it is 1-based on line 0 and
+		// 0-based afterwards - confirm which base is intended.
+		if nextNewline {
+			tmpErr.Line++
+			tmpErr.LineByte = 0
+			tmpErr.LineChar = 0
+			nextNewline = false
+		} else {
+			tmpErr.LineChar++
+		}
+		tmpErr.Char++
+
+		// The newline itself still belongs to the current line; the line
+		// counters roll over on the *next* character read.
+		if b == '\n' {
+			nextNewline = true
+		}
+
+		if bLen > 2 || b > 0xff {
+			// Outside the 8-bit range: code points up to 0xff encode to at
+			// most two UTF-8 bytes. This also rejects U+FFFD, which is e.g.
+			// what bytes.Buffer.ReadRune yields for malformed UTF-8.
+			tmpErr.BadChar = b
+			tmpErr.BadBytes = []byte(string(b))
+			err = tmpErr
+			return
+		}
+		if !allowed[byte(b)] {
+			tmpErr.BadChar = b
+			tmpErr.BadBytes = []byte{byte(b)}
+			err = tmpErr
+			return
+		}
+
+		// Byte offsets are advanced *after* validation so that, on rejection,
+		// they still point at the start of the offending character.
+		tmpErr.LineByte += uint64(bLen)
+		tmpErr.Byte += uint64(bLen)
+	}
+}
+
 /*
 LenSplit formats string `s` to break at, at most, every `width` characters.

@@ -252,6 +406,18 @@ func Redact(s, maskStr string, leading, trailing uint, newlines bool) (redacted
 	return
 }

+// Reverse reverses string s. (It's absolutely insane that this isn't in stdlib.)
+func Reverse(s string) (revS string) {
+
+	var rsl []rune = []rune(s)
+
+	slices.Reverse(rsl)
+
+	revS = string(rsl)
+
+	return
+}
+
 /*
 TrimLines is like [strings.TrimSpace] but operates on *each line* of s.
 It is *NIX-newline (`\n`) vs. Windows-newline (`\r\n`) agnostic.
@@ -313,6 +479,58 @@ func TrimSpaceRight(s string) (trimmed string) {
 	return
 }

+// getAsciiCharMap returns a lookup "table" for ASCII characters.
+func getAsciiCharMap(allowCtl, allowPrint, allowExt, allowWs bool, incl, excl []byte) (charmap [256]bool) {
+
+	var idx uint8
+
+	if allowCtl {
+		for idx < 0x1f {
+			charmap[idx] = true
+			idx++
+		}
+	} else {
+		idx = 0x1f
+	}
+	if allowPrint {
+		for idx < 0x7f {
+			charmap[idx] = true
+			idx++
+		}
+	} else {
+		idx = 0x7f
+	}
+	if allowExt {
+		for {
+			charmap[idx] = true
+			if idx == 0xff {
+				break
+			}
+			idx++
+		}
+	} else {
+		idx = 0xff
+	}
+	if allowWs {
+		charmap['\t'] = true
+		charmap['\n'] = true
+		charmap['\r'] = true
+	}
+
+	if incl != nil && len(incl) > 0 {
+		for _, idx = range incl {
+			charmap[idx] = true
+		}
+	}
+	if excl != nil && len(excl) > 0 {
+		for _, idx = range excl {
+			charmap[idx] = false
+		}
+	}
+
+	return
+}
+
 // getNewLine is too unpredictable/nuanced to be used as part of a public API promise so it isn't exported.
 func getNewLine(s string) (nl string) {