checking in some WIP

* added some netx funcs
* added netx/dnsx
* currently updating docs and adding *x funcs to sprigx
This commit is contained in:
brent saner
2026-02-24 17:41:57 -05:00
parent 4770052b52
commit c6fc692f5e
14 changed files with 2773 additions and 646 deletions

View File

@@ -1,11 +1,165 @@
package stringsx
import (
`bytes`
`errors`
`fmt`
`io`
`slices`
`strings`
`unicode`
)
/*
IsAscii reports whether every character in string s is ASCII.

allowCtl and allowExt are forwarded unchanged; printable characters and the
whitespace control characters are always permitted.

It is a convenience wrapper, equivalent to:

	isAscii, err = IsAsciiSpecial(s, allowCtl, true, allowExt, true, nil, nil)
*/
func IsAscii(s string, allowCtl, allowExt bool) (isAscii bool, err error) {
	// allowPrint and allowWs are pinned to true; no extra includes/excludes.
	return IsAsciiSpecial(s, allowCtl, true, allowExt, true, nil, nil)
}
/*
IsAsciiBuf reports whether everything read from r is valid ASCII.

Note that r is consumed/read by this function.

allowCtl and allowExt are forwarded unchanged; printable characters and the
whitespace control characters are always permitted.

It is a convenience wrapper, equivalent to:

	isAscii, err = IsAsciiBufSpecial(r, allowCtl, true, allowExt, true, nil, nil)
*/
func IsAsciiBuf(r io.RuneReader, allowCtl, allowExt bool) (isAscii bool, err error) {
	// allowPrint and allowWs are pinned to true; no extra includes/excludes.
	return IsAsciiBufSpecial(r, allowCtl, true, allowExt, true, nil, nil)
}
/*
IsAsciiSpecial validates string s against a caller-selected set of ASCII ranges.

allowCtl, if true, permits control characters (0x00 to 0x1f inclusive).

allowPrint, if true, permits printable characters (what most people mean by
"ASCII") (0x20 to 0x7f inclusive).

allowExt, if true, permits "extended ASCII" - later dialects that widen ASCII
to a full 8-bit range (0x80 to 0xff inclusive).

allowWs, if true, permits the "whitespace control characters" (\t, \n, \r)
even when allowCtl is false. Thus:

	IsAsciiSpecial(s, false, true, false, true, nil, nil)

has the same effect as:

	IsAsciiSpecial(s, false, true, false, false, []byte("\t\n\r"), nil)

incl, if non-empty, lists *additional* characters to accept that would
otherwise be rejected.

excl, if non-empty, lists characters to reject that would otherwise be
accepted. excl takes precedence over incl when a character is in both.

An [AsciiInvalidError] is returned on the first invalid character encountered.

The string is wrapped in a buffer and handed to [IsAsciiBufSpecial], which
performs the actual validation.
*/
func IsAsciiSpecial(s string, allowCtl, allowPrint, allowExt, allowWs bool, incl, excl []byte) (isAscii bool, err error) {
	return IsAsciiBufSpecial(bytes.NewBufferString(s), allowCtl, allowPrint, allowExt, allowWs, incl, excl)
}
/*
IsAsciiBufSpecial is the same as [IsAsciiSpecial] but operates on an [io.RuneReader].

Note that the buffer will be consumed/read by this function.

It will not return an [io.EOF] if encountered (that is the success condition:
isAscii is true and err is nil), but any other error returned by r is passed
back as-is. It is expected that r will return an [io.EOF] when exhausted.

An [AsciiInvalidError] will be returned on the first encountered invalid
character. Its position fields are populated as follows:

  - Line is the number of '\n' characters read before the offending character.
  - Char and LineChar are 1-based character counts (overall and within the
    current line, respectively) that include the offending character.
  - Byte and LineByte are the number of bytes consumed (overall and within the
    current line, respectively) before the offending character.
  - BadChar and BadBytes hold the offending character itself.
*/
func IsAsciiBufSpecial(r io.RuneReader, allowCtl, allowPrint, allowExt, allowWs bool, incl, excl []byte) (isAscii bool, err error) {

	var b rune
	var bLen int
	var nextNewline bool
	var tmpErr *AsciiInvalidError = new(AsciiInvalidError)
	// This is essentially a lookup table, which keeps the per-character check cheap.
	var allowed [256]bool = getAsciiCharMap(allowCtl, allowPrint, allowExt, allowWs, incl, excl)

	// The loop only ever exits via return: on EOF (success), on a read error,
	// or on the first invalid character.
	for {
		if b, bLen, err = r.ReadRune(); err != nil {
			if errors.Is(err, io.EOF) {
				// Exhausting the reader without a bad character is success.
				err = nil
				isAscii = true
			}
			return
		}

		// Character counters are advanced *before* validation so that they
		// include the character currently being inspected.
		if nextNewline {
			// The previous character ended a line; start counting a new one.
			tmpErr.Line++
			tmpErr.LineByte = 0
			tmpErr.LineChar = 0
			nextNewline = false
		}
		// Incremented unconditionally so LineChar is 1-based on every line,
		// consistent with Char.
		tmpErr.LineChar++
		tmpErr.Char++
		if b == '\n' {
			// The newline itself belongs to the current line; roll over on the next read.
			nextNewline = true
		}

		switch {
		case b == rune(0xfffd), bLen > 2, b > 0xff:
			// U+FFFD is what ReadRune yields for undecodable input, and anything
			// beyond 0xff (or wider than two encoded bytes) cannot be ASCII or
			// ISO-8859-1. Report the rune's UTF-8 encoding.
			tmpErr.BadChar = b
			tmpErr.BadBytes = []byte(string(b))
			err = tmpErr
			return
		case !allowed[byte(b)]:
			// In the single-byte range but not in the permitted set.
			tmpErr.BadChar = b
			tmpErr.BadBytes = []byte{byte(b)}
			err = tmpErr
			return
		}

		// Byte offsets are advanced *after* validation so that, on error, they
		// point at the start of the offending character.
		tmpErr.LineByte += uint64(bLen)
		tmpErr.Byte += uint64(bLen)
	}
}
/*
LenSplit formats string `s` to break at, at most, every `width` characters.
@@ -252,6 +406,18 @@ func Redact(s, maskStr string, leading, trailing uint, newlines bool) (redacted
return
}
// Reverse returns string s with its runes in reverse order.
// (It's absolutely insane that this isn't in stdlib.)
//
// Reversal is done per rune (not per byte), so multi-byte UTF-8 characters
// are kept intact.
func Reverse(s string) (revS string) {
	runes := []rune(s)
	slices.Reverse(runes)
	return string(runes)
}
/*
TrimLines is like [strings.TrimSpace] but operates on *each line* of s.
It is *NIX-newline (`\n`) vs. Windows-newline (`\r\n`) agnostic.
@@ -313,6 +479,58 @@ func TrimSpaceRight(s string) (trimmed string) {
return
}
/*
getAsciiCharMap returns a lookup "table" for ASCII characters, indexed by byte
value; charmap[c] is true if byte c is permitted.

The table is built in this order, with later steps overriding earlier ones:

  - allowCtl governs the control range (0x00 to 0x1f inclusive).
  - allowPrint governs the printable range (0x20 to 0x7f inclusive).
  - allowExt governs the extended range (0x80 to 0xff inclusive).
  - allowWs, if true, permits \t, \n, and \r regardless of allowCtl.
  - Every byte in incl is permitted.
  - Every byte in excl is rejected (so excl wins over incl).

A nil or empty incl/excl is a no-op.
*/
func getAsciiCharMap(allowCtl, allowPrint, allowExt, allowWs bool, incl, excl []byte) (charmap [256]bool) {

	// idx is an int (not a uint8) so the loop can cover 0xff without wrapping.
	var idx int
	var b byte

	for idx = 0x00; idx <= 0xff; idx++ {
		switch {
		case idx <= 0x1f:
			charmap[idx] = allowCtl
		case idx <= 0x7f:
			charmap[idx] = allowPrint
		default:
			charmap[idx] = allowExt
		}
	}

	if allowWs {
		charmap['\t'] = true
		charmap['\n'] = true
		charmap['\r'] = true
	}

	// Ranging over a nil/empty slice is a no-op, so no explicit guards are needed.
	for _, b = range incl {
		charmap[b] = true
	}
	// Applied last so that exclusions take precedence over inclusions.
	for _, b = range excl {
		charmap[b] = false
	}

	return
}
// getNewLine is too unpredictable/nuanced to be used as part of a public API promise so it isn't exported.
func getNewLine(s string) (nl string) {