Files
go_goutils/stringsx/funcs.go
brent saner c6fc692f5e checking in some WIP
* added some netx funcs
* added netx/dnsx
* currently updating docs and adding *x funcs to sprigx
2026-02-24 17:41:57 -05:00

545 lines
13 KiB
Go

package stringsx
import (
`bytes`
`errors`
`fmt`
`io`
`slices`
`strings`
`unicode`
)
/*
IsAscii reports whether every character in string s is ASCII.

allowCtl and allowExt are forwarded unchanged; printable characters and the
whitespace control characters are always requested as allowed, and no extra
include/exclude lists are supplied.

It is a thin convenience wrapper around [IsAsciiSpecial], equivalent to:

	isAscii, err = IsAsciiSpecial(s, allowCtl, true, allowExt, true, nil, nil)
*/
func IsAscii(s string, allowCtl, allowExt bool) (isAscii bool, err error) {
	return IsAsciiSpecial(s, allowCtl, true, allowExt, true, nil, nil)
}
/*
IsAsciiBuf reports whether everything read from r is valid ASCII.

Note that r is consumed/read by this function.

allowCtl and allowExt are forwarded unchanged; printable characters and the
whitespace control characters are always requested as allowed, and no extra
include/exclude lists are supplied.

It is a thin convenience wrapper around [IsAsciiBufSpecial], equivalent to:

	isAscii, err = IsAsciiBufSpecial(r, allowCtl, true, allowExt, true, nil, nil)
*/
func IsAsciiBuf(r io.RuneReader, allowCtl, allowExt bool) (isAscii bool, err error) {
	return IsAsciiBufSpecial(r, allowCtl, true, allowExt, true, nil, nil)
}
/*
IsAsciiSpecial reports whether string s consists solely of characters from the
ASCII ranges selected by the caller.

allowCtl, if true, permits control characters (0x00 to 0x1f inclusive).

allowPrint, if true, permits printable characters (what most people think of
when they say "ASCII") (0x20 to 0x7f inclusive).

allowExt, if true, permits "extended ASCII" - some later dialects expand
to a full 8-bit ASCII range (0x80 to 0xff inclusive).

allowWs, if true, permits the "whitespace control characters" (\t, \n, \r)
even when allowCtl is false. Thus:

	IsAsciiSpecial(s, false, true, false, true, nil, nil)

has the same effect as specifying:

	IsAsciiSpecial(s, false, true, false, false, []byte("\t\n\r"), nil)

incl, if non-empty, lists *additional* characters to accept that would
normally *not* be allowed.

excl, if non-empty, lists characters to reject that would normally be allowed.
excl takes precedence over incl when a character appears in both.

The string is wrapped in a [bytes.Buffer] and the actual validation is
delegated to [IsAsciiBufSpecial]. An [AsciiInvalidError] will be returned on
the first encountered invalid character.
*/
func IsAsciiSpecial(s string, allowCtl, allowPrint, allowExt, allowWs bool, incl, excl []byte) (isAscii bool, err error) {
	return IsAsciiBufSpecial(
		bytes.NewBufferString(s),
		allowCtl, allowPrint, allowExt, allowWs,
		incl, excl,
	)
}
/*
IsAsciiBufSpecial is the same as [IsAsciiSpecial] but operates on an [io.RuneReader].

Note that the buffer will be consumed/read by this function.

It will not return an [io.EOF] if encountered (reaching EOF without finding an
invalid character yields isAscii == true and a nil error), but any other error
returned by r is passed back as-is with isAscii == false.
It is expected that r will return an [io.EOF] when exhausted.

An [AsciiInvalidError] will be returned on the first encountered invalid character.
As populated by this function, at that point:

  - Char and LineChar are counts that *include* the offending character
    (i.e. the first character of the input, and of every line, is 1).
  - Byte and LineByte are the number of bytes read *before* the offending character.
  - Line is the number of '\n' characters read before the offending character.
  - BadChar/BadBytes hold the offending rune and its byte representation.
*/
func IsAsciiBufSpecial(r io.RuneReader, allowCtl, allowPrint, allowExt, allowWs bool, incl, excl []byte) (isAscii bool, err error) {

	var b rune
	var bLen int
	var nextNewline bool
	var tmpErr *AsciiInvalidError = new(AsciiInvalidError)
	// This is essentially a lookup table, built once up front. Keeps the loop speedy.
	var allowed [256]bool = getAsciiCharMap(allowCtl, allowPrint, allowExt, allowWs, incl, excl)

	// The loop only ever exits via return: on EOF, on a read error, or on the first invalid character.
	for {
		if b, bLen, err = r.ReadRune(); err != nil {
			if errors.Is(err, io.EOF) {
				// Exhausted the reader without finding anything invalid.
				err = nil
				isAscii = true
			}
			return
		}

		// Counters that include the current character are advanced *before* validation.
		if nextNewline {
			// The previous character was '\n', so this one starts a new line.
			tmpErr.Line++
			tmpErr.LineByte = 0
			tmpErr.LineChar = 0
		}
		// Incremented unconditionally so the first character of *every* line is 1,
		// not just the first character of the first line.
		tmpErr.LineChar++
		tmpErr.Char++
		nextNewline = b == '\n'

		if b > 0xff || bLen > 2 {
			// ASCII only occupies a single byte, ISO-8859-1 occupies 2 when UTF-8 encoded.
			// This also covers U+FFFD (0xfffd), which is what ReadRune yields
			// for input that is not even valid Unicode.
			tmpErr.BadChar = b
			tmpErr.BadBytes = []byte(string(b))
			err = tmpErr
			return
		}

		if !allowed[byte(b)] {
			tmpErr.BadChar = b
			tmpErr.BadBytes = []byte{byte(b)}
			err = tmpErr
			return
		}

		// Byte offsets only advance *after* the character has been accepted,
		// so on error they point at the start of the offending character.
		tmpErr.LineByte += uint64(bLen)
		tmpErr.Byte += uint64(bLen)
	}
}
/*
LenSplit formats string `s` to break at, at most, every `width` characters.
Any existing newlines (e.g. \r\n) will be removed during a string/
substring/line's length calculation. (e.g. `foobarbaz\n` and `foobarbaz\r\n` are
both considered to be lines of length 9, not 10 and 11 respectively).
This also means that any newlines (\n or \r\n) are inherently removed from
`out` (even if included in `wordWrap`; see below).
Note that if `s` is multiline (already contains newlines), they will be respected
as-is - that is, if a line ends with less than `width` chars and then has a newline,
it will be preserved as an empty element. That is to say:
"foo\nbar\n\n" → []string{"foo", "bar", ""}
"foo\n\nbar\n" → []string{"foo", "", "bar"}
This splitter is particularly simple. If you need wordwrapping, it should be done
with e.g. [github.com/muesli/reflow/wordwrap].
*/
func LenSplit(s string, width uint) (out []string) {
var end int
var line string
var lineRunes []rune
if width == 0 {
out = []string{s}
return
}
for line = range strings.Lines(s) {
line = strings.TrimRight(line, "\n")
line = strings.TrimRight(line, "\r")
lineRunes = []rune(line)
if uint(len(lineRunes)) <= width {
out = append(out, line)
continue
}
for i := 0; i < len(lineRunes); i += int(width) {
end = i + int(width)
if end > len(lineRunes) {
end = len(lineRunes)
}
out = append(out, string(lineRunes[i:end]))
}
}
return
}
/*
LenSplitStr runs [LenSplit] on `s` and joins the resulting pieces back into a
single string, one piece per line.

It's mostly just a convenience wrapper.

`s` and `width` are passed to [LenSplit] unchanged. `winNewline` selects the
separator placed between pieces: \r\n if true, \n otherwise.
No separator is appended after the final piece.
*/
func LenSplitStr(s string, width uint, winNewline bool) (out string) {

	sep := "\n"
	if winNewline {
		sep = "\r\n"
	}

	return strings.Join(LenSplit(s, width), sep)
}
/*
Pad pads each element in `s` to length `width` using `pad`.

If `width` is 0, `s` itself is returned unmodified.

If `pad` is empty, a single space (0x20) will be assumed.

Note that `width` operates on rune size, not byte size.
(In ASCII, they will be the same size.)

If an element in `s` is greater than or equal to `width`,
no padding will be performed on it.

If `pad` is more than one character long it is repeated as often as needed
and, if the gap is not an exact multiple of its length, the final repetition
is cut short (e.g. padding "ab" to 5 with "xy" yields "abxyx").

If `leftPad` is true, padding will be applied to the "left" ("beginning")
of each element instead of the "right" ("end").

`out` always has the same number of elements as `s`, in the same order.
*/
func Pad(s []string, width uint, pad string, leftPad bool) (out []string) {

	if width == 0 {
		out = s
		return
	}

	out = make([]string, len(s))

	// Easy; space padding is supported directly in fmt (which also measures width in runes).
	if pad == "" {
		for idx, elem := range s {
			if leftPad {
				out[idx] = fmt.Sprintf("%*s", width, elem)
			} else {
				out[idx] = fmt.Sprintf("%-*s", width, elem)
			}
		}
		return
	}

	// A custom pad has to be measured and cut in runes, not bytes.
	padRunes := []rune(pad)
	padLen := uint(len(padRunes))

	for idx, elem := range s {
		elemLen := uint(len([]rune(elem)))

		// Already wide enough; keep as-is.
		if elemLen >= width {
			out[idx] = elem
			continue
		}

		// As many whole copies of pad as fit, followed by however many
		// leading runes of pad are needed to fill the remainder exactly.
		need := width - elemLen
		fill := strings.Repeat(pad, int(need/padLen)) + string(padRunes[:need%padLen])

		if leftPad {
			out[idx] = fill + elem
		} else {
			out[idx] = elem + fill
		}
	}

	return
}
/*
Redact provides a "masked" version of string s (e.g. `my_terrible_password` -> `my****************rd`).

maskStr is the character or sequence of characters
to repeat for every masked character of s.
If an empty string, the default [DefMaskStr] will be used.
(maskStr does not need to be a single character.
It is recommended to use a multi-char mask to help obfuscate a string's length.)

leading specifies the number of leading characters of s to leave *unmasked*.
If 0, no leading characters will be unmasked.

trailing specifies the number of trailing characters of s to leave *unmasked*.
If 0, no trailing characters will be unmasked.

newlines, if true, will preserve newline characters (\n or \r\n): each line is
redacted on its own, with leading and trailing applied per line, and its
original line ending is re-appended. Otherwise newlines are treated as
regular characters.

"Characters" are runes, not bytes: a multi-byte character is never cut in half
and is replaced by exactly one maskStr.

As a safety precaution, if the number of characters in s (or, with newlines,
in a given line) is less than or equal to leading + trailing, then the entire
string (or line) will be *masked* and no unmasking will be performed.

An empty s yields an empty result.

Note that this DOES NOT do a string *replace*, it provides a masked version of `s` itself.
Wrap Redact with [strings.ReplaceAll] if you want to replace a certain value with a masked one.
*/
func Redact(s, maskStr string, leading, trailing uint, newlines bool) (redacted string) {

	var sb strings.Builder

	// Nothing to mask.
	if s == "" {
		return
	}

	if maskStr == "" {
		maskStr = DefMaskStr
	}

	if newlines {
		// Redact each line separately, passing its line ending through untouched.
		for line := range strings.Lines(s) {
			nl := getNewLine(line)
			sb.WriteString(
				Redact(
					strings.TrimSuffix(line, nl), maskStr, leading, trailing, false,
				),
			)
			sb.WriteString(nl)
		}
		redacted = sb.String()
		return
	}

	// Byte offset at which each character of s starts; ranging over a string
	// steps rune by rune, so this lets s be cut on character boundaries only.
	starts := make([]int, 0, len(s))
	for i := range s {
		starts = append(starts, i)
	}
	total := uint(len(starts))

	// Equivalent to total <= leading+trailing, but written so that the
	// unsigned addition cannot wrap around for very large arguments.
	if leading >= total || trailing >= total-leading {
		redacted = strings.Repeat(maskStr, len(starts))
		return
	}

	// From here on leading+trailing < total, so every index below is in range.
	tailStart := len(s)
	if trailing > 0 {
		tailStart = starts[total-trailing]
	}

	sb.WriteString(s[:starts[leading]])
	sb.WriteString(strings.Repeat(maskStr, int(total-leading-trailing)))
	sb.WriteString(s[tailStart:])

	redacted = sb.String()

	return
}
/*
Reverse returns string s with its characters in the opposite order.

The reversal is done on runes rather than bytes, so multi-byte characters stay intact.
(It's absolutely insane that this isn't in stdlib.)
*/
func Reverse(s string) (revS string) {

	if s == "" {
		return ""
	}

	chars := []rune(s)
	slices.Reverse(chars)

	return string(chars)
}
/*
TrimLines is like [strings.TrimSpace] but operates on *each line* of s.
It is *NIX-newline (`\n`) vs. Windows-newline (`\r\n`) agnostic.
The first encountered linebreak (`\n` vs. `\r\n`) are assumed to be
the canonical linebreak for the rest of s.
left, if true, performs a [TrimSpaceLeft] on each line (retaining the newline).
right, if true, performs a [TrimSpaceRight] on each line (retaining the newline).
*/
func TrimLines(s string, left, right bool) (trimmed string) {
var sl string
var nl string
var sb strings.Builder
// These conditions functionally won't do anything, so just return the input as-is.
if s == "" {
return
}
if !left && !right {
trimmed = s
return
}
for line := range strings.Lines(s) {
nl = getNewLine(line)
sl = strings.TrimSuffix(line, nl)
if left && right {
sl = strings.TrimSpace(sl)
} else if left {
sl = TrimSpaceLeft(sl)
} else if right {
sl = TrimSpaceRight(sl)
}
sb.WriteString(sl + nl)
}
trimmed = sb.String()
return
}
/*
TrimSpaceLeft is like [strings.TrimSpace] but only removes leading whitespace
(as defined by [unicode.IsSpace]) from string `s`; trailing whitespace is kept.

A string consisting solely of whitespace yields an empty string.
*/
func TrimSpaceLeft(s string) (trimmed string) {

	// Locate the first character that is *not* whitespace and keep everything from there on.
	first := strings.IndexFunc(s, func(r rune) bool {
		return !unicode.IsSpace(r)
	})
	if first < 0 {
		return ""
	}

	return s[first:]
}
/*
TrimSpaceRight is like [strings.TrimSpace] but only removes trailing whitespace
(as defined by [unicode.IsSpace]) from string s; leading whitespace is kept.
*/
func TrimSpaceRight(s string) (trimmed string) {
	return strings.TrimRightFunc(s, unicode.IsSpace)
}
/*
getAsciiCharMap returns a lookup "table" for ASCII characters:
charmap[c] is true if and only if byte value c is considered valid.

The table is built in layers, later layers overriding earlier ones:

 1. allowCtl governs the control characters (0x00 to 0x1f inclusive).
 2. allowPrint governs the printable range (0x20 to 0x7f inclusive).
 3. allowExt governs the "extended" range (0x80 to 0xff inclusive).
 4. allowWs, if true, additionally allows \t, \n and \r.
 5. Every byte in incl is allowed.
 6. Every byte in excl is disallowed (so excl wins over everything else).

incl and excl may be nil or empty.
*/
func getAsciiCharMap(allowCtl, allowPrint, allowExt, allowWs bool, incl, excl []byte) (charmap [256]bool) {

	// An int counter is used deliberately: a uint8 could never exceed 0xff
	// and the loop condition would always hold.
	for c := 0; c <= 0xff; c++ {
		switch {
		case c <= 0x1f:
			charmap[c] = allowCtl
		case c <= 0x7f:
			charmap[c] = allowPrint
		default:
			charmap[c] = allowExt
		}
	}

	if allowWs {
		charmap['\t'] = true
		charmap['\n'] = true
		charmap['\r'] = true
	}

	// Ranging over a nil/empty slice is a no-op, so no guards are needed.
	for _, c := range incl {
		charmap[c] = true
	}
	// Applied last so that exclusions take precedence over inclusions.
	for _, c := range excl {
		charmap[c] = false
	}

	return
}
/*
getNewLine returns the line terminator that string s ends with:
"\r\n" if s ends in a Windows newline, "\n" if it ends in a bare *NIX newline,
and an empty string if it ends in neither.

It is too unpredictable/nuanced to be used as part of a public API promise so it isn't exported.
*/
func getNewLine(s string) (nl string) {

	// The longer terminator has to be tested first; "\r\n" also ends in "\n".
	switch {
	case strings.HasSuffix(s, "\r\n"):
		return "\r\n"
	case strings.HasSuffix(s, "\n"):
		return "\n"
	}

	return ""
}