* added some netx funcs * added netx/dnsx * currently updating docs and adding *x funcs to sprigx
545 lines
13 KiB
Go
545 lines
13 KiB
Go
package stringsx
|
|
|
|
import (
|
|
`bytes`
|
|
`errors`
|
|
`fmt`
|
|
`io`
|
|
`slices`
|
|
`strings`
|
|
`unicode`
|
|
)
|
|
|
|
/*
|
|
IsAscii returns true if all characters in string s are ASCII.
|
|
|
|
This simply wraps [IsAsciiSpecial]:
|
|
|
|
isAscii, err = IsAsciiSpecial(s, allowCtl, true, allowExt, true, nil, nil)
|
|
*/
|
|
func IsAscii(s string, allowCtl, allowExt bool) (isAscii bool, err error) {
|
|
|
|
if isAscii, err = IsAsciiSpecial(
|
|
s, allowCtl, true, allowExt, true, nil, nil,
|
|
); err != nil {
|
|
return
|
|
}
|
|
|
|
return
|
|
}
|
|
|
|
/*
|
|
IsAsciiBuf returns true if all of buffer buf is valid ASCII.
|
|
|
|
Note that the buffer will be consumed/read by this function.
|
|
|
|
This simply wraps [IsAsciiBufSpecial]:
|
|
|
|
isAscii, err = IsAsciiBufSpecial(r, allowCtl, true, allowExt, true, nil, nil)
|
|
*/
|
|
func IsAsciiBuf(r io.RuneReader, allowCtl, allowExt bool) (isAscii bool, err error) {
|
|
|
|
if isAscii, err = IsAsciiBufSpecial(
|
|
r, allowCtl, true, allowExt, true, nil, nil,
|
|
); err != nil {
|
|
return
|
|
}
|
|
|
|
return
|
|
}
|
|
|
|
/*
|
|
IsAsciiSpecial allows for specifying specific ASCII ranges.
|
|
|
|
allowCtl, if true, will allow control characters (0x00 to 0x1f inclusive).
|
|
|
|
allowPrint, if true, will allow printable characters (what most people think of
|
|
when they say "ASCII") (0x20 to 0x7f inclusive).
|
|
|
|
allowExt, if true, will allow for "extended ASCII" - some later dialects expand
|
|
to a full 8-bit ASCII range (0x80 to 0xff inclusive).
|
|
|
|
wsCtl, if true, "shifts" the "whitespace control characters" (\t, \n, \r) to the "printable" space
|
|
(such that allowPrint controls their validation). Thus:
|
|
|
|
IsAsciiSpecial(s, false, true, false, true, nil, nil)
|
|
|
|
has the same effect as specifying:
|
|
|
|
IsAsciiSpecial(s, false, true, false, (-), []byte("\t\n\r"), nil)
|
|
|
|
incl, if non-nil and non-empty, allows *additional* characters to be specified as included
|
|
that would normally *not* be allowed.
|
|
|
|
excl, if non-nil and non-empty, invalidates on additional characters that would normally be allowed.
|
|
|
|
excl, if specified, takes precedence over incl if specified.
|
|
|
|
An [AsciiInvalidError] will be returned on the first encountered invalid character.
|
|
*/
|
|
func IsAsciiSpecial(s string, allowCtl, allowPrint, allowExt, allowWs bool, incl, excl []byte) (isAscii bool, err error) {
|
|
|
|
var buf *bytes.Buffer = bytes.NewBufferString(s)
|
|
|
|
if isAscii, err = IsAsciiBufSpecial(buf, allowCtl, allowPrint, allowExt, allowWs, incl, excl); err != nil {
|
|
return
|
|
}
|
|
|
|
return
|
|
}
|
|
|
|
/*
|
|
IsAsciiBufSpecial is the same as [IsAsciiSpecial] but operates on an [io.RuneReader].
|
|
|
|
Note that the buffer will be consumed/read by this function.
|
|
|
|
It will not return an [io.EOF] if encountered, but any other errors encountered will be returned.
|
|
It is expected that r will return an [io.EOF] when exhausted.
|
|
|
|
An [AsciiInvalidError] will be returned on the first encountered invalid character.
|
|
*/
|
|
func IsAsciiBufSpecial(r io.RuneReader, allowCtl, allowPrint, allowExt, allowWs bool, incl, excl []byte) (isAscii bool, err error) {
|
|
|
|
var b rune
|
|
var bLen int
|
|
var nextNewline bool
|
|
var tmpErr *AsciiInvalidError = new(AsciiInvalidError)
|
|
// I know, I know. This is essentually a lookup table. Keeps it speedy.
|
|
var allowed [256]bool = getAsciiCharMap(allowCtl, allowPrint, allowExt, allowWs, incl, excl)
|
|
|
|
for {
|
|
if b, bLen, err = r.ReadRune(); err != nil {
|
|
if errors.Is(err, io.EOF) {
|
|
err = nil
|
|
isAscii = true
|
|
}
|
|
return
|
|
}
|
|
// Set these *before* OK
|
|
if nextNewline {
|
|
tmpErr.Line++
|
|
tmpErr.LineByte = 0
|
|
tmpErr.LineChar = 0
|
|
nextNewline = false
|
|
} else {
|
|
tmpErr.LineChar++
|
|
}
|
|
tmpErr.Char++
|
|
|
|
if b == '\n' {
|
|
nextNewline = true
|
|
}
|
|
if b == rune(0xfffd) {
|
|
// not even valid unicode
|
|
tmpErr.BadChar = b
|
|
tmpErr.BadBytes = []byte(string(b))
|
|
err = tmpErr
|
|
return
|
|
}
|
|
if bLen > 2 || b > 0xff {
|
|
// ASCII only occupies a single byte, ISO-8859-1 occupies 2
|
|
tmpErr.BadChar = b
|
|
tmpErr.BadBytes = []byte(string(b))
|
|
err = tmpErr
|
|
return
|
|
}
|
|
if !allowed[byte(b)] {
|
|
tmpErr.BadChar = b
|
|
tmpErr.BadBytes = []byte{byte(b)}
|
|
err = tmpErr
|
|
return
|
|
}
|
|
|
|
// Set these *after* OK
|
|
tmpErr.LineByte += uint64(bLen)
|
|
tmpErr.Byte += uint64(bLen)
|
|
}
|
|
|
|
isAscii = true
|
|
|
|
return
|
|
}
|
|
|
|
/*
|
|
LenSplit formats string `s` to break at, at most, every `width` characters.
|
|
|
|
Any existing newlines (e.g. \r\n) will be removed during a string/
|
|
substring/line's length calculation. (e.g. `foobarbaz\n` and `foobarbaz\r\n` are
|
|
both considered to be lines of length 9, not 10 and 11 respectively).
|
|
|
|
This also means that any newlines (\n or \r\n) are inherently removed from
|
|
`out` (even if included in `wordWrap`; see below).
|
|
|
|
Note that if `s` is multiline (already contains newlines), they will be respected
|
|
as-is - that is, if a line ends with less than `width` chars and then has a newline,
|
|
it will be preserved as an empty element. That is to say:
|
|
|
|
"foo\nbar\n\n" → []string{"foo", "bar", ""}
|
|
"foo\n\nbar\n" → []string{"foo", "", "bar"}
|
|
|
|
This splitter is particularly simple. If you need wordwrapping, it should be done
|
|
with e.g. [github.com/muesli/reflow/wordwrap].
|
|
*/
|
|
func LenSplit(s string, width uint) (out []string) {
|
|
|
|
var end int
|
|
var line string
|
|
var lineRunes []rune
|
|
|
|
if width == 0 {
|
|
out = []string{s}
|
|
return
|
|
}
|
|
|
|
for line = range strings.Lines(s) {
|
|
line = strings.TrimRight(line, "\n")
|
|
line = strings.TrimRight(line, "\r")
|
|
|
|
lineRunes = []rune(line)
|
|
|
|
if uint(len(lineRunes)) <= width {
|
|
out = append(out, line)
|
|
continue
|
|
}
|
|
|
|
for i := 0; i < len(lineRunes); i += int(width) {
|
|
end = i + int(width)
|
|
if end > len(lineRunes) {
|
|
end = len(lineRunes)
|
|
}
|
|
out = append(out, string(lineRunes[i:end]))
|
|
}
|
|
}
|
|
|
|
return
|
|
}
|
|
|
|
/*
|
|
LenSplitStr wraps [LenSplit] but recombines into a new string with newlines.
|
|
|
|
It's mostly just a convenience wrapper.
|
|
|
|
All arguments remain the same as in [LenSplit] with an additional one,
|
|
`winNewLine`, which if true will use \r\n as the newline instead of \n.
|
|
*/
|
|
func LenSplitStr(s string, width uint, winNewline bool) (out string) {
|
|
|
|
var outSl []string = LenSplit(s, width)
|
|
|
|
if winNewline {
|
|
out = strings.Join(outSl, "\r\n")
|
|
} else {
|
|
out = strings.Join(outSl, "\n")
|
|
}
|
|
|
|
return
|
|
}
|
|
|
|
/*
Pad pads each element in `s` to length `width` using `pad`.
If `pad` is empty, a single space (0x20) will be assumed.
Note that `width` operates on rune size, not byte size.
(In ASCII, they will be the same size.)

If an element of `s` is already `width` runes or longer,
it is returned unchanged (it is never truncated).

`pad` may be longer than one rune. It is repeated as often as needed, and if
the gap is not a whole multiple of `pad` the final repetition is cut short,
e.g. padding "bb" to width 5 with "xy" gives "bbxyx".

If `leftPad` is true, padding will be applied to the "left" ("beginning")
of each element instead of the "right" ("end").

If `width` is 0, `s` itself is returned (not a copy). Otherwise `out` is a
new slice of the same length as `s`.
*/
func Pad(s []string, width uint, pad string, leftPad bool) (out []string) {

	var idx int
	var elem string
	var fill string
	var need uint
	var elemLen uint
	var padLen uint
	var padRunes []rune

	if width == 0 {
		out = s
		return
	}

	if pad == "" {
		pad = " "
	}
	padRunes = []rune(pad)
	padLen = uint(len(padRunes))

	out = make([]string, len(s))

	for idx, elem = range s {
		// Width is measured in runes, so count runes rather than bytes.
		elemLen = uint(len([]rune(elem)))
		if elemLen >= width {
			out[idx] = elem
			continue
		}

		// Whole repetitions of pad, followed by however many leading runes
		// of pad are required to land exactly on width.
		need = width - elemLen
		fill = strings.Repeat(pad, int(need/padLen)) + string(padRunes[:need%padLen])

		if leftPad {
			out[idx] = fmt.Sprintf("%s%s", fill, elem)
		} else {
			out[idx] = fmt.Sprintf("%s%s", elem, fill)
		}
	}

	return
}
|
|
|
|
/*
|
|
Redact provides a "masked" version of string s (e.g. `my_terrible_password` -> `my****************rd`).
|
|
|
|
maskStr is the character or sequence of characters
|
|
to repeat for every masked character of s.
|
|
If an empty string, the default [DefMaskStr] will be used.
|
|
(maskStr does not need to be a single character.
|
|
It is recommended to use a multi-char mask to help obfuscate a string's length.)
|
|
|
|
leading specifies the number of leading characters of s to leave *unmasked*.
|
|
If 0, no leading characters will be unmasked.
|
|
|
|
trailing specifies the number of trailing characters of s to leave *unmasked*.
|
|
if 0, no trailing characters will be unmasked.
|
|
|
|
newlines, if true, will preserve newline characters - otherwise
|
|
they will be treated as regular characters.
|
|
|
|
As a safety precaution, if:
|
|
|
|
len(s) <= (leading + trailing)
|
|
|
|
then the entire string will be *masked* and no unmasking will be performed.
|
|
|
|
Note that this DOES NOT do a string *replace*, it provides a masked version of `s` itself.
|
|
Wrap Redact with [strings.ReplaceAll] if you want to replace a certain value with a masked one.
|
|
*/
|
|
func Redact(s, maskStr string, leading, trailing uint, newlines bool) (redacted string) {
|
|
|
|
var nl string
|
|
var numMasked int
|
|
var sb strings.Builder
|
|
var endIdx int = int(leading)
|
|
|
|
// This condition functionally won't do anything, so just return the input as-is.
|
|
if s == "" {
|
|
return
|
|
}
|
|
|
|
if maskStr == "" {
|
|
maskStr = DefMaskStr
|
|
}
|
|
|
|
if newlines {
|
|
for line := range strings.Lines(s) {
|
|
nl = getNewLine(line)
|
|
sb.WriteString(
|
|
Redact(
|
|
strings.TrimSuffix(line, nl), maskStr, leading, trailing, false,
|
|
),
|
|
)
|
|
sb.WriteString(nl)
|
|
}
|
|
} else {
|
|
if len(s) <= int(leading+trailing) {
|
|
redacted = strings.Repeat(maskStr, len(s))
|
|
return
|
|
}
|
|
|
|
if leading == 0 && trailing == 0 {
|
|
redacted = strings.Repeat(maskStr, len(s))
|
|
return
|
|
}
|
|
|
|
numMasked = len(s) - int(leading+trailing)
|
|
endIdx = endIdx + numMasked
|
|
|
|
if leading > 0 {
|
|
sb.WriteString(s[:int(leading)])
|
|
}
|
|
|
|
sb.WriteString(strings.Repeat(maskStr, numMasked))
|
|
|
|
if trailing > 0 {
|
|
sb.WriteString(s[endIdx:])
|
|
}
|
|
}
|
|
|
|
redacted = sb.String()
|
|
|
|
return
|
|
}
|
|
|
|
// Reverse returns s with its characters in the opposite order.
// The reversal is done per rune rather than per byte, so multi-byte characters stay intact.
func Reverse(s string) (revS string) {

	runes := []rune(s)
	slices.Reverse(runes)

	return string(runes)
}
|
|
|
|
/*
|
|
TrimLines is like [strings.TrimSpace] but operates on *each line* of s.
|
|
It is *NIX-newline (`\n`) vs. Windows-newline (`\r\n`) agnostic.
|
|
The first encountered linebreak (`\n` vs. `\r\n`) are assumed to be
|
|
the canonical linebreak for the rest of s.
|
|
|
|
left, if true, performs a [TrimSpaceLeft] on each line (retaining the newline).
|
|
|
|
right, if true, performs a [TrimSpaceRight] on each line (retaining the newline).
|
|
*/
|
|
func TrimLines(s string, left, right bool) (trimmed string) {
|
|
|
|
var sl string
|
|
var nl string
|
|
var sb strings.Builder
|
|
|
|
// These conditions functionally won't do anything, so just return the input as-is.
|
|
if s == "" {
|
|
return
|
|
}
|
|
if !left && !right {
|
|
trimmed = s
|
|
return
|
|
}
|
|
|
|
for line := range strings.Lines(s) {
|
|
nl = getNewLine(line)
|
|
sl = strings.TrimSuffix(line, nl)
|
|
if left && right {
|
|
sl = strings.TrimSpace(sl)
|
|
} else if left {
|
|
sl = TrimSpaceLeft(sl)
|
|
} else if right {
|
|
sl = TrimSpaceRight(sl)
|
|
}
|
|
sb.WriteString(sl + nl)
|
|
}
|
|
|
|
trimmed = sb.String()
|
|
|
|
return
|
|
}
|
|
|
|
// TrimSpaceLeft strips leading whitespace (as defined by [unicode.IsSpace]) from string `s`.
// Trailing whitespace is left alone; it is the left-hand half of [strings.TrimSpace].
func TrimSpaceLeft(s string) (trimmed string) {
	return strings.TrimLeftFunc(s, unicode.IsSpace)
}
|
|
|
|
/*
TrimSpaceRight strips trailing whitespace (as defined by [unicode.IsSpace]) from string s.
Leading whitespace is left alone; it is the right-hand half of [strings.TrimSpace].
*/
func TrimSpaceRight(s string) (trimmed string) {
	return strings.TrimRightFunc(s, unicode.IsSpace)
}
|
|
|
|
/*
getAsciiCharMap returns a lookup "table" for ASCII characters:
charmap[b] is true if byte value b is to be considered valid.

The table is built in layers, later layers overriding earlier ones:

 1. allowCtl governs the control characters (0x00 to 0x1f inclusive).
 2. allowPrint governs the printable characters (0x20 to 0x7f inclusive;
    note that this range includes DEL, 0x7f).
 3. allowExt governs the "extended ASCII" range (0x80 to 0xff inclusive).
 4. allowWs, if true, enables \t, \n and \r regardless of allowCtl.
 5. Every byte in incl is enabled.
 6. Every byte in excl is disabled (so excl wins over everything else).

incl and excl may be nil or empty.
*/
func getAsciiCharMap(allowCtl, allowPrint, allowExt, allowWs bool, incl, excl []byte) (charmap [256]bool) {

	var idx int
	var b byte

	// Assign every byte value to exactly one of the three classes.
	for idx = range charmap {
		switch {
		case idx <= 0x1f:
			charmap[idx] = allowCtl
		case idx <= 0x7f:
			charmap[idx] = allowPrint
		default:
			charmap[idx] = allowExt
		}
	}

	if allowWs {
		charmap['\t'] = true
		charmap['\n'] = true
		charmap['\r'] = true
	}

	// Ranging over a nil/empty slice is a no-op, so no emptiness checks are needed.
	for _, b = range incl {
		charmap[b] = true
	}
	// Exclusions are applied last so they take precedence over inclusions.
	for _, b = range excl {
		charmap[b] = false
	}

	return
}
|
|
|
|
// getNewLine returns the line terminator that s ends with: "\r\n", "\n", or "" if s ends with neither.
// (A lone trailing "\r" is not treated as a terminator.)
// It is too unpredictable/nuanced to be part of a public API promise, so it is not exported.
func getNewLine(s string) (nl string) {

	// "\r\n" must be tested first, since such a string also ends with "\n".
	switch {
	case strings.HasSuffix(s, "\r\n"):
		nl = "\r\n"
	case strings.HasSuffix(s, "\n"):
		nl = "\n"
	}

	return
}
|