checking in some WIP
* added some netx funcs * added netx/dnsx * currently updating docs and adding *x funcs to sprigx
This commit is contained in:
18
stringsx/func_asciiinvaliderror.go
Normal file
18
stringsx/func_asciiinvaliderror.go
Normal file
@@ -0,0 +1,18 @@
|
||||
package stringsx
|
||||
|
||||
import (
|
||||
`fmt`
|
||||
)
|
||||
|
||||
// Error conforms an [AsciiInvalidError] to an error interface.
|
||||
func (a *AsciiInvalidError) Error() (errStr string) {
|
||||
|
||||
errStr = fmt.Sprintf(
|
||||
"non-ASCII character '%c' at line:linepos %d:%d (byte %d), "+
|
||||
"string position %d (byte %d): bytes %#x, UTF-8 codepoint U+%04X",
|
||||
a.BadChar, a.Line, a.LineChar, a.LineByte,
|
||||
a.Char, a.Byte, a.BadBytes, a.BadChar,
|
||||
)
|
||||
|
||||
return
|
||||
}
|
||||
@@ -1,11 +1,165 @@
|
||||
package stringsx
|
||||
|
||||
import (
|
||||
`bytes`
|
||||
`errors`
|
||||
`fmt`
|
||||
`io`
|
||||
`slices`
|
||||
`strings`
|
||||
`unicode`
|
||||
)
|
||||
|
||||
/*
|
||||
IsAscii returns true if all characters in string s are ASCII.
|
||||
|
||||
This simply wraps [IsAsciiSpecial]:
|
||||
|
||||
isAscii, err = IsAsciiSpecial(s, allowCtl, true, allowExt, true, nil, nil)
|
||||
*/
|
||||
func IsAscii(s string, allowCtl, allowExt bool) (isAscii bool, err error) {
|
||||
|
||||
if isAscii, err = IsAsciiSpecial(
|
||||
s, allowCtl, true, allowExt, true, nil, nil,
|
||||
); err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
/*
|
||||
IsAsciiBuf returns true if all of buffer buf is valid ASCII.
|
||||
|
||||
Note that the buffer will be consumed/read by this function.
|
||||
|
||||
This simply wraps [IsAsciiBufSpecial]:
|
||||
|
||||
isAscii, err = IsAsciiBufSpecial(r, allowCtl, true, allowExt, true, nil, nil)
|
||||
*/
|
||||
func IsAsciiBuf(r io.RuneReader, allowCtl, allowExt bool) (isAscii bool, err error) {
|
||||
|
||||
if isAscii, err = IsAsciiBufSpecial(
|
||||
r, allowCtl, true, allowExt, true, nil, nil,
|
||||
); err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
/*
|
||||
IsAsciiSpecial allows for specifying specific ASCII ranges.
|
||||
|
||||
allowCtl, if true, will allow control characters (0x00 to 0x1f inclusive).
|
||||
|
||||
allowPrint, if true, will allow printable characters (what most people think of
|
||||
when they say "ASCII") (0x20 to 0x7f inclusive).
|
||||
|
||||
allowExt, if true, will allow for "extended ASCII" - some later dialects expand
|
||||
to a full 8-bit ASCII range (0x80 to 0xff inclusive).
|
||||
|
||||
wsCtl, if true, "shifts" the "whitespace control characters" (\t, \n, \r) to the "printable" space
|
||||
(such that allowPrint controls their validation). Thus:
|
||||
|
||||
IsAsciiSpecial(s, false, true, false, true, nil, nil)
|
||||
|
||||
has the same effect as specifying:
|
||||
|
||||
IsAsciiSpecial(s, false, true, false, (-), []byte("\t\n\r"), nil)
|
||||
|
||||
incl, if non-nil and non-empty, allows *additional* characters to be specified as included
|
||||
that would normally *not* be allowed.
|
||||
|
||||
excl, if non-nil and non-empty, invalidates on additional characters that would normally be allowed.
|
||||
|
||||
excl, if specified, takes precedence over incl if specified.
|
||||
|
||||
An [AsciiInvalidError] will be returned on the first encountered invalid character.
|
||||
*/
|
||||
func IsAsciiSpecial(s string, allowCtl, allowPrint, allowExt, allowWs bool, incl, excl []byte) (isAscii bool, err error) {
|
||||
|
||||
var buf *bytes.Buffer = bytes.NewBufferString(s)
|
||||
|
||||
if isAscii, err = IsAsciiBufSpecial(buf, allowCtl, allowPrint, allowExt, allowWs, incl, excl); err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
/*
|
||||
IsAsciiBufSpecial is the same as [IsAsciiSpecial] but operates on an [io.RuneReader].
|
||||
|
||||
Note that the buffer will be consumed/read by this function.
|
||||
|
||||
It will not return an [io.EOF] if encountered, but any other errors encountered will be returned.
|
||||
It is expected that r will return an [io.EOF] when exhausted.
|
||||
|
||||
An [AsciiInvalidError] will be returned on the first encountered invalid character.
|
||||
*/
|
||||
func IsAsciiBufSpecial(r io.RuneReader, allowCtl, allowPrint, allowExt, allowWs bool, incl, excl []byte) (isAscii bool, err error) {
|
||||
|
||||
var b rune
|
||||
var bLen int
|
||||
var nextNewline bool
|
||||
var tmpErr *AsciiInvalidError = new(AsciiInvalidError)
|
||||
// I know, I know. This is essentually a lookup table. Keeps it speedy.
|
||||
var allowed [256]bool = getAsciiCharMap(allowCtl, allowPrint, allowExt, allowWs, incl, excl)
|
||||
|
||||
for {
|
||||
if b, bLen, err = r.ReadRune(); err != nil {
|
||||
if errors.Is(err, io.EOF) {
|
||||
err = nil
|
||||
isAscii = true
|
||||
}
|
||||
return
|
||||
}
|
||||
// Set these *before* OK
|
||||
if nextNewline {
|
||||
tmpErr.Line++
|
||||
tmpErr.LineByte = 0
|
||||
tmpErr.LineChar = 0
|
||||
nextNewline = false
|
||||
} else {
|
||||
tmpErr.LineChar++
|
||||
}
|
||||
tmpErr.Char++
|
||||
|
||||
if b == '\n' {
|
||||
nextNewline = true
|
||||
}
|
||||
if b == rune(0xfffd) {
|
||||
// not even valid unicode
|
||||
tmpErr.BadChar = b
|
||||
tmpErr.BadBytes = []byte(string(b))
|
||||
err = tmpErr
|
||||
return
|
||||
}
|
||||
if bLen > 2 || b > 0xff {
|
||||
// ASCII only occupies a single byte, ISO-8859-1 occupies 2
|
||||
tmpErr.BadChar = b
|
||||
tmpErr.BadBytes = []byte(string(b))
|
||||
err = tmpErr
|
||||
return
|
||||
}
|
||||
if !allowed[byte(b)] {
|
||||
tmpErr.BadChar = b
|
||||
tmpErr.BadBytes = []byte{byte(b)}
|
||||
err = tmpErr
|
||||
return
|
||||
}
|
||||
|
||||
// Set these *after* OK
|
||||
tmpErr.LineByte += uint64(bLen)
|
||||
tmpErr.Byte += uint64(bLen)
|
||||
}
|
||||
|
||||
isAscii = true
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
/*
|
||||
LenSplit formats string `s` to break at, at most, every `width` characters.
|
||||
|
||||
@@ -252,6 +406,18 @@ func Redact(s, maskStr string, leading, trailing uint, newlines bool) (redacted
|
||||
return
|
||||
}
|
||||
|
||||
// Reverse returns string s with its characters in the opposite order.
// The reversal is done rune by rune (not byte by byte), so multi-byte
// characters are kept intact.
func Reverse(s string) (revS string) {

	var runes []rune = []rune(s)

	slices.Reverse(runes)

	return string(runes)
}
|
||||
|
||||
/*
|
||||
TrimLines is like [strings.TrimSpace] but operates on *each line* of s.
|
||||
It is *NIX-newline (`\n`) vs. Windows-newline (`\r\n`) agnostic.
|
||||
@@ -313,6 +479,58 @@ func TrimSpaceRight(s string) (trimmed string) {
|
||||
return
|
||||
}
|
||||
|
||||
/*
getAsciiCharMap returns a lookup "table" for ASCII characters.

The table is indexed by byte value; an entry is true if that byte is allowed.
It is built in this order (later steps override earlier ones):

 1. allowCtl decides 0x00 to 0x1f inclusive.
 2. allowPrint decides 0x20 to 0x7f inclusive.
 3. allowExt decides 0x80 to 0xff inclusive.
 4. allowWs, if true, allows \t, \n, and \r.
 5. Every byte in incl is allowed.
 6. Every byte in excl is disallowed (so excl wins over incl).

incl and excl may be nil or empty.
*/
func getAsciiCharMap(allowCtl, allowPrint, allowExt, allowWs bool, incl, excl []byte) (charmap [256]bool) {

	var idx int
	var b byte

	for idx = 0; idx < len(charmap); idx++ {
		switch {
		case idx <= 0x1f:
			charmap[idx] = allowCtl
		case idx <= 0x7f:
			charmap[idx] = allowPrint
		default:
			charmap[idx] = allowExt
		}
	}

	if allowWs {
		charmap['\t'] = true
		charmap['\n'] = true
		charmap['\r'] = true
	}

	// Ranging over a nil/empty slice is a no-op, so no explicit guards are needed.
	for _, b = range incl {
		charmap[b] = true
	}
	for _, b = range excl {
		charmap[b] = false
	}

	return
}
|
||||
|
||||
// getNewLine is too unpredictable/nuanced to be used as part of a public API promise so it isn't exported.
|
||||
func getNewLine(s string) (nl string) {
|
||||
|
||||
|
||||
@@ -37,6 +37,17 @@ type (
|
||||
}
|
||||
)
|
||||
|
||||
func TestFuncsAscii(t *testing.T) {
|
||||
|
||||
var err error
|
||||
// var s string = "This is a §\nmulti-line\nstring 😀 with\nunicode text.\n"
|
||||
var s string = "This is a §\nmulti-line\nstring with\nno unicode text.\n"
|
||||
|
||||
if _, err = IsAscii(s, false, true); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRedact(t *testing.T) {
|
||||
|
||||
var out string
|
||||
@@ -171,6 +182,18 @@ func TestRedact(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestReverse(t *testing.T) {
|
||||
|
||||
var rev string
|
||||
var s string = "012345679abcdef"
|
||||
|
||||
rev = Reverse(s)
|
||||
if rev != "fedcba976543210" {
|
||||
t.Errorf("reverse of s '%s'; expected 'fedcba976543210', got '%s'", s, rev)
|
||||
}
|
||||
t.Logf("s: %s\nReverse: %s", s, rev)
|
||||
}
|
||||
|
||||
func TestTrimLines(t *testing.T) {
|
||||
|
||||
var out string
|
||||
|
||||
25
stringsx/types.go
Normal file
25
stringsx/types.go
Normal file
@@ -0,0 +1,25 @@
|
||||
package stringsx
|
||||
|
||||
/*
AsciiInvalidError is the error returned by the IsAscii* validations.

It describes the first invalid character that was found: where it was found
(by line, and across the whole input) and what it was.
*/
type AsciiInvalidError struct {
	// Line is the 0-indexed number of the line on which the invalid character was found.
	Line uint64
	// LineByte is the 0-indexed byte position within line Line.
	LineByte uint64
	// LineChar is the 0-indexed character (rune) position of the invalid character within line Line.
	LineChar uint64
	// Byte is the 0-indexed byte position across the entire input.
	Byte uint64
	// Char is the 0-indexed character (rune) position across the entire input.
	Char uint64
	// BadChar is the invalid rune.
	BadChar rune
	// BadBytes is BadChar as bytes.
	BadBytes []byte
}
|
||||
Reference in New Issue
Block a user