package stringsx import ( `bytes` `errors` `fmt` `io` `slices` `strings` `unicode` ) /* IsAscii returns true if all characters in string s are ASCII. This simply wraps [IsAsciiSpecial]: isAscii, err = IsAsciiSpecial(s, allowCtl, true, allowExt, true, nil, nil) */ func IsAscii(s string, allowCtl, allowExt bool) (isAscii bool, err error) { if isAscii, err = IsAsciiSpecial( s, allowCtl, true, allowExt, true, nil, nil, ); err != nil { return } return } /* IsAsciiBuf returns true if all of buffer buf is valid ASCII. Note that the buffer will be consumed/read by this function. This simply wraps [IsAsciiBufSpecial]: isAscii, err = IsAsciiBufSpecial(r, allowCtl, true, allowExt, true, nil, nil) */ func IsAsciiBuf(r io.RuneReader, allowCtl, allowExt bool) (isAscii bool, err error) { if isAscii, err = IsAsciiBufSpecial( r, allowCtl, true, allowExt, true, nil, nil, ); err != nil { return } return } /* IsAsciiSpecial allows for specifying specific ASCII ranges. allowCtl, if true, will allow control characters (0x00 to 0x1f inclusive). allowPrint, if true, will allow printable characters (what most people think of when they say "ASCII") (0x20 to 0x7f inclusive). allowExt, if true, will allow for "extended ASCII" - some later dialects expand to a full 8-bit ASCII range (0x80 to 0xff inclusive). wsCtl, if true, "shifts" the "whitespace control characters" (\t, \n, \r) to the "printable" space (such that allowPrint controls their validation). Thus: IsAsciiSpecial(s, false, true, false, true, nil, nil) has the same effect as specifying: IsAsciiSpecial(s, false, true, false, (-), []byte("\t\n\r"), nil) incl, if non-nil and non-empty, allows *additional* characters to be specified as included that would normally *not* be allowed. excl, if non-nil and non-empty, invalidates on additional characters that would normally be allowed. excl, if specified, takes precedence over incl if specified. An [AsciiInvalidError] will be returned on the first encountered invalid character. 
*/ func IsAsciiSpecial(s string, allowCtl, allowPrint, allowExt, allowWs bool, incl, excl []byte) (isAscii bool, err error) { var buf *bytes.Buffer = bytes.NewBufferString(s) if isAscii, err = IsAsciiBufSpecial(buf, allowCtl, allowPrint, allowExt, allowWs, incl, excl); err != nil { return } return } /* IsAsciiBufSpecial is the same as [IsAsciiSpecial] but operates on an [io.RuneReader]. Note that the buffer will be consumed/read by this function. It will not return an [io.EOF] if encountered, but any other errors encountered will be returned. It is expected that r will return an [io.EOF] when exhausted. An [AsciiInvalidError] will be returned on the first encountered invalid character. */ func IsAsciiBufSpecial(r io.RuneReader, allowCtl, allowPrint, allowExt, allowWs bool, incl, excl []byte) (isAscii bool, err error) { var b rune var bLen int var nextNewline bool var tmpErr *AsciiInvalidError = new(AsciiInvalidError) // I know, I know. This is essentually a lookup table. Keeps it speedy. var allowed [256]bool = getAsciiCharMap(allowCtl, allowPrint, allowExt, allowWs, incl, excl) for { if b, bLen, err = r.ReadRune(); err != nil { if errors.Is(err, io.EOF) { err = nil isAscii = true } return } // Set these *before* OK if nextNewline { tmpErr.Line++ tmpErr.LineByte = 0 tmpErr.LineChar = 0 nextNewline = false } else { tmpErr.LineChar++ } tmpErr.Char++ if b == '\n' { nextNewline = true } if b == rune(0xfffd) { // not even valid unicode tmpErr.BadChar = b tmpErr.BadBytes = []byte(string(b)) err = tmpErr return } if bLen > 2 || b > 0xff { // ASCII only occupies a single byte, ISO-8859-1 occupies 2 tmpErr.BadChar = b tmpErr.BadBytes = []byte(string(b)) err = tmpErr return } if !allowed[byte(b)] { tmpErr.BadChar = b tmpErr.BadBytes = []byte{byte(b)} err = tmpErr return } // Set these *after* OK tmpErr.LineByte += uint64(bLen) tmpErr.Byte += uint64(bLen) } isAscii = true return } /* LenSplit formats string `s` to break at, at most, every `width` characters. 
Any existing newlines (e.g. \r\n) will be removed during a string/ substring/line's length calculation. (e.g. `foobarbaz\n` and `foobarbaz\r\n` are both considered to be lines of length 9, not 10 and 11 respectively). This also means that any newlines (\n or \r\n) are inherently removed from `out` (even if included in `wordWrap`; see below). Note that if `s` is multiline (already contains newlines), they will be respected as-is - that is, if a line ends with less than `width` chars and then has a newline, it will be preserved as an empty element. That is to say: "foo\nbar\n\n" → []string{"foo", "bar", ""} "foo\n\nbar\n" → []string{"foo", "", "bar"} This splitter is particularly simple. If you need wordwrapping, it should be done with e.g. [github.com/muesli/reflow/wordwrap]. */ func LenSplit(s string, width uint) (out []string) { var end int var line string var lineRunes []rune if width == 0 { out = []string{s} return } for line = range strings.Lines(s) { line = strings.TrimRight(line, "\n") line = strings.TrimRight(line, "\r") lineRunes = []rune(line) if uint(len(lineRunes)) <= width { out = append(out, line) continue } for i := 0; i < len(lineRunes); i += int(width) { end = i + int(width) if end > len(lineRunes) { end = len(lineRunes) } out = append(out, string(lineRunes[i:end])) } } return } /* LenSplitStr wraps [LenSplit] but recombines into a new string with newlines. It's mostly just a convenience wrapper. All arguments remain the same as in [LenSplit] with an additional one, `winNewLine`, which if true will use \r\n as the newline instead of \n. */ func LenSplitStr(s string, width uint, winNewline bool) (out string) { var outSl []string = LenSplit(s, width) if winNewline { out = strings.Join(outSl, "\r\n") } else { out = strings.Join(outSl, "\n") } return } /* Pad pads each element in `s` to length `width` using `pad`. If `pad` is empty, a single space (0x20) will be assumed. Note that `width` operates on rune size, not byte size. 
(In ASCII, they will be the same size.) If a line in `s` is greater than or equal to `width`, no padding will be performed. If `leftPad` is true, padding will be applied to the "left" (beginning") of each element instead of the "right" ("end"). */ func Pad(s []string, width uint, pad string, leftPad bool) (out []string) { var idx int var padIdx int var runeIdx int var padLen uint var elem string var unpadLen uint var tmpPadLen int var padRunes []rune var tmpPad []rune if width == 0 { out = s return } out = make([]string, len(s)) // Easy; supported directly in fmt. if pad == "" { for idx, elem = range s { if leftPad { out[idx] = fmt.Sprintf("%*s", width, elem) } else { out[idx] = fmt.Sprintf("%-*s", width, elem) } } return } // This gets a little more tricky. padRunes = []rune(pad) padLen = uint(len(padRunes)) for idx, elem = range s { // First we need to know the number of runes in elem. unpadLen = uint(len([]rune(elem))) // If it's more than/equal to width, as-is. if unpadLen >= width { out[idx] = elem } else { // Otherwise, we need to construct/calculate a pad. if (width-unpadLen)%padLen == 0 { // Also easy enough. if leftPad { out[idx] = fmt.Sprintf("%s%s", strings.Repeat(pad, int((width-unpadLen)/padLen)), elem) } else { out[idx] = fmt.Sprintf("%s%s", elem, strings.Repeat(pad, int((width-unpadLen)/padLen))) } } else { // This is where it gets a little hairy. tmpPad = []rune{} tmpPadLen = int(width - unpadLen) idx = 0 padIdx = 0 for runeIdx = range tmpPadLen { tmpPad[runeIdx] = padRunes[padIdx] if uint(padIdx) >= padLen { padIdx = 0 } else { padIdx++ } runeIdx++ } if leftPad { out[idx] = fmt.Sprintf("%s%s", string(tmpPad), elem) } else { out[idx] = fmt.Sprintf("%s%s", elem, string(tmpPad)) } } } } return } /* Redact provides a "masked" version of string s (e.g. `my_terrible_password` -> `my****************rd`). maskStr is the character or sequence of characters to repeat for every masked character of s. If an empty string, the default [DefMaskStr] will be used. 
(maskStr does not need to be a single character. It is recommended to use a multi-char mask to help obfuscate a string's length.) leading specifies the number of leading characters of s to leave *unmasked*. If 0, no leading characters will be unmasked. trailing specifies the number of trailing characters of s to leave *unmasked*. if 0, no trailing characters will be unmasked. newlines, if true, will preserve newline characters - otherwise they will be treated as regular characters. As a safety precaution, if: len(s) <= (leading + trailing) then the entire string will be *masked* and no unmasking will be performed. Note that this DOES NOT do a string *replace*, it provides a masked version of `s` itself. Wrap Redact with [strings.ReplaceAll] if you want to replace a certain value with a masked one. */ func Redact(s, maskStr string, leading, trailing uint, newlines bool) (redacted string) { var nl string var numMasked int var sb strings.Builder var endIdx int = int(leading) // This condition functionally won't do anything, so just return the input as-is. if s == "" { return } if maskStr == "" { maskStr = DefMaskStr } if newlines { for line := range strings.Lines(s) { nl = getNewLine(line) sb.WriteString( Redact( strings.TrimSuffix(line, nl), maskStr, leading, trailing, false, ), ) sb.WriteString(nl) } } else { if len(s) <= int(leading+trailing) { redacted = strings.Repeat(maskStr, len(s)) return } if leading == 0 && trailing == 0 { redacted = strings.Repeat(maskStr, len(s)) return } numMasked = len(s) - int(leading+trailing) endIdx = endIdx + numMasked if leading > 0 { sb.WriteString(s[:int(leading)]) } sb.WriteString(strings.Repeat(maskStr, numMasked)) if trailing > 0 { sb.WriteString(s[endIdx:]) } } redacted = sb.String() return } // Reverse reverses string s. (It's absolutely insane that this isn't in stdlib.) 
func Reverse(s string) (revS string) { var rsl []rune = []rune(s) slices.Reverse(rsl) revS = string(rsl) return } /* TrimLines is like [strings.TrimSpace] but operates on *each line* of s. It is *NIX-newline (`\n`) vs. Windows-newline (`\r\n`) agnostic. The first encountered linebreak (`\n` vs. `\r\n`) are assumed to be the canonical linebreak for the rest of s. left, if true, performs a [TrimSpaceLeft] on each line (retaining the newline). right, if true, performs a [TrimSpaceRight] on each line (retaining the newline). */ func TrimLines(s string, left, right bool) (trimmed string) { var sl string var nl string var sb strings.Builder // These conditions functionally won't do anything, so just return the input as-is. if s == "" { return } if !left && !right { trimmed = s return } for line := range strings.Lines(s) { nl = getNewLine(line) sl = strings.TrimSuffix(line, nl) if left && right { sl = strings.TrimSpace(sl) } else if left { sl = TrimSpaceLeft(sl) } else if right { sl = TrimSpaceRight(sl) } sb.WriteString(sl + nl) } trimmed = sb.String() return } // TrimSpaceLeft is like [strings.TrimSpace] but only removes leading whitespace from string `s`. func TrimSpaceLeft(s string) (trimmed string) { trimmed = strings.TrimLeftFunc(s, unicode.IsSpace) return } /* TrimSpaceRight is like [strings.TrimSpace] but only removes trailing whitespace from string s. */ func TrimSpaceRight(s string) (trimmed string) { trimmed = strings.TrimRightFunc(s, unicode.IsSpace) return } // getAsciiCharMap returns a lookup "table" for ASCII characters. 
// The table is indexed by byte value; charmap[c] is true if c is acceptable.
//
// Each byte value belongs to exactly one range, and that range's flag decides it:
//
//	0x00-0x1f  control    (allowCtl)
//	0x20-0x7f  printable  (allowPrint)
//	0x80-0xff  extended   (allowExt)
//
// On top of that, in this order: allowWs switches \t, \n and \r on,
// every byte in incl is switched on, and every byte in excl is switched off
// (so excl wins over everything else).
func getAsciiCharMap(allowCtl, allowPrint, allowExt, allowWs bool, incl, excl []byte) (charmap [256]bool) {

	for c := range charmap {
		switch {
		case c <= 0x1f:
			charmap[c] = allowCtl
		case c <= 0x7f:
			charmap[c] = allowPrint
		default:
			charmap[c] = allowExt
		}
	}

	if allowWs {
		charmap['\t'] = true
		charmap['\n'] = true
		charmap['\r'] = true
	}

	// Ranging over a nil/empty slice is a no-op, so no explicit emptiness checks are needed.
	for _, c := range incl {
		charmap[c] = true
	}
	// Exclusions are applied last so that they take precedence.
	for _, c := range excl {
		charmap[c] = false
	}

	return
}

// getNewLine is too unpredictable/nuanced to be used as part of a public API promise so it isn't exported.
//
// It returns the line ending that s ends with: "\r\n", "\n", or "" if s ends with neither.
// A lone trailing "\r" is not treated as a line ending.
func getNewLine(s string) (nl string) {

	switch {
	case strings.HasSuffix(s, "\r\n"):
		nl = "\r\n"
	case strings.HasSuffix(s, "\n"):
		nl = "\n"
	}

	return
}