5 Commits

Author SHA1 Message Date
brent saner
07951f1f03 v1.9.4
FIXED:
* remap.ReMap.MapString() was not properly correllating groups. It is
  now.
2025-08-17 00:45:24 -04:00
brent saner
bae0abe960 v1.9.3
IMPROVED:
* Better documentation for remap
2025-08-12 00:06:51 -04:00
brent saner
368ae0cb8e v1.9.2
FIX:
* Yeah so the ReMap.Map* stuff was kind of broken hard. It's fixed now.
2025-08-04 04:26:52 +00:00
brent saner
154170c0e5 v1.9.1
ADDED:
* remap package
2025-08-02 09:39:06 -04:00
brent saner
d9bd928edb v1.9.0
ADD:
* `iox` subpackage

FIX:
* `logging` now has a way to return logWritier directly
* added significant `io.*` interface compat to logWriter -- allowing a `logging.Logger` to essentially be used for a large amount of io interaction in other libraries.
2025-07-31 03:45:32 -04:00
25 changed files with 913 additions and 42 deletions

15
go.mod
View File

@@ -1,10 +1,15 @@
module r00t2.io/goutils
go 1.16
go 1.24.5
require (
github.com/coreos/go-systemd v0.0.0-20191104093116-d3cd4ed1dbcf
github.com/google/uuid v1.3.0
golang.org/x/sys v0.0.0-20211216021012-1d35b9e2eb4e
r00t2.io/sysutils v1.1.1
github.com/coreos/go-systemd/v22 v22.5.0
github.com/google/uuid v1.6.0
golang.org/x/sys v0.34.0
r00t2.io/sysutils v1.14.0
)
require (
github.com/djherbis/times v1.6.0 // indirect
golang.org/x/sync v0.16.0 // indirect
)

21
go.sum
View File

@@ -1,8 +1,13 @@
github.com/coreos/go-systemd v0.0.0-20191104093116-d3cd4ed1dbcf h1:iW4rZ826su+pqaw19uhpSCzhj44qo35pNgKFGqzDKkU=
github.com/coreos/go-systemd v0.0.0-20191104093116-d3cd4ed1dbcf/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4=
github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I=
github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
golang.org/x/sys v0.0.0-20211216021012-1d35b9e2eb4e h1:fLOSk5Q00efkSvAm+4xcoXD+RRmLmmulPn5I3Y9F2EM=
golang.org/x/sys v0.0.0-20211216021012-1d35b9e2eb4e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
r00t2.io/sysutils v1.1.1 h1:q2P5u50HIIRk6muCPo1Gpapy6sNT4oaB1l2O/C/mi3A=
r00t2.io/sysutils v1.1.1/go.mod h1:Wlfi1rrJpoKBOjWiYM9rw2FaiZqraD6VpXyiHgoDo/o=
github.com/coreos/go-systemd/v22 v22.5.0 h1:RrqgGjYQKalulkV8NGVIfkXQf6YYmOyiJKk8iXXhfZs=
github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc=
github.com/djherbis/times v1.6.0 h1:w2ctJ92J8fBvWPxugmXIv7Nz7Q3iDMKNx9v5ocVH20c=
github.com/djherbis/times v1.6.0/go.mod h1:gOHeRAz2h+VJNZ5Gmc/o7iD9k4wW7NMVqieYCY99oc0=
github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
golang.org/x/sync v0.16.0 h1:ycBJEhp9p4vXvUZNszeOq0kGTPghopOL8q0fq3vstxw=
golang.org/x/sync v0.16.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA=
golang.org/x/sys v0.0.0-20220615213510-4f61da869c0c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.34.0 h1:H5Y5sJ2L2JRdyv7ROF1he/lPdvFsd0mJHFw2ThKHxLA=
golang.org/x/sys v0.34.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
r00t2.io/sysutils v1.14.0/go.mod h1:ZJ7gZxFVQ7QIokQ5fPZr7wl0XO5Iu+LqtE8j3ciRINw=

4
iox/docs.go Normal file
View File

@@ -0,0 +1,4 @@
/*
Package iox includes extensions to the stdlib `io` module.
*/
package iox

9
iox/errs.go Normal file
View File

@@ -0,0 +1,9 @@
package iox
import (
`errors`
)
var (
ErrBufTooSmall error = errors.New("buffer too small; buffer size must be > 0")
)

41
iox/funcs.go Normal file
View File

@@ -0,0 +1,41 @@
package iox
import (
`io`
)
/*
CopyBufN is a mix between io.CopyN and io.CopyBuffer.
Despite what the docs may suggest, io.CopyN does NOT *read* n bytes from src AND write n bytes to dst.
Instead, it always reads 32 KiB from src, and writes n bytes to dst.
There are, of course, cases where this is deadfully undesired.
One can, of course, use io.CopyBuffer, but this is a bit annoying since you then have to provide a buffer yourself.
This convenience-wraps io.CopyBuffer to have a similar signature to io.CopyN but properly uses n for both reading and writing.
*/
func CopyBufN(dst io.Writer, src io.Reader, n int64) (written int64, err error) {
var b []byte
if n <= 0 {
err = ErrBufTooSmall
return
}
b = make([]byte, n)
written, err = io.CopyBuffer(dst, src, b)
return
}
// CopyBufWith allows for specifying a buffer allocator function, otherwise acts as CopyBufN.
func CopyBufWith(dst io.Writer, src io.Reader, bufFunc func() (b []byte)) (written int64, err error) {
written, err = io.CopyBuffer(dst, src, bufFunc())
return
}

8
iox/types.go Normal file
View File

@@ -0,0 +1,8 @@
package iox
type (
// RuneWriter matches the behavior of *(bytes.Buffer).WriteRune and *(bufio.Writer).WriteRune
RuneWriter interface {
WriteRune(r rune) (n int, err error)
}
)

View File

@@ -4,19 +4,20 @@
-- no native Go support (yet)?
--- https://developer.apple.com/forums/thread/773369
- add a `log/slog` logging.Logger?
- Implement code line/func/etc. (only for debug?):
https://stackoverflow.com/a/24809646
https://golang.org/pkg/runtime/#Caller
-- log.LlongFile and log.Lshortfile flags don't currently work properly for StdLogger/FileLogger; they refer to the file in logging package rather than the caller.
-- ZeroLog seems to be able to do it, take a peek there.
- StdLogger2; where stdout and stderr are both logged to depending on severity level.
- make configurable via OR bitmask
- Suport remote loggers? (eventlog, syslog, systemd)
- Suport remote loggers? (eventlog, syslog, journald)
- JSON logger? YAML logger? XML logger?
- DOCS.
-- Done, but flesh out.
- Implement io.Writer interfaces

View File

@@ -11,9 +11,12 @@ These particular loggers (logging.Logger) available are:
WinLogger (Windows only)
There is a seventh type of logging.Logger, MultiLogger, that allows for multiple loggers to be written to with a single call.
As you may have guessed, NullLogger doesn't actually log anything but is fully "functional" as a logging.Logger.
(This is similar to stdlib's io.MultiWriter()'s return value, but with priority awareness and fmt string support).
Note that for some Loggers, the prefix may be modified - "literal" loggers (StdLogger and FileLogger) will append a space to the end of the prefix.
As you may have guessed, NullLogger doesn't actually log anything but is fully "functional" as a logging.Logger (similar to io.discard/io.Discard()'s return).
Note that for some Loggers, the prefix may be modified after the Logger has already initialized.
"Literal" loggers (StdLogger and FileLogger) will append a space to the end of the prefix by default.
If this is undesired (unlikely), you will need to modify (Logger).Prefix and run (Logger).Logger.SetPrefix(yourPrefixHere) for the respective logger.
Every logging.Logger type has the following methods that correspond to certain "levels".
@@ -45,5 +48,17 @@ logging.Logger types also have the following methods:
In some cases, Logger.Setup and Logger.Shutdown are no-ops. In other cases, they perform necessary initialization/cleanup and closing of the logger.
It is recommended to *always* run Setup and Shutdown before and after using, respectively, regardless of the actual logging.Logger type.
Lastly, all logging.Loggers have a ToLogger() method. This returns a *log.Logger (from stdlib log), which also conforms to io.Writer inherently.
In addition. all have a ToRaw() method, which extends a Logger even further and returns an unexported type (*logging.logWriter) compatible with:
- io.ByteWriter
- io.Writer
- io.WriteCloser (Shutdown() on the Logger backend is called during Close(), rendering the underlying Logger unsafe to use afterwards)
- io.StringWriter
and, if stdlib io ever defines an e.g. RuneWriter (WriteRune(r rune) (n int, err error)), it will conform to that too (see (r00t2.io/goutils/iox).RuneWriter).
Obviously this and io.ByteWriter are fairly silly, as they're intended to be high-speed throughput-optimized methods, but if you wanted to e.g.
log every single byte on a wire as a separate log message, go ahead; I'm not your dad.
*/
package logging

View File

@@ -1,7 +1,7 @@
package logging
import (
`errors`
"errors"
)
var (
@@ -12,6 +12,8 @@ var (
exists with too restrictive perms to write/append to, and/or could not be created.
*/
ErrInvalidFile error = errors.New("a FileLogger was requested but the file does not exist and cannot be created")
// ErrInvalidRune is returned if a rune was expected but it is not a valid UTF-8 codepoint.
ErrInvalidRune error = errors.New("specified rune is not valid UTF-8 codepoint")
// ErrNoEntry indicates that the user attempted to MultiLogger.RemoveLogger a Logger but one by that identifier does not exist.
ErrNoEntry error = errors.New("the Logger specified to be removed does not exist")
)

View File

@@ -1,9 +1,33 @@
package logging
import (
"log"
"os"
)
/*
ToLog returns a stdlib *log.Logger from a logging.Logger. It simply wraps the (logging.Logger).ToLogger() methods.
prio is an OR'd logPrio of the Priority* constants.
*/
func ToLog(l Logger, prio logPrio) (stdLibLog *log.Logger) {
stdLibLog = l.ToLogger(prio)
return
}
// ToRaw returns a *logWriter from a logging.Logger. It is an alternative to the (logging.Logger).ToRaw() methods.
func ToRaw(l Logger, prio logPrio) (raw *logWriter) {
raw = &logWriter{
backend: l,
prio: prio,
}
return
}
// testOpen attempts to open a file for writing to test for suitability as a LogFile path.
func testOpen(path string) (success bool, err error) {

View File

@@ -223,7 +223,15 @@ func (l *FileLogger) Warning(s string, v ...interface{}) (err error) {
// ToLogger returns a stdlib log.Logger.
func (l *FileLogger) ToLogger(prio logPrio) (stdLibLog *log.Logger) {
stdLibLog = log.New(&logWriter{backend: l, prio: prio}, "", 0)
stdLibLog = log.New(l.ToRaw(prio), "", 0)
return
}
// ToRaw returns a *logWriter.
func (l *FileLogger) ToRaw(prio logPrio) (raw *logWriter) {
raw = &logWriter{backend: l, prio: prio}
return
}

View File

@@ -5,7 +5,7 @@ import (
`os`
`path`
sysd `github.com/coreos/go-systemd/journal`
sysd `github.com/coreos/go-systemd/v22/journal`
`r00t2.io/goutils/bitmask`
`r00t2.io/sysutils/paths`
)

View File

@@ -1,10 +1,34 @@
package logging
import (
`r00t2.io/goutils/multierr`
"unicode/utf8"
"r00t2.io/goutils/multierr"
)
// Write writes bytes b to the underlying Logger's priority level if the logWriter's priority level(s) match.
/*
Close calls Logger.Shutdown() on the underlying Logger.
The Logger *must not be used* after this; it will need to be re-initialized with Logger.Setup()
or a new Logger (and thuse new logWriter) must be created to replace it.
It (along with logWriter.Write()) conforms to WriteCloser().
*/
func (l *logWriter) Close() (err error) {
if err = l.backend.Shutdown(); err != nil {
return
}
return
}
/*
Write writes bytes b to the underlying Logger's priority level if the logWriter's priority level(s) match.
It conforms to io.Writer. n will *always* == len(b) on success, because otherwise n would technically be >= len(b)
(if multiple priorities are enabled), which is undefined behavior per io.Writer.
b is converted to a string to normalize to the underlying Logger.
*/
func (l *logWriter) Write(b []byte) (n int, err error) {
var s string
@@ -70,5 +94,116 @@ func (l *logWriter) Write(b []byte) (n int, err error) {
return
}
n = len(b)
return
}
/*
WriteByte conforms a logWriter to an io.ByteWriter. (It just wraps logWriter.Write().)
You should probably never use this; the logging overhead/prefix is going to be more data than the single byte itself.
c is converted to a string to normalize to the underlying Logger.
*/
func (l *logWriter) WriteByte(c byte) (err error) {
if _, err = l.Write([]byte{c}); err != nil {
return
}
return
}
/*
WriteRune follows the same signature of (bytes.Buffer).WriteRune() and (bufio.Writer).WriteRune(); thus if `io` ever defines an io.RuneWriter interface, here ya go.
n will *always* be equal to (unicode/utf8).RuneLen(r), unless r is an "invalid rune" -- in which case n will be 0 and err will be ErrInvalidRune..
*/
func (l *logWriter) WriteRune(r rune) (n int, err error) {
var b []byte
n = utf8.RuneLen(r)
if n < 0 {
err = ErrInvalidRune
n = 0
return
}
b = make([]byte, n)
utf8.EncodeRune(b, r)
if n, err = l.Write(b); err != nil {
return
}
return
}
/*
WriteString writes string s to the underlying Logger's priority level if the logWriter's priority level(s) match.
It conforms to io.StringWriter. n will *always* == len(s) on success, because otherwise n would technically be >= len(s)
(if multiple priorities are enabled), which is undefined behavior per io.StringWriter.
*/
func (l *logWriter) WriteString(s string) (n int, err error) {
var mErr *multierr.MultiError = multierr.NewMultiError(nil)
if l.prio.HasFlag(PriorityEmergency) {
if err = l.backend.Emerg(s); err != nil {
mErr.AddError(err)
err = nil
}
}
if l.prio.HasFlag(PriorityAlert) {
if err = l.backend.Alert(s); err != nil {
mErr.AddError(err)
err = nil
}
}
if l.prio.HasFlag(PriorityCritical) {
if err = l.backend.Crit(s); err != nil {
mErr.AddError(err)
err = nil
}
}
if l.prio.HasFlag(PriorityError) {
if err = l.backend.Err(s); err != nil {
mErr.AddError(err)
err = nil
}
}
if l.prio.HasFlag(PriorityWarning) {
if err = l.backend.Warning(s); err != nil {
mErr.AddError(err)
err = nil
}
}
if l.prio.HasFlag(PriorityNotice) {
if err = l.backend.Notice(s); err != nil {
mErr.AddError(err)
err = nil
}
}
if l.prio.HasFlag(PriorityInformational) {
if err = l.backend.Info(s); err != nil {
mErr.AddError(err)
err = nil
}
}
if l.prio.HasFlag(PriorityDebug) {
if err = l.backend.Debug(s); err != nil {
mErr.AddError(err)
err = nil
}
}
if !mErr.IsEmpty() {
err = mErr
return
}
n = len(s)
return
}

View File

@@ -3,7 +3,7 @@ package logging
import (
"errors"
"fmt"
`log`
"log"
"sync"
"r00t2.io/goutils/multierr"
@@ -375,7 +375,15 @@ func (m *MultiLogger) Warning(s string, v ...interface{}) (err error) {
// ToLogger returns a stdlib log.Logger.
func (m *MultiLogger) ToLogger(prio logPrio) (stdLibLog *log.Logger) {
stdLibLog = log.New(&logWriter{backend: m, prio: prio}, "", 0)
stdLibLog = log.New(m.ToRaw(prio), "", 0)
return
}
// ToRaw returns a *logWriter.
func (m *MultiLogger) ToRaw(prio logPrio) (raw *logWriter) {
raw = &logWriter{backend: m, prio: prio}
return
}

View File

@@ -1,7 +1,7 @@
package logging
import (
sysd "github.com/coreos/go-systemd/journal"
sysd "github.com/coreos/go-systemd/v22/journal"
"github.com/google/uuid"
)

View File

@@ -1,7 +1,7 @@
package logging
import (
`log`
"log"
)
// Setup does nothing at all; it's here for interface compat. 🙃
@@ -84,3 +84,11 @@ func (l *NullLogger) ToLogger(prio logPrio) (stdLibLog *log.Logger) {
return
}
// ToRaw returns a *logWriter. (This is a little less efficient than using ToLogger's log.Logger as an io.Writer if that's all you need.)
func (l *NullLogger) ToRaw(prio logPrio) (raw *logWriter) {
raw = &logWriter{backend: l, prio: prio}
return
}

View File

@@ -1,6 +1,18 @@
package logging
// nulLWriter writes... nothing. To avoid errors, however, in downstream code it pretends it does (n will *always* == len(b)).
import (
"unicode/utf8"
)
// Close conforms a nullWriter to an io.WriteCloser. It obviously does nothing, and will always return with err == nil.
func (nw *nullWriter) Close() (err error) {
// NO-OP
return
}
// Write conforms a nullWriter to an io.Writer, but it writes... nothing. To avoid errors, however, in downstream code it pretends it does (n will *always* == len(b)).
func (nw *nullWriter) Write(b []byte) (n int, err error) {
if b == nil {
@@ -10,3 +22,37 @@ func (nw *nullWriter) Write(b []byte) (n int, err error) {
return
}
// WriteByte conforms to an io.ByteWriter but again... nothing is actually written anywhere.
func (nw *nullWriter) WriteByte(c byte) (err error) {
// NO-OP
_ = c
return
}
/*
WriteRune conforms to the other Loggers. It WILL return the proper value for n (matching (bytes.Buffer).WriteRune() and (bufio.Writer).WriteRune() signatures,
and it WILL return an ErrInvalidRune if r is not a valid rune, but otherwise it will no-op.
*/
func (nw *nullWriter) WriteRune(r rune) (n int, err error) {
n = utf8.RuneLen(r)
if n < 0 {
err = ErrInvalidRune
n = 0
return
}
return
}
// WriteString conforms to an io.StringWriter but nothing is actually written. (n will *always* == len(s))
func (nw *nullWriter) WriteString(s string) (n int, err error) {
n = len(s)
return
}

View File

@@ -235,6 +235,22 @@ func (l *StdLogger) Warning(s string, v ...interface{}) (err error) {
return
}
// ToLogger returns a stdlib log.Logger.
func (l *StdLogger) ToLogger(prio logPrio) (stdLibLog *log.Logger) {
stdLibLog = log.New(l.ToRaw(prio), "", 0)
return
}
// ToRaw returns a *logWriter.
func (l *StdLogger) ToRaw(prio logPrio) (raw *logWriter) {
raw = &logWriter{backend: l, prio: prio}
return
}
// renderWrite prepares/formats a log message to be written to this StdLogger.
func (l *StdLogger) renderWrite(msg, prio string) {
@@ -244,11 +260,3 @@ func (l *StdLogger) renderWrite(msg, prio string) {
return
}
// ToLogger returns a stdlib log.Logger.
func (l *StdLogger) ToLogger(prio logPrio) (stdLibLog *log.Logger) {
stdLibLog = log.New(&logWriter{backend: l, prio: prio}, "", 0)
return
}

View File

@@ -4,7 +4,7 @@ import (
"fmt"
"log"
"github.com/coreos/go-systemd/journal"
"github.com/coreos/go-systemd/v22/journal"
)
/*
@@ -227,7 +227,15 @@ func (l *SystemDLogger) renderWrite(msg string, prio journal.Priority) {
// ToLogger returns a stdlib log.Logger.
func (l *SystemDLogger) ToLogger(prio logPrio) (stdLibLog *log.Logger) {
stdLibLog = log.New(&logWriter{backend: l, prio: prio}, "", 0)
stdLibLog = log.New(l.ToRaw(prio), "", 0)
return
}
// ToRaw returns a *logWriter.
func (l *SystemDLogger) ToRaw(prio logPrio) (raw *logWriter) {
raw = &logWriter{backend: l, prio: prio}
return
}

View File

@@ -273,7 +273,15 @@ func (l *SyslogLogger) Warning(s string, v ...interface{}) (err error) {
// ToLogger returns a stdlib log.Logger.
func (l *SyslogLogger) ToLogger(prio logPrio) (stdLibLog *log.Logger) {
stdLibLog = log.New(&logWriter{backend: l, prio: prio}, "", 0)
stdLibLog = log.New(l.ToRaw(prio), "", 0)
return
}
// ToRaw returns a *logWriter.
func (l *SyslogLogger) ToRaw(prio logPrio) (raw *logWriter) {
raw = &logWriter{backend: l, prio: prio}
return
}

View File

@@ -3,7 +3,7 @@ package logging
import (
"errors"
"fmt"
`log`
"log"
"os"
"os/exec"
"syscall"
@@ -347,7 +347,15 @@ func (l *WinLogger) Warning(s string, v ...interface{}) (err error) {
// ToLogger returns a stdlib log.Logger.
func (l *WinLogger) ToLogger(prio logPrio) (stdLibLog *log.Logger) {
stdLibLog = log.New(&logWriter{backend: l, prio: prio}, "", 0)
stdLibLog = log.New(l.ToRaw(prio), "", 0)
return
}
// ToRaw returns a *logWriter.
func (l *WinLogger) ToRaw(prio logPrio) (raw *logWriter) {
raw = &logWriter{backend: l, prio: prio}
return
}

View File

@@ -4,7 +4,7 @@ import (
"log"
"os"
`r00t2.io/goutils/bitmask`
"r00t2.io/goutils/bitmask"
)
type logPrio bitmask.MaskBit
@@ -28,6 +28,7 @@ type Logger interface {
Setup() (err error)
Shutdown() (err error)
ToLogger(prio logPrio) (stdLibLog *log.Logger)
ToRaw(prio logPrio) (raw *logWriter)
}
/*

4
remap/doc.go Normal file
View File

@@ -0,0 +1,4 @@
/*
Package remap provides convenience functions around regular expressions, primarily offering maps for named capture groups.
*/
package remap

488
remap/funcs_remap.go Normal file
View File

@@ -0,0 +1,488 @@
package remap
/*
Map returns a map[string][]<match bytes> for regexes with named capture groups matched in bytes b.
Note that this supports non-unique group names; [regexp.Regexp] allows for patterns with multiple groups
using the same group name (though your IDE might complain; I know GoLand does).
Each match for each group is in a slice keyed under that group name, with that slice
ordered by the indexing done by the regex match itself.
In summary, the parameters are as follows:
# inclNoMatch
If true, then attempt to return a non-nil matches (as long as b isn't nil).
Group keys will be populated and explicitly defined as nil.
For example, if a pattern
^(?P<g1>foo)(?P<g1>bar)(?P<g2>baz)$
is provided but b does not match then matches will be:
map[string][][]byte{
"g1": nil,
"g2": nil,
}
# inclNoMatchStrict
If true (and inclNoMatch is true), instead of a single nil the group's values will be
a slice of nil values explicitly matching the number of times the group name is specified
in the pattern.
For example, if a pattern:
^(?P<g1>foo)(?P<g1>bar)(?P<g2>baz)$
is provided but b does not match then matches will be:
map[string][][]byte{
"g1": [][]byte{
nil,
nil,
},
"g2": [][]byte{
nil,
},
}
# mustMatch
If true, matches will be nil if the entirety of b does not match the pattern (and thus
no capture groups matched) (overrides inclNoMatch) -- explicitly:
matches == nil
Otherwise if false (and assuming inclNoMatch is false), matches will be:
map[string][][]byte{}{}
# Condition Tree
In detail, matches and/or its values may be nil or empty under the following condition tree:
IF b is nil:
THEN matches will always be nil
ELSE:
IF all of b does not match pattern
IF mustMuch is true
THEN matches == nil
ELSE
THEN matches == map[string][][]byte{} (non-nil but empty)
ELSE IF pattern has no named capture groups
IF inclNoMatch is true
THEN matches == map[string][][]byte{} (non-nil but empty)
ELSE
THEN matches == nil
ELSE
IF there are no named group matches
IF inclNoMatch is true
THEN matches is non-nil; matches[<group name>, ...] is/are defined but nil (_, ok = matches[<group name>]; ok == true)
ELSE
THEN matches == nil
ELSE
IF <group name> does not have a match
IF inclNoMatch is true
IF inclNoMatchStrict is true
THEN matches[<group name>] is defined and non-nil, but populated with placeholder nils
(matches[<group name>] == [][]byte{nil[, nil...]})
ELSE
THEN matches[<group name>] is guaranteed defined but may be nil (_, ok = matches[<group name>]; ok == true)
ELSE
THEN matches[<group name>] is not defined (_, ok = matches[<group name>]; ok == false)
ELSE
matches[<group name>] == []{<match>[, <match>...]}
*/
func (r *ReMap) Map(b []byte, inclNoMatch, inclNoMatchStrict, mustMatch bool) (matches map[string][][]byte) {
var ok bool
var mIdx int
var match []byte
var grpNm string
var names []string
var matchBytes [][]byte
var tmpMap map[string][][]byte = make(map[string][][]byte)
if b == nil {
return
}
names = r.Regexp.SubexpNames()
matchBytes = r.Regexp.FindSubmatch(b)
if matchBytes == nil {
// b does not match pattern
if !mustMatch {
matches = make(map[string][][]byte)
}
return
}
if names == nil || len(names) == 0 || len(names) == 1 {
/*
no named capture groups;
technically only the last condition would be the case.
*/
if inclNoMatch {
matches = make(map[string][][]byte)
}
return
}
names = names[1:]
if len(matchBytes) == 0 || len(matchBytes) == 1 {
/*
no submatches whatsoever.
*Technically* I don't think this condition can actually be reached.
This is more of a safe-return before we re-slice.
*/
matches = make(map[string][][]byte)
if inclNoMatch {
if len(names) >= 1 {
for _, grpNm = range names {
matches[grpNm] = nil
}
}
}
return
}
matchBytes = matchBytes[1:]
for mIdx, match = range matchBytes {
grpNm = names[mIdx]
/*
Thankfully, it's actually a build error if a pattern specifies a named
capture group with an empty name.
So we don't need to worry about accounting for that,
and can just skip over grpNm == "" (which is an *unnamed* capture group).
*/
if grpNm == "" {
continue
}
if match == nil {
// group did not match
if !inclNoMatch {
continue
}
if _, ok = tmpMap[grpNm]; !ok {
if !inclNoMatchStrict {
tmpMap[grpNm] = nil
} else {
tmpMap[grpNm] = [][]byte{nil}
}
} else {
if inclNoMatchStrict {
tmpMap[grpNm] = append(tmpMap[grpNm], nil)
}
}
continue
}
if _, ok = tmpMap[grpNm]; !ok {
tmpMap[grpNm] = make([][]byte, 0)
}
tmpMap[grpNm] = append(tmpMap[grpNm], match)
}
// This *technically* should be completely handled above.
if inclNoMatch {
for _, grpNm = range names {
if _, ok = tmpMap[grpNm]; !ok {
tmpMap[grpNm] = nil
}
}
}
if len(tmpMap) > 0 {
matches = tmpMap
}
return
}
/*
MapString is exactly like ReMap.Map(), but operates on (and returns) strings instead.
(matches will always be nil if s == “.)
A small deviation, though; empty strings instead of nils (because duh) will occupy slice placeholders (if `inclNoMatchStrict` is specified).
This unfortunately *does not provide any indication* if an empty string positively matched the pattern (a "hit") or if it was simply
not matched at all (a "miss"). If you need definitive determination between the two conditions, it is instead recommended to either
*not* use inclNoMatchStrict or to use ReMap.Map() instead and convert any non-nil values to strings after.
Particularly:
# inclNoMatch
If true, then attempt to return a non-nil matches (as long as s isn't empty).
Group keys will be populated and explicitly defined as nil.
For example, if a pattern
^(?P<g1>foo)(?P<g1>bar)(?P<g2>baz)$
is provided but s does not match then matches will be:
map[string][]string{
"g1": nil,
"g2": nil,
}
# inclNoMatchStrict
If true (and inclNoMatch is true), instead of a single nil the group's values will be
a slice of eempty string values explicitly matching the number of times the group name is specified
in the pattern.
For example, if a pattern:
^(?P<g1>foo)(?P<g1>bar)(?P<g2>baz)$
is provided but s does not match then matches will be:
map[string][]string{
"g1": []string{
"",
"",
},
"g2": []string{
"",
},
}
# mustMatch
If true, matches will be nil if the entirety of s does not match the pattern (and thus
no capture groups matched) (overrides inclNoMatch) -- explicitly:
matches == nil
Otherwise if false (and assuming inclNoMatch is false), matches will be:
map[string][]string{}{}
# Condition Tree
In detail, matches and/or its values may be nil or empty under the following condition tree:
IF s is empty:
THEN matches will always be nil
ELSE:
IF all of s does not match pattern
IF mustMuch is true
THEN matches == nil
ELSE
THEN matches == map[string][]string{} (non-nil but empty)
ELSE IF pattern has no named capture groups
IF inclNoMatch is true
THEN matches == map[string][]string{} (non-nil but empty)
ELSE
THEN matches == nil
ELSE
IF there are no named group matches
IF inclNoMatch is true
THEN matches is non-nil; matches[<group name>, ...] is/are defined but nil (_, ok = matches[<group name>]; ok == true)
ELSE
THEN matches == nil
ELSE
IF <group name> does not have a match
IF inclNoMatch is true
IF inclNoMatchStrict is true
THEN matches[<group name>] is defined and non-nil, but populated with placeholder nils
(matches[<group name>] == []string{""[, ""...]})
ELSE
THEN matches[<group name>] is guaranteed defined but may be nil (_, ok = matches[<group name>]; ok == true)
ELSE
THEN matches[<group name>] is not defined (_, ok = matches[<group name>]; ok == false)
ELSE
matches[<group name>] == []{<match>[, <match>...]}
*/
func (r *ReMap) MapString(s string, inclNoMatch, inclNoMatchStrict, mustMatch bool) (matches map[string][]string) {
var ok bool
var endIdx int
var startIdx int
var chunkIdx int
var grpNm string
var names []string
var matchStr string
/*
A slice of indices or index pairs.
For each element `e` in idxChunks,
* if `e` is nil, no group match.
* if len(e) == 1, only a single character was matched.
* otherwise len(e) == 2, the start and end of the match.
*/
var idxChunks [][]int
var matchIndices []int
var chunkIndices []int // always 2 elements; start pos and end pos
var tmpMap map[string][]string = make(map[string][]string)
/*
OK so this is a bit of a deviation.
It's not as straightforward as above, because there isn't an explicit way
like above to determine if a pattern was *matched as an empty string* vs.
*not matched*.
So instead do roundabout index-y things.
*/
if s == "" {
return
}
/*
I'm not entirely sure how serious they are about "the slice should not be modified"...
DO NOT sort or dedupe `names`! If the same name for groups is duplicated,
it will be duplicated here in proper order and the ordering is tied to
the ordering of matchIndices.
*/
names = r.Regexp.SubexpNames()[:]
matchIndices = r.Regexp.FindStringSubmatchIndex(s)
if matchIndices == nil {
// s does not match pattern at all.
if !mustMatch {
matches = make(map[string][]string)
}
return
}
if names == nil || len(names) <= 1 {
/*
No named capture groups;
technically only the last condition would be the case,
as (regexp.Regexp).SubexpNames() will ALWAYS at the LEAST
return a `[]string{""}`.
*/
if inclNoMatch {
matches = make(map[string][]string)
}
return
}
if len(matchIndices) == 0 || len(matchIndices) == 1 {
/*
No (sub)matches whatsoever.
*technically* I don't think this condition can actually be reached;
matchIndices should ALWAYS either be `nil` or len will be at LEAST 2,
and modulo 2 thereafter since they're PAIRS of indices...
Why they didn't just return a [][]int or [][2]int or something
instead of an []int, who knows.
But we're correcting that poor design.
This is more of a safe-return before we chunk the indices.
*/
matches = make(map[string][]string)
if inclNoMatch {
for _, grpNm = range names {
if grpNm != "" {
matches[grpNm] = nil
}
}
}
return
}
/*
A reslice of `matchIndices` could technically start at 2 (as long as `names` is sliced [1:])
because they're in pairs: []int{<start>, <end>, <start>, <end>, ...}
and the first pair is the entire pattern match (un-resliced names[0]).
Thus the len(matchIndices) == 2*len(names), *even* if you
Keep in mind that since the first element of names is removed,
the first pair here is skipped.
This provides a bit more consistent readability, though.
*/
idxChunks = make([][]int, len(names))
chunkIdx = 0
endIdx = 0
for startIdx = 0; endIdx < len(matchIndices); startIdx += 2 {
endIdx = startIdx + 2
// This technically should never happen.
if endIdx > len(matchIndices) {
endIdx = len(matchIndices)
}
chunkIndices = matchIndices[startIdx:endIdx]
if chunkIndices[0] == -1 || chunkIndices[1] == -1 {
// group did not match
chunkIndices = nil
} else {
if chunkIndices[0] == chunkIndices[1] {
chunkIndices = []int{chunkIndices[0]}
} else {
chunkIndices = matchIndices[startIdx:endIdx]
}
}
idxChunks[chunkIdx] = chunkIndices
chunkIdx++
}
// Now associate with names and pull the string sequence.
for chunkIdx, chunkIndices = range idxChunks {
grpNm = names[chunkIdx]
/*
Thankfully, it's actually a build error if a pattern specifies a named
capture group with an empty name.
So we don't need to worry about accounting for that,
and can just skip over grpNm == ""
(which is either an *unnamed* capture group
OR the first element in `names`, which is always
the entire match).
*/
if grpNm == "" {
continue
}
if chunkIndices == nil || len(chunkIndices) == 0 {
// group did not match
if !inclNoMatch {
continue
}
if _, ok = tmpMap[grpNm]; !ok {
if !inclNoMatchStrict {
tmpMap[grpNm] = nil
} else {
tmpMap[grpNm] = []string{""}
}
} else {
if inclNoMatchStrict {
tmpMap[grpNm] = append(tmpMap[grpNm], "")
}
}
continue
}
switch len(chunkIndices) {
case 1:
// Single character
matchStr = string(s[chunkIndices[0]])
case 2:
// Multiple characters
matchStr = s[chunkIndices[0]:chunkIndices[1]]
}
if _, ok = tmpMap[grpNm]; !ok {
tmpMap[grpNm] = make([]string, 0)
}
tmpMap[grpNm] = append(tmpMap[grpNm], matchStr)
}
// This *technically* should be completely handled above.
if inclNoMatch {
for _, grpNm = range names {
if _, ok = tmpMap[grpNm]; !ok {
tmpMap[grpNm] = nil
}
}
}
if len(tmpMap) > 0 {
matches = tmpMap
}
return
}

27
remap/types.go Normal file
View File

@@ -0,0 +1,27 @@
package remap
import (
"regexp"
)
type (
// ReMap provides some map-related functions around a regexp.Regexp.
ReMap struct {
*regexp.Regexp
}
// TODO?
/*
ExplicitStringMatch is used with ReMap.MapStringExplicit to indicate if a
capture group result is a hit (a group matched, but e.g. the match value is empty string)
or not (a group did not match).
*/
/*
ExplicitStringMatch struct {
Group string
IsMatch bool
Value string
}
*/
)