Compare commits
8 Commits
Author | SHA1 | Date | |
---|---|---|---|
![]() |
965657d1b2 | ||
![]() |
970acd0ee4 | ||
![]() |
2222cea7fb | ||
![]() |
688abd0874 | ||
![]() |
a1f87d6b51 | ||
![]() |
07951f1f03 | ||
![]() |
bae0abe960 | ||
![]() |
368ae0cb8e |
19
.encoding.TODO/bit/docs.go
Normal file
19
.encoding.TODO/bit/docs.go
Normal file
@ -0,0 +1,19 @@
|
||||
/*
|
||||
Package bit aims to provide feature parity with stdlib's [encoding/hex].
|
||||
|
||||
It's a ludicrous tragedy that hex/base16, base32, base64 all have libraries for converting
|
||||
to/from string representations... but there's nothing for binary ('01010001' etc.) whatsoever.
|
||||
|
||||
This package also provides some extra convenience functions and types in an attempt to provide
|
||||
an abstracted bit-level fidelity in Go. A [Bit] is a bool type, in which that underlying bool
|
||||
being false represents a 0 and that underlying bool being true represents a 1.
|
||||
|
||||
Note that a [Bit], or an arbitrary-length or non-octet-aligned [][Bit], may take up more bytes in memory
|
||||
than expected; a [Bit] will actually always occupy a single byte -- thus representing
|
||||
`00000000 00000000` as a [][Bit] or [16][Bit] will actually occupy *sixteen bytes* in memory,
|
||||
NOT 2 bytes (nor, obviously, [2][Byte])!
|
||||
It is recommended instead to use a [Bits] instead of a [Bit] slice or array, as it will try to properly align to the
|
||||
smallest memory allocation possible (at the cost of a few extra CPU cycles on adding/removing one or more [Bit]).
|
||||
It will properly retain any appended, prepended, leading, or trailing bits that do not currently align to a byte.
|
||||
*/
|
||||
package bit
|
14
.encoding.TODO/bit/funcs.go
Normal file
14
.encoding.TODO/bit/funcs.go
Normal file
@ -0,0 +1,14 @@
|
||||
package bit
|
||||
|
||||
// TODO: Provide analogues of encoding/hex, encoding/base64, etc. functions etc.
|
||||
|
||||
/*
|
||||
TODO: Also provide interfaces for the following:
|
||||
|
||||
* https://pkg.go.dev/encoding#BinaryAppender
|
||||
* https://pkg.go.dev/encoding#BinaryMarshaler
|
||||
* https://pkg.go.dev/encoding#BinaryUnmarshaler
|
||||
* https://pkg.go.dev/encoding#TextAppender
|
||||
* https://pkg.go.dev/encoding#TextMarshaler
|
||||
* https://pkg.go.dev/encoding#TextUnmarshaler
|
||||
*/
|
34
.encoding.TODO/bit/types.go
Normal file
34
.encoding.TODO/bit/types.go
Normal file
@ -0,0 +1,34 @@
|
||||
package bit
|
||||
|
||||
type (
|
||||
// Bit aims to provide a native-like type for a single bit (Golang operates on the smallest fidelity level of *byte*/uint8).
|
||||
Bit bool
|
||||
|
||||
// Bits is an arbitrary length of bits.
|
||||
Bits struct {
|
||||
/*
|
||||
leading is a series of Bit that do not cleanly align to the beginning of Bits.b.
|
||||
They will always be the bits at the *beginning* of the sequence.
|
||||
len(Bits.leading) will *never* be more than 7;
|
||||
it's converted into a byte, prepended to Bits.b, and cleared if it reaches that point.
|
||||
*/
|
||||
leading []Bit
|
||||
// b is the condensed/memory-aligned alternative to an [][8]Bit (or []Bit, or [][]Bit, etc.).
|
||||
b []byte
|
||||
/*
|
||||
remaining is a series of Bit that do not cleanly align to the end of Bits.b.
|
||||
They will always be the bits at the *end* of the sequence.
|
||||
len(Bits.remaining) will *never* be more than 7;
|
||||
it's converted into a byte, appended to Bits.b, and cleared if it reaches that point.
|
||||
*/
|
||||
remaining []Bit
|
||||
// fixedLen, if 0, represents a "slice". If >= 1, it represents an "array".
|
||||
fixedLen uint
|
||||
}
|
||||
|
||||
// Byte is this package's representation of a byte. It's primarily for convenience.
|
||||
Byte byte
|
||||
|
||||
// Bytes is defined as a type for convenience single-call functions.
|
||||
Bytes []Byte
|
||||
)
|
@ -34,12 +34,56 @@ func NewMaskBitExplicit(value uint) (m *MaskBit) {
|
||||
return
|
||||
}
|
||||
|
||||
// HasFlag is true if m has MaskBit flag set/enabled.
|
||||
/*
|
||||
HasFlag is true if m has MaskBit flag set/enabled.
|
||||
|
||||
THIS WILL RETURN FALSE FOR OR'd FLAGS.
|
||||
|
||||
For example:
|
||||
|
||||
flagA MaskBit = 0x01
|
||||
flagB MaskBit = 0x02
|
||||
flagComposite = flagA | flagB
|
||||
|
||||
m *MaskBit = NewMaskBitExplicit(uint(flagA))
|
||||
|
||||
m.HasFlag(flagComposite) will return false even though flagComposite is an OR
|
||||
that contains flagA.
|
||||
Use [MaskBit.IsOneOf] instead if you do not desire this behavior,
|
||||
and instead want to test composite flag *membership*.
|
||||
(MaskBit.IsOneOf will also return true for non-composite equality.)
|
||||
|
||||
To be more clear, if MaskBit flag is a composite MaskBit (e.g. flagComposite above),
|
||||
HasFlag will only return true if ALL bits in flag are also set in MaskBit m.
|
||||
*/
|
||||
func (m *MaskBit) HasFlag(flag MaskBit) (r bool) {
|
||||
|
||||
var b MaskBit = *m
|
||||
|
||||
if b&flag != 0 {
|
||||
if b&flag == flag {
|
||||
r = true
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
/*
|
||||
IsOneOf is like a "looser" form of [MaskBit.HasFlag]
|
||||
in that it allows for testing composite membership.
|
||||
|
||||
See [MaskBit.HasFlag] for more information.
|
||||
|
||||
If composite is *not* an OR'd MaskBit (i.e.
|
||||
it falls directly on a boundary -- 0, 1, 2, 4, 8, 16, etc.),
|
||||
then IsOneOf will behave exactly like HasFlag.
|
||||
|
||||
If m is a composite MaskBit (it usually is) and composite is ALSO a composite MaskBit,
|
||||
IsOneOf will return true if ANY of the flags set in m is set in composite.
|
||||
*/
|
||||
func (m *MaskBit) IsOneOf(composite MaskBit) (r bool) {
|
||||
|
||||
var b MaskBit = *m
|
||||
|
||||
if b&composite != 0 {
|
||||
r = true
|
||||
}
|
||||
return
|
||||
|
119
bitmask/doc.go
119
bitmask/doc.go
@ -1,9 +1,35 @@
|
||||
/*
|
||||
Package bitmask handles a flag-like opt/bitmask system.
|
||||
|
||||
See https://yourbasic.org/golang/bitmask-flag-set-clear/ for more information.
|
||||
See https://yourbasic.org/golang/bitmask-flag-set-clear/ for basic information on what bitmasks are and why they're useful.
|
||||
|
||||
To use this, set constants like thus:
|
||||
Specifically, in the case of Go, they allow you to essentially manage many, many, many "booleans" as part of a single value.
|
||||
|
||||
A single bool value in Go takes up 8 bits/1 byte, unavoidably.
|
||||
|
||||
However, a [bitmask.MaskBit] is backed by a uint which (depending on your platform) is either 32 bits/4 bytes or 64 bits/8 bytes.
|
||||
|
||||
"But wait, that takes up more memory though!"
|
||||
|
||||
Yep, but bitmasking lets you store a "boolean" AT EACH BIT - it operates on
|
||||
whether a bit in a byte/set of bytes at a given position is 0 or 1.
|
||||
|
||||
Which means on 32-bit platforms, a [MaskBit] can have up to 4294967295 "booleans" in a single value (0 to (2^32)-1).
|
||||
|
||||
On 64-bit platforms, a [MaskBit] can have up to 18446744073709551615 "booleans" in a single value (0 to (2^64)-1).
|
||||
|
||||
If you tried to do that with Go bool values, that'd take up 4294967295 bytes (4 GiB)
|
||||
or 18446744073709551615 bytes (16 EiB - yes, that's [exbibytes]) of RAM for 32-bit/64-bit platforms respectively.
|
||||
|
||||
"But that has to be so slow to unpack that!"
|
||||
|
||||
Nope. It's not using compression or anything, the CPU is just comparing bit "A" vs. bit "B" 32/64 times. That's super easy work for a CPU.
|
||||
|
||||
There's a reason Doom used bitmasking for the "dmflags" value in its server configs.
|
||||
|
||||
# Usage
|
||||
|
||||
To use this library, set constants like thus:
|
||||
|
||||
package main
|
||||
|
||||
@ -42,12 +68,95 @@ But this would return false:
|
||||
|
||||
MyMask.HasFlag(OPT2)
|
||||
|
||||
# Technical Caveats
|
||||
|
||||
TARGETING
|
||||
|
||||
When implementing, you should always set MyMask (from Usage section above) as the actual value.
|
||||
For example, if you are checking a permissions set for a user that has the value, say, 6
|
||||
|
||||
var userPerms uint = 6 // 0x0000000000000006
|
||||
|
||||
and your library has the following permission bits defined:
|
||||
|
||||
const PermsNone bitmask.MaskBit = 0
|
||||
const (
|
||||
PermsList bitmask.MaskBit = 1 << iota // 1
|
||||
PermsRead // 2
|
||||
PermsWrite // 4
|
||||
PermsExec // 8
|
||||
PermsAdmin // 16
|
||||
)
|
||||
|
||||
And you want to see if the user has the PermsRead flag set, you would do:
|
||||
|
||||
userPermMask = bitmask.NewMaskBitExplicit(userPerms)
|
||||
if userPermMask.HasFlag(PermsRead) {
|
||||
// ...
|
||||
}
|
||||
|
||||
NOT:
|
||||
|
||||
userPermMask = bitmask.NewMaskBitExplicit(PermsRead)
|
||||
// Nor:
|
||||
// userPermMask = PermsRead
|
||||
if userPermMask.HasFlag(userPerms) {
|
||||
// ...
|
||||
}
|
||||
|
||||
This will be terribly, horribly wrong, cause incredibly unexpected results,
|
||||
and quite possibly cause massive security issues. Don't do it.
|
||||
|
||||
COMPOSITES
|
||||
|
||||
If you want to define a set of flags that are a combination of other flags,
|
||||
your inclination would be to bitwise-OR them together:
|
||||
|
||||
const (
|
||||
flagA bitmask.MaskBit = 1 << iota // 1
|
||||
flagB // 2
|
||||
)
|
||||
|
||||
const (
|
||||
flagAB bitmask.MaskBit = flagA | flagB // 3
|
||||
)
|
||||
|
||||
Which is fine and dandy. But if you then have:
|
||||
|
||||
var myMask *bitmask.MaskBit = bitmask.NewMaskBit()
|
||||
|
||||
myMask.AddFlag(flagA)
|
||||
|
||||
You may expect this call to [MaskBit.HasFlag]:
|
||||
|
||||
myMask.HasFlag(flagAB)
|
||||
|
||||
to be true, since flagA is "in" flagAB.
|
||||
It will return false - HasFlag does strict comparisons.
|
||||
It will only return true if you then ALSO do:
|
||||
|
||||
// This would require setting flagA first.
|
||||
// The order of setting flagA/flagB doesn't matter,
|
||||
// but you must have both set for HasFlag(flagAB) to return true.
|
||||
myMask.AddFlag(flagB)
|
||||
|
||||
or if you do:
|
||||
|
||||
// This can be done with or without additionally setting flagA.
|
||||
myMask.AddFlag(flagAB)
|
||||
|
||||
Instead, if you want to see if a mask has membership within a composite flag,
|
||||
you can use [MaskBit.IsOneOf].
|
||||
|
||||
# Other Options
|
||||
|
||||
If you need something with more flexibility (as always, at the cost of complexity),
|
||||
you may be interested in one of the following libraries:
|
||||
|
||||
. github.com/alvaroloes/enumer
|
||||
. github.com/abice/go-enum
|
||||
. github.com/jeffreyrichter/enum/enum
|
||||
* [github.com/alvaroloes/enumer]
|
||||
* [github.com/abice/go-enum]
|
||||
* [github.com/jeffreyrichter/enum/enum]
|
||||
|
||||
[exbibytes]: https://simple.wikipedia.org/wiki/Exbibyte
|
||||
*/
|
||||
package bitmask
|
||||
|
@ -4,6 +4,8 @@
|
||||
-- no native Go support (yet)?
|
||||
--- https://developer.apple.com/forums/thread/773369
|
||||
|
||||
- The log destinations for e.g. consts_nix.go et. al. probably should be unexported types.
|
||||
|
||||
- add a `log/slog` logging.Logger?
|
||||
|
||||
- Implement code line/func/etc. (only for debug?):
|
||||
|
@ -23,8 +23,8 @@ const (
|
||||
// LogUndefined indicates an undefined Logger type.
|
||||
const LogUndefined bitmask.MaskBit = iota
|
||||
const (
|
||||
// LogJournald flags a SystemDLogger Logger type.
|
||||
LogJournald = 1 << iota
|
||||
// LogJournald flags a SystemDLogger Logger type. This will, for hopefully obvious reasons, only work on Linux systemd systems.
|
||||
LogJournald bitmask.MaskBit = 1 << iota
|
||||
// LogSyslog flags a SyslogLogger Logger type.
|
||||
LogSyslog
|
||||
// LogFile flags a FileLogger Logger type.
|
||||
|
@ -3,16 +3,14 @@ package logging
|
||||
import (
|
||||
`os`
|
||||
`path/filepath`
|
||||
|
||||
`r00t2.io/goutils/bitmask`
|
||||
)
|
||||
|
||||
// Flags for logger configuration. These are used internally.
|
||||
// LogUndefined indicates an undefined Logger type.
|
||||
const LogUndefined bitmask.MaskBit = 0
|
||||
const (
|
||||
// LogUndefined indicates an undefined Logger type.
|
||||
LogUndefined bitmask.MaskBit = 1 << iota
|
||||
// LogWinLogger indicates a WinLogger Logger type (Event Log).
|
||||
LogWinLogger
|
||||
LogWinLogger bitmask.MaskBit = 1 << iota
|
||||
// LogFile flags a FileLogger Logger type.
|
||||
LogFile
|
||||
// LogStdout flags a StdLogger Logger type.
|
||||
|
@ -17,7 +17,9 @@ func (l *logPrio) HasFlag(prio logPrio) (hasFlag bool) {
|
||||
m = bitmask.NewMaskBitExplicit(uint(*l))
|
||||
p = bitmask.NewMaskBitExplicit(uint(prio))
|
||||
|
||||
hasFlag = m.HasFlag(*p)
|
||||
// Use IsOneOf instead in case PriorityAll is passed for prio.
|
||||
// hasFlag = m.HasFlag(*p)
|
||||
hasFlag = m.IsOneOf(*p)
|
||||
|
||||
return
|
||||
}
|
||||
|
@ -40,6 +40,8 @@ func (l *logWriter) Write(b []byte) (n int, err error) {
|
||||
|
||||
s = string(b)
|
||||
|
||||
// Since this explicitly checks each priority level, there's no need for IsOneOf in case of PriorityAll.
|
||||
|
||||
if l.prio.HasFlag(PriorityEmergency) {
|
||||
if err = l.backend.Emerg(s); err != nil {
|
||||
mErr.AddError(err)
|
||||
|
4
netx/docs.go
Normal file
4
netx/docs.go
Normal file
@ -0,0 +1,4 @@
|
||||
/*
|
||||
Package netx includes extensions to the stdlib `net` module.
|
||||
*/
|
||||
package netx
|
24
netx/inetcksum/consts.go
Normal file
24
netx/inetcksum/consts.go
Normal file
@ -0,0 +1,24 @@
|
||||
package inetcksum
|
||||
|
||||
import (
|
||||
`encoding/binary`
|
||||
)
|
||||
|
||||
const (
	// EmptyCksum is returned for checksums of 0-length byte slices/buffers.
	// (The one's-complement of a zero running sum folds to 0xffff.)
	EmptyCksum uint16 = 0xffff
)

const (
	// cksumMask is AND'd with a checksum to get the "carried ones".
	cksumMask uint32 = 0x0000ffff
	// cksumShift is used in the "carried-ones folding" (shift right by 16 bits).
	cksumShift uint32 = 0x00000010
	// padShift is used to "pad out" a checksum for odd-length buffers by left-shifting (by 8 bits, one octet).
	padShift uint32 = 0x00000008
)

var (
	// ord is the byte order used by the Internet Checksum (network byte order, i.e. big-endian).
	ord binary.ByteOrder = binary.BigEndian
)
|
32
netx/inetcksum/docs.go
Normal file
32
netx/inetcksum/docs.go
Normal file
@ -0,0 +1,32 @@
|
||||
/*
|
||||
Package inetcksum applies the "Internet Checksum" algorithm as specified/described in:
|
||||
|
||||
* [RFC 1071]
|
||||
* [RFC 1141]
|
||||
* [RFC 1624]
|
||||
|
||||
It provides [InetChecksum], which can be used as a:
|
||||
|
||||
* [hash.Hash]
|
||||
* [io.ByteWriter]
|
||||
* [io.StringWriter]
|
||||
* [io.Writer]
|
||||
* [io.WriterTo]
|
||||
|
||||
and allows one to retrieve the actual bytes that were checksummed.
|
||||
It is also fully concurrency-safe.
|
||||
|
||||
There is also an [InetChecksumSimple] provided, which is more
|
||||
tailored for performance/resource usage at the cost of no concurrency
|
||||
safety and no data retention, which can be used as a:
|
||||
|
||||
* [hash.Hash]
|
||||
* [io.ByteWriter]
|
||||
* [io.StringWriter]
|
||||
* [io.Writer]
|
||||
|
||||
[RFC 1071]: https://datatracker.ietf.org/doc/html/rfc1071
|
||||
[RFC 1141]: https://datatracker.ietf.org/doc/html/rfc1141
|
||||
[RFC 1624]: https://datatracker.ietf.org/doc/html/rfc1624
|
||||
*/
|
||||
package inetcksum
|
62
netx/inetcksum/funcs.go
Normal file
62
netx/inetcksum/funcs.go
Normal file
@ -0,0 +1,62 @@
|
||||
package inetcksum
|
||||
|
||||
import (
|
||||
`io`
|
||||
)
|
||||
|
||||
// New returns a new initialized [InetChecksum]. It will never panic.
|
||||
func New() (i *InetChecksum) {
|
||||
|
||||
i = &InetChecksum{}
|
||||
_ = i.Aligned()
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
/*
|
||||
NewFromBytes returns a new [InetChecksum] initialized with explicit bytes.
|
||||
|
||||
b may be nil or 0-length; this will not cause an error.
|
||||
*/
|
||||
func NewFromBytes(b []byte) (i *InetChecksum, copied int, err error) {
|
||||
|
||||
var cksum InetChecksum
|
||||
|
||||
if b != nil && len(b) > 0 {
|
||||
if copied, err = cksum.Write(b); err != nil {
|
||||
return
|
||||
}
|
||||
_ = i.Aligned()
|
||||
} else {
|
||||
i = New()
|
||||
return
|
||||
}
|
||||
|
||||
i = &cksum
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
/*
|
||||
NewFromBuf returns an [InetChecksum] from a specified [io.Reader].
|
||||
|
||||
buf may be nil. If it isn't, NewFromBuf will call [io.Copy] on buf.
|
||||
Note that this may exhaust your passed buf or advance its current seek position/offset,
|
||||
depending on its type.
|
||||
*/
|
||||
func NewFromBuf(buf io.Reader) (i *InetChecksum, copied int64, err error) {
|
||||
|
||||
var cksum InetChecksum
|
||||
|
||||
_ = i.Aligned()
|
||||
|
||||
if buf != nil {
|
||||
if copied, err = io.Copy(&cksum, buf); err != nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
i = &cksum
|
||||
|
||||
return
|
||||
}
|
351
netx/inetcksum/funcs_inetchecksum.go
Normal file
351
netx/inetcksum/funcs_inetchecksum.go
Normal file
@ -0,0 +1,351 @@
|
||||
package inetcksum
|
||||
|
||||
import (
|
||||
`io`
|
||||
)
|
||||
|
||||
/*
|
||||
Aligned returns true if the current underlying buffer in an InetChecksum is
|
||||
aligned to the algorithm's requirement for an even number of bytes.
|
||||
|
||||
Note that if Aligned returns false, a single null pad byte will be applied
|
||||
to the underlying data buffer at time of a Sum* call, but will not be written
|
||||
to the persistent underlying storage.
|
||||
|
||||
If aligned's underlying buffer/storage is empty or nil, aligned will be true.
|
||||
|
||||
Aligned will also force-set the internal state's aligned status.
|
||||
*/
|
||||
func (i *InetChecksum) Aligned() (aligned bool) {
|
||||
|
||||
i.alignLock.Lock()
|
||||
defer i.alignLock.Unlock()
|
||||
|
||||
i.bufLock.RLock()
|
||||
aligned = i.buf.Len()&2 == 0
|
||||
i.bufLock.RUnlock()
|
||||
|
||||
i.aligned = aligned
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
// BlockSize returns the number of bytes at a time that InetChecksum operates on. (It will always return 1.)
|
||||
func (i *InetChecksum) BlockSize() (blockSize int) {
|
||||
|
||||
blockSize = 1
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
/*
|
||||
Bytes returns teh bytes currently in the internal storage.
|
||||
|
||||
curBuf will be nil if the internal storage has not yet been initialized.
|
||||
*/
|
||||
func (i *InetChecksum) Bytes() (curBuf []byte) {
|
||||
|
||||
i.bufLock.RLock()
|
||||
defer i.bufLock.RUnlock()
|
||||
|
||||
if i.buf.Len() != 0 {
|
||||
curBuf = i.buf.Bytes()
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
// Clear empties the internal buffer (but does not affect the checksum state).
|
||||
func (i *InetChecksum) Clear() {
|
||||
|
||||
i.bufLock.Lock()
|
||||
defer i.bufLock.Unlock()
|
||||
|
||||
i.buf.Reset()
|
||||
}
|
||||
|
||||
/*
|
||||
DisablePersist disables the internal persistence of an InetChecksum.
|
||||
|
||||
This is recommended for integrations that desire the concurrency safety
|
||||
of an InetChecksum but want a smaller memory footprint and do not need a copy
|
||||
of data that was hashed.
|
||||
|
||||
Any data existing in the buffer will NOT be cleared out if DisablePersist is called.
|
||||
You must call [InetChecksum.Clear] to do that.
|
||||
|
||||
Persistence CANNOT be reenabled once disabled. [InetChecksum.Reset]
|
||||
must be called to re-enable persistence.
|
||||
*/
|
||||
func (i *InetChecksum) DisablePersist() {
|
||||
|
||||
i.bufLock.Lock()
|
||||
defer i.bufLock.Unlock()
|
||||
|
||||
i.disabledBuf = true
|
||||
}
|
||||
|
||||
// Len returns the current amount of bytes stored in this InetChecksum's internal buffer.
|
||||
func (i *InetChecksum) Len() (l int) {
|
||||
|
||||
i.bufLock.RLock()
|
||||
defer i.bufLock.RUnlock()
|
||||
l = i.buf.Len()
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
/*
|
||||
Reset resets the internal buffer/storage to an empty state.
|
||||
|
||||
If persistence was disabled ([InetChecksum.DisablePersist]),
|
||||
this method will re-enable it with an empty buffer.
|
||||
If you wish the buffer to be disabled, you must invoke [InetChecksum.DisablePersist]
|
||||
again.
|
||||
|
||||
If you only wish to clear the buffer without losing the checksum state,
|
||||
use [InetChecksum.Clear].
|
||||
*/
|
||||
func (i *InetChecksum) Reset() {
|
||||
|
||||
i.alignLock.Lock()
|
||||
i.bufLock.Lock()
|
||||
i.sumLock.Lock()
|
||||
i.lastLock.Lock()
|
||||
|
||||
i.aligned = false
|
||||
i.alignLock.Unlock()
|
||||
|
||||
i.buf.Reset()
|
||||
i.disabledBuf = false
|
||||
i.bufLock.Unlock()
|
||||
|
||||
i.last = 0x00
|
||||
i.lastLock.Unlock()
|
||||
|
||||
i.sum = 0
|
||||
i.sumLock.Unlock()
|
||||
}
|
||||
|
||||
// Size returns how many bytes a checksum is. (It will always return 2.)
|
||||
func (i *InetChecksum) Size() (bufSize int) {
|
||||
|
||||
bufSize = 2
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
// Sum computes the checksum cksum of the current buffer and appends it as big-endian bytes to b.
|
||||
func (i *InetChecksum) Sum(b []byte) (cksumAppended []byte) {
|
||||
|
||||
var sum16 []byte = i.Sum16Bytes()
|
||||
|
||||
cksumAppended = append(b, sum16...)
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
/*
|
||||
Sum16 computes the checksum of the current buffer and returns it as a uint16.
|
||||
|
||||
This is the native number used in the IPv4 header.
|
||||
All other Sum* methods wrap this method.
|
||||
|
||||
If the underlying buffer is empty or nil, cksum will be 0xffff (65535)
|
||||
in line with common implementations.
|
||||
*/
|
||||
func (i *InetChecksum) Sum16() (cksum uint16) {
|
||||
|
||||
var thisSum uint32
|
||||
|
||||
i.alignLock.RLock()
|
||||
i.lastLock.RLock()
|
||||
i.sumLock.RLock()
|
||||
|
||||
thisSum = i.sum
|
||||
i.sumLock.RUnlock()
|
||||
|
||||
if !i.aligned {
|
||||
/*
|
||||
"Pad" at the end of the additive ops - a bitshift is used on the sum integer itself
|
||||
instead of a binary.Append() or append() or such to avoid additional memory allocation.
|
||||
*/
|
||||
thisSum += uint32(i.last) << padShift
|
||||
}
|
||||
i.lastLock.RUnlock()
|
||||
i.alignLock.RUnlock()
|
||||
|
||||
// Fold the "carried ones".
|
||||
for thisSum > cksumMask {
|
||||
thisSum = (thisSum & cksumMask) + (thisSum >> cksumShift)
|
||||
}
|
||||
cksum = ^uint16(thisSum)
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
/*
|
||||
Sum16Bytes is a convenience wrapper around [InetChecksum.Sum16]
|
||||
which returns a slice of the uint16 as a 2-byte-long slice instead.
|
||||
*/
|
||||
func (i *InetChecksum) Sum16Bytes() (cksum []byte) {
|
||||
|
||||
var sum16 uint16 = i.Sum16()
|
||||
|
||||
cksum = make([]byte, 2)
|
||||
|
||||
ord.PutUint16(cksum, sum16)
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
/*
|
||||
Write writes data to the underlying InetChecksum buffer. It conforms to [io.Writer].
|
||||
|
||||
If this operation returns an error, you MUST call [InetChecksum.Reset] as the instance
|
||||
being used can no longer be considered to be in a consistent state.
|
||||
|
||||
p may be nil or empty; no error will be returned and n will be 0 if so.
|
||||
|
||||
Write is concurrency safe; a copy of p is made first and all hashing/internal
|
||||
storage writing is performed on/which that copy.
|
||||
*/
|
||||
func (i *InetChecksum) Write(p []byte) (n int, err error) {
|
||||
|
||||
var idx int
|
||||
var bufLen int
|
||||
var buf []byte
|
||||
var iter int
|
||||
var origLast byte
|
||||
var origAligned bool
|
||||
var origSum uint32
|
||||
|
||||
if p == nil || len(p) == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
// The TL;DR here is the checksum boils down to:
|
||||
// cksum = cksum + ((high << 8) | low)
|
||||
|
||||
bufLen = len(p)
|
||||
buf = make([]byte, bufLen)
|
||||
copy(buf, p)
|
||||
|
||||
i.alignLock.Lock()
|
||||
defer i.alignLock.Unlock()
|
||||
i.bufLock.Lock()
|
||||
defer i.bufLock.Unlock()
|
||||
i.sumLock.Lock()
|
||||
defer i.sumLock.Unlock()
|
||||
i.lastLock.Lock()
|
||||
defer i.lastLock.Unlock()
|
||||
|
||||
origLast = i.last
|
||||
origAligned = i.aligned
|
||||
origSum = i.sum
|
||||
|
||||
if !i.aligned {
|
||||
// Last write was unaligned, so pair i.last in.
|
||||
i.sum += (uint32(i.last) << padShift) | uint32(buf[0])
|
||||
i.aligned = true
|
||||
idx = 1
|
||||
}
|
||||
|
||||
// Operate on bytepairs.
|
||||
// Note that idx is set to either 0 or 1 depending on if
|
||||
// buf[0] has already been summed in.
|
||||
for iter = idx; iter < bufLen; iter += 2 {
|
||||
if iter+1 < bufLen {
|
||||
// Technically could use "i.sum += uint32(ord.Uint16(buf[iter:iter+2))" here instead.
|
||||
i.sum += (uint32(buf[iter]) << padShift) | uint32(buf[iter+1])
|
||||
} else {
|
||||
i.last = buf[iter]
|
||||
i.aligned = false
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if !i.disabledBuf {
|
||||
if n, err = i.buf.Write(buf); err != nil {
|
||||
i.sum = origSum
|
||||
i.aligned = origAligned
|
||||
i.last = origLast
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
// WriteByte writes a single byte to the underlying storage. It conforms to [io.ByteWriter].
|
||||
func (i *InetChecksum) WriteByte(c byte) (err error) {
|
||||
|
||||
var origLast byte
|
||||
var origAligned bool
|
||||
var origSum uint32
|
||||
|
||||
i.alignLock.Lock()
|
||||
defer i.alignLock.Unlock()
|
||||
i.bufLock.Lock()
|
||||
defer i.bufLock.Unlock()
|
||||
i.sumLock.Lock()
|
||||
defer i.sumLock.Unlock()
|
||||
i.lastLock.Lock()
|
||||
defer i.lastLock.Unlock()
|
||||
|
||||
origLast = i.last
|
||||
origAligned = i.aligned
|
||||
origSum = i.sum
|
||||
|
||||
if i.aligned {
|
||||
// Since it's a single byte, we just set i.last and unalign.
|
||||
i.last = c
|
||||
i.aligned = false
|
||||
} else {
|
||||
// It's unaligned, so join with i.last and align.
|
||||
i.sum += (uint32(i.last) << padShift) | uint32(c)
|
||||
i.aligned = true
|
||||
}
|
||||
|
||||
if !i.disabledBuf {
|
||||
if err = i.WriteByte(c); err != nil {
|
||||
i.sum = origSum
|
||||
i.aligned = origAligned
|
||||
i.last = origLast
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
// WriteString writes a string to the underlying storage. It conforms to [io.StringWriter].
|
||||
func (i *InetChecksum) WriteString(s string) (n int, err error) {
|
||||
|
||||
if n, err = i.Write([]byte(s)); err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
// WriteTo writes the current contents of the underlying buffer to w. The contents are not drained. Noop if persistence is disabled.
|
||||
func (i *InetChecksum) WriteTo(w io.Writer) (n int64, err error) {
|
||||
|
||||
var wrtn int
|
||||
|
||||
if i.disabledBuf {
|
||||
return
|
||||
}
|
||||
|
||||
i.bufLock.RLock()
|
||||
defer i.bufLock.RUnlock()
|
||||
|
||||
if wrtn, err = w.Write(i.buf.Bytes()); err != nil {
|
||||
n = int64(wrtn)
|
||||
return
|
||||
}
|
||||
n = int64(wrtn)
|
||||
|
||||
return
|
||||
}
|
172
netx/inetcksum/funcs_inetchecksumsimple.go
Normal file
172
netx/inetcksum/funcs_inetchecksumsimple.go
Normal file
@ -0,0 +1,172 @@
|
||||
package inetcksum
|
||||
|
||||
/*
|
||||
Aligned returns true if the current checksum for an InetChecksumSimple is
|
||||
aligned to the algorithm's requirement for an even number of bytes.
|
||||
|
||||
Note that if Aligned returns false, a single null pad byte will be applied
|
||||
to the underlying data buffer at time of a Sum* call.
|
||||
*/
|
||||
func (i *InetChecksumSimple) Aligned() (aligned bool) {
|
||||
|
||||
aligned = i.aligned
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
// BlockSize returns the number of bytes at a time that InetChecksumSimple operates on. (It will always return 1.)
|
||||
func (i *InetChecksumSimple) BlockSize() (blockSize int) {
|
||||
|
||||
blockSize = 1
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
// Reset resets the state of an InetChecksumSimple.
|
||||
func (i *InetChecksumSimple) Reset() {
|
||||
|
||||
i.last = 0x00
|
||||
i.sum = 0
|
||||
i.last = 0x00
|
||||
|
||||
}
|
||||
|
||||
// Size returns how many bytes a checksum is. (It will always return 2.)
|
||||
func (i *InetChecksumSimple) Size() (bufSize int) {
|
||||
|
||||
bufSize = 2
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
// Sum computes the checksum cksum of the current buffer and appends it as big-endian bytes to b.
|
||||
func (i *InetChecksumSimple) Sum(b []byte) (cksumAppended []byte) {
|
||||
|
||||
var sum16 []byte = i.Sum16Bytes()
|
||||
|
||||
cksumAppended = append(b, sum16...)
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
/*
|
||||
Sum16 computes the checksum of the current buffer and returns it as a uint16.
|
||||
|
||||
This is the native number used in the IPv4 header.
|
||||
All other Sum* methods wrap this method.
|
||||
|
||||
If the underlying buffer is empty or nil, cksum will be 0xffff (65535)
|
||||
in line with common implementations.
|
||||
*/
|
||||
func (i *InetChecksumSimple) Sum16() (cksum uint16) {
|
||||
|
||||
var thisSum uint32
|
||||
|
||||
thisSum = i.sum
|
||||
|
||||
if !i.aligned {
|
||||
/*
|
||||
"Pad" at the end of the additive ops - a bitshift is used on the sum integer itself
|
||||
instead of a binary.Append() or append() or such to avoid additional memory allocation.
|
||||
*/
|
||||
thisSum += uint32(i.last) << padShift
|
||||
}
|
||||
|
||||
// Fold the "carried ones".
|
||||
for thisSum > cksumMask {
|
||||
thisSum = (thisSum & cksumMask) + (thisSum >> cksumShift)
|
||||
}
|
||||
cksum = ^uint16(thisSum)
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
/*
|
||||
Sum16Bytes is a convenience wrapper around [InetChecksumSimple.Sum16]
|
||||
which returns a slice of the uint16 as a 2-byte-long slice instead.
|
||||
*/
|
||||
func (i *InetChecksumSimple) Sum16Bytes() (cksum []byte) {
|
||||
|
||||
var sum16 uint16 = i.Sum16()
|
||||
|
||||
cksum = make([]byte, 2)
|
||||
|
||||
ord.PutUint16(cksum, sum16)
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
/*
|
||||
Write writes data to the underlying InetChecksumSimple buffer. It conforms to [io.Writer].
|
||||
|
||||
p may be nil or empty; no error will be returned and n will be 0 if so.
|
||||
|
||||
A copy of p is made first and all hashing operations are performed on that copy.
|
||||
*/
|
||||
func (i *InetChecksumSimple) Write(p []byte) (n int, err error) {
|
||||
|
||||
var idx int
|
||||
var bufLen int
|
||||
var buf []byte
|
||||
var iter int
|
||||
|
||||
if p == nil || len(p) == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
// The TL;DR here is the checksum boils down to:
|
||||
// cksum = cksum + ((high << 8) | low)
|
||||
|
||||
bufLen = len(p)
|
||||
buf = make([]byte, bufLen)
|
||||
copy(buf, p)
|
||||
|
||||
if !i.aligned {
|
||||
// Last write was unaligned, so pair i.last in.
|
||||
i.sum += (uint32(i.last) << padShift) | uint32(buf[0])
|
||||
i.aligned = true
|
||||
idx = 1
|
||||
}
|
||||
|
||||
// Operate on bytepairs.
|
||||
// Note that idx is set to either 0 or 1 depending on if
|
||||
// buf[0] has already been summed in.
|
||||
for iter = idx; iter < bufLen; iter += 2 {
|
||||
if iter+1 < bufLen {
|
||||
// Technically could use "i.sum += uint32(ord.Uint16(buf[iter:iter+2))" here instead.
|
||||
i.sum += (uint32(buf[iter]) << padShift) | uint32(buf[iter+1])
|
||||
} else {
|
||||
i.last = buf[iter]
|
||||
i.aligned = false
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
// WriteByte checksums a single byte. It conforms to [io.ByteWriter].
|
||||
func (i *InetChecksumSimple) WriteByte(c byte) (err error) {
|
||||
|
||||
if i.aligned {
|
||||
// Since it's a single byte, we just set i.last and unalign.
|
||||
i.last = c
|
||||
i.aligned = false
|
||||
} else {
|
||||
// It's unaligned, so join with i.last and align.
|
||||
i.sum += (uint32(i.last) << padShift) | uint32(c)
|
||||
i.aligned = true
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
// WriteString checksums a string. It conforms to [io.StringWriter].
|
||||
func (i *InetChecksumSimple) WriteString(s string) (n int, err error) {
|
||||
|
||||
if n, err = i.Write([]byte(s)); err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
return
|
||||
}
|
68
netx/inetcksum/types.go
Normal file
68
netx/inetcksum/types.go
Normal file
@ -0,0 +1,68 @@
|
||||
package inetcksum
|
||||
|
||||
import (
|
||||
`bytes`
|
||||
`sync`
|
||||
)
|
||||
|
||||
type (
	/*
		InetChecksum implements [hash.Hash] and various other stdlib interfaces.

		If the current data in an InetChecksum's buffer is not aligned
		to an even number of bytes -- e.g. InetChecksum.buf.Len() % 2 != 0,
		[InetChecksum.Aligned] will return false (otherwise it will return
		true).

		If [InetChecksum.Aligned] returns false, the checksum result of an
		[InetChecksum.Sum] or [InetChecksum.Sum16] (or any other operation
		returning a sum) will INCLUDE THE PAD NULL BYTE (which is only
		applied *at the time of the Sum/Sum32 call) and is NOT applied to
		the persistent underlying storage.

		InetChecksum differs from [InetChecksumSimple] in that it:

		* Is MUCH better-suited/safer for concurrent operations - ALL
		  methods are concurrency-safe.
		* Allows the data that is hashed to be recovered from a
		  sequential internal buffer. (See [InetChecksum.DisablePersist]
		  to disable the persistent internal buffer.)

		At the cost of increased memory usage and additional cycles for mutexing.

		Note that once persistence is disabled for an InetChecksum, it cannot be
		re-enabled until/unless [InetChecksum.Reset] is called (which will reset
		the persistence to enabled with a fresh buffer). Any data within the
		persistent buffer will be removed if [InetChecksum.DisablePersist] is called.
	*/
	InetChecksum struct {
		// buf persistently stores hashed data so it can be recovered later
		// (unless persistence has been disabled via DisablePersist).
		buf bytes.Buffer
		// disabledBuf records that persistence was disabled; presumably
		// cleared again by Reset -- TODO confirm against funcs.go.
		disabledBuf bool
		// aligned is false when an odd trailing byte is pending in last.
		aligned bool
		// last holds the unpaired byte from the previous unaligned write.
		last byte
		// sum is the running 32-bit checksum accumulator.
		sum uint32
		// NOTE(review): lock names suggest one lock per guarded field
		// (buf, aligned, last, sum respectively) -- confirm in funcs.go.
		bufLock   sync.RWMutex
		alignLock sync.RWMutex
		lastLock  sync.RWMutex
		sumLock   sync.RWMutex
	}

	/*
		InetChecksumSimple is like [InetChecksum], but with a few key differences.

		It is MUCH much more performant/optimized for *single throughput* operations.
		Because it also does not retain a buffer of what was hashed, it uses *far* less
		memory over time.

		However, the downside is it is NOT concurrency safe. There are no promises made
		about safety or proper checksum ordering with concurrency for this type, but it
		should have much better performance for non-concurrent use.

		It behaves much more like a traditional [hash.Hash].
	*/
	InetChecksumSimple struct {
		// aligned is false when an odd trailing byte is pending in last.
		aligned bool
		// last holds the unpaired byte from the previous unaligned write.
		last byte
		// sum is the running 32-bit checksum accumulator.
		sum uint32
	}
)
|
@ -1,4 +1,4 @@
|
||||
/*
|
||||
Package remap provides convenience functions around regular expressions.
|
||||
Package remap provides convenience functions around regular expressions, primarily offering maps for named capture groups.
|
||||
*/
|
||||
package remap
|
||||
|
@ -1,20 +1,198 @@
|
||||
package remap
|
||||
|
||||
/*
|
||||
Map returns a map[string]<match bytes> for regexes with named capture groups matched in bytes b.
|
||||
Map returns a map[string][]<match bytes> for regexes with named capture groups matched in bytes b.
|
||||
Note that this supports non-unique group names; [regexp.Regexp] allows for patterns with multiple groups
|
||||
using the same group name (though your IDE might complain; I know GoLand does).
|
||||
|
||||
matches will be nil if no named capture group matches were found.
|
||||
Each match for each group is in a slice keyed under that group name, with that slice
|
||||
ordered by the indexing done by the regex match itself.
|
||||
|
||||
In summary, the parameters are as follows:
|
||||
|
||||
# inclNoMatch
|
||||
|
||||
If true, then attempt to return a non-nil matches (as long as b isn't nil).
|
||||
Group keys will be populated and explicitly defined as nil.
|
||||
|
||||
For example, if a pattern
|
||||
|
||||
^(?P<g1>foo)(?P<g1>bar)(?P<g2>baz)$
|
||||
|
||||
is provided but b does not match then matches will be:
|
||||
|
||||
map[string][][]byte{
|
||||
"g1": nil,
|
||||
"g2": nil,
|
||||
}
|
||||
|
||||
# inclNoMatchStrict
|
||||
|
||||
If true (and inclNoMatch is true), instead of a single nil the group's values will be
|
||||
a slice of nil values explicitly matching the number of times the group name is specified
|
||||
in the pattern.
|
||||
|
||||
For example, if a pattern:
|
||||
|
||||
^(?P<g1>foo)(?P<g1>bar)(?P<g2>baz)$
|
||||
|
||||
is provided but b does not match then matches will be:
|
||||
|
||||
map[string][][]byte{
|
||||
"g1": [][]byte{
|
||||
nil,
|
||||
nil,
|
||||
},
|
||||
"g2": [][]byte{
|
||||
nil,
|
||||
},
|
||||
}
|
||||
|
||||
# mustMatch
|
||||
|
||||
If true, matches will be nil if the entirety of b does not match the pattern (and thus
|
||||
no capture groups matched) (overrides inclNoMatch) -- explicitly:
|
||||
|
||||
matches == nil
|
||||
|
||||
Otherwise if false (and assuming inclNoMatch is false), matches will be:
|
||||
|
||||
map[string][][]byte{}{}
|
||||
|
||||
# Condition Tree
|
||||
|
||||
In detail, matches and/or its values may be nil or empty under the following condition tree:
|
||||
|
||||
IF b is nil:
|
||||
THEN matches will always be nil
|
||||
ELSE:
|
||||
IF all of b does not match pattern
|
||||
IF mustMatch is true
|
||||
THEN matches == nil
|
||||
ELSE
|
||||
THEN matches == map[string][][]byte{} (non-nil but empty)
|
||||
ELSE IF pattern has no named capture groups
|
||||
IF inclNoMatch is true
|
||||
THEN matches == map[string][][]byte{} (non-nil but empty)
|
||||
ELSE
|
||||
THEN matches == nil
|
||||
ELSE
|
||||
IF there are no named group matches
|
||||
IF inclNoMatch is true
|
||||
THEN matches is non-nil; matches[<group name>, ...] is/are defined but nil (_, ok = matches[<group name>]; ok == true)
|
||||
ELSE
|
||||
THEN matches == nil
|
||||
ELSE
|
||||
IF <group name> does not have a match
|
||||
IF inclNoMatch is true
|
||||
IF inclNoMatchStrict is true
|
||||
THEN matches[<group name>] is defined and non-nil, but populated with placeholder nils
|
||||
(matches[<group name>] == [][]byte{nil[, nil...]})
|
||||
ELSE
|
||||
THEN matches[<group name>] is guaranteed defined but may be nil (_, ok = matches[<group name>]; ok == true)
|
||||
ELSE
|
||||
THEN matches[<group name>] is not defined (_, ok = matches[<group name>]; ok == false)
|
||||
ELSE
|
||||
matches[<group name>] == []{<match>[, <match>...]}
|
||||
*/
|
||||
func (r *ReMap) Map(b []byte) (matches map[string][]byte) {
|
||||
func (r *ReMap) Map(b []byte, inclNoMatch, inclNoMatchStrict, mustMatch bool) (matches map[string][][]byte) {
|
||||
|
||||
var m [][]byte
|
||||
var tmpMap map[string][]byte = make(map[string][]byte)
|
||||
var ok bool
|
||||
var mIdx int
|
||||
var match []byte
|
||||
var grpNm string
|
||||
var names []string
|
||||
var matchBytes [][]byte
|
||||
var tmpMap map[string][][]byte = make(map[string][][]byte)
|
||||
|
||||
m = r.Regexp.FindSubmatch(b)
|
||||
if b == nil {
|
||||
return
|
||||
}
|
||||
|
||||
for idx, grpNm := range r.Regexp.SubexpNames() {
|
||||
if idx != 0 && grpNm != "" {
|
||||
tmpMap[grpNm] = m[idx]
|
||||
names = r.Regexp.SubexpNames()
|
||||
matchBytes = r.Regexp.FindSubmatch(b)
|
||||
|
||||
if matchBytes == nil {
|
||||
// b does not match pattern
|
||||
if !mustMatch {
|
||||
matches = make(map[string][][]byte)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
if names == nil || len(names) == 0 || len(names) == 1 {
|
||||
/*
|
||||
no named capture groups;
|
||||
technically only the last condition would be the case.
|
||||
*/
|
||||
if inclNoMatch {
|
||||
matches = make(map[string][][]byte)
|
||||
}
|
||||
return
|
||||
}
|
||||
names = names[1:]
|
||||
|
||||
if len(matchBytes) == 0 || len(matchBytes) == 1 {
|
||||
/*
|
||||
no submatches whatsoever.
|
||||
*Technically* I don't think this condition can actually be reached.
|
||||
This is more of a safe-return before we re-slice.
|
||||
*/
|
||||
matches = make(map[string][][]byte)
|
||||
if inclNoMatch {
|
||||
if len(names) >= 1 {
|
||||
for _, grpNm = range names {
|
||||
matches[grpNm] = nil
|
||||
}
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
matchBytes = matchBytes[1:]
|
||||
|
||||
for mIdx, match = range matchBytes {
|
||||
grpNm = names[mIdx]
|
||||
/*
|
||||
Thankfully, it's actually a build error if a pattern specifies a named
|
||||
capture group with an empty name.
|
||||
So we don't need to worry about accounting for that,
|
||||
and can just skip over grpNm == "" (which is an *unnamed* capture group).
|
||||
*/
|
||||
if grpNm == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
if match == nil {
|
||||
// group did not match
|
||||
if !inclNoMatch {
|
||||
continue
|
||||
}
|
||||
if _, ok = tmpMap[grpNm]; !ok {
|
||||
if !inclNoMatchStrict {
|
||||
tmpMap[grpNm] = nil
|
||||
} else {
|
||||
tmpMap[grpNm] = [][]byte{nil}
|
||||
}
|
||||
} else {
|
||||
if inclNoMatchStrict {
|
||||
tmpMap[grpNm] = append(tmpMap[grpNm], nil)
|
||||
}
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
if _, ok = tmpMap[grpNm]; !ok {
|
||||
tmpMap[grpNm] = make([][]byte, 0)
|
||||
}
|
||||
tmpMap[grpNm] = append(tmpMap[grpNm], match)
|
||||
}
|
||||
|
||||
// This *technically* should be completely handled above.
|
||||
if inclNoMatch {
|
||||
for _, grpNm = range names {
|
||||
if _, ok = tmpMap[grpNm]; !ok {
|
||||
tmpMap[grpNm] = nil
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -26,20 +204,279 @@ func (r *ReMap) Map(b []byte) (matches map[string][]byte) {
|
||||
}
|
||||
|
||||
/*
|
||||
MapString returns a map[string]<match string> for regexes with named capture groups matched in string s.
|
||||
MapString is exactly like ReMap.Map(), but operates on (and returns) strings instead.
|
||||
(matches will always be nil if s == "".)
|
||||
|
||||
matches will be nil if no named capture group matches were found.
|
||||
A small deviation, though; empty strings instead of nils (because duh) will occupy slice placeholders (if `inclNoMatchStrict` is specified).
|
||||
This unfortunately *does not provide any indication* if an empty string positively matched the pattern (a "hit") or if it was simply
|
||||
not matched at all (a "miss"). If you need definitive determination between the two conditions, it is instead recommended to either
|
||||
*not* use inclNoMatchStrict or to use ReMap.Map() instead and convert any non-nil values to strings after.
|
||||
|
||||
Particularly:
|
||||
|
||||
# inclNoMatch
|
||||
|
||||
If true, then attempt to return a non-nil matches (as long as s isn't empty).
|
||||
Group keys will be populated and explicitly defined as nil.
|
||||
|
||||
For example, if a pattern
|
||||
|
||||
^(?P<g1>foo)(?P<g1>bar)(?P<g2>baz)$
|
||||
|
||||
is provided but s does not match then matches will be:
|
||||
|
||||
map[string][]string{
|
||||
"g1": nil,
|
||||
"g2": nil,
|
||||
}
|
||||
|
||||
# inclNoMatchStrict
|
||||
|
||||
If true (and inclNoMatch is true), instead of a single nil the group's values will be
|
||||
a slice of empty string values explicitly matching the number of times the group name is specified
|
||||
in the pattern.
|
||||
|
||||
For example, if a pattern:
|
||||
|
||||
^(?P<g1>foo)(?P<g1>bar)(?P<g2>baz)$
|
||||
|
||||
is provided but s does not match then matches will be:
|
||||
|
||||
map[string][]string{
|
||||
"g1": []string{
|
||||
"",
|
||||
"",
|
||||
},
|
||||
"g2": []string{
|
||||
"",
|
||||
},
|
||||
}
|
||||
|
||||
# mustMatch
|
||||
|
||||
If true, matches will be nil if the entirety of s does not match the pattern (and thus
|
||||
no capture groups matched) (overrides inclNoMatch) -- explicitly:
|
||||
|
||||
matches == nil
|
||||
|
||||
Otherwise if false (and assuming inclNoMatch is false), matches will be:
|
||||
|
||||
map[string][]string{}{}
|
||||
|
||||
# Condition Tree
|
||||
|
||||
In detail, matches and/or its values may be nil or empty under the following condition tree:
|
||||
|
||||
IF s is empty:
|
||||
THEN matches will always be nil
|
||||
ELSE:
|
||||
IF all of s does not match pattern
|
||||
IF mustMatch is true
|
||||
THEN matches == nil
|
||||
ELSE
|
||||
THEN matches == map[string][]string{} (non-nil but empty)
|
||||
ELSE IF pattern has no named capture groups
|
||||
IF inclNoMatch is true
|
||||
THEN matches == map[string][]string{} (non-nil but empty)
|
||||
ELSE
|
||||
THEN matches == nil
|
||||
ELSE
|
||||
IF there are no named group matches
|
||||
IF inclNoMatch is true
|
||||
THEN matches is non-nil; matches[<group name>, ...] is/are defined but nil (_, ok = matches[<group name>]; ok == true)
|
||||
ELSE
|
||||
THEN matches == nil
|
||||
ELSE
|
||||
IF <group name> does not have a match
|
||||
IF inclNoMatch is true
|
||||
IF inclNoMatchStrict is true
|
||||
THEN matches[<group name>] is defined and non-nil, but populated with placeholder nils
|
||||
(matches[<group name>] == []string{""[, ""...]})
|
||||
ELSE
|
||||
THEN matches[<group name>] is guaranteed defined but may be nil (_, ok = matches[<group name>]; ok == true)
|
||||
ELSE
|
||||
THEN matches[<group name>] is not defined (_, ok = matches[<group name>]; ok == false)
|
||||
ELSE
|
||||
matches[<group name>] == []{<match>[, <match>...]}
|
||||
*/
|
||||
func (r *ReMap) MapString(s string) (matches map[string]string) {
|
||||
func (r *ReMap) MapString(s string, inclNoMatch, inclNoMatchStrict, mustMatch bool) (matches map[string][]string) {
|
||||
|
||||
var m []string
|
||||
var tmpMap map[string]string = make(map[string]string)
|
||||
var ok bool
|
||||
var endIdx int
|
||||
var startIdx int
|
||||
var chunkIdx int
|
||||
var grpNm string
|
||||
var names []string
|
||||
var matchStr string
|
||||
/*
|
||||
A slice of indices or index pairs.
|
||||
For each element `e` in idxChunks,
|
||||
* if `e` is nil, no group match.
|
||||
* if len(e) == 1, only a single character was matched.
|
||||
* otherwise len(e) == 2, the start and end of the match.
|
||||
*/
|
||||
var idxChunks [][]int
|
||||
var matchIndices []int
|
||||
var chunkIndices []int // always 2 elements; start pos and end pos
|
||||
var tmpMap map[string][]string = make(map[string][]string)
|
||||
|
||||
m = r.Regexp.FindStringSubmatch(s)
|
||||
/*
|
||||
OK so this is a bit of a deviation.
|
||||
|
||||
for idx, grpNm := range r.Regexp.SubexpNames() {
|
||||
if idx != 0 && grpNm != "" {
|
||||
tmpMap[grpNm] = m[idx]
|
||||
It's not as straightforward as above, because there isn't an explicit way
|
||||
like above to determine if a pattern was *matched as an empty string* vs.
|
||||
*not matched*.
|
||||
|
||||
So instead do roundabout index-y things.
|
||||
*/
|
||||
|
||||
if s == "" {
|
||||
return
|
||||
}
|
||||
/*
|
||||
I'm not entirely sure how serious they are about "the slice should not be modified"...
|
||||
|
||||
DO NOT sort or dedupe `names`! If the same name for groups is duplicated,
|
||||
it will be duplicated here in proper order and the ordering is tied to
|
||||
the ordering of matchIndices.
|
||||
*/
|
||||
names = r.Regexp.SubexpNames()[:]
|
||||
matchIndices = r.Regexp.FindStringSubmatchIndex(s)
|
||||
|
||||
if matchIndices == nil {
|
||||
// s does not match pattern at all.
|
||||
if !mustMatch {
|
||||
matches = make(map[string][]string)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
if names == nil || len(names) <= 1 {
|
||||
/*
|
||||
No named capture groups;
|
||||
technically only the last condition would be the case,
|
||||
as (regexp.Regexp).SubexpNames() will ALWAYS at the LEAST
|
||||
return a `[]string{""}`.
|
||||
*/
|
||||
if inclNoMatch {
|
||||
matches = make(map[string][]string)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
if len(matchIndices) == 0 || len(matchIndices) == 1 {
|
||||
/*
|
||||
No (sub)matches whatsoever.
|
||||
*technically* I don't think this condition can actually be reached;
|
||||
matchIndices should ALWAYS either be `nil` or len will be at LEAST 2,
|
||||
and modulo 2 thereafter since they're PAIRS of indices...
|
||||
Why they didn't just return a [][]int or [][2]int or something
|
||||
instead of an []int, who knows.
|
||||
But we're correcting that poor design.
|
||||
This is more of a safe-return before we chunk the indices.
|
||||
*/
|
||||
matches = make(map[string][]string)
|
||||
if inclNoMatch {
|
||||
for _, grpNm = range names {
|
||||
if grpNm != "" {
|
||||
matches[grpNm] = nil
|
||||
}
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
/*
|
||||
A reslice of `matchIndices` could technically start at 2 (as long as `names` is sliced [1:])
|
||||
because they're in pairs: []int{<start>, <end>, <start>, <end>, ...}
|
||||
and the first pair is the entire pattern match (un-resliced names[0]).
|
||||
Thus the len(matchIndices) == 2*len(names), *even* if you
|
||||
Keep in mind that since the first element of names is removed,
|
||||
the first pair here is skipped.
|
||||
This provides a bit more consistent readability, though.
|
||||
*/
|
||||
idxChunks = make([][]int, len(names))
|
||||
chunkIdx = 0
|
||||
endIdx = 0
|
||||
for startIdx = 0; endIdx < len(matchIndices); startIdx += 2 {
|
||||
endIdx = startIdx + 2
|
||||
// This technically should never happen.
|
||||
if endIdx > len(matchIndices) {
|
||||
endIdx = len(matchIndices)
|
||||
}
|
||||
|
||||
chunkIndices = matchIndices[startIdx:endIdx]
|
||||
|
||||
if chunkIndices[0] == -1 || chunkIndices[1] == -1 {
|
||||
// group did not match
|
||||
chunkIndices = nil
|
||||
} else {
|
||||
if chunkIndices[0] == chunkIndices[1] {
|
||||
chunkIndices = []int{chunkIndices[0]}
|
||||
} else {
|
||||
chunkIndices = matchIndices[startIdx:endIdx]
|
||||
}
|
||||
}
|
||||
idxChunks[chunkIdx] = chunkIndices
|
||||
chunkIdx++
|
||||
}
|
||||
|
||||
// Now associate with names and pull the string sequence.
|
||||
for chunkIdx, chunkIndices = range idxChunks {
|
||||
grpNm = names[chunkIdx]
|
||||
/*
|
||||
Thankfully, it's actually a build error if a pattern specifies a named
|
||||
capture group with an empty name.
|
||||
So we don't need to worry about accounting for that,
|
||||
and can just skip over grpNm == ""
|
||||
(which is either an *unnamed* capture group
|
||||
OR the first element in `names`, which is always
|
||||
the entire match).
|
||||
*/
|
||||
if grpNm == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
if chunkIndices == nil || len(chunkIndices) == 0 {
|
||||
// group did not match
|
||||
if !inclNoMatch {
|
||||
continue
|
||||
}
|
||||
if _, ok = tmpMap[grpNm]; !ok {
|
||||
if !inclNoMatchStrict {
|
||||
tmpMap[grpNm] = nil
|
||||
} else {
|
||||
tmpMap[grpNm] = []string{""}
|
||||
}
|
||||
} else {
|
||||
if inclNoMatchStrict {
|
||||
tmpMap[grpNm] = append(tmpMap[grpNm], "")
|
||||
}
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
switch len(chunkIndices) {
|
||||
case 1:
|
||||
// Single character
|
||||
matchStr = string(s[chunkIndices[0]])
|
||||
case 2:
|
||||
// Multiple characters
|
||||
matchStr = s[chunkIndices[0]:chunkIndices[1]]
|
||||
}
|
||||
|
||||
if _, ok = tmpMap[grpNm]; !ok {
|
||||
tmpMap[grpNm] = make([]string, 0)
|
||||
}
|
||||
tmpMap[grpNm] = append(tmpMap[grpNm], matchStr)
|
||||
}
|
||||
|
||||
// This *technically* should be completely handled above.
|
||||
if inclNoMatch {
|
||||
for _, grpNm = range names {
|
||||
if _, ok = tmpMap[grpNm]; !ok {
|
||||
tmpMap[grpNm] = nil
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1,10 +1,27 @@
|
||||
package remap
|
||||
|
||||
import (
|
||||
`regexp`
|
||||
"regexp"
|
||||
)
|
||||
|
||||
// ReMap provides some map-related functions around a regexp.Regexp.
|
||||
type ReMap struct {
|
||||
*regexp.Regexp
|
||||
}
|
||||
type (
	// ReMap provides some map-related functions around a regexp.Regexp.
	// The *regexp.Regexp is embedded, so a ReMap exposes the full
	// regexp.Regexp method set in addition to its own Map helpers.
	ReMap struct {
		*regexp.Regexp
	}

	// TODO?
	/*
		ExplicitStringMatch is used with ReMap.MapStringExplicit to indicate if a
		capture group result is a hit (a group matched, but e.g. the match value is empty string)
		or not (a group did not match).
	*/
	/*
		ExplicitStringMatch struct {
			Group string
			IsMatch bool
			Value string
		}

	*/
)
|
||||
|
Loading…
x
Reference in New Issue
Block a user