3 Commits

Author SHA1 Message Date
brent saner
688abd0874 v1.9.5
FIXED:
* HasFlag would inappropriately report true for m = A, flag = A | B.
  This has been rectified, and this behavior is now explicitly
  exposed via IsOneOf.
2025-08-26 20:39:29 -04:00
brent saner
a1f87d6b51 stubbing encoding/bit 2025-08-23 19:32:48 -04:00
brent saner
07951f1f03 v1.9.4
FIXED:
* remap.ReMap.MapString() was not properly correllating groups. It is
  now.
2025-08-17 00:45:24 -04:00
5 changed files with 186 additions and 32 deletions

View File

@@ -0,0 +1,19 @@
/*
Package bit aims to provide feature parity with stdlib's [encoding/hex].
It's a ludicrous tragedy that hex/base16, base32, base64 all have libraries for converting
to/from string representations... but there's nothing for binary ('01010001' etc.) whatsoever.
This package also provides some extra convenience functions and types in an attempt to provide
an abstracted bit-level fidelity in Go. A [Bit] is a bool type, in which that underlying bool
being false represents a 0 and that underlying bool being true represents a 1.
Note that a [Bit] or arbitrary-length or non-octal-aligned [][Bit] may take up more bytes in memory
than expected; a [Bit] will actually always occupy a single byte -- thus representing
`00000000 00000000` as a [][Bit] or [16][Bit] will actually occupy *sixteen bytes* in memory,
NOT 2 bytes (nor, obviously, [2][Byte])!
It is recommended instead to use a [Bits] instead of a [Bit] slice or array, as it will try to properly align to the
smallest memory allocation possible (at the cost of a few extra CPU cycles on adding/removing one or more [Bit]).
It will properly retain any appended, prepended, leading, or trailing bits that do not currently align to a byte.
*/
package bit

View File

@@ -0,0 +1,14 @@
package bit
// TODO: Provide analogues of encoding/hex, encoding/base64, etc. functions etc.
/*
TODO: Also provide interfaces for the following:
* https://pkg.go.dev/encoding#BinaryAppender
* https://pkg.go.dev/encoding#BinaryMarshaler
* https://pkg.go.dev/encoding#BinaryUnmarshaler
* https://pkg.go.dev/encoding#TextAppender
* https://pkg.go.dev/encoding#TextMarshaler
* https://pkg.go.dev/encoding#TextUnmarshaler
*/

View File

@@ -0,0 +1,34 @@
package bit
type (
// Bit aims to provide a native-like type for a single bit (Golang operates on the smallest fidelity level of *byte*/uint8).
Bit bool
// Bits is an arbitrary length of bits.
Bits struct {
/*
leading is a series of Bit that do not cleanly align to the beginning of Bits.b.
They will always be the bits at the *beginning* of the sequence.
len(Bits.leading) will *never* be more than 7;
it's converted into a byte, prepended to Bits.b, and cleared if it reaches that point.
*/
leading []Bit
// b is the condensed/memory-aligned alternative to an [][8]Bit (or []Bit, or [][]Bit, etc.).
b []byte
/*
remaining is a series of Bit that do not cleanly align to the end of Bits.b.
They will always be the bits at the *end* of the sequence.
len(Bits.remaining) will *never* be more than 7;
it's converted into a byte, appended to Bits.b, and cleared if it reaches that point.
*/
remaining []Bit
// fixedLen, if 0, represents a "slice". If >= 1, it represents an "array".
fixedLen uint
}
// Byte is this package's representation of a byte. It's primarily for convenience.
Byte byte
// Bytes is defined as a type for convenience single-call functions.
Bytes []Byte
)

View File

@@ -34,11 +34,47 @@ func NewMaskBitExplicit(value uint) (m *MaskBit) {
return
}
// HasFlag is true if m has MaskBit flag set/enabled.
/*
HasFlag is true if m has MaskBit flag set/enabled.
THIS WILL RETURN FALSE FOR OR'd FLAGS.
For example:
flagA MaskBit = 0x01
flagB MaskBit = 0x02
flagComposite = flagA | flagB
m *MaskBit = NewMaskBitExplicit(uint(flagA))
m.HasFlag(flagComposite) will return false even though flagComposite is an OR
that contains flagA.
Use [MaskBit.IsOneOf] instead if you do not desire this behavior,
and instead want to test composite flag *membership*.
(MaskBit.IsOneOf will also return true for non-composite equality.)
*/
func (m *MaskBit) HasFlag(flag MaskBit) (r bool) {
var b MaskBit = *m
if b&flag == flag {
r = true
}
return
}
/*
IsOneOf is like a "looser" form of [MaskBit.HasFlag]
in that it allows for testing composite membership.
See [MaskBit.HasFlag] for more information.
If composite is *not* an OR'd MaskBit (i.e.
it falls directly on a boundary -- 0, 1, 2, 4, 8, 16, etc.),
then IsOneOf will behave exactly like HasFlag.
*/
func (m *MaskBit) IsOneOf(composite MaskBit) (r bool) {
var b MaskBit = *m
if b&flag != 0 {
r = true
}

View File

@@ -135,7 +135,7 @@ func (r *ReMap) Map(b []byte, inclNoMatch, inclNoMatchStrict, mustMatch bool) (m
if len(matchBytes) == 0 || len(matchBytes) == 1 {
/*
no submatches whatsoever.
*technically* I don't think this condition can actually be reached.
*Technically* I don't think this condition can actually be reached.
This is more of a safe-return before we re-slice.
*/
matches = make(map[string][][]byte)
@@ -308,6 +308,13 @@ func (r *ReMap) MapString(s string, inclNoMatch, inclNoMatchStrict, mustMatch bo
var grpNm string
var names []string
var matchStr string
/*
A slice of indices or index pairs.
For each element `e` in idxChunks,
* if `e` is nil, no group match.
* if len(e) == 1, only a single character was matched.
* otherwise len(e) == 2, the start and end of the match.
*/
var idxChunks [][]int
var matchIndices []int
var chunkIndices []int // always 2 elements; start pos and end pos
@@ -317,7 +324,7 @@ func (r *ReMap) MapString(s string, inclNoMatch, inclNoMatchStrict, mustMatch bo
OK so this is a bit of a deviation.
It's not as straightforward as above, because there isn't an explicit way
like above to determine if a patterb was *matched as an empty string* vs.
like above to determine if a pattern was *matched as an empty string* vs.
*not matched*.
So instead do roundabout index-y things.
@@ -326,73 +333,111 @@ func (r *ReMap) MapString(s string, inclNoMatch, inclNoMatchStrict, mustMatch bo
if s == "" {
return
}
names = r.Regexp.SubexpNames()
/*
I'm not entirely sure how serious they are about "the slice should not be modified"...
DO NOT sort or dedupe `names`! If the same name for groups is duplicated,
it will be duplicated here in proper order and the ordering is tied to
the ordering of matchIndices.
*/
names = r.Regexp.SubexpNames()[:]
matchIndices = r.Regexp.FindStringSubmatchIndex(s)
if matchIndices == nil {
// s does not match pattern
// s does not match pattern at all.
if !mustMatch {
matches = make(map[string][]string)
}
return
}
if names == nil || len(names) == 0 || len(names) == 1 {
if names == nil || len(names) <= 1 {
/*
no named capture groups;
technically only the last condition would be the case.
No named capture groups;
technically only the last condition would be the case,
as (regexp.Regexp).SubexpNames() will ALWAYS at the LEAST
return a `[]string{""}`.
*/
if inclNoMatch {
matches = make(map[string][]string)
}
return
}
names = names[1:]
if len(matchIndices) == 0 || len(matchIndices) == 1 {
/*
no submatches whatsoever.
*technically* I don't think this condition can actually be reached.
No (sub)matches whatsoever.
*technically* I don't think this condition can actually be reached;
matchIndices should ALWAYS either be `nil` or len will be at LEAST 2,
and modulo 2 thereafter since they're PAIRS of indices...
Why they didn't just return a [][]int or [][2]int or something
instead of an []int, who knows.
But we're correcting that poor design.
This is more of a safe-return before we chunk the indices.
*/
matches = make(map[string][]string)
if inclNoMatch {
if len(names) >= 1 {
for _, grpNm = range names {
for _, grpNm = range names {
if grpNm != "" {
matches[grpNm] = nil
}
}
}
return
}
/*
The reslice starts at 2 because they're in pairs: []int{<start>, <end>, <start>, <end>, ...}
and the first *pair* is the entire pattern match.
Thus the len(matchIndices) == 2*len(names).
Keep in mind that since the first element of names is removed,
the first pair here is also removed.
*/
matchIndices = matchIndices[2:]
/*
A reslice of `matchIndices` could technically start at 2 (as long as `names` is sliced [1:])
because they're in pairs: []int{<start>, <end>, <start>, <end>, ...}
and the first pair is the entire pattern match (un-resliced names[0]).
Thus the len(matchIndices) == 2*len(names), *even* if you
Keep in mind that since the first element of names is removed,
the first pair here is skipped.
This provides a bit more consistent readability, though.
*/
idxChunks = make([][]int, len(names))
for startIdx = 0; startIdx < len(idxChunks); startIdx += 2 {
chunkIdx = 0
endIdx = 0
for startIdx = 0; endIdx < len(matchIndices); startIdx += 2 {
endIdx = startIdx + 2
// This technically should never happen.
if endIdx > len(matchIndices) {
endIdx = len(matchIndices)
}
chunkIndices = matchIndices[startIdx:endIdx]
if chunkIndices[0] == -1 || chunkIndices[1] == -1 {
// group did not match
chunkIndices = nil
} else {
if chunkIndices[0] == chunkIndices[1] {
chunkIndices = []int{chunkIndices[0]}
} else {
chunkIndices = matchIndices[startIdx:endIdx]
}
}
idxChunks[chunkIdx] = chunkIndices
chunkIdx++
}
// Now associate with names and pull the string sequence.
for chunkIdx, chunkIndices = range idxChunks {
grpNm = names[chunkIdx]
/*
Thankfully, it's actually a build error if a pattern specifies a named
capture group with an empty name.
So we don't need to worry about accounting for that,
and can just skip over grpNm == "" (which is an *unnamed* capture group).
and can just skip over grpNm == ""
(which is either an *unnamed* capture group
OR the first element in `names`, which is always
the entire match).
*/
if grpNm == "" {
continue
}
// This technically should never happen.
if endIdx > len(matchIndices) {
endIdx = len(matchIndices)
}
chunkIndices = matchIndices[startIdx:endIdx]
if chunkIndices[0] == -1 || chunkIndices[1] == -1 {
if chunkIndices == nil || len(chunkIndices) == 0 {
// group did not match
if !inclNoMatch {
continue
@@ -411,13 +456,19 @@ func (r *ReMap) MapString(s string, inclNoMatch, inclNoMatchStrict, mustMatch bo
continue
}
matchStr = s[chunkIndices[0]:chunkIndices[1]]
switch len(chunkIndices) {
case 1:
// Single character
matchStr = string(s[chunkIndices[0]])
case 2:
// Multiple characters
matchStr = s[chunkIndices[0]:chunkIndices[1]]
}
if _, ok = tmpMap[grpNm]; !ok {
tmpMap[grpNm] = make([]string, 0)
}
tmpMap[grpNm] = append(tmpMap[grpNm], matchStr)
chunkIdx++
}
// This *technically* should be completely handled above.