3 Commits

Author SHA1 Message Date
brent saner
688abd0874 v1.9.5
FIXED:
* HasFlag would inappropriately report true for m = A, flag = A | B.
  This has been rectified, and this behavior is now explicitly
  exposed via IsOneOf.
2025-08-26 20:39:29 -04:00
brent saner
a1f87d6b51 stubbing encoding/bit 2025-08-23 19:32:48 -04:00
brent saner
07951f1f03 v1.9.4
FIXED:
* remap.ReMap.MapString() was not properly correlating groups. It is
  now.
2025-08-17 00:45:24 -04:00
5 changed files with 186 additions and 32 deletions

View File

@@ -0,0 +1,19 @@
/*
Package bit aims to provide feature parity with stdlib's [encoding/hex].
It's a ludicrous tragedy that hex/base16, base32, base64 all have libraries for converting
to/from string representations... but there's nothing for binary ('01010001' etc.) whatsoever.
This package also provides some extra convenience functions and types in an attempt to provide
an abstracted bit-level fidelity in Go. A [Bit] is a bool type, in which that underlying bool
being false represents a 0 and that underlying bool being true represents a 1.
Note that a [Bit] or arbitrary-length or non-octet-aligned [][Bit] may take up more bytes in memory
than expected; a [Bit] will actually always occupy a single byte -- thus representing
`00000000 00000000` as a [][Bit] or [16][Bit] will actually occupy *sixteen bytes* in memory,
NOT 2 bytes (nor, obviously, [2][Byte])!
It is recommended to use a [Bits] instead of a [Bit] slice or array, as it will try to properly align to the
smallest memory allocation possible (at the cost of a few extra CPU cycles on adding/removing one or more [Bit]).
It will properly retain any appended, prepended, leading, or trailing bits that do not currently align to a byte.
*/
package bit

View File

@@ -0,0 +1,14 @@
package bit
// TODO: Provide analogues of the functions in encoding/hex, encoding/base64, etc.
/*
TODO: Also provide interfaces for the following:
* https://pkg.go.dev/encoding#BinaryAppender
* https://pkg.go.dev/encoding#BinaryMarshaler
* https://pkg.go.dev/encoding#BinaryUnmarshaler
* https://pkg.go.dev/encoding#TextAppender
* https://pkg.go.dev/encoding#TextMarshaler
* https://pkg.go.dev/encoding#TextUnmarshaler
*/

View File

@@ -0,0 +1,34 @@
package bit
// Bit models a single binary digit on top of a bool.
// Go itself has no type narrower than a byte/uint8, hence this stand-in.
type Bit bool

// Bits is a bit sequence of arbitrary length.
//
// Whole bytes are kept packed in b, while bits that do not (yet) fill a byte
// at either edge of the sequence are kept in leading and remaining.
type Bits struct {
	// leading holds the bits at the very *start* of the sequence that do not
	// cleanly align to the beginning of b.
	//
	// Intended invariant: len(leading) is never more than 7. Once a full
	// byte's worth accumulates, it is converted into a byte, prepended to b,
	// and leading is cleared.
	leading []Bit

	// b is the condensed, memory-aligned body of the sequence; it stands in
	// for what would otherwise be a [][8]Bit (or []Bit, or [][]Bit, etc.).
	b []byte

	// remaining holds the bits at the very *end* of the sequence that do not
	// cleanly align to the end of b.
	//
	// Intended invariant: len(remaining) is never more than 7. Once a full
	// byte's worth accumulates, it is converted into a byte, appended to b,
	// and remaining is cleared.
	remaining []Bit

	// fixedLen distinguishes the two flavours of Bits:
	// 0 represents a "slice", any value >= 1 represents an "array".
	fixedLen uint
}

// Byte is this package's own representation of a byte, provided primarily
// for convenience.
type Byte byte

// Bytes is a named slice of Byte, defined so that convenience single-call
// functions have a type to hang off of.
type Bytes []Byte

View File

@@ -34,11 +34,47 @@ func NewMaskBitExplicit(value uint) (m *MaskBit) {
return return
} }
// HasFlag is true if m has MaskBit flag set/enabled. /*
HasFlag is true if m has MaskBit flag set/enabled.
THIS WILL RETURN FALSE FOR OR'd FLAGS UNLESS *EVERY* FLAG IN THE OR IS SET IN m.
For example:
flagA MaskBit = 0x01
flagB MaskBit = 0x02
flagComposite = flagA | flagB
m *MaskBit = NewMaskBitExplicit(uint(flagA))
m.HasFlag(flagComposite) will return false even though flagComposite is an OR
that contains flagA.
Use [MaskBit.IsOneOf] instead if you do not desire this behavior,
and instead want to test composite flag *membership*.
(MaskBit.IsOneOf will also return true for non-composite equality.)
*/
func (m *MaskBit) HasFlag(flag MaskBit) (r bool) { func (m *MaskBit) HasFlag(flag MaskBit) (r bool) {
var b MaskBit = *m var b MaskBit = *m
if b&flag == flag {
r = true
}
return
}
/*
IsOneOf is like a "looser" form of [MaskBit.HasFlag]
in that it allows for testing composite membership.
See [MaskBit.HasFlag] for more information.
If composite is *not* an OR'd MaskBit (i.e.
it is a single-bit value -- 1, 2, 4, 8, 16, etc.),
then IsOneOf will behave exactly like HasFlag.
(0 is the exception: HasFlag is always true for 0,
whereas a bitwise AND against 0 is never non-zero.)
*/
func (m *MaskBit) IsOneOf(composite MaskBit) (r bool) {
var b MaskBit = *m
if b&flag != 0 { if b&flag != 0 {
r = true r = true
} }

View File

@@ -135,7 +135,7 @@ func (r *ReMap) Map(b []byte, inclNoMatch, inclNoMatchStrict, mustMatch bool) (m
if len(matchBytes) == 0 || len(matchBytes) == 1 { if len(matchBytes) == 0 || len(matchBytes) == 1 {
/* /*
no submatches whatsoever. no submatches whatsoever.
*technically* I don't think this condition can actually be reached. *Technically* I don't think this condition can actually be reached.
This is more of a safe-return before we re-slice. This is more of a safe-return before we re-slice.
*/ */
matches = make(map[string][][]byte) matches = make(map[string][][]byte)
@@ -308,6 +308,13 @@ func (r *ReMap) MapString(s string, inclNoMatch, inclNoMatchStrict, mustMatch bo
var grpNm string var grpNm string
var names []string var names []string
var matchStr string var matchStr string
/*
A slice of indices or index pairs.
For each element `e` in idxChunks,
* if `e` is nil, no group match.
* if len(e) == 1, the group's start and end index were equal (in regexp index terms a zero-length match, NOT a single character).
* otherwise len(e) == 2, the start and end of the match.
*/
var idxChunks [][]int var idxChunks [][]int
var matchIndices []int var matchIndices []int
var chunkIndices []int // always 2 elements; start pos and end pos var chunkIndices []int // always 2 elements; start pos and end pos
@@ -317,7 +324,7 @@ func (r *ReMap) MapString(s string, inclNoMatch, inclNoMatchStrict, mustMatch bo
OK so this is a bit of a deviation. OK so this is a bit of a deviation.
It's not as straightforward as above, because there isn't an explicit way It's not as straightforward as above, because there isn't an explicit way
like above to determine if a patterb was *matched as an empty string* vs. like above to determine if a pattern was *matched as an empty string* vs.
*not matched*. *not matched*.
So instead do roundabout index-y things. So instead do roundabout index-y things.
@@ -326,73 +333,111 @@ func (r *ReMap) MapString(s string, inclNoMatch, inclNoMatchStrict, mustMatch bo
if s == "" { if s == "" {
return return
} }
names = r.Regexp.SubexpNames() /*
I'm not entirely sure how serious they are about "the slice should not be modified"...
DO NOT sort or dedupe `names`! If the same name for groups is duplicated,
it will be duplicated here in proper order and the ordering is tied to
the ordering of matchIndices.
*/
names = r.Regexp.SubexpNames()[:]
matchIndices = r.Regexp.FindStringSubmatchIndex(s) matchIndices = r.Regexp.FindStringSubmatchIndex(s)
if matchIndices == nil { if matchIndices == nil {
// s does not match pattern // s does not match pattern at all.
if !mustMatch { if !mustMatch {
matches = make(map[string][]string) matches = make(map[string][]string)
} }
return return
} }
if names == nil || len(names) == 0 || len(names) == 1 { if names == nil || len(names) <= 1 {
/* /*
no named capture groups; No named capture groups;
technically only the last condition would be the case. technically only the last condition would be the case,
as (regexp.Regexp).SubexpNames() will ALWAYS at the LEAST
return a `[]string{""}`.
*/ */
if inclNoMatch { if inclNoMatch {
matches = make(map[string][]string) matches = make(map[string][]string)
} }
return return
} }
names = names[1:]
if len(matchIndices) == 0 || len(matchIndices) == 1 { if len(matchIndices) == 0 || len(matchIndices) == 1 {
/* /*
no submatches whatsoever. No (sub)matches whatsoever.
*technically* I don't think this condition can actually be reached. *technically* I don't think this condition can actually be reached;
matchIndices should ALWAYS either be `nil` or len will be at LEAST 2,
and modulo 2 thereafter since they're PAIRS of indices...
Why they didn't just return a [][]int or [][2]int or something
instead of an []int, who knows.
But we're correcting that poor design.
This is more of a safe-return before we chunk the indices. This is more of a safe-return before we chunk the indices.
*/ */
matches = make(map[string][]string) matches = make(map[string][]string)
if inclNoMatch { if inclNoMatch {
if len(names) >= 1 {
for _, grpNm = range names { for _, grpNm = range names {
if grpNm != "" {
matches[grpNm] = nil matches[grpNm] = nil
} }
} }
} }
return return
} }
/*
The reslice starts at 2 because they're in pairs: []int{<start>, <end>, <start>, <end>, ...}
and the first *pair* is the entire pattern match.
Thus the len(matchIndices) == 2*len(names).
Keep in mind that since the first element of names is removed,
the first pair here is also removed.
*/
matchIndices = matchIndices[2:]
/*
A reslice of `matchIndices` could technically start at 2 (as long as `names` is sliced [1:])
because they're in pairs: []int{<start>, <end>, <start>, <end>, ...}
and the first pair is the entire pattern match (un-resliced names[0]).
Thus len(matchIndices) == 2*len(names), whether or not both have been resliced.
Keep in mind that since the first element of names is removed,
the first pair here is skipped.
This provides a bit more consistent readability, though.
*/
idxChunks = make([][]int, len(names)) idxChunks = make([][]int, len(names))
for startIdx = 0; startIdx < len(idxChunks); startIdx += 2 { chunkIdx = 0
endIdx = 0
for startIdx = 0; endIdx < len(matchIndices); startIdx += 2 {
endIdx = startIdx + 2 endIdx = startIdx + 2
// This technically should never happen.
if endIdx > len(matchIndices) {
endIdx = len(matchIndices)
}
chunkIndices = matchIndices[startIdx:endIdx]
if chunkIndices[0] == -1 || chunkIndices[1] == -1 {
// group did not match
chunkIndices = nil
} else {
if chunkIndices[0] == chunkIndices[1] {
chunkIndices = []int{chunkIndices[0]}
} else {
chunkIndices = matchIndices[startIdx:endIdx]
}
}
idxChunks[chunkIdx] = chunkIndices
chunkIdx++
}
// Now associate with names and pull the string sequence.
for chunkIdx, chunkIndices = range idxChunks {
grpNm = names[chunkIdx] grpNm = names[chunkIdx]
/* /*
Thankfully, it's actually a build error if a pattern specifies a named Thankfully, it's actually a build error if a pattern specifies a named
capture group with an empty name. capture group with an empty name.
So we don't need to worry about accounting for that, So we don't need to worry about accounting for that,
and can just skip over grpNm == "" (which is an *unnamed* capture group). and can just skip over grpNm == ""
(which is either an *unnamed* capture group
OR the first element in `names`, which is always
the entire match).
*/ */
if grpNm == "" { if grpNm == "" {
continue continue
} }
// This technically should never happen.
if endIdx > len(matchIndices) { if chunkIndices == nil || len(chunkIndices) == 0 {
endIdx = len(matchIndices)
}
chunkIndices = matchIndices[startIdx:endIdx]
if chunkIndices[0] == -1 || chunkIndices[1] == -1 {
// group did not match // group did not match
if !inclNoMatch { if !inclNoMatch {
continue continue
@@ -411,13 +456,19 @@ func (r *ReMap) MapString(s string, inclNoMatch, inclNoMatchStrict, mustMatch bo
continue continue
} }
switch len(chunkIndices) {
case 1:
// Single character
matchStr = string(s[chunkIndices[0]])
case 2:
// Multiple characters
matchStr = s[chunkIndices[0]:chunkIndices[1]] matchStr = s[chunkIndices[0]:chunkIndices[1]]
}
if _, ok = tmpMap[grpNm]; !ok { if _, ok = tmpMap[grpNm]; !ok {
tmpMap[grpNm] = make([]string, 0) tmpMap[grpNm] = make([]string, 0)
} }
tmpMap[grpNm] = append(tmpMap[grpNm], matchStr) tmpMap[grpNm] = append(tmpMap[grpNm], matchStr)
chunkIdx++
} }
// This *technically* should be completely handled above. // This *technically* should be completely handled above.