v1.9.5

FIXED: * HasFlag would inappropriately report true for m = A, flag = A | B. This has been rectified, and this behavior is now explicitly exposed via IsOneOf.
stubbing encoding/bit
2025-08-26 20:39:29 -04:00 · 2025-08-23 19:32:48 -04:00 · 2025-08-17 00:45:24 -04:00
5 changed files with 186 additions and 32 deletions
--- a/.encoding.TODO/bit/docs.go
+++ b/.encoding.TODO/bit/docs.go
@@ -0,0 +1,19 @@
+/*
+Package bit aims to provide feature parity with stdlib's [encoding/hex].
+
+It's a ludicrous tragedy that hex/base16, base32, base64 all have libraries for converting
+to/from string representations... but there's nothing for binary ('01010001' etc.) whatsoever.
+
+This package also provides some extra convenience functions and types in an attempt to provide
+an abstracted bit-level fidelity in Go. A [Bit] is a bool type, in which that underlying bool
+being false represents a 0 and that underlying bool being true represents a 1.
+
+Note that a [Bit], or an arbitrary-length or non-octet-aligned [][Bit], may take up more bytes in memory
+than expected; a [Bit] will actually always occupy a single byte -- thus representing
+`00000000 00000000` as a [][Bit] or [16][Bit] will actually occupy *sixteen bytes* in memory,
+NOT 2 bytes (nor, obviously, [2][Byte])!
+It is recommended to use a [Bits] instead of a [Bit] slice or array, as it will try to properly align to the
+smallest memory allocation possible (at the cost of a few extra CPU cycles on adding/removing one or more [Bit]).
+It will properly retain any appended, prepended, leading, or trailing bits that do not currently align to a byte.
+*/
+package bit
--- a/.encoding.TODO/bit/funcs.go
+++ b/.encoding.TODO/bit/funcs.go
@@ -0,0 +1,14 @@
+package bit
+
+// TODO: Provide analogues of the functions in encoding/hex, encoding/base64, etc.
+
+/*
+	TODO: Also provide interfaces for the following:
+
+	* https://pkg.go.dev/encoding#BinaryAppender
+	* https://pkg.go.dev/encoding#BinaryMarshaler
+	* https://pkg.go.dev/encoding#BinaryUnmarshaler
+	* https://pkg.go.dev/encoding#TextAppender
+	* https://pkg.go.dev/encoding#TextMarshaler
+	* https://pkg.go.dev/encoding#TextUnmarshaler
+*/
--- a/.encoding.TODO/bit/types.go
+++ b/.encoding.TODO/bit/types.go
@@ -0,0 +1,34 @@
+package bit
+
+type (
+	// Bit aims to provide a native-like type for a single bit (the smallest unit Go natively operates on is the *byte*/uint8).
+	Bit bool
+
+	// Bits is an arbitrary length of bits.
+	Bits struct {
+		/*
+			leading is a series of Bit that do not cleanly align to the beginning of Bits.b.
+			They will always be the bits at the *beginning* of the sequence.
+			len(Bits.leading) will *never* be more than 7;
+			it's converted into a byte, prepended to Bits.b, and cleared if it reaches that point.
+		*/
+		leading []Bit
+		// b is the condensed/memory-aligned alternative to an [][8]Bit (or []Bit, or [][]Bit, etc.).
+		b []byte
+		/*
+			remaining is a series of Bit that do not cleanly align to the end of Bits.b.
+			They will always be the bits at the *end* of the sequence.
+			len(Bits.remaining) will *never* be more than 7;
+			it's converted into a byte, appended to Bits.b, and cleared if it reaches that point.
+		*/
+		remaining []Bit
+		// fixedLen, if 0, represents a "slice". If >= 1, it represents an "array".
+		fixedLen uint
+	}
+
+	// Byte is this package's representation of a byte. It's primarily for convenience.
+	Byte byte
+
+	// Bytes is defined as a type for convenience single-call functions.
+	Bytes []Byte
+)
--- a/bitmask/bitmask.go
+++ b/bitmask/bitmask.go
@@ -34,11 +34,47 @@ func NewMaskBitExplicit(value uint) (m *MaskBit) {
 	return
 }

-// HasFlag is true if m has MaskBit flag set/enabled.
+/*
+	HasFlag is true if m has MaskBit flag set/enabled.
+	THIS WILL RETURN FALSE FOR OR'd FLAGS UNLESS *ALL* OF THE OR'd BITS ARE SET IN m.
+	For example:
+
+		flagA MaskBit = 0x01
+		flagB MaskBit = 0x02
+		flagComposite = flagA | flagB
+
+		m *MaskBit = NewMaskBitExplicit(uint(flagA))
+	
+	m.HasFlag(flagComposite) will return false even though flagComposite is an OR
+	that contains flagA.
+	Use [MaskBit.IsOneOf] instead if you do not desire this behavior,
+	and instead want to test composite flag *membership*.
+	(MaskBit.IsOneOf will also return true for non-composite equality.)
+*/
 func (m *MaskBit) HasFlag(flag MaskBit) (r bool) {

 	var b MaskBit = *m

+	if b&flag == flag {
+		r = true
+	}
+	return
+}
+
+/*
+	IsOneOf is like a "looser" form of [MaskBit.HasFlag]
+	in that it allows for testing composite membership.
+
+	See [MaskBit.HasFlag] for more information.
+
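+	If composite is *not* an OR'd MaskBit (i.e. it is a single bit
+	falling directly on a boundary -- 1, 2, 4, 8, 16, etc.),
+	then IsOneOf will behave exactly like HasFlag. (0 is the exception: HasFlag(0) is always true, but a 0 composite never matches here.)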
+*/
+func (m *MaskBit) IsOneOf(composite MaskBit) (r bool) {
+
+	var b MaskBit = *m
+
 	if b&flag != 0 {
 		r = true
 	}
--- a/remap/funcs_remap.go
+++ b/remap/funcs_remap.go
@@ -135,7 +135,7 @@ func (r *ReMap) Map(b []byte, inclNoMatch, inclNoMatchStrict, mustMatch bool) (m
 	if len(matchBytes) == 0 || len(matchBytes) == 1 {
 		/*
 			no submatches whatsoever.
-			*technically* I don't think this condition can actually be reached.
+			*Technically* I don't think this condition can actually be reached.
 			This is more of a safe-return before we re-slice.
 		*/
 		matches = make(map[string][][]byte)
@@ -308,6 +308,13 @@ func (r *ReMap) MapString(s string, inclNoMatch, inclNoMatchStrict, mustMatch bo
 	var grpNm string
 	var names []string
 	var matchStr string
+	/*
+		A slice of indices or index pairs.
+		For each element `e` in idxChunks,
+		* if `e` is nil, no group match.
+		* if len(e) == 1, the start and end indices were equal (in regexp terms, a zero-length match).
+		* otherwise len(e) == 2, the start and end of the match.
+	*/
 	var idxChunks [][]int
 	var matchIndices []int
 	var chunkIndices []int // always 2 elements; start pos and end pos
@@ -317,7 +324,7 @@ func (r *ReMap) MapString(s string, inclNoMatch, inclNoMatchStrict, mustMatch bo
 		OK so this is a bit of a deviation.

 		It's not as straightforward as above, because there isn't an explicit way
-		like above to determine if a patterb was *matched as an empty string* vs.
+		like above to determine if a pattern was *matched as an empty string* vs.
 		*not matched*.

 		So instead do roundabout index-y things.
@@ -326,73 +333,111 @@ func (r *ReMap) MapString(s string, inclNoMatch, inclNoMatchStrict, mustMatch bo
 	if s == "" {
 		return
 	}
-	names = r.Regexp.SubexpNames()
+	/*
+		The regexp docs say "the slice should not be modified"; note that [:] reslices (it does NOT copy), so treat `names` as read-only.
+
+		DO NOT sort or dedupe `names`! If the same name for groups is duplicated,
+		it will be duplicated here in proper order and the ordering is tied to
+		the ordering of matchIndices.
+	*/
+	names = r.Regexp.SubexpNames()[:]
 	matchIndices = r.Regexp.FindStringSubmatchIndex(s)

 	if matchIndices == nil {
-		// s does not match pattern
+		// s does not match pattern at all.
 		if !mustMatch {
 			matches = make(map[string][]string)
 		}
 		return
 	}

-	if names == nil || len(names) == 0 || len(names) == 1 {
+	if names == nil || len(names) <= 1 {
 		/*
-			no named capture groups;
-			technically only the last condition would be the case.
+			No named capture groups;
+			technically only the last condition would be the case,
+			as (regexp.Regexp).SubexpNames() will ALWAYS at the LEAST
+			return a `[]string{""}`.
 		*/
 		if inclNoMatch {
 			matches = make(map[string][]string)
 		}
 		return
 	}
-	names = names[1:]

 	if len(matchIndices) == 0 || len(matchIndices) == 1 {
 		/*
-			no submatches whatsoever.
-			*technically* I don't think this condition can actually be reached.
+			No (sub)matches whatsoever.
+			*technically* I don't think this condition can actually be reached;
+			matchIndices should ALWAYS either be `nil` or len will be at LEAST 2,
+			and modulo 2 thereafter since they're PAIRS of indices...
+			Why they didn't just return a [][]int or [][2]int or something
+			instead of an []int, who knows.
+			But we're correcting that poor design.
 			This is more of a safe-return before we chunk the indices.
 		*/
 		matches = make(map[string][]string)
 		if inclNoMatch {
-			if len(names) >= 1 {
-				for _, grpNm = range names {
+			for _, grpNm = range names {
+				if grpNm != "" {
 					matches[grpNm] = nil
 				}
 			}
 		}
 		return
 	}
-	/*
-		The reslice starts at 2 because they're in pairs: []int{<start>, <end>, <start>, <end>, ...}
-		and the first *pair* is the entire pattern match.
-		Thus the len(matchIndices) == 2*len(names).
-		Keep in mind that since the first element of names is removed,
-		the first pair here is also removed.
-	*/
-	matchIndices = matchIndices[2:]

+	/*
+		`matchIndices` is a flat list of index pairs:
+		[]int{<start>, <end>, <start>, <end>, ...}
+		The first pair is the entire pattern match and corresponds to names[0]
+		(which is always ""), so len(matchIndices) == 2*len(names).
+		Neither `names` nor `matchIndices` is resliced here; each pair is instead
+		chunked in order so that idxChunks[i] lines up with names[i], and the
+		whole-match pair is skipped later because its name is empty.
+	*/
 	idxChunks = make([][]int, len(names))
-	for startIdx = 0; startIdx < len(idxChunks); startIdx += 2 {
+	chunkIdx = 0
+	endIdx = 0
+	for startIdx = 0; endIdx < len(matchIndices); startIdx += 2 {
 		endIdx = startIdx + 2
+		// This technically should never happen.
+		if endIdx > len(matchIndices) {
+			endIdx = len(matchIndices)
+		}
+
+		chunkIndices = matchIndices[startIdx:endIdx]
+
+		if chunkIndices[0] == -1 || chunkIndices[1] == -1 {
+			// group did not match
+			chunkIndices = nil
+		} else {
+			if chunkIndices[0] == chunkIndices[1] {
+				chunkIndices = []int{chunkIndices[0]}
+			} else {
+				chunkIndices = matchIndices[startIdx:endIdx]
+			}
+		}
+		idxChunks[chunkIdx] = chunkIndices
+		chunkIdx++
+	}
+
+	// Now associate with names and pull the string sequence.
+	for chunkIdx, chunkIndices = range idxChunks {
 		grpNm = names[chunkIdx]
 		/*
 			Thankfully, it's actually a build error if a pattern specifies a named
 			capture group with an empty name.
 			So we don't need to worry about accounting for that,
-			and can just skip over grpNm == "" (which is an *unnamed* capture group).
+			and can just skip over grpNm == ""
+			(which is either an *unnamed* capture group
+			OR the first element in `names`, which is always
+			the entire match).
 		*/
 		if grpNm == "" {
 			continue
 		}
-		// This technically should never happen.
-		if endIdx > len(matchIndices) {
-			endIdx = len(matchIndices)
-		}
-		chunkIndices = matchIndices[startIdx:endIdx]
-		if chunkIndices[0] == -1 || chunkIndices[1] == -1 {
+
+		if chunkIndices == nil || len(chunkIndices) == 0 {
 			// group did not match
 			if !inclNoMatch {
 				continue
@@ -411,13 +456,19 @@ func (r *ReMap) MapString(s string, inclNoMatch, inclNoMatchStrict, mustMatch bo
 			continue
 		}

-		matchStr = s[chunkIndices[0]:chunkIndices[1]]
+		switch len(chunkIndices) {
+		case 1:
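+			// NOTE(review): len 1 means start == end (a zero-length match), yet this takes the single byte at that index; it can panic when the index == len(s).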
+			matchStr = string(s[chunkIndices[0]])
+		case 2:
+			// Multiple characters
+			matchStr = s[chunkIndices[0]:chunkIndices[1]]
+		}
+
 		if _, ok = tmpMap[grpNm]; !ok {
 			tmpMap[grpNm] = make([]string, 0)
 		}
 		tmpMap[grpNm] = append(tmpMap[grpNm], matchStr)
-
-		chunkIdx++
 	}

 	// This *technically* should be completely handled above.