Compare commits
5 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
688abd0874
|
||
|
|
a1f87d6b51
|
||
|
|
07951f1f03
|
||
|
|
bae0abe960
|
||
|
|
368ae0cb8e
|
19
.encoding.TODO/bit/docs.go
Normal file
19
.encoding.TODO/bit/docs.go
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
/*
|
||||||
|
Package bit aims to provide feature parity with stdlib's [encoding/hex].
|
||||||
|
|
||||||
|
It's a ludicrous tragedy that hex/base16, base32, base64 all have libraries for converting
|
||||||
|
to/from string representations... but there's nothing for binary ('01010001' etc.) whatsoever.
|
||||||
|
|
||||||
|
This package also provides some extra convenience functions and types in an attempt to provide
|
||||||
|
an abstracted bit-level fidelity in Go. A [Bit] is a bool type, in which that underlying bool
|
||||||
|
being false represents a 0 and that underlying bool being true represents a 1.
|
||||||
|
|
||||||
|
Note that a [Bit] or arbitrary-length or non-octet-aligned [][Bit] may take up more bytes in memory
|
||||||
|
than expected; a [Bit] will actually always occupy a single byte -- thus representing
|
||||||
|
`00000000 00000000` as a [][Bit] or [16][Bit] will actually occupy *sixteen bytes* in memory,
|
||||||
|
NOT 2 bytes (nor, obviously, [2][Byte])!
|
||||||
|
It is recommended to use a [Bits] instead of a [Bit] slice or array, as it will try to properly align to the
|
||||||
|
smallest memory allocation possible (at the cost of a few extra CPU cycles on adding/removing one or more [Bit]).
|
||||||
|
It will properly retain any appended, prepended, leading, or trailing bits that do not currently align to a byte.
|
||||||
|
*/
|
||||||
|
package bit
|
||||||
14
.encoding.TODO/bit/funcs.go
Normal file
14
.encoding.TODO/bit/funcs.go
Normal file
@@ -0,0 +1,14 @@
|
|||||||
|
package bit
|
||||||
|
|
||||||
|
// TODO: Provide analogues of the encoding/hex, encoding/base64, etc. functions.
|
||||||
|
|
||||||
|
/*
|
||||||
|
TODO: Also provide interfaces for the following:
|
||||||
|
|
||||||
|
* https://pkg.go.dev/encoding#BinaryAppender
|
||||||
|
* https://pkg.go.dev/encoding#BinaryMarshaler
|
||||||
|
* https://pkg.go.dev/encoding#BinaryUnmarshaler
|
||||||
|
* https://pkg.go.dev/encoding#TextAppender
|
||||||
|
* https://pkg.go.dev/encoding#TextMarshaler
|
||||||
|
* https://pkg.go.dev/encoding#TextUnmarshaler
|
||||||
|
*/
|
||||||
34
.encoding.TODO/bit/types.go
Normal file
34
.encoding.TODO/bit/types.go
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
package bit
|
||||||
|
|
||||||
|
type (
|
||||||
|
// Bit aims to provide a native-like type for a single bit (Golang operates on the smallest fidelity level of *byte*/uint8).
|
||||||
|
Bit bool
|
||||||
|
|
||||||
|
// Bits is an arbitrary length of bits.
|
||||||
|
Bits struct {
|
||||||
|
/*
|
||||||
|
leading is a series of Bit that do not cleanly align to the beginning of Bits.b.
|
||||||
|
They will always be the bits at the *beginning* of the sequence.
|
||||||
|
len(Bits.leading) will *never* be more than 7;
|
||||||
|
it's converted into a byte, prepended to Bits.b, and cleared if it reaches that point.
|
||||||
|
*/
|
||||||
|
leading []Bit
|
||||||
|
// b is the condensed/memory-aligned alternative to an [][8]Bit (or []Bit, or [][]Bit, etc.).
|
||||||
|
b []byte
|
||||||
|
/*
|
||||||
|
remaining is a series of Bit that do not cleanly align to the end of Bits.b.
|
||||||
|
They will always be the bits at the *end* of the sequence.
|
||||||
|
len(Bits.remaining) will *never* be more than 7;
|
||||||
|
it's converted into a byte, appended to Bits.b, and cleared if it reaches that point.
|
||||||
|
*/
|
||||||
|
remaining []Bit
|
||||||
|
// fixedLen, if 0, represents a "slice". If >= 1, it represents an "array".
|
||||||
|
fixedLen uint
|
||||||
|
}
|
||||||
|
|
||||||
|
// Byte is this package's representation of a byte. It's primarily for convenience.
|
||||||
|
Byte byte
|
||||||
|
|
||||||
|
// Bytes is defined as a type for convenience single-call functions.
|
||||||
|
Bytes []Byte
|
||||||
|
)
|
||||||
@@ -34,11 +34,47 @@ func NewMaskBitExplicit(value uint) (m *MaskBit) {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// HasFlag is true if m has MaskBit flag set/enabled.
|
/*
|
||||||
|
HasFlag is true if m has MaskBit flag set/enabled.
|
||||||
|
THIS WILL RETURN FALSE FOR OR'd FLAGS.
|
||||||
|
For example:
|
||||||
|
|
||||||
|
flagA MaskBit = 0x01
|
||||||
|
flagB MaskBit = 0x02
|
||||||
|
flagComposite = flagA | flagB
|
||||||
|
|
||||||
|
m *MaskBit = NewMaskBitExplicit(uint(flagA))
|
||||||
|
|
||||||
|
m.HasFlag(flagComposite) will return false even though flagComposite is an OR
|
||||||
|
that contains flagA.
|
||||||
|
Use [MaskBit.IsOneOf] instead if you do not desire this behavior,
|
||||||
|
and instead want to test composite flag *membership*.
|
||||||
|
(MaskBit.IsOneOf will also return true for non-composite equality.)
|
||||||
|
*/
|
||||||
func (m *MaskBit) HasFlag(flag MaskBit) (r bool) {
|
func (m *MaskBit) HasFlag(flag MaskBit) (r bool) {
|
||||||
|
|
||||||
var b MaskBit = *m
|
var b MaskBit = *m
|
||||||
|
|
||||||
|
if b&flag == flag {
|
||||||
|
r = true
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
IsOneOf is like a "looser" form of [MaskBit.HasFlag]
|
||||||
|
in that it allows for testing composite membership.
|
||||||
|
|
||||||
|
See [MaskBit.HasFlag] for more information.
|
||||||
|
|
||||||
|
If composite is *not* an OR'd MaskBit (i.e.
|
||||||
|
it falls directly on a boundary -- 0, 1, 2, 4, 8, 16, etc.),
|
||||||
|
then IsOneOf will behave exactly like HasFlag.
|
||||||
|
*/
|
||||||
|
func (m *MaskBit) IsOneOf(composite MaskBit) (r bool) {
|
||||||
|
|
||||||
|
var b MaskBit = *m
|
||||||
|
|
||||||
if b&flag != 0 {
|
if b&flag != 0 {
|
||||||
r = true
|
r = true
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
/*
|
/*
|
||||||
Package remap provides convenience functions around regular expressions.
|
Package remap provides convenience functions around regular expressions, primarily offering maps for named capture groups.
|
||||||
*/
|
*/
|
||||||
package remap
|
package remap
|
||||||
|
|||||||
@@ -1,20 +1,198 @@
|
|||||||
package remap
|
package remap
|
||||||
|
|
||||||
/*
|
/*
|
||||||
Map returns a map[string]<match bytes> for regexes with named capture groups matched in bytes b.
|
Map returns a map[string][]<match bytes> for regexes with named capture groups matched in bytes b.
|
||||||
|
Note that this supports non-unique group names; [regexp.Regexp] allows for patterns with multiple groups
|
||||||
|
using the same group name (though your IDE might complain; I know GoLand does).
|
||||||
|
|
||||||
matches will be nil if no named capture group matches were found.
|
Each match for each group is in a slice keyed under that group name, with that slice
|
||||||
|
ordered by the indexing done by the regex match itself.
|
||||||
|
|
||||||
|
In summary, the parameters are as follows:
|
||||||
|
|
||||||
|
# inclNoMatch
|
||||||
|
|
||||||
|
If true, then attempt to return a non-nil matches (as long as b isn't nil).
|
||||||
|
Group keys will be populated and explicitly defined as nil.
|
||||||
|
|
||||||
|
For example, if a pattern
|
||||||
|
|
||||||
|
^(?P<g1>foo)(?P<g1>bar)(?P<g2>baz)$
|
||||||
|
|
||||||
|
is provided but b does not match then matches will be:
|
||||||
|
|
||||||
|
map[string][][]byte{
|
||||||
|
"g1": nil,
|
||||||
|
"g2": nil,
|
||||||
|
}
|
||||||
|
|
||||||
|
# inclNoMatchStrict
|
||||||
|
|
||||||
|
If true (and inclNoMatch is true), instead of a single nil the group's values will be
|
||||||
|
a slice of nil values explicitly matching the number of times the group name is specified
|
||||||
|
in the pattern.
|
||||||
|
|
||||||
|
For example, if a pattern:
|
||||||
|
|
||||||
|
^(?P<g1>foo)(?P<g1>bar)(?P<g2>baz)$
|
||||||
|
|
||||||
|
is provided but b does not match then matches will be:
|
||||||
|
|
||||||
|
map[string][][]byte{
|
||||||
|
"g1": [][]byte{
|
||||||
|
nil,
|
||||||
|
nil,
|
||||||
|
},
|
||||||
|
"g2": [][]byte{
|
||||||
|
nil,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
# mustMatch
|
||||||
|
|
||||||
|
If true, matches will be nil if the entirety of b does not match the pattern (and thus
|
||||||
|
no capture groups matched) (overrides inclNoMatch) -- explicitly:
|
||||||
|
|
||||||
|
matches == nil
|
||||||
|
|
||||||
|
Otherwise if false (and assuming inclNoMatch is false), matches will be:
|
||||||
|
|
||||||
|
map[string][][]byte{}
|
||||||
|
|
||||||
|
# Condition Tree
|
||||||
|
|
||||||
|
In detail, matches and/or its values may be nil or empty under the following condition tree:
|
||||||
|
|
||||||
|
IF b is nil:
|
||||||
|
THEN matches will always be nil
|
||||||
|
ELSE:
|
||||||
|
IF all of b does not match pattern
|
||||||
|
IF mustMatch is true
|
||||||
|
THEN matches == nil
|
||||||
|
ELSE
|
||||||
|
THEN matches == map[string][][]byte{} (non-nil but empty)
|
||||||
|
ELSE IF pattern has no named capture groups
|
||||||
|
IF inclNoMatch is true
|
||||||
|
THEN matches == map[string][][]byte{} (non-nil but empty)
|
||||||
|
ELSE
|
||||||
|
THEN matches == nil
|
||||||
|
ELSE
|
||||||
|
IF there are no named group matches
|
||||||
|
IF inclNoMatch is true
|
||||||
|
THEN matches is non-nil; matches[<group name>, ...] is/are defined but nil (_, ok = matches[<group name>]; ok == true)
|
||||||
|
ELSE
|
||||||
|
THEN matches == nil
|
||||||
|
ELSE
|
||||||
|
IF <group name> does not have a match
|
||||||
|
IF inclNoMatch is true
|
||||||
|
IF inclNoMatchStrict is true
|
||||||
|
THEN matches[<group name>] is defined and non-nil, but populated with placeholder nils
|
||||||
|
(matches[<group name>] == [][]byte{nil[, nil...]})
|
||||||
|
ELSE
|
||||||
|
THEN matches[<group name>] is guaranteed defined but may be nil (_, ok = matches[<group name>]; ok == true)
|
||||||
|
ELSE
|
||||||
|
THEN matches[<group name>] is not defined (_, ok = matches[<group name>]; ok == false)
|
||||||
|
ELSE
|
||||||
|
matches[<group name>] == []{<match>[, <match>...]}
|
||||||
*/
|
*/
|
||||||
func (r *ReMap) Map(b []byte) (matches map[string][]byte) {
|
func (r *ReMap) Map(b []byte, inclNoMatch, inclNoMatchStrict, mustMatch bool) (matches map[string][][]byte) {
|
||||||
|
|
||||||
var m [][]byte
|
var ok bool
|
||||||
var tmpMap map[string][]byte = make(map[string][]byte)
|
var mIdx int
|
||||||
|
var match []byte
|
||||||
|
var grpNm string
|
||||||
|
var names []string
|
||||||
|
var matchBytes [][]byte
|
||||||
|
var tmpMap map[string][][]byte = make(map[string][][]byte)
|
||||||
|
|
||||||
m = r.Regexp.FindSubmatch(b)
|
if b == nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
for idx, grpNm := range r.Regexp.SubexpNames() {
|
names = r.Regexp.SubexpNames()
|
||||||
if idx != 0 && grpNm != "" {
|
matchBytes = r.Regexp.FindSubmatch(b)
|
||||||
tmpMap[grpNm] = m[idx]
|
|
||||||
|
if matchBytes == nil {
|
||||||
|
// b does not match pattern
|
||||||
|
if !mustMatch {
|
||||||
|
matches = make(map[string][][]byte)
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if names == nil || len(names) == 0 || len(names) == 1 {
|
||||||
|
/*
|
||||||
|
no named capture groups;
|
||||||
|
technically only the last condition would be the case.
|
||||||
|
*/
|
||||||
|
if inclNoMatch {
|
||||||
|
matches = make(map[string][][]byte)
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
names = names[1:]
|
||||||
|
|
||||||
|
if len(matchBytes) == 0 || len(matchBytes) == 1 {
|
||||||
|
/*
|
||||||
|
no submatches whatsoever.
|
||||||
|
*Technically* I don't think this condition can actually be reached.
|
||||||
|
This is more of a safe-return before we re-slice.
|
||||||
|
*/
|
||||||
|
matches = make(map[string][][]byte)
|
||||||
|
if inclNoMatch {
|
||||||
|
if len(names) >= 1 {
|
||||||
|
for _, grpNm = range names {
|
||||||
|
matches[grpNm] = nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
matchBytes = matchBytes[1:]
|
||||||
|
|
||||||
|
for mIdx, match = range matchBytes {
|
||||||
|
grpNm = names[mIdx]
|
||||||
|
/*
|
||||||
|
Thankfully, it's actually a build error if a pattern specifies a named
|
||||||
|
capture group with an empty name.
|
||||||
|
So we don't need to worry about accounting for that,
|
||||||
|
and can just skip over grpNm == "" (which is an *unnamed* capture group).
|
||||||
|
*/
|
||||||
|
if grpNm == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
if match == nil {
|
||||||
|
// group did not match
|
||||||
|
if !inclNoMatch {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if _, ok = tmpMap[grpNm]; !ok {
|
||||||
|
if !inclNoMatchStrict {
|
||||||
|
tmpMap[grpNm] = nil
|
||||||
|
} else {
|
||||||
|
tmpMap[grpNm] = [][]byte{nil}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if inclNoMatchStrict {
|
||||||
|
tmpMap[grpNm] = append(tmpMap[grpNm], nil)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
if _, ok = tmpMap[grpNm]; !ok {
|
||||||
|
tmpMap[grpNm] = make([][]byte, 0)
|
||||||
|
}
|
||||||
|
tmpMap[grpNm] = append(tmpMap[grpNm], match)
|
||||||
|
}
|
||||||
|
|
||||||
|
// This *technically* should be completely handled above.
|
||||||
|
if inclNoMatch {
|
||||||
|
for _, grpNm = range names {
|
||||||
|
if _, ok = tmpMap[grpNm]; !ok {
|
||||||
|
tmpMap[grpNm] = nil
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -26,20 +204,279 @@ func (r *ReMap) Map(b []byte) (matches map[string][]byte) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
MapString returns a map[string]<match string> for regexes with named capture groups matched in string s.
|
MapString is exactly like ReMap.Map(), but operates on (and returns) strings instead.
|
||||||
|
(matches will always be nil if s == "".)
|
||||||
|
|
||||||
matches will be nil if no named capture group matches were found.
|
A small deviation, though; empty strings instead of nils (because duh) will occupy slice placeholders (if `inclNoMatchStrict` is specified).
|
||||||
|
This unfortunately *does not provide any indication* if an empty string positively matched the pattern (a "hit") or if it was simply
|
||||||
|
not matched at all (a "miss"). If you need definitive determination between the two conditions, it is instead recommended to either
|
||||||
|
*not* use inclNoMatchStrict or to use ReMap.Map() instead and convert any non-nil values to strings after.
|
||||||
|
|
||||||
|
Particularly:
|
||||||
|
|
||||||
|
# inclNoMatch
|
||||||
|
|
||||||
|
If true, then attempt to return a non-nil matches (as long as s isn't empty).
|
||||||
|
Group keys will be populated and explicitly defined as nil.
|
||||||
|
|
||||||
|
For example, if a pattern
|
||||||
|
|
||||||
|
^(?P<g1>foo)(?P<g1>bar)(?P<g2>baz)$
|
||||||
|
|
||||||
|
is provided but s does not match then matches will be:
|
||||||
|
|
||||||
|
map[string][]string{
|
||||||
|
"g1": nil,
|
||||||
|
"g2": nil,
|
||||||
|
}
|
||||||
|
|
||||||
|
# inclNoMatchStrict
|
||||||
|
|
||||||
|
If true (and inclNoMatch is true), instead of a single nil the group's values will be
|
||||||
|
a slice of empty string values explicitly matching the number of times the group name is specified
|
||||||
|
in the pattern.
|
||||||
|
|
||||||
|
For example, if a pattern:
|
||||||
|
|
||||||
|
^(?P<g1>foo)(?P<g1>bar)(?P<g2>baz)$
|
||||||
|
|
||||||
|
is provided but s does not match then matches will be:
|
||||||
|
|
||||||
|
map[string][]string{
|
||||||
|
"g1": []string{
|
||||||
|
"",
|
||||||
|
"",
|
||||||
|
},
|
||||||
|
"g2": []string{
|
||||||
|
"",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
# mustMatch
|
||||||
|
|
||||||
|
If true, matches will be nil if the entirety of s does not match the pattern (and thus
|
||||||
|
no capture groups matched) (overrides inclNoMatch) -- explicitly:
|
||||||
|
|
||||||
|
matches == nil
|
||||||
|
|
||||||
|
Otherwise if false (and assuming inclNoMatch is false), matches will be:
|
||||||
|
|
||||||
|
map[string][]string{}
|
||||||
|
|
||||||
|
# Condition Tree
|
||||||
|
|
||||||
|
In detail, matches and/or its values may be nil or empty under the following condition tree:
|
||||||
|
|
||||||
|
IF s is empty:
|
||||||
|
THEN matches will always be nil
|
||||||
|
ELSE:
|
||||||
|
IF all of s does not match pattern
|
||||||
|
IF mustMatch is true
|
||||||
|
THEN matches == nil
|
||||||
|
ELSE
|
||||||
|
THEN matches == map[string][]string{} (non-nil but empty)
|
||||||
|
ELSE IF pattern has no named capture groups
|
||||||
|
IF inclNoMatch is true
|
||||||
|
THEN matches == map[string][]string{} (non-nil but empty)
|
||||||
|
ELSE
|
||||||
|
THEN matches == nil
|
||||||
|
ELSE
|
||||||
|
IF there are no named group matches
|
||||||
|
IF inclNoMatch is true
|
||||||
|
THEN matches is non-nil; matches[<group name>, ...] is/are defined but nil (_, ok = matches[<group name>]; ok == true)
|
||||||
|
ELSE
|
||||||
|
THEN matches == nil
|
||||||
|
ELSE
|
||||||
|
IF <group name> does not have a match
|
||||||
|
IF inclNoMatch is true
|
||||||
|
IF inclNoMatchStrict is true
|
||||||
|
THEN matches[<group name>] is defined and non-nil, but populated with placeholder empty strings
|
||||||
|
(matches[<group name>] == []string{""[, ""...]})
|
||||||
|
ELSE
|
||||||
|
THEN matches[<group name>] is guaranteed defined but may be nil (_, ok = matches[<group name>]; ok == true)
|
||||||
|
ELSE
|
||||||
|
THEN matches[<group name>] is not defined (_, ok = matches[<group name>]; ok == false)
|
||||||
|
ELSE
|
||||||
|
matches[<group name>] == []{<match>[, <match>...]}
|
||||||
*/
|
*/
|
||||||
func (r *ReMap) MapString(s string) (matches map[string]string) {
|
func (r *ReMap) MapString(s string, inclNoMatch, inclNoMatchStrict, mustMatch bool) (matches map[string][]string) {
|
||||||
|
|
||||||
var m []string
|
var ok bool
|
||||||
var tmpMap map[string]string = make(map[string]string)
|
var endIdx int
|
||||||
|
var startIdx int
|
||||||
|
var chunkIdx int
|
||||||
|
var grpNm string
|
||||||
|
var names []string
|
||||||
|
var matchStr string
|
||||||
|
/*
|
||||||
|
A slice of indices or index pairs.
|
||||||
|
For each element `e` in idxChunks,
|
||||||
|
* if `e` is nil, no group match.
|
||||||
|
* if len(e) == 1, only a single character was matched.
|
||||||
|
* otherwise len(e) == 2, the start and end of the match.
|
||||||
|
*/
|
||||||
|
var idxChunks [][]int
|
||||||
|
var matchIndices []int
|
||||||
|
var chunkIndices []int // always 2 elements; start pos and end pos
|
||||||
|
var tmpMap map[string][]string = make(map[string][]string)
|
||||||
|
|
||||||
m = r.Regexp.FindStringSubmatch(s)
|
/*
|
||||||
|
OK so this is a bit of a deviation.
|
||||||
|
|
||||||
for idx, grpNm := range r.Regexp.SubexpNames() {
|
It's not as straightforward as above, because there isn't an explicit way
|
||||||
if idx != 0 && grpNm != "" {
|
like above to determine if a pattern was *matched as an empty string* vs.
|
||||||
tmpMap[grpNm] = m[idx]
|
*not matched*.
|
||||||
|
|
||||||
|
So instead do roundabout index-y things.
|
||||||
|
*/
|
||||||
|
|
||||||
|
if s == "" {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
/*
|
||||||
|
I'm not entirely sure how serious they are about "the slice should not be modified"...
|
||||||
|
|
||||||
|
DO NOT sort or dedupe `names`! If the same name for groups is duplicated,
|
||||||
|
it will be duplicated here in proper order and the ordering is tied to
|
||||||
|
the ordering of matchIndices.
|
||||||
|
*/
|
||||||
|
names = r.Regexp.SubexpNames()[:]
|
||||||
|
matchIndices = r.Regexp.FindStringSubmatchIndex(s)
|
||||||
|
|
||||||
|
if matchIndices == nil {
|
||||||
|
// s does not match pattern at all.
|
||||||
|
if !mustMatch {
|
||||||
|
matches = make(map[string][]string)
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if names == nil || len(names) <= 1 {
|
||||||
|
/*
|
||||||
|
No named capture groups;
|
||||||
|
technically only the last condition would be the case,
|
||||||
|
as (regexp.Regexp).SubexpNames() will ALWAYS at the LEAST
|
||||||
|
return a `[]string{""}`.
|
||||||
|
*/
|
||||||
|
if inclNoMatch {
|
||||||
|
matches = make(map[string][]string)
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(matchIndices) == 0 || len(matchIndices) == 1 {
|
||||||
|
/*
|
||||||
|
No (sub)matches whatsoever.
|
||||||
|
*technically* I don't think this condition can actually be reached;
|
||||||
|
matchIndices should ALWAYS either be `nil` or len will be at LEAST 2,
|
||||||
|
and modulo 2 thereafter since they're PAIRS of indices...
|
||||||
|
Why they didn't just return a [][]int or [][2]int or something
|
||||||
|
instead of an []int, who knows.
|
||||||
|
But we're correcting that poor design.
|
||||||
|
This is more of a safe-return before we chunk the indices.
|
||||||
|
*/
|
||||||
|
matches = make(map[string][]string)
|
||||||
|
if inclNoMatch {
|
||||||
|
for _, grpNm = range names {
|
||||||
|
if grpNm != "" {
|
||||||
|
matches[grpNm] = nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
A reslice of `matchIndices` could technically start at 2 (as long as `names` is sliced [1:])
|
||||||
|
because they're in pairs: []int{<start>, <end>, <start>, <end>, ...}
|
||||||
|
and the first pair is the entire pattern match (un-resliced names[0]).
|
||||||
|
Thus the len(matchIndices) == 2*len(names), *even* if you
|
||||||
|
Keep in mind that since the first element of names is removed,
|
||||||
|
the first pair here is skipped.
|
||||||
|
This provides a bit more consistent readability, though.
|
||||||
|
*/
|
||||||
|
idxChunks = make([][]int, len(names))
|
||||||
|
chunkIdx = 0
|
||||||
|
endIdx = 0
|
||||||
|
for startIdx = 0; endIdx < len(matchIndices); startIdx += 2 {
|
||||||
|
endIdx = startIdx + 2
|
||||||
|
// This technically should never happen.
|
||||||
|
if endIdx > len(matchIndices) {
|
||||||
|
endIdx = len(matchIndices)
|
||||||
|
}
|
||||||
|
|
||||||
|
chunkIndices = matchIndices[startIdx:endIdx]
|
||||||
|
|
||||||
|
if chunkIndices[0] == -1 || chunkIndices[1] == -1 {
|
||||||
|
// group did not match
|
||||||
|
chunkIndices = nil
|
||||||
|
} else {
|
||||||
|
if chunkIndices[0] == chunkIndices[1] {
|
||||||
|
chunkIndices = []int{chunkIndices[0]}
|
||||||
|
} else {
|
||||||
|
chunkIndices = matchIndices[startIdx:endIdx]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
idxChunks[chunkIdx] = chunkIndices
|
||||||
|
chunkIdx++
|
||||||
|
}
|
||||||
|
|
||||||
|
// Now associate with names and pull the string sequence.
|
||||||
|
for chunkIdx, chunkIndices = range idxChunks {
|
||||||
|
grpNm = names[chunkIdx]
|
||||||
|
/*
|
||||||
|
Thankfully, it's actually a build error if a pattern specifies a named
|
||||||
|
capture group with an empty name.
|
||||||
|
So we don't need to worry about accounting for that,
|
||||||
|
and can just skip over grpNm == ""
|
||||||
|
(which is either an *unnamed* capture group
|
||||||
|
OR the first element in `names`, which is always
|
||||||
|
the entire match).
|
||||||
|
*/
|
||||||
|
if grpNm == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
if chunkIndices == nil || len(chunkIndices) == 0 {
|
||||||
|
// group did not match
|
||||||
|
if !inclNoMatch {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if _, ok = tmpMap[grpNm]; !ok {
|
||||||
|
if !inclNoMatchStrict {
|
||||||
|
tmpMap[grpNm] = nil
|
||||||
|
} else {
|
||||||
|
tmpMap[grpNm] = []string{""}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if inclNoMatchStrict {
|
||||||
|
tmpMap[grpNm] = append(tmpMap[grpNm], "")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
switch len(chunkIndices) {
|
||||||
|
case 1:
|
||||||
|
// Single character
|
||||||
|
matchStr = string(s[chunkIndices[0]])
|
||||||
|
case 2:
|
||||||
|
// Multiple characters
|
||||||
|
matchStr = s[chunkIndices[0]:chunkIndices[1]]
|
||||||
|
}
|
||||||
|
|
||||||
|
if _, ok = tmpMap[grpNm]; !ok {
|
||||||
|
tmpMap[grpNm] = make([]string, 0)
|
||||||
|
}
|
||||||
|
tmpMap[grpNm] = append(tmpMap[grpNm], matchStr)
|
||||||
|
}
|
||||||
|
|
||||||
|
// This *technically* should be completely handled above.
|
||||||
|
if inclNoMatch {
|
||||||
|
for _, grpNm = range names {
|
||||||
|
if _, ok = tmpMap[grpNm]; !ok {
|
||||||
|
tmpMap[grpNm] = nil
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,10 +1,27 @@
|
|||||||
package remap
|
package remap
|
||||||
|
|
||||||
import (
|
import (
|
||||||
`regexp`
|
"regexp"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
type (
|
||||||
// ReMap provides some map-related functions around a regexp.Regexp.
|
// ReMap provides some map-related functions around a regexp.Regexp.
|
||||||
type ReMap struct {
|
ReMap struct {
|
||||||
*regexp.Regexp
|
*regexp.Regexp
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TODO?
|
||||||
|
/*
|
||||||
|
ExplicitStringMatch is used with ReMap.MapStringExplicit to indicate if a
|
||||||
|
capture group result is a hit (a group matched, but e.g. the match value is empty string)
|
||||||
|
or not (a group did not match).
|
||||||
|
*/
|
||||||
|
/*
|
||||||
|
ExplicitStringMatch struct {
|
||||||
|
Group string
|
||||||
|
IsMatch bool
|
||||||
|
Value string
|
||||||
|
}
|
||||||
|
|
||||||
|
*/
|
||||||
|
)
|
||||||
|
|||||||
Reference in New Issue
Block a user