Compare commits
2 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
368ae0cb8e
|
||
|
|
154170c0e5
|
1
go.sum
1
go.sum
@@ -10,3 +10,4 @@ golang.org/x/sync v0.16.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA=
|
|||||||
golang.org/x/sys v0.0.0-20220615213510-4f61da869c0c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
golang.org/x/sys v0.0.0-20220615213510-4f61da869c0c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||||
golang.org/x/sys v0.34.0 h1:H5Y5sJ2L2JRdyv7ROF1he/lPdvFsd0mJHFw2ThKHxLA=
|
golang.org/x/sys v0.34.0 h1:H5Y5sJ2L2JRdyv7ROF1he/lPdvFsd0mJHFw2ThKHxLA=
|
||||||
golang.org/x/sys v0.34.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
|
golang.org/x/sys v0.34.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
|
||||||
|
r00t2.io/sysutils v1.14.0/go.mod h1:ZJ7gZxFVQ7QIokQ5fPZr7wl0XO5Iu+LqtE8j3ciRINw=
|
||||||
|
|||||||
4
remap/doc.go
Normal file
4
remap/doc.go
Normal file
@@ -0,0 +1,4 @@
|
|||||||
|
/*
|
||||||
|
Package remap provides convenience functions around regular expressions.
|
||||||
|
*/
|
||||||
|
package remap
|
||||||
290
remap/funcs_remap.go
Normal file
290
remap/funcs_remap.go
Normal file
@@ -0,0 +1,290 @@
|
|||||||
|
package remap
|
||||||
|
|
||||||
|
/*
|
||||||
|
Map returns a map[string]<match bytes> for regexes with named capture groups matched in bytes b.
|
||||||
|
Note that this supports non-unique group names; regexp.Regexp allows for patterns with multiple groups
|
||||||
|
using the same group name. Each match for each group is in a slice keyed under that group name, with
|
||||||
|
that slice ordered by the indexing done by the regex match itself.
|
||||||
|
matches and/or its values may be nil or empty under the following condition tree:
|
||||||
|
|
||||||
|
IF b is nil:
|
||||||
|
THEN matches will always be nil
|
||||||
|
ELSE:
|
||||||
|
IF all of b does not match pattern
|
||||||
|
IF mustMuch is true
|
||||||
|
THEN matches == nil
|
||||||
|
ELSE
|
||||||
|
THEN matches == map[string][][]byte{} (non-nil but empty)
|
||||||
|
ELSE IF pattern has no named capture groups
|
||||||
|
IF inclNoMatch is true
|
||||||
|
THEN matches == map[string][][]byte{} (non-nil but empty)
|
||||||
|
ELSE
|
||||||
|
THEN matches == nil
|
||||||
|
ELSE
|
||||||
|
IF there are no named group matches
|
||||||
|
IF inclNoMatch is true
|
||||||
|
THEN matches is non-nil; matches[<group name>, ...] is/are defined but nil (_, ok = matches[<group name>]; ok == true)
|
||||||
|
ELSE
|
||||||
|
THEN matches == nil
|
||||||
|
ELSE
|
||||||
|
IF <group name> does not have a match
|
||||||
|
IF inclNoMatch is true
|
||||||
|
IF inclNoMatchStrict is true
|
||||||
|
THEN matches[<group name>] is defined and non-nil, but populated with placeholder nils
|
||||||
|
(matches[<group name>] == [][]byte{nil[, nil...]})
|
||||||
|
ELSE
|
||||||
|
THEN matches[<group name>] is guaranteed defined but may be nil (_, ok = matches[<group name>]; ok == true)
|
||||||
|
ELSE
|
||||||
|
THEN matches[<group name>] is not defined (_, ok = matches[<group name>]; ok == false)
|
||||||
|
ELSE
|
||||||
|
matches[<group name>] == []{<match>[, <match>...]}
|
||||||
|
*/
|
||||||
|
func (r *ReMap) Map(b []byte, inclNoMatch, inclNoMatchStrict, mustMatch bool) (matches map[string][][]byte) {
|
||||||
|
|
||||||
|
var ok bool
|
||||||
|
var mIdx int
|
||||||
|
var match []byte
|
||||||
|
var grpNm string
|
||||||
|
var names []string
|
||||||
|
var matchBytes [][]byte
|
||||||
|
var tmpMap map[string][][]byte = make(map[string][][]byte)
|
||||||
|
|
||||||
|
if b == nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
names = r.Regexp.SubexpNames()
|
||||||
|
matchBytes = r.Regexp.FindSubmatch(b)
|
||||||
|
|
||||||
|
if matchBytes == nil {
|
||||||
|
// b does not match pattern
|
||||||
|
if !mustMatch {
|
||||||
|
matches = make(map[string][][]byte)
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if names == nil || len(names) == 0 || len(names) == 1 {
|
||||||
|
/*
|
||||||
|
no named capture groups;
|
||||||
|
technically only the last condition would be the case.
|
||||||
|
*/
|
||||||
|
if inclNoMatch {
|
||||||
|
matches = make(map[string][][]byte)
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
names = names[1:]
|
||||||
|
|
||||||
|
if len(matchBytes) == 0 || len(matchBytes) == 1 {
|
||||||
|
/*
|
||||||
|
no submatches whatsoever.
|
||||||
|
*technically* I don't think this condition can actually be reached.
|
||||||
|
This is more of a safe-return before we re-slice.
|
||||||
|
*/
|
||||||
|
matches = make(map[string][][]byte)
|
||||||
|
if inclNoMatch {
|
||||||
|
if len(names) >= 1 {
|
||||||
|
for _, grpNm = range names {
|
||||||
|
matches[grpNm] = nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
matchBytes = matchBytes[1:]
|
||||||
|
|
||||||
|
for mIdx, match = range matchBytes {
|
||||||
|
grpNm = names[mIdx]
|
||||||
|
/*
|
||||||
|
Thankfully, it's actually a build error if a pattern specifies a named
|
||||||
|
capture group with an empty name.
|
||||||
|
So we don't need to worry about accounting for that,
|
||||||
|
and can just skip over grpNm == "" (which is an *unnamed* capture group).
|
||||||
|
*/
|
||||||
|
if grpNm == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
if match == nil {
|
||||||
|
// group did not match
|
||||||
|
if !inclNoMatch {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if _, ok = tmpMap[grpNm]; !ok {
|
||||||
|
if !inclNoMatchStrict {
|
||||||
|
tmpMap[grpNm] = nil
|
||||||
|
} else {
|
||||||
|
tmpMap[grpNm] = [][]byte{nil}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if inclNoMatchStrict {
|
||||||
|
tmpMap[grpNm] = append(tmpMap[grpNm], nil)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
if _, ok = tmpMap[grpNm]; !ok {
|
||||||
|
tmpMap[grpNm] = make([][]byte, 0)
|
||||||
|
}
|
||||||
|
tmpMap[grpNm] = append(tmpMap[grpNm], match)
|
||||||
|
}
|
||||||
|
|
||||||
|
// This *technically* should be completely handled above.
|
||||||
|
if inclNoMatch {
|
||||||
|
for _, grpNm = range names {
|
||||||
|
if _, ok = tmpMap[grpNm]; !ok {
|
||||||
|
tmpMap[grpNm] = nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(tmpMap) > 0 {
|
||||||
|
matches = tmpMap
|
||||||
|
}
|
||||||
|
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
MapString is exactly like ReMap.Map(), but operates on (and returns) strings instead. (matches will always be nil if s == ``.)
|
||||||
|
|
||||||
|
A small deviation, though; empty strings instead of nils (because duh) will occupy placeholders (if inclNoMatchStrict is specified).
|
||||||
|
*/
|
||||||
|
func (r *ReMap) MapString(s string, inclNoMatch, inclNoMatchStrict, mustMatch bool) (matches map[string][]string) {
|
||||||
|
|
||||||
|
var ok bool
|
||||||
|
var endIdx int
|
||||||
|
var startIdx int
|
||||||
|
var chunkIdx int
|
||||||
|
var grpNm string
|
||||||
|
var names []string
|
||||||
|
var matchStr string
|
||||||
|
var idxChunks [][]int
|
||||||
|
var matchIndices []int
|
||||||
|
var chunkIndices []int // always 2 elements; start pos and end pos
|
||||||
|
var tmpMap map[string][]string = make(map[string][]string)
|
||||||
|
|
||||||
|
/*
|
||||||
|
OK so this is a bit of a deviation.
|
||||||
|
|
||||||
|
It's not as straightforward as above, because there isn't an explicit way
|
||||||
|
like above to determine if a patterb was *matched as an empty string* vs.
|
||||||
|
*not matched*.
|
||||||
|
|
||||||
|
So instead do roundabout index-y things.
|
||||||
|
*/
|
||||||
|
|
||||||
|
if s == "" {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
names = r.Regexp.SubexpNames()
|
||||||
|
matchIndices = r.Regexp.FindStringSubmatchIndex(s)
|
||||||
|
|
||||||
|
if matchIndices == nil {
|
||||||
|
// s does not match pattern
|
||||||
|
if !mustMatch {
|
||||||
|
matches = make(map[string][]string)
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if names == nil || len(names) == 0 || len(names) == 1 {
|
||||||
|
/*
|
||||||
|
no named capture groups;
|
||||||
|
technically only the last condition would be the case.
|
||||||
|
*/
|
||||||
|
if inclNoMatch {
|
||||||
|
matches = make(map[string][]string)
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
names = names[1:]
|
||||||
|
|
||||||
|
if len(matchIndices) == 0 || len(matchIndices) == 1 {
|
||||||
|
/*
|
||||||
|
no submatches whatsoever.
|
||||||
|
*technically* I don't think this condition can actually be reached.
|
||||||
|
This is more of a safe-return before we chunk the indices.
|
||||||
|
*/
|
||||||
|
matches = make(map[string][]string)
|
||||||
|
if inclNoMatch {
|
||||||
|
if len(names) >= 1 {
|
||||||
|
for _, grpNm = range names {
|
||||||
|
matches[grpNm] = nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
/*
|
||||||
|
The reslice starts at 2 because they're in pairs: []int{<start>, <end>, <start>, <end>, ...}
|
||||||
|
and the first *pair* is the entire pattern match.
|
||||||
|
Thus the len(matchIndices) == 2*len(names).
|
||||||
|
Keep in mind that since the first element of names is removed,
|
||||||
|
the first pair here is also removed.
|
||||||
|
*/
|
||||||
|
matchIndices = matchIndices[2:]
|
||||||
|
|
||||||
|
idxChunks = make([][]int, len(names))
|
||||||
|
for startIdx = 0; startIdx < len(idxChunks); startIdx += 2 {
|
||||||
|
endIdx = startIdx + 2
|
||||||
|
grpNm = names[chunkIdx]
|
||||||
|
/*
|
||||||
|
Thankfully, it's actually a build error if a pattern specifies a named
|
||||||
|
capture group with an empty name.
|
||||||
|
So we don't need to worry about accounting for that,
|
||||||
|
and can just skip over grpNm == "" (which is an *unnamed* capture group).
|
||||||
|
*/
|
||||||
|
if grpNm == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
// This technically should never happen.
|
||||||
|
if endIdx > len(matchIndices) {
|
||||||
|
endIdx = len(matchIndices)
|
||||||
|
}
|
||||||
|
chunkIndices = matchIndices[startIdx:endIdx]
|
||||||
|
if chunkIndices[0] == -1 || chunkIndices[1] == -1 {
|
||||||
|
// group did not match
|
||||||
|
if !inclNoMatch {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if _, ok = tmpMap[grpNm]; !ok {
|
||||||
|
if !inclNoMatchStrict {
|
||||||
|
tmpMap[grpNm] = nil
|
||||||
|
} else {
|
||||||
|
tmpMap[grpNm] = []string{""}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if inclNoMatchStrict {
|
||||||
|
tmpMap[grpNm] = append(tmpMap[grpNm], "")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
matchStr = s[chunkIndices[0]:chunkIndices[1]]
|
||||||
|
if _, ok = tmpMap[grpNm]; !ok {
|
||||||
|
tmpMap[grpNm] = make([]string, 0)
|
||||||
|
}
|
||||||
|
tmpMap[grpNm] = append(tmpMap[grpNm], matchStr)
|
||||||
|
|
||||||
|
chunkIdx++
|
||||||
|
}
|
||||||
|
|
||||||
|
// This *technically* should be completely handled above.
|
||||||
|
if inclNoMatch {
|
||||||
|
for _, grpNm = range names {
|
||||||
|
if _, ok = tmpMap[grpNm]; !ok {
|
||||||
|
tmpMap[grpNm] = nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(tmpMap) > 0 {
|
||||||
|
matches = tmpMap
|
||||||
|
}
|
||||||
|
|
||||||
|
return
|
||||||
|
}
|
||||||
23
remap/types.go
Normal file
23
remap/types.go
Normal file
@@ -0,0 +1,23 @@
|
|||||||
|
package remap
|
||||||
|
|
||||||
|
import (
|
||||||
|
`regexp`
|
||||||
|
)
|
||||||
|
|
||||||
|
// ReMap wraps a *regexp.Regexp by embedding it, so the embedded Regexp's
// methods are promoted onto ReMap alongside the map-oriented helpers
// defined on it.
type ReMap struct {
	*regexp.Regexp
}

/*
ExplicitStringMatch is a single capture group result, used with ReMap.MapStringExplicit.

IsMatch separates a hit (the group matched, even if the matched value is
an empty string) from a miss (the group did not match at all).
*/
type ExplicitStringMatch struct {
	Group   string
	IsMatch bool
	Value   string
}
|
||||||
Reference in New Issue
Block a user