FIXED:
* Properly parse into map, add *All* variants
This commit is contained in:
brent saner
2026-01-07 19:02:52 -05:00
parent 834395c050
commit bb71be187f
7 changed files with 613 additions and 86 deletions

3
go.sum
View File

@@ -11,6 +11,5 @@ golang.org/x/sync v0.19.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI=
golang.org/x/sys v0.0.0-20220615213510-4f61da869c0c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.39.0 h1:CvCKL8MeisomCi6qNZ+wbb0DN9E5AATixKsvNtMoMFk=
golang.org/x/sys v0.39.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
r00t2.io/sysutils v1.15.0 h1:FSnREfbXDhBQEO7LMpnRQeKlPshozxk9XHw3YgWRgRg=
r00t2.io/sysutils v1.15.0/go.mod h1:28qB0074EIRQ8Sy/ybaA5jC3qA32iW2aYLkMCRhyAFM=
r00t2.io/sysutils v1.15.1 h1:0EVZZAxTFqQN6jjfjqUKkXye0LMshUA5MO7l3Wd6wH8=
r00t2.io/sysutils v1.15.1/go.mod h1:T0iOnaZaSG5NE1hbXTqojRZc0ia/u8TB73lV7zhMz58=

11
remap/errs.go Normal file
View File

@@ -0,0 +1,11 @@
package remap
import (
`errors`
)
var (
ErrInvalidIdxPair error = errors.New("invalid index pair; [1] must be >= [0]")
ErrNoStr error = errors.New("no string to slice/reslice/subslice")
ErrShortStr error = errors.New("string too short to slice/reslice/subslice")
)

View File

@@ -111,3 +111,60 @@ func MustCompilePOSIX(expr string) (r *ReMap) {
return
}
/*
strIdxSlicer takes string s, and returns the substring marked by idxPair,
where:
idxPair = [2]int{
<substring START POSITION>,
<substring END BOUNDARY>,
}
That is, to get `oo` from `foobar`,
idxPair = [2]int{1, 3}
# NOT:
#idxPair = [2]int{1, 2}
subStr will be empty and matched will be false if:
* idxPair[0] < 0
* idxPair[1] < 0
It will panic with [ErrShortStr] if:
* idxPair[0] > len(s)-1
* idxPair[1] > len(s)
It will panic with [ErrInvalidIdxPair] if:
* idxPair[0] > idxPair[1]
It will properly handle single-character addresses (i.e. idxPair[0] == idxPair[1]).
*/
func strIdxSlicer(s string, idxPair [2]int) (subStr string, matched bool) {
if idxPair[0] < 0 || idxPair[1] < 0 {
return
}
matched = true
if (idxPair[0] > (len(s) - 1)) ||
(idxPair[1] > len(s)) {
panic(ErrShortStr)
}
if idxPair[0] > idxPair[1] {
panic(ErrInvalidIdxPair)
}
if idxPair[0] == idxPair[1] {
// single character
subStr = string(s[idxPair[0]])
} else {
// multiple characters
subStr = s[idxPair[0]:idxPair[1]]
}
return
}

View File

@@ -10,6 +10,9 @@ It will panic if the embedded [regexp.Regexp] is nil.
Each match for each group is in a slice keyed under that group name, with that slice
ordered by the indexing done by the regex match itself.
This operates on only the first found match (like [regexp.Regexp.FindSubmatch]).
To operate on *all* matches, use [ReMap.MapAll].
In summary, the parameters are as follows:
# inclNoMatch
@@ -33,6 +36,7 @@ is provided but b does not match then matches will be:
If true (and inclNoMatch is true), instead of a single nil the group's values will be
a slice of nil values explicitly matching the number of times the group name is specified
in the pattern.
May be unpredictable if the same name is used multiple times for different capture groups across multiple patterns.
For example, if a pattern:
@@ -144,6 +148,9 @@ func (r *ReMap) Map(b []byte, inclNoMatch, inclNoMatchStrict, mustMatch bool) (m
if inclNoMatch {
if len(names) >= 1 {
for _, grpNm = range names {
if grpNm == "" {
continue
}
matches[grpNm] = nil
}
}
@@ -156,7 +163,7 @@ func (r *ReMap) Map(b []byte, inclNoMatch, inclNoMatchStrict, mustMatch bool) (m
grpNm = names[mIdx]
/*
Thankfully, it's actually a build error if a pattern specifies a named
capture group with an empty name.
capture group with an matched name.
So we don't need to worry about accounting for that,
and can just skip over grpNm == "" (which is an *unnamed* capture group).
*/
@@ -192,6 +199,138 @@ func (r *ReMap) Map(b []byte, inclNoMatch, inclNoMatchStrict, mustMatch bool) (m
// This *technically* should be completely handled above.
if inclNoMatch {
for _, grpNm = range names {
if grpNm == "" {
continue
}
if _, ok = tmpMap[grpNm]; !ok {
tmpMap[grpNm] = nil
}
}
}
if len(tmpMap) > 0 {
matches = tmpMap
}
return
}
/*
MapAll behaves exactly like [ReMap.Map] but will "squash"/consolidate *all* found matches, not just the first occurrence,
into the group name.
You likely want to use this instead of [ReMap.Map] for multiline patterns.
*/
func (r *ReMap) MapAll(b []byte, inclNoMatch, inclNoMatchStrict, mustMatch bool) (matches map[string][][]byte) {
var ok bool
var mIdx int
var isEmpty bool
var match []byte
var grpNm string
var names []string
var mbGrp [][]byte
var ptrnNms []string
var matchBytes [][][]byte
var tmpMap map[string][][]byte = make(map[string][][]byte)
if b == nil {
return
}
names = r.Regexp.SubexpNames()[:]
matchBytes = r.Regexp.FindAllSubmatch(b, -1)
if matchBytes == nil {
// b does not match pattern
if !mustMatch {
matches = make(map[string][][]byte)
}
return
}
if names == nil || len(names) == 0 || len(names) == 1 {
/*
no named capture groups;
technically only the last condition would be the case.
*/
if inclNoMatch {
matches = make(map[string][][]byte)
}
return
}
names = names[1:]
tmpMap = make(map[string][][]byte)
// From here, it behaves (sort of) like ReMap.Map
// except mbGrp is like matchBytes in Map.
for _, mbGrp = range matchBytes {
// Unlike ReMap.Map, we have to do a little additional logic.
isEmpty = false
ptrnNms = make([]string, 0, len(names))
if mbGrp == nil {
isEmpty = true
}
if !isEmpty {
if len(mbGrp) == 0 || len(mbGrp) == 1 {
/*
no submatches whatsoever.
*/
isEmpty = true
} else {
mbGrp = mbGrp[1:]
for mIdx, match = range mbGrp {
if mIdx > len(names) {
break
}
grpNm = names[mIdx]
if grpNm == "" {
continue
}
ptrnNms = append(ptrnNms, grpNm)
if match == nil {
// This specific group didn't match, but it matched the whole pattern.
if !inclNoMatch {
continue
}
if _, ok = tmpMap[grpNm]; !ok {
if !inclNoMatchStrict {
tmpMap[grpNm] = nil
} else {
tmpMap[grpNm] = [][]byte{nil}
}
} else {
if inclNoMatchStrict {
tmpMap[grpNm] = append(tmpMap[grpNm], nil)
}
}
continue
}
if _, ok = tmpMap[grpNm]; !ok {
tmpMap[grpNm] = make([][]byte, 0)
}
tmpMap[grpNm] = append(tmpMap[grpNm], match)
}
}
}
// I can't recall why I capture this.
_ = ptrnNms
}
// *Theoretically* all of these should be populated with at least a nil.
if inclNoMatch {
for _, grpNm = range names {
if grpNm == "" {
continue
}
if _, ok = tmpMap[grpNm]; !ok {
tmpMap[grpNm] = nil
}
@@ -207,10 +346,14 @@ func (r *ReMap) Map(b []byte, inclNoMatch, inclNoMatchStrict, mustMatch bool) (m
/*
MapString is exactly like [ReMap.Map], but operates on (and returns) strings instead.
(matches will always be nil if s == .)
(matches will always be nil if s == "".)
It will panic if the embedded [regexp.Regexp] is nil.
This operates on only the first found match (like [regexp.Regexp.FindStringSubmatch]).
To operate on *all* matches, use [ReMap.MapStringAll].
To operate on *all* matches with retained grouping, use [ReMap.MapStringAllSplit].
A small deviation and caveat, though; empty strings instead of nils (because duh) will occupy slice placeholders (if `inclNoMatchStrict` is specified).
This unfortunately *does not provide any indication* if an empty string positively matched the pattern (a "hit") or if it was simply
not matched at all (a "miss"). If you need definitive determination between the two conditions, it is instead recommended to either
@@ -239,6 +382,7 @@ is provided but s does not match then matches will be:
If true (and inclNoMatch is true), instead of a single nil the group's values will be
a slice of empty string values explicitly matching the number of times the group name is specified
in the pattern.
May be unpredictable if the same name is used multiple times for different capture groups across multiple patterns.
For example, if a pattern:
@@ -308,27 +452,19 @@ func (r *ReMap) MapString(s string, inclNoMatch, inclNoMatchStrict, mustMatch bo
var ok bool
var endIdx int
var startIdx int
var chunkIdx int
var grpIdx int
var grpNm string
var names []string
var matchStr string
/*
A slice of indices or index pairs.
For each element `e` in idxChunks,
* if `e` is nil, no group match.
* if len(e) == 1, only a single character was matched.
* otherwise len(e) == 2, the start and end of the match.
*/
var idxChunks [][]int
var si stringIndexer
var matchIndices []int
var chunkIndices []int // always 2 elements; start pos and end pos
var tmpMap map[string][]string = make(map[string][]string)
/*
OK so this is a bit of a deviation.
It's not as straightforward as above, because there isn't an explicit way
like above to determine if a pattern was *matched as an empty string* vs.
like above to determine if a pattern was *matched as an matched string* vs.
*not matched*.
So instead do roundabout index-y things.
@@ -384,26 +520,34 @@ func (r *ReMap) MapString(s string, inclNoMatch, inclNoMatchStrict, mustMatch bo
matches = make(map[string][]string)
if inclNoMatch {
for _, grpNm = range names {
if grpNm != "" {
matches[grpNm] = nil
if grpNm == "" {
continue
}
matches[grpNm] = nil
}
}
return
}
matchIndices = matchIndices[2:]
/*
A reslice of `matchIndices` starts at 2 (as long as `names` is sliced [1:])
because they're in pairs: []int{<start>, <end>, <start>, <end>, ...}
and the first pair is the entire pattern match (un-resliced names[0]).
Thus the len(matchIndices) == 2*len(names), *even* if you reslice.
The reslice of `matchIndices` starts at 2 because they're in pairs:
[]int{<start>, <end>, <start>, <end>, ...}
and the first pair is the entire pattern match (un-resliced names[0],
un-resliced matchIndices[0]).
Thus the len(matchIndices) == 2*len(names) (*should*, that is), *even* if you reslice.
Keep in mind that since the first element of names is removed,
we reslices matchIndices as well (above).
we reslice matchIndices as well.
*/
idxChunks = make([][]int, len(names))
chunkIdx = 0
endIdx = 0
matchIndices = matchIndices[2:]
tmpMap = make(map[string][]string)
// Note that the second index is the *upper boundary*, not a *position in the string*
// so these indices are perfectly usable as-is as returned from the regexp methods.
// http://golang.org/ref/spec#Slice_expressions
for startIdx = 0; endIdx < len(matchIndices); startIdx += 2 {
endIdx = startIdx + 2
// This technically should never happen.
@@ -411,77 +555,253 @@ func (r *ReMap) MapString(s string, inclNoMatch, inclNoMatchStrict, mustMatch bo
endIdx = len(matchIndices)
}
chunkIndices = matchIndices[startIdx:endIdx]
if chunkIndices[0] == -1 || chunkIndices[1] == -1 {
// group did not match
chunkIndices = nil
} else {
// single character
if chunkIndices[0] == chunkIndices[1] {
chunkIndices = []int{chunkIndices[0]}
} else {
chunkIndices = matchIndices[startIdx:endIdx]
}
if grpIdx >= len(names) {
break
}
idxChunks[chunkIdx] = chunkIndices
chunkIdx++
}
// Now associate with names and pull the string sequence.
for chunkIdx, chunkIndices = range idxChunks {
grpNm = names[chunkIdx]
/*
Thankfully, it's actually a build error if a pattern specifies a named
capture group with an empty name.
So we don't need to worry about accounting for that,
and can just skip over grpNm == ""
(which is either an *unnamed* capture group
OR the first element in `names`, which is always
the entire match).
(We reslice out the latter.)
*/
if grpNm == "" {
si = stringIndexer{
group: grpIdx,
start: matchIndices[startIdx],
end: matchIndices[endIdx-1],
matched: true,
nm: names[grpIdx],
grpS: "",
s: &matchStr,
ptrn: r.Regexp,
}
grpIdx++
if si.nm == "" {
// unnamed capture group
continue
}
if chunkIndices == nil || len(chunkIndices) == 0 {
// group did not match
// sets si.matched and si.grpS
si.idxSlice(&s)
if !si.matched {
if !inclNoMatch {
continue
}
if _, ok = tmpMap[grpNm]; !ok {
if _, ok = tmpMap[si.nm]; !ok {
if !inclNoMatchStrict {
tmpMap[grpNm] = nil
tmpMap[si.nm] = nil
} else {
tmpMap[grpNm] = []string{""}
tmpMap[si.nm] = []string{""}
}
} else {
if inclNoMatchStrict {
tmpMap[grpNm] = append(tmpMap[grpNm], "")
tmpMap[si.nm] = append(tmpMap[si.nm], "")
}
}
continue
}
switch len(chunkIndices) {
case 1:
// Single character
matchStr = string(s[chunkIndices[0]])
case 2:
// Multiple characters
matchStr = s[chunkIndices[0]:chunkIndices[1]]
if _, ok = tmpMap[si.nm]; !ok {
tmpMap[si.nm] = make([]string, 0)
}
if _, ok = tmpMap[grpNm]; !ok {
tmpMap[grpNm] = make([]string, 0)
}
tmpMap[grpNm] = append(tmpMap[grpNm], matchStr)
tmpMap[si.nm] = append(tmpMap[si.nm], si.grpS)
}
// This *technically* should be completely handled above.
if inclNoMatch {
for _, grpNm = range names {
if grpNm == "" {
continue
}
if _, ok = tmpMap[grpNm]; !ok {
tmpMap[grpNm] = nil
}
}
}
if len(tmpMap) > 0 {
matches = tmpMap
}
return
}
/*
MapStringAll behaves exactly like [ReMap.MapString] but will "squash"/consolidate *all* found matches, not just the first occurrence,
into the group name.
You likely want to use this instead of [ReMap.MapString] for multiline patterns.
*/
func (r *ReMap) MapStringAll(s string, inclNoMatch, inclNoMatchStrict, mustMatch bool) (matches map[string][]string) {
var ok bool
var endIdx int
var startIdx int
var grpIdx int
var grpNm string
var names []string
var matchStr string
var si stringIndexer
var matchIndices []int
var allMatchIndices [][]int
var tmpMap map[string][]string = make(map[string][]string)
if s == "" {
return
}
names = r.Regexp.SubexpNames()[:]
allMatchIndices = r.Regexp.FindAllStringSubmatchIndex(s, -1)
if allMatchIndices == nil {
// s does not match pattern at all.
if !mustMatch {
matches = make(map[string][]string)
}
return
}
if names == nil || len(names) == 0 || len(names) == 1 {
/*
No named capture groups;
technically only the last condition would be the case,
as (regexp.Regexp).SubexpNames() will ALWAYS at the LEAST
return a `[]string{""}`.
*/
if inclNoMatch {
matches = make(map[string][]string)
}
return
}
names = names[1:]
if len(allMatchIndices) == 0 {
// No matches (and thus submatches) whatsoever.
// I think this is actually covered by the `if allMatchIndices == nil { ... }` above,
// but this is still here for safety and efficiency - early return on no matches to iterate.
matches = make(map[string][]string)
if inclNoMatch {
for _, grpNm = range names {
if grpNm == "" {
continue
}
matches[grpNm] = nil
}
}
return
}
// Do *NOT* trim/reslice allMatchIndices!
// The reslicing is done below, *inside* each matchIndices iteration!
tmpMap = make(map[string][]string)
// From here, it behaves (sort of) like ReMap.MapString.
// Build the strictly-paired chunk indexes and populate them.
// We are iterating over *match sets*; matchIndices here should be analgous
// to matchIndices in ReMap.MapString.
for _, matchIndices = range allMatchIndices {
if matchIndices == nil {
// I *think* the exception with the *All* variant here
// is the *entire* return (allMatchIndices) is nil if there
// aren't any matches; I can't imagine there'd be any feasible
// way it'd insert a nil *element* for an index mapping group.
// So just continuing here should be fine;
// this continue SHOULD be unreachable.
continue
}
// Reslice *here*, on the particular match index group.
// Grap the matchStr first; it's not currently *used* by anything but may in the future.
matchStr, ok = strIdxSlicer(
s,
*(*[2]int)(matchIndices[0:2]),
)
if len(matchIndices) == 0 || len(matchIndices) == 1 {
// No *sub*matches (capture groups) in this match, but it still matched the pattern.
if inclNoMatch {
for _, grpNm = range names {
if grpNm == "" {
continue
}
// We don't immediately return, though; we just stage out group names just in case.
// That's why we use tmpMap and not matches.
if _, ok = tmpMap[grpNm]; !ok {
tmpMap[grpNm] = nil
}
}
}
continue
}
matchIndices = matchIndices[2:]
// Reset from previous loop
endIdx = 0
grpIdx = 0
for startIdx = 0; endIdx < len(matchIndices); startIdx += 2 {
endIdx = startIdx + 2
if endIdx > len(matchIndices) {
endIdx = len(matchIndices)
}
if grpIdx >= len(names) {
break
}
si = stringIndexer{
group: grpIdx,
start: matchIndices[startIdx],
end: matchIndices[endIdx-1],
matched: true,
nm: names[grpIdx],
grpS: "",
ptrn: r.Regexp,
}
grpIdx++
// We do not include the entire match string here;
// we don't need it for this. Waste of memory.
_ = matchStr
/*
si.s = new(string)
*si.s = matchStr
*/
if si.nm == "" {
// unnamed capture group
continue
}
// sets si.matched and si.grpS
si.idxSlice(&s)
if !si.matched {
if !inclNoMatch {
continue
}
if _, ok = tmpMap[si.nm]; !ok {
if !inclNoMatchStrict {
tmpMap[si.nm] = nil
} else {
tmpMap[si.nm] = []string{""}
}
} else {
if inclNoMatchStrict {
tmpMap[si.nm] = append(tmpMap[si.nm], "")
}
}
continue
}
if _, ok = tmpMap[si.nm]; !ok {
tmpMap[si.nm] = make([]string, 0)
}
tmpMap[si.nm] = append(tmpMap[si.nm], si.grpS)
}
}
if inclNoMatch {
for _, grpNm = range names {
if grpNm == "" {
continue
}
if _, ok = tmpMap[grpNm]; !ok {
tmpMap[grpNm] = nil
}

View File

@@ -1,9 +1,9 @@
package remap
import (
`fmt`
`reflect`
`regexp`
"fmt"
"reflect"
"regexp"
"testing"
)
@@ -12,6 +12,7 @@ type (
Nm string
S string
M *ReMap
All bool
Expected map[string][][]byte
ExpectedStr map[string][]string
ParamInclNoMatch bool
@@ -25,12 +26,14 @@ func TestRemap(t *testing.T) {
var matches map[string][][]byte
for midx, m := range []testMatcher{
// 1
testMatcher{
Nm: "No matches",
S: "this is a test",
M: &ReMap{regexp.MustCompile(``)},
Expected: nil,
},
// 2
testMatcher{
Nm: "Single mid match",
S: "This contains a single match in the middle of a string",
@@ -39,6 +42,7 @@ func TestRemap(t *testing.T) {
"g1": [][]byte{[]byte("match")},
},
},
// 3
testMatcher{
Nm: "multi mid match",
S: "This contains a single match and another match in the middle of a string",
@@ -50,6 +54,7 @@ func TestRemap(t *testing.T) {
},
},
},
// 4
testMatcher{
Nm: "line match",
S: "This\ncontains a\nsingle\nmatch\non a dedicated line",
@@ -60,10 +65,12 @@ func TestRemap(t *testing.T) {
},
},
},
// 5
testMatcher{
Nm: "multiline match",
S: "This\ncontains a\nsingle match and another\nmatch\nin the middle of a string",
M: &ReMap{regexp.MustCompile(`\s+(?P<g1>match) and another\s+(?P<g1>match)\s+`)},
Nm: "multiline match",
S: "This\ncontains a\nsingle match and another\nmatch\nin the middle of a string",
M: &ReMap{regexp.MustCompile(`\s+(?P<g1>match) and another\s+(?P<g1>match)\s+`)},
All: true,
Expected: map[string][][]byte{
"g1": [][]byte{
[]byte("match"),
@@ -71,8 +78,32 @@ func TestRemap(t *testing.T) {
},
},
},
// 6
// More closely mirrors something closer to real-life
testMatcher{
Nm: "mixed match",
S: " # No longer log hits/reqs/resps to file.\n" +
" #access_log /mnt/nginx_logs/vhost/tenant/site/access.log main;\n" +
" #error_log /mnt/nginx_logs/vhost/tenant/site/error.log;\n" +
" access_log off;\n" +
" error_log /dev/null;\n\n" +
" ssl_certificate /etc/nginx/tls/crt/tenant.pem;\n" +
" ssl_certificate_key /etc/nginx/tls/key/tenant.pem;\n\n",
M: &ReMap{regexp.MustCompile(`(?m)^\s*(?:error|access)_log\s+(?P<logpath>.+);\s*$`)},
All: true,
Expected: map[string][][]byte{
"logpath": [][]byte{
[]byte("off"),
[]byte("/dev/null"),
},
},
},
} {
matches = m.M.Map([]byte(m.S), false, false, false)
if m.All {
matches = m.M.MapAll([]byte(m.S), false, false, false)
} else {
matches = m.M.Map([]byte(m.S), false, false, false)
}
t.Logf(
"#%d:\n\tsrc:\t'%s'\n\tptrn:\t'%s'\n\tmatch:\t%s\n",
midx+1,
@@ -81,7 +112,7 @@ func TestRemap(t *testing.T) {
testBmapToStrMap(matches),
)
if !reflect.DeepEqual(matches, m.Expected) {
t.Fatalf("Case #%d (\"%s\"): '%#v' != '%#v'", midx+1, m.Nm, m.Expected, matches)
t.Fatalf("Case #%d (\"%s\"): expected '%#v' != received '%#v'", midx+1, m.Nm, m.Expected, matches)
}
}
@@ -165,7 +196,11 @@ func TestRemapParams(t *testing.T) {
ParamInclMustMatch: true,
},
} {
matches = m.M.Map([]byte(m.S), m.ParamInclNoMatch, m.ParamInclNoMatchStrict, m.ParamInclMustMatch)
if m.All {
matches = m.M.MapAll([]byte(m.S), m.ParamInclNoMatch, m.ParamInclNoMatchStrict, m.ParamInclMustMatch)
} else {
matches = m.M.Map([]byte(m.S), m.ParamInclNoMatch, m.ParamInclNoMatchStrict, m.ParamInclMustMatch)
}
t.Logf(
"%d: %v/%v/%v: %#v\n",
midx+1, m.ParamInclNoMatch, m.ParamInclNoMatchStrict, m.ParamInclMustMatch, matches,
@@ -182,12 +217,14 @@ func TestRemapString(t *testing.T) {
var matches map[string][]string
for midx, m := range []testMatcher{
// 1
testMatcher{
Nm: "No matches",
S: "this is a test",
M: &ReMap{regexp.MustCompile(``)},
ExpectedStr: nil,
},
// 2
testMatcher{
Nm: "Single mid match",
S: "This contains a single match in the middle of a string",
@@ -196,6 +233,7 @@ func TestRemapString(t *testing.T) {
"g1": []string{"match"},
},
},
// 3
testMatcher{
Nm: "multi mid match",
S: "This contains a single match and another match in the middle of a string",
@@ -207,6 +245,7 @@ func TestRemapString(t *testing.T) {
},
},
},
// 4
testMatcher{
Nm: "line match",
S: "This\ncontains a\nsingle\nmatch\non a dedicated line",
@@ -217,10 +256,12 @@ func TestRemapString(t *testing.T) {
},
},
},
// 5
testMatcher{
Nm: "multiline match",
S: "This\ncontains a\nsingle match and another\nmatch\nin the middle of a string",
M: &ReMap{regexp.MustCompile(`\s+(?P<g1>match) and another\s+(?P<g1>match)\s+`)},
Nm: "multiline match",
S: "This\ncontains a\nsingle match and another\nmatch\nin the middle of a string",
M: &ReMap{regexp.MustCompile(`\s+(?P<g1>match) and another\s+(?P<g1>match)\s+`)},
All: true,
ExpectedStr: map[string][]string{
"g1": []string{
"match",
@@ -228,8 +269,32 @@ func TestRemapString(t *testing.T) {
},
},
},
// 6
// More closely mirrors something closer to real-life
testMatcher{
Nm: "mixed match",
S: " # No longer log hits/reqs/resps to file.\n" +
" #access_log /mnt/nginx_logs/vhost/tenant/site/access.log main;\n" +
" #error_log /mnt/nginx_logs/vhost/tenant/site/error.log;\n" +
" access_log off;\n" +
" error_log /dev/null;\n\n" +
" ssl_certificate /etc/nginx/tls/crt/tenant.pem;\n" +
" ssl_certificate_key /etc/nginx/tls/key/tenant.pem;\n\n",
M: &ReMap{regexp.MustCompile(`(?m)^\s*(?:error|access)_log\s+(?P<logpath>.+);\s*$`)},
All: true,
ExpectedStr: map[string][]string{
"logpath": []string{
"off",
"/dev/null",
},
},
},
} {
matches = m.M.MapString(m.S, false, false, false)
if m.All {
matches = m.M.MapStringAll(m.S, false, false, false)
} else {
matches = m.M.MapString(m.S, false, false, false)
}
t.Logf(
"#%d:\n\tsrc:\t'%s'\n\tptrn:\t'%s'\n\tmatch:\t%s\n",
midx+1,

View File

@@ -0,0 +1,34 @@
package remap
// idx returns []int{s.start, s.end}.
func (s *stringIndexer) idx() (i []int) {
return []int{s.start, s.end}
}
// idxStrict returns [2]int{s.start, s.end}.
func (s *stringIndexer) idxStrict() (i [2]int) {
return [2]int{s.start, s.end}
}
/*
idxSlice populates s.grpS using s.start and s.end.
If str is nil, it will use s.s.
If str is nil and s.s is nil, it will panic with [ErrNoStr].
If the pattern does not match (s.start < 0 or s.end < 0),
s.matched will be set to false (otherwise true).
*/
func (s *stringIndexer) idxSlice(str *string) {
if str == nil {
if s.s == nil {
panic(ErrNoStr)
}
str = s.s
}
s.grpS, s.matched = strIdxSlicer(*str, s.idxStrict())
return
}

View File

@@ -24,4 +24,45 @@ type (
}
*/
stringIndexer struct {
// group is the capture group index for this match.
group int
// start is the string index (from the original string) where the matched group starts
start int
// end is the string index where the matched group ends
end int
/*
matched indicates if explicitly no match was found.
(This is normally indeterminate with string regex returns,
as e.g. `(?P<mygrp>\s*)`, `(?P<mygrp>(?:somestring)?)`, etc. all can be a *matched* "".)
If grpS == "" and matched == true, it DID match an empty string.
If grpS == "" and matched == false, it DID NOT MATCH the pattern.
If grpS != "", matched can be completely disregarded.
*/
matched bool
// nm is the match group name.
nm string
/*
grpS is the actual group-matched *substring*.
It will ALWAYS be either:
* the entirety of s
* a substring of s
* an empty string
it will never, and cannot be, a SUPERset of s.
it may not always be included/populated to save on memory.
*/
grpS string
/*
s is the *entire* MATCHED (sub)string.
It may not always be populated if not needed to save memory.
*/
s *string
// ptrn is the pattern applied to s.
ptrn *regexp.Regexp
}
)