From bb71be187f21fe3fbd11177ad916f81948cb932a Mon Sep 17 00:00:00 2001 From: brent saner Date: Wed, 7 Jan 2026 19:02:52 -0500 Subject: [PATCH] v1.15.3 FIXED: * Properly parse into map, add *All* variants --- go.sum | 3 +- remap/errs.go | 11 + remap/funcs.go | 57 +++++ remap/funcs_remap.go | 462 +++++++++++++++++++++++++++++------ remap/funcs_remap_test.go | 91 ++++++- remap/funcs_stringindexer.go | 34 +++ remap/types.go | 41 ++++ 7 files changed, 613 insertions(+), 86 deletions(-) create mode 100644 remap/errs.go create mode 100644 remap/funcs_stringindexer.go diff --git a/go.sum b/go.sum index 3c97bd1..4a5ec2c 100644 --- a/go.sum +++ b/go.sum @@ -11,6 +11,5 @@ golang.org/x/sync v0.19.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI= golang.org/x/sys v0.0.0-20220615213510-4f61da869c0c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.39.0 h1:CvCKL8MeisomCi6qNZ+wbb0DN9E5AATixKsvNtMoMFk= golang.org/x/sys v0.39.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= -r00t2.io/sysutils v1.15.0 h1:FSnREfbXDhBQEO7LMpnRQeKlPshozxk9XHw3YgWRgRg= -r00t2.io/sysutils v1.15.0/go.mod h1:28qB0074EIRQ8Sy/ybaA5jC3qA32iW2aYLkMCRhyAFM= +r00t2.io/sysutils v1.15.1 h1:0EVZZAxTFqQN6jjfjqUKkXye0LMshUA5MO7l3Wd6wH8= r00t2.io/sysutils v1.15.1/go.mod h1:T0iOnaZaSG5NE1hbXTqojRZc0ia/u8TB73lV7zhMz58= diff --git a/remap/errs.go b/remap/errs.go new file mode 100644 index 0000000..e5c4f27 --- /dev/null +++ b/remap/errs.go @@ -0,0 +1,11 @@ +package remap + +import ( + `errors` +) + +var ( + ErrInvalidIdxPair error = errors.New("invalid index pair; [1] must be >= [0]") + ErrNoStr error = errors.New("no string to slice/reslice/subslice") + ErrShortStr error = errors.New("string too short to slice/reslice/subslice") +) diff --git a/remap/funcs.go b/remap/funcs.go index 86e93b4..54ab294 100644 --- a/remap/funcs.go +++ b/remap/funcs.go @@ -111,3 +111,60 @@ func MustCompilePOSIX(expr string) (r *ReMap) { return } + +/* +strIdxSlicer takes string s, and returns the substring marked by idxPair, +where: + + idxPair = [2]int{ + , + , + } + +That is, to get `oo` from `foobar`, + + idxPair = [2]int{1, 3} + # NOT: + #idxPair = [2]int{1, 2} + +subStr will be empty and matched will be false if: + + * idxPair[0] < 0 + * idxPair[1] < 0 + +It will panic with [ErrShortStr] if: + + * idxPair[0] > len(s)-1 + * idxPair[1] > len(s) + +It will panic with [ErrInvalidIdxPair] if: + + * idxPair[0] > idxPair[1] + +It will properly handle single-character addresses (i.e. idxPair[0] == idxPair[1]). +*/ +func strIdxSlicer(s string, idxPair [2]int) (subStr string, matched bool) { + + if idxPair[0] < 0 || idxPair[1] < 0 { + return + } + matched = true + + if (idxPair[0] > (len(s) - 1)) || + (idxPair[1] > len(s)) { + panic(ErrShortStr) + } + if idxPair[0] > idxPair[1] { + panic(ErrInvalidIdxPair) + } + + if idxPair[0] == idxPair[1] { + // single character + subStr = string(s[idxPair[0]]) + } else { + // multiple characters + subStr = s[idxPair[0]:idxPair[1]] + } + + return +} diff --git a/remap/funcs_remap.go b/remap/funcs_remap.go index f1749cf..6b93d90 100644 --- a/remap/funcs_remap.go +++ b/remap/funcs_remap.go @@ -10,6 +10,9 @@ It will panic if the embedded [regexp.Regexp] is nil. Each match for each group is in a slice keyed under that group name, with that slice ordered by the indexing done by the regex match itself. +This operates on only the first found match (like [regexp.Regexp.FindSubmatch]). +To operate on *all* matches, use [ReMap.MapAll]. + In summary, the parameters are as follows: # inclNoMatch @@ -33,6 +36,7 @@ is provided but b does not match then matches will be: If true (and inclNoMatch is true), instead of a single nil the group's values will be a slice of nil values explicitly matching the number of times the group name is specified in the pattern. +May be unpredictable if the same name is used multiple times for different capture groups across multiple patterns. For example, if a pattern: @@ -144,6 +148,9 @@ func (r *ReMap) Map(b []byte, inclNoMatch, inclNoMatchStrict, mustMatch bool) (m if inclNoMatch { if len(names) >= 1 { for _, grpNm = range names { + if grpNm == "" { + continue + } matches[grpNm] = nil } } @@ -156,7 +163,7 @@ func (r *ReMap) Map(b []byte, inclNoMatch, inclNoMatchStrict, mustMatch bool) (m grpNm = names[mIdx] /* Thankfully, it's actually a build error if a pattern specifies a named - capture group with an empty name. + capture group with an matched name. So we don't need to worry about accounting for that, and can just skip over grpNm == "" (which is an *unnamed* capture group). */ @@ -192,6 +199,138 @@ func (r *ReMap) Map(b []byte, inclNoMatch, inclNoMatchStrict, mustMatch bool) (m // This *technically* should be completely handled above. if inclNoMatch { for _, grpNm = range names { + if grpNm == "" { + continue + } + if _, ok = tmpMap[grpNm]; !ok { + tmpMap[grpNm] = nil + } + } + } + + if len(tmpMap) > 0 { + matches = tmpMap + } + + return +} + +/* +MapAll behaves exactly like [ReMap.Map] but will "squash"/consolidate *all* found matches, not just the first occurrence, +into the group name. + +You likely want to use this instead of [ReMap.Map] for multiline patterns. +*/ +func (r *ReMap) MapAll(b []byte, inclNoMatch, inclNoMatchStrict, mustMatch bool) (matches map[string][][]byte) { + + var ok bool + var mIdx int + var isEmpty bool + var match []byte + var grpNm string + var names []string + var mbGrp [][]byte + var ptrnNms []string + var matchBytes [][][]byte + var tmpMap map[string][][]byte = make(map[string][][]byte) + + if b == nil { + return + } + + names = r.Regexp.SubexpNames()[:] + matchBytes = r.Regexp.FindAllSubmatch(b, -1) + + if matchBytes == nil { + // b does not match pattern + if !mustMatch { + matches = make(map[string][][]byte) + } + return + } + + if names == nil || len(names) == 0 || len(names) == 1 { + /* + no named capture groups; + technically only the last condition would be the case. + */ + if inclNoMatch { + matches = make(map[string][][]byte) + } + return + } + names = names[1:] + + tmpMap = make(map[string][][]byte) + + // From here, it behaves (sort of) like ReMap.Map + // except mbGrp is like matchBytes in Map. + for _, mbGrp = range matchBytes { + + // Unlike ReMap.Map, we have to do a little additional logic. + isEmpty = false + ptrnNms = make([]string, 0, len(names)) + + if mbGrp == nil { + isEmpty = true + } + + if !isEmpty { + if len(mbGrp) == 0 || len(mbGrp) == 1 { + /* + no submatches whatsoever. + */ + isEmpty = true + } else { + mbGrp = mbGrp[1:] + + for mIdx, match = range mbGrp { + if mIdx > len(names) { + break + } + grpNm = names[mIdx] + if grpNm == "" { + continue + } + ptrnNms = append(ptrnNms, grpNm) + + if match == nil { + // This specific group didn't match, but it matched the whole pattern. + if !inclNoMatch { + continue + } + if _, ok = tmpMap[grpNm]; !ok { + if !inclNoMatchStrict { + tmpMap[grpNm] = nil + } else { + tmpMap[grpNm] = [][]byte{nil} + } + } else { + if inclNoMatchStrict { + tmpMap[grpNm] = append(tmpMap[grpNm], nil) + } + } + continue + } + + if _, ok = tmpMap[grpNm]; !ok { + tmpMap[grpNm] = make([][]byte, 0) + } + tmpMap[grpNm] = append(tmpMap[grpNm], match) + } + } + } + + // I can't recall why I capture this. + _ = ptrnNms + } + + // *Theoretically* all of these should be populated with at least a nil. + if inclNoMatch { + for _, grpNm = range names { + if grpNm == "" { + continue + } if _, ok = tmpMap[grpNm]; !ok { tmpMap[grpNm] = nil } @@ -207,10 +346,14 @@ func (r *ReMap) Map(b []byte, inclNoMatch, inclNoMatchStrict, mustMatch bool) (m /* MapString is exactly like [ReMap.Map], but operates on (and returns) strings instead. -(matches will always be nil if s == “.) +(matches will always be nil if s == "".) It will panic if the embedded [regexp.Regexp] is nil. +This operates on only the first found match (like [regexp.Regexp.FindStringSubmatch]). +To operate on *all* matches, use [ReMap.MapStringAll]. +To operate on *all* matches with retained grouping, use [ReMap.MapStringAllSplit]. + A small deviation and caveat, though; empty strings instead of nils (because duh) will occupy slice placeholders (if `inclNoMatchStrict` is specified). This unfortunately *does not provide any indication* if an empty string positively matched the pattern (a "hit") or if it was simply not matched at all (a "miss"). If you need definitive determination between the two conditions, it is instead recommended to either @@ -239,6 +382,7 @@ is provided but s does not match then matches will be: If true (and inclNoMatch is true), instead of a single nil the group's values will be a slice of empty string values explicitly matching the number of times the group name is specified in the pattern. +May be unpredictable if the same name is used multiple times for different capture groups across multiple patterns. For example, if a pattern: @@ -308,27 +452,19 @@ func (r *ReMap) MapString(s string, inclNoMatch, inclNoMatchStrict, mustMatch bo var ok bool var endIdx int var startIdx int - var chunkIdx int + var grpIdx int var grpNm string var names []string var matchStr string - /* - A slice of indices or index pairs. - For each element `e` in idxChunks, - * if `e` is nil, no group match. - * if len(e) == 1, only a single character was matched. - * otherwise len(e) == 2, the start and end of the match. - */ - var idxChunks [][]int + var si stringIndexer var matchIndices []int - var chunkIndices []int // always 2 elements; start pos and end pos var tmpMap map[string][]string = make(map[string][]string) /* OK so this is a bit of a deviation. It's not as straightforward as above, because there isn't an explicit way - like above to determine if a pattern was *matched as an empty string* vs. + like above to determine if a pattern was *matched as an matched string* vs. *not matched*. So instead do roundabout index-y things. @@ -384,26 +520,34 @@ func (r *ReMap) MapString(s string, inclNoMatch, inclNoMatchStrict, mustMatch bo matches = make(map[string][]string) if inclNoMatch { for _, grpNm = range names { - if grpNm != "" { - matches[grpNm] = nil + if grpNm == "" { + continue } + matches[grpNm] = nil } } return } - matchIndices = matchIndices[2:] /* - A reslice of `matchIndices` starts at 2 (as long as `names` is sliced [1:]) - because they're in pairs: []int{, , , , ...} - and the first pair is the entire pattern match (un-resliced names[0]). - Thus the len(matchIndices) == 2*len(names), *even* if you reslice. + The reslice of `matchIndices` starts at 2 because they're in pairs: + + []int{, , , , ...} + + and the first pair is the entire pattern match (un-resliced names[0], + un-resliced matchIndices[0]). + + Thus the len(matchIndices) == 2*len(names) (*should*, that is), *even* if you reslice. Keep in mind that since the first element of names is removed, - we reslices matchIndices as well (above). + we reslice matchIndices as well. */ - idxChunks = make([][]int, len(names)) - chunkIdx = 0 - endIdx = 0 + matchIndices = matchIndices[2:] + + tmpMap = make(map[string][]string) + + // Note that the second index is the *upper boundary*, not a *position in the string* + // so these indices are perfectly usable as-is as returned from the regexp methods. + // http://golang.org/ref/spec#Slice_expressions for startIdx = 0; endIdx < len(matchIndices); startIdx += 2 { endIdx = startIdx + 2 // This technically should never happen. @@ -411,77 +555,253 @@ func (r *ReMap) MapString(s string, inclNoMatch, inclNoMatchStrict, mustMatch bo endIdx = len(matchIndices) } - chunkIndices = matchIndices[startIdx:endIdx] - - if chunkIndices[0] == -1 || chunkIndices[1] == -1 { - // group did not match - chunkIndices = nil - } else { - // single character - if chunkIndices[0] == chunkIndices[1] { - chunkIndices = []int{chunkIndices[0]} - } else { - chunkIndices = matchIndices[startIdx:endIdx] - } + if grpIdx >= len(names) { + break } - idxChunks[chunkIdx] = chunkIndices - chunkIdx++ - } - // Now associate with names and pull the string sequence. - for chunkIdx, chunkIndices = range idxChunks { - grpNm = names[chunkIdx] - /* - Thankfully, it's actually a build error if a pattern specifies a named - capture group with an empty name. - So we don't need to worry about accounting for that, - and can just skip over grpNm == "" - (which is either an *unnamed* capture group - OR the first element in `names`, which is always - the entire match). - (We reslice out the latter.) - */ - if grpNm == "" { + si = stringIndexer{ + group: grpIdx, + start: matchIndices[startIdx], + end: matchIndices[endIdx-1], + matched: true, + nm: names[grpIdx], + grpS: "", + s: &matchStr, + ptrn: r.Regexp, + } + grpIdx++ + + if si.nm == "" { + // unnamed capture group continue } - if chunkIndices == nil || len(chunkIndices) == 0 { - // group did not match + // sets si.matched and si.grpS + si.idxSlice(&s) + + if !si.matched { if !inclNoMatch { continue } - if _, ok = tmpMap[grpNm]; !ok { + if _, ok = tmpMap[si.nm]; !ok { if !inclNoMatchStrict { - tmpMap[grpNm] = nil + tmpMap[si.nm] = nil } else { - tmpMap[grpNm] = []string{""} + tmpMap[si.nm] = []string{""} } } else { if inclNoMatchStrict { - tmpMap[grpNm] = append(tmpMap[grpNm], "") + tmpMap[si.nm] = append(tmpMap[si.nm], "") } } continue } - switch len(chunkIndices) { - case 1: - // Single character - matchStr = string(s[chunkIndices[0]]) - case 2: - // Multiple characters - matchStr = s[chunkIndices[0]:chunkIndices[1]] + if _, ok = tmpMap[si.nm]; !ok { + tmpMap[si.nm] = make([]string, 0) } - - if _, ok = tmpMap[grpNm]; !ok { - tmpMap[grpNm] = make([]string, 0) - } - tmpMap[grpNm] = append(tmpMap[grpNm], matchStr) + tmpMap[si.nm] = append(tmpMap[si.nm], si.grpS) } // This *technically* should be completely handled above. if inclNoMatch { for _, grpNm = range names { + if grpNm == "" { + continue + } + if _, ok = tmpMap[grpNm]; !ok { + tmpMap[grpNm] = nil + } + } + } + + if len(tmpMap) > 0 { + matches = tmpMap + } + + return +} + +/* +MapStringAll behaves exactly like [ReMap.MapString] but will "squash"/consolidate *all* found matches, not just the first occurrence, +into the group name. + +You likely want to use this instead of [ReMap.MapString] for multiline patterns. +*/ +func (r *ReMap) MapStringAll(s string, inclNoMatch, inclNoMatchStrict, mustMatch bool) (matches map[string][]string) { + + var ok bool + var endIdx int + var startIdx int + var grpIdx int + var grpNm string + var names []string + var matchStr string + var si stringIndexer + var matchIndices []int + var allMatchIndices [][]int + var tmpMap map[string][]string = make(map[string][]string) + + if s == "" { + return + } + + names = r.Regexp.SubexpNames()[:] + allMatchIndices = r.Regexp.FindAllStringSubmatchIndex(s, -1) + + if allMatchIndices == nil { + // s does not match pattern at all. + if !mustMatch { + matches = make(map[string][]string) + } + return + } + + if names == nil || len(names) == 0 || len(names) == 1 { + /* + No named capture groups; + technically only the last condition would be the case, + as (regexp.Regexp).SubexpNames() will ALWAYS at the LEAST + return a `[]string{""}`. + */ + if inclNoMatch { + matches = make(map[string][]string) + } + return + } + names = names[1:] + + if len(allMatchIndices) == 0 { + // No matches (and thus submatches) whatsoever. + // I think this is actually covered by the `if allMatchIndices == nil { ... }` above, + // but this is still here for safety and efficiency - early return on no matches to iterate. + matches = make(map[string][]string) + if inclNoMatch { + for _, grpNm = range names { + if grpNm == "" { + continue + } + matches[grpNm] = nil + } + } + return + } + // Do *NOT* trim/reslice allMatchIndices! + // The reslicing is done below, *inside* each matchIndices iteration! + + tmpMap = make(map[string][]string) + + // From here, it behaves (sort of) like ReMap.MapString. + + // Build the strictly-paired chunk indexes and populate them. + // We are iterating over *match sets*; matchIndices here should be analgous + // to matchIndices in ReMap.MapString. + for _, matchIndices = range allMatchIndices { + + if matchIndices == nil { + // I *think* the exception with the *All* variant here + // is the *entire* return (allMatchIndices) is nil if there + // aren't any matches; I can't imagine there'd be any feasible + // way it'd insert a nil *element* for an index mapping group. + // So just continuing here should be fine; + // this continue SHOULD be unreachable. + continue + } + + // Reslice *here*, on the particular match index group. + // Grap the matchStr first; it's not currently *used* by anything but may in the future. + matchStr, ok = strIdxSlicer( + s, + *(*[2]int)(matchIndices[0:2]), + ) + if len(matchIndices) == 0 || len(matchIndices) == 1 { + // No *sub*matches (capture groups) in this match, but it still matched the pattern. + if inclNoMatch { + for _, grpNm = range names { + if grpNm == "" { + continue + } + // We don't immediately return, though; we just stage out group names just in case. + // That's why we use tmpMap and not matches. + if _, ok = tmpMap[grpNm]; !ok { + tmpMap[grpNm] = nil + } + } + } + continue + } + matchIndices = matchIndices[2:] + + // Reset from previous loop + endIdx = 0 + grpIdx = 0 + + for startIdx = 0; endIdx < len(matchIndices); startIdx += 2 { + endIdx = startIdx + 2 + if endIdx > len(matchIndices) { + endIdx = len(matchIndices) + } + + if grpIdx >= len(names) { + break + } + + si = stringIndexer{ + group: grpIdx, + start: matchIndices[startIdx], + end: matchIndices[endIdx-1], + matched: true, + nm: names[grpIdx], + grpS: "", + ptrn: r.Regexp, + } + grpIdx++ + // We do not include the entire match string here; + // we don't need it for this. Waste of memory. + _ = matchStr + /* + si.s = new(string) + *si.s = matchStr + */ + + if si.nm == "" { + // unnamed capture group + continue + } + + // sets si.matched and si.grpS + si.idxSlice(&s) + + if !si.matched { + if !inclNoMatch { + continue + } + if _, ok = tmpMap[si.nm]; !ok { + if !inclNoMatchStrict { + tmpMap[si.nm] = nil + } else { + tmpMap[si.nm] = []string{""} + } + } else { + if inclNoMatchStrict { + tmpMap[si.nm] = append(tmpMap[si.nm], "") + } + } + continue + } + + if _, ok = tmpMap[si.nm]; !ok { + tmpMap[si.nm] = make([]string, 0) + } + tmpMap[si.nm] = append(tmpMap[si.nm], si.grpS) + } + } + + if inclNoMatch { + for _, grpNm = range names { + if grpNm == "" { + continue + } if _, ok = tmpMap[grpNm]; !ok { tmpMap[grpNm] = nil } diff --git a/remap/funcs_remap_test.go b/remap/funcs_remap_test.go index 9a25fbf..0b5d7de 100644 --- a/remap/funcs_remap_test.go +++ b/remap/funcs_remap_test.go @@ -1,9 +1,9 @@ package remap import ( - `fmt` - `reflect` - `regexp` + "fmt" + "reflect" + "regexp" "testing" ) @@ -12,6 +12,7 @@ type ( Nm string S string M *ReMap + All bool Expected map[string][][]byte ExpectedStr map[string][]string ParamInclNoMatch bool @@ -25,12 +26,14 @@ func TestRemap(t *testing.T) { var matches map[string][][]byte for midx, m := range []testMatcher{ + // 1 testMatcher{ Nm: "No matches", S: "this is a test", M: &ReMap{regexp.MustCompile(``)}, Expected: nil, }, + // 2 testMatcher{ Nm: "Single mid match", S: "This contains a single match in the middle of a string", @@ -39,6 +42,7 @@ func TestRemap(t *testing.T) { "g1": [][]byte{[]byte("match")}, }, }, + // 3 testMatcher{ Nm: "multi mid match", S: "This contains a single match and another match in the middle of a string", @@ -50,6 +54,7 @@ func TestRemap(t *testing.T) { }, }, }, + // 4 testMatcher{ Nm: "line match", S: "This\ncontains a\nsingle\nmatch\non a dedicated line", @@ -60,10 +65,12 @@ func TestRemap(t *testing.T) { }, }, }, + // 5 testMatcher{ - Nm: "multiline match", - S: "This\ncontains a\nsingle match and another\nmatch\nin the middle of a string", - M: &ReMap{regexp.MustCompile(`\s+(?Pmatch) and another\s+(?Pmatch)\s+`)}, + Nm: "multiline match", + S: "This\ncontains a\nsingle match and another\nmatch\nin the middle of a string", + M: &ReMap{regexp.MustCompile(`\s+(?Pmatch) and another\s+(?Pmatch)\s+`)}, + All: true, Expected: map[string][][]byte{ "g1": [][]byte{ []byte("match"), @@ -71,8 +78,32 @@ func TestRemap(t *testing.T) { }, }, }, + // 6 + // More closely mirrors something closer to real-life + testMatcher{ + Nm: "mixed match", + S: " # No longer log hits/reqs/resps to file.\n" + + " #access_log /mnt/nginx_logs/vhost/tenant/site/access.log main;\n" + + " #error_log /mnt/nginx_logs/vhost/tenant/site/error.log;\n" + + " access_log off;\n" + + " error_log /dev/null;\n\n" + + " ssl_certificate /etc/nginx/tls/crt/tenant.pem;\n" + + " ssl_certificate_key /etc/nginx/tls/key/tenant.pem;\n\n", + M: &ReMap{regexp.MustCompile(`(?m)^\s*(?:error|access)_log\s+(?P.+);\s*$`)}, + All: true, + Expected: map[string][][]byte{ + "logpath": [][]byte{ + []byte("off"), + []byte("/dev/null"), + }, + }, + }, } { - matches = m.M.Map([]byte(m.S), false, false, false) + if m.All { + matches = m.M.MapAll([]byte(m.S), false, false, false) + } else { + matches = m.M.Map([]byte(m.S), false, false, false) + } t.Logf( "#%d:\n\tsrc:\t'%s'\n\tptrn:\t'%s'\n\tmatch:\t%s\n", midx+1, @@ -81,7 +112,7 @@ func TestRemap(t *testing.T) { testBmapToStrMap(matches), ) if !reflect.DeepEqual(matches, m.Expected) { - t.Fatalf("Case #%d (\"%s\"): '%#v' != '%#v'", midx+1, m.Nm, m.Expected, matches) + t.Fatalf("Case #%d (\"%s\"): expected '%#v' != received '%#v'", midx+1, m.Nm, m.Expected, matches) } } @@ -165,7 +196,11 @@ func TestRemapParams(t *testing.T) { ParamInclMustMatch: true, }, } { - matches = m.M.Map([]byte(m.S), m.ParamInclNoMatch, m.ParamInclNoMatchStrict, m.ParamInclMustMatch) + if m.All { + matches = m.M.MapAll([]byte(m.S), m.ParamInclNoMatch, m.ParamInclNoMatchStrict, m.ParamInclMustMatch) + } else { + matches = m.M.Map([]byte(m.S), m.ParamInclNoMatch, m.ParamInclNoMatchStrict, m.ParamInclMustMatch) + } t.Logf( "%d: %v/%v/%v: %#v\n", midx+1, m.ParamInclNoMatch, m.ParamInclNoMatchStrict, m.ParamInclMustMatch, matches, @@ -182,12 +217,14 @@ func TestRemapString(t *testing.T) { var matches map[string][]string for midx, m := range []testMatcher{ + // 1 testMatcher{ Nm: "No matches", S: "this is a test", M: &ReMap{regexp.MustCompile(``)}, ExpectedStr: nil, }, + // 2 testMatcher{ Nm: "Single mid match", S: "This contains a single match in the middle of a string", @@ -196,6 +233,7 @@ func TestRemapString(t *testing.T) { "g1": []string{"match"}, }, }, + // 3 testMatcher{ Nm: "multi mid match", S: "This contains a single match and another match in the middle of a string", @@ -207,6 +245,7 @@ func TestRemapString(t *testing.T) { }, }, }, + // 4 testMatcher{ Nm: "line match", S: "This\ncontains a\nsingle\nmatch\non a dedicated line", @@ -217,10 +256,12 @@ func TestRemapString(t *testing.T) { }, }, }, + // 5 testMatcher{ - Nm: "multiline match", - S: "This\ncontains a\nsingle match and another\nmatch\nin the middle of a string", - M: &ReMap{regexp.MustCompile(`\s+(?Pmatch) and another\s+(?Pmatch)\s+`)}, + Nm: "multiline match", + S: "This\ncontains a\nsingle match and another\nmatch\nin the middle of a string", + M: &ReMap{regexp.MustCompile(`\s+(?Pmatch) and another\s+(?Pmatch)\s+`)}, + All: true, ExpectedStr: map[string][]string{ "g1": []string{ "match", @@ -228,8 +269,32 @@ func TestRemapString(t *testing.T) { }, }, }, + // 6 + // More closely mirrors something closer to real-life + testMatcher{ + Nm: "mixed match", + S: " # No longer log hits/reqs/resps to file.\n" + + " #access_log /mnt/nginx_logs/vhost/tenant/site/access.log main;\n" + + " #error_log /mnt/nginx_logs/vhost/tenant/site/error.log;\n" + + " access_log off;\n" + + " error_log /dev/null;\n\n" + + " ssl_certificate /etc/nginx/tls/crt/tenant.pem;\n" + + " ssl_certificate_key /etc/nginx/tls/key/tenant.pem;\n\n", + M: &ReMap{regexp.MustCompile(`(?m)^\s*(?:error|access)_log\s+(?P.+);\s*$`)}, + All: true, + ExpectedStr: map[string][]string{ + "logpath": []string{ + "off", + "/dev/null", + }, + }, + }, } { - matches = m.M.MapString(m.S, false, false, false) + if m.All { + matches = m.M.MapStringAll(m.S, false, false, false) + } else { + matches = m.M.MapString(m.S, false, false, false) + } t.Logf( "#%d:\n\tsrc:\t'%s'\n\tptrn:\t'%s'\n\tmatch:\t%s\n", midx+1, diff --git a/remap/funcs_stringindexer.go b/remap/funcs_stringindexer.go new file mode 100644 index 0000000..95f484b --- /dev/null +++ b/remap/funcs_stringindexer.go @@ -0,0 +1,34 @@ +package remap + +// idx returns []int{s.start, s.end}. +func (s *stringIndexer) idx() (i []int) { + return []int{s.start, s.end} +} + +// idxStrict returns [2]int{s.start, s.end}. +func (s *stringIndexer) idxStrict() (i [2]int) { + return [2]int{s.start, s.end} +} + +/* +idxSlice populates s.grpS using s.start and s.end. + +If str is nil, it will use s.s. +If str is nil and s.s is nil, it will panic with [ErrNoStr]. + +If the pattern does not match (s.start < 0 or s.end < 0), +s.matched will be set to false (otherwise true). +*/ +func (s *stringIndexer) idxSlice(str *string) { + + if str == nil { + if s.s == nil { + panic(ErrNoStr) + } + str = s.s + } + + s.grpS, s.matched = strIdxSlicer(*str, s.idxStrict()) + + return +} diff --git a/remap/types.go b/remap/types.go index c54fc37..f23d6ab 100644 --- a/remap/types.go +++ b/remap/types.go @@ -24,4 +24,45 @@ type ( } */ + + stringIndexer struct { + // group is the capture group index for this match. + group int + // start is the string index (from the original string) where the matched group starts + start int + // end is the string index where the matched group ends + end int + /* + matched indicates if explicitly no match was found. + (This is normally indeterminate with string regex returns, + as e.g. `(?P\s*)`, `(?P(?:somestring)?)`, etc. all can be a *matched* "".) + + If grpS == "" and matched == true, it DID match an empty string. + If grpS == "" and matched == false, it DID NOT MATCH the pattern. + If grpS != "", matched can be completely disregarded. + */ + matched bool + // nm is the match group name. + nm string + /* + grpS is the actual group-matched *substring*. + + It will ALWAYS be either: + + * the entirety of s + * a substring of s + * an empty string + + it will never, and cannot be, a SUPERset of s. + it may not always be included/populated to save on memory. + */ + grpS string + /* + s is the *entire* MATCHED (sub)string. + It may not always be populated if not needed to save memory. + */ + s *string + // ptrn is the pattern applied to s. + ptrn *regexp.Regexp + } )