v1.9.4

FIXED: * remap.ReMap.MapString() was not properly correlating groups. It is now.
v1.9.3
2025-08-17 00:45:24 -04:00 · 2025-08-12 00:06:51 -04:00
3 changed files with 248 additions and 46 deletions
--- a/remap/doc.go
+++ b/remap/doc.go
@@ -1,4 +1,4 @@
 /*
-Package remap provides convenience functions around regular expressions.
+Package remap provides convenience functions around regular expressions, primarily offering maps for named capture groups.
 */
 package remap
--- a/remap/funcs_remap.go
+++ b/remap/funcs_remap.go
@@ -1,11 +1,67 @@
 package remap
 /*
-	Map returns a map[string]<match bytes> for regexes with named capture groups matched in bytes b.
+Map returns a map[string][]<match bytes> for regexes with named capture groups matched in bytes b.
-	Note that this supports non-unique group names; regexp.Regexp allows for patterns with multiple groups
+Note that this supports non-unique group names; [regexp.Regexp] allows for patterns with multiple groups
-	using the same group name. Each match for each group is in a slice keyed under that group name, with
+using the same group name (though your IDE might complain; I know GoLand does).
-	that slice ordered by the indexing done by the regex match itself.
+
-	matches and/or its values may be nil or empty under the following condition tree:
+Each match for each group is in a slice keyed under that group name, with that slice
 ordered by the indexing done by the regex match itself.
 In summary, the parameters are as follows:
 # inclNoMatch
 If true, then attempt to return a non-nil matches (as long as b isn't nil).
 Group keys will be populated and explicitly defined as nil.
 For example, if a pattern
 	^(?P<g1>foo)(?P<g1>bar)(?P<g2>baz)$
 is provided but b does not match then matches will be:
 	map[string][][]byte{
 		"g1": nil,
 		"g2": nil,
 	}
 # inclNoMatchStrict
 If true (and inclNoMatch is true), instead of a single nil the group's values will be
 a slice of nil values explicitly matching the number of times the group name is specified
 in the pattern.
 For example, if a pattern:
 	^(?P<g1>foo)(?P<g1>bar)(?P<g2>baz)$
 is provided but b does not match then matches will be:
 	map[string][][]byte{
 		"g1": [][]byte{
 			nil,
 			nil,
 		},
 		"g2": [][]byte{
 			nil,
 		},
 	}
 # mustMatch
 If true, matches will be nil if the entirety of b does not match the pattern (and thus
 no capture groups matched) (overrides inclNoMatch) -- explicitly:
 	matches == nil
 Otherwise if false (and assuming inclNoMatch is false), matches will be:
 	map[string][][]byte{}
 # Condition Tree
 In detail, matches and/or its values may be nil or empty under the following condition tree:
 	IF b is nil:
 		THEN matches will always be nil
@@ -79,7 +135,7 @@ func (r *ReMap) Map(b []byte, inclNoMatch, inclNoMatchStrict, mustMatch bool) (m
 	if len(matchBytes) == 0 || len(matchBytes) == 1 {
 		/*
 			no submatches whatsoever.
-			*technically* I don't think this condition can actually be reached.
+			*Technically* I don't think this condition can actually be reached.
 			This is more of a safe-return before we re-slice.
 		*/
 		matches = make(map[string][][]byte)
@@ -148,9 +204,100 @@ func (r *ReMap) Map(b []byte, inclNoMatch, inclNoMatchStrict, mustMatch bool) (m
 }
 /*
-	MapString is exactly like ReMap.Map(), but operates on (and returns) strings instead. (matches will always be nil if s == ``.)
+MapString is exactly like ReMap.Map(), but operates on (and returns) strings instead.
 (matches will always be nil if s == "".)
-	A small deviation, though; empty strings instead of nils (because duh) will occupy placeholders (if inclNoMatchStrict is specified).
+A small deviation, though; empty strings instead of nils (because duh) will occupy slice placeholders (if `inclNoMatchStrict` is specified).
 This unfortunately *does not provide any indication* if an empty string positively matched the pattern (a "hit") or if it was simply
 not matched at all (a "miss"). If you need definitive determination between the two conditions, it is instead recommended to either
 *not* use inclNoMatchStrict or to use ReMap.Map() instead and convert any non-nil values to strings after.
 Particularly:
 # inclNoMatch
 If true, then attempt to return a non-nil matches (as long as s isn't empty).
 Group keys will be populated and explicitly defined as nil.
 For example, if a pattern
 	^(?P<g1>foo)(?P<g1>bar)(?P<g2>baz)$
 is provided but s does not match then matches will be:
 	map[string][]string{
 		"g1": nil,
 		"g2": nil,
 	}
 # inclNoMatchStrict
 If true (and inclNoMatch is true), instead of a single nil the group's values will be
 a slice of empty string values explicitly matching the number of times the group name is specified
 in the pattern.
 For example, if a pattern:
 	^(?P<g1>foo)(?P<g1>bar)(?P<g2>baz)$
 is provided but s does not match then matches will be:
 	map[string][]string{
 		"g1": []string{
 			"",
 			"",
 		},
 		"g2": []string{
 			"",
 		},
 	}
 # mustMatch
 If true, matches will be nil if the entirety of s does not match the pattern (and thus
 no capture groups matched) (overrides inclNoMatch) -- explicitly:
 	matches == nil
 Otherwise if false (and assuming inclNoMatch is false), matches will be:
 	map[string][]string{}
 # Condition Tree
 In detail, matches and/or its values may be nil or empty under the following condition tree:
 	IF s is empty:
 		THEN matches will always be nil
 	ELSE:
 		IF all of s does not match pattern
 			IF mustMatch is true
 				THEN matches == nil
 			ELSE
 				THEN matches == map[string][]string{} (non-nil but empty)
 		ELSE IF pattern has no named capture groups
 			IF inclNoMatch is true
 				THEN matches == map[string][]string{} (non-nil but empty)
 			ELSE
 				THEN matches == nil
 		ELSE
 			IF there are no named group matches
 				IF inclNoMatch is true
 					THEN matches is non-nil; matches[<group name>, ...] is/are defined but nil (_, ok = matches[<group name>]; ok == true)
 				ELSE
 					THEN matches == nil
 			ELSE
 				IF <group name> does not have a match
 					IF inclNoMatch is true
 						IF inclNoMatchStrict is true
 							THEN matches[<group name>] is defined and non-nil, but populated with placeholder empty strings
 								(matches[<group name>] == []string{""[, ""...]})
 						ELSE
 							THEN matches[<group name>] is guaranteed defined but may be nil (_, ok = matches[<group name>]; ok == true)
 					ELSE
 						THEN matches[<group name>] is not defined (_, ok = matches[<group name>]; ok == false)
 				ELSE
 					matches[<group name>] == []string{<match>[, <match>...]}
 */
 func (r *ReMap) MapString(s string, inclNoMatch, inclNoMatchStrict, mustMatch bool) (matches map[string][]string) {
@@ -161,6 +308,13 @@ func (r *ReMap) MapString(s string, inclNoMatch, inclNoMatchStrict, mustMatch bo
 	var grpNm string
 	var names []string
 	var matchStr string
 	/*
 		A slice of indices or index pairs.
 		For each element `e` in idxChunks,
 		* if `e` is nil, no group match.
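 		* if len(e) == 1, only a single character was matched.
 		  NOTE(review): the chunking below yields len(e) == 1 when start == end, which
 		  (*regexp.Regexp).FindStringSubmatchIndex reports for an *empty* match -- confirm.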
 		* otherwise len(e) == 2, the start and end of the match.
 	*/
 	var idxChunks [][]int
 	var matchIndices []int
 	var chunkIndices []int // always 2 elements; start pos and end pos
@@ -170,7 +324,7 @@ func (r *ReMap) MapString(s string, inclNoMatch, inclNoMatchStrict, mustMatch bo
 		OK so this is a bit of a deviation.
 		It's not as straightforward as above, because there isn't an explicit way
-		like above to determine if a patterb was *matched as an empty string* vs.
+		like above to determine if a pattern was *matched as an empty string* vs.
 		*not matched*.
 		So instead do roundabout index-y things.
@@ -179,73 +333,111 @@ func (r *ReMap) MapString(s string, inclNoMatch, inclNoMatchStrict, mustMatch bo
 	if s == "" {
 		return
 	}
-	names = r.Regexp.SubexpNames()
+	/*
 		I'm not entirely sure how serious they are about "the slice should not be modified"...
 		DO NOT sort or dedupe `names`! If the same name for groups is duplicated,
 		it will be duplicated here in proper order and the ordering is tied to
 		the ordering of matchIndices.
 	*/
 	names = r.Regexp.SubexpNames()[:]
 	matchIndices = r.Regexp.FindStringSubmatchIndex(s)
 	if matchIndices == nil {
-		// s does not match pattern
+		// s does not match pattern at all.
 		if !mustMatch {
 			matches = make(map[string][]string)
 		}
 		return
 	}
-	if names == nil || len(names) == 0 || len(names) == 1 {
+	if names == nil || len(names) <= 1 {
 		/*
-			no named capture groups;
+			No named capture groups;
-			technically only the last condition would be the case.
+			technically only the last condition would be the case,
 			as (regexp.Regexp).SubexpNames() will ALWAYS at the LEAST
 			return a `[]string{""}`.
 		*/
 		if inclNoMatch {
 			matches = make(map[string][]string)
 		}
 		return
 	}
 	names = names[1:]
 	if len(matchIndices) == 0 || len(matchIndices) == 1 {
 		/*
-			no submatches whatsoever.
+			No (sub)matches whatsoever.
-			*technically* I don't think this condition can actually be reached.
+			*technically* I don't think this condition can actually be reached;
 			matchIndices should ALWAYS either be `nil` or len will be at LEAST 2,
 			and modulo 2 thereafter since they're PAIRS of indices...
 			Why they didn't just return a [][]int or [][2]int or something
 			instead of an []int, who knows.
 			But we're correcting that poor design.
 			This is more of a safe-return before we chunk the indices.
 		*/
 		matches = make(map[string][]string)
 		if inclNoMatch {
 			if len(names) >= 1 {
 			for _, grpNm = range names {
 				if grpNm != "" {
 					matches[grpNm] = nil
 				}
 			}
 		}
 		return
 	}
 	/*
 		The reslice starts at 2 because they're in pairs: []int{<start>, <end>, <start>, <end>, ...}
 		and the first *pair* is the entire pattern match.
 		Thus the len(matchIndices) == 2*len(names).
 		Keep in mind that since the first element of names is removed,
 		the first pair here is also removed.
 	*/
 	matchIndices = matchIndices[2:]
 	/*
 		A reslice of `matchIndices` could technically start at 2 (as long as `names` is sliced [1:])
 		because they're in pairs: []int{<start>, <end>, <start>, <end>, ...}
 		and the first pair is the entire pattern match (un-resliced names[0]).
 		Thus the len(matchIndices) == 2*len(names), *even* if you
 		Keep in mind that since the first element of names is removed,
 		the first pair here is skipped.
 		This provides a bit more consistent readability, though.
 	*/
 	idxChunks = make([][]int, len(names))
-	for startIdx = 0; startIdx < len(idxChunks); startIdx += 2 {
+	chunkIdx = 0
 	endIdx = 0
 	for startIdx = 0; endIdx < len(matchIndices); startIdx += 2 {
 		endIdx = startIdx + 2
 		// This technically should never happen.
 		if endIdx > len(matchIndices) {
 			endIdx = len(matchIndices)
 		}
 		chunkIndices = matchIndices[startIdx:endIdx]
 		if chunkIndices[0] == -1 || chunkIndices[1] == -1 {
 			// group did not match
 			chunkIndices = nil
 		} else {
 			if chunkIndices[0] == chunkIndices[1] {
 				chunkIndices = []int{chunkIndices[0]}
 			} else {
 				chunkIndices = matchIndices[startIdx:endIdx]
 			}
 		}
 		idxChunks[chunkIdx] = chunkIndices
 		chunkIdx++
 	}
 	// Now associate with names and pull the string sequence.
 	for chunkIdx, chunkIndices = range idxChunks {
 		grpNm = names[chunkIdx]
 		/*
 			Thankfully, it's actually a build error if a pattern specifies a named
 			capture group with an empty name.
 			So we don't need to worry about accounting for that,
-			and can just skip over grpNm == "" (which is an *unnamed* capture group).
+			and can just skip over grpNm == ""
 			(which is either an *unnamed* capture group
 			OR the first element in `names`, which is always
 			the entire match).
 		*/
 		if grpNm == "" {
 			continue
 		}
-		// This technically should never happen.
+
-		if endIdx > len(matchIndices) {
+		if chunkIndices == nil || len(chunkIndices) == 0 {
 			endIdx = len(matchIndices)
 		}
 		chunkIndices = matchIndices[startIdx:endIdx]
 		if chunkIndices[0] == -1 || chunkIndices[1] == -1 {
 			// group did not match
 			if !inclNoMatch {
 				continue
@@ -264,13 +456,19 @@ func (r *ReMap) MapString(s string, inclNoMatch, inclNoMatchStrict, mustMatch bo
 			continue
 		}
 		switch len(chunkIndices) {
 		case 1:
 			// Single character
 			matchStr = string(s[chunkIndices[0]])
 		case 2:
 			// Multiple characters
 			matchStr = s[chunkIndices[0]:chunkIndices[1]]
 		}
 		if _, ok = tmpMap[grpNm]; !ok {
 			tmpMap[grpNm] = make([]string, 0)
 		}
 		tmpMap[grpNm] = append(tmpMap[grpNm], matchStr)
 		chunkIdx++
 	}
 	// This *technically* should be completely handled above.
--- a/remap/types.go
+++ b/remap/types.go
@@ -1,7 +1,7 @@
 package remap
 import (
-	`regexp`
+	"regexp"
 )
 type (
@@ -10,14 +10,18 @@ type (
 		*regexp.Regexp
 	}
 	// TODO?
 	/*
 		ExplicitStringMatch is used with ReMap.MapStringExplicit to indicate if a
 		capture group result is a hit (a group matched, but e.g. the match value is empty string)
-		or not (a group did not match)
+		or not (a group did not match).
 	*/
 	/*
 		ExplicitStringMatch struct {
 			Group   string
 			IsMatch bool
 			Value   string
 		}
 	*/
 )
Author	SHA1	Message	Date
brent saner	07951f1f03	v1.9.4 FIXED: * remap.ReMap.MapString() was not properly correlating groups. It is now.	2025-08-17 00:45:24 -04:00
brent saner	bae0abe960	v1.9.3 IMPROVED: * Better documentation for remap	2025-08-12 00:06:51 -04:00