Browse Source

sed: fix handling of escaped delimiters in s/// search pattern, closes 14541

function                                             old     new   delta
copy_parsing_escapes                                  67      96     +29
parse_regex_delim                                    109     111      +2
get_address                                          213     215      +2
add_cmd                                             1176    1178      +2
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 4/0 up/down: 35/0)               Total: 35 bytes

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
Denys Vlasenko 2 years ago
parent
commit
e998c7c032
2 changed files with 21 additions and 8 deletions
  1. editors/sed.c (+11 -8)
  2. testsuite/sed.tests (+10 -0)

+ 11 - 8
editors/sed.c

@@ -246,7 +246,6 @@ static void cleanup_outname(void)
 }
 
 /* strcpy, replacing "\from" with 'to'. If to is NUL, replacing "\any" with 'any' */
-
 static unsigned parse_escapes(char *dest, const char *string, int len, char from, char to)
 {
 	char *d = dest;
@@ -276,7 +275,7 @@ static unsigned parse_escapes(char *dest, const char *string, int len, char from
 	return d - dest;
 }
 
-static char *copy_parsing_escapes(const char *string, int len)
+static char *copy_parsing_escapes(const char *string, int len, char delim)
 {
 	const char *s;
 	char *dest = xmalloc(len + 1);
@@ -287,10 +286,15 @@ static char *copy_parsing_escapes(const char *string, int len)
 		len = parse_escapes(dest, string, len, s[1], s[0]);
 		string = dest;
 	}
+	if (delim) {
+		/* we additionally unescape any instances of escaped delimiter.
+		 * For example, in 's+9\++X+' the pattern is "9+", not "9\+".
+		 */
+		len = parse_escapes(dest, string, len, delim, delim);
+	}
 	return dest;
 }
 
-
 /*
  * index_of_next_unescaped_regexp_delim - walks left to right through a string
  * beginning at a specified index and returns the index of the next regular
@@ -347,12 +351,11 @@ static int parse_regex_delim(const char *cmdstr, char **match, char **replace)
 
 	/* save the match string */
 	idx = index_of_next_unescaped_regexp_delim(delimiter, cmdstr_ptr);
-	*match = copy_parsing_escapes(cmdstr_ptr, idx);
-
+	*match = copy_parsing_escapes(cmdstr_ptr, idx, delimiter);
 	/* save the replacement string */
 	cmdstr_ptr += idx + 1;
 	idx = index_of_next_unescaped_regexp_delim(- (int)delimiter, cmdstr_ptr);
-	*replace = copy_parsing_escapes(cmdstr_ptr, idx);
+	*replace = copy_parsing_escapes(cmdstr_ptr, idx, 0);
 
 	return ((cmdstr_ptr - cmdstr) + idx);
 }
@@ -380,7 +383,7 @@ static int get_address(const char *my_str, int *linenum, regex_t ** regex)
 			delimiter = *++pos;
 		next = index_of_next_unescaped_regexp_delim(delimiter, ++pos);
 		if (next != 0) {
-			temp = copy_parsing_escapes(pos, next);
+			temp = copy_parsing_escapes(pos, next, 0);
 			G.previous_regex_ptr = *regex = xzalloc(sizeof(regex_t));
 			xregcomp(*regex, temp, G.regex_type);
 			free(temp);
@@ -575,7 +578,7 @@ static const char *parse_cmd_args(sed_cmd_t *sed_cmd, const char *cmdstr)
 			cmdstr++;
 		}
 		len = strlen(cmdstr);
-		sed_cmd->string = copy_parsing_escapes(cmdstr, len);
+		sed_cmd->string = copy_parsing_escapes(cmdstr, len, 0);
 		cmdstr += len;
 		/* "\anychar" -> "anychar" */
 		parse_escapes(sed_cmd->string, sed_cmd->string, -1, '\0', '\0');

+ 10 - 0
testsuite/sed.tests

@@ -324,6 +324,16 @@ testing "sed zero chars match/replace logic must not falsely trigger here 2" \
 	"sed 's/ *$/_/g'" \
 	"qwerty_\n" "" "qwerty\n"
 
+# the pattern here is interpreted as "9+", not as "9\+"
+testing "sed special char as s/// delimiter, in pattern" \
+	"sed 's+9\++X+'" \
+	"X8=17\n" "" "9+8=17\n"
+
+# but in replacement string, "\&" remains "\&", not interpreted as "&"
+testing "sed special char as s/// delimiter, in replacement" \
+	"sed 's&9&X\&&'" \
+	"X&+8=17\n" "" "9+8=17\n"
+
 testing "sed /\$_in_regex/ should not match newlines, only end-of-line" \
 	"sed ': testcont; /\\\\$/{ =; N; b testcont }'" \
 	"\