File 0001-awk-restore-assignment-precedence-to-be-lower-than-t.patch of Package busybox.43070

From 75553aa56c2306898998132d639107544b4282f1 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <vda.linux@googlemail.com>
Date: Tue, 9 Jul 2024 15:30:46 +0200
Subject: [PATCH] awk: restore assignment precedence to be lower than ternary
 ?:

Something is fishy with constructs like "3==v=3" in gawk,
they should not work, but do. Ignore those for now.

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
(cherry picked from commit 38335df9e9f45378c3407defd38b5b610578bdda)
---
 editors/awk.c       | 65 ++++++++++++++++++++++++++++------
 testsuite/awk.tests | 85 +++++++++++++++++++++++++++++++++++++++------
 2 files changed, 129 insertions(+), 21 deletions(-)

diff --git a/editors/awk.c b/editors/awk.c
index c05ac3083..9c6b459bd 100644
--- a/editors/awk.c
+++ b/editors/awk.c
@@ -433,36 +433,47 @@ static const char tokenlist[] ALIGN1 =
 	;
 
 static const uint32_t tokeninfo[] ALIGN4 = {
-	0,
-	0,
+	0, /* ( */
+	0, /* ) */
 #define TI_REGEXP OC_REGEXP
-	TI_REGEXP,
+	TI_REGEXP, /* / */
+	/* >> > | */
 	xS|'a',                  xS|'w',                  xS|'|',
+	/* ++ -- */
 	OC_UNARY|xV|P(9)|'p',    OC_UNARY|xV|P(9)|'m',
 #define TI_PREINC (OC_UNARY|xV|P(9)|'P')
 #define TI_PREDEC (OC_UNARY|xV|P(9)|'M')
+	/* ++ -- $ */
 	TI_PREINC,               TI_PREDEC,               OC_FIELD|xV|P(5),
-	OC_COMPARE|VV|P(39)|5,   OC_MOVE|VV|P(38),        OC_REPLACE|NV|P(38)|'+', OC_REPLACE|NV|P(38)|'-',
-	OC_REPLACE|NV|P(38)|'*', OC_REPLACE|NV|P(38)|'/', OC_REPLACE|NV|P(38)|'%', OC_REPLACE|NV|P(38)|'&',
-	OC_BINARY|NV|P(29)|'+',  OC_BINARY|NV|P(29)|'-',  OC_REPLACE|NV|P(38)|'&', OC_BINARY|NV|P(15)|'&',
+	/* == = += -= */
+	OC_COMPARE|VV|P(39)|5,   OC_MOVE|VV|P(74),        OC_REPLACE|NV|P(74)|'+', OC_REPLACE|NV|P(74)|'-',
+	/* *= /= %= ^= (^ is exponentiation, NOT xor) */
+	OC_REPLACE|NV|P(74)|'*', OC_REPLACE|NV|P(74)|'/', OC_REPLACE|NV|P(74)|'%', OC_REPLACE|NV|P(74)|'&',
+	/* + - **= ** */
+	OC_BINARY|NV|P(29)|'+',  OC_BINARY|NV|P(29)|'-',  OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&',
+	/* / % ^ * */
 	OC_BINARY|NV|P(25)|'/',  OC_BINARY|NV|P(25)|'%',  OC_BINARY|NV|P(15)|'&',  OC_BINARY|NV|P(25)|'*',
+	/* != >= <= > */
 	OC_COMPARE|VV|P(39)|4,   OC_COMPARE|VV|P(39)|3,   OC_COMPARE|VV|P(39)|0,   OC_COMPARE|VV|P(39)|1,
 #define TI_LESS     (OC_COMPARE|VV|P(39)|2)
+	/* < !~ ~ && */
 	TI_LESS,                 OC_MATCH|Sx|P(45)|'!',   OC_MATCH|Sx|P(45)|'~',   OC_LAND|Vx|P(55),
 #define TI_TERNARY  (OC_TERNARY|Vx|P(64)|'?')
 #define TI_COLON    (OC_COLON|xx|P(67)|':')
+	/* || ? : */
 	OC_LOR|Vx|P(59),         TI_TERNARY,              TI_COLON,
 #define TI_IN       (OC_IN|SV|P(49))
 	TI_IN,
 #define TI_COMMA    (OC_COMMA|SS|P(80))
 	TI_COMMA,
 #define TI_PGETLINE (OC_PGETLINE|SV|P(37))
-	TI_PGETLINE,
+	TI_PGETLINE, /* | */
+	/* + - ! */
 	OC_UNARY|xV|P(19)|'+',   OC_UNARY|xV|P(19)|'-',   OC_UNARY|xV|P(19)|'!',
 	0, /* ] */
-	0,
-	0,
-	0,
+	0, /* { */
+	0, /* } */
+	0, /* ; */
 	0, /* \n */
 	ST_IF,        ST_DO,        ST_FOR,      OC_BREAK,
 	OC_CONTINUE,  OC_DELETE|Rx, OC_PRINT,
@@ -511,6 +522,38 @@ static const uint32_t tokeninfo[] ALIGN4 = {
 #undef OC_F
 };
 
+/* gawk 5.1.1 manpage says the precedence of comparisons and assignments are as follows:
+ *  ......
+ *  < > <= >= == !=
+ *  ~ !~
+ *  in
+ *  &&
+ *  ||
+ *  ?:
+ *  = += -= *= /= %= ^=
+ * But there are some abnormalities:
+ * awk 'BEGIN { print v=3==3,v }' - ok:
+ * 1 1
+ * awk 'BEGIN { print 3==v=3,v }' - wrong, (3==v)=3 is not a valid assignment:
+ * 1 3
+ * This also unexpectedly works: echo "foo" | awk '$1==$1="foo" {print $1}'
+ * More than one comparison op fails to parse:
+ * awk 'BEGIN { print 3==3==3 }' - syntax error (wrong, should work)
+ * awk 'BEGIN { print 3==3!=3 }' - syntax error (wrong, should work)
+ *
+ * The ternary a?b:c works as follows in gawk: "a" can't be assignment
+ * ("= has lower precedence than ?") but inside "b" or "c", assignment
+ * is higher precedence:
+ * awk 'BEGIN { u=v=w=1; print u=0?v=4:w=5; print u,v,w }'
+ * 5
+ * 5 1 5
+ * This differs from C and shell's "test" rules for ?: which have implicit ()
+ * around "b" in ?:, but not around "c" - they would barf on "w=5" above.
+ * gawk allows nesting of ?: - this works:
+ * u=0?v=4?5:6:w=7?8:9 means u=0?(v=4?5:6):(w=7?8:9)
+ * bbox is buggy here, requires parens: "u=0?(v=4):(w=5)"
+ */
+
 /* internal variable names and their initial values       */
 /* asterisk marks SPECIAL vars; $ is just no-named Field0 */
 enum {
@@ -1372,7 +1415,7 @@ static node *parse_expr(uint32_t term_tc)
 				vn = vn->a.n;
 				if (!vn->a.n) syntax_error(EMSG_UNEXP_TOKEN);
 			}
-			if (t_info == TI_TERNARY)
+			if (t_info == TI_TERNARY) /* "?" operator */
 //TODO: why?
 				t_info += PRECEDENCE(6);
 			cn = vn->a.n->r.n = new_node(t_info);
diff --git a/testsuite/awk.tests b/testsuite/awk.tests
index 4aa6b0915..2cdac5ac8 100755
--- a/testsuite/awk.tests
+++ b/testsuite/awk.tests
@@ -5,6 +5,7 @@
 
 . ./testing.sh
 
+sq="'"
 # testing "description" "command" "result" "infile" "stdin"
 
 testing "awk -F case 0" "awk -F '[#]' '{ print NF }'" ""    "" ""
@@ -475,12 +476,6 @@ testing 'awk backslash+newline eaten with no trace' \
 	"Hello world\n" \
 	'' ''
 
-testing 'awk assign while test' \
-	"awk '\$1==\$1=\"foo\" {print \$1}'" \
-	"foo\n" \
-	"" \
-	"foo"
-
 # User-supplied bug (SEGV) example, was causing use-after-realloc
 testing 'awk assign while assign' \
 	"awk '\$5=\$\$5=\$0'; echo \$?" \
@@ -539,13 +534,83 @@ testing 'awk assign while assign' \
 # If field separator FS=' ' (default), fields are split only on
 # space or tab or linefeed, NOT other whitespace.
 testing 'awk does not split on CR (char 13)' \
-	"awk '{ \$1=\$0; print }'" \
+	'awk '$sq'{ $1=$0; print }'$sq \
 	'word1 word2 word3\r word2 word3\r\n' \
 	'' 'word1 word2 word3\r'
 
-testing "awk = has higher precedence than == (despite what gawk manpage claims)" \
-	"awk 'BEGIN { v=1; print 2==v; print 2==v=2; print v; print v=3==3; print v}'" \
-	'0\n1\n2\n1\n3\n' \
+# No, it seems a bug in gawk parser.
+#testing "awk = has higher precedence than == (despite what gawk manpage claims)" \
+#	"awk 'BEGIN { v=1; print 2==v; print 2==v=2; print v; print v=3==3; print v}'" \
+#	'0\n1\n2\n1\n3\n' \
+#	'' ''
+#
+#testing 'awk assign while test' \
+#	'awk '$sq'$1==$1="foo" {print $1}'$sq \
+#	"foo\n" \
+#	"" \
+#	"foo"
+
+testing "awk = and ?: precedence" \
+	'awk '$sq'BEGIN { a=0?"bug":"ok"; print a}'$sq \
+	'ok\n' \
+	'' ''
+
+# TODO: gawk can do this: awk 'BEGIN { u=v=w=1; print u=0?v=4:w=5; print u,v,w}'
+# and even this: u=0?v=4?5:6:w=7?8:9
+
+testing 'awk gensub backslashes \' \
+	'awk '$sq'BEGIN { s="\\"; print "s=" s; print gensub("a", s, "g", "a|a") }'$sq \
+	's=\\
+\\|\\
+' '' ''
+testing 'awk gensub backslashes \\' \
+	'awk '$sq'BEGIN { s="\\\\"; print "s=" s; print gensub("a", s, "g", "a|a") }'$sq \
+	's=\\\\
+\\|\\
+' '' ''
+# gawk 5.1.1 handles trailing unpaired \ inconsistently.
+# If replace string is single \, it is used verbatim,
+# but if it is \\\ (three slashes), gawk uses "\<NUL>" (!!!), not "\\" as you would expect.
+testing 'awk gensub backslashes \\\' \
+	'awk '$sq'BEGIN { s="\\\\\\"; print "s=" s; print gensub("a", s, "g", "a|a") }'$sq \
+	's=\\\\\\
+\\\\|\\\\
+' '' ''
+testing 'awk gensub backslashes \\\\' \
+	'awk '$sq'BEGIN { s="\\\\\\\\"; print "s=" s; print gensub("a", s, "g", "a|a") }'$sq \
+	's=\\\\\\\\
+\\\\|\\\\
+' '' ''
+testing 'awk gensub backslashes \&' \
+	'awk '$sq'BEGIN { s="\\&"; print "s=" s; print gensub("a", s, "g", "a|a") }'$sq \
+	's=\\&
+&|&
+' '' ''
+testing 'awk gensub backslashes \0' \
+	'awk '$sq'BEGIN { s="\\0"; print "s=" s; print gensub("a", s, "g", "a|a") }'$sq \
+	's=\\0
+a|a
+' '' ''
+testing 'awk gensub backslashes \\0' \
+	'awk '$sq'BEGIN { s="\\\\0"; print "s=" s; print gensub("a", s, "g", "a|a") }'$sq \
+	's=\\\\0
+\\0|\\0
+' '' ''
+
+# References to empty (not provided in the input) fields in first versus subsequent lines
+testing 'awk references to empty fields' \
+	'awk '$sq'$2 != 0'$sq \
+	'a
+b
+' '' 'a\nb\n'
+
+# The "b" in "abc" should not match <b* pattern.
+# Currently we use REG_STARTEND ("This flag is a BSD extension, not present in POSIX")
+# to implement the code to handle this correctly, but if your libc has no REG_STARTEND,
+# the alternative code mishandles this case.
+testing 'awk gsub erroneous word start match' \
+	"awk 'BEGIN { a=\"abc\"; gsub(/\<b*/,\"\",a); print a }'" \
+	'abc\n' \
 	'' ''
 
 exit $FAILCOUNT
-- 
2.52.0

openSUSE Build Service is sponsored by