File 0286-Reject-invalid-use-of-caret-notation.patch of Package erlang
From b0b36babf25cd65abf9fa6133bd876f311e3d54a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= <bjorn@erlang.org>
Date: Thu, 24 Nov 2022 09:18:32 +0100
Subject: [PATCH] Reject invalid use of caret notation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
It is documented that `$\^X` is the code point (ASCII code) for
Control X, where X is an uppercase or lowercase letter.
It turns out that it works for **all** characters:
1> $\^@.
0
2> $\^_.
31
3> $\^Γ.
19
4> $\^*.
10
5> $\^😀.
0
6> $\^?.
31
Some of those are reasonable. `^@` and `^_` are allowed in Emacs and
most other tools that support the caret notation.
It is reasonable to allow `^?`, but the value of it should
be 127 (Delete) as in all other tools.
Allowing arbitrary letters and symbols does not make sense.
Therefore, this commit modifies the rules for the caret notation to
allow all characters in the range `16#40` through `16#5F` as well as
lowercase `a` through `z`. That is, the following characters are
allowed: `@`, `A`-`Z`, `[`, `\`, `]`, `^`, `_`, and `a`-`z`. That
makes it possible to express all control codes from 0 through 31.
Also allowed is `?`, but the value of it will now be 127 instead
of 31. That is a potential incompatibility, but only for users who
used it despite it being undocumented.
Closes #6477
---
lib/stdlib/src/erl_scan.erl | 16 ++--
lib/stdlib/test/erl_scan_SUITE.erl | 23 ++++--
system/doc/reference_manual/data_types.xml | 86 +++++++++++++++-------
3 files changed, 88 insertions(+), 37 deletions(-)
diff --git a/lib/stdlib/src/erl_scan.erl b/lib/stdlib/src/erl_scan.erl
index e7e2582c03..2d5e5e1b1e 100644
--- a/lib/stdlib/src/erl_scan.erl
+++ b/lib/stdlib/src/erl_scan.erl
@@ -957,12 +957,14 @@ scan_escape([$x,H1], _Col) when ?HEX(H1) ->
more;
scan_escape([$x|Cs], Col) ->
{error,Cs,{illegal,character},incr_column(Col, 1)};
-%% \^X -> CTL-X
-scan_escape([$^=C0,$\n=C|Cs], Col) ->
- {nl,C,[C0,C],Cs,new_column(Col, 1)};
+%% \^X -> Control-X
scan_escape([$^=C0,C|Cs], Col) when ?CHAR(C) ->
- Val = C band 31,
- {Val,[C0,C],Cs,incr_column(Col, 2)};
+ case caret_char_code(C) of
+ error ->
+ {error,[C|Cs],{illegal,character},incr_column(Col, 1)};
+ Code ->
+ {Code,[C0,C],Cs,incr_column(Col, 2)}
+ end;
scan_escape([$^], _Col) ->
more;
scan_escape([$^|eof], Col) ->
@@ -1017,6 +1019,10 @@ escape_char($s) -> $\s; % \s = SPC
escape_char($d) -> $\d; % \d = DEL
escape_char(C) -> C.
+caret_char_code($?) -> 16#7f;          % \^? = DEL (127)
+caret_char_code(C) when $@ =< C, C =< $_; $a =< C, C =< $z -> C band 16#1f; % low 5 bits: \^@ = 0 ... \^_ = 31
+caret_char_code(_) -> error.           % any other character is not valid after \^
+
scan_number(Cs, St, Line, Col, Toks, {Ncs, Us}) ->
scan_number(Cs, St, Line, Col, Toks, Ncs, Us).
diff --git a/lib/stdlib/test/erl_scan_SUITE.erl b/lib/stdlib/test/erl_scan_SUITE.erl
index 6c4694bebe..96c68039ae 100644
--- a/lib/stdlib/test/erl_scan_SUITE.erl
+++ b/lib/stdlib/test/erl_scan_SUITE.erl
@@ -516,12 +516,17 @@ chars() ->
test_string(L, Ts)
end || C <- lists:seq(0, 255)],
- %% $\^\n now increments the line...
+ %% GH-6477. Test legal use of caret notation.
[begin
L = "$\\^" ++ [C],
- Ts = [{char,{1,1},C band 2#11111}],
+ Ts = case C of
+ $? ->
+ [{char,{1,1},127}];
+ _ ->
+ [{char,{1,1},C band 2#11111}]
+ end,
test_string(L, Ts)
- end || C <- lists:seq(0, 255)],
+ end || C <- lists:seq($?, $Z) ++ lists:seq($a, $z)],
[begin
L = "$\\" ++ [C],
@@ -672,6 +677,12 @@ illegal() ->
erl_scan:string(String, {1,1}),
{done,{error,{{1,4},erl_scan,{illegal,character}},{1,14}},"34\". "} =
erl_scan:tokens([], String++". ", {1,1}),
+
+ %% GH-6477. Test for illegal characters in caret notation.
+ _ = [begin
+ S = [$$,$\\,$^,C],
+ {error,{1,erl_scan,{illegal,character}},1} = erl_scan:string(S)
+ end || C <- lists:seq(0, 16#3e) ++ [16#60] ++ lists:seq($z+1, 16#10ffff)],
ok.
crashes() ->
@@ -874,7 +885,7 @@ unicode() ->
erl_scan:string("'a" ++ [999999999] ++ "c'", {1,1}),
test("\"a"++[1089]++"b\""),
- {ok,[{char,1,1}],1} =
+ {error,{1,erl_scan,{illegal,character}},1} =
erl_scan_string([$$,$\\,$^,1089], 1),
{error,{1,erl_scan,Error},1} =
@@ -911,7 +922,7 @@ unicode() ->
U3 = "\"a\n\\x{fff}\n\"",
{ok,[{string,1,[$a,$\n,$\x{fff},$\n]}],3} = erl_scan_string(U3, 1),
- U4 = "\"\\^\n\\x{aaa}\\^\n\"",
+ U4 = "\"\n\\x{aaa}\n\"",
{ok,[{string,1,[$\n,$\x{aaa},$\n]}],3} = erl_scan_string(U4, 1),
%% Keep these tests:
@@ -1026,7 +1037,7 @@ otp_10302(Config) when is_list(Config) ->
U3 = "\"a\n\\x{fff}\n\"",
{ok,[{string,1,[97,10,4095,10]}],3} = erl_scan_string(U3, 1),
- U4 = "\"\\^\n\\x{aaa}\\^\n\"",
+ U4 = "\"\n\\x{aaa}\n\"",
{ok,[{string,1,[10,2730,10]}],3} = erl_scan_string(U4, 1,[]),
Str1 = "\"ab" ++ [1089] ++ "cd\"",
diff --git a/system/doc/reference_manual/data_types.xml b/system/doc/reference_manual/data_types.xml
index 1a4bdc5680..b8e2f5d7d5 100644
--- a/system/doc/reference_manual/data_types.xml
+++ b/system/doc/reference_manual/data_types.xml
@@ -421,75 +421,109 @@ true</pre>
<cell align="left" valign="middle"><em>Description</em></cell>
</row>
<row>
- <cell align="left" valign="middle">\b</cell>
- <cell align="left" valign="middle">Backspace</cell>
+ <cell align="left" valign="middle"><c>\b</c></cell>
+ <cell align="left" valign="middle">Backspace (ASCII code 8)</cell>
</row>
<row>
- <cell align="left" valign="middle">\d</cell>
- <cell align="left" valign="middle">Delete</cell>
+ <cell align="left" valign="middle"><c>\d</c></cell>
+ <cell align="left" valign="middle">Delete (ASCII code 127)</cell>
</row>
<row>
- <cell align="left" valign="middle">\e</cell>
- <cell align="left" valign="middle">Escape</cell>
+ <cell align="left" valign="middle"><c>\e</c></cell>
+ <cell align="left" valign="middle">Escape (ASCII code 27)</cell>
</row>
<row>
- <cell align="left" valign="middle">\f</cell>
- <cell align="left" valign="middle">Form feed</cell>
+ <cell align="left" valign="middle"><c>\f</c></cell>
+ <cell align="left" valign="middle">Form Feed (ASCII code 12)</cell>
</row>
<row>
- <cell align="left" valign="middle">\n</cell>
- <cell align="left" valign="middle">Newline</cell>
+ <cell align="left" valign="middle"><c>\n</c></cell>
+ <cell align="left" valign="middle">Line Feed/Newline (ASCII code 10)</cell>
</row>
<row>
- <cell align="left" valign="middle">\r</cell>
- <cell align="left" valign="middle">Carriage return</cell>
+ <cell align="left" valign="middle"><c>\r</c></cell>
+ <cell align="left" valign="middle">Carriage Return (ASCII code 13)</cell>
</row>
<row>
- <cell align="left" valign="middle">\s</cell>
- <cell align="left" valign="middle">Space</cell>
+ <cell align="left" valign="middle"><c>\s</c></cell>
+ <cell align="left" valign="middle">Space (ASCII code 32)</cell>
</row>
<row>
- <cell align="left" valign="middle">\t</cell>
- <cell align="left" valign="middle">Tab</cell>
+ <cell align="left" valign="middle"><c>\t</c></cell>
+ <cell align="left" valign="middle">(Horizontal) Tab (ASCII code 9)</cell>
</row>
<row>
- <cell align="left" valign="middle">\v</cell>
- <cell align="left" valign="middle">Vertical tab</cell>
+ <cell align="left" valign="middle"><c>\v</c></cell>
+ <cell align="left" valign="middle">Vertical Tab (ASCII code 11)</cell>
</row>
<row>
- <cell align="left" valign="middle">\XYZ, \YZ, \Z</cell>
+ <cell align="left" valign="middle"><c>\</c>XYZ, <c>\</c>YZ, <c>\</c>Z</cell>
<cell align="left" valign="middle">Character with octal
representation XYZ, YZ or Z</cell>
</row>
<row>
- <cell align="left" valign="middle">\xXY</cell>
+ <cell align="left" valign="middle"><c>\xXY</c></cell>
<cell align="left" valign="middle">Character with hexadecimal
representation XY</cell>
</row>
<row>
- <cell align="left" valign="middle">\x{X...}</cell>
+ <cell align="left" valign="middle"><c>\x{</c>X...<c>}</c></cell>
<cell align="left" valign="middle">Character with hexadecimal
representation; X... is one or more hexadecimal characters</cell>
</row>
<row>
- <cell align="left" valign="middle">\^a...\^z <br></br>
-\^A...\^Z</cell>
+ <cell align="left" valign="middle"><c>\^a</c>...<c>\^z</c> <br></br>
+<c>\^A</c>...<c>\^Z</c></cell>
<cell align="left" valign="middle">Control A to control Z</cell>
</row>
<row>
- <cell align="left" valign="middle">\'</cell>
+ <cell align="left" valign="middle"><c>\^@</c></cell>
+ <cell align="left" valign="middle">NUL (ASCII code 0)</cell>
+ </row>
+ <row>
+ <cell align="left" valign="middle"><c>\^[</c></cell>
+ <cell align="left" valign="middle">Escape (ASCII code 27)</cell>
+ </row>
+ <row>
+ <cell align="left" valign="middle"><c>\^\</c></cell>
+ <cell align="left" valign="middle">File Separator (ASCII code 28)</cell>
+ </row>
+ <row>
+ <cell align="left" valign="middle"><c>\^]</c></cell>
+ <cell align="left" valign="middle">Group Separator (ASCII code 29)</cell>
+ </row>
+ <row>
+ <cell align="left" valign="middle"><c>\^^</c></cell>
+ <cell align="left" valign="middle">Record Separator (ASCII code 30)</cell>
+ </row>
+ <row>
+ <cell align="left" valign="middle"><c>\^_</c></cell>
+ <cell align="left" valign="middle">Unit Separator (ASCII code 31)</cell>
+ </row>
+ <row>
+ <cell align="left" valign="middle"><c>\^?</c></cell>
+ <cell align="left" valign="middle">Delete (ASCII code 127)</cell>
+ </row>
+ <row>
+ <cell align="left" valign="middle"><c>\'</c></cell>
<cell align="left" valign="middle">Single quote</cell>
</row>
<row>
- <cell align="left" valign="middle">\"</cell>
+ <cell align="left" valign="middle"><c>\"</c></cell>
<cell align="left" valign="middle">Double quote</cell>
</row>
<row>
- <cell align="left" valign="middle">\\</cell>
+ <cell align="left" valign="middle"><c>\\</c></cell>
<cell align="left" valign="middle">Backslash</cell>
</row>
<tcaption>Recognized Escape Sequences</tcaption>
</table>
+
+
+ <note><p>As of Erlang/OTP 26, the value of <c>$\^?</c> has been
+ changed to be 127 (Delete), instead of 31. Previous releases
+ would allow any character following <c>$\^</c>; as of Erlang/OTP
+ 26, only the documented characters are allowed.</p></note>
</section>
<section>
--
2.35.3