File 5011-Implement-triple-quoted-strings.patch of Package erlang

From ff367d9f5641a6f805d68b0aa5dfa1da848f4db8 Mon Sep 17 00:00:00 2001
From: Raimo Niskanen <raimo@erlang.org>
Date: Thu, 22 Jun 2023 18:22:50 +0200
Subject: [PATCH 1/8] Implement triple-quoted strings

As according to EEP 64, with the N>=3-quoted string extension,
all implemented in the scanner/lexer/tokenizer `erl_scan`.
---
 lib/stdlib/src/erl_scan.erl        | 251 ++++++++++++++++++++++++++++-
 lib/stdlib/test/erl_scan_SUITE.erl | 139 +++++++++++++++-
 2 files changed, 378 insertions(+), 12 deletions(-)

diff --git a/lib/stdlib/src/erl_scan.erl b/lib/stdlib/src/erl_scan.erl
index b7975c6ed2..ae5a080ab1 100644
--- a/lib/stdlib/src/erl_scan.erl
+++ b/lib/stdlib/src/erl_scan.erl
@@ -121,12 +121,16 @@
 format_error({string,Quote,Head}) ->
     lists:flatten(["unterminated " ++ string_thing(Quote) ++
                    " starting with " ++
-                   io_lib:write_string(Head, Quote)]);
+                   io_lib:write_string(Head, string_quote(Quote))]);
 format_error({illegal,Type}) ->
     lists:flatten(io_lib:fwrite("illegal ~w", [Type]));
 format_error(char) -> "unterminated character";
 format_error({base,Base}) ->
     lists:flatten(io_lib:fwrite("illegal base '~w'", [Base]));
+format_error(indentation) ->
+    "bad indentation in triple-quoted string";
+format_error(white_space) ->
+    "non-whitespace after start of triple-quoted string";
 format_error(Other) ->
     lists:flatten(io_lib:write(Other)).
 
@@ -262,12 +266,20 @@ symbol(T) ->
 %%% Local functions
 %%%
 
-string_thing($') -> "atom";   %' Stupid Emacs
-string_thing(_) -> "string".
+string_thing($') -> %' Stupid Emacs
+    "atom";
+string_thing($") -> %"
+    "string";
+string_thing({$",_}) -> %"
+    "triple-quoted string".
+
+string_quote($') -> $';
+string_quote($") -> $";
+string_quote({$",_}) -> $".
 
 -define(WHITE_SPACE(C),
-        is_integer(C) andalso
-         (C >= $\000 andalso C =< $\s orelse C >= $\200 andalso C =< $\240)).
+        (is_integer(C) andalso
+         (C >= $\000 andalso C =< $\s orelse C >= $\200 andalso C =< $\240))).
 -define(DIGIT(C), (is_integer(C) andalso $0 =< C andalso C =< $9)).
 -define(CHAR(C), (is_integer(C) andalso 0 =< C andalso C < 16#110000)).
 -define(UNICODE(C),
@@ -370,6 +382,7 @@ string1(Cs, St, Line, Col, Toks) ->
             Error
     end.
 
+
 scan(Cs, #erl_scan{}=St, Line, Col, Toks, _) ->
     scan1(Cs, St, Line, Col, Toks).
 
@@ -425,6 +438,12 @@ scan1("."=Cs, St, Line, Col, Toks) ->
     {more,{Cs,St,Col,Toks,Line,[],fun scan/6}};
 scan1([$.=C|Cs], St, Line, Col, Toks) ->
     scan_dot(Cs, St, Line, Col, Toks, [C]);
+scan1([$",$",$"|Cs], St, Line, Col, Toks) -> %" Emacs
+    scan_tqstring(Cs, St, Line, Col, Toks, 3); % Number of quote chars
+scan1([$",$"]=Cs, St, Line, Col, Toks) ->
+    {more,{Cs,St,Col,Toks,Line,[],fun scan/6}};
+scan1([$"]=Cs, St, Line, Col, Toks) -> %" Emacs
+    {more,{Cs,St,Col,Toks,Line,[],fun scan/6}};
 scan1([$"|Cs], St, Line, Col, Toks) -> %" Emacs
     State0 = {[],[],Line,Col},
     scan_string(Cs, St, Line, incr_column(Col, 1), Toks, State0);
@@ -828,6 +847,222 @@ scan_char([], St, Line, Col, Toks) ->
 scan_char(eof, _St, Line, Col, _Toks) ->
     scan_error(char, Line, Col, Line, incr_column(Col, 1), eof).
 
+-record(tqs, % Triple-quoted String state
+        {line,                  % Line number of first quote character
+         col,                   % Column number of  - " -
+         qs,                    % Number of quote characters in delimiter
+         content_r = []}).      % Reverse list of reversed content lines
+
+%% Scan leading $" characters until we have them all, then scan lines
+%%
+scan_tqstring(Cs, St, Line, Col, Toks, Qs) ->
+    case Cs of
+        [$"|Ncs] ->
+            scan_tqstring(Ncs, St, Line, Col, Toks, Qs+1);
+        [] ->
+            {more, {[], St, Col, Toks, Line, Qs, fun scan_tqstring/6}};
+        _ ->
+            Tqs = #tqs{ line = Line, col = Col, qs = Qs },
+            scan_tqstring_line(
+              Cs, St, Line, incr_column(Col, Qs), Toks,
+              0, Tqs, [])
+    end.
+
+scan_tqstring_line(Cs, St, Line, Col, Toks, {Qs, Tqs, Acc}) ->
+    scan_tqstring_line(Cs, St, Line, Col, Toks, Qs, Tqs, Acc).
+%%
+scan_tqstring_line(Cs, St, Line, Col, Toks, Qs, Tqs, Acc) ->
+    %% Qs  :: Number of end quote chars to search for, 0 means not searching
+    %% Tqs :: #tqs{}
+    %% Acc :: Reversed current line
+    case Cs of
+        [$\n=C|Ncs] ->
+            Ncol = new_column(Col, 1),
+            Nacc = [C|Acc],
+            Nqs = Tqs#tqs.qs, % Start searching for end quote chars
+            scan_tqstring_line(
+              Ncs, St, Line+1, Ncol, Toks,
+              Nqs, Tqs#tqs{ content_r = [Nacc | Tqs#tqs.content_r] }, []);
+        [$"=C|Ncs] when Qs =/= 0 ->
+            %% Possible end quote char
+            Ncol = incr_column(Col, 1),
+            Nacc = [C|Acc],
+            if
+                Qs =:= 1 ->
+                    %% This is the last end quote char
+                    %% - post process the content
+                    scan_tqstring_finish(
+                      Ncs, St, Line, Ncol, Toks, Tqs,
+                      lists:nthtail(Tqs#tqs.qs, Nacc)); % Strip them
+                true ->
+                    %% Collect and Count this end quote char
+                    scan_tqstring_line(
+                      Ncs, St, Line, Ncol, Toks,
+                      Qs-1, Tqs, Nacc)
+            end;
+        [C|Ncs] ->
+            Ncol = incr_column(Col, 1),
+            Nacc = [C|Acc],
+            if
+                Qs =/= 0, ?WHITE_SPACE(C) ->
+                    %% White space while searching for end quote chars
+                    if
+                        Qs =:= Tqs#tqs.qs ->
+                            %% White space before first end quote char
+                            %% - just collect
+                            scan_tqstring_line(
+                              Ncs, St, Line, Ncol, Toks,
+                              Qs, Tqs, Nacc);
+                        true ->
+                            %% White space after too few quote chars
+                            %% - stop searching for end quote chars
+                            scan_tqstring_line(
+                              Ncs, St, Line, Ncol, Toks,
+                              0, Tqs, Nacc)
+                    end;
+                ?UNI255(C) ->
+                    scan_tqstring_line(
+                      Ncs, St, Line, Ncol, Toks,
+                      0, Tqs, Nacc); % Stop searching for end quote chars
+                ?CHAR(C) ->
+                    %% Illegal Unicode character
+                    scan_error(
+                      {illegal,character}, Line, Col, Line, Ncol, Ncs)
+            end;
+        [] ->
+            {more,
+             {[], St, Col, Toks, Line, {Qs, Tqs, Acc},
+              fun scan_tqstring_line/6}};
+        eof ->
+            ContentR = [Acc|Tqs#tqs.content_r],
+            Estr = string:slice(tqstring_chars(ContentR), 0, 16),
+            scan_error(
+              {string,{$",Tqs#tqs.qs},Estr},%"
+              Tqs#tqs.line, Tqs#tqs.col, Line, Col, eof)
+    end.
+
+%% Strip last line newline,
+%% strip indentation,
+%% check white space on first line,
+%% create the string token,
+%% done
+%%
+scan_tqstring_finish(Cs, St, Line, Col, Toks, Tqs, IndentR) ->
+    %% IndentR :: Indentation characters, reversed
+    #tqs{ line = Line0, col = Col0, content_r = ContentR } = Tqs,
+    {Stripped, NcontentR} = strip_last_line_newline(ContentR),
+    case
+        tqstring_finish(lists:reverse(IndentR), NcontentR, Line-1, Stripped)
+    of
+        Content when is_list(Content) ->
+            Qs = Tqs#tqs.qs,
+            Chars =
+                lists_duplicate(
+                  Qs, $",%"
+                  tqstring_chars(ContentR, lists:duplicate(Qs, $"))),%"
+            Anno = anno(Line0, Col0, St, ?STR(string, St, Chars)),
+            scan1(Cs, St, Line, Col, [{string,Anno,Content}|Toks]);
+        {Tag=indentation, ErrorLine, ErrorCol, StringR} ->
+            scan_error(
+              Tag, ErrorLine, new_column(Col, 1),
+              ErrorLine, new_column(Col, ErrorCol),
+              lists:nthtail(ErrorCol, lists:reverse(StringR)));
+        {Tag=white_space, N, String} ->
+            scan_error(
+              Tag, Line0, incr_column(Col0, Tqs#tqs.qs),
+              Line0, incr_column(Col0, Tqs#tqs.qs+length(String)),
+              lists:nthtail(N, String))
+    end.
+
+%% Reconstruct the scanned triple-quoted string from content lines
+%%
+tqstring_chars(ContentR) ->
+    tqstring_chars(ContentR, "").
+%%
+tqstring_chars([], Chars) -> Chars;
+tqstring_chars([StringR|StringsR], Chars) ->
+    tqstring_chars(StringsR, lists:reverse(StringR, Chars)).
+
+%% Strip newline from the last line, but not if it is the only line,
+%% and return the newline character(s)
+%%
+strip_last_line_newline(ContentR=[_]) ->
+    {"", ContentR};
+strip_last_line_newline([LastLineR|ContentR]) ->
+    {Newline, NlastLineR} = strip_newline(LastLineR),
+    {Newline, [NlastLineR|ContentR]}.
+
+strip_newline("\n\r"++R) ->
+    {"\n\r", R};
+strip_newline("\n"++R) ->
+    {"\n", R}.
+
+%% Strip indentation from all content lines but the first,
+%% which contains the characters after the start quote chars,
+%% check that they are white space.
+%%
+%% Loop from last to first line and remember the last error,
+%% so the last error that is found will be the one reported,
+%% that is: the first in the string.
+%%
+%% Build the string content by prepending all indentation stripped
+%% lines onto the string: Content.
+%%
+tqstring_finish(Indent, ContentR, Line, Stripped) ->
+    tqstring_finish(Indent, ContentR, Line, Stripped, undefined, "").
+%%
+tqstring_finish(_Indent, [FirstLineR], _Line, _Stripped, Error, Content) ->
+    {_, NfirstLineR} = strip_newline(FirstLineR),
+    FirstLine = lists:reverse(NfirstLineR),
+    %% First line; check that it is all white space
+    case check_white_space(FirstLine) of
+        ok ->
+            if
+                Error =:= undefined ->
+                    Content;
+                true ->
+                    Error
+            end;
+        N ->
+            {white_space, N, FirstLine}
+    end;
+tqstring_finish(
+  Indent, [StringR|StringsR], Line, Stripped, Error, Content) ->
+    %% Stripped :: newline characters stripped from the last line,
+    %%             only used if there is an error on that line
+    case strip_indent(Indent, lists:reverse(StringR, Content)) of
+        Ncontent when is_list(Ncontent) ->
+            tqstring_finish(Indent, StringsR, Line-1, "", Error, Ncontent);
+        ErrorCol when is_integer(ErrorCol) ->
+            Nerror = {indentation, Line, ErrorCol, Stripped++StringR},
+            tqstring_finish(Indent, StringsR, Line-1, "", Nerror, "")
+    end.
+
+%% Strip indentation
+%%
+strip_indent(Indent, Cs) ->
+    strip_indent(Indent, Cs, 1).
+%%
+strip_indent([C|Indent], [C|Cs], Col) ->
+    strip_indent(Indent, Cs, Col+1);    % Strip
+strip_indent([], Cs, _) -> Cs;          % Done
+strip_indent(_, _, Col) -> Col.         % Incorrect indentation
+
+%% Check that all characters are white space and return 'ok',
+%% or return the number of white space characters
+check_white_space(Cs) ->
+    check_white_space(Cs, 0).
+%%
+check_white_space([], _) ->
+    ok;
+check_white_space([C|Cs], N) ->
+    if
+        ?WHITE_SPACE(C) ->
+            check_white_space(Cs, N+1);
+        true ->
+            N
+    end.
+
 scan_string(Cs, #erl_scan{}=St, Line, Col, Toks, {Wcs,Str,Line0,Col0}) ->
     case scan_string0(Cs, St, Line, Col, $\", Str, Wcs) of %"
         {more,Ncs,Nline,Ncol,Nstr,Nwcs} ->
@@ -1322,6 +1557,10 @@ new_column(no_col=Col, _Ncol) ->
 new_column(Col, Ncol) when is_integer(Col) ->
     Ncol.
 
+%% lists:duplicate/3
+lists_duplicate(0, _, L) -> L;
+lists_duplicate(N, X, L) -> lists_duplicate(N-1, X, [X|L]).
+
 nl_spcs(2)  -> "\n ";
 nl_spcs(3)  -> "\n  ";
 nl_spcs(4)  -> "\n   ";
diff --git a/lib/stdlib/test/erl_scan_SUITE.erl b/lib/stdlib/test/erl_scan_SUITE.erl
index 96c68039ae..317225f7fb 100644
--- a/lib/stdlib/test/erl_scan_SUITE.erl
+++ b/lib/stdlib/test/erl_scan_SUITE.erl
@@ -1,7 +1,7 @@
 %%
 %% %CopyrightBegin%
 %%
-%% Copyright Ericsson AB 1998-2022. All Rights Reserved.
+%% Copyright Ericsson AB 1998-2023. All Rights Reserved.
 %%
 %% Licensed under the Apache License, Version 2.0 (the "License");
 %% you may not use this file except in compliance with the License.
@@ -24,7 +24,7 @@
 
 -export([error_1/1, error_2/1, iso88591/1, otp_7810/1, otp_10302/1,
 	 otp_10990/1, otp_10992/1, otp_11807/1, otp_16480/1, otp_17024/1,
-         text_fun/1]).
+         text_fun/1, triple_quoted_string/1]).
 
 -import(lists, [nth/2,flatten/1]).
 -import(io_lib, [print/1]).
@@ -59,7 +59,7 @@ suite() ->
 
 all() -> 
     [{group, error}, iso88591, otp_7810, otp_10302, otp_10990, otp_10992,
-     otp_11807, otp_16480, otp_17024, text_fun].
+     otp_11807, otp_16480, otp_17024, text_fun, triple_quoted_string].
 
 groups() -> 
     [{error, [], [error_1, error_2]}].
@@ -90,14 +90,17 @@ error_2(Config) when is_list(Config) ->
 
 error_cases() ->
     ["'a",
-     "\"a",
+     "\"a",%"
      "'\\",
-     "\"\\",
+     "\"\\",%"
      "$",
      "$\\",
      "2.3e",
      "2.3e-",
-     "91#9"
+     "91#9",
+     "\"\"\"x",%"
+     "\"\"\"\n\"\"",%"
+     "\"\"\"\nx\n \"\"\""
     ].
 
 assert_type(N, integer) when is_integer(N) ->
@@ -1299,6 +1302,130 @@ text_fun(Config) when is_list(Config) ->
         erl_scan:string(String(All), 7, [{text_fun, KeepClass('{')}]),
     [Sep1] = lists:filter(fun(T) -> T /= undefined end, Texts(Tokens5)).
 
+triple_quoted_string(Config) when is_list(Config) ->
+    {ok,[{string,1,""}],2} =
+        erl_scan:string(
+          "\"\"\"\n"
+          "\"\"\""),
+
+    {ok,[{string,1,""}],3} =
+        erl_scan:string(
+          "\"\"\"\n"
+          "\n"
+          "\"\"\""),
+
+    {ok,[{string,1,"\n"}],4} =
+        erl_scan:string(
+          "\"\"\"\n"
+          "\n\n"
+          "\"\"\""),
+
+    {ok,[{string,1,"CR LF"}],3} =
+        erl_scan:string(
+          "\"\"\" \t\r\n"
+          "CR LF\r\n"
+          "\"\"\""),
+
+    {ok,[{string,1,"this is a\nvery long\nstring"}],5} =
+        erl_scan:string(
+          "\"\"\"\n"
+          "this is a\n"
+          "very long\n"
+          "string\n"
+          "\"\"\""),
+
+    {ok,[{string,1,"this is a\r\nvery long\r\nstring"}],5} =
+        erl_scan:string(
+          "\"\"\"\r\n"
+          "  this is a\r\n"
+          "  very long\r\n"
+          "  string\r\n"
+          "  \"\"\""),
+
+    {ok,[{string,1,"  this is a\n    very long\n  string"}],5} =
+        erl_scan:string(
+          "\"\"\"\n"
+          "\t  this is a\n"
+          "\t    very long\n"
+          "\t  string\n"
+          "\t\"\"\""),
+
+    {ok,[{string,1,"this is a \\\\\nvery long \\\\\nstring\\\\"}],5} =
+        erl_scan:string(
+          "\"\"\"\n"
+          "this is a \\\\\n"
+          "very long \\\\\n"
+          "string\\\\\n"
+          "\"\"\""),
+
+    {ok,[{string,1,
+          "this contains \"quotes\"\n"
+          "and \"\"\"triple quotes\"\"\"\n"
+          " \"\" \"\"\" and\n"
+          "ends here"}],6} =
+        erl_scan:string(
+          "\"\"\"\n"
+          "this contains \"quotes\"\n"
+          "and \"\"\"triple quotes\"\"\"\n"
+          " \"\" \"\"\" and\n"
+          "ends here\n"
+          "\"\"\""),
+
+    {ok,[{string,{1,1},
+          "```erlang\n"
+          "foo() ->\n"
+          "    \"\"\"\n"
+          "    foo\n"
+          "    bar\n"
+          "    \"\"\".\n"
+          "```"}],{9,5}} =
+        erl_scan:string(
+          "\"\"\"\"\n"
+          "```erlang\n"
+          "foo() ->\n"
+          "    \"\"\"\n"
+          "    foo\n"
+          "    bar\n"
+          "    \"\"\".\n"
+          "```\n"
+          "\"\"\"\"", {1,1}, []),
+
+    {ok,[{string,1,"5-quoted"}],3} =
+        erl_scan:string(
+          "\"\"\"\"\"\n"
+          "  5-quoted\n"
+          "  \"\"\"\"\""),
+
+    {error,{{1,4},erl_scan,white_space},{1,7}} =
+        erl_scan:string(
+          "\"\"\"foo\n"
+          "\"\"\"", {1,1}, []),
+
+    {error,{{2,1},erl_scan,indentation},{2,2}} =
+        erl_scan:string(
+          "\"\"\"\n"
+          " foo\n"
+          "  \"\"\"", {1,1}, []),
+
+    {error,{{2,1},erl_scan,indentation},{2,8}} =
+        erl_scan:string(
+          "\"\"\"\n"
+          "       \tfoo\n"
+          "        \"\"\"", {1,1}, []),
+
+    {error,{{1,1},erl_scan,{string,{$",3},"\n\tx\n\t\"\""}},{3,4}} =
+        erl_scan:string(
+          "\"\"\"\n"
+          "\tx\n"
+          "\t\"\"", {1,1}, []),
+
+    {error,{{3,4},erl_scan,{string,$",[]}},{3,5}} =
+        erl_scan:string(
+          "\"\"\"\n"
+          "x\n"
+          "\"\"\"\"", {1,1}, []),
+
+    ok.
 
 test_string(String, ExpectedWithCol) ->
     {ok, ExpectedWithCol, _EndWithCol} = erl_scan_string(String, {1, 1}, []),
-- 
2.35.3

openSUSE Build Service is sponsored by