Sign Up
Log In
Log In
or
Sign Up
Places
All Projects
Status Monitor
Collapse sidebar
home:Ledest:erlang
erlang
4011-Implement-triple-quoted-strings.patch
Overview
Repositories
Revisions
Requests
Users
Attributes
Meta
File 4011-Implement-triple-quoted-strings.patch of Package erlang
From ff367d9f5641a6f805d68b0aa5dfa1da848f4db8 Mon Sep 17 00:00:00 2001 From: Raimo Niskanen <raimo@erlang.org> Date: Thu, 22 Jun 2023 18:22:50 +0200 Subject: [PATCH 1/8] Implement triple-quoted strings As according to EEP 64, with the N>=3-quoted string extension, all implemented in the scanner/lexer/tokenizer `erl_scan`. --- lib/stdlib/src/erl_scan.erl | 251 ++++++++++++++++++++++++++++- lib/stdlib/test/erl_scan_SUITE.erl | 139 +++++++++++++++- 2 files changed, 378 insertions(+), 12 deletions(-) diff --git a/lib/stdlib/src/erl_scan.erl b/lib/stdlib/src/erl_scan.erl index b7975c6ed2..ae5a080ab1 100644 --- a/lib/stdlib/src/erl_scan.erl +++ b/lib/stdlib/src/erl_scan.erl @@ -121,12 +121,16 @@ format_error({string,Quote,Head}) -> lists:flatten(["unterminated " ++ string_thing(Quote) ++ " starting with " ++ - io_lib:write_string(Head, Quote)]); + io_lib:write_string(Head, string_quote(Quote))]); format_error({illegal,Type}) -> lists:flatten(io_lib:fwrite("illegal ~w", [Type])); format_error(char) -> "unterminated character"; format_error({base,Base}) -> lists:flatten(io_lib:fwrite("illegal base '~w'", [Base])); +format_error(indentation) -> + "bad indentation in triple-quoted string"; +format_error(white_space) -> + "non-whitespace after start of triple-quoted string"; format_error(Other) -> lists:flatten(io_lib:write(Other)). @@ -262,12 +266,20 @@ symbol(T) -> %%% Local functions %%% -string_thing($') -> "atom"; %' Stupid Emacs -string_thing(_) -> "string". +string_thing($') -> %' Stupid Emacs + "atom"; +string_thing($") -> %" + "string"; +string_thing({$",_}) -> %" + "triple-quoted string". + +string_quote($') -> $'; +string_quote($") -> $"; +string_quote({$",_}) -> $". -define(WHITE_SPACE(C), - is_integer(C) andalso - (C >= $\000 andalso C =< $\s orelse C >= $\200 andalso C =< $\240)). + (is_integer(C) andalso + (C >= $\000 andalso C =< $\s orelse C >= $\200 andalso C =< $\240))). -define(DIGIT(C), (is_integer(C) andalso $0 =< C andalso C =< $9)). -define(CHAR(C), (is_integer(C) andalso 0 =< C andalso C < 16#110000)). -define(UNICODE(C), @@ -370,6 +382,7 @@ string1(Cs, St, Line, Col, Toks) -> Error end. + scan(Cs, #erl_scan{}=St, Line, Col, Toks, _) -> scan1(Cs, St, Line, Col, Toks). @@ -425,6 +438,12 @@ scan1("."=Cs, St, Line, Col, Toks) -> {more,{Cs,St,Col,Toks,Line,[],fun scan/6}}; scan1([$.=C|Cs], St, Line, Col, Toks) -> scan_dot(Cs, St, Line, Col, Toks, [C]); +scan1([$",$",$"|Cs], St, Line, Col, Toks) -> %" Emacs + scan_tqstring(Cs, St, Line, Col, Toks, 3); % Number of quote chars +scan1([$",$"]=Cs, St, Line, Col, Toks) -> + {more,{Cs,St,Col,Toks,Line,[],fun scan/6}}; +scan1([$"]=Cs, St, Line, Col, Toks) -> %" Emacs + {more,{Cs,St,Col,Toks,Line,[],fun scan/6}}; scan1([$"|Cs], St, Line, Col, Toks) -> %" Emacs State0 = {[],[],Line,Col}, scan_string(Cs, St, Line, incr_column(Col, 1), Toks, State0); @@ -828,6 +847,222 @@ scan_char([], St, Line, Col, Toks) -> scan_char(eof, _St, Line, Col, _Toks) -> scan_error(char, Line, Col, Line, incr_column(Col, 1), eof). +-record(tqs, % Triple-quoted String state + {line, % Line number of first quote character + col, % Column number of - " - + qs, % Number of quote characters in delimiter + content_r = []}). % Reverse list of reversed content lines + +%% Scan leading $" characters until we have them all, then scan lines +%% +scan_tqstring(Cs, St, Line, Col, Toks, Qs) -> + case Cs of + [$"|Ncs] -> + scan_tqstring(Ncs, St, Line, Col, Toks, Qs+1); + [] -> + {more, {[], St, Col, Toks, Line, Qs, fun scan_tqstring/6}}; + _ -> + Tqs = #tqs{ line = Line, col = Col, qs = Qs }, + scan_tqstring_line( + Cs, St, Line, incr_column(Col, Qs), Toks, + 0, Tqs, []) + end. + +scan_tqstring_line(Cs, St, Line, Col, Toks, {Qs, Tqs, Acc}) -> + scan_tqstring_line(Cs, St, Line, Col, Toks, Qs, Tqs, Acc). +%% +scan_tqstring_line(Cs, St, Line, Col, Toks, Qs, Tqs, Acc) -> + %% Qs :: Number of end quote chars to search for, 0 means not searching + %% Tqs :: #tqs{} + %% Acc :: Reversed current line + case Cs of + [$\n=C|Ncs] -> + Ncol = new_column(Col, 1), + Nacc = [C|Acc], + Nqs = Tqs#tqs.qs, % Start searching for end quote chars + scan_tqstring_line( + Ncs, St, Line+1, Ncol, Toks, + Nqs, Tqs#tqs{ content_r = [Nacc | Tqs#tqs.content_r] }, []); + [$"=C|Ncs] when Qs =/= 0 -> + %% Possible end quote char + Ncol = incr_column(Col, 1), + Nacc = [C|Acc], + if + Qs =:= 1 -> + %% This is the last end quote char + %% - post process the content + scan_tqstring_finish( + Ncs, St, Line, Ncol, Toks, Tqs, + lists:nthtail(Tqs#tqs.qs, Nacc)); % Strip them + true -> + %% Collect and Count this end quote char + scan_tqstring_line( + Ncs, St, Line, Ncol, Toks, + Qs-1, Tqs, Nacc) + end; + [C|Ncs] -> + Ncol = incr_column(Col, 1), + Nacc = [C|Acc], + if + Qs =/= 0, ?WHITE_SPACE(C) -> + %% White space while searching for end quote chars + if + Qs =:= Tqs#tqs.qs -> + %% White space before first end quote char + %% - just collect + scan_tqstring_line( + Ncs, St, Line, Ncol, Toks, + Qs, Tqs, Nacc); + true -> + %% White space after too few quote chars + %% - stop searching for end quote chars + scan_tqstring_line( + Ncs, St, Line, Ncol, Toks, + 0, Tqs, Nacc) + end; + ?UNI255(C) -> + scan_tqstring_line( + Ncs, St, Line, Ncol, Toks, + 0, Tqs, Nacc); % Stop searching for end quote chars + ?CHAR(C) -> + %% Illegal Unicode character + scan_error( + {illegal,character}, Line, Col, Line, Ncol, Ncs) + end; + [] -> + {more, + {[], St, Col, Toks, Line, {Qs, Tqs, Acc}, + fun scan_tqstring_line/6}}; + eof -> + ContentR = [Acc|Tqs#tqs.content_r], + Estr = string:slice(tqstring_chars(ContentR), 0, 16), + scan_error( + {string,{$",Tqs#tqs.qs},Estr},%" + Tqs#tqs.line, Tqs#tqs.col, Line, Col, eof) + end. + +%% Strip last line newline, +%% strip indentation, +%% check white space on first line, +%% create the string token, +%% done +%% +scan_tqstring_finish(Cs, St, Line, Col, Toks, Tqs, IndentR) -> + %% IndentR :: Indentation characters, reversed + #tqs{ line = Line0, col = Col0, content_r = ContentR } = Tqs, + {Stripped, NcontentR} = strip_last_line_newline(ContentR), + case + tqstring_finish(lists:reverse(IndentR), NcontentR, Line-1, Stripped) + of + Content when is_list(Content) -> + Qs = Tqs#tqs.qs, + Chars = + lists_duplicate( + Qs, $",%" + tqstring_chars(ContentR, lists:duplicate(Qs, $"))),%" + Anno = anno(Line0, Col0, St, ?STR(string, St, Chars)), + scan1(Cs, St, Line, Col, [{string,Anno,Content}|Toks]); + {Tag=indentation, ErrorLine, ErrorCol, StringR} -> + scan_error( + Tag, ErrorLine, new_column(Col, 1), + ErrorLine, new_column(Col, ErrorCol), + lists:nthtail(ErrorCol, lists:reverse(StringR))); + {Tag=white_space, N, String} -> + scan_error( + Tag, Line0, incr_column(Col0, Tqs#tqs.qs), + Line0, incr_column(Col0, Tqs#tqs.qs+length(String)), + lists:nthtail(N, String)) + end. + +%% Reconstruct the scanned triple-quoted string from content lines +%% +tqstring_chars(ContentR) -> + tqstring_chars(ContentR, ""). +%% +tqstring_chars([], Chars) -> Chars; +tqstring_chars([StringR|StringsR], Chars) -> + tqstring_chars(StringsR, lists:reverse(StringR, Chars)). + +%% Strip newline from the last line, but not if it is the only line, +%% and return the newline character(s) +%% +strip_last_line_newline(ContentR=[_]) -> + {"", ContentR}; +strip_last_line_newline([LastLineR|ContentR]) -> + {Newline, NlastLineR} = strip_newline(LastLineR), + {Newline, [NlastLineR|ContentR]}. + +strip_newline("\n\r"++R) -> + {"\n\r", R}; +strip_newline("\n"++R) -> + {"\n", R}. + +%% Strip indentation from all content lines but the first, +%% which contains the characters after the start quote chars, +%% check that they are white space. +%% +%% Loop from last to first line and remember the last error, +%% so the last error that is found will be the one reported, +%% that is: the first in the string. +%% +%% Build the string content by prepending all indentation stripped +%% lines onto the string: Content. +%% +tqstring_finish(Indent, ContentR, Line, Stripped) -> + tqstring_finish(Indent, ContentR, Line, Stripped, undefined, ""). +%% +tqstring_finish(_Indent, [FirstLineR], _Line, _Stripped, Error, Content) -> + {_, NfirstLineR} = strip_newline(FirstLineR), + FirstLine = lists:reverse(NfirstLineR), + %% First line; check that it is all white space + case check_white_space(FirstLine) of + ok -> + if + Error =:= undefined -> + Content; + true -> + Error + end; + N -> + {white_space, N, FirstLine} + end; +tqstring_finish( + Indent, [StringR|StringsR], Line, Stripped, Error, Content) -> + %% Stripped :: newline characters stripped from the last line, + %% only used if there is an error on that line + case strip_indent(Indent, lists:reverse(StringR, Content)) of + Ncontent when is_list(Ncontent) -> + tqstring_finish(Indent, StringsR, Line-1, "", Error, Ncontent); + ErrorCol when is_integer(ErrorCol) -> + Nerror = {indentation, Line, ErrorCol, Stripped++StringR}, + tqstring_finish(Indent, StringsR, Line-1, "", Nerror, "") + end. + +%% Strip indentation +%% +strip_indent(Indent, Cs) -> + strip_indent(Indent, Cs, 1). +%% +strip_indent([C|Indent], [C|Cs], Col) -> + strip_indent(Indent, Cs, Col+1); % Strip +strip_indent([], Cs, _) -> Cs; % Done +strip_indent(_, _, Col) -> Col. % Incorrect indentation + +%% Check that all characters are white space and return 'ok', +%% or return the number of white space characters +check_white_space(Cs) -> + check_white_space(Cs, 0). +%% +check_white_space([], _) -> + ok; +check_white_space([C|Cs], N) -> + if + ?WHITE_SPACE(C) -> + check_white_space(Cs, N+1); + true -> + N + end. + scan_string(Cs, #erl_scan{}=St, Line, Col, Toks, {Wcs,Str,Line0,Col0}) -> case scan_string0(Cs, St, Line, Col, $\", Str, Wcs) of %" {more,Ncs,Nline,Ncol,Nstr,Nwcs} -> @@ -1322,6 +1557,10 @@ new_column(no_col=Col, _Ncol) -> new_column(Col, Ncol) when is_integer(Col) -> Ncol. +%% lists:duplicate/3 +lists_duplicate(0, _, L) -> L; +lists_duplicate(N, X, L) -> lists_duplicate(N-1, X, [X|L]). + nl_spcs(2) -> "\n "; nl_spcs(3) -> "\n "; nl_spcs(4) -> "\n "; diff --git a/lib/stdlib/test/erl_scan_SUITE.erl b/lib/stdlib/test/erl_scan_SUITE.erl index 96c68039ae..317225f7fb 100644 --- a/lib/stdlib/test/erl_scan_SUITE.erl +++ b/lib/stdlib/test/erl_scan_SUITE.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 1998-2022. All Rights Reserved. +%% Copyright Ericsson AB 1998-2023. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -24,7 +24,7 @@ -export([error_1/1, error_2/1, iso88591/1, otp_7810/1, otp_10302/1, otp_10990/1, otp_10992/1, otp_11807/1, otp_16480/1, otp_17024/1, - text_fun/1]). + text_fun/1, triple_quoted_string/1]). -import(lists, [nth/2,flatten/1]). -import(io_lib, [print/1]). @@ -59,7 +59,7 @@ suite() -> all() -> [{group, error}, iso88591, otp_7810, otp_10302, otp_10990, otp_10992, - otp_11807, otp_16480, otp_17024, text_fun]. + otp_11807, otp_16480, otp_17024, text_fun, triple_quoted_string]. groups() -> [{error, [], [error_1, error_2]}]. @@ -90,14 +90,17 @@ error_2(Config) when is_list(Config) -> error_cases() -> ["'a", - "\"a", + "\"a",%" "'\\", - "\"\\", + "\"\\",%" "$", "$\\", "2.3e", "2.3e-", - "91#9" + "91#9", + "\"\"\"x",%" + "\"\"\"\n\"\"",%" + "\"\"\"\nx\n \"\"\"" ]. assert_type(N, integer) when is_integer(N) -> @@ -1299,6 +1302,130 @@ text_fun(Config) when is_list(Config) -> erl_scan:string(String(All), 7, [{text_fun, KeepClass('{')}]), [Sep1] = lists:filter(fun(T) -> T /= undefined end, Texts(Tokens5)). +triple_quoted_string(Config) when is_list(Config) -> + {ok,[{string,1,""}],2} = + erl_scan:string( + "\"\"\"\n" + "\"\"\""), + + {ok,[{string,1,""}],3} = + erl_scan:string( + "\"\"\"\n" + "\n" + "\"\"\""), + + {ok,[{string,1,"\n"}],4} = + erl_scan:string( + "\"\"\"\n" + "\n\n" + "\"\"\""), + + {ok,[{string,1,"CR LF"}],3} = + erl_scan:string( + "\"\"\" \t\r\n" + "CR LF\r\n" + "\"\"\""), + + {ok,[{string,1,"this is a\nvery long\nstring"}],5} = + erl_scan:string( + "\"\"\"\n" + "this is a\n" + "very long\n" + "string\n" + "\"\"\""), + + {ok,[{string,1,"this is a\r\nvery long\r\nstring"}],5} = + erl_scan:string( + "\"\"\"\r\n" + " this is a\r\n" + " very long\r\n" + " string\r\n" + " \"\"\""), + + {ok,[{string,1," this is a\n very long\n string"}],5} = + erl_scan:string( + "\"\"\"\n" + "\t this is a\n" + "\t very long\n" + "\t string\n" + "\t\"\"\""), + + {ok,[{string,1,"this is a \\\\\nvery long \\\\\nstring\\\\"}],5} = + erl_scan:string( + "\"\"\"\n" + "this is a \\\\\n" + "very long \\\\\n" + "string\\\\\n" + "\"\"\""), + + {ok,[{string,1, + "this contains \"quotes\"\n" + "and \"\"\"triple quotes\"\"\"\n" + " \"\" \"\"\" and\n" + "ends here"}],6} = + erl_scan:string( + "\"\"\"\n" + "this contains \"quotes\"\n" + "and \"\"\"triple quotes\"\"\"\n" + " \"\" \"\"\" and\n" + "ends here\n" + "\"\"\""), + + {ok,[{string,{1,1}, + "```erlang\n" + "foo() ->\n" + " \"\"\"\n" + " foo\n" + " bar\n" + " \"\"\".\n" + "```"}],{9,5}} = + erl_scan:string( + "\"\"\"\"\n" + "```erlang\n" + "foo() ->\n" + " \"\"\"\n" + " foo\n" + " bar\n" + " \"\"\".\n" + "```\n" + "\"\"\"\"", {1,1}, []), + + {ok,[{string,1,"5-quoted"}],3} = + erl_scan:string( + "\"\"\"\"\"\n" + " 5-quoted\n" + " \"\"\"\"\""), + + {error,{{1,4},erl_scan,white_space},{1,7}} = + erl_scan:string( + "\"\"\"foo\n" + "\"\"\"", {1,1}, []), + + {error,{{2,1},erl_scan,indentation},{2,2}} = + erl_scan:string( + "\"\"\"\n" + " foo\n" + " \"\"\"", {1,1}, []), + + {error,{{2,1},erl_scan,indentation},{2,8}} = + erl_scan:string( + "\"\"\"\n" + " \tfoo\n" + " \"\"\"", {1,1}, []), + + {error,{{1,1},erl_scan,{string,{$",3},"\n\tx\n\t\"\""}},{3,4}} = + erl_scan:string( + "\"\"\"\n" + "\tx\n" + "\t\"\"", {1,1}, []), + + {error,{{3,4},erl_scan,{string,$",[]}},{3,5}} = + erl_scan:string( + "\"\"\"\n" + "x\n" + "\"\"\"\"", {1,1}, []), + + ok. test_string(String, ExpectedWithCol) -> {ok, ExpectedWithCol, _EndWithCol} = erl_scan_string(String, {1, 1}, []), -- 2.35.3
Locations
Projects
Search
Status Monitor
Help
OpenBuildService.org
Documentation
API Documentation
Code of Conduct
Contact
Support
@OBShq
Terms
openSUSE Build Service is sponsored by
The Open Build Service is an
openSUSE project
.
Sign Up
Log In
Places
Places
All Projects
Status Monitor