File 2711-stdlib-Add-helper-to-support-round-tripping-using-er.patch of Package erlang
From 107a035aecfaae531d4b33ee7a8b984ef3e84e7c Mon Sep 17 00:00:00 2001
From: Frej Drejhammar <frej.drejhammar@gmail.com>
Date: Fri, 5 Nov 2021 12:07:00 +0100
Subject: [PATCH 1/2] stdlib: Add helper to support round tripping using
erl_pp:form/{1,2}
The Erlang compiler will, when expanding records to tuples, introduce
new variables in the abstract representation. As the expansion is done
on the abstract representation, the compiler can safely name the new
variables with names that are not syntactically valid in Erlang source
code (the name starts with a lowercase letter), thus ensuring the
uniqueness of the new names.
The above strategy leads to problems if a user uses the compiler to
produce the abstract representation (using options `E` and `binary`),
transforms it and intends to use the functions in erl_pp to produce
source code, as the generated variable names will be printed as atoms.
This patch introduces a utility function, `erl_pp:legalize_vars/1`,
which when given the abstract representation of a function, will
return an equivalent function where all variables will have
syntactically valid names. As outputting Erlang source code from the
abstract representation is a niche application, the solution with a
utility function, instead of modifying compiler passes, was chosen to
not slow down the common use case.
---
lib/stdlib/doc/src/erl_pp.xml | 31 ++++++++++++++++
lib/stdlib/src/erl_pp.erl | 64 +++++++++++++++++++++++++++++++-
lib/stdlib/test/erl_pp_SUITE.erl | 34 ++++++++++++++++-
3 files changed, 127 insertions(+), 2 deletions(-)
diff --git a/lib/stdlib/doc/src/erl_pp.xml b/lib/stdlib/doc/src/erl_pp.xml
index 0600881fbe..8e524e3e94 100644
--- a/lib/stdlib/doc/src/erl_pp.xml
+++ b/lib/stdlib/doc/src/erl_pp.xml
@@ -45,6 +45,14 @@
<p>All functions can have an optional argument, which specifies a hook
that is called if an attempt is made to print an unknown form.</p>
+
+ <p>Note that if the functions in this module are used to convert
+ abstract code back to Erlang source code, the enclosing function
+ should first be processed by <seemfa
+ marker="#legalize_vars/1"><c>legalize_vars/1</c></seemfa> in order
+ to ensure that the output is semantically equivalent to the
+ abstract code.</p>
+
</description>
<datatypes>
@@ -146,6 +154,29 @@
but only for the guard test <c><anno>Guard</anno></c>.</p>
</desc>
</func>
+
+ <func>
+ <name name="legalize_vars" arity="1" since=""/>
+ <fsummary>Ensure all variable names are valid.</fsummary>
+ <desc>
+ <p>The Erlang compiler will, when expanding records to tuples,
+ introduce new variables in the abstract representation. As the
+ expansion is done on the abstract representation, the compiler
+ can safely name the new variables with names that are not
+ syntactically valid in Erlang source code (the name starts
+ with a lowercase letter), thus ensuring the uniqueness of the
+ new names.</p>
+
+ <p>The above strategy leads to problems if a user wants to
+ convert the abstract representation back to Erlang source
+ code using the functions of this module. Typically, pattern
+ variables are output as atoms, thus changing the semantics of
+ the program. To solve this problem, <c>legalize_vars/1</c>,
+ when run on the abstract representation of a function, will
+ return an equivalent function where all variables will have
+ syntactically valid names.</p>
+ </desc>
+ </func>
</funcs>
<section>
diff --git a/lib/stdlib/src/erl_pp.erl b/lib/stdlib/src/erl_pp.erl
index d704d097c8..6951442c38 100644
--- a/lib/stdlib/src/erl_pp.erl
+++ b/lib/stdlib/src/erl_pp.erl
@@ -24,7 +24,8 @@
-export([form/1,form/2,
attribute/1,attribute/2,function/1,function/2,
- guard/1,guard/2,exprs/1,exprs/2,exprs/3,expr/1,expr/2,expr/3,expr/4]).
+ guard/1,guard/2,exprs/1,exprs/2,exprs/3,expr/1,expr/2,expr/3,expr/4,
+ legalize_vars/1]).
-import(lists, [append/1,foldr/3,map/2,mapfoldl/3,reverse/1,reverse/2]).
-import(io_lib, [write/1,format/2]).
@@ -199,6 +200,34 @@ expr(E, I, P, Options) ->
?TEST(E),
frmt(lexpr(E, P, options(Options)), I, state(Options)).
+-spec(legalize_vars(Function) -> erl_parse:abstract_form() when
+      Function :: erl_parse:abstract_form()).
+
+%% Rename the variables of Function whose names are not valid in
+%% Erlang source code. Fails with badarg for any other form.
+legalize_vars({function,_,_,_,Clauses0}=Function) ->
+    ?TEST(Function),
+    %% Collect all used variables in this function and classify them
+    %% as either syntactically valid or not.
+    F = fun({var,_Anno,Name}, {Valid, Invalid}) ->
+                Str = [First|_] = atom_to_list(Name),
+                case First of
+                    X when X >= $a, X =< $z ->
+                        {Valid,Invalid#{Name => Str}};
+                    _ ->
+                        {Valid#{Name => Name},Invalid}
+                end
+        end,
+    {Valid, Invalid} = fold_vars(F, {#{}, #{}}, Clauses0),
+    %% Make up a unique variable name for each key in Invalid, then
+    %% replace all invalid names.
+    Mapping = maps:fold(fun legalize_name/3, Valid, Invalid),
+    Subs = fun({var,Anno,Name}) -> {var,Anno,map_get(Name, Mapping)} end,
+    %% Only the clauses change; the rest of the form is kept as is.
+    setelement(5, Function, map_vars(Subs, Clauses0));
+legalize_vars(Form) ->
+    erlang:error(badarg, [Form]).
+
%%%
%%% Local functions
%%%
@@ -1352,3 +1381,36 @@ word('when', WT) -> element(16, WT);
word(' ::', WT) -> element(17, WT);
word('..', WT) -> element(18, WT);
word(' |', WT) -> element(19, WT).
+
+%% Make up a unique variable name for InvalidName that won't clash
+%% with any name in Used. We first try by converting the name to
+%% uppercase and if that fails we start prepending 'X'es until we
+%% find an unused name.
+legalize_name(InvalidName, StringName, Used) ->
+    NewName = list_to_atom(string:to_upper(StringName)),
+    case Used of
+        #{ NewName := _ } ->
+            legalize_name(InvalidName, [$X|StringName], Used);
+        #{} ->
+            %% Reserve NewName too, so no other name is mapped to it.
+            Used#{ InvalidName => NewName, NewName => NewName }
+    end.
+
+%% Fold Fun over every {var,_,_} tuple nested anywhere inside a term.
+fold_vars(Fun, Acc, {var,_,_}=Var) ->
+    Fun(Var, Acc);
+fold_vars(Fun, Acc, Term) when is_tuple(Term) ->
+    fold_vars(Fun, Acc, tuple_to_list(Term));
+fold_vars(Fun, Acc, Terms) when is_list(Terms) ->
+    lists:foldl(fun(T, A) -> fold_vars(Fun, A, T) end, Acc, Terms);
+fold_vars(_Fun, Acc, _Leaf) ->
+    Acc.
+
+%% Rebuild a term with Fun applied to each nested {var,_,_} tuple.
+map_vars(Fun, {var,_,_}=Var) -> Fun(Var);
+map_vars(Fun, Term) when is_tuple(Term) ->
+    list_to_tuple(map_vars(Fun, tuple_to_list(Term)));
+map_vars(Fun, Terms) when is_list(Terms) ->
+    [map_vars(Fun, Term) || Term <- Terms];
+map_vars(_Fun, Leaf) ->
+    Leaf.
diff --git a/lib/stdlib/test/erl_pp_SUITE.erl b/lib/stdlib/test/erl_pp_SUITE.erl
index b740ead730..bbca63c796 100644
--- a/lib/stdlib/test/erl_pp_SUITE.erl
+++ b/lib/stdlib/test/erl_pp_SUITE.erl
@@ -48,6 +48,7 @@
neg_indent/1,
maps_syntax/1,
format_options/1,
+ form_vars/1,
quoted_atom_types/1,
otp_6321/1, otp_6911/1, otp_6914/1, otp_8150/1, otp_8238/1,
@@ -78,7 +79,7 @@ groups() ->
[func, call, recs, try_catch, if_then, receive_after,
bits, head_tail, cond1, block, case1, ops,
messages, maps_syntax, quoted_atom_types,
- format_options
+ format_options, form_vars
]},
{attributes, [], [misc_attrs, import_export, dialyzer_attrs]},
{tickets, [],
@@ -578,6 +579,37 @@ format_options(Config) when is_list(Config) ->
)
).
+form_vars(Config) when is_list(Config) ->
+ %% Round-trip check of erl_pp:legalize_vars/1: compile with 'E',
+ %% legalize, pretty-print and recompile expecting no warnings.
+ %% If names starting with a lower case letter are not converted,
+ %% the compiler will detect that `X` is an atom and report that
+ %% the `+` operation will fail. If the names are merely converted
+ %% to uppercase without being made unique, the variable named
+ %% `REC0` will be used in an unsafe way.
+ String = <<"-module(erl_pp_test).
+ -export([f/1]).
+ -record(r, {a, b}).
+ f(#r{b = B} = C) ->
+ receive
+ B ->
+ X = C#r.a,
+ REC0 = X + X,
+ REC0
+ end.">>,
+ FileName = filename('erl_pp_test.erl', Config),
+ ok = file:write_file(FileName, String),
+ Opts = [binary,deterministic,nowarn_unused_record],
+ {ok, [], Forms} = compile:file(FileName, ['E'|Opts]),
+ Forms1 = lists:map(fun(F={function,_,_,_,_}) ->
+ erl_pp:legalize_vars(F);
+ (F) ->
+ F
+ end, Forms),
+ ok = file:write_file(FileName, [erl_pp:form(F) || F <- Forms1]),
+ {ok, _, _, []} = compile:file(FileName, [return|Opts]),
+ ok.
+
misc_attrs(Config) when is_list(Config) ->
ok = pp_forms(<<"-module(m). ">>),
ok = pp_forms(<<"-module(m, [Aafjlksfjdlsjflsdfjlsdjflkdsfjlk,"
--
2.31.1