File 3001-Forbid-nonsensical-module-names.patch of Package erlang
From 7fadbdb941539415376f81a24ce46012236ab4e1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= <bjorn@erlang.org>
Date: Thu, 16 Jun 2022 09:26:01 +0200
Subject: [PATCH] Forbid nonsensical module names
When attempting to compile a module named `''` (in the file `.erl`),
the compiler would produce the following error message:
1> c("").
..beam: Module name '' does not match file name '.'
error
Compiling with `erlc` would also fail.
Instead of trying to fix this problem, this commit forbids nonsensical
module names. Here are the types of module names that are no longer
allowed:
* An empty module name (the atom `''`).
* Names containing control characters (16#00 through 16x1F and 0x7F
through 16#9F).
* Names containing **only** spaces and soft hyphens.
We don't change the rules for function names (yet, anyway), because
they don't have the same fundamental problem that module names have,
namely that a file in the file system must be have the same base name
as the module.
Solves #6026
---
lib/stdlib/src/erl_lint.erl | 47 ++++++++++++++++++++++----
lib/stdlib/test/erl_lint_SUITE.erl | 54 ++++++++++++++++++++++++------
2 files changed, 84 insertions(+), 17 deletions(-)
diff --git a/lib/stdlib/src/erl_lint.erl b/lib/stdlib/src/erl_lint.erl
index 1572a5508c..176c3ae701 100644
--- a/lib/stdlib/src/erl_lint.erl
+++ b/lib/stdlib/src/erl_lint.erl
@@ -196,6 +196,12 @@ format_error(pmod_unsupported) ->
%% io_lib:format("module '~s' already imported from package '~s'", [M, P]);
format_error(non_latin1_module_unsupported) ->
"module names with non-latin1 characters are not supported";
+format_error(empty_module_name) ->
+ "the module name must not be empty";
+format_error(blank_module_name) ->
+ "the module name must contain at least one visible character";
+format_error(ctrl_chars_in_module_name) ->
+ "the module name must not contain control characters";
format_error(invalid_call) ->
"invalid function call";
@@ -3248,15 +3254,42 @@ is_fa({FuncName, Arity})
when is_atom(FuncName), is_integer(Arity), Arity >= 0 -> true;
is_fa(_) -> false.
-check_module_name(M, Anno, St) ->
- case is_latin1_name(M) of
- true -> St;
- false ->
- add_error(Anno, non_latin1_module_unsupported, St)
+check_module_name(M, Anno, St0) ->
+ AllChars = atom_to_list(M),
+ VisibleChars = remove_non_visible(AllChars),
+ case {AllChars, VisibleChars} of
+ {[], []} ->
+ add_error(Anno, empty_module_name, St0);
+ {[_|_], []} ->
+ add_error(Anno, blank_module_name, St0);
+ {Cs,[_|_]} ->
+ St1 = case io_lib:latin1_char_list(Cs) of
+ true ->
+ St0;
+ false ->
+ add_error(Anno,
+ non_latin1_module_unsupported,
+ St0)
+ end,
+ case any_control_characters(Cs) of
+ true ->
+ add_error(Anno, ctrl_chars_in_module_name, St1);
+ false ->
+ St1
+ end
end.
-is_latin1_name(Name) ->
- io_lib:latin1_char_list(atom_to_list(Name)).
+remove_non_visible(Cs) ->
+ SP = $\s, %Plain space.
+ NBSP = 16#A0, %Non-breaking space.
+ SHY = 16#AD, %Soft hyphen.
+ [C || C <- Cs, C =/= SP, C =/= NBSP, C =/= SHY].
+
+any_control_characters(Cs) ->
+ any(fun(C) when is_integer(C), 0 =< C, C < 16#20;
+ is_integer(C), 16#7F =< C, C < 16#A0 -> true;
+ (_) -> false
+ end, Cs).
check_specs([FunType|Left], ETag, Arity, St0) ->
{FunType1, CTypes} =
diff --git a/lib/stdlib/test/erl_lint_SUITE.erl b/lib/stdlib/test/erl_lint_SUITE.erl
index 627d61c74d..b37bcd43da 100644
--- a/lib/stdlib/test/erl_lint_SUITE.erl
+++ b/lib/stdlib/test/erl_lint_SUITE.erl
@@ -65,7 +65,7 @@
maps/1,maps_type/1,maps_parallel_match/1,
otp_11851/1,otp_11879/1,otp_13230/1,
record_errors/1, otp_11879_cont/1,
- non_latin1_module/1, otp_14323/1,
+ non_latin1_module/1, illegal_module_name/1, otp_14323/1,
stacktrace_syntax/1,
otp_14285/1, otp_14378/1,
external_funs/1,otp_15456/1,otp_15563/1,
@@ -98,7 +98,8 @@ all() ->
too_many_arguments, basic_errors, bin_syntax_errors, predef,
maps, maps_type, maps_parallel_match,
otp_11851, otp_11879, otp_13230,
- record_errors, otp_11879_cont, non_latin1_module, otp_14323,
+ record_errors, otp_11879_cont,
+ non_latin1_module, illegal_module_name, otp_14323,
stacktrace_syntax, otp_14285, otp_14378, external_funs,
otp_15456, otp_15563, unused_type, binary_types, removed, otp_16516,
inline_nifs, warn_missing_spec, otp_16824,
@@ -4100,9 +4101,10 @@ otp_11879_cont(Config) ->
%% OTP-14285: We currently don't support non-latin1 module names.
non_latin1_module(Config) ->
- do_non_latin1_module('юникод'),
- do_non_latin1_module(list_to_atom([256,$a,$b,$c])),
- do_non_latin1_module(list_to_atom([$a,$b,256,$c])),
+ Expected = [non_latin1_module_unsupported],
+ Expected = check_module_name('юникод'),
+ Expected = check_module_name(list_to_atom([256,$a,$b,$c])),
+ Expected = check_module_name(list_to_atom([$a,$b,256,$c])),
"module names with non-latin1 characters are not supported" =
format_error(non_latin1_module_unsupported),
@@ -4161,16 +4163,48 @@ non_latin1_module(Config) ->
run(Config, Ts),
ok.
-do_non_latin1_module(Mod) ->
+illegal_module_name(_Config) ->
+ [empty_module_name] = check_module_name(''),
+
+ [ctrl_chars_in_module_name] = check_module_name('\x00'),
+ [ctrl_chars_in_module_name] = check_module_name('abc\x1F'),
+ [ctrl_chars_in_module_name] = check_module_name('\x7F'),
+ [ctrl_chars_in_module_name] = check_module_name('abc\x80'),
+ [ctrl_chars_in_module_name] = check_module_name('abc\x80xyz'),
+ [ctrl_chars_in_module_name] = check_module_name('\x9Fxyz'),
+
+ [ctrl_chars_in_module_name,
+ non_latin1_module_unsupported] = check_module_name('атом\x00'),
+
+ [blank_module_name] = check_module_name(' '),
+ [blank_module_name] = check_module_name('\xA0'),
+ [blank_module_name] = check_module_name('\xAD'),
+ [blank_module_name] = check_module_name(' \xA0\xAD '),
+
+ %% White space and soft hyphens are OK if there are visible
+ %% characters in the name.
+ ok = check_module_name(' abc '),
+ ok = check_module_name('abc '),
+ ok = check_module_name(' abc '),
+ ok = check_module_name(' abc xyz '),
+ ok = check_module_name(' abc\xADxyz '),
+
+ ok.
+
+check_module_name(Mod) ->
File = atom_to_list(Mod) ++ ".erl",
L1 = erl_anno:new(1),
Forms = [{attribute,L1,file,{File,1}},
{attribute,L1,module,Mod},
{eof,2}],
- error = compile:forms(Forms),
- {error,_,[]} = compile:forms(Forms, [return]),
- ok.
-
+ _ = compile:forms(Forms),
+ case compile:forms(Forms, [return]) of
+ {error,Errors,[]} ->
+ [{_ModName,L}] = Errors,
+ lists:sort([Reason || {1,erl_lint,Reason} <- L]);
+ {ok,Mod,Code,Ws} when is_binary(Code), is_list(Ws) ->
+ ok
+ end.
otp_14378(Config) ->
Ts = [
--
2.35.3