File 3001-Forbid-nonsensical-module-names.patch of Package erlang

From 7fadbdb941539415376f81a24ce46012236ab4e1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= <bjorn@erlang.org>
Date: Thu, 16 Jun 2022 09:26:01 +0200
Subject: [PATCH] Forbid nonsensical module names

When attempting to compile a module named `''` (in the file `.erl`),
the compiler would produce the following error message:

    1> c("").
    ..beam: Module name '' does not match file name '.'
    error

Compiling with `erlc` would also fail.

Instead of trying to fix this problem, this commit forbids nonsensical
module names. Here are the types of module names that are no longer
allowed:

* An empty module name (the atom `''`).

* Names containing control characters (16#00 through 16x1F and 0x7F
  through 16#9F).

* Names containing **only** spaces and soft hyphens.

We don't change the rules for function names (yet, anyway), because
they don't have the same fundamental problem that module names have,
namely that a file in the file system must be have the same base name
as the module.

Solves #6026
---
 lib/stdlib/src/erl_lint.erl        | 47 ++++++++++++++++++++++----
 lib/stdlib/test/erl_lint_SUITE.erl | 54 ++++++++++++++++++++++++------
 2 files changed, 84 insertions(+), 17 deletions(-)

diff --git a/lib/stdlib/src/erl_lint.erl b/lib/stdlib/src/erl_lint.erl
index 1572a5508c..176c3ae701 100644
--- a/lib/stdlib/src/erl_lint.erl
+++ b/lib/stdlib/src/erl_lint.erl
@@ -196,6 +196,12 @@ format_error(pmod_unsupported) ->
 %%     io_lib:format("module '~s' already imported from package '~s'", [M, P]);
 format_error(non_latin1_module_unsupported) ->
     "module names with non-latin1 characters are not supported";
+format_error(empty_module_name) ->
+    "the module name must not be empty";
+format_error(blank_module_name) ->
+    "the module name must contain at least one visible character";
+format_error(ctrl_chars_in_module_name) ->
+    "the module name must not contain control characters";
 
 format_error(invalid_call) ->
     "invalid function call";
@@ -3248,15 +3254,42 @@ is_fa({FuncName, Arity})
   when is_atom(FuncName), is_integer(Arity), Arity >= 0 -> true;
 is_fa(_) -> false.
 
-check_module_name(M, Anno, St) ->
-    case is_latin1_name(M) of
-        true -> St;
-        false ->
-            add_error(Anno, non_latin1_module_unsupported, St)
+check_module_name(M, Anno, St0) ->
+    AllChars = atom_to_list(M),
+    VisibleChars = remove_non_visible(AllChars),
+    case {AllChars, VisibleChars} of
+        {[], []} ->
+            add_error(Anno, empty_module_name, St0);
+        {[_|_], []} ->
+            add_error(Anno, blank_module_name, St0);
+        {Cs,[_|_]} ->
+            St1 = case io_lib:latin1_char_list(Cs) of
+                      true ->
+                          St0;
+                      false ->
+                          add_error(Anno,
+                                    non_latin1_module_unsupported,
+                                    St0)
+                  end,
+            case any_control_characters(Cs) of
+                true ->
+                    add_error(Anno, ctrl_chars_in_module_name, St1);
+                false ->
+                    St1
+            end
     end.
 
-is_latin1_name(Name) ->
-    io_lib:latin1_char_list(atom_to_list(Name)).
+remove_non_visible(Cs) ->
+    SP = $\s,                                   %Plain space.
+    NBSP = 16#A0,                               %Non-breaking space.
+    SHY = 16#AD,                                %Soft hyphen.
+    [C || C <- Cs, C =/= SP, C =/= NBSP, C =/= SHY].
+
+any_control_characters(Cs) ->
+    any(fun(C) when is_integer(C), 0 =< C, C < 16#20;
+                    is_integer(C), 16#7F =< C, C < 16#A0 -> true;
+           (_) -> false
+        end, Cs).
 
 check_specs([FunType|Left], ETag, Arity, St0) ->
     {FunType1, CTypes} =
diff --git a/lib/stdlib/test/erl_lint_SUITE.erl b/lib/stdlib/test/erl_lint_SUITE.erl
index 627d61c74d..b37bcd43da 100644
--- a/lib/stdlib/test/erl_lint_SUITE.erl
+++ b/lib/stdlib/test/erl_lint_SUITE.erl
@@ -65,7 +65,7 @@
          maps/1,maps_type/1,maps_parallel_match/1,
          otp_11851/1,otp_11879/1,otp_13230/1,
          record_errors/1, otp_11879_cont/1,
-         non_latin1_module/1, otp_14323/1,
+         non_latin1_module/1, illegal_module_name/1, otp_14323/1,
          stacktrace_syntax/1,
          otp_14285/1, otp_14378/1,
          external_funs/1,otp_15456/1,otp_15563/1,
@@ -98,7 +98,8 @@ all() ->
      too_many_arguments, basic_errors, bin_syntax_errors, predef,
      maps, maps_type, maps_parallel_match,
      otp_11851, otp_11879, otp_13230,
-     record_errors, otp_11879_cont, non_latin1_module, otp_14323,
+     record_errors, otp_11879_cont,
+     non_latin1_module, illegal_module_name, otp_14323,
      stacktrace_syntax, otp_14285, otp_14378, external_funs,
      otp_15456, otp_15563, unused_type, binary_types, removed, otp_16516,
      inline_nifs, warn_missing_spec, otp_16824,
@@ -4100,9 +4101,10 @@ otp_11879_cont(Config) ->
 %% OTP-14285: We currently don't support non-latin1 module names.
 
 non_latin1_module(Config) ->
-    do_non_latin1_module('юникод'),
-    do_non_latin1_module(list_to_atom([256,$a,$b,$c])),
-    do_non_latin1_module(list_to_atom([$a,$b,256,$c])),
+    Expected = [non_latin1_module_unsupported],
+    Expected = check_module_name('юникод'),
+    Expected = check_module_name(list_to_atom([256,$a,$b,$c])),
+    Expected = check_module_name(list_to_atom([$a,$b,256,$c])),
 
     "module names with non-latin1 characters are not supported" =
         format_error(non_latin1_module_unsupported),
@@ -4161,16 +4163,48 @@ non_latin1_module(Config) ->
     run(Config, Ts),
     ok.
 
-do_non_latin1_module(Mod) ->
+illegal_module_name(_Config) ->
+    [empty_module_name] = check_module_name(''),
+
+    [ctrl_chars_in_module_name] = check_module_name('\x00'),
+    [ctrl_chars_in_module_name] = check_module_name('abc\x1F'),
+    [ctrl_chars_in_module_name] = check_module_name('\x7F'),
+    [ctrl_chars_in_module_name] = check_module_name('abc\x80'),
+    [ctrl_chars_in_module_name] = check_module_name('abc\x80xyz'),
+    [ctrl_chars_in_module_name] = check_module_name('\x9Fxyz'),
+
+    [ctrl_chars_in_module_name,
+     non_latin1_module_unsupported] = check_module_name('атом\x00'),
+
+    [blank_module_name] = check_module_name(' '),
+    [blank_module_name] = check_module_name('\xA0'),
+    [blank_module_name] = check_module_name('\xAD'),
+    [blank_module_name] = check_module_name('  \xA0\xAD '),
+
+    %% White space and soft hyphens are OK if there are visible
+    %% characters in the name.
+    ok = check_module_name(' abc '),
+    ok = check_module_name('abc '),
+    ok = check_module_name(' abc '),
+    ok = check_module_name(' abc xyz '),
+    ok = check_module_name(' abc\xADxyz '),
+
+    ok.
+
+check_module_name(Mod) ->
     File = atom_to_list(Mod) ++ ".erl",
     L1 = erl_anno:new(1),
     Forms = [{attribute,L1,file,{File,1}},
              {attribute,L1,module,Mod},
              {eof,2}],
-    error = compile:forms(Forms),
-    {error,_,[]} = compile:forms(Forms, [return]),
-    ok.
-
+    _ = compile:forms(Forms),
+    case compile:forms(Forms, [return]) of
+        {error,Errors,[]} ->
+            [{_ModName,L}] = Errors,
+            lists:sort([Reason || {1,erl_lint,Reason} <- L]);
+        {ok,Mod,Code,Ws} when is_binary(Code), is_list(Ws)  ->
+            ok
+    end.
 
 otp_14378(Config) ->
     Ts = [
-- 
2.35.3

openSUSE Build Service is sponsored by