File 2051-stdlib-Add-classifying-functions.patch of Package erlang

From ca7e19b8723b36bc96c3127eae4e54d25685197f Mon Sep 17 00:00:00 2001
From: Dan Gudmundsson <dgud@erlang.org>
Date: Tue, 18 Nov 2025 12:27:05 +0100
Subject: [PATCH] stdlib: Add classifying functions

When making scanners there might be a need to classify codepoints if
the specification follows the Unicode standard.

The following:
 is_whitespace/1,
 is_ID_start/1,
 is_ID_continue/1,
 category/1
is (at least) needed to scan and parse for example markdown.

This commit is backward incompatible if the *undocumented*
unicode_util:[is_]whitespace(..) functions have been used.

unicode_util:whitespace() have been changed to
unicode_util:pattern_whitespace() string module uses it and documents
that is uses the `pattern_whitespace` property.

So the name of the function have been changed, this is because
`pattern_whitespace` is not a subset of `whitespace` characters,
as was discovered during testing.

is_whitespace(Char) have been changed to use the Unicode `whitespace`
property.

See Unicode Standard Annex #31 and #44.
---
 lib/stdlib/src/erl_stdlib_errors.erl       |  12 +-
 lib/stdlib/src/string.erl                  |   4 +-
 lib/stdlib/src/unicode.erl                 | 158 ++++++++++++++++++++-
 lib/stdlib/test/Makefile                   |   3 +
 lib/stdlib/test/unicode_SUITE.erl          | 154 +++++++++++++++++++-
 lib/stdlib/test/unicode_util_SUITE.erl     |   6 +-
 lib/stdlib/uc_spec/gen_unicode_mod.escript |  17 ++-
 7 files changed, 333 insertions(+), 21 deletions(-)

diff --git a/lib/stdlib/src/erl_stdlib_errors.erl b/lib/stdlib/src/erl_stdlib_errors.erl
index 49db693d57..31503da214 100644
--- a/lib/stdlib/src/erl_stdlib_errors.erl
+++ b/lib/stdlib/src/erl_stdlib_errors.erl
@@ -428,7 +428,15 @@ format_unicode_error(characters_to_nfkc_list, [_]) ->
 format_unicode_error(characters_to_nfkd_binary, [_]) ->
     [bad_char_data];
 format_unicode_error(characters_to_nfkd_list, [_]) ->
-    [bad_char_data].
+    [bad_char_data];
+format_unicode_error(category, [_]) ->
+    [bad_char];
+format_unicode_error(is_whitespace, [_]) ->
+    [bad_char];
+format_unicode_error(is_id_start, [_]) ->
+    [bad_char];
+format_unicode_error(is_id_continue, [_]) ->
+    [bad_char].
 
 unicode_char_data(Chars) ->
     try unicode:characters_to_binary(Chars) of
@@ -1121,6 +1129,8 @@ expand_error(bad_boolean) ->
     <<"not a boolean value">>;
 expand_error(bad_binary_list) ->
     <<"not a flat list of binaries">>;
+expand_error(bad_char) ->
+    <<"not a valid character">>;
 expand_error(bad_char_data) ->
     <<"not valid character data (an iodata term)">>;
 expand_error(bad_binary_pattern) ->
diff --git a/lib/stdlib/src/string.erl b/lib/stdlib/src/string.erl
index bfcec6e6ca..ad61855ba0 100644
--- a/lib/stdlib/src/string.erl
+++ b/lib/stdlib/src/string.erl
@@ -477,7 +477,7 @@ pad(CD, Length, both, Char) when is_integer(Length) ->
 -spec trim(String) -> unicode:chardata() when
       String :: unicode:chardata().
 trim(Str) ->
-    trim(Str, both, unicode_util:whitespace()).
+    trim(Str, both, unicode_util:pattern_whitespace()).
 
 -doc """
 Equivalent to [`trim(String, Dir, Whitespace})`](`trim/3`) where 
@@ -490,7 +490,7 @@ as Pattern_White_Space in
       String :: unicode:chardata(),
       Dir :: direction() | 'both'.
 trim(Str, Dir) ->
-    trim(Str, Dir, unicode_util:whitespace()).
+    trim(Str, Dir, unicode_util:pattern_whitespace()).
 
 -doc """
 Returns a string, where leading or trailing, or both, `Characters` have been
diff --git a/lib/stdlib/src/unicode.erl b/lib/stdlib/src/unicode.erl
index 376444d930..7186770a60 100644
--- a/lib/stdlib/src/unicode.erl
+++ b/lib/stdlib/src/unicode.erl
@@ -21,7 +21,7 @@
 %%
 -module(unicode).
 -moduledoc """
-Functions for converting Unicode characters.
+Functions for converting and classifying Unicode characters.
 
 This module contains functions for converting between different character
 representations. It converts between ISO Latin-1 characters and Unicode
@@ -71,9 +71,12 @@ normalization can be found in the
          characters_to_nfkc_list/1, characters_to_nfkc_binary/1
         ]).
 
+-export([is_whitespace/1, is_id_start/1, is_id_continue/1, category/1]).
+
 -export_type([chardata/0, charlist/0, encoding/0, external_chardata/0,
               external_charlist/0, latin1_char/0, latin1_chardata/0,
-              latin1_charlist/0, latin1_binary/0, unicode_binary/0]).
+              latin1_charlist/0, latin1_binary/0, unicode_binary/0,
+              category/0]).
 
 -type encoding()  :: 'latin1' | 'unicode' | 'utf8'
                    | 'utf16' | {'utf16', endian()}
@@ -108,6 +111,15 @@ than UTF-8 (that is, UTF-16 or UTF-32).
                               latin1_binary() |
                               latin1_charlist(),
                             latin1_binary() | nil()).
+-doc "Character category".
+-type category() ::
+        {letter, uppercase | lowercase | titlecase | modifier | other} |
+        {mark, non_spacing | spacing_combining | enclosing} |
+        {number, decimal | letter | other} |
+        {separator, space | line | paragraph} |
+        {other, control | format | surrogate | private | not_assigned} |
+        {punctuation, connector | dash | open | close | initial | final | other} |
+        {symbol, math | currency | modifier | other}.
 
 %% We must inline these functions so that the stacktrace points to
 %% the correct function.
@@ -122,6 +134,8 @@ than UTF-8 (that is, UTF-16 or UTF-32).
 
 -export([bin_is_7bit/1, characters_to_binary/2, characters_to_list/2]).
 
+-define(IS_CP(CP), (is_integer(CP) andalso 0 =< CP andalso CP =< 16#10FFFF)).
+
 -doc false.
 -spec bin_is_7bit(Binary) -> boolean() when
       Binary :: binary().
@@ -681,13 +695,149 @@ characters_to_nfkc_binary(CD, N, Row, Acc) when N > 0 ->
 characters_to_nfkc_binary(CD, _, Row, Acc) ->
     characters_to_nfkc_binary(CD, ?GC_N, [], prepend_row_to_acc(Row, Acc)).
 
+-doc """
+Returns true if `Char` is a whitespace.
+
+Whitespace is defined in
+[Unicode Standard Annex #44](http://unicode.org/reports/tr44/).
+
+```erlang
+1> unicode:is_whitespace($\s).
+true
+2> unicode:is_whitespace($😊).
+false
+```
+""".
+-doc(#{since => ~"@OTP-19858@"}).
+-spec is_whitespace(char()) -> boolean().
+is_whitespace(X) %% ASCII (and low number) Optimizations
+  when X =:= 9; X =:= 10; X =:= 11; X =:= 12; X =:= 13; X =:= 32;
+       X =:= 133; X =:= 160 ->
+    true;
+is_whitespace(Char) when is_integer(Char), 0 =< Char, Char =< 5000 -> %% Arbitrary limit without whitespace
+    false;
+is_whitespace(Char) when ?IS_CP(Char) ->
+    unicode_util:is_whitespace(Char);
+is_whitespace(Term) ->
+    badarg_with_info([Term]).
+
+
+-doc """
+Returns true if `Char` is an identifier start.
+
+Identifier start is defined by the ID_Start property in
+[Unicode Standard Annex #31](https://unicode.org/reports/tr31/#D1).
+
+```erlang
+1> unicode:is_id_start($a).
+true
+2> unicode:is_id_start($_).
+false
+3> unicode:is_id_start($-).
+false
+```
+""".
+-doc(#{since => ~"@OTP-19858@"}).
+-spec is_id_start(char()) -> boolean().
+is_id_start(X)  %% ASCII optimizations
+  when X =:= 65; X =:= 66; X =:= 67; X =:= 68; X =:= 69; X =:= 70; X =:= 71;
+       X =:= 72; X =:= 73; X =:= 74; X =:= 75; X =:= 76; X =:= 77; X =:= 78;
+       X =:= 79; X =:= 80; X =:= 81; X =:= 82; X =:= 83; X =:= 84; X =:= 85;
+       X =:= 86; X =:= 87; X =:= 88; X =:= 89; X =:= 90; X =:= 97; X =:= 98;
+       X =:= 99; X =:= 100; X =:= 101; X =:= 102; X =:= 103; X =:= 104; X =:= 105;
+       X =:= 106; X =:= 107; X =:= 108; X =:= 109; X =:= 110; X =:= 111; X =:= 112;
+       X =:= 113; X =:= 114; X =:= 115; X =:= 116; X =:= 117; X =:= 118; X =:= 119;
+       X =:= 120; X =:= 121; X =:= 122 ->
+    true;
+is_id_start(Char) when is_integer(Char), 0 =< Char, Char =< 127 ->
+    false;
+is_id_start(Char) when ?IS_CP(Char) ->
+    case unicode_util:category(Char) of
+        {number,letter} -> true;
+        {letter,modifier} -> unicode_util:is_letter_not_pattern_syntax(Char);
+        {letter,_} -> true;
+        {_,_} -> unicode_util:is_other_id_start(Char)
+    end;
+is_id_start(Term) ->
+    badarg_with_info([Term]).
+
+
+-doc """
+Returns true if `Char` is an identifier continuation.
+
+Identifier continuation is defined by the ID_Continue property in
+[Unicode Standard Annex #31](https://unicode.org/reports/tr31/#D1).
+
+```erlang
+1> unicode:is_id_continue($a).
+true
+2> unicode:is_id_continue($_).
+true
+3> unicode:is_id_continue($-).
+false
+```
+""".
+-doc(#{since => ~"@OTP-19858@"}).
+-spec is_id_continue(char()) -> boolean().
+is_id_continue(X)
+  when X =:= 48; X =:= 49; X =:= 50; X =:= 51; X =:= 52; X =:= 53; X =:= 54;
+       X =:= 55; X =:= 56; X =:= 57; X =:= 65; X =:= 66; X =:= 67; X =:= 68;
+       X =:= 69; X =:= 70; X =:= 71; X =:= 72; X =:= 73; X =:= 74; X =:= 75;
+       X =:= 76; X =:= 77; X =:= 78; X =:= 79; X =:= 80; X =:= 81; X =:= 82;
+       X =:= 83; X =:= 84; X =:= 85; X =:= 86; X =:= 87; X =:= 88; X =:= 89;
+       X =:= 90; X =:= 95; X =:= 97; X =:= 98; X =:= 99; X =:= 100; X =:= 101;
+       X =:= 102; X =:= 103; X =:= 104; X =:= 105; X =:= 106; X =:= 107;
+       X =:= 108; X =:= 109; X =:= 110; X =:= 111; X =:= 112; X =:= 113;
+       X =:= 114; X =:= 115; X =:= 116; X =:= 117; X =:= 118; X =:= 119;
+       X =:= 120; X =:= 121; X =:= 122 ->
+    true;
+is_id_continue(Char) when is_integer(Char), 0 =< Char, Char =< 127 ->
+    false;
+is_id_continue(Char) when ?IS_CP(Char) ->
+    case unicode_util:category(Char) of
+        {punctuation, connector} -> true;
+        {mark,non_spacing} -> true;
+        {mark,spacing_combining} -> true;
+        {number,other} -> unicode_util:is_other_id_continue(Char);
+        {number,_} -> true;
+        {letter,modifier} -> unicode_util:is_letter_not_pattern_syntax(Char);
+        {letter,_} -> true;
+        {_,_} -> unicode_util:is_other_id_start(Char) orelse
+                     unicode_util:is_other_id_continue(Char)
+    end;
+is_id_continue(Term) ->
+    badarg_with_info([Term]).
+
+-doc """
+Returns the `Char` category.
+
+```erlang
+1> unicode:category($a).
+{letter,lowercase}
+2> unicode:category($Ä).
+{letter,uppercase}
+3> unicode:category($😊).
+{symbol,other}
+4> unicode:category($€).
+{symbol,currency}
+5> unicode:category($[).
+{punctuation,open}
+```
+""".
+-doc(#{since => ~"@OTP-19858@"}).
+-spec category(char()) -> category().
+category(Char) when ?IS_CP(Char) ->
+    unicode_util:category(Char);
+category(Term) ->
+    badarg_with_info([Term]).
+
+%% internals
+
 acc_to_binary(Acc) ->
     list_to_binary(lists:reverse(Acc)).
 prepend_row_to_acc(Row, Acc) ->
     [characters_to_binary(lists:reverse(Row))|Acc].
 
-%% internals
-
 -doc false.
 characters_to_list_int(ML, Encoding) ->
     try
diff --git a/lib/stdlib/test/Makefile b/lib/stdlib/test/Makefile
index 1ce324b46c..7df2b43eac 100644
--- a/lib/stdlib/test/Makefile
+++ b/lib/stdlib/test/Makefile
@@ -197,6 +197,9 @@ release_tests_spec: make_emakefile
 		$(ERL_FILES) $(COVERFILE) $(EXTRA_FILES) "$(RELSYSDIR)"
 	chmod -R u+w "$(RELSYSDIR)"
 	@tar cf - *_SUITE_data property_test | (cd "$(RELSYSDIR)"; tar xf -)
+	$(INSTALL_DIR) "$(RELSYSDIR)/unicode_SUITE_data"
+	$(INSTALL_DATA) ../uc_spec/PropList.txt ../uc_spec/DerivedCoreProperties.txt \
+	       "$(RELSYSDIR)/unicode_SUITE_data"
 	$(INSTALL_DIR) "$(RELSYSDIR)/stdlib_SUITE_data"
 	$(INSTALL_DATA) $(ERL_TOP)/make/otp_version_tickets "$(RELSYSDIR)/stdlib_SUITE_data"
 
diff --git a/lib/stdlib/test/unicode_SUITE.erl b/lib/stdlib/test/unicode_SUITE.erl
index 62306214a0..886b073854 100644
--- a/lib/stdlib/test/unicode_SUITE.erl
+++ b/lib/stdlib/test/unicode_SUITE.erl
@@ -39,14 +39,17 @@
          normalize/1,
          huge_illegal_code_points/1,
          bin_is_7bit/1,
-         error_info/1
+         error_info/1,
+         is_whitespace/1,
+         category/1,
+         is_id/1
         ]).
 
 suite() ->
     [{ct_hooks,[ts_install_cth]},
      {timetrap,{minutes,20}}].
 
-all() -> 
+all() ->
     [utf8_illegal_sequences_bif,
      utf16_illegal_sequences_bif, random_lists, roundtrips,
      latin1, exceptions,
@@ -55,6 +58,7 @@ all() ->
      {group,binaries_errors},
      huge_illegal_code_points,
      bin_is_7bit,
+     {group, classify},
      error_info].
 
 groups() -> 
@@ -63,7 +67,12 @@ groups() ->
        ex_binaries_errors_utf16_little,
        ex_binaries_errors_utf16_big,
        ex_binaries_errors_utf32_little,
-       ex_binaries_errors_utf32_big]}].
+       ex_binaries_errors_utf32_big]},
+     {classify, [parallel],
+      [is_whitespace,
+       category,
+       is_id]}
+    ].
 
 binaries_errors_limit(Config) when is_list(Config) ->
     setlimit(10),
@@ -1458,6 +1467,15 @@ error_info(_Config) ->
          {characters_to_nfkd_list, [abc]},
          {characters_to_nfkd_list, [<<1:11>>]},
 
+         {category, [-1]},
+         {category, [foobar]},
+
+         {is_whitespace, [-1]},
+         {is_whitespace, [foobar]},
+
+         {is_id_start, [-1]},
+         {is_id_continue, [foobar]},
+
          %% Not BIFs (they don't throw badarg when they fail).
          {bom_to_encoding, 1},                  %Not BIF.
          {encoding_to_bom, 1},                   %Not BIF.
@@ -1469,13 +1487,139 @@ error_info(_Config) ->
         ],
     error_info_lib:test_error_info(unicode, L).
 
+
+-define(MAX_CHAR, 16#10FFFF).
+category(_Config) ->
+    Check = fun(Id) ->
+                    LC = maps:get(category, unicode_util:lookup(Id)),
+                    LC == unicode:category(Id)
+            end,
+    [] = [Id || Id <- lists:seq(1, ?MAX_CHAR), not Check(Id)],
+    {'EXIT', _} = catch unicode:category(-1),
+    {'EXIT', _} = catch unicode:category(5000000),
+    {'EXIT', _} = catch unicode:category(foobar),
+    ok.
+
+is_whitespace(Config) ->
+    Props = parse_properties(filename:join(proplists:get_value(data_dir, Config), "PropList.txt")),
+    WhiteSpaces = maps:get(white_space, Props),
+    Set = make_set(WhiteSpaces),
+    Test = fun(Char) ->
+                   case {unicode:is_whitespace(Char), sets:is_element(Char, Set)} of
+                       {X,X} -> false;
+                       _ -> true
+                   end
+           end,
+    [] = [{Char, integer_to_list(Char, 16), unicode_util:lookup(Char)}
+          || Char <- lists:seq(1, ?MAX_CHAR), Test(Char)],
+    ok.
+
+is_id(Config) ->
+    Props = parse_properties(filename:join(proplists:get_value(data_dir, Config),
+                                           "DerivedCoreProperties.txt")),
+    [] = id_start(Props),
+    [] = id_cont(Props),
+    ok.
+
+id_start(Props) ->
+    ID_Start = maps:get(id_start, Props),
+    Set = make_set(ID_Start),
+
+    TestStart = fun(Char) ->
+                        case {unicode:is_id_start(Char), sets:is_element(Char, Set)} of
+                            {X,X} -> false;
+                            _ -> true
+                        end
+                end,
+
+    [{Char, integer_to_list(Char, 16), unicode_util:lookup(Char)}
+     || Char <- lists:seq(1, ?MAX_CHAR), TestStart(Char)].
+
+id_cont(Props) ->
+    ID_Cont = maps:get(id_continue, Props),
+    Set = make_set(ID_Cont),
+    TestCont = fun(Char) ->
+                       case {unicode:is_id_continue(Char), sets:is_element(Char, Set)} of
+                           {X,X} -> false;
+                           _ -> true
+                       end
+               end,
+    [{Char, integer_to_list(Char, 16), unicode_util:lookup(Char)}
+     || Char <- lists:seq(1, ?MAX_CHAR), TestCont(Char)].
+
+
 %%%
 %%% Utilities.
 %%%
 
-id(I) -> I.
-
 setlimit(X) ->
     erts_debug:set_internal_state(available_internal_state,true),
     io:format("Setting loop limit, old: ~p, now set to ~p~n",
 	      [erts_debug:set_internal_state(unicode_loop_limit,X),X]).
+
+make_set(ListOfRanges) ->
+    List = lists:foldl(fun add_range/2, [], ListOfRanges),
+    sets:from_list(List).
+
+add_range({A,undefined}, Acc) ->
+    [A|Acc];
+add_range({A,B}, Acc) ->
+    lists:seq(A,B) ++ Acc.
+
+parse_properties(File) ->
+    {ok, Fd} = file:open(File, [read, raw, {read_ahead, 1000000}]),
+    Props0 = foldl(fun parse_properties/2, [], Fd),
+    file:close(Fd),
+    Props1 = sofs:to_external(sofs:relation_to_family(sofs:relation(Props0))),
+    maps:from_list(Props1).
+
+parse_properties(Line0, Acc) ->
+    [Line|_Comments] = tokens(Line0, "#"),
+    [CodePoints, Class | _] = tokens(Line, ";"),
+    case tokens(CodePoints, ".") of
+        [CodePoint] ->
+            [{to_atom(Class), {hex_to_int(CodePoint), undefined}}|Acc];
+        [CodePoint1,"",CodePoint2] ->
+            [{to_atom(Class), {hex_to_int(CodePoint1), hex_to_int(CodePoint2)}}|Acc]
+    end.
+
+hex_to_int([]) -> [];
+hex_to_int(HexStr) ->
+    list_to_integer(string:trim(HexStr, both), 16).
+
+to_atom(Str) ->
+    list_to_atom(string:lowercase(string:trim(Str, both))).
+
+foldl(Fun, Acc, Fd) ->
+    Get = fun() -> file:read_line(Fd) end,
+    foldl_1(Fun, Acc, Get).
+
+foldl_1(_Fun, {done, Acc}, _Get) -> Acc;
+foldl_1(Fun, Acc, Get) ->
+    case Get() of
+        eof -> Acc;
+        {ok, "#" ++ _} -> %% Ignore comments
+            foldl_1(Fun, Acc, Get);
+        {ok, "\n"} -> %% Ignore empty lines
+            foldl_1(Fun, Acc, Get);
+        {ok, Line} ->
+            foldl_1(Fun, Fun(Line, Acc), Get)
+    end.
+
+%% Differs from string:lexemes, it returns empty string as token between two delimiters
+tokens(S, [C]) ->
+    tokens(lists:reverse(S), C, []).
+
+tokens([Sep|S], Sep, Toks) ->
+    tokens(S, Sep, [[]|Toks]);
+tokens([C|S], Sep, Toks) ->
+    tokens_2(S, Sep, Toks, [C]);
+tokens([], _, Toks) ->
+    Toks.
+
+tokens_2([Sep|S], Sep, Toks, Tok) ->
+    tokens(S, Sep, [Tok|Toks]);
+tokens_2([C|S], Sep, Toks, Tok) ->
+    tokens_2(S, Sep, Toks, [C|Tok]);
+tokens_2([], _Sep, Toks, Tok) ->
+    [Tok|Toks].
diff --git a/lib/stdlib/test/unicode_util_SUITE.erl b/lib/stdlib/test/unicode_util_SUITE.erl
index dbc7e21d11..6832196c13 100644
--- a/lib/stdlib/test/unicode_util_SUITE.erl
+++ b/lib/stdlib/test/unicode_util_SUITE.erl
@@ -95,8 +95,10 @@ casefold(_) ->
 
 whitespace(_Config) ->
     %% Pattern whitespace
-    WS = unicode_util:whitespace(),
-    WS = lists:filter(fun unicode_util:is_whitespace/1, WS),
+    WS = lists:sort(unicode_util:pattern_whitespace()),
+    %% is_whitespace are an extended subset of pattern_whitespace
+    %% (more tested in the unicode module)
+    WS = lists:sort(lists:filter(fun unicode_util:is_whitespace/1, WS) ++ [8206,8207]),
     false = unicode_util:is_whitespace($A),
     ok.
 
diff --git a/lib/stdlib/uc_spec/gen_unicode_mod.escript b/lib/stdlib/uc_spec/gen_unicode_mod.escript
index 35c3f05ce2..1d6507bf69 100644
--- a/lib/stdlib/uc_spec/gen_unicode_mod.escript
+++ b/lib/stdlib/uc_spec/gen_unicode_mod.escript
@@ -278,7 +278,7 @@ gen_header(Fd) ->
 -moduledoc false.
 -export([cp/1, gc/1]).
 -export([nfd/1, nfc/1, nfkd/1, nfkc/1]).
--export([whitespace/0, is_whitespace/1]).
+-export([pattern_whitespace/0, is_whitespace/1]).
 -export([uppercase/1, lowercase/1, titlecase/1, casefold/1]).
 
 -export([spec_version/0, lookup/1, category/1, get_case/1]).
@@ -657,13 +657,16 @@ gen_norm(Fd) ->
     ok.
 
 gen_props(Fd, Props, Data) ->
-    WS0 = maps:get(pattern_white_space, Props),
-    WS = merge_ranges(WS0, split),
+    PWS0 = maps:get(pattern_white_space, Props),
+    PWS = merge_ranges(PWS0, split),
     io:put_chars(Fd, "%% Useful non-breakable whitespace chars\n"
                  "%% defined as Pattern White Space in Unicode Standard Annex #31\n"),
-    io:put_chars(Fd, "-spec whitespace() -> [gc()].\n"),
-    WsChars = [CP || {CP, undefined} <- WS],
-    io:format(Fd, "whitespace() -> ~w.\n\n", [[[$\r,$\n]|WsChars]]),
+    io:put_chars(Fd, "-spec pattern_whitespace() -> [gc()].\n"),
+    WsChars = [CP || {CP, undefined} <- PWS],
+    io:format(Fd, "pattern_whitespace() -> ~w.\n\n", [[[$\r,$\n]|WsChars]]),
+
+    WS0 = maps:get(white_space, Props),
+    WS = merge_ranges(WS0, split),
 
     io:put_chars(Fd, "-spec is_whitespace(gc()) -> boolean().\n"),
     IsWS = fun(Range) -> io:format(Fd, "is_whitespace~s true;\n", [gen_single_clause(Range)]) end,
-- 
2.51.0

openSUSE Build Service is sponsored by