File 0694-Fix-xml-regexp-bug-in-XSD-validation.patch of Package erlang

From 88f2ef99b1033c3add6ea5c1aacf05857af2677e Mon Sep 17 00:00:00 2001
From: Lars Thorsen <lars@erlang.org>
Date: Thu, 4 Dec 2025 15:40:23 +0100
Subject: [PATCH] Fix xml regexp bug in XSD validation

The XML regular expression evaluator used in XSD validation did not
handle the '\s' and '\S' escapes correctly.

Also removed some compiler warnings in the test code.
---
 .gitignore                                    |    2 +
 lib/xmerl/doc/examples/xmerl_test.erl         |  148 +-
 lib/xmerl/src/Makefile                        |    8 +-
 lib/xmerl/src/xmerl.app.src                   |    3 +-
 lib/xmerl/src/xmerl_regexp.erl                | 1444 -----------------
 lib/xmerl/src/xmerl_xsd_re.erl                |  142 ++
 lib/xmerl/src/xmerl_xsd_re_parse.yrl          |  580 +++++++
 lib/xmerl/src/xmerl_xsd_type.erl              |  131 +-
 lib/xmerl/test/xmerl_SUITE.erl                |    1 -
 lib/xmerl/test/xmerl_xsd_SUITE.erl            |   12 +-
 .../xmerl_xsd_SUITE_data/ticket_19762.xml     |    4 +
 .../xmerl_xsd_SUITE_data/ticket_19762.xsd     |   26 +
 12 files changed, 913 insertions(+), 1588 deletions(-)
 delete mode 100644 lib/xmerl/src/xmerl_regexp.erl
 create mode 100644 lib/xmerl/src/xmerl_xsd_re.erl
 create mode 100644 lib/xmerl/src/xmerl_xsd_re_parse.yrl
 create mode 100644 lib/xmerl/test/xmerl_xsd_SUITE_data/ticket_19762.xml
 create mode 100644 lib/xmerl/test/xmerl_xsd_SUITE_data/ticket_19762.xsd

diff --git a/lib/xmerl/doc/examples/xmerl_test.erl b/lib/xmerl/doc/examples/xmerl_test.erl
index f180d9b81f..44952068b4 100644
--- a/lib/xmerl/doc/examples/xmerl_test.erl
+++ b/lib/xmerl/doc/examples/xmerl_test.erl
@@ -28,7 +28,7 @@ test3() ->
     io:format("From xmerl:export/2 xmerl_html filter~n ~p~n", [B]),
     C = xmerl:export([A], xmerl_text),
     io:format("From xmerl:export/2 xmerl_text filter~n ~p~n", [C]).
-    
+
 
 test4() ->
     FetchFun = fun(_DTDSpec, S) -> {ok, not_fetched, S} end,
@@ -61,14 +61,14 @@ test6() ->
 
 
 simple() ->
-    [{document, 
+    [{document,
       [{title, ["Doc Title"]},
        {author, ["Ulf Wiger"]},
        {section,[{heading, ["heading1"]},
 		 {'P', ["This is a paragraph of text."]},
 		 {section,[{heading, ["heading2"]},
 			   {'P', ["This is another paragraph."]},
-			   {table,[{border, ["1"]}, 
+			   {table,[{border, ["1"]},
 				   {heading,[{col, ["head1"]},
 					     {col, ["head2"]}]},
 				   {row, [{col, ["col11"]},
@@ -166,7 +166,7 @@ w3cvalidate() ->
 	    C = xmerl:export([A], xmerl_test),
 	    io:format("From xmerl:export/2 xmerl_text filter~n ~p~n", [C])
     end.
-    
+
 
 'TESTSUITE'(_Data, Attrs, _Parents, _E) ->
     _Profile = find_attribute('PROFILE', Attrs),
@@ -186,7 +186,7 @@ w3cvalidate() ->
     Id = find_attribute('ID', Attrs),
     io:format("Test: ~p ",[Id]),
     Entities = find_attribute('ENTITIES', Attrs), % Always handle all entities
-    Output1 = find_attribute('OUTPUT', Attrs), % 
+    Output1 = find_attribute('OUTPUT', Attrs), %
     Output3 = find_attribute('OUTPUT3', Attrs), % FIXME!
     Sections = find_attribute('SECTIONS', Attrs),
     Recommendation = find_attribute('RECOMMENDATION', Attrs), % FIXME!
@@ -253,18 +253,18 @@ test_valid(URI, Data, Sections, Entities, OutputForm, Recommendation, Version,
 		    print_error({Res, Tail}, URI, Sections, Entities, OutputForm,
 				Recommendation,
 				Version, Namespace, Data),
-		    if
-			?CONT == false -> throw({'EXIT', failed_test});
-			true -> error
-		    end
+                    case ?CONT of
+                        false -> throw({'EXIT', failed_test});
+                        true -> error
+                    end
 	    end;
 	Error ->
 	    print_error(Error, URI, Sections, Entities, OutputForm, Recommendation,
 			Version, Namespace, Data),
-	    if
-		?CONT == false -> throw({'EXIT', failed_test});
-		true -> error
-	    end
+            case ?CONT of
+                false -> throw({'EXIT', failed_test});
+                true -> error
+            end
     end,
     io:format("validating ", []),
     case validating_parser_q(URI) of
@@ -277,18 +277,18 @@ test_valid(URI, Data, Sections, Entities, OutputForm, Recommendation, Version,
 		    print_error({Res2, Tail2}, URI, Sections, Entities, OutputForm,
 				Recommendation,
 				Version, Namespace, Data),
-		    if
-			?CONT == false -> throw({'EXIT', failed_test});
-			true -> error
-		    end
+                    case ?CONT of
+                        false -> throw({'EXIT', failed_test});
+                        true -> error
+                    end
 	    end;
 	Error2 ->
 	    print_error(Error2, URI, Sections, Entities, OutputForm, Recommendation,
 			Version, Namespace, Data),
-	    if
-		?CONT == false -> throw({'EXIT', failed_test});
-		true -> error
-	    end
+            case ?CONT of
+                false -> throw({'EXIT', failed_test});
+                true -> error
+            end
     end.
 
 
@@ -307,18 +307,18 @@ test_invalid(URI, Data, Sections, Entities, OutputForm, Recommendation, Version,
 		    print_error({Res, Tail}, URI, Sections, Entities, OutputForm,
 				Recommendation,
 				Version, Namespace, Data),
-		    if
-			?CONT == false -> throw({'EXIT', failed_test});
-			true -> error
-		    end
+                    case ?CONT of
+                        false -> throw({'EXIT', failed_test});
+                        true -> error
+                    end
 	    end;
 	Error ->
 	    print_error(Error, URI, Sections, Entities, OutputForm, Recommendation,
 			Version, Namespace, Data),
-	    if
-		?CONT == false -> throw({'EXIT', failed_test});
-		true -> error
-	    end
+            case ?CONT of
+                false -> throw({'EXIT', failed_test});
+                true -> error
+            end
     end,
     io:format("validating ", []),
     case validating_parser_q(URI) of
@@ -331,18 +331,18 @@ test_invalid(URI, Data, Sections, Entities, OutputForm, Recommendation, Version,
 		    print_error({Res2, Tail2}, URI, Sections, Entities, OutputForm,
 				Recommendation,
 				Version, Namespace, Data),
-		    if
-			?CONT == false -> throw({'EXIT', failed_test});
-			true -> error
-		    end
+                    case ?CONT of
+                        false -> throw({'EXIT', failed_test});
+                        true -> error
+                    end
 	    end;
 	{error, enoent} ->
 	    print_error("Testfile not found", URI, Sections, Entities, OutputForm,
 			Recommendation, Version, Namespace, Data),
-	    if
-		?CONT == false -> throw({'EXIT', failed_test});
-		true -> error
-	    end;
+            case ?CONT of
+                false -> throw({'EXIT', failed_test});
+                true -> error
+            end;
 	_Error2 ->
 	    io:format("OK~n", []),
 	    ok
@@ -363,18 +363,18 @@ test_notwf(URI, Data, Sections, Entities, OutputForm, Recommendation, Version,
 		    print_error({Res, Tail}, URI, Sections, Entities, OutputForm,
 				Recommendation,
 				Version, Namespace, Data),
-		    if
-			?CONT == false -> throw({'EXIT', failed_test});
-			true -> error
-		    end
+                    case ?CONT of
+                        false -> throw({'EXIT', failed_test});
+                        true -> error
+                    end
 	    end;
 	{error,enoent} ->
 	    print_error("Testfile not found",URI,Sections,Entities,OutputForm,
 			Recommendation,Version,Namespace,Data),
-	    if
-		?CONT==false -> throw({'EXIT', failed_test});
-		true -> error
-	    end;
+            case ?CONT of
+                false -> throw({'EXIT', failed_test});
+                true -> error
+            end;
 	_Error ->
 	    io:format("OK ",[]),
 	    ok
@@ -390,18 +390,18 @@ test_notwf(URI, Data, Sections, Entities, OutputForm, Recommendation, Version,
 		    print_error({Res2, Tail2}, URI, Sections, Entities, OutputForm,
 				Recommendation,
 				Version, Namespace, Data),
-		    if
-			?CONT == false -> throw({'EXIT', failed_test});
-			true -> error
-		    end
+                    case ?CONT of
+                        false -> throw({'EXIT', failed_test});
+                        true -> error
+                    end
 	    end;
 	{error,enoent} ->
 	    print_error("Testfile not found", URI, Sections, Entities, OutputForm,
 			Recommendation, Version, Namespace, Data),
-	    if
-		?CONT == false -> throw({'EXIT', failed_test});
-		true -> error
-	    end;
+            case ?CONT of
+                false -> throw({'EXIT', failed_test});
+                true -> error
+            end;
 	_Error2 ->
 	    io:format("OK~n", []),
 	    ok
@@ -418,17 +418,17 @@ test_error(URI, Data, Sections, Entities, OutputForm, Recommendation, Version,
 	{error, enoent} ->
 	    print_error("Testfile not found", URI, Sections, Entities, OutputForm,
 			Recommendation, Version, Namespace, Data),
-	    if
-		?CONT == false -> throw({'EXIT', failed_test});
-		true -> error
-	    end;
+            case ?CONT of
+                false -> throw({'EXIT', failed_test});
+                true -> error
+            end;
 	Res ->
 	    print_error(Res, URI, Sections, Entities, OutputForm, Recommendation,
 			Version, Namespace, Data),
-	    if
-		?CONT == false -> throw({'EXIT', failed_test});
-		true -> error
-	    end
+            case ?CONT of
+                false -> throw({'EXIT', failed_test});
+                true -> error
+            end
     end,
     io:format("validating ", []),
     case validating_parser_q(URI) of
@@ -438,17 +438,17 @@ test_error(URI, Data, Sections, Entities, OutputForm, Recommendation, Version,
 	{error, enoent} ->
 	    print_error("Testfile not found", URI, Sections, Entities, OutputForm,
 			Recommendation, Version, Namespace, Data),
-	    if
-		?CONT == false -> throw({'EXIT', failed_test});
-		true -> error
-	    end;
+            case ?CONT of
+                false -> throw({'EXIT', failed_test});
+                true -> error
+            end;
 	Res2 ->
 	    print_error(Res2, URI, Sections, Entities, OutputForm, Recommendation,
 			Version, Namespace, Data),
-	    if
-		?CONT == false -> throw({'EXIT', failed_test});
-		true -> error
-	    end
+            case ?CONT of
+                false -> throw({'EXIT', failed_test});
+                true -> error
+            end
     end.
 
 
@@ -506,10 +506,10 @@ print_error(Error, URI, Sections, Entities, OutputForm, Recommendation, Version,
     io:format(Data).
 
 
-    
-    
-    
-	
+
+
+
+
 
 
 
@@ -521,5 +521,3 @@ para(_Data, _Attrs, US) ->
 	Int when is_integer(Int) -> Int+1;
 	undefined -> 1
     end.
-
-
diff --git a/lib/xmerl/src/Makefile b/lib/xmerl/src/Makefile
index 56eb876c65..69aa4dc6fb 100644
--- a/lib/xmerl/src/Makefile
+++ b/lib/xmerl/src/Makefile
@@ -68,7 +68,6 @@ MODULES = $(EDOC_MODULES) \
 	xmerl_html \
 	xmerl_lib \
 	xmerl_otpsgml \
-	xmerl_regexp \
 	xmerl_sgml \
 	xmerl_simple \
 	xmerl_text \
@@ -90,7 +89,9 @@ MODULES = $(EDOC_MODULES) \
 	xmerl_sax_parser_utf16be \
 	xmerl_sax_parser_utf16le \
 	xmerl_sax_simple_dom \
-	xmerl_sax_old_dom 
+	xmerl_sax_old_dom \
+	xmerl_xsd_re \
+	xmerl_xsd_re_parse
 
 
 
@@ -185,6 +186,9 @@ xmerl_xpath_parse.erl:	xmerl_xpath_parse.yrl
 xmerl_b64Bin.erl: xmerl_b64Bin.yrl
 	$(yecc_verbose)$(ERLC)  -o  $(ESRC) $(DETERMINISM_FLAG) $<
 
+xmerl_xsd_re_parse.erl:	xmerl_xsd_re_parse.yrl
+	$(yecc_verbose)$(ERLC)  -o  $(ESRC) $(DETERMINISM_FLAG) $<
+
 xmerl_sax_parser_list.erl: xmerl_sax_parser_list.erlsrc xmerl_sax_parser_base.erlsrc
 	$(gen_verbose)cat xmerl_sax_parser_list.erlsrc xmerl_sax_parser_base.erlsrc >$@
 
diff --git a/lib/xmerl/src/xmerl.app.src b/lib/xmerl/src/xmerl.app.src
index 67511902cf..2881aa6766 100644
--- a/lib/xmerl/src/xmerl.app.src
+++ b/lib/xmerl/src/xmerl.app.src
@@ -9,7 +9,6 @@
 	xmerl_html,
 	xmerl_lib,
 	xmerl_otpsgml,
-	xmerl_regexp,
 	xmerl_sax_parser,
 	xmerl_sax_parser_list,
 	xmerl_sax_parser_latin1,
@@ -35,6 +34,8 @@
 	xmerl_xpath_scan,
 	xmerl_xs,
 	xmerl_xsd,
+	xmerl_xsd_re,
+	xmerl_xsd_re_parse,
 	xmerl_xsd_type
 	]},
 
diff --git a/lib/xmerl/src/xmerl_regexp.erl b/lib/xmerl/src/xmerl_regexp.erl
deleted file mode 100644
index bfca3b50df..0000000000
--- a/lib/xmerl/src/xmerl_regexp.erl
+++ /dev/null
@@ -1,1443 +0,0 @@
-%%
-%% %CopyrightBegin%
-%% 
-%% Copyright Ericsson AB 2006-2025. All Rights Reserved.
-%% 
-%% Licensed under the Apache License, Version 2.0 (the "License");
-%% you may not use this file except in compliance with the License.
-%% You may obtain a copy of the License at
-%%
-%%     http://www.apache.org/licenses/LICENSE-2.0
-%%
-%% Unless required by applicable law or agreed to in writing, software
-%% distributed under the License is distributed on an "AS IS" BASIS,
-%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-%% See the License for the specific language governing permissions and
-%% limitations under the License.
-%% 
-%% %CopyrightEnd%
-%%
-
-%%
--module(xmerl_regexp).
-
-%% This module provides a basic set of regular expression functions
-%% for strings. The functions provided are taken from AWK.
-%%
-%% Note that we interpret the syntax tree of a regular expression
-%% directly instead of converting it to an NFA and then interpreting
-%% that. This method seems to go significantly faster.
-
--export([sh_to_awk/1,parse/1,format_error/1,match/2,first_match/2,matches/2]).
--export([sub/3,gsub/3,split/2,sub_match/2,sub_first_match/2]).
-
--export([make_nfa/1,make_dfa/1,make_dfa/2,compile/1]).
-
--import(string, [substr/2,substr/3]).
--import(lists, [reverse/1,reverse/2,last/1,duplicate/2,seq/2]).
--import(lists, [member/2,keysearch/3,keysort/2,map/2,foldl/3]).
--import(ordsets, [is_element/2,add_element/2,union/2,subtract/2]).
-
-%%-compile([export_all]).
-
--export([setup/1,compile_proc/2]).
-
--include("xmerl_internal.hrl").
-
-setup(RE0) ->
-    RE = setup(RE0, [$^]),
-    Pid = spawn(?MODULE,compile_proc,[self(),RE]),
-    receive
-	{ok,Result} ->
-	    Result
-    after 2000 ->
-	    exit(Pid,force),
-	    parse(RE)
-    end.
-    %% compile(RE).
-%%RE.
-compile_proc(From,RE) ->
-    Res = compile(RE),
-    From ! {ok,Res}.
-
-
-setup([$\\,$d|S],Acc) -> setup(S,"]9-0[" ++Acc);
-setup([$\\,$D|S],Acc) -> setup(S,"]9-0^[" ++Acc);
-setup([$\\,$s|S],Acc) -> setup(S,"]s\\t\\n\\r\\[" ++Acc);
-setup([$\\,$S|S],Acc) -> setup(S,"]\\s\\t\\n\\r^[" ++Acc);
-setup([$\\,$i|S],Acc) -> setup(S,"]z-aZ-A_:[" ++Acc);   %% Only Latin-1 now
-setup([$\\,$I|S],Acc) -> setup(S,"]z-aZ-A_:^[" ++Acc);
-setup([$\\,$c|S],Acc) -> setup(S,"]9-0z-aZ-A_:."++[183]++"-[" ++Acc); 
-setup([$\\,$C|S],Acc) -> setup(S,"]9-0z-aZ-A_:."++[183]++"-^[" ++Acc);
-%% fixme setup([$\\,$w|S]) -> {{char_class,"\s\t\n\r"},S};
-%% fixme setup([$\\,$W|S]) -> {{comp_class,"\s\t\n\r"},S};
-%% Letter, Any
-%% fixme setup(["\\p{L}" ++ S) -> {{char_class,"\s\t\n\r"},S};
-%% fixme setup(["\\P{L}" ++ S) -> {{comp_class,"\s\t\n\r"},S};
-%% Letter, Uppercase
-%% fixme setup(["\\p{Lu}" ++ S) -> {{char_class,"\s\t\n\r"},S};
-%% fixme setup(["\\P{Lu}" ++ S) -> {{comp_class,"\s\t\n\r"},S};
-%% Letter, Lowercase
-%% fixme setup(["\\p{Ll}" ++ S) -> {{char_class,"\s\t\n\r"},S};
-%% fixme setup(["\\P{Ll}" ++ S) -> {{comp_class,"\s\t\n\r"},S};
-%% Letter, Titlecase
-%% fixme setup(["\\p{Lt}" ++ S) -> {{char_class,"\s\t\n\r"},S};
-%% fixme setup(["\\P{Lt}" ++ S) -> {{comp_class,"\s\t\n\r"},S};
-%% Letter, Modifier
-%% fixme setup(["\\p{Lm}" ++ S) -> {{char_class,"\s\t\n\r"},S};
-%% fixme setup(["\\P{Lm}" ++ S) -> {{comp_class,"\s\t\n\r"},S};
-%% Letter, Other
-%% fixme setup(["\\p{Lo}" ++ S) -> {{char_class,"\s\t\n\r"},S};
-%% fixme setup(["\\P{Lo}" ++ S) -> {{comp_class,"\s\t\n\r"},S};
-%% Mark, Any
-%% fixme setup(["\\p{M}" ++ S) -> {{char_class,"\s\t\n\r"},S};
-%% fixme setup(["\\P{M}" ++ S) -> {{comp_class,"\s\t\n\r"},S};
-%% Mark, Nonspacing
-%% fixme setup(["\\p{Mn}" ++ S) -> {{char_class,"\s\t\n\r"},S};
-%% fixme setup(["\\P{Mn}" ++ S) -> {{comp_class,"\s\t\n\r"},S};
-%% Mark, Spacing Combining
-%% fixme setup(["\\p{Mc}" ++ S) -> {{char_class,"\s\t\n\r"},S};
-%% fixme setup(["\\P{Mc}" ++ S) -> {{comp_class,"\s\t\n\r"},S};
-%% Mark, Enclosing
-%% fixme setup(["\\p{Me}" ++ S) -> {{char_class,"\s\t\n\r"},S};
-%% fixme setup(["\\P{Me}" ++ S) -> {{comp_class,"\s\t\n\r"},S};
-%% Number, Any
-%% fixme setup(["\\p{N}" ++ S) -> {{char_class,"\s\t\n\r"},S};
-%% fixme setup(["\\P{N}" ++ S) -> {{comp_class,"\s\t\n\r"},S};
-%% Number, Decimal Digit
-%% fixme setup(["\\p{Nd}" ++ S) -> {{char_class,"\s\t\n\r"},S};
-%% fixme setup(["\\P{Nd}" ++ S) -> {{comp_class,"\s\t\n\r"},S};
-%% Number, Letter
-%% fixme setup(["\\p{Nl}" ++ S) -> {{char_class,"\s\t\n\r"},S};
-%% fixme setup(["\\P{Nl}" ++ S) -> {{comp_class,"\s\t\n\r"},S};
-%% Number, Other
-%% fixme setup(["\\p{No}" ++ S) -> {{char_class,"\s\t\n\r"},S};
-%% fixme setup(["\\P{No}" ++ S) -> {{comp_class,"\s\t\n\r"},S};
-%% Punctuation, Any
-%% fixme setup(["\\p{P}" ++ S) -> {{char_class,"\s\t\n\r"},S};
-%% fixme setup(["\\P{P}" ++ S) -> {{comp_class,"\s\t\n\r"},S};
-%% Punctuation, Connector
-%% fixme setup(["\\p{Pc}" ++ S) -> {{char_class,"\s\t\n\r"},S};
-%% fixme setup(["\\P{Pc}" ++ S) -> {{comp_class,"\s\t\n\r"},S};
-%% Punctuation, Dash
-%% fixme setup(["\\p{Pd}" ++ S) -> {{char_class,"\s\t\n\r"},S};
-%% fixme setup(["\\P{Pd}" ++ S) -> {{comp_class,"\s\t\n\r"},S};
-%% Punctuation, Open
-%% fixme setup(["\\p{Ps}" ++ S) -> {{char_class,"\s\t\n\r"},S};
-%% fixme setup(["\\P{Ps}" ++ S) -> {{comp_class,"\s\t\n\r"},S};
-%% Punctuation, Close
-%% fixme setup(["\\p{Pe}" ++ S) -> {{char_class,"\s\t\n\r"},S};
-%% fixme setup(["\\P{Pe}" ++ S) -> {{comp_class,"\s\t\n\r"},S};
-%% Punctuation, Initial quote (may behave like Ps or Pe, depending on usage)
-%% fixme setup(["\\p{Pi}" ++ S) -> {{char_class,"\s\t\n\r"},S};
-%% fixme setup(["\\P{Pi}" ++ S) -> {{comp_class,"\s\t\n\r"},S};
-%% Punctuation, Final quote (may behave like Ps or Pe, depending on usage)
-%% fixme setup(["\\p{Pf}" ++ S) -> {{char_class,"\s\t\n\r"},S};
-%% fixme setup(["\\P{Pf}" ++ S) -> {{comp_class,"\s\t\n\r"},S};
-%% Punctuation, Other
-%% fixme setup(["\\p{Po}" ++ S) -> {{char_class,"\s\t\n\r"},S};
-%% fixme setup(["\\P{Po}" ++ S) -> {{comp_class,"\s\t\n\r"},S};
-%% Symbol, Any
-%% fixme setup(["\\p{S}" ++ S) -> {{char_class,"\s\t\n\r"},S};
-%% fixme setup(["\\P{S}" ++ S) -> {{comp_class,"\s\t\n\r"},S};
-%% Symbol, Math
-%% fixme setup(["\\p{Sm}" ++ S) -> {{char_class,"\s\t\n\r"},S};
-%% fixme setup(["\\P{Sm}" ++ S) -> {{comp_class,"\s\t\n\r"},S};
-%% Symbol, Currency
-%% fixme setup(["\\p{Sc}" ++ S) -> {{char_class,"\s\t\n\r"},S};
-%% fixme setup(["\\P{Sc}" ++ S) -> {{comp_class,"\s\t\n\r"},S};
-%% Symbol, Modifier
-%% fixme setup(["\\p{Sk}" ++ S) -> {{char_class,"\s\t\n\r"},S};
-%% fixme setup(["\\P{Sk}" ++ S) -> {{comp_class,"\s\t\n\r"},S};
-%% Symbol, Other
-%% fixme setup(["\\p{So}" ++ S) -> {{char_class,"\s\t\n\r"},S};
-%% fixme setup(["\\P{So}" ++ S) -> {{comp_class,"\s\t\n\r"},S};
-%% Separator, Any
-%% fixme setup(["\\p{Z}" ++ S) -> {{char_class,"\s\t\n\r"},S};
-%% fixme setup(["\\P{Z}" ++ S) -> {{comp_class,"\s\t\n\r"},S};
-%% Separator, Space
-%% fixme setup(["\\p{Zs}" ++ S) -> {{char_class,"\s\t\n\r"},S};
-%% fixme setup(["\\P{Zs}" ++ S) -> {{comp_class,"\s\t\n\r"},S};
-%% Separator, Line
-%% fixme setup(["\\p{Zl}" ++ S) -> {{char_class,"\s\t\n\r"},S};
-%% fixme setup(["\\P{Zl}" ++ S) -> {{comp_class,"\s\t\n\r"},S};
-%% Separator, Paragraph
-%% fixme setup(["\\p{Zp}" ++ S) -> {{char_class,"\s\t\n\r"},S};
-%% fixme setup(["\\P{Zp}" ++ S) -> {{comp_class,"\s\t\n\r"},S};
-%% Other, Any
-%% fixme setup(["\\p{C}" ++ S) -> {{char_class,"\s\t\n\r"},S};
-%% fixme setup(["\\P{C}" ++ S) -> {{comp_class,"\s\t\n\r"},S};
-%% Other, Control
-%% fixme setup(["\\p{Cc}" ++ S) -> {{char_class,"\s\t\n\r"},S};
-%% fixme setup(["\\P{Cc}" ++ S) -> {{comp_class,"\s\t\n\r"},S};
-%% Other, Format
-%% fixme setup(["\\p{Cf}" ++ S) -> {{char_class,"\s\t\n\r"},S};
-%% fixme setup(["\\P{Cf}" ++ S) -> {{comp_class,"\s\t\n\r"},S};
-%% Other, Surrogate not supported by schema recommendation
-%% fixme setup(["\\p{Cs}" ++ S) -> {{char_class,"\s\t\n\r"},S};
-%% fixme setup(["\\P{Cs}" ++ S) -> {{comp_class,"\s\t\n\r"},S};
-%% Other, Private Use
-%% fixme setup(["\\p{Co}" ++ S) -> {{char_class,"\s\t\n\r"},S};
-%% fixme setup(["\\P{Co}" ++ S) -> {{comp_class,"\s\t\n\r"},S};
-%% Other, Not assigned (no characters in the file have this property)
-%% fixme setup(["\\p{Cn}" ++ S) -> {{char_class,"\s\t\n\r"},S};
-%% fixme setup(["\\P{Cn}" ++ S) -> {{comp_class,"\s\t\n\r"},S};
-setup([A|S], Acc) -> setup(S, [A|Acc]);
-setup([],Acc) ->  reverse([$$|Acc]).
-
-%% sh_to_awk(ShellRegExp)
-%%  Convert a sh style regexp into a full AWK one. The main difficulty is
-%%  getting character sets right as the conventions are different.
-
-sh_to_awk(Sh) -> "^(" ++ sh_to_awk_1(Sh).	%Fix the beginning
-
-sh_to_awk_1([$*|Sh]) ->				%This matches any string
-    ".*" ++ sh_to_awk_1(Sh);
-sh_to_awk_1([$?|Sh]) ->				%This matches any character
-    [$.|sh_to_awk_1(Sh)];
-sh_to_awk_1([$[,$^,$]|Sh]) ->			%This takes careful handling
-    "\\^" ++ sh_to_awk_1(Sh);
-%% Must move '^' to end.
-sh_to_awk_1("[^" ++ Sh) -> [$[|sh_to_awk_2(Sh, true)];
-sh_to_awk_1("[!" ++ Sh) -> "[^" ++ sh_to_awk_2(Sh, false);
-sh_to_awk_1([$[|Sh]) -> [$[|sh_to_awk_2(Sh, false)];
-sh_to_awk_1([C|Sh]) ->
-    %% Unspecialise everything else which is not an escape character.
-    case sh_special_char(C) of
-	true -> [$\\,C|sh_to_awk_1(Sh)];
-	false -> [C|sh_to_awk_1(Sh)]
-    end;
-sh_to_awk_1([]) -> ")$".			%Fix the end
-
-sh_to_awk_2([$]|Sh], UpArrow) -> [$]|sh_to_awk_3(Sh, UpArrow)];
-sh_to_awk_2(Sh, UpArrow) -> sh_to_awk_3(Sh, UpArrow).
-
-sh_to_awk_3([$]|Sh], true) -> "^]" ++ sh_to_awk_1(Sh);
-sh_to_awk_3([$]|Sh], false) -> [$]|sh_to_awk_1(Sh)];
-sh_to_awk_3([C|Sh], UpArrow) -> [C|sh_to_awk_3(Sh, UpArrow)];
-sh_to_awk_3([], true) -> [$^|sh_to_awk_1([])];
-sh_to_awk_3([], false) -> sh_to_awk_1([]).
-
-%% -type sh_special_char(char()) -> bool().
-%%  Test if a character is a special character.
-
-sh_special_char($|) -> true;
-sh_special_char($*) -> true;
-sh_special_char($+) -> true;
-sh_special_char($?) -> true;
-sh_special_char($() -> true;
-sh_special_char($)) -> true;
-sh_special_char($\\) -> true;
-sh_special_char($^) -> true;
-sh_special_char($$) -> true;
-sh_special_char($.) -> true;
-sh_special_char($[) -> true;
-sh_special_char($]) -> true;
-sh_special_char($") -> true;
-sh_special_char(_C) -> false.
-
-%% parse(RegExp) -> {ok,RE} | {error,E}.
-%%  Parse the regexp described in the string RegExp.
-
-parse(S) ->
-    try reg(S, 0) of
-	{R,Sc,[]} ->
-            {ok,{regexp,{R,Sc}}};
-	{_R,_Sc,[C|_]} ->
-            {error,{illegal,[C]}}
-    catch
-	throw:{error,E} -> {error,E}
-    end.
-
-%% format_error(Error) -> String.
-
-format_error({interval_range,What}) ->
-    ["illegal interval range",io_lib:write_string(What)];
-format_error({illegal,What}) -> ["illegal character `",What,"'"];
-format_error({unterminated,What}) -> ["unterminated `",What,"'"];
-format_error({posix_cc,What}) ->
-    ["illegal POSIX character class ",io_lib:write_string(What)];
-format_error({char_class,What}) ->
-    ["illegal character class ",io_lib:write_string(What)].
-
-%% match(String, RegExp) -> {match,Start,Length} | nomatch | {error,E}.
-%%  Find the longest match of RegExp in String.
-
-match(S, RegExp) when is_list(RegExp) ->
-    case parse(RegExp) of
-	{ok,RE} -> match(S, RE);
-	{error,E} -> {error,E}
-    end;
-match(S, {regexp,RE}) ->
-    case match_re(RE, S, 1, 0, -1) of
-	{Start,Len} when Len >= 0 ->
-	    {match,Start,Len};
-	{_Start,_Len} -> nomatch
-    end;
-match(S, {comp_regexp,RE}) ->
-    case match_comp(RE, S, 1, 0, -1) of
-	{Start,Len} when Len >= 0 ->
-	    {match,Start,Len};
-	{_Start,_Len} -> nomatch
-    end.
-
-match_re(RE, [_|Cs]=S0, P0, Mst, Mlen) ->
-    case re_apply(S0, P0, RE) of
-	{match,P1,_S1,_Subs} ->
-	    Len = P1-P0,
-	    if Len > Mlen -> match_re(RE, Cs, P0+1, P0, Len);
-	       true -> match_re(RE, Cs, P0+1, Mst, Mlen)
-	    end;
-	nomatch -> match_re(RE, Cs, P0+1, Mst, Mlen);
-	never_match -> {Mst,Mlen}		%No need to go on
-    end;
-match_re(_RE, _S, _P, Mst, Mlen) -> {Mst,Mlen}.
-
-match_comp(RE, [_|Cs]=S0, P0, Mst, Mlen) ->
-    case comp_apply(S0, P0, RE) of
-	{match,P1,_S1} ->
-	    Len = P1-P0,
-	    if Len > Mlen -> match_comp(RE, Cs, P0+1, P0, Len);
-	       true -> match_comp(RE, Cs, P0+1, Mst, Mlen)
-	    end;
-	nomatch -> match_comp(RE, Cs, P0+1, Mst, Mlen)
-    end;
-match_comp(_RE, _S, _P, Mst, Mlen) -> {Mst,Mlen}.
-
-%% match_re(RE, S0, Pos0, Mst, Mlen) ->
-%%     case first_match_re(RE, S0, Pos0) of
-%% 	{St,Len,_} ->				%Found a match
-%% 	    Pos1 = St + 1,			%Where to start next match
-%% 	    S1 = lists:nthtail(Pos1-Pos0, S0),
-%% 	    if Len > Mlen -> match_re(RE, S1, Pos1, St, Len);
-%% 	       true -> match_re(RE, S1, Pos1, Mst, Mlen)
-%% 	    end;
-%% 	nomatch -> {Mst,Mlen}
-%%     end.
-
-%% match_comp(RE, S0, Pos0, Mst, Mlen) ->
-%%     case first_match_comp(RE, S0, Pos0) of
-%% 	{St,Len} ->				%Found a match
-%% 	    Pos1 = St + 1,			%Where to start next match
-%% 	    S1 = lists:nthtail(Pos1-Pos0, S0),
-%% 	    if Len > Mlen -> match_comp(RE, S1, Pos1, St, Len);
-%% 	       true -> match_comp(RE, S1, Pos1, Mst, Mlen)
-%% 	    end;
-%% 	nomatch -> {Mst,Mlen}
-%%     end.
-
-%% first_match(String, RegExp) -> {match,Start,Length} | nomatch | {error,E}.
-%%  Find the first match of RegExp in String.
-
-first_match(S, RegExp) when is_list(RegExp) ->
-    case parse(RegExp) of
-	{ok,RE} -> first_match(S, RE);
-	{error,E} -> {error,E}
-    end;
-first_match(S, {regexp,RE}) ->
-    case first_match_re(RE, S, 1) of
-	{Start,Len,_} -> {match,Start,Len};
-	nomatch -> nomatch
-    end;
-first_match(S, {comp_regexp,RE}) ->
-    case first_match_comp(RE, S, 1) of
-	{Start,Len} -> {match,Start,Len};
-	nomatch -> nomatch
-    end.
-
-first_match_re(RE, S, St) when S /= [] ->
-    case re_apply(S, St, RE) of
-	{match,P,_Rest,Subs} -> {St,P-St,Subs};
-	nomatch -> first_match_re(RE, tl(S), St+1);
-	never_match -> nomatch
-    end;
-first_match_re(_RE, [], _St) -> nomatch.
-
-first_match_comp(RE, S, St) when S /= [] ->
-    case comp_apply(S, St, RE) of
-	{match,P,_Rest} -> {St,P-St};
-	nomatch -> first_match_comp(RE, tl(S), St+1)
-    end;
-first_match_comp(_RE, [], _St) -> nomatch.
-
-%% matches(String, RegExp) -> {match,[{Start,Length}]} | {error,E}.
-%%  Return the all the non-overlapping matches of RegExp in String.
-
-matches(S, RegExp) when is_list(RegExp) ->
-    case parse(RegExp) of
-	{ok,RE} -> matches(S, RE);
-	{error,E} -> {error,E}
-    end;
-matches(S, {regexp,RE}) -> {match,matches_re(S, RE, 1)};
-matches(S, {comp_regexp,RE}) -> {match,matches_comp(S, RE, 1)}.
-
-matches_re([_|Cs]=S0, RE, P0) ->
-    case re_apply(S0, P0, RE) of
-	{match,P0,S1,_Subs} ->			%0 length match
-	    [{P0,0}|matches_re(tl(S1), RE, P0+1)];
-	{match,P1,S1,_Subs} ->
-	    [{P0,P1-P0}|matches_re(S1, RE, P1)];
-	nomatch -> matches_re(Cs, RE, P0+1);
-	never_match -> []
-    end;
-matches_re([], _RE, _P) -> [].
-
-matches_comp([_|Cs]=S0, RE, P0) ->
-    case comp_apply(S0, P0, RE) of
-	{match,P0,S1} ->			%0 length match
-	    [{P0,0}|matches_comp(tl(S1), RE, P0+1)];
-	{match,P1,S1} ->
-	    [{P0,P1-P0}|matches_comp(S1, RE, P1)];
-	nomatch -> matches_comp(Cs, RE, P0+1)
-    end;
-matches_comp([], _RE, _P) -> [].
-
-%% sub(String, RegExp, Replace) -> {ok,RepString,RepCount} | {error,E}.
-%%  Substitute the first match of the regular expression RegExp with
-%%  the string Replace in String. Accept pre-parsed regular
-%%  expressions.
-
-sub(String, RegExp, Rep) when is_list(RegExp) ->
-    case parse(RegExp) of
-	{ok,RE} -> sub(String, RE, Rep);
-	{error,E} -> {error,E}
-    end;
-sub(String, {regexp,RE}, Rep) ->
-    case sub_re(String, 1, RE, [], Rep) of
-	{yes,NewStr} -> {ok,NewStr,1};
-	no -> {ok,String,0}
-    end;
-sub(String, {comp_regexp,RE}, Rep) ->
-    case sub_comp(String, 1, RE, [], Rep) of
-	{yes,NewStr} -> {ok,NewStr,1};
-	no -> {ok,String,0}
-    end.
-
-%% sub_re(String, Position, Regexp, Before, Replacement) ->
-%%      {NewString,Count}.
-%% sub_comp(String, Position, Regexp, Before, Replacement) ->
-%%      {NewString,Count}.
-%% Step forward over String until a match is found saving stepped over
-%% chars in Before. Return reversed Before prepended to replacement
-%% and rest of string.
-
-sub_re([C|Cs]=S0, P0, RE, Bef, Rep) ->
-    case re_apply(S0, P0, RE) of
-	{match,P0,_S1,_} ->			%Ignore 0 length match
-	    sub_re(Cs, P0+1, RE, [C|Bef], Rep);
-	{match,P1,Rest,_Gps} ->
-	    {yes,reverse(Bef, sub_repl(Rep, substr(S0, 1, P1-P0), Rest))};
-	nomatch -> sub_re(Cs, P0+1, RE, [C|Bef], Rep);
-	never_match -> no			%No need to go on
-    end;
-sub_re([], _P, _RE, _Bef, _Rep) -> no.
-
-sub_comp([C|Cs]=S0, P0, RE, Bef, Rep) ->
-    case comp_apply(S0, P0, RE) of
-	{match,P0,_S1} ->			%Ignore 0 length match
-	    sub_comp(Cs, P0+1, RE, [C|Bef], Rep);
-	{match,P1,Rest} ->
-	    {yes,reverse(Bef, sub_repl(Rep, substr(S0, 1, P1-P0), Rest))};
-	nomatch -> sub_comp(Cs, P0+1, RE, [C|Bef], Rep)
-    end;
-sub_comp([], _P, _RE, _Bef, _Rep) -> no.
-
-sub_repl([$&|Rep], M, Rest) -> M ++ sub_repl(Rep, M, Rest);
-sub_repl("\\&" ++ Rep, M, Rest) -> [$&|sub_repl(Rep, M, Rest)];
-sub_repl([C|Rep], M, Rest) -> [C|sub_repl(Rep, M, Rest)];
-sub_repl([], _M, Rest) -> Rest.
-
-%%  gsub(String, RegExp, Replace) -> {ok,RepString,RepCount} | {error,E}.
-%%  Substitute every match of the regular expression RegExp with the
-%%  string New in String. Accept pre-parsed regular expressions.
-
-gsub(String, RegExp, Rep) when is_list(RegExp) ->
-    case parse(RegExp) of
-	{ok,RE} -> gsub(String, RE, Rep);
-	{error,E} -> {error,E}
-    end;
-gsub(String, {regexp,RE}, Rep) ->
-    case gsub_re(String, 1, RE, [], Rep) of
-	{NewStr,N} -> {ok,NewStr,N};
-	no -> {ok,String,0}			%No substitutions
-    end;
-gsub(String, {comp_regexp,RE}, Rep) ->
-    case gsub_comp(String, 1, RE, [], Rep) of
-	{NewStr,N} -> {ok,NewStr,N};
-	no -> {ok,String,0}			%No substitutions
-    end.
-
-%% gsub_re(String, Position, Regexp, Before, Replacement) ->
-%%      {NewString,Count}.
-%% gsub_comp(String, Position, Regexp, Before, Replacement) ->
-%%      {NewString,Count}.
-%% Step forward over String until a match is found saving stepped over
-%% chars in Before. Call recursively to do rest of string after
-%% match. Return reversed Before prepended to return from recursive
-%% call.
-
-gsub_re([C|Cs]=S0, P0, RE, Bef, Rep) ->
-    case re_apply(S0, P0, RE) of
-	{match,P0,_S1,_} ->			%Ignore 0 length match
-	    gsub_re(Cs, P0+1, RE, [C|Bef], Rep);
-	{match,P1,S1,_Gps} ->
-	    case gsub_re(S1, P1, RE, [], Rep) of
-		{NewStr,N0} ->			%Substituitions
-		    {reverse(Bef, sub_repl(Rep, substr(S0, 1, P1-P0), NewStr)),
-		     N0+1};
-		no ->				%No substituitions.
-		    {reverse(Bef, sub_repl(Rep, substr(S0, 1, P1-P0), S1)),1}
-	    end;
-	%%No match so step forward saving C on Bef.
-	nomatch -> gsub_re(Cs, P0+1, RE, [C|Bef], Rep);
-	never_match -> no			%No need to go on
-    end;
-gsub_re([], _P, _RE, _Bef, _Rep) -> no.
-
-gsub_comp([C|Cs]=S0, P0, RE, Bef, Rep) ->
-    case comp_apply(S0, P0, RE) of
-	{match,P0,_S1} ->			%Ignore 0 length match
-	    gsub_comp(Cs, P0+1, RE, [C|Bef], Rep);
-	{match,P1,S1} ->
-	    case gsub_comp(S1, P1, RE, [], Rep) of
-		{NewStr,N0} ->			%Substituitions
-		    {reverse(Bef, sub_repl(Rep, substr(S0, 1, P1-P0), NewStr)),
-		     N0+1};
-		no ->				%No substituitions.
-		    {reverse(Bef, sub_repl(Rep, substr(S0, 1, P1-P0), S1)),1}
-	    end;
-	%%No match so step forward saving C on Bef.
-	nomatch -> gsub_comp(Cs, P0+1, RE, [C|Bef], Rep)
-    end;
-gsub_comp([], _P, _RE, _Bef, _Rep) -> no.
-
-%% split(String, RegExp) -> {ok,[SubString]} | {error,E}.
-%%  Split a string into substrings where the RegExp describes the
-%%  field separator. The RegExp " " is specially treated.
-
-split(String, " ") ->				%This is really special
-    {ok,{regexp,RE}} = parse("[ \t]+"),
-    case split_apply_re(String, RE, true) of
-	[[]|Ss] -> {ok,Ss};
-	Ss -> {ok,Ss}
-    end;
-split(String, RegExp) when is_list(RegExp) ->
-    case parse(RegExp) of
-	{ok,{regexp,RE}} -> {ok,split_apply_re(String, RE, false)};
-	{error,E} -> {error,E}
-    end;
-split(String, {regexp,RE}) -> {ok,split_apply_re(String, RE, false)};
-split(String, {comp_regexp,RE}) -> {ok,split_apply_comp(String, RE, false)}.
-
-split_apply_re(S, RE, Trim) -> split_apply_re(S, 1, RE, Trim, []).
-
-split_apply_re([], _P, _RE, true, []) -> [];
-split_apply_re([], _P, _RE, _T, Sub) -> [reverse(Sub)];
-split_apply_re([C|Cs]=S, P0, RE, T, Sub) ->
-    case re_apply(S, P0, RE) of
-	{match,P0,_S1,_} ->			%Ignore 0 length match
-	    split_apply_re(Cs, P0+1, RE, T, [C|Sub]);
-	{match,P1,S1,_} ->
-	    [reverse(Sub)|split_apply_re(S1, P1, RE, T, [])];
-	nomatch ->
-	    split_apply_re(Cs, P0+1, RE, T, [C|Sub]);
-	never_match -> [reverse(Sub, S)]	%No need to go on
-    end.
-
-split_apply_comp(S, RE, Trim) -> split_apply_comp(S, 1, RE, Trim, []).
-
-%%split_apply_comp([], _P, _RE, true, []) -> [];
-split_apply_comp([], _P, _RE, _T, Sub) -> [reverse(Sub)];
-split_apply_comp([C|Cs]=S, P0, RE, T, Sub) ->
-    case comp_apply(S, P0, RE) of
-	{match,P0,_S1} ->			%Ignore 0 length match
-	    split_apply_comp(Cs, P0+1, RE, T, [C|Sub]);
-	{match,P1,S1} ->
-	    [reverse(Sub)|split_apply_comp(S1, P1, RE, T, [])];
-	nomatch ->
-	    split_apply_comp(Cs, P0+1, RE, T, [C|Sub])
-    end.
-
-%% sub_match(String, RegExp) ->
-%%      {match,Start,Length,SubExprs} | nomatch | {error,E}.
-%%  Find the longest match of RegExp in String.
-
-sub_match(S, RegExp) when is_list(RegExp) ->
-    case parse(RegExp) of
-	{ok,RE} -> sub_match(S, RE);
-	{error,E} -> {error,E}
-    end;
-sub_match(S, {regexp,RE}) ->
-    case sub_match_re(RE, S, 1, 0, -1, none) of
-	{Start,Len,Subs} when Len >= 0 ->
-	    {match,Start,Len,Subs};
-	{_Start,_Len,_Subs} -> nomatch
-    end.
-
-sub_match_re(RE, S0, Pos0, Mst, Mlen, Msubs) ->
-    case first_match_re(RE, S0, Pos0) of
-	{St,Len,Subs} ->			%Found a match
-	    Pos1 = St + 1,			%Where to start next match
-	    S1 = lists:nthtail(Pos1-Pos0, S0),
-	    if Len > Mlen -> sub_match_re(RE, S1, Pos1, St, Len, Subs);
-	       true -> sub_match_re(RE, S1, Pos1, Mst, Mlen, Msubs)
-	    end;
-	nomatch -> {Mst,Mlen,Msubs}
-    end.
-
-%% sub_first_match(String, RegExp) ->
-%%       {match,Start,Length,SubExprs} | nomatch | {error,E}.
-%%  Find the longest match of RegExp in String, return Start and Length
-%%  as well as tuple of sub-expression matches.
-
-sub_first_match(S, RegExp) when is_list(RegExp) ->
-    {ok,RE} = parse(RegExp),
-    sub_first_match(S, RE);
-sub_first_match(S, {regexp,RE}) ->
-    case first_match_re(RE, S, 1) of
-	{St,Len,Subs} -> {match,St,Len,Subs};
-	nomatch -> nomatch
-    end.
-
-
-%% This is the regular expression grammar used. It is equivalent to the
-%% one used in AWK, except that we allow ^ $ to be used anywhere and fail
-%% in the matching.
-%%
-%% reg -> reg1 : '$1'.
-%% reg1 -> reg1 "|" reg2 : {'or','$1','$2'}.
-%% reg1 -> reg2 : '$1'.
-%% reg2 -> reg2 reg3 : {concat,'$1','$2'}.
-%% reg2 -> reg3 : '$1'.
-%% reg3 -> reg3 "*" : {kclosure,'$1'}.
-%% reg3 -> reg3 "+" : {pclosure,'$1'}.
-%% reg3 -> reg3 "?" : {optional,'$1'}.
-%% reg3 -> reg3 "{" [Min],[Max] "}" : {closure_range, Num, '$1'} see below
-%% reg3 -> reg4 : '$1'.
-%% reg4 -> "(" reg ")" : '$2'.
-%% reg4 -> "\\" char : '$2'.
-%% reg4 -> "^" : bos.
-%% reg4 -> "$" : eos.
-%% reg4 -> "." : char.
-%% reg4 -> "[" class "]" : {char_class,char_class('$2')}
-%% reg4 -> "[" "^" class "]" : {comp_class,char_class('$3')}
-%% reg4 -> "\"" chars "\"" : char_string('$2')
-%% reg4 -> char : '$1'.
-%% reg4 -> empty : epsilon.
-%%  The grammar of the current regular expressions. The actual parser
-%%  is a recursive descent implementation of the grammar.
-
-reg(S, Sc) -> reg1(S, Sc).
-
-%% reg1 -> reg2 reg1'
-%% reg1' -> "|" reg2
-%% reg1' -> empty
-
-reg1(S0, Sc0) ->
-    {L,Sc1,S1} = reg2(S0, Sc0),
-    reg1p(S1, L, Sc1).
-
-reg1p([$||S0], L, Sc0) ->
-    {R,Sc1,S1} = reg2(S0, Sc0),
-    reg1p(S1, {'or',L,R}, Sc1);
-reg1p(S, L, Sc) -> {L,Sc,S}.
-
-%% reg2 -> reg3 reg2'
-%% reg2' -> reg3
-%% reg2' -> empty
-
-reg2(S0, Sc0) ->
-    {L,Sc1,S1} = reg3(S0, Sc0),
-    reg2p(S1, L, Sc1).
-
-reg2p([C|S0], L, Sc0) when C /= $|, C /= $) ->
-    {R,Sc1,S1} = reg3([C|S0], Sc0),
-    %% reg2p(S1, {concat,L,R}, Sc1);
-    case is_integer(R) of
- 	true -> 
- 	    case L of
- 		{literal,Lit} ->
- 		    reg2p(S1, {literal,Lit ++[R]}, Sc1);
- 		{concat,S2,Char} when is_integer(Char) ->
- 		    reg2p(S1, {concat,S2,{literal,[Char,R]}}, Sc1);
- 		{concat,S2,{literal,Lit}}  ->
- 		    reg2p(S1, {concat,S2,{literal,Lit ++ [R]}}, Sc1);
- 		Char when is_integer(Char) -> 
- 		    reg2p(S1, {literal,[Char,R]}, Sc1);
- 		_ ->
- 		    reg2p(S1, {concat,L,R}, Sc1)
- 	    end;
- 	false ->
- 	    reg2p(S1, {concat,L,R}, Sc1)
-    end;
-reg2p(S, L, Sc) -> {L,Sc,S}.
-
-%% reg3 -> reg4 reg3'
-%% reg3' -> "*" reg3'
-%% reg3' -> "+" reg3'
-%% reg3' -> "?" reg3'
-%% reg3' -> "{" [Min],[Max] "}" reg3'
-%% reg3' -> empty
-
-reg3(S0, Sc0) ->
-    {L,Sc1,S1} = reg4(S0, Sc0),
-    reg3p(S1, L, Sc1).
-
-reg3p([$*|S], L, Sc) -> reg3p(S, {kclosure,L}, Sc);
-reg3p([$+|S], L, Sc) -> reg3p(S, {pclosure,L}, Sc);
-reg3p([$?|S], L, Sc) -> reg3p(S, {optional,L}, Sc);
-reg3p([${|Cs0], L, Sc) ->			% $}
-    case interval_range(Cs0) of
-	{none,none,_Cs1} -> parse_error({interval_range,[${|Cs0]});
-	{N,M,[$}|Cs1]} -> reg3p(Cs1, {iclosure,L,N,M}, Sc);
-	{_N,_M,_Cs1} -> parse_error({unterminated,"{"})
-    end;
-reg3p(S, L, Sc) -> {L,Sc,S}.
-
-reg4([$(|S0], Sc0) ->
-    Sc1 = Sc0+1,
-    case reg(S0, Sc1) of
-	{R,Sc2,[$)|S1]} -> {{subexpr,Sc1,R},Sc2,S1};
-	{_R,_Sc,_S} -> parse_error({unterminated,"("})
-    end;
-reg4([$^|S], Sc) -> {bos,Sc,S};
-reg4([$$|S], Sc) -> {eos,Sc,S};
-reg4([$.|S], Sc) -> {{comp_class,"\n"},Sc,S};
-reg4("[^" ++ S0, Sc) ->
-    case char_class(S0) of
-	{Cc,[$]|S1]} -> {{comp_class,Cc},Sc,S1};
-	{_Cc,_S} -> parse_error({unterminated,"["})
-    end;
-reg4([$[|S0], Sc) ->
-    case char_class(S0) of
-	{Cc,[$]|S1]} -> {{char_class,Cc},Sc,S1};
-	{_Cc,_S1} -> parse_error({unterminated,"["})
-    end;
-%reg4([$"|S0], Sc) ->
-%    case char_string(S0) of
-%	{St,[$"|S1]} -> {St,Sc,S1};
-%	{St,S1} -> parse_error({unterminated,"\""})
-%    end;
-reg4([C0|S0], Sc) when
-  is_integer(C0), C0 /= $*, C0 /= $+, C0 /= $?, C0 /= $], C0 /= $), C0 /= $} ->
-    %% Handle \ quoted characters as well, at least those we see.
-    {C1,S1} = char(C0, S0),
-    {C1,Sc,S1};
-reg4(S=[$)|_], Sc) -> {epsilon,Sc,S};
-reg4([C|_S], _Sc) -> parse_error({illegal,[C]});
-reg4([], Sc) -> {epsilon,Sc,[]}.
-
-char($\\, [O1,O2,O3|S]) when
-  O1 >= $0, O1 =< $7, O2 >= $0, O2 =< $7, O3 >= $0, O3 =< $7 ->
-    {(O1*8 + O2)*8 + O3 - 73*$0,S};
-char($\\, [C|S]) -> {escape_char(C),S};
-char($\\, []) -> parse_error({unterminated,"\\"});
-char(C, S) -> {C,S}.
-
-escape_char($n) -> $\n;				%\n = LF
-escape_char($r) -> $\r;				%\r = CR
-escape_char($t) -> $\t;				%\t = TAB
-escape_char($v) -> $\v;				%\v = VT
-escape_char($b) -> $\b;				%\b = BS
-escape_char($f) -> $\f;				%\f = FF
-escape_char($e) -> $\e;				%\e = ESC
-escape_char($s) -> $\s;				%\s = SPACE
-escape_char($d) -> $\d;				%\d = DEL
-escape_char(C) -> C.
-
-char_class([$]|S0]) ->
-    {Cc,S1} = char_class(S0, [$]]),
-    {pack_cc(Cc),S1};
-char_class(S0) ->
-    {Cc,S1} = char_class(S0, []),
-    {pack_cc(Cc),S1}.
-
-pack_cc(Cc0) ->
-    %% First sort the list.
-    Cc1 = lists:usort(fun ({Cf1,_}, {Cf2,_}) -> Cf1 < Cf2;
-			  ({Cf1,_}, C) -> Cf1 < C;
-			  (C, {Cf,_}) -> C < Cf;
-			  (C1, C2) -> C1 =< C2
-		      end, Cc0),
-    pack_cc1(Cc1).
-
-pack_cc1([{Cf1,Cl1},{Cf2,Cl2}|Cc]) when Cl1 >= Cf2, Cl1 =< Cl2 ->
-    pack_cc1([{Cf1,Cl2}|Cc]);
-pack_cc1([{Cf1,Cl1},{Cf2,Cl2}|Cc]) when Cl1 >= Cf2, Cl1 >= Cl2 ->
-    pack_cc1([{Cf1,Cl1}|Cc]);
-pack_cc1([{Cf1,Cl1},{Cf2,Cl2}|Cc]) when Cl1+1 == Cf2 ->
-    pack_cc1([{Cf1,Cl2}|Cc]);
-pack_cc1([{Cf,Cl},C|Cc]) when Cl >= C -> pack_cc1([{Cf,Cl}|Cc]);
-pack_cc1([{Cf,Cl},C|Cc]) when Cl+1 == C -> pack_cc1([{Cf,C}|Cc]);
-pack_cc1([C,{Cf,Cl}|Cc]) when C == Cf-1 -> pack_cc1([{C,Cl}|Cc]);
-pack_cc1([C1,C2|Cc]) when C1+1 == C2 -> pack_cc1([{C1,C2}|Cc]);
-pack_cc1([C|Cc]) -> [C|pack_cc1(Cc)];
-pack_cc1([]) -> [].
-
-char_class("[:" ++ S0, Cc0) ->			%Start of POSIX char class
-    case posix_cc(S0, Cc0) of
-	{Cc1,":]" ++ S1} -> char_class(S1, Cc1);
-	{_,_S1} -> parse_error({posix_cc,"[:" ++ S0})
-    end;
-char_class([C1|S0], Cc) when C1 /= $] ->
-    case char(C1, S0) of
-	{Cf,[$-,C2|S1]} when C2 /= $] ->
-	    case char(C2, S1) of
-		{Cl,S2} when Cf < Cl -> char_class(S2, [{Cf,Cl}|Cc]); 
-		{_Cl,_S2} -> parse_error({char_class,[C1|S0]})
-	    end;
-	{C,S1} -> char_class(S1, [C|Cc])
-    end;
-char_class(S, Cc) -> {Cc,S}.
-
-%% posix_cc(String, CharClass) -> {NewCharClass,RestString}.
-%%  Handle POSIX character classes, use Latin-1 character set.
-
-posix_cc("alnum" ++ S, Cc) ->
-    {[{$0,$9},{$A,$Z},{192,214},{216,223},{$a,$z},{224,246},{248,255}|Cc],S};
-posix_cc("alpha" ++ S, Cc) ->
-    {[{$A,$Z},{192,214},{216,223},{$a,$z},{224,246},{248,255}|Cc],S};
-posix_cc("blank" ++ S, Cc) -> {[$\s,$\t,160|Cc],S};
-posix_cc("cntrl" ++ S, Cc) -> {[{0,31},{127,159}|Cc],S};
-posix_cc("digit" ++ S, Cc) -> {[{$0,$9}|Cc],S};
-posix_cc("graph" ++ S, Cc) -> {[{33,126},{161,255}|Cc],S};
-posix_cc("lower" ++ S, Cc) -> {[{$a,$z},{224,246},{248,255}|Cc],S};
-posix_cc("print" ++ S, Cc) -> {[{32,126},{160,255}|Cc],S};
-posix_cc("punct" ++ S, Cc) -> {[{$!,$/},{$:,$?},{${,$~},{161,191}|Cc],S};
-posix_cc("space" ++ S, Cc) -> {[$\s,$\t,$\f,$\r,$\v,160|Cc],S};
-posix_cc("upper" ++ S, Cc) -> {[{$A,$Z},{192,214},{216,223}|Cc],S};
-posix_cc("xdigit" ++ S, Cc) -> {[{$a,$f},{$A,$F},{$0,$9}|Cc],S};
-posix_cc(S, _Cc) -> parse_error({posix_cc,"[:" ++ S}).
-
-interval_range(Cs0) ->
-    case number(Cs0) of
-	{none,Cs1} -> {none,none,Cs1};
-	{N,[$,|Cs1]} ->
-	    case number(Cs1) of
-		{none,Cs2} -> {N,any,Cs2};
-		{M,Cs2} -> {N,M,Cs2}
-	    end;
-	{N,Cs1} -> {N,none,Cs1}
-    end.
-
-number([C|Cs]) when C >= $0, C =< $9 ->
-    number(Cs, C - $0);
-number(Cs) -> {none,Cs}.
-
-number([C|Cs], Acc) when C >= $0, C =< $9 ->
-    number(Cs, 10*Acc + (C - $0));
-number(Cs, Acc) -> {Acc,Cs}.
-
-parse_error(E) -> throw({error,E}).
-
-%char_string([C|S]) when C /= $" -> char_string(S, C);
-%char_string(S) -> {epsilon,S}.
-
-%char_string([C|S0], L) when C /= $" ->
-%    char_string(S0, {concat,L,C});
-%char_string(S, L) -> {L,S}.
-
-%% re_apply(String, StartPos, RegExp) ->
-%%      {match,RestPos,Rest,SubExprs} | nomatch.
-%%
-%%  Apply the (parse of the) regular expression RegExp to String.  If
-%%  there is a match return the position of the remaining string and
-%%  the string if else return 'nomatch'.
-%%
-%%  StartPos should be the real start position as it is used to decide
-%%  if we are at the beginning of the string.
-
-re_apply(S, St, {RE,Sc}) ->
-    Subs = erlang:make_tuple(Sc, none),		%Make a sub-regexp table.
-    Res = re_apply(RE, [], S, St, Subs),
-    %% ?dbg("~p x ~p -> ~p\n", [RE,S,Res]),
-    Res.
-
-re_apply(epsilon, More, S, P, Subs) ->		%This always matches
-    re_apply_more(More, S, P, Subs);
-re_apply({'or',RE1,RE2}, More, S, P, Subs) ->
-    re_apply_or(re_apply(RE1, More, S, P, Subs),
-		re_apply(RE2, More, S, P, Subs));
-re_apply({concat,RE1,RE2}, More, S0, P, Subs) ->
-    re_apply(RE1, [RE2|More], S0, P, Subs);
-re_apply({literal,[C|Lcs]}, More, [C|S], P, Subs) ->
-    re_apply_lit(Lcs, More, S, P+1, Subs);	%Have matched first char
-re_apply({kclosure,RE}, More, S0, P0, Subs0) ->
-    %% Greedy so try RE first, no difference here actually.
-    Loop = case re_apply(RE, [], S0, P0, Subs0) of
-	       {match,P0,_S1,_Subs1} ->		%0 length match, don't loop!
-		   nomatch;
-	       {match,P1,S1,Subs1} ->
-		   re_apply_more([{kclosure,RE}|More], S1, P1, Subs1);
-	       nomatch -> nomatch;
-	       never_match -> never_match
-	   end,
-    re_apply_or(Loop, re_apply_more(More, S0, P0, Subs0));
-re_apply({pclosure,RE}, More, S, P, Subs) ->
-    re_apply(RE, [{kclosure,RE}|More], S, P, Subs);
-re_apply({optional,RE}, More, S, P, Subs) ->
-    %% Greedy so try RE first, no difference here actually.
-    re_apply_or(re_apply(RE, More, S, P, Subs),
-		re_apply_more(More, S, P, Subs));
-re_apply({iclosure,RE,N,M}, More, S, P, Subs) when N > 0 ->
-    re_apply(RE, [{iclosure,RE,N-1,M}|More], S, P, Subs);
-re_apply({iclosure,RE,0,M}, More, S, P, Subs) ->
-    Exp = expand_opt(RE, M),
-    re_apply(Exp, More, S, P, Subs);
-re_apply({subexpr,N,RE}, More, S, P, Subs) ->
-    re_apply(RE, [{endsub,N,P}|More], S, P, Subs);
-re_apply({endsub,N,St}, More, S, P, Subs0) ->
-    Subs1 = setelement(N, Subs0, {St,P-St}),	%Record sub-expr
-    re_apply_more(More, S, P, Subs1);
-re_apply(bos, More, S, 1, Subs) -> re_apply_more(More, S, 1, Subs);
-re_apply(bos, _More, _S, _, _) -> never_match;
-re_apply(eos, More, [$\n], P, Subs) -> re_apply_more(More, [], P, Subs);
-re_apply(eos, More, [], P, Subs) -> re_apply_more(More, [], P, Subs);
-re_apply({char_class,Cc}, More, [C|S], P, Subs) ->
-    case in_char_class(C, Cc) of
-	true -> re_apply_more(More, S, P+1, Subs);
-	false -> nomatch
-    end;
-re_apply({comp_class,Cc}, More, [C|S], P, Subs) ->
-    case in_char_class(C, Cc) of
-	true -> nomatch;
-	false -> re_apply_more(More, S, P+1, Subs)
-    end;
-re_apply(C, More, [C|S], P, Subs) when is_integer(C) ->
-    re_apply_more(More, S, P+1, Subs);
-re_apply(_RE, _More, _S, _P, _Subs) ->
-    %% ?dbg("~p : ~p\n", [_RE,_S]),
-    nomatch.
-
-%% re_apply_more([RegExp], String, Length, SubsExprs) ->
-%%      {match,RestPos,Rest,SubExprs} | nomatch.
-
-re_apply_more([RE|More], S, P, Subs) -> re_apply(RE, More, S, P, Subs);
-re_apply_more([], S, P, Subs) -> {match,P,S,Subs}.
-
-%% re_apply_lit(Literal, More, String, Position, SubExprs) ->
-%%      {match,RestPos,Rest,SubExprs} | nomatch.
-re_apply_lit([C|Lit], More, [C|Cs], P, Subs) ->
-    re_apply_lit(Lit, More, Cs, P+1, Subs);
-re_apply_lit([], More, Cs, P, Subs) ->
-    re_apply_more(More, Cs, P, Subs);
-re_apply_lit(_Lit, _More, _Cs, _P, _Subs) ->
-    nomatch.
-
-%% expand_iclosure(RE, N, M) -> RE.
-
-expand_iclosure(RE, 0, M) -> expand_opt(RE, M);
-expand_iclosure(RE, N, M) ->
-    {concat,RE,expand_iclosure(RE, N-1, M)}.
-
-%% expand_opt(RegExp, Count) -> RE.
-%% Handle all the cases.
-
-expand_opt(_RE, none) -> epsilon;
-expand_opt(RE, any) -> {kclosure,RE};
-expand_opt(_RE, 0) -> epsilon;
-expand_opt(RE, 1) -> {optional,RE};
-expand_opt(RE, N) ->
-    {optional,{concat,RE,expand_opt(RE, N-1)}}.
-
-%% find_prefix(PrefixStr, SourceStr)
-%% if PrefixStr is a prefix of Str then return {ok,RemainingStr}
-%% otherwise return false
-
-%% find_prefix([C|Prest], [C|Rest]) ->
-%%     find_prefix(Prest, Rest);
-%% find_prefix([], Rest) -> {yes,Rest};
-%% find_prefix(_, _) -> no.
-
-%% in_char_class(Char, Class) -> bool().
-
-in_char_class(C, [{C1,C2}|_Cc]) when C >= C1, C =< C2 -> true;
-in_char_class(C, [C|_Cc]) -> true;
-in_char_class(C, [_|Cc]) -> in_char_class(C, Cc);
-in_char_class(_C, []) -> false.
-
-%% re_apply_or(Match1, Match2, SubExprs) ->
-%%      {match,RestPos,Rest,SubExprs} | nomatch.
-%%  If we want the best match then choose the longest match, else just
-%%  choose one by trying sequentially.
-
-re_apply_or(M1={match,P1,_,_},{match,P2,_,_}) when P1 >= P2 -> M1;
-re_apply_or({match,_,_,_},  M2={match,_,_,_}) -> M2;
-re_apply_or(never_match, R2) -> R2;
-re_apply_or(R1, never_match) -> R1;
-re_apply_or(nomatch, R2) -> R2;
-re_apply_or(R1, nomatch) -> R1.
-
-%% Record definitions for the NFA, DFA and compiler.
-
--record(nfa_state, {no,edges=[],accept=no}).
--record(dfa_state, {no,nfa=[],trans=[],accept=no}).
-
--record(c_state, {no,trans=[],tmin=0,smin=none,tmax=0,smax=none,
-		  accept=false,spec=[]}).
-
-%% We use standard methods, Thompson's construction and subset
-%% construction, to create first an NFA and then a DFA from the
-%% regexps. A non-standard feature is that we work with sets of
-%% character ranges (crs) instead sets of characters. This is most
-%% noticeable when constructing DFAs. The major benefit is that we can
-%% handle characters from any set, not just limited ASCII or 8859,
-%% even 16/32 bit unicode.
-%%
-%% The whole range of characters is 0-maxchar, where maxchar is a BIG
-%% number. We don't make any assumptions about the size of maxchar, it
-%% is just bigger than any character.
-%%
-%% Using character ranges makes describing many regexps very simple,
-%% for example the regexp "." just becomes the range
-%% [{0-9},{11-maxchar}].
-
-%% make_nfa(RegExpActions) -> {ok,{NFA,StartState}} | {error,E}.
-%% Build a complete nfa from a list of {RegExp,Action}. The NFA field
-%% accept has values {yes,Action}|no. The NFA is a list of states.
-
-make_nfa(REAs0) ->
-    case parse_reas(REAs0) of
-	{ok,REAs1} ->
-	    {NFA,Start} = build_combined_nfa(REAs1),
-	    {ok,{NFA,Start}};
-	{error,E} -> {error,E}
-    end.
-
-%% make_dfa(RegExpActions) -> {ok,{DFA,StartState}} | {error,E}.
-%% make_dfa(RegExpActions, LowestState) -> {ok,{DFA,StartState}} | {error,E}.
-%% Build a complete dfa from a list of {RegExp,Action}. The DFA field
-%% accept has values {yes,Action}|no. If multiple Regexps can result
-%% in same match string then RegExpActions list define priority.
-
-make_dfa(REAs) -> make_dfa(REAs, 0).
-
-make_dfa(REAs0, Low) ->
-    case parse_reas(REAs0) of
-	{ok,REAs1} ->
-	    {NFA,Start0} = build_combined_nfa(REAs1),
-	    {DFA0,Start1} = build_dfa(NFA, Start0),
-	    {DFA,Start} = minimise_dfa(DFA0, Start1, Low),
-	    {ok,{DFA,Start}};
-	{error,E} -> {error,E}
-    end.
-
-parse_reas(REAs) -> parse_reas(REAs, []).
-
-parse_reas([{{regexp,{R,_Sc}},A}|REAs], S) ->	%Already parsed
-    parse_reas(REAs, [{R,A}|S]);
-parse_reas([{RegExp,A}|REAs], S) ->
-    case parse(RegExp) of
-	{ok,{regexp,{R,_Sc}}} -> parse_reas(REAs, [{R,A}|S]);
-	{error,E} -> {error,E}
-    end;
-parse_reas([], Stack) -> {ok,reverse(Stack)}.
-
-%% build_combined_nfa(RegExpActionList) -> {NFA,StartState}.
-%%  Build the combined NFA using Thompson's construction straight out
-%%  of the book. Build the separate NFAs in the same order as the
-%%  rules so that the accepting have ascending states have ascending
-%%  state numbers.  Start numbering the states from 1 as we put the
-%%  states in a tuple with the state number as the index.
-
-build_combined_nfa(REAs) ->
-    {NFA,Starts,Next} = build_nfa_list(REAs, [], [], 1),
-    F = #nfa_state{no=Next,edges=epsilon_trans(Starts),accept=no},
-    {[F|NFA],Next}.
-
-build_nfa_list([{RE,Action}|REAs], NFA0, Starts, Next0) ->
-    {NFA1,Next1,Start} = build_nfa(RE, Next0, Action),
-    build_nfa_list(REAs, NFA1 ++ NFA0, [Start|Starts], Next1);
-build_nfa_list([], NFA, Starts, Next) ->
-    {NFA,reverse(Starts),Next}.
-
-epsilon_trans(Sts) -> [ {epsilon,S} || S <- Sts ].
-
-%% build_nfa(RegExp, NextState, Action) -> {NFA,NextFreeState,StartState}.
-%%  When building the NFA states for a ??? we don't build the end
-%%  state, just allocate a State for it and return this state
-%%  number. This allows us to avoid building unnecessary states for
-%%  concatenation which would then have to be removed by overwriting
-%%  an existing state.
-
-build_nfa(RE, Next, Action) ->
-    {NFA,N,E} = build_nfa(RE, Next+1, Next, []),
-    {[#nfa_state{no=E,accept={yes,Action}}|NFA],N,Next}.
-
-%% build_nfa(RegExp, NextState, StartState, NFA) -> {NFA,NextState,EndState}.
-%%  The NFA is a list of nfa_state is no predefined order. The state
-%%  number of the returned EndState is already allocated!
-
-build_nfa({'or',RE1,RE2}, N0, S, NFA0) ->
-    {NFA1,N1,E1} = build_nfa(RE1, N0+1, N0, NFA0),
-    {NFA2,N2,E2} = build_nfa(RE2, N1+1, N1, NFA1),
-    E = N2,
-    {[#nfa_state{no=S,edges=[{epsilon,N0},{epsilon,N1}]},
-      #nfa_state{no=E1,edges=[{epsilon,E}]},
-      #nfa_state{no=E2,edges=[{epsilon,E}]}|NFA2],
-     N2+1,E};
-build_nfa({literal,[]}, N, S, NFA) ->
-    {NFA,N,S};
-build_nfa({literal,[C|Cs]}, N0, S, NFA0) ->
-    {NFA1,N1,E1} = build_nfa(C, N0, S, NFA0),
-    build_nfa({literal,Cs}, N1, E1, NFA1);
-build_nfa({concat,RE1,RE2}, N0, S, NFA0) ->
-    {NFA1,N1,E1} = build_nfa(RE1, N0, S, NFA0),
-    {NFA2,N2,E2} = build_nfa(RE2, N1, E1, NFA1),
-    {NFA2,N2,E2};
-build_nfa({kclosure,RE}, N0, S, NFA0) ->
-    {NFA1,N1,E1} = build_nfa(RE, N0+1, N0, NFA0),
-    E = N1,
-    {[#nfa_state{no=S,edges=[{epsilon,N0},{epsilon,E}]},
-      #nfa_state{no=E1,edges=[{epsilon,N0},{epsilon,E}]}|NFA1],
-     N1+1,E};
-build_nfa({pclosure,RE}, N0, S, NFA0) ->
-    {NFA1,N1,E1} = build_nfa(RE, N0+1, N0, NFA0),
-    E = N1,
-    {[#nfa_state{no=S,edges=[{epsilon,N0}]},
-      #nfa_state{no=E1,edges=[{epsilon,N0},{epsilon,E}]}|NFA1],
-     N1+1,E};
-build_nfa({optional,RE}, N0, S, NFA0) ->
-    {NFA1,N1,E1} = build_nfa(RE, N0+1, N0, NFA0),
-    E = N1,
-    {[#nfa_state{no=S,edges=[{epsilon,N0},{epsilon,E}]},
-      #nfa_state{no=E1,edges=[{epsilon,E}]}|NFA1],
-     N1+1,E};
-build_nfa({iclosure,RE,I1,I2}, N, S, NFA) ->
-    Exp = expand_iclosure(RE, I1, I2),
-    build_nfa(Exp, N, S, NFA);
-build_nfa({char_class,Cc}, N, S, NFA) ->
-    {[#nfa_state{no=S,edges=[{nfa_char_class(Cc),N}]}|NFA],N+1,N};
-build_nfa({comp_class,Cc}, N, S, NFA) ->
-    {[#nfa_state{no=S,edges=[{nfa_comp_class(Cc),N}]}|NFA],N+1,N};
-build_nfa(epsilon, N, S, NFA) ->
-    {NFA,N,S};
-build_nfa({group,RE}, N, S, NFA) ->		%%% FIXME %%%%%%%
-    build_nfa(RE, N, S, NFA);
-build_nfa({subexpr,_N,RE}, N, S, NFA) ->	%%% FIXME %%%%%%%
-    build_nfa(RE, N, S, NFA);
-build_nfa(bos, N, S, NFA) ->
-    {[#nfa_state{no=S,edges=[{[bos],N}]}|NFA],N+1,N};
-build_nfa(eos, N, S, NFA) ->
-    {[#nfa_state{no=S,edges=[{[eos],N}]}|NFA],N+1,N};
-%%{[#nfa_state{no=S,edges=[{[eos],N}]}|NFA],N+1,N};
-build_nfa(C, N, S, NFA) when is_integer(C) ->
-    {[#nfa_state{no=S,edges=[{[{C,C}],N}]}|NFA],N+1,N}.
-
-nfa_char_class(Cc) ->
-    Crs = lists:foldl(fun({C1,C2}, Set) -> add_element({C1,C2}, Set);
-			 (C, Set) -> add_element({C,C}, Set) end, [], Cc),
-    %% ?dbg("cc: ~p\n", [Crs]),
-    pack_crs(Crs).
-
-pack_crs([{C1,C2}=Cr,{C3,C4}|Crs]) when C1 =< C3, C2 >= C4 ->
-    %% C1      C2
-    %%   C3  C4
-    pack_crs([Cr|Crs]);
-pack_crs([{C1,C2},{C3,C4}|Crs]) when C2 >= C3, C2 < C4 ->
-    %% C1    C2
-    %%    C3   C4
-    pack_crs([{C1,C4}|Crs]);
-pack_crs([{C1,C2},{C3,C4}|Crs]) when C2 + 1 == C3 ->
-    %% C1   C2
-    %%        C3  C4
-    pack_crs([{C1,C4}|Crs]);
-pack_crs([Cr|Crs]) -> [Cr|pack_crs(Crs)];
-pack_crs([]) -> [].
-
-nfa_comp_class(Cc) ->
-    Crs = nfa_char_class(Cc),
-    %% ?dbg("comp: ~p\n", [Crs]),
-    comp_crs(Crs, 0).
-
-comp_crs([{C1,C2}|Crs], Last) ->
-    [{Last,C1-1}|comp_crs(Crs, C2+1)];
-comp_crs([], Last) -> [{Last,maxchar}].
-
-%% build_dfa(NFA, NfaStartState) -> {DFA,DfaStartState}.
-%%  Build a DFA from an NFA using "subset construction". The major
-%%  difference from the book is that we keep the marked and unmarked
-%%  DFA states in separate lists. New DFA states are added to the
-%%  unmarked list and states are marked by moving them to the marked
-%%  list. We assume that the NFA accepting state numbers are in
-%%  ascending order for the rules and use ordsets to keep this order.
-
-build_dfa(NFA0, Start) ->
-    %% We want NFA as sorted tuple for fast access, assume lowest state 1.
-    NFA1 = list_to_tuple(keysort(#nfa_state.no, NFA0)),
-    D = #dfa_state{no=0,nfa=eclosure([Start], NFA1),accept=no},
-    {build_dfa([D], 1, [], NFA1),0}.
-
-%% build_dfa([UnMarked], NextState, [Marked], NFA) -> DFA.
-%%  Traverse the unmarked states. Temporarily add the current unmarked
-%%  state to the marked list before calculating translation, this is
-%%  to avoid adding too many duplicate states. Add it properly to the
-%%  marked list afterwards with correct translations.
-
-build_dfa([U|Us0], N0, Ms, NFA) ->
-    {Ts,Us1,N1} = build_dfa(U#dfa_state.nfa, Us0, N0, [], [U|Ms], NFA),
-    M = U#dfa_state{trans=Ts,accept=accept(U#dfa_state.nfa, NFA)},
-    build_dfa(Us1, N1, [M|Ms], NFA);
-build_dfa([], _N, Ms, _NFA) -> Ms.
-
-%% build_dfa([NfaState], [Unmarked], NextState, [Transition], [Marked], NFA) ->
-%%	{Transitions,UnmarkedStates,NextState}.
-%%  Foreach NFA state set calculate the legal translations. N.B. must
-%%  search *BOTH* the unmarked and marked lists to check if DFA state
-%%  already exists. As the range of characters is potentially VERY
-%%  large we cannot explicitly test all characters. Instead we first
-%%  calculate the set of all disjoint character ranges which are
-%%  possible candidates to the set of NFA states.
-
-build_dfa(Set, Us, N, Ts, Ms, NFA) ->
-    %% List of all transition sets.
-    Crs0 = [Cr || S <- Set,
-		  {Crs,_St} <- (element(S, NFA))#nfa_state.edges,
-		 is_list(Crs),
-		  Cr <- Crs ],
-    Crs1 = lists:usort(Crs0),			%Must remove duplicates!
-    %% Build list of disjoint test ranges.
-    Test = disjoint_crs(Crs1),
-    %% ?dbg("bd: ~p\n    ~p\n    ~p\n    ~p\n", [Set,Crs0,Crs1,Test]),
-    build_dfa(Test, Set, Us, N, Ts, Ms, NFA).
-
-%% disjoint_crs([CharRange]) -> [CharRange].
-%%  Take a sorted list of char ranges and make a sorted list of
-%%  disjoint char ranges. No new char range extends past an existing
-%%  char range.
-
-disjoint_crs([{_C1,C2}=Cr1,{C3,_C4}=Cr2|Crs]) when C2 < C3 ->
-    %% C1  C2
-    %%        C3  C4
-    [Cr1|disjoint_crs([Cr2|Crs])];
-disjoint_crs([{C1,C2},{C3,C4}|Crs]) when C1 == C3 ->
-    %% C1     C2
-    %% C3       C4
-    [{C1,C2}|disjoint_crs(add_element({C2+1,C4}, Crs))];
-disjoint_crs([{C1,C2},{C3,C4}|Crs]) when C1 < C3, C2 >= C3, C2 < C4 ->
-    %% C1     C2
-    %%    C3     C4
-    [{C1,C3-1}|disjoint_crs(union([{C3,C2},{C2+1,C4}], Crs))];
-disjoint_crs([{C1,C2},{C3,C4}|Crs]) when C1 < C3, C2 == C4 ->
-    %% C1      C2
-    %%    C3   C4
-    [{C1,C3-1}|disjoint_crs(add_element({C3,C4}, Crs))];
-disjoint_crs([{C1,C2},{C3,C4}|Crs]) when C1 < C3, C2 > C4 ->
-    %% C1        C2
-    %%    C3   C4
-    [{C1,C3-1}|disjoint_crs(union([{C3,C4},{C4+1,C2}], Crs))];
-disjoint_crs([Cr|Crs]) -> [Cr|disjoint_crs(Crs)];
-disjoint_crs([]) -> [].
-
-build_dfa([Cr|Crs], Set, Us, N, Ts, Ms, NFA) ->
-    case eclosure(move(Set, Cr, NFA), NFA) of
-	S when S /= [] ->
-	    case keysearch(S, #dfa_state.nfa, Us) of
-		{value,#dfa_state{no=T}} ->
-		    build_dfa(Crs, Set, Us, N, [{Cr,T}|Ts], Ms, NFA);
-		false ->
-		    case keysearch(S, #dfa_state.nfa, Ms) of
-			{value,#dfa_state{no=T}} ->
-			    build_dfa(Crs, Set, Us, N, [{Cr,T}|Ts], Ms, NFA);
-			false ->
-			    U = #dfa_state{no=N,nfa=S},
-			    build_dfa(Crs, Set, [U|Us], N+1, [{Cr,N}|Ts], Ms, NFA)
-		    end
-	    end;
-	[] ->
-	    build_dfa(Crs, Set, Us, N, Ts, Ms, NFA)
-    end;
-build_dfa([], _Set, Us, N, Ts, _Ms, _NFA) ->
-    {Ts,Us,N}.
-   
-%% eclosure([State], NFA) -> [State].
-%% move([State], Char, NFA) -> [State].
-%%  These are straight out of the book. As eclosure uses ordsets then
-%%  the generated state sets are in ascending order.
-
-eclosure(Sts, NFA) -> eclosure(Sts, NFA, []).
-
-eclosure([St|Sts], NFA, Ec) ->
-    #nfa_state{edges=Es} = element(St, NFA),
-    eclosure([ N || {epsilon,N} <- Es,
-		    not is_element(N, Ec) ] ++ Sts,
-	     NFA, add_element(St, Ec));
-eclosure([], _NFA, Ec) -> Ec.
-
-move(Sts, Cr, NFA) ->
-    [ St || N <- Sts,
-	    {Crs,St} <- (element(N, NFA))#nfa_state.edges,
-	   is_list(Crs),
-%% 	    begin
-%% 		?dbg("move1: ~p\n", [{Sts,Cr,Crs,in_crs(Cr,Crs)}]),
-%% 		true
-%% 	    end,
-	    in_crs(Cr, Crs) ].
-
-in_crs({C1,C2}, [{C3,C4}|_Crs]) when C1 >= C3, C2 =< C4 -> true;
-in_crs(Cr, [Cr|_Crs]) -> true;			%Catch bos and eos.
-in_crs(Cr, [_|Crs]) -> in_crs(Cr, Crs);
-in_crs(_Cr, []) -> false.
-
-%% accept([State], NFA) -> true | false.
-%%  Scan down the state list until we find an accepting state.
-
-accept([St|Sts], NFA) ->
-    case element(St, NFA) of
-	#nfa_state{accept={yes,A}} -> {yes,A};
-	#nfa_state{accept=no} -> accept(Sts, NFA)
-    end;
-accept([], _NFA) -> no.
-
-%% minimise_dfa(DFA, StartState, FirstState) -> {DFA,StartState}.
-%%  Minimise the DFA by removing equivalent states. We consider a
-%%  state if both the transitions and the their accept state is the
-%%  same.  First repeatedly run through the DFA state list removing
-%%  equivalent states and updating remaining transitions with
-%%  remaining equivalent state numbers. When no more reductions are
-%%  possible then pack the remaining state numbers to get consecutive
-%%  states.
-
-minimise_dfa(DFA0, Start, N) ->
-    case min_dfa(DFA0) of
-	{DFA1,[]} ->				%No reduction!
-	    {DFA2,Rs} = pack_dfa(DFA1, N),
-	    {min_update(DFA2, Rs),min_new_state(Start, Rs)};
-	{DFA1,Rs} ->
-	    minimise_dfa(min_update(DFA1, Rs), min_new_state(Start, Rs), N)
-    end.
-
-min_dfa(DFA) -> min_dfa(DFA, [], []).
-
-min_dfa([D|DFA0], Rs0, MDFA) ->
-    {DFA1,Rs1} = min_delete(DFA0, D#dfa_state.trans, D#dfa_state.accept, 
-			    D#dfa_state.no, Rs0, []),
-    min_dfa(DFA1, Rs1, [D|MDFA]);
-min_dfa([], Rs, MDFA) -> {MDFA,Rs}.
-
-min_delete([#dfa_state{no=N,trans=T,accept=A}|DFA], T, A, NewN, Rs, MDFA) ->
-    min_delete(DFA, T, A, NewN, [{N,NewN}|Rs], MDFA);
-min_delete([D|DFA], T, A, NewN, Rs, MDFA) ->
-    min_delete(DFA, T, A, NewN, Rs, [D|MDFA]);
-min_delete([], _T, _A, _NewN, Rs, MDFA) -> {MDFA,Rs}.
-
-min_update(DFA, Rs) ->
-    [ D#dfa_state{trans=min_update_trans(D#dfa_state.trans, Rs)} || D <- DFA ].
-
-min_update_trans(Tr, Rs) ->
-    [ {C,min_new_state(S, Rs)} || {C,S} <- Tr ].
-
-min_new_state(Old, [{Old,New}|_Reds]) -> New;
-min_new_state(Old, [_R|Reds]) -> min_new_state(Old, Reds);
-min_new_state(Old, []) -> Old.
-
-pack_dfa(DFA, N) -> pack_dfa(DFA, N, [], []).
-
-pack_dfa([D|DFA], NewN, Rs, PDFA) ->
-    pack_dfa(DFA, NewN+1, [{D#dfa_state.no,NewN}|Rs],
-	     [D#dfa_state{no=NewN}|PDFA]);
-pack_dfa([], _NewN, Rs, PDFA) -> {PDFA,Rs}.
-
-%% comp_apply(String, StartPos, DFAReg) -> {match,RestPos,Rest} | nomatch.
-%% Apply the DFA of a regular expression to a string.  If
-%%  there is a match return the position of the remaining string and
-%%  the string if else return 'nomatch'.
-%%
-%%  StartPos should be the real start position as it is used to decide
-%%  if we are at the beginning of the string.
-
-comp_apply(Cs, P, {DFA,Start,_Fail}) ->
-    comp_apply(element(Start, DFA), Cs, P, DFA, nomatch).
-
-comp_apply(#c_state{spec=[]}=St, Cs, P, DFA, Accept) ->
-    comp_apply_tr(St, Cs, P, DFA, Accept);
-comp_apply(#c_state{spec=Sp}=St, Cs, P, DFA, Accept) ->
-    comp_apply_sp(St, Cs, P, DFA, Accept, Sp).
-
-comp_apply_tr(#c_state{trans=none,accept=A}, Cs, P, _DFA, Accept) ->
-    %% End state.
-    accept_value(A, Cs, P, Accept);
-comp_apply_tr(#c_state{trans=Tr,tmin=Tmin,smin=Smin,tmax=Tmax,smax=Smax,accept=A},
-	      [C|Cs]=Cs0, P, DFA, Accept) ->
-    %% Get the next state number to go to.
-    NextSt = if  C =< Tmin -> Smin;		%Below transition table
-		 C >= Tmax -> Smax;		%Above transition table
-		 true ->			%Otherwise use table
- 		     element(C - Tmin, Tr)
-	     end,
-    comp_apply(element(NextSt, DFA), Cs, P+1, DFA,
-	       accept_value(A, Cs0, P, Accept));
-comp_apply_tr(#c_state{trans=_Tr,accept=A}, [], P, _DFA, Accept) ->
-    accept_value(A, [], P, Accept).
-
-comp_apply_sp(_St, Cs, 1, DFA, Accept, [{bos,S}|_]) ->
-    comp_apply(element(S, DFA), Cs, 1, DFA, Accept);
-comp_apply_sp(_St, [$\n], P, DFA, Accept, [{eos,S}|_]) ->
-    comp_apply(element(S, DFA), [], P, DFA, Accept);
-comp_apply_sp(_St, [], P, DFA, Accept, [{eos,S}|_]) ->
-    comp_apply(element(S, DFA), [], P, DFA, Accept);
-comp_apply_sp(St, Cs, P, DFA, Accept, [_|Sp]) ->
-    comp_apply_sp(St, Cs, P, DFA, Accept, Sp);
-comp_apply_sp(St, Cs, P, DFA, Accept, []) ->
-    comp_apply_tr(St, Cs, P, DFA, Accept).
-    
-accept_value(true, Cs, P, _Accept) -> {match,P,Cs};
-accept_value(false, _Cs, _P, Accept) -> Accept.
-
-%% compile(RegExp) -> {ok,RE} | {error,E}.
-%%  Parse the regexp described in the string RegExp.
-
-compile(RegExp) ->
-    case make_dfa([{RegExp,yes}], 2) of
-	{ok,{DFA0,Start}} ->
-	    Fail = 1,
-	    DFA1 = [#dfa_state{no=Fail,accept=no,trans=[]}|DFA0],
-	    DFA = tuplelise_dfa(DFA1, 1),
-	    {ok,{comp_regexp,{DFA,Start,Fail}}};
-	{error,E} -> {error,E}
-    end.
-
-%% tuplelise_dfa(DFAstates, NoAcceptState) -> {{CompState},FirstState}.
-
-tuplelise_dfa(DFA0, NoAccept) ->
-    DFA1 = map(fun (#dfa_state{no=N,trans=Ts,accept=A}) ->
-		       {Tr,Tmin,Smin,Tmax,Smax,Sp} = build_trans(Ts, NoAccept),
-		       #c_state{no=N,trans=Tr,tmin=Tmin,smin=Smin,
-				tmax=Tmax,smax=Smax,
-				accept=fix_accept(A),spec=Sp}
-	       end, DFA0),
-    list_to_tuple(keysort(#dfa_state.no, DFA1)).
-
-build_trans(Ts0, NoAccept) ->
-    %% Split transitions into character ranges and specials.
-    {Ts1,Sp1} = foldl(fun ({{_,_},_}=T, {Ts,Sp}) -> {[T|Ts],Sp};
-			  ({_,_}=T, {Ts,Sp}) -> {Ts,[T|Sp]}
-		      end, {[],[]}, Ts0),
-    if Ts1 == [] ->
-	    {none,none,none,none,none,Sp1};
-       true ->
-	    %% Have transitions, convert to tuple.
-	    Ts2 = keysort(1, Ts1),
-	    {Tmin,Smin,Ts3} = min_trans(Ts2, NoAccept),
-	    %% ?dbg("exptr: ~p\n", [{Ts3,Tmin}]),
-	    {Trans,Tmax,Smax} = expand_trans(Ts3, Tmin, NoAccept),
-	    {list_to_tuple(Trans),Tmin,Smin,Tmax,Smax,Sp1}
-    end.
-   
-min_trans([{{0,C2},S}|Crs], _Def) -> {C2,S,Crs};
-min_trans([{{C1,_C2},_S}|_]=Crs, Def) -> {C1-1,Def,Crs}.
-
-expand_trans([{{C1,maxchar},S}], Last, Def) ->
-    Trs = duplicate(C1-(Last+1), Def),
-    {Trs,C1,S};
-expand_trans([{{C1,C2},S}], Last, Def) ->
-    Trs = duplicate(C1-(Last+1), Def) ++ duplicate(C2-C1+1, S),
-    {Trs,C2+1,Def};
-expand_trans([{{C1,C2},S}|Crs], Last, Def) ->
-    {Trs0,Tmax,Smax} = expand_trans(Crs, C2, Def),
-    Trs1 = duplicate(C1-(Last+1), Def) ++ duplicate(C2-C1+1, S) ++ Trs0,
-    {Trs1,Tmax,Smax}.
-
-fix_accept({yes,_}) -> true;
-fix_accept(no) -> false.
-
diff --git a/lib/xmerl/src/xmerl_xsd_re.erl b/lib/xmerl/src/xmerl_xsd_re.erl
new file mode 100644
index 0000000000..9e7810f2ae
--- /dev/null
+++ b/lib/xmerl/src/xmerl_xsd_re.erl
@@ -0,0 +1,141 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2003-2025. All Rights Reserved.
+%%
+%% Licensed under the Apache License, Version 2.0 (the "License");
+%% you may not use this file except in compliance with the License.
+%% You may obtain a copy of the License at
+%%
+%%     http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing, software
+%% distributed under the License is distributed on an "AS IS" BASIS,
+%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+%% See the License for the specific language governing permissions and
+%% limitations under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+-module(xmerl_xsd_re).
+
+-export([map/1]). %% api
+
+-export([scan/1]).  %% test
+
+%% map/1
+%%
+%% Map an XSD 1.0 regular expression to an equivalent PCRE regular
+%% expression as understood by re(3). Raises error({?MODULE, Reason})
+%% when the token list produced by scan/1 cannot be parsed.
+-spec map(binary()) -> unicode:chardata().
+
+map(Bin) ->
+    case xmerl_xsd_re_parse:parse(scan(Bin)) of
+        {ok, RE} ->
+            %% May hold code points above 255: chardata, not iodata.
+            RE;
+        {error, Reason} ->
+            %% Tag the parser's reason with this module for the caller.
+            error({?MODULE, Reason})
+    end.
+
+%% scan/1
+%%
+%% Scanner for XSD 1.0 regular expressions as required by yecc. Just
+%% breaks the input into metacharacters, escapes (SingleCharEsc,
+%% MultiCharEsc, CatEsc/ComplEsc), digits, and other characters. Scan
+%% the entire input in one go since regular expressions aren't
+%% expected to be overly huge.
+
+-spec scan(binary()) -> [Tok]
+ when Tok :: {Sym, Pos}
+           | {Cat, Pos, Chr},
+      Sym :: eof
+           | '.' | '?' | '*' | '+' | '(' | ')' | '|' | '[' | ']'
+           | '{' | '}' | ',' | '-'
+           | '^' | '$' | ':',
+      Cat :: digit | multi | single | property | other,
+      Pos :: non_neg_integer(),
+      Chr :: pos_integer().
+
+scan(Bin) ->
+    scan(Bin, 0).
+
+%% scan/2
+
+scan(<<>>, N) ->
+    [{eof, N}];
+
+scan(<<$\\, C, B/binary>>, N)  %% SingleCharEsc
+  when C == $n;
+       C == $r;
+       C == $t;
+       C == $\\;
+       C == $|;
+       C == $.;
+       C == $?;
+       C == $*;
+       C == $+;
+       C == $(;
+       C == $);
+       C == ${;
+       C == $};
+       C == $-;
+       C == $[;
+       C == $];
+       C == $^ ->
+    [{single, N, C} | scan(B, N+2)];
+
+scan(<<$\\, C, B/binary>>, N)  %% MultiCharEsc
+  when C == $s;
+       C == $S;
+       C == $i;
+       C == $I;
+       C == $c;
+       C == $C;
+       C == $d;
+       C == $D;
+       C == $w;
+       C == $W ->
+    [{multi, N, C} | scan(B, N+2)];
+
+scan(<<$\\, C, B/binary>>, N)
+  when C == $p;
+       C == $P ->
+    [{property, N, C} | scan(B, N+2)];
+
+scan(<<C/utf8, B/binary>>, N)
+  when C /= $\\ ->
+    [chr(C, N) | scan(B, N+1)];
+
+scan(B, N) ->
+    error({?MODULE, N, B}).
+
+%% chr/2
+
+chr(C, N)
+  when C == $.;
+       C == $?;
+       C == $*;
+       C == $+;
+       C == $(;
+       C == $);
+       C == $|;
+       C == $[;
+       C == $];
+       C == ${;
+       C == $};
+       C == $,;
+       C == $-;
+       C == $^;
+       C == $$;
+       C == $: ->
+    {list_to_atom([C]), N};
+
+chr(C, N) when $0 =< C, C =< $9 ->
+    {digit, N, C};
+
+chr(C, N) ->
+    {other, N, C}.
diff --git a/lib/xmerl/src/xmerl_xsd_re_parse.yrl b/lib/xmerl/src/xmerl_xsd_re_parse.yrl
new file mode 100644
index 0000000000..dc6b35a9d6
--- /dev/null
+++ b/lib/xmerl/src/xmerl_xsd_re_parse.yrl
@@ -0,0 +1,578 @@
+Header "%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2003-2025. All Rights Reserved.
+%%
+%% Licensed under the Apache License, Version 2.0 (the \"License\");
+%% you may not use this file except in compliance with the License.
+%% You may obtain a copy of the License at
+%%
+%%     http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing, software
+%% distributed under the License is distributed on an \"AS IS\" BASIS,
+%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+%% See the License for the specific language governing permissions and
+%% limitations under the License.
+%%
+%% %CopyrightEnd%
+%%".
+
+%%
+%% A grammar for the XSD 1.0 regular expression used by the YANG
+%% pattern statement:
+%%
+%%   https://www.w3.org/TR/2004/REC-xmlschema-2-20041028/#regexs
+%%
+%% Produces an equivalent re(3) regular expression as an iolist(),
+%% mapping constructs as required.
+%%
+%% The 1.0 grammar is ambiguous in several places, rules being stated
+%% in text to try and remove the ambiguities. There are still problems
+%% however, which are noted in comments below. In particular, { as a
+%% regular character (Char) means that arbitrary lookahead is required
+%% to decide if it should be interpreted as a quantifier or not;
+%% for example, 0{12} vs 0{12. The 1.1 specification recognizes this
+%% and requires both { and } to be escaped, which is the solution
+%% adopted here: it's not clear if this is just a blunder in the 1.0
+%% spec, but probably since {} are listed as metacharacters that
+%% require escape to be interpreted as a normal character and no
+%% mention is made of the issue.
+%%
+
+Nonterminals
+  %% from the 1.0 grammar:
+  regExp branch piece atom
+  quantifier quantity 'QuantExact'
+  charClass 'Char' 'WildcardEsc'
+  charClassExpr charGroup
+  'XmlChar' 'SingleCharEsc' 'MultiCharEsc' charClassEsc catEsc
+  charProp
+  %% adaptations for yecc and disambiguation
+  'quantifier?' 'quantity?' 'quantmax?'
+  'branch*' 'digit*' 'subtract?' group
+  'group*' char
+  block 'block*' blockchar.
+
+Terminals
+  '.' '?' '*' '+' '(' ')' '|' '[' ']'  %% not a Normal Character
+  '{' '}' ',' '-'  %% Quantifier
+  '^' '$' ':'
+  digit single multi property other.
+
+Rootsymbol regExp.
+
+Endsymbol eof.
+
+%% ===========================================================================
+%% This is the XSD grammar with some modifications to avoid lookahead
+%% and otherwise adapt to yecc. The original XSD productions are
+%% provided as comments, followed by their yecc implementation.
+
+%% [1]     regExp     ::=      branch ( '|' branch )*
+
+%% XSD regular expressions are implicitly anchored at both ends.
+regExp -> branch 'branch*' : ['$1' | '$2'].
+
+'branch*' -> '$empty'   : [].
+'branch*' -> '|' regExp : [$| | '$2'].
+
+%% [2]     branch     ::=      piece*
+
+branch -> '$empty' : [].
+branch -> piece branch : ['$1', '$2'].
+
+%% [3]     piece      ::=      atom quantifier?
+
+piece -> atom 'quantifier?' : ['$1', '$2'].
+%% This is ambiguous since { can both be the start of a quantifier
+%% and an atom in its own right (through Char). For example, 0{1} is both
+%% a sequence of four atoms and one atom with a quantifier. Also,
+%% 0{12345 is a sequence of 7 atoms, so even if { as a quantifier takes
+%% precedence, this can require an unlimited amount of lookahead to
+%% decide that { isn't the start of a quantifier.
+%%
+%% This can't be intentional, and the grammar has changed in XSD 1.1
+%% to remove the ambiguity by disallowing {} in Char (renamed
+%% NormalChar). This is adopted here as the only reasonable solution.
+
+'quantifier?' -> '$empty' : [].
+'quantifier?' -> quantifier : '$1'.
+
+%% [4]     quantifier     ::=      [?*+] | ( '{' quantity '}' )
+%% [5]     quantity       ::=      quantRange | quantMin | QuantExact
+%% [6]     quantRange     ::=      QuantExact ',' QuantExact
+%% [7]     quantMin       ::=      QuantExact ','
+%% [8]     QuantExact     ::=      [0-9]+
+
+quantifier -> '?' : "?".
+quantifier -> '*' : "*".
+quantifier -> '+' : "+".
+quantifier -> '{' quantity '}' : [${, '$2', $}].
+
+quantity -> 'QuantExact' 'quantity?' : ['$1' | '$2'].
+
+'quantity?' -> '$empty' : [].
+'quantity?' -> ',' 'quantmax?' : [$,, '$2'].
+
+'quantmax?' -> '$empty' : [].
+'quantmax?' -> 'QuantExact' : '$1'.
+
+'QuantExact' -> digit 'digit*' : [value('$1') | '$2'].
+
+'digit*' -> '$empty' : [].
+'digit*' -> digit 'digit*' : [value('$1') | '$2'].
+
+%% [9]     atom       ::=      Char | charClass | ( '(' regExp ')' )
+
+atom -> 'Char' : '$1'.
+atom -> charClass : '$1'.
+atom -> '(' regExp ')' : [$(, '$2', $)].
+
+%% [10]    Char       ::=      [^.\?*+()|#x5B#x5D]
+
+'Char' -> other : value('$1').
+'Char' -> digit : value('$1').
+
+'Char' -> ',' : ",".
+'Char' -> '-' : "-".
+
+%% ^ and $ are not metacharacters in XSD regular expressions.
+'Char' -> '^' : "\\^".
+'Char' -> '$' : "\\$".
+
+%% Allowing {} as the grammar specifies requires arbitrary lookahead
+%% to decide whether { is the start of a quantifier or an atom in its
+%% own right, assuming a quantifier takes precedence; for example,
+%% 0{12345 vs 0{12345}. The 1.1 grammar recognises this and disallows
+%% it, which is the only reasonable solution.
+%'Char' -> '{' : "{".
+%'Char' -> '}' : "}".
+
+'Char' -> ':' : ":".
+
+%% [11]    charClass ::= charClassEsc | charClassExpr | WildcardEsc
+
+charClass -> 'SingleCharEsc' : '$1'.
+charClass -> charClassEsc : '$1'.
+charClass -> charClassExpr : '$1'.
+charClass -> 'WildcardEsc' : '$1'.
+
+%% [12]    charClassExpr ::= '[' charGroup ']'
+%% [13]    charGroup     ::= posCharGroup | negCharGroup | charClassSub
+%% [14]    posCharGroup  ::= ( charRange | charClassEsc )+
+%% [15]    negCharGroup  ::= '^' posCharGroup
+%% [16]    charClassSub  ::= ( posCharGroup | negCharGroup ) '-' charClassExpr
+
+charClassExpr -> '[' charGroup ']' : '$2'.
+
+%% Parse ^- as normal characters (char below) and verify the textual
+%% rules on their use at the end of the CharClassExpr since expressing
+%% it in grammar seems rife with shift/reduce conflict.
+charGroup -> group 'subtract?' : group('$1', '$2').
+
+%% Character Class Subtraction doesn't exist in PCRE. For example,
+%% [a-d-[c]] means one of abd in XSD, but one of abcd-[ followed by ]
+%% in PCRE. Map to a negative lookahead assertion: (?![c])[a-d]. These
+%% nest with the expected semantics.
+'subtract?' -> '$empty' : [].
+'subtract?' -> charClassExpr : ["(?!", '$1', $)].
+
+group -> char 'group*' : ['$1' | '$2'].
+
+char -> '-' : $-.
+char -> '^' : $^.
+char -> 'XmlChar' : '$1'.
+char -> 'SingleCharEsc' : '$1'.
+char -> charClassEsc : {class, '$1'}.
+
+'group*' -> '$empty' : [].
+'group*' -> char 'group*' : ['$1' | '$2'].
+
+%% [17]    charRange      ::= seRange | XmlCharIncDash
+%% [18]    seRange        ::= charOrEsc '-' charOrEsc
+%% [20]    charOrEsc      ::= XmlChar | SingleCharEsc
+%% [21]    XmlChar        ::= [^\#x2D#x5B#x5D]
+%% [22]    XmlCharIncDash ::= [^\#x5B#x5D]
+%%
+%% "A single XML character is a ·character range· that identifies the
+%%  set of characters containing only itself. All XML characters are
+%%  valid character ranges, except as follows:
+%%
+%%    The [, ], - and \ characters are not valid character ranges;
+%%    The ^ character is only valid at the beginning of a ·positive
+%%        character group· if it is part of a ·negative character group·
+%%    The - character is a valid character range only at the beginning
+%%        or end of a ·positive character group.
+%%
+%% Note: The grammar for ·character range· as given above is
+%%       ambiguous, but the second and third bullets above together
+%%       remove the ambiguity."
+
+%% The second rule above disallows including ^ in a character group:
+%% it can be excluded with [^^], but not included with [^]. (Which the
+%% 1.1 specification explicitly notes.)
+
+%% Anything except \[], or ^- to deal with the aforementioned ambiguity.
+'XmlChar' -> '.' : ".".
+'XmlChar' -> '?' : "?".
+'XmlChar' -> '*' : "*".
+'XmlChar' -> '+' : "+".
+'XmlChar' -> '(' : "(".
+'XmlChar' -> ')' : ")".
+'XmlChar' -> '|' : "|".
+'XmlChar' -> '{' : "{".
+'XmlChar' -> '}' : "}".
+'XmlChar' -> ',' : ",".
+'XmlChar' -> '$' : "$".     %% not a metacharacter in a PCRE range
+'XmlChar' -> ':' : "\\:". %% avoid [: :]
+'XmlChar' -> digit : value('$1').
+'XmlChar' -> other : value('$1').
+
+%% [23]    charClassEsc ::= ( SingleCharEsc | MultiCharEsc | catEsc | complEsc )
+
+%% Move SingleCharEsc up to be able to differentiate between
+%% character classes in a charGroup and as an atom.
+%charClassEsc -> 'SingleCharEsc' : '$1'.
+charClassEsc -> 'MultiCharEsc' : '$1'.
+charClassEsc -> catEsc : '$1'.
+
+%% [24]    SingleCharEsc ::= '\' [nrt\|.?*+(){}#x2D#x5B#x5D#x5E]
+
+'SingleCharEsc' -> single : escape('$1').
+
+%% [25]    catEsc     ::= '\p{' charProp '}'
+%% [26]    complEsc   ::= '\P{' charProp '}'
+
+catEsc -> property '{' charProp '}' : prop(value('$1'), '$3', prop('$3')).
+
+%% [27]    charProp    ::= IsCategory | IsBlock
+
+%% [28]    IsCategory  ::= Letters | Marks | Numbers | Punctuation | Separators | Symbols | Others
+%% [29]    Letters     ::= 'L' [ultmo]?
+%% [30]    Marks       ::= 'M' [nce]?
+%% [31]    Numbers     ::= 'N' [dlo]?
+%% [32]    Punctuation ::= 'P' [cdseifo]?
+%% [33]    Separators  ::= 'Z' [slp]?
+%% [34]    Symbols     ::= 'S' [mcko]?
+%% [35]    Others      ::= 'C' [cfon]?
+
+%% [36]    IsBlock     ::= 'Is' [a-zA-Z0-9#x2D]+
+
+charProp -> block : '$1'.
+
+block -> blockchar 'block*' : ['$1' | '$2'].
+
+'block*' -> '$empty' : [].
+'block*' -> blockchar 'block*' : ['$1' | '$2'].
+
+blockchar -> other : value('$1').
+blockchar -> digit : value('$1').
+blockchar -> '-' : $-.
+
+%% [37]    MultiCharEsc ::= '\' [sSiIcCdDwW]
+
+'MultiCharEsc' -> multi : escape('$1').
+
+%% [37a]       WildcardEsc ::= '.'
+
+'WildcardEsc' -> '.' : ".".
+
+Erlang code.
+%% value/1
+
+value({_,_,C}) ->
+    C;
+value({A,_}) ->
+    atom_to_list(A).
+
+%% escape/1: \i \I \c \C map to XML name character classes, others pass as is.
+%% Code points use \x{H}; Erlang strings have no \uHHHH escape (yields "uHHHH").
+escape({_,_,$i}) ->
+    ["[:A-Z_a-z\x{00C0}-\x{00D6}\x{00D8}-\x{00F6}\x{00F8}-\x{02FF}\x{0370}-\x{037D}\x{037F}-\x{1FFF}\x{200C}-\x{200D}\x{2070}-\x{218F}\x{2C00}-\x{2FEF}\x{3001}-\x{D7FF}\x{F900}-\x{FDCF}\x{FDF0}-\x{FFFD}]"];
+escape({_,_,$I}) ->
+    ["[^:A-Z_a-z\x{00C0}-\x{00D6}\x{00D8}-\x{00F6}\x{00F8}-\x{02FF}\x{0370}-\x{037D}\x{037F}-\x{1FFF}\x{200C}-\x{200D}\x{2070}-\x{218F}\x{2C00}-\x{2FEF}\x{3001}-\x{D7FF}\x{F900}-\x{FDCF}\x{FDF0}-\x{FFFD}]"];
+escape({_,_,$c}) ->
+    ["[-.0-9:A-Z_a-z\x{00B7}\x{00C0}-\x{00D6}\x{00D8}-\x{00F6}\x{00F8}-\x{037D}\x{037F}-\x{1FFF}\x{200C}-\x{200D}\x{203F}\x{2040}\x{2070}-\x{218F}\x{2C00}-\x{2FEF}\x{3001}-\x{D7FF}\x{F900}-\x{FDCF}\x{FDF0}-\x{FFFD}]"];
+escape({_,_,$C}) ->
+    ["[^-.0-9:A-Z_a-z\x{00B7}\x{00C0}-\x{00D6}\x{00D8}-\x{00F6}\x{00F8}-\x{037D}\x{037F}-\x{1FFF}\x{200C}-\x{200D}\x{203F}\x{2040}\x{2070}-\x{218F}\x{2C00}-\x{2FEF}\x{3001}-\x{D7FF}\x{F900}-\x{FDCF}\x{FDF0}-\x{FFFD}]"];
+escape({_,_,C}) ->
+    ["\\", C].
+
+%% group/2
+%%
+%% The mapping of character groups is slightly complex since neither
+%% Character Class Subtraction nor Block Escape exists in PCRE. The
+%% former is mapped to a negative lookahead assertion, the latter to a
+%% range. The range is complemented if need be to deal with negation
+%% in either the escape (ie. \P) or the group.
+%%
+%% Examples:
+%%
+%%  \P{IsBasicLatin}            ->  [^\U+0000-\U+007F]
+%%  [a-z\p{IsRunic}]            ->  [a-z\U+16A0-\U+16FF]
+%%  [^a-z\p{IsRunic}]           ->  [^a-z\U+0000-\U+169F\U+1700-\U+FFFF]
+%%  [0\P{IsBasicLatin}]         ->  [0\U+0080-\U+FFFF]
+%%  [a-z\P{IsBasicLatin}-[ei]]  ->  (?!([e]|[i]))[a-z\U+0080-\U+FFFF]
+%%
+%% (Replace \U+XXXX with actual characters.)
+
+group(Grp, Sub) ->
+    [Neg, Pre | Rest] = group(Grp),
+    [Sub, $[, Neg, Pre, alt(Rest, Sub /= [], Neg == $^), $]].
+
+%% group/1
+%%
+%% Extract leading ^ and/or -.
+
+group([$^, C | L])
+  when C == $^;
+       C == $- ->
+    [$^, C | L];
+
+group([$^ | L]) ->
+    [$^, [] | L];
+
+group([$- | L]) ->
+    [[], $- | L];
+
+group([_|_] = L) ->
+    [[], [] | L].
+
+%% alt/3
+%%
+%% Return a character group in which Block Escapes are inlined. The
+%% error reporting isn't great, since it doesn't point at a location
+%% in the expression, which would require passing more information
+%% through the parse.
+
+%%alt([$^ | _], _, _) ->
+%%    fail({invalid_range, $^});
+
+alt([$-, $-], false, _) ->
+    fail({invalid_range, "-"});
+
+alt([T, $-], Sub, Neg) ->
+    [case T of {class, C} -> class(Neg, C); _ -> T end, [$- || not Sub]];
+
+alt([$-], Sub, _) ->
+    [$- || not Sub];
+
+alt([_], true, _) ->
+    fail({invalid_range, "["});
+
+alt([$- | _], _, _) ->
+    fail({invalid_range, "-"});
+
+alt([{class, C} | Rest], Sub, Neg) ->
+    [class(Neg, C) | alt(Rest, Sub, Neg)];
+
+alt([C, $-, D | Rest], Sub, Neg) ->
+    [C, $-, D | alt(Rest, Sub, Neg)];
+
+alt([C | Rest], Sub, Neg) ->
+    [C | alt(Rest, Sub, Neg)];
+
+alt([] = L, false, _) ->
+    L;
+
+alt([], true, _) ->
+    fail({invalid_range, $[}).
+
+%% class/2
+%%
+%% Complement a range of a Block Escape if need be.
+
+class(Neg, [$[, P, A, $-, B, $]])
+  when P == "^", Neg;
+       P == [], not Neg ->
+    [A, $-, B];
+
+class(_, [$[, _, <<A/utf8>>, $-, <<B/utf8>>, $]]) ->
+    [[[<<L/utf8>>, $-, <<(A-1)/utf8>>] || L <- [0], L < A],
+     [[<<(B+1)/utf8>>, $-, <<H/utf8>>] || H <- [16#FFFF], B < H]];
+
+class(_, Cs) ->
+    Cs.
+
+%% prop/3
+
+prop(C, B, ok) ->
+    ["\\", C, ${, B, $}];
+
+prop(C, _, {Start, End}) ->
+    [$[, [$^ || C == $P], <<Start/utf8>>, $-, <<End/utf8>>, $]];
+
+prop(_, Block, false) ->
+    fail({unknown_block, Block}).
+
+%% prop/1
+%%
+%% All of the 1-2 letter properties are supported by PCRE, but the
+%% latter also supports Cs, so guard that only XSD properties are
+%% parsed.
+
+prop([P])
+  when P == $L;
+       P == $M;
+       P == $N;
+       P == $P;
+       P == $Z;
+       P == $S;
+       P == $C ->
+    ok;
+
+prop([$L, C])
+  when C == $u;
+       C == $l;
+       C == $t;
+       C == $m;
+       C == $o ->
+    ok;
+
+prop([$M, C])
+  when C == $n;
+       C == $c;
+       C == $e ->
+    ok;
+
+prop([$N, C])
+  when C == $d;
+       C == $l;
+       C == $o ->
+    ok;
+
+prop([$P, C])
+  when C == $c;
+       C == $d;
+       C == $s;
+       C == $e;
+       C == $i;
+       C == $f;
+       C == $o ->
+    ok;
+
+prop([$Z, C])
+  when C == $s;
+       C == $l;
+       C == $p ->
+    ok;
+
+prop([$S, C])
+  when C == $m;
+       C == $c;
+       C == $k;
+       C == $o ->
+    ok;
+
+prop([$C, C])
+  when C == $c;
+       C == $f;
+       C == $o;
+       C == $n ->
+    ok;
+
+prop([$I, $s | Rest]) ->
+    block(Rest);
+
+prop(B) ->
+    fail({unknown_property, B}).
+
+%% block/1
+%%
+%% Some of these are supported by PCRE, but many aren't. Map to a
+%% character range in each case.
+
+block("BasicLatin") -> {16#0000, 16#007F};
+block("Latin-1Supplement") -> {16#0080, 16#00FF};
+block("LatinExtended-A") -> {16#0100, 16#017F};
+block("LatinExtended-B") -> {16#0180, 16#024F};
+block("IPAExtensions") -> {16#0250, 16#02AF};
+block("SpacingModifierLetters") -> {16#02B0, 16#02FF};
+block("CombiningDiacriticalMarks") -> {16#0300, 16#036F};
+block("Greek") -> {16#0370, 16#03FF};
+block("Cyrillic") -> {16#0400, 16#04FF};
+block("Armenian") -> {16#0530, 16#058F};
+block("Hebrew") -> {16#0590, 16#05FF};
+block("Arabic") -> {16#0600, 16#06FF};
+block("Syriac") -> {16#0700, 16#074F};
+block("Thaana") -> {16#0780, 16#07BF};
+block("Devanagari") -> {16#0900, 16#097F};
+block("Bengali") -> {16#0980, 16#09FF};
+block("Gurmukhi") -> {16#0A00, 16#0A7F};
+block("Gujarati") -> {16#0A80, 16#0AFF};
+block("Oriya") -> {16#0B00, 16#0B7F};
+block("Tamil") -> {16#0B80, 16#0BFF};
+block("Telugu") -> {16#0C00, 16#0C7F};
+block("Kannada") -> {16#0C80, 16#0CFF};
+block("Malayalam") -> {16#0D00, 16#0D7F};
+block("Sinhala") -> {16#0D80, 16#0DFF};
+block("Thai") -> {16#0E00, 16#0E7F};
+block("Lao") -> {16#0E80, 16#0EFF};
+block("Tibetan") -> {16#0F00, 16#0FFF};
+block("Myanmar") -> {16#1000, 16#109F};
+block("Georgian") -> {16#10A0, 16#10FF};
+block("HangulJamo") -> {16#1100, 16#11FF};
+block("Ethiopic") -> {16#1200, 16#137F};
+block("Cherokee") -> {16#13A0, 16#13FF};
+block("UnifiedCanadianAboriginalSyllabics") -> {16#1400, 16#167F};
+block("Ogham") -> {16#1680, 16#169F};
+block("Runic") -> {16#16A0, 16#16FF};
+block("Khmer") -> {16#1780, 16#17FF};
+block("Mongolian") -> {16#1800, 16#18AF};
+block("LatinExtendedAdditional") -> {16#1E00, 16#1EFF};
+block("GreekExtended") -> {16#1F00, 16#1FFF};
+block("GeneralPunctuation") -> {16#2000, 16#206F};
+block("SuperscriptsandSubscripts") -> {16#2070, 16#209F};
+block("CurrencySymbols") -> {16#20A0, 16#20CF};
+block("CombiningMarksforSymbols") -> {16#20D0, 16#20FF};
+block("LetterlikeSymbols") -> {16#2100, 16#214F};
+block("NumberForms") -> {16#2150, 16#218F};
+block("Arrows") -> {16#2190, 16#21FF};
+block("MathematicalOperators") -> {16#2200, 16#22FF};
+block("MiscellaneousTechnical") -> {16#2300, 16#23FF};
+block("ControlPictures") -> {16#2400, 16#243F};
+block("OpticalCharacterRecognition") -> {16#2440, 16#245F};
+block("EnclosedAlphanumerics") -> {16#2460, 16#24FF};
+block("BoxDrawing") -> {16#2500, 16#257F};
+block("BlockElements") -> {16#2580, 16#259F};
+block("GeometricShapes") -> {16#25A0, 16#25FF};
+block("MiscellaneousSymbols") -> {16#2600, 16#26FF};
+block("Dingbats") -> {16#2700, 16#27BF};
+block("BraillePatterns") -> {16#2800, 16#28FF};
+block("CJKRadicalsSupplement") -> {16#2E80, 16#2EFF};
+block("KangxiRadicals") -> {16#2F00, 16#2FDF};
+block("IdeographicDescriptionCharacters") -> {16#2FF0, 16#2FFF};
+block("CJKSymbolsandPunctuation") -> {16#3000, 16#303F};
+block("Hiragana") -> {16#3040, 16#309F};
+block("Katakana") -> {16#30A0, 16#30FF};
+block("Bopomofo") -> {16#3100, 16#312F};
+block("HangulCompatibilityJamo") -> {16#3130, 16#318F};
+block("Kanbun") -> {16#3190, 16#319F};
+block("BopomofoExtended") -> {16#31A0, 16#31BF};
+block("EnclosedCJKLettersandMonths") -> {16#3200, 16#32FF};
+block("CJKCompatibility") -> {16#3300, 16#33FF};
+block("CJKUnifiedIdeographsExtensionA") -> {16#3400, 16#4DB5};
+block("CJKUnifiedIdeographs") -> {16#4E00, 16#9FFF};
+block("YiSyllables") -> {16#A000, 16#A48F};
+block("YiRadicals") -> {16#A490, 16#A4CF};
+block("HangulSyllables") -> {16#AC00, 16#D7A3};
+block("PrivateUse") -> {16#E000, 16#F8FF};
+block("CJKCompatibilityIdeographs") -> {16#F900, 16#FAFF};
+block("AlphabeticPresentationForms") -> {16#FB00, 16#FB4F};
+block("ArabicPresentationForms-A") -> {16#FB50, 16#FDFF};
+block("CombiningHalfMarks") -> {16#FE20, 16#FE2F};
+block("CJKCompatibilityForms") -> {16#FE30, 16#FE4F};
+block("SmallFormVariants") -> {16#FE50, 16#FE6F};
+block("ArabicPresentationForms-B") -> {16#FE70, 16#FEFE};
+%block("Specials") -> {16#FEFF, 16#FEFF};
+block("HalfwidthandFullwidthForms") -> {16#FF00, 16#FFEF};
+%block("Specials") -> {16#FFF0, 16#FFFD};
+block("Specials") -> {16#FEFF, 16#FFFD};
+
+block(_) -> false.
+
+%% fail/1
+
+fail(T) ->
+    error({?MODULE, T}).
diff --git a/lib/xmerl/src/xmerl_xsd_type.erl b/lib/xmerl/src/xmerl_xsd_type.erl
index 612a4ae340..32f0725558 100644
--- a/lib/xmerl/src/xmerl_xsd_type.erl
+++ b/lib/xmerl/src/xmerl_xsd_type.erl
@@ -14,7 +14,7 @@
 %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 %% See the License for the specific language governing permissions and
 %% limitations under the License.
-%% 
+%%
 %% %CopyrightEnd%
 %%
 
@@ -93,7 +93,7 @@ check_simpleType(double,Value,_S) ->
 % extended format PnYnMnDTnHnMnS where n is an integer. The n value
 % before S may include decimal fraction.
 check_simpleType(duration,Value,_S) ->
-    ?catch_exit(check_duration(Value),Value,invalid_duration);	
+    ?catch_exit(check_duration(Value),Value,invalid_duration);
 check_simpleType(dateTime,Value,_S) ->
     ?catch_exit(check_dateTime(Value),Value,invalid_dateTime);
 check_simpleType(time,Value,_S) ->
@@ -269,7 +269,7 @@ check_duration("P"++Value) ->
     {Date,Time}=lists:splitwith(fun($T) -> false;(_) -> true end,Value),
     {ok,_} = check_duration_date(Date,["Y","M","D"]),
     {ok,_} = check_duration_time(Time,["T","H","M","S"]).
-	    
+
 check_duration_date("",_) ->
     {ok,""};
 check_duration_date(Date,[H|T]) ->
@@ -284,7 +284,7 @@ check_duration_date(Date,[H|T]) ->
     end.
 %% Time any combination of TnHnMfS
 %% n unsigned integers and f unsigned decimal
-%%check_duration_time(Time,["T","H","M","S"]) 
+%%check_duration_time(Time,["T","H","M","S"])
 check_duration_time("",[_H|_T]) ->
     {ok,""};
 check_duration_time(Time,[S]) ->
@@ -318,7 +318,7 @@ check_positive_integer(Value) ->
 %% check_integer and thereof derived types
 check_integer(Value) ->
     {ok,list_to_integer(Value)}.
-    
+
 check_nonPositiveInteger(Value) ->
     check_constr_int(Value,undefined,0,illegal_nonPositiveInteger).
 
@@ -371,7 +371,7 @@ check_constr_int(Value,Min,Max,ErrMsg) ->
 	    {error,{ErrMsg}}
     end.
 
-%% DateTime on form: '-'? yyyy '-' mm '-' dd 'T' hh ':' mm ':' ss 
+%% DateTime on form: '-'? yyyy '-' mm '-' dd 'T' hh ':' mm ':' ss
 %% ('.' s+)? (zzzzzz)?
 check_dateTime("-"++DateTime) ->
     check_dateTime(DateTime);
@@ -400,14 +400,14 @@ check_month(Str) ->
     case check_positive_integer(Str) of
 	{ok,Int} when Int >= 1,Int =< 12 ->
 	    {ok,Int};
-	_ -> 
+	_ ->
 	    {error,{invalid_month,Str}}
     end.
 check_day(Str) ->
     case check_positive_integer(Str) of
 	{ok,Int} when Int >= 1,Int =< 31 ->
 	    {ok,Int};
-	_ -> 
+	_ ->
 	    {error,{invalid_day,Str}}
     end.
 
@@ -498,7 +498,7 @@ check_date(Date) ->
     {ok,_}=check_year(Year),
     {ok,_}=check_month(Month),
     {ok,_}=check_day(Day).
-    
+
 %% gYearMonth on the form: '-'? ccyy '-' mm zzzzzz?
 check_gYearMonth("-"++Value) ->
     check_gYearMonth(Value);
@@ -531,7 +531,7 @@ check_gYear(Value) ->
 		Y
 	end,
     {ok,_} = check_year(Year).
-    
+
 %% gMonthDay on the form: mm dd zzzzzz?
 check_gMonthDay("--"++Value) ->
     {M,"-"++DTZ} = lists:split(2,Value),
@@ -652,7 +652,7 @@ check_IDREF(Value) ->
 
 check_IDREFS(Value) ->
     check_list_type(Value,fun check_IDREF/1).
-    
+
 check_ENTITY(Value) ->
     true = xmerl_lib:is_ncname(Value),
     {ok,Value}.
@@ -665,11 +665,11 @@ check_list_type(Value,BaseTypeFun) ->
     lists:foreach(BaseTypeFun,Tokens),
     {ok,Value}.
 
-ns_whitespace(WS) when WS==16#9;WS==16#A;WS==16#D -> 
+ns_whitespace(WS) when WS==16#9;WS==16#A;WS==16#D ->
     true;
 ns_whitespace(_) ->
     false.
-    
+
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 %%  facet functions
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
@@ -708,7 +708,7 @@ facet_fun(Type,F) ->
     end.
 
 
-length_fun(T,V) 
+length_fun(T,V)
   when T==string;T==normalizedString;T==token;
        T=='Name';T=='NCName';T==language;T=='ID';
        T=='IDREF';T=='IDREFS';T=='ENTITY';T=='ENTITIES';
@@ -735,7 +735,7 @@ length_fun(T,_V) ->
 	    {error,{length_not_applicable_on,T}}
     end.
 
-minLength_fun(T,V) 
+minLength_fun(T,V)
   when T==string;T==normalizedString;T==token;
        T=='Name';T=='NCName';T==language;T=='ID';
        T=='IDREF';T=='IDREFS';T=='ENTITY';T=='ENTITIES';
@@ -762,7 +762,7 @@ minLength_fun(T,_V) ->
 	    {error,{minLength_not_applicable_on,T}}
     end.
 
-maxLength_fun(T,V) 
+maxLength_fun(T,V)
   when T==string;T==normalizedString;T==token;
        T=='Name';T=='NCName';T==language;T=='ID';
        T=='IDREF';T=='IDREFS';T=='ENTITY';T=='ENTITIES';
@@ -789,21 +789,30 @@ maxLength_fun(T,_V) ->
 	    {error,{maxLength_not_applicable_on,T}}
     end.
 
-pattern_fun(_Type,RegExp) ->
-    case xmerl_regexp:setup(RegExp) of
-	{ok,RE} ->
-	    fun(Val) ->
-		    case xmerl_regexp:first_match(Val,RE) of
-			{match,_,_} -> {ok,Val};
-			_ -> {error,{pattern_mismatch,Val,RegExp}}
-		    end
-	    end;
-	_ ->
-	    fun(Val) ->
-		    {error,{unsupported_pattern,Val,RegExp}}
-	    end
+pattern_fun(_Type, RegExp) ->
+    try % translate the XSD pattern to PCRE and compile it once, up front
+        ReRegExp = xmerl_xsd_re:map(conv_list_to_binary(RegExp)),
+        BinReRegExp = unicode:characters_to_binary(lists:flatten(ReRegExp)),
+        {ok, CompiledRegExp} = % ^(...)$ makes the pattern match the whole value
+            re:compile([$^, $(, BinReRegExp, $), $$], [no_auto_capture, unicode]),
+        fun(Val) ->
+                case re:run(conv_list_to_binary(Val), CompiledRegExp) of
+                    {match, _} -> {ok, Val};
+                    _ -> {error, {pattern_mismatch, Val, RegExp}}
+                end
+        end
+    catch
+        _:_ -> % failures above are errors, which a bare `_` (throw only) misses
+            fun(Val) ->
+                    {error, {unsupported_pattern, Val, RegExp}}
+            end
+    end.
 
+conv_list_to_binary(V) when is_binary(V) ->
+    V;
+conv_list_to_binary(V) when is_list(V) ->
+    unicode:characters_to_binary(V).
+
 enumeration_fun(_Type,V) ->
     fun(Val) ->
 	    case lists:member(Val,V) of
@@ -845,8 +854,8 @@ collapse_ws([H|T],Acc) ->
     collapse_ws(T,[H|Acc]);
 collapse_ws([],Acc) ->
     lists:reverse(lists:dropwhile(fun($ ) ->true;(_) -> false end,Acc)).
-    
-maxInclusive_fun(T,V) 
+
+maxInclusive_fun(T,V)
   when T==integer;T==positiveInteger;T==negativeInteger;
        T==nonNegativeInteger;T==nonPositiveInteger;T==long;
        T==unsignedLong;T==int;T==unsignedInt;T==short;
@@ -895,7 +904,7 @@ maxInclusive_fun(T,_V) ->
 %%        T==gMonth;T==gMonthDay;T==gDay ->
     fun(_) -> {error,{maxInclusive,not_implemented_for,T}} end.
 
-maxExclusive_fun(T,V) 
+maxExclusive_fun(T,V)
   when T==integer;T==positiveInteger;T==negativeInteger;
        T==nonNegativeInteger;T==nonPositiveInteger;T==long;
        T==unsignedLong;T==int;T==unsignedInt;T==short;
@@ -904,7 +913,7 @@ maxExclusive_fun(T,V)
 	    try list_to_integer(Val) < list_to_integer(V) of
 		true ->
 		    {ok,Val};
-		false -> 
+		false ->
 		    {error,{maxExclusive,Val,not_less_than,V}}
             catch
                 _:_ ->
@@ -942,7 +951,7 @@ maxExclusive_fun(T,V) when T==dateTime ->
 maxExclusive_fun(T,_V) ->
     fun(_) -> {error,{maxExclusive,not_implemented_for,T}} end.
 
-minExclusive_fun(T,V) 
+minExclusive_fun(T,V)
   when T==integer;T==positiveInteger;T==negativeInteger;
        T==nonNegativeInteger;T==nonPositiveInteger;T==long;
        T==unsignedLong;T==int;T==unsignedInt;T==short;
@@ -951,7 +960,7 @@ minExclusive_fun(T,V)
 	    try list_to_integer(Val) > list_to_integer(V) of
 		true ->
 		    {ok,Val};
-		false -> 
+		false ->
                     {error,{minExclusive,Val,not_greater_than,V}}
             catch
                 _:_ ->
@@ -989,7 +998,7 @@ minExclusive_fun(T,V) when T==dateTime ->
 minExclusive_fun(T,_V) ->
     fun(_) -> {error,{minExclusive,not_implemented_for,T}} end.
 
-minInclusive_fun(T,V) 
+minInclusive_fun(T,V)
   when T==integer;T==positiveInteger;T==negativeInteger;
        T==nonNegativeInteger;T==nonPositiveInteger;T==long;
        T==unsignedLong;T==int;T==unsignedInt;T==short;
@@ -998,7 +1007,7 @@ minInclusive_fun(T,V)
 	    try list_to_integer(Val) >= list_to_integer(V) of
 		true ->
 		    {ok,Val};
-		false -> 
+		false ->
 		    {error,{minInclusive,Val,not_greater_than_or_equal_with,V}}
             catch
                 _:_ ->
@@ -1035,7 +1044,7 @@ minInclusive_fun(T,V) when T==dateTime ->
     end;
 minInclusive_fun(T,_V) ->
     fun(_) -> {error,{minInclusive,not_implemented_for,T}} end.
-    
+
 totalDigits_fun(T,V)
   when T==integer;T==positiveInteger;T==negativeInteger;T==nonNegativeInteger;
        T==nonPositiveInteger;T==long;T==unsignedLong;T==int;T==unsignedInt;
@@ -1051,7 +1060,7 @@ totalDigits_fun(T,V)
 		case lists:member($.,Val2) of
 		    true ->
 			length(lists:dropwhile(Pred,lists:reverse(Val2))) -1;
-		    _ -> 
+		    _ ->
 			length(Val2)
 		end,
 	    if
@@ -1063,12 +1072,12 @@ totalDigits_fun(T,V)
     end;
 totalDigits_fun(T,_V) ->
     fun(_) -> {error,{totalDigits,not_applicable,T}} end.
-		     
+
 fractionDigits_fun(T,V)
   when T==integer;T==positiveInteger;T==negativeInteger;T==nonNegativeInteger;
        T==nonPositiveInteger;T==long;T==unsignedLong;T==int;T==unsignedInt;
        T==short;T==unsignedShort;T==byte;T==unsignedByte;T==decimal ->
-    fun(Val) ->	
+    fun(Val) ->
 	    Len =
 		case string:tokens(Val,".") of
 		    [_I,Frc] when T==decimal ->
@@ -1079,7 +1088,7 @@ fractionDigits_fun(T,V)
 		    _ ->
 			0
 		end,
-	    if 
+	    if
 		Len =< V ->
 		    {ok,Val};
 		true ->
@@ -1088,7 +1097,7 @@ fractionDigits_fun(T,V)
     end;
 fractionDigits_fun(T,_V) ->
     fun(_) -> {error,{fractionDigits,not_applicable,T}} end.
-    
+
 
 %% The relation between F1 and F2 may be eq,lt or gt.
 %% lt: F1 < F2
@@ -1130,13 +1139,13 @@ compare_floats2({S1,B1,D1,E1},{_S2,B2,D2,E2}) ->
 	I1 < I2 -> sign(S1,lt);
 	true ->
 	    %% fractions are compared in lexicographic order
-	    if 
+	    if
 		D1 == D2 -> eq;
 		D1 < D2 -> sign(S1,lt);
 		D1 > D2 -> sign(S1,gt)
 	    end
     end.
-    
+
 str_to_float(String) ->
     {Sign,Str} =
 	case String of
@@ -1172,7 +1181,7 @@ pow(Mantissa,Exponent) ->
     end.
 
 pow(Mantissa,Fraction,Exponent) ->
-    (Mantissa * math:pow(10,Exponent)) + 
+    (Mantissa * math:pow(10,Exponent)) +
 	(list_to_integer(Fraction) * math:pow(10,Exponent-length(Fraction))).
 
 sign('-',gt) ->
@@ -1195,7 +1204,7 @@ remove_trailing_zeros(Str) ->
 %%        T==gMonth;T==gMonthDay;T==gDay ->
 
 %% compare_duration(V1,V2) compares V1 to V2
-%% returns gt | lt | eq | indefinite 
+%% returns gt | lt | eq | indefinite
 %% ex: V1 > V2 -> gt
 %%
 %% V1, V2 on format PnYnMnDTnHnMnS
@@ -1283,7 +1292,7 @@ compare_dateTime(P,Q) when is_list(Q) ->
     compare_dateTime(P,normalize_dateTime(dateTime_atoms(Q)));
 compare_dateTime(_P,_Q) ->
     indefinite.
-    
+
 fQuotient(A,B) when is_float(A) ->
     fQuotient(erlang:floor(A),B);
 fQuotient(A,B) when is_float(B) ->
@@ -1308,7 +1317,7 @@ modulo(A,B) ->
 
 modulo(A, Low, High) ->
     modulo(A - Low, High - Low) + Low.
-    
+
 maximumDayInMonthFor(YearValue, MonthValue) ->
     M = modulo(MonthValue, 1, 13),
     Y = YearValue + fQuotient(MonthValue, 1, 13),
@@ -1330,7 +1339,7 @@ monthValue(_M,Y) ->
 		    28
 	    end
     end.
-		
+
 %% S dateTime, D duration
 %% result is E dateTime, end of time period with start S and duration
 %% D. E = S + D.
@@ -1357,25 +1366,25 @@ add_duration2dateTime2({Syear,Smonth,Sday,Shour,Sminute,Ssec,Szone},
     Temp1 = Smonth + Dmonths,
     Emonth = modulo(Temp1,1,13),
     Carry1 = fQuotient(Temp1,1,13),
-    
+
     %% years
     Eyear = Syear + Dyears + Carry1,
-    
+
     %% seconds
     Temp2 = Ssec + Dsecs,
     Esecs = modulo(Temp2,60),
     Carry2 = fQuotient(Temp2,60),
-    
+
     %% minutes
     Temp3 = Sminute + Dminutes + Carry2,
     Eminute = modulo(Temp3,60),
     Carry3 = fQuotient(Temp3,60),
-    
+
     %% hours
     Temp4 = Shour + Dhours + Carry3,
     Ehour = modulo(Temp4,24),
     Carry4 = fQuotient(Temp4,24),
-    
+
     %% days
     TempDays =
 	case maximumDayInMonthFor(Eyear,Emonth) of
@@ -1451,7 +1460,7 @@ zone_atoms(Sign,Zone) when is_list(Zone) ->
 zone_atoms(_Sign,Zone) ->
     Zone.
 
-    
+
 %% Format: '-'? PnYnMnDTnHnMnS
 duration_atoms("-P"++Dur) ->
     duration_atoms2(Dur,neg);
@@ -1539,8 +1548,8 @@ get_sec([$S|T],Acc,_) ->
     {lists:reverse(Acc),T};
 get_sec(_,_,Str) ->
     {"0",Str}.
-    
-	    
+
+
 set_sign(pos,Istr) ->
     list_to_integer(Istr);
 set_sign(_,Istr) ->
@@ -1572,7 +1581,7 @@ normalize_dateTime({Y,M,D,Hour,Min,Sec,{Sign,ZH,ZM}}) ->
     TmpHour = Hour + set_sign(invert_sign(Sign),integer_to_list(ZH)) + Carry1,
     NHour = modulo(TmpHour,24),
     Carry2 = fQuotient(TmpHour,24),
-    
+
     {NY,NM,ND} =
 	carry_loop(D+Carry2,M,Y),
     {NY,NM,ND,NHour,NMin,Sec,{pos,0,0}};
diff --git a/lib/xmerl/test/xmerl_SUITE.erl b/lib/xmerl/test/xmerl_SUITE.erl
index 11ec26192f..4f3e99fbe8 100644
--- a/lib/xmerl/test/xmerl_SUITE.erl
+++ b/lib/xmerl/test/xmerl_SUITE.erl
@@ -653,7 +653,6 @@ allow_entities_test(Config) ->
         (catch xmerl_scan:file(File, [{allow_entities, false}])),
     ok.
 
-
 %%======================================================================
 %% Support Functions
 %%======================================================================
diff --git a/lib/xmerl/test/xmerl_xsd_SUITE.erl b/lib/xmerl/test/xmerl_xsd_SUITE.erl
index 3060f27e6c..a568cd102c 100644
--- a/lib/xmerl/test/xmerl_xsd_SUITE.erl
+++ b/lib/xmerl/test/xmerl_xsd_SUITE.erl
@@ -66,7 +66,8 @@ groups() ->
        sis2, state2file_file2state, union]},
      {ticket_tests, [],
       [ticket_6910, ticket_7165, ticket_7190, ticket_7288,
-       ticket_7736, ticket_8599, ticket_9410, ticket_19792]},
+       ticket_7736, ticket_8599, ticket_9410, ticket_19762,
+       ticket_19792]},
      {facets, [],
       [length, minLength, maxLength, pattern, enumeration,
        whiteSpace, maxInclusive, maxExclusive, minExclusive,
@@ -965,10 +965,14 @@ ticket_8599(Config) ->
     
     {{xmlElement,persons,persons,_,_,_,_,_,_,_,_,_},_GlobalState} = xmerl_xsd:validate(E, S).
 
-
 ticket_9410(Config) ->
-    file:set_cwd(datadir_join(Config,[".."])),
-    {ok, _S} = xmerl_xsd:process_schema("xmerl_xsd_SUITE_data/small.xsd").
+    {ok, _S} = xmerl_xsd:process_schema(datadir_join(Config,["small.xsd"])).
+
+ticket_19762(Config) ->
+    {E, _} = xmerl_scan:file(datadir_join(Config,["ticket_19762.xml"]),[]),
+    {ok, S} = xmerl_xsd:process_schema(datadir_join(Config,["ticket_19762.xsd"])),
+    {E, _} = xmerl_xsd:validate(E, S),
+    ok.
 
 
 ticket_19792(Config) ->
diff --git a/lib/xmerl/test/xmerl_xsd_SUITE_data/ticket_19762.xml b/lib/xmerl/test/xmerl_xsd_SUITE_data/ticket_19762.xml
new file mode 100644
index 0000000000..792ebde504
--- /dev/null
+++ b/lib/xmerl/test/xmerl_xsd_SUITE_data/ticket_19762.xml
@@ -0,0 +1,4 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<body>
+  <test testattr="Testing"/>
+</body>
diff --git a/lib/xmerl/test/xmerl_xsd_SUITE_data/ticket_19762.xsd b/lib/xmerl/test/xmerl_xsd_SUITE_data/ticket_19762.xsd
new file mode 100644
index 0000000000..3af9f59a8c
--- /dev/null
+++ b/lib/xmerl/test/xmerl_xsd_SUITE_data/ticket_19762.xsd
@@ -0,0 +1,26 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="file:///C:/xsd/unpack-1/LKF-XSD/LicFormat20_Schema_Rev_E_MOD.xsd">
+  <xs:attribute name="testattr">
+    <xs:simpleType>
+      <xs:restriction base="xs:string">
+	<xs:minLength value="5"/>
+	<xs:maxLength value="20"/>
+	<xs:pattern value="([a-zA-Z0-9\-\s/_])*"/>
+      </xs:restriction>
+    </xs:simpleType>
+  </xs:attribute>
+
+  <xs:element name="test">
+    <xs:complexType>
+      <xs:attribute ref="testattr" use="required"/>
+    </xs:complexType>
+  </xs:element>
+  
+  <xs:element name="body">
+    <xs:complexType>
+      <xs:sequence>
+	<xs:element ref="test"/>
+      </xs:sequence>
+    </xs:complexType>
+  </xs:element>
+</xs:schema>
-- 
2.51.0
Places

File 0694-Fix-xml-regexp-bug-in-XSD-validation.patch of Package erlang

Places