File 3371-add-xmerl_xml_indent-module.patch of Package erlang
From 802bebd9289f54b1d2d4ed80d35871e279c30ecf Mon Sep 17 00:00:00 2001
From: "Fabian N.C. van 't Hooft" <fnchooft@gmail.com>
Date: Sun, 10 Dec 2023 20:56:06 -0300
Subject: [PATCH] add xmerl_xml_indent-module
The indent module was added to provide out of the box
indented output instead of the standard xmerl_xml-module.
The code is based on the Elixir xmerl_xml_indent-package.
That package can be found on hex.pm.
The documentation was updated to mention the module and its
use using the motorcycle-example.
Test-cases have been added to the xmerl-SUITE as requested
by OTP-maintenance team.
---
lib/xmerl/doc/src/xmerl_ug.xmlsrc | 21 +++--
lib/xmerl/src/Makefile | 1 +
lib/xmerl/src/xmerl.app.src | 1 +
lib/xmerl/src/xmerl_xml_indent.erl | 86 ++++++++++++++++++
lib/xmerl/test/xmerl_SUITE.erl | 137 +++++++++++++++++++++++++----
5 files changed, 222 insertions(+), 24 deletions(-)
create mode 100644 lib/xmerl/src/xmerl_xml_indent.erl
diff --git a/lib/xmerl/doc/src/xmerl_ug.xmlsrc b/lib/xmerl/doc/src/xmerl_ug.xmlsrc
index b3fcbf0930..8f96bc23c9 100644
--- a/lib/xmerl/doc/src/xmerl_ug.xmlsrc
+++ b/lib/xmerl/doc/src/xmerl_ug.xmlsrc
@@ -11,7 +11,7 @@
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
-
+
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
@@ -19,7 +19,7 @@
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-
+
</legalnotice>
<title>xmerl</title>
@@ -171,7 +171,7 @@ Grand Danois\
<p>If you want to parse the XML file motorcycles.xml you run
it in the Erlang shell like:</p>
<pre>
-3> {ParsResult,Misc}=xmerl_scan:file("motorcycles.xml").
+3> {ParseResult,Misc}=xmerl_scan:file("motorcycles.xml").
{{xmlElement,motorcycles,
motorcycles,
[],
@@ -279,7 +279,7 @@ Grand Danois\
<item>Name = atom()</item>
<item>Value = IOString | atom() | integer()</item>
</list>
- <p>See also reference manual for
+ <p>See also reference manual for
<seeerl marker="xmerl#export_simple-3">xmerl</seeerl></p>
<p>If you want to add the information about a black Harley
Davidsson 1200 cc Sportster motorcycle from 2003 that is in
@@ -359,6 +359,18 @@ Data =
... </pre>
<p>The result will be: </p>
<codeinclude file="new_motorcycles2.txt" tag="" type="none"></codeinclude>
+ <p>
+
+ The generated XML above was formatted for readability.
+
+ Another exporter which indents the code with 2 spaces can also be used.
+
+ In order to use it one only needs to change the export-module:</p>
+ <pre>
+ ...
+ Export=xmerl:export_simple([NewRootEl],xmerl_xml_indent,[{prolog,Prolog}]),
+ ...
+ </pre>
</section>
<section>
@@ -488,4 +500,3 @@ template(E = #xmlElement{name='bike'}) ->
elements and the 'manufacturer' elements are not in order.</p>
</section>
</chapter>
-
diff --git a/lib/xmerl/src/Makefile b/lib/xmerl/src/Makefile
index e7e7c8e978..ff84b7de8d 100644
--- a/lib/xmerl/src/Makefile
+++ b/lib/xmerl/src/Makefile
@@ -77,6 +77,7 @@ MODULES = $(EDOC_MODULES) \
xmerl_validate \
xmerl_xlate \
xmerl_xml \
+ xmerl_xml_indent \
xmerl_xpath_lib \
xmerl_xpath_parse \
xmerl_xpath_pred \
diff --git a/lib/xmerl/src/xmerl.app.src b/lib/xmerl/src/xmerl.app.src
index aed9cf176f..f3ab13ee26 100644
--- a/lib/xmerl/src/xmerl.app.src
+++ b/lib/xmerl/src/xmerl.app.src
@@ -27,6 +27,7 @@
xmerl_validate,
xmerl_xlate,
xmerl_xml,
+ xmerl_xml_indent,
xmerl_xpath,
xmerl_xpath_lib,
xmerl_xpath_parse,
diff --git a/lib/xmerl/src/xmerl_xml_indent.erl b/lib/xmerl/src/xmerl_xml_indent.erl
new file mode 100644
index 0000000000..deb9f0890e
--- /dev/null
+++ b/lib/xmerl/src/xmerl_xml_indent.erl
@@ -0,0 +1,86 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2003-2016. All Rights Reserved.
+%%
+%% Licensed under the Apache License, Version 2.0 (the "License");
+%% you may not use this file except in compliance with the License.
+%% You may obtain a copy of the License at
+%%
+%% http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing, software
+%% distributed under the License is distributed on an "AS IS" BASIS,
+%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+%% See the License for the specific language governing permissions and
+%% limitations under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+%% Description : Callback module for exporting complete or simple forms to indented XML.
+%%
+%% This module indents the xml with 2 spaces and a newline \n.
+%% Currently the implementation does not allow it to be configured.
+%% The implementation is based on the same Elixir implementation.
+%% https://hexdocs.pm/xmerl_xml_indent/readme.html
+
+-module(xmerl_xml_indent).
+
+-export(['#xml-inheritance#'/0]).
+
+-export(['#root#'/4,
+ '#element#'/5,
+ '#text#'/1]).
+
+-import(xmerl_lib, [markup/3, empty_tag/2, export_text/1]).
+
+-include("xmerl.hrl").
+-include("xmerl_internal.hrl").
+
+
+'#xml-inheritance#'() -> [].
+
+
+%% The '#text#' function is called for every text segment.
+
+'#text#'(Text) ->
+ export_text(Text).
+
+
+%% The '#root#' tag is called when the entire structure has been
+%% exported. It does not appear in the structure itself.
+
+'#root#'(Data, [#xmlAttribute{name=prolog,value=V}], [], _E) ->
+ [V,Data];
+'#root#'(Data, _Attrs, [], _E) ->
+ ["<?xml version=\"1.0\"?>\n", Data].
+
+
+%% The '#element#' function is the default handler for XML elements.
+
+'#element#'(Tag, [], Attrs, _Parents, _E) ->
+ empty_tag(Tag, Attrs);
+'#element#'(Tag, Data, Attrs, Parents, _E) ->
+ IsCharData = is_char(Data),
+ NewData =
+ case IsCharData of
+ true ->
+ LengthParents = length(Parents),
+ %% Push all the data over Lvl spaces.
+ [
+ indent(LengthParents + 1) ++ DataEntry
+ || DataEntry <- Data
+ ] ++ indent(LengthParents);
+ false ->
+ Data
+ end,
+ markup(Tag, Attrs, NewData).
+
+is_char([[X|_]|_]) ->
+ not is_integer(X);
+is_char(Data) when is_list(Data) ->
+ false.
+
+indent(Level) ->
+ [$\n | lists:duplicate(2 * Level, $\s)].
diff --git a/lib/xmerl/test/xmerl_SUITE.erl b/lib/xmerl/test/xmerl_SUITE.erl
index cf1728bf2a..0d01b98b37 100644
--- a/lib/xmerl/test/xmerl_SUITE.erl
+++ b/lib/xmerl/test/xmerl_SUITE.erl
@@ -40,14 +40,14 @@
%%----------------------------------------------------------------------
%% Test groups
%%----------------------------------------------------------------------
-all() ->
+all() ->
[{group, cpd_tests}, xpath_text1, xpath_main,
xpath_abbreviated_syntax, xpath_functions, xpath_namespaces,
{group, misc}, {group, eventp_tests},
{group, ticket_tests}, {group, app_test},
- {group, appup_test}].
+ {group, appup_test}, {group, format_test}].
-groups() ->
+groups() ->
[{cpd_tests, [],
[cpd_invalid1, cpd_invalid1_index, cpd_invalid2_index,
cpd_invalid_index3, cpd_invalid_is_layer,
@@ -63,7 +63,8 @@ groups() ->
ticket_6873, ticket_7496, ticket_8156, ticket_8697,
ticket_9411, ticket_9457, ticket_9664_schema, ticket_9664_dtd]},
{app_test, [], [{xmerl_app_test, all}]},
- {appup_test, [], [{xmerl_appup_test, all}]}].
+ {appup_test, [], [{xmerl_appup_test, all}]},
+ {format_test, [], [formatter_pass,formatter_fail]}].
suite() ->
[{timetrap,{minutes,10}}].
@@ -257,12 +258,12 @@ xml_ns(_Config) ->
attributes = [#xmlAttribute{name = 'xmlns:xml',
expanded_name = {"xmlns","xml"},
nsinfo = {"xmlns","xml"},
- namespace = #xmlNamespace{default = [],
+ namespace = #xmlNamespace{default = [],
nodes = [{"xml",'http://www.w3.org/XML/1998/namespace'}]}},
#xmlAttribute{name = 'xml:attr1',
expanded_name = {'http://www.w3.org/XML/1998/namespace',attr1},
nsinfo = {"xml","attr1"},
- namespace = #xmlNamespace{default = [],
+ namespace = #xmlNamespace{default = [],
nodes = [{"xml",'http://www.w3.org/XML/1998/namespace'}]}}]},
[]
} = xmerl_scan:string(Doc2, [{namespace_conformant, true}]),
@@ -349,15 +350,15 @@ sax_parse_export_xml_small(Config) ->
ok.
simple() ->
- [{document,
+ [{document,
[{title, "Doc Title"}, {author, "Ulf Wiger"}],
- [{section,
+ [{section,
[{heading, "heading1"}],
[{'P', ["This is a paragraph of text."]},
- {section,
+ {section,
[{heading, "heading2"}],
[{'P', ["This is another paragraph."]},
- {table,
+ {table,
[{border, 1}],
[{heading,
[{col, ["head1"]},
@@ -393,7 +394,7 @@ generate_section_attribute(0) ->
generate_section_attribute(N) ->
{{heading, "heading1"},N-1}.
-
+
generate_subsection_content(0) ->
done;
generate_subsection_content(1) ->
@@ -450,7 +451,7 @@ generate_heading_col(N) ->
ticket_5998(Config) ->
DataDir = datadir(Config),
%% First fix is tested by case syntax_bug2.
-
+
ok =
case catch xmerl_scan:file(filename:join([DataDir,misc,"ticket_5998_2.xml"])) of
{'EXIT',{fatal,Reason1}} ->
@@ -484,18 +485,18 @@ ticket_7211(Config) ->
{E,[]} = xmerl_scan:file(filename:join([DataDir,misc,"notes2.xml"]),
[{fetch_path,[filename:join([DataDir,misc,erlang_docs_dtd])]},
{validation,dtd}]),
-
+
ok = case E of
Rec when is_record(Rec,xmlElement) ->
ok;
_ ->
E
end,
-
+
{E2,[]} = xmerl_scan:file(filename:join([DataDir,misc,"XS.xml"]),
[{fetch_path,[filename:join([DataDir,misc,erlang_docs_dtd])]},
{validation,dtd}]),
-
+
ok = case E2 of
Rec2 when is_record(Rec2,xmlElement) ->
ok;
@@ -517,7 +518,7 @@ ticket_7214(Config) ->
{E,[]} = xmerl_scan:file(filename:join([DataDir,misc,'block_tags.html']),
[{validation,dtd},
{fetch_path,[filename:join([DataDir,misc,erlang_docs_dtd])]}]),
-
+
ok = case E of
Rec when is_record(Rec,xmlElement) ->
ok;
@@ -528,7 +529,7 @@ ticket_7214(Config) ->
%%
%% ticket_7430
%%
-%% Problem with contents of numeric character references followed by
+%% Problem with contents of numeric character references followed by
%% UTF-8 characters..
%%
ticket_7430(_Config) ->
@@ -631,7 +632,7 @@ allow_entities_test(Config) ->
DataDir = proplists:get_value(data_dir, Config),
File = filename:join(DataDir, "lol_1_test.xml"), %% Depth 9
%% Disallow entities
- {'EXIT',{fatal, {{error,entities_not_allowed}, _, _, _}}} =
+ {'EXIT',{fatal, {{error,entities_not_allowed}, _, _, _}}} =
(catch xmerl_scan:file(File, [{allow_entities, false}])),
ok.
@@ -679,7 +680,7 @@ change_mode3([F|Fs]) ->
chmod(F)
end,
change_mode3(Fs).
-
+
chmod(F) ->
case file:read_file_info(F) of
{ok,FileInfo} ->
@@ -696,3 +697,101 @@ datadir(Config) ->
datadir_join(Config,Files) ->
filename:join([datadir(Config)|Files]).
+
+%%======================================================================
+%% New formatter tests input/output
+%%======================================================================
+
+html() ->
+ "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\" \"DTD/xhtml1-strict.dtd\"><html>"
+ "<head><title>Doc Title</title><author>Ulf Wiger</author></head>"
+ "<h1>heading1</h1>"
+ "<p>This is a paragraph of text.</p>"
+ "<h2>heading2</h2>"
+ "<p>This is another paragraph.</p>"
+ "<table>"
+ "<thead><tr><td>head1</td><td>head2</td></tr></thead>"
+ "<tr><td>col11</td><td>col122</td></tr>"
+ "<tr><td>col21</td><td>col122</td></tr>"
+ "</table>"
+ "</html>".
+
+html_indented() ->
+ "<?xml version=\"1.0\"?>"
+ "\n<html>"
+ "\n <head>"
+ "\n <title>Doc Title</title>"
+ "\n <author>Ulf Wiger</author>"
+ "\n </head>"
+ "\n <h1>heading1</h1>"
+ "\n <p>This is a paragraph of text.</p>"
+ "\n <h2>heading2</h2>"
+ "\n <p>This is another paragraph.</p>"
+ "\n <table>"
+ "\n <thead>"
+ "\n <tr>"
+ "\n <td>head1</td>"
+ "\n <td>head2</td>"
+ "\n </tr>"
+ "\n </thead>"
+ "\n <tr>"
+ "\n <td>col11</td>"
+ "\n <td>col122</td>"
+ "\n </tr>"
+ "\n <tr>"
+ "\n <td>col21</td>"
+ "\n <td>col122</td>"
+ "\n </tr>"
+ "\n </table>"
+ "\n</html>".
+
+xml_namespace() ->
+ "<?xml version=\"1.0\"?>"
+ "<!-- initially, the default namespace is \"books\" -->"
+ "<book xmlns='urn:loc.gov:books' xmlns:isbn='urn:ISBN:0-395-36341-6'>"
+ "<title>Cheaper by the Dozen</title>"
+ "<isbn:number>1568491379</isbn:number>"
+ "<notes>"
+ "<!-- make HTML the default namespace for some comments -->"
+ "<p xmlns='urn:w3-org-ns:HTML'>"
+ "This is a <i>funny</i> book!"
+ "</p>"
+ "</notes>"
+ "</book>".
+
+xml_namespace_indented() ->
+ "<?xml version=\"1.0\"?>"
+ "\n<book xmlns=\"urn:loc.gov:books\" xmlns:isbn=\"urn:ISBN:0-395-36341-6\">"
+ "\n <title>Cheaper by the Dozen</title>"
+ "\n <isbn:number>1568491379</isbn:number>"
+ "\n <notes>"
+ "\n <p xmlns=\"urn:w3-org-ns:HTML\">This is a <i>funny</i> book!</p>"
+ "\n </notes>"
+ "\n</book>".
+
+output_element_to_str(E) ->
+ Output = xmerl:export([E], xmerl_xml_indent),
+ [Str] = io_lib:format("~s", [lists:flatten(Output)]),
+ Str.
+
+%%======================================================================
+%% New formatter tests
+%%======================================================================
+formatter_pass(_Config) ->
+
+ FetchFun = fun(_DTDSpec, S) -> {ok, not_fetched, S} end,
+ %% Generate based on namespace-example
+ {Ns, _} = xmerl_scan:string(xml_namespace(), [{fetch_fun, FetchFun}]),
+ GNs = output_element_to_str(Ns),
+ INs = xml_namespace_indented(),
+ INs = GNs,
+
+ %% Generate based on html-example
+ {Html, _} = xmerl_scan:string(html(), [{fetch_fun, FetchFun}]),
+ GHtml = output_element_to_str(Html),
+ IHtml = html_indented(),
+ GHtml = IHtml,
+ ok.
+
+formatter_fail(_Config) ->
+ ok.
--
2.35.3