File 4402-xmerl-Fix-exporting-XML-comments.patch of Package erlang
From e646306233df20bc44fe9c7e24976f1dcc420fe7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Paulo=20Tom=C3=A9?= <paulo.v.tome@tecnico.ulisboa.pt>
Date: Thu, 27 Mar 2025 20:49:24 +0000
Subject: [PATCH] xmerl: Fix exporting XML comments
Fix issue #5697 by exporting #xmlComment elements when calling
export/3 or export_simple/3 and similar functions.
---
lib/xmerl/src/xmerl.erl | 15 +++++++++------
lib/xmerl/src/xmerl_html.erl | 7 ++++++-
lib/xmerl/src/xmerl_lib.erl | 16 ++++++++++++++++
lib/xmerl/src/xmerl_text.erl | 7 ++++++-
lib/xmerl/src/xmerl_xml.erl | 10 ++++++++--
lib/xmerl/src/xmerl_xml_indent.erl | 12 ++++++++++--
lib/xmerl/test/xmerl_SUITE.erl | 14 +++++++++++++-
7 files changed, 68 insertions(+), 13 deletions(-)
diff --git a/lib/xmerl/src/xmerl.erl b/lib/xmerl/src/xmerl.erl
index 3b56b804d2..aa9f1ddaaf 100644
--- a/lib/xmerl/src/xmerl.erl
+++ b/lib/xmerl/src/xmerl.erl
@@ -265,8 +265,8 @@ export_content([#xmlText{value = Text, type = cdata} | Es], Callbacks) ->
[apply_cdata_cb(Callbacks, Text) | export_content(Es, Callbacks)];
export_content([#xmlPI{} | Es], Callbacks) ->
export_content(Es, Callbacks);
-export_content([#xmlComment{} | Es], Callbacks) ->
- export_content(Es, Callbacks);
+export_content([#xmlComment{value = Text} | Es], Callbacks) ->
+ [apply_comment_cb(Callbacks, Text) | export_content(Es, Callbacks)];
export_content([#xmlDecl{} | Es], Callbacks) ->
export_content(Es, Callbacks);
export_content([E | Es], Callbacks) ->
@@ -306,8 +306,8 @@ export_element(E = #xmlElement{name = Tag,
tagdef(Tag,Pos,Parents,Args,CBs);
export_element(#xmlPI{}, _CBs) ->
[];
-export_element(#xmlComment{}, _CBs) ->
- [];
+export_element(#xmlComment{value = Text}, CBs) ->
+ apply_comment_cb(CBs, Text);
export_element(#xmlDecl{}, _CBs) ->
[].
@@ -337,8 +337,8 @@ export_element(E=#xmlElement{name = Tag,
tagdef(Tag,Pos,Parents,Args,Callbacks);
export_element(#xmlPI{}, _CallbackModule, CallbackState) ->
CallbackState;
-export_element(#xmlComment{},_CallbackModule, CallbackState) ->
- CallbackState;
+export_element(#xmlComment{value = Text},CallbackModule,_CallbackState) ->
+ apply_comment_cb(CallbackModule,Text);
export_element(#xmlDecl{},_CallbackModule, CallbackState) ->
CallbackState.
@@ -390,6 +390,9 @@ apply_text_cb(Ms, Text) ->
apply_cdata_cb(Ms, Text) ->
apply_cb(Ms, '#cdata#', '#cdata#', [Text]).
+apply_comment_cb(Ms, Text) ->
+ apply_cb(Ms, '#comment#', '#comment#', [Text]).
+
apply_tag_cb(Ms, F, Args) ->
apply_cb(Ms, F, '#element#', Args).
diff --git a/lib/xmerl/src/xmerl_html.erl b/lib/xmerl/src/xmerl_html.erl
index 28cb6f06ab..8d475c2a41 100644
--- a/lib/xmerl/src/xmerl_html.erl
+++ b/lib/xmerl/src/xmerl_html.erl
@@ -33,10 +33,11 @@
'#element#'/5,
'#text#'/1,
'#cdata#'/1,
+ '#comment#'/1,
p/4]).
-import(xmerl_lib, [start_tag/2, end_tag/1, is_empty_data/1,
- find_attribute/2, export_text/1]).
+ find_attribute/2, export_text/1, export_comment/1]).
-include("xmerl.hrl").
@@ -53,6 +54,10 @@
'#cdata#'(Text) ->
export_text(Text).
+%% The '#comment#' function is called for every comment element.
+'#comment#'(Text) ->
+ export_comment(Text).
+
%% The '#root#' tag is called when the entire structure has been
%% exported. It does not appear in the structure itself.
diff --git a/lib/xmerl/src/xmerl_lib.erl b/lib/xmerl/src/xmerl_lib.erl
index b8766db3b1..3cc59614ec 100644
--- a/lib/xmerl/src/xmerl_lib.erl
+++ b/lib/xmerl/src/xmerl_lib.erl
@@ -30,6 +30,7 @@
expand_content/3, normalize_element/1, normalize_element/3,
expand_element/1, expand_element/3, expand_attributes/1,
expand_attributes/3, export_text/1, flatten_text/1, export_cdata/1,
+ export_comment/1,
export_attribute/1, markup/2, markup/3, simplify_element/1,
simplify_content/1, start_tag/1, start_tag/2, end_tag/1,
empty_tag/1, empty_tag/2,is_empty_data/1, find_attribute/2,
@@ -104,6 +105,21 @@ export_cdata([], []) ->
export_cdata(Bin, Cont) ->
export_cdata(binary_to_list(Bin), Cont).
+%% Export comment
+export_comment(T) ->
+ R = "<!--" ++ export_comment(T, []),
+ R ++ "-->".
+export_comment([C | T], Cont) when is_integer(C) ->
+ [C | export_comment(T, Cont)];
+export_comment([T | T1], Cont) ->
+ export_comment(T, [T1 | Cont]);
+export_comment([], [T | Cont]) ->
+ export_comment(T, Cont);
+export_comment([], []) ->
+ [];
+export_comment(Bin, Cont) ->
+ export_comment(binary_to_list(Bin), Cont).
+
%% Convert attribute value to a flat string, escaping characters `"',
%% `<' and `&'. (Note that single-quote characters are not escaped; the
%% markup-generating functions (`start_tag', `end_tag', ...) always use
diff --git a/lib/xmerl/src/xmerl_text.erl b/lib/xmerl/src/xmerl_text.erl
index 1fcf4cd9e6..16fb33df90 100644
--- a/lib/xmerl/src/xmerl_text.erl
+++ b/lib/xmerl/src/xmerl_text.erl
@@ -31,7 +31,8 @@
-export(['#root#'/4,
'#element#'/5,
'#text#'/1,
- '#cdata#'/1]).
+ '#cdata#'/1,
+ '#comment#'/1]).
-include("xmerl.hrl").
@@ -46,6 +47,10 @@
%% Handled the same as text.
'#cdata#'(Text) -> Text.
+%% The '#comment#' function is called for every comment element.
+%% Comment value is not exported since there is no markup.
+'#comment#'(_Text) -> [].
+
%% The '#root#' tag is called when the entire structure has been
%% exported. It does not appear in the structure itself.
diff --git a/lib/xmerl/src/xmerl_xml.erl b/lib/xmerl/src/xmerl_xml.erl
index 6eb1857dff..26463bc56c 100644
--- a/lib/xmerl/src/xmerl_xml.erl
+++ b/lib/xmerl/src/xmerl_xml.erl
@@ -31,9 +31,11 @@
-export(['#root#'/4,
'#element#'/5,
'#text#'/1,
- '#cdata#'/1]).
+ '#cdata#'/1,
+ '#comment#'/1]).
--import(xmerl_lib, [markup/3, empty_tag/2, export_text/1, export_cdata/1]).
+-import(xmerl_lib, [markup/3, empty_tag/2, export_text/1, export_cdata/1,
+ export_comment/1]).
-include("xmerl.hrl").
-include("xmerl_internal.hrl").
@@ -52,6 +54,10 @@
%?dbg("Cdata=~p~n",[Text]),
export_cdata(Text).
+%% The '#comment#' function is called for every comment element.
+'#comment#'(Text) ->
+ export_comment(Text).
+
%% The '#root#' tag is called when the entire structure has been
%% exported. It does not appear in the structure itself.
diff --git a/lib/xmerl/src/xmerl_xml_indent.erl b/lib/xmerl/src/xmerl_xml_indent.erl
index e4112f29e0..0cc7fcead8 100644
--- a/lib/xmerl/src/xmerl_xml_indent.erl
+++ b/lib/xmerl/src/xmerl_xml_indent.erl
@@ -34,9 +34,10 @@
-export(['#root#'/4,
'#element#'/5,
- '#text#'/1]).
+ '#text#'/1,
+ '#comment#'/1]).
--import(xmerl_lib, [markup/3, empty_tag/2, export_text/1]).
+-import(xmerl_lib, [markup/3, empty_tag/2, export_text/1, export_comment/1]).
-include("xmerl.hrl").
-include("xmerl_internal.hrl").
@@ -51,6 +52,13 @@
export_text(Text).
+% The '#comment#' function is called for every comment element.
+
+'#comment#'(Text) ->
+ %% Returning a list allows is_char/1 to return true
+ %% when indenting the contents of an element.
+ [ export_comment(Text) ].
+
%% The '#root#' tag is called when the entire structure has been
%% exported. It does not appear in the structure itself.
diff --git a/lib/xmerl/test/xmerl_SUITE.erl b/lib/xmerl/test/xmerl_SUITE.erl
index 393ce4d1dd..696a6b8ca3 100644
--- a/lib/xmerl/test/xmerl_SUITE.erl
+++ b/lib/xmerl/test/xmerl_SUITE.erl
@@ -57,7 +57,7 @@ groups() ->
{misc, [],
[latin1_alias, syntax_bug1, syntax_bug2, syntax_bug3,
pe_ref1, copyright, testXSEIF, export_simple1, export,
- export_cdata,
+ export_cdata, export_comments,
default_attrs_bug, xml_ns, scan_splits_string_bug,
allow_entities_test]},
{eventp_tests, [], [sax_parse_and_export]},
@@ -325,6 +325,17 @@ export_cdata(Config) ->
InData = list_to_binary(Exported),
ok.
+export_comments(Config) ->
+ InData = <<"<?xml version=\"1.0\" encoding=\"UTF-8\"?>
+<doc>
+ <!-- top comment --><a>Test...</a>
+ <!-- bottom comment --></doc>">>,
+ Prolog = ["<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"],
+ {E, _} = xmerl_scan:string(binary:bin_to_list(InData)),
+ Exported = xmerl:export([E],xmerl_xml,[{prolog,Prolog}]),
+ InData = list_to_binary(Exported),
+ ok.
+
%%----------------------------------------------------------------------
sax_parse_and_export(Config) ->
@@ -784,6 +795,7 @@ xml_namespace_indented() ->
"\n <title>Cheaper by the Dozen</title>"
"\n <isbn:number>1568491379</isbn:number>"
"\n <notes>"
+ "\n <!-- make HTML the default namespace for some comments -->"
"\n <p xmlns=\"urn:w3-org-ns:HTML\">This is a <i>funny</i> book!</p>"
"\n </notes>"
"\n</book>".
--
2.51.0