File 1067-Fix-xml-export-for-CDATA.patch of Package erlang
From 47dd2cd8c1069115c305eb79eb22def45a37f436 Mon Sep 17 00:00:00 2001
From: Lars Thorsen <lars@erlang.org>
Date: Wed, 4 Sep 2024 10:30:00 +0200
Subject: [PATCH] Fix xml export for CDATA
---
lib/xmerl/src/xmerl.erl | 15 ++++++++++++---
lib/xmerl/src/xmerl_html.erl | 8 ++++++--
lib/xmerl/src/xmerl_lib.erl | 18 +++++++++++++++++-
lib/xmerl/src/xmerl_otpsgml.erl | 8 ++++++--
lib/xmerl/src/xmerl_sgml.erl | 10 +++++++---
lib/xmerl/src/xmerl_text.erl | 9 ++++++---
lib/xmerl/src/xmerl_xml.erl | 12 ++++++++----
lib/xmerl/test/xmerl_SUITE.erl | 15 +++++++++++++++
8 files changed, 77 insertions(+), 18 deletions(-)
diff --git a/lib/xmerl/src/xmerl.erl b/lib/xmerl/src/xmerl.erl
index 32dad69166..216859b28b 100644
--- a/lib/xmerl/src/xmerl.erl
+++ b/lib/xmerl/src/xmerl.erl
@@ -187,8 +187,10 @@ export_simple_content(Content, Callbacks) when is_list(Callbacks) ->
%% Content = [Element]
%% Callback = [atom()]
%% @doc Exports normal XML content directly, without further context.
-export_content([#xmlText{value = Text} | Es], Callbacks) ->
+export_content([#xmlText{value = Text, type = text} | Es], Callbacks) ->
[apply_text_cb(Callbacks, Text) | export_content(Es, Callbacks)];
+export_content([#xmlText{value = Text, type = cdata} | Es], Callbacks) ->
+ [apply_cdata_cb(Callbacks, Text) | export_content(Es, Callbacks)];
export_content([#xmlPI{} | Es], Callbacks) ->
export_content(Es, Callbacks);
export_content([#xmlComment{} | Es], Callbacks) ->
@@ -214,8 +216,10 @@ export_simple_element(Content, Callbacks) when is_list(Callbacks) ->
export_element(E, CB) when is_atom(CB) ->
export_element(E, callbacks(CB));
-export_element(#xmlText{value = Text}, CBs) ->
+export_element(#xmlText{value = Text, type = text}, CBs) ->
apply_text_cb(CBs, Text);
+export_element(#xmlText{value = Text, type = cdata}, CBs) ->
+ apply_cdata_cb(CBs, Text);
export_element(E = #xmlElement{name = Tag,
pos = Pos,
attributes = Attributes,
@@ -237,9 +241,11 @@ export_element(#xmlDecl{}, _CBs) ->
%% document.
export_element(E, CallbackModule, CallbackState) when is_atom(CallbackModule) ->
export_element(E, callbacks(CallbackModule), CallbackState);
-export_element(#xmlText{value = Text},CallbackModule,_CallbackState) ->
+export_element(#xmlText{value = Text, type = text},CallbackModule,_CallbackState) ->
%% apply_cb(CallbackModule, '#text#', '#text#', [Text,CallbackState]);
apply_text_cb(CallbackModule,Text);
+export_element(#xmlText{value = Text, type = cdata},CallbackModule,_CallbackState) ->
+ apply_cdata_cb(CallbackModule,Text);
export_element(E=#xmlElement{name = Tag,
pos = Pos,
parents = Parents,
@@ -301,6 +307,9 @@ check_inheritance(M, Visited) ->
apply_text_cb(Ms, Text) ->
apply_cb(Ms, '#text#', '#text#', [Text]).
+apply_cdata_cb(Ms, Text) ->
+ apply_cb(Ms, '#cdata#', '#cdata#', [Text]).
+
apply_tag_cb(Ms, F, Args) ->
apply_cb(Ms, F, '#element#', Args).
diff --git a/lib/xmerl/src/xmerl_html.erl b/lib/xmerl/src/xmerl_html.erl
index cb72eb4185..807c6afb28 100644
--- a/lib/xmerl/src/xmerl_html.erl
+++ b/lib/xmerl/src/xmerl_html.erl
@@ -29,6 +29,7 @@
-export(['#root#'/4,
'#element#'/5,
'#text#'/1,
+ '#cdata#'/1,
p/4]).
-import(xmerl_lib, [start_tag/2, end_tag/1, is_empty_data/1,
@@ -40,11 +41,14 @@
'#xml-inheritance#'() -> [].
-%% The '#text#' function is called for every text segment.
-
+%% The '#text#' function is called for every text segment of type text.
'#text#'(Text) ->
export_text(Text).
+%% The '#cdata#' function is called for every text segment of type cdata.
+%% Handled the same as text.
+'#cdata#'(Text) ->
+ export_text(Text).
%% The '#root#' tag is called when the entire structure has been
%% exported. It does not appear in the structure itself.
diff --git a/lib/xmerl/src/xmerl_lib.erl b/lib/xmerl/src/xmerl_lib.erl
index be0e28e62d..c8ebc292a9 100644
--- a/lib/xmerl/src/xmerl_lib.erl
+++ b/lib/xmerl/src/xmerl_lib.erl
@@ -26,7 +26,7 @@
-export([normalize_content/1, normalize_content/3, expand_content/1,
expand_content/3, normalize_element/1, normalize_element/3,
expand_element/1, expand_element/3, expand_attributes/1,
- expand_attributes/3, export_text/1, flatten_text/1,
+ expand_attributes/3, export_text/1, flatten_text/1, export_cdata/1,
export_attribute/1, markup/2, markup/3, simplify_element/1,
simplify_content/1, start_tag/1, start_tag/2, end_tag/1,
empty_tag/1, empty_tag/2,is_empty_data/1, find_attribute/2,
@@ -85,6 +85,22 @@ flatten_text([], []) ->
flatten_text(Bin, Cont) ->
flatten_text(binary_to_list(Bin), Cont).
+%% Export CDATA
+export_cdata(T) ->
+ R = "<![CDATA[" ++ export_cdata(T, []),
+ R ++ "]]>".
+
+export_cdata([C | T], Cont) when is_integer(C) ->
+ [C | export_cdata(T, Cont)];
+export_cdata([T | T1], Cont) ->
+ export_cdata(T, [T1 | Cont]);
+export_cdata([], [T | Cont]) ->
+ export_cdata(T, Cont);
+export_cdata([], []) ->
+ [];
+export_cdata(Bin, Cont) ->
+ export_cdata(binary_to_list(Bin), Cont).
+
%% Convert attribute value to a flat string, escaping characters `"',
%% `<' and `&'. (Note that single-quote characters are not escaped; the
%% markup-generating functions (`start_tag', `end_tag', ...) always use
diff --git a/lib/xmerl/src/xmerl_otpsgml.erl b/lib/xmerl/src/xmerl_otpsgml.erl
index cbd13263f6..7636713811 100644
--- a/lib/xmerl/src/xmerl_otpsgml.erl
+++ b/lib/xmerl/src/xmerl_otpsgml.erl
@@ -29,6 +29,7 @@
-export(['#root#'/4,
'#element#'/5,
'#text#'/1,
+ '#cdata#'/1,
p/4]).
-import(xmerl_lib, [markup/3, start_tag/2, is_empty_data/1,
@@ -41,11 +42,14 @@
'#xml-inheritance#'() -> [xmerl_sgml].
-%% The '#text#' function is called for every text segment.
-
+%% The '#text#' function is called for every text segment of type text.
'#text#'(Text) ->
export_text(Text).
+%% The '#cdata#' function is called for every text segment of type cdata.
+%% Handled the same as text.
+'#cdata#'(Text) ->
+ export_text(Text).
%% The '#root#' tag is called when the entire structure has been
%% exported. It does not appear in the structure itself.
diff --git a/lib/xmerl/src/xmerl_sgml.erl b/lib/xmerl/src/xmerl_sgml.erl
index 0d73df1e02..0dcd237847 100644
--- a/lib/xmerl/src/xmerl_sgml.erl
+++ b/lib/xmerl/src/xmerl_sgml.erl
@@ -28,7 +28,8 @@
-export(['#root#'/4,
'#element#'/5,
- '#text#'/1]).
+ '#text#'/1,
+ '#cdata#'/1]).
-import(xmerl_lib, [markup/3, find_attribute/2, export_text/1]).
@@ -38,11 +39,14 @@
'#xml-inheritance#'() -> [].
-%% The '#text#' function is called for every text segment.
-
+%% The '#text#' function is called for every text segment of type text.
'#text#'(Text) ->
export_text(Text).
+%% The '#cdata#' function is called for every text segment of type cdata.
+%% Handled the same as text.
+'#cdata#'(Text) ->
+ export_text(Text).
%% The '#root#' tag is called when the entire structure has been
%% exported. It does not appear in the structure itself.
diff --git a/lib/xmerl/src/xmerl_text.erl b/lib/xmerl/src/xmerl_text.erl
index eb1df6e5d3..74eedcaf71 100644
--- a/lib/xmerl/src/xmerl_text.erl
+++ b/lib/xmerl/src/xmerl_text.erl
@@ -27,7 +27,8 @@
-export(['#root#'/4,
'#element#'/5,
- '#text#'/1]).
+ '#text#'/1,
+ '#cdata#'/1]).
-include("xmerl.hrl").
@@ -35,10 +36,12 @@
'#xml-inheritance#'() -> [].
-%% The '#text#' function is called for every text segment.
-
+%% The '#text#' function is called for every text segment of type text.
'#text#'(Text) -> Text.
+%% The '#cdata#' function is called for every text segment of type cdata.
+%% Handled the same as text.
+'#cdata#'(Text) -> Text.
%% The '#root#' tag is called when the entire structure has been
%% exported. It does not appear in the structure itself.
diff --git a/lib/xmerl/src/xmerl_xml.erl b/lib/xmerl/src/xmerl_xml.erl
index 28802666a4..78c73c17cb 100644
--- a/lib/xmerl/src/xmerl_xml.erl
+++ b/lib/xmerl/src/xmerl_xml.erl
@@ -27,9 +27,10 @@
-export(['#root#'/4,
'#element#'/5,
- '#text#'/1]).
+ '#text#'/1,
+ '#cdata#'/1]).
--import(xmerl_lib, [markup/3, empty_tag/2, export_text/1]).
+-import(xmerl_lib, [markup/3, empty_tag/2, export_text/1, export_cdata/1]).
-include("xmerl.hrl").
-include("xmerl_internal.hrl").
@@ -38,12 +39,15 @@
'#xml-inheritance#'() -> [].
-%% The '#text#' function is called for every text segment.
-
+%% The '#text#' function is called for every text segment of type text.
'#text#'(Text) ->
%?dbg("Text=~p~n",[Text]),
export_text(Text).
+%% The '#cdata#' function is called for every text segment of type cdata.
+'#cdata#'(Text) ->
+%?dbg("Cdata=~p~n",[Text]),
+ export_cdata(Text).
%% The '#root#' tag is called when the entire structure has been
%% exported. It does not appear in the structure itself.
diff --git a/lib/xmerl/test/xmerl_SUITE.erl b/lib/xmerl/test/xmerl_SUITE.erl
index 360c675d26..370963f56f 100644
--- a/lib/xmerl/test/xmerl_SUITE.erl
+++ b/lib/xmerl/test/xmerl_SUITE.erl
@@ -55,6 +55,7 @@ groups() ->
{misc, [],
[latin1_alias, syntax_bug1, syntax_bug2, syntax_bug3,
pe_ref1, copyright, testXSEIF, export_simple1, export,
+ export_cdata,
default_attrs_bug, xml_ns, scan_splits_string_bug,
allow_entities_test]},
{eventp_tests, [], [sax_parse_and_export]},
@@ -305,6 +306,20 @@ export(Config) ->
{ok, B} = file:read_file(TestFile),
ok.
+export_cdata(Config) ->
+ InData = <<"<?xml version=\"1.0\" encoding=\"UTF-8\"?>
+<doc>
+ <a>Test...</a>
+ <b><![CDATA[
+<c>Test</c>
+]]></b>
+</doc>">>,
+ Prolog = ["<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"],
+ {E,_} = xmerl_scan:string(binary:bin_to_list(InData)),
+ Exported = xmerl:export([E],xmerl_xml,[{prolog,Prolog}]),
+ InData = list_to_binary(Exported),
+ ok.
+
%%----------------------------------------------------------------------
sax_parse_and_export(Config) ->
--
2.43.0