File 3323-Implements-rest-of-missing-features-for-SAX-parser.patch of Package erlang
From 4d13e78579a5a4fca86d9e9e4cc5b4a127c6fa7f Mon Sep 17 00:00:00 2001
From: zadean <contact@zadean.com>
Date: Thu, 12 Sep 2019 09:59:37 +0200
Subject: [PATCH 3/5] Implements rest of missing features for SAX parser
[features]
- External entities implemented, both as markup and replacement text
- Cyclic references are checked at the end of the DTD and when found in
external entities
- Default attributes from DTD reported in events
- Attribute whitespace normalization based on type
- External entities can have a different encoding than the base document
- Conditional sections (IGNORE/INCLUDE) handling added
- Element content-model well-formed checks
[possibly breaking change]
- adds attribute_values field to xmerl_sax_parser_state record
[bug fixes]
- adds missing function clauses matching on empty string and individual
characters
- XML Version WFC
- corrected handling of character references for '<' and '&'
- not allowing fragments in System Literals
- added '?' to allowed pubid characters
- report errors in initial encoding check with `fatal_error` macro
[cosmetic]
- rename "litteral" to "literal"
---
lib/xmerl/src/xmerl_sax_parser.erl | 5 +-
lib/xmerl/src/xmerl_sax_parser.hrl | 3 +-
lib/xmerl/src/xmerl_sax_parser_base.erlsrc | 1758 ++++++++++++++++++++++------
3 files changed, 1412 insertions(+), 354 deletions(-)
diff --git a/lib/xmerl/src/xmerl_sax_parser.erl b/lib/xmerl/src/xmerl_sax_parser.erl
index 2767d02552..ab9dbd1ea1 100644
--- a/lib/xmerl/src/xmerl_sax_parser.erl
+++ b/lib/xmerl/src/xmerl_sax_parser.erl
@@ -312,7 +312,10 @@ convert_encoding(Enc, State) -> %% Just for 7,8 bit + utf8
%% Description: Parsing the xml declaration from the input stream.
%%----------------------------------------------------------------------
parse_xml_directive(<<C, Rest/binary>>, State) when ?is_whitespace(C) ->
- parse_xml_directive_1(Rest, [], State).
+ parse_xml_directive_1(Rest, [], State);
+parse_xml_directive(_, State) ->
+ ?fatal_error(State, "Expected whitespace in directive").
+
%%----------------------------------------------------------------------
%% Function: parse_xml_directive_1(Xml, Acc) -> [{Name, Value}]
diff --git a/lib/xmerl/src/xmerl_sax_parser.hrl b/lib/xmerl/src/xmerl_sax_parser.hrl
index 56a3a42e5f..564e7fd541 100644
--- a/lib/xmerl/src/xmerl_sax_parser.hrl
+++ b/lib/xmerl/src/xmerl_sax_parser.hrl
@@ -88,7 +88,8 @@
current_location, % Location of the currently parsed XML entity
entity, % Parsed XML entity
skip_external_dtd = false,% If true the external DTD is skipped during parsing
- input_type % Source type: file | stream
+ input_type, % Source type: file | stream
+ attribute_values = [] % default attribute values
}).
diff --git a/lib/xmerl/src/xmerl_sax_parser_base.erlsrc b/lib/xmerl/src/xmerl_sax_parser_base.erlsrc
index d7128dd4d0..a1305902d1 100644
--- a/lib/xmerl/src/xmerl_sax_parser_base.erlsrc
+++ b/lib/xmerl/src/xmerl_sax_parser_base.erlsrc
@@ -113,7 +113,7 @@ parse_dtd(Xml, State) ->
try
State1 = event_callback(startDocument, State),
- Result = parse_external_entity_1(Xml, State1#xmerl_sax_parser_state{ref_table=RefTable}),
+ Result = parse_external_entity_1(Xml, State1#xmerl_sax_parser_state{ref_table=RefTable}, []),
handle_end_document(Result)
catch
throw:Exception ->
@@ -262,6 +262,8 @@ parse_xml_decl(?STRING_REST("<?xml", Rest1), State) ->
parse_xml_decl_rest(Rest1, State);
?PARSE_XML_DECL(Bytes, State).
+parse_xml_decl_rest(?STRING_EMPTY, State) ->
+ cf(?STRING_EMPTY, State, fun parse_xml_decl_rest/2);
parse_xml_decl_rest(?STRING_UNBOUND_REST(C, Rest) = Bytes, State) ->
if
?is_whitespace(C) ->
@@ -294,46 +296,72 @@ parse_text_decl(?STRING("<?xm") = Bytes, State) ->
parse_text_decl(?STRING("<?xml") = Bytes, State) ->
cf(Bytes, State, fun parse_text_decl/2);
parse_text_decl(?STRING_REST("<?xml", Rest1), State) ->
- parse_text_decl_rest(Rest1, State);
+ parse_text_decl_1(Rest1, State);
parse_text_decl(Bytes, State) when is_binary(Bytes) ->
{Bytes, State}.
-parse_text_decl_rest(?STRING_EMPTY, State) ->
- cf(?STRING_EMPTY, State, fun parse_text_decl_rest/2);
-parse_text_decl_rest(?STRING("?") = Rest, State) ->
- cf(Rest, State, fun parse_text_decl_rest/2);
-parse_text_decl_rest(?STRING("v") = Rest, State) ->
- cf(Rest, State, fun parse_text_decl_rest/2);
-parse_text_decl_rest(?STRING("e") = Rest, State) ->
- cf(Rest, State, fun parse_text_decl_rest/2);
-parse_text_decl_rest(?STRING_UNBOUND_REST("?>", Rest) = _Bytes, State) ->
- {Rest, State};
-parse_text_decl_rest(?STRING_UNBOUND_REST(C, _) = Rest, State) when ?is_whitespace(C) ->
+parse_text_decl_1(?STRING_EMPTY, State) ->
+ cf(?STRING_EMPTY, State, fun parse_text_decl_1/2);
+parse_text_decl_1(?STRING("?") = Rest, State) ->
+ cf(Rest, State, fun parse_text_decl_1/2);
+parse_text_decl_1(?STRING("v") = Rest, State) ->
+ cf(Rest, State, fun parse_text_decl_1/2);
+parse_text_decl_1(?STRING("e") = Rest, State) ->
+ cf(Rest, State, fun parse_text_decl_2/2);
+parse_text_decl_1(?STRING_UNBOUND_REST("?>", _Rest) = _Bytes, State) ->
+ ?fatal_error(State, "expecting attribute encoding");
+parse_text_decl_1(?STRING_UNBOUND_REST(C, _) = Rest, State) when ?is_whitespace(C) ->
{_WS, Rest1, State1} = whitespace(Rest, State, []),
- parse_text_decl_rest(Rest1, State1);
-parse_text_decl_rest(?STRING_UNBOUND_REST("v", Rest) = _Bytes, State) ->
+ parse_text_decl_1(Rest1, State1);
+parse_text_decl_1(?STRING_UNBOUND_REST("v", Rest) = _Bytes, State) ->
case parse_name(Rest, State, [$v]) of
{"version", Rest1, State1} ->
{Rest2, State2} = parse_eq(Rest1, State1),
{_Version, Rest3, State3} = parse_att_value(Rest2, State2),
- parse_text_decl_rest(Rest3, State3);
+ parse_text_decl_2(Rest3, State3);
{_, _, State1} ->
?fatal_error(State1, "expecting attribute version")
end;
-parse_text_decl_rest(?STRING_UNBOUND_REST("e", Rest) = _Bytes, State) ->
+parse_text_decl_1(?STRING_UNBOUND_REST("e", _) = Bytes, State) ->
+ parse_text_decl_2(Bytes, State);
+parse_text_decl_1(?STRING_UNBOUND_REST(_, _), State) ->
+ ?fatal_error(State, "expecting attribute encoding or version");
+parse_text_decl_1(Bytes, State) ->
+ unicode_incomplete_check([Bytes, State, fun parse_text_decl_1/2],
+ "expecting attribute encoding or version").
+
+parse_text_decl_2(?STRING_EMPTY, State) ->
+ cf(?STRING_EMPTY, State, fun parse_text_decl_2/2);
+parse_text_decl_2(?STRING("e") = Rest, State) ->
+ cf(Rest, State, fun parse_text_decl_2/2);
+parse_text_decl_2(?STRING_UNBOUND_REST(C, _) = Rest, State) when ?is_whitespace(C) ->
+ {_WS, Rest1, State1} = whitespace(Rest, State, []),
+ parse_text_decl_2(Rest1, State1);
+parse_text_decl_2(?STRING_UNBOUND_REST("e", Rest) = _Bytes, State) ->
case parse_name(Rest, State, [$e]) of
{"encoding", Rest1, State1} ->
{Rest2, State2} = parse_eq(Rest1, State1),
{_Version, Rest3, State3} = parse_att_value(Rest2, State2),
- parse_text_decl_rest(Rest3, State3);
+ parse_text_decl_3(Rest3, State3);
{_, _, State1} ->
?fatal_error(State1, "expecting attribute encoding")
end;
-parse_text_decl_rest(?STRING_UNBOUND_REST(_, _), State) ->
- ?fatal_error(State, "expecting attribute encoding or version");
-parse_text_decl_rest(Bytes, State) ->
- unicode_incomplete_check([Bytes, State, fun parse_text_decl_rest/2],
- "expecting attribute encoding or version").
+parse_text_decl_2(Bytes, State) ->
+ unicode_incomplete_check([Bytes, State, fun parse_text_decl_2/2],
+ "expecting attribute encoding").
+
+parse_text_decl_3(?STRING_EMPTY, State) ->
+ cf(?STRING_EMPTY, State, fun parse_text_decl_3/2);
+parse_text_decl_3(?STRING("?") = Rest, State) ->
+ cf(Rest, State, fun parse_text_decl_3/2);
+parse_text_decl_3(?STRING_UNBOUND_REST("?>", Rest) = _Bytes, State) ->
+ {Rest, State};
+parse_text_decl_3(?STRING_UNBOUND_REST(C, _) = Rest, State) when ?is_whitespace(C) ->
+ {_WS, Rest1, State1} = whitespace(Rest, State, []),
+ parse_text_decl_3(Rest1, State1);
+parse_text_decl_3(Bytes, State) ->
+ unicode_incomplete_check([Bytes, State, fun parse_text_decl_3/2],
+ "expecting ?>").
%%----------------------------------------------------------------------
%% Function: parse_prolog(Rest, State) -> Result
@@ -381,6 +409,7 @@ parse_prolog_1(?STRING("DOCTYP") = Bytes, State) ->
cf(Bytes, State, fun parse_prolog_1/2);
parse_prolog_1(?STRING_REST("DOCTYPE", Rest), State) ->
{Rest1, State1} = parse_doctype(Rest, State),
+ ok = check_ref_cycle(State1),
State2 = event_callback(endDTD, State1),
parse_prolog(Rest1, State2);
parse_prolog_1(?STRING("-"), State) ->
@@ -412,17 +441,31 @@ parse_version_info(?STRING_UNBOUND_REST(C, _) = Rest, State, Acc) when ?is_white
parse_version_info(Rest1, State1, Acc);
parse_version_info(?STRING_UNBOUND_REST(C,Rest), State, Acc) ->
case is_name_start(C) of
- true ->
- case parse_name(Rest, State, [C]) of
- {"version", Rest1, State1} ->
- {Rest2, State2} = parse_eq(Rest1, State1),
- {Version, Rest3, State3} = parse_att_value(Rest2, State2),
- parse_xml_decl_rest(Rest3, State3, [{"version",Version}|Acc]);
- {_, _, State1} ->
- ?fatal_error(State1, "expecting attribute version")
- end;
- false ->
- ?fatal_error(State, "expecting attribute version")
+ true ->
+ case parse_name(Rest, State, [C]) of
+ {"version", Rest1, State1} ->
+ {Rest2, State2} = parse_eq(Rest1, State1),
+ case parse_att_value(Rest2, State2) of
+ {"1." ++ SubVersion, Rest3, State3} ->
+ % any 1.N version is valid but will be handled as 1.0
+ case lists:all(fun(D) when D >= $0, D =< $9 ->
+ true;
+ (_) ->
+ false
+ end, SubVersion) of
+ true ->
+ parse_xml_decl_rest(Rest3, State3, [{"version","1.0"}|Acc]);
+ false ->
+ ?fatal_error(State3, "unsupported version: 1." ++ SubVersion)
+ end;
+ {Version, _Rest3, State3} ->
+ ?fatal_error(State3, "unsupported version: " ++ Version)
+ end;
+ {_, _, State1} ->
+ ?fatal_error(State1, "expecting attribute version")
+ end;
+ false ->
+ ?fatal_error(State, "expecting attribute version")
end;
parse_version_info(Bytes, State, Acc) ->
unicode_incomplete_check([Bytes, State, Acc, fun parse_version_info/3],
@@ -485,6 +528,8 @@ parse_xml_decl_encoding(Bytes, State, Acc) ->
undefined).
+parse_xml_decl_encoding_1(?STRING_EMPTY, State, Acc) ->
+ cf(?STRING_EMPTY, State, Acc, fun parse_xml_decl_encoding_1/3);
parse_xml_decl_encoding_1(?STRING_UNBOUND_REST(C, _) = Bytes, State, Acc) when ?is_whitespace(C) ->
{_WS, Rest1, State1} = whitespace(Bytes, State, []),
parse_xml_decl_standalone(Rest1, State1, Acc);
@@ -595,6 +640,8 @@ check_if_rest_ok(_, _) ->
%%----------------------------------------------------------------------
parse_pi_1(?STRING_EMPTY, State) ->
cf(?STRING_EMPTY, State, fun parse_pi_1/2);
+parse_pi_1(?STRING("?") = Rest, State) ->
+ cf(Rest, State, fun parse_pi_1/2);
parse_pi_1(?STRING_UNBOUND_REST(C,_) = Rest, State) when ?is_whitespace(C) ->
{_WS, Rest1, State1} =
whitespace(Rest, State, []),
@@ -880,7 +927,8 @@ parse_attributes(?STRING_EMPTY, State, CurrentTag) ->
cf(?STRING_EMPTY, State, CurrentTag, fun parse_attributes/3);
parse_attributes(?STRING("/") = Bytes, State, CurrentTag) ->
cf(Bytes, State, CurrentTag, fun parse_attributes/3);
-parse_attributes(?STRING_REST("/>", Rest), State, {Tag, AttList, NewNsList}) ->
+parse_attributes(?STRING_REST("/>", Rest), State, CurrentTag) ->
+ {Tag, AttList, NewNsList} = fill_default_attributes(CurrentTag, State),
CompleteNsList = NewNsList ++ State#xmerl_sax_parser_state.ns,
{Uri, LocalName, QName, Attributes} = fix_ns(Tag, AttList, CompleteNsList),
State1 = send_start_prefix_mapping_event(lists:reverse(NewNsList), State),
@@ -889,7 +937,8 @@ parse_attributes(?STRING_REST("/>", Rest), State, {Tag, AttList, NewNsList}) ->
State4 = send_end_prefix_mapping_event(NewNsList, State3),
parse_content(Rest, State4, [], true);
parse_attributes(?STRING_REST(">", Rest), #xmerl_sax_parser_state{end_tags=ETags, ns = OldNsList} = State,
- {Tag, AttList, NewNsList}) ->
+ CurrentTag) ->
+ {Tag, AttList, NewNsList} = fill_default_attributes(CurrentTag, State),
CompleteNsList = NewNsList ++ OldNsList,
{Uri, LocalName, QName, Attributes} = fix_ns(Tag, AttList, CompleteNsList),
State1 = send_start_prefix_mapping_event(lists:reverse(NewNsList), State),
@@ -910,13 +959,13 @@ parse_attributes(?STRING_UNBOUND_REST(C, Rest), State, {Tag, AttList, NsList}) -
{AttValue, Rest3, State3} = parse_att_value(Rest2, State2),
case AttrName of
{"xmlns", NsName} ->
- parse_attributes(Rest3, State3, {Tag, AttList, [{NsName, AttValue} |NsList]});
+ parse_attributes_1(Rest3, State3, {Tag, AttList, [{NsName, AttValue} |NsList]});
{"", "xmlns"} ->
- parse_attributes(Rest3, State3, {Tag, AttList, [{"", AttValue} |NsList]});
+ parse_attributes_1(Rest3, State3, {Tag, AttList, [{"", AttValue} |NsList]});
{_Prefix, _LocalName} ->
case lists:keyfind(AttrName, 1, AttList) of
false ->
- parse_attributes(Rest3, State3, {Tag, [{AttrName, AttValue}|AttList], NsList});
+ parse_attributes_1(Rest3, State3, {Tag, [{AttrName, AttValue}|AttList], NsList});
_ ->
ElName =
case Tag of
@@ -933,7 +982,59 @@ parse_attributes(Bytes, State, CurrentTag) ->
unicode_incomplete_check([Bytes, State, CurrentTag, fun parse_attributes/3],
"expecting name, whitespace, /> or >").
-
+% check that the next character is valid
+parse_attributes_1(?STRING_EMPTY, State, CurrentTag) ->
+ cf(?STRING_EMPTY, State, CurrentTag, fun parse_attributes_1/3);
+parse_attributes_1(?STRING_UNBOUND_REST("/", _) = Bytes, State, CurrentTag) ->
+ parse_attributes(Bytes, State, CurrentTag);
+parse_attributes_1(?STRING_UNBOUND_REST(">", _) = Bytes, State, CurrentTag) ->
+ parse_attributes(Bytes, State, CurrentTag);
+parse_attributes_1(?STRING_UNBOUND_REST(C, _) = Bytes, State, CurrentTag) when ?is_whitespace(C) ->
+ parse_attributes(Bytes, State, CurrentTag);
+parse_attributes_1(?STRING_UNBOUND_REST(C, _), State, _) ->
+ ?fatal_error(State, "Expecting whitespace, /> or >, got:" ++ [C]).
+
+fill_default_attributes(CurrentTag, #xmerl_sax_parser_state{attribute_values = []}) ->
+ CurrentTag;
+fill_default_attributes({Tag, AttList, NsList}, #xmerl_sax_parser_state{attribute_values = Atts}) ->
+ F = fun({{E, A}, {V, normalize}}, {AttList1, NsList1}) when E == Tag ->
+ {merge_on_key({A, V}, AttList1), NsList1};
+ ({_, ignore}, Acc) -> Acc;
+ ({{E, A}, V}, {AttList1, NsList1}) when E == Tag, V =/= normalize ->
+ case A of
+ {"xmlns", NsName} ->
+ {AttList1, merge_on_key({NsName, V}, NsList1)};
+ {"", "xmlns"} ->
+ {AttList1, merge_on_key({"", V}, NsList1)};
+ {_, _} ->
+ {merge_on_key({A, V}, AttList1), NsList1}
+ end;
+ (_, Acc) -> Acc
+ end,
+ {AttList2, NsList2} = lists:foldl(F, {AttList, NsList}, Atts),
+ % attribute names for values needing normalization
+ Norm = [A ||
+ {{E, A}, V} <- Atts,
+ E == Tag,
+ V == normalize orelse element(2, V) == normalize],
+ N = fun({A, V}) ->
+ case lists:member(A, Norm) of
+ true ->
+ {A, lists:reverse(normalize_whitespace(V))};
+ false ->
+ {A, V}
+ end
+ end,
+ AttList3 = lists:map(N, AttList2),
+ {Tag, AttList3, NsList2}.
+
+merge_on_key({Key, Value}, List) ->
+ case lists:keyfind(Key, 1, List) of
+ false ->
+ [{Key, Value}|List];
+ _ ->
+ List
+ end.
%%----------------------------------------------------------------------
%% Function: fix_ns({Prefix, Name}, Attributes, Ns) -> Result
@@ -1093,24 +1194,26 @@ parse_att_value(?STRING_REST("\t", Rest), #xmerl_sax_parser_state{line_no=N} = S
parse_att_value(?STRING_REST("&", Rest), State, Stop, Acc) ->
{Ref, Rest1, State1} = parse_reference(Rest, State, true),
case Ref of
- {character, _, CharValue} ->
- parse_att_value(Rest1, State1, Stop, [CharValue | Acc]);
- {internal_general, true, _, Value} ->
- parse_att_value(Rest1, State1, Stop, Value ++ Acc);
- {internal_general, false, _, Value} ->
- {ParsedValue, [], State2} = parse_att_value(?TO_INPUT_FORMAT(Value), State1, undefined, []),
- parse_att_value(Rest1, State2, Stop, ParsedValue ++ Acc);
- {external_general, Name, _} ->
- ?fatal_error(State1, "External parsed entity reference in attribute value: " ++ Name);
- {not_found, Name} ->
- case State#xmerl_sax_parser_state.skip_external_dtd of
- false ->
- ?fatal_error(State1, "Entity not declared: " ++ Name); %%VC: Entity Declared
- true ->
- parse_att_value(Rest1, State1, Stop, ";" ++ lists:reverse(Name) ++ "&" ++ Acc)
- end;
- {unparsed, Name, _} ->
- ?fatal_error(State1, "Unparsed entity reference in attribute value: " ++ Name)
+ {character, _, CharValue} ->
+ parse_att_value(Rest1, State1, Stop, [CharValue | Acc]);
+ {internal_general, true, _, [Stop]} -> % stop char in entity
+ parse_att_value(Rest1, State1, Stop, [Stop|Acc]);
+ {internal_general, true, _, Value} ->
+ IValue = ?TO_INPUT_FORMAT(Value),
+ parse_att_value(?APPEND_STRING(IValue, Rest1), State1, Stop, Acc);
+ {internal_general, _, _, Value} ->
+ IValue = ?TO_INPUT_FORMAT(Value),
+ {Ctx, State2} = strip_context(State1),
+ {Acc1, _, State3} = parse_entity_content(IValue, State2, Acc, normalize),
+ parse_att_value(Rest1, add_context_back(Ctx, State3), Stop, Acc1);
+ {external_general, Name, _} ->
+ ?fatal_error(State1, "External parsed entity reference in attribute value: " ++ Name);
+ {not_found, Name} when State#xmerl_sax_parser_state.file_type =:= normal ->
+ ?fatal_error(State1, "Undeclared reference: " ++ Name);
+ {not_found, Name} ->
+ parse_att_value(Rest1, State1, Stop, ";" ++ lists:reverse(Name) ++ "&" ++ Acc);
+ {unparsed, Name, _} ->
+ ?fatal_error(State1, "Unparsed entity reference in attribute value: " ++ Name)
end;
parse_att_value(?STRING_UNBOUND_REST(Stop, Rest), State, Stop, Acc) ->
{lists:reverse(Acc), Rest, State};
@@ -1168,6 +1271,8 @@ parse_etag(Bytes, State) ->
unicode_incomplete_check([Bytes, State, fun parse_etag/2],
undefined).
+parse_etag_1(?STRING_EMPTY, State, Tag) ->
+ cf(?STRING_EMPTY, State, Tag, fun parse_etag_1/3);
parse_etag_1(?STRING_REST(">", Rest),
#xmerl_sax_parser_state{end_tags=[{_ETag, Uri, LocalName, QName, OldNsList, NewNsList}
|RestOfETags],
@@ -1200,38 +1305,37 @@ parse_etag_1(Bytes, State, Tag) ->
%% Description: Parsing the content part of tags
%% [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
%%----------------------------------------------------------------------
-parse_content(?STRING_EMPTY, State, Acc, IgnorableWS) ->
- case check_if_document_complete(State, "No more bytes") of
- true ->
- State1 = send_character_event(length(Acc), IgnorableWS, lists:reverse(Acc), State),
- {?STRING_EMPTY, State1};
- false ->
- case catch cf(?STRING_EMPTY, State, Acc, IgnorableWS, fun parse_content/4) of
- {Rest, State1} when is_record(State1, xmerl_sax_parser_state) ->
- {Rest, State1};
- {fatal_error, {State1, Msg}} ->
- case check_if_document_complete(State1, Msg) of
- true ->
- State2 = send_character_event(length(Acc), IgnorableWS, lists:reverse(Acc), State1),
- {?STRING_EMPTY, State2};
- false ->
- ?fatal_error(State1, Msg)
- end;
- Other ->
- throw(Other)
- end
+parse_content(?STRING_EMPTY, #xmerl_sax_parser_state{end_tags = ET} = State, Acc, IgnorableWS) ->
+ case catch cf(?STRING_EMPTY, State, Acc, IgnorableWS, fun parse_content/4) of
+ {fatal_error, {State1, "No more bytes"}} when ET == [] ->
+ State2 = send_character_event(length(Acc), IgnorableWS, lists:reverse(Acc), State1),
+ {?STRING_EMPTY, State2};
+ {fatal_error, {State1, "Continuation function undefined"}} when ET == [] ->
+ State2 = send_character_event(length(Acc), IgnorableWS, lists:reverse(Acc), State1),
+ {?STRING_EMPTY, State2};
+ {fatal_error, {State1, Msg}} ->
+ ?fatal_error(State1, Msg);
+ {Rest, State1} when is_record(State1, xmerl_sax_parser_state) ->
+ {Rest, State1};
+ Other ->
+ throw(Other)
end;
parse_content(?STRING("\r") = Bytes, State, Acc, IgnorableWS) ->
cf(Bytes, State, Acc, IgnorableWS, fun parse_content/4);
parse_content(?STRING("<") = Bytes, State, Acc, IgnorableWS) ->
cf(Bytes, State, Acc, IgnorableWS, fun parse_content/4);
-parse_content(?STRING_REST("</", Rest), State, Acc, IgnorableWS) ->
- State1 = send_character_event(length(Acc), IgnorableWS, lists:reverse(Acc), State),
- parse_etag(Rest, State1);
-parse_content(?STRING("<!") = Bytes, State, _Acc, IgnorableWS) ->
- cf(Bytes, State, [], IgnorableWS, fun parse_content/4);
-parse_content(?STRING("<!-") = Bytes, State, _Acc, IgnorableWS) ->
- cf(Bytes, State, [], IgnorableWS, fun parse_content/4);
+parse_content(?STRING_REST("</", Rest), #xmerl_sax_parser_state{end_tags = ET} = State, Acc, IgnorableWS) ->
+ case ET of
+ [] ->
+ ?fatal_error(State, "Unbalanced tags");
+ _ ->
+ State1 = send_character_event(length(Acc), IgnorableWS, lists:reverse(Acc), State),
+ parse_etag(Rest, State1)
+ end;
+parse_content(?STRING("<!") = Bytes, State, Acc, IgnorableWS) ->
+ cf(Bytes, State, Acc, IgnorableWS, fun parse_content/4);
+parse_content(?STRING("<!-") = Bytes, State, Acc, IgnorableWS) ->
+ cf(Bytes, State, Acc, IgnorableWS, fun parse_content/4);
parse_content(?STRING_REST("<!--", Rest), State, Acc, IgnorableWS) ->
State1 = send_character_event(length(Acc), IgnorableWS, lists:reverse(Acc), State),
{Rest1, State2} = parse_comment(Rest, State1, []),
@@ -1247,19 +1351,21 @@ parse_content(?STRING_REST("<?", Rest), State, Acc, IgnorableWS) ->
end;
parse_content(?STRING_REST("<!", Rest1) = Rest, #xmerl_sax_parser_state{end_tags = ET} = State, Acc, IgnorableWS) ->
case ET of
- [] ->
- {Rest, State}; %% Skicka ignorable WS ???
- _ ->
- State1 = send_character_event(length(Acc), IgnorableWS, lists:reverse(Acc), State),
- parse_cdata(Rest1, State1)
+ [] ->
+ IValue = ?TO_INPUT_FORMAT(lists:reverse(Acc)),
+ {?APPEND_STRING(IValue, Rest), State};
+ _ ->
+ State1 = send_character_event(length(Acc), IgnorableWS, lists:reverse(Acc), State),
+ parse_cdata(Rest1, State1)
end;
parse_content(?STRING_REST("<", Rest1) = Rest, #xmerl_sax_parser_state{end_tags = ET} = State, Acc, IgnorableWS) ->
case ET of
- [] ->
- {Rest, State}; %% Skicka ignorable WS ???
- _ ->
- State1 = send_character_event(length(Acc), IgnorableWS, lists:reverse(Acc), State),
- parse_stag(Rest1, State1)
+ [] ->
+ IValue = ?TO_INPUT_FORMAT(lists:reverse(Acc)),
+ {?APPEND_STRING(IValue, Rest), State};
+ _ ->
+ State1 = send_character_event(length(Acc), IgnorableWS, lists:reverse(Acc), State),
+ parse_stag(Rest1, State1)
end;
parse_content(?STRING_REST("\n", Rest), State, Acc, IgnorableWS) ->
N = State#xmerl_sax_parser_state.line_no,
@@ -1274,34 +1380,50 @@ parse_content(?STRING_REST(" ", Rest), State, Acc, IgnorableWS) ->
parse_content(Rest, State,[?space |Acc], IgnorableWS);
parse_content(?STRING_REST("\t", Rest), State, Acc, IgnorableWS) ->
parse_content(Rest, State,[?tab |Acc], IgnorableWS);
+parse_content(?STRING("]") = Bytes, State, Acc, IgnorableWS) ->
+ cf(Bytes, State, Acc, IgnorableWS, fun parse_content/4);
+parse_content(?STRING("]]") = Bytes, State, Acc, IgnorableWS) ->
+ cf(Bytes, State, Acc, IgnorableWS, fun parse_content/4);
parse_content(?STRING_REST("]]>", _Rest), State, _Acc, _IgnorableWS) ->
?fatal_error(State, "\"]]>\" is not allowed in content");
parse_content(?STRING_UNBOUND_REST(_C, _) = Rest,
#xmerl_sax_parser_state{end_tags = []} = State,
- _Acc, _IgnorableWS) ->
- {Rest, State};
-parse_content(?STRING_REST("&", Rest), State, Acc, _IgnorableWS) ->
+ Acc, _IgnorableWS) ->
+ IValue = ?TO_INPUT_FORMAT(lists:reverse(Acc)),
+ {?APPEND_STRING(IValue, Rest), State};
+parse_content(?STRING_REST("&", Rest), #xmerl_sax_parser_state{file_type = Type} = State, Acc, IgnorableWS) ->
{Ref, Rest1, State1} = parse_reference(Rest, State, true),
case Ref of
- {character, _, CharValue} ->
- parse_content(Rest1, State1, [CharValue | Acc], false);
- {internal_general, true, _, Value} ->
- parse_content(Rest1, State1, Value ++ Acc, false);
- {internal_general, false, _, Value} ->
- IValue = ?TO_INPUT_FORMAT(Value),
- parse_content(?APPEND_STRING(IValue, Rest1), State1, Acc, false);
- {external_general, _, {PubId, SysId}} ->
- State2 = parse_external_entity(State1, PubId, SysId),
- parse_content(Rest1, State2, Acc, false);
- {not_found, Name} ->
- case State#xmerl_sax_parser_state.skip_external_dtd of
- false ->
- ?fatal_error(State1, "Entity not declared: " ++ Name); %%VC: Entity Declared
- true ->
- parse_content(Rest1, State1, ";" ++ lists:reverse(Name) ++ "&" ++ Acc, false)
- end;
- {unparsed, Name, _} ->
- ?fatal_error(State1, "Unparsed entity reference in content: " ++ Name)
+ {character, _, CharValue} ->
+ parse_content(Rest1, State1, [CharValue | Acc], false);
+ % & causes problems with references
+ {internal_general, true, _, "&"} ->
+ ?fatal_error(State1, "Reference must begin and end in same entity");
+ {internal_general, true, _, Value} ->
+ IValue = ?TO_INPUT_FORMAT(Value),
+ parse_content(?APPEND_STRING(IValue, Rest1), State1, Acc, false);
+ {internal_general, _, _, Value} ->
+ IValue = ?TO_INPUT_FORMAT(Value),
+ {Ctx, State2} = strip_context(State1),
+ % markup must be self contained
+ case parse_entity_content(IValue, State2, Acc, IgnorableWS) of
+ {fatal_error, {State3, Message}} ->
+ ?fatal_error(State3, Message);
+ {Acc1, _, State3} ->
+ parse_content(Rest1, add_context_back(Ctx, State3), Acc1, false)
+ end;
+ {external_general, _, {PubId, SysId}} ->
+ {Acc1, State2} = parse_external_entity(State1#xmerl_sax_parser_state{file_type = entity}, PubId, SysId, Acc),
+ parse_content(Rest1, State2#xmerl_sax_parser_state{file_type = Type}, Acc1, false);
+ {not_found, Name} ->
+ case State#xmerl_sax_parser_state.skip_external_dtd of
+ false ->
+ ?fatal_error(State1, "Entity not declared: " ++ Name); %%VC: Entity Declared
+ true ->
+ parse_content(Rest1, State1, ";" ++ lists:reverse(Name) ++ "&" ++ Acc, false)
+ end;
+ {unparsed, Name, _} ->
+ ?fatal_error(State1, "Unparsed entity reference in content: " ++ Name)
end;
parse_content(?STRING_UNBOUND_REST(C, Rest), State, Acc, _IgnorableWS) ->
if
@@ -1319,67 +1441,141 @@ parse_content(Bytes, State, Acc, IgnorableWS) ->
%% Parameters: Rest = string() | binary()
%% State = #xmerl_sax_parser_state{}
%% Acc = string()
-%% IgnorableWS = true | false
-%% Result : {Rest, State}
+%% IgnorableWS = true | false | normalize
+%% Result : {Acc, Rest, State}
%% Description: Parsing the content part of an external entity
%% [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
%%----------------------------------------------------------------------
+parse_entity_content(Bytes, #xmerl_sax_parser_state{file_type = text} = State, Acc, _IgnorableWS) ->
+ parse_entity_content_1(Bytes, State, Acc);
parse_entity_content(?STRING_EMPTY, State, Acc, IgnorableWS) ->
- parse_content(?STRING_EMPTY, State, Acc, IgnorableWS);
-parse_entity_content(?STRING_REST("<", Rest1), State, Acc, IgnorableWS) ->
+ case catch cf(?STRING_EMPTY, State, Acc, IgnorableWS, fun parse_entity_content/4) of
+ {Acc1, Rest, State1} when is_record(State1, xmerl_sax_parser_state) ->
+ {Acc1, Rest, State1};
+ {fatal_error, {State1, "No more bytes"}} ->
+ {Acc, ?STRING_EMPTY, State1};
+ {fatal_error, {State1, "Continuation function undefined"}} ->
+ {Acc, ?STRING_EMPTY, State1};
+ {fatal_error, {State1, Message}} ->
+ ?fatal_error(State1, Message)
+ end;
+parse_entity_content(?STRING("<") = Bytes, State, Acc, IgnorableWS) ->
+ cf(Bytes, State, Acc, IgnorableWS, fun parse_entity_content/4);
+parse_entity_content(?STRING("<!") = Bytes, State, Acc, IgnorableWS) ->
+ cf(Bytes, State, Acc, IgnorableWS, fun parse_entity_content/4);
+parse_entity_content(?STRING("<!-") = Bytes, State, Acc, IgnorableWS) ->
+ cf(Bytes, State, Acc, IgnorableWS, fun parse_entity_content/4);
+parse_entity_content(?STRING_REST("<!--", Rest), State, Acc, IgnorableWS) ->
+ State1 = send_character_event(length(Acc), IgnorableWS, lists:reverse(Acc), State),
+ case catch parse_comment(Rest, State1, Acc) of
+ {Rest1, State2} when is_record(State2, xmerl_sax_parser_state) ->
+ parse_entity_content(Rest1, State2, [], true);
+ {fatal_error, {State2, "No more bytes"}} ->
+ ?fatal_error(State2, "Expected end comment");
+ {fatal_error, {State2, Message}} ->
+ ?fatal_error(State2, Message)
+ end;
+parse_entity_content(?STRING_REST("<?", Rest), State, Acc, IgnorableWS) ->
State1 = send_character_event(length(Acc), IgnorableWS, lists:reverse(Acc), State),
- case parse_stag(Rest1, State1) of
+ case parse_pi(Rest, State1) of
+ {Rest1, State2} ->
+ parse_entity_content(Rest1, State2, [], true);
+ {endDocument, _Rest1, State2} ->
+ IValue = ?TO_INPUT_FORMAT("<?"),
+ {[],?APPEND_STRING(IValue, Rest), State2}
+ end;
+parse_entity_content(?STRING_REST("</", _), #xmerl_sax_parser_state{end_tags = []} = State, _, _)->
+ ?fatal_error(State, "Unbalanced tags");
+parse_entity_content(?STRING_REST("</", Rest1), State, Acc, IgnorableWS) ->
+ State1 = send_character_event(length(Acc), IgnorableWS, lists:reverse(Acc), State),
+ case parse_etag(Rest1, State1) of
{?STRING_EMPTY, State2} ->
- {?STRING_EMPTY, State2};
+ {[], ?STRING_EMPTY, State2};
+ {Rest2, State2} when is_record(State2, xmerl_sax_parser_state) ->
+ parse_entity_content(Rest2, State2, [], true);
+ {fatal_error, {State2, Message}} ->
+ ?fatal_error(State2, Message)
+ end;
+parse_entity_content(?STRING_REST("<!", Rest1), State, Acc, IgnorableWS) ->
+ State1 = send_character_event(length(Acc), IgnorableWS, lists:reverse(Acc), State),
+ case parse_cdata(Rest1, State1) of
+ {?STRING_EMPTY, State2} ->
+ {[], ?STRING_EMPTY, State2};
{Rest2, State2} when is_record(State2, xmerl_sax_parser_state) ->
parse_entity_content(Rest2, State2, [], true);
Other ->
Other
end;
+parse_entity_content(?STRING_REST("<", Rest1), State, Acc, IgnorableWS) ->
+ State1 = send_character_event(length(Acc), IgnorableWS, lists:reverse(Acc), State),
+ case catch parse_stag(Rest1, State1) of
+ {Rest2, State2} when is_record(State2, xmerl_sax_parser_state) ->
+ parse_entity_content(Rest2, State2, [], true);
+ {fatal_error, {State2, Message}} ->
+ ?fatal_error(State2, Message)
+ end;
parse_entity_content(?STRING_REST("\n", Rest), State, Acc, IgnorableWS) ->
N = State#xmerl_sax_parser_state.line_no,
- parse_entity_content(Rest, State#xmerl_sax_parser_state{line_no=N+1},[?lf |Acc], IgnorableWS);
-parse_entity_content(?STRING_REST("\r\n", Rest), State, Acc, IgnorableWS) ->
+ case IgnorableWS of
+ normalize ->
+ parse_entity_content(Rest, State#xmerl_sax_parser_state{line_no=N+1},[?space |Acc], IgnorableWS);
+ _ ->
+ parse_entity_content(Rest, State#xmerl_sax_parser_state{line_no=N+1},[?lf |Acc], IgnorableWS)
+ end;
+parse_entity_content(?STRING_REST("\r\n", Rest), #xmerl_sax_parser_state{file_type = entity} = State, Acc, IgnorableWS) ->
N = State#xmerl_sax_parser_state.line_no,
- parse_entity_content(Rest, State#xmerl_sax_parser_state{line_no=N+1},[?lf |Acc], IgnorableWS);
+ case IgnorableWS of
+ normalize ->
+ parse_entity_content(Rest, State#xmerl_sax_parser_state{line_no=N+1},[?space |Acc], IgnorableWS);
+ _ ->
+ parse_entity_content(Rest, State#xmerl_sax_parser_state{line_no=N+1},[?lf |Acc], IgnorableWS)
+ end;
parse_entity_content(?STRING_REST("\r", Rest), State, Acc, IgnorableWS) ->
N = State#xmerl_sax_parser_state.line_no,
- parse_entity_content(Rest, State#xmerl_sax_parser_state{line_no=N+1},[?lf |Acc], IgnorableWS);
+ case IgnorableWS of
+ normalize ->
+ parse_entity_content(Rest, State#xmerl_sax_parser_state{line_no=N+1},[?space |Acc], IgnorableWS);
+ % only external entities are end-of-line normalized
+ _ when State#xmerl_sax_parser_state.file_type == normal ->
+ parse_entity_content(Rest, State#xmerl_sax_parser_state{line_no=N+1},[?cr |Acc], IgnorableWS);
+ _ ->
+ parse_entity_content(Rest, State#xmerl_sax_parser_state{line_no=N+1},[?lf |Acc], IgnorableWS)
+ end;
parse_entity_content(?STRING_REST(" ", Rest), State, Acc, IgnorableWS) ->
parse_entity_content(Rest, State,[?space |Acc], IgnorableWS);
parse_entity_content(?STRING_REST("\t", Rest), State, Acc, IgnorableWS) ->
parse_entity_content(Rest, State,[?tab |Acc], IgnorableWS);
-parse_entity_content(?STRING_REST("&", Rest), State, Acc, _IgnorableWS) ->
+parse_entity_content(?STRING_REST("&", Rest), #xmerl_sax_parser_state{file_type = Type} = State, Acc, IgnorableWS) ->
{Ref, Rest1, State1} = parse_reference(Rest, State, true),
+ ok = check_ref_cycle(State1),
case Ref of
{character, _, CharValue} ->
parse_entity_content(Rest1, State1, [CharValue | Acc], false);
{internal_general, true, _, Value} ->
- parse_entity_content(Rest1, State1, Value ++ Acc, false);
+ IValue = ?TO_INPUT_FORMAT(Value),
+ parse_entity_content(?APPEND_STRING(IValue, Rest1), State1, Acc, false);
{internal_general, false, _, Value} ->
IValue = ?TO_INPUT_FORMAT(Value),
- parse_entity_content(?APPEND_STRING(IValue, Rest1), State1, Acc, false);
+ ET = State1#xmerl_sax_parser_state.end_tags,
+ {Acc1, _, State2} = parse_entity_content(IValue, State1#xmerl_sax_parser_state{end_tags = []}, Acc, IgnorableWS),
+ parse_entity_content(Rest1, State2#xmerl_sax_parser_state{end_tags = ET}, Acc1, false);
{external_general, _, {PubId, SysId}} ->
- State2 = parse_external_entity(State1, PubId, SysId),
- parse_entity_content(Rest1, State2, Acc, false);
+ %?fatal_error(State1, "External reference in entity: " ++ Name);
+ {Acc1, State2} = parse_external_entity(State1#xmerl_sax_parser_state{file_type = entity}, PubId, SysId, Acc),
+ parse_entity_content(Rest1, State2#xmerl_sax_parser_state{file_type = Type}, Acc1, false);
{not_found, Name} ->
- case State#xmerl_sax_parser_state.skip_external_dtd of
- false ->
- ?fatal_error(State1, "Entity not declared: " ++ Name); %%VC: Entity Declared
- true ->
- parse_entity_content(Rest1, State1, ";" ++ lists:reverse(Name) ++ "&" ++ Acc, false)
- end;
+ ?fatal_error(State1, "Entity not declared: " ++ Name);
{unparsed, Name, _} ->
?fatal_error(State1, "Unparsed entity reference in content: " ++ Name)
end;
parse_entity_content(?STRING_UNBOUND_REST(C, Rest), State, Acc, _IgnorableWS) ->
if
?is_char(C) ->
- case parse_content(Rest, State, [C|Acc], false) of
- {?STRING_EMPTY, State1} ->
- {?STRING_EMPTY, State1};
- {Rest1, State1} when is_record(State1, xmerl_sax_parser_state) ->
- parse_entity_content(Rest1, State1, [], true);
+ case parse_entity_content(Rest, State, [C|Acc], false) of
+ {Acc1, ?STRING_EMPTY, State1} ->
+ {Acc1, ?STRING_EMPTY, State1};
+ {Acc1, Rest1, State1} when is_record(State1, xmerl_sax_parser_state) ->
+ parse_entity_content(Rest1, State1, Acc1, true);
Other ->
Other
end;
@@ -1388,23 +1584,28 @@ parse_entity_content(?STRING_UNBOUND_REST(C, Rest), State, Acc, _IgnorableWS) ->
end;
parse_entity_content(Bytes, State, Acc, IgnorableWS) ->
unicode_incomplete_check([Bytes, State, Acc, IgnorableWS, fun parse_entity_content/4],
- undefined).
-
-%%----------------------------------------------------------------------
-%% Function: check_if_document_complete(State, ErrorMsg) -> Result
-%% Parameters: State = #xmerl_sax_parser_state{}
-%% ErrorMsg = string()
-%% Result : boolean()
-%% Description: Checks that the document is complete if we don't have more data..
-%%----------------------------------------------------------------------
-check_if_document_complete(#xmerl_sax_parser_state{end_tags = []},
- "No more bytes") ->
- true;
-check_if_document_complete(#xmerl_sax_parser_state{end_tags = []},
- "Continuation function undefined") ->
- true;
-check_if_document_complete(_, _) ->
- false.
+ "Unexpected end of entity content").
+
+% reads an external entity as replacement text
+parse_entity_content_1(?STRING_EMPTY, State, Acc) ->
+ case catch cf(?STRING_EMPTY, State, Acc, fun parse_entity_content_1/3) of
+ {fatal_error, {State1, "No more bytes"}} ->
+ {Acc, ?STRING_EMPTY, State1};
+ {fatal_error, {State1, Message}} ->
+ ?fatal_error(State1, Message);
+ {Acc1, ?STRING_EMPTY, State1} ->
+ {Acc1, ?STRING_EMPTY, State1}
+ end;
+parse_entity_content_1(?STRING_UNBOUND_REST(C, Rest), State, Acc) ->
+ if
+ ?is_char(C) ->
+ parse_entity_content_1(Rest, State, [C|Acc]);
+ true ->
+ ?fatal_error(State, lists:flatten(io_lib:format("Bad character in entity: ~p", [C])))
+ end;
+parse_entity_content_1(Bytes, State, Acc) ->
+ unicode_incomplete_check([Bytes, State, Acc, fun parse_entity_content_1/3],
+ "Unexpected end of entity content").
%%----------------------------------------------------------------------
%% Function: send_character_event(Length, IgnorableWS, String, State) -> Result
@@ -1506,6 +1707,8 @@ parse_reference(Bytes, State, HaveToExist) ->
undefined).
+parse_reference_1(?STRING_EMPTY, State, HaveToExist, Name) ->
+ cf(?STRING_EMPTY, State, HaveToExist, Name, fun parse_reference_1/4);
parse_reference_1(?STRING_REST(";", Rest), State, HaveToExist, Name) ->
case look_up_reference(Name, HaveToExist, State) of
{internal_general, Name, RefValue} ->
@@ -1537,8 +1740,12 @@ is_delimiter(34) ->
true;
is_delimiter("&") ->
true;
+is_delimiter("&") ->
+ true;
is_delimiter("<") ->
true;
+is_delimiter("<") ->
+ true;
is_delimiter(">") ->
true;
is_delimiter("'") ->
@@ -1572,6 +1779,8 @@ parse_pe_reference(Bytes, State) ->
undefined).
+parse_pe_reference_1(?STRING_EMPTY, State, Name) ->
+ cf(?STRING_EMPTY, State, Name, fun parse_pe_reference_1/3);
parse_pe_reference_1(?STRING_REST(";", Rest), State, Name) ->
Name1 = "%" ++ Name,
Result = look_up_reference(Name1, true, State),
@@ -1600,15 +1809,15 @@ insert_reference(Name, Value, #xmerl_sax_parser_state{ref_table = Map} = State)
%%----------------------------------------------------------------------
-%% Function: look_up_reference(Reference, State) -> Result
+%% Function: look_up_reference(Reference, HaveToExist, State) -> Result
%% Parameters: Reference = string()
%% State = #xmerl_sax_parser_state{}
%% Result :
%%----------------------------------------------------------------------
look_up_reference("amp", _, _) ->
- {internal_general, "amp", "&"};
+ {internal_general, "amp", "&"};
look_up_reference("lt", _, _) ->
- {internal_general, "lt", "<"};
+ {internal_general, "lt", "<"};
look_up_reference("gt", _, _) ->
{internal_general, "gt", ">"};
look_up_reference("apos", _, _) ->
@@ -1683,7 +1892,7 @@ parse_digit(Bytes, State, Acc) ->
undefined).
%%----------------------------------------------------------------------
-%% Function: parse_system_litteral(Rest, State, Stop, Acc) -> Result
+%% Function: parse_system_literal(Rest, State, Stop, Acc) -> Result
%% Parameters: Rest = string() | binary()
%% State = #xmerl_sax_parser_state{}
%% Stop = $' | $"
@@ -1691,21 +1900,23 @@ parse_digit(Bytes, State, Acc) ->
%% Result : {Value, Reference, Rest, State}
%% Value = integer()
%% Reference = string()
-%% Description: Parse a system litteral.
+%% Description: Parse a system literal.
%% [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
%%----------------------------------------------------------------------
-parse_system_litteral(?STRING_EMPTY, State, Stop, Acc) ->
- cf(?STRING_EMPTY, State, Stop, Acc, fun parse_system_litteral/4);
-parse_system_litteral(?STRING_UNBOUND_REST(Stop, Rest), State, Stop, Acc) ->
+parse_system_literal(?STRING_EMPTY, State, Stop, Acc) ->
+ cf(?STRING_EMPTY, State, Stop, Acc, fun parse_system_literal/4);
+parse_system_literal(?STRING_UNBOUND_REST(Stop, Rest), State, Stop, Acc) ->
{lists:reverse(Acc), Rest, State};
-parse_system_litteral(?STRING_UNBOUND_REST(C, Rest), State, Stop, Acc) ->
- parse_system_litteral(Rest, State, Stop, [C |Acc]);
-parse_system_litteral(Bytes, State, Stop, Acc) ->
- unicode_incomplete_check([Bytes, State, Stop, Acc, fun parse_system_litteral/4],
+parse_system_literal(?STRING_UNBOUND_REST("#", _), State, _, _) ->
+ ?fatal_error(State, "Fragment found in system identifier");
+parse_system_literal(?STRING_UNBOUND_REST(C, Rest), State, Stop, Acc) ->
+ parse_system_literal(Rest, State, Stop, [C |Acc]);
+parse_system_literal(Bytes, State, Stop, Acc) ->
+ unicode_incomplete_check([Bytes, State, Stop, Acc, fun parse_system_literal/4],
undefined).
%%----------------------------------------------------------------------
-%% Function: parse_pubid_litteral(Rest, State, Stop, Acc) -> Result
+%% Function: parse_pubid_literal(Rest, State, Stop, Acc) -> Result
%% Parameters: Rest = string() | binary()
%% State = #xmerl_sax_parser_state{}
%% Stop = $' | $"
@@ -1713,24 +1924,42 @@ parse_system_litteral(Bytes, State, Stop, Acc) ->
%% Result : {Value, Reference, Rest, State}
%% Value = integer()
%% Reference = string()
-%% Description: Parse a public idlitteral.
+%% Description: Parse a public idliteral.
%% [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
%%----------------------------------------------------------------------
-parse_pubid_litteral(?STRING_EMPTY, State, Stop, Acc) ->
- cf(?STRING_EMPTY, State, Stop, Acc, fun parse_pubid_litteral/4);
-parse_pubid_litteral(?STRING_UNBOUND_REST(Stop, Rest), State, Stop, Acc) ->
- {lists:reverse(Acc), Rest, State};
-parse_pubid_litteral(?STRING_UNBOUND_REST(C, Rest), State, Stop, Acc) ->
+parse_pubid_literal(?STRING_EMPTY, State, Stop, Acc) ->
+ cf(?STRING_EMPTY, State, Stop, Acc, fun parse_pubid_literal/4);
+parse_pubid_literal(?STRING_UNBOUND_REST(Stop, Rest), State, Stop, Acc) ->
+ {normalize_whitespace(Acc), Rest, State};
+parse_pubid_literal(?STRING_UNBOUND_REST(C, Rest), State, Stop, Acc) ->
case is_pubid_char(C) of
true ->
- parse_pubid_litteral(Rest, State, Stop, [C |Acc]);
+ parse_pubid_literal(Rest, State, Stop, [C |Acc]);
false ->
- ?fatal_error(State, "Character not allowed in pubid litteral: " ++ [C])
+ ?fatal_error(State, "Character not allowed in pubid literal: " ++ [C])
end;
-parse_pubid_litteral(Bytes, State, Stop, Acc) ->
- unicode_incomplete_check([Bytes, State, Stop, Acc, fun parse_pubid_litteral/4],
+parse_pubid_literal(Bytes, State, Stop, Acc) ->
+ unicode_incomplete_check([Bytes, State, Stop, Acc, fun parse_pubid_literal/4],
undefined).
+% returns a reversed, normalized version of the string
+normalize_whitespace(Acc) ->
+ T1 = delete_leading_whitespace(Acc),
+ T2 = normalize_whitespace(T1, []),
+ delete_leading_whitespace(T2).
+
+-define(is_ws(C), C =:= ?space orelse C =:= ?cr orelse C =:= ?lf orelse C =:= ?tab).
+
+normalize_whitespace([W1,W2|T], Acc) when ?is_ws(W1),
+ ?is_ws(W2) ->
+ normalize_whitespace([$ |T], Acc);
+normalize_whitespace([W|T], Acc) when ?is_ws(W) ->
+ normalize_whitespace(T, [$ |Acc]);
+normalize_whitespace([W|T], Acc) ->
+ normalize_whitespace(T, [W|Acc]);
+normalize_whitespace([], Acc) ->
+ Acc.
+
%%======================================================================
%% DTD Parsing
%%======================================================================
@@ -1829,19 +2058,23 @@ parse_doctype_1(?STRING_UNBOUND_REST(C, _) = Rest, State, Name, Definition) when
parse_doctype_1(?STRING_UNBOUND_REST(C, _) = Rest, State, Name, _Definition) when C == $S; C == $P ->
{PubId, SysId, Rest1, State1} = parse_external_id(Rest, State, false),
State2 = event_callback({startDTD, Name, PubId, SysId}, State1),
- State3 =
- case State2#xmerl_sax_parser_state.skip_external_dtd of
- false ->
- parse_external_entity(State2#xmerl_sax_parser_state{file_type=dtd}, PubId, SysId);
- true ->
- State2
- end,
- parse_doctype_1(Rest1, State3, Name, true);
+ {Rest2, State3} = parse_doctype_1(Rest1, State2, Name, true),
+ % external subsets are parsed after internal
+ case State2#xmerl_sax_parser_state.skip_external_dtd of
+ false ->
+ FT = State3#xmerl_sax_parser_state.file_type,
+ {_, State4} = parse_external_entity(State3#xmerl_sax_parser_state{file_type=dtd}, PubId, SysId, []),
+ {Rest2, State4#xmerl_sax_parser_state{file_type = FT}};
+ true ->
+ {Rest2, State3}
+ end;
parse_doctype_1(Bytes, State, Name, Definition) ->
unicode_incomplete_check([Bytes, State, Name, Definition, fun parse_doctype_1/4],
"expecting >, external id or declaration part").
+parse_doctype_2(?STRING_EMPTY, State) ->
+ cf(?STRING_EMPTY, State, fun parse_doctype_2/2);
parse_doctype_2(?STRING_REST(">", Rest), State) ->
{Rest, State};
parse_doctype_2(Bytes, State) ->
@@ -1850,62 +2083,70 @@ parse_doctype_2(Bytes, State) ->
%%----------------------------------------------------------------------
-%% Function : parse_external_entity(State, PubId, SysId) -> Result
+%% Function : parse_external_entity(State, PubId, SysId, Acc) -> Result
%% Parameters: State = #xmerl_sax_parser_state{}
%% PubId = string()
%% SysId = string()
-%% Result : {Rest, State}
+%% Result : {Acc, State}
%% Description: Starts the parsing of an external entity by calling the resolver and
%% then sends the input to the parsing function.
%%----------------------------------------------------------------------
%% The public id is not handled
-parse_external_entity(State, _PubId, SysId) ->
+parse_external_entity(State, _PubId, SysId, Acc) ->
ExtRef = check_uri(SysId, State#xmerl_sax_parser_state.current_location),
SaveState = event_callback({startEntity, SysId}, State),
State1 = State#xmerl_sax_parser_state{line_no=1,
- continuation_state=undefined,
- continuation_fun=fun xmerl_sax_parser:default_continuation_cb/1,
- end_tags = []},
+ end_tags = []},
-
- {EventState, RefTable} = handle_external_entity(ExtRef, State1),
+ {Acc1, EventState, EventRefTab, AttVals} = handle_external_entity(ExtRef, State1, Acc),
NewState = event_callback({endEntity, SysId},
- SaveState#xmerl_sax_parser_state{event_state=EventState,
- ref_table=RefTable}),
- NewState#xmerl_sax_parser_state{file_type=normal}.
+ SaveState#xmerl_sax_parser_state{event_state=EventState}),
+ case SaveState#xmerl_sax_parser_state.standalone of
+ no ->
+ {Acc1, NewState#xmerl_sax_parser_state{ref_table = EventRefTab,
+ attribute_values = AttVals}};
+ yes ->
+ {Acc1, NewState#xmerl_sax_parser_state{attribute_values = AttVals}}
+ end.
%%----------------------------------------------------------------------
-%% Function : handle_external_entity(ExtRef, State) -> Result
+%% Function : handle_external_entity(ExtRef, State, Acc) -> Result
%% Parameters: ExtRef = {file, string()} | {http, string()}
%% State = #xmerl_sax_parser_state{}
-%% Result : string() | binary()
+%% Result : {Acc, State}
%% Description: Returns working directory, entity and the opened
%% filedescriptor.
%%----------------------------------------------------------------------
-handle_external_entity({file, FileToOpen}, State) ->
+handle_external_entity({file, FileToOpen}, #xmerl_sax_parser_state{encoding = Enc} = State, Acc) ->
case file:open(FileToOpen, [raw, read, binary]) of
{error, Reason} ->
?fatal_error(State, "Couldn't open external entity "++ FileToOpen ++ " : "
++ file:format_error(Reason));
{ok, FD} ->
- {?STRING_EMPTY, EntityState} =
- parse_external_entity_1(<<>>,
- State#xmerl_sax_parser_state{continuation_state=FD,
- current_location=filename:dirname(FileToOpen),
- entity=filename:basename(FileToOpen),
- input_type=file}),
- ok = file:close(FD),
- {EntityState#xmerl_sax_parser_state.event_state,
- EntityState#xmerl_sax_parser_state.ref_table}
- end;
-handle_external_entity({http, Url}, State) ->
+ State1 = State#xmerl_sax_parser_state{continuation_state={FD, <<>>},
+ continuation_fun = fun external_continuation_cb/1,
+ current_location=filename:dirname(FileToOpen),
+ entity=filename:basename(FileToOpen),
+ input_type=file},
+ {Head, #xmerl_sax_parser_state{encoding = Enc1} = State2} = detect_charset(State1),
+ {Head1, State3} = encode_external_input(Head, Enc1, Enc, State2),
+ ConFun = external_continuation_cb(Enc1, Enc),
+ {Acc1, ?STRING_EMPTY, EntityState} =
+ parse_external_entity_1(Head1, State3#xmerl_sax_parser_state{continuation_fun = ConFun}, Acc),
+ ok = file:close(FD),
+ {Acc1,
+ EntityState#xmerl_sax_parser_state.event_state,
+ EntityState#xmerl_sax_parser_state.ref_table,
+ EntityState#xmerl_sax_parser_state.attribute_values}
+ end;
+handle_external_entity({http, Url}, #xmerl_sax_parser_state{encoding = Enc} = State, Acc) ->
try
{Host, Port, Key} = http(Url),
@@ -1915,92 +2156,110 @@ handle_external_entity({http, Url}, State) ->
?fatal_error(State, "Couldn't open temporary file " ++ TmpFile ++ " : "
++ file:format_error(Reason));
{ok, FD} ->
- {?STRING_EMPTY, EntityState} =
- parse_external_entity_byte_order_mark(<<>>,
- State#xmerl_sax_parser_state{continuation_state=FD,
- current_location=filename:dirname(Url),
- entity=filename:basename(Url),
- input_type=file}),
- ok = file:close(FD),
- ok = file:delete(TmpFile),
- {EntityState#xmerl_sax_parser_state.event_state,
- EntityState#xmerl_sax_parser_state.ref_table}
-
+ State1 = State#xmerl_sax_parser_state{continuation_state={FD, <<>>},
+ continuation_fun = fun external_continuation_cb/1,
+ current_location=filename:dirname(Url),
+ entity=filename:basename(Url),
+ input_type=file},
+ {Head, #xmerl_sax_parser_state{encoding = Enc1} = State2} = detect_charset(State1),
+ ConFun = external_continuation_cb(Enc1, Enc),
+ {Acc1, ?STRING_EMPTY, EntityState} =
+ parse_external_entity_1(Head, State2#xmerl_sax_parser_state{continuation_fun = ConFun}, Acc),
+ ok = file:close(FD),
+ ok = file:delete(TmpFile),
+ {Acc1,
+ EntityState#xmerl_sax_parser_state.event_state,
+ EntityState#xmerl_sax_parser_state.ref_table,
+ EntityState#xmerl_sax_parser_state.attribute_values}
end
catch
throw:{error, Error} ->
?fatal_error(State, Error)
end;
-handle_external_entity({Tag, _Url}, State) ->
+handle_external_entity({Tag, _Url}, State, _Acc) ->
?fatal_error(State, "Unsupported URI type: " ++ atom_to_list(Tag)).
-?PARSE_EXTERNAL_ENTITY_BYTE_ORDER_MARK(Bytes, State).
+%%?PARSE_EXTERNAL_ENTITY_BYTE_ORDER_MARK(Bytes, State).
%%----------------------------------------------------------------------
-%% Function : parse_external_entity_1(Rest, State) -> Result
+%% Function : parse_external_entity_1(Rest, State, Acc) -> Result
%% Parameters: Rest = string() | binary()
%% State = #xmerl_sax_parser_state{}
-%% Result : {Rest, State}
+%% Result : {Acc, Rest, State}
%% Description: Parse the external entity.
%%----------------------------------------------------------------------
-parse_external_entity_1(?STRING_EMPTY, State) ->
- case catch cf(?STRING_EMPTY, State, fun parse_external_entity_1/2) of
- {Rest, State1} when is_record(State1, xmerl_sax_parser_state) ->
- {Rest, State1};
- {fatal_error, {State1, "No more bytes"}} ->
- {?STRING_EMPTY, State1};
- Other ->
- throw(Other)
- end;
-parse_external_entity_1(?STRING("<") = Bytes, State) ->
- cf(Bytes, State, fun parse_external_entity_1/2);
-parse_external_entity_1(?STRING("<?") = Bytes, State) ->
- cf(Bytes, State, fun parse_external_entity_1/2);
-parse_external_entity_1(?STRING("<?x") = Bytes, State) ->
- cf(Bytes, State, fun parse_external_entity_1/2);
-parse_external_entity_1(?STRING("<?xm") = Bytes, State) ->
- cf(Bytes, State, fun parse_external_entity_1/2);
-parse_external_entity_1(?STRING("<?xml") = Bytes, State) ->
- cf(Bytes, State, fun parse_external_entity_1/2);
+parse_external_entity_1(?STRING_EMPTY, State, Acc) ->
+ case catch cf(?STRING_EMPTY, State, Acc, fun parse_external_entity_1/3) of
+ {fatal_error, {State1, "No more bytes"}} ->
+ {Acc, ?STRING_EMPTY, State1};
+ {fatal_error, {State1, Msg}} ->
+ ?fatal_error(State1, Msg);
+ {Acc1, ?STRING_EMPTY, State1} when is_record(State1, xmerl_sax_parser_state) ->
+ {Acc1, ?STRING_EMPTY, State1};
+ {_, _, State1} when is_record(State1, xmerl_sax_parser_state) ->
+ ?fatal_error(State1, "Not well-formed entity")
+ end;
+parse_external_entity_1(?STRING("<") = Bytes, State, Acc) ->
+ cf(Bytes, State, Acc, fun parse_external_entity_1/3);
+parse_external_entity_1(?STRING("<?") = Bytes, State, Acc) ->
+ cf(Bytes, State, Acc, fun parse_external_entity_1/3);
+parse_external_entity_1(?STRING("<?x") = Bytes, State, Acc) ->
+ cf(Bytes, State, Acc, fun parse_external_entity_1/3);
+parse_external_entity_1(?STRING("<?xm") = Bytes, State, Acc) ->
+ cf(Bytes, State, Acc, fun parse_external_entity_1/3);
+parse_external_entity_1(?STRING("<?xml") = Bytes, State, Acc) ->
+ cf(Bytes, State, Acc, fun parse_external_entity_1/3);
parse_external_entity_1(?STRING_REST("<?xml", Rest) = Bytes,
- #xmerl_sax_parser_state{file_type=Type} = State) ->
- {Rest1, State1} =
- case is_next_char_whitespace(Rest, State) of
- false ->
- {Bytes, State};
- true when Type =:= dtd ->
- {_XmlAttributes, R, S} = parse_version_info(Rest, State, []),
- %S1 = event_callback({processingInstruction, "xml", XmlAttributes}, S),% The XML decl. should not be reported as a PI
- {R, S};
- true ->
- parse_text_decl(Bytes, State)
- end,
+ #xmerl_sax_parser_state{file_type=Type,
+ end_tags = ET} = State, Acc) ->
+ {Rest1, State1} =
+ case is_next_char_whitespace(Rest, State) of
+ false ->
+ {Bytes, State};
+ true ->
+ parse_text_decl(Bytes, State)
+ end,
case Type of
- dtd ->
- case catch parse_doctype_decl(Rest1, State1) of
- {Rest2, State2} when is_record(State2, xmerl_sax_parser_state) ->
- {Rest2, State2};
- {fatal_error, {State2, "No more bytes"}} ->
- {?STRING_EMPTY, State2};
- Other ->
- throw(Other)
- end;
-
- _ -> % Type is normal or entity
- parse_entity_content(Rest1, State1, [], true)
+ dtd ->
+ case catch parse_doctype_decl(Rest1, State1) of
+ {?STRING_EMPTY, State2} when is_record(State2, xmerl_sax_parser_state) ->
+ % this my not truly be empty. the file may have
+ % more unbalanced stuff, but not have been read yet
+ {[], ?STRING_EMPTY, State2};
+ {_, State1} when is_record(State1, xmerl_sax_parser_state) ->
+ ?fatal_error(State1, "Not well-formed DTD");
+ {fatal_error, {State2, "No more bytes"}} ->
+ {[], ?STRING_EMPTY, State2};
+ {fatal_error, {State2, Message}} ->
+ ?fatal_error(State2, Message)
+ end;
+ _ -> % Type is normal or entity
+ {Acc1, Rest3, State3} = parse_entity_content(Rest1, State1#xmerl_sax_parser_state{end_tags = []}, Acc, true),
+ {Acc1, Rest3, State3#xmerl_sax_parser_state{end_tags = ET}}
end;
parse_external_entity_1(?STRING_UNBOUND_REST(_C, _) = Bytes,
- #xmerl_sax_parser_state{file_type=Type} = State) ->
+ #xmerl_sax_parser_state{file_type = Type,
+ end_tags = ET} = State, Acc) ->
case Type of
- normal ->
- parse_entity_content(Bytes, State, [], true);
- dtd ->
- parse_doctype_decl(Bytes, State);
- entity ->
- parse_doctype_decl(Bytes, State) end;
-parse_external_entity_1(Bytes, State) ->
- unicode_incomplete_check([Bytes, State, fun parse_external_entity_1/2],
- undefined).
+ dtd ->
+ case catch parse_doctype_decl(Bytes, State) of
+ {?STRING_EMPTY, State2} when is_record(State2, xmerl_sax_parser_state) ->
+ {[], ?STRING_EMPTY, State2};
+ {_, State1} when is_record(State1, xmerl_sax_parser_state) ->
+ ?fatal_error(State1, "Not well-formed DTD");
+ {fatal_error, {State2, "No more bytes"}} ->
+ {[], ?STRING_EMPTY, State2};
+ {fatal_error, {State2, Message}} ->
+ ?fatal_error(State2, Message)
+ end;
+ _ ->
+ {Acc1, Rest1, State1} = parse_entity_content(Bytes, State#xmerl_sax_parser_state{end_tags = []}, Acc, true),
+ {Acc1, Rest1, State1#xmerl_sax_parser_state{end_tags = ET}}
+
+ end;
+parse_external_entity_1(Bytes, State, Acc) ->
+ unicode_incomplete_check([Bytes, State, Acc, fun parse_external_entity_1/3],
+ undefined).
%%----------------------------------------------------------------------
%% Function : is_next_char_whitespace(Bytes, State) -> Result
@@ -2009,6 +2268,8 @@ parse_external_entity_1(Bytes, State) ->
%% Result : true | false
%% Description: Checks if first character is whitespace.
%%----------------------------------------------------------------------
+is_next_char_whitespace(?STRING_EMPTY, State) ->
+ cf(?STRING_EMPTY, State, fun is_next_char_whitespace/2);
is_next_char_whitespace(?STRING_UNBOUND_REST(C, _), _) when ?is_whitespace(C) ->
true;
is_next_char_whitespace(?STRING_UNBOUND_REST(_C, _), _) ->
@@ -2073,21 +2334,25 @@ parse_external_id(Bytes, State, OptionalSystemId) ->
%% Description: Parse a system id. The function is used in two cases one
%% where the system is optional and one where it's required.
%%----------------------------------------------------------------------
+parse_system_id(?STRING_EMPTY, State, OptionalSystemId) ->
+ cf(?STRING_EMPTY, State, OptionalSystemId, fun parse_system_id/3);
parse_system_id(?STRING_UNBOUND_REST(C, _) = Bytes, State, OptionalSystemId) when ?is_whitespace(C) ->
{_WS, Rest, State1} = whitespace(Bytes, State, []),
- check_system_litteral(Rest, State1, OptionalSystemId);
+ check_system_literal(Rest, State1, OptionalSystemId);
parse_system_id(?STRING_UNBOUND_REST(_C, _) = Bytes, State, true) ->
{"", Bytes, State};
parse_system_id(Bytes, State, OptionalSystemId) ->
unicode_incomplete_check([Bytes, State, OptionalSystemId, fun parse_system_id/3],
"whitespace expected").
-check_system_litteral(?STRING_UNBOUND_REST(C, Rest), State, _OptionalSystemId) when C == $'; C == $" ->
- parse_system_litteral(Rest, State, C, []);
-check_system_litteral(?STRING_UNBOUND_REST(_C, _) = Bytes, State, true) ->
+check_system_literal(?STRING_EMPTY, State, OptionalSystemId) ->
+ cf(?STRING_EMPTY, State, OptionalSystemId, fun check_system_literal/3);
+check_system_literal(?STRING_UNBOUND_REST(C, Rest), State, _OptionalSystemId) when C == $'; C == $" ->
+ parse_system_literal(Rest, State, C, []);
+check_system_literal(?STRING_UNBOUND_REST(_C, _) = Bytes, State, true) ->
{"", Bytes, State};
-check_system_litteral(Bytes, State, OptionalSystemId) ->
- unicode_incomplete_check([Bytes, State, OptionalSystemId, fun check_system_litteral/3],
+check_system_literal(Bytes, State, OptionalSystemId) ->
+ unicode_incomplete_check([Bytes, State, OptionalSystemId, fun check_system_literal/3],
"\" or \' expected").
@@ -2102,20 +2367,24 @@ check_system_litteral(Bytes, State, OptionalSystemId) ->
%% Description: Parse a public id. The function is used in two cases one
%% where the following system is optional and one where it's required.
%%----------------------------------------------------------------------
+parse_public_id(?STRING_EMPTY, State, OptionalSystemId) ->
+ cf(?STRING_EMPTY, State, OptionalSystemId, fun parse_public_id/3);
parse_public_id(?STRING_UNBOUND_REST(C, _) = Bytes, State, OptionalSystemId) when ?is_whitespace(C) ->
{_WS, Rest, State1} = whitespace(Bytes, State, []),
- check_public_litteral(Rest, State1, OptionalSystemId);
+ check_public_literal(Rest, State1, OptionalSystemId);
parse_public_id(Bytes, State,OptionalSystemId) ->
unicode_incomplete_check([Bytes, State, OptionalSystemId, fun parse_public_id/3],
"whitespace expected").
-check_public_litteral(?STRING_UNBOUND_REST(C, Rest), State, OptionalSystemId) when C == $'; C == $" ->
- {PubId, Rest1, State1} = parse_pubid_litteral(Rest, State, C, []),
+check_public_literal(?STRING_EMPTY, State, OptionalSystemId) ->
+ cf(?STRING_EMPTY, State, OptionalSystemId, fun check_public_literal/3);
+check_public_literal(?STRING_UNBOUND_REST(C, Rest), State, OptionalSystemId) when C == $'; C == $" ->
+ {PubId, Rest1, State1} = parse_pubid_literal(Rest, State, C, []),
{SysId, Rest2, State2} = parse_system_id(Rest1, State1, OptionalSystemId),
{PubId, SysId, Rest2, State2};
-check_public_litteral(Bytes, State, OptionalSystemId) ->
- unicode_incomplete_check([Bytes, State, OptionalSystemId, fun check_public_litteral/3],
+check_public_literal(Bytes, State, OptionalSystemId) ->
+ unicode_incomplete_check([Bytes, State, OptionalSystemId, fun check_public_literal/3],
"\" or \' expected").
@@ -2132,6 +2401,8 @@ parse_doctype_decl(?STRING_EMPTY, State) ->
cf(?STRING_EMPTY, State, fun parse_doctype_decl/2);
parse_doctype_decl(?STRING("<"), State) ->
cf(?STRING("<"), State, fun parse_doctype_decl/2);
+parse_doctype_decl(?STRING("<!"), State) ->
+ cf(?STRING("<!"), State, fun parse_doctype_decl/2);
parse_doctype_decl(?STRING_REST("<?", Rest), State) ->
case parse_pi(Rest, State) of
{Rest1, State1} ->
@@ -2140,23 +2411,37 @@ parse_doctype_decl(?STRING_REST("<?", Rest), State) ->
IValue = ?TO_INPUT_FORMAT("<?"),
{?APPEND_STRING(IValue, Rest), State1}
end;
-parse_doctype_decl(?STRING_REST("%", Rest), State) ->
+parse_doctype_decl(?STRING_REST("%", Rest), #xmerl_sax_parser_state{file_type = Type} = State) ->
{Ref, Rest1, State1} = parse_pe_reference(Rest, State),
case Ref of
- {internal_parameter, _, RefValue} ->
- IValue = ?TO_INPUT_FORMAT(" " ++ RefValue ++ " "),
- parse_doctype_decl(?APPEND_STRING(IValue, Rest1), State1);
- {external_parameter, _, {PubId, SysId}} ->
- State2 = parse_external_entity(State1#xmerl_sax_parser_state{file_type = entity}, PubId, SysId),
- parse_doctype_decl(Rest1, State2);
- {not_found, Name} ->
- case State#xmerl_sax_parser_state.skip_external_dtd of
- false ->
- ?fatal_error(State1, "Entity not declared: " ++ Name); %%WFC: Entity Declared
- true ->
- parse_doctype_decl(Rest1, State1)
- end
+ {internal_parameter, _, RefValue} ->
+ IValue = ?TO_INPUT_FORMAT(" " ++ RefValue ++ " "),
+ {Ctx, State2} = strip_context(State1),
+ case catch parse_doctype_decl(IValue, State2) of
+ {fatal_error, {State3, "No more bytes"}} ->
+ parse_doctype_decl(Rest1, add_context_back(Ctx, State3));
+ {fatal_error, {State3, "Continuation function undefined"}} ->
+ parse_doctype_decl(Rest1, add_context_back(Ctx, State3));
+ {_, State3} when is_record(State3, xmerl_sax_parser_state) ->
+ parse_doctype_decl(Rest1, add_context_back(Ctx, State3));
+ {fatal_error, {State3, Mess}} ->
+ ?fatal_error(State3, Mess)
+ end;
+ {external_parameter, _, {PubId, SysId}} ->
+ {_, State2} = parse_external_entity(State1#xmerl_sax_parser_state{file_type = dtd}, PubId, SysId, []),
+ parse_doctype_decl(Rest1, State2#xmerl_sax_parser_state{file_type = Type});
+ {not_found, _Name} ->
+ case State#xmerl_sax_parser_state.skip_external_dtd of
+ false ->
+ parse_doctype_decl(Rest1, State1);
+ %?fatal_error(State1, "Entity not declared: " ++ Name); %%P69 VC: Entity Declared
+ true ->
+ parse_doctype_decl(Rest1, State1)
+ end
end;
+parse_doctype_decl(?STRING_REST("<![", Rest), State) ->
+ {Rest1, State1} = parse_doctype_decl_2(Rest, State),
+ parse_doctype_decl(Rest1, State1);
parse_doctype_decl(?STRING_REST("<!", Rest1), State) ->
parse_doctype_decl_1(Rest1, State);
parse_doctype_decl(?STRING_REST("]", Rest), State) ->
@@ -2251,6 +2536,68 @@ parse_doctype_decl_1(Bytes, State) ->
unicode_incomplete_check([Bytes, State, fun parse_doctype_decl_1/2],
"expecting ELEMENT, ATTLIST, ENTITY, NOTATION or comment").
+parse_doctype_decl_2(?STRING_EMPTY, State) ->
+ cf(?STRING_EMPTY, State, fun parse_doctype_decl_2/2);
+% conditionalSect
+parse_doctype_decl_2(?STRING("I") = Bytes, State) ->
+ cf(Bytes, State, fun parse_doctype_decl_2/2);
+parse_doctype_decl_2(?STRING("IN") = Bytes, State) ->
+ cf(Bytes, State, fun parse_doctype_decl_2/2);
+parse_doctype_decl_2(?STRING("INC") = Bytes, State) ->
+ cf(Bytes, State, fun parse_doctype_decl_2/2);
+parse_doctype_decl_2(?STRING("INCL") = Bytes, State) ->
+ cf(Bytes, State, fun parse_doctype_decl_2/2);
+parse_doctype_decl_2(?STRING("INCLU") = Bytes, State) ->
+ cf(Bytes, State, fun parse_doctype_decl_2/2);
+parse_doctype_decl_2(?STRING("INCLUD") = Bytes, State) ->
+ cf(Bytes, State, fun parse_doctype_decl_2/2);
+parse_doctype_decl_2(?STRING_REST("INCLUDE", Rest), State) ->
+ case State#xmerl_sax_parser_state.file_type of
+ normal ->
+ ?fatal_error(State, "Conditional sections may only appear in the external DTD subset.");
+ _ ->
+ parse_include_sect(Rest, State)
+ end;
+parse_doctype_decl_2(?STRING("IG") = Bytes, State) ->
+ cf(Bytes, State, fun parse_doctype_decl_2/2);
+parse_doctype_decl_2(?STRING("IGN") = Bytes, State) ->
+ cf(Bytes, State, fun parse_doctype_decl_2/2);
+parse_doctype_decl_2(?STRING("IGNO") = Bytes, State) ->
+ cf(Bytes, State, fun parse_doctype_decl_2/2);
+parse_doctype_decl_2(?STRING("IGNOR") = Bytes, State) ->
+ cf(Bytes, State, fun parse_doctype_decl_2/2);
+parse_doctype_decl_2(?STRING_UNBOUND_REST("IGNORE", Rest), State) ->
+ case State#xmerl_sax_parser_state.file_type of
+ normal ->
+ ?fatal_error(State, "Conditional sections may only appear in the external DTD subset.");
+ _ ->
+ parse_ignore_sect(Rest, State)
+ end;
+parse_doctype_decl_2(?STRING_UNBOUND_REST(C, _) = Bytes, State) when ?is_whitespace(C) ->
+ {_WS, Rest, State1} = whitespace(Bytes, State, []),
+ parse_doctype_decl_2(Rest, State1);
+parse_doctype_decl_2(?STRING_UNBOUND_REST("%", Rest), State) ->
+ {Ref, Rest1, State1} = parse_pe_reference(Rest, State),
+ case Ref of
+ {internal_parameter, _, RefValue} ->
+ IValue = ?TO_INPUT_FORMAT(RefValue),
+ parse_doctype_decl_2(?APPEND_STRING(IValue, Rest1), State1);
+ {external_parameter, _, {PubId, SysId}} ->
+ {_, State2} = parse_external_entity(State1#xmerl_sax_parser_state{file_type = dtd}, PubId, SysId, []),
+ parse_doctype_decl_2(Rest1, State2);
+ {not_found, Name} ->
+ case State#xmerl_sax_parser_state.skip_external_dtd of
+ false ->
+ ?fatal_error(State1, "Entity not declared: " ++ Name); %%WFC: Entity Declared
+ true ->
+ parse_doctype_decl_2(Rest1, State1)
+ end
+ end;
+
+parse_doctype_decl_2(Bytes, State) ->
+ unicode_incomplete_check([Bytes, State, fun parse_doctype_decl_2/2],
+ "expecting INCLUDE or IGNORE").
+
%%----------------------------------------------------------------------
%% Function : parse_element_decl(Rest, State) -> Result
@@ -2269,15 +2616,21 @@ parse_element_decl(Bytes, State) ->
unicode_incomplete_check([Bytes, State, fun parse_element_decl/2],
"whitespace expected").
+parse_element_decl_1(?STRING_EMPTY, State) ->
+ cf(?STRING_EMPTY, State, fun parse_element_decl_1/2);
parse_element_decl_1(?STRING_UNBOUND_REST(C, Rest), State) ->
case is_name_start(C) of
- true ->
- {Name, Rest1, State1} = parse_name(Rest, State, [C]),
- {Model, Rest2, State2} = parse_element_content(Rest1, State1),
- State3 = event_callback({elementDecl, Name, Model}, State2),
- {Rest2, State3};
- false ->
- ?fatal_error(State, "name expected")
+ true ->
+ {Name, Rest1, State1} = parse_name(Rest, State, [C]),
+ case parse_element_content(Rest1, State1) of
+ {[],_,_} ->
+ ?fatal_error(State, "Content spec missing");
+ {Model, Rest2, State2} ->
+ State3 = event_callback({elementDecl, Name, Model}, State2),
+ {Rest2, State3}
+ end;
+ false ->
+ ?fatal_error(State, "name expected")
end;
parse_element_decl_1(Bytes, State) ->
unicode_incomplete_check([Bytes, State, fun parse_element_decl_1/2],
@@ -2312,15 +2665,262 @@ parse_element_content(Bytes, State) ->
%% Description: Parse contents of an element declaration.
%%----------------------------------------------------------------------
parse_element_content_1(?STRING_EMPTY, State, Acc) ->
- cf(?STRING_EMPTY, State, Acc, fun parse_element_content_1/3);
+ cf(?STRING_EMPTY, State, Acc, fun parse_element_content_1/3);
+parse_element_content_1(?STRING("A") = Bytes, State, Acc) ->
+ cf(Bytes, State, Acc, fun parse_element_content_1/3);
+parse_element_content_1(?STRING("AN") = Bytes, State, Acc) ->
+ cf(Bytes, State, Acc, fun parse_element_content_1/3);
+parse_element_content_1(?STRING_REST("ANY", Rest), State, Acc) ->
+ parse_element_content_1(Rest, State, "YNA" ++ Acc);
+parse_element_content_1(?STRING("E") = Bytes, State, Acc) ->
+ cf(Bytes, State, Acc, fun parse_element_content_1/3);
+parse_element_content_1(?STRING("EM") = Bytes, State, Acc) ->
+ cf(Bytes, State, Acc, fun parse_element_content_1/3);
+parse_element_content_1(?STRING("EMP") = Bytes, State, Acc) ->
+ cf(Bytes, State, Acc, fun parse_element_content_1/3);
+parse_element_content_1(?STRING("EMPT") = Bytes, State, Acc) ->
+ cf(Bytes, State, Acc, fun parse_element_content_1/3);
+parse_element_content_1(?STRING_REST("EMPTY", Rest), State, Acc) ->
+ parse_element_content_1(Rest, State, "YTPME" ++ Acc);
parse_element_content_1(?STRING_REST(">", Rest), State, Acc) ->
{lists:reverse(delete_leading_whitespace(Acc)), Rest, State};
-parse_element_content_1(?STRING_UNBOUND_REST(C, Rest), State, Acc) ->
- parse_element_content_1(Rest, State, [C|Acc]);
+parse_element_content_1(?STRING_UNBOUND_REST("(", Rest), State, []) ->
+ parse_element_content_2(Rest, State, [$(], {1, [none]});
+parse_element_content_1(?STRING_UNBOUND_REST("(", _), State, _) ->
+ ?fatal_error(State, "> expected");
+parse_element_content_1(?STRING_UNBOUND_REST("%", Rest), State, Acc) ->
+ {Ref, Rest1, State1} = parse_pe_reference(Rest, State),
+ case Ref of
+ {internal_parameter, _, RefValue} ->
+ IValue = ?TO_INPUT_FORMAT(RefValue),
+ parse_element_content_1(?APPEND_STRING(IValue, Rest1), State1, Acc);
+ {external_parameter, _, {PubId, SysId}} ->
+ {Acc1, State2} = parse_external_entity(State1#xmerl_sax_parser_state{file_type = dtd}, PubId, SysId, Acc),
+ parse_element_content_1(Rest1, State2, Acc1);
+ {not_found, Name} ->
+ case State#xmerl_sax_parser_state.skip_external_dtd of
+ false ->
+ ?fatal_error(State1, "Entity not declared: " ++ Name); %%WFC: Entity Declared
+ true ->
+ parse_element_content_1(Rest1, State1, Acc)
+ end
+ end;
+parse_element_content_1(?STRING_UNBOUND_REST(C, _) = Rest, State, Acc) when ?is_whitespace(C) ->
+ {WS, Rest1, State1} = whitespace(Rest, State, []),
+ parse_element_content_1(Rest1, State1, WS ++ Acc);
+parse_element_content_1(?STRING_UNBOUND_REST(C, _), State, _Acc) ->
+ ?fatal_error(State, "'(' expected got " ++ [C]);
parse_element_content_1(Bytes, State, Acc) ->
unicode_incomplete_check([Bytes, State, Acc, fun parse_element_content_1/3],
undefined).
+%%----------------------------------------------------------------------
+%% Function : parse_element_content_2(Rest, State, Acc, Depth) -> Result
+%% Parameters: Rest = string() | binary()
+%% State = #xmerl_sax_parser_state{}
+%% Acc = string()
+%% Result : {Content, Rest, State}
+%% Content = string()
+%% Description: Parse element declaration Mixed | children.
+%%----------------------------------------------------------------------
+parse_element_content_2(?STRING_EMPTY, State, Acc, Depth) ->
+ cf(?STRING_EMPTY, State, Acc, Depth, fun parse_element_content_2/4);
+parse_element_content_2(?STRING("#") = Bytes, State, Acc, Depth) ->
+ cf(Bytes, State, Acc, Depth, fun parse_element_content_2/4);
+parse_element_content_2(?STRING("#P") = Bytes, State, Acc, Depth) ->
+ cf(Bytes, State, Acc, Depth, fun parse_element_content_2/4);
+parse_element_content_2(?STRING("#PC") = Bytes, State, Acc, Depth) ->
+ cf(Bytes, State, Acc, Depth, fun parse_element_content_2/4);
+parse_element_content_2(?STRING("#PCD") = Bytes, State, Acc, Depth) ->
+ cf(Bytes, State, Acc, Depth, fun parse_element_content_2/4);
+parse_element_content_2(?STRING("#PCDA") = Bytes, State, Acc, Depth) ->
+ cf(Bytes, State, Acc, Depth, fun parse_element_content_2/4);
+parse_element_content_2(?STRING("#PCDAT") = Bytes, State, Acc, Depth) ->
+ cf(Bytes, State, Acc, Depth, fun parse_element_content_2/4);
+parse_element_content_2(?STRING_REST("#PCDATA", _), State, _, {_, ['|'|_]}) ->
+ ?fatal_error(State, "#PCDATA can only come first in element content.");
+parse_element_content_2(?STRING_REST("#PCDATA", Rest), State, Acc, {1, Sep}) ->
+ parse_element_content_4(Rest, State, "ATADCP#" ++ Acc, {1, [any|Sep]});
+parse_element_content_2(?STRING_UNBOUND_REST("%", Rest), State, Acc, Depth) ->
+ case State#xmerl_sax_parser_state.file_type of
+ normal ->
+ % not allowed locally
+ ?fatal_error(State, "PE not allowed in declaration."); %%WFC: Entity Declared
+ _ ->
+ {Ref, Rest1, State1} = parse_pe_reference(Rest, State),
+ case Ref of
+ {internal_parameter, _, RefValue} ->
+ IValue = ?TO_INPUT_FORMAT(RefValue),
+ parse_element_content_2(?APPEND_STRING(IValue, Rest1), State1, Acc, Depth);
+ {external_parameter, _, {PubId, SysId}} ->
+ {Acc1, State2} = parse_external_entity(State1#xmerl_sax_parser_state{file_type = entity}, PubId, SysId, Acc),
+ parse_element_content_2(Rest1, State2, Acc1, Depth);
+ {not_found, Name} ->
+ case State#xmerl_sax_parser_state.skip_external_dtd of
+ false ->
+ ?fatal_error(State1, "Entity not declared: " ++ Name); %%WFC: Entity Declared
+ true ->
+ parse_element_content_2(Rest1, State1, Acc, Depth)
+ end
+ end
+ end;
+parse_element_content_2(?STRING_UNBOUND_REST(")", Rest), State, Acc, {1, _}) ->
+ case lists:all(fun(C) when ?is_whitespace(C) -> true;
+ ($()-> true;
+ (_) -> false
+ end, Acc) of
+ true ->
+ ?fatal_error(State, "Element content missing.");
+ false when Acc == "(" ->
+ ?fatal_error(State, "Element content missing.");
+ false ->
+ case Acc of
+ [$,|_] ->
+ ?fatal_error(State, "expecting value");
+ [$||_] ->
+ ?fatal_error(State, "expecting value");
+ _ ->
+ {Acc1, Rest1, State1} = parse_element_content_3(Rest, State, [$)|Acc]),
+ parse_element_content_1(Rest1, State1, Acc1)
+ end
+ end;
+parse_element_content_2(?STRING_UNBOUND_REST("(", Rest), State, Acc, {Depth, [H|Sep]}) ->
+ H1 = if H == none -> any;
+ H == any -> ?fatal_error(State, "expecting separator");
+ true ->
+ check_separator(Acc, H, State)
+ end,
+ parse_element_content_2(Rest, State, [$(|Acc], {Depth + 1, [none,H1|Sep]});
+parse_element_content_2(?STRING_UNBOUND_REST(")", Rest), State, Acc, {Depth, [_|Sep]}) ->
+ case Acc of
+ [$,|_] ->
+ ?fatal_error(State, "expecting value");
+ [$||_] ->
+ ?fatal_error(State, "expecting value");
+ _ ->
+ {Acc1, Rest1, State1} = parse_element_content_3(Rest, State, [$)|Acc]),
+ parse_element_content_2(Rest1, State1, Acc1, {Depth - 1, Sep})
+ end;
+parse_element_content_2(?STRING_UNBOUND_REST(C, _) = Rest, State, Acc, Depth) when ?is_whitespace(C) ->
+ {WS, Rest1, State1} = whitespace(Rest, State, []),
+ parse_element_content_2(Rest1, State1, WS ++ Acc, Depth);
+parse_element_content_2(?STRING_UNBOUND_REST("|", Rest), State, Acc, {Depth, [any|T]}) ->
+ parse_element_content_2(Rest, State, [$||Acc], {Depth, ['|'|T]});
+parse_element_content_2(?STRING_UNBOUND_REST("|", Rest), State, Acc, {_, ['|'|_]} = Sep) ->
+ case Acc of
+ [$||_] ->
+ ?fatal_error(State, "expecting value");
+ _ ->
+ parse_element_content_2(Rest, State, [$||Acc], Sep)
+ end;
+parse_element_content_2(?STRING_UNBOUND_REST(",", Rest), State, Acc, {Depth, [any|T]}) ->
+ parse_element_content_2(Rest, State, [$,|Acc], {Depth, [','|T]});
+parse_element_content_2(?STRING_UNBOUND_REST(",", Rest), State, Acc, {_, [','|_]} = Sep) ->
+ case Acc of
+ [$,|_] ->
+ ?fatal_error(State, "expecting value");
+ _ ->
+ parse_element_content_2(Rest, State, [$,|Acc], Sep)
+ end;
+parse_element_content_2(?STRING_UNBOUND_REST("|", _), State, _Acc, {_, [H|_]}) ->
+ ?fatal_error(State, "Expected: " ++ atom_to_list(H));
+parse_element_content_2(?STRING_UNBOUND_REST(",", _), State, _Acc, {_, [H|_]}) ->
+ ?fatal_error(State, "Expected: " ++ atom_to_list(H));
+parse_element_content_2(?STRING_UNBOUND_REST(C, Rest), State, Acc, {Depth, [H|T]}) ->
+ case is_name_start(C) of
+ true ->
+ H1 = if H == none -> any;
+ H == any -> ?fatal_error(State, "expecting separator");
+ true ->
+ check_separator(Acc, H, State)
+ end,
+ {Name, Rest1, State1} = parse_name(Rest, State, [C]),
+ {Acc1, Rest2, State2} = parse_element_content_3(Rest1, State1, lists:reverse(Name) ++ Acc),
+ parse_element_content_2(Rest2, State2, Acc1, {Depth, [H1|T]});
+ false ->
+ ?fatal_error(State, "name expected: " ++ [C])
+ end;
+parse_element_content_2(Bytes, State, Acc, _Depth) ->
+ parse_element_content_1(Bytes, State, Acc).
+
+% maybe parse the cardinality
+parse_element_content_3(?STRING_EMPTY, State, Acc) ->
+ cf(?STRING_EMPTY, State, Acc, fun parse_element_content_3/3);
+parse_element_content_3(?STRING_UNBOUND_REST("?", Rest), State, Acc) ->
+ {[$?|Acc], Rest, State};
+parse_element_content_3(?STRING_UNBOUND_REST("+", Rest), State, Acc) ->
+ {[$+|Acc], Rest, State};
+parse_element_content_3(?STRING_UNBOUND_REST("*", Rest), State, Acc) ->
+ {[$*|Acc], Rest, State};
+parse_element_content_3(Rest, State, Acc) ->
+ {Acc, Rest, State}.
+
+% Mixed Content [51]
+parse_element_content_4(?STRING_EMPTY, State, Acc, Depth) ->
+ cf(?STRING_EMPTY, State, Acc, Depth, fun parse_element_content_4/4);
+parse_element_content_4(?STRING(")") = Bytes, State, Acc, Depth) ->
+ cf(Bytes, State, Acc, Depth, fun parse_element_content_4/4);
+parse_element_content_4(?STRING_UNBOUND_REST("|", Rest), State, Acc, {Depth, [any|T]}) ->
+ parse_element_content_4(Rest, State, [$||Acc], {Depth, ['|'|T]});
+parse_element_content_4(?STRING_UNBOUND_REST("|", Rest), State, Acc, {_, ['|'|_]} = Sep) ->
+ case Acc of
+ [$||_] ->
+ ?fatal_error(State, "expecting value");
+ _ ->
+ parse_element_content_4(Rest, State, [$||Acc], Sep)
+ end;
+parse_element_content_4(?STRING_UNBOUND_REST("|", Rest), State, Acc, Depth) ->
+ parse_element_content_4(Rest, State, [$||Acc], Depth);
+parse_element_content_4(?STRING_UNBOUND_REST(C, Rest), State, Acc, Depth) when ?is_whitespace(C) ->
+ parse_element_content_4(Rest, State, [C|Acc], Depth);
+
+parse_element_content_4(?STRING_UNBOUND_REST(")*", Rest), State, Acc, {1, _}) ->
+ parse_element_content_1(Rest, State, [$*,$)|Acc]);
+parse_element_content_4(?STRING_UNBOUND_REST(")", _), State, _, {1, [','|_]}) ->
+ ?fatal_error(State, ")* expected after mixed content");
+parse_element_content_4(?STRING_UNBOUND_REST(")", _), State, _, {1, ['|'|_]}) ->
+ ?fatal_error(State, ")* expected after mixed content");
+parse_element_content_4(?STRING_UNBOUND_REST(")", Rest), State, Acc, {1, _}) ->
+ parse_element_content_1(Rest, State, [$)|Acc]);
+
+parse_element_content_4(?STRING_UNBOUND_REST(")*", Rest), State, Acc, {Depth, [_|T]}) ->
+ parse_element_content_2(Rest, State, [$*,$)|Acc], {Depth - 1, T});
+parse_element_content_4(?STRING_UNBOUND_REST(")", Rest), State, Acc, {Depth, [_|T]}) ->
+ parse_element_content_2(Rest, State, [$)|Acc], {Depth - 1, T});
+parse_element_content_4(?STRING_UNBOUND_REST("%", Rest), State, Acc, Depth) ->
+ {Ref, Rest1, State1} = parse_pe_reference(Rest, State),
+ case Ref of
+ {internal_parameter, _, RefValue} ->
+ IValue = ?TO_INPUT_FORMAT(" " ++ RefValue ++ " "),
+ parse_element_content_4(?APPEND_STRING(IValue, Rest1), State1, Acc, Depth);
+ {external_parameter, _, {_PubId, _SysId}} ->
+ ?fatal_error(State1, "External parameter name");
+ {not_found, _Name} ->
+ ?fatal_error(State1, "Unknown reference parameter name")
+ end;
+parse_element_content_4(?STRING_UNBOUND_REST(C, Rest), State, Acc, {Depth, [H|T]}) ->
+ case is_name_start(C) of
+ true ->
+ H1 = if H == none -> any;
+ H == any -> ?fatal_error(State, "expecting separator");
+ true ->
+ check_separator(Acc, H, State)
+ end,
+ {Name, Rest1, State1} = parse_name(Rest, State, [C]),
+ parse_element_content_4(Rest1, State1, lists:reverse(Name) ++ Acc, {Depth, [H1|T]});
+ false ->
+ ?fatal_error(State, "name expected: " ++ [C])
+ end;
+parse_element_content_4(Rest1, State, Acc, Depth) ->
+ parse_element_content_2(Rest1, State, Acc, Depth).
+
+check_separator([W|Acc], S, State) when ?is_whitespace(W) ->
+ check_separator(Acc, S, State);
+check_separator([$,|_], ',', _) -> ',';
+check_separator([$||_], '|', _) -> '|';
+check_separator(_, _, State) ->
+ ?fatal_error(State, "Expected serarator").
+
delete_leading_whitespace([C |Acc]) when ?is_whitespace(C)->
delete_leading_whitespace(Acc);
delete_leading_whitespace(Acc) ->
@@ -2339,15 +2939,55 @@ parse_att_list_decl(?STRING_EMPTY, State) ->
parse_att_list_decl(?STRING_UNBOUND_REST(C, _) = Bytes, State) when ?is_whitespace(C) ->
{_WS, Rest, State1} = whitespace(Bytes, State, []),
parse_att_list_decl_1(Rest, State1);
+parse_att_list_decl(?STRING_UNBOUND_REST("%", Rest), State) ->
+ {Ref, Rest1, State1} = parse_pe_reference(Rest, State),
+ case Ref of
+ {internal_parameter, _, RefValue} ->
+ IValue = ?TO_INPUT_FORMAT(" " ++ RefValue ++ " "),
+ parse_att_list_decl(?APPEND_STRING(IValue, Rest1), State1);
+ {external_parameter, _, {PubId, SysId}} ->
+ {_, State2} = parse_external_entity(State1#xmerl_sax_parser_state{file_type = dtd}, PubId, SysId, []),
+ parse_att_list_decl(Rest1, State2);
+ {not_found, Name} ->
+ case State#xmerl_sax_parser_state.skip_external_dtd of
+ false ->
+ ?fatal_error(State1, "Entity not declared: " ++ Name); %%WFC: Entity Declared
+ true ->
+ parse_att_list_decl(Rest1, State1)
+ end
+ end;
parse_att_list_decl(Bytes, State) ->
unicode_incomplete_check([Bytes, State, fun parse_att_list_decl/2],
"whitespace expected").
-
+parse_att_list_decl_1(?STRING_EMPTY, State) ->
+ cf(?STRING_EMPTY, State, fun parse_att_list_decl_1/2);
+parse_att_list_decl_1(?STRING_UNBOUND_REST("%", Rest), State) ->
+ case State#xmerl_sax_parser_state.file_type of
+ normal ->
+ ?fatal_error(State, "Parsed entities not allowed in Internal subset"); %%WFC: PEs in Internal Subset
+ _ ->
+ {Ref, Rest1, State1} = parse_pe_reference(Rest, State),
+ case Ref of
+ {internal_parameter, _, RefValue} ->
+ IValue = ?TO_INPUT_FORMAT(RefValue),
+ parse_att_list_decl_1(?APPEND_STRING(IValue, Rest1), State1);
+ {external_parameter, _, {PubId, SysId}} ->
+ {_, State2} = parse_external_entity(State1#xmerl_sax_parser_state{file_type = entity}, PubId, SysId, []),
+ parse_att_list_decl(Rest1, State2);
+ {not_found, Name} ->
+ case State#xmerl_sax_parser_state.skip_external_dtd of
+ false ->
+ ?fatal_error(State1, "Entity not declared: " ++ Name); %%WFC: Entity Declared
+ true ->
+ parse_att_list_decl(Rest1, State1)
+ end
+ end
+ end;
parse_att_list_decl_1(?STRING_UNBOUND_REST(C, Rest), State) ->
case is_name_start(C) of
true ->
- {ElementName, Rest1, State1} = parse_name(Rest, State, [C]),
+ {ElementName, Rest1, State1} = parse_ns_name(Rest, State, [], [C]),
parse_att_defs(Rest1, State1, ElementName);
false ->
?fatal_error(State, "name expected")
@@ -2373,21 +3013,69 @@ parse_att_defs(?STRING_REST(">", Rest), State, _ElementName) ->
parse_att_defs(?STRING_UNBOUND_REST(C, _) = Rest, State, ElementName) when ?is_whitespace(C) ->
{_WS, Rest1, State1} = whitespace(Rest, State, []),
parse_att_defs(Rest1, State1, ElementName);
+parse_att_defs(?STRING_UNBOUND_REST("%", Rest), #xmerl_sax_parser_state{file_type = Type} = State, ElementName) ->
+ case Type of
+ normal ->
+ ?fatal_error(State, "Parsed entities not allowed in Internal subset"); %%WFC: PEs in Internal Subset
+ _ ->
+ {Ref, Rest1, State1} = parse_pe_reference(Rest, State),
+ case Ref of
+ {internal_parameter, _, RefValue} ->
+ IValue = ?TO_INPUT_FORMAT(" " ++ RefValue ++ " "),
+ parse_att_defs(?APPEND_STRING(IValue, Rest1), State1, ElementName);
+ {external_parameter, _, {PubId, SysId}} ->
+ {_, State2} = parse_external_entity(State1#xmerl_sax_parser_state{file_type = entity}, PubId, SysId, []),
+ parse_att_defs(Rest1, State2#xmerl_sax_parser_state{file_type = Type}, ElementName);
+ {not_found, Name} ->
+ case State#xmerl_sax_parser_state.skip_external_dtd of
+ false ->
+ ?fatal_error(State1, "Entity not declared: " ++ Name); %%WFC: Entity Declared
+ true ->
+ parse_att_defs(Rest1, State1, ElementName)
+ end
+ end
+ end;
parse_att_defs(?STRING_UNBOUND_REST(C, Rest), State, ElementName) ->
case is_name_start(C) of
- true ->
- {AttrName, Rest1, State1} = parse_name(Rest, State, [C]),
- {Type, Rest2, State2} = parse_att_type(Rest1, State1),
- {Mode, Value, Rest3, State3} = parse_default_decl(Rest2, State2),
- State4 = event_callback({attributeDecl, ElementName, AttrName, Type, Mode, Value}, State3),
- parse_att_defs(Rest3, State4, ElementName);
- false ->
- ?fatal_error(State, "whitespace or name expected")
+ true ->
+ {AttrName, Rest1, State1} = parse_ns_name(Rest, State, [], [C]),
+ {Type, Rest2, State2} = parse_att_type(Rest1, State1),
+ {Mode, Value, Rest3, State3} = parse_default_decl(Rest2, State2),
+ State4 = event_callback({attributeDecl, ElementName, AttrName, Type, Mode, Value}, State3),
+ State5 =
+ if
+ Type == "CDATA" andalso Mode == "#FIXED";
+ Type == "CDATA" andalso Mode == "";
+ Type == "" andalso Mode == "#FIXED";
+ Type == "" andalso Mode == "" ->
+ % non-normalized default
+ add_default_attribute({ElementName, AttrName, Value}, State4);
+ Mode == "#FIXED";
+ Mode == "" ->
+ % default and normalized
+ add_default_attribute({ElementName, AttrName, {Value, normalize}}, State4);
+ Type == "CDATA";
+ Type == "" ->
+ % as-is
+ add_default_attribute({ElementName, AttrName, ignore}, State4);
+ true ->
+ % just normalize
+ add_default_attribute({ElementName, AttrName, normalize}, State4)
+ end,
+ parse_att_defs(Rest3, State5, ElementName);
+ false ->
+ ?fatal_error(State, "whitespace or name expected")
end;
parse_att_defs(Bytes, State, ElementName) ->
unicode_incomplete_check([Bytes, State, ElementName, fun parse_att_defs/3],
undefined).
+add_default_attribute({ElementName, AttrName, Value},
+ #xmerl_sax_parser_state{attribute_values = Atts} = State) ->
+ % first value wins when there are duplicates
+ Key = {ElementName, AttrName},
+ Atts1 = merge_on_key({Key, Value}, Atts),
+ State#xmerl_sax_parser_state{attribute_values = Atts1}.
%%----------------------------------------------------------------------
%% Function : parse_att_type(Rest, State) -> Result
@@ -2409,16 +3097,35 @@ parse_att_type(?STRING_EMPTY, State) ->
parse_att_type(?STRING_UNBOUND_REST(C, _) = Bytes, State) when ?is_whitespace(C) ->
{_WS, Rest, State1} = whitespace(Bytes, State, []),
case parse_att_type_1(Rest, State1, []) of
- {Type, Rest1, State2} when Type == "("; Type == "NOTATION" ->
- {T, Rest2, State3} = parse_until_right_paren(Rest1, State2, []),
- {Type ++ T, Rest2, State3};
- {Type, Rest1, State2} ->
- case check_att_type(Type) of
- true ->
- {Type, Rest1, State2};
- false ->
- ?fatal_error(State2, "wrong attribute type")
- end
+ {"(", Rest1, State2} ->
+ {T, Rest2, State3} = parse_until_right_paren(Rest1, State2, []),
+ case T of
+ ")" ->
+ ?fatal_error(State3, "Empty attribute enumerated type.");
+ _ ->
+ {"(" ++ T, Rest2, State3}
+ end;
+ {"NOTATION", Rest1, State2} ->
+ {_WS, Rest2, State3} = whitespace(Rest1, State2, []),
+ case parse_att_type_1(Rest2, State3, []) of
+ {"(", Rest3, State4} ->
+ {T, Rest4, State5} = parse_until_right_paren(Rest3, State4, []),
+ case T of
+ ")" ->
+ ?fatal_error(State5, "Empty attribute notation type.");
+ _ ->
+ {"(" ++ T, Rest4, State5}
+ end;
+ {Type, _, _} ->
+ ?fatal_error(State2, "wrong attribute type: " ++ Type)
+ end;
+ {Type, Rest1, State2} ->
+ case check_att_type(Type) of
+ true ->
+ {Type, Rest1, State2};
+ false ->
+ ?fatal_error(State2, "wrong attribute type: " ++ Type)
+ end
end;
parse_att_type(Bytes, State) ->
unicode_incomplete_check([Bytes, State, fun parse_att_type/2],
@@ -2438,6 +3145,23 @@ parse_att_type_1(?STRING_EMPTY, State, Acc) ->
cf(?STRING_EMPTY, State, Acc, fun parse_att_type_1/3);
parse_att_type_1(?STRING_UNBOUND_REST(C, _) = Bytes, State, Acc) when ?is_whitespace(C) ->
{lists:reverse(Acc), Bytes, State};
+parse_att_type_1(?STRING_UNBOUND_REST("%", Rest), State, Acc) ->
+ {Ref, Rest1, State1} = parse_pe_reference(Rest, State),
+ case Ref of
+ {internal_parameter, _, RefValue} ->
+ IValue = ?TO_INPUT_FORMAT(" " ++ RefValue ++ " "),
+ parse_att_type_1(?APPEND_STRING(IValue, Rest1), State1, Acc);
+ {external_parameter, _, {PubId, SysId}} ->
+ {Acc1, State2} = parse_external_entity(State1#xmerl_sax_parser_state{file_type = entity}, PubId, SysId, Acc),
+ parse_att_type_1(Rest1, State2, Acc1);
+ {not_found, Name} ->
+ case State#xmerl_sax_parser_state.skip_external_dtd of
+ false ->
+ ?fatal_error(State1, "Entity not declared: " ++ Name); %%WFC: Entity Declared
+ true ->
+ parse_att_type_1(Rest1, State1, Acc)
+ end
+ end;
parse_att_type_1(?STRING_REST("(", Rest), State, []) ->
{"(", Rest, State};
parse_att_type_1(?STRING_UNBOUND_REST(C, Rest), State, Acc) ->
@@ -2485,8 +3209,16 @@ parse_until_right_paren(?STRING_EMPTY, State, Acc) ->
cf(?STRING_EMPTY, State, Acc, fun parse_until_right_paren/3);
parse_until_right_paren(?STRING_REST(")", Rest), State, Acc) ->
{lists:reverse(")" ++ Acc), Rest, State};
-parse_until_right_paren(?STRING_UNBOUND_REST(C, Rest), State, Acc) ->
+parse_until_right_paren(?STRING_UNBOUND_REST(C, Rest), State, Acc) when ?is_whitespace(C) ->
parse_until_right_paren(Rest, State, [C|Acc]);
+parse_until_right_paren(?STRING_UNBOUND_REST(C, Rest), State, Acc) ->
+ TokenChar = C == $| orelse is_name_char(C),
+ case TokenChar of
+ true ->
+ parse_until_right_paren(Rest, State, [C|Acc]);
+ false ->
+ ?fatal_error(State, lists:flatten(io_lib:format("Bad character in enumeration: ~p", [[C]])))
+ end;
parse_until_right_paren(Bytes, State, Acc) ->
unicode_incomplete_check([Bytes, State, Acc, fun parse_until_right_paren/3],
undefined).
@@ -2505,7 +3237,7 @@ parse_default_decl(?STRING_EMPTY, State) ->
cf(?STRING_EMPTY, State, fun parse_default_decl/2);
parse_default_decl(?STRING_UNBOUND_REST(C, _) = Bytes, State) when ?is_whitespace(C) ->
{_WS, Rest, State1} = whitespace(Bytes, State, []),
- parse_default_decl_1(Rest, State1);
+ parse_default_decl_2(Rest, State1);
parse_default_decl(Bytes, State) ->
unicode_incomplete_check([Bytes, State, fun parse_default_decl/2],
"whitespace expected").
@@ -2523,6 +3255,8 @@ parse_default_decl_1(?STRING_EMPTY, State) ->
cf(?STRING_EMPTY, State, fun parse_default_decl_1/2);
parse_default_decl_1(?STRING_REST("#", _Rest) = Bytes, State) ->
case Bytes of
+ ?STRING("#") ->
+ cf(Bytes, State, fun parse_default_decl_1/2);
?STRING("#R") ->
cf(Bytes, State, fun parse_default_decl_1/2);
?STRING("#RE") ->
@@ -2575,7 +3309,26 @@ parse_default_decl_1(Bytes, State) ->
unicode_incomplete_check([Bytes, State, fun parse_default_decl_1/2],
"bad default declaration").
+parse_default_decl_2(?STRING_EMPTY, State) ->
+ cf(?STRING_EMPTY, State, fun parse_default_decl_2/2);
+parse_default_decl_2(?STRING_REST("%", Rest), State) ->
+ {Ref, Rest1, State1} = parse_pe_reference(Rest, State),
+ case Ref of
+ {internal_parameter, _, RefValue} ->
+ IValue = ?TO_INPUT_FORMAT(" " ++ RefValue ++ " "),
+ parse_default_decl(?APPEND_STRING(IValue, Rest1), State1);
+ {external_parameter, _, {PubId, SysId}} ->
+ {Acc, State2} = parse_external_entity(State1#xmerl_sax_parser_state{file_type = entity}, PubId, SysId, []),
+ IValue = ?TO_INPUT_FORMAT(" " ++ lists:reverse(Acc) ++ " "),
+ parse_default_decl(?APPEND_STRING(IValue, Rest1), State2);
+ {not_found, _Name} ->
+ ?fatal_error(State, "REQUIRED, IMPLIED or FIXED expected")
+ end;
+parse_default_decl_2(Bytes, State) ->
+ parse_default_decl_1(Bytes, State).
+parse_fixed(?STRING_EMPTY, State) ->
+ cf(?STRING_EMPTY, State, fun parse_fixed/2);
parse_fixed(?STRING_UNBOUND_REST(C, _) = Bytes, State) when ?is_whitespace(C) ->
{DefaultValue, Rest, State1} = parse_att_value(Bytes, State), % parse_att_value removes leading WS
{"#FIXED", DefaultValue, Rest, State1};
@@ -2612,6 +3365,8 @@ parse_entity_decl(Bytes, State) ->
%%----------------------------------------------------------------------
parse_entity_decl_1(?STRING_EMPTY, State) ->
cf(?STRING_EMPTY, State, fun parse_entity_decl_1/2);
+parse_entity_decl_1(?STRING("%") = Bytes, State) ->
+ cf(Bytes, State, fun parse_entity_decl_1/2);
parse_entity_decl_1(?STRING_REST("%", Rest), State) ->
case is_next_char_whitespace(Rest, State) of
true ->
@@ -2638,9 +3393,8 @@ parse_entity_decl_1(Bytes, State) ->
unicode_incomplete_check([Bytes, State, fun parse_entity_decl_1/2],
undefined).
-
-
-
+parse_pe_name(?STRING_EMPTY, State) ->
+ cf(?STRING_EMPTY, State, fun parse_pe_name/2);
parse_pe_name(?STRING_UNBOUND_REST(C, Rest), State) ->
case is_name_start(C) of
true ->
@@ -2698,6 +3452,8 @@ parse_entity_def(Bytes, State, Name) ->
"\", \', SYSTEM or PUBLIC expected").
+parse_def_end(?STRING_EMPTY, State) ->
+ cf(?STRING_EMPTY, State, fun parse_def_end/2);
parse_def_end(?STRING_REST(">", Rest), State) ->
{Rest, State};
parse_def_end(Bytes, State) ->
@@ -2743,16 +3499,16 @@ parse_entity_value(?STRING("\r"), State, Stop, Acc) ->
cf(?STRING("\r"), State, Stop, Acc, fun parse_entity_value/4);
parse_entity_value(?STRING_REST("\n", Rest), #xmerl_sax_parser_state{line_no=N} = State, Stop, Acc) ->
parse_entity_value(Rest,
- State#xmerl_sax_parser_state{line_no=N+1}, Stop, [?space |Acc]);
+ State#xmerl_sax_parser_state{line_no=N+1}, Stop, [?lf |Acc]);
parse_entity_value(?STRING_REST("\r\n", Rest), #xmerl_sax_parser_state{line_no=N} = State, Stop, Acc) ->
parse_entity_value(Rest,
- State#xmerl_sax_parser_state{line_no=N+1}, Stop, [?space |Acc]);
+ State#xmerl_sax_parser_state{line_no=N+1}, Stop, [?lf |Acc]);
parse_entity_value(?STRING_REST("\r", Rest), #xmerl_sax_parser_state{line_no=N} = State, Stop, Acc) ->
parse_entity_value(Rest,
- State#xmerl_sax_parser_state{line_no=N+1}, Stop, [?space |Acc]);
+ State#xmerl_sax_parser_state{line_no=N+1}, Stop, [?lf |Acc]);
parse_entity_value(?STRING_REST("\t", Rest), #xmerl_sax_parser_state{line_no=N} = State, Stop, Acc) ->
parse_entity_value(Rest,
- State#xmerl_sax_parser_state{line_no=N+1}, Stop, [?space |Acc]);
+ State#xmerl_sax_parser_state{line_no=N+1}, Stop, [?tab |Acc]);
parse_entity_value(?STRING_REST("&", Rest), State, Stop, Acc) ->
{Ref, Rest1, State1} = parse_reference(Rest, State, false),
case Ref of
@@ -2776,11 +3532,16 @@ parse_entity_value(?STRING_REST("%", Rest), #xmerl_sax_parser_state{file_type=Ty
"markup declarations in the internal DTD subset: " ++ Name);
_ ->
case Ref of
+ {internal_parameter, _, []} ->
+ parse_entity_value(Rest1, State1, Stop, Acc);
{internal_parameter, _, RefValue} ->
- IValue = ?TO_INPUT_FORMAT(" " ++ RefValue ++ " "),
- parse_entity_value(?APPEND_STRING(IValue, Rest1), State1, Stop, Acc);
- {external_parameter, _, {_PubId, _SysId}} ->
- ?fatal_error(State1, "Parameter references in entity value not supported yet.");
+ IValue = ?TO_INPUT_FORMAT(RefValue),
+ {Ctx, State2} = strip_context(State1),
+ {Acc1, ?STRING_EMPTY, State3} = parse_entity_content(IValue, State2, Acc, false),
+ parse_entity_value(Rest1, add_context_back(Ctx, State3), Stop, Acc1);
+ {external_parameter, _, {PubId, SysId}} ->
+ {Acc1, State2} = parse_external_entity(State1#xmerl_sax_parser_state{file_type = text}, PubId, SysId, Acc),
+ parse_entity_value(Rest1, State2#xmerl_sax_parser_state{file_type = Type}, Stop, Acc1);
{not_found, Name} ->
case State#xmerl_sax_parser_state.skip_external_dtd of
false ->
@@ -2832,6 +3593,8 @@ parse_ndata_decl(Bytes, State) ->
"NDATA or > expected").
+parse_ndata_decl_1(?STRING_EMPTY, State) ->
+ cf(?STRING_EMPTY, State, fun parse_ndata_decl_1/2);
parse_ndata_decl_1(?STRING_UNBOUND_REST(C, _) = Bytes, State) when ?is_whitespace(C) ->
{_WS, Rest, State1} = whitespace(Bytes, State, []),
parse_ndecl_name(Rest, State1);
@@ -2840,6 +3603,8 @@ parse_ndata_decl_1(Bytes, State) ->
"whitespace expected").
+parse_ndecl_name(?STRING_EMPTY, State) ->
+ cf(?STRING_EMPTY, State, fun parse_ndecl_name/2);
parse_ndecl_name(?STRING_UNBOUND_REST(C, Rest), State) ->
case is_name_start(C) of
true ->
@@ -2885,6 +3650,118 @@ parse_pe_def(Bytes, State, Name) ->
unicode_incomplete_check([Bytes, State, Name, fun parse_pe_def/3],
"\", \', SYSTEM or PUBLIC expected").
+%%----------------------------------------------------------------------
+%% Function : parse_include_sect(Rest, State) -> Result
+%% Parameters: Rest = string() | binary()
+%% State = #xmerl_sax_parser_state{}
+%% Result : {Rest, State}
+%% Description: Parse an INCLUDE section.
+%% [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
+%%----------------------------------------------------------------------
+parse_include_sect(?STRING_EMPTY, State) ->
+ case catch cf(?STRING_EMPTY, State, fun parse_include_sect/2) of
+ {Rest, State1} when is_record(State1, xmerl_sax_parser_state) ->
+ {Rest, State1};
+ {fatal_error, {State1, "No more bytes"}} ->
+ ?fatal_error(State1, "Unexpected EOF.");
+ {fatal_error, {State1, Mess}} ->
+ ?fatal_error(State1, Mess)
+ end;
+parse_include_sect(?STRING("]") = Bytes, State) ->
+ cf(Bytes, State, fun parse_include_sect/2);
+parse_include_sect(?STRING_REST("\n", Rest), #xmerl_sax_parser_state{line_no=N} = State) ->
+ parse_include_sect(Rest, State#xmerl_sax_parser_state{line_no=N+1});
+parse_include_sect(?STRING_REST("\r\n", Rest), #xmerl_sax_parser_state{line_no=N} = State) ->
+ parse_include_sect(Rest, State#xmerl_sax_parser_state{line_no=N+1});
+parse_include_sect(?STRING_REST("\r", Rest), #xmerl_sax_parser_state{line_no=N} = State) ->
+ parse_include_sect(Rest, State#xmerl_sax_parser_state{line_no=N+1});
+parse_include_sect(?STRING_UNBOUND_REST(C, Rest), State) when ?is_whitespace(C) ->
+ parse_include_sect(Rest, State);
+parse_include_sect(?STRING_UNBOUND_REST("]>", Rest), State) ->
+ {Rest, State};
+parse_include_sect(?STRING_UNBOUND_REST("[", Rest), State) ->
+ parse_include_sect_1(Rest, State);
+parse_include_sect(Bytes, State) ->
+ unicode_incomplete_check([Bytes, State, fun parse_include_sect/2],
+ "subset declaration expected").
+
+parse_include_sect_1(?STRING_EMPTY, State) ->
+ case catch cf(?STRING_EMPTY, State, fun parse_include_sect_1/2) of
+ {Rest, State1} when is_record(State1, xmerl_sax_parser_state) ->
+ {Rest, State1};
+ {fatal_error, {State1, "No more bytes"}} ->
+ ?fatal_error(State1, "Unexpected EOF.");
+ {fatal_error, {State1, Mess}} ->
+ ?fatal_error(State1, Mess)
+ end;
+parse_include_sect_1(?STRING("]") = Bytes, State) ->
+ cf(Bytes, State, fun parse_include_sect_1/2);
+parse_include_sect_1(?STRING_UNBOUND_REST("]>", Rest), State) ->
+ {Rest, State};
+parse_include_sect_1(?STRING_UNBOUND_REST(_, _) = Bytes, State) ->
+ {Rest1, State1} = parse_text_decl(Bytes, State),
+ case catch parse_doctype_decl(Rest1, State1) of
+ {Rest2, State2} when is_record(State2, xmerl_sax_parser_state) ->
+ parse_include_sect_1(Rest2, State2);
+ {fatal_error, {State2, "No more bytes"}} ->
+ ?fatal_error(State2, "Unexpected EOF.");
+ {fatal_error, {State2, Message}} ->
+ ?fatal_error(State2, Message)
+ end;
+parse_include_sect_1(Bytes, State) ->
+ unicode_incomplete_check([Bytes, State, fun parse_include_sect_1/2],
+ "]> expected").
+
+%%----------------------------------------------------------------------
+%% Function : parse_ignore_sect(Rest, State) -> Result
+%% Parameters: Rest = string() | binary()
+%% State = #xmerl_sax_parser_state{}
+%% Result : {Rest, State}
+%% Description: Parse an INCLUDE section.
+%% [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
+%%----------------------------------------------------------------------
+parse_ignore_sect(?STRING_EMPTY, State) ->
+ cf(?STRING_EMPTY, State, fun parse_ignore_sect/2);
+parse_ignore_sect(?STRING("]") = Bytes, State) ->
+ cf(Bytes, State, fun parse_ignore_sect/2);
+parse_ignore_sect(?STRING_UNBOUND_REST(C, _) = Bytes, State) when ?is_whitespace(C) ->
+ {_WS, Rest, State1} = whitespace(Bytes, State, []),
+ parse_ignore_sect(Rest, State1);
+parse_ignore_sect(?STRING_UNBOUND_REST("[", Rest), State) ->
+ parse_ignore_sect_1(Rest, State, 1);
+parse_ignore_sect(Bytes, State) ->
+ unicode_incomplete_check([Bytes, State, fun parse_ignore_sect/2],
+ "whitespace expected").
+
+parse_ignore_sect_1(?STRING_EMPTY, State, Depth) ->
+ case catch cf(?STRING_EMPTY, State, Depth, fun parse_ignore_sect_1/3) of
+ {Rest, State1} when is_record(State1, xmerl_sax_parser_state) ->
+ {Rest, State1};
+ {fatal_error, {State1, "No more bytes"}} ->
+ ?fatal_error(State1, "Unexpected EOF.");
+ {fatal_error, {State1, Message}} ->
+ ?fatal_error(State1, Message)
+ end;
+parse_ignore_sect_1(?STRING("<") = Bytes, State, Depth) ->
+ cf(Bytes, State, Depth, fun parse_ignore_sect_1/3);
+parse_ignore_sect_1(?STRING("<!") = Bytes, State, Depth) ->
+ cf(Bytes, State, Depth, fun parse_ignore_sect_1/3);
+parse_ignore_sect_1(?STRING("]") = Bytes, State, Depth) ->
+ cf(Bytes, State, Depth, fun parse_ignore_sect_1/3);
+parse_ignore_sect_1(?STRING("]]") = Bytes, State, Depth) ->
+ cf(Bytes, State, Depth, fun parse_ignore_sect_1/3);
+parse_ignore_sect_1(?STRING_UNBOUND_REST("]]>", Rest), State, 1) ->
+ {Rest, State};
+parse_ignore_sect_1(?STRING_UNBOUND_REST("]]>", Rest), State, Depth) ->
+ parse_ignore_sect_1(Rest, State, Depth - 1);
+parse_ignore_sect_1(?STRING_UNBOUND_REST("<![", Rest), State, Depth) ->
+ parse_ignore_sect_1(Rest, State, Depth + 1);
+parse_ignore_sect_1(?STRING_UNBOUND_REST(_, Rest), State, Depth) ->
+ parse_ignore_sect_1(Rest, State, Depth);
+parse_ignore_sect_1(Bytes, State, _) ->
+ unicode_incomplete_check([Bytes, State, fun parse_ignore_sect_1/3],
+ "Char expected").
+
%%----------------------------------------------------------------------
%% Function : parse_notation_decl(Rest, State) -> Result
@@ -2904,6 +3781,8 @@ parse_notation_decl(Bytes, State) ->
"whitespace expected").
+parse_notation_decl_1(?STRING_EMPTY, State) ->
+ cf(?STRING_EMPTY, State, fun parse_notation_decl_1/2);
parse_notation_decl_1(?STRING_UNBOUND_REST(C, Rest), State) ->
case is_name_start(C) of
true ->
@@ -3052,6 +3931,8 @@ is_pubid_char($;) ->
true;
is_pubid_char($=) ->
true;
+is_pubid_char($?) ->
+ true;
is_pubid_char($@) ->
true;
is_pubid_char($_) ->
@@ -3872,3 +4753,176 @@ format_error(Tag, State, Reason) ->
filter_endtag_stack(State#xmerl_sax_parser_state.end_tags),
State#xmerl_sax_parser_state.event_state}.
+external_continuation_cb({IoDevice, _}) ->
+ case file:read(IoDevice, 1024) of
+ eof ->
+ {<<>>, {IoDevice, <<>>}};
+ {error, Err} ->
+ throw({error, Err});
+ {ok, FileBin} ->
+ {FileBin, {IoDevice, <<>>}}
+ end.
+
+external_continuation_cb(FileEnc, FileEnc) ->
+ fun external_continuation_cb/1;
+external_continuation_cb(FileEnc, BaseEnc) ->
+ fun({IoDevice, Rest}) ->
+ case file:read(IoDevice, 1024) of
+ eof when Rest == <<>> ->
+ {<<>>, {IoDevice, <<>>}};
+ eof ->
+ {unicode:characters_to_binary(Rest, FileEnc, BaseEnc), {IoDevice, <<>>}};
+ {error, Err} ->
+ throw({error, Err});
+ {ok, FileBin} ->
+ Comp = <<Rest/binary, FileBin/binary>>,
+ case unicode:characters_to_binary(Comp, FileEnc, BaseEnc) of
+ {incomplete, Good, Bad} ->
+ {Good, {IoDevice, Bad}};
+ {error, _, _} ->
+ throw({error, "bad data"});
+ Good ->
+ {Good, {IoDevice, <<>>}}
+ end
+ end
+ end.
+
+encode_external_input(Head, FileEnc, BaseEnc, #xmerl_sax_parser_state{continuation_state = {FD, _}} = State) ->
+ {NewHead, NewCon} =
+ case unicode:characters_to_binary(Head, FileEnc, BaseEnc) of
+ {incomplete, Good, Bad} ->
+ {Good, {FD, Bad}};
+ {error, _, _} ->
+ throw({error, "bad data"});
+ Good ->
+ {Good, {FD, <<>>}}
+ end,
+ {NewHead, State#xmerl_sax_parser_state{continuation_state = NewCon}}.
+
+check_ref_cycle(#xmerl_sax_parser_state{ref_table = RefTable} = State) ->
+ List = maps:to_list(RefTable),
+ F = fun({K, {internal_general, R}}) ->
+ {K, get_ref_names(R)};
+ ({K, _}) ->
+ {K, []}
+ end,
+ Mapped = lists:map(F, List),
+ IsCycle = lists:any(fun({K, V}) ->
+ check_ref_cycle(K, V, Mapped)
+ end, Mapped),
+ if
+ IsCycle ->
+ ?fatal_error(State, "Reference cycle");
+ true ->
+ ok
+ end.
+
+check_ref_cycle(_, [], _) -> false;
+check_ref_cycle(Key, Vals, List) ->
+ F = fun(V) ->
+ case lists:keyfind(V, 1, List) of
+ false ->
+ [];
+ {_, Vs} ->
+ Vs
+ end
+ end,
+ case lists:flatmap(F, Vals) of
+ [] ->
+ false;
+ Refs ->
+ case lists:member(Key, Refs) of
+ true ->
+ true;
+ false ->
+ check_ref_cycle(Key, Refs, List)
+ end
+ end.
+
+get_ref_names([$&|Rest]) ->
+ case get_ref_names_1(Rest, []) of
+ [] ->
+ [];
+ {Nm, Rest1} ->
+ [Nm|get_ref_names(Rest1)]
+ end;
+get_ref_names([_|Rest]) ->
+ get_ref_names(Rest);
+get_ref_names([]) -> [].
+
+get_ref_names_1([$;|Rest], Acc) ->
+ {lists:reverse(Acc), Rest};
+get_ref_names_1([C|Rest], Acc) ->
+ get_ref_names_1(Rest, [C|Acc]);
+get_ref_names_1([], _) -> [].
+
+%%----------------------------------------------------------------------
+%% Function : strip_context(State) -> {Context, State}
+%% Parameters: Tag = atom(),
+%% State = xmerl_sax_parser_state()
+%% Result : {Context, State}
+%% Description: strips context from State before parsing entity
+%%----------------------------------------------------------------------
+strip_context(#xmerl_sax_parser_state{end_tags = ET,
+ continuation_fun = CF} = State) ->
+ {{ET, CF}, State#xmerl_sax_parser_state{end_tags = [],
+ continuation_fun = undefined}}.
+%%----------------------------------------------------------------------
+%% Function : add_context_back(Context, State) -> State
+%% Parameters: Tag = atom(),
+%% State = xmerl_sax_parser_state()
+%% Result : State
+%% Description: adds original context back to State after parsing entity
+%%----------------------------------------------------------------------
+add_context_back({ET, CF}, State) ->
+ State#xmerl_sax_parser_state{end_tags = ET,
+ continuation_fun = CF}.
+
+%%----------------------------------------------------------------------
+%% Function: detect_charset(Xml, State)
+%% Input: Xml = list() | binary()
+%% State = #xmerl_sax_parser_state{}
+%% Output: {utf8|utf16le|utf16be, Xml, State}
+%% Description: Detects which character set is used in a binary stream.
+%%----------------------------------------------------------------------
+detect_charset(State) ->
+ case catch cf(<<>>, State, fun detect_charset/2) of
+ {fatal_error, {State1, "No more bytes"}} ->
+ {<<>>, State1};
+ Other ->
+ Other
+ end.
+
+detect_charset(<<>>, State) ->
+ {<<>>, State};
+detect_charset(<<16#00>> = Bytes, State) ->
+ cf(Bytes, State, fun detect_charset/2);
+detect_charset(<<16#00, 16#00>> = Bytes, State) ->
+ cf(Bytes, State, fun detect_charset/2);
+detect_charset(<<16#00, 16#00, 16#FE>> = Bytes, State) ->
+ cf(Bytes, State, fun detect_charset/2);
+detect_charset(<<16#EF>> = Bytes, State) ->
+ cf(Bytes, State, fun detect_charset/2);
+detect_charset(<<16#EF, 16#BB>> = Bytes, State) ->
+ cf(Bytes, State, fun detect_charset/2);
+detect_charset(<<16#FE>> = Bytes, State) ->
+ cf(Bytes, State, fun detect_charset/2);
+detect_charset(<<16#FF>> = Bytes, State) ->
+ cf(Bytes, State, fun detect_charset/2);
+detect_charset(<<16#FF, 16#FE>> = Bytes, State) ->
+ cf(Bytes, State, fun detect_charset/2);
+detect_charset(<<16#FF, 16#FE, 16#00>> = Bytes, State) ->
+ cf(Bytes, State, fun detect_charset/2);
+detect_charset(<<16#00, 16#3C, 16#00, 16#3F, _/binary>> = Xml, State) ->
+ {Xml, State#xmerl_sax_parser_state{encoding={utf16, big}}};
+detect_charset(<<16#3C, 16#00, 16#3F, 16#00, _/binary>> = Xml, State) ->
+ {Xml, State#xmerl_sax_parser_state{encoding={utf16, little}}};
+detect_charset(Bytes, State) ->
+ case unicode:bom_to_encoding(Bytes) of
+ {latin1, 0} ->
+ {Bytes, State#xmerl_sax_parser_state{encoding=utf8}};
+ {Enc, Length} ->
+ <<_:Length/binary, RealBytes/binary>> = Bytes,
+ {RealBytes, State#xmerl_sax_parser_state{encoding=Enc}}
+ end.
+
--
2.16.4