File rexml-test.patch of Package ruby2.5.36162
diff --git a/lib/rexml.rb b/lib/rexml.rb
new file mode 100644
index 0000000000..eee246e436
--- /dev/null
+++ b/lib/rexml.rb
@@ -0,0 +1,3 @@
+# frozen_string_literal: true
+
+require_relative "rexml/document"
diff --git a/lib/rexml/attlistdecl.rb b/lib/rexml/attlistdecl.rb
index dc1d2add0b..44a91d66d6 100644
--- a/lib/rexml/attlistdecl.rb
+++ b/lib/rexml/attlistdecl.rb
@@ -1,7 +1,7 @@
# frozen_string_literal: false
#vim:ts=2 sw=2 noexpandtab:
-require 'rexml/child'
-require 'rexml/source'
+require_relative 'child'
+require_relative 'source'
module REXML
# This class needs:
diff --git a/lib/rexml/attribute.rb b/lib/rexml/attribute.rb
index ca5984e178..fe48745ccf 100644
--- a/lib/rexml/attribute.rb
+++ b/lib/rexml/attribute.rb
@@ -1,6 +1,6 @@
-# frozen_string_literal: false
-require "rexml/namespace"
-require 'rexml/text'
+# frozen_string_literal: true
+require_relative "namespace"
+require_relative 'text'
module REXML
# Defines an Element Attribute; IE, a attribute=value pair, as in:
@@ -13,9 +13,6 @@ class Attribute
# The element to which this attribute belongs
attr_reader :element
- # The normalized value of this attribute. That is, the attribute with
- # entities intact.
- attr_writer :normalized
PATTERN = /\s*(#{NAME_STR})\s*=\s*(["'])(.*?)\2/um
NEEDS_A_SECOND_CHECK = /(<|&((#{Entity::NAME});|(#0*((?:\d+)|(?:x[a-fA-F0-9]+)));)?)/um
@@ -67,15 +64,11 @@ def initialize( first, second=nil, parent=nil )
# e.add_attribute( "nsa:a", "aval" )
# e.add_attribute( "b", "bval" )
# e.attributes.get_attribute( "a" ).prefix # -> "nsa"
- # e.attributes.get_attribute( "b" ).prefix # -> "elns"
+ # e.attributes.get_attribute( "b" ).prefix # -> ""
# a = Attribute.new( "x", "y" )
# a.prefix # -> ""
def prefix
- pf = super
- if pf == ""
- pf = @element.prefix if @element
- end
- pf
+ super
end
# Returns the namespace URL, if defined, or nil otherwise
@@ -86,9 +79,26 @@ def prefix
# e.add_attribute("nsx:a", "c")
# e.attribute("ns:a").namespace # => "http://url"
# e.attribute("nsx:a").namespace # => nil
+ #
+ # This method always returns "" for no namespace attribute. Because
+ # the default namespace doesn't apply to attribute names.
+ #
+ # From https://www.w3.org/TR/xml-names/#uniqAttrs
+ #
+ # > the default namespace does not apply to attribute names
+ #
+ # e = REXML::Element.new("el")
+ # e.add_namespace("", "http://example.com/")
+ # e.namespace # => "http://example.com/"
+ # e.add_attribute("a", "b")
+ # e.attribute("a").namespace # => ""
def namespace arg=nil
arg = prefix if arg.nil?
- @element.namespace arg
+ if arg == ""
+ ""
+ else
+ @element.namespace(arg)
+ end
end
# Returns true if other is an Attribute and has the same name and value,
@@ -109,10 +119,13 @@ def hash
# b = Attribute.new( "ns:x", "y" )
# b.to_string # -> "ns:x='y'"
def to_string
+ value = to_s
if @element and @element.context and @element.context[:attribute_quote] == :quote
- %Q^#@expanded_name="#{to_s().gsub(/"/, '"')}"^
+ value = value.gsub('"', '"') if value.include?('"')
+ %Q^#@expanded_name="#{value}"^
else
- "#@expanded_name='#{to_s().gsub(/'/, ''')}'"
+ value = value.gsub("'", ''') if value.include?("'")
+ "#@expanded_name='#{value}'"
end
end
@@ -128,7 +141,6 @@ def to_s
return @normalized if @normalized
@normalized = Text::normalize( @unnormalized, doctype )
- @unnormalized = nil
@normalized
end
@@ -136,9 +148,16 @@ def to_s
# have been expanded to their values
def value
return @unnormalized if @unnormalized
- @unnormalized = Text::unnormalize( @normalized, doctype )
- @normalized = nil
- @unnormalized
+
+ @unnormalized = Text::unnormalize(@normalized, doctype,
+ entity_expansion_text_limit: @element&.document&.entity_expansion_text_limit)
+ end
+
+ # The normalized value of this attribute. That is, the attribute with
+ # entities intact.
+ def normalized=(new_normalized)
+ @normalized = new_normalized
+ @unnormalized = nil
end
# Returns a copy of this attribute
@@ -177,7 +196,7 @@ def node_type
end
def inspect
- rv = ""
+ rv = +""
write( rv )
rv
end
diff --git a/lib/rexml/cdata.rb b/lib/rexml/cdata.rb
index 2238446dc4..997f5a08db 100644
--- a/lib/rexml/cdata.rb
+++ b/lib/rexml/cdata.rb
@@ -1,5 +1,5 @@
# frozen_string_literal: false
-require "rexml/text"
+require_relative "text"
module REXML
class CData < Text
diff --git a/lib/rexml/child.rb b/lib/rexml/child.rb
index d23451e71e..cc6e9a4719 100644
--- a/lib/rexml/child.rb
+++ b/lib/rexml/child.rb
@@ -1,5 +1,5 @@
# frozen_string_literal: false
-require "rexml/node"
+require_relative "node"
module REXML
##
diff --git a/lib/rexml/comment.rb b/lib/rexml/comment.rb
index 822fe0d586..52c58b46f6 100644
--- a/lib/rexml/comment.rb
+++ b/lib/rexml/comment.rb
@@ -1,5 +1,5 @@
# frozen_string_literal: false
-require "rexml/child"
+require_relative "child"
module REXML
##
diff --git a/lib/rexml/doctype.rb b/lib/rexml/doctype.rb
index cb9bf57406..f35904845e 100644
--- a/lib/rexml/doctype.rb
+++ b/lib/rexml/doctype.rb
@@ -1,20 +1,25 @@
# frozen_string_literal: false
-require "rexml/parent"
-require "rexml/parseexception"
-require "rexml/namespace"
-require 'rexml/entity'
-require 'rexml/attlistdecl'
-require 'rexml/xmltokens'
+require_relative "parent"
+require_relative "parseexception"
+require_relative "namespace"
+require_relative 'entity'
+require_relative 'attlistdecl'
+require_relative 'xmltokens'
module REXML
class ReferenceWriter
def initialize(id_type,
public_id_literal,
- system_literal)
+ system_literal,
+ context=nil)
@id_type = id_type
@public_id_literal = public_id_literal
@system_literal = system_literal
- @default_quote = "\""
+ if context and context[:prologue_quote] == :apostrophe
+ @default_quote = "'"
+ else
+ @default_quote = "\""
+ end
end
def write(output)
@@ -150,7 +155,8 @@ def write( output, indent=0, transitive=false, ie_hack=false )
if @external_id
reference_writer = ReferenceWriter.new(@external_id,
@long_name,
- @uri)
+ @uri,
+ context)
reference_writer.write(output)
end
unless @children.empty?
@@ -165,7 +171,11 @@ def write( output, indent=0, transitive=false, ie_hack=false )
end
def context
- @parent.context
+ if @parent
+ @parent.context
+ else
+ nil
+ end
end
def entity( name )
@@ -187,7 +197,7 @@ def public
when "SYSTEM"
nil
when "PUBLIC"
- strip_quotes(@long_name)
+ @long_name
end
end
@@ -197,9 +207,9 @@ def public
def system
case @external_id
when "SYSTEM"
- strip_quotes(@long_name)
+ @long_name
when "PUBLIC"
- @uri.kind_of?(String) ? strip_quotes(@uri) : nil
+ @uri.kind_of?(String) ? @uri : nil
end
end
@@ -221,15 +231,6 @@ def notation(name)
notation_decl.name == name
}
end
-
- private
-
- # Method contributed by Henrik Martensson
- def strip_quotes(quoted_string)
- quoted_string =~ /^[\'\"].*[\'\"]$/ ?
- quoted_string[1, quoted_string.length-2] :
- quoted_string
- end
end
# We don't really handle any of these since we're not a validating
@@ -287,8 +288,10 @@ def initialize name, middle, pub, sys
end
def to_s
+ context = nil
+ context = parent.context if parent
notation = "<!NOTATION #{@name}"
- reference_writer = ReferenceWriter.new(@middle, @public, @system)
+ reference_writer = ReferenceWriter.new(@middle, @public, @system, context)
reference_writer.write(notation)
notation << ">"
notation
diff --git a/lib/rexml/document.rb b/lib/rexml/document.rb
index 806bc499cd..d1747dd42f 100644
--- a/lib/rexml/document.rb
+++ b/lib/rexml/document.rb
@@ -1,40 +1,98 @@
# frozen_string_literal: false
-require "rexml/security"
-require "rexml/element"
-require "rexml/xmldecl"
-require "rexml/source"
-require "rexml/comment"
-require "rexml/doctype"
-require "rexml/instruction"
-require "rexml/rexml"
-require "rexml/parseexception"
-require "rexml/output"
-require "rexml/parsers/baseparser"
-require "rexml/parsers/streamparser"
-require "rexml/parsers/treeparser"
+require_relative "security"
+require_relative "element"
+require_relative "xmldecl"
+require_relative "source"
+require_relative "comment"
+require_relative "doctype"
+require_relative "instruction"
+require_relative "rexml"
+require_relative "parseexception"
+require_relative "output"
+require_relative "parsers/baseparser"
+require_relative "parsers/streamparser"
+require_relative "parsers/treeparser"
module REXML
- # Represents a full XML document, including PIs, a doctype, etc. A
- # Document has a single child that can be accessed by root().
- # Note that if you want to have an XML declaration written for a document
- # you create, you must add one; REXML documents do not write a default
- # declaration for you. See |DECLARATION| and |write|.
+ # Represents an XML document.
+ #
+ # A document may have:
+ #
+ # - A single child that may be accessed via method #root.
+ # - An XML declaration.
+ # - A document type.
+ # - Processing instructions.
+ #
+ # == In a Hurry?
+ #
+ # If you're somewhat familiar with XML
+ # and have a particular task in mind,
+ # you may want to see the
+ # {tasks pages}[../doc/rexml/tasks/tocs/master_toc_rdoc.html],
+ # and in particular, the
+ # {tasks page for documents}[../doc/rexml/tasks/tocs/document_toc_rdoc.html].
+ #
class Document < Element
- # A convenient default XML declaration. If you want an XML declaration,
- # the easiest way to add one is mydoc << Document::DECLARATION
- # +DEPRECATED+
- # Use: mydoc << XMLDecl.default
+ # A convenient default XML declaration. Use:
+ #
+ # mydoc << XMLDecl.default
+ #
DECLARATION = XMLDecl.default
- # Constructor
- # @param source if supplied, must be a Document, String, or IO.
- # Documents have their context and Element attributes cloned.
- # Strings are expected to be valid XML documents. IOs are expected
- # to be sources of valid XML documents.
- # @param context if supplied, contains the context of the document;
- # this should be a Hash.
+ # :call-seq:
+ # new(string = nil, context = {}) -> new_document
+ # new(io_stream = nil, context = {}) -> new_document
+ # new(document = nil, context = {}) -> new_document
+ #
+ # Returns a new \REXML::Document object.
+ #
+ # When no arguments are given,
+ # returns an empty document:
+ #
+ # d = REXML::Document.new
+ # d.to_s # => ""
+ #
+ # When argument +string+ is given, it must be a string
+ # containing a valid XML document:
+ #
+ # xml_string = '<root><foo>Foo</foo><bar>Bar</bar></root>'
+ # d = REXML::Document.new(xml_string)
+ # d.to_s # => "<root><foo>Foo</foo><bar>Bar</bar></root>"
+ #
+ # When argument +io_stream+ is given, it must be an \IO object
+ # that is opened for reading, and when read must return a valid XML document:
+ #
+ # File.write('t.xml', xml_string)
+ # d = File.open('t.xml', 'r') do |io|
+ # REXML::Document.new(io)
+ # end
+ # d.to_s # => "<root><foo>Foo</foo><bar>Bar</bar></root>"
+ #
+ # When argument +document+ is given, it must be an existing
+ # document object, whose context and attributes (but not children)
+ # are cloned into the new document:
+ #
+ # d = REXML::Document.new(xml_string)
+ # d.children # => [<root> ... </>]
+ # d.context = {raw: :all, compress_whitespace: :all}
+ # d.add_attributes({'bar' => 0, 'baz' => 1})
+ # d1 = REXML::Document.new(d)
+ # d1.children # => []
+ # d1.context # => {:raw=>:all, :compress_whitespace=>:all}
+ # d1.attributes # => {"bar"=>bar='0', "baz"=>baz='1'}
+ #
+ # When argument +context+ is given, it must be a hash
+ # containing context entries for the document;
+ # see {Element Context}[../doc/rexml/context_rdoc.html]:
+ #
+ # context = {raw: :all, compress_whitespace: :all}
+ # d = REXML::Document.new(xml_string, context)
+ # d.context # => {:raw=>:all, :compress_whitespace=>:all}
+ #
def initialize( source = nil, context = {} )
@entity_expansion_count = 0
+ @entity_expansion_limit = Security.entity_expansion_limit
+ @entity_expansion_text_limit = Security.entity_expansion_text_limit
super()
@context = context
return if source.nil?
@@ -46,26 +104,71 @@ def initialize( source = nil, context = {} )
end
end
+ # :call-seq:
+ # node_type -> :document
+ #
+ # Returns the symbol +:document+.
+ #
def node_type
:document
end
- # Should be obvious
+ # :call-seq:
+ # clone -> new_document
+ #
+ # Returns the new document resulting from executing
+ # <tt>Document.new(self)</tt>. See Document.new.
+ #
def clone
Document.new self
end
- # According to the XML spec, a root node has no expanded name
+ # :call-seq:
+ # expanded_name -> empty_string
+ #
+ # Returns an empty string.
+ #
def expanded_name
''
#d = doc_type
#d ? d.name : "UNDEFINED"
end
-
alias :name :expanded_name
- # We override this, because XMLDecls and DocTypes must go at the start
- # of the document
+ # :call-seq:
+ # add(xml_decl) -> self
+ # add(doc_type) -> self
+ # add(object) -> self
+ #
+ # Adds an object to the document; returns +self+.
+ #
+ # When argument +xml_decl+ is given,
+ # it must be an REXML::XMLDecl object,
+ # which becomes the XML declaration for the document,
+ # replacing the previous XML declaration if any:
+ #
+ # d = REXML::Document.new
+ # d.xml_decl.to_s # => ""
+ # d.add(REXML::XMLDecl.new('2.0'))
+ # d.xml_decl.to_s # => "<?xml version='2.0'?>"
+ #
+ # When argument +doc_type+ is given,
+ # it must be an REXML::DocType object,
+ # which becomes the document type for the document,
+ # replacing the previous document type, if any:
+ #
+ # d = REXML::Document.new
+ # d.doctype.to_s # => ""
+ # d.add(REXML::DocType.new('foo'))
+ # d.doctype.to_s # => "<!DOCTYPE foo>"
+ #
+ # When argument +object+ (not an REXML::XMLDecl or REXML::DocType object)
+ # is given it is added as the last child:
+ #
+ # d = REXML::Document.new
+ # d.add(REXML::Element.new('foo'))
+ # d.to_s # => "<foo/>"
+ #
def add( child )
if child.kind_of? XMLDecl
if @children[0].kind_of? XMLDecl
@@ -99,49 +202,108 @@ def add( child )
end
alias :<< :add
+ # :call-seq:
+ # add_element(name_or_element = nil, attributes = nil) -> new_element
+ #
+ # Adds an element to the document by calling REXML::Element.add_element:
+ #
+ # REXML::Element.add_element(name_or_element, attributes)
def add_element(arg=nil, arg2=nil)
rv = super
raise "attempted adding second root element to document" if @elements.size > 1
rv
end
- # @return the root Element of the document, or nil if this document
- # has no children.
+ # :call-seq:
+ # root -> root_element or nil
+ #
+ # Returns the root element of the document, if it exists, otherwise +nil+:
+ #
+ # d = REXML::Document.new('<root></root>')
+ # d.root # => <root/>
+ # d = REXML::Document.new('')
+ # d.root # => nil
+ #
def root
elements[1]
#self
#@children.find { |item| item.kind_of? Element }
end
- # @return the DocType child of the document, if one exists,
- # and nil otherwise.
+ # :call-seq:
+ # doctype -> doc_type or nil
+ #
+ # Returns the DocType object for the document, if it exists, otherwise +nil+:
+ #
+ # d = REXML::Document.new('<!DOCTYPE document SYSTEM "subjects.dtd">')
+ # d.doctype.class # => REXML::DocType
+ # d = REXML::Document.new('')
+ # d.doctype.class # => nil
+ #
def doctype
@children.find { |item| item.kind_of? DocType }
end
- # @return the XMLDecl of this document; if no XMLDecl has been
- # set, the default declaration is returned.
+ # :call-seq:
+ # xml_decl -> xml_decl
+ #
+ # Returns the XMLDecl object for the document, if it exists,
+ # otherwise the default XMLDecl object:
+ #
+ # d = REXML::Document.new('<?xml version="1.0" encoding="UTF-8"?>')
+ # d.xml_decl.class # => REXML::XMLDecl
+ # d.xml_decl.to_s # => "<?xml version='1.0' encoding='UTF-8'?>"
+ # d = REXML::Document.new('')
+ # d.xml_decl.class # => REXML::XMLDecl
+ # d.xml_decl.to_s # => ""
+ #
def xml_decl
rv = @children[0]
return rv if rv.kind_of? XMLDecl
@children.unshift(XMLDecl.default)[0]
end
- # @return the XMLDecl version of this document as a String.
- # If no XMLDecl has been set, returns the default version.
+ # :call-seq:
+ # version -> version_string
+ #
+ # Returns the XMLDecl version of this document as a string,
+ # if it has been set, otherwise the default version:
+ #
+ # d = REXML::Document.new('<?xml version="2.0" encoding="UTF-8"?>')
+ # d.version # => "2.0"
+ # d = REXML::Document.new('')
+ # d.version # => "1.0"
+ #
def version
xml_decl().version
end
- # @return the XMLDecl encoding of this document as an
- # Encoding object.
- # If no XMLDecl has been set, returns the default encoding.
+ # :call-seq:
+ # encoding -> encoding_string
+ #
+ # Returns the XMLDecl encoding of the document,
+ # if it has been set, otherwise the default encoding:
+ #
+ # d = REXML::Document.new('<?xml version="1.0" encoding="UTF-16"?>')
+ # d.encoding # => "UTF-16"
+ # d = REXML::Document.new('')
+ # d.encoding # => "UTF-8"
+ #
def encoding
xml_decl().encoding
end
- # @return the XMLDecl standalone value of this document as a String.
- # If no XMLDecl has been set, returns the default setting.
+ # :call-seq:
+ # stand_alone?
+ #
+ # Returns the XMLDecl standalone value of the document as a string,
+ # if it has been set, otherwise the default standalone value:
+ #
+ # d = REXML::Document.new('<?xml standalone="yes"?>')
+ # d.stand_alone? # => "yes"
+ # d = REXML::Document.new('')
+ # d.stand_alone? # => nil
+ #
def stand_alone?
xml_decl().stand_alone?
end
@@ -226,7 +388,7 @@ def write(*arguments)
end
formatter = if indent > -1
if transitive
- require "rexml/formatters/transitive"
+ require_relative "formatters/transitive"
REXML::Formatters::Transitive.new( indent, ie_hack )
else
REXML::Formatters::Pretty.new( indent, ie_hack )
@@ -271,10 +433,12 @@ def Document::entity_expansion_text_limit
end
attr_reader :entity_expansion_count
+ attr_writer :entity_expansion_limit
+ attr_accessor :entity_expansion_text_limit
def record_entity_expansion
@entity_expansion_count += 1
- if @entity_expansion_count > Security.entity_expansion_limit
+ if @entity_expansion_count > @entity_expansion_limit
raise "number of entity expansions exceeded, processing aborted."
end
end
diff --git a/lib/rexml/dtd/attlistdecl.rb b/lib/rexml/dtd/attlistdecl.rb
index 32847daadb..1326cb21e4 100644
--- a/lib/rexml/dtd/attlistdecl.rb
+++ b/lib/rexml/dtd/attlistdecl.rb
@@ -1,5 +1,5 @@
# frozen_string_literal: false
-require "rexml/child"
+require_relative "../child"
module REXML
module DTD
class AttlistDecl < Child
diff --git a/lib/rexml/dtd/dtd.rb b/lib/rexml/dtd/dtd.rb
index 927d5d847b..8b0f2d753a 100644
--- a/lib/rexml/dtd/dtd.rb
+++ b/lib/rexml/dtd/dtd.rb
@@ -1,10 +1,10 @@
# frozen_string_literal: false
-require "rexml/dtd/elementdecl"
-require "rexml/dtd/entitydecl"
-require "rexml/comment"
-require "rexml/dtd/notationdecl"
-require "rexml/dtd/attlistdecl"
-require "rexml/parent"
+require_relative "elementdecl"
+require_relative "entitydecl"
+require_relative "../comment"
+require_relative "notationdecl"
+require_relative "attlistdecl"
+require_relative "../parent"
module REXML
module DTD
diff --git a/lib/rexml/dtd/elementdecl.rb b/lib/rexml/dtd/elementdecl.rb
index 119fd41a8f..20ed023244 100644
--- a/lib/rexml/dtd/elementdecl.rb
+++ b/lib/rexml/dtd/elementdecl.rb
@@ -1,5 +1,5 @@
# frozen_string_literal: false
-require "rexml/child"
+require_relative "../child"
module REXML
module DTD
class ElementDecl < Child
diff --git a/lib/rexml/dtd/entitydecl.rb b/lib/rexml/dtd/entitydecl.rb
index 45707e2f42..312df655ff 100644
--- a/lib/rexml/dtd/entitydecl.rb
+++ b/lib/rexml/dtd/entitydecl.rb
@@ -1,5 +1,5 @@
# frozen_string_literal: false
-require "rexml/child"
+require_relative "../child"
module REXML
module DTD
class EntityDecl < Child
diff --git a/lib/rexml/dtd/notationdecl.rb b/lib/rexml/dtd/notationdecl.rb
index cfdf0b9b74..04a9b08aa7 100644
--- a/lib/rexml/dtd/notationdecl.rb
+++ b/lib/rexml/dtd/notationdecl.rb
@@ -1,5 +1,5 @@
# frozen_string_literal: false
-require "rexml/child"
+require_relative "../child"
module REXML
module DTD
class NotationDecl < Child
diff --git a/lib/rexml/element.rb b/lib/rexml/element.rb
index ac9b10872c..4e3a60b9a2 100644
--- a/lib/rexml/element.rb
+++ b/lib/rexml/element.rb
@@ -1,23 +1,273 @@
# frozen_string_literal: false
-require "rexml/parent"
-require "rexml/namespace"
-require "rexml/attribute"
-require "rexml/cdata"
-require "rexml/xpath"
-require "rexml/parseexception"
+require_relative "parent"
+require_relative "namespace"
+require_relative "attribute"
+require_relative "cdata"
+require_relative "xpath"
+require_relative "parseexception"
module REXML
- # An implementation note about namespaces:
- # As we parse, when we find namespaces we put them in a hash and assign
- # them a unique ID. We then convert the namespace prefix for the node
- # to the unique ID. This makes namespace lookup much faster for the
- # cost of extra memory use. We save the namespace prefix for the
- # context node and convert it back when we write it.
- @@namespaces = {}
-
- # Represents a tagged XML element. Elements are characterized by
- # having children, attributes, and names, and can themselves be
- # children.
+ # An \REXML::Element object represents an XML element.
+ #
+ # An element:
+ #
+ # - Has a name (string).
+ # - May have a parent (another element).
+ # - Has zero or more children
+ # (other elements, text, CDATA, processing instructions, and comments).
+ # - Has zero or more siblings
+ # (other elements, text, CDATA, processing instructions, and comments).
+ # - Has zero or more named attributes.
+ #
+ # == In a Hurry?
+ #
+ # If you're somewhat familiar with XML
+ # and have a particular task in mind,
+ # you may want to see the
+ # {tasks pages}[../doc/rexml/tasks/tocs/master_toc_rdoc.html],
+ # and in particular, the
+ # {tasks page for elements}[../doc/rexml/tasks/tocs/element_toc_rdoc.html].
+ #
+ # === Name
+ #
+ # An element has a name, which is initially set when the element is created:
+ #
+ # e = REXML::Element.new('foo')
+ # e.name # => "foo"
+ #
+ # The name may be changed:
+ #
+ # e.name = 'bar'
+ # e.name # => "bar"
+ #
+ #
+ # === \Parent
+ #
+ # An element may have a parent.
+ #
+ # Its parent may be assigned explicitly when the element is created:
+ #
+ # e0 = REXML::Element.new('foo')
+ # e1 = REXML::Element.new('bar', e0)
+ # e1.parent # => <foo> ... </>
+ #
+ # Note: the representation of an element always shows the element's name.
+ # If the element has children, the representation indicates that
+ # by including an ellipsis (<tt>...</tt>).
+ #
+ # The parent may be assigned explicitly at any time:
+ #
+ # e2 = REXML::Element.new('baz')
+ # e1.parent = e2
+ # e1.parent # => <baz/>
+ #
+ # When an element is added as a child, its parent is set automatically:
+ #
+ # e1.add_element(e0)
+ # e0.parent # => <bar> ... </>
+ #
+ # For an element that has no parent, method +parent+ returns +nil+.
+ #
+ # === Children
+ #
+ # An element has zero or more children.
+ # The children are an ordered collection
+ # of all objects whose parent is the element itself.
+ #
+ # The children may include any combination of elements, text, comments,
+ # processing instructions, and CDATA.
+ # (This example keeps things clean by controlling whitespace
+ # via a +context+ setting.)
+ #
+ # xml_string = <<-EOT
+ # <root>
+ # <ele_0/>
+ # text 0
+ # <!--comment 0-->
+ # <?target_0 pi_0?>
+ # <![CDATA[cdata 0]]>
+ # <ele_1/>
+ # text 1
+ # <!--comment 1-->
+ # <?target_0 pi_1?>
+ # <![CDATA[cdata 1]]>
+ # </root>
+ # EOT
+ # context = {ignore_whitespace_nodes: :all, compress_whitespace: :all}
+ # d = REXML::Document.new(xml_string, context)
+ # root = d.root
+ # root.children.size # => 10
+ # root.each {|child| p "#{child.class}: #{child}" }
+ #
+ # Output:
+ #
+ # "REXML::Element: <ele_0/>"
+ # "REXML::Text: \n text 0\n "
+ # "REXML::Comment: comment 0"
+ # "REXML::Instruction: <?target_0 pi_0?>"
+ # "REXML::CData: cdata 0"
+ # "REXML::Element: <ele_1/>"
+ # "REXML::Text: \n text 1\n "
+ # "REXML::Comment: comment 1"
+ # "REXML::Instruction: <?target_0 pi_1?>"
+ # "REXML::CData: cdata 1"
+ #
+ # A child may be added using inherited methods
+ # Parent#insert_before or Parent#insert_after:
+ #
+ # xml_string = '<root><a/><c/><d/></root>'
+ # d = REXML::Document.new(xml_string)
+ # root = d.root
+ # c = d.root[1] # => <c/>
+ # root.insert_before(c, REXML::Element.new('b'))
+ # root.to_a # => [<a/>, <b/>, <c/>, <d/>]
+ #
+ # A child may be replaced using Parent#replace_child:
+ #
+ # root.replace_child(c, REXML::Element.new('x'))
+ # root.to_a # => [<a/>, <b/>, <x/>, <d/>]
+ #
+ # A child may be removed using Parent#delete:
+ #
+ # x = root[2] # => <x/>
+ # root.delete(x)
+ # root.to_a # => [<a/>, <b/>, <d/>]
+ #
+ # === Siblings
+ #
+ # An element has zero or more siblings,
+ # which are the other children of the element's parent.
+ #
+ # In the example above, element +ele_1+ is between a CDATA sibling
+ # and a text sibling:
+ #
+ # ele_1 = root[5] # => <ele_1/>
+ # ele_1.previous_sibling # => "cdata 0"
+ # ele_1.next_sibling # => "\n text 1\n "
+ #
+ # === \Attributes
+ #
+ # An element has zero or more named attributes.
+ #
+ # A new element has no attributes:
+ #
+ # e = REXML::Element.new('foo')
+ # e.attributes # => {}
+ #
+ # Attributes may be added:
+ #
+ # e.add_attribute('bar', 'baz')
+ # e.add_attribute('bat', 'bam')
+ # e.attributes.size # => 2
+ # e['bar'] # => "baz"
+ # e['bat'] # => "bam"
+ #
+ # An existing attribute may be modified:
+ #
+ # e.add_attribute('bar', 'bad')
+ # e.attributes.size # => 2
+ # e['bar'] # => "bad"
+ #
+ # An existing attribute may be deleted:
+ #
+ # e.delete_attribute('bar')
+ # e.attributes.size # => 1
+ # e['bar'] # => nil
+ #
+ # == What's Here
+ #
+ # To begin with, what's elsewhere?
+ #
+ # \Class \REXML::Element inherits from its ancestor classes:
+ #
+ # - REXML::Child
+ # - REXML::Parent
+ #
+ # \REXML::Element itself and its ancestors also include modules:
+ #
+ # - {Enumerable}[https://docs.ruby-lang.org/en/master/Enumerable.html]
+ # - REXML::Namespace
+ # - REXML::Node
+ # - REXML::XMLTokens
+ #
+ # === Methods for Creating an \Element
+ #
+ # ::new:: Returns a new empty element.
+ # #clone:: Returns a clone of another element.
+ #
+ # === Methods for Attributes
+ #
+ # {[attribute_name]}[#method-i-5B-5D]:: Returns an attribute value.
+ # #add_attribute:: Adds a new attribute.
+ # #add_attributes:: Adds multiple new attributes.
+ # #attribute:: Returns the attribute value for a given name and optional namespace.
+ # #delete_attribute:: Removes an attribute.
+ #
+ # === Methods for Children
+ #
+ # {[index]}[#method-i-5B-5D]:: Returns the child at the given offset.
+ # #add_element:: Adds an element as the last child.
+ # #delete_element:: Deletes a child element.
+ # #each_element:: Calls the given block with each child element.
+ # #each_element_with_attribute:: Calls the given block with each child element
+ # that meets given criteria,
+ # which can include the attribute name.
+ # #each_element_with_text:: Calls the given block with each child element
+ # that meets given criteria,
+ # which can include text.
+ # #get_elements:: Returns an array of element children that match a given xpath.
+ #
+ # === Methods for \Text Children
+ #
+ # #add_text:: Adds a text node to the element.
+ # #get_text:: Returns a text node that meets specified criteria.
+ # #text:: Returns the text string from the first node that meets specified criteria.
+ # #texts:: Returns an array of the text children of the element.
+ # #text=:: Adds, removes, or replaces the first text child of the element
+ #
+ # === Methods for Other Children
+ #
+ # #cdatas:: Returns an array of the cdata children of the element.
+ # #comments:: Returns an array of the comment children of the element.
+ # #instructions:: Returns an array of the instruction children of the element.
+ #
+ # === Methods for Namespaces
+ #
+ # #add_namespace:: Adds a namespace to the element.
+ # #delete_namespace:: Removes a namespace from the element.
+ # #namespace:: Returns the string namespace URI for the element.
+ # #namespaces:: Returns a hash of all defined namespaces in the element.
+ # #prefixes:: Returns an array of the string prefixes (names)
+ # of all defined namespaces in the element
+ #
+ # === Methods for Querying
+ #
+ # #document:: Returns the document, if any, that the element belongs to.
+ # #root:: Returns the most distant element (not document) ancestor of the element.
+ # #root_node:: Returns the most distant ancestor of the element.
+ # #xpath:: Returns the string xpath to the element
+ # relative to the most distant parent
+ # #has_attributes?:: Returns whether the element has attributes.
+ # #has_elements?:: Returns whether the element has elements.
+ # #has_text?:: Returns whether the element has text.
+ # #next_element:: Returns the next sibling that is an element.
+ # #previous_element:: Returns the previous sibling that is an element.
+ # #raw:: Returns whether raw mode is set for the element.
+ # #whitespace:: Returns whether whitespace is respected for the element.
+ # #ignore_whitespace_nodes:: Returns whether whitespace nodes
+ # are to be ignored for the element.
+ # #node_type:: Returns symbol <tt>:element</tt>.
+ #
+ # === One More Method
+ #
+ # #inspect:: Returns a string representation of the element.
+ #
+ # === Accessors
+ #
+ # #elements:: Returns the REXML::Elements object for the element.
+ # #attributes:: Returns the REXML::Attributes object for the element.
+ # #context:: Returns or sets the context hash for the element.
+ #
class Element < Parent
include Namespace
@@ -30,32 +280,42 @@ class Element < Parent
# whitespace handling.
attr_accessor :context
- # Constructor
- # arg::
- # if not supplied, will be set to the default value.
- # If a String, the name of this object will be set to the argument.
- # If an Element, the object will be shallowly cloned; name,
- # attributes, and namespaces will be copied. Children will +not+ be
- # copied.
- # parent::
- # if supplied, must be a Parent, and will be used as
- # the parent of this object.
- # context::
- # If supplied, must be a hash containing context items. Context items
- # include:
- # * <tt>:respect_whitespace</tt> the value of this is :+all+ or an array of
- # strings being the names of the elements to respect
- # whitespace for. Defaults to :+all+.
- # * <tt>:compress_whitespace</tt> the value can be :+all+ or an array of
- # strings being the names of the elements to ignore whitespace on.
- # Overrides :+respect_whitespace+.
- # * <tt>:ignore_whitespace_nodes</tt> the value can be :+all+ or an array
- # of strings being the names of the elements in which to ignore
- # whitespace-only nodes. If this is set, Text nodes which contain only
- # whitespace will not be added to the document tree.
- # * <tt>:raw</tt> can be :+all+, or an array of strings being the names of
- # the elements to process in raw mode. In raw mode, special
- # characters in text is not converted to or from entities.
+ # :call-seq:
+ # Element.new(name = 'UNDEFINED', parent = nil, context = nil) -> new_element
+ # Element.new(element, parent = nil, context = nil) -> new_element
+ #
+ # Returns a new \REXML::Element object.
+ #
+ # When no arguments are given,
+ # returns an element with name <tt>'UNDEFINED'</tt>:
+ #
+ # e = REXML::Element.new # => <UNDEFINED/>
+ # e.class # => REXML::Element
+ # e.name # => "UNDEFINED"
+ #
+ # When only argument +name+ is given,
+ # returns an element of the given name:
+ #
+ # REXML::Element.new('foo') # => <foo/>
+ #
+ # When only argument +element+ is given, it must be an \REXML::Element object;
+ # returns a shallow copy of the given element:
+ #
+ # e0 = REXML::Element.new('foo')
+ # e1 = REXML::Element.new(e0) # => <foo/>
+ #
+ # When argument +parent+ is also given, it must be an REXML::Parent object:
+ #
+ # e = REXML::Element.new('foo', REXML::Parent.new)
+ # e.parent # => #<REXML::Parent @parent=nil, @children=[<foo/>]>
+ #
+ # When argument +context+ is also given, it must be a hash
+ # representing the context for the element;
+ # see {Element Context}[../doc/rexml/context_rdoc.html]:
+ #
+ # e = REXML::Element.new('foo', nil, {raw: :all})
+ # e.context # => {:raw=>:all}
+ #
def initialize( arg = UNDEFINED, parent=nil, context=nil )
super(parent)
@@ -74,6 +334,27 @@ def initialize( arg = UNDEFINED, parent=nil, context=nil )
end
end
+ # :call-seq:
+ # inspect -> string
+ #
+ # Returns a string representation of the element.
+ #
+ # For an element with no attributes and no children, shows the element name:
+ #
+ # REXML::Element.new.inspect # => "<UNDEFINED/>"
+ #
+ # Shows attributes, if any:
+ #
+ # e = REXML::Element.new('foo')
+ # e.add_attributes({'bar' => 0, 'baz' => 1})
+ # e.inspect # => "<foo bar='0' baz='1'/>"
+ #
+ # Shows an ellipsis (<tt>...</tt>), if there are child elements:
+ #
+ # e.add_element(REXML::Element.new('bar'))
+ # e.add_element(REXML::Element.new('baz'))
+ # e.inspect # => "<foo bar='0' baz='1'> ... </>"
+ #
def inspect
rv = "<#@expanded_name"
@@ -89,60 +370,123 @@ def inspect
end
end
-
- # Creates a shallow copy of self.
- # d = Document.new "<a><b/><b/><c><d/></c></a>"
- # new_a = d.root.clone
- # puts new_a # => "<a/>"
+ # :call-seq:
+ # clone -> new_element
+ #
+ # Returns a shallow copy of the element, containing the name and attributes,
+ # but not the parent or children:
+ #
+ # e = REXML::Element.new('foo')
+ # e.add_attributes({'bar' => 0, 'baz' => 1})
+ # e.clone # => <foo bar='0' baz='1'/>
+ #
def clone
self.class.new self
end
- # Evaluates to the root node of the document that this element
- # belongs to. If this element doesn't belong to a document, but does
- # belong to another Element, the parent's root will be returned, until the
- # earliest ancestor is found.
- #
- # Note that this is not the same as the document element.
- # In the following example, <a> is the document element, and the root
- # node is the parent node of the document element. You may ask yourself
- # why the root node is useful: consider the doctype and XML declaration,
- # and any processing instructions before the document element... they
- # are children of the root node, or siblings of the document element.
- # The only time this isn't true is when an Element is created that is
- # not part of any Document. In this case, the ancestor that has no
- # parent acts as the root node.
- # d = Document.new '<a><b><c/></b></a>'
- # a = d[1] ; c = a[1][1]
- # d.root_node == d # TRUE
- # a.root_node # namely, d
- # c.root_node # again, d
+ # :call-seq:
+ # root_node -> document or element
+ #
+ # Returns the most distant ancestor of +self+.
+ #
+ # When the element is part of a document,
+ # returns the root node of the document.
+ # Note that the root node is different from the document element;
+ # in this example +a+ is document element and the root node is its parent:
+ #
+ # d = REXML::Document.new('<a><b><c/></b></a>')
+ # top_element = d.first # => <a> ... </>
+ # child = top_element.first # => <b> ... </>
+ # d.root_node == d # => true
+ # top_element.root_node == d # => true
+ # child.root_node == d # => true
+ #
+ # When the element is not part of a document, but does have ancestor elements,
+ # returns the most distant ancestor element:
+ #
+ # e0 = REXML::Element.new('foo')
+ # e1 = REXML::Element.new('bar')
+ # e1.parent = e0
+ # e2 = REXML::Element.new('baz')
+ # e2.parent = e1
+ # e2.root_node == e0 # => true
+ #
+ # When the element has no ancestor elements,
+ # returns +self+:
+ #
+ # e = REXML::Element.new('foo')
+ # e.root_node == e # => true
+ #
+ # Related: #root, #document.
+ #
def root_node
parent.nil? ? self : parent.root_node
end
+ # :call-seq:
+ # root -> element
+ #
+ # Returns the most distant _element_ (not document) ancestor of the element:
+ #
+ # d = REXML::Document.new('<a><b><c/></b></a>')
+ # top_element = d.first
+ # child = top_element.first
+ # top_element.root == top_element # => true
+ # child.root == top_element # => true
+ #
+ # For a document, returns the topmost element:
+ #
+ # d.root == top_element # => true
+ #
+ # Related: #root_node, #document.
+ #
def root
- return elements[1] if self.kind_of? Document
- return self if parent.kind_of? Document or parent.nil?
- return parent.root
+ target = self
+ while target
+ return target.elements[1] if target.kind_of? Document
+ parent = target.parent
+ return target if parent.kind_of? Document or parent.nil?
+ target = parent
+ end
+ nil
end
- # Evaluates to the document to which this element belongs, or nil if this
- # element doesn't belong to a document.
+ # :call-seq:
+ # document -> document or nil
+ #
+ # If the element is part of a document, returns that document:
+ #
+ # d = REXML::Document.new('<a><b><c/></b></a>')
+ # top_element = d.first
+ # child = top_element.first
+ # top_element.document == d # => true
+ # child.document == d # => true
+ #
+ # If the element is not part of a document, returns +nil+:
+ #
+ # REXML::Element.new.document # => nil
+ #
+ # For a document, returns +self+:
+ #
+ # d.document == d # => true
+ #
+ # Related: #root, #root_node.
+ #
def document
rt = root
rt.parent if rt
end
- # Evaluates to +true+ if whitespace is respected for this element. This
- # is the case if:
- # 1. Neither :+respect_whitespace+ nor :+compress_whitespace+ has any value
- # 2. The context has :+respect_whitespace+ set to :+all+ or
- # an array containing the name of this element, and
- # :+compress_whitespace+ isn't set to :+all+ or an array containing the
- # name of this element.
- # The evaluation is tested against +expanded_name+, and so is namespace
- # sensitive.
+ # :call-seq:
+ # whitespace
+ #
+ # Returns +true+ if whitespace is respected for this element,
+ # +false+ otherwise.
+ #
+ # See {Element Context}[../doc/rexml/context_rdoc.html].
+ #
+ # The evaluation is tested against the element's +expanded_name+,
+ # and so is namespace-sensitive.
def whitespace
@whitespace = nil
if @context
@@ -159,6 +503,13 @@ def whitespace
@whitespace
end
+ # :call-seq:
+ # ignore_whitespace_nodes
+ #
+ # Returns +true+ if whitespace nodes are ignored for the element.
+ #
+ # See {Element Context}[../doc/rexml/context_rdoc.html].
+ #
def ignore_whitespace_nodes
@ignore_whitespace_nodes = false
if @context
@@ -170,9 +521,12 @@ def ignore_whitespace_nodes
end
end
- # Evaluates to +true+ if raw mode is set for this element. This
- # is the case if the context has :+raw+ set to :+all+ or
- # an array containing the name of this element.
+ # :call-seq:
+ # raw
+ #
+ # Returns +true+ if raw mode is set for the element.
+ #
+ # See {Element Context}[../doc/rexml/context_rdoc.html].
#
# The evaluation is tested against +expanded_name+, and so is namespace
# sensitive.
@@ -180,7 +534,7 @@ def raw
@raw = (@context and @context[:raw] and
(@context[:raw] == :all or
@context[:raw].include? expanded_name))
- @raw
+ @raw
end
#once :whitespace, :raw, :ignore_whitespace_nodes
@@ -189,10 +543,25 @@ def raw
# Namespaces #
#################################################
- # Evaluates to an +Array+ containing the prefixes (names) of all defined
- # namespaces at this context node.
- # doc = Document.new("<a xmlns:x='1' xmlns:y='2'><b/><c xmlns:z='3'/></a>")
- # doc.elements['//b'].prefixes # -> ['x', 'y']
+ # :call-seq:
+ # prefixes -> array_of_namespace_prefixes
+ #
+ # Returns an array of the string prefixes (names) of all defined namespaces
+ # in the element and its ancestors:
+ #
+ # xml_string = <<-EOT
+ # <root>
+ # <a xmlns:x='1' xmlns:y='2'>
+ # <b/>
+ # <c xmlns:z='3'/>
+ # </a>
+ # </root>
+ # EOT
+ # d = REXML::Document.new(xml_string, {compress_whitespace: :all})
+ # d.elements['//a'].prefixes # => ["x", "y"]
+ # d.elements['//b'].prefixes # => ["x", "y"]
+ # d.elements['//c'].prefixes # => ["x", "y", "z"]
+ #
def prefixes
prefixes = []
prefixes = parent.prefixes if parent
@@ -200,6 +569,25 @@ def prefixes
return prefixes
end
+ # :call-seq:
+ # namespaces -> array_of_namespace_names
+ #
+ # Returns a hash of all defined namespaces
+ # in the element and its ancestors:
+ #
+ # xml_string = <<-EOT
+ # <root>
+ # <a xmlns:x='1' xmlns:y='2'>
+ # <b/>
+ # <c xmlns:z='3'/>
+ # </a>
+ # </root>
+ # EOT
+ # d = REXML::Document.new(xml_string)
+ # d.elements['//a'].namespaces # => {"x"=>"1", "y"=>"2"}
+ # d.elements['//b'].namespaces # => {"x"=>"1", "y"=>"2"}
+ # d.elements['//c'].namespaces # => {"x"=>"1", "y"=>"2", "z"=>"3"}
+ #
def namespaces
namespaces = {}
namespaces = parent.namespaces if parent
@@ -207,19 +595,26 @@ def namespaces
return namespaces
end
- # Evaluates to the URI for a prefix, or the empty string if no such
- # namespace is declared for this element. Evaluates recursively for
- # ancestors. Returns the default namespace, if there is one.
- # prefix::
- # the prefix to search for. If not supplied, returns the default
- # namespace if one exists
- # Returns::
- # the namespace URI as a String, or nil if no such namespace
- # exists. If the namespace is undefined, returns an empty string
- # doc = Document.new("<a xmlns='1' xmlns:y='2'><b/><c xmlns:z='3'/></a>")
- # b = doc.elements['//b']
- # b.namespace # -> '1'
- # b.namespace("y") # -> '2'
+ # :call-seq:
+ # namespace(prefix = nil) -> string_uri or nil
+ #
+ # Returns the string namespace URI for the element,
+ # possibly deriving from one of its ancestors.
+ #
+ # xml_string = <<-EOT
+ # <root>
+ # <a xmlns='1' xmlns:y='2'>
+ # <b/>
+ # <c xmlns:z='3'/>
+ # </a>
+ # </root>
+ # EOT
+ # d = REXML::Document.new(xml_string)
+ # b = d.elements['//b']
+ # b.namespace # => "1"
+ # b.namespace('y') # => "2"
+ # b.namespace('nosuch') # => nil
+ #
def namespace(prefix=nil)
if prefix.nil?
prefix = prefix()
@@ -229,25 +624,34 @@ def namespace(prefix=nil)
else
prefix = "xmlns:#{prefix}" unless prefix[0,5] == 'xmlns'
end
- ns = attributes[ prefix ]
- ns = parent.namespace(prefix) if ns.nil? and parent
+ ns = nil
+ target = self
+ while ns.nil? and target
+ ns = target.attributes[prefix]
+ target = target.parent
+ end
ns = '' if ns.nil? and prefix == 'xmlns'
return ns
end
- # Adds a namespace to this element.
- # prefix::
- # the prefix string, or the namespace URI if +uri+ is not
- # supplied
- # uri::
- # the namespace URI. May be nil, in which +prefix+ is used as
- # the URI
- # Evaluates to: this Element
- # a = Element.new("a")
- # a.add_namespace("xmlns:foo", "bar" )
- # a.add_namespace("foo", "bar") # shorthand for previous line
- # a.add_namespace("twiddle")
- # puts a #-> <a xmlns:foo='bar' xmlns='twiddle'/>
+ # :call-seq:
+ # add_namespace(prefix, uri = nil) -> self
+ #
+ # Adds a namespace to the element; returns +self+.
+ #
+ # With the single argument +prefix+,
+ # adds a namespace using the given +prefix+ and the namespace URI:
+ #
+ # e = REXML::Element.new('foo')
+ # e.add_namespace('bar')
+ # e.namespaces # => {"xmlns"=>"bar"}
+ #
+ # With both arguments +prefix+ and +uri+ given,
+ # adds a namespace using both arguments:
+ #
+ # e.add_namespace('baz', 'bat')
+ # e.namespaces # => {"xmlns"=>"bar", "baz"=>"bat"}
+ #
def add_namespace( prefix, uri=nil )
unless uri
@attributes["xmlns"] = prefix
@@ -258,16 +662,28 @@ def add_namespace( prefix, uri=nil )
self
end
- # Removes a namespace from this node. This only works if the namespace is
- # actually declared in this node. If no argument is passed, deletes the
- # default namespace.
+ # :call-seq:
+ # delete_namespace(namespace = 'xmlns') -> self
+ #
+ # Removes a namespace from the element.
+ #
+ # With no argument, removes the default namespace:
+ #
+ # d = REXML::Document.new "<a xmlns:foo='bar' xmlns='twiddle'/>"
+ # d.to_s # => "<a xmlns:foo='bar' xmlns='twiddle'/>"
+ # d.root.delete_namespace # => <a xmlns:foo='bar'/>
+ # d.to_s # => "<a xmlns:foo='bar'/>"
+ #
+ # With argument +namespace+, removes the specified namespace:
+ #
+ # d.root.delete_namespace('foo')
+ # d.to_s # => "<a/>"
+ #
+ # Does nothing if no such namespace is found:
+ #
+ # d.root.delete_namespace('nosuch')
+ # d.to_s # => "<a/>"
#
- # Evaluates to: this element
- # doc = Document.new "<a xmlns:foo='bar' xmlns='twiddle'/>"
- # doc.root.delete_namespace
- # puts doc # -> <a xmlns:foo='bar'/>
- # doc.root.delete_namespace 'foo'
- # puts doc # -> <a/>
def delete_namespace namespace="xmlns"
namespace = "xmlns:#{namespace}" unless namespace == 'xmlns'
attribute = attributes.get_attribute(namespace)
@@ -279,20 +695,40 @@ def delete_namespace namespace="xmlns"
# Elements #
#################################################
- # Adds a child to this element, optionally setting attributes in
- # the element.
- # element::
- # optional. If Element, the element is added.
- # Otherwise, a new Element is constructed with the argument (see
- # Element.initialize).
- # attrs::
- # If supplied, must be a Hash containing String name,value
- # pairs, which will be used to set the attributes of the new Element.
- # Returns:: the Element that was added
- # el = doc.add_element 'my-tag'
- # el = doc.add_element 'my-tag', {'attr1'=>'val1', 'attr2'=>'val2'}
- # el = Element.new 'my-tag'
- # doc.add_element el
+ # :call-seq:
+ # add_element(name, attributes = nil) -> new_element
+ # add_element(element, attributes = nil) -> element
+ #
+ # Adds a child element, optionally setting attributes
+ # on the added element; returns the added element.
+ #
+ # With string argument +name+, creates a new element with that name
+ # and adds the new element as a child:
+ #
+ # e0 = REXML::Element.new('foo')
+ # e0.add_element('bar')
+ # e0[0] # => <bar/>
+ #
+ #
+ # With argument +name+ and hash argument +attributes+,
+ # sets attributes on the new element:
+ #
+ # e0.add_element('baz', {'bat' => '0', 'bam' => '1'})
+ # e0[1] # => <baz bat='0' bam='1'/>
+ #
+ # With element argument +element+, adds that element as a child:
+ #
+ # e0 = REXML::Element.new('foo')
+ # e1 = REXML::Element.new('bar')
+ # e0.add_element(e1)
+ # e0[0] # => <bar/>
+ #
+ # With argument +element+ and hash argument +attributes+,
+ # sets attributes on the added element:
+ #
+ # e0.add_element(e1, {'bat' => '0', 'bam' => '1'})
+ # e0[1] # => <bar bat='0' bam='1'/>
+ #
def add_element element, attrs=nil
raise "First argument must be either an element name, or an Element object" if element.nil?
el = @elements.add(element)
@@ -302,52 +738,112 @@ def add_element element, attrs=nil
el
end
+ # :call-seq:
+ # delete_element(index) -> removed_element or nil
+ # delete_element(element) -> removed_element or nil
+ # delete_element(xpath) -> removed_element or nil
+ #
# Deletes a child element.
- # element::
- # Must be an +Element+, +String+, or +Integer+. If Element,
- # the element is removed. If String, the element is found (via XPath)
- # and removed. <em>This means that any parent can remove any
- # descendant.<em> If Integer, the Element indexed by that number will be
- # removed.
- # Returns:: the element that was removed.
- # doc.delete_element "/a/b/c[@id='4']"
- # doc.delete_element doc.elements["//k"]
- # doc.delete_element 1
+ #
+ # When 1-based integer argument +index+ is given,
+ # removes and returns the child element at that offset if it exists;
+ # indexing does not include text nodes;
+ # returns +nil+ if the element does not exist:
+ #
+ # d = REXML::Document.new '<a><b/>text<c/></a>'
+ # a = d.root # => <a> ... </>
+ # a.delete_element(1) # => <b/>
+ # a.delete_element(1) # => <c/>
+ # a.delete_element(1) # => nil
+ #
+ # When element argument +element+ is given,
+ # removes and returns that child element if it exists,
+ # otherwise returns +nil+:
+ #
+ # d = REXML::Document.new '<a><b/>text<c/></a>'
+ # a = d.root # => <a> ... </>
+ # c = a[2] # => <c/>
+ # a.delete_element(c) # => <c/>
+ # a.delete_element(c) # => nil
+ #
+ # When xpath argument +xpath+ is given,
+ # removes and returns the element at xpath if it exists,
+ # otherwise returns +nil+:
+ #
+ # d = REXML::Document.new '<a><b/>text<c/></a>'
+ # a = d.root # => <a> ... </>
+ # a.delete_element('//c') # => <c/>
+ # a.delete_element('//c') # => nil
+ #
def delete_element element
@elements.delete element
end
- # Evaluates to +true+ if this element has at least one child Element
- # doc = Document.new "<a><b/><c>Text</c></a>"
- # doc.root.has_elements # -> true
- # doc.elements["/a/b"].has_elements # -> false
- # doc.elements["/a/c"].has_elements # -> false
+ # :call-seq:
+ # has_elements?
+ #
+ # Returns +true+ if the element has one or more element children,
+ # +false+ otherwise:
+ #
+ # d = REXML::Document.new '<a><b/>text<c/></a>'
+ # a = d.root # => <a> ... </>
+ # a.has_elements? # => true
+ # b = a[0] # => <b/>
+ # b.has_elements? # => false
+ #
def has_elements?
!@elements.empty?
end
- # Iterates through the child elements, yielding for each Element that
- # has a particular attribute set.
- # key::
- # the name of the attribute to search for
- # value::
- # the value of the attribute
- # max::
- # (optional) causes this method to return after yielding
- # for this number of matching children
- # name::
- # (optional) if supplied, this is an XPath that filters
- # the children to check.
- #
- # doc = Document.new "<a><b @id='1'/><c @id='2'/><d @id='1'/><e/></a>"
- # # Yields b, c, d
- # doc.root.each_element_with_attribute( 'id' ) {|e| p e}
- # # Yields b, d
- # doc.root.each_element_with_attribute( 'id', '1' ) {|e| p e}
- # # Yields b
- # doc.root.each_element_with_attribute( 'id', '1', 1 ) {|e| p e}
- # # Yields d
- # doc.root.each_element_with_attribute( 'id', '1', 0, 'd' ) {|e| p e}
+ # :call-seq:
+ # each_element_with_attribute(attr_name, value = nil, max = 0, xpath = nil) {|e| ... }
+ #
+ # Calls the given block with each child element that meets given criteria.
+ #
+ # When only string argument +attr_name+ is given,
+ # calls the block with each child element that has that attribute:
+ #
+ # d = REXML::Document.new '<a><b id="1"/><c id="2"/><d id="1"/><e/></a>'
+ # a = d.root
+ # a.each_element_with_attribute('id') {|e| p e }
+ #
+ # Output:
+ #
+ # <b id='1'/>
+ # <c id='2'/>
+ # <d id='1'/>
+ #
+ # With argument +attr_name+ and string argument +value+ given,
+ # calls the block with each child element that has that attribute
+ # with that value:
+ #
+ # a.each_element_with_attribute('id', '1') {|e| p e }
+ #
+ # Output:
+ #
+ # <b id='1'/>
+ # <d id='1'/>
+ #
+ # With arguments +attr_name+, +value+, and integer argument +max+ given,
+ # calls the block with at most +max+ child elements:
+ #
+ # a.each_element_with_attribute('id', '1', 1) {|e| p e }
+ #
+ # Output:
+ #
+ # <b id='1'/>
+ #
+ # With all arguments given, including +xpath+,
+ # calls the block with only those child elements
+ # that meet the first three criteria,
+ # and also match the given +xpath+:
+ #
+ # a.each_element_with_attribute('id', '1', 2, '//d') {|e| p e }
+ #
+ # Output:
+ #
+ # <d id='1'/>
+ #
def each_element_with_attribute( key, value=nil, max=0, name=nil, &block ) # :yields: Element
each_with_something( proc {|child|
if value.nil?
@@ -358,27 +854,53 @@ def each_element_with_attribute( key, value=nil, max=0, name=nil, &block ) # :yi
}, max, name, &block )
end
- # Iterates through the children, yielding for each Element that
- # has a particular text set.
- # text::
- # the text to search for. If nil, or not supplied, will iterate
- # over all +Element+ children that contain at least one +Text+ node.
- # max::
- # (optional) causes this method to return after yielding
- # for this number of matching children
- # name::
- # (optional) if supplied, this is an XPath that filters
- # the children to check.
- #
- # doc = Document.new '<a><b>b</b><c>b</c><d>d</d><e/></a>'
- # # Yields b, c, d
- # doc.each_element_with_text {|e|p e}
- # # Yields b, c
- # doc.each_element_with_text('b'){|e|p e}
- # # Yields b
- # doc.each_element_with_text('b', 1){|e|p e}
- # # Yields d
- # doc.each_element_with_text(nil, 0, 'd'){|e|p e}
+ # :call-seq:
+ # each_element_with_text(text = nil, max = 0, xpath = nil) {|e| ... }
+ #
+ # Calls the given block with each child element that meets given criteria.
+ #
+ # With no arguments, calls the block with each child element that has text:
+ #
+ # d = REXML::Document.new '<a><b>b</b><c>b</c><d>d</d><e/></a>'
+ # a = d.root
+ # a.each_element_with_text {|e| p e }
+ #
+ # Output:
+ #
+ # <b> ... </>
+ # <c> ... </>
+ # <d> ... </>
+ #
+ # With the single string argument +text+,
+ # calls the block with each element that has exactly that text:
+ #
+ # a.each_element_with_text('b') {|e| p e }
+ #
+ # Output:
+ #
+ # <b> ... </>
+ # <c> ... </>
+ #
+ # With argument +text+ and integer argument +max+,
+ # calls the block with at most +max+ elements:
+ #
+ # a.each_element_with_text('b', 1) {|e| p e }
+ #
+ # Output:
+ #
+ # <b> ... </>
+ #
+ # With all arguments given, including +xpath+,
+ # calls the block with only those child elements
+ # that meet the first two criteria,
+ # and also match the given +xpath+:
+ #
+ # a.each_element_with_text('b', 2, '//c') {|e| p e }
+ #
+ # Output:
+ #
+ # <c> ... </>
+ #
def each_element_with_text( text=nil, max=0, name=nil, &block ) # :yields: Element
each_with_something( proc {|child|
if text.nil?
@@ -389,35 +911,71 @@ def each_element_with_text( text=nil, max=0, name=nil, &block ) # :yields: Eleme
}, max, name, &block )
end
- # Synonym for Element.elements.each
+ # :call-seq:
+ # each_element {|e| ... }
+ #
+ # Calls the given block with each child element:
+ #
+ # d = REXML::Document.new '<a><b>b</b><c>b</c><d>d</d><e/></a>'
+ # a = d.root
+ # a.each_element {|e| p e }
+ #
+ # Output:
+ #
+ # <b> ... </>
+ # <c> ... </>
+ # <d> ... </>
+ # <e/>
+ #
def each_element( xpath=nil, &block ) # :yields: Element
@elements.each( xpath, &block )
end
- # Synonym for Element.to_a
- # This is a little slower than calling elements.each directly.
- # xpath:: any XPath by which to search for elements in the tree
- # Returns:: an array of Elements that match the supplied path
+ # :call-seq:
+ # get_elements(xpath)
+ #
+ # Returns an array of the elements that match the given +xpath+:
+ #
+ # xml_string = <<-EOT
+ # <root>
+ # <a level='1'>
+ # <a level='2'/>
+ # </a>
+ # </root>
+ # EOT
+ # d = REXML::Document.new(xml_string)
+ # d.root.get_elements('//a') # => [<a level='1'> ... </>, <a level='2'/>]
+ #
def get_elements( xpath )
@elements.to_a( xpath )
end
- # Returns the next sibling that is an element, or nil if there is
- # no Element sibling after this one
- # doc = Document.new '<a><b/>text<c/></a>'
- # doc.root.elements['b'].next_element #-> <c/>
- # doc.root.elements['c'].next_element #-> nil
+ # :call-seq:
+ # next_element
+ #
+ # Returns the next sibling that is an element if it exists,
+ # +niL+ otherwise:
+ #
+ # d = REXML::Document.new '<a><b/>text<c/></a>'
+ # d.root.elements['b'].next_element #-> <c/>
+ # d.root.elements['c'].next_element #-> nil
+ #
def next_element
element = next_sibling
element = element.next_sibling until element.nil? or element.kind_of? Element
return element
end
- # Returns the previous sibling that is an element, or nil if there is
- # no Element sibling prior to this one
- # doc = Document.new '<a><b/>text<c/></a>'
- # doc.root.elements['c'].previous_element #-> <b/>
- # doc.root.elements['b'].previous_element #-> nil
+ # :call-seq:
+ # previous_element
+ #
+ # Returns the previous sibling that is an element if it exists,
+ # +niL+ otherwise:
+ #
+ # d = REXML::Document.new '<a><b/>text<c/></a>'
+ # d.root.elements['c'].previous_element #-> <b/>
+ # d.root.elements['b'].previous_element #-> nil
+ #
def previous_element
element = previous_sibling
element = element.previous_sibling until element.nil? or element.kind_of? Element
@@ -429,36 +987,69 @@ def previous_element
# Text #
#################################################
- # Evaluates to +true+ if this element has at least one Text child
+ # :call-seq:
+ # has_text? -> true or false
+ #
+ # Returns +true+ if the element has one or more text noded,
+ # +false+ otherwise:
+ #
+ # d = REXML::Document.new '<a><b/>text<c/></a>'
+ # a = d.root
+ # a.has_text? # => true
+ # b = a[0]
+ # b.has_text? # => false
+ #
def has_text?
not text().nil?
end
- # A convenience method which returns the String value of the _first_
- # child text element, if one exists, and +nil+ otherwise.
+ # :call-seq:
+ # text(xpath = nil) -> text_string or nil
+ #
+ # Returns the text string from the first text node child
+ # in a specified element, if it exists, +nil+ otherwise.
+ #
+ # With no argument, returns the text from the first text node in +self+:
+ #
+ # d = REXML::Document.new "<p>some text <b>this is bold!</b> more text</p>"
+ # d.root.text.class # => String
+ # d.root.text # => "some text "
+ #
+ # With argument +xpath+, returns text from the first text node
+ # in the element that matches +xpath+:
+ #
+ # d.root.text(1) # => "this is bold!"
#
- # <em>Note that an element may have multiple Text elements, perhaps
- # separated by other children</em>. Be aware that this method only returns
- # the first Text node.
+ # Note that an element may have multiple text nodes,
+ # possibly separated by other non-text children, as above.
+ # Even so, the returned value is the string text from the first such node.
#
- # This method returns the +value+ of the first text child node, which
- # ignores the +raw+ setting, so always returns normalized text. See
- # the Text::value documentation.
+ # Note also that the text note is retrieved by method get_text,
+ # and so is always normalized text.
#
- # doc = Document.new "<p>some text <b>this is bold!</b> more text</p>"
- # # The element 'p' has two text elements, "some text " and " more text".
- # doc.root.text #-> "some text "
def text( path = nil )
rv = get_text(path)
return rv.value unless rv.nil?
nil
end
- # Returns the first child Text node, if any, or +nil+ otherwise.
- # This method returns the actual +Text+ node, rather than the String content.
- # doc = Document.new "<p>some text <b>this is bold!</b> more text</p>"
- # # The element 'p' has two text elements, "some text " and " more text".
- # doc.root.get_text.value #-> "some text "
+ # :call-seq:
+ # get_text(xpath = nil) -> text_node or nil
+ #
+ # Returns the first text node child in a specified element, if it exists,
+ # +nil+ otherwise.
+ #
+ # With no argument, returns the first text node from +self+:
+ #
+ # d = REXML::Document.new "<p>some text <b>this is bold!</b> more text</p>"
+ # d.root.get_text.class # => REXML::Text
+ # d.root.get_text # => "some text "
+ #
+ # With argument +xpath+, returns the first text node from the element
+ # that matches +xpath+:
+ #
+ # d.root.get_text(1) # => "this is bold!"
+ #
def get_text path = nil
rv = nil
if path
@@ -470,26 +1061,31 @@ def get_text path = nil
return rv
end
- # Sets the first Text child of this object. See text() for a
- # discussion about Text children.
- #
- # If a Text child already exists, the child is replaced by this
- # content. This means that Text content can be deleted by calling
- # this method with a nil argument. In this case, the next Text
- # child becomes the first Text child. In no case is the order of
- # any siblings disturbed.
- # text::
- # If a String, a new Text child is created and added to
- # this Element as the first Text child. If Text, the text is set
- # as the first Child element. If nil, then any existing first Text
- # child is removed.
- # Returns:: this Element.
- # doc = Document.new '<a><b/></a>'
- # doc.root.text = 'Sean' #-> '<a><b/>Sean</a>'
- # doc.root.text = 'Elliott' #-> '<a><b/>Elliott</a>'
- # doc.root.add_element 'c' #-> '<a><b/>Elliott<c/></a>'
- # doc.root.text = 'Russell' #-> '<a><b/>Russell<c/></a>'
- # doc.root.text = nil #-> '<a><b/><c/></a>'
+ # :call-seq:
+ # text = string -> string
+ # text = nil -> nil
+ #
+ # Adds, replaces, or removes the first text node child in the element.
+ #
+ # With string argument +string+,
+ # creates a new \REXML::Text node containing that string,
+ # honoring the current settings for whitespace and row,
+ # then places the node as the first text child in the element;
+ # returns +string+.
+ #
+ # If the element has no text child, the text node is added:
+ #
+ # d = REXML::Document.new '<a><b/></a>'
+ # d.root.text = 'foo' #-> '<a><b/>foo</a>'
+ #
+ # If the element has a text child, it is replaced:
+ #
+ # d.root.text = 'bar' #-> '<a><b/>bar</a>'
+ #
+ # With argument +nil+, removes the first text child:
+ #
+ # d.root.text = nil #-> '<a><b/><c/></a>'
+ #
def text=( text )
if text.kind_of? String
text = Text.new( text, whitespace(), nil, raw() )
@@ -509,17 +1105,45 @@ def text=( text )
return self
end
- # A helper method to add a Text child. Actual Text instances can
- # be added with regular Parent methods, such as add() and <<()
- # text::
- # if a String, a new Text instance is created and added
- # to the parent. If Text, the object is added directly.
- # Returns:: this Element
- # e = Element.new('a') #-> <e/>
- # e.add_text 'foo' #-> <e>foo</e>
- # e.add_text Text.new(' bar') #-> <e>foo bar</e>
- # Note that at the end of this example, the branch has <b>3</b> nodes; the 'e'
- # element and <b>2</b> Text node children.
+ # :call-seq:
+ # add_text(string) -> nil
+ # add_text(text_node) -> self
+ #
+ # Adds text to the element.
+ #
+ # When string argument +string+ is given, returns +nil+.
+ #
+ # If the element has no child text node,
+ # creates a \REXML::Text object using the string,
+ # honoring the current settings for whitespace and raw,
+ # then adds that node to the element:
+ #
+ # d = REXML::Document.new('<a><b/></a>')
+ # a = d.root
+ # a.add_text('foo')
+ # a.to_a # => [<b/>, "foo"]
+ #
+ # If the element has child text nodes,
+ # appends the string to the _last_ text node:
+ #
+ # d = REXML::Document.new('<a>foo<b/>bar</a>')
+ # a = d.root
+ # a.add_text('baz')
+ # a.to_a # => ["foo", <b/>, "barbaz"]
+ # a.add_text('baz')
+ # a.to_a # => ["foo", <b/>, "barbazbaz"]
+ #
+ # When text node argument +text_node+ is given,
+ # appends the node as the last text node in the element;
+ # returns +self+:
+ #
+ # d = REXML::Document.new('<a>foo<b/>bar</a>')
+ # a = d.root
+ # a.add_text(REXML::Text.new('baz'))
+ # a.to_a # => ["foo", <b/>, "bar", "baz"]
+ # a.add_text(REXML::Text.new('baz'))
+ # a.to_a # => ["foo", <b/>, "bar", "baz", "baz"]
+ #
def add_text( text )
if text.kind_of? String
if @children[-1].kind_of? Text
@@ -532,10 +1156,39 @@ def add_text( text )
return self
end
+ # :call-seq:
+ # node_type -> :element
+ #
+ # Returns symbol <tt>:element</tt>:
+ #
+ # d = REXML::Document.new('<a/>')
+ # a = d.root # => <a/>
+ # a.node_type # => :element
+ #
def node_type
:element
end
+ # :call-seq:
+ # xpath -> string_xpath
+ #
+ # Returns the string xpath to the element
+ # relative to the most distant parent:
+ #
+ # d = REXML::Document.new('<a><b><c/></b></a>')
+ # a = d.root # => <a> ... </>
+ # b = a[0] # => <b> ... </>
+ # c = b[0] # => <c/>
+ # d.xpath # => ""
+ # a.xpath # => "/a"
+ # b.xpath # => "/a/b"
+ # c.xpath # => "/a/b/c"
+ #
+ # If there is no parent, returns the expanded name of the element:
+ #
+ # e = REXML::Element.new('foo')
+ # e.xpath # => "foo"
+ #
def xpath
path_elements = []
cur = self
@@ -551,19 +1204,45 @@ def xpath
# Attributes #
#################################################
- # Fetches an attribute value or a child.
+ # :call-seq:
+ # [index] -> object
+ # [attr_name] -> attr_value
+ # [attr_sym] -> attr_value
+ #
+ # With integer argument +index+ given,
+ # returns the child at offset +index+, or +nil+ if none:
+ #
+ # d = REXML::Document.new '><root><a/>text<b/>more<c/></root>'
+ # root = d.root
+ # (0..root.size).each do |index|
+ # node = root[index]
+ # p "#{index}: #{node} (#{node.class})"
+ # end
+ #
+ # Output:
+ #
+ # "0: <a/> (REXML::Element)"
+ # "1: text (REXML::Text)"
+ # "2: <b/> (REXML::Element)"
+ # "3: more (REXML::Text)"
+ # "4: <c/> (REXML::Element)"
+ # "5: (NilClass)"
#
- # If String or Symbol is specified, it's treated as attribute
- # name. Attribute value as String or +nil+ is returned. This case
- # is shortcut of +attributes[name]+.
+ # With string argument +attr_name+ given,
+ # returns the string value for the given attribute name if it exists,
+ # otherwise +nil+:
#
- # If Integer is specified, it's treated as the index of
- # child. It returns Nth child.
+ # d = REXML::Document.new('<root attr="value"></root>')
+ # root = d.root
+ # root['attr'] # => "value"
+ # root['nosuch'] # => nil
+ #
+ # With symbol argument +attr_sym+ given,
+ # returns <tt>[attr_sym.to_s]</tt>:
+ #
+ # root[:attr] # => "value"
+ # root[:nosuch] # => nil
#
- # doc = REXML::Document.new("<a attr='1'><b/><c/></a>")
- # doc.root["attr"] # => "1"
- # doc.root.attributes["attr"] # => "1"
- # doc.root[1] # => <c/>
def [](name_or_index)
case name_or_index
when String
@@ -575,17 +1254,42 @@ def [](name_or_index)
end
end
+
+ # :call-seq:
+ # attribute(name, namespace = nil)
+ #
+ # Returns the string value for the given attribute name.
+ #
+ # With only argument +name+ given,
+ # returns the value of the named attribute if it exists, otherwise +nil+:
+ #
+ # xml_string = <<-EOT
+ # <root xmlns="ns0">
+ # <a xmlns="ns1" attr="value"></a>
+ # <b xmlns="ns2" attr="value"></b>
+ # <c attr="value"/>
+ # </root>
+ # EOT
+ # d = REXML::Document.new(xml_string)
+ # root = d.root
+ # a = root[1] # => <a xmlns='ns1' attr='value'/>
+ # a.attribute('attr') # => attr='value'
+ # a.attribute('nope') # => nil
+ #
+ # With arguments +name+ and +namespace+ given,
+ # returns the value of the named attribute if it exists, otherwise +nil+:
+ #
+ # xml_string = "<root xmlns:a='a' a:x='a:x' x='x'/>"
+ # document = REXML::Document.new(xml_string)
+ # document.root.attribute("x") # => x='x'
+ # document.root.attribute("x", "a") # => a:x='a:x'
+ #
def attribute( name, namespace=nil )
- prefix = nil
- if namespaces.respond_to? :key
- prefix = namespaces.key(namespace) if namespace
- else
- prefix = namespaces.index(namespace) if namespace
- end
+ prefix = namespaces.key(namespace) if namespace
prefix = nil if prefix == 'xmlns'
ret_val =
- attributes.get_attribute( "#{prefix ? prefix + ':' : ''}#{name}" )
+ attributes.get_attribute( prefix ? "#{prefix}:#{name}" : name )
return ret_val unless ret_val.nil?
return nil if prefix.nil?
@@ -598,29 +1302,46 @@ def attribute( name, namespace=nil )
end
- # Evaluates to +true+ if this element has any attributes set, false
- # otherwise.
+ # :call-seq:
+ # has_attributes? -> true or false
+ #
+ # Returns +true+ if the element has attributes, +false+ otherwise:
+ #
+ # d = REXML::Document.new('<root><a attr="val"/><b/></root>')
+ # a, b = *d.root
+ # a.has_attributes? # => true
+ # b.has_attributes? # => false
+ #
def has_attributes?
return !@attributes.empty?
end
+ # :call-seq:
+ # add_attribute(name, value) -> value
+ # add_attribute(attribute) -> attribute
+ #
# Adds an attribute to this element, overwriting any existing attribute
# by the same name.
- # key::
- # can be either an Attribute or a String. If an Attribute,
- # the attribute is added to the list of Element attributes. If String,
- # the argument is used as the name of the new attribute, and the value
- # parameter must be supplied.
- # value::
- # Required if +key+ is a String, and ignored if the first argument is
- # an Attribute. This is a String, and is used as the value
- # of the new Attribute. This should be the unnormalized value of the
- # attribute (without entities).
- # Returns:: the Attribute added
- # e = Element.new 'e'
- # e.add_attribute( 'a', 'b' ) #-> <e a='b'/>
- # e.add_attribute( 'x:a', 'c' ) #-> <e a='b' x:a='c'/>
- # e.add_attribute Attribute.new('b', 'd') #-> <e a='b' x:a='c' b='d'/>
+ #
+ # With string argument +name+ and object +value+ are given,
+ # adds the attribute created with that name and value:
+ #
+ # e = REXML::Element.new
+ # e.add_attribute('attr', 'value') # => "value"
+ # e['attr'] # => "value"
+ # e.add_attribute('attr', 'VALUE') # => "VALUE"
+ # e['attr'] # => "VALUE"
+ #
+ # With only attribute object +attribute+ given,
+ # adds the given attribute:
+ #
+ # a = REXML::Attribute.new('attr', 'value')
+ # e.add_attribute(a) # => attr='value'
+ # e['attr'] # => "value"
+ # a = REXML::Attribute.new('attr', 'VALUE')
+ # e.add_attribute(a) # => attr='VALUE'
+ # e['attr'] # => "VALUE"
+ #
def add_attribute( key, value=nil )
if key.kind_of? Attribute
@attributes << key
@@ -629,10 +1350,29 @@ def add_attribute( key, value=nil )
end
end
- # Add multiple attributes to this element.
- # hash:: is either a hash, or array of arrays
- # el.add_attributes( {"name1"=>"value1", "name2"=>"value2"} )
- # el.add_attributes( [ ["name1","value1"], ["name2"=>"value2"] ] )
+ # :call-seq:
+ # add_attributes(hash) -> hash
+ # add_attributes(array)
+ #
+ # Adds zero or more attributes to the element;
+ # returns the argument.
+ #
+ # If hash argument +hash+ is given,
+ # each key must be a string;
+ # adds each attribute created with the key/value pair:
+ #
+ # e = REXML::Element.new
+ # h = {'foo' => 'bar', 'baz' => 'bat'}
+ # e.add_attributes(h)
+ #
+ # If argument +array+ is given,
+ # each array member must be a 2-element array <tt>[name, value];
+ # each name must be a string:
+ #
+ # e = REXML::Element.new
+ # a = [['foo' => 'bar'], ['baz' => 'bat']]
+ # e.add_attributes(a)
+ #
def add_attributes hash
if hash.kind_of? Hash
hash.each_pair {|key, value| @attributes[key] = value }
@@ -641,19 +1381,17 @@ def add_attributes hash
end
end
- # Removes an attribute
- # key::
- # either an Attribute or a String. In either case, the
- # attribute is found by matching the attribute name to the argument,
- # and then removed. If no attribute is found, no action is taken.
- # Returns::
- # the attribute removed, or nil if this Element did not contain
- # a matching attribute
- # e = Element.new('E')
- # e.add_attribute( 'name', 'Sean' ) #-> <E name='Sean'/>
- # r = e.add_attribute( 'sur:name', 'Russell' ) #-> <E name='Sean' sur:name='Russell'/>
- # e.delete_attribute( 'name' ) #-> <E sur:name='Russell'/>
- # e.delete_attribute( r ) #-> <E/>
+ # :call-seq:
+ # delete_attribute(name) -> removed_attribute or nil
+ #
+ # Removes a named attribute if it exists;
+ # returns the removed attribute if found, otherwise +nil+:
+ #
+ # e = REXML::Element.new('foo')
+ # e.add_attribute('bar', 'baz')
+ # e.delete_attribute('bar') # => <bar/>
+ # e.delete_attribute('bar') # => nil
+ #
def delete_attribute(key)
attr = @attributes.get_attribute(key)
attr.remove unless attr.nil?
@@ -663,26 +1401,80 @@ def delete_attribute(key)
# Other Utilities #
#################################################
- # Get an array of all CData children.
- # IMMUTABLE
+ # :call-seq:
+ # cdatas -> array_of_cdata_children
+ #
+ # Returns a frozen array of the REXML::CData children of the element:
+ #
+ # xml_string = <<-EOT
+ # <root>
+ # <![CDATA[foo]]>
+ # <![CDATA[bar]]>
+ # </root>
+ # EOT
+ # d = REXML::Document.new(xml_string)
+ # cds = d.root.cdatas # => ["foo", "bar"]
+ # cds.frozen? # => true
+ # cds.map {|cd| cd.class } # => [REXML::CData, REXML::CData]
+ #
def cdatas
find_all { |child| child.kind_of? CData }.freeze
end
- # Get an array of all Comment children.
- # IMMUTABLE
+ # :call-seq:
+ # comments -> array_of_comment_children
+ #
+ # Returns a frozen array of the REXML::Comment children of the element:
+ #
+ # xml_string = <<-EOT
+ # <root>
+ # <!--foo-->
+ # <!--bar-->
+ # </root>
+ # EOT
+ # d = REXML::Document.new(xml_string)
+ # cs = d.root.comments
+ # cs.frozen? # => true
+ # cs.map {|c| c.class } # => [REXML::Comment, REXML::Comment]
+ # cs.map {|c| c.to_s } # => ["foo", "bar"]
+ #
def comments
find_all { |child| child.kind_of? Comment }.freeze
end
- # Get an array of all Instruction children.
- # IMMUTABLE
+ # :call-seq:
+ # instructions -> array_of_instruction_children
+ #
+ # Returns a frozen array of the REXML::Instruction children of the element:
+ #
+ # xml_string = <<-EOT
+ # <root>
+ # <?target0 foo?>
+ # <?target1 bar?>
+ # </root>
+ # EOT
+ # d = REXML::Document.new(xml_string)
+ # is = d.root.instructions
+ # is.frozen? # => true
+ # is.map {|i| i.class } # => [REXML::Instruction, REXML::Instruction]
+ # is.map {|i| i.to_s } # => ["<?target0 foo?>", "<?target1 bar?>"]
+ #
def instructions
find_all { |child| child.kind_of? Instruction }.freeze
end
- # Get an array of all Text children.
- # IMMUTABLE
+ # :call-seq:
+ # texts -> array_of_text_children
+ #
+ # Returns a frozen array of the REXML::Text children of the element:
+ #
+ # xml_string = '<root><a/>text<b/>more<c/></root>'
+ # d = REXML::Document.new(xml_string)
+ # ts = d.root.texts
+ # ts.frozen? # => true
+ # ts.map {|t| t.class } # => [REXML::Text, REXML::Text]
+ # ts.map {|t| t.to_s } # => ["text", "more"]
+ #
def texts
find_all { |child| child.kind_of? Text }.freeze
end
@@ -713,7 +1505,7 @@ def write(output=$stdout, indent=-1, transitive=false, ie_hack=false)
Kernel.warn("#{self.class.name}.write is deprecated. See REXML::Formatters", uplevel: 1)
formatter = if indent > -1
if transitive
- require "rexml/formatters/transitive"
+ require_relative "formatters/transitive"
REXML::Formatters::Transitive.new( indent, ie_hack )
else
REXML::Formatters::Pretty.new( indent, ie_hack )
@@ -758,35 +1550,129 @@ def each_with_something( test, max=0, name=nil )
# XPath search support. You are expected to only encounter this class as
# the <tt>element.elements</tt> object. Therefore, you are
# _not_ expected to instantiate this yourself.
+ #
+ # xml_string = <<-EOT
+ # <?xml version="1.0" encoding="UTF-8"?>
+ # <bookstore>
+ # <book category="cooking">
+ # <title lang="en">Everyday Italian</title>
+ # <author>Giada De Laurentiis</author>
+ # <year>2005</year>
+ # <price>30.00</price>
+ # </book>
+ # <book category="children">
+ # <title lang="en">Harry Potter</title>
+ # <author>J K. Rowling</author>
+ # <year>2005</year>
+ # <price>29.99</price>
+ # </book>
+ # <book category="web">
+ # <title lang="en">XQuery Kick Start</title>
+ # <author>James McGovern</author>
+ # <author>Per Bothner</author>
+ # <author>Kurt Cagle</author>
+ # <author>James Linn</author>
+ # <author>Vaidyanathan Nagarajan</author>
+ # <year>2003</year>
+ # <price>49.99</price>
+ # </book>
+ # <book category="web" cover="paperback">
+ # <title lang="en">Learning XML</title>
+ # <author>Erik T. Ray</author>
+ # <year>2003</year>
+ # <price>39.95</price>
+ # </book>
+ # </bookstore>
+ # EOT
+ # d = REXML::Document.new(xml_string)
+ # elements = d.root.elements
+ # elements # => #<REXML::Elements @element=<bookstore> ... </>>
+ #
class Elements
include Enumerable
- # Constructor
- # parent:: the parent Element
+ # :call-seq:
+ # new(parent) -> new_elements_object
+ #
+ # Returns a new \Elements object with the given +parent+.
+ # Does _not_ assign <tt>parent.elements = self</tt>:
+ #
+ # d = REXML::Document.new(xml_string)
+ # eles = REXML::Elements.new(d.root)
+ # eles # => #<REXML::Elements @element=<bookstore> ... </>>
+ # eles == d.root.elements # => false
+ #
def initialize parent
@element = parent
end
- # Fetches a child element. Filters only Element children, regardless of
- # the XPath match.
- # index::
- # the search parameter. This is either an Integer, which
- # will be used to find the index'th child Element, or an XPath,
- # which will be used to search for the Element. <em>Because
- # of the nature of XPath searches, any element in the connected XML
- # document can be fetched through any other element.</em> <b>The
- # Integer index is 1-based, not 0-based.</b> This means that the first
- # child element is at index 1, not 0, and the +n+th element is at index
- # +n+, not <tt>n-1</tt>. This is because XPath indexes element children
- # starting from 1, not 0, and the indexes should be the same.
- # name::
- # optional, and only used in the first argument is an
- # Integer. In that case, the index'th child Element that has the
- # supplied name will be returned. Note again that the indexes start at 1.
- # Returns:: the first matching Element, or nil if no child matched
- # doc = Document.new '<a><b/><c id="1"/><c id="2"/><d/></a>'
- # doc.root.elements[1] #-> <b/>
- # doc.root.elements['c'] #-> <c id="1"/>
- # doc.root.elements[2,'c'] #-> <c id="2"/>
+ # :call-seq:
+ # parent
+ #
+ # Returns the parent element cited in creating the \Elements object.
+ # This element is also the default starting point for searching
+ # in the \Elements object.
+ #
+ # d = REXML::Document.new(xml_string)
+ # elements = REXML::Elements.new(d.root)
+ # elements.parent == d.root # => true
+ #
+ def parent
+ @element
+ end
+
+ # :call-seq:
+ # elements[index] -> element or nil
+ # elements[xpath] -> element or nil
+ # elements[n, name] -> element or nil
+ #
+ # Returns the first \Element object selected by the arguments,
+ # if any found, or +nil+ if none found.
+ #
+ # Notes:
+ # - The +index+ is 1-based, not 0-based, so that:
+ # - The first element has index <tt>1</tt>
+ # - The _nth_ element has index +n+.
+ # - The selection ignores non-\Element nodes.
+ #
+ # When the single argument +index+ is given,
+ # returns the element given by the index, if any; otherwise, +nil+:
+ #
+ # d = REXML::Document.new(xml_string)
+ # eles = d.root.elements
+ # eles # => #<REXML::Elements @element=<bookstore> ... </>>
+ # eles[1] # => <book category='cooking'> ... </>
+ # eles.size # => 4
+ # eles[4] # => <book category='web' cover='paperback'> ... </>
+ # eles[5] # => nil
+ #
+ # The node at this index is not an \Element, and so is not returned:
+ #
+ # eles = d.root.first.first # => <title lang='en'> ... </>
+ # eles.to_a # => ["Everyday Italian"]
+ # eles[1] # => nil
+ #
+ # When the single argument +xpath+ is given,
+ # returns the first element found via that +xpath+, if any; otherwise, +nil+:
+ #
+ # eles = d.root.elements # => #<REXML::Elements @element=<bookstore> ... </>>
+ # eles['/bookstore'] # => <bookstore> ... </>
+ # eles['//book'] # => <book category='cooking'> ... </>
+ # eles['//book [@category="children"]'] # => <book category='children'> ... </>
+ # eles['/nosuch'] # => nil
+ # eles['//nosuch'] # => nil
+ # eles['//book [@category="nosuch"]'] # => nil
+ # eles['.'] # => <bookstore> ... </>
+ # eles['..'].class # => REXML::Document
+ #
+ # With arguments +n+ and +name+ given,
+ # returns the _nth_ found element that has the given +name+,
+ # or +nil+ if there is no such _nth_ element:
+ #
+ # eles = d.root.elements # => #<REXML::Elements @element=<bookstore> ... </>>
+ # eles[1, 'book'] # => <book category='cooking'> ... </>
+ # eles[4, 'book'] # => <book category='web' cover='paperback'> ... </>
+ # eles[5, 'book'] # => nil
+ #
def []( index, name=nil)
if index.kind_of? Integer
raise "index (#{index}) must be >= 1" if index < 1
@@ -806,19 +1692,42 @@ def []( index, name=nil)
end
end
- # Sets an element, replacing any previous matching element. If no
- # existing element is found ,the element is added.
- # index:: Used to find a matching element to replace. See []().
- # element::
- # The element to replace the existing element with
- # the previous element
- # Returns:: nil if no previous element was found.
+ # :call-seq:
+ # elements[] = index, replacement_element -> replacement_element or nil
+ #
+ # Replaces or adds an element.
+ #
+ # When <tt>eles[index]</tt> exists, replaces it with +replacement_element+
+ # and returns +replacement_element+:
+ #
+ # d = REXML::Document.new(xml_string)
+ # eles = d.root.elements # => #<REXML::Elements @element=<bookstore> ... </>>
+ # eles[1] # => <book category='cooking'> ... </>
+ # eles[1] = REXML::Element.new('foo')
+ # eles[1] # => <foo/>
+ #
+ # Does nothing (or raises an exception)
+ # if +replacement_element+ is not an \Element:
+ # eles[2] # => <book category='web' cover='paperback'> ... </>
+ # eles[2] = REXML::Text.new('bar')
+ # eles[2] # => <book category='web' cover='paperback'> ... </>
+ #
+ # When <tt>eles[index]</tt> does not exist,
+ # adds +replacement_element+ to the element and returns
+ #
+ # d = REXML::Document.new(xml_string)
+ # eles = d.root.elements # => #<REXML::Elements @element=<bookstore> ... </>>
+ # eles.size # => 4
+ # eles[50] = REXML::Element.new('foo') # => <foo/>
+ # eles.size # => 5
+ # eles[5] # => <foo/>
+ #
+ # Does nothing (or raises an exception)
+ # if +replacement_element+ is not an \Element:
+ #
+ # eles[50] = REXML::Text.new('bar') # => "bar"
+ # eles.size # => 5
#
- # doc = Document.new '<a/>'
- # doc.root.elements[10] = Element.new('b') #-> <a><b/></a>
- # doc.root.elements[1] #-> <b/>
- # doc.root.elements[1] = Element.new('c') #-> <a><c/></a>
- # doc.root.elements['c'] = Element.new('d') #-> <a><d/></a>
def []=( index, element )
previous = self[index]
if previous.nil?
@@ -829,14 +1738,34 @@ def []=( index, element )
return previous
end
- # Returns +true+ if there are no +Element+ children, +false+ otherwise
+ # :call-seq:
+ # empty? -> true or false
+ #
+ # Returns +true+ if there are no children, +false+ otherwise.
+ #
+ # d = REXML::Document.new('')
+ # d.elements.empty? # => true
+ # d = REXML::Document.new(xml_string)
+ # d.elements.empty? # => false
+ #
def empty?
@element.find{ |child| child.kind_of? Element}.nil?
end
- # Returns the index of the supplied child (starting at 1), or -1 if
- # the element is not a child
- # element:: an +Element+ child
+ # :call-seq:
+ # index(element)
+ #
+ # Returns the 1-based index of the given +element+, if found;
+ # otherwise, returns -1:
+ #
+ # d = REXML::Document.new(xml_string)
+ # elements = d.root.elements
+ # ele_1, ele_2, ele_3, ele_4 = *elements
+ # elements.index(ele_4) # => 4
+ # elements.delete(ele_3)
+ # elements.index(ele_4) # => 3
+ # elements.index(ele_3) # => -1
+ #
def index element
rv = 0
found = @element.find do |child|
@@ -848,17 +1777,47 @@ def index element
return -1
end
- # Deletes a child Element
- # element::
- # Either an Element, which is removed directly; an
- # xpath, where the first matching child is removed; or an Integer,
- # where the n'th Element is removed.
- # Returns:: the removed child
- # doc = Document.new '<a><b/><c/><c id="1"/></a>'
- # b = doc.root.elements[1]
- # doc.root.elements.delete b #-> <a><c/><c id="1"/></a>
- # doc.elements.delete("a/c[@id='1']") #-> <a><c/></a>
- # doc.root.elements.delete 1 #-> <a/>
+ # :call-seq:
+ # delete(index) -> removed_element or nil
+ # delete(element) -> removed_element or nil
+ # delete(xpath) -> removed_element or nil
+ #
+ # Removes an element; returns the removed element, or +nil+ if none removed.
+ #
+ # With integer argument +index+ given,
+ # removes the child element at that offset:
+ #
+ # d = REXML::Document.new(xml_string)
+ # elements = d.root.elements
+ # elements.size # => 4
+ # elements[2] # => <book category='children'> ... </>
+ # elements.delete(2) # => <book category='children'> ... </>
+ # elements.size # => 3
+ # elements[2] # => <book category='web'> ... </>
+ # elements.delete(50) # => nil
+ #
+ # With element argument +element+ given,
+ # removes that child element:
+ #
+ # d = REXML::Document.new(xml_string)
+ # elements = d.root.elements
+ # ele_1, ele_2, ele_3, ele_4 = *elements
+ # elements.size # => 4
+ # elements[2] # => <book category='children'> ... </>
+ # elements.delete(ele_2) # => <book category='children'> ... </>
+ # elements.size # => 3
+ # elements[2] # => <book category='web'> ... </>
+ # elements.delete(ele_2) # => nil
+ #
+ # With string argument +xpath+ given,
+ # removes the first element found via that xpath:
+ #
+ # d = REXML::Document.new(xml_string)
+ # elements = d.root.elements
+ # elements.delete('//book') # => <book category='cooking'> ... </>
+ # elements.delete('//book [@category="children"]') # => <book category='children'> ... </>
+ # elements.delete('//nosuch') # => nil
+ #
def delete element
if element.kind_of? Element
@element.delete element
@@ -868,12 +1827,23 @@ def delete element
end
end
- # Removes multiple elements. Filters for Element children, regardless of
- # XPath matching.
- # xpath:: all elements matching this String path are removed.
- # Returns:: an Array of Elements that have been removed
- # doc = Document.new '<a><c/><c/><c/><c/></a>'
- # deleted = doc.elements.delete_all 'a/c' #-> [<c/>, <c/>, <c/>, <c/>]
+ # :call-seq:
+ # delete_all(xpath)
+ #
+ # Removes all elements found via the given +xpath+;
+ # returns the array of removed elements, if any, else +nil+.
+ #
+ # d = REXML::Document.new(xml_string)
+ # elements = d.root.elements
+ # elements.size # => 4
+ # deleted_elements = elements.delete_all('//book [@category="web"]')
+ # deleted_elements.size # => 2
+ # elements.size # => 2
+ # deleted_elements = elements.delete_all('//book')
+ # deleted_elements.size # => 2
+ # elements.size # => 0
+ # elements.delete_all('//book') # => []
+ #
def delete_all( xpath )
rv = []
XPath::each( @element, xpath) {|element|
@@ -886,15 +1856,68 @@ def delete_all( xpath )
return rv
end
- # Adds an element
- # element::
- # if supplied, is either an Element, String, or
- # Source (see Element.initialize). If not supplied or nil, a
- # new, default Element will be constructed
- # Returns:: the added Element
- # a = Element.new('a')
- # a.elements.add(Element.new('b')) #-> <a><b/></a>
- # a.elements.add('c') #-> <a><b/><c/></a>
+ # :call-seq:
+ # add -> new_element
+ # add(name) -> new_element
+ # add(element) -> element
+ #
+ # Adds an element; returns the element added.
+ #
+ # With no argument, creates and adds a new element.
+ # The new element has:
+ #
+ # - No name.
+ # - \Parent from the \Elements object.
+ # - Context from the that parent.
+ #
+ # Example:
+ #
+ # d = REXML::Document.new(xml_string)
+ # elements = d.root.elements
+ # parent = elements.parent # => <bookstore> ... </>
+ # parent.context = {raw: :all}
+ # elements.size # => 4
+ # new_element = elements.add # => </>
+ # elements.size # => 5
+ # new_element.name # => nil
+ # new_element.parent # => <bookstore> ... </>
+ # new_element.context # => {:raw=>:all}
+ #
+ # With string argument +name+, creates and adds a new element.
+ # The new element has:
+ #
+ # - Name +name+.
+ # - \Parent from the \Elements object.
+ # - Context from the that parent.
+ #
+ # Example:
+ #
+ # d = REXML::Document.new(xml_string)
+ # elements = d.root.elements
+ # parent = elements.parent # => <bookstore> ... </>
+ # parent.context = {raw: :all}
+ # elements.size # => 4
+ # new_element = elements.add('foo') # => <foo/>
+ # elements.size # => 5
+ # new_element.name # => "foo"
+ # new_element.parent # => <bookstore> ... </>
+ # new_element.context # => {:raw=>:all}
+ #
+ # With argument +element+,
+ # creates and adds a clone of the given +element+.
+ # The new element has name, parent, and context from the given +element+.
+ #
+ # d = REXML::Document.new(xml_string)
+ # elements = d.root.elements
+ # elements.size # => 4
+ # e0 = REXML::Element.new('foo')
+ # e1 = REXML::Element.new('bar', e0, {raw: :all})
+ # element = elements.add(e1) # => <bar/>
+ # elements.size # => 5
+ # element.name # => "bar"
+ # element.parent # => <bookstore> ... </>
+ # element.context # => {:raw=>:all}
+ #
def add element=nil
if element.nil?
Element.new("", self, @element.context)
@@ -909,24 +1932,55 @@ def add element=nil
alias :<< :add
- # Iterates through all of the child Elements, optionally filtering
- # them by a given XPath
- # xpath::
- # optional. If supplied, this is a String XPath, and is used to
- # filter the children, so that only matching children are yielded. Note
- # that XPaths are automatically filtered for Elements, so that
- # non-Element children will not be yielded
- # doc = Document.new '<a><b/><c/><d/>sean<b/><c/><d/></a>'
- # doc.root.elements.each {|e|p e} #-> Yields b, c, d, b, c, d elements
- # doc.root.elements.each('b') {|e|p e} #-> Yields b, b elements
- # doc.root.elements.each('child::node()') {|e|p e}
- # #-> Yields <b/>, <c/>, <d/>, <b/>, <c/>, <d/>
- # XPath.each(doc.root, 'child::node()', &block)
- # #-> Yields <b/>, <c/>, <d/>, sean, <b/>, <c/>, <d/>
+ # :call-seq:
+ # each(xpath = nil) {|element| ... } -> self
+ #
+ # Iterates over the elements.
+ #
+ # With no argument, calls the block with each element:
+ #
+ # d = REXML::Document.new(xml_string)
+ # elements = d.root.elements
+ # elements.each {|element| p element }
+ #
+ # Output:
+ #
+ # <book category='cooking'> ... </>
+ # <book category='children'> ... </>
+ # <book category='web'> ... </>
+ # <book category='web' cover='paperback'> ... </>
+ #
+ # With argument +xpath+, calls the block with each element
+ # that matches the given +xpath+:
+ #
+ # elements.each('//book [@category="web"]') {|element| p element }
+ #
+ # Output:
+ #
+ # <book category='web'> ... </>
+ # <book category='web' cover='paperback'> ... </>
+ #
def each( xpath=nil )
XPath::each( @element, xpath ) {|e| yield e if e.kind_of? Element }
end
+ # :call-seq:
+ # collect(xpath = nil) {|element| ... } -> array
+ #
+ # Iterates over the elements; returns the array of block return values.
+ #
+ # With no argument, iterates over all elements:
+ #
+ # d = REXML::Document.new(xml_string)
+ # elements = d.root.elements
+ # elements.collect {|element| element.size } # => [9, 9, 17, 9]
+ #
+ # With argument +xpath+, iterates over elements that match
+ # the given +xpath+:
+ #
+ # xpath = '//book [@category="web"]'
+ # elements.collect(xpath) {|element| element.size } # => [17, 9]
+ #
def collect( xpath=nil )
collection = []
XPath::each( @element, xpath ) {|e|
@@ -935,6 +1989,83 @@ def collect( xpath=nil )
collection
end
+ # :call-seq:
+ # inject(xpath = nil, initial = nil) -> object
+ #
+ # Calls the block with elements; returns the last block return value.
+ #
+ # With no argument, iterates over the elements, calling the block
+ # <tt>elements.size - 1</tt> times.
+ #
+ # - The first call passes the first and second elements.
+ # - The second call passes the first block return value and the third element.
+ # - The third call passes the second block return value and the fourth element.
+ # - And so on.
+ #
+ # In this example, the block returns the passed element,
+ # which is then the object argument to the next call:
+ #
+ # d = REXML::Document.new(xml_string)
+ # elements = d.root.elements
+ # elements.inject do |object, element|
+ # p [elements.index(object), elements.index(element)]
+ # element
+ # end
+ #
+ # Output:
+ #
+ # [1, 2]
+ # [2, 3]
+ # [3, 4]
+ #
+ # With the single argument +xpath+, calls the block only with
+ # elements matching that xpath:
+ #
+ # elements.inject('//book [@category="web"]') do |object, element|
+ # p [elements.index(object), elements.index(element)]
+ # element
+ # end
+ #
+ # Output:
+ #
+ # [3, 4]
+ #
+ # With argument +xpath+ given as +nil+
+ # and argument +initial+ also given,
+ # calls the block once for each element.
+ #
+ # - The first call passes the +initial+ and the first element.
+ # - The second call passes the first block return value and the second element.
+ # - The third call passes the second block return value and the third element.
+ # - And so on.
+ #
+ # In this example, the first object index is <tt>-1</tt>
+ #
+ # elements.inject(nil, 'Initial') do |object, element|
+ # p [elements.index(object), elements.index(element)]
+ # element
+ # end
+ #
+ # Output:
+ #
+ # [-1, 1]
+ # [1, 2]
+ # [2, 3]
+ # [3, 4]
+ #
+ # In this form the passed object can be used as an accumulator:
+ #
+ # elements.inject(nil, 0) do |total, element|
+ # total += element.size
+ # end # => 44
+ #
+ # With both arguments +xpath+ and +initial+ are given,
+ # calls the block only with elements matching that xpath:
+ #
+ # elements.inject('//book [@category="web"]', 0) do |total, element|
+ # total += element.size
+ # end # => 26
+ #
def inject( xpath=nil, initial=nil )
first = true
XPath::each( @element, xpath ) {|e|
@@ -950,23 +2081,39 @@ def inject( xpath=nil, initial=nil )
initial
end
- # Returns the number of +Element+ children of the parent object.
- # doc = Document.new '<a>sean<b/>elliott<b/>russell<b/></a>'
- # doc.root.size #-> 6, 3 element and 3 text nodes
- # doc.root.elements.size #-> 3
+ # :call-seq:
+ # size -> integer
+ #
+ # Returns the count of \Element children:
+ #
+ # d = REXML::Document.new '<a>sean<b/>elliott<b/>russell<b/></a>'
+ # d.root.elements.size # => 3 # Three elements.
+ # d.root.size # => 6 # Three elements plus three text nodes..
+ #
def size
count = 0
@element.each {|child| count+=1 if child.kind_of? Element }
count
end
- # Returns an Array of Element children. An XPath may be supplied to
- # filter the children. Only Element children are returned, even if the
- # supplied XPath matches non-Element children.
- # doc = Document.new '<a>sean<b/>elliott<c/></a>'
- # doc.root.elements.to_a #-> [ <b/>, <c/> ]
- # doc.root.elements.to_a("child::node()") #-> [ <b/>, <c/> ]
- # XPath.match(doc.root, "child::node()") #-> [ sean, <b/>, elliott, <c/> ]
+ # :call-seq:
+ # to_a(xpath = nil) -> array_of_elements
+ #
+ # Returns an array of element children (not including non-element children).
+ #
+ # With no argument, returns an array of all element children:
+ #
+ # d = REXML::Document.new '<a>sean<b/>elliott<c/></a>'
+ # elements = d.root.elements
+ # elements.to_a # => [<b/>, <c/>] # Omits non-element children.
+ # children = d.root.children
+ # children # => ["sean", <b/>, "elliott", <c/>] # Includes non-element children.
+ #
+ # With argument +xpath+, returns an array of element children
+ # that match the xpath:
+ #
+ # elements.to_a('//c') # => [<c/>]
+ #
def to_a( xpath=nil )
rv = XPath.match( @element, xpath )
return rv.find_all{|e| e.kind_of? Element} if xpath
@@ -988,36 +2135,89 @@ def literalize name
# A class that defines the set of Attributes of an Element and provides
# operations for accessing elements in that set.
class Attributes < Hash
- # Constructor
- # element:: the Element of which this is an Attribute
+
+ # :call-seq:
+ # new(element)
+ #
+ # Creates and returns a new \REXML::Attributes object.
+ # The element given by argument +element+ is stored,
+ # but its own attributes are not modified:
+ #
+ # ele = REXML::Element.new('foo')
+ # attrs = REXML::Attributes.new(ele)
+ # attrs.object_id == ele.attributes.object_id # => false
+ #
+ # Other instance methods in class \REXML::Attributes may refer to:
+ #
+ # - +element.document+.
+ # - +element.prefix+.
+ # - +element.expanded_name+.
+ #
def initialize element
@element = element
end
- # Fetches an attribute value. If you want to get the Attribute itself,
- # use get_attribute()
- # name:: an XPath attribute name. Namespaces are relevant here.
- # Returns::
- # the String value of the matching attribute, or +nil+ if no
- # matching attribute was found. This is the unnormalized value
- # (with entities expanded).
+ # :call-seq:
+ # [name] -> attribute_value or nil
+ #
+ # Returns the value for the attribute given by +name+,
+ # if it exists; otherwise +nil+.
+ # The value returned is the unnormalized attribute value,
+ # with entities expanded:
+ #
+ # xml_string = <<-EOT
+ # <root xmlns:foo="http://foo" xmlns:bar="http://bar">
+ # <ele foo:att='1' bar:att='2' att='<'/>
+ # </root>
+ # EOT
+ # d = REXML::Document.new(xml_string)
+ # ele = d.elements['//ele'] # => <a foo:att='1' bar:att='2' att='<'/>
+ # ele.attributes['att'] # => "<"
+ # ele.attributes['bar:att'] # => "2"
+ # ele.attributes['nosuch'] # => nil
+ #
+ # Related: get_attribute (returns an \Attribute object).
#
- # doc = Document.new "<a foo:att='1' bar:att='2' att='<'/>"
- # doc.root.attributes['att'] #-> '<'
- # doc.root.attributes['bar:att'] #-> '2'
def [](name)
attr = get_attribute(name)
return attr.value unless attr.nil?
return nil
end
+ # :call-seq:
+ # to_a -> array_of_attribute_objects
+ #
+ # Returns an array of \REXML::Attribute objects representing
+ # the attributes:
+ #
+ # xml_string = <<-EOT
+ # <root xmlns:foo="http://foo" xmlns:bar="http://bar">
+ # <ele foo:att='1' bar:att='2' att='<'/>
+ # </root>
+ # EOT
+ # d = REXML::Document.new(xml_string)
+ # ele = d.root.elements['//ele'] # => <a foo:att='1' bar:att='2' att='<'/>
+ # attrs = ele.attributes.to_a # => [foo:att='1', bar:att='2', att='<']
+ # attrs.first.class # => REXML::Attribute
+ #
def to_a
enum_for(:each_attribute).to_a
end
- # Returns the number of attributes the owning Element contains.
- # doc = Document "<a x='1' y='2' foo:x='3'/>"
- # doc.root.attributes.length #-> 3
+ # :call-seq:
+ # length
+ #
+ # Returns the count of attributes:
+ #
+ # xml_string = <<-EOT
+ # <root xmlns:foo="http://foo" xmlns:bar="http://bar">
+ # <ele foo:att='1' bar:att='2' att='<'/>
+ # </root>
+ # EOT
+ # d = REXML::Document.new(xml_string)
+ # ele = d.root.elements['//ele'] # => <a foo:att='1' bar:att='2' att='<'/>
+ # ele.attributes.length # => 3
+ #
def length
c = 0
each_attribute { c+=1 }
@@ -1025,14 +2225,30 @@ def length
end
alias :size :length
- # Iterates over the attributes of an Element. Yields actual Attribute
- # nodes, not String values.
+ # :call-seq:
+ # each_attribute {|attr| ... }
+ #
+ # Calls the given block with each \REXML::Attribute object:
+ #
+ # xml_string = <<-EOT
+ # <root xmlns:foo="http://foo" xmlns:bar="http://bar">
+ # <ele foo:att='1' bar:att='2' att='<'/>
+ # </root>
+ # EOT
+ # d = REXML::Document.new(xml_string)
+ # ele = d.root.elements['//ele'] # => <a foo:att='1' bar:att='2' att='<'/>
+ # ele.attributes.each_attribute do |attr|
+ # p [attr.class, attr]
+ # end
+ #
+ # Output:
+ #
+ # [REXML::Attribute, foo:att='1']
+ # [REXML::Attribute, bar:att='2']
+ # [REXML::Attribute, att='<']
#
- # doc = Document.new '<a x="1" y="2"/>'
- # doc.root.attributes.each_attribute {|attr|
- # p attr.expanded_name+" => "+attr.value
- # }
def each_attribute # :yields: attribute
+ return to_enum(__method__) unless block_given?
each_value do |val|
if val.kind_of? Attribute
yield val
@@ -1042,26 +2258,54 @@ def each_attribute # :yields: attribute
end
end
- # Iterates over each attribute of an Element, yielding the expanded name
- # and value as a pair of Strings.
+ # :call-seq:
+ # each {|expanded_name, value| ... }
+ #
+ # Calls the given block with each expanded-name/value pair:
+ #
+ # xml_string = <<-EOT
+ # <root xmlns:foo="http://foo" xmlns:bar="http://bar">
+ # <ele foo:att='1' bar:att='2' att='<'/>
+ # </root>
+ # EOT
+ # d = REXML::Document.new(xml_string)
+ # ele = d.root.elements['//ele'] # => <a foo:att='1' bar:att='2' att='<'/>
+ # ele.attributes.each do |expanded_name, value|
+ # p [expanded_name, value]
+ # end
+ #
+ # Output:
+ #
+ # ["foo:att", "1"]
+ # ["bar:att", "2"]
+ # ["att", "<"]
#
- # doc = Document.new '<a x="1" y="2"/>'
- # doc.root.attributes.each {|name, value| p name+" => "+value }
def each
+ return to_enum(__method__) unless block_given?
each_attribute do |attr|
yield [attr.expanded_name, attr.value]
end
end
- # Fetches an attribute
- # name::
- # the name by which to search for the attribute. Can be a
- # <tt>prefix:name</tt> namespace name.
- # Returns:: The first matching attribute, or nil if there was none. This
- # value is an Attribute node, not the String value of the attribute.
- # doc = Document.new '<a x:foo="1" foo="2" bar="3"/>'
- # doc.root.attributes.get_attribute("foo").value #-> "2"
- # doc.root.attributes.get_attribute("x:foo").value #-> "1"
+ # :call-seq:
+ # get_attribute(name) -> attribute_object or nil
+ #
+ # Returns the \REXML::Attribute object for the given +name+:
+ #
+ # xml_string = <<-EOT
+ # <root xmlns:foo="http://foo" xmlns:bar="http://bar">
+ # <ele foo:att='1' bar:att='2' att='<'/>
+ # </root>
+ # EOT
+ # d = REXML::Document.new(xml_string)
+ # ele = d.root.elements['//ele'] # => <a foo:att='1' bar:att='2' att='<'/>
+ # attrs = ele.attributes
+ # attrs.get_attribute('foo:att') # => foo:att='1'
+ # attrs.get_attribute('foo:att').class # => REXML::Attribute
+ # attrs.get_attribute('bar:att') # => bar:att='2'
+ # attrs.get_attribute('att') # => att='<'
+ # attrs.get_attribute('nosuch') # => nil
+ #
def get_attribute( name )
attr = fetch( name, nil )
if attr.nil?
@@ -1095,18 +2339,29 @@ def get_attribute( name )
return attr
end
- # Sets an attribute, overwriting any existing attribute value by the
- # same name. Namespace is significant.
- # name:: the name of the attribute
- # value::
- # (optional) If supplied, the value of the attribute. If
- # nil, any existing matching attribute is deleted.
- # Returns::
- # Owning element
- # doc = Document.new "<a x:foo='1' foo='3'/>"
- # doc.root.attributes['y:foo'] = '2'
- # doc.root.attributes['foo'] = '4'
- # doc.root.attributes['x:foo'] = nil
+ # :call-seq:
+ # [name] = value -> value
+ #
+ # When +value+ is non-+nil+,
+ # assigns that to the attribute for the given +name+,
+ # overwriting the previous value if it exists:
+ #
+ # xml_string = <<-EOT
+ # <root xmlns:foo="http://foo" xmlns:bar="http://bar">
+ # <ele foo:att='1' bar:att='2' att='<'/>
+ # </root>
+ # EOT
+ # d = REXML::Document.new(xml_string)
+ # ele = d.root.elements['//ele'] # => <a foo:att='1' bar:att='2' att='<'/>
+ # attrs = ele.attributes
+ # attrs['foo:att'] = '2' # => "2"
+ # attrs['baz:att'] = '3' # => "3"
+ #
+ # When +value+ is +nil+, deletes the attribute if it exists:
+ #
+ # attrs['baz:att'] = nil
+ # attrs.include?('baz:att') # => false
+ #
def []=( name, value )
if value.nil? # Delete the named attribute
attr = get_attribute(name)
@@ -1129,29 +2384,25 @@ def []=( name, value )
elsif old_attr.kind_of? Hash
old_attr[value.prefix] = value
elsif old_attr.prefix != value.prefix
- # Check for conflicting namespaces
- raise ParseException.new(
- "Namespace conflict in adding attribute \"#{value.name}\": "+
- "Prefix \"#{old_attr.prefix}\" = "+
- "\"#{@element.namespace(old_attr.prefix)}\" and prefix "+
- "\"#{value.prefix}\" = \"#{@element.namespace(value.prefix)}\"") if
- value.prefix != "xmlns" and old_attr.prefix != "xmlns" and
- @element.namespace( old_attr.prefix ) ==
- @element.namespace( value.prefix )
- store value.name, { old_attr.prefix => old_attr,
- value.prefix => value }
+ store value.name, {old_attr.prefix => old_attr,
+ value.prefix => value}
else
store value.name, value
end
return @element
end
- # Returns an array of Strings containing all of the prefixes declared
- # by this set of # attributes. The array does not include the default
+ # :call-seq:
+ # prefixes -> array_of_prefix_strings
+ #
+ # Returns an array of prefix strings in the attributes.
+ # The array does not include the default
# namespace declaration, if one exists.
- # doc = Document.new("<a xmlns='foo' xmlns:x='bar' xmlns:y='twee' "+
- # "z='glorp' p:k='gru'/>")
- # prefixes = doc.root.attributes.prefixes #-> ['x', 'y']
+ #
+ # xml_string = '<a xmlns="foo" xmlns:x="bar" xmlns:y="twee" z="glorp"/>'
+ # d = REXML::Document.new(xml_string)
+ # d.root.attributes.prefixes # => ["x", "y"]
+ #
def prefixes
ns = []
each_attribute do |attribute|
@@ -1168,6 +2419,15 @@ def prefixes
ns
end
+ # :call-seq:
+ # namespaces
+ #
+ # Returns a hash of name/value pairs for the namespaces:
+ #
+ # xml_string = '<a xmlns="foo" xmlns:x="bar" xmlns:y="twee" z="glorp"/>'
+ # d = REXML::Document.new(xml_string)
+ # d.root.attributes.namespaces # => {"xmlns"=>"foo", "x"=>"bar", "y"=>"twee"}
+ #
def namespaces
namespaces = {}
each_attribute do |attribute|
@@ -1184,16 +2444,34 @@ def namespaces
namespaces
end
- # Removes an attribute
- # attribute::
- # either a String, which is the name of the attribute to remove --
- # namespaces are significant here -- or the attribute to remove.
- # Returns:: the owning element
- # doc = Document.new "<a y:foo='0' x:foo='1' foo='3' z:foo='4'/>"
- # doc.root.attributes.delete 'foo' #-> <a y:foo='0' x:foo='1' z:foo='4'/>"
- # doc.root.attributes.delete 'x:foo' #-> <a y:foo='0' z:foo='4'/>"
- # attr = doc.root.attributes.get_attribute('y:foo')
- # doc.root.attributes.delete attr #-> <a z:foo='4'/>"
+ # :call-seq:
+ # delete(name) -> element
+ # delete(attribute) -> element
+ #
+ # Removes a specified attribute if it exists;
+ # returns the attributes' element.
+ #
+ # When string argument +name+ is given,
+ # removes the attribute of that name if it exists:
+ #
+ # xml_string = <<-EOT
+ # <root xmlns:foo="http://foo" xmlns:bar="http://bar">
+ # <ele foo:att='1' bar:att='2' att='<'/>
+ # </root>
+ # EOT
+ # d = REXML::Document.new(xml_string)
+ # ele = d.root.elements['//ele'] # => <a foo:att='1' bar:att='2' att='<'/>
+ # attrs = ele.attributes
+ # attrs.delete('foo:att') # => <ele bar:att='2' att='<'/>
+ # attrs.delete('foo:att') # => <ele bar:att='2' att='<'/>
+ #
+ # When attribute argument +attribute+ is given,
+ # removes that attribute if it exists:
+ #
+ # attr = REXML::Attribute.new('bar:att', '2')
+ # attrs.delete(attr) # => <ele att='<'/> # => <ele att='<'/>
+ # attrs.delete(attr) # => <ele att='<'/> # => <ele/>
+ #
def delete( attribute )
name = nil
prefix = nil
@@ -1221,19 +2499,48 @@ def delete( attribute )
@element
end
- # Adds an attribute, overriding any existing attribute by the
- # same name. Namespaces are significant.
- # attribute:: An Attribute
+ # :call-seq:
+ # add(attribute) -> attribute
+ #
+ # Adds attribute +attribute+, replacing the previous
+ # attribute of the same name if it exists;
+ # returns +attribute+:
+ #
+ # xml_string = <<-EOT
+ # <root xmlns:foo="http://foo" xmlns:bar="http://bar">
+ # <ele foo:att='1' bar:att='2' att='<'/>
+ # </root>
+ # EOT
+ # d = REXML::Document.new(xml_string)
+ # ele = d.root.elements['//ele'] # => <a foo:att='1' bar:att='2' att='<'/>
+ # attrs = ele.attributes
+ # attrs # => {"att"=>{"foo"=>foo:att='1', "bar"=>bar:att='2', ""=>att='<'}}
+ # attrs.add(REXML::Attribute.new('foo:att', '2')) # => foo:att='2'
+ # attrs.add(REXML::Attribute.new('baz', '3')) # => baz='3'
+ # attrs.include?('baz') # => true
+ #
def add( attribute )
self[attribute.name] = attribute
end
alias :<< :add
- # Deletes all attributes matching a name. Namespaces are significant.
- # name::
- # A String; all attributes that match this path will be removed
- # Returns:: an Array of the Attributes that were removed
+ # :call-seq:
+ # delete_all(name) -> array_of_removed_attributes
+ #
+ # Removes all attributes matching the given +name+;
+ # returns an array of the removed attributes:
+ #
+ # xml_string = <<-EOT
+ # <root xmlns:foo="http://foo" xmlns:bar="http://bar">
+ # <ele foo:att='1' bar:att='2' att='<'/>
+ # </root>
+ # EOT
+ # d = REXML::Document.new(xml_string)
+ # ele = d.root.elements['//ele'] # => <a foo:att='1' bar:att='2' att='<'/>
+ # attrs = ele.attributes
+ # attrs.delete_all('att') # => [att='<']
+ #
def delete_all( name )
rv = []
each_attribute { |attribute|
@@ -1243,11 +2550,23 @@ def delete_all( name )
return rv
end
- # The +get_attribute_ns+ method retrieves a method by its namespace
- # and name. Thus it is possible to reliably identify an attribute
- # even if an XML processor has changed the prefix.
+ # :call-seq:
+ # get_attribute_ns(namespace, name)
+ #
+ # Returns the \REXML::Attribute object among the attributes
+ # that matches the given +namespace+ and +name+:
+ #
+ # xml_string = <<-EOT
+ # <root xmlns:foo="http://foo" xmlns:bar="http://bar">
+ # <ele foo:att='1' bar:att='2' att='<'/>
+ # </root>
+ # EOT
+ # d = REXML::Document.new(xml_string)
+ # ele = d.root.elements['//ele'] # => <a foo:att='1' bar:att='2' att='<'/>
+ # attrs = ele.attributes
+ # attrs.get_attribute_ns('http://foo', 'att') # => foo:att='1'
+ # attrs.get_attribute_ns('http://foo', 'nosuch') # => nil
#
- # Method contributed by Henrik Martensson
def get_attribute_ns(namespace, name)
result = nil
each_attribute() { |attribute|
diff --git a/lib/rexml/entity.rb b/lib/rexml/entity.rb
index 97c7b6b42f..1ba5a7bb7b 100644
--- a/lib/rexml/entity.rb
+++ b/lib/rexml/entity.rb
@@ -1,7 +1,7 @@
# frozen_string_literal: false
-require 'rexml/child'
-require 'rexml/source'
-require 'rexml/xmltokens'
+require_relative 'child'
+require_relative 'source'
+require_relative 'xmltokens'
module REXML
class Entity < Child
@@ -12,6 +12,7 @@ class Entity < Child
EXTERNALID = "(?:(?:(SYSTEM)\\s+#{SYSTEMLITERAL})|(?:(PUBLIC)\\s+#{PUBIDLITERAL}\\s+#{SYSTEMLITERAL}))"
NDATADECL = "\\s+NDATA\\s+#{NAME}"
PEREFERENCE = "%#{NAME};"
+ PEREFERENCE_RE = /#{PEREFERENCE}/um
ENTITYVALUE = %Q{((?:"(?:[^%&"]|#{PEREFERENCE}|#{REFERENCE})*")|(?:'([^%&']|#{PEREFERENCE}|#{REFERENCE})*'))}
PEDEF = "(?:#{ENTITYVALUE}|#{EXTERNALID})"
ENTITYDEF = "(?:#{ENTITYVALUE}|(?:#{EXTERNALID}(#{NDATADECL})?))"
@@ -19,7 +20,7 @@ class Entity < Child
GEDECL = "<!ENTITY\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
ENTITYDECL = /\s*(?:#{GEDECL})|(?:#{PEDECL})/um
- attr_reader :name, :external, :ref, :ndata, :pubid
+ attr_reader :name, :external, :ref, :ndata, :pubid, :value
# Create a new entity. Simple entities can be constructed by passing a
# name, value to the constructor; this creates a generic, plain entity
@@ -68,14 +69,14 @@ def Entity::matches? string
end
# Evaluates to the unnormalized value of this entity; that is, replacing
- # all entities -- both %ent; and &ent; entities. This differs from
- # +value()+ in that +value+ only replaces %ent; entities.
+ # &ent; entities.
def unnormalized
- document.record_entity_expansion unless document.nil?
- v = value()
- return nil if v.nil?
- @unnormalized = Text::unnormalize(v, parent)
- @unnormalized
+ document&.record_entity_expansion
+
+ return nil if @value.nil?
+
+ @unnormalized = Text::unnormalize(@value, parent,
+ entity_expansion_text_limit: document&.entity_expansion_text_limit)
end
#once :unnormalized
@@ -90,7 +91,7 @@ def normalized
# object itself is valid.)
#
# out::
- # An object implementing <TT><<<TT> to which the entity will be
+ # An object implementing <TT><<</TT> to which the entity will be
# output
# indent::
# *DEPRECATED* and ignored
@@ -121,36 +122,6 @@ def to_s
write rv
rv
end
-
- PEREFERENCE_RE = /#{PEREFERENCE}/um
- # Returns the value of this entity. At the moment, only internal entities
- # are processed. If the value contains internal references (IE,
- # %blah;), those are replaced with their values. IE, if the doctype
- # contains:
- # <!ENTITY % foo "bar">
- # <!ENTITY yada "nanoo %foo; nanoo>
- # then:
- # doctype.entity('yada').value #-> "nanoo bar nanoo"
- def value
- if @value
- matches = @value.scan(PEREFERENCE_RE)
- rv = @value.clone
- if @parent
- sum = 0
- matches.each do |entity_reference|
- entity_value = @parent.entity( entity_reference[0] )
- if sum + entity_value.bytesize > Security.entity_expansion_text_limit
- raise "entity expansion has grown too large"
- else
- sum += entity_value.bytesize
- end
- rv.gsub!( /%#{entity_reference.join};/um, entity_value )
- end
- end
- return rv
- end
- nil
- end
end
# This is a set of entity constants -- the ones defined in the XML
diff --git a/lib/rexml/formatters/default.rb b/lib/rexml/formatters/default.rb
index c375f1468b..811b2ff3d5 100644
--- a/lib/rexml/formatters/default.rb
+++ b/lib/rexml/formatters/default.rb
@@ -1,4 +1,5 @@
# frozen_string_literal: false
+
module REXML
module Formatters
class Default
@@ -101,11 +102,14 @@ def write_cdata( node, output )
end
def write_instruction( node, output )
- output << Instruction::START.sub(/\\/u, '')
+ output << Instruction::START
output << node.target
- output << ' '
- output << node.content
- output << Instruction::STOP.sub(/\\/u, '')
+ content = node.content
+ if content
+ output << ' '
+ output << content
+ end
+ output << Instruction::STOP
end
end
end
diff --git a/lib/rexml/formatters/pretty.rb b/lib/rexml/formatters/pretty.rb
index a80274bdad..a838d8357e 100644
--- a/lib/rexml/formatters/pretty.rb
+++ b/lib/rexml/formatters/pretty.rb
@@ -1,5 +1,5 @@
-# frozen_string_literal: false
-require 'rexml/formatters/default'
+# frozen_string_literal: true
+require_relative 'default'
module REXML
module Formatters
@@ -58,7 +58,7 @@ def write_element(node, output)
skip = false
if compact
if node.children.inject(true) {|s,c| s & c.kind_of?(Text)}
- string = ""
+ string = +""
old_level = @level
@level = 0
node.children.each { |child| write( child, string ) }
@@ -111,7 +111,7 @@ def write_document( node, output )
# itself, then we don't need a carriage return... which makes this
# logic more complex.
node.children.each { |child|
- next if child == node.children[-1] and child.instance_of?(Text)
+ next if child.instance_of?(Text)
unless child == node.children[0] or child.instance_of?(Text) or
(child == node.children[1] and !node.children[0].writethis)
output << "\n"
diff --git a/lib/rexml/formatters/transitive.rb b/lib/rexml/formatters/transitive.rb
index 81e67f3274..5ff51e10f3 100644
--- a/lib/rexml/formatters/transitive.rb
+++ b/lib/rexml/formatters/transitive.rb
@@ -1,5 +1,5 @@
# frozen_string_literal: false
-require 'rexml/formatters/pretty'
+require_relative 'pretty'
module REXML
module Formatters
diff --git a/lib/rexml/functions.rb b/lib/rexml/functions.rb
index cd879fdd28..4c11461601 100644
--- a/lib/rexml/functions.rb
+++ b/lib/rexml/functions.rb
@@ -66,11 +66,11 @@ def Functions::count( node_set )
def Functions::id( object )
end
- # UNTESTED
- def Functions::local_name( node_set=nil )
- get_namespace( node_set ) do |node|
+ def Functions::local_name(node_set=nil)
+ get_namespace(node_set) do |node|
return node.local_name
end
+ ""
end
def Functions::namespace_uri( node_set=nil )
@@ -86,10 +86,14 @@ def Functions::name( node_set=nil )
# Helper method.
def Functions::get_namespace( node_set = nil )
if node_set == nil
- yield @@context[:node] if defined? @@context[:node].namespace
+ yield @@context[:node] if @@context[:node].respond_to?(:namespace)
else
if node_set.respond_to? :each
- node_set.each { |node| yield node if defined? node.namespace }
+ result = []
+ node_set.each do |node|
+ result << yield(node) if node.respond_to?(:namespace)
+ end
+ result
elsif node_set.respond_to? :namespace
yield node_set
end
@@ -131,22 +135,38 @@ def Functions::get_namespace( node_set = nil )
#
# An object of a type other than the four basic types is converted to a
# string in a way that is dependent on that type.
- def Functions::string( object=nil )
- #object = @context unless object
- if object.instance_of? Array
- string( object[0] )
- elsif defined? object.node_type
- if object.node_type == :attribute
+ def Functions::string( object=@@context[:node] )
+ if object.respond_to?(:node_type)
+ case object.node_type
+ when :attribute
object.value
- elsif object.node_type == :element || object.node_type == :document
+ when :element
string_value(object)
+ when :document
+ string_value(object.root)
+ when :processing_instruction
+ object.content
else
object.to_s
end
- elsif object.nil?
- return ""
else
- object.to_s
+ case object
+ when Array
+ string(object[0])
+ when Float
+ if object.nan?
+ "NaN"
+ else
+ integer = object.to_i
+ if object == integer
+ "%d" % integer
+ else
+ object.to_s
+ end
+ end
+ else
+ object.to_s
+ end
end
end
@@ -167,9 +187,12 @@ def Functions::string_value( o )
rv
end
- # UNTESTED
def Functions::concat( *objects )
- objects.join
+ concatenated = ""
+ objects.each do |object|
+ concatenated << string(object)
+ end
+ concatenated
end
# Fixed by Mike Stok
@@ -239,11 +262,10 @@ def Functions::string_length( string )
string(string).length
end
- # UNTESTED
def Functions::normalize_space( string=nil )
string = string(@@context[:node]) if string.nil?
if string.kind_of? Array
- string.collect{|x| string.to_s.strip.gsub(/\s+/um, ' ') if string}
+ string.collect{|x| x.to_s.strip.gsub(/\s+/um, ' ') if x}
else
string.to_s.strip.gsub(/\s+/um, ' ')
end
@@ -292,18 +314,23 @@ def Functions::translate( string, tr1, tr2 )
end
end
- # UNTESTED
- def Functions::boolean( object=nil )
- if object.kind_of? String
- if object =~ /\d+/u
- return object.to_f != 0
- else
- return object.size > 0
- end
- elsif object.kind_of? Array
- object = object.find{|x| x and true}
+ def Functions::boolean(object=@@context[:node])
+ case object
+ when true, false
+ object
+ when Float
+ return false if object.zero?
+ return false if object.nan?
+ true
+ when Numeric
+ not object.zero?
+ when String
+ not object.empty?
+ when Array
+ not object.empty?
+ else
+ object ? true : false
end
- return object ? true : false
end
# UNTESTED
@@ -357,25 +384,23 @@ def Functions::compare_language lang1, lang2
#
# an object of a type other than the four basic types is converted to a
# number in a way that is dependent on that type
- def Functions::number( object=nil )
- object = @@context[:node] unless object
+ def Functions::number(object=@@context[:node])
case object
when true
Float(1)
when false
Float(0)
when Array
- number(string( object ))
+ number(string(object))
when Numeric
object.to_f
else
- str = string( object )
- # If XPath ever gets scientific notation...
- #if str =~ /^\s*-?(\d*\.?\d+|\d+\.)([Ee]\d*)?\s*$/
- if str =~ /^\s*-?(\d*\.?\d+|\d+\.)\s*$/
- str.to_f
+ str = string(object)
+ case str.strip
+ when /\A\s*(-?(?:\d+(?:\.\d*)?|\.\d+))\s*\z/
+ $1.to_f
else
- (0.0 / 0.0)
+ Float::NAN
end
end
end
@@ -397,7 +422,7 @@ def Functions::round( number )
number = number(number)
begin
neg = number.negative?
- number = number.abs.round(half: :up)
+ number = number.abs.round
neg ? -number : number
rescue FloatDomainError
number
diff --git a/lib/rexml/instruction.rb b/lib/rexml/instruction.rb
index c4f65eefc1..318741f03b 100644
--- a/lib/rexml/instruction.rb
+++ b/lib/rexml/instruction.rb
@@ -1,13 +1,14 @@
# frozen_string_literal: false
-require "rexml/child"
-require "rexml/source"
+
+require_relative "child"
+require_relative "source"
module REXML
# Represents an XML Instruction; IE, <? ... ?>
# TODO: Add parent arg (3rd arg) to constructor
class Instruction < Child
- START = '<\?'
- STOP = '\?>'
+ START = "<?"
+ STOP = "?>"
# target is the "name" of the Instruction; IE, the "tag" in <?tag ...?>
# content is everything else.
@@ -17,20 +18,25 @@ class Instruction < Child
# @param target can be one of a number of things. If String, then
# the target of this instruction is set to this. If an Instruction,
# then the Instruction is shallowly cloned (target and content are
- # copied). If a Source, then the source is scanned and parsed for
- # an Instruction declaration.
+ # copied).
# @param content Must be either a String, or a Parent. Can only
# be a Parent if the target argument is a Source. Otherwise, this
# String is set as the content of this instruction.
def initialize(target, content=nil)
- if target.kind_of? String
+ case target
+ when String
super()
@target = target
@content = content
- elsif target.kind_of? Instruction
+ when Instruction
super(content)
@target = target.target
@content = target.content
+ else
+ message =
+ "processing instruction target must be String or REXML::Instruction: "
+ message << "<#{target.inspect}>"
+ raise ArgumentError, message
end
@content.strip! if @content
end
@@ -45,11 +51,13 @@ def clone
def write writer, indent=-1, transitive=false, ie_hack=false
Kernel.warn( "#{self.class.name}.write is deprecated", uplevel: 1)
indent(writer, indent)
- writer << START.sub(/\\/u, '')
+ writer << START
writer << @target
- writer << ' '
- writer << @content
- writer << STOP.sub(/\\/u, '')
+ if @content
+ writer << ' '
+ writer << @content
+ end
+ writer << STOP
end
# @return true if other is an Instruction, and the content and target
diff --git a/lib/rexml/light/node.rb b/lib/rexml/light/node.rb
index d58119a3a4..3dab885b0f 100644
--- a/lib/rexml/light/node.rb
+++ b/lib/rexml/light/node.rb
@@ -1,14 +1,6 @@
# frozen_string_literal: false
-require 'rexml/xmltokens'
-
-# [ :element, parent, name, attributes, children* ]
- # a = Node.new
- # a << "B" # => <a>B</a>
- # a.b # => <a>B<b/></a>
- # a.b[1] # => <a>B<b/><b/><a>
- # a.b[1]["x"] = "y" # => <a>B<b/><b x="y"/></a>
- # a.b[0].c # => <a>B<b><c/></b><b x="y"/></a>
- # a.b.c << "D" # => <a>B<b><c>D</c></b><b x="y"/></a>
+require_relative '../xmltokens'
+
module REXML
module Light
# Represents a tagged XML element. Elements are characterized by
diff --git a/lib/rexml/namespace.rb b/lib/rexml/namespace.rb
index 90ba7cc635..2e67252a51 100644
--- a/lib/rexml/namespace.rb
+++ b/lib/rexml/namespace.rb
@@ -1,5 +1,6 @@
-# frozen_string_literal: false
-require 'rexml/xmltokens'
+# frozen_string_literal: true
+
+require_relative 'xmltokens'
module REXML
# Adds named attributes to an object.
@@ -9,19 +10,33 @@ module Namespace
# The expanded name of the object, valid if name is set
attr_accessor :prefix
include XMLTokens
+ NAME_WITHOUT_NAMESPACE = /\A#{NCNAME_STR}\z/
NAMESPLIT = /^(?:(#{NCNAME_STR}):)?(#{NCNAME_STR})/u
# Sets the name and the expanded name
def name=( name )
@expanded_name = name
- name =~ NAMESPLIT
- if $1
- @prefix = $1
- else
+ if name.match?(NAME_WITHOUT_NAMESPACE)
@prefix = ""
@namespace = ""
+ @name = name
+ elsif name =~ NAMESPLIT
+ if $1
+ @prefix = $1
+ else
+ @prefix = ""
+ @namespace = ""
+ end
+ @name = $2
+ elsif name == ""
+ @prefix = nil
+ @namespace = nil
+ @name = nil
+ else
+ message = "name must be \#{PREFIX}:\#{LOCAL_NAME} or \#{LOCAL_NAME}: "
+ message += "<#{name.inspect}>"
+ raise ArgumentError, message
end
- @name = $2
end
# Compares names optionally WITH namespaces
diff --git a/lib/rexml/node.rb b/lib/rexml/node.rb
index 52337ade44..c771db70d1 100644
--- a/lib/rexml/node.rb
+++ b/lib/rexml/node.rb
@@ -1,7 +1,7 @@
# frozen_string_literal: false
-require "rexml/parseexception"
-require "rexml/formatters/pretty"
-require "rexml/formatters/default"
+require_relative "parseexception"
+require_relative "formatters/pretty"
+require_relative "formatters/default"
module REXML
# Represents a node in the tree. Nodes are never encountered except as
@@ -52,10 +52,14 @@ def parent?
# Visit all subnodes of +self+ recursively
def each_recursive(&block) # :yields: node
- self.elements.each {|node|
- block.call(node)
- node.each_recursive(&block)
- }
+ stack = []
+ each { |child| stack.unshift child if child.node_type == :element }
+ until stack.empty?
+ child = stack.pop
+ yield child
+ n = stack.size
+ child.each { |grandchild| stack.insert n, grandchild if grandchild.node_type == :element }
+ end
end
# Find (and return) first subnode (recursively) for which the block
diff --git a/lib/rexml/output.rb b/lib/rexml/output.rb
index 96dfea570e..88a5fb378d 100644
--- a/lib/rexml/output.rb
+++ b/lib/rexml/output.rb
@@ -1,5 +1,5 @@
# frozen_string_literal: false
-require 'rexml/encoding'
+require_relative 'encoding'
module REXML
class Output
diff --git a/lib/rexml/parent.rb b/lib/rexml/parent.rb
index 3bd0a96255..6a53b37a12 100644
--- a/lib/rexml/parent.rb
+++ b/lib/rexml/parent.rb
@@ -1,5 +1,5 @@
# frozen_string_literal: false
-require "rexml/child"
+require_relative "child"
module REXML
# A parent has children, and has methods for accessing them. The Parent
diff --git a/lib/rexml/parseexception.rb b/lib/rexml/parseexception.rb
index 7b16cd1a41..e57d05fd2e 100644
--- a/lib/rexml/parseexception.rb
+++ b/lib/rexml/parseexception.rb
@@ -29,6 +29,7 @@ def to_s
err << "\nLine: #{line}\n"
err << "Position: #{position}\n"
err << "Last 80 unconsumed characters:\n"
+ err.force_encoding("ASCII-8BIT")
err << @source.buffer[0..80].force_encoding("ASCII-8BIT").gsub(/\n/, ' ')
end
diff --git a/lib/rexml/parsers/baseparser.rb b/lib/rexml/parsers/baseparser.rb
index e7ef695912..a567e045d6 100644
--- a/lib/rexml/parsers/baseparser.rb
+++ b/lib/rexml/parsers/baseparser.rb
@@ -1,14 +1,40 @@
-# frozen_string_literal: false
-
-require "strscan"
-
-require 'rexml/parseexception'
-require 'rexml/undefinednamespaceexception'
-require 'rexml/source'
+# frozen_string_literal: true
+require_relative '../parseexception'
+require_relative '../undefinednamespaceexception'
+require_relative '../security'
+require_relative '../source'
require 'set'
+require "strscan"
module REXML
module Parsers
+ unless [].respond_to?(:tally)
+ module EnumerableTally
+ refine Enumerable do
+ def tally
+ counts = {}
+ each do |item|
+ counts[item] ||= 0
+ counts[item] += 1
+ end
+ counts
+ end
+ end
+ end
+ using EnumerableTally
+ end
+
+ if StringScanner::Version < "3.0.8"
+ module StringScannerCaptures
+ refine StringScanner do
+ def captures
+ values_at(*(1...size))
+ end
+ end
+ end
+ using StringScannerCaptures
+ end
+
# = Using the Pull Parser
# <em>This API is experimental, and subject to change.</em>
# parser = PullParser.new( "<a>text<b att='val'/>txet</a>" )
@@ -61,7 +87,7 @@ class BaseParser
XMLDECL_START = /\A<\?xml\s/u;
XMLDECL_PATTERN = /<\?xml\s+(.*?)\?>/um
INSTRUCTION_START = /\A<\?/u
- INSTRUCTION_PATTERN = /<\?(.*?)(\s+.*?)?\?>/um
+ INSTRUCTION_PATTERN = /<\?#{NAME}(\s+.*?)?\?>/um
TAG_MATCH = /\A<((?>#{QNAME_STR}))/um
CLOSE_MATCH = /\A\s*<\/(#{QNAME_STR})\s*>/um
@@ -98,7 +124,7 @@ class BaseParser
ENTITYDEF = "(?:#{ENTITYVALUE}|(?:#{EXTERNALID}(#{NDATADECL})?))"
PEDECL = "<!ENTITY\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
GEDECL = "<!ENTITY\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
- ENTITYDECL = /\s*(?:#{GEDECL})|(?:#{PEDECL})/um
+ ENTITYDECL = /\s*(?:#{GEDECL})|\s*(?:#{PEDECL})/um
NOTATIONDECL_START = /\A\s*<!NOTATION/um
EXTERNAL_ID_PUBLIC = /\A\s*PUBLIC\s+#{PUBIDLITERAL}\s+#{SYSTEMLITERAL}\s*/um
@@ -114,16 +140,33 @@ class BaseParser
"apos" => [/'/, "'", "'", /'/]
}
-
- ######################################################################
- # These are patterns to identify common markup errors, to make the
- # error messages more informative.
- ######################################################################
- MISSING_ATTRIBUTE_QUOTES = /^<#{QNAME_STR}\s+#{QNAME_STR}\s*=\s*[^"']/um
+ module Private
+ PEREFERENCE_PATTERN = /#{PEREFERENCE}/um
+ TAG_PATTERN = /((?>#{QNAME_STR}))\s*/um
+ CLOSE_PATTERN = /(#{QNAME_STR})\s*>/um
+ ATTLISTDECL_END = /\s+#{NAME}(?:#{ATTDEF})*\s*>/um
+ NAME_PATTERN = /#{NAME}/um
+ GEDECL_PATTERN = "\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
+ PEDECL_PATTERN = "\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
+ ENTITYDECL_PATTERN = /(?:#{GEDECL_PATTERN})|(?:#{PEDECL_PATTERN})/um
+ CARRIAGE_RETURN_NEWLINE_PATTERN = /\r\n?/
+ CHARACTER_REFERENCES = /�*((?:\d+)|(?:x[a-fA-F0-9]+));/
+ DEFAULT_ENTITIES_PATTERNS = {}
+ default_entities = ['gt', 'lt', 'quot', 'apos', 'amp']
+ default_entities.each do |term|
+ DEFAULT_ENTITIES_PATTERNS[term] = /&#{term};/
+ end
+ XML_PREFIXED_NAMESPACE = "http://www.w3.org/XML/1998/namespace"
+ end
+ private_constant :Private
def initialize( source )
self.stream = source
@listeners = []
+ @prefixes = Set.new
+ @entity_expansion_count = 0
+ @entity_expansion_limit = Security.entity_expansion_limit
+ @entity_expansion_text_limit = Security.entity_expansion_text_limit
end
def add_listener( listener )
@@ -131,15 +174,20 @@ def add_listener( listener )
end
attr_reader :source
+ attr_reader :entity_expansion_count
+ attr_writer :entity_expansion_limit
+ attr_writer :entity_expansion_text_limit
def stream=( source )
@source = SourceFactory.create_from( source )
@closed = nil
+ @have_root = false
@document_status = nil
@tags = []
@stack = []
@entities = []
- @nsstack = []
+ @namespaces = {"xml" => Private::XML_PREFIXED_NAMESPACE}
+ @namespaces_restore_stack = []
end
def position
@@ -189,6 +237,8 @@ def peek depth=0
# Returns the next event. This is a +PullEvent+ object.
def pull
+ @source.drop_parsed_content
+
pull_event.tap do |event|
@listeners.each do |listener|
listener.receive event
@@ -201,248 +251,280 @@ def pull_event
x, @closed = @closed, nil
return [ :end_element, x ]
end
- return [ :end_document ] if empty?
+ if empty?
+ if @document_status == :in_doctype
+ raise ParseException.new("Malformed DOCTYPE: unclosed", @source)
+ end
+ unless @tags.empty?
+ path = "/" + @tags.join("/")
+ raise ParseException.new("Missing end tag for '#{path}'", @source)
+ end
+ return [ :end_document ]
+ end
return @stack.shift if @stack.size > 0
#STDERR.puts @source.encoding
#STDERR.puts "BUFFER = #{@source.buffer.inspect}"
+
+ @source.ensure_buffer
if @document_status == nil
- word = @source.match( /\A((?:\s+)|(?:<[^>]*>))/um )
- word = word[1] unless word.nil?
- #STDERR.puts "WORD = #{word.inspect}"
- case word
- when COMMENT_START
- return [ :comment, @source.match( COMMENT_PATTERN, true )[1] ]
- when XMLDECL_START
- #STDERR.puts "XMLDECL"
- results = @source.match( XMLDECL_PATTERN, true )[1]
- version = VERSION.match( results )
- version = version[1] unless version.nil?
- encoding = ENCODING.match(results)
- encoding = encoding[1] unless encoding.nil?
- if need_source_encoding_update?(encoding)
- @source.encoding = encoding
- end
- if encoding.nil? and /\AUTF-16(?:BE|LE)\z/i =~ @source.encoding
- encoding = "UTF-16"
- end
- standalone = STANDALONE.match(results)
- standalone = standalone[1] unless standalone.nil?
- return [ :xmldecl, version, encoding, standalone ]
- when INSTRUCTION_START
- return [ :processing_instruction, *@source.match(INSTRUCTION_PATTERN, true)[1,2] ]
- when DOCTYPE_START
- base_error_message = "Malformed DOCTYPE"
- @source.match(DOCTYPE_START, true)
- @nsstack.unshift(curr_ns=Set.new)
- name = parse_name(base_error_message)
- if @source.match(/\A\s*\[/um, true)
- id = [nil, nil, nil]
- @document_status = :in_doctype
- elsif @source.match(/\A\s*>/um, true)
- id = [nil, nil, nil]
- @document_status = :after_doctype
- else
- id = parse_id(base_error_message,
- accept_external_id: true,
- accept_public_id: false)
- if id[0] == "SYSTEM"
- # For backward compatibility
- id[1], id[2] = id[2], nil
+ start_position = @source.position
+ if @source.match("<?", true)
+ return process_instruction
+ elsif @source.match("<!", true)
+ if @source.match("--", true)
+ md = @source.match(/(.*?)-->/um, true)
+ if md.nil?
+ raise REXML::ParseException.new("Unclosed comment", @source)
+ end
+ if /--|-\z/.match?(md[1])
+ raise REXML::ParseException.new("Malformed comment", @source)
end
- if @source.match(/\A\s*\[/um, true)
- @document_status = :in_doctype
- elsif @source.match(/\A\s*>/um, true)
+ return [ :comment, md[1] ]
+ elsif @source.match("DOCTYPE", true)
+ base_error_message = "Malformed DOCTYPE"
+ unless @source.match(/\s+/um, true)
+ if @source.match(">")
+ message = "#{base_error_message}: name is missing"
+ else
+ message = "#{base_error_message}: invalid name"
+ end
+ @source.position = start_position
+ raise REXML::ParseException.new(message, @source)
+ end
+ name = parse_name(base_error_message)
+ if @source.match(/\s*\[/um, true)
+ id = [nil, nil, nil]
+ @document_status = :in_doctype
+ elsif @source.match(/\s*>/um, true)
+ id = [nil, nil, nil]
@document_status = :after_doctype
+ @source.ensure_buffer
else
- message = "#{base_error_message}: garbage after external ID"
- raise REXML::ParseException.new(message, @source)
+ id = parse_id(base_error_message,
+ accept_external_id: true,
+ accept_public_id: false)
+ if id[0] == "SYSTEM"
+ # For backward compatibility
+ id[1], id[2] = id[2], nil
+ end
+ if @source.match(/\s*\[/um, true)
+ @document_status = :in_doctype
+ elsif @source.match(/\s*>/um, true)
+ @document_status = :after_doctype
+ @source.ensure_buffer
+ else
+ message = "#{base_error_message}: garbage after external ID"
+ raise REXML::ParseException.new(message, @source)
+ end
end
- end
- args = [:start_doctype, name, *id]
- if @document_status == :after_doctype
- @source.match(/\A\s*/um, true)
- @stack << [ :end_doctype ]
- end
- return args
- when /\A\s+/
- else
- @document_status = :after_doctype
- if @source.encoding == "UTF-8"
- @source.buffer.force_encoding(::Encoding::UTF_8)
+ args = [:start_doctype, name, *id]
+ if @document_status == :after_doctype
+ @source.match(/\s*/um, true)
+ @stack << [ :end_doctype ]
+ end
+ return args
+ else
+ message = "Invalid XML"
+ raise REXML::ParseException.new(message, @source)
end
end
end
if @document_status == :in_doctype
- md = @source.match(/\A\s*(.*?>)/um)
- case md[1]
- when SYSTEMENTITY
- match = @source.match( SYSTEMENTITY, true )[1]
- return [ :externalentity, match ]
-
- when ELEMENTDECL_START
- return [ :elementdecl, @source.match( ELEMENTDECL_PATTERN, true )[1] ]
-
- when ENTITY_START
- match = @source.match( ENTITYDECL, true ).to_a.compact
- match[0] = :entitydecl
- ref = false
- if match[1] == '%'
- ref = true
- match.delete_at 1
- end
- # Now we have to sort out what kind of entity reference this is
- if match[2] == 'SYSTEM'
- # External reference
- match[3] = match[3][1..-2] # PUBID
- match.delete_at(4) if match.size > 4 # Chop out NDATA decl
- # match is [ :entity, name, SYSTEM, pubid(, ndata)? ]
- elsif match[2] == 'PUBLIC'
- # External reference
- match[3] = match[3][1..-2] # PUBID
- match[4] = match[4][1..-2] # HREF
- match.delete_at(5) if match.size > 5 # Chop out NDATA decl
- # match is [ :entity, name, PUBLIC, pubid, href(, ndata)? ]
- else
- match[2] = match[2][1..-2]
- match.pop if match.size == 4
- # match is [ :entity, name, value ]
- end
- match << '%' if ref
- return match
- when ATTLISTDECL_START
- md = @source.match( ATTLISTDECL_PATTERN, true )
- raise REXML::ParseException.new( "Bad ATTLIST declaration!", @source ) if md.nil?
- element = md[1]
- contents = md[0]
-
- pairs = {}
- values = md[0].scan( ATTDEF_RE )
- values.each do |attdef|
- unless attdef[3] == "#IMPLIED"
- attdef.compact!
- val = attdef[3]
- val = attdef[4] if val == "#FIXED "
- pairs[attdef[0]] = val
- if attdef[0] =~ /^xmlns:(.*)/
- @nsstack[0] << $1
- end
+ @source.match(/\s*/um, true) # skip spaces
+ start_position = @source.position
+ if @source.match("<!", true)
+ if @source.match("ELEMENT", true)
+ md = @source.match(/(.*?)>/um, true)
+ raise REXML::ParseException.new( "Bad ELEMENT declaration!", @source ) if md.nil?
+ return [ :elementdecl, "<!ELEMENT" + md[1] ]
+ elsif @source.match("ENTITY", true)
+ match_data = @source.match(Private::ENTITYDECL_PATTERN, true)
+ unless match_data
+ raise REXML::ParseException.new("Malformed entity declaration", @source)
end
- end
- return [ :attlistdecl, element, pairs, contents ]
- when NOTATIONDECL_START
- base_error_message = "Malformed notation declaration"
- unless @source.match(/\A\s*<!NOTATION\s+/um, true)
- if @source.match(/\A\s*<!NOTATION\s*>/um)
- message = "#{base_error_message}: name is missing"
+ match = [:entitydecl, *match_data.captures.compact]
+ ref = false
+ if match[1] == '%'
+ ref = true
+ match.delete_at 1
+ end
+ # Now we have to sort out what kind of entity reference this is
+ if match[2] == 'SYSTEM'
+ # External reference
+ match[3] = match[3][1..-2] # PUBID
+ match.delete_at(4) if match.size > 4 # Chop out NDATA decl
+ # match is [ :entity, name, SYSTEM, pubid(, ndata)? ]
+ elsif match[2] == 'PUBLIC'
+ # External reference
+ match[3] = match[3][1..-2] # PUBID
+ match[4] = match[4][1..-2] # HREF
+ match.delete_at(5) if match.size > 5 # Chop out NDATA decl
+ # match is [ :entity, name, PUBLIC, pubid, href(, ndata)? ]
+ elsif Private::PEREFERENCE_PATTERN.match?(match[2])
+ raise REXML::ParseException.new("Parameter entity references forbidden in internal subset: #{match[2]}", @source)
else
- message = "#{base_error_message}: invalid declaration name"
+ match[2] = match[2][1..-2]
+ match.pop if match.size == 4
+ # match is [ :entity, name, value ]
end
- raise REXML::ParseException.new(message, @source)
- end
- name = parse_name(base_error_message)
- id = parse_id(base_error_message,
- accept_external_id: true,
- accept_public_id: true)
- unless @source.match(/\A\s*>/um, true)
- message = "#{base_error_message}: garbage before end >"
- raise REXML::ParseException.new(message, @source)
+ match << '%' if ref
+ return match
+ elsif @source.match("ATTLIST", true)
+ md = @source.match(Private::ATTLISTDECL_END, true)
+ raise REXML::ParseException.new( "Bad ATTLIST declaration!", @source ) if md.nil?
+ element = md[1]
+ contents = md[0]
+
+ pairs = {}
+ values = md[0].strip.scan( ATTDEF_RE )
+ values.each do |attdef|
+ unless attdef[3] == "#IMPLIED"
+ attdef.compact!
+ val = attdef[3]
+ val = attdef[4] if val == "#FIXED "
+ pairs[attdef[0]] = val
+ if attdef[0] =~ /^xmlns:(.*)/
+ @namespaces[$1] = val
+ end
+ end
+ end
+ return [ :attlistdecl, element, pairs, contents ]
+ elsif @source.match("NOTATION", true)
+ base_error_message = "Malformed notation declaration"
+ unless @source.match(/\s+/um, true)
+ if @source.match(">")
+ message = "#{base_error_message}: name is missing"
+ else
+ message = "#{base_error_message}: invalid name"
+ end
+ @source.position = start_position
+ raise REXML::ParseException.new(message, @source)
+ end
+ name = parse_name(base_error_message)
+ id = parse_id(base_error_message,
+ accept_external_id: true,
+ accept_public_id: true)
+ unless @source.match(/\s*>/um, true)
+ message = "#{base_error_message}: garbage before end >"
+ raise REXML::ParseException.new(message, @source)
+ end
+ return [:notationdecl, name, *id]
+ elsif md = @source.match(/--(.*?)-->/um, true)
+ case md[1]
+ when /--/, /-\z/
+ raise REXML::ParseException.new("Malformed comment", @source)
+ end
+ return [ :comment, md[1] ] if md
end
- return [:notationdecl, name, *id]
- when DOCTYPE_END
+ elsif match = @source.match(/(%.*?;)\s*/um, true)
+ return [ :externalentity, match[1] ]
+ elsif @source.match(/\]\s*>/um, true)
@document_status = :after_doctype
- @source.match( DOCTYPE_END, true )
return [ :end_doctype ]
end
+ if @document_status == :in_doctype
+ raise ParseException.new("Malformed DOCTYPE: invalid declaration", @source)
+ end
end
if @document_status == :after_doctype
- @source.match(/\A\s*/um, true)
+ @source.match(/\s*/um, true)
end
begin
- @source.read if @source.buffer.size<2
- if @source.buffer[0] == ?<
- if @source.buffer[1] == ?/
- @nsstack.shift
+ start_position = @source.position
+ if @source.match("<", true)
+ # :text's read_until may remain only "<" in buffer. In the
+ # case, buffer is empty here. So we need to fill buffer
+ # here explicitly.
+ @source.ensure_buffer
+ if @source.match("/", true)
+ @namespaces_restore_stack.pop
last_tag = @tags.pop
- #md = @source.match_to_consume( '>', CLOSE_MATCH)
- md = @source.match( CLOSE_MATCH, true )
+ md = @source.match(Private::CLOSE_PATTERN, true)
if md and !last_tag
message = "Unexpected top-level end tag (got '#{md[1]}')"
raise REXML::ParseException.new(message, @source)
end
if md.nil? or last_tag != md[1]
message = "Missing end tag for '#{last_tag}'"
- message << " (got '#{md[1]}')" if md
+ message += " (got '#{md[1]}')" if md
+ @source.position = start_position if md.nil?
raise REXML::ParseException.new(message, @source)
end
return [ :end_element, last_tag ]
- elsif @source.buffer[1] == ?!
- md = @source.match(/\A(\s*[^>]*>)/um)
+ elsif @source.match("!", true)
+ md = @source.match(/([^>]*>)/um)
#STDERR.puts "SOURCE BUFFER = #{source.buffer}, #{source.buffer.size}"
raise REXML::ParseException.new("Malformed node", @source) unless md
- if md[0][2] == ?-
- md = @source.match( COMMENT_PATTERN, true )
+ if md[0][0] == ?-
+ md = @source.match(/--(.*?)-->/um, true)
- case md[1]
- when /--/, /-\z/
+ if md.nil? || /--|-\z/.match?(md[1])
raise REXML::ParseException.new("Malformed comment", @source)
end
- return [ :comment, md[1] ] if md
+ return [ :comment, md[1] ]
else
- md = @source.match( CDATA_PATTERN, true )
+ md = @source.match(/\[CDATA\[(.*?)\]\]>/um, true)
return [ :cdata, md[1] ] if md
end
raise REXML::ParseException.new( "Declarations can only occur "+
"in the doctype declaration.", @source)
- elsif @source.buffer[1] == ??
- md = @source.match( INSTRUCTION_PATTERN, true )
- return [ :processing_instruction, md[1], md[2] ] if md
- raise REXML::ParseException.new( "Bad instruction declaration",
- @source)
+ elsif @source.match("?", true)
+ return process_instruction
else
# Get the next tag
- md = @source.match(TAG_MATCH, true)
+ md = @source.match(Private::TAG_PATTERN, true)
unless md
- # Check for missing attribute quotes
- raise REXML::ParseException.new("missing attribute quote", @source) if @source.match(MISSING_ATTRIBUTE_QUOTES )
+ @source.position = start_position
raise REXML::ParseException.new("malformed XML: missing tag start", @source)
end
+ tag = md[1]
@document_status = :in_element
- prefixes = Set.new
- prefixes << md[2] if md[2]
- @nsstack.unshift(curr_ns=Set.new)
- attributes, closed = parse_attributes(prefixes, curr_ns)
+ @prefixes.clear
+ @prefixes << md[2] if md[2]
+ push_namespaces_restore
+ attributes, closed = parse_attributes(@prefixes)
# Verify that all of the prefixes have been defined
- for prefix in prefixes
- unless @nsstack.find{|k| k.member?(prefix)}
+ for prefix in @prefixes
+ unless @namespaces.key?(prefix)
raise UndefinedNamespaceException.new(prefix,@source,self)
end
end
if closed
- @closed = md[1]
- @nsstack.shift
+ @closed = tag
+ pop_namespaces_restore
else
- @tags.push( md[1] )
+ if @tags.empty? and @have_root
+ raise ParseException.new("Malformed XML: Extra tag at the end of the document (got '<#{tag}')", @source)
+ end
+ @tags.push( tag )
end
- return [ :start_element, md[1], attributes ]
+ @have_root = true
+ return [ :start_element, tag, attributes ]
end
else
- md = @source.match( TEXT_PATTERN, true )
- if md[0].length == 0
- @source.match( /(\s+)/, true )
+ text = @source.read_until("<")
+ if text.chomp!("<")
+ @source.position -= "<".bytesize
end
- #STDERR.puts "GOT #{md[1].inspect}" unless md[0].length == 0
- #return [ :text, "" ] if md[0].length == 0
- # unnormalized = Text::unnormalize( md[1], self )
- # return PullEvent.new( :text, md[1], unnormalized )
- return [ :text, md[1] ]
+ if @tags.empty?
+ unless /\A\s*\z/.match?(text)
+ if @have_root
+ raise ParseException.new("Malformed XML: Extra content at the end of the document (got '#{text}')", @source)
+ else
+ raise ParseException.new("Malformed XML: Content at the start of the document (got '#{text}')", @source)
+ end
+ end
+ return pull_event if @have_root
+ end
+ return [ :text, text ]
end
rescue REXML::UndefinedNamespaceException
raise
rescue REXML::ParseException
raise
- rescue Exception, NameError => error
+ rescue => error
raise REXML::ParseException.new( "Exception parsing",
@source, self, (error ? error : $!) )
end
@@ -451,13 +533,13 @@ def pull_event
private :pull_event
def entity( reference, entities )
- value = nil
- value = entities[ reference ] if entities
- if not value
- value = DEFAULT_ENTITIES[ reference ]
- value = value[2] if value
- end
- unnormalize( value, entities ) if value
+ return unless entities
+
+ value = entities[ reference ]
+ return if value.nil?
+
+ record_entity_expansion
+ unnormalize( value, entities )
end
# Escapes all possible entities
@@ -478,35 +560,83 @@ def normalize( input, entities=nil, entity_filter=nil )
# Unescapes all possible entities
def unnormalize( string, entities=nil, filter=nil )
- rv = string.clone
- rv.gsub!( /\r\n?/, "\n" )
+ if string.include?("\r")
+ rv = string.gsub( Private::CARRIAGE_RETURN_NEWLINE_PATTERN, "\n" )
+ else
+ rv = string.dup
+ end
matches = rv.scan( REFERENCE_RE )
return rv if matches.size == 0
- rv.gsub!( /�*((?:\d+)|(?:x[a-fA-F0-9]+));/ ) {
+ rv.gsub!( Private::CHARACTER_REFERENCES ) {
m=$1
m = "0#{m}" if m[0] == ?x
[Integer(m)].pack('U*')
}
matches.collect!{|x|x[0]}.compact!
+ if filter
+ matches.reject! do |entity_reference|
+ filter.include?(entity_reference)
+ end
+ end
if matches.size > 0
- matches.each do |entity_reference|
- unless filter and filter.include?(entity_reference)
- entity_value = entity( entity_reference, entities )
- if entity_value
- re = /&#{entity_reference};/
- rv.gsub!( re, entity_value )
- else
- er = DEFAULT_ENTITIES[entity_reference]
- rv.gsub!( er[0], er[2] ) if er
+ matches.tally.each do |entity_reference, n|
+ entity_expansion_count_before = @entity_expansion_count
+ entity_value = entity( entity_reference, entities )
+ if entity_value
+ if n > 1
+ entity_expansion_count_delta =
+ @entity_expansion_count - entity_expansion_count_before
+ record_entity_expansion(entity_expansion_count_delta * (n - 1))
end
+ re = Private::DEFAULT_ENTITIES_PATTERNS[entity_reference] || /&#{entity_reference};/
+ rv.gsub!( re, entity_value )
+ if rv.bytesize > @entity_expansion_text_limit
+ raise "entity expansion has grown too large"
+ end
+ else
+ er = DEFAULT_ENTITIES[entity_reference]
+ rv.gsub!( er[0], er[2] ) if er
end
end
- rv.gsub!( /&/, '&' )
+ rv.gsub!( Private::DEFAULT_ENTITIES_PATTERNS['amp'], '&' )
end
rv
end
private
+ def add_namespace(prefix, uri)
+ @namespaces_restore_stack.last[prefix] = @namespaces[prefix]
+ if uri.nil?
+ @namespaces.delete(prefix)
+ else
+ @namespaces[prefix] = uri
+ end
+ end
+
+ def push_namespaces_restore
+ namespaces_restore = {}
+ @namespaces_restore_stack.push(namespaces_restore)
+ namespaces_restore
+ end
+
+ def pop_namespaces_restore
+ namespaces_restore = @namespaces_restore_stack.pop
+ namespaces_restore.each do |prefix, uri|
+ if uri.nil?
+ @namespaces.delete(prefix)
+ else
+ @namespaces[prefix] = uri
+ end
+ end
+ end
+
+ def record_entity_expansion(delta=1)
+ @entity_expansion_count += delta
+ if @entity_expansion_count > @entity_expansion_limit
+ raise "number of entity expansions exceeded, processing aborted."
+ end
+ end
+
def need_source_encoding_update?(xml_declaration_encoding)
return false if xml_declaration_encoding.nil?
return false if /\AUTF-16\z/i =~ xml_declaration_encoding
@@ -514,16 +644,16 @@ def need_source_encoding_update?(xml_declaration_encoding)
end
def parse_name(base_error_message)
- md = @source.match(/\A\s*#{NAME}/um, true)
+ md = @source.match(Private::NAME_PATTERN, true)
unless md
- if @source.match(/\A\s*\S/um)
+ if @source.match(/\S/um)
message = "#{base_error_message}: invalid name"
else
message = "#{base_error_message}: name is missing"
end
raise REXML::ParseException.new(message, @source)
end
- md[1]
+ md[0]
end
def parse_id(base_error_message,
@@ -592,88 +722,115 @@ def parse_id_invalid_details(accept_external_id:,
end
end
- def parse_attributes(prefixes, curr_ns)
- attributes = {}
- closed = false
- match_data = @source.match(/^(.*?)(\/)?>/um, true)
- if match_data.nil?
- message = "Start tag isn't ended"
- raise REXML::ParseException.new(message, @source)
+ def process_instruction
+ name = parse_name("Malformed XML: Invalid processing instruction node")
+ if @source.match(/\s+/um, true)
+ match_data = @source.match(/(.*?)\?>/um, true)
+ unless match_data
+ raise ParseException.new("Malformed XML: Unclosed processing instruction", @source)
+ end
+ content = match_data[1]
+ else
+ content = nil
+ unless @source.match("?>", true)
+ raise ParseException.new("Malformed XML: Unclosed processing instruction", @source)
+ end
end
-
- raw_attributes = match_data[1]
- closed = !match_data[2].nil?
- return attributes, closed if raw_attributes.nil?
- return attributes, closed if raw_attributes.empty?
-
- scanner = StringScanner.new(raw_attributes)
- until scanner.eos?
- if scanner.scan(/\s+/)
- break if scanner.eos?
+ if name == "xml"
+ if @document_status
+ raise ParseException.new("Malformed XML: XML declaration is not at the start", @source)
+ end
+ version = VERSION.match(content)
+ version = version[1] unless version.nil?
+ encoding = ENCODING.match(content)
+ encoding = encoding[1] unless encoding.nil?
+ if need_source_encoding_update?(encoding)
+ @source.encoding = encoding
+ end
+ if encoding.nil? and /\AUTF-16(?:BE|LE)\z/i =~ @source.encoding
+ encoding = "UTF-16"
end
+ standalone = STANDALONE.match(content)
+ standalone = standalone[1] unless standalone.nil?
+ return [ :xmldecl, version, encoding, standalone ]
+ end
+ [:processing_instruction, name, content]
+ end
- pos = scanner.pos
- loop do
- break if scanner.scan(ATTRIBUTE_PATTERN)
- unless scanner.scan(QNAME)
- message = "Invalid attribute name: <#{scanner.rest}>"
- raise REXML::ParseException.new(message, @source)
- end
- name = scanner[0]
- unless scanner.scan(/\s*=\s*/um)
+ def parse_attributes(prefixes)
+ attributes = {}
+ expanded_names = {}
+ closed = false
+ while true
+ if @source.match(">", true)
+ return attributes, closed
+ elsif @source.match("/>", true)
+ closed = true
+ return attributes, closed
+ elsif match = @source.match(QNAME, true)
+ name = match[1]
+ prefix = match[2]
+ local_part = match[3]
+
+ unless @source.match(/\s*=\s*/um, true)
message = "Missing attribute equal: <#{name}>"
raise REXML::ParseException.new(message, @source)
end
- quote = scanner.scan(/['"]/)
- unless quote
+ unless match = @source.match(/(['"])/, true)
message = "Missing attribute value start quote: <#{name}>"
raise REXML::ParseException.new(message, @source)
end
- unless scanner.scan(/.*#{Regexp.escape(quote)}/um)
- match_data = @source.match(/^(.*?)(\/)?>/um, true)
- if match_data
- scanner << "/" if closed
- scanner << ">"
- scanner << match_data[1]
- scanner.pos = pos
- closed = !match_data[2].nil?
- next
- end
- message =
- "Missing attribute value end quote: <#{name}>: <#{quote}>"
+ quote = match[1]
+ start_position = @source.position
+ value = @source.read_until(quote)
+ unless value.chomp!(quote)
+ @source.position = start_position
+ message = "Missing attribute value end quote: <#{name}>: <#{quote}>"
raise REXML::ParseException.new(message, @source)
end
- end
- name = scanner[1]
- prefix = scanner[2]
- local_part = scanner[3]
- # quote = scanner[4]
- value = scanner[5]
- if prefix == "xmlns"
- if local_part == "xml"
- if value != "http://www.w3.org/XML/1998/namespace"
- msg = "The 'xml' prefix must not be bound to any other namespace "+
+ @source.match(/\s*/um, true)
+ if prefix == "xmlns"
+ if local_part == "xml"
+ if value != Private::XML_PREFIXED_NAMESPACE
+ msg = "The 'xml' prefix must not be bound to any other namespace "+
+ "(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
+ raise REXML::ParseException.new( msg, @source, self )
+ end
+ elsif local_part == "xmlns"
+ msg = "The 'xmlns' prefix must not be declared "+
"(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
- raise REXML::ParseException.new( msg, @source, self )
+ raise REXML::ParseException.new( msg, @source, self)
end
- elsif local_part == "xmlns"
- msg = "The 'xmlns' prefix must not be declared "+
- "(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
- raise REXML::ParseException.new( msg, @source, self)
+ add_namespace(local_part, value)
+ elsif prefix
+ prefixes << prefix unless prefix == "xml"
end
- curr_ns << local_part
- elsif prefix
- prefixes << prefix unless prefix == "xml"
- end
- if attributes.has_key?(name)
- msg = "Duplicate attribute #{name.inspect}"
- raise REXML::ParseException.new(msg, @source, self)
- end
+ if attributes[name]
+ msg = "Duplicate attribute #{name.inspect}"
+ raise REXML::ParseException.new(msg, @source, self)
+ end
- attributes[name] = value
+ unless prefix == "xmlns"
+ uri = @namespaces[prefix]
+ expanded_name = [uri, local_part]
+ existing_prefix = expanded_names[expanded_name]
+ if existing_prefix
+ message = "Namespace conflict in adding attribute " +
+ "\"#{local_part}\": " +
+ "Prefix \"#{existing_prefix}\" = \"#{uri}\" and " +
+ "prefix \"#{prefix}\" = \"#{uri}\""
+ raise REXML::ParseException.new(message, @source, self)
+ end
+ expanded_names[expanded_name] = prefix
+ end
+
+ attributes[name] = value
+ else
+ message = "Invalid attribute name: <#{@source.buffer.split(%r{[/>\s]}).first}>"
+ raise REXML::ParseException.new(message, @source)
+ end
end
- return attributes, closed
end
end
end
diff --git a/lib/rexml/parsers/lightparser.rb b/lib/rexml/parsers/lightparser.rb
index f0601ae51b..bdc08276a9 100644
--- a/lib/rexml/parsers/lightparser.rb
+++ b/lib/rexml/parsers/lightparser.rb
@@ -1,7 +1,7 @@
# frozen_string_literal: false
-require 'rexml/parsers/streamparser'
-require 'rexml/parsers/baseparser'
-require 'rexml/light/node'
+require_relative 'streamparser'
+require_relative 'baseparser'
+require_relative '../light/node'
module REXML
module Parsers
diff --git a/lib/rexml/parsers/pullparser.rb b/lib/rexml/parsers/pullparser.rb
index 8c49217553..a331eff524 100644
--- a/lib/rexml/parsers/pullparser.rb
+++ b/lib/rexml/parsers/pullparser.rb
@@ -1,9 +1,9 @@
# frozen_string_literal: false
require 'forwardable'
-require 'rexml/parseexception'
-require 'rexml/parsers/baseparser'
-require 'rexml/xmltokens'
+require_relative '../parseexception'
+require_relative 'baseparser'
+require_relative '../xmltokens'
module REXML
module Parsers
@@ -47,6 +47,18 @@ def add_listener( listener )
@listeners << listener
end
+ def entity_expansion_count
+ @parser.entity_expansion_count
+ end
+
+ def entity_expansion_limit=( limit )
+ @parser.entity_expansion_limit = limit
+ end
+
+ def entity_expansion_text_limit=( limit )
+ @parser.entity_expansion_text_limit = limit
+ end
+
def each
while has_next?
yield self.pull
diff --git a/lib/rexml/parsers/sax2parser.rb b/lib/rexml/parsers/sax2parser.rb
index 1386f69c83..a51477de21 100644
--- a/lib/rexml/parsers/sax2parser.rb
+++ b/lib/rexml/parsers/sax2parser.rb
@@ -1,8 +1,8 @@
# frozen_string_literal: false
-require 'rexml/parsers/baseparser'
-require 'rexml/parseexception'
-require 'rexml/namespace'
-require 'rexml/text'
+require_relative 'baseparser'
+require_relative '../parseexception'
+require_relative '../namespace'
+require_relative '../text'
module REXML
module Parsers
@@ -22,6 +22,18 @@ def source
@parser.source
end
+ def entity_expansion_count
+ @parser.entity_expansion_count
+ end
+
+ def entity_expansion_limit=( limit )
+ @parser.entity_expansion_limit = limit
+ end
+
+ def entity_expansion_text_limit=( limit )
+ @parser.entity_expansion_text_limit = limit
+ end
+
def add_listener( listener )
@parser.add_listener( listener )
end
@@ -157,25 +169,8 @@ def parse
end
end
when :text
- #normalized = @parser.normalize( event[1] )
- #handle( :characters, normalized )
- copy = event[1].clone
-
- esub = proc { |match|
- if @entities.has_key?($1)
- @entities[$1].gsub(Text::REFERENCE, &esub)
- else
- match
- end
- }
-
- copy.gsub!( Text::REFERENCE, &esub )
- copy.gsub!( Text::NUMERICENTITY ) {|m|
- m=$1
- m = "0#{m}" if m[0] == ?x
- [Integer(m)].pack('U*')
- }
- handle( :characters, copy )
+ unnormalized = @parser.unnormalize( event[1], @entities )
+ handle( :characters, unnormalized )
when :entitydecl
handle_entitydecl( event )
when :processing_instruction, :comment, :attlistdecl,
@@ -264,6 +259,8 @@ def add( pair )
end
def get_namespace( prefix )
+ return nil if @namespace_stack.empty?
+
uris = (@namespace_stack.find_all { |ns| not ns[prefix].nil? }) ||
(@namespace_stack.find { |ns| not ns[nil].nil? })
uris[-1][prefix] unless uris.nil? or 0 == uris.size
diff --git a/lib/rexml/parsers/streamparser.rb b/lib/rexml/parsers/streamparser.rb
index f6a8bfa802..6c64d97893 100644
--- a/lib/rexml/parsers/streamparser.rb
+++ b/lib/rexml/parsers/streamparser.rb
@@ -1,5 +1,5 @@
# frozen_string_literal: false
-require "rexml/parsers/baseparser"
+require_relative "baseparser"
module REXML
module Parsers
@@ -7,37 +7,42 @@ class StreamParser
def initialize source, listener
@listener = listener
@parser = BaseParser.new( source )
- @tag_stack = []
+ @entities = {}
end
def add_listener( listener )
@parser.add_listener( listener )
end
+ def entity_expansion_count
+ @parser.entity_expansion_count
+ end
+
+ def entity_expansion_limit=( limit )
+ @parser.entity_expansion_limit = limit
+ end
+
+ def entity_expansion_text_limit=( limit )
+ @parser.entity_expansion_text_limit = limit
+ end
+
def parse
# entity string
while true
event = @parser.pull
case event[0]
when :end_document
- unless @tag_stack.empty?
- tag_path = "/" + @tag_stack.join("/")
- raise ParseException.new("Missing end tag for '#{tag_path}'",
- @parser.source)
- end
return
when :start_element
- @tag_stack << event[1]
attrs = event[2].each do |n, v|
event[2][n] = @parser.unnormalize( v )
end
@listener.tag_start( event[1], attrs )
when :end_element
@listener.tag_end( event[1] )
- @tag_stack.pop
when :text
- normalized = @parser.unnormalize( event[1] )
- @listener.text( normalized )
+ unnormalized = @parser.unnormalize( event[1], @entities )
+ @listener.text( unnormalized )
when :processing_instruction
@listener.instruction( *event[1,2] )
when :start_doctype
@@ -48,6 +53,7 @@ def parse
when :comment, :attlistdecl, :cdata, :xmldecl, :elementdecl
@listener.send( event[0].to_s, *event[1..-1] )
when :entitydecl, :notationdecl
+ @entities[ event[1] ] = event[2] if event.size == 3
@listener.send( event[0].to_s, event[1..-1] )
when :externalentity
entity_reference = event[1]
diff --git a/lib/rexml/parsers/treeparser.rb b/lib/rexml/parsers/treeparser.rb
index fc0993c72a..4565a406cb 100644
--- a/lib/rexml/parsers/treeparser.rb
+++ b/lib/rexml/parsers/treeparser.rb
@@ -1,6 +1,6 @@
# frozen_string_literal: false
-require 'rexml/validation/validationexception'
-require 'rexml/undefinednamespaceexception'
+require_relative '../validation/validationexception'
+require_relative '../undefinednamespaceexception'
module REXML
module Parsers
@@ -15,8 +15,6 @@ def add_listener( listener )
end
def parse
- tag_stack = []
- in_doctype = false
entities = nil
begin
while true
@@ -24,32 +22,24 @@ def parse
#STDERR.puts "TREEPARSER GOT #{event.inspect}"
case event[0]
when :end_document
- unless tag_stack.empty?
- raise ParseException.new("No close tag for #{@build_context.xpath}",
- @parser.source, @parser)
- end
return
when :start_element
- tag_stack.push(event[1])
el = @build_context = @build_context.add_element( event[1] )
event[2].each do |key, value|
el.attributes[key]=Attribute.new(key,value,self)
end
when :end_element
- tag_stack.pop
@build_context = @build_context.parent
when :text
- if not in_doctype
- if @build_context[-1].instance_of? Text
- @build_context[-1] << event[1]
- else
- @build_context.add(
- Text.new(event[1], @build_context.whitespace, nil, true)
- ) unless (
- @build_context.ignore_whitespace_nodes and
- event[1].strip.size==0
- )
- end
+ if @build_context[-1].instance_of? Text
+ @build_context[-1] << event[1]
+ else
+ @build_context.add(
+ Text.new(event[1], @build_context.whitespace, nil, true)
+ ) unless (
+ @build_context.ignore_whitespace_nodes and
+ event[1].strip.size==0
+ )
end
when :comment
c = Comment.new( event[1] )
@@ -60,14 +50,12 @@ def parse
when :processing_instruction
@build_context.add( Instruction.new( event[1], event[2] ) )
when :end_doctype
- in_doctype = false
entities.each { |k,v| entities[k] = @build_context.entities[k].value }
@build_context = @build_context.parent
when :start_doctype
doctype = DocType.new( event[1..-1], @build_context )
@build_context = doctype
entities = {}
- in_doctype = true
when :attlistdecl
n = AttlistDecl.new( event[1..-1] )
@build_context.add( n )
diff --git a/lib/rexml/parsers/ultralightparser.rb b/lib/rexml/parsers/ultralightparser.rb
index 6571d119bd..e0029f43da 100644
--- a/lib/rexml/parsers/ultralightparser.rb
+++ b/lib/rexml/parsers/ultralightparser.rb
@@ -1,6 +1,6 @@
# frozen_string_literal: false
-require 'rexml/parsers/streamparser'
-require 'rexml/parsers/baseparser'
+require_relative 'streamparser'
+require_relative 'baseparser'
module REXML
module Parsers
diff --git a/lib/rexml/parsers/xpathparser.rb b/lib/rexml/parsers/xpathparser.rb
index 32b70bb798..bd3b6856eb 100644
--- a/lib/rexml/parsers/xpathparser.rb
+++ b/lib/rexml/parsers/xpathparser.rb
@@ -1,6 +1,7 @@
# frozen_string_literal: false
-require 'rexml/namespace'
-require 'rexml/xmltokens'
+
+require_relative '../namespace'
+require_relative '../xmltokens'
module REXML
module Parsers
@@ -22,7 +23,13 @@ def parse path
path.gsub!(/([\(\[])\s+/, '\1') # Strip ignorable spaces
path.gsub!( /\s+([\]\)])/, '\1')
parsed = []
- OrExpr(path, parsed)
+ rest = OrExpr(path, parsed)
+ if rest
+ unless rest.strip.empty?
+ raise ParseException.new("Garbage component exists at the end: " +
+ "<#{rest}>: <#{path}>")
+ end
+ end
parsed
end
@@ -32,108 +39,143 @@ def predicate path
parsed
end
- def abbreviate( path )
- path = path.kind_of?(String) ? parse( path ) : path
- string = ""
- document = false
- while path.size > 0
- op = path.shift
+ def abbreviate(path_or_parsed)
+ if path_or_parsed.kind_of?(String)
+ parsed = parse(path_or_parsed)
+ else
+ parsed = path_or_parsed
+ end
+ components = []
+ component = nil
+ while parsed.size > 0
+ op = parsed.shift
case op
when :node
+ component << "node()"
when :attribute
- string << "/" if string.size > 0
- string << "@"
+ component = "@"
+ components << component
when :child
- string << "/" if string.size > 0
+ component = ""
+ components << component
when :descendant_or_self
- string << "/"
+ next_op = parsed[0]
+ if next_op == :node
+ parsed.shift
+ component = ""
+ components << component
+ else
+ component = "descendant-or-self::"
+ components << component
+ end
when :self
- string << "."
+ next_op = parsed[0]
+ if next_op == :node
+ parsed.shift
+ components << "."
+ else
+ component = "self::"
+ components << component
+ end
when :parent
- string << ".."
+ next_op = parsed[0]
+ if next_op == :node
+ parsed.shift
+ components << ".."
+ else
+ component = "parent::"
+ components << component
+ end
when :any
- string << "*"
+ component << "*"
when :text
- string << "text()"
+ component << "text()"
when :following, :following_sibling,
:ancestor, :ancestor_or_self, :descendant,
:namespace, :preceding, :preceding_sibling
- string << "/" unless string.size == 0
- string << op.to_s.tr("_", "-")
- string << "::"
+ component = op.to_s.tr("_", "-") << "::"
+ components << component
when :qname
- prefix = path.shift
- name = path.shift
- string << prefix+":" if prefix.size > 0
- string << name
+ prefix = parsed.shift
+ name = parsed.shift
+ component << prefix+":" if prefix.size > 0
+ component << name
when :predicate
- string << '['
- string << predicate_to_string( path.shift ) {|x| abbreviate( x ) }
- string << ']'
+ component << '['
+ component << predicate_to_path(parsed.shift) {|x| abbreviate(x)}
+ component << ']'
when :document
- document = true
+ components << ""
when :function
- string << path.shift
- string << "( "
- string << predicate_to_string( path.shift[0] ) {|x| abbreviate( x )}
- string << " )"
+ component << parsed.shift
+ component << "( "
+ component << predicate_to_path(parsed.shift[0]) {|x| abbreviate(x)}
+ component << " )"
when :literal
- string << %Q{ "#{path.shift}" }
+ component << quote_literal(parsed.shift)
else
- string << "/" unless string.size == 0
- string << "UNKNOWN("
- string << op.inspect
- string << ")"
+ component << "UNKNOWN("
+ component << op.inspect
+ component << ")"
end
end
- string = "/"+string if document
- return string
+ case components
+ when [""]
+ "/"
+ when ["", ""]
+ "//"
+ else
+ components.join("/")
+ end
end
- def expand( path )
- path = path.kind_of?(String) ? parse( path ) : path
- string = ""
+ def expand(path_or_parsed)
+ if path_or_parsed.kind_of?(String)
+ parsed = parse(path_or_parsed)
+ else
+ parsed = path_or_parsed
+ end
+ path = ""
document = false
- while path.size > 0
- op = path.shift
+ while parsed.size > 0
+ op = parsed.shift
case op
when :node
- string << "node()"
+ path << "node()"
when :attribute, :child, :following, :following_sibling,
:ancestor, :ancestor_or_self, :descendant, :descendant_or_self,
:namespace, :preceding, :preceding_sibling, :self, :parent
- string << "/" unless string.size == 0
- string << op.to_s.tr("_", "-")
- string << "::"
+ path << "/" unless path.size == 0
+ path << op.to_s.tr("_", "-")
+ path << "::"
when :any
- string << "*"
+ path << "*"
when :qname
- prefix = path.shift
- name = path.shift
- string << prefix+":" if prefix.size > 0
- string << name
+ prefix = parsed.shift
+ name = parsed.shift
+ path << prefix+":" if prefix.size > 0
+ path << name
when :predicate
- string << '['
- string << predicate_to_string( path.shift ) { |x| expand(x) }
- string << ']'
+ path << '['
+ path << predicate_to_path( parsed.shift ) { |x| expand(x) }
+ path << ']'
when :document
document = true
else
- string << "/" unless string.size == 0
- string << "UNKNOWN("
- string << op.inspect
- string << ")"
+ path << "UNKNOWN("
+ path << op.inspect
+ path << ")"
end
end
- string = "/"+string if document
- return string
+ path = "/"+path if document
+ path
end
- def predicate_to_string( path, &block )
- string = ""
- case path[0]
+ def predicate_to_path(parsed, &block)
+ path = ""
+ case parsed[0]
when :and, :or, :mult, :plus, :minus, :neq, :eq, :lt, :gt, :lteq, :gteq, :div, :mod, :union
- op = path.shift
+ op = parsed.shift
case op
when :eq
op = "="
@@ -150,42 +192,56 @@ def predicate_to_string( path, &block )
when :union
op = "|"
end
- left = predicate_to_string( path.shift, &block )
- right = predicate_to_string( path.shift, &block )
- string << " "
- string << left
- string << " "
- string << op.to_s
- string << " "
- string << right
- string << " "
+ left = predicate_to_path( parsed.shift, &block )
+ right = predicate_to_path( parsed.shift, &block )
+ path << left
+ path << " "
+ path << op.to_s
+ path << " "
+ path << right
when :function
- path.shift
- name = path.shift
- string << name
- string << "( "
- string << predicate_to_string( path.shift, &block )
- string << " )"
+ parsed.shift
+ name = parsed.shift
+ path << name
+ path << "("
+ parsed.shift.each_with_index do |argument, i|
+ path << ", " if i > 0
+ path << predicate_to_path(argument, &block)
+ end
+ path << ")"
when :literal
- path.shift
- string << " "
- string << path.shift.inspect
- string << " "
+ parsed.shift
+ path << quote_literal(parsed.shift)
else
- string << " "
- string << yield( path )
- string << " "
+ path << yield( parsed )
end
- return string.squeeze(" ")
+ return path.squeeze(" ")
end
+ # For backward compatibility
+ alias_method :preciate_to_string, :predicate_to_path
private
+ def quote_literal( literal )
+ case literal
+ when String
+ # XPath 1.0 does not support escape characters.
+ # Assumes literal does not contain both single and double quotes.
+ if literal.include?("'")
+ "\"#{literal}\""
+ else
+ "'#{literal}'"
+ end
+ else
+ literal.inspect
+ end
+ end
+
#LocationPath
# | RelativeLocationPath
# | '/' RelativeLocationPath?
# | '//' RelativeLocationPath
def LocationPath path, parsed
- path = path.strip
+ path = path.lstrip
if path[0] == ?/
parsed << :document
if path[1] == ?/
@@ -209,7 +265,12 @@ def LocationPath path, parsed
# | RelativeLocationPath '//' Step
AXIS = /^(ancestor|ancestor-or-self|attribute|child|descendant|descendant-or-self|following|following-sibling|namespace|parent|preceding|preceding-sibling|self)::/
def RelativeLocationPath path, parsed
- while path.size > 0
+ loop do
+ original_path = path
+ path = path.lstrip
+
+ return original_path if path.empty?
+
# (axis or @ or <child::>) nodetest predicate >
# OR > / Step
# (. or ..) >
@@ -224,43 +285,44 @@ def RelativeLocationPath path, parsed
path = path[1..-1]
end
else
+ path_before_axis_specifier = path
+ parsed_not_abberviated = []
if path[0] == ?@
- parsed << :attribute
+ parsed_not_abberviated << :attribute
path = path[1..-1]
# Goto Nodetest
elsif path =~ AXIS
- parsed << $1.tr('-','_').intern
+ parsed_not_abberviated << $1.tr('-','_').intern
path = $'
# Goto Nodetest
else
- parsed << :child
+ parsed_not_abberviated << :child
end
- n = []
- path = NodeTest( path, n)
-
- if path[0] == ?[
- path = Predicate( path, n )
+ path_before_node_test = path
+ path = NodeTest(path, parsed_not_abberviated)
+ if path == path_before_node_test
+ return path_before_axis_specifier
end
+ path = Predicate(path, parsed_not_abberviated)
- parsed.concat(n)
+ parsed.concat(parsed_not_abberviated)
end
- if path.size > 0
- if path[0] == ?/
- if path[1] == ?/
- parsed << :descendant_or_self
- parsed << :node
- path = path[2..-1]
- else
- path = path[1..-1]
- end
- else
- return path
- end
+ original_path = path
+ path = path.lstrip
+ return original_path if path.empty?
+
+ return original_path if path[0] != ?/
+
+ if path[1] == ?/
+ parsed << :descendant_or_self
+ parsed << :node
+ path = path[2..-1]
+ else
+ path = path[1..-1]
end
end
- return path
end
# Returns a 1-1 map of the nodeset
@@ -269,15 +331,26 @@ def RelativeLocationPath path, parsed
# String, if a name match
#NodeTest
# | ('*' | NCNAME ':' '*' | QNAME) NameTest
- # | NODE_TYPE '(' ')' NodeType
+ # | '*' ':' NCNAME NameTest since XPath 2.0
+ # | NODE_TYPE '(' ')' NodeType
# | PI '(' LITERAL ')' PI
# | '[' expr ']' Predicate
- NCNAMETEST= /^(#{NCNAME_STR}):\*/u
+ PREFIX_WILDCARD = /^\*:(#{NCNAME_STR})/u
+ LOCAL_NAME_WILDCARD = /^(#{NCNAME_STR}):\*/u
QNAME = Namespace::NAMESPLIT
NODE_TYPE = /^(comment|text|node)\(\s*\)/m
PI = /^processing-instruction\(/
def NodeTest path, parsed
+ original_path = path
+ path = path.lstrip
case path
+ when PREFIX_WILDCARD
+ prefix = nil
+ name = $1
+ path = $'
+ parsed << :qname
+ parsed << prefix
+ parsed << name
when /^\*/
path = $'
parsed << :any
@@ -288,7 +361,9 @@ def NodeTest path, parsed
when PI
path = $'
literal = nil
- if path !~ /^\s*\)/
+ if path =~ /^\s*\)/
+ path = $'
+ else
path =~ LITERAL
literal = $1
path = $'
@@ -297,7 +372,7 @@ def NodeTest path, parsed
end
parsed << :processing_instruction
parsed << (literal || '')
- when NCNAMETEST
+ when LOCAL_NAME_WILDCARD
prefix = $1
path = $'
parsed << :namespace
@@ -310,13 +385,17 @@ def NodeTest path, parsed
parsed << :qname
parsed << prefix
parsed << name
+ else
+ path = original_path
end
return path
end
# Filters the supplied nodeset on the predicate(s)
def Predicate path, parsed
- return nil unless path[0] == ?[
+ original_path = path
+ path = path.lstrip
+ return original_path unless path[0] == ?[
predicates = []
while path[0] == ?[
path, expr = get_group(path)
@@ -421,13 +500,13 @@ def RelationalExpr path, parsed
rest
end
- #| AdditiveExpr ('+' | S '-') MultiplicativeExpr
+ #| AdditiveExpr ('+' | '-') MultiplicativeExpr
#| MultiplicativeExpr
def AdditiveExpr path, parsed
n = []
rest = MultiplicativeExpr( path, n )
if rest != path
- while rest =~ /^\s*(\+| -)\s*/
+ while rest =~ /^\s*(\+|-)\s*/
if $1[0] == ?+
n = [ :plus, n, [] ]
else
@@ -509,13 +588,14 @@ def UnionExpr path, parsed
#| LocationPath
#| FilterExpr ('/' | '//') RelativeLocationPath
def PathExpr path, parsed
- path =~ /^\s*/
- path = $'
+ path = path.lstrip
n = []
rest = FilterExpr( path, n )
if rest != path
if rest and rest[0] == ?/
- return RelativeLocationPath(rest, n)
+ rest = RelativeLocationPath(rest, n)
+ parsed.concat(n)
+ return rest
end
end
rest = LocationPath(rest, n) if rest =~ /\A[\/\.\@\[\w*]/
@@ -527,8 +607,10 @@ def PathExpr path, parsed
#| PrimaryExpr
def FilterExpr path, parsed
n = []
- path = PrimaryExpr( path, n )
- path = Predicate(path, n) if path and path[0] == ?[
+ path_before_primary_expr = path
+ path = PrimaryExpr(path, n)
+ return path_before_primary_expr if path == path_before_primary_expr
+ path = Predicate(path, n)
parsed.concat(n)
path
end
diff --git a/lib/rexml/quickpath.rb b/lib/rexml/quickpath.rb
index 5d6c77ca38..a0466b25d9 100644
--- a/lib/rexml/quickpath.rb
+++ b/lib/rexml/quickpath.rb
@@ -1,6 +1,6 @@
# frozen_string_literal: false
-require 'rexml/functions'
-require 'rexml/xmltokens'
+require_relative 'functions'
+require_relative 'xmltokens'
module REXML
class QuickPath
diff --git a/lib/rexml/source.rb b/lib/rexml/source.rb
index af65cf4751..ff887fc080 100644
--- a/lib/rexml/source.rb
+++ b/lib/rexml/source.rb
@@ -1,8 +1,28 @@
# coding: US-ASCII
# frozen_string_literal: false
-require 'rexml/encoding'
+
+require "strscan"
+
+require_relative 'encoding'
module REXML
+ if StringScanner::Version < "1.0.0"
+ module StringScannerCheckScanString
+ refine StringScanner do
+ def check(pattern)
+ pattern = /#{Regexp.escape(pattern)}/ if pattern.is_a?(String)
+ super(pattern)
+ end
+
+ def scan(pattern)
+ pattern = /#{Regexp.escape(pattern)}/ if pattern.is_a?(String)
+ super(pattern)
+ end
+ end
+ end
+ using StringScannerCheckScanString
+ end
+
# Generates Source-s. USE THIS CLASS.
class SourceFactory
# Generates a Source object
@@ -30,18 +50,27 @@ def SourceFactory::create_from(arg)
# objects and provides consumption of text
class Source
include Encoding
- # The current buffer (what we're going to read next)
- attr_reader :buffer
# The line number of the last consumed text
attr_reader :line
attr_reader :encoding
+ module Private
+ SCANNER_RESET_SIZE = 100000
+ PRE_DEFINED_TERM_PATTERNS = {}
+ pre_defined_terms = ["'", '"', "<"]
+ pre_defined_terms.each do |term|
+ PRE_DEFINED_TERM_PATTERNS[term] = /#{Regexp.escape(term)}/
+ end
+ end
+ private_constant :Private
+
# Constructor
# @param arg must be a String, and should be a valid XML document
# @param encoding if non-null, sets the encoding of the source to this
# value, overriding all encoding detection
def initialize(arg, encoding=nil)
- @orig = @buffer = arg
+ @orig = arg
+ @scanner = StringScanner.new(@orig)
if encoding
self.encoding = encoding
else
@@ -50,6 +79,20 @@ def initialize(arg, encoding=nil)
@line = 0
end
+ # The current buffer (what we're going to read next)
+ def buffer
+ @scanner.rest
+ end
+
+ def drop_parsed_content
+ if @scanner.pos > Private::SCANNER_RESET_SIZE
+ @scanner.string = @scanner.rest
+ end
+ end
+
+ def buffer_encoding=(encoding)
+ @scanner.string.force_encoding(encoding)
+ end
# Inherited from Encoding
# Overridden to support optimized en/decoding
@@ -58,98 +101,78 @@ def encoding=(enc)
encoding_updated
end
- # Scans the source for a given pattern. Note, that this is not your
- # usual scan() method. For one thing, the pattern argument has some
- # requirements; for another, the source can be consumed. You can easily
- # confuse this method. Originally, the patterns were easier
- # to construct and this method more robust, because this method
- # generated search regexps on the fly; however, this was
- # computationally expensive and slowed down the entire REXML package
- # considerably, since this is by far the most commonly called method.
- # @param pattern must be a Regexp, and must be in the form of
- # /^\s*(#{your pattern, with no groups})(.*)/. The first group
- # will be returned; the second group is used if the consume flag is
- # set.
- # @param consume if true, the pattern returned will be consumed, leaving
- # everything after it in the Source.
- # @return the pattern, if found, or nil if the Source is empty or the
- # pattern is not found.
- def scan(pattern, cons=false)
- return nil if @buffer.nil?
- rv = @buffer.scan(pattern)
- @buffer = $' if cons and rv.size>0
- rv
+ def read(term = nil)
end
- def read
+ def read_until(term)
+ pattern = Private::PRE_DEFINED_TERM_PATTERNS[term] || /#{Regexp.escape(term)}/
+ data = @scanner.scan_until(pattern)
+ unless data
+ data = @scanner.rest
+ @scanner.pos = @scanner.string.bytesize
+ end
+ data
end
- def consume( pattern )
- @buffer = $' if pattern.match( @buffer )
+ def ensure_buffer
end
- def match_to( char, pattern )
- return pattern.match(@buffer)
+ def match(pattern, cons=false)
+ if cons
+ @scanner.scan(pattern).nil? ? nil : @scanner
+ else
+ @scanner.check(pattern).nil? ? nil : @scanner
+ end
end
- def match_to_consume( char, pattern )
- md = pattern.match(@buffer)
- @buffer = $'
- return md
+ def position
+ @scanner.pos
end
- def match(pattern, cons=false)
- md = pattern.match(@buffer)
- @buffer = $' if cons and md
- return md
+ def position=(pos)
+ @scanner.pos = pos
end
# @return true if the Source is exhausted
def empty?
- @buffer == ""
- end
-
- def position
- @orig.index( @buffer )
+ @scanner.eos?
end
# @return the current line in the source
def current_line
lines = @orig.split
- res = lines.grep @buffer[0..30]
+ res = lines.grep @scanner.rest[0..30]
res = res[-1] if res.kind_of? Array
lines.index( res ) if res
end
private
+
def detect_encoding
- buffer_encoding = @buffer.encoding
+ scanner_encoding = @scanner.rest.encoding
detected_encoding = "UTF-8"
begin
- @buffer.force_encoding("ASCII-8BIT")
- if @buffer[0, 2] == "\xfe\xff"
- @buffer[0, 2] = ""
+ @scanner.string.force_encoding("ASCII-8BIT")
+ if @scanner.scan(/\xfe\xff/n)
detected_encoding = "UTF-16BE"
- elsif @buffer[0, 2] == "\xff\xfe"
- @buffer[0, 2] = ""
+ elsif @scanner.scan(/\xff\xfe/n)
detected_encoding = "UTF-16LE"
- elsif @buffer[0, 3] == "\xef\xbb\xbf"
- @buffer[0, 3] = ""
+ elsif @scanner.scan(/\xef\xbb\xbf/n)
detected_encoding = "UTF-8"
end
ensure
- @buffer.force_encoding(buffer_encoding)
+ @scanner.string.force_encoding(scanner_encoding)
end
self.encoding = detected_encoding
end
def encoding_updated
if @encoding != 'UTF-8'
- @buffer = decode(@buffer)
+ @scanner.string = decode(@scanner.rest)
@to_utf = true
else
@to_utf = false
- @buffer.force_encoding ::Encoding::UTF_8
+ @scanner.string.force_encoding(::Encoding::UTF_8)
end
end
end
@@ -172,7 +195,7 @@ def initialize(arg, block_size=500, encoding=nil)
end
if !@to_utf and
- @buffer.respond_to?(:force_encoding) and
+ @orig.respond_to?(:force_encoding) and
@source.respond_to?(:external_encoding) and
@source.external_encoding != ::Encoding::UTF_8
@force_utf8 = true
@@ -181,65 +204,72 @@ def initialize(arg, block_size=500, encoding=nil)
end
end
- def scan(pattern, cons=false)
- rv = super
- # You'll notice that this next section is very similar to the same
- # section in match(), but just a liiittle different. This is
- # because it is a touch faster to do it this way with scan()
- # than the way match() does it; enough faster to warrant duplicating
- # some code
- if rv.size == 0
- until @buffer =~ pattern or @source.nil?
- begin
- @buffer << readline
- rescue Iconv::IllegalSequence
- raise
- rescue
- @source = nil
+ def read(term = nil, min_bytes = 1)
+ term = encode(term) if term
+ begin
+ str = readline(term)
+ @scanner << str
+ read_bytes = str.bytesize
+ begin
+ while read_bytes < min_bytes
+ str = readline(term)
+ @scanner << str
+ read_bytes += str.bytesize
end
+ rescue IOError
end
- rv = super
+ true
+ rescue Exception, NameError
+ @source = nil
+ false
end
- rv.taint
- rv
end
- def read
- begin
- @buffer << readline
- rescue Exception, NameError
- @source = nil
+ def read_until(term)
+ pattern = Private::PRE_DEFINED_TERM_PATTERNS[term] || /#{Regexp.escape(term)}/
+ term = encode(term)
+ until str = @scanner.scan_until(pattern)
+ break if @source.nil?
+ break if @source.eof?
+ @scanner << readline(term)
+ end
+ if str
+ read if @scanner.eos? and !@source.eof?
+ str
+ else
+ rest = @scanner.rest
+ @scanner.pos = @scanner.string.bytesize
+ rest
end
end
- def consume( pattern )
- match( pattern, true )
+ def ensure_buffer
+ read if @scanner.eos? && @source
end
def match( pattern, cons=false )
- rv = pattern.match(@buffer)
- @buffer = $' if cons and rv
- while !rv and @source
- begin
- @buffer << readline
- rv = pattern.match(@buffer)
- @buffer = $' if cons and rv
- rescue
- @source = nil
+ # To avoid performance issue, we need to increase bytes to read per scan
+ min_bytes = 1
+ while true
+ if cons
+ md = @scanner.scan(pattern)
+ else
+ md = @scanner.check(pattern)
end
+ break if md
+ return nil if pattern.is_a?(String)
+ return nil if @source.nil?
+ return nil unless read(nil, min_bytes)
+ min_bytes *= 2
end
- rv.taint
- rv
+
+ md.nil? ? nil : @scanner
end
def empty?
super and ( @source.nil? || @source.eof? )
end
- def position
- @er_source.pos rescue 0
- end
-
# @return the current line in the source
def current_line
begin
@@ -254,6 +284,7 @@ def current_line
end
rescue
end
+ @er_source.seek(pos)
rescue IOError
pos = -1
line = -1
@@ -262,8 +293,8 @@ def current_line
end
private
- def readline
- str = @source.readline(@line_break)
+ def readline(term = nil)
+ str = @source.readline(term || @line_break)
if @pending_buffer
if str.nil?
str = @pending_buffer
@@ -289,7 +320,7 @@ def encoding_updated
@source.set_encoding(@encoding, @encoding)
end
@line_break = encode(">")
- @pending_buffer, @buffer = @buffer, ""
+ @pending_buffer, @scanner.string = @scanner.rest, ""
@pending_buffer.force_encoding(@encoding)
super
end
diff --git a/lib/rexml/text.rb b/lib/rexml/text.rb
index 86269dea1e..997f77d3f5 100644
--- a/lib/rexml/text.rb
+++ b/lib/rexml/text.rb
@@ -1,10 +1,10 @@
-# frozen_string_literal: false
-require 'rexml/security'
-require 'rexml/entity'
-require 'rexml/doctype'
-require 'rexml/child'
-require 'rexml/doctype'
-require 'rexml/parseexception'
+# frozen_string_literal: true
+require_relative 'security'
+require_relative 'entity'
+require_relative 'doctype'
+require_relative 'child'
+require_relative 'doctype'
+require_relative 'parseexception'
module REXML
# Represents text nodes in an XML document
@@ -96,27 +96,28 @@ def initialize(arg, respect_whitespace=false, parent=nil, raw=nil,
@raw = false
@parent = nil
+ @entity_filter = nil
if parent
super( parent )
@raw = parent.raw
end
- @raw = raw unless raw.nil?
- @entity_filter = entity_filter
- clear_cache
-
if arg.kind_of? String
@string = arg.dup
- @string.squeeze!(" \n\t") unless respect_whitespace
elsif arg.kind_of? Text
- @string = arg.to_s
+ @string = arg.instance_variable_get(:@string).dup
@raw = arg.raw
- elsif
+ @entity_filter = arg.instance_variable_get(:@entity_filter)
+ else
raise "Illegal argument of type #{arg.type} for Text constructor (#{arg})"
end
- @string.gsub!( /\r\n?/, "\n" )
+ @string.squeeze!(" \n\t") unless respect_whitespace
+ @string.gsub!(/\r\n?/, "\n")
+ @raw = raw unless raw.nil?
+ @entity_filter = entity_filter if entity_filter
+ clear_cache
Text.check(@string, illegal, doctype) if @raw
end
@@ -130,13 +131,13 @@ def parent= parent
def Text.check string, pattern, doctype
# illegal anywhere
- if string !~ VALID_XML_CHARS
+ if !string.match?(VALID_XML_CHARS)
if String.method_defined? :encode
string.chars.each do |c|
case c.ord
when *VALID_CHAR
else
- raise "Illegal character #{c.inspect} in raw string \"#{string}\""
+ raise "Illegal character #{c.inspect} in raw string #{string.inspect}"
end
end
else
@@ -144,31 +145,51 @@ def Text.check string, pattern, doctype
case c.unpack('U')
when *VALID_CHAR
else
- raise "Illegal character #{c.inspect} in raw string \"#{string}\""
+ raise "Illegal character #{c.inspect} in raw string #{string.inspect}"
end
end
end
end
- # context sensitive
- string.scan(pattern) do
- if $1[-1] != ?;
- raise "Illegal character '#{$1}' in raw string \"#{string}\""
- elsif $1[0] == ?&
- if $5 and $5[0] == ?#
- case ($5[1] == ?x ? $5[2..-1].to_i(16) : $5[1..-1].to_i)
- when *VALID_CHAR
+ pos = 0
+ while (index = string.index(/<|&/, pos))
+ if string[index] == "<"
+ raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
+ end
+
+ unless (end_index = string.index(/[^\s];/, index + 1))
+ raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
+ end
+
+ value = string[(index + 1)..end_index]
+ if /\s/.match?(value)
+ raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
+ end
+
+ if value[0] == "#"
+ character_reference = value[1..-1]
+
+ unless (/\A(\d+|x[0-9a-fA-F]+)\z/.match?(character_reference))
+ if character_reference[0] == "x" || character_reference[-1] == "x"
+ raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
else
- raise "Illegal character '#{$1}' in raw string \"#{string}\""
+ raise "Illegal character #{string.inspect} in raw string #{string.inspect}"
end
- # FIXME: below can't work but this needs API change.
- # elsif @parent and $3 and !SUBSTITUTES.include?($1)
- # if !doctype or !doctype.entities.has_key?($3)
- # raise "Undeclared entity '#{$1}' in raw string \"#{string}\""
- # end
end
+
+ case (character_reference[0] == "x" ? character_reference[1..-1].to_i(16) : character_reference[0..-1].to_i)
+ when *VALID_CHAR
+ else
+ raise "Illegal character #{string.inspect} in raw string #{string.inspect}"
+ end
+ elsif !(/\A#{Entity::NAME}\z/um.match?(value))
+ raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
end
+
+ pos = end_index + 1
end
+
+ string
end
def node_type
@@ -181,7 +202,7 @@ def empty?
def clone
- return Text.new(self)
+ return Text.new(self, true)
end
@@ -226,9 +247,7 @@ def doctype
# u.to_s #-> "sean russell"
def to_s
return @string if @raw
- return @normalized if @normalized
-
- @normalized = Text::normalize( @string, doctype, @entity_filter )
+ @normalized ||= Text::normalize( @string, doctype, @entity_filter )
end
def inspect
@@ -249,8 +268,8 @@ def inspect
# u = Text.new( "sean russell", false, nil, true )
# u.value #-> "sean russell"
def value
- return @unnormalized if @unnormalized
- @unnormalized = Text::unnormalize( @string, doctype )
+ @unnormalized ||= Text::unnormalize(@string, doctype,
+ entity_expansion_text_limit: document&.entity_expansion_text_limit)
end
# Sets the contents of this text node. This expects the text to be
@@ -266,16 +285,16 @@ def value=( val )
@raw = false
end
- def wrap(string, width, addnewline=false)
- # Recursively wrap string at width.
- return string if string.length <= width
- place = string.rindex(' ', width) # Position in string with last ' ' before cutoff
- if addnewline then
- return "\n" + string[0,place] + "\n" + wrap(string[place+1..-1], width)
- else
- return string[0,place] + "\n" + wrap(string[place+1..-1], width)
- end
- end
+ def wrap(string, width, addnewline=false)
+ # Recursively wrap string at width.
+ return string if string.length <= width
+ place = string.rindex(' ', width) # Position in string with last ' ' before cutoff
+ if addnewline then
+ return "\n" + string[0,place] + "\n" + wrap(string[place+1..-1], width)
+ else
+ return string[0,place] + "\n" + wrap(string[place+1..-1], width)
+ end
+ end
def indent_text(string, level=1, style="\t", indentfirstline=true)
return string if level < 0
@@ -373,7 +392,7 @@ def Text::normalize( input, doctype=nil, entity_filter=nil )
copy = input.to_s
# Doing it like this rather than in a loop improves the speed
#copy = copy.gsub( EREFERENCE, '&' )
- copy = copy.gsub( "&", "&" )
+ copy = copy.gsub( "&", "&" ) if copy.include?("&")
if doctype
# Replace all ampersands that aren't part of an entity
doctype.entities.each_value do |entity|
@@ -384,18 +403,21 @@ def Text::normalize( input, doctype=nil, entity_filter=nil )
else
# Replace all ampersands that aren't part of an entity
DocType::DEFAULT_ENTITIES.each_value do |entity|
- copy = copy.gsub(entity.value, "&#{entity.name};" )
+ if copy.include?(entity.value)
+ copy = copy.gsub(entity.value, "&#{entity.name};" )
+ end
end
end
copy
end
# Unescapes all possible entities
- def Text::unnormalize( string, doctype=nil, filter=nil, illegal=nil )
+ def Text::unnormalize( string, doctype=nil, filter=nil, illegal=nil, entity_expansion_text_limit: nil )
+ entity_expansion_text_limit ||= Security.entity_expansion_text_limit
sum = 0
string.gsub( /\r\n?/, "\n" ).gsub( REFERENCE ) {
s = Text.expand($&, doctype, filter)
- if sum + s.bytesize > Security.entity_expansion_text_limit
+ if sum + s.bytesize > entity_expansion_text_limit
raise "entity expansion has grown too large"
else
sum += s.bytesize
diff --git a/lib/rexml/undefinednamespaceexception.rb b/lib/rexml/undefinednamespaceexception.rb
index e522ed57ea..492a098183 100644
--- a/lib/rexml/undefinednamespaceexception.rb
+++ b/lib/rexml/undefinednamespaceexception.rb
@@ -1,5 +1,5 @@
# frozen_string_literal: false
-require 'rexml/parseexception'
+require_relative 'parseexception'
module REXML
class UndefinedNamespaceException < ParseException
def initialize( prefix, source, parser )
diff --git a/lib/rexml/validation/relaxng.rb b/lib/rexml/validation/relaxng.rb
index fb52438290..f29a2c05e5 100644
--- a/lib/rexml/validation/relaxng.rb
+++ b/lib/rexml/validation/relaxng.rb
@@ -1,6 +1,6 @@
# frozen_string_literal: false
-require "rexml/validation/validation"
-require "rexml/parsers/baseparser"
+require_relative "validation"
+require_relative "../parsers/baseparser"
module REXML
module Validation
diff --git a/lib/rexml/validation/validation.rb b/lib/rexml/validation/validation.rb
index f0c76f976c..0ad6ada427 100644
--- a/lib/rexml/validation/validation.rb
+++ b/lib/rexml/validation/validation.rb
@@ -1,5 +1,5 @@
# frozen_string_literal: false
-require 'rexml/validation/validationexception'
+require_relative 'validationexception'
module REXML
module Validation
diff --git a/lib/rexml/xmldecl.rb b/lib/rexml/xmldecl.rb
index a37e9f3ddc..d19407cefd 100644
--- a/lib/rexml/xmldecl.rb
+++ b/lib/rexml/xmldecl.rb
@@ -1,17 +1,18 @@
# frozen_string_literal: false
-require 'rexml/encoding'
-require 'rexml/source'
+
+require_relative 'encoding'
+require_relative 'source'
module REXML
# NEEDS DOCUMENTATION
class XMLDecl < Child
include Encoding
- DEFAULT_VERSION = "1.0";
- DEFAULT_ENCODING = "UTF-8";
- DEFAULT_STANDALONE = "no";
- START = '<\?xml';
- STOP = '\?>';
+ DEFAULT_VERSION = "1.0"
+ DEFAULT_ENCODING = "UTF-8"
+ DEFAULT_STANDALONE = "no"
+ START = "<?xml"
+ STOP = "?>"
attr_accessor :version, :standalone
attr_reader :writeencoding, :writethis
@@ -25,6 +26,7 @@ def initialize(version=DEFAULT_VERSION, encoding=nil, standalone=nil)
self.encoding = version.encoding
@writeencoding = version.writeencoding
@standalone = version.standalone
+ @writethis = version.writethis
else
super()
@version = version
@@ -46,9 +48,9 @@ def clone
# Ignored
def write(writer, indent=-1, transitive=false, ie_hack=false)
return nil unless @writethis or writer.kind_of? Output
- writer << START.sub(/\\/u, '')
+ writer << START
writer << " #{content encoding}"
- writer << STOP.sub(/\\/u, '')
+ writer << STOP
end
def ==( other )
@@ -102,14 +104,26 @@ def dowrite
end
def inspect
- START.sub(/\\/u, '') + " ... " + STOP.sub(/\\/u, '')
+ "#{START} ... #{STOP}"
end
private
def content(enc)
- rv = "version='#@version'"
- rv << " encoding='#{enc}'" if @writeencoding || enc !~ /\Autf-8\z/i
- rv << " standalone='#@standalone'" if @standalone
+ context = nil
+ context = parent.context if parent
+ if context and context[:prologue_quote] == :quote
+ quote = "\""
+ else
+ quote = "'"
+ end
+
+ rv = "version=#{quote}#{@version}#{quote}"
+ if @writeencoding or enc !~ /\Autf-8\z/i
+ rv << " encoding=#{quote}#{enc}#{quote}"
+ end
+ if @standalone
+ rv << " standalone=#{quote}#{@standalone}#{quote}"
+ end
rv
end
end
diff --git a/lib/rexml/xpath.rb b/lib/rexml/xpath.rb
index f1cb99baea..a0921bd8e1 100644
--- a/lib/rexml/xpath.rb
+++ b/lib/rexml/xpath.rb
@@ -1,6 +1,6 @@
# frozen_string_literal: false
-require 'rexml/functions'
-require 'rexml/xpath_parser'
+require_relative 'functions'
+require_relative 'xpath_parser'
module REXML
# Wrapper class. Use this class to access the XPath functions.
@@ -28,10 +28,10 @@ class XPath
# XPath.first( doc, "//b"} )
# XPath.first( node, "a/x:b", { "x"=>"http://doofus" } )
# XPath.first( node, '/book/publisher/text()=$publisher', {}, {"publisher"=>"O'Reilly"})
- def XPath::first element, path=nil, namespaces=nil, variables={}
+ def XPath::first(element, path=nil, namespaces=nil, variables={}, options={})
raise "The namespaces argument, if supplied, must be a hash object." unless namespaces.nil? or namespaces.kind_of?(Hash)
raise "The variables argument, if supplied, must be a hash object." unless variables.kind_of?(Hash)
- parser = XPathParser.new
+ parser = XPathParser.new(**options)
parser.namespaces = namespaces
parser.variables = variables
path = "*" unless path
@@ -57,10 +57,10 @@ def XPath::first element, path=nil, namespaces=nil, variables={}
# XPath.each( node, 'ancestor::x' ) { |el| ... }
# XPath.each( node, '/book/publisher/text()=$publisher', {}, {"publisher"=>"O'Reilly"}) \
# {|el| ... }
- def XPath::each element, path=nil, namespaces=nil, variables={}, &block
+ def XPath::each(element, path=nil, namespaces=nil, variables={}, options={}, &block)
raise "The namespaces argument, if supplied, must be a hash object." unless namespaces.nil? or namespaces.kind_of?(Hash)
raise "The variables argument, if supplied, must be a hash object." unless variables.kind_of?(Hash)
- parser = XPathParser.new
+ parser = XPathParser.new(**options)
parser.namespaces = namespaces
parser.variables = variables
path = "*" unless path
@@ -69,8 +69,8 @@ def XPath::each element, path=nil, namespaces=nil, variables={}, &block
end
# Returns an array of nodes matching a given XPath.
- def XPath::match element, path=nil, namespaces=nil, variables={}
- parser = XPathParser.new
+ def XPath::match(element, path=nil, namespaces=nil, variables={}, options={})
+ parser = XPathParser.new(**options)
parser.namespaces = namespaces
parser.variables = variables
path = "*" unless path
diff --git a/lib/rexml/xpath_parser.rb b/lib/rexml/xpath_parser.rb
index 181b2b6e85..5eb1e5a961 100644
--- a/lib/rexml/xpath_parser.rb
+++ b/lib/rexml/xpath_parser.rb
@@ -1,43 +1,51 @@
# frozen_string_literal: false
-require 'rexml/namespace'
-require 'rexml/xmltokens'
-require 'rexml/attribute'
-require 'rexml/syncenumerator'
-require 'rexml/parsers/xpathparser'
-
-class Object
- # provides a unified +clone+ operation, for REXML::XPathParser
- # to use across multiple Object types
- def dclone
- clone
- end
-end
-class Symbol
- # provides a unified +clone+ operation, for REXML::XPathParser
- # to use across multiple Object types
- def dclone ; self ; end
-end
-class Integer
- # provides a unified +clone+ operation, for REXML::XPathParser
- # to use across multiple Object types
- def dclone ; self ; end
-end
-class Float
- # provides a unified +clone+ operation, for REXML::XPathParser
- # to use across multiple Object types
- def dclone ; self ; end
-end
-class Array
- # provides a unified +clone+ operation, for REXML::XPathParser
- # to use across multiple Object+ types
- def dclone
- klone = self.clone
- klone.clear
- self.each{|v| klone << v.dclone}
- klone
+
+require "pp"
+
+require_relative 'namespace'
+require_relative 'xmltokens'
+require_relative 'attribute'
+require_relative 'parsers/xpathparser'
+
+module REXML
+ module DClonable
+ refine Object do
+ # provides a unified +clone+ operation, for REXML::XPathParser
+ # to use across multiple Object types
+ def dclone
+ clone
+ end
+ end
+ refine Symbol do
+ # provides a unified +clone+ operation, for REXML::XPathParser
+ # to use across multiple Object types
+ def dclone ; self ; end
+ end
+ refine Integer do
+ # provides a unified +clone+ operation, for REXML::XPathParser
+ # to use across multiple Object types
+ def dclone ; self ; end
+ end
+ refine Float do
+ # provides a unified +clone+ operation, for REXML::XPathParser
+ # to use across multiple Object types
+ def dclone ; self ; end
+ end
+ refine Array do
+ # provides a unified +clone+ operation, for REXML::XPathParser
+ # to use across multiple Object+ types
+ def dclone
+ klone = self.clone
+ klone.clear
+ self.each{|v| klone << v.dclone}
+ klone
+ end
+ end
end
end
+using REXML::DClonable
+
module REXML
# You don't want to use this class. Really. Use XPath, which is a wrapper
# for this class. Believe me. You don't want to poke around in here.
@@ -47,10 +55,15 @@ class XPathParser
include XMLTokens
LITERAL = /^'([^']*)'|^"([^"]*)"/u
- def initialize( )
+ DEBUG = (ENV["REXML_XPATH_PARSER_DEBUG"] == "true")
+
+ def initialize(strict: false)
+ @debug = DEBUG
@parser = REXML::Parsers::XPathParser.new
@namespaces = nil
@variables = {}
+ @nest = 0
+ @strict = strict
end
def namespaces=( namespaces={} )
@@ -75,7 +88,7 @@ def get_first path, nodeset
def predicate path, nodeset
path_stack = @parser.parse( path )
- expr( path_stack, nodeset )
+ match( path_stack, nodeset )
end
def []=( variable_name, value )
@@ -123,13 +136,24 @@ def first( path_stack, node )
end
- def match( path_stack, nodeset )
- r = expr( path_stack, nodeset )
- r
+ def match(path_stack, nodeset)
+ nodeset = nodeset.collect.with_index do |node, i|
+ position = i + 1
+ XPathNode.new(node, position: position)
+ end
+ result = expr(path_stack, nodeset)
+ case result
+ when Array # nodeset
+ unnode(result)
+ else
+ [result]
+ end
end
private
-
+ def strict?
+ @strict
+ end
# Returns a String namespace for a node, given a prefix
# The rules are:
@@ -148,343 +172,481 @@ def get_namespace( node, prefix )
# Expr takes a stack of path elements and a set of nodes (either a Parent
# or an Array and returns an Array of matching nodes
- ALL = [ :attribute, :element, :text, :processing_instruction, :comment ]
- ELEMENTS = [ :element ]
def expr( path_stack, nodeset, context=nil )
- node_types = ELEMENTS
+ enter(:expr, path_stack, nodeset) if @debug
return nodeset if path_stack.length == 0 || nodeset.length == 0
while path_stack.length > 0
+ trace(:while, path_stack, nodeset) if @debug
if nodeset.length == 0
path_stack.clear
return []
end
- case (op = path_stack.shift)
+ op = path_stack.shift
+ case op
when :document
- nodeset = [ nodeset[0].root_node ]
-
- when :qname
- prefix = path_stack.shift
- name = path_stack.shift
- nodeset.delete_if do |node|
- # FIXME: This DOUBLES the time XPath searches take
- ns = get_namespace( node, prefix )
- if node.node_type == :element
- if node.name == name
- end
- end
- !(node.node_type == :element and
- node.name == name and
- node.namespace == ns )
- end
- node_types = ELEMENTS
-
- when :any
- nodeset.delete_if { |node| !node_types.include?(node.node_type) }
-
+ first_raw_node = nodeset.first.raw_node
+ nodeset = [XPathNode.new(first_raw_node.root_node, position: 1)]
when :self
- # This space left intentionally blank
-
- when :processing_instruction
- target = path_stack.shift
- nodeset.delete_if do |node|
- (node.node_type != :processing_instruction) or
- ( target!='' and ( node.target != target ) )
+ nodeset = step(path_stack) do
+ [nodeset]
end
-
- when :text
- nodeset.delete_if { |node| node.node_type != :text }
-
- when :comment
- nodeset.delete_if { |node| node.node_type != :comment }
-
- when :node
- # This space left intentionally blank
- node_types = ALL
-
when :child
- new_nodeset = []
- nt = nil
- nodeset.each do |node|
- nt = node.node_type
- new_nodeset += node.children if nt == :element or nt == :document
+ nodeset = step(path_stack) do
+ child(nodeset)
end
- nodeset = new_nodeset
- node_types = ELEMENTS
-
when :literal
+ trace(:literal, path_stack, nodeset) if @debug
return path_stack.shift
-
when :attribute
- new_nodeset = []
- case path_stack.shift
- when :qname
- prefix = path_stack.shift
- name = path_stack.shift
- for element in nodeset
- if element.node_type == :element
- attrib = element.attribute( name, get_namespace(element, prefix) )
- new_nodeset << attrib if attrib
+ nodeset = step(path_stack, any_type: :attribute) do
+ nodesets = []
+ nodeset.each do |node|
+ raw_node = node.raw_node
+ next unless raw_node.node_type == :element
+ attributes = raw_node.attributes
+ next if attributes.empty?
+ nodesets << attributes.each_attribute.collect.with_index do |attribute, i|
+ XPathNode.new(attribute, position: i + 1)
end
end
- when :any
- for element in nodeset
- if element.node_type == :element
- new_nodeset += element.attributes.to_a
+ nodesets
+ end
+ when :namespace
+ pre_defined_namespaces = {
+ "xml" => "http://www.w3.org/XML/1998/namespace",
+ }
+ nodeset = step(path_stack, any_type: :namespace) do
+ nodesets = []
+ nodeset.each do |node|
+ raw_node = node.raw_node
+ case raw_node.node_type
+ when :element
+ if @namespaces
+ nodesets << pre_defined_namespaces.merge(@namespaces)
+ else
+ nodesets << pre_defined_namespaces.merge(raw_node.namespaces)
+ end
+ when :attribute
+ if @namespaces
+ nodesets << pre_defined_namespaces.merge(@namespaces)
+ else
+ nodesets << pre_defined_namespaces.merge(raw_node.element.namespaces)
+ end
end
end
+ nodesets
end
- nodeset = new_nodeset
-
when :parent
- nodeset = nodeset.collect{|n| n.parent}.compact
- #nodeset = expr(path_stack.dclone, nodeset.collect{|n| n.parent}.compact)
- node_types = ELEMENTS
-
- when :ancestor
- new_nodeset = []
- nodeset.each do |node|
- while node.parent
- node = node.parent
- new_nodeset << node unless new_nodeset.include? node
+ nodeset = step(path_stack) do
+ nodesets = []
+ nodeset.each do |node|
+ raw_node = node.raw_node
+ if raw_node.node_type == :attribute
+ parent = raw_node.element
+ else
+ parent = raw_node.parent
+ end
+ nodesets << [XPathNode.new(parent, position: 1)] if parent
end
+ nodesets
end
- nodeset = new_nodeset
- node_types = ELEMENTS
-
- when :ancestor_or_self
- new_nodeset = []
- nodeset.each do |node|
- if node.node_type == :element
- new_nodeset << node
- while ( node.parent )
- node = node.parent
- new_nodeset << node unless new_nodeset.include? node
+ when :ancestor
+ nodeset = step(path_stack) do
+ nodesets = []
+ # new_nodes = {}
+ nodeset.each do |node|
+ raw_node = node.raw_node
+ new_nodeset = []
+ while raw_node.parent
+ raw_node = raw_node.parent
+ # next if new_nodes.key?(node)
+ new_nodeset << XPathNode.new(raw_node,
+ position: new_nodeset.size + 1)
+ # new_nodes[node] = true
end
+ nodesets << new_nodeset unless new_nodeset.empty?
end
+ nodesets
end
- nodeset = new_nodeset
- node_types = ELEMENTS
-
- when :predicate
- new_nodeset = []
- subcontext = { :size => nodeset.size }
- pred = path_stack.shift
- nodeset.each_with_index { |node, index|
- subcontext[ :node ] = node
- subcontext[ :index ] = index+1
- pc = pred.dclone
- result = expr( pc, [node], subcontext )
- result = result[0] if result.kind_of? Array and result.length == 1
- if result.kind_of? Numeric
- new_nodeset << node if result == (index+1)
- elsif result.instance_of? Array
- if result.size > 0 and result.inject(false) {|k,s| s or k}
- new_nodeset << node if result.size > 0
+ when :ancestor_or_self
+ nodeset = step(path_stack) do
+ nodesets = []
+ # new_nodes = {}
+ nodeset.each do |node|
+ raw_node = node.raw_node
+ next unless raw_node.node_type == :element
+ new_nodeset = [XPathNode.new(raw_node, position: 1)]
+ # new_nodes[node] = true
+ while raw_node.parent
+ raw_node = raw_node.parent
+ # next if new_nodes.key?(node)
+ new_nodeset << XPathNode.new(raw_node,
+ position: new_nodeset.size + 1)
+ # new_nodes[node] = true
end
- else
- new_nodeset << node if result
+ nodesets << new_nodeset unless new_nodeset.empty?
end
- }
- nodeset = new_nodeset
-=begin
- predicate = path_stack.shift
- ns = nodeset.clone
- result = expr( predicate, ns )
- if result.kind_of? Array
- nodeset = result.zip(ns).collect{|m,n| n if m}.compact
- else
- nodeset = result ? nodeset : []
+ nodesets
end
-=end
-
when :descendant_or_self
- rv = descendant_or_self( path_stack, nodeset )
- path_stack.clear
- nodeset = rv
- node_types = ELEMENTS
-
+ nodeset = step(path_stack) do
+ descendant(nodeset, true)
+ end
when :descendant
- results = []
- nt = nil
- nodeset.each do |node|
- nt = node.node_type
- results += expr( path_stack.dclone.unshift( :descendant_or_self ),
- node.children ) if nt == :element or nt == :document
+ nodeset = step(path_stack) do
+ descendant(nodeset, false)
end
- nodeset = results
- node_types = ELEMENTS
-
when :following_sibling
- results = []
- nodeset.each do |node|
- next if node.parent.nil?
- all_siblings = node.parent.children
- current_index = all_siblings.index( node )
- following_siblings = all_siblings[ current_index+1 .. -1 ]
- results += expr( path_stack.dclone, following_siblings )
+ nodeset = step(path_stack) do
+ nodesets = []
+ nodeset.each do |node|
+ raw_node = node.raw_node
+ next unless raw_node.respond_to?(:parent)
+ next if raw_node.parent.nil?
+ all_siblings = raw_node.parent.children
+ current_index = all_siblings.index(raw_node)
+ following_siblings = all_siblings[(current_index + 1)..-1]
+ next if following_siblings.empty?
+ nodesets << following_siblings.collect.with_index do |sibling, i|
+ XPathNode.new(sibling, position: i + 1)
+ end
+ end
+ nodesets
end
- nodeset = results
-
when :preceding_sibling
- results = []
- nodeset.each do |node|
- next if node.parent.nil?
- all_siblings = node.parent.children
- current_index = all_siblings.index( node )
- preceding_siblings = all_siblings[ 0, current_index ].reverse
- results += preceding_siblings
+ nodeset = step(path_stack, order: :reverse) do
+ nodesets = []
+ nodeset.each do |node|
+ raw_node = node.raw_node
+ next unless raw_node.respond_to?(:parent)
+ next if raw_node.parent.nil?
+ all_siblings = raw_node.parent.children
+ current_index = all_siblings.index(raw_node)
+ preceding_siblings = all_siblings[0, current_index].reverse
+ next if preceding_siblings.empty?
+ nodesets << preceding_siblings.collect.with_index do |sibling, i|
+ XPathNode.new(sibling, position: i + 1)
+ end
+ end
+ nodesets
end
- nodeset = results
- node_types = ELEMENTS
-
when :preceding
- new_nodeset = []
- nodeset.each do |node|
- new_nodeset += preceding( node )
+ nodeset = step(path_stack, order: :reverse) do
+ unnode(nodeset) do |node|
+ preceding(node)
+ end
end
- nodeset = new_nodeset
- node_types = ELEMENTS
-
when :following
- new_nodeset = []
- nodeset.each do |node|
- new_nodeset += following( node )
- end
- nodeset = new_nodeset
- node_types = ELEMENTS
-
- when :namespace
- new_nodeset = []
- prefix = path_stack.shift
- nodeset.each do |node|
- if (node.node_type == :element or node.node_type == :attribute)
- if @namespaces
- namespaces = @namespaces
- elsif (node.node_type == :element)
- namespaces = node.namespaces
- else
- namespaces = node.element.namesapces
- end
- if (node.namespace == namespaces[prefix])
- new_nodeset << node
- end
+ nodeset = step(path_stack) do
+ unnode(nodeset) do |node|
+ following(node)
end
end
- nodeset = new_nodeset
-
when :variable
var_name = path_stack.shift
- return @variables[ var_name ]
+ return [@variables[var_name]]
- # :and, :or, :eq, :neq, :lt, :lteq, :gt, :gteq
- # TODO: Special case for :or and :and -- not evaluate the right
- # operand if the left alone determines result (i.e. is true for
- # :or and false for :and).
- when :eq, :neq, :lt, :lteq, :gt, :gteq, :or
+ when :eq, :neq, :lt, :lteq, :gt, :gteq
left = expr( path_stack.shift, nodeset.dup, context )
right = expr( path_stack.shift, nodeset.dup, context )
res = equality_relational_compare( left, op, right )
+ trace(op, left, right, res) if @debug
return res
+ when :or
+ left = expr(path_stack.shift, nodeset.dup, context)
+ return true if Functions.boolean(left)
+ right = expr(path_stack.shift, nodeset.dup, context)
+ return Functions.boolean(right)
+
when :and
- left = expr( path_stack.shift, nodeset.dup, context )
- return [] unless left
- if left.respond_to?(:inject) and !left.inject(false) {|a,b| a | b}
- return []
+ left = expr(path_stack.shift, nodeset.dup, context)
+ return false unless Functions.boolean(left)
+ right = expr(path_stack.shift, nodeset.dup, context)
+ return Functions.boolean(right)
+
+ when :div, :mod, :mult, :plus, :minus
+ left = expr(path_stack.shift, nodeset, context)
+ right = expr(path_stack.shift, nodeset, context)
+ left = unnode(left) if left.is_a?(Array)
+ right = unnode(right) if right.is_a?(Array)
+ left = Functions::number(left)
+ right = Functions::number(right)
+ case op
+ when :div
+ return left / right
+ when :mod
+ return left % right
+ when :mult
+ return left * right
+ when :plus
+ return left + right
+ when :minus
+ return left - right
+ else
+ raise "[BUG] Unexpected operator: <#{op.inspect}>"
end
- right = expr( path_stack.shift, nodeset.dup, context )
- res = equality_relational_compare( left, op, right )
- return res
-
- when :div
- left = Functions::number(expr(path_stack.shift, nodeset, context)).to_f
- right = Functions::number(expr(path_stack.shift, nodeset, context)).to_f
- return (left / right)
-
- when :mod
- left = Functions::number(expr(path_stack.shift, nodeset, context )).to_f
- right = Functions::number(expr(path_stack.shift, nodeset, context )).to_f
- return (left % right)
-
- when :mult
- left = Functions::number(expr(path_stack.shift, nodeset, context )).to_f
- right = Functions::number(expr(path_stack.shift, nodeset, context )).to_f
- return (left * right)
-
- when :plus
- left = Functions::number(expr(path_stack.shift, nodeset, context )).to_f
- right = Functions::number(expr(path_stack.shift, nodeset, context )).to_f
- return (left + right)
-
- when :minus
- left = Functions::number(expr(path_stack.shift, nodeset, context )).to_f
- right = Functions::number(expr(path_stack.shift, nodeset, context )).to_f
- return (left - right)
-
when :union
left = expr( path_stack.shift, nodeset, context )
right = expr( path_stack.shift, nodeset, context )
+ left = unnode(left) if left.is_a?(Array)
+ right = unnode(right) if right.is_a?(Array)
return (left | right)
-
when :neg
res = expr( path_stack, nodeset, context )
- return -(res.to_f)
-
+ res = unnode(res) if res.is_a?(Array)
+ return -Functions.number(res)
when :not
when :function
func_name = path_stack.shift.tr('-','_')
arguments = path_stack.shift
- subcontext = context ? nil : { :size => nodeset.size }
-
- res = []
- cont = context
- nodeset.each_with_index { |n, i|
- if subcontext
- subcontext[:node] = n
- subcontext[:index] = i
- cont = subcontext
+
+ if nodeset.size != 1
+ message = "[BUG] Node set size must be 1 for function call: "
+ message += "<#{func_name}>: <#{nodeset.inspect}>: "
+ message += "<#{arguments.inspect}>"
+ raise message
+ end
+
+ node = nodeset.first
+ if context
+ target_context = context
+ else
+ target_context = {:size => nodeset.size}
+ if node.is_a?(XPathNode)
+ target_context[:node] = node.raw_node
+ target_context[:index] = node.position
+ else
+ target_context[:node] = node
+ target_context[:index] = 1
end
- arg_clone = arguments.dclone
- args = arg_clone.collect { |arg|
- expr( arg, [n], cont )
- }
- Functions.context = cont
- res << Functions.send( func_name, *args )
- }
- return res
+ end
+ args = arguments.dclone.collect do |arg|
+ result = expr(arg, nodeset, target_context)
+ result = unnode(result) if result.is_a?(Array)
+ result
+ end
+ Functions.context = target_context
+ return Functions.send(func_name, *args)
+ else
+ raise "[BUG] Unexpected path: <#{op.inspect}>: <#{path_stack.inspect}>"
end
end # while
return nodeset
+ ensure
+ leave(:expr, path_stack, nodeset) if @debug
+ end
+
+ def step(path_stack, any_type: :element, order: :forward)
+ nodesets = yield
+ begin
+ enter(:step, path_stack, nodesets) if @debug
+ nodesets = node_test(path_stack, nodesets, any_type: any_type)
+ while path_stack[0] == :predicate
+ path_stack.shift # :predicate
+ predicate_expression = path_stack.shift.dclone
+ nodesets = evaluate_predicate(predicate_expression, nodesets)
+ end
+ if nodesets.size == 1
+ ordered_nodeset = nodesets[0]
+ else
+ raw_nodes = []
+ nodesets.each do |nodeset|
+ nodeset.each do |node|
+ if node.respond_to?(:raw_node)
+ raw_nodes << node.raw_node
+ else
+ raw_nodes << node
+ end
+ end
+ end
+ ordered_nodeset = sort(raw_nodes, order)
+ end
+ new_nodeset = []
+ ordered_nodeset.each do |node|
+ # TODO: Remove duplicated
+ new_nodeset << XPathNode.new(node, position: new_nodeset.size + 1)
+ end
+ new_nodeset
+ ensure
+ leave(:step, path_stack, new_nodeset) if @debug
+ end
end
+ def node_test(path_stack, nodesets, any_type: :element)
+ enter(:node_test, path_stack, nodesets) if @debug
+ operator = path_stack.shift
+ case operator
+ when :qname
+ prefix = path_stack.shift
+ name = path_stack.shift
+ new_nodesets = nodesets.collect do |nodeset|
+ filter_nodeset(nodeset) do |node|
+ raw_node = node.raw_node
+ case raw_node.node_type
+ when :element
+ if prefix.nil?
+ raw_node.name == name
+ elsif prefix.empty?
+ if strict?
+ raw_node.name == name and raw_node.namespace == ""
+ else
+ # FIXME: This DOUBLES the time XPath searches take
+ ns = get_namespace(raw_node, prefix)
+ raw_node.name == name and raw_node.namespace == ns
+ end
+ else
+ # FIXME: This DOUBLES the time XPath searches take
+ ns = get_namespace(raw_node, prefix)
+ raw_node.name == name and raw_node.namespace == ns
+ end
+ when :attribute
+ if prefix.nil?
+ raw_node.name == name
+ elsif prefix.empty?
+ raw_node.name == name and raw_node.namespace == ""
+ else
+ # FIXME: This DOUBLES the time XPath searches take
+ ns = get_namespace(raw_node.element, prefix)
+ raw_node.name == name and raw_node.namespace == ns
+ end
+ else
+ false
+ end
+ end
+ end
+ when :namespace
+ prefix = path_stack.shift
+ new_nodesets = nodesets.collect do |nodeset|
+ filter_nodeset(nodeset) do |node|
+ raw_node = node.raw_node
+ case raw_node.node_type
+ when :element
+ namespaces = @namespaces || raw_node.namespaces
+ raw_node.namespace == namespaces[prefix]
+ when :attribute
+ namespaces = @namespaces || raw_node.element.namespaces
+ raw_node.namespace == namespaces[prefix]
+ else
+ false
+ end
+ end
+ end
+ when :any
+ new_nodesets = nodesets.collect do |nodeset|
+ filter_nodeset(nodeset) do |node|
+ raw_node = node.raw_node
+ raw_node.node_type == any_type
+ end
+ end
+ when :comment
+ new_nodesets = nodesets.collect do |nodeset|
+ filter_nodeset(nodeset) do |node|
+ raw_node = node.raw_node
+ raw_node.node_type == :comment
+ end
+ end
+ when :text
+ new_nodesets = nodesets.collect do |nodeset|
+ filter_nodeset(nodeset) do |node|
+ raw_node = node.raw_node
+ raw_node.node_type == :text
+ end
+ end
+ when :processing_instruction
+ target = path_stack.shift
+ new_nodesets = nodesets.collect do |nodeset|
+ filter_nodeset(nodeset) do |node|
+ raw_node = node.raw_node
+ (raw_node.node_type == :processing_instruction) and
+ (target.empty? or (raw_node.target == target))
+ end
+ end
+ when :node
+ new_nodesets = nodesets.collect do |nodeset|
+ filter_nodeset(nodeset) do |node|
+ true
+ end
+ end
+ else
+ message = "[BUG] Unexpected node test: " +
+ "<#{operator.inspect}>: <#{path_stack.inspect}>"
+ raise message
+ end
+ new_nodesets
+ ensure
+ leave(:node_test, path_stack, new_nodesets) if @debug
+ end
- ##########################################################
- # FIXME
- # The next two methods are BAD MOJO!
- # This is my achilles heel. If anybody thinks of a better
- # way of doing this, be my guest. This really sucks, but
- # it is a wonder it works at all.
- # ########################################################
+ def filter_nodeset(nodeset)
+ new_nodeset = []
+ nodeset.each do |node|
+ next unless yield(node)
+ new_nodeset << XPathNode.new(node, position: new_nodeset.size + 1)
+ end
+ new_nodeset
+ end
- def descendant_or_self( path_stack, nodeset )
- rs = []
- d_o_s( path_stack, nodeset, rs )
- document_order(rs.flatten.compact)
- #rs.flatten.compact
+ def evaluate_predicate(expression, nodesets)
+ enter(:predicate, expression, nodesets) if @debug
+ new_nodeset_count = 0
+ new_nodesets = nodesets.collect do |nodeset|
+ new_nodeset = []
+ subcontext = { :size => nodeset.size }
+ nodeset.each_with_index do |node, index|
+ if node.is_a?(XPathNode)
+ subcontext[:node] = node.raw_node
+ subcontext[:index] = node.position
+ else
+ subcontext[:node] = node
+ subcontext[:index] = index + 1
+ end
+ result = expr(expression.dclone, [node], subcontext)
+ trace(:predicate_evaluate, expression, node, subcontext, result) if @debug
+ result = result[0] if result.kind_of? Array and result.length == 1
+ if result.kind_of? Numeric
+ if result == node.position
+ new_nodeset_count += 1
+ new_nodeset << XPathNode.new(node, position: new_nodeset_count)
+ end
+ elsif result.instance_of? Array
+ if result.size > 0 and result.inject(false) {|k,s| s or k}
+ if result.size > 0
+ new_nodeset_count += 1
+ new_nodeset << XPathNode.new(node, position: new_nodeset_count)
+ end
+ end
+ else
+ if result
+ new_nodeset_count += 1
+ new_nodeset << XPathNode.new(node, position: new_nodeset_count)
+ end
+ end
+ end
+ new_nodeset
+ end
+ new_nodesets
+ ensure
+ leave(:predicate, new_nodesets) if @debug
end
- def d_o_s( p, ns, r )
- nt = nil
- ns.each_index do |i|
- n = ns[i]
- x = expr( p.dclone, [ n ] )
- nt = n.node_type
- d_o_s( p, n.children, x ) if nt == :element or nt == :document and n.children.size > 0
- r.concat(x) if x.size > 0
+ def trace(*args)
+ indent = " " * @nest
+ PP.pp(args, "").each_line do |line|
+ puts("#{indent}#{line}")
end
end
+ def enter(tag, *args)
+ trace(:enter, tag, *args)
+ @nest += 1
+ end
+
+ def leave(tag, *args)
+ @nest -= 1
+ trace(:leave, tag, *args)
+ end
# Reorders an array of nodes so that they are in document order
# It tries to do this efficiently.
@@ -494,7 +656,7 @@ def d_o_s( p, ns, r )
# in and out of function calls. If I knew what the index of the nodes was,
# I wouldn't have to do this. Maybe add a document IDX for each node?
# Problems with mutable documents. Or, rewrite everything.
- def document_order( array_of_nodes )
+ def sort(array_of_nodes, order)
new_arry = []
array_of_nodes.each { |node|
node_idx = []
@@ -505,42 +667,68 @@ def document_order( array_of_nodes )
end
new_arry << [ node_idx.reverse, node ]
}
- new_arry.sort{ |s1, s2| s1[0] <=> s2[0] }.collect{ |s| s[1] }
+ ordered = new_arry.sort_by do |index, node|
+ if order == :forward
+ index
+ else
+ -index
+ end
+ end
+ ordered.collect do |_index, node|
+ node
+ end
end
-
- def recurse( nodeset, &block )
- for node in nodeset
- yield node
- recurse( node, &block ) if node.node_type == :element
+ def descendant(nodeset, include_self)
+ nodesets = []
+ nodeset.each do |node|
+ new_nodeset = []
+ new_nodes = {}
+ descendant_recursive(node.raw_node, new_nodeset, new_nodes, include_self)
+ nodesets << new_nodeset unless new_nodeset.empty?
end
+ nodesets
end
+ def descendant_recursive(raw_node, new_nodeset, new_nodes, include_self)
+ if include_self
+ return if new_nodes.key?(raw_node)
+ new_nodeset << XPathNode.new(raw_node, position: new_nodeset.size + 1)
+ new_nodes[raw_node] = true
+ end
+ node_type = raw_node.node_type
+ if node_type == :element or node_type == :document
+ raw_node.children.each do |child|
+ descendant_recursive(child, new_nodeset, new_nodes, true)
+ end
+ end
+ end
# Builds a nodeset of all of the preceding nodes of the supplied node,
# in reverse document order
# preceding:: includes every element in the document that precedes this node,
# except for ancestors
- def preceding( node )
+ def preceding(node)
ancestors = []
- p = node.parent
- while p
- ancestors << p
- p = p.parent
+ parent = node.parent
+ while parent
+ ancestors << parent
+ parent = parent.parent
end
- acc = []
- p = preceding_node_of( node )
- while p
- if ancestors.include? p
- ancestors.delete(p)
+ precedings = []
+ preceding_node = preceding_node_of(node)
+ while preceding_node
+ if ancestors.include?(preceding_node)
+ ancestors.delete(preceding_node)
else
- acc << p
+ precedings << XPathNode.new(preceding_node,
+ position: precedings.size + 1)
end
- p = preceding_node_of( p )
+ preceding_node = preceding_node_of(preceding_node)
end
- acc
+ precedings
end
def preceding_node_of( node )
@@ -558,14 +746,15 @@ def preceding_node_of( node )
psn
end
- def following( node )
- acc = []
- p = next_sibling_node( node )
- while p
- acc << p
- p = following_node_of( p )
+ def following(node)
+ followings = []
+ following_node = next_sibling_node(node)
+ while following_node
+ followings << XPathNode.new(following_node,
+ position: followings.size + 1)
+ following_node = following_node_of(following_node)
end
- acc
+ followings
end
def following_node_of( node )
@@ -587,45 +776,68 @@ def next_sibling_node(node)
return psn
end
+ def child(nodeset)
+ nodesets = []
+ nodeset.each do |node|
+ raw_node = node.raw_node
+ node_type = raw_node.node_type
+ # trace(:child, node_type, node)
+ case node_type
+ when :element
+ nodesets << raw_node.children.collect.with_index do |child_node, i|
+ XPathNode.new(child_node, position: i + 1)
+ end
+ when :document
+ new_nodeset = []
+ raw_node.children.each do |child|
+ case child
+ when XMLDecl, Text
+ # Ignore
+ else
+ new_nodeset << XPathNode.new(child, position: new_nodeset.size + 1)
+ end
+ end
+ nodesets << new_nodeset unless new_nodeset.empty?
+ end
+ end
+ nodesets
+ end
+
def norm b
case b
when true, false
return b
when 'true', 'false'
return Functions::boolean( b )
- when /^\d+(\.\d+)?$/
+ when /^\d+(\.\d+)?$/, Numeric
return Functions::number( b )
else
return Functions::string( b )
end
end
- def equality_relational_compare( set1, op, set2 )
+ def equality_relational_compare(set1, op, set2)
+ set1 = unnode(set1) if set1.is_a?(Array)
+ set2 = unnode(set2) if set2.is_a?(Array)
+
if set1.kind_of? Array and set2.kind_of? Array
- if set1.size == 1 and set2.size == 1
- set1 = set1[0]
- set2 = set2[0]
- elsif set1.size == 0 or set2.size == 0
- nd = set1.size==0 ? set2 : set1
- rv = nd.collect { |il| compare( il, op, nil ) }
- return rv
- else
- res = []
- SyncEnumerator.new( set1, set2 ).each { |i1, i2|
- i1 = norm( i1 )
- i2 = norm( i2 )
- res << compare( i1, op, i2 )
- }
- return res
+ # If both objects to be compared are node-sets, then the
+ # comparison will be true if and only if there is a node in the
+ # first node-set and a node in the second node-set such that the
+ # result of performing the comparison on the string-values of
+ # the two nodes is true.
+ set1.product(set2).any? do |node1, node2|
+ node_string1 = Functions.string(node1)
+ node_string2 = Functions.string(node2)
+ compare(node_string1, op, node_string2)
end
- end
- # If one is nodeset and other is number, compare number to each item
- # in nodeset s.t. number op number(string(item))
- # If one is nodeset and other is string, compare string to each item
- # in nodeset s.t. string op string(item)
- # If one is nodeset and other is boolean, compare boolean to each item
- # in nodeset s.t. boolean op boolean(item)
- if set1.kind_of? Array or set2.kind_of? Array
+ elsif set1.kind_of? Array or set2.kind_of? Array
+ # If one is nodeset and other is number, compare number to each item
+ # in nodeset s.t. number op number(string(item))
+ # If one is nodeset and other is string, compare string to each item
+ # in nodeset s.t. string op string(item)
+ # If one is nodeset and other is boolean, compare boolean to each item
+ # in nodeset s.t. boolean op boolean(item)
if set1.kind_of? Array
a = set1
b = set2
@@ -636,15 +848,23 @@ def equality_relational_compare( set1, op, set2 )
case b
when true, false
- return a.collect {|v| compare( Functions::boolean(v), op, b ) }
+ each_unnode(a).any? do |unnoded|
+ compare(Functions.boolean(unnoded), op, b)
+ end
when Numeric
- return a.collect {|v| compare( Functions::number(v), op, b )}
- when /^\d+(\.\d+)?$/
- b = Functions::number( b )
- return a.collect {|v| compare( Functions::number(v), op, b )}
+ each_unnode(a).any? do |unnoded|
+ compare(Functions.number(unnoded), op, b)
+ end
+ when /\A\d+(\.\d+)?\z/
+ b = Functions.number(b)
+ each_unnode(a).any? do |unnoded|
+ compare(Functions.number(unnoded), op, b)
+ end
else
- b = Functions::string( b )
- return a.collect { |v| compare( Functions::string(v), op, b ) }
+ b = Functions::string(b)
+ each_unnode(a).any? do |unnoded|
+ compare(Functions::string(unnoded), op, b)
+ end
end
else
# If neither is nodeset,
@@ -654,32 +874,52 @@ def equality_relational_compare( set1, op, set2 )
# Else, convert to string
# Else
# Convert both to numbers and compare
- s1 = set1.to_s
- s2 = set2.to_s
- if s1 == 'true' or s1 == 'false' or s2 == 'true' or s2 == 'false'
- set1 = Functions::boolean( set1 )
- set2 = Functions::boolean( set2 )
+ compare(set1, op, set2)
+ end
+ end
+
+ def value_type(value)
+ case value
+ when true, false
+ :boolean
+ when Numeric
+ :number
+ when String
+ :string
+ else
+ raise "[BUG] Unexpected value type: <#{value.inspect}>"
+ end
+ end
+
+ def normalize_compare_values(a, operator, b)
+ a_type = value_type(a)
+ b_type = value_type(b)
+ case operator
+ when :eq, :neq
+ if a_type == :boolean or b_type == :boolean
+ a = Functions.boolean(a) unless a_type == :boolean
+ b = Functions.boolean(b) unless b_type == :boolean
+ elsif a_type == :number or b_type == :number
+ a = Functions.number(a) unless a_type == :number
+ b = Functions.number(b) unless b_type == :number
else
- if op == :eq or op == :neq
- if s1 =~ /^\d+(\.\d+)?$/ or s2 =~ /^\d+(\.\d+)?$/
- set1 = Functions::number( s1 )
- set2 = Functions::number( s2 )
- else
- set1 = Functions::string( set1 )
- set2 = Functions::string( set2 )
- end
- else
- set1 = Functions::number( set1 )
- set2 = Functions::number( set2 )
- end
+ a = Functions.string(a) unless a_type == :string
+ b = Functions.string(b) unless b_type == :string
end
- return compare( set1, op, set2 )
+ when :lt, :lteq, :gt, :gteq
+ a = Functions.number(a) unless a_type == :number
+ b = Functions.number(b) unless b_type == :number
+ else
+ message = "[BUG] Unexpected compare operator: " +
+ "<#{operator.inspect}>: <#{a.inspect}>: <#{b.inspect}>"
+ raise message
end
- return false
+ [a, b]
end
- def compare a, op, b
- case op
+ def compare(a, operator, b)
+ a, b = normalize_compare_values(a, operator, b)
+ case operator
when :eq
a == b
when :neq
@@ -692,13 +932,47 @@ def compare a, op, b
a > b
when :gteq
a >= b
- when :and
- a and b
- when :or
- a or b
else
- false
+ message = "[BUG] Unexpected compare operator: " +
+ "<#{operator.inspect}>: <#{a.inspect}>: <#{b.inspect}>"
+ raise message
+ end
+ end
+
+ def each_unnode(nodeset)
+ return to_enum(__method__, nodeset) unless block_given?
+ nodeset.each do |node|
+ if node.is_a?(XPathNode)
+ unnoded = node.raw_node
+ else
+ unnoded = node
+ end
+ yield(unnoded)
+ end
+ end
+
+ def unnode(nodeset)
+ each_unnode(nodeset).collect do |unnoded|
+ unnoded = yield(unnoded) if block_given?
+ unnoded
+ end
+ end
+ end
+
+ # @private
+ class XPathNode
+ attr_reader :raw_node, :context
+ def initialize(node, context=nil)
+ if node.is_a?(XPathNode)
+ @raw_node = node.raw_node
+ else
+ @raw_node = node
end
+ @context = context || {}
+ end
+
+ def position
+ @context[:position]
end
end
end
diff --git a/test/rexml/data/much_ado.xml b/test/rexml/data/much_ado.xml
index f008fadbb0..0040088c9c 100644
--- a/test/rexml/data/much_ado.xml
+++ b/test/rexml/data/much_ado.xml
@@ -4735,7 +4735,7 @@ CLAUDIO, BENEDICK, HERO, BEATRICE, and Attendants</STAGEDIR>
<LINE>But they shall find, awaked in such a kind,</LINE>
<LINE>Both strength of limb and policy of mind,</LINE>
<LINE>Ability in means and choice of friends,</LINE>
-<LINE>To quit me of them throughly.</LINE>
+<LINE>To quit me of them thoroughly.</LINE>
</SPEECH>
<SPEECH>
diff --git a/test/rexml/data/ofbiz-issues-full-177.xml b/test/rexml/data/ofbiz-issues-full-177.xml
index bfff771d12..e1f7bdfddc 100644
--- a/test/rexml/data/ofbiz-issues-full-177.xml
+++ b/test/rexml/data/ofbiz-issues-full-177.xml
@@ -152,8 +152,8 @@
<!-- desc : Short description for attachment. -->
<!-- ispatch : Whether attachment is a patch file. -->
<!-- filename : Filename of attachment. -->
- <!-- submitter_id : Issuezilla ID of attachement submitter. -->
- <!-- submitting_username : username of attachement submitter. -->
+ <!-- submitter_id : Issuezilla ID of attachment submitter. -->
+ <!-- submitting_username : username of attachment submitter. -->
<!-- data : Encoded attachment. -->
<!-- attachment_iz_url : URL to attachment in iz. -->
diff --git a/test/rexml/data/t75.xml b/test/rexml/data/t75.xml
index 0911fb1b1a..eb3cccee4b 100644
--- a/test/rexml/data/t75.xml
+++ b/test/rexml/data/t75.xml
@@ -1,4 +1,4 @@
-<?xml version="1.0" encoding="ISO-8859-1"?><?pos="3"?>
+<?xml version="1.0" encoding="ISO-8859-1"?>
<!-- generated by hnb 1.9.17 (http://hnb.sourceforge.net) -->
<!DOCTYPE tree[
diff --git a/test/rexml/data/test/tests.xml b/test/rexml/data/test/tests.xml
index cf03b42b0b..fd415679c4 100644
--- a/test/rexml/data/test/tests.xml
+++ b/test/rexml/data/test/tests.xml
@@ -299,7 +299,7 @@
<valueOf select="name(/.)"></valueOf>
<valueOf select="name(/self::node())"></valueOf>
- <!-- name of root elemet -->
+ <!-- name of root element -->
<valueOf select="name(node())">web-app</valueOf>
<valueOf select="name(/node())">web-app</valueOf>
<valueOf select="name(/*)">web-app</valueOf>
@@ -318,7 +318,7 @@
<valueOf select="name(parent::node())"></valueOf>
<valueOf select="name(parent::*)"></valueOf>
- <!-- name of root elemet -->
+ <!-- name of root element -->
<valueOf select="name()">web-app</valueOf>
<valueOf select="name(.)">web-app</valueOf>
<valueOf select="name(../*)">web-app</valueOf>
diff --git a/test/rexml/data/tutorial.xml b/test/rexml/data/tutorial.xml
index bf5783d09a..9c4639b948 100644
--- a/test/rexml/data/tutorial.xml
+++ b/test/rexml/data/tutorial.xml
@@ -286,7 +286,7 @@ el1 << Text.new(" cruel world")
strings.</p>
<p>I can't emphasize this enough, because people do have problems with
- this. REXML can't possibly alway guess correctly how your text is
+ this. REXML can't possibly always guess correctly how your text is
encoded, so it always assumes the text is UTF-8. It also does not warn
you when you try to add text which isn't properly encoded, for the
same reason. You must make sure that you are adding UTF-8 text.
diff --git a/test/rexml/formatter/test_default.rb b/test/rexml/formatter/test_default.rb
new file mode 100644
index 0000000000..aa403dbed6
--- /dev/null
+++ b/test/rexml/formatter/test_default.rb
@@ -0,0 +1,17 @@
+module REXMLTests
+ class DefaultFormatterTest < Test::Unit::TestCase
+ def format(node)
+ formatter = REXML::Formatters::Default.new
+ output = +""
+ formatter.write(node, output)
+ output
+ end
+
+ class InstructionTest < self
+ def test_content_nil
+ instruction = REXML::Instruction.new("target")
+ assert_equal("<?target?>", format(instruction))
+ end
+ end
+ end
+end
diff --git a/test/rexml/functions/test_base.rb b/test/rexml/functions/test_base.rb
new file mode 100644
index 0000000000..daa38156f8
--- /dev/null
+++ b/test/rexml/functions/test_base.rb
@@ -0,0 +1,283 @@
+# frozen_string_literal: false
+require "test/unit/testcase"
+
+require "rexml/document"
+
+# TODO: Split me
+module REXMLTests
+ class FunctionsTester < Test::Unit::TestCase
+ include REXML
+
+ def setup
+ super
+ REXML::Functions.context = nil
+ end
+
+ def test_functions
+ # trivial text() test
+ # confuse-a-function
+ source = "<a>more <b id='1'/><b id='2'>dumb</b><b id='3'/><c/> text</a>"
+ doc = Document.new source
+ res = ""
+ XPath::each(doc.root, "text()") {|val| res << val.to_s}
+ assert_equal "more text", res
+
+ res = XPath::first(doc.root, "b[last()]")
+ assert_equal '3', res.attributes['id']
+ res = XPath::first(doc.root, "b[position()=2]")
+ assert_equal '2', res.attributes['id']
+ res = XPath::first(doc.root, "*[name()='c']")
+ assert_equal "c", res.name
+ end
+
+ # Contributed by Mike Stok
+ def test_starts_with
+ source = <<-EOF
+ <foo>
+ <a href="mailto:a@b.c">a@b.c</a>
+ <a href="http://www.foo.com">http://www.foo.com</a>
+ </foo>
+ EOF
+ doc = Document.new source
+ mailtos = doc.elements.to_a("//a[starts-with(@href, 'mailto:')]")
+ assert_equal 1, mailtos.size
+ assert_equal "mailto:a@b.c", mailtos[0].attributes['href']
+
+ ailtos = doc.elements.to_a("//a[starts-with(@href, 'ailto:')]")
+ assert_equal 0, ailtos.size
+ end
+
+ def test_string_length
+ doc = Document.new <<-EOF
+ <AAA>
+ <Q/>
+ <SSSS/>
+ <BB/>
+ <CCC/>
+ <DDDDDDDD/>
+ <EEEE/>
+ </AAA>
+ EOF
+ assert doc, "create doc"
+
+ set = doc.elements.to_a("//*[string-length(name()) = 3]")
+ assert_equal 2, set.size, "nodes with names length = 3"
+
+ set = doc.elements.to_a("//*[string-length(name()) < 3]")
+ assert_equal 2, set.size, "nodes with names length < 3"
+
+ set = doc.elements.to_a("//*[string-length(name()) > 3]")
+ assert_equal 3, set.size, "nodes with names length > 3"
+ end
+
+ # Test provided by Mike Stok
+ def test_contains
+ source = <<-EOF
+ <foo>
+ <a href="mailto:a@b.c">a@b.c</a>
+ <a href="http://www.foo.com">http://www.foo.com</a>
+ </foo>
+ EOF
+ doc = Document.new source
+
+ [['o', 2], ['foo', 1], ['bar', 0]].each { |test|
+ search, expected = test
+ set = doc.elements.to_a("//a[contains(@href, '#{search}')]")
+ assert_equal expected, set.size
+ }
+ end
+
+ # Mike Stok and Sean Russell
+ def test_substring
+ # examples from http://www.w3.org/TR/xpath#function-substring
+ doc = Document.new('<test string="12345" />')
+
+ #puts XPath.first(d, 'node()[0 + 1]')
+ #d = Document.new("<a b='1'/>")
+ #puts XPath.first(d, 'a[0 mod 0]')
+ [ [1.5, 2.6, '234'],
+ [0, 3, '12'],
+ [0, '0 div 0', ''],
+ [1, '0 div 0', ''],
+ ['-42', '1 div 0', '12345'],
+ ['-1 div 0', '1 div 0', '']
+ ].each { |start, length, expected|
+ set = doc.elements.to_a("//test[substring(@string, #{start}, #{length}) = '#{expected}']")
+ assert_equal 1, set.size, "#{start}, #{length}, '#{expected}'"
+ }
+ end
+
+ def test_substring_angrez
+ testString = REXML::Functions::substring_after("helloworld","hello")
+ assert_equal( 'world', testString )
+ end
+
+ def test_translate
+ source = <<-EOF
+ <doc>
+ <case name='w3c one' result='BAr' /> <!-- w3c -->
+ <case name='w3c two' result='AAA' /> <!-- w3c -->
+ <case name='alchemy' result="gold" /> <!-- mike -->
+ <case name='vbxml one' result='A Space Odyssey' />
+ <case name='vbxml two' result='AbCdEf' />
+ </doc>
+ EOF
+
+ doc = Document.new(source)
+
+ [ ['bar', 'abc', 'ABC', 'w3c one'],
+ ['--aaa--','abc-','ABC', 'w3c two'],
+ ['lead', 'dear language', 'doll groover', 'alchemy'],
+ ['A Space Odissei', 'i', 'y', 'vbxml one'],
+ ['abcdefg', 'aceg', 'ACE', 'vbxml two'],
+ ].each { |arg1, arg2, arg3, name|
+ translate = "translate('#{arg1}', '#{arg2}', '#{arg3}')"
+ set = doc.elements.to_a("//case[@result = #{translate}]")
+ assert_equal 1, set.size, translate
+ assert_equal name, set[0].attributes['name']
+ }
+ end
+
+ def test_name
+ d = REXML::Document.new("<a xmlns:x='foo'><b/><x:b/></a>")
+ assert_equal 1, d.root.elements.to_a('*[name() = "b"]').size
+ assert_equal 1, d.elements.to_a('//*[name() = "x:b"]').size
+ end
+
+ def test_local_name
+ d = REXML::Document.new("<a xmlns:x='foo'><b/><x:b/></a>")
+ assert_equal 2, d.root.elements.to_a('*[local_name() = "b"]').size
+ assert_equal 2, d.elements.to_a('//*[local_name() = "b"]').size
+ end
+
+ def test_substring2
+ doc = Document.new('<test string="12345" />')
+ assert_equal(1,doc.elements.to_a("//test[substring(@string,2)='2345']").size)
+ end
+
+ # Submitted by Kouhei
+ def test_floor_ceiling_round
+ source = "<a><b id='1'/><b id='2'/><b id='3'/></a>"
+ doc = REXML::Document.new(source)
+
+ id_1 = doc.elements["/a/b[@id='1']"]
+ id_2 = doc.elements["/a/b[@id='2']"]
+ id_3 = doc.elements["/a/b[@id='3']"]
+
+ good = {
+ "floor" => [[], [id_1], [id_2], [id_3]],
+ "ceiling" => [[id_1], [id_2], [id_3], []],
+ "round" => [[id_1], [id_2], [id_3], []]
+ }
+ good.each do |key, value|
+ (0..3).each do |i|
+ xpath = "//b[number(@id) = #{key}(#{i+0.5})]"
+ assert_equal(value[i], REXML::XPath.match(doc, xpath))
+ end
+ end
+
+ good["round"] = [[], [id_1], [id_2], [id_3]]
+ good.each do |key, value|
+ (0..3).each do |i|
+ xpath = "//b[number(@id) = #{key}(#{i+0.4})]"
+ assert_equal(value[i], REXML::XPath.match(doc, xpath))
+ end
+ end
+ end
+
+ # Submitted by Kou
+ def test_lang
+ d = Document.new(<<-XML)
+ <a xml:lang="en">
+ <b xml:lang="ja">
+ <c xml:lang="fr"/>
+ <d/>
+ <e xml:lang="ja-JP"/>
+ <f xml:lang="en-US"/>
+ </b>
+ </a>
+ XML
+
+ assert_equal(1, d.elements.to_a("//*[lang('fr')]").size)
+ assert_equal(3, d.elements.to_a("//*[lang('ja')]").size)
+ assert_equal(2, d.elements.to_a("//*[lang('en')]").size)
+ assert_equal(1, d.elements.to_a("//*[lang('en-us')]").size)
+
+ d = Document.new(<<-XML)
+ <root>
+ <para xml:lang="en"/>
+ <div xml:lang="en"><para/></div>
+ <para xml:lang="EN"/>
+ <para xml:lang="en-us"/>
+ </root>
+ XML
+
+ assert_equal(5, d.elements.to_a("//*[lang('en')]").size)
+ end
+
+ def test_ticket_60
+ document = REXML::Document.new("<a><b>A</b><b>1</b></a>")
+ assert_equal( "A", REXML::XPath.first(document, '//b[.="A"]').text )
+ assert_equal( "1", REXML::XPath.first(document, '//b[.="1"]').text )
+ end
+
+ def test_normalize_space
+ source = "<a><!--COMMENT A--><b><!-- COMMENT A --></b></a>"
+ doc = REXML::Document.new(source)
+ predicate = "string(.)=normalize_space('\nCOMMENT \n A \n\n ')"
+ m = REXML::XPath.match(doc, "//comment()[#{predicate}]")
+ assert_equal( [REXML::Comment.new("COMMENT A")], m )
+ end
+
+ def test_normalize_space_strings
+ source = <<-XML
+<a><b>breakfast boosts\t\t
+
+concentration </b><c>
+Coffee beans
+ aroma
+
+
+
+</c><d> Dessert
+ \t\t after dinner</d></a>
+ XML
+ normalized_texts = REXML::XPath.each(REXML::Document.new(source), "normalize-space(//text())").to_a
+ assert_equal([
+ "breakfast boosts concentration",
+ "Coffee beans aroma",
+ "Dessert after dinner",
+ ],
+ normalized_texts)
+ end
+
+ def test_string_nil_without_context
+ doc = REXML::Document.new(<<~XML)
+ <?xml version="1.0" encoding="UTF-8"?>
+ <root>
+ <foo bar="baz"/>
+ <foo bar=""/>
+ </root>
+ XML
+
+ assert_equal([doc.root.elements[2]],
+ REXML::XPath.match(doc,
+ "//foo[@bar=$n]",
+ nil,
+ {"n" => nil}))
+ end
+
+ def test_unregistered_method
+ doc = Document.new("<root/>")
+ assert_nil(XPath::first(doc.root, "to_s()"))
+ end
+
+ def test_nonexistent_function
+ doc = Document.new("<root><nonexistent/></root>")
+ # TODO: Maybe, this is not XPath spec behavior.
+ # This behavior must be reconsidered.
+ assert_equal(doc.root.elements[1],
+ XPath::first(doc.root, "nonexistent()"))
+ end
+ end
+end
diff --git a/test/rexml/functions/test_boolean.rb b/test/rexml/functions/test_boolean.rb
new file mode 100644
index 0000000000..b3e2117c10
--- /dev/null
+++ b/test/rexml/functions/test_boolean.rb
@@ -0,0 +1,75 @@
+# frozen_string_literal: false
+
+require "test/unit"
+require "rexml/document"
+require "rexml/functions"
+
+module REXMLTests
+ class TestFunctionsBoolean < Test::Unit::TestCase
+ def setup
+ REXML::Functions.context = nil
+ end
+
+ def test_true
+ assert_equal(true, REXML::Functions.boolean(true))
+ end
+
+ def test_false
+ assert_equal(false, REXML::Functions.boolean(false))
+ end
+
+ def test_integer_true
+ assert_equal(true, REXML::Functions.boolean(1))
+ end
+
+ def test_integer_positive_zero
+ assert_equal(false, REXML::Functions.boolean(0))
+ end
+
+ def test_integer_negative_zero
+ assert_equal(false, REXML::Functions.boolean(-0))
+ end
+
+ def test_float_true
+ assert_equal(true, REXML::Functions.boolean(1.1))
+ end
+
+ def test_float_positive_zero
+ assert_equal(false, REXML::Functions.boolean(-0.0))
+ end
+
+ def test_float_negative_zero
+ assert_equal(false, REXML::Functions.boolean(-0.0))
+ end
+
+ def test_float_nan
+ assert_equal(false, REXML::Functions.boolean(Float::NAN))
+ end
+
+ def test_string_true
+ assert_equal(true, REXML::Functions.boolean("content"))
+ end
+
+ def test_string_empty
+ assert_equal(false, REXML::Functions.boolean(""))
+ end
+
+ def test_node_set_true
+ root = REXML::Document.new("<root/>").root
+ assert_equal(true, REXML::Functions.boolean([root]))
+ end
+
+ def test_node_set_empty
+ assert_equal(false, REXML::Functions.boolean([]))
+ end
+
+ def test_nil
+ assert_equal(false, REXML::Functions.boolean(nil))
+ end
+
+ def test_context
+ REXML::Functions.context = {node: true}
+ assert_equal(true, REXML::Functions.boolean())
+ end
+ end
+end
diff --git a/test/rexml/functions/test_local_name.rb b/test/rexml/functions/test_local_name.rb
new file mode 100644
index 0000000000..97c9e74852
--- /dev/null
+++ b/test/rexml/functions/test_local_name.rb
@@ -0,0 +1,44 @@
+# frozen_string_literal: false
+
+require "test/unit"
+require "rexml/document"
+require "rexml/functions"
+
+module REXMLTests
+ class TestFunctionsLocalName < Test::Unit::TestCase
+ def setup
+ REXML::Functions.context = nil
+ end
+
+ def test_one
+ document = REXML::Document.new(<<-XML)
+<root xmlns:x="http://example.com/x/">
+ <x:child/>
+</root>
+ XML
+ node_set = document.root.children
+ assert_equal("child", REXML::Functions.local_name(node_set))
+ end
+
+ def test_multiple
+ document = REXML::Document.new(<<-XML)
+<root xmlns:x="http://example.com/x/">
+ <x:child1/>
+ <x:child2/>
+</root>
+ XML
+ node_set = document.root.children
+ assert_equal("child1", REXML::Functions.local_name(node_set))
+ end
+
+ def test_nonexistent
+ assert_equal("", REXML::Functions.local_name([]))
+ end
+
+ def test_context
+ document = REXML::Document.new("<root/>")
+ REXML::Functions.context = {node: document.root}
+ assert_equal("root", REXML::Functions.local_name())
+ end
+ end
+end
diff --git a/test/rexml/functions/test_number.rb b/test/rexml/functions/test_number.rb
new file mode 100644
index 0000000000..16e635701c
--- /dev/null
+++ b/test/rexml/functions/test_number.rb
@@ -0,0 +1,38 @@
+# frozen_string_literal: false
+
+require "test/unit"
+require "rexml/document"
+require "rexml/functions"
+
+module REXMLTests
+ class TestFunctionsNumber < Test::Unit::TestCase
+ def setup
+ REXML::Functions.context = nil
+ end
+
+ def test_true
+ assert_equal(1, REXML::Functions.number(true))
+ end
+
+ def test_false
+ assert_equal(0, REXML::Functions.number(false))
+ end
+
+ def test_numeric
+ assert_equal(29, REXML::Functions.number(29))
+ end
+
+ def test_string_integer
+ assert_equal(100, REXML::Functions.number("100"))
+ end
+
+ def test_string_float
+ assert_equal(-9.13, REXML::Functions.number("-9.13"))
+ end
+
+ def test_node_set
+ root = REXML::Document.new("<root>100</root>").root
+ assert_equal(100, REXML::Functions.number([root]))
+ end
+ end
+end
diff --git a/test/rexml/helper.rb b/test/rexml/helper.rb
new file mode 100644
index 0000000000..3de1327635
--- /dev/null
+++ b/test/rexml/helper.rb
@@ -0,0 +1,35 @@
+# frozen_string_literal: false
+
+require "test-unit"
+
+require "rexml/document"
+
+module Helper
+ module Fixture
+ def fixture_path(*components)
+ File.join(__dir__, "data", *components)
+ end
+ end
+
+ module Global
+ def suppress_warning
+ verbose = $VERBOSE
+ begin
+ $VERBOSE = nil
+ yield
+ ensure
+ $VERBOSE = verbose
+ end
+ end
+
+ def with_default_internal(encoding)
+ default_internal = Encoding.default_internal
+ begin
+ suppress_warning {Encoding.default_internal = encoding}
+ yield
+ ensure
+ suppress_warning {Encoding.default_internal = default_internal}
+ end
+ end
+ end
+end
diff --git a/test/rexml/parse/test_attribute_list_declaration.rb b/test/rexml/parse/test_attribute_list_declaration.rb
new file mode 100644
index 0000000000..4388252846
--- /dev/null
+++ b/test/rexml/parse/test_attribute_list_declaration.rb
@@ -0,0 +1,30 @@
+require "test/unit"
+require "core_assertions"
+
+require "rexml/document"
+
+module REXMLTests
+ class TestParseAttributeListDeclaration < Test::Unit::TestCase
+ include Test::Unit::CoreAssertions
+
+ def test_linear_performance_space
+ seq = [10000, 50000, 100000, 150000, 200000]
+ assert_linear_performance(seq, rehearsal: 10) do |n|
+ REXML::Document.new("<!DOCTYPE schema SYSTEM \"foo.dtd\" [<!ATTLIST " +
+ " " * n +
+ " root v CDATA #FIXED \"test\">]>")
+ end
+ end
+
+ def test_linear_performance_tab_and_gt
+ seq = [10000, 50000, 100000, 150000, 200000]
+ assert_linear_performance(seq, rehearsal: 10) do |n|
+ REXML::Document.new("<!DOCTYPE root [<!ATTLIST " +
+ "\t" * n +
+ "root value CDATA \"" +
+ ">" * n +
+ "\">]>")
+ end
+ end
+ end
+end
diff --git a/test/rexml/parse/test_cdata.rb b/test/rexml/parse/test_cdata.rb
new file mode 100644
index 0000000000..b5f1a3bc47
--- /dev/null
+++ b/test/rexml/parse/test_cdata.rb
@@ -0,0 +1,17 @@
+require "test/unit"
+require "core_assertions"
+
+require "rexml/document"
+
+module REXMLTests
+ class TestParseCData < Test::Unit::TestCase
+ include Test::Unit::CoreAssertions
+
+ def test_linear_performance_gt
+ seq = [10000, 50000, 100000, 150000, 200000]
+ assert_linear_performance(seq, rehearsal: 10) do |n|
+ REXML::Document.new('<description><![CDATA[ ' + ">" * n + ' ]]></description>')
+ end
+ end
+ end
+end
diff --git a/test/rexml/parse/test_character_reference.rb b/test/rexml/parse/test_character_reference.rb
new file mode 100644
index 0000000000..bf8d21903e
--- /dev/null
+++ b/test/rexml/parse/test_character_reference.rb
@@ -0,0 +1,17 @@
+require "test/unit"
+require "core_assertions"
+
+require "rexml/document"
+
+module REXMLTests
+ class TestParseCharacterReference < Test::Unit::TestCase
+ include Test::Unit::CoreAssertions
+
+ def test_linear_performance_many_preceding_zeros
+ seq = [10000, 50000, 100000, 150000, 200000]
+ assert_linear_performance(seq, rehearsal: 10) do |n|
+ REXML::Document.new('<test testing="&#' + "0" * n + '97;"/>')
+ end
+ end
+ end
+end
diff --git a/test/rexml/parse/test_comment.rb b/test/rexml/parse/test_comment.rb
new file mode 100644
index 0000000000..4475dca753
--- /dev/null
+++ b/test/rexml/parse/test_comment.rb
@@ -0,0 +1,151 @@
+require "test/unit"
+require "core_assertions"
+
+require "rexml/document"
+
+module REXMLTests
+ class TestParseComment < Test::Unit::TestCase
+ include Test::Unit::CoreAssertions
+
+ def parse(xml)
+ REXML::Document.new(xml)
+ end
+
+ class TestInvalid < self
+ def test_toplevel_unclosed_comment
+ exception = assert_raise(REXML::ParseException) do
+ parse("<!--")
+ end
+ assert_equal(<<~DETAIL, exception.to_s)
+ Unclosed comment
+ Line: 1
+ Position: 4
+ Last 80 unconsumed characters:
+ DETAIL
+ end
+
+ def test_toplevel_malformed_comment_inner
+ exception = assert_raise(REXML::ParseException) do
+ parse("<!-- -- -->")
+ end
+ assert_equal(<<~DETAIL, exception.to_s)
+ Malformed comment
+ Line: 1
+ Position: 11
+ Last 80 unconsumed characters:
+ DETAIL
+ end
+
+ def test_toplevel_malformed_comment_end
+ exception = assert_raise(REXML::ParseException) do
+ parse("<!-- --->")
+ end
+ assert_equal(<<~DETAIL, exception.to_s)
+ Malformed comment
+ Line: 1
+ Position: 9
+ Last 80 unconsumed characters:
+ DETAIL
+ end
+
+ def test_doctype_malformed_comment_inner
+ exception = assert_raise(REXML::ParseException) do
+ parse("<!DOCTYPE foo [<!-- -- -->")
+ end
+ assert_equal(<<~DETAIL, exception.to_s)
+ Malformed comment
+ Line: 1
+ Position: 26
+ Last 80 unconsumed characters:
+ DETAIL
+ end
+
+ def test_doctype_malformed_comment_end
+ exception = assert_raise(REXML::ParseException) do
+ parse("<!DOCTYPE foo [<!-- --->")
+ end
+ assert_equal(<<~DETAIL, exception.to_s)
+ Malformed comment
+ Line: 1
+ Position: 24
+ Last 80 unconsumed characters:
+ DETAIL
+ end
+
+ def test_after_doctype_malformed_comment_short
+ exception = assert_raise(REXML::ParseException) do
+ parse("<a><!-->")
+ end
+ assert_equal(<<~DETAIL.chomp, exception.to_s)
+ Malformed comment
+ Line: 1
+ Position: 8
+ Last 80 unconsumed characters:
+ -->
+ DETAIL
+ end
+
+ def test_after_doctype_malformed_comment_inner
+ exception = assert_raise(REXML::ParseException) do
+ parse("<a><!-- -- -->")
+ end
+ assert_equal(<<~DETAIL, exception.to_s)
+ Malformed comment
+ Line: 1
+ Position: 14
+ Last 80 unconsumed characters:
+ DETAIL
+ end
+
+ def test_after_doctype_malformed_comment_end
+ exception = assert_raise(REXML::ParseException) do
+ parse("<a><!-- --->")
+ end
+ assert_equal(<<~DETAIL, exception.to_s)
+ Malformed comment
+ Line: 1
+ Position: 12
+ Last 80 unconsumed characters:
+ DETAIL
+ end
+ end
+
+ def test_before_root
+ parser = REXML::Parsers::BaseParser.new('<!-- ok comment --><a></a>')
+
+ events = {}
+ while parser.has_next?
+ event = parser.pull
+ events[event[0]] = event[1]
+ end
+
+ assert_equal(" ok comment ", events[:comment])
+ end
+
+ def test_after_root
+ parser = REXML::Parsers::BaseParser.new('<a></a><!-- ok comment -->')
+
+ events = {}
+ while parser.has_next?
+ event = parser.pull
+ events[event[0]] = event[1]
+ end
+
+ assert_equal(" ok comment ", events[:comment])
+ end
+
+ def test_linear_performance_top_level_gt
+ seq = [10000, 50000, 100000, 150000, 200000]
+ assert_linear_performance(seq, rehearsal: 10) do |n|
+ REXML::Document.new('<!-- ' + ">" * n + ' -->')
+ end
+ end
+
+ def test_linear_performance_in_element_gt
+ seq = [10000, 50000, 100000, 150000, 200000]
+ assert_linear_performance(seq, rehearsal: 10) do |n|
+ REXML::Document.new('<xml><!-- ' + '>' * n + ' --></xml>')
+ end
+ end
+ end
+end
diff --git a/test/rexml/parse/test_document_type_declaration.rb b/test/rexml/parse/test_document_type_declaration.rb
index 55713909e7..99c23745a6 100644
--- a/test/rexml/parse/test_document_type_declaration.rb
+++ b/test/rexml/parse/test_document_type_declaration.rb
@@ -1,9 +1,13 @@
# frozen_string_literal: false
require "test/unit"
+require "core_assertions"
+
require "rexml/document"
module REXMLTests
class TestParseDocumentTypeDeclaration < Test::Unit::TestCase
+ include Test::Unit::CoreAssertions
+
private
def parse(doctype)
REXML::Document.new(<<-XML).doctype
@@ -36,6 +40,66 @@ def test_garbage_plus_before_name_at_line_start
+ r SYSTEM "urn:x-rexml:test" [ ]> <r/>
DETAIL
end
+
+ def test_no_name
+ exception = assert_raise(REXML::ParseException) do
+ parse(<<-DOCTYPE)
+<!DOCTYPE>
+ DOCTYPE
+ end
+ assert_equal(<<-DETAIL.chomp, exception.to_s)
+Malformed DOCTYPE: name is missing
+Line: 3
+Position: 17
+Last 80 unconsumed characters:
+<!DOCTYPE> <r/>
+ DETAIL
+ end
+ end
+
+ class TestUnclosed < self
+ def test_no_extra_node
+ exception = assert_raise(REXML::ParseException) do
+ REXML::Document.new("<!DOCTYPE foo [")
+ end
+ assert_equal(<<~DETAIL.chomp, exception.to_s)
+ Malformed DOCTYPE: unclosed
+ Line: 1
+ Position: 15
+ Last 80 unconsumed characters:
+
+ DETAIL
+ end
+
+ def test_start_element
+ exception = assert_raise(REXML::ParseException) do
+ REXML::Document.new(<<~DOCTYPE)
+ <!DOCTYPE foo [ <r>
+ DOCTYPE
+ end
+ assert_equal(<<~DETAIL.chomp, exception.to_s)
+ Malformed DOCTYPE: invalid declaration
+ Line: 1
+ Position: 20
+ Last 80 unconsumed characters:
+ <r>#{' '}
+ DETAIL
+ end
+
+ def test_text
+ exception = assert_raise(REXML::ParseException) do
+ REXML::Document.new(<<~DOCTYPE)
+ <!DOCTYPE foo [ text
+ DOCTYPE
+ end
+ assert_equal(<<~DETAIL.chomp, exception.to_s)
+ Malformed DOCTYPE: invalid declaration
+ Line: 1
+ Position: 21
+ Last 80 unconsumed characters:
+ text#{' '}
+ DETAIL
+ end
end
class TestExternalID < self
@@ -225,5 +289,29 @@ def parse(internal_subset)
DOCTYPE
end
end
+
+ def test_linear_performance_percent_gt
+ seq = [10000, 50000, 100000, 150000, 200000]
+ assert_linear_performance(seq, rehearsal: 10) do |n|
+ begin
+ REXML::Document.new("<!DOCTYPE root [" + "%>" * n + "]><test/>")
+ rescue
+ end
+ end
+ end
+
+ def test_linear_performance_comment_gt
+ seq = [10000, 50000, 100000, 150000, 200000]
+ assert_linear_performance(seq, rehearsal: 10) do |n|
+ REXML::Document.new("<!DOCTYPE root [<!-- " + ">" * n + " -->]>")
+ end
+ end
+
+ def test_linear_performance_external_entity_right_bracket_gt
+ seq = [10000, 50000, 100000, 150000, 200000]
+ assert_linear_performance(seq, rehearsal: 10) do |n|
+ REXML::Document.new("<!DOCTYPE root [%" + "]>" * n + ";]>")
+ end
+ end
end
end
diff --git a/test/rexml/parse/test_element.rb b/test/rexml/parse/test_element.rb
index e8dce4b997..ab4818dad9 100644
--- a/test/rexml/parse/test_element.rb
+++ b/test/rexml/parse/test_element.rb
@@ -1,8 +1,12 @@
require "test/unit"
+require "core_assertions"
+
require "rexml/document"
module REXMLTests
class TestParseElement < Test::Unit::TestCase
+ include Test::Unit::CoreAssertions
+
def parse(xml)
REXML::Document.new(xml)
end
@@ -43,7 +47,20 @@ def test_empty_namespace_attribute_name
Line: 1
Position: 13
Last 80 unconsumed characters:
+:a=""></x>
+ DETAIL
+ end
+ def test_empty_namespace_attribute_name_with_utf8_character
+ exception = assert_raise(REXML::ParseException) do
+ parse("<x :\xE2\x80\x8B>") # U+200B ZERO WIDTH SPACE
+ end
+ assert_equal(<<-DETAIL.chomp.force_encoding("ASCII-8BIT"), exception.to_s)
+Invalid attribute name: <:\xE2\x80\x8B>
+Line: 1
+Position: 8
+Last 80 unconsumed characters:
+:\xE2\x80\x8B>
DETAIL
end
@@ -72,6 +89,61 @@ def test_garbage_less_than_slash_before_end_tag_at_line_start
</ </x>
DETAIL
end
+
+ def test_after_root
+ exception = assert_raise(REXML::ParseException) do
+ parser = REXML::Parsers::BaseParser.new('<a></a><b>')
+ while parser.has_next?
+ parser.pull
+ end
+ end
+
+ assert_equal(<<~DETAIL.chomp, exception.to_s)
+ Malformed XML: Extra tag at the end of the document (got '<b')
+ Line: 1
+ Position: 10
+ Last 80 unconsumed characters:
+
+ DETAIL
+ end
+
+ def test_after_empty_element_tag_root
+ exception = assert_raise(REXML::ParseException) do
+ parser = REXML::Parsers::BaseParser.new('<a/><b>')
+ while parser.has_next?
+ parser.pull
+ end
+ end
+
+ assert_equal(<<~DETAIL.chomp, exception.to_s)
+ Malformed XML: Extra tag at the end of the document (got '<b')
+ Line: 1
+ Position: 7
+ Last 80 unconsumed characters:
+
+ DETAIL
+ end
+ end
+
+ def test_linear_performance_attribute_value_gt
+ seq = [10000, 50000, 100000, 150000, 200000]
+ assert_linear_performance(seq, rehearsal: 10) do |n|
+ REXML::Document.new('<test testing="' + ">" * n + '"></test>')
+ end
+ end
+
+ def test_linear_performance_deep_same_name_attributes
+ seq = [100, 500, 1000, 1500, 2000]
+ assert_linear_performance(seq, rehearsal: 10) do |n|
+ xml = <<-XML
+<?xml version="1.0"?>
+<root xmlns:ns="ns-uri">
+#{"<x ns:name='ns-value' name='value'>\n" * n}
+#{"</x>\n" * n}
+</root>
+ XML
+ REXML::Document.new(xml)
+ end
end
end
end
diff --git a/test/rexml/parse/test_entity_declaration.rb b/test/rexml/parse/test_entity_declaration.rb
new file mode 100644
index 0000000000..81d95b5868
--- /dev/null
+++ b/test/rexml/parse/test_entity_declaration.rb
@@ -0,0 +1,557 @@
+# frozen_string_literal: false
+require "test/unit"
+require "core_assertions"
+
+require "rexml/document"
+
+module REXMLTests
+ class TestParseEntityDeclaration < Test::Unit::TestCase
+ include Test::Unit::CoreAssertions
+
+ private
+ def xml(internal_subset)
+ <<-XML
+<!DOCTYPE r SYSTEM "urn:x-henrikmartensson:test" [
+#{internal_subset}
+]>
+<r/>
+ XML
+ end
+
+ def parse(internal_subset)
+ REXML::Document.new(xml(internal_subset)).doctype
+ end
+
+ public
+
+ # https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-GEDecl
+ class TestGeneralEntityDeclaration < self
+ # https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-Name
+ class TestName < self
+ def test_prohibited_character
+ exception = assert_raise(REXML::ParseException) do
+ REXML::Document.new("<!DOCTYPE root [<!ENTITY invalid&name \"valid-entity-value\">]>")
+ end
+ assert_equal(<<-DETAIL.chomp, exception.to_s)
+Malformed entity declaration
+Line: 1
+Position: 61
+Last 80 unconsumed characters:
+ invalid&name "valid-entity-value">]>
+ DETAIL
+ end
+ end
+
+ # https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-EntityDef
+ class TestEntityDefinition < self
+ # https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-EntityValue
+ class TestEntityValue < self
+ def test_no_quote
+ exception = assert_raise(REXML::ParseException) do
+ REXML::Document.new("<!DOCTYPE root [<!ENTITY valid-name invalid-entity-value>]>")
+ end
+ assert_equal(<<-DETAIL.chomp, exception.to_s)
+Malformed entity declaration
+Line: 1
+Position: 59
+Last 80 unconsumed characters:
+ valid-name invalid-entity-value>]>
+ DETAIL
+ end
+
+ def test_prohibited_character
+ exception = assert_raise(REXML::ParseException) do
+ REXML::Document.new("<!DOCTYPE root [<!ENTITY valid-name \"% &\">]>")
+ end
+ assert_equal(<<-DETAIL.chomp, exception.to_s)
+Malformed entity declaration
+Line: 1
+Position: 44
+Last 80 unconsumed characters:
+ valid-name "% &">]>
+ DETAIL
+ end
+
+ def test_mixed_quote
+ exception = assert_raise(REXML::ParseException) do
+ REXML::Document.new("<!DOCTYPE root [<!ENTITY valid-name \"invalid-entity-value'>]>")
+ end
+ assert_equal(<<-DETAIL.chomp, exception.to_s)
+Malformed entity declaration
+Line: 1
+Position: 61
+Last 80 unconsumed characters:
+ valid-name "invalid-entity-value'>]>
+ DETAIL
+ end
+ end
+
+ # https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-ExternalID
+ class TestExternalID < self
+ # https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-SystemLiteral
+ class TestSystemLiteral < self
+ def test_no_quote_in_system
+ exception = assert_raise(REXML::ParseException) do
+ REXML::Document.new("<!DOCTYPE root [<!ENTITY valid-name SYSTEM invalid-system-literal>]>")
+ end
+ assert_equal(<<-DETAIL.chomp, exception.to_s)
+Malformed entity declaration
+Line: 1
+Position: 68
+Last 80 unconsumed characters:
+ valid-name SYSTEM invalid-system-literal>]>
+ DETAIL
+ end
+
+ def test_no_quote_in_public
+ exception = assert_raise(REXML::ParseException) do
+ REXML::Document.new("<!DOCTYPE root [<!ENTITY valid-name PUBLIC \"valid-pubid-literal\" invalid-system-literal>]>")
+ end
+ assert_equal(<<-DETAIL.chomp, exception.to_s)
+Malformed entity declaration
+Line: 1
+Position: 90
+Last 80 unconsumed characters:
+ valid-name PUBLIC "valid-pubid-literal" invalid-system-literal>]>
+ DETAIL
+ end
+
+ def test_mixed_quote_in_system
+ exception = assert_raise(REXML::ParseException) do
+ REXML::Document.new("<!DOCTYPE root [<!ENTITY valid-name SYSTEM 'invalid-system-literal\">]>")
+ end
+ assert_equal(<<-DETAIL.chomp, exception.to_s)
+Malformed entity declaration
+Line: 1
+Position: 70
+Last 80 unconsumed characters:
+ valid-name SYSTEM 'invalid-system-literal">]>
+ DETAIL
+ end
+
+ def test_mixed_quote_in_public
+ exception = assert_raise(REXML::ParseException) do
+ REXML::Document.new("<!DOCTYPE root [<!ENTITY valid-name PUBLIC \"valid-pubid-literal\" \"invalid-system-literal'>]>")
+ end
+ assert_equal(<<-DETAIL.chomp, exception.to_s)
+Malformed entity declaration
+Line: 1
+Position: 92
+Last 80 unconsumed characters:
+ valid-name PUBLIC "valid-pubid-literal" "invalid-system-literal'>]>
+ DETAIL
+ end
+
+ def test_no_literal_in_system
+ exception = assert_raise(REXML::ParseException) do
+ REXML::Document.new("<!DOCTYPE root [<!ENTITY valid-name SYSTEM>]>")
+ end
+ assert_equal(<<-DETAIL.chomp, exception.to_s)
+Malformed entity declaration
+Line: 1
+Position: 45
+Last 80 unconsumed characters:
+ valid-name SYSTEM>]>
+ DETAIL
+ end
+
+ def test_no_literal_in_public
+ exception = assert_raise(REXML::ParseException) do
+ REXML::Document.new("<!DOCTYPE root [<!ENTITY valid-name PUBLIC \"valid-pubid-literal\">]>")
+ end
+ assert_equal(<<-DETAIL.chomp, exception.to_s)
+Malformed entity declaration
+Line: 1
+Position: 67
+Last 80 unconsumed characters:
+ valid-name PUBLIC "valid-pubid-literal">]>
+ DETAIL
+ end
+ end
+
+ # https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-PubidLiteral
+ # https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-PubidChar
+ class TestPublicIDLiteral < self
+ def test_no_quote
+ exception = assert_raise(REXML::ParseException) do
+ REXML::Document.new("<!DOCTYPE root [<!ENTITY valid-name PUBLIC invalid-pubid-literal \"valid-system-literal\">]>")
+ end
+ assert_equal(<<-DETAIL.chomp, exception.to_s)
+Malformed entity declaration
+Line: 1
+Position: 90
+Last 80 unconsumed characters:
+ valid-name PUBLIC invalid-pubid-literal "valid-system-literal">]>
+ DETAIL
+ end
+
+ def test_prohibited_pubid_character
+ exception = assert_raise(REXML::ParseException) do
+ # U+3042 HIRAGANA LETTER A
+ REXML::Document.new("<!DOCTYPE root [<!ENTITY valid-name PUBLIC \"\u3042\" \"valid-system-literal\">]>")
+ end
+ assert_equal(<<-DETAIL.force_encoding('utf-8').chomp, exception.to_s.force_encoding('utf-8'))
+Malformed entity declaration
+Line: 1
+Position: 74
+Last 80 unconsumed characters:
+ valid-name PUBLIC "\u3042" "valid-system-literal">]>
+ DETAIL
+ end
+
+ def test_mixed_quote
+ exception = assert_raise(REXML::ParseException) do
+ REXML::Document.new("<!DOCTYPE root [<!ENTITY valid-name PUBLIC \"invalid-pubid-literal' \"valid-system-literal\">]>")
+ end
+ assert_equal(<<-DETAIL.chomp, exception.to_s)
+Malformed entity declaration
+Line: 1
+Position: 92
+Last 80 unconsumed characters:
+ valid-name PUBLIC "invalid-pubid-literal' "valid-system-literal">]>
+ DETAIL
+ end
+
+ def test_no_literal
+ exception = assert_raise(REXML::ParseException) do
+ REXML::Document.new("<!DOCTYPE root [<!ENTITY valid-name PUBLIC>]>")
+ end
+ assert_equal(<<-DETAIL.chomp, exception.to_s)
+Malformed entity declaration
+Line: 1
+Position: 45
+Last 80 unconsumed characters:
+ valid-name PUBLIC>]>
+ DETAIL
+ end
+ end
+ end
+
+ # https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-NDataDecl
+ class TestNotationDataDeclaration < self
+ # https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-NameChar
+ def test_prohibited_character
+ exception = assert_raise(REXML::ParseException) do
+ REXML::Document.new("<!DOCTYPE root [<!ENTITY valid-name PUBLIC \"valid-pubid-literal\" \"valid-system-literal\" NDATA invalid&name>]>")
+ end
+ assert_equal(<<-DETAIL.chomp, exception.to_s)
+Malformed entity declaration
+Line: 1
+Position: 109
+Last 80 unconsumed characters:
+ valid-name PUBLIC "valid-pubid-literal" "valid-system-literal" NDATA invalid&nam
+ DETAIL
+ end
+ end
+
+ def test_entity_value_and_notation_data_declaration
+ exception = assert_raise(REXML::ParseException) do
+ REXML::Document.new("<!DOCTYPE root [<!ENTITY valid-name \"valid-entity-value\" NDATA valid-ndata-value>]>")
+ end
+ assert_equal(<<-DETAIL.chomp, exception.to_s)
+Malformed entity declaration
+Line: 1
+Position: 83
+Last 80 unconsumed characters:
+ valid-name "valid-entity-value" NDATA valid-ndata-value>]>
+ DETAIL
+ end
+ end
+
+ def test_no_space
+ exception = assert_raise(REXML::ParseException) do
+ REXML::Document.new("<!DOCTYPE root [<!ENTITY valid-namePUBLIC\"valid-pubid-literal\"\"valid-system-literal\"NDATAvalid-name>]>")
+ end
+ assert_equal(<<-DETAIL.chomp, exception.to_s)
+Malformed entity declaration
+Line: 1
+Position: 102
+Last 80 unconsumed characters:
+ valid-namePUBLIC"valid-pubid-literal""valid-system-literal"NDATAvalid-name>]>
+ DETAIL
+ end
+ end
+
+ # https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-PEDecl
+ class TestParsedEntityDeclaration < self
+ # https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-Name
+ class TestName < self
+ def test_prohibited_character
+ exception = assert_raise(REXML::ParseException) do
+ REXML::Document.new("<!DOCTYPE root [<!ENTITY % invalid&name \"valid-entity-value\">]>")
+ end
+ assert_equal(<<-DETAIL.chomp, exception.to_s)
+Malformed entity declaration
+Line: 1
+Position: 63
+Last 80 unconsumed characters:
+ % invalid&name "valid-entity-value">]>
+ DETAIL
+ end
+ end
+
+ # https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-PEDef
+ class TestParsedEntityDefinition < self
+ # https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-EntityValue
+ class TestEntityValue < self
+ def test_no_quote
+ exception = assert_raise(REXML::ParseException) do
+ REXML::Document.new("<!DOCTYPE root [<!ENTITY % valid-name invalid-entity-value>]>")
+ end
+ assert_equal(<<-DETAIL.chomp, exception.to_s)
+Malformed entity declaration
+Line: 1
+Position: 61
+Last 80 unconsumed characters:
+ % valid-name invalid-entity-value>]>
+ DETAIL
+ end
+
+ def test_prohibited_character
+ exception = assert_raise(REXML::ParseException) do
+ REXML::Document.new("<!DOCTYPE root [<!ENTITY % valid-name \"% &\">]>")
+ end
+ assert_equal(<<-DETAIL.chomp, exception.to_s)
+Malformed entity declaration
+Line: 1
+Position: 46
+Last 80 unconsumed characters:
+ % valid-name "% &">]>
+ DETAIL
+ end
+
+ def test_mixed_quote
+ exception = assert_raise(REXML::ParseException) do
+ REXML::Document.new("<!DOCTYPE root [<!ENTITY % valid-name 'invalid-entity-value\">]>")
+ end
+ assert_equal(<<-DETAIL.chomp, exception.to_s)
+Malformed entity declaration
+Line: 1
+Position: 63
+Last 80 unconsumed characters:
+ % valid-name 'invalid-entity-value">]>
+ DETAIL
+ end
+ end
+
+ # https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-ExternalID
+ class TestExternalID < self
+ # https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-SystemLiteral
+ class TestSystemLiteral < self
+ def test_no_quote_in_system
+ exception = assert_raise(REXML::ParseException) do
+ REXML::Document.new("<!DOCTYPE root [<!ENTITY % valid-name SYSTEM invalid-system-literal>]>")
+ end
+ assert_equal(<<-DETAIL.chomp, exception.to_s)
+Malformed entity declaration
+Line: 1
+Position: 70
+Last 80 unconsumed characters:
+ % valid-name SYSTEM invalid-system-literal>]>
+ DETAIL
+ end
+
+ def test_no_quote_in_public
+ exception = assert_raise(REXML::ParseException) do
+ REXML::Document.new("<!DOCTYPE root [<!ENTITY % valid-name PUBLIC \"valid-pubid-literal\" invalid-system-literal>]>")
+ end
+ assert_equal(<<-DETAIL.chomp, exception.to_s)
+Malformed entity declaration
+Line: 1
+Position: 92
+Last 80 unconsumed characters:
+ % valid-name PUBLIC "valid-pubid-literal" invalid-system-literal>]>
+ DETAIL
+ end
+
+ def test_mixed_quote_in_system
+ exception = assert_raise(REXML::ParseException) do
+ REXML::Document.new("<!DOCTYPE root [<!ENTITY % valid-name SYSTEM \"invalid-system-literal'>]>")
+ end
+ assert_equal(<<-DETAIL.chomp, exception.to_s)
+Malformed entity declaration
+Line: 1
+Position: 72
+Last 80 unconsumed characters:
+ % valid-name SYSTEM "invalid-system-literal'>]>
+ DETAIL
+ end
+
+ def test_mixed_quote_in_public
+ exception = assert_raise(REXML::ParseException) do
+ REXML::Document.new("<!DOCTYPE root [<!ENTITY % valid-name PUBLIC \"valid-pubid-literal\" 'invalid-system-literal\">]>")
+ end
+ assert_equal(<<-DETAIL.chomp, exception.to_s)
+Malformed entity declaration
+Line: 1
+Position: 94
+Last 80 unconsumed characters:
+ % valid-name PUBLIC "valid-pubid-literal" 'invalid-system-literal">]>
+ DETAIL
+ end
+
+ def test_no_literal_in_system
+ exception = assert_raise(REXML::ParseException) do
+ REXML::Document.new("<!DOCTYPE root [<!ENTITY % valid-name SYSTEM>]>")
+ end
+ assert_equal(<<-DETAIL.chomp, exception.to_s)
+Malformed entity declaration
+Line: 1
+Position: 47
+Last 80 unconsumed characters:
+ % valid-name SYSTEM>]>
+ DETAIL
+ end
+
+ def test_no_literal_in_public
+ exception = assert_raise(REXML::ParseException) do
+ REXML::Document.new("<!DOCTYPE root [<!ENTITY % valid-name PUBLIC \"valid-pubid-literal\">]>")
+ end
+ assert_equal(<<-DETAIL.chomp, exception.to_s)
+Malformed entity declaration
+Line: 1
+Position: 69
+Last 80 unconsumed characters:
+ % valid-name PUBLIC "valid-pubid-literal">]>
+ DETAIL
+ end
+ end
+
+ # https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-PubidLiteral
+ # https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-PubidChar
+ class TestPublicIDLiteral < self
+ def test_no_quote
+ exception = assert_raise(REXML::ParseException) do
+ REXML::Document.new("<!DOCTYPE root [<!ENTITY % valid-name PUBLIC invalid-pubid-literal \"valid-system-literal\">]>")
+ end
+ assert_equal(<<-DETAIL.chomp, exception.to_s)
+Malformed entity declaration
+Line: 1
+Position: 92
+Last 80 unconsumed characters:
+ % valid-name PUBLIC invalid-pubid-literal "valid-system-literal">]>
+ DETAIL
+ end
+
+ def test_prohibited_pubid_character
+ exception = assert_raise(REXML::ParseException) do
+ # U+3042 HIRAGANA LETTER A
+ REXML::Document.new("<!DOCTYPE root [<!ENTITY % valid-name PUBLIC \"\u3042\" \"valid-system-literal\">]>")
+ end
+ assert_equal(<<-DETAIL.force_encoding('utf-8').chomp, exception.to_s.force_encoding('utf-8'))
+Malformed entity declaration
+Line: 1
+Position: 76
+Last 80 unconsumed characters:
+ % valid-name PUBLIC "\u3042" "valid-system-literal">]>
+ DETAIL
+ end
+
+ def test_mixed_quote
+ exception = assert_raise(REXML::ParseException) do
+ REXML::Document.new("<!DOCTYPE root [<!ENTITY % valid-name PUBLIC 'invalid-pubid-literal\" \"valid-system-literal\">]>")
+ end
+ assert_equal(<<-DETAIL.chomp, exception.to_s)
+Malformed entity declaration
+Line: 1
+Position: 94
+Last 80 unconsumed characters:
+ % valid-name PUBLIC 'invalid-pubid-literal" "valid-system-literal">]>
+ DETAIL
+ end
+
+ def test_no_literal
+ exception = assert_raise(REXML::ParseException) do
+ REXML::Document.new("<!DOCTYPE root [<!ENTITY % valid-name PUBLIC>]>")
+ end
+ assert_equal(<<-DETAIL.chomp, exception.to_s)
+Malformed entity declaration
+Line: 1
+Position: 47
+Last 80 unconsumed characters:
+ % valid-name PUBLIC>]>
+ DETAIL
+ end
+ end
+ end
+
+ def test_entity_value_and_notation_data_declaration
+ exception = assert_raise(REXML::ParseException) do
+ REXML::Document.new("<!DOCTYPE root [<!ENTITY % valid-name \"valid-entity-value\" NDATA valid-ndata-value>]>")
+ end
+ assert_equal(<<-DETAIL.chomp, exception.to_s)
+Malformed entity declaration
+Line: 1
+Position: 85
+Last 80 unconsumed characters:
+ % valid-name "valid-entity-value" NDATA valid-ndata-value>]>
+ DETAIL
+ end
+ end
+
+ def test_no_space
+ exception = assert_raise(REXML::ParseException) do
+ REXML::Document.new("<!DOCTYPE root [<!ENTITY %valid-nameSYSTEM\"valid-system-literal\">]>")
+ end
+ assert_equal(<<-DETAIL.chomp, exception.to_s)
+Malformed entity declaration
+Line: 1
+Position: 67
+Last 80 unconsumed characters:
+ %valid-nameSYSTEM"valid-system-literal">]>
+ DETAIL
+ end
+ end
+
+ def test_empty
+ exception = assert_raise(REXML::ParseException) do
+ parse(<<-INTERNAL_SUBSET)
+<!ENTITY>
+ INTERNAL_SUBSET
+ end
+ assert_equal(<<-DETAIL.chomp, exception.to_s)
+Malformed entity declaration
+Line: 5
+Position: 70
+Last 80 unconsumed characters:
+> ]> <r/>
+ DETAIL
+ end
+
+ def test_linear_performance_entity_value_gt
+ seq = [10000, 50000, 100000, 150000, 200000]
+ assert_linear_performance(seq, rehearsal: 10) do |n|
+ REXML::Document.new("<!DOCTYPE rubynet [<!ENTITY rbconfig.ruby_version \"" +
+ ">" * n +
+ "\">]>")
+ end
+ end
+
+ def test_linear_performance_entity_value_gt_right_bracket
+ seq = [10000, 50000, 100000, 150000, 200000]
+ assert_linear_performance(seq, rehearsal: 10) do |n|
+ REXML::Document.new("<!DOCTYPE rubynet [<!ENTITY rbconfig.ruby_version \"" +
+ ">]" * n +
+ "\">]>")
+ end
+ end
+
+ def test_linear_performance_system_literal_in_system_gt_right_bracket
+ seq = [10000, 50000, 100000, 150000, 200000]
+ assert_linear_performance(seq, rehearsal: 10) do |n|
+ REXML::Document.new("<!DOCTYPE rubynet [<!ENTITY rbconfig.ruby_version SYSTEM \"" +
+ ">]" * n +
+ "\">]>")
+ end
+ end
+
+ def test_linear_performance_system_literal_in_public_gt_right_bracket
+ seq = [10000, 50000, 100000, 150000, 200000]
+ assert_linear_performance(seq, rehearsal: 10) do |n|
+ REXML::Document.new("<!DOCTYPE rubynet [<!ENTITY rbconfig.ruby_version PUBLIC \"pubid-literal\" \"" +
+ ">]" * n +
+ "\">]>")
+ end
+ end
+ end
+end
diff --git a/test/rexml/parse/test_notation_declaration.rb b/test/rexml/parse/test_notation_declaration.rb
index 19a0536d0a..9e81b6a428 100644
--- a/test/rexml/parse/test_notation_declaration.rb
+++ b/test/rexml/parse/test_notation_declaration.rb
@@ -35,7 +35,7 @@ def test_no_name
Line: 5
Position: 72
Last 80 unconsumed characters:
- <!NOTATION> ]> <r/>
+<!NOTATION> ]> <r/>
DETAIL
end
diff --git a/test/rexml/parse/test_processing_instruction.rb b/test/rexml/parse/test_processing_instruction.rb
new file mode 100644
index 0000000000..ba381dc4cd
--- /dev/null
+++ b/test/rexml/parse/test_processing_instruction.rb
@@ -0,0 +1,127 @@
+require "test/unit"
+require "core_assertions"
+
+require "rexml/document"
+
+module REXMLTests
+ class TestParseProcessingInstruction < Test::Unit::TestCase
+ include Test::Unit::CoreAssertions
+
+ def parse(xml)
+ REXML::Document.new(xml)
+ end
+
+ class TestInvalid < self
+ def test_no_name
+ exception = assert_raise(REXML::ParseException) do
+ parse("<??>")
+ end
+ assert_equal(<<-DETAIL.chomp, exception.to_s)
+Malformed XML: Invalid processing instruction node: invalid name
+Line: 1
+Position: 4
+Last 80 unconsumed characters:
+?>
+ DETAIL
+ end
+
+ def test_unclosed_content
+ exception = assert_raise(REXML::ParseException) do
+ parse("<?name content")
+ end
+ assert_equal(<<-DETAIL.chomp, exception.to_s)
+Malformed XML: Unclosed processing instruction
+Line: 1
+Position: 14
+Last 80 unconsumed characters:
+content
+ DETAIL
+ end
+
+ def test_unclosed_no_content
+ exception = assert_raise(REXML::ParseException) do
+ parse("<?name")
+ end
+ assert_equal(<<-DETAIL.chomp, exception.to_s)
+Malformed XML: Unclosed processing instruction
+Line: 1
+Position: 6
+Last 80 unconsumed characters:
+
+ DETAIL
+ end
+
+ def test_xml_declaration_not_at_document_start
+ exception = assert_raise(REXML::ParseException) do
+ parser = REXML::Parsers::BaseParser.new('<a><?xml version="1.0" ?></a>')
+ while parser.has_next?
+ parser.pull
+ end
+ end
+
+ assert_equal(<<~DETAIL.chomp, exception.to_s)
+ Malformed XML: XML declaration is not at the start
+ Line: 1
+ Position: 25
+ Last 80 unconsumed characters:
+
+ DETAIL
+ end
+ end
+
+ def test_comment
+ doc = parse(<<-XML)
+<?x y
+<!--?><?x -->?>
+<r/>
+ XML
+ assert_equal([["x", "y\n<!--"],
+ ["x", "-->"]],
+ [[doc.children[0].target, doc.children[0].content],
+ [doc.children[1].target, doc.children[1].content]])
+ end
+
+ def test_before_root
+ parser = REXML::Parsers::BaseParser.new('<?abc version="1.0" ?><a></a>')
+
+ events = {}
+ while parser.has_next?
+ event = parser.pull
+ events[event[0]] = event[1]
+ end
+
+ assert_equal("abc", events[:processing_instruction])
+ end
+
+ def test_after_root
+ parser = REXML::Parsers::BaseParser.new('<a></a><?abc version="1.0" ?>')
+
+ events = {}
+ while parser.has_next?
+ event = parser.pull
+ events[event[0]] = event[1]
+ end
+
+ assert_equal("abc", events[:processing_instruction])
+ end
+
+ def test_content_question
+ document = REXML::Document.new("<a><?name con?tent?></a>")
+ assert_equal("con?tent", document.root.children.first.content)
+ end
+
+ def test_linear_performance_gt
+ seq = [10000, 50000, 100000, 150000, 200000]
+ assert_linear_performance(seq, rehearsal: 10) do |n|
+ REXML::Document.new("<?xml version=\"1.0\" " + ">" * n + " ?>")
+ end
+ end
+
+ def test_linear_performance_tab
+ seq = [10000, 50000, 100000, 150000, 200000]
+ assert_linear_performance(seq, rehearsal: 10) do |n|
+ REXML::Document.new("<?name" + "\t" * n + "version=\"1.0\" > ?>")
+ end
+ end
+ end
+end
diff --git a/test/rexml/parse/test_text.rb b/test/rexml/parse/test_text.rb
new file mode 100644
index 0000000000..04f553ae8b
--- /dev/null
+++ b/test/rexml/parse/test_text.rb
@@ -0,0 +1,57 @@
+require "test/unit"
+require 'rexml/parsers/baseparser'
+
+module REXMLTests
+ class TestParseText < Test::Unit::TestCase
+ class TestInvalid < self
+ def test_before_root
+ exception = assert_raise(REXML::ParseException) do
+ parser = REXML::Parsers::BaseParser.new('b<a></a>')
+ while parser.has_next?
+ parser.pull
+ end
+ end
+
+ assert_equal(<<~DETAIL.chomp, exception.to_s)
+ Malformed XML: Content at the start of the document (got 'b')
+ Line: 1
+ Position: 4
+ Last 80 unconsumed characters:
+ <a>
+ DETAIL
+ end
+
+ def test_after_root
+ exception = assert_raise(REXML::ParseException) do
+ parser = REXML::Parsers::BaseParser.new('<a></a>c')
+ while parser.has_next?
+ parser.pull
+ end
+ end
+
+ assert_equal(<<~DETAIL.chomp, exception.to_s)
+ Malformed XML: Extra content at the end of the document (got 'c')
+ Line: 1
+ Position: 8
+ Last 80 unconsumed characters:
+
+ DETAIL
+ end
+ end
+
+ def test_whitespace_characters_after_root
+ parser = REXML::Parsers::BaseParser.new('<a>b</a> ')
+
+ events = []
+ while parser.has_next?
+ event = parser.pull
+ case event[0]
+ when :text
+ events << event[1]
+ end
+ end
+
+ assert_equal(["b"], events)
+ end
+ end
+end
diff --git a/test/rexml/parser/test_base_parser.rb b/test/rexml/parser/test_base_parser.rb
new file mode 100644
index 0000000000..6f213978c0
--- /dev/null
+++ b/test/rexml/parser/test_base_parser.rb
@@ -0,0 +1,62 @@
+# frozen_string_literal: false
+
+require 'rexml/parsers/baseparser'
+
+module REXMLTests
+ class BaseParserTester < Test::Unit::TestCase
+ def test_large_xml
+ large_text = "a" * 100_000
+ xml = <<-XML
+ <?xml version="1.0"?>
+ <root>
+ <child>#{large_text}</child>
+ <child>#{large_text}</child>
+ </root>
+ XML
+
+ parser = REXML::Parsers::BaseParser.new(xml)
+ while parser.has_next?
+ parser.pull
+ end
+
+ assert do
+ parser.position < xml.bytesize
+ end
+ end
+
+ def test_attribute_prefixed_by_xml
+ xml = <<-XML
+ <?xml version="1.0" encoding="UTF-8"?>
+ <!DOCTYPE html>
+ <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
+ <head>
+ <title>XHTML Document</title>
+ </head>
+ <body>
+ <h1>XHTML Document</h1>
+ <p xml:lang="ja" lang="ja">For Japanese</p>
+ </body>
+ </html>
+ XML
+
+ parser = REXML::Parsers::BaseParser.new(xml)
+ 5.times {parser.pull}
+
+ html = parser.pull
+ assert_equal([:start_element,
+ "html",
+ {"xmlns" => "http://www.w3.org/1999/xhtml",
+ "xml:lang" => "en",
+ "lang" => "en"}],
+ html)
+
+ 15.times {parser.pull}
+
+ p = parser.pull
+ assert_equal([:start_element,
+ "p",
+ {"xml:lang" => "ja", "lang" => "ja"}],
+ p)
+ end
+ end
+end
diff --git a/test/rexml/parser/test_sax2.rb b/test/rexml/parser/test_sax2.rb
index 91d135f5f7..c254890744 100644
--- a/test/rexml/parser/test_sax2.rb
+++ b/test/rexml/parser/test_sax2.rb
@@ -4,200 +4,200 @@
require "rexml/sax2listener"
module REXMLTests
-class TestSAX2Parser < Test::Unit::TestCase
- class TestDocumentTypeDeclaration < self
- private
- def xml(internal_subset)
- <<-XML
+ class TestSAX2Parser < Test::Unit::TestCase
+ class TestDocumentTypeDeclaration < self
+ private
+ def xml(internal_subset)
+ <<-XML
<!DOCTYPE r SYSTEM "urn:x-henrikmartensson:test" [
#{internal_subset}
]>
<r/>
- XML
- end
+ XML
+ end
- class TestEntityDeclaration < self
- class Listener
- include REXML::SAX2Listener
- attr_reader :entity_declarations
- def initialize
- @entity_declarations = []
- end
+ class TestEntityDeclaration < self
+ class Listener
+ include REXML::SAX2Listener
+ attr_reader :entity_declarations
+ def initialize
+ @entity_declarations = []
+ end
- def entitydecl(declaration)
- super
- @entity_declarations << declaration
+ def entitydecl(declaration)
+ super
+ @entity_declarations << declaration
+ end
end
- end
- private
- def parse(internal_subset)
- listener = Listener.new
- parser = REXML::Parsers::SAX2Parser.new(xml(internal_subset))
- parser.listen(listener)
- parser.parse
- listener.entity_declarations
- end
+ private
+ def parse(internal_subset)
+ listener = Listener.new
+ parser = REXML::Parsers::SAX2Parser.new(xml(internal_subset))
+ parser.listen(listener)
+ parser.parse
+ listener.entity_declarations
+ end
- class TestGeneralEntity < self
- class TestValue < self
- def test_double_quote
- assert_equal([["name", "value"]], parse(<<-INTERNAL_SUBSET))
+ class TestGeneralEntity < self
+ class TestValue < self
+ def test_double_quote
+ assert_equal([["name", "value"]], parse(<<-INTERNAL_SUBSET))
<!ENTITY name "value">
- INTERNAL_SUBSET
- end
+ INTERNAL_SUBSET
+ end
- def test_single_quote
- assert_equal([["name", "value"]], parse(<<-INTERNAL_SUBSET))
+ def test_single_quote
+ assert_equal([["name", "value"]], parse(<<-INTERNAL_SUBSET))
<!ENTITY name 'value'>
- INTERNAL_SUBSET
+ INTERNAL_SUBSET
+ end
end
- end
- class TestExternlID < self
- class TestSystem < self
- def test_with_ndata
- declaration = [
- "name",
- "SYSTEM", "system-literal",
- "NDATA", "ndata-name",
- ]
- assert_equal([declaration],
- parse(<<-INTERNAL_SUBSET))
+ class TestExternlID < self
+ class TestSystem < self
+ def test_with_ndata
+ declaration = [
+ "name",
+ "SYSTEM", "system-literal",
+ "NDATA", "ndata-name",
+ ]
+ assert_equal([declaration],
+ parse(<<-INTERNAL_SUBSET))
<!ENTITY name SYSTEM "system-literal" NDATA ndata-name>
+ INTERNAL_SUBSET
+ end
+
+ def test_without_ndata
+ declaration = [
+ "name",
+ "SYSTEM", "system-literal",
+ ]
+ assert_equal([declaration],
+ parse(<<-INTERNAL_SUBSET))
+<!ENTITY name SYSTEM "system-literal">
+ INTERNAL_SUBSET
+ end
+ end
+
+ class TestPublic < self
+ def test_with_ndata
+ declaration = [
+ "name",
+ "PUBLIC", "public-literal", "system-literal",
+ "NDATA", "ndata-name",
+ ]
+ assert_equal([declaration],
+ parse(<<-INTERNAL_SUBSET))
+<!ENTITY name PUBLIC "public-literal" "system-literal" NDATA ndata-name>
+ INTERNAL_SUBSET
+ end
+
+ def test_without_ndata
+ declaration = [
+ "name",
+ "PUBLIC", "public-literal", "system-literal",
+ ]
+ assert_equal([declaration], parse(<<-INTERNAL_SUBSET))
+<!ENTITY name PUBLIC "public-literal" "system-literal">
+ INTERNAL_SUBSET
+ end
+ end
+ end
+ end
+
+ class TestParameterEntity < self
+ class TestValue < self
+ def test_double_quote
+ assert_equal([["%", "name", "value"]], parse(<<-INTERNAL_SUBSET))
+<!ENTITY % name "value">
INTERNAL_SUBSET
end
- def test_without_ndata
- declaration = [
- "name",
- "SYSTEM", "system-literal",
- ]
- assert_equal([declaration],
- parse(<<-INTERNAL_SUBSET))
-<!ENTITY name SYSTEM "system-literal">
+ def test_single_quote
+ assert_equal([["%", "name", "value"]], parse(<<-INTERNAL_SUBSET))
+<!ENTITY % name 'value'>
INTERNAL_SUBSET
end
end
- class TestPublic < self
- def test_with_ndata
+ class TestExternlID < self
+ def test_system
declaration = [
+ "%",
"name",
- "PUBLIC", "public-literal", "system-literal",
- "NDATA", "ndata-name",
+ "SYSTEM", "system-literal",
]
assert_equal([declaration],
- parse(<<-INTERNAL_SUBSET))
-<!ENTITY name PUBLIC "public-literal" "system-literal" NDATA ndata-name>
+ parse(<<-INTERNAL_SUBSET))
+<!ENTITY % name SYSTEM "system-literal">
INTERNAL_SUBSET
end
- def test_without_ndata
+ def test_public
declaration = [
+ "%",
"name",
"PUBLIC", "public-literal", "system-literal",
]
assert_equal([declaration], parse(<<-INTERNAL_SUBSET))
-<!ENTITY name PUBLIC "public-literal" "system-literal">
+<!ENTITY % name PUBLIC "public-literal" "system-literal">
INTERNAL_SUBSET
end
end
end
end
- class TestParameterEntity < self
- class TestValue < self
- def test_double_quote
- assert_equal([["%", "name", "value"]], parse(<<-INTERNAL_SUBSET))
-<!ENTITY % name "value">
- INTERNAL_SUBSET
+ class TestNotationDeclaration < self
+ class Listener
+ include REXML::SAX2Listener
+ attr_reader :notation_declarations
+ def initialize
+ @notation_declarations = []
end
- def test_single_quote
- assert_equal([["%", "name", "value"]], parse(<<-INTERNAL_SUBSET))
-<!ENTITY % name 'value'>
- INTERNAL_SUBSET
+ def notationdecl(*declaration)
+ super
+ @notation_declarations << declaration
end
end
+ private
+ def parse(internal_subset)
+ listener = Listener.new
+ parser = REXML::Parsers::SAX2Parser.new(xml(internal_subset))
+ parser.listen(listener)
+ parser.parse
+ listener.notation_declarations
+ end
+
class TestExternlID < self
def test_system
- declaration = [
- "%",
- "name",
- "SYSTEM", "system-literal",
- ]
+ declaration = ["name", "SYSTEM", nil, "system-literal"]
assert_equal([declaration],
- parse(<<-INTERNAL_SUBSET))
-<!ENTITY % name SYSTEM "system-literal">
+ parse(<<-INTERNAL_SUBSET))
+<!NOTATION name SYSTEM "system-literal">
INTERNAL_SUBSET
end
def test_public
- declaration = [
- "%",
- "name",
- "PUBLIC", "public-literal", "system-literal",
- ]
+ declaration = ["name", "PUBLIC", "public-literal", "system-literal"]
assert_equal([declaration], parse(<<-INTERNAL_SUBSET))
-<!ENTITY % name PUBLIC "public-literal" "system-literal">
+<!NOTATION name PUBLIC "public-literal" "system-literal">
INTERNAL_SUBSET
end
end
- end
- end
- class TestNotationDeclaration < self
- class Listener
- include REXML::SAX2Listener
- attr_reader :notation_declarations
- def initialize
- @notation_declarations = []
- end
-
- def notationdecl(*declaration)
- super
- @notation_declarations << declaration
- end
- end
-
- private
- def parse(internal_subset)
- listener = Listener.new
- parser = REXML::Parsers::SAX2Parser.new(xml(internal_subset))
- parser.listen(listener)
- parser.parse
- listener.notation_declarations
- end
-
- class TestExternlID < self
- def test_system
- declaration = ["name", "SYSTEM", nil, "system-literal"]
- assert_equal([declaration],
- parse(<<-INTERNAL_SUBSET))
-<!NOTATION name SYSTEM "system-literal">
- INTERNAL_SUBSET
- end
-
- def test_public
- declaration = ["name", "PUBLIC", "public-literal", "system-literal"]
- assert_equal([declaration], parse(<<-INTERNAL_SUBSET))
-<!NOTATION name PUBLIC "public-literal" "system-literal">
- INTERNAL_SUBSET
- end
- end
-
- class TestPublicID < self
- def test_literal
- declaration = ["name", "PUBLIC", "public-literal", nil]
- assert_equal([declaration],
- parse(<<-INTERNAL_SUBSET))
+ class TestPublicID < self
+ def test_literal
+ declaration = ["name", "PUBLIC", "public-literal", nil]
+ assert_equal([declaration],
+ parse(<<-INTERNAL_SUBSET))
<!NOTATION name PUBLIC "public-literal">
- INTERNAL_SUBSET
+ INTERNAL_SUBSET
+ end
end
end
end
end
end
-end
diff --git a/test/rexml/parser/test_tree.rb b/test/rexml/parser/test_tree.rb
index 8a5d9d1223..315be9c23f 100644
--- a/test/rexml/parser/test_tree.rb
+++ b/test/rexml/parser/test_tree.rb
@@ -4,40 +4,39 @@
require "rexml/parsers/treeparser"
module REXMLTests
-class TestTreeParser < Test::Unit::TestCase
- class TestInvalid < self
- def test_unmatched_close_tag
- xml = "<root></not-root>"
- exception = assert_raise(REXML::ParseException) do
- parse(xml)
- end
- assert_equal(<<-MESSAGE, exception.to_s)
+ class TestTreeParser < Test::Unit::TestCase
+ private def parse(xml)
+ document = REXML::Document.new
+ parser = REXML::Parsers::TreeParser.new(xml, document)
+ parser.parse
+ end
+
+ class TestInvalid < self
+ def test_unmatched_close_tag
+ xml = "<root></not-root>"
+ exception = assert_raise(REXML::ParseException) do
+ parse(xml)
+ end
+ assert_equal(<<-MESSAGE, exception.to_s)
Missing end tag for 'root' (got 'not-root')
Line: 1
Position: #{xml.bytesize}
Last 80 unconsumed characters:
- MESSAGE
- end
-
- def test_no_close_tag
- xml = "<root>"
- exception = assert_raise(REXML::ParseException) do
- parse(xml)
+ MESSAGE
end
- assert_equal(<<-MESSAGE, exception.to_s)
-No close tag for /root
+
+ def test_no_close_tag
+ xml = "<root>"
+ exception = assert_raise(REXML::ParseException) do
+ parse(xml)
+ end
+ assert_equal(<<-MESSAGE, exception.to_s)
+Missing end tag for '/root'
Line: 1
Position: #{xml.bytesize}
Last 80 unconsumed characters:
- MESSAGE
- end
-
- private
- def parse(xml)
- document = REXML::Document.new
- parser = REXML::Parsers::TreeParser.new(xml, document)
- parser.parse
+ MESSAGE
+ end
end
end
end
-end
diff --git a/test/rexml/parser/test_ultra_light.rb b/test/rexml/parser/test_ultra_light.rb
index cb6ee5a8ab..d1364d6a99 100644
--- a/test/rexml/parser/test_ultra_light.rb
+++ b/test/rexml/parser/test_ultra_light.rb
@@ -3,67 +3,66 @@
require "rexml/parsers/ultralightparser"
module REXMLTests
-class TestUltraLightParser < Test::Unit::TestCase
- class TestDocumentTypeDeclaration < self
- def test_entity_declaration
- assert_equal([
- [
- :start_doctype,
- :parent,
- "root",
- "SYSTEM",
- "urn:x-test",
- nil,
- [:entitydecl, "name", "value"]
+ class TestUltraLightParser < Test::Unit::TestCase
+ class TestDocumentTypeDeclaration < self
+ def test_entity_declaration
+ assert_equal([
+ [
+ :start_doctype,
+ :parent,
+ "root",
+ "SYSTEM",
+ "urn:x-test",
+ nil,
+ [:entitydecl, "name", "value"]
+ ],
+ [:start_element, :parent, "root", {}],
],
- [:start_element, :parent, "root", {}],
- [:text, "\n"],
- ],
- parse(<<-INTERNAL_SUBSET))
+ parse(<<-INTERNAL_SUBSET))
<!ENTITY name "value">
- INTERNAL_SUBSET
- end
+ INTERNAL_SUBSET
+ end
- private
- def xml(internal_subset)
- <<-XML
+ private
+ def xml(internal_subset)
+ <<-XML
<!DOCTYPE root SYSTEM "urn:x-test" [
#{internal_subset}
]>
<root/>
- XML
- end
+ XML
+ end
- def parse(internal_subset)
- parser = REXML::Parsers::UltraLightParser.new(xml(internal_subset))
- normalize(parser.parse)
- end
+ def parse(internal_subset)
+ parser = REXML::Parsers::UltraLightParser.new(xml(internal_subset))
+ normalize(parser.parse)
+ end
- def normalize(root)
- root.collect do |child|
- normalize_child(child)
+ def normalize(root)
+ root.collect do |child|
+ normalize_child(child)
+ end
end
- end
- def normalize_child(child)
- tag = child.first
- case tag
- when :start_doctype
- normalized_parent = :parent
- normalized_doctype = child.dup
- normalized_doctype[1] = normalized_parent
- normalized_doctype
- when :start_element
- tag, parent, name, attributes, *children = child
- normalized_parent = :parent
- normalized_children = children.collect do |sub_child|
- normalize_child(sub_child)
+ def normalize_child(child)
+ tag = child.first
+ case tag
+ when :start_doctype
+ normalized_parent = :parent
+ normalized_doctype = child.dup
+ normalized_doctype[1] = normalized_parent
+ normalized_doctype
+ when :start_element
+ tag, _parent, name, attributes, *children = child
+ normalized_parent = :parent
+ normalized_children = children.collect do |sub_child|
+ normalize_child(sub_child)
+ end
+ [tag, normalized_parent, name, attributes, *normalized_children]
+ else
+ child
end
- [tag, normalized_parent, name, attributes, *normalized_children]
- else
- child
end
end
end
end
-end
diff --git a/test/rexml/parser/test_xpath.rb b/test/rexml/parser/test_xpath.rb
new file mode 100644
index 0000000000..360b9b793a
--- /dev/null
+++ b/test/rexml/parser/test_xpath.rb
@@ -0,0 +1,127 @@
+# frozen_string_literal: false
+
+require "test/unit"
+require "rexml/parsers/xpathparser"
+
+module REXMLTests
+ class TestXPathParser < Test::Unit::TestCase
+ def self.sub_test_case(name, &block)
+ parent_test_case = self
+ sub_test_case = Class.new(self) do
+ singleton_class = class << self; self; end
+ singleton_class.__send__(:define_method, :name) do
+ [parent_test_case.name, name].compact.join("::")
+ end
+ end
+ sub_test_case.class_eval(&block)
+ sub_test_case
+ end
+
+ sub_test_case("#abbreviate") do
+ def abbreviate(xpath)
+ parser = REXML::Parsers::XPathParser.new
+ parser.abbreviate(xpath)
+ end
+
+ def test_document
+ assert_equal("/",
+ abbreviate("/"))
+ end
+
+ def test_descendant_or_self_only
+ assert_equal("//",
+ abbreviate("/descendant-or-self::node()/"))
+ end
+
+ def test_descendant_or_self_absolute
+ assert_equal("//a/b",
+ abbreviate("/descendant-or-self::node()/a/b"))
+ end
+
+ def test_descendant_or_self_relative
+ assert_equal("a//b",
+ abbreviate("a/descendant-or-self::node()/b"))
+ end
+
+ def test_descendant_or_self_not_node
+ assert_equal("/descendant-or-self::text()",
+ abbreviate("/descendant-or-self::text()"))
+ end
+
+ def test_self_absolute
+ assert_equal("/a/./b",
+ abbreviate("/a/self::node()/b"))
+ end
+
+ def test_self_relative
+ assert_equal("a/./b",
+ abbreviate("a/self::node()/b"))
+ end
+
+ def test_self_not_node
+ assert_equal("/self::text()",
+ abbreviate("/self::text()"))
+ end
+
+ def test_parent_absolute
+ assert_equal("/a/../b",
+ abbreviate("/a/parent::node()/b"))
+ end
+
+ def test_parent_relative
+ assert_equal("a/../b",
+ abbreviate("a/parent::node()/b"))
+ end
+
+ def test_parent_not_node
+ assert_equal("/a/parent::text()",
+ abbreviate("/a/parent::text()"))
+ end
+
+ def test_any_absolute
+ assert_equal("/*/a",
+ abbreviate("/*/a"))
+ end
+
+ def test_any_relative
+ assert_equal("a/*/b",
+ abbreviate("a/*/b"))
+ end
+
+ def test_following_sibling_absolute
+ assert_equal("/following-sibling::a/b",
+ abbreviate("/following-sibling::a/b"))
+ end
+
+ def test_following_sibling_relative
+ assert_equal("a/following-sibling::b/c",
+ abbreviate("a/following-sibling::b/c"))
+ end
+
+ def test_predicate_index
+ assert_equal("a[5]/b",
+ abbreviate("a[5]/b"))
+ end
+
+ def test_attribute_relative
+ assert_equal("a/@b",
+ abbreviate("a/attribute::b"))
+ end
+
+ def test_filter_attribute
+ assert_equal("a/b[@i = 1]/c",
+ abbreviate("a/b[attribute::i=1]/c"))
+ end
+
+ def test_filter_string_single_quote
+ assert_equal("a/b[@name = \"single ' quote\"]/c",
+ abbreviate("a/b[attribute::name=\"single ' quote\"]/c"))
+ end
+
+ def test_filter_string_double_quote
+ assert_equal("a/b[@name = 'double \" quote']/c",
+ abbreviate("a/b[attribute::name='double \" quote']/c"))
+ end
+ end
+ end
+end
diff --git a/test/rexml/run.rb b/test/rexml/run.rb
new file mode 100755
index 0000000000..089318b014
--- /dev/null
+++ b/test/rexml/run.rb
@@ -0,0 +1,13 @@
+#!/usr/bin/env ruby
+
+$VERBOSE = true
+
+base_dir = File.dirname(File.expand_path(__dir__))
+lib_dir = File.join(base_dir, "lib")
+test_dir = File.join(base_dir, "test")
+
+$LOAD_PATH.unshift(lib_dir)
+
+require_relative "helper"
+
+exit(Test::Unit::AutoRunner.run(true, test_dir))
diff --git a/test/rexml/test_attribute.rb b/test/rexml/test_attribute.rb
new file mode 100644
index 0000000000..b66e462d4a
--- /dev/null
+++ b/test/rexml/test_attribute.rb
@@ -0,0 +1,12 @@
+module REXMLTests
+ class AttributeTest < Test::Unit::TestCase
+ def test_empty_prefix
+ error = assert_raise(ArgumentError) do
+ REXML::Attribute.new(":x")
+ end
+ assert_equal("name must be " +
+ "\#{PREFIX}:\#{LOCAL_NAME} or \#{LOCAL_NAME}: <\":x\">",
+ error.message)
+ end
+ end
+end
diff --git a/test/rexml/test_attributes.rb b/test/rexml/test_attributes.rb
index d6f566bdf8..09fde44224 100644
--- a/test/rexml/test_attributes.rb
+++ b/test/rexml/test_attributes.rb
@@ -1,6 +1,4 @@
# frozen_string_literal: false
-require 'test/unit/testcase'
-require 'rexml/document'
module REXMLTests
class AttributesTester < Test::Unit::TestCase
@@ -180,18 +178,27 @@ def test_amp_and_lf_attributes
attr_test('name','value with LF 
 & ampersand')
end
- def test_quoting
+ def test_quote_root
d = Document.new(%q{<a x='1' y="2"/>})
assert_equal( %q{<a x='1' y='2'/>}, d.to_s )
d.root.context[:attribute_quote] = :quote
assert_equal( %q{<a x="1" y="2"/>}, d.to_s )
+ end
+ def test_quote_sub_element
d = Document.new(%q{<a x='1' y="2"><b z='3'/></a>})
assert_equal( %q{<a x='1' y='2'><b z='3'/></a>}, d.to_s )
d.root.context[:attribute_quote] = :quote
assert_equal( %q{<a x="1" y="2"><b z="3"/></a>}, d.to_s )
end
+ def test_quote_to_s_value
+ doc = Document.new(%q{<root a="'"/>}, {attribute_quote: :quote})
+ assert_equal(%q{<root a="'"/>}, doc.to_s)
+ assert_equal("'", doc.root.attribute("a").value)
+ assert_equal(%q{<root a="'"/>}, doc.to_s)
+ end
+
def test_ticket_127
doc = Document.new
doc.add_element 'a', { 'v' => 'x & y' }
diff --git a/test/rexml/test_attributes_mixin.rb b/test/rexml/test_attributes_mixin.rb
index 3a9f54eefd..2b9108cbc6 100644
--- a/test/rexml/test_attributes_mixin.rb
+++ b/test/rexml/test_attributes_mixin.rb
@@ -1,6 +1,4 @@
# frozen_string_literal: false
-require 'test/unit'
-require 'rexml/document'
module REXMLTests
class TestAttributes < Test::Unit::TestCase
diff --git a/test/rexml/test_comment.rb b/test/rexml/test_comment.rb
index 0af2f5ca76..f6f4d8099d 100644
--- a/test/rexml/test_comment.rb
+++ b/test/rexml/test_comment.rb
@@ -1,7 +1,4 @@
# frozen_string_literal: false
-require "test/unit/testcase"
-
-require 'rexml/document'
module REXMLTests
class CommentTester < Test::Unit::TestCase
diff --git a/test/rexml/test_contrib.rb b/test/rexml/test_contrib.rb
index 8462b3c23f..c16c72f2a6 100644
--- a/test/rexml/test_contrib.rb
+++ b/test/rexml/test_contrib.rb
@@ -1,15 +1,14 @@
# coding: binary
# frozen_string_literal: false
-require_relative "rexml_test_utils"
-
-require "rexml/document"
require "rexml/parseexception"
require "rexml/formatters/default"
+require_relative "helper"
+
module REXMLTests
class ContribTester < Test::Unit::TestCase
- include REXMLTestUtils
+ include Helper::Fixture
include REXML
XML_STRING_01 = <<DELIMITER
@@ -73,6 +72,7 @@ def test_bad_doctype_Tobias
>
]
>
+ <svg/>
EOF
doc = REXML::Document.new source
doc.write(out="")
@@ -82,7 +82,7 @@ def test_bad_doctype_Tobias
# Peter Verhage
def test_namespace_Peter
- source = <<-EOF
+ source = <<~EOF
<?xml version="1.0"?>
<config:myprog-config xmlns:config="http://someurl/program/version">
<!-- main options -->
@@ -379,7 +379,7 @@ def test_various_xpath
end
def test_entities_Holden_Glova
- document = <<-EOL
+ document = <<~EOL
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE rubynet [
<!ENTITY rbconfig.MAJOR "1">
diff --git a/test/rexml/test_core.rb b/test/rexml/test_core.rb
index ee5438d5e5..eed9cca2c0 100644
--- a/test/rexml/test_core.rb
+++ b/test/rexml/test_core.rb
@@ -1,8 +1,6 @@
-# coding: utf-8
+# -*- coding: utf-8 -*-
# frozen_string_literal: false
-require_relative "rexml_test_utils"
-
require "rexml/document"
require "rexml/parseexception"
require "rexml/output"
@@ -11,13 +9,14 @@
require "rexml/undefinednamespaceexception"
require_relative "listener"
+require_relative "helper"
module REXMLTests
class Tester < Test::Unit::TestCase
- include REXMLTestUtils
+ include Helper::Fixture
include REXML
def setup
- @xsa_source = <<-EOL
+ @xsa_source = <<~EOL
<?xml version="1.0"?>
<?xsl stylesheet="blah.xsl"?>
<!-- The first line tests the XMLDecl, the second tests PI.
@@ -91,7 +90,7 @@ def test_attribute
# Bryan Murphy <murphybryanp@yahoo.com>
text = "this is a {target[@name='test']/@value} test"
- source = <<-EOL
+ source = <<~EOL
<?xml version="1.0"?>
<doc search="#{text}"/>
EOL
@@ -116,6 +115,72 @@ def test_attribute
name4='test4'/>).join(' '), e.to_s
end
+ def test_attribute_duplicated
+ # https://www.w3.org/TR/xml-names/#uniqAttrs
+ message = <<-MESSAGE.chomp
+Duplicate attribute "a"
+Line: 2
+Position: 24
+Last 80 unconsumed characters:
+/>
+ MESSAGE
+ assert_raise(REXML::ParseException.new(message)) do
+ Document.new(<<-XML)
+<x>
+ <bad a="1" a="2"/>
+</x>
+ XML
+ end
+ end
+
+ def test_attribute_namespace_conflict
+ # https://www.w3.org/TR/xml-names/#uniqAttrs
+ message = <<-MESSAGE.chomp
+Namespace conflict in adding attribute "a": Prefix "n1" = "http://www.w3.org" and prefix "n2" = "http://www.w3.org"
+Line: 4
+Position: 140
+Last 80 unconsumed characters:
+/>
+ MESSAGE
+ assert_raise(REXML::ParseException.new(message)) do
+ Document.new(<<-XML)
+<!-- http://www.w3.org is bound to n1 and n2 -->
+<x xmlns:n1="http://www.w3.org"
+ xmlns:n2="http://www.w3.org">
+ <bad n1:a="1" n2:a="2"/>
+</x>
+ XML
+ end
+ end
+
+ def test_attribute_default_namespace
+ # https://www.w3.org/TR/xml-names/#uniqAttrs
+ document = Document.new(<<-XML)
+<!-- http://www.w3.org is bound to n1 and is the default -->
+<x xmlns:n1="http://www.w3.org"
+ xmlns="http://www.w3.org" >
+ <good a="1" b="2" />
+ <good a="1" n1:a="2" />
+</x>
+ XML
+ attributes = document.root.elements.collect do |element|
+ element.attributes.each_attribute.collect do |attribute|
+ [attribute.prefix, attribute.namespace, attribute.name]
+ end
+ end
+ assert_equal([
+ [
+ ["", "", "a"],
+ ["", "", "b"],
+ ],
+ [
+ ["", "", "a"],
+ ["n1", "http://www.w3.org", "a"],
+ ],
+ ],
+ attributes)
+ end
+
def test_cdata
test = "The quick brown fox jumped
& < & < \" '
@@ -681,7 +746,7 @@ def test_iso_8859_1_output_function
koln_iso_8859_1 = "K\xF6ln"
koln_utf8 = "K\xc3\xb6ln"
source = Source.new( koln_iso_8859_1, 'iso-8859-1' )
- results = source.scan(/.*/)[0]
+ results = source.match(/.*/)[0]
koln_utf8.force_encoding('UTF-8') if koln_utf8.respond_to?(:force_encoding)
assert_equal koln_utf8, results
output << results
@@ -779,7 +844,7 @@ def test_deep_clone
end
def test_whitespace_before_root
- a = <<EOL
+ a = <<EOL.chomp
<?xml version='1.0'?>
<blo>
<wak>
@@ -823,7 +888,7 @@ def test_attlist_decl
assert_equal 'two', doc.root.elements[1].namespace
assert_equal 'foo', doc.root.namespace
- doc = Document.new <<-EOL
+ doc = Document.new <<~EOL
<?xml version="1.0"?>
<!DOCTYPE schema SYSTEM "XMLSchema.dtd" [
<!ENTITY % p ''>
@@ -877,18 +942,18 @@ def test_ticket_51
EOL
# The most common case. People not caring about the namespaces much.
- assert_equal( "XY", XPath.match( doc, "/test/a/text()" ).join )
- assert_equal( "XY", XPath.match( doc, "/test/x:a/text()" ).join )
+ assert_equal( "XY", XPath.match( doc, "/*:test/*:a/text()" ).join )
+ assert_equal( "XY", XPath.match( doc, "/*:test/x:a/text()" ).join )
# Surprising? I don't think so, if you believe my definition of the "common case"
- assert_equal( "XYZ", XPath.match( doc, "//a/text()" ).join )
+ assert_equal( "XYZ", XPath.match( doc, "//*:a/text()" ).join )
# These are the uncommon cases. Namespaces are actually important, so we define our own
# mappings, and pass them in.
assert_equal( "XY", XPath.match( doc, "/f:test/f:a/text()", { "f" => "1" } ).join )
# The namespaces are defined, and override the original mappings
- assert_equal( "", XPath.match( doc, "/test/a/text()", { "f" => "1" } ).join )
+ assert_equal( "XY", XPath.match( doc, "/*:test/*:a/text()", { "f" => "1" } ).join )
assert_equal( "", XPath.match( doc, "/x:test/x:a/text()", { "f" => "1" } ).join )
- assert_equal( "", XPath.match( doc, "//a/text()", { "f" => "1" } ).join )
+ assert_equal( "XYZ", XPath.match( doc, "//*:a/text()", { "f" => "1" } ).join )
end
def test_processing_instruction
@@ -900,7 +965,7 @@ def test_processing_instruction
end
def test_oses_with_bad_EOLs
- Document.new("\n\n\n<?xml version='1.0'?>\n\n\n<a/>\n\n")
+ Document.new("<?xml version='1.0'?>\n\n\n<a/>\n\n")
end
# Contributed (with patch to fix bug) by Kouhei
@@ -927,7 +992,7 @@ def test_0xD_in_preface
end
def test_hyphens_in_doctype
- doc = REXML::Document.new <<-EOQ
+ doc = REXML::Document.new <<~EOQ
<?xml version="1.0"?>
<!DOCTYPE a-b-c>
<a-b-c>
@@ -1043,7 +1108,7 @@ def test_null_element_name
def test_text_raw
# From the REXML tutorial
# (http://www.germane-software.com/software/rexml/test/data/tutorial.html)
- doc = Document.new <<-EOL
+ doc = Document.new <<~EOL
<?xml version="1.0"?>
<!DOCTYPE schema SYSTEM "XMLSchema.dtd" [
<!ENTITY % s 'Sean'>
@@ -1277,11 +1342,26 @@ def test_ticket_21
exception = assert_raise(ParseException) do
Document.new(src)
end
- assert_equal(<<-DETAIL, exception.to_s)
+ assert_equal(<<-DETAIL.chomp, exception.to_s)
Missing attribute value start quote: <bar>
Line: 1
Position: 16
Last 80 unconsumed characters:
+value/>
+ DETAIL
+ end
+
+ def test_parse_exception_on_missing_attribute_end_quote
+ src = '<foo bar="value/>'
+ exception = assert_raise(ParseException) do
+ Document.new(src)
+ end
+ assert_equal(<<-DETAIL.chomp, exception.to_s)
+Missing attribute value end quote: <bar>: <">
+Line: 1
+Position: 17
+Last 80 unconsumed characters:
+value/>
DETAIL
end
@@ -1377,7 +1457,7 @@ def test_ticket_91
d.root.add_element( "bah" )
p=REXML::Formatters::Pretty.new(2)
p.compact = true # Don't add whitespace to text nodes unless necessary
- p.write(d,out="")
+ p.write(d,out=+"")
assert_equal( expected, out )
end
@@ -1391,8 +1471,8 @@ def test_ticket_95
def test_ticket_102
doc = REXML::Document.new '<doc xmlns="ns"><item name="foo"/></doc>'
- assert_equal( "foo", doc.root.elements["item"].attribute("name","ns").to_s )
- assert_equal( "item", doc.root.elements["item[@name='foo']"].name )
+ assert_equal( "foo", doc.root.elements["*:item"].attribute("name","ns").to_s )
+ assert_equal( "item", doc.root.elements["*:item[@name='foo']"].name )
end
def test_ticket_14
@@ -1421,11 +1501,11 @@ def test_ticket_121
doc = REXML::Document.new(
'<doc xmlns="ns" xmlns:phantom="ns"><item name="foo">text</item></doc>'
)
- assert_equal 'text', doc.text( "/doc/item[@name='foo']" )
+ assert_equal 'text', doc.text( "/*:doc/*:item[@name='foo']" )
assert_equal "name='foo'",
- doc.root.elements["item"].attribute("name", "ns").inspect
+ doc.root.elements["*:item"].attribute("name", "ns").inspect
assert_equal "<item name='foo'>text</item>",
- doc.root.elements["item[@name='foo']"].to_s
+ doc.root.elements["*:item[@name='foo']"].to_s
end
def test_ticket_135
@@ -1453,8 +1533,10 @@ def test_ticket_138
"" => attribute("version", "1.0"),
},
}
- assert_equal(expected, doc.root.attributes)
- assert_equal(expected, REXML::Document.new(doc.root.to_s).root.attributes)
+ assert_equal(expected,
+ doc.root.attributes.to_h)
+ assert_equal(expected,
+ REXML::Document.new(doc.root.to_s).root.attributes.to_h)
end
def test_empty_doc
diff --git a/test/rexml/test_doctype.rb b/test/rexml/test_doctype.rb
index d728cba606..b20d30ae2f 100644
--- a/test/rexml/test_doctype.rb
+++ b/test/rexml/test_doctype.rb
@@ -1,6 +1,4 @@
# frozen_string_literal: false
-require 'test/unit'
-require 'rexml/document'
module REXMLTests
class TestDocTypeAccessor < Test::Unit::TestCase
@@ -41,6 +39,12 @@ def test_to_s
@doc_type_public_system.to_s)
end
+ def test_to_s_apostrophe
+ @doc_type_public_system.parent.context[:prologue_quote] = :apostrophe
+ assert_equal("<!DOCTYPE root PUBLIC '#{@pubid}' '#{@sysid}'>",
+ @doc_type_public_system.to_s)
+ end
+
def test_system
assert_equal([
@sysid,
@@ -82,6 +86,35 @@ def test_to_s
assert_equal("<!DOCTYPE root SYSTEM \"root.dtd\">",
doctype.to_s)
end
+
+ def test_to_s_apostrophe
+ doctype = REXML::DocType.new(["root", "SYSTEM", nil, "root.dtd"])
+ doc = REXML::Document.new
+ doc << doctype
+ doctype.parent.context[:prologue_quote] = :apostrophe
+ assert_equal("<!DOCTYPE root SYSTEM 'root.dtd'>",
+ doctype.to_s)
+ end
+
+ def test_to_s_single_quote_apostrophe
+ doctype = REXML::DocType.new(["root", "SYSTEM", nil, "root'.dtd"])
+ doc = REXML::Document.new
+ doc << doctype
+ # This isn't used.
+ doctype.parent.context[:prologue_quote] = :apostrophe
+ assert_equal("<!DOCTYPE root SYSTEM \"root'.dtd\">",
+ doctype.to_s)
+ end
+
+ def test_to_s_double_quote
+ doctype = REXML::DocType.new(["root", "SYSTEM", nil, "root\".dtd"])
+ doc = REXML::Document.new
+ doc << doctype
+ # This isn't used.
+ doctype.parent.context[:prologue_quote] = :apostrophe
+ assert_equal("<!DOCTYPE root SYSTEM 'root\".dtd'>",
+ doctype.to_s)
+ end
end
end
@@ -92,6 +125,25 @@ def test_to_s
assert_equal("<!DOCTYPE root PUBLIC \"pub\" \"root.dtd\">",
doctype.to_s)
end
+
+ def test_to_s_apostrophe
+ doctype = REXML::DocType.new(["root", "PUBLIC", "pub", "root.dtd"])
+ doc = REXML::Document.new
+ doc << doctype
+ doctype.parent.context[:prologue_quote] = :apostrophe
+ assert_equal("<!DOCTYPE root PUBLIC 'pub' 'root.dtd'>",
+ doctype.to_s)
+ end
+
+ def test_to_s_apostrophe_include_apostrophe
+ doctype = REXML::DocType.new(["root", "PUBLIC", "pub'", "root.dtd"])
+ doc = REXML::Document.new
+ doc << doctype
+ # This isn't used.
+ doctype.parent.context[:prologue_quote] = :apostrophe
+ assert_equal("<!DOCTYPE root PUBLIC \"pub'\" 'root.dtd'>",
+ doctype.to_s)
+ end
end
class TestSystemLiteral < self
@@ -101,6 +153,25 @@ def test_to_s
doctype.to_s)
end
+ def test_to_s_apostrophe
+ doctype = REXML::DocType.new(["root", "PUBLIC", "pub", "root.dtd"])
+ doc = REXML::Document.new
+ doc << doctype
+ doctype.parent.context[:prologue_quote] = :apostrophe
+ assert_equal("<!DOCTYPE root PUBLIC 'pub' 'root.dtd'>",
+ doctype.to_s)
+ end
+
+ def test_to_s_apostrophe_include_apostrophe
+ doctype = REXML::DocType.new(["root", "PUBLIC", "pub", "root'.dtd"])
+ doc = REXML::Document.new
+ doc << doctype
+ # This isn't used.
+ doctype.parent.context[:prologue_quote] = :apostrophe
+ assert_equal("<!DOCTYPE root PUBLIC 'pub' \"root'.dtd\">",
+ doctype.to_s)
+ end
+
def test_to_s_double_quote
doctype = REXML::DocType.new(["root", "PUBLIC", "pub", "root\".dtd"])
assert_equal("<!DOCTYPE root PUBLIC \"pub\" 'root\".dtd'>",
@@ -143,6 +214,62 @@ def test_to_s_system_literal_include_double_quote
decl(@id, "system\"literal").to_s)
end
+ def test_to_s_apostrophe
+ document = REXML::Document.new(<<-XML)
+ <!DOCTYPE root SYSTEM "urn:x-test:sysid" [
+ #{decl(@id, @uri).to_s}
+ ]>
+ <root/>
+ XML
+ document.context[:prologue_quote] = :apostrophe
+ notation = document.doctype.notations[0]
+ assert_equal("<!NOTATION #{@name} PUBLIC '#{@id}' '#{@uri}'>",
+ notation.to_s)
+ end
+
+ def test_to_s_apostrophe_pubid_literal_include_apostrophe
+ document = REXML::Document.new(<<-XML)
+ <!DOCTYPE root SYSTEM "urn:x-test:sysid" [
+ #{decl("#{@id}'", @uri).to_s}
+ ]>
+ <root/>
+ XML
+ # This isn't used for PubidLiteral because PubidChar includes '.
+ document.context[:prologue_quote] = :apostrophe
+ notation = document.doctype.notations[0]
+ assert_equal("<!NOTATION #{@name} PUBLIC \"#{@id}'\" '#{@uri}'>",
+ notation.to_s)
+ end
+
+ def test_to_s_apostrophe_system_literal_include_apostrophe
+ document = REXML::Document.new(<<-XML)
+ <!DOCTYPE root SYSTEM "urn:x-test:sysid" [
+ #{decl(@id, "system'literal").to_s}
+ ]>
+ <root/>
+ XML
+ # This isn't used for SystemLiteral because SystemLiteral includes '.
+ document.context[:prologue_quote] = :apostrophe
+ notation = document.doctype.notations[0]
+ assert_equal("<!NOTATION #{@name} PUBLIC '#{@id}' \"system'literal\">",
+ notation.to_s)
+ end
+
+ def test_to_s_apostrophe_system_literal_include_double_quote
+ document = REXML::Document.new(<<-XML)
+ <!DOCTYPE root SYSTEM "urn:x-test:sysid" [
+ #{decl(@id, "system\"literal").to_s}
+ ]>
+ <root/>
+ XML
+ # This isn't used for SystemLiteral because SystemLiteral includes ".
+ # But quoted by ' because SystemLiteral includes ".
+ document.context[:prologue_quote] = :apostrophe
+ notation = document.doctype.notations[0]
+ assert_equal("<!NOTATION #{@name} PUBLIC '#{@id}' 'system\"literal'>",
+ notation.to_s)
+ end
+
private
def decl(id, uri)
REXML::NotationDecl.new(@name, "PUBLIC", id, uri)
@@ -170,6 +297,48 @@ def test_to_s_include_double_quote
decl("#{@id}\"").to_s)
end
+ def test_to_s_apostrophe
+ document = REXML::Document.new(<<-XML)
+ <!DOCTYPE root SYSTEM "urn:x-test:sysid" [
+ #{decl(@id).to_s}
+ ]>
+ <root/>
+ XML
+ document.context[:prologue_quote] = :apostrophe
+ notation = document.doctype.notations[0]
+ assert_equal("<!NOTATION #{@name} SYSTEM '#{@id}'>",
+ notation.to_s)
+ end
+
+ def test_to_s_apostrophe_include_apostrophe
+ document = REXML::Document.new(<<-XML)
+ <!DOCTYPE root SYSTEM "urn:x-test:sysid" [
+ #{decl("#{@id}'").to_s}
+ ]>
+ <root/>
+ XML
+ # This isn't used for SystemLiteral because SystemLiteral includes '.
+ document.context[:prologue_quote] = :apostrophe
+ notation = document.doctype.notations[0]
+ assert_equal("<!NOTATION #{@name} SYSTEM \"#{@id}'\">",
+ notation.to_s)
+ end
+
+ def test_to_s_apostrophe_include_double_quote
+ document = REXML::Document.new(<<-XML)
+ <!DOCTYPE root SYSTEM "urn:x-test:sysid" [
+ #{decl("#{@id}\"").to_s}
+ ]>
+ <root/>
+ XML
+ # This isn't used for SystemLiteral because SystemLiteral includes ".
+ # But quoted by ' because SystemLiteral includes ".
+ document.context[:prologue_quote] = :apostrophe
+ notation = document.doctype.notations[0]
+ assert_equal("<!NOTATION #{@name} SYSTEM '#{@id}\"'>",
+ notation.to_s)
+ end
+
private
def decl(id)
REXML::NotationDecl.new(@name, "SYSTEM", nil, id)
diff --git a/test/rexml/test_document.rb b/test/rexml/test_document.rb
index c0faae4ae0..cda4354f31 100644
--- a/test/rexml/test_document.rb
+++ b/test/rexml/test_document.rb
@@ -1,13 +1,10 @@
# -*- coding: utf-8 -*-
# frozen_string_literal: false
-require "rexml/document"
-require "test/unit"
-
module REXMLTests
class TestDocument < Test::Unit::TestCase
def test_version_attributes_to_s
- doc = REXML::Document.new(<<-eoxml)
+ doc = REXML::Document.new(<<~eoxml)
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<svg id="svg2"
xmlns:sodipodi="foo"
@@ -34,17 +31,9 @@ def test_new
end
class EntityExpansionLimitTest < Test::Unit::TestCase
- def setup
- @default_entity_expansion_limit = REXML::Security.entity_expansion_limit
- end
-
- def teardown
- REXML::Security.entity_expansion_limit = @default_entity_expansion_limit
- end
-
class GeneralEntityTest < self
def test_have_value
- xml = <<EOF
+ xml = <<XML
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE member [
<!ENTITY a "&b;&b;&b;&b;&b;&b;&b;&b;&b;&b;">
@@ -58,23 +47,23 @@ def test_have_value
<member>
&a;
</member>
-EOF
+XML
doc = REXML::Document.new(xml)
- assert_raise(RuntimeError) do
+ assert_raise(RuntimeError.new("entity expansion has grown too large")) do
doc.root.children.first.value
end
- REXML::Security.entity_expansion_limit = 100
- assert_equal(100, REXML::Security.entity_expansion_limit)
+
doc = REXML::Document.new(xml)
- assert_raise(RuntimeError) do
+ doc.entity_expansion_limit = 100
+ assert_raise(RuntimeError.new("number of entity expansions exceeded, processing aborted.")) do
doc.root.children.first.value
end
assert_equal(101, doc.entity_expansion_count)
end
def test_empty_value
- xml = <<EOF
+ xml = <<XML
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE member [
<!ENTITY a "&b;&b;&b;&b;&b;&b;&b;&b;&b;&b;">
@@ -88,23 +77,23 @@ def test_empty_value
<member>
&a;
</member>
-EOF
+XML
doc = REXML::Document.new(xml)
- assert_raise(RuntimeError) do
+ assert_raise(RuntimeError.new("number of entity expansions exceeded, processing aborted.")) do
doc.root.children.first.value
end
- REXML::Security.entity_expansion_limit = 100
- assert_equal(100, REXML::Security.entity_expansion_limit)
+
doc = REXML::Document.new(xml)
- assert_raise(RuntimeError) do
+ doc.entity_expansion_limit = 100
+ assert_raise(RuntimeError.new("number of entity expansions exceeded, processing aborted.")) do
doc.root.children.first.value
end
assert_equal(101, doc.entity_expansion_count)
end
def test_with_default_entity
- xml = <<EOF
+ xml = <<XML
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE member [
<!ENTITY a "a">
@@ -115,68 +104,35 @@ def test_with_default_entity
&a2;
<
</member>
-EOF
+XML
- REXML::Security.entity_expansion_limit = 4
doc = REXML::Document.new(xml)
+ doc.entity_expansion_limit = 4
assert_equal("\na\na a\n<\n", doc.root.children.first.value)
- REXML::Security.entity_expansion_limit = 3
+
doc = REXML::Document.new(xml)
- assert_raise(RuntimeError) do
+ doc.entity_expansion_limit = 3
+ assert_raise(RuntimeError.new("number of entity expansions exceeded, processing aborted.")) do
doc.root.children.first.value
end
end
- end
- class ParameterEntityTest < self
- def test_have_value
- xml = <<EOF
-<!DOCTYPE root [
- <!ENTITY % a "BOOM.BOOM.BOOM.BOOM.BOOM.BOOM.BOOM.BOOM.BOOM.">
- <!ENTITY % b "%a;%a;%a;%a;%a;%a;%a;%a;%a;%a;%a;%a;%a;%a;%a;">
- <!ENTITY % c "%b;%b;%b;%b;%b;%b;%b;%b;%b;%b;%b;%b;%b;%b;%b;">
- <!ENTITY % d "%c;%c;%c;%c;%c;%c;%c;%c;%c;%c;%c;%c;%c;%c;%c;">
- <!ENTITY % e "%d;%d;%d;%d;%d;%d;%d;%d;%d;%d;%d;%d;%d;%d;%d;">
- <!ENTITY % f "%e;%e;%e;%e;%e;%e;%e;%e;%e;%e;%e;%e;%e;%e;%e;">
- <!ENTITY % g "%f;%f;%f;%f;%f;%f;%f;%f;%f;%f;%f;%f;%f;%f;%f;">
- <!ENTITY test "test %g;">
-]>
-<cd></cd>
-EOF
-
- assert_raise(REXML::ParseException) do
- REXML::Document.new(xml)
- end
- REXML::Security.entity_expansion_limit = 100
- assert_equal(100, REXML::Security.entity_expansion_limit)
- assert_raise(REXML::ParseException) do
- REXML::Document.new(xml)
- end
- end
-
- def test_empty_value
- xml = <<EOF
-<!DOCTYPE root [
- <!ENTITY % a "">
- <!ENTITY % b "%a;%a;%a;%a;%a;%a;%a;%a;%a;%a;%a;%a;%a;%a;%a;">
- <!ENTITY % c "%b;%b;%b;%b;%b;%b;%b;%b;%b;%b;%b;%b;%b;%b;%b;">
- <!ENTITY % d "%c;%c;%c;%c;%c;%c;%c;%c;%c;%c;%c;%c;%c;%c;%c;">
- <!ENTITY % e "%d;%d;%d;%d;%d;%d;%d;%d;%d;%d;%d;%d;%d;%d;%d;">
- <!ENTITY % f "%e;%e;%e;%e;%e;%e;%e;%e;%e;%e;%e;%e;%e;%e;%e;">
- <!ENTITY % g "%f;%f;%f;%f;%f;%f;%f;%f;%f;%f;%f;%f;%f;%f;%f;">
- <!ENTITY test "test %g;">
+ def test_entity_expansion_text_limit
+ xml = <<-XML
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE member [
+ <!ENTITY a "&b;&b;&b;">
+ <!ENTITY b "&c;&d;&e;">
+ <!ENTITY c "xxxxxxxxxx">
+ <!ENTITY d "yyyyyyyyyy">
+ <!ENTITY e "zzzzzzzzzz">
]>
-<cd></cd>
-EOF
+<member>&a;</member>
+ XML
- assert_raise(REXML::ParseException) do
- REXML::Document.new(xml)
- end
- REXML::Security.entity_expansion_limit = 100
- assert_equal(100, REXML::Security.entity_expansion_limit)
- assert_raise(REXML::ParseException) do
- REXML::Document.new(xml)
- end
+ doc = REXML::Document.new(xml)
+ doc.entity_expansion_text_limit = 90
+ assert_equal(90, doc.root.children.first.value.bytesize)
end
end
end
@@ -203,9 +159,45 @@ def test_xml_declaration_standalone
assert_equal('no', doc.stand_alone?, bug2539)
end
+ def test_each_recursive
+ xml_source = <<~XML
+ <?xml version="1.0" encoding="UTF-8" standalone="yes"?>
+ <root name="root">
+ <x name="1_1">
+ <x name="1_2">
+ <x name="1_3" />
+ </x>
+ </x>
+ <x name="2_1">
+ <x name="2_2">
+ <x name="2_3" />
+ </x>
+ </x>
+ <!-- comment -->
+ <![CDATA[ cdata ]]>
+ </root>
+ XML
+
+ expected_names = %w[
+ root
+ 1_1 1_2 1_3
+ 2_1 2_2 2_3
+ ]
+
+ document = REXML::Document.new(xml_source)
+
+ # Node#each_recursive iterates elements only.
+ # This does not iterate XML declarations, comments, attributes, CDATA sections, etc.
+ actual_names = []
+ document.each_recursive do |element|
+ actual_names << element.attributes["name"]
+ end
+ assert_equal(expected_names, actual_names)
+ end
+
class WriteTest < Test::Unit::TestCase
def setup
- @document = REXML::Document.new(<<-EOX)
+ @document = REXML::Document.new(<<-EOX.chomp)
<?xml version="1.0" encoding="UTF-8"?>
<message>Hello world!</message>
EOX
@@ -215,7 +207,7 @@ class ArgumentsTest < self
def test_output
output = ""
@document.write(output)
- assert_equal(<<-EOX, output)
+ assert_equal(<<-EOX.chomp, output)
<?xml version='1.0' encoding='UTF-8'?>
<message>Hello world!</message>
EOX
@@ -238,7 +230,7 @@ def test_transitive
indent = 2
transitive = true
@document.write(output, indent, transitive)
- assert_equal(<<-EOX, output)
+ assert_equal(<<-EOX.chomp, output)
<?xml version='1.0' encoding='UTF-8'?>
<message
>Hello world!</message
@@ -267,7 +259,7 @@ def test_encoding
japanese_text = "こんにちは"
@document.root.text = japanese_text
@document.write(output, indent, transitive, ie_hack, encoding)
- assert_equal(<<-EOX.encode(encoding), output)
+ assert_equal(<<-EOX.chomp.encode(encoding), output)
<?xml version='1.0' encoding='SHIFT_JIS'?>
<message>#{japanese_text}</message>
EOX
@@ -278,7 +270,7 @@ class OptionsTest < self
def test_output
output = ""
@document.write(:output => output)
- assert_equal(<<-EOX, output)
+ assert_equal(<<-EOX.chomp, output)
<?xml version='1.0' encoding='UTF-8'?>
<message>Hello world!</message>
EOX
@@ -298,7 +290,7 @@ def test_indent
def test_transitive
output = ""
@document.write(:output => output, :indent => 2, :transitive => true)
- assert_equal(<<-EOX, output)
+ assert_equal(<<-EOX.chomp, output)
<?xml version='1.0' encoding='UTF-8'?>
<message
>Hello world!</message
@@ -320,7 +312,7 @@ def test_encoding
japanese_text = "こんにちは"
@document.root.text = japanese_text
@document.write(:output => output, :encoding => encoding)
- assert_equal(<<-EOX.encode(encoding), output)
+ assert_equal(<<-EOX.chomp.encode(encoding), output)
<?xml version='1.0' encoding='SHIFT_JIS'?>
<message>#{japanese_text}</message>
EOX
@@ -404,7 +396,7 @@ def test_utf_16
actual_xml = ""
document.write(actual_xml)
- expected_xml = <<-EOX.encode("UTF-16BE")
+ expected_xml = <<-EOX.chomp.encode("UTF-16BE")
\ufeff<?xml version='1.0' encoding='UTF-16'?>
<message>Hello world!</message>
EOX
diff --git a/test/rexml/test_element.rb b/test/rexml/test_element.rb
index 82830b44e6..202168955c 100644
--- a/test/rexml/test_element.rb
+++ b/test/rexml/test_element.rb
@@ -1,8 +1,5 @@
# frozen_string_literal: false
-require "test/unit/testcase"
-require "rexml/document"
-
module REXMLTests
class ElementTester < Test::Unit::TestCase
def test_array_reference_string
diff --git a/test/rexml/test_elements.rb b/test/rexml/test_elements.rb
index a850e62209..c0f1b22007 100644
--- a/test/rexml/test_elements.rb
+++ b/test/rexml/test_elements.rb
@@ -1,6 +1,4 @@
# frozen_string_literal: false
-require 'test/unit/testcase'
-require 'rexml/document'
module REXMLTests
class ElementsTester < Test::Unit::TestCase
@@ -115,5 +113,10 @@ def test_inject
}
assert_equal 6, r
end
+
+ def test_parent
+ doc = Document.new( "<a><b id='1'/><b id='2'/></a>" )
+ assert_equal('a', doc.root.elements.parent.name)
+ end
end
end
diff --git a/test/rexml/test_encoding.rb b/test/rexml/test_encoding.rb
index 919db131f0..586f1f8b68 100644
--- a/test/rexml/test_encoding.rb
+++ b/test/rexml/test_encoding.rb
@@ -1,14 +1,15 @@
-# coding: binary
+# coding: utf-8
# frozen_string_literal: false
-require_relative "rexml_test_utils"
-
require 'rexml/source'
require 'rexml/document'
+require_relative "helper"
+
module REXMLTests
class EncodingTester < Test::Unit::TestCase
- include REXMLTestUtils
+ include Helper::Fixture
+ include Helper::Global
include REXML
def setup
@@ -23,7 +24,7 @@ def test_encoded_in_encoded_out
doc = Document.new( @encoded )
doc.write( out="" )
out.force_encoding(::Encoding::ASCII_8BIT)
- assert_equal( @encoded, out )
+ assert_equal( @encoded.b, out )
end
# Given an encoded document, try to change the encoding and write it out
@@ -33,10 +34,10 @@ def test_encoded_in_change_out
assert_equal("UTF-8", doc.encoding)
REXML::Formatters::Default.new.write( doc.root, out="" )
out.force_encoding(::Encoding::ASCII_8BIT)
- assert_equal( @not_encoded, out )
+ assert_equal( @not_encoded.b, out )
char = XPath.first( doc, "/a/b/text()" ).to_s
char.force_encoding(::Encoding::ASCII_8BIT)
- assert_equal( "ĉ", char )
+ assert_equal( "ĉ".b, char )
end
# * Given an encoded document, try to write it to a different encoding
@@ -44,7 +45,7 @@ def test_encoded_in_different_out
doc = Document.new( @encoded )
REXML::Formatters::Default.new.write( doc.root, Output.new( out="", "UTF-8" ) )
out.force_encoding(::Encoding::ASCII_8BIT)
- assert_equal( @not_encoded, out )
+ assert_equal( @not_encoded.b, out )
end
# * Given a non-encoded document, change the encoding
@@ -54,7 +55,7 @@ def test_in_change_out
assert_equal("ISO-8859-3", doc.encoding)
doc.write( out="" )
out.force_encoding(::Encoding::ASCII_8BIT)
- assert_equal( @encoded, out )
+ assert_equal( @encoded.b, out )
end
# * Given a non-encoded document, write to a different encoding
@@ -62,13 +63,13 @@ def test_in_different_out
doc = Document.new( @not_encoded )
doc.write( Output.new( out="", "ISO-8859-3" ) )
out.force_encoding(::Encoding::ASCII_8BIT)
- assert_equal( "<?xml version='1.0'?>#{@encoded_root}", out )
+ assert_equal( "<?xml version='1.0'?>#{@encoded_root}".b, out )
end
# * Given an encoded document, accessing text and attribute nodes
# should provide UTF-8 text.
def test_in_different_access
- doc = Document.new <<-EOL
+ doc = Document.new <<~EOL
<?xml version='1.0' encoding='ISO-8859-1'?>
<a a="\xFF">\xFF</a>
EOL
@@ -80,7 +81,7 @@ def test_in_different_access
def test_ticket_89
- doc = Document.new <<-EOL
+ doc = Document.new <<~EOL
<?xml version="1.0" encoding="CP-1252" ?>
<xml><foo></foo></xml>
EOL
@@ -97,7 +98,7 @@ def test_parse_utf16
end
def test_parse_utf16_with_utf8_default_internal
- EnvUtil.with_default_internal("UTF-8") do
+ with_default_internal("UTF-8") do
utf16 = File.open(fixture_path("utf16.xml")) do |f|
REXML::Document.new(f)
end
diff --git a/test/rexml/test_entity.rb b/test/rexml/test_entity.rb
index 6dc6637074..89f8389445 100644
--- a/test/rexml/test_entity.rb
+++ b/test/rexml/test_entity.rb
@@ -1,7 +1,5 @@
# frozen_string_literal: false
-require "test/unit/testcase"
-require 'rexml/document'
require 'rexml/entity'
require 'rexml/source'
@@ -61,8 +59,7 @@ def test_parse_entity
def test_constructor
one = [ %q{<!ENTITY % YN '"Yes"'>},
- %q{<!ENTITY % YN2 "Yes">},
- %q{<!ENTITY WhatHeSaid "He said %YN;">},
+ %q{<!ENTITY WhatHeSaid 'He said "Yes"'>},
'<!ENTITY open-hatch
SYSTEM "http://www.textuality.com/boilerplate/OpenHatch.xml">',
'<!ENTITY open-hatch2
@@ -73,8 +70,7 @@ def test_constructor
NDATA gif>' ]
source = %q{<!DOCTYPE foo [
<!ENTITY % YN '"Yes"'>
- <!ENTITY % YN2 "Yes">
- <!ENTITY WhatHeSaid "He said %YN;">
+ <!ENTITY WhatHeSaid 'He said "Yes"'>
<!ENTITY open-hatch
SYSTEM "http://www.textuality.com/boilerplate/OpenHatch.xml">
<!ENTITY open-hatch2
@@ -106,6 +102,84 @@ def test_replace_entities
assert_equal source, out
end
+ def test_readers_with_reference
+ entity = REXML::Entity.new([:entitydecl, "a", "B", "%"])
+ assert_equal([
+ '<!ENTITY % a "B">',
+ "a",
+ "B",
+ "B",
+ "B",
+ ],
+ [
+ entity.to_s,
+ entity.name,
+ entity.value,
+ entity.normalized,
+ entity.unnormalized,
+ ])
+ end
+
+ def test_readers_without_reference
+ entity = REXML::Entity.new([:entitydecl, "a", "&b;"])
+ assert_equal([
+ '<!ENTITY a "&b;">',
+ "a",
+ "&b;",
+ "&b;",
+ "&b;",
+ ],
+ [
+ entity.to_s,
+ entity.name,
+ entity.value,
+ entity.normalized,
+ entity.unnormalized,
+ ])
+ end
+
+ def test_readers_with_nested_references
+ doctype = REXML::DocType.new('root')
+ doctype.add(REXML::Entity.new([:entitydecl, "a", "&b;"]))
+ doctype.add(REXML::Entity.new([:entitydecl, "b", "X"]))
+ assert_equal([
+ "a",
+ "&b;",
+ "&b;",
+ "X",
+ "b",
+ "X",
+ "X",
+ "X",
+ ],
+ [
+ doctype.entities["a"].name,
+ doctype.entities["a"].value,
+ doctype.entities["a"].normalized,
+ doctype.entities["a"].unnormalized,
+ doctype.entities["b"].name,
+ doctype.entities["b"].value,
+ doctype.entities["b"].normalized,
+ doctype.entities["b"].unnormalized,
+ ])
+ end
+
+ def test_parameter_entity_reference_forbidden_by_internal_subset_in_parser
+ source = '<!DOCTYPE root [ <!ENTITY % a "B" > <!ENTITY c "%a;" > ]><root/>'
+ parser = REXML::Parsers::BaseParser.new(source)
+ exception = assert_raise(REXML::ParseException) do
+ while parser.has_next?
+ parser.pull
+ end
+ end
+ assert_equal(<<-DETAIL, exception.to_s)
+Parameter entity references forbidden in internal subset: "%a;"
+Line: 1
+Position: 54
+Last 80 unconsumed characters:
+ DETAIL
+ end
+
def test_entity_string_limit
template = '<!DOCTYPE bomb [ <!ENTITY a "^" > ]> <bomb>$</bomb>'
len = 5120 # 5k per entity
@@ -124,22 +198,6 @@ def test_entity_string_limit
end
end
- def test_entity_string_limit_for_parameter_entity
- template = '<!DOCTYPE bomb [ <!ENTITY % a "^" > <!ENTITY bomb "$" > ]><root/>'
- len = 5120 # 5k per entity
- template.sub!(/\^/, "B" * len)
-
- # 10k is OK
- entities = '%a;' * 2 # 5k entity * 2 = 10k
- REXML::Document.new(template.sub(/\$/, entities))
-
- # above 10k explodes
- entities = '%a;' * 3 # 5k entity * 2 = 15k
- assert_raise(REXML::ParseException) do
- REXML::Document.new(template.sub(/\$/, entities))
- end
- end
-
def test_raw
source = '<!DOCTYPE foo [
<!ENTITY ent "replace">
@@ -163,7 +221,7 @@ def test_lazy_evaluation
def test_entity_replacement
source = %q{<!DOCTYPE foo [
<!ENTITY % YN '"Yes"'>
- <!ENTITY WhatHeSaid "He said %YN;">]>
+ <!ENTITY WhatHeSaid 'He said "Yes"'>]>
<a>&WhatHeSaid;</a>}
d = REXML::Document.new( source )
diff --git a/test/rexml/test_instruction.rb b/test/rexml/test_instruction.rb
new file mode 100644
index 0000000000..5451e367b8
--- /dev/null
+++ b/test/rexml/test_instruction.rb
@@ -0,0 +1,12 @@
+module REXMLTests
+ class InstructionTest < Test::Unit::TestCase
+ def test_target_nil
+ error = assert_raise(ArgumentError) do
+ REXML::Instruction.new(nil)
+ end
+ assert_equal("processing instruction target must be String or " +
+ "REXML::Instruction: <nil>",
+ error.message)
+ end
+ end
+end
diff --git a/test/rexml/test_jaxen.rb b/test/rexml/test_jaxen.rb
index 9cd7bee8c2..38d2e959c6 100644
--- a/test/rexml/test_jaxen.rb
+++ b/test/rexml/test_jaxen.rb
@@ -1,5 +1,4 @@
# frozen_string_literal: false
-require_relative 'rexml_test_utils'
require "rexml/document"
require "rexml/xpath"
@@ -7,124 +6,127 @@
# Harness to test REXML's capabilities against the test suite from Jaxen
# ryan.a.cox@gmail.com
+require_relative "helper"
+
module REXMLTests
class JaxenTester < Test::Unit::TestCase
- include REXMLTestUtils
+ include Helper::Fixture
include REXML
- def test_axis ; test("axis") ; end
- def test_basic ; test("basic") ; end
- def test_basicupdate ; test("basicupdate") ; end
- def test_contents ; test("contents") ; end
- def test_defaultNamespace ; test("defaultNamespace") ; end
- def test_fibo ; test("fibo") ; end
- def test_id ; test("id") ; end
- def test_jaxen24 ; test("jaxen24") ; end
- def test_lang ; test("lang") ; end
- def test_message ; test("message") ; end
- def test_moreover ; test("moreover") ; end
- def test_much_ado ; test("much_ado") ; end
- def test_namespaces ; test("namespaces") ; end
- def test_nitf ; test("nitf") ; end
- def test_numbers ; test("numbers") ; end
- def test_pi ; test("pi") ; end
- def test_pi2 ; test("pi2") ; end
- def test_simple ; test("simple") ; end
- def test_testNamespaces ; test("testNamespaces") ; end
- def test_text ; test("text") ; end
- def test_underscore ; test("underscore") ; end
- def test_web ; test("web") ; end
- def test_web2 ; test("web2") ; end
+ def test_axis ; process_test_case("axis") ; end
+ def test_basic ; process_test_case("basic") ; end
+ def test_basicupdate ; process_test_case("basicupdate") ; end
+ def test_contents ; process_test_case("contents") ; end
+ def test_defaultNamespace ; process_test_case("defaultNamespace") ; end
+ def test_fibo ; process_test_case("fibo") ; end
+ def test_id ; process_test_case("id") ; end
+ def test_jaxen24 ; process_test_case("jaxen24") ; end
+ def test_lang ; process_test_case("lang") ; end
+ # document() function for XSLT isn't supported
+ def _test_message ; process_test_case("message") ; end
+ def test_moreover ; process_test_case("moreover") ; end
+ def test_much_ado ; process_test_case("much_ado") ; end
+ def test_namespaces ; process_test_case("namespaces") ; end
+ def test_nitf ; process_test_case("nitf") ; end
+ # Exception should be considered
+ def _test_numbers ; process_test_case("numbers") ; end
+ def test_pi ; process_test_case("pi") ; end
+ def test_pi2 ; process_test_case("pi2") ; end
+ def test_simple ; process_test_case("simple") ; end
+ # TODO: namespace node is needed
+ def _test_testNamespaces ; process_test_case("testNamespaces") ; end
+ # document() function for XSLT isn't supported
+ def _test_text ; process_test_case("text") ; end
+ def test_underscore ; process_test_case("underscore") ; end
+ def _test_web ; process_test_case("web") ; end
+ def test_web2 ; process_test_case("web2") ; end
private
- def test( fname )
-# Dir.entries( xml_dir ).each { |fname|
-# if fname =~ /\.xml$/
- doc = File.open(fixture_path(fname+".xml")) do |file|
- Document.new(file)
- end
- XPath.each( doc, "/tests/document" ) {|e| handleDocument(e)}
-# end
-# }
+ def process_test_case(name)
+ xml_path = "#{name}.xml"
+ doc = File.open(fixture_path(xml_path)) do |file|
+ Document.new(file)
+ end
+ test_doc = File.open(fixture_path("test/tests.xml")) do |file|
+ Document.new(file)
+ end
+ XPath.each(test_doc,
+ "/tests/document[@url='xml/#{xml_path}']/context") do |context|
+ process_context(doc, context)
+ end
end
# processes a tests/document/context node
- def handleContext( testDoc, ctxElement)
- testCtx = XPath.match( testDoc, ctxElement.attributes["select"] )[0]
- namespaces = {}
- if testCtx.class == Element
- testCtx.prefixes.each { |pre| handleNamespace( testCtx, pre, namespaces ) }
- end
+ def process_context(doc, context)
+ test_context = XPath.match(doc, context.attributes["select"])
+ namespaces = context.namespaces
+ namespaces.delete("var")
+ namespaces = nil if namespaces.empty?
variables = {}
- XPath.each( ctxElement, "@*[namespace-uri() = 'http://jaxen.org/test-harness/var']") { |attrib| handleVariable(testCtx, variables, attrib) }
- XPath.each( ctxElement, "valueOf") { |e| handleValueOf(testCtx, variables, namespaces, e) }
- XPath.each( ctxElement, "test[not(@exception) or (@exception != 'true') ]") { |e| handleNominalTest(testCtx,variables, namespaces, e) }
- XPath.each( ctxElement, "test[@exception = 'true']") { |e| handleExceptionalTest(testCtx,variables, namespaces, e) }
+ var_namespace = "http://jaxen.org/test-harness/var"
+ XPath.each(context,
+ "@*[namespace-uri() = '#{var_namespace}']") do |attribute|
+ variables[attribute.name] = attribute.value
+ end
+ XPath.each(context, "valueOf") do |value|
+ process_value_of(test_context, variables, namespaces, value)
+ end
+ XPath.each(context,
+ "test[not(@exception) or (@exception != 'true')]") do |test|
+ process_nominal_test(test_context, variables, namespaces, test)
+ end
+ XPath.each(context,
+ "test[@exception = 'true']") do |test|
+ process_exceptional_test(test_context, variables, namespaces, test)
+ end
end
# processes a tests/document/context/valueOf or tests/document/context/test/valueOf node
- def handleValueOf(ctx,variables, namespaces, valueOfElement)
- expected = valueOfElement.text
- got = XPath.match( ctx, valueOfElement.attributes["select"], namespaces, variables )[0]
- assert_true( (got.nil? && expected.nil?) || !got.nil? )
- case got.class
- when Element
- assert_equal( got.class, Element )
- when Attribute, Text, Comment, TrueClass, FalseClass
- assert_equal( expected, got.to_s )
- when Instruction
- assert_equal( expected, got.content )
- when Integer
- assert_equal( exected.to_f, got )
- when String
- # normalize values for comparison
- got = "" if got == nil or got == ""
- expected = "" if expected == nil or expected == ""
- assert_equal( expected, got )
- else
- assert_fail( "Wassup?" )
- end
- end
+ def process_value_of(context, variables, namespaces, value_of)
+ expected = value_of.text
+ xpath = value_of.attributes["select"]
+ matched = XPath.match(context, xpath, namespaces, variables, strict: true)
+ message = user_message(context, xpath, matched)
+ assert_equal(expected || "",
+ REXML::Functions.string(matched),
+ message)
+ end
# processes a tests/document/context/test node ( where @exception is false or doesn't exist )
- def handleNominalTest(ctx, variables, namespaces, testElement)
- expected = testElement.attributes["count"]
- got = XPath.match( ctx, testElement.attributes["select"], namespaces, variables )
+ def process_nominal_test(context, variables, namespaces, test)
+ xpath = test.attributes["select"]
+ matched = XPath.match(context, xpath, namespaces, variables, strict: true)
# might be a test with no count attribute, but nested valueOf elements
- assert( expected == got.size.to_s ) if !expected.nil?
+ expected = test.attributes["count"]
+ if expected
+ assert_equal(Integer(expected, 10),
+ matched.size,
+ user_message(context, xpath, matched))
+ end
- XPath.each( testElement, "valueOf") { |e|
- handleValueOf(got, variables, namespaces, e)
- }
+ XPath.each(test, "valueOf") do |value_of|
+ process_value_of(matched, variables, namespaces, value_of)
+ end
end
# processes a tests/document/context/test node ( where @exception is true )
- def handleExceptionalTest(ctx, variables, namespaces, testElement)
- assert_raise( Exception ) {
- XPath.match( ctx, testElement.attributes["select"], namespaces, variables )
- }
- end
-
- # processes a tests/document node
- def handleDocument(docElement)
- puts "- Processing document: #{docElement.attributes['url']}"
- testFile = File.new( docElement.attributes["url"] )
- testDoc = Document.new testFile
- XPath.each( docElement, "context") { |e| handleContext(testDoc, e) }
- end
-
- # processes a variable definition in a namespace like <test var:foo="bar">
- def handleVariable( ctx, variables, attrib )
- puts "--- Found attribute: #{attrib.name}"
- variables[attrib.name] = attrib.value
+ def process_exceptional_test(context, variables, namespaces, test)
+ xpath = test.attributes["select"]
+ assert_raise(REXML::ParseException) do
+ XPath.match(context, xpath, namespaces, variables, strict: true)
+ end
end
- # processes a namespace definition like <test xmlns:foo="fiz:bang:bam">
- def handleNamespace( ctx, prefix, namespaces )
- puts "--- Found namespace: #{prefix}"
- namespaces[prefix] = ctx.namespaces[prefix]
+ def user_message(context, xpath, matched)
+ message = ""
+ context.each_with_index do |node, i|
+ message << "Node#{i}:\n"
+ message << "#{node}\n"
+ end
+ message << "XPath: <#{xpath}>\n"
+ message << "Matched <#{matched}>"
+ message
end
-
end
end
diff --git a/test/rexml/test_light.rb b/test/rexml/test_light.rb
index 99bd9cac3f..fd39948daa 100644
--- a/test/rexml/test_light.rb
+++ b/test/rexml/test_light.rb
@@ -1,18 +1,20 @@
# frozen_string_literal: false
-require_relative "rexml_test_utils"
+
require "rexml/light/node"
require "rexml/parsers/lightparser"
+require_relative "helper"
+
module REXMLTests
class LightTester < Test::Unit::TestCase
- include REXMLTestUtils
+ include Helper::Fixture
include REXML::Light
def test_parse_large
- xml_string = fixture_path("documentation.xml")
+ xml_string = File.read(fixture_path("documentation.xml"))
parser = REXML::Parsers::LightParser.new(xml_string)
tag, content = parser.parse
- assert_equal([:document, :text], [tag, content.first])
+ assert_equal([:document, :xmldecl], [tag, content.first])
end
# FIXME INCOMPLETE
@@ -62,7 +64,7 @@ def test_access_child_elements
assert_equal( 'c', a[1].name )
end
- def test_itterate_over_children
+ def test_iterate_over_children
foo = make_small_document
ctr = 0
foo[0].each { ctr += 1 }
diff --git a/test/rexml/test_lightparser.rb b/test/rexml/test_lightparser.rb
index 1aeac072dd..507e067b17 100644
--- a/test/rexml/test_lightparser.rb
+++ b/test/rexml/test_lightparser.rb
@@ -1,10 +1,12 @@
# frozen_string_literal: false
-require_relative 'rexml_test_utils'
+
require 'rexml/parsers/lightparser'
+require_relative "helper"
+
module REXMLTests
class LightParserTester < Test::Unit::TestCase
- include REXMLTestUtils
+ include Helper::Fixture
include REXML
def test_parsing
File.open(fixture_path("documentation.xml")) do |f|
diff --git a/test/rexml/test_listener.rb b/test/rexml/test_listener.rb
index 322d368be8..ae834a1fb6 100644
--- a/test/rexml/test_listener.rb
+++ b/test/rexml/test_listener.rb
@@ -1,14 +1,13 @@
# coding: binary
# frozen_string_literal: false
-require_relative 'rexml_test_utils'
-
-require 'rexml/document'
require 'rexml/streamlistener'
+require_relative "helper"
+
module REXMLTests
class BaseTester < Test::Unit::TestCase
- include REXMLTestUtils
+ include Helper::Fixture
def test_empty
return unless defined? @listener
# Empty.
diff --git a/test/rexml/test_martin_fowler.rb b/test/rexml/test_martin_fowler.rb
index da685a80ec..ce27d72576 100644
--- a/test/rexml/test_martin_fowler.rb
+++ b/test/rexml/test_martin_fowler.rb
@@ -1,9 +1,7 @@
# frozen_string_literal: false
-require 'test/unit'
-require 'rexml/document'
module REXMLTests
- class OrderTester < Test::Unit::TestCase
+ class OrderTesterMF < Test::Unit::TestCase
DOC = <<END
<paper>
<title>Remove this element and figs order differently</title>
@@ -18,12 +16,12 @@ class OrderTester < Test::Unit::TestCase
</paper>
END
- def initialize n
+ def setup
@doc = REXML::Document.new(DOC)
@figs = REXML::XPath.match(@doc,'//figure')
@names = @figs.collect {|f| f.attributes['src']}
- super
end
+
def test_fig1
assert_equal 'fig1', @figs[0].attributes['src']
end
diff --git a/test/rexml/test_namespace.rb b/test/rexml/test_namespace.rb
index 90e1d36945..57a0a3e5ad 100644
--- a/test/rexml/test_namespace.rb
+++ b/test/rexml/test_namespace.rb
@@ -1,11 +1,10 @@
# frozen_string_literal: false
-require_relative "rexml_test_utils"
-require "rexml/document"
+require_relative "helper"
module REXMLTests
class TestNamespace < Test::Unit::TestCase
- include REXMLTestUtils
+ include Helper::Fixture
include REXML
def setup
diff --git a/test/rexml/test_order.rb b/test/rexml/test_order.rb
index 807d9faa96..6dc0204691 100644
--- a/test/rexml/test_order.rb
+++ b/test/rexml/test_order.rb
@@ -1,14 +1,15 @@
# frozen_string_literal: false
-require_relative 'rexml_test_utils'
-require 'rexml/document'
+
begin
require 'zlib'
rescue LoadError
end
+require_relative "helper"
+
module REXMLTests
class OrderTester < Test::Unit::TestCase
- include REXMLTestUtils
+ include Helper::Fixture
TESTDOC = <<END
<a>
diff --git a/test/rexml/test_preceding_sibling.rb b/test/rexml/test_preceding_sibling.rb
index d89a1e1c90..7e661eb00c 100644
--- a/test/rexml/test_preceding_sibling.rb
+++ b/test/rexml/test_preceding_sibling.rb
@@ -1,7 +1,5 @@
# frozen_string_literal: false
# ISSUE 32
-require 'test/unit'
-require 'rexml/document'
module REXMLTests
# daz - for report by Dan Kohn in:
diff --git a/test/rexml/test_pullparser.rb b/test/rexml/test_pullparser.rb
index 31b5b74bd6..bdf8be17fa 100644
--- a/test/rexml/test_pullparser.rb
+++ b/test/rexml/test_pullparser.rb
@@ -1,5 +1,4 @@
# frozen_string_literal: false
-require "test/unit/testcase"
require 'rexml/parsers/pullparser'
@@ -63,6 +62,63 @@ def test_entity_replacement
end
end
+ def test_character_references
+ source = '<root><a>A</a><b>B</b></root>'
+ parser = REXML::Parsers::PullParser.new( source )
+
+ events = {}
+ element_name = ''
+ while parser.has_next?
+ event = parser.pull
+ case event.event_type
+ when :start_element
+ element_name = event[0]
+ when :text
+ events[element_name] = event[1]
+ end
+ end
+
+ assert_equal('A', events['a'])
+ assert_equal("B", events['b'])
+ end
+
+ def test_text_entity_references
+ source = '<root><a><P> <I> <B> Text </B> </I></a></root>'
+ parser = REXML::Parsers::PullParser.new( source )
+
+ events = []
+ while parser.has_next?
+ event = parser.pull
+ case event.event_type
+ when :text
+ events << event[1]
+ end
+ end
+
+ assert_equal(["<P> <I> <B> Text </B> </I>"], events)
+ end
+
+ def test_text_content_with_line_breaks
+ source = "<root><a>A</a><b>B\n</b><c>C\r\n</c></root>"
+ parser = REXML::Parsers::PullParser.new( source )
+
+ events = {}
+ element_name = ''
+ while parser.has_next?
+ event = parser.pull
+ case event.event_type
+ when :start_element
+ element_name = event[0]
+ when :text
+ events[element_name] = event[1]
+ end
+ end
+
+ assert_equal('A', events['a'])
+ assert_equal("B\n", events['b'])
+ assert_equal("C\n", events['c'])
+ end
+
def test_peek_unshift
source = "<a><b/></a>"
REXML::Parsers::PullParser.new(source)
@@ -99,5 +155,152 @@ def test_peek
end
assert_equal( 0, names.length )
end
+
+ class EntityExpansionLimitTest < Test::Unit::TestCase
+ class GeneralEntityTest < self
+ def test_have_value
+ source = <<-XML
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE member [
+ <!ENTITY a "&b;&b;&b;&b;&b;&b;&b;&b;&b;&b;">
+ <!ENTITY b "&c;&c;&c;&c;&c;&c;&c;&c;&c;&c;">
+ <!ENTITY c "&d;&d;&d;&d;&d;&d;&d;&d;&d;&d;">
+ <!ENTITY d "&e;&e;&e;&e;&e;&e;&e;&e;&e;&e;">
+ <!ENTITY e "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxx">
+]>
+<member>
+&a;
+</member>
+ XML
+
+ parser = REXML::Parsers::PullParser.new(source)
+ assert_raise(RuntimeError.new("entity expansion has grown too large")) do
+ while parser.has_next?
+ parser.pull
+ end
+ end
+ end
+
+ def test_empty_value
+ source = <<-XML
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE member [
+ <!ENTITY a "&b;&b;&b;&b;&b;&b;&b;&b;&b;&b;">
+ <!ENTITY b "&c;&c;&c;&c;&c;&c;&c;&c;&c;&c;">
+ <!ENTITY c "&d;&d;&d;&d;&d;&d;&d;&d;&d;&d;">
+ <!ENTITY d "&e;&e;&e;&e;&e;&e;&e;&e;&e;&e;">
+ <!ENTITY e "">
+]>
+<member>
+&a;
+</member>
+ XML
+
+ parser = REXML::Parsers::PullParser.new(source)
+ parser.entity_expansion_limit = 100000
+ while parser.has_next?
+ parser.pull
+ end
+ assert_equal(11111, parser.entity_expansion_count)
+
+ parser = REXML::Parsers::PullParser.new(source)
+ assert_raise(RuntimeError.new("number of entity expansions exceeded, processing aborted.")) do
+ while parser.has_next?
+ parser.pull
+ end
+ end
+ assert do
+ parser.entity_expansion_count > REXML::Security.entity_expansion_limit
+ end
+ end
+
+ def test_with_default_entity
+ source = <<-XML
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE member [
+ <!ENTITY a "a">
+ <!ENTITY a2 "&a; &a;">
+]>
+<member>
+&a;
+&a2;
+<
+</member>
+ XML
+
+ parser = REXML::Parsers::PullParser.new(source)
+ parser.entity_expansion_limit = 4
+ while parser.has_next?
+ parser.pull
+ end
+
+ parser = REXML::Parsers::PullParser.new(source)
+ parser.entity_expansion_limit = 3
+ assert_raise(RuntimeError.new("number of entity expansions exceeded, processing aborted.")) do
+ while parser.has_next?
+ parser.pull
+ end
+ end
+ end
+
+ def test_with_only_default_entities
+ member_value = "<p>#{'A' * REXML::Security.entity_expansion_text_limit}</p>"
+ source = <<-XML
+<?xml version="1.0" encoding="UTF-8"?>
+<member>
+#{member_value}
+</member>
+ XML
+
+ parser = REXML::Parsers::PullParser.new(source)
+ events = {}
+ element_name = ''
+ while parser.has_next?
+ event = parser.pull
+ case event.event_type
+ when :start_element
+ element_name = event[0]
+ when :text
+ events[element_name] = event[1]
+ end
+ end
+
+ expected_value = "<p>#{'A' * REXML::Security.entity_expansion_text_limit}</p>"
+ assert_equal(expected_value, events['member'].strip)
+ assert_equal(0, parser.entity_expansion_count)
+ assert do
+ events['member'].bytesize > REXML::Security.entity_expansion_text_limit
+ end
+ end
+
+ def test_entity_expansion_text_limit
+ source = <<-XML
+<!DOCTYPE member [
+ <!ENTITY a "&b;&b;&b;">
+ <!ENTITY b "&c;&d;&e;">
+ <!ENTITY c "xxxxxxxxxx">
+ <!ENTITY d "yyyyyyyyyy">
+ <!ENTITY e "zzzzzzzzzz">
+]>
+<member>&a;</member>
+ XML
+
+ parser = REXML::Parsers::PullParser.new(source)
+ parser.entity_expansion_text_limit = 90
+ events = {}
+ element_name = ''
+ while parser.has_next?
+ event = parser.pull
+ case event.event_type
+ when :start_element
+ element_name = event[0]
+ when :text
+ events[element_name] = event[1]
+ end
+ end
+ assert_equal(90, events['member'].size)
+ end
+ end
+ end
end
end
diff --git a/test/rexml/test_rexml_issuezilla.rb b/test/rexml/test_rexml_issuezilla.rb
index 1c54c9d53d..fb9f75de9b 100644
--- a/test/rexml/test_rexml_issuezilla.rb
+++ b/test/rexml/test_rexml_issuezilla.rb
@@ -1,10 +1,10 @@
# frozen_string_literal: false
-require_relative 'rexml_test_utils'
-require 'rexml/document'
+
+require_relative "helper"
module REXMLTests
class TestIssuezillaParsing < Test::Unit::TestCase
- include REXMLTestUtils
+ include Helper::Fixture
def test_rexml
doc = File.open(fixture_path("ofbiz-issues-full-177.xml")) do |f|
REXML::Document.new(f)
diff --git a/test/rexml/test_sax.rb b/test/rexml/test_sax.rb
index 00539f0d4c..fa1d1cb612 100644
--- a/test/rexml/test_sax.rb
+++ b/test/rexml/test_sax.rb
@@ -1,12 +1,13 @@
# frozen_string_literal: false
-require_relative "rexml_test_utils"
+
require 'rexml/sax2listener'
require 'rexml/parsers/sax2parser'
-require 'rexml/document'
+
+require_relative "helper"
module REXMLTests
class SAX2Tester < Test::Unit::TestCase
- include REXMLTestUtils
+ include Helper::Fixture
include REXML
def test_characters
d = Document.new( "<A>@blah@</A>" )
@@ -32,6 +33,17 @@ def test_entity_replacement
assert_equal '--1234--', results[1]
end
+ def test_characters_predefined_entities
+ source = '<root><a><P> <I> <B> Text </B> </I></a></root>'
+
+ sax = Parsers::SAX2Parser.new( source )
+ results = []
+ sax.listen(:characters) {|x| results << x }
+ sax.parse
+
+ assert_equal(["<P> <I> <B> Text </B> </I>"], results)
+ end
+
def test_sax2
File.open(fixture_path("documentation.xml")) do |f|
parser = Parsers::SAX2Parser.new( f )
@@ -89,6 +101,177 @@ def test_sax2
end
end
+ def test_without_namespace
+ xml = <<-XML
+<root >
+ <a att1='1' att2='2' att3='<'>
+ <b />
+ </a>
+</root>
+ XML
+
+ parser = REXML::Parsers::SAX2Parser.new(xml)
+ elements = []
+ parser.listen(:start_element) do |uri, localname, qname, attrs|
+ elements << [uri, localname, qname, attrs]
+ end
+ parser.parse
+ assert_equal([
+ [nil, "root", "root", {}],
+ [nil, "a", "a", {"att1"=>"1", "att2"=>"2", "att3"=>"<"}],
+ [nil, "b", "b", {}]
+ ], elements)
+ end
+
+ def test_with_namespace
+ xml = <<-XML
+<root xmlns="http://example.org/default"
+ xmlns:foo="http://example.org/foo"
+ xmlns:bar="http://example.org/bar">
+ <a foo:att='1' bar:att='2' att='<'>
+ <bar:b />
+ </a>
+</root>
+ XML
+
+ parser = REXML::Parsers::SAX2Parser.new(xml)
+ elements = []
+ parser.listen(:start_element) do |uri, localname, qname, attrs|
+ elements << [uri, localname, qname, attrs]
+ end
+ parser.parse
+ assert_equal([
+ ["http://example.org/default", "root", "root", {"xmlns"=>"http://example.org/default", "xmlns:bar"=>"http://example.org/bar", "xmlns:foo"=>"http://example.org/foo"}],
+ ["http://example.org/default", "a", "a", {"att"=>"<", "bar:att"=>"2", "foo:att"=>"1"}],
+ ["http://example.org/bar", "b", "bar:b", {}]
+ ], elements)
+ end
+
+ class EntityExpansionLimitTest < Test::Unit::TestCase
+ class GeneralEntityTest < self
+ def test_have_value
+ source = <<-XML
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE member [
+ <!ENTITY a "&b;&b;&b;&b;&b;&b;&b;&b;&b;&b;">
+ <!ENTITY b "&c;&c;&c;&c;&c;&c;&c;&c;&c;&c;">
+ <!ENTITY c "&d;&d;&d;&d;&d;&d;&d;&d;&d;&d;">
+ <!ENTITY d "&e;&e;&e;&e;&e;&e;&e;&e;&e;&e;">
+ <!ENTITY e "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxx">
+]>
+<member>
+&a;
+</member>
+ XML
+
+ sax = REXML::Parsers::SAX2Parser.new(source)
+ assert_raise(RuntimeError.new("entity expansion has grown too large")) do
+ sax.parse
+ end
+ end
+
+ def test_empty_value
+ source = <<-XML
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE member [
+ <!ENTITY a "&b;&b;&b;&b;&b;&b;&b;&b;&b;&b;">
+ <!ENTITY b "&c;&c;&c;&c;&c;&c;&c;&c;&c;&c;">
+ <!ENTITY c "&d;&d;&d;&d;&d;&d;&d;&d;&d;&d;">
+ <!ENTITY d "&e;&e;&e;&e;&e;&e;&e;&e;&e;&e;">
+ <!ENTITY e "">
+]>
+<member>
+&a;
+</member>
+ XML
+
+ sax = REXML::Parsers::SAX2Parser.new(source)
+ sax.entity_expansion_limit = 100000
+ sax.parse
+ assert_equal(11111, sax.entity_expansion_count)
+
+ sax = REXML::Parsers::SAX2Parser.new(source)
+ assert_raise(RuntimeError.new("number of entity expansions exceeded, processing aborted.")) do
+ sax.parse
+ end
+ assert do
+ sax.entity_expansion_count > REXML::Security.entity_expansion_limit
+ end
+ end
+
+ def test_with_default_entity
+ source = <<-XML
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE member [
+ <!ENTITY a "a">
+ <!ENTITY a2 "&a; &a;">
+]>
+<member>
+&a;
+&a2;
+<
+</member>
+ XML
+
+ sax = REXML::Parsers::SAX2Parser.new(source)
+ sax.entity_expansion_limit = 4
+ sax.parse
+
+ sax = REXML::Parsers::SAX2Parser.new(source)
+ sax.entity_expansion_limit = 3
+ assert_raise(RuntimeError.new("number of entity expansions exceeded, processing aborted.")) do
+ sax.parse
+ end
+ end
+
+ def test_with_only_default_entities
+ member_value = "<p>#{'A' * REXML::Security.entity_expansion_text_limit}</p>"
+ source = <<-XML
+<?xml version="1.0" encoding="UTF-8"?>
+<member>
+#{member_value}
+</member>
+ XML
+
+ sax = REXML::Parsers::SAX2Parser.new(source)
+ text_value = nil
+ sax.listen(:characters, ["member"]) do |text|
+ text_value = text
+ end
+ sax.parse
+
+ expected_value = "<p>#{'A' * REXML::Security.entity_expansion_text_limit}</p>"
+ assert_equal(expected_value, text_value.strip)
+ assert_equal(0, sax.entity_expansion_count)
+ assert do
+ text_value.bytesize > REXML::Security.entity_expansion_text_limit
+ end
+ end
+
+ def test_entity_expansion_text_limit
+ source = <<-XML
+<!DOCTYPE member [
+ <!ENTITY a "&b;&b;&b;">
+ <!ENTITY b "&c;&d;&e;">
+ <!ENTITY c "xxxxxxxxxx">
+ <!ENTITY d "yyyyyyyyyy">
+ <!ENTITY e "zzzzzzzzzz">
+]>
+<member>&a;</member>
+ XML
+
+ sax = REXML::Parsers::SAX2Parser.new(source)
+ sax.entity_expansion_text_limit = 90
+ text_size = nil
+ sax.listen(:characters, ["member"]) do |text|
+ text_size = text.size
+ end
+ sax.parse
+ assert_equal(90, text_size)
+ end
+ end
+ end
+
# used by test_simple_doctype_listener
# submitted by Jeff Barczewski
class SimpleDoctypeListener
@@ -110,7 +293,7 @@ def doctype(name, pub_sys, long_name, uri)
# test simple non-entity doctype in sax listener
# submitted by Jeff Barczewski
def test_simple_doctype_listener
- xml = <<-END
+ xml = <<~END
<?xml version="1.0"?>
<!DOCTYPE greeting PUBLIC "Hello Greeting DTD" "http://foo/hello.dtd">
<greeting>Hello, world!</greeting>
@@ -141,8 +324,8 @@ def test_simple_doctype_listener
# test doctype with missing name, should throw ParseException
# submitted by Jeff Barczewseki
- def test_doctype_with_mising_name_throws_exception
- xml = <<-END
+ def test_doctype_with_missing_name_throws_exception
+ xml = <<~END
<?xml version="1.0"?>
<!DOCTYPE >
<greeting>Hello, world!</greeting>
diff --git a/test/rexml/test_stream.rb b/test/rexml/test_stream.rb
index d7ceedc70e..7917760ae6 100644
--- a/test/rexml/test_stream.rb
+++ b/test/rexml/test_stream.rb
@@ -1,6 +1,5 @@
# frozen_string_literal: false
-require "test/unit/testcase"
-require "rexml/document"
+
require 'rexml/streamlistener'
require 'stringio'
@@ -15,8 +14,8 @@ class StreamTester < Test::Unit::TestCase
def test_listener
data = %Q{<session1 user="han" password="rootWeiler" />\n<session2 user="han" password="rootWeiler" />}
- b = RequestReader.new( data )
- b = RequestReader.new( data )
+ RequestReader.new( data )
+ RequestReader.new( data )
end
def test_ticket_49
@@ -88,8 +87,175 @@ def entity(content)
assert_equal(["ISOLat2"], listener.entities)
end
+
+ def test_entity_replacement
+ source = <<-XML
+<!DOCTYPE foo [
+ <!ENTITY la "1234">
+ <!ENTITY lala "--&la;--">
+ <!ENTITY lalal "&la;&la;">
+]><a><la>&la;</la><lala>&lala;</lala></a>
+ XML
+
+ listener = MyListener.new
+ class << listener
+ attr_accessor :text_values
+ def text(text)
+ @text_values << text
+ end
+ end
+ listener.text_values = []
+ REXML::Document.parse_stream(source, listener)
+ assert_equal(["1234", "--1234--"], listener.text_values)
+ end
+
+ def test_characters_predefined_entities
+ source = '<root><a><P> <I> <B> Text </B> </I></a></root>'
+
+ listener = MyListener.new
+ class << listener
+ attr_accessor :text_value
+ def text(text)
+ @text_value << text
+ end
+ end
+ listener.text_value = ""
+ REXML::Document.parse_stream(source, listener)
+ assert_equal("<P> <I> <B> Text </B> </I>", listener.text_value)
+ end
end
+ class EntityExpansionLimitTest < Test::Unit::TestCase
+ def test_have_value
+ source = <<-XML
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE member [
+ <!ENTITY a "&b;&b;&b;&b;&b;&b;&b;&b;&b;&b;">
+ <!ENTITY b "&c;&c;&c;&c;&c;&c;&c;&c;&c;&c;">
+ <!ENTITY c "&d;&d;&d;&d;&d;&d;&d;&d;&d;&d;">
+ <!ENTITY d "&e;&e;&e;&e;&e;&e;&e;&e;&e;&e;">
+ <!ENTITY e "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxx">
+]>
+<member>
+&a;
+</member>
+ XML
+
+ assert_raise(RuntimeError.new("entity expansion has grown too large")) do
+ REXML::Document.parse_stream(source, MyListener.new)
+ end
+ end
+
+ def test_empty_value
+ source = <<-XML
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE member [
+ <!ENTITY a "&b;&b;&b;&b;&b;&b;&b;&b;&b;&b;">
+ <!ENTITY b "&c;&c;&c;&c;&c;&c;&c;&c;&c;&c;">
+ <!ENTITY c "&d;&d;&d;&d;&d;&d;&d;&d;&d;&d;">
+ <!ENTITY d "&e;&e;&e;&e;&e;&e;&e;&e;&e;&e;">
+ <!ENTITY e "">
+]>
+<member>
+&a;
+</member>
+ XML
+
+ listener = MyListener.new
+ parser = REXML::Parsers::StreamParser.new( source, listener )
+ parser.entity_expansion_limit = 100000
+ parser.parse
+ assert_equal(11111, parser.entity_expansion_count)
+
+ parser = REXML::Parsers::StreamParser.new( source, listener )
+ assert_raise(RuntimeError.new("number of entity expansions exceeded, processing aborted.")) do
+ parser.parse
+ end
+ assert do
+ parser.entity_expansion_count > REXML::Security.entity_expansion_limit
+ end
+ end
+
+ def test_with_default_entity
+ source = <<-XML
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE member [
+ <!ENTITY a "a">
+ <!ENTITY a2 "&a; &a;">
+]>
+<member>
+&a;
+&a2;
+<
+</member>
+ XML
+
+ listener = MyListener.new
+ parser = REXML::Parsers::StreamParser.new( source, listener )
+ parser.entity_expansion_limit = 4
+ parser.parse
+
+ parser = REXML::Parsers::StreamParser.new( source, listener )
+ parser.entity_expansion_limit = 3
+ assert_raise(RuntimeError.new("number of entity expansions exceeded, processing aborted.")) do
+ parser.parse
+ end
+ end
+
+ def test_with_only_default_entities
+ member_value = "<p>#{'A' * REXML::Security.entity_expansion_text_limit}</p>"
+ source = <<-XML
+<?xml version="1.0" encoding="UTF-8"?>
+<member>
+#{member_value}
+</member>
+ XML
+
+ listener = MyListener.new
+ class << listener
+ attr_accessor :text_value
+ def text(text)
+ @text_value << text
+ end
+ end
+ listener.text_value = ""
+ parser = REXML::Parsers::StreamParser.new( source, listener )
+ parser.parse
+
+ expected_value = "<p>#{'A' * REXML::Security.entity_expansion_text_limit}</p>"
+ assert_equal(expected_value, listener.text_value.strip)
+ assert_equal(0, parser.entity_expansion_count)
+ assert do
+ listener.text_value.bytesize > REXML::Security.entity_expansion_text_limit
+ end
+ end
+
+ def test_entity_expansion_text_limit
+ source = <<-XML
+<!DOCTYPE member [
+ <!ENTITY a "&b;&b;&b;">
+ <!ENTITY b "&c;&d;&e;">
+ <!ENTITY c "xxxxxxxxxx">
+ <!ENTITY d "yyyyyyyyyy">
+ <!ENTITY e "zzzzzzzzzz">
+]>
+<member>&a;</member>
+ XML
+
+ listener = MyListener.new
+ class << listener
+ attr_accessor :text_value
+ def text(text)
+ @text_value << text
+ end
+ end
+ listener.text_value = ""
+ parser = REXML::Parsers::StreamParser.new( source, listener )
+ parser.entity_expansion_text_limit = 90
+ parser.parse
+ assert_equal(90, listener.text_value.size)
+ end
+ end
# For test_listener
class RequestReader
diff --git a/test/rexml/test_text.rb b/test/rexml/test_text.rb
index 3f8036eee3..bae216562e 100644
--- a/test/rexml/test_text.rb
+++ b/test/rexml/test_text.rb
@@ -1,10 +1,55 @@
# frozen_string_literal: false
-require "rexml/text"
module REXMLTests
class TextTester < Test::Unit::TestCase
include REXML
+ def test_new_text_response_whitespace_default
+ text = Text.new("a b\t\tc", true)
+ assert_equal("a b\tc", Text.new(text).to_s)
+ end
+
+ def test_new_text_response_whitespace_true
+ text = Text.new("a b\t\tc", true)
+ assert_equal("a b\t\tc", Text.new(text, true).to_s)
+ end
+
+ def test_new_text_raw_default
+ text = Text.new("&lt;", false, nil, true)
+ assert_equal("&lt;", Text.new(text).to_s)
+ end
+
+ def test_new_text_raw_false
+ text = Text.new("&lt;", false, nil, true)
+ assert_equal("&amp;lt;", Text.new(text, false, nil, false).to_s)
+ end
+
+ def test_new_text_entity_filter_default
+ document = REXML::Document.new(<<-XML)
+<!DOCTYPE root [
+ <!ENTITY a "aaa">
+ <!ENTITY b "bbb">
+]>
+<root/>
+ XML
+ text = Text.new("aaa bbb", false, document.root, nil, ["a"])
+ assert_equal("aaa &b;",
+ Text.new(text, false, document.root).to_s)
+ end
+
+ def test_new_text_entity_filter_custom
+ document = REXML::Document.new(<<-XML)
+<!DOCTYPE root [
+ <!ENTITY a "aaa">
+ <!ENTITY b "bbb">
+]>
+<root/>
+ XML
+ text = Text.new("aaa bbb", false, document.root, nil, ["a"])
+ assert_equal("&a; bbb",
+ Text.new(text, false, document.root, nil, ["b"]).to_s)
+ end
+
def test_shift_operator_chain
text = Text.new("original\r\n")
text << "append1\r\n" << "append2\r\n"
@@ -18,5 +63,11 @@ def test_shift_operator_cache
text << "append3\r\n" << "append4\r\n"
assert_equal("original\nappend1\nappend2\nappend3\nappend4\n", text.to_s)
end
+
+ def test_clone
+ text = Text.new("&lt; <")
+ assert_equal(text.to_s,
+ text.clone.to_s)
+ end
end
end
diff --git a/test/rexml/test_text_check.rb b/test/rexml/test_text_check.rb
new file mode 100644
index 0000000000..11cf65a36f
--- /dev/null
+++ b/test/rexml/test_text_check.rb
@@ -0,0 +1,121 @@
+# frozen_string_literal: false
+
+module REXMLTests
+ class TextCheckTester < Test::Unit::TestCase
+
+ def check(string)
+ REXML::Text.check(string, REXML::Text::NEEDS_A_SECOND_CHECK, nil)
+ end
+
+ def assert_check(string)
+ assert_nothing_raised { check(string) }
+ end
+
+ def assert_check_failed(string, illegal_part)
+ message = "Illegal character #{illegal_part.inspect} in raw string #{string.inspect}"
+ assert_raise(RuntimeError.new(message)) do
+ check(string)
+ end
+ end
+
+ class TestValid < self
+ def test_entity_name_start_char_colon
+ assert_check("&:;")
+ end
+
+ def test_entity_name_start_char_under_score
+ assert_check("&_;")
+ end
+
+ def test_entity_name_mix
+ assert_check("&A.b-0123;")
+ end
+
+ def test_character_reference_decimal
+ assert_check("¢")
+ end
+
+ def test_character_reference_hex
+ assert_check("")
+ end
+
+ def test_entity_name_non_ascii
+ # U+3042 HIRAGANA LETTER A
+ # U+3044 HIRAGANA LETTER I
+ assert_check("&\u3042\u3044;")
+ end
+
+ def test_normal_string
+ assert_check("foo")
+ end
+ end
+
+ class TestInvalid < self
+ def test_lt
+ assert_check_failed("<;", "<")
+ end
+
+ def test_lt_mix
+ assert_check_failed("ab<cd", "<")
+ end
+
+ def test_reference_empty
+ assert_check_failed("&;", "&")
+ end
+
+ def test_entity_reference_missing_colon
+ assert_check_failed("&", "&")
+ end
+
+ def test_character_reference_decimal_garbage_at_the_end
+ # U+0030 DIGIT ZERO
+ assert_check_failed("0x;", "&")
+ end
+
+ def test_character_reference_decimal_space_at_the_start
+ # U+0030 DIGIT ZERO
+ assert_check_failed("&# 48;", "&")
+ end
+
+ def test_character_reference_decimal_control_character
+ # U+0008 BACKSPACE
+ assert_check_failed("", "")
+ end
+
+ def test_character_reference_format_hex_0x
+ # U+0041 LATIN CAPITAL LETTER A
+ assert_check_failed("�x41;", "�x41;")
+ end
+
+ def test_character_reference_format_hex_00x
+ # U+0041 LATIN CAPITAL LETTER A
+ assert_check_failed("�x41;", "�x41;")
+ end
+
+ def test_character_reference_hex_garbage_at_the_end
+ # U+0030 DIGIT ZERO
+ assert_check_failed("Hx;", "&")
+ end
+
+ def test_character_reference_hex_space_at_the_start
+ # U+0030 DIGIT ZERO
+ assert_check_failed("&#x 30;", "&")
+ end
+
+ def test_character_reference_hex_surrogate_block
+ # U+0D800 SURROGATE PAIR
+ assert_check_failed("�", "�")
+ end
+
+ def test_entity_name_non_ascii_symbol
+ # U+00BF INVERTED QUESTION MARK
+ assert_check_failed("&\u00BF;", "&")
+ end
+
+ def test_entity_name_new_line
+ # U+0026 AMPERSAND
+ assert_check_failed("&\namp\nx;", "&")
+ end
+ end
+ end
+end
diff --git a/test/rexml/test_ticket_80.rb b/test/rexml/test_ticket_80.rb
index ab6a57efaf..daebdc5972 100644
--- a/test/rexml/test_ticket_80.rb
+++ b/test/rexml/test_ticket_80.rb
@@ -7,9 +7,6 @@
# copy: (C) CopyLoose 2006 Bib Development Team <bib-devel>at<uberdev>dot<org>
#------------------------------------------------------------------------------
-require 'test/unit'
-require 'rexml/document'
-
module REXMLTests
class Ticket80 < Test::Unit::TestCase
diff --git a/test/rexml/test_validation_rng.rb b/test/rexml/test_validation_rng.rb
index b5b50450e0..4872396b84 100644
--- a/test/rexml/test_validation_rng.rb
+++ b/test/rexml/test_validation_rng.rb
@@ -1,7 +1,5 @@
# frozen_string_literal: false
-require "test/unit/testcase"
-require "rexml/document"
require "rexml/validation/relaxng"
module REXMLTests
@@ -9,7 +7,7 @@ class RNGValidation < Test::Unit::TestCase
include REXML
def test_validate
- rng = %q{
+ rng = <<-XML
<?xml version="1.0" encoding="UTF-8"?>
<element name="A" xmlns="http://relaxng.org/ns/structure/1.0">
<element name="B">
@@ -26,7 +24,7 @@ def test_validate
</element>
</element>
</element>
- }
+ XML
validator = REXML::Validation::RelaxNG.new( rng )
no_error( validator, %q{<A><B><C X="x"><E/><E/></C><D/></B></A>} )
@@ -35,7 +33,7 @@ def test_validate
def test_sequence
- rng = %q{
+ rng = <<-XML
<?xml version="1.0" encoding="UTF-8"?>
<element name="A" xmlns="http://relaxng.org/ns/structure/1.0">
<element name="B">
@@ -47,7 +45,7 @@ def test_sequence
</element>
</element>
</element>
- }
+ XML
validator = REXML::Validation::RelaxNG.new( rng )
error( validator, %q{<A><B><C/><C/><D/></B></A>} )
@@ -58,7 +56,7 @@ def test_sequence
def test_choice
- rng = %q{
+ rng = <<-XML
<?xml version="1.0" encoding="UTF-8"?>
<element name="A" xmlns="http://relaxng.org/ns/structure/1.0">
<element name="B">
@@ -72,7 +70,7 @@ def test_choice
</choice>
</element>
</element>
- }
+ XML
validator = REXML::Validation::RelaxNG.new( rng )
error( validator, %q{<A><B><C/><D/></B></A>} )
@@ -81,7 +79,7 @@ def test_choice
end
def test_optional
- rng = %q{
+ rng = <<-XML
<?xml version="1.0" encoding="UTF-8"?>
<element name="A" xmlns="http://relaxng.org/ns/structure/1.0">
<element name="B">
@@ -92,7 +90,7 @@ def test_optional
</optional>
</element>
</element>
- }
+ XML
validator = REXML::Validation::RelaxNG.new( rng )
no_error( validator, %q{<A><B/></A>} )
@@ -102,7 +100,7 @@ def test_optional
end
def test_zero_or_more
- rng = %q{
+ rng = <<-XML
<?xml version="1.0" encoding="UTF-8"?>
<element name="A" xmlns="http://relaxng.org/ns/structure/1.0">
<element name="B">
@@ -113,7 +111,7 @@ def test_zero_or_more
</zeroOrMore>
</element>
</element>
- }
+ XML
validator = REXML::Validation::RelaxNG.new( rng )
no_error( validator, %q{<A><B/></A>} )
no_error( validator, %q{<A><B><C/></B></A>} )
@@ -121,7 +119,7 @@ def test_zero_or_more
error( validator, %q{<A><B><D/></B></A>} )
error( validator, %q{<A></A>} )
- rng = %q{
+ rng = <<-XML
<?xml version="1.0" encoding="UTF-8"?>
<element name="A" xmlns="http://relaxng.org/ns/structure/1.0">
<element name="B">
@@ -135,7 +133,7 @@ def test_zero_or_more
</zeroOrMore>
</element>
</element>
- }
+ XML
validator = REXML::Validation::RelaxNG.new( rng )
no_error( validator, %q{<A><B/></A>} )
@@ -145,7 +143,7 @@ def test_zero_or_more
end
def test_one_or_more
- rng = %q{
+ rng = <<-XML
<?xml version="1.0" encoding="UTF-8"?>
<element name="A" xmlns="http://relaxng.org/ns/structure/1.0">
<element name="B">
@@ -156,7 +154,7 @@ def test_one_or_more
</oneOrMore>
</element>
</element>
- }
+ XML
validator = REXML::Validation::RelaxNG.new( rng )
error( validator, %q{<A><B/></A>} )
@@ -167,13 +165,13 @@ def test_one_or_more
end
def test_attribute
- rng = %q{
+ rng = <<-XML
<?xml version="1.0" encoding="UTF-8"?>
<element name="A" xmlns="http://relaxng.org/ns/structure/1.0">
<attribute name="X"/>
<attribute name="Y"/>
</element>
- }
+ XML
validator = REXML::Validation::RelaxNG.new( rng )
error( validator, %q{<A><B/></A>} )
@@ -183,7 +181,7 @@ def test_attribute
end
def test_choice_attributes
- rng = %q{
+ rng = <<-XML
<?xml version="1.0" encoding="UTF-8"?>
<element name="A" xmlns="http://relaxng.org/ns/structure/1.0">
<choice>
@@ -191,7 +189,7 @@ def test_choice_attributes
<attribute name="Y"/>
</choice>
</element>
- }
+ XML
validator = REXML::Validation::RelaxNG.new( rng )
error( validator, %q{<A X="1" Y="1"/>} )
@@ -201,7 +199,7 @@ def test_choice_attributes
end
def test_choice_attribute_element
- rng = %q{
+ rng = <<-XML
<?xml version="1.0" encoding="UTF-8"?>
<element name="A" xmlns="http://relaxng.org/ns/structure/1.0">
<choice>
@@ -209,7 +207,7 @@ def test_choice_attribute_element
<element name="B"/>
</choice>
</element>
- }
+ XML
validator = REXML::Validation::RelaxNG.new( rng )
error( validator, %q{<A X="1"><B/></A>} )
@@ -219,12 +217,12 @@ def test_choice_attribute_element
end
def test_empty
- rng = %q{
+ rng = <<-XML
<?xml version="1.0" encoding="UTF-8"?>
<element name="A" xmlns="http://relaxng.org/ns/structure/1.0">
<empty/>
</element>
- }
+ XML
validator = REXML::Validation::RelaxNG.new( rng )
error( validator, %q{<A><B/></A>} )
@@ -233,12 +231,12 @@ def test_empty
end
def test_text_val
- rng = %q{
+ rng = <<-XML
<?xml version="1.0" encoding="UTF-8"?>
<element name="A" xmlns="http://relaxng.org/ns/structure/1.0">
<text/>
</element>
- }
+ XML
validator = REXML::Validation::RelaxNG.new( rng )
error( validator, %q{<A><B/></A>} )
@@ -247,7 +245,7 @@ def test_text_val
end
def test_choice_text
- rng = %q{
+ rng = <<-XML
<?xml version="1.0" encoding="UTF-8"?>
<element name="A" xmlns="http://relaxng.org/ns/structure/1.0">
<choice>
@@ -255,7 +253,7 @@ def test_choice_text
<text/>
</choice>
</element>
- }
+ XML
validator = REXML::Validation::RelaxNG.new( rng )
error( validator, %q{<A><B/>Text</A>} )
@@ -265,7 +263,7 @@ def test_choice_text
end
def test_group
- rng = %q{
+ rng = <<-XML
<?xml version="1.0" encoding="UTF-8"?>
<element name="A" xmlns="http://relaxng.org/ns/structure/1.0">
<choice>
@@ -276,7 +274,7 @@ def test_group
</group>
</choice>
</element>
- }
+ XML
validator = REXML::Validation::RelaxNG.new( rng )
error( validator, %q{<A><B/><C/></A>} )
@@ -284,7 +282,7 @@ def test_group
no_error( validator, %q{<A><B/></A>} )
no_error( validator, %q{<A><C/><D/></A>} )
- rng = %q{
+ rng = <<-XML
<?xml version="1.0" encoding="UTF-8"?>
<element name="A" xmlns="http://relaxng.org/ns/structure/1.0">
<element name="B"/>
@@ -293,7 +291,7 @@ def test_group
<element name="D"/>
</group>
</element>
- }
+ XML
validator = REXML::Validation::RelaxNG.new( rng )
error( validator, %q{<A><B/><C/></A>} )
@@ -304,14 +302,14 @@ def test_group
def test_value
# Values as text nodes
- rng = %q{
+ rng = <<-XML
<?xml version="1.0" encoding="UTF-8"?>
<element name="A" xmlns="http://relaxng.org/ns/structure/1.0">
<element name="B">
<value>VaLuE</value>
</element>
</element>
- }
+ XML
validator = REXML::Validation::RelaxNG.new( rng )
error( validator, %q{<A><B>X</B></A>} )
@@ -319,7 +317,7 @@ def test_value
no_error( validator, %q{<A><B>VaLuE</B></A>} )
# Values as text nodes, via choice
- rng = %q{
+ rng = <<-XML
<?xml version="1.0" encoding="UTF-8"?>
<element name="A" xmlns="http://relaxng.org/ns/structure/1.0">
<element name="B">
@@ -329,7 +327,7 @@ def test_value
</choice>
</element>
</element>
- }
+ XML
validator = REXML::Validation::RelaxNG.new( rng )
error( validator, %q{<A><B/></A>} )
@@ -338,14 +336,14 @@ def test_value
no_error( validator, %q{<A><B>Option 2</B></A>} )
# Attribute values
- rng = %q{
+ rng = <<-XML
<?xml version="1.0" encoding="UTF-8"?>
<element name="A" xmlns="http://relaxng.org/ns/structure/1.0">
<attribute name="B">
<value>VaLuE</value>
</attribute>
</element>
- }
+ XML
validator = REXML::Validation::RelaxNG.new( rng )
error( validator, %q{<A/>} )
@@ -354,7 +352,7 @@ def test_value
no_error( validator, %q{<A B="VaLuE"/>} )
# Attribute values via choice
- rng = %q{
+ rng = <<-XML
<?xml version="1.0" encoding="UTF-8"?>
<element name="A" xmlns="http://relaxng.org/ns/structure/1.0">
<attribute name="B">
@@ -364,7 +362,7 @@ def test_value
</choice>
</attribute>
</element>
- }
+ XML
validator = REXML::Validation::RelaxNG.new( rng )
error( validator, %q{<A B=""/>} )
@@ -374,7 +372,7 @@ def test_value
end
def test_interleave
- rng = %q{
+ rng = <<-XML
<?xml version="1.0" encoding="UTF-8"?>
<element name="A" xmlns="http://relaxng.org/ns/structure/1.0">
<element name="B">
@@ -385,7 +383,7 @@ def test_interleave
</interleave>
</element>
</element>
- }
+ XML
validator = REXML::Validation::RelaxNG.new( rng )
error( validator, %q{<A><B><C/></B></A>} )
@@ -398,7 +396,7 @@ def test_interleave
end
def test_mixed
- rng = %q{
+ rng = <<-XML
<?xml version="1.0" encoding="UTF-8"?>
<element name="A" xmlns="http://relaxng.org/ns/structure/1.0">
<element name="B">
@@ -407,7 +405,7 @@ def test_mixed
</mixed>
</element>
</element>
- }
+ XML
validator = REXML::Validation::RelaxNG.new( rng )
no_error( validator, %q{<A><B>Text<D/></B></A>} )
@@ -415,7 +413,7 @@ def test_mixed
end
def test_ref_sequence
- rng = %q{
+ rng = <<-XML
<?xml version="1.0" encoding="UTF-8"?>
<grammar xmlns="http://relaxng.org/ns/structure/1.0">
<start>
@@ -431,7 +429,7 @@ def test_ref_sequence
</element>
</define>
</grammar>
- }
+ XML
validator = REXML::Validation::RelaxNG.new( rng )
no_error( validator, %q{<A><B X=''/><B X=''/></A>} )
@@ -439,7 +437,7 @@ def test_ref_sequence
end
def test_ref_choice
- rng = %q{
+ rng = <<-XML
<?xml version="1.0" encoding="UTF-8"?>
<grammar xmlns="http://relaxng.org/ns/structure/1.0">
<start>
@@ -455,7 +453,7 @@ def test_ref_choice
<element name="C"/>
</define>
</grammar>
- }
+ XML
validator = REXML::Validation::RelaxNG.new( rng )
error( validator, %q{<A><D/></A>} )
@@ -463,7 +461,7 @@ def test_ref_choice
no_error( validator, %q{<A><B/></A>} )
no_error( validator, %q{<A><C/></A>} )
- rng = %q{
+ rng = <<-XML
<?xml version="1.0" encoding="UTF-8"?>
<grammar xmlns="http://relaxng.org/ns/structure/1.0">
<start>
@@ -479,7 +477,7 @@ def test_ref_choice
</choice>
</define>
</grammar>
- }
+ XML
validator = REXML::Validation::RelaxNG.new( rng )
error( validator, %q{<A><D/></A>} )
@@ -487,7 +485,7 @@ def test_ref_choice
no_error( validator, %q{<A><B/></A>} )
no_error( validator, %q{<A><C/></A>} )
- rng = %q{
+ rng = <<-XML
<?xml version="1.0" encoding="UTF-8"?>
<grammar xmlns="http://relaxng.org/ns/structure/1.0">
<start>
@@ -504,7 +502,7 @@ def test_ref_choice
<element name="C"/>
</define>
</grammar>
- }
+ XML
validator = REXML::Validation::RelaxNG.new( rng )
error( validator, %q{<A><B/><C/></A>} )
@@ -515,7 +513,7 @@ def test_ref_choice
def test_ref_zero_plus
- rng = %q{
+ rng = <<-XML
<?xml version="1.0" encoding="UTF-8"?>
<grammar xmlns="http://relaxng.org/ns/structure/1.0">
<start>
@@ -532,7 +530,7 @@ def test_ref_zero_plus
</element>
</define>
</grammar>
- }
+ XML
validator = REXML::Validation::RelaxNG.new( rng )
error( validator, %q{<A><B/></A>} )
@@ -540,7 +538,7 @@ def test_ref_zero_plus
no_error( validator, %q{<A><B X=''/></A>} )
no_error( validator, %q{<A><B X=''/><B X=''/><B X=''/></A>} )
- rng = %q{
+ rng = <<-XML
<?xml version="1.0" encoding="UTF-8"?>
<grammar xmlns="http://relaxng.org/ns/structure/1.0">
<start>
@@ -557,7 +555,7 @@ def test_ref_zero_plus
</zeroOrMore>
</define>
</grammar>
- }
+ XML
validator = REXML::Validation::RelaxNG.new( rng )
error( validator, %q{<A><B/></A>} )
@@ -568,7 +566,7 @@ def test_ref_zero_plus
def test_ref_one_plus
- rng = %q{
+ rng = <<-XML
<?xml version="1.0" encoding="UTF-8"?>
<grammar xmlns="http://relaxng.org/ns/structure/1.0">
<start>
@@ -585,7 +583,7 @@ def test_ref_one_plus
</element>
</define>
</grammar>
- }
+ XML
validator = REXML::Validation::RelaxNG.new( rng )
error( validator, %q{<A><B/></A>} )
@@ -593,7 +591,7 @@ def test_ref_one_plus
no_error( validator, %q{<A><B X=''/></A>} )
no_error( validator, %q{<A><B X=''/><B X=''/><B X=''/></A>} )
- rng = %q{
+ rng = <<-XML
<?xml version="1.0" encoding="UTF-8"?>
<grammar xmlns="http://relaxng.org/ns/structure/1.0">
<start>
@@ -610,7 +608,7 @@ def test_ref_one_plus
</oneOrMore>
</define>
</grammar>
- }
+ XML
validator = REXML::Validation::RelaxNG.new( rng )
error( validator, %q{<A><B/></A>} )
@@ -620,7 +618,7 @@ def test_ref_one_plus
end
def test_ref_interleave
- rng = %q{
+ rng = <<-XML
<?xml version="1.0" encoding="UTF-8"?>
<grammar xmlns="http://relaxng.org/ns/structure/1.0">
<start>
@@ -636,7 +634,7 @@ def test_ref_interleave
<element name="C"/>
</define>
</grammar>
- }
+ XML
validator = REXML::Validation::RelaxNG.new( rng )
error( validator, %q{<A><B/></A>} )
@@ -645,7 +643,7 @@ def test_ref_interleave
no_error( validator, %q{<A><B/><C/></A>} )
no_error( validator, %q{<A><C/><B/></A>} )
- rng = %q{
+ rng = <<-XML
<?xml version="1.0" encoding="UTF-8"?>
<grammar xmlns="http://relaxng.org/ns/structure/1.0">
<start>
@@ -661,7 +659,7 @@ def test_ref_interleave
</interleave>
</define>
</grammar>
- }
+ XML
validator = REXML::Validation::RelaxNG.new( rng )
error( validator, %q{<A><B/></A>} )
@@ -670,7 +668,7 @@ def test_ref_interleave
no_error( validator, %q{<A><B/><C/></A>} )
no_error( validator, %q{<A><C/><B/></A>} )
- rng = %q{
+ rng = <<-XML
<?xml version="1.0" encoding="UTF-8"?>
<grammar xmlns="http://relaxng.org/ns/structure/1.0">
<start>
@@ -689,7 +687,7 @@ def test_ref_interleave
<element name="C"/>
</define>
</grammar>
- }
+ XML
validator = REXML::Validation::RelaxNG.new( rng )
error( validator, %q{<A><B/></A>} )
@@ -700,7 +698,7 @@ def test_ref_interleave
end
def test_ref_recurse
- rng = %q{
+ rng = <<-XML
<?xml version="1.0" encoding="UTF-8"?>
<grammar xmlns="http://relaxng.org/ns/structure/1.0">
<start>
@@ -717,7 +715,7 @@ def test_ref_recurse
</element>
</define>
</grammar>
- }
+ XML
validator = REXML::Validation::RelaxNG.new( rng )
error( validator, %q{<A></A>} )
@@ -726,7 +724,7 @@ def test_ref_recurse
end
def test_ref_optional
- rng = %q{
+ rng = <<-XML
<?xml version="1.0" encoding="UTF-8"?>
<grammar xmlns="http://relaxng.org/ns/structure/1.0">
<start>
@@ -742,7 +740,7 @@ def test_ref_optional
</element>
</define>
</grammar>
- }
+ XML
validator = REXML::Validation::RelaxNG.new( rng )
no_error( validator, %q{<A></A>} )
@@ -750,7 +748,7 @@ def test_ref_optional
error( validator, %q{<A><B/><B/></A>} )
error( validator, %q{<A><C/></A>} )
- rng = %q{
+ rng = <<-XML
<?xml version="1.0" encoding="UTF-8"?>
<grammar xmlns="http://relaxng.org/ns/structure/1.0">
<start>
@@ -766,7 +764,7 @@ def test_ref_optional
</optional>
</define>
</grammar>
- }
+ XML
validator = REXML::Validation::RelaxNG.new( rng )
no_error( validator, %q{<A></A>} )
diff --git a/test/rexml/test_xml_declaration.rb b/test/rexml/test_xml_declaration.rb
index a4d97c41d0..6a1f4df053 100644
--- a/test/rexml/test_xml_declaration.rb
+++ b/test/rexml/test_xml_declaration.rb
@@ -1,20 +1,16 @@
-# -*- coding: utf-8 -*-
# frozen_string_literal: false
#
# Created by Henrik Mårtensson on 2007-02-18.
# Copyright (c) 2007. All rights reserved.
-require "rexml/document"
-require "test/unit"
-
module REXMLTests
class TestXmlDeclaration < Test::Unit::TestCase
def setup
- xml = <<-'END_XML'
+ xml = <<~XML
<?xml encoding= 'UTF-8' standalone='yes'?>
<root>
</root>
- END_XML
+ XML
@doc = REXML::Document.new xml
@root = @doc.root
@xml_declaration = @doc.children[0]
@@ -32,5 +28,18 @@ def test_has_sibling
assert_kind_of(REXML::XMLDecl, @root.previous_sibling.previous_sibling)
assert_kind_of(REXML::Element, @xml_declaration.next_sibling.next_sibling)
end
+
+ def test_write_prologue_quote
+ @doc.context[:prologue_quote] = :quote
+ assert_equal("<?xml version=\"1.0\" " +
+ "encoding=\"UTF-8\" standalone=\"yes\"?>",
+ @xml_declaration.to_s)
+ end
+
+ def test_is_writethis_attribute_copied_by_clone
+ assert_equal(true, @xml_declaration.clone.writethis)
+ @xml_declaration.nowrite
+ assert_equal(false, @xml_declaration.clone.writethis)
+ end
end
end
diff --git a/test/rexml/xpath/test_attribute.rb b/test/rexml/xpath/test_attribute.rb
index 9304db4e0d..b778ff81f8 100644
--- a/test/rexml/xpath/test_attribute.rb
+++ b/test/rexml/xpath/test_attribute.rb
@@ -1,13 +1,11 @@
# frozen_string_literal: false
-require 'test/unit'
-require 'rexml/document'
module REXMLTests
class TestXPathAttribute < Test::Unit::TestCase
def setup
@xml = <<-XML
<?xml version="1.0" encoding="UTF-8"?>
-<root>
+<root xmlns="http://example.com/">
<child name="one">child1</child>
<child name="two">child2</child>
<child name="three">child3</child>
@@ -26,5 +24,13 @@ def test_xpath_each
children = REXML::XPath.each(@document, "/root/child[@name='two']")
assert_equal(["child2"], children.collect(&:text))
end
+
+ def test_no_namespace
+ children = REXML::XPath.match(@document,
+ "/root/child[@nothing:name='two']",
+ "" => "http://example.com/",
+ "nothing" => "")
+ assert_equal(["child2"], children.collect(&:text))
+ end
end
end
diff --git a/test/rexml/xpath/test_axis_preceding_sibling.rb b/test/rexml/xpath/test_axis_preceding_sibling.rb
index 5842c6bc50..9c44ad63cc 100644
--- a/test/rexml/xpath/test_axis_preceding_sibling.rb
+++ b/test/rexml/xpath/test_axis_preceding_sibling.rb
@@ -1,6 +1,4 @@
# frozen_string_literal: false
-require "test/unit/testcase"
-require "rexml/document"
module REXMLTests
class TestXPathAxisPredcedingSibling < Test::Unit::TestCase
diff --git a/test/rexml/xpath/test_axis_self.rb b/test/rexml/xpath/test_axis_self.rb
new file mode 100644
index 0000000000..4e422f54bf
--- /dev/null
+++ b/test/rexml/xpath/test_axis_self.rb
@@ -0,0 +1,20 @@
+# frozen_string_literal: false
+
+module REXMLTests
+ class TestXPathAxisSelf < Test::Unit::TestCase
+ def test_only
+ doc = REXML::Document.new("<root><child/></root>")
+ assert_equal([doc.root],
+ REXML::XPath.match(doc.root, "."))
+ end
+
+ def test_have_predicate
+ doc = REXML::Document.new("<root><child/></root>")
+ error = assert_raise(REXML::ParseException) do
+ REXML::XPath.match(doc.root, ".[child]")
+ end
+ assert_equal("Garbage component exists at the end: <[child]>: <.[child]>",
+ error.message)
+ end
+ end
+end
diff --git a/test/rexml/xpath/test_base.rb b/test/rexml/xpath/test_base.rb
index 5079fdd75a..771cc48c58 100644
--- a/test/rexml/xpath/test_base.rb
+++ b/test/rexml/xpath/test_base.rb
@@ -1,11 +1,10 @@
# frozen_string_literal: false
-require_relative "../rexml_test_utils"
-require "rexml/document"
+require_relative "helper"
module REXMLTests
class TestXPathBase < Test::Unit::TestCase
- include REXMLTestUtils
+ include Helper::Fixture
include REXML
SOURCE = <<-EOF
<a id='1'>
@@ -369,11 +368,15 @@ def test_complex
assert_equal 2, c
end
+ def match(xpath)
+ XPath.match(@@doc, xpath).collect(&:to_s)
+ end
+
def test_grouping
- t = XPath.first( @@doc, "a/d/*[name()='d' and (name()='f' or name()='q')]" )
- assert_nil t
- t = XPath.first( @@doc, "a/d/*[(name()='d' and name()='f') or name()='q']" )
- assert_equal 'q', t.name
+ assert_equal([],
+ match("a/d/*[name()='d' and (name()='f' or name()='q')]"))
+ assert_equal(["<q id='19'/>"],
+ match("a/d/*[(name()='d' and name()='f') or name()='q']"))
end
def test_preceding
@@ -450,6 +453,46 @@ def test_following
# puts results
#end
+ def test_nested_predicates
+ doc = Document.new <<-EOF
+ <div>
+ <div>
+ <test>ab</test>
+ <test>cd</test>
+ </div>
+ <div>
+ <test>ef</test>
+ <test>gh</test>
+ </div>
+ <div>
+ <test>hi</test>
+ </div>
+ </div>
+ EOF
+
+ matches = XPath.match(doc, '(/div/div/test[0])').map(&:text)
+ assert_equal [], matches
+ matches = XPath.match(doc, '(/div/div/test[1])').map(&:text)
+ assert_equal ["ab", "ef", "hi"], matches
+ matches = XPath.match(doc, '(/div/div/test[2])').map(&:text)
+ assert_equal ["cd", "gh"], matches
+ matches = XPath.match(doc, '(/div/div/test[3])').map(&:text)
+ assert_equal [], matches
+
+ matches = XPath.match(doc, '(/div/div/test[1])[1]').map(&:text)
+ assert_equal ["ab"], matches
+ matches = XPath.match(doc, '(/div/div/test[1])[2]').map(&:text)
+ assert_equal ["ef"], matches
+ matches = XPath.match(doc, '(/div/div/test[1])[3]').map(&:text)
+ assert_equal ["hi"], matches
+ matches = XPath.match(doc, '(/div/div/test[2])[1]').map(&:text)
+ assert_equal ["cd"], matches
+ matches = XPath.match(doc, '(/div/div/test[2])[2]').map(&:text)
+ assert_equal ["gh"], matches
+ matches = XPath.match(doc, '(/div/div/test[2])[3]').map(&:text)
+ assert_equal [], matches
+ end
+
# Contributed by Mike Stok
def test_starts_with
source = <<-EOF
@@ -610,7 +653,7 @@ def test_comparisons
source = "<a><b id='1'/><b id='2'/><b id='3'/></a>"
doc = REXML::Document.new(source)
- # NOTE TO SER: check that number() is required
+ # NOTE: check that number() is required
assert_equal 2, REXML::XPath.match(doc, "//b[number(@id) > 1]").size
assert_equal 3, REXML::XPath.match(doc, "//b[number(@id) >= 1]").size
assert_equal 1, REXML::XPath.match(doc, "//b[number(@id) <= 1]").size
@@ -632,29 +675,36 @@ def test_spaces
<c id='a'/>
</b>
<c id='b'/>
+ <c id='c'/>
+ <c/>
</a>")
- assert_equal( 1, REXML::XPath.match(doc,
- "//*[local-name()='c' and @id='b']").size )
- assert_equal( 1, REXML::XPath.match(doc,
- "//*[ local-name()='c' and @id='b' ]").size )
- assert_equal( 1, REXML::XPath.match(doc,
- "//*[ local-name() = 'c' and @id = 'b' ]").size )
- assert_equal( 1,
- REXML::XPath.match(doc, '/a/c[@id]').size )
- assert_equal( 1,
- REXML::XPath.match(doc, '/a/c[(@id)]').size )
- assert_equal( 1,
- REXML::XPath.match(doc, '/a/c[ @id ]').size )
- assert_equal( 1,
- REXML::XPath.match(doc, '/a/c[ (@id) ]').size )
- assert_equal( 1,
- REXML::XPath.match(doc, '/a/c[( @id )]').size )
- assert_equal( 1, REXML::XPath.match(doc.root,
- '/a/c[ ( @id ) ]').size )
- assert_equal( 1, REXML::XPath.match(doc,
- '/a/c [ ( @id ) ] ').size )
- assert_equal( 1, REXML::XPath.match(doc,
- ' / a / c [ ( @id ) ] ').size )
+ match = lambda do |xpath|
+ REXML::XPath.match(doc, xpath).collect(&:to_s)
+ end
+ assert_equal(["<c id='b'/>"],
+ match.call("//*[local-name()='c' and @id='b']"))
+ assert_equal(["<c id='b'/>"],
+ match.call("//*[ local-name()='c' and @id='b' ]"))
+ assert_equal(["<c id='b'/>"],
+ match.call("//*[ local-name() = 'c' and @id = 'b' ]"))
+ assert_equal(["<c id='b'/>", "<c id='c'/>"],
+ match.call('/a/c[@id]'))
+ assert_equal(["<c id='b'/>", "<c id='c'/>"],
+ match.call('/a/c[(@id)]'))
+ assert_equal(["<c id='b'/>", "<c id='c'/>"],
+ match.call('/a/c[ @id ]'))
+ assert_equal(["<c id='b'/>", "<c id='c'/>"],
+ match.call('/a/c[ (@id) ]'))
+ assert_equal(["<c id='b'/>", "<c id='c'/>"],
+ match.call('/a/c[( @id )]'))
+ assert_equal(["<c id='b'/>", "<c id='c'/>"],
+ match.call('/a/c[ ( @id ) ]'))
+ assert_equal(["<c id='b'/>", "<c id='c'/>"],
+ match.call('/a/c [ ( @id ) ] '))
+ assert_equal(["<c id='b'/>", "<c id='c'/>"],
+ match.call(' / a / c [ ( @id ) ] '))
+ assert_equal(["<c id='b'/>", "<c id='c'/>"],
+ match.call('/ a / child:: c [( @id )] /'))
end
def test_text_nodes
@@ -692,11 +742,22 @@ def test_auto_string_value
end
def test_ordering
- source = "<a><b><c id='1'/><c id='2'/></b><b><d id='1'/><d id='2'/></b></a>"
+ source = <<-XML
+<a>
+ <b>
+ <c id='1'/>
+ <c id='2'/>
+ </b>
+ <b>
+ <d id='3'/>
+ <d id='4'/>
+ </b>
+</a>
+ XML
d = REXML::Document.new( source )
r = REXML::XPath.match( d, %q{/a/*/*[1]} )
- assert_equal( 1, r.size )
- r.each { |el| assert_equal( '1', el.attribute('id').value ) }
+ assert_equal(["1", "3"],
+ r.collect {|element| element.attribute("id").value})
end
def test_descendant_or_self_ordering
@@ -830,31 +891,44 @@ def test_a_star_star_one
</a>
EOL
d = REXML::Document.new( string )
- c1 = XPath.match( d, '/a/*/*[1]' )
- assert_equal( 1, c1.length )
- assert_equal( 'c1', c1[0].name )
+ cs = XPath.match( d, '/a/*/*[1]' )
+ assert_equal(["c1", "c2"], cs.collect(&:name))
end
def test_sum
- d = Document.new("<a>"+
- "<b>1</b><b>2</b><b>3</b>"+
- "<c><d>1</d><d>2</d></c>"+
- "<e att='1'/><e att='2'/>"+
- "</a>")
-
- for v,p in [[6, "sum(/a/b)"],
- [9, "sum(//b | //d)"],
- [3, "sum(/a/e/@*)"] ]
- assert_equal( v, XPath::match( d, p ).first )
- end
+ d = Document.new(<<-XML)
+<a>
+ <b>1</b>
+ <b>2</b>
+ <b>3</b>
+ <c>
+ <d>1</d>
+ <d>2</d>
+ </c>
+ <e att='1'/>
+ <e att='2'/>
+</a>
+ XML
+
+ assert_equal([6], XPath::match(d, "sum(/a/b)"))
+ assert_equal([9], XPath::match(d, "sum(//b | //d)"))
+ assert_equal([3], XPath::match(d, "sum(/a/e/@*)"))
end
def test_xpath_namespace
- d = REXML::Document.new("<tag1 xmlns='ns1'><tag2 xmlns='ns2'/><tada>xa</tada></tag1>")
- x = d.root
- num = 0
- x.each_element('tada') { num += 1 }
- assert_equal(1, num)
+ d = REXML::Document.new(<<-XML)
+<tag1 xmlns='ns1'>
+ <tag2 xmlns='ns2'/>
+ <tada>xa</tada>
+ <tada xmlns=''>xb</tada>
+</tag1>
+ XML
+ actual = []
+ d.root.each_element('tada') do |element|
+ actual << element.to_s
+ end
+ assert_equal(["<tada>xa</tada>", "<tada xmlns=''>xb</tada>"],
+ actual)
end
def test_ticket_39
@@ -990,7 +1064,7 @@ def test_ticket_59
</a>"
d = Document.new(data)
res = d.elements.to_a( "//c" ).collect {|e| e.attributes['id'].to_i}
- assert_equal( res, res.sort )
+ assert_equal((1..12).to_a, res)
end
def ticket_61_fixture(doc, xpath)
diff --git a/test/rexml/xpath/test_compare.rb b/test/rexml/xpath/test_compare.rb
new file mode 100644
index 0000000000..11d11e55be
--- /dev/null
+++ b/test/rexml/xpath/test_compare.rb
@@ -0,0 +1,252 @@
+# frozen_string_literal: false
+
+module REXMLTests
+ class TestXPathCompare < Test::Unit::TestCase
+ def match(xml, xpath)
+ document = REXML::Document.new(xml)
+ REXML::XPath.match(document, xpath)
+ end
+
+ class TestEqual < self
+ class TestNodeSet < self
+ def test_boolean_true
+ xml = <<-XML
+<?xml version="1.0" encoding="UTF-8"?>
+<root>
+ <child/>
+ <child/>
+</root>
+ XML
+ assert_equal([true],
+ match(xml, "/root/child=true()"))
+ end
+
+ def test_boolean_false
+ xml = <<-XML
+<?xml version="1.0" encoding="UTF-8"?>
+<root>
+</root>
+ XML
+ assert_equal([false],
+ match(xml, "/root/child=true()"))
+ end
+
+ def test_number_true
+ xml = <<-XML
+<?xml version="1.0" encoding="UTF-8"?>
+<root>
+ <child>100</child>
+ <child>200</child>
+</root>
+ XML
+ assert_equal([true],
+ match(xml, "/root/child=100"))
+ end
+
+ def test_number_false
+ xml = <<-XML
+<?xml version="1.0" encoding="UTF-8"?>
+<root>
+ <child>100</child>
+ <child>200</child>
+</root>
+ XML
+ assert_equal([false],
+ match(xml, "/root/child=300"))
+ end
+
+ def test_string_true
+ xml = <<-XML
+<?xml version="1.0" encoding="UTF-8"?>
+<root>
+ <child>text</child>
+ <child>string</child>
+</root>
+ XML
+ assert_equal([true],
+ match(xml, "/root/child='string'"))
+ end
+
+ def test_string_false
+ xml = <<-XML
+<?xml version="1.0" encoding="UTF-8"?>
+<root>
+ <child>text</child>
+ <child>string</child>
+</root>
+ XML
+ assert_equal([false],
+ match(xml, "/root/child='nonexistent'"))
+ end
+ end
+
+ class TestBoolean < self
+ def test_number_true
+ xml = "<root/>"
+ assert_equal([true],
+ match(xml, "true()=1"))
+ end
+
+ def test_number_false
+ xml = "<root/>"
+ assert_equal([false],
+ match(xml, "true()=0"))
+ end
+
+ def test_string_true
+ xml = "<root/>"
+ assert_equal([true],
+ match(xml, "true()='string'"))
+ end
+
+ def test_string_false
+ xml = "<root/>"
+ assert_equal([false],
+ match(xml, "true()=''"))
+ end
+ end
+
+ class TestNumber < self
+ def test_string_true
+ xml = "<root/>"
+ assert_equal([true],
+ match(xml, "1='1'"))
+ end
+
+ def test_string_false
+ xml = "<root/>"
+ assert_equal([false],
+ match(xml, "1='2'"))
+ end
+ end
+ end
+
+ class TestGreaterThan < self
+ class TestNodeSet < self
+ def test_boolean_truex
+ xml = <<-XML
+<?xml version="1.0" encoding="UTF-8"?>
+<root>
+ <child/>
+</root>
+ XML
+ assert_equal([true],
+ match(xml, "/root/child>false()"))
+ end
+
+ def test_boolean_false
+ xml = <<-XML
+<?xml version="1.0" encoding="UTF-8"?>
+<root>
+ <child/>
+</root>
+ XML
+ assert_equal([false],
+ match(xml, "/root/child>true()"))
+ end
+
+ def test_number_true
+ xml = <<-XML
+<?xml version="1.0" encoding="UTF-8"?>
+<root>
+ <child>100</child>
+ <child>200</child>
+</root>
+ XML
+ assert_equal([true],
+ match(xml, "/root/child>199"))
+ end
+
+ def test_number_false
+ xml = <<-XML
+<?xml version="1.0" encoding="UTF-8"?>
+<root>
+ <child>100</child>
+ <child>200</child>
+</root>
+ XML
+ assert_equal([false],
+ match(xml, "/root/child>200"))
+ end
+
+ def test_string_true
+ xml = <<-XML
+<?xml version="1.0" encoding="UTF-8"?>
+<root>
+ <child>100</child>
+ <child>200</child>
+</root>
+ XML
+ assert_equal([true],
+ match(xml, "/root/child>'199'"))
+ end
+
+ def test_string_false
+ xml = <<-XML
+<?xml version="1.0" encoding="UTF-8"?>
+<root>
+ <child>100</child>
+ <child>200</child>
+</root>
+ XML
+ assert_equal([false],
+ match(xml, "/root/child>'200'"))
+ end
+ end
+
+ class TestBoolean < self
+ def test_string_true
+ xml = "<root/>"
+ assert_equal([true],
+ match(xml, "true()>'0'"))
+ end
+
+ def test_string_false
+ xml = "<root/>"
+ assert_equal([false],
+ match(xml, "true()>'1'"))
+ end
+ end
+
+ class TestNumber < self
+ def test_boolean_true
+ xml = "<root/>"
+ assert_equal([true],
+ match(xml, "true()>0"))
+ end
+
+ def test_number_false
+ xml = "<root/>"
+ assert_equal([false],
+ match(xml, "true()>1"))
+ end
+
+ def test_string_true
+ xml = "<root/>"
+ assert_equal([true],
+ match(xml, "1>'0'"))
+ end
+
+ def test_string_false
+ xml = "<root/>"
+ assert_equal([false],
+ match(xml, "1>'1'"))
+ end
+ end
+
+ class TestString < self
+ def test_string_true
+ xml = "<root/>"
+ assert_equal([true],
+ match(xml, "'1'>'0'"))
+ end
+
+ def test_string_false
+ xml = "<root/>"
+ assert_equal([false],
+ match(xml, "'1'>'1'"))
+ end
+ end
+ end
+ end
+end
diff --git a/test/rexml/xpath/test_node.rb b/test/rexml/xpath/test_node.rb
index e0e958e70f..742bfbbab6 100644
--- a/test/rexml/xpath/test_node.rb
+++ b/test/rexml/xpath/test_node.rb
@@ -1,10 +1,6 @@
# -*- coding: utf-8 -*-
# frozen_string_literal: false
-require_relative "../rexml_test_utils"
-
-require "rexml/document"
-
module REXMLTests
class TestXPathNode < Test::Unit::TestCase
def matches(xml, xpath)
diff --git a/test/rexml/xpath/test_predicate.rb b/test/rexml/xpath/test_predicate.rb
index ce1aaa324b..278e3765b6 100644
--- a/test/rexml/xpath/test_predicate.rb
+++ b/test/rexml/xpath/test_predicate.rb
@@ -1,13 +1,12 @@
# frozen_string_literal: false
-require "test/unit/testcase"
-require "rexml/document"
+
require "rexml/xpath"
require "rexml/parsers/xpathparser"
module REXMLTests
class TestXPathPredicate < Test::Unit::TestCase
include REXML
- SRC=<<-EOL
+ SRC=<<~EOL
<article>
<section role="subdivision" id="1">
<para>free flowing text.</para>
@@ -29,6 +28,15 @@ def setup
end
+ def test_predicate_only
+ error = assert_raise(REXML::ParseException) do
+ do_path("[article]")
+ end
+ assert_equal("Garbage component exists at the end: " +
+ "<[article]>: <[article]>",
+ error.message)
+ end
+
def test_predicates_parent
path = '//section[../self::section[@role="division"]]'
m = do_path( path )
diff --git a/test/rexml/xpath/test_text.rb b/test/rexml/xpath/test_text.rb
index 7222388e1b..dccc4c83c0 100644
--- a/test/rexml/xpath/test_text.rb
+++ b/test/rexml/xpath/test_text.rb
@@ -1,6 +1,5 @@
# frozen_string_literal: false
-require 'test/unit'
-require 'rexml/document'
+
require 'rexml/element'
require 'rexml/xpath'