File 002-CVE-2020-26247.patch of Package rubygem-nokogiri.13223
From 9c87439d9afa14a365ff13e73adc809cb2c3d97b Mon Sep 17 00:00:00 2001
From: Mike Dalessio <mike.dalessio@gmail.com>
Date: Mon, 23 Nov 2020 00:47:02 -0500
Subject: [PATCH] feat: XML::Schema and RelaxNG creation accept optional
ParseOptions
I'm trying out a new pattern: the parsed object carries around the
ParseOptions it was created with, which should make some testing a
bit easier.
I'm also not implementing the "config block" pattern in use for
Documents, because I think the UX is weird and I'm hoping to change
everything to use kwargs in a 2.0 release, anyway.
---
ext/nokogiri/xml_relax_ng.c | 39 +++++++++++++++++--------
ext/nokogiri/xml_schema.c | 46 ++++++++++++++++++++++--------
lib/nokogiri/xml/parse_options.rb | 2 ++
lib/nokogiri/xml/relax_ng.rb | 4 +--
lib/nokogiri/xml/schema.rb | 10 ++++---
test/xml/test_relax_ng.rb | 34 ++++++++++++++++++++++
test/xml/test_schema.rb | 33 ++++++++++++++++++++++
9 files changed, 182 insertions(+), 44 deletions(-)
diff --git a/ext/nokogiri/xml_relax_ng.c b/ext/nokogiri/xml_relax_ng.c
index cb80bdb05..9e62ff391 100644
--- a/ext/nokogiri/xml_relax_ng.c
+++ b/ext/nokogiri/xml_relax_ng.c
@@ -53,16 +53,24 @@ static VALUE validate_document(VALUE self, VALUE document)
*
* Create a new RelaxNG from the contents of +string+
*/
-static VALUE read_memory(VALUE klass, VALUE content)
+static VALUE read_memory(int argc, VALUE *argv, VALUE klass)
{
- xmlRelaxNGParserCtxtPtr ctx = xmlRelaxNGNewMemParserCtxt(
- (const char *)StringValuePtr(content),
- (int)RSTRING_LEN(content)
- );
+ VALUE content;
+ VALUE parse_options;
+ xmlRelaxNGParserCtxtPtr ctx;
xmlRelaxNGPtr schema;
- VALUE errors = rb_ary_new();
+ VALUE errors;
VALUE rb_schema;
+ int scanned_args = 0;
+
+ scanned_args = rb_scan_args(argc, argv, "11", &content, &parse_options);
+ if (scanned_args == 1) {
+ parse_options = rb_const_get(rb_const_get(mNokogiriXml, rb_intern("ParseOptions")), rb_intern("DEFAULT_SCHEMA"));
+ }
+ ctx = xmlRelaxNGNewMemParserCtxt((const char *)StringValuePtr(content), (int)RSTRING_LEN(content));
+
+ errors = rb_ary_new();
xmlSetStructuredErrorFunc((void *)errors, Nokogiri_error_array_pusher);
#ifdef HAVE_XMLRELAXNGSETPARSERSTRUCTUREDERRORS
@@ -90,6 +98,7 @@ static VALUE read_memory(VALUE klass, VALUE content)
rb_schema = Data_Wrap_Struct(klass, 0, dealloc, schema);
rb_iv_set(rb_schema, "@errors", errors);
+ rb_iv_set(rb_schema, "@parse_options", parse_options);
return rb_schema;
}
@@ -100,18 +109,25 @@ static VALUE read_memory(VALUE klass, VALUE content)
*
* Create a new RelaxNG schema from the Nokogiri::XML::Document +doc+
*/
-static VALUE from_document(VALUE klass, VALUE document)
+static VALUE from_document(int argc, VALUE *argv, VALUE klass)
{
+ VALUE document;
+ VALUE parse_options;
xmlDocPtr doc;
xmlRelaxNGParserCtxtPtr ctx;
xmlRelaxNGPtr schema;
VALUE errors;
VALUE rb_schema;
+ int scanned_args = 0;
+
+ scanned_args = rb_scan_args(argc, argv, "11", &document, &parse_options);
Data_Get_Struct(document, xmlDoc, doc);
+ doc = doc->doc; /* In case someone passes us a node. ugh. */
- /* In case someone passes us a node. ugh. */
- doc = doc->doc;
+ if (scanned_args == 1) {
+ parse_options = rb_const_get(rb_const_get(mNokogiriXml, rb_intern("ParseOptions")), rb_intern("DEFAULT_SCHEMA"));
+ }
ctx = xmlRelaxNGNewDocParserCtxt(doc);
@@ -143,6 +159,7 @@ static VALUE from_document(VALUE klass, VALUE document)
rb_schema = Data_Wrap_Struct(klass, 0, dealloc, schema);
rb_iv_set(rb_schema, "@errors", errors);
+ rb_iv_set(rb_schema, "@parse_options", parse_options);
return rb_schema;
}
@@ -156,7 +173,7 @@ void init_xml_relax_ng()
cNokogiriXmlRelaxNG = klass;
- rb_define_singleton_method(klass, "read_memory", read_memory, 1);
- rb_define_singleton_method(klass, "from_document", from_document, 1);
+ rb_define_singleton_method(klass, "read_memory", read_memory, -1);
+ rb_define_singleton_method(klass, "from_document", from_document, -1);
rb_define_private_method(klass, "validate_document", validate_document, 1);
}
diff --git a/ext/nokogiri/xml_schema.c b/ext/nokogiri/xml_schema.c
index 439f72196..ea7c3d316 100644
--- a/ext/nokogiri/xml_schema.c
+++ b/ext/nokogiri/xml_schema.c
@@ -93,15 +93,26 @@ static VALUE validate_file(VALUE self, VALUE rb_filename)
*
* Create a new Schema from the contents of +string+
*/
-static VALUE read_memory(VALUE klass, VALUE content)
+static VALUE read_memory(int argc, VALUE *argv, VALUE klass)
{
+ VALUE content;
+ VALUE parse_options;
+ int parse_options_int;
+ xmlSchemaParserCtxtPtr ctx;
xmlSchemaPtr schema;
- xmlSchemaParserCtxtPtr ctx = xmlSchemaNewMemParserCtxt(
- (const char *)StringValuePtr(content),
- (int)RSTRING_LEN(content)
- );
+ VALUE errors;
VALUE rb_schema;
- VALUE errors = rb_ary_new();
+ int scanned_args = 0;
+
+ scanned_args = rb_scan_args(argc, argv, "11", &content, &parse_options);
+ if (scanned_args == 1) {
+ parse_options = rb_const_get(rb_const_get(mNokogiriXml, rb_intern("ParseOptions")), rb_intern("DEFAULT_SCHEMA"));
+ }
+ parse_options_int = (int)NUM2INT(rb_funcall(parse_options, rb_intern("to_i"), 0));
+
+ ctx = xmlSchemaNewMemParserCtxt((const char *)StringValuePtr(content), (int)RSTRING_LEN(content));
+
+ errors = rb_ary_new();
xmlSetStructuredErrorFunc((void *)errors, Nokogiri_error_array_pusher);
#ifdef HAVE_XMLSCHEMASETPARSERSTRUCTUREDERRORS
@@ -109,7 +120,7 @@ static VALUE read_memory(VALUE klass, VALUE content)
ctx,
Nokogiri_error_array_pusher,
(void *)errors
- );
+ );
#endif
schema = xmlSchemaParse(ctx);
@@ -129,6 +140,7 @@ static VALUE read_memory(VALUE klass, VALUE content)
rb_schema = Data_Wrap_Struct(klass, 0, dealloc, schema);
rb_iv_set(rb_schema, "@errors", errors);
+ rb_iv_set(rb_schema, "@parse_options", parse_options);
return rb_schema;
}
@@ -164,18 +176,27 @@ static int has_blank_nodes_p(VALUE cache)
*
* Create a new Schema from the Nokogiri::XML::Document +doc+
*/
-static VALUE from_document(VALUE klass, VALUE document)
+static VALUE from_document(int argc, VALUE *argv, VALUE klass)
{
+ VALUE document;
+ VALUE parse_options;
+ int parse_options_int;
xmlDocPtr doc;
xmlSchemaParserCtxtPtr ctx;
xmlSchemaPtr schema;
VALUE errors;
VALUE rb_schema;
+ int scanned_args = 0;
+
+ scanned_args = rb_scan_args(argc, argv, "11", &document, &parse_options);
Data_Get_Struct(document, xmlDoc, doc);
+ doc = doc->doc; /* In case someone passes us a node. ugh. */
- /* In case someone passes us a node. ugh. */
- doc = doc->doc;
+ if (scanned_args == 1) {
+ parse_options = rb_const_get(rb_const_get(mNokogiriXml, rb_intern("ParseOptions")), rb_intern("DEFAULT_SCHEMA"));
+ }
+ parse_options_int = (int)NUM2INT(rb_funcall(parse_options, rb_intern("to_i"), 0));
ctx = xmlSchemaNewDocParserCtxt(doc);
@@ -211,6 +232,7 @@ static VALUE from_document(VALUE klass, VALUE document)
rb_schema = Data_Wrap_Struct(klass, 0, dealloc, schema);
rb_iv_set(rb_schema, "@errors", errors);
+ rb_iv_set(rb_schema, "@parse_options", parse_options);
return rb_schema;
@@ -226,8 +248,8 @@ void init_xml_schema()
cNokogiriXmlSchema = klass;
- rb_define_singleton_method(klass, "read_memory", read_memory, 1);
- rb_define_singleton_method(klass, "from_document", from_document, 1);
+ rb_define_singleton_method(klass, "read_memory", read_memory, -1);
+ rb_define_singleton_method(klass, "from_document", from_document, -1);
rb_define_private_method(klass, "validate_document", validate_document, 1);
rb_define_private_method(klass, "validate_file", validate_file, 1);
diff --git a/lib/nokogiri/xml/parse_options.rb b/lib/nokogiri/xml/parse_options.rb
index 039afa2dc..a266d5ba0 100644
--- a/lib/nokogiri/xml/parse_options.rb
+++ b/lib/nokogiri/xml/parse_options.rb
@@ -73,6 +73,8 @@ class ParseOptions
DEFAULT_XML = RECOVER | NONET
# the default options used for parsing HTML documents
DEFAULT_HTML = RECOVER | NOERROR | NOWARNING | NONET
+ # the default options used for parsing XML schemas
+ DEFAULT_SCHEMA = NONET
attr_accessor :options
def initialize options = STRICT
diff --git a/lib/nokogiri/xml/relax_ng.rb b/lib/nokogiri/xml/relax_ng.rb
index 4d9ad65da..b1e83efb0 100644
--- a/lib/nokogiri/xml/relax_ng.rb
+++ b/lib/nokogiri/xml/relax_ng.rb
@@ -5,8 +5,8 @@ class << self
###
# Create a new Nokogiri::XML::RelaxNG document from +string_or_io+.
# See Nokogiri::XML::RelaxNG for an example.
- def RelaxNG string_or_io
- RelaxNG.new(string_or_io)
+ def RelaxNG(string_or_io, options = ParseOptions::DEFAULT_SCHEMA)
+ RelaxNG.new(string_or_io, options)
end
end
diff --git a/lib/nokogiri/xml/schema.rb b/lib/nokogiri/xml/schema.rb
index 60f1b2d36..b3719d7d2 100644
--- a/lib/nokogiri/xml/schema.rb
+++ b/lib/nokogiri/xml/schema.rb
@@ -5,8 +5,8 @@ class << self
###
# Create a new Nokogiri::XML::Schema object using a +string_or_io+
# object.
- def Schema string_or_io
- Schema.new(string_or_io)
+ def Schema(string_or_io, options = ParseOptions::DEFAULT_SCHEMA)
+ Schema.new(string_or_io, options)
end
end
@@ -30,12 +30,14 @@ def Schema string_or_io
class Schema
# Errors while parsing the schema file
attr_accessor :errors
+ # The Nokogiri::XML::ParseOptions used to parse the schema
+ attr_accessor :parse_options
###
# Create a new Nokogiri::XML::Schema object using a +string_or_io+
# object.
- def self.new string_or_io
- from_document Nokogiri::XML(string_or_io)
+ def self.new string_or_io, options = ParseOptions::DEFAULT_SCHEMA
+ from_document(Nokogiri::XML(string_or_io), options)
end
###
diff --git a/test/xml/test_relax_ng.rb b/test/xml/test_relax_ng.rb
index 23ede368a..02fd91b01 100644
--- a/test/xml/test_relax_ng.rb
+++ b/test/xml/test_relax_ng.rb
@@ -26,6 +26,40 @@ def test_parse_with_io
assert_equal 0, xsd.errors.length
end
+ def test_constructor_method_with_parse_options
+ schema = Nokogiri::XML::RelaxNG(File.read(ADDRESS_SCHEMA_FILE))
+ assert_equal Nokogiri::XML::ParseOptions::DEFAULT_SCHEMA, schema.parse_options
+
+ schema = Nokogiri::XML::RelaxNG(File.read(ADDRESS_SCHEMA_FILE), Nokogiri::XML::ParseOptions.new.recover)
+ assert_equal Nokogiri::XML::ParseOptions.new.recover, schema.parse_options
+ end
+
+ def test_new_with_parse_options
+ schema = Nokogiri::XML::RelaxNG.new(File.read(ADDRESS_SCHEMA_FILE))
+ assert_equal Nokogiri::XML::ParseOptions::DEFAULT_SCHEMA, schema.parse_options
+
+ schema = Nokogiri::XML::RelaxNG.new(File.read(ADDRESS_SCHEMA_FILE), Nokogiri::XML::ParseOptions.new.recover)
+ assert_equal Nokogiri::XML::ParseOptions.new.recover, schema.parse_options
+ end
+
+ def test_from_document_with_parse_options
+ schema = Nokogiri::XML::RelaxNG.from_document(Nokogiri::XML::Document.parse(File.read(ADDRESS_SCHEMA_FILE)))
+ assert_equal Nokogiri::XML::ParseOptions::DEFAULT_SCHEMA, schema.parse_options
+
+ schema = Nokogiri::XML::RelaxNG.from_document(Nokogiri::XML::Document.parse(File.read(ADDRESS_SCHEMA_FILE)),
+ Nokogiri::XML::ParseOptions.new.recover)
+ assert_equal Nokogiri::XML::ParseOptions.new.recover, schema.parse_options
+ end
+
+ def test_read_memory_with_parse_options
+ schema = Nokogiri::XML::RelaxNG.read_memory(File.read(ADDRESS_SCHEMA_FILE))
+ assert_equal Nokogiri::XML::ParseOptions::DEFAULT_SCHEMA, schema.parse_options
+
+ schema = Nokogiri::XML::RelaxNG.read_memory(File.read(ADDRESS_SCHEMA_FILE),
+ Nokogiri::XML::ParseOptions.new.recover)
+ assert_equal Nokogiri::XML::ParseOptions.new.recover, schema.parse_options
+ end
+
def test_parse_with_errors
xml = File.read(ADDRESS_SCHEMA_FILE).sub(/name="/, 'name=')
assert_raises(Nokogiri::XML::SyntaxError) {
diff --git a/test/xml/test_schema.rb b/test/xml/test_schema.rb
index 908c7c18d..2bd267b9d 100644
--- a/test/xml/test_schema.rb
+++ b/test/xml/test_schema.rb
@@ -109,6 +109,39 @@ def test_new
assert_instance_of Nokogiri::XML::Schema, xsd
end
+ def test_schema_method_with_parse_options
+ schema = Nokogiri::XML::Schema(File.read(PO_SCHEMA_FILE))
+ assert_equal Nokogiri::XML::ParseOptions::DEFAULT_SCHEMA, schema.parse_options
+
+ schema = Nokogiri::XML::Schema(File.read(PO_SCHEMA_FILE), Nokogiri::XML::ParseOptions.new.recover)
+ assert_equal Nokogiri::XML::ParseOptions.new.recover, schema.parse_options
+ end
+
+ def test_schema_new_with_parse_options
+ schema = Nokogiri::XML::Schema.new(File.read(PO_SCHEMA_FILE))
+ assert_equal Nokogiri::XML::ParseOptions::DEFAULT_SCHEMA, schema.parse_options
+
+ schema = Nokogiri::XML::Schema.new(File.read(PO_SCHEMA_FILE), Nokogiri::XML::ParseOptions.new.recover)
+ assert_equal Nokogiri::XML::ParseOptions.new.recover, schema.parse_options
+ end
+
+ def test_schema_from_document_with_parse_options
+ schema = Nokogiri::XML::Schema.from_document(Nokogiri::XML::Document.parse(File.read(PO_SCHEMA_FILE)))
+ assert_equal Nokogiri::XML::ParseOptions::DEFAULT_SCHEMA, schema.parse_options
+
+ schema = Nokogiri::XML::Schema.from_document(Nokogiri::XML::Document.parse(File.read(PO_SCHEMA_FILE)),
+ Nokogiri::XML::ParseOptions.new.recover)
+ assert_equal Nokogiri::XML::ParseOptions.new.recover, schema.parse_options
+ end
+
+ def test_schema_read_memory_with_parse_options
+ schema = Nokogiri::XML::Schema.read_memory(File.read(PO_SCHEMA_FILE))
+ assert_equal Nokogiri::XML::ParseOptions::DEFAULT_SCHEMA, schema.parse_options
+
+ schema = Nokogiri::XML::Schema.read_memory(File.read(PO_SCHEMA_FILE), Nokogiri::XML::ParseOptions.new.recover)
+ assert_equal Nokogiri::XML::ParseOptions.new.recover, schema.parse_options
+ end
+
def test_parse_with_io
xsd = nil
File.open(PO_SCHEMA_FILE, 'rb') { |f|