File CVE-2022-23516.patch of Package rubygem-loofah
--- loofah-2.2.2/lib/loofah/scrubber.rb.old 2023-03-26 21:18:08.323719057 +0200
+++ loofah-2.2.2/lib/loofah/scrubber.rb 2023-03-26 21:18:45.739718114 +0200
@@ -107,6 +107,10 @@
return Scrubber::CONTINUE
end
when Nokogiri::XML::Node::TEXT_NODE, Nokogiri::XML::Node::CDATA_SECTION_NODE
+ if HTML5::Scrub.cdata_needs_escaping?(node)
+ node.before(HTML5::Scrub.cdata_escape(node))
+ return Scrubber::STOP
+ end
return Scrubber::CONTINUE
end
Scrubber::STOP
--- loofah-2.2.2/lib/loofah/html5/scrub.rb.old 2023-03-26 19:38:19.483869978 +0200
+++ loofah-2.2.2/lib/loofah/html5/scrub.rb 2023-03-26 20:29:54.555791981 +0200
@@ -127,6 +127,45 @@
end
end
+ def cdata_needs_escaping?(node)
+   # True for cdata nodes. On JRuby, Nokogiri's HTML4 parser does not flag the text child of a `style` or `script` tag as cdata even though it acts that way, so that case is detected explicitly.
+   node.cdata? || (Nokogiri.jruby? && node.text? && %w[style script].include?(node.parent.name))
+ end
+
+ def cdata_escape(node)
+   # Build a new node, owned by the same document, that holds the tag-escaped text of `node`.
+   safe_text = escape_tags(node.text)
+   owner = node.document
+   # JRuby gets a plain text node; every other platform gets a CDATA node.
+   return owner.create_text_node(safe_text) if Nokogiri.jruby?
+   owner.create_cdata(safe_text)
+ end
+
+ TABLE_FOR_ESCAPE_HTML__ = { # replacement text for each character that escape_tags rewrites
+   '<' => '&lt;',
+   '>' => '&gt;',
+   '&' => '&amp;',
+ }
+
+ def escape_tags(string)
+ # Modified version of CGI.escapeHTML from Ruby 3.1: returns a copy of `string` in which every `<`, `>` and `&` is replaced by the value TABLE_FOR_ESCAPE_HTML__ holds for it; the result keeps the input's encoding.
+ enc = string.encoding
+ unless enc.ascii_compatible?
+ if enc.dummy?
+ origenc = enc # remembered so the result can be converted back below
+ enc = Encoding::Converter.asciicompat_encoding(enc)
+ string = enc ? string.encode(enc) : string.b # no ASCII-compatible counterpart: fall back to a binary copy
+ end
+ table = Hash[TABLE_FOR_ESCAPE_HTML__.map {|pair|pair.map {|s|s.encode(enc)}}] # keys and values re-encoded to `enc`
+ string = string.gsub(/#{"[<>&]".encode(enc)}/, table)
+ string.encode!(origenc) if origenc # convert back to the caller's original encoding
+ string
+ else
+ string = string.b # binary copy, so the in-place gsub! below never touches the caller's string
+ string.gsub!(/[<>&]/, TABLE_FOR_ESCAPE_HTML__)
+ string.force_encoding(enc) # relabel the bytes with the input's encoding
+ end
+ end
end
end
end
--- loofah-2.2.2/lib/loofah/scrubbers.rb.old 2023-03-26 21:19:08.231717547 +0200
+++ loofah-2.2.2/lib/loofah/scrubbers.rb 2023-03-26 21:20:02.743716173 +0200
@@ -99,13 +99,9 @@
def scrub(node)
return CONTINUE if html5lib_sanitize(node) == CONTINUE
- if node.children.length == 1 && node.children.first.cdata?
- sanitized_text = Loofah.fragment(node.children.first.to_html).scrub!(:strip).to_html
- node.before Nokogiri::XML::Text.new(sanitized_text, node.document)
- else
- node.before node.children
- end
+ node.before(node.children)
node.remove
+ return STOP
end
end