File 0003_CVE-2022-23517_CVE-2022-23518_CVE-2022-23519_CVE-2022-23520.patch of Package rubygem-rails-html-sanitizer.30440

diff --color -rubN rails-html-sanitizer-1.0.3-ori/lib/crass/parser.rb rails-html-sanitizer-1.0.3/lib/crass/parser.rb
--- rails-html-sanitizer-1.0.3-ori/lib/crass/parser.rb	1970-01-01 01:00:00.000000000 +0100
+++ rails-html-sanitizer-1.0.3/lib/crass/parser.rb	2015-04-18 06:16:46.000000000 +0200
@@ -0,0 +1,648 @@
+# encoding: utf-8
+require_relative 'token-scanner'
+require_relative 'tokenizer'
+
+module Crass
+
+  # Parses a CSS string or list of tokens.
+  #
+  # 5. http://dev.w3.org/csswg/css-syntax/#parsing
+  class Parser
+    BLOCK_END_TOKENS = {
+      :'{' => :'}',
+      :'[' => :']',
+      :'(' => :')'
+    }
+
+    # -- Class Methods ---------------------------------------------------------
+
+    # Parses CSS properties (such as the contents of an HTML element's `style`
+    # attribute) and returns a parse tree.
+    #
+    # See {Tokenizer#initialize} for _options_.
+    #
+    # 5.3.6. http://dev.w3.org/csswg/css-syntax/#parse-a-list-of-declarations
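+    #
+    # A rough usage sketch (output abbreviated; the node keys are the ones
+    # built by {#parse_properties} below):
+    #
+    #   Crass::Parser.parse_properties('color: red !important')
+    #   # => [{:node => :property, :name => "color", :value => "red",
+    #   #      :important => true, ...}]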
+    def self.parse_properties(input, options = {})
+      Parser.new(input, options).parse_properties
+    end
+
+    # Parses CSS rules (such as the content of a `@media` block) and returns a
+    # parse tree. The only difference from {parse_stylesheet} is that CDO/CDC
+    # nodes (`<!--` and `-->`) aren't ignored.
+    #
+    # See {Tokenizer#initialize} for _options_.
+    #
+    # 5.3.3. http://dev.w3.org/csswg/css-syntax/#parse-a-list-of-rules
+    def self.parse_rules(input, options = {})
+      parser = Parser.new(input, options)
+      rules  = parser.consume_rules
+
+      rules.map do |rule|
+        if rule[:node] == :qualified_rule
+          parser.create_style_rule(rule)
+        else
+          rule
+        end
+      end
+    end
+
+    # Parses a CSS stylesheet and returns a parse tree.
+    #
+    # See {Tokenizer#initialize} for _options_.
+    #
+    # 5.3.2. http://dev.w3.org/csswg/css-syntax/#parse-a-stylesheet
+    def self.parse_stylesheet(input, options = {})
+      parser = Parser.new(input, options)
+      rules  = parser.consume_rules(:top_level => true)
+
+      rules.map do |rule|
+        if rule[:node] == :qualified_rule
+          parser.create_style_rule(rule)
+        else
+          rule
+        end
+      end
+    end
+
+    # Converts a node or array of nodes into a CSS string based on their
+    # original tokenized input.
+    #
+    # Options:
+    #
+    #   * **:exclude_comments** - When `true`, comments will be excluded.
+    #
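+    # A rough round-trip sketch (serialization is based on the original
+    # tokens, so the input text is reproduced):
+    #
+    #   tree = Crass::Parser.parse_stylesheet('a { color: red }')
+    #   Crass::Parser.stringify(tree)
+    #   # => "a { color: red }"
+    #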
+    def self.stringify(nodes, options = {})
+      nodes  = [nodes] unless nodes.is_a?(Array)
+      string = ''
+
+      nodes.each do |node|
+        next if node.nil?
+
+        case node[:node]
+        when :at_rule
+          string << '@'
+          string << node[:name]
+          string << self.stringify(node[:prelude], options)
+
+          if node[:block]
+            string << '{' << self.stringify(node[:block], options) << '}'
+          else
+            string << ';'
+          end
+
+        when :comment
+          string << node[:raw] unless options[:exclude_comments]
+
+        when :simple_block
+          string << node[:start]
+          string << self.stringify(node[:value], options)
+          string << node[:end]
+
+        when :style_rule
+          string << self.stringify(node[:selector][:tokens], options)
+          string << '{' << self.stringify(node[:children], options) << '}'
+
+        else
+          if node.key?(:raw)
+            string << node[:raw]
+          elsif node.key?(:tokens)
+            string << self.stringify(node[:tokens], options)
+          end
+        end
+      end
+
+      string
+    end
+
+    # -- Instance Methods ------------------------------------------------------
+
+    # {TokenScanner} wrapping the tokens generated from this parser's input.
+    attr_reader :tokens
+
+    # Initializes a parser based on the given _input_, which may be a CSS string
+    # or an array of tokens.
+    #
+    # See {Tokenizer#initialize} for _options_.
+    def initialize(input, options = {})
+      unless input.kind_of?(Enumerable)
+        input = Tokenizer.tokenize(input, options)
+      end
+
+      @tokens = TokenScanner.new(input)
+    end
+
+    # Consumes an at-rule and returns it.
+    #
+    # 5.4.2. http://dev.w3.org/csswg/css-syntax-3/#consume-at-rule
+    def consume_at_rule(input = @tokens)
+      rule = {}
+
+      rule[:tokens] = input.collect do
+        rule[:name]    = input.consume[:value]
+        rule[:prelude] = []
+
+        while token = input.consume
+          node = token[:node]
+
+          if node == :comment # Non-standard.
+            next
+
+          elsif node == :semicolon
+            break
+
+          elsif node === :'{'
+            # Note: The spec says the block should _be_ the consumed simple
+            # block, but Simon Sapin's CSS parsing tests and tinycss2 expect
+            # only the _value_ of the consumed simple block here. I assume I'm
+            # interpreting the spec too literally, so I'm going with the
+            # tinycss2 behavior.
+            rule[:block] = consume_simple_block(input)[:value]
+            break
+
+          elsif node == :simple_block && token[:start] == '{'
+            # Note: The spec says the block should _be_ the simple block, but
+            # Simon Sapin's CSS parsing tests and tinycss2 expect only the
+            # _value_ of the simple block here. I assume I'm interpreting the
+            # spec too literally, so I'm going with the tinycss2 behavior.
+            rule[:block] = token[:value]
+            break
+
+          else
+            input.reconsume
+            rule[:prelude] << consume_component_value(input)
+          end
+        end
+      end
+
+      create_node(:at_rule, rule)
+    end
+
+    # Consumes a component value and returns it, or `nil` if there are no more
+    # tokens.
+    #
+    # 5.4.6. http://dev.w3.org/csswg/css-syntax-3/#consume-a-component-value
+    def consume_component_value(input = @tokens)
+      return nil unless token = input.consume
+
+      case token[:node]
+      when :'{', :'[', :'('
+        consume_simple_block(input)
+
+      when :function
+        if token.key?(:name)
+          # This is a parsed function, not a function token. This step isn't
+          # mentioned in the spec, but it's necessary to avoid re-parsing
+          # functions that have already been parsed.
+          token
+        else
+          consume_function(input)
+        end
+
+      else
+        token
+      end
+    end
+
+    # Consumes a declaration and returns it, or `nil` on parse error.
+    #
+    # 5.4.5. http://dev.w3.org/csswg/css-syntax-3/#consume-a-declaration
+    def consume_declaration(input = @tokens)
+      declaration = {}
+      value       = []
+
+      declaration[:tokens] = input.collect do
+        declaration[:name] = input.consume[:value]
+
+        next_token = input.peek
+
+        while next_token && next_token[:node] == :whitespace
+          input.consume
+          next_token = input.peek
+        end
+
+        unless next_token && next_token[:node] == :colon
+          # Parse error.
+          #
+          # Note: The spec explicitly says to return nothing here, but Simon
+          # Sapin's CSS parsing tests expect an error node.
+          return create_node(:error, :value => 'invalid')
+        end
+
+        input.consume
+
+        until input.peek.nil?
+          value << consume_component_value(input)
+        end
+      end
+
+      # Look for !important.
+      important_tokens = value.reject {|token|
+        node = token[:node]
+        node == :whitespace || node == :comment || node == :semicolon
+      }.last(2)
+
+      if important_tokens.size == 2 &&
+          important_tokens[0][:node] == :delim &&
+          important_tokens[0][:value] == '!' &&
+          important_tokens[1][:node] == :ident &&
+          important_tokens[1][:value].downcase == 'important'
+
+        declaration[:important] = true
+        excl_index = value.index(important_tokens[0])
+
+        # Technically the spec doesn't require us to trim trailing tokens after
+        # the !important, but Simon Sapin's CSS parsing tests expect it and
+        # tinycss2 does it, so we'll go along with the cool kids.
+        value.slice!(excl_index, value.size - excl_index)
+      else
+        declaration[:important] = false
+      end
+
+      declaration[:value] = value
+      create_node(:declaration, declaration)
+    end
+
+    # Consumes a list of declarations and returns them.
+    #
+    # By default, the returned list may include `:comment`, `:semicolon`, and
+    # `:whitespace` nodes, which is non-standard.
+    #
+    # Options:
+    #
+    #   * **:strict** - Set to `true` to exclude non-standard `:comment`,
+    #     `:semicolon`, and `:whitespace` nodes.
+    #
+    # 5.4.4. http://dev.w3.org/csswg/css-syntax/#consume-a-list-of-declarations
+    def consume_declarations(input = @tokens, options = {})
+      declarations = []
+
+      while token = input.consume
+        case token[:node]
+
+        # Non-standard: Preserve comments, semicolons, and whitespace.
+        when :comment, :semicolon, :whitespace
+          declarations << token unless options[:strict]
+
+        when :at_keyword
+          # When parsing a style rule, this is a parse error. Otherwise it's
+          # not.
+          input.reconsume
+          declarations << consume_at_rule(input)
+
+        when :ident
+          decl_tokens = [token]
+
+          while next_token = input.peek
+            break if next_token[:node] == :semicolon
+            decl_tokens << consume_component_value(input)
+          end
+
+          if decl = consume_declaration(TokenScanner.new(decl_tokens))
+            declarations << decl
+          end
+
+        else
+          # Parse error (invalid property name, etc.).
+          #
+          # Note: The spec doesn't say we should append anything to the list of
+          # declarations here, but Simon Sapin's CSS parsing tests expect an
+          # error node.
+          declarations << create_node(:error, :value => 'invalid')
+          input.reconsume
+
+          while next_token = input.peek
+            break if next_token[:node] == :semicolon
+            consume_component_value(input)
+          end
+        end
+      end
+
+      declarations
+    end
+
+    # Consumes a function and returns it.
+    #
+    # 5.4.8. http://dev.w3.org/csswg/css-syntax-3/#consume-a-function
+    def consume_function(input = @tokens)
+      function = {
+        :name   => input.current[:value],
+        :value  => [],
+        :tokens => [input.current] # Non-standard, used for serialization.
+      }
+
+      function[:tokens].concat(input.collect {
+        while token = input.consume
+          case token[:node]
+          when :')'
+            break
+
+          # Non-standard.
+          when :comment
+            next
+
+          else
+            input.reconsume
+            function[:value] << consume_component_value(input)
+          end
+        end
+      })
+
+      create_node(:function, function)
+    end
+
+    # Consumes a qualified rule and returns it, or `nil` if a parse error
+    # occurs.
+    #
+    # 5.4.3. http://dev.w3.org/csswg/css-syntax-3/#consume-a-qualified-rule
+    def consume_qualified_rule(input = @tokens)
+      rule = {:prelude => []}
+
+      rule[:tokens] = input.collect do
+        while true
+          unless token = input.consume
+            # Parse error.
+            #
+            # Note: The spec explicitly says to return nothing here, but Simon
+            # Sapin's CSS parsing tests expect an error node.
+            return create_node(:error, :value => 'invalid')
+          end
+
+          if token[:node] == :'{'
+            # Note: The spec says the block should _be_ the consumed simple
+            # block, but Simon Sapin's CSS parsing tests and tinycss2 expect
+            # only the _value_ of the consumed simple block here. I assume I'm
+            # interpreting the spec too literally, so I'm going with the
+            # tinycss2 behavior.
+            rule[:block] = consume_simple_block(input)[:value]
+            break
+          elsif token[:node] == :simple_block && token[:start] == '{'
+            # Note: The spec says the block should _be_ the simple block, but
+            # Simon Sapin's CSS parsing tests and tinycss2 expect only the
+            # _value_ of the simple block here. I assume I'm interpreting the
+            # spec too literally, so I'm going with the tinycss2 behavior.
+            rule[:block] = token[:value]
+            break
+          else
+            input.reconsume
+            rule[:prelude] << consume_component_value(input)
+          end
+        end
+      end
+
+      create_node(:qualified_rule, rule)
+    end
+
+    # Consumes a list of rules and returns them.
+    #
+    # 5.4.1. http://dev.w3.org/csswg/css-syntax/#consume-a-list-of-rules
+    def consume_rules(flags = {})
+      rules = []
+
+      while token = @tokens.consume
+        case token[:node]
+          # Non-standard. Spec says to discard comments and whitespace, but we
+          # keep them so we can serialize faithfully.
+          when :comment, :whitespace
+            rules << token
+
+          when :cdc, :cdo
+            unless flags[:top_level]
+              @tokens.reconsume
+              rule = consume_qualified_rule
+              rules << rule if rule
+            end
+
+          when :at_keyword
+            @tokens.reconsume
+            rule = consume_at_rule
+            rules << rule if rule
+
+          else
+            @tokens.reconsume
+            rule = consume_qualified_rule
+            rules << rule if rule
+        end
+      end
+
+      rules
+    end
+
+    # Consumes and returns a simple block associated with the current input
+    # token.
+    #
+    # 5.4.7. http://dev.w3.org/csswg/css-syntax/#consume-a-simple-block
+    def consume_simple_block(input = @tokens)
+      start_token = input.current[:node]
+      end_token   = BLOCK_END_TOKENS[start_token]
+
+      block = {
+        :start  => start_token.to_s,
+        :end    => end_token.to_s,
+        :value  => [],
+        :tokens => [input.current] # Non-standard. Used for serialization.
+      }
+
+      block[:tokens].concat(input.collect do
+        while token = input.consume
+          break if token[:node] == end_token
+
+          input.reconsume
+          block[:value] << consume_component_value(input)
+        end
+      end)
+
+      create_node(:simple_block, block)
+    end
+
+    # Creates and returns a new parse node with the given _properties_.
+    def create_node(type, properties = {})
+      {:node => type}.merge!(properties)
+    end
+
+    # Parses the given _input_ tokens into a selector node and returns it.
+    #
+    # Doesn't bother splitting the selector list into individual selectors or
+    # validating them. Feel free to do that yourself! It'll be fun!
+    def create_selector(input)
+      create_node(:selector,
+        :value  => parse_value(input),
+        :tokens => input)
+    end
+
+    # Creates a `:style_rule` node from the given qualified _rule_, and returns
+    # it.
+    def create_style_rule(rule)
+      create_node(:style_rule,
+        :selector => create_selector(rule[:prelude]),
+        :children => parse_properties(rule[:block]))
+    end
+
+    # Parses a single component value and returns it.
+    #
+    # 5.3.7. http://dev.w3.org/csswg/css-syntax-3/#parse-a-component-value
+    def parse_component_value(input = @tokens)
+      input = TokenScanner.new(input) unless input.is_a?(TokenScanner)
+
+      while input.peek && input.peek[:node] == :whitespace
+        input.consume
+      end
+
+      if input.peek.nil?
+        return create_node(:error, :value => 'empty')
+      end
+
+      value = consume_component_value(input)
+
+      while input.peek && input.peek[:node] == :whitespace
+        input.consume
+      end
+
+      if input.peek.nil?
+        value
+      else
+        create_node(:error, :value => 'extra-input')
+      end
+    end
+
+    # Parses a list of component values and returns an array of parsed tokens.
+    #
+    # 5.3.8. http://dev.w3.org/csswg/css-syntax/#parse-a-list-of-component-values
+    def parse_component_values(input = @tokens)
+      input  = TokenScanner.new(input) unless input.is_a?(TokenScanner)
+      tokens = []
+
+      while token = consume_component_value(input)
+        tokens << token
+      end
+
+      tokens
+    end
+
+    # Parses a single declaration and returns it.
+    #
+    # 5.3.5. http://dev.w3.org/csswg/css-syntax/#parse-a-declaration
+    def parse_declaration(input = @tokens)
+      input = TokenScanner.new(input) unless input.is_a?(TokenScanner)
+
+      while input.peek && input.peek[:node] == :whitespace
+        input.consume
+      end
+
+      if input.peek.nil?
+        # Syntax error.
+        return create_node(:error, :value => 'empty')
+      elsif input.peek[:node] != :ident
+        # Syntax error.
+        return create_node(:error, :value => 'invalid')
+      end
+
+      if decl = consume_declaration(input)
+        return decl
+      end
+
+      # Syntax error.
+      create_node(:error, :value => 'invalid')
+    end
+
+    # Parses a list of declarations and returns them.
+    #
+    # See {#consume_declarations} for _options_.
+    #
+    # 5.3.6. http://dev.w3.org/csswg/css-syntax/#parse-a-list-of-declarations
+    def parse_declarations(input = @tokens, options = {})
+      input = TokenScanner.new(input) unless input.is_a?(TokenScanner)
+      consume_declarations(input, options)
+    end
+
+    # Parses a list of declarations and returns an array of `:property` nodes
+    # (and any non-declaration nodes that were in the input). This is useful for
+    # parsing the contents of an HTML element's `style` attribute.
+    def parse_properties(input = @tokens)
+      properties = []
+
+      parse_declarations(input).each do |decl|
+        unless decl[:node] == :declaration
+          properties << decl
+          next
+        end
+
+        children = decl[:value].dup
+        children.pop if children.last && children.last[:node] == :semicolon
+
+        properties << create_node(:property,
+          :name      => decl[:name],
+          :value     => parse_value(decl[:value]),
+          :children  => children,
+          :important => decl[:important],
+          :tokens    => decl[:tokens])
+      end
+
+      properties
+    end
+
+    # Parses a single rule and returns it.
+    #
+    # 5.3.4. http://dev.w3.org/csswg/css-syntax-3/#parse-a-rule
+    def parse_rule(input = @tokens)
+      input = TokenScanner.new(input) unless input.is_a?(TokenScanner)
+
+      while input.peek && input.peek[:node] == :whitespace
+        input.consume
+      end
+
+      if input.peek.nil?
+        # Syntax error.
+        return create_node(:error, :value => 'empty')
+      elsif input.peek[:node] == :at_keyword
+        rule = consume_at_rule(input)
+      else
+        rule = consume_qualified_rule(input)
+      end
+
+      while input.peek && input.peek[:node] == :whitespace
+        input.consume
+      end
+
+      if input.peek.nil?
+        rule
+      else
+        # Syntax error.
+        create_node(:error, :value => 'extra-input')
+      end
+    end
+
+    # Returns the unescaped value of a selector name or property declaration.
+    def parse_value(nodes)
+      nodes  = [nodes] unless nodes.is_a?(Array)
+      string = ''
+
+      nodes.each do |node|
+        case node[:node]
+        when :comment, :semicolon
+          next
+
+        when :at_keyword, :ident
+          string << node[:value]
+
+        when :function
+          if node[:value].is_a?(String)
+            string << node[:value]
+            string << '('
+          else
+            string << parse_value(node[:tokens])
+          end
+
+        else
+          if node.key?(:raw)
+            string << node[:raw]
+          elsif node.key?(:tokens)
+            string << parse_value(node[:tokens])
+          end
+        end
+      end
+
+      string.strip
+    end
+  end
+
+end
diff --color -rubN rails-html-sanitizer-1.0.3-ori/lib/crass/scanner.rb rails-html-sanitizer-1.0.3/lib/crass/scanner.rb
--- rails-html-sanitizer-1.0.3-ori/lib/crass/scanner.rb	1970-01-01 01:00:00.000000000 +0100
+++ rails-html-sanitizer-1.0.3/lib/crass/scanner.rb	2015-04-18 06:16:46.000000000 +0200
@@ -0,0 +1,125 @@
+# encoding: utf-8
+require 'strscan'
+
+module Crass
+
+  # Similar to a StringScanner, but with extra functionality needed to tokenize
+  # CSS while preserving the original text.
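+  #
+  # A short sketch of the mark/marked mechanism the tokenizer relies on:
+  #
+  #   s = Crass::Scanner.new('abc')
+  #   s.mark
+  #   s.consume # => "a"
+  #   s.consume # => "b"
+  #   s.marked  # => "ab"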
+  class Scanner
+    # Current character, or `nil` if the scanner hasn't yet consumed a
+    # character, or is at the end of the string.
+    attr_reader :current
+
+    # Current marker position. Use {#marked} to get the substring between
+    # {#marker} and {#pos}.
+    attr_accessor :marker
+
+    # Position of the next character that will be consumed. This is a character
+    # position, not a byte position, so it accounts for multi-byte characters.
+    attr_accessor :pos
+
+    # String being scanned.
+    attr_reader :string
+
+    # Creates a Scanner instance for the given _input_ string or IO instance.
+    def initialize(input)
+      @string  = input.is_a?(IO) ? input.read : input.to_s
+      @scanner = StringScanner.new(@string)
+
+      reset
+    end
+
+    # Consumes the next character and returns it, advancing the pointer, or
+    # an empty string if the end of the string has been reached.
+    def consume
+      if @pos < @len
+        @pos    += 1
+        @current = @scanner.getch
+      else
+        ''
+      end
+    end
+
+    # Consumes the rest of the string and returns it, advancing the pointer to
+    # the end of the string. Returns an empty string if the end of the string
+    # has already been reached.
+    def consume_rest
+      result = @scanner.rest
+
+      @current = result[-1]
+      @pos     = @len
+
+      result
+    end
+
+    # Returns `true` if the end of the string has been reached, `false`
+    # otherwise.
+    def eos?
+      @pos == @len
+    end
+
+    # Sets the marker to the position of the next character that will be
+    # consumed.
+    def mark
+      @marker = @pos
+    end
+
+    # Returns the substring between {#marker} and {#pos}, without altering the
+    # pointer.
+    def marked
+      if result = @string[@marker, @pos - @marker]
+        result
+      else
+        ''
+      end
+    end
+
+    # Returns up to _length_ characters starting at the current position, but
+    # doesn't consume them. The number of characters returned may be less than
+    # _length_ if the end of the string is reached.
+    def peek(length = 1)
+      @string[pos, length]
+    end
+
+    # Moves the pointer back one character without changing the value of
+    # {#current}. The next call to {#consume} will re-consume the current
+    # character.
+    def reconsume
+      @scanner.unscan
+      @pos -= 1 if @pos > 0
+    end
+
+    # Resets the pointer to the beginning of the string.
+    def reset
+      @current = nil
+      @len     = @string.size
+      @marker  = 0
+      @pos     = 0
+    end
+
+    # Tries to match _pattern_ at the current position. If it matches, the
+    # matched substring will be returned and the pointer will be advanced.
+    # Otherwise, `nil` will be returned.
+    def scan(pattern)
+      if match = @scanner.scan(pattern)
+        @pos     += match.size
+        @current  = match[-1]
+      end
+
+      match
+    end
+
+    # Scans the string until the _pattern_ is matched. Returns the substring up
+    # to and including the end of the match, and advances the pointer. If there
+    # is no match, `nil` is returned and the pointer is not advanced.
+    def scan_until(pattern)
+      if match = @scanner.scan_until(pattern)
+        @pos     += match.size
+        @current  = match[-1]
+      end
+
+      match
+    end
+  end
+
+end
diff --color -rubN rails-html-sanitizer-1.0.3-ori/lib/crass/tokenizer.rb rails-html-sanitizer-1.0.3/lib/crass/tokenizer.rb
--- rails-html-sanitizer-1.0.3-ori/lib/crass/tokenizer.rb	1970-01-01 01:00:00.000000000 +0100
+++ rails-html-sanitizer-1.0.3/lib/crass/tokenizer.rb	2015-04-18 06:16:46.000000000 +0200
@@ -0,0 +1,689 @@
+# encoding: utf-8
+require_relative 'scanner'
+
+module Crass
+
+  # Tokenizes a CSS string.
+  #
+  # 4. http://dev.w3.org/csswg/css-syntax/#tokenization
+  class Tokenizer
+    RE_COMMENT_CLOSE   = /\*\//
+    RE_DIGIT           = /[0-9]+/
+    RE_ESCAPE          = /\\[^\n]/
+    RE_HEX             = /[0-9A-Fa-f]{1,6}/
+    RE_NAME            = /[0-9A-Za-z_\u0080-\u{10ffff}-]+/
+    RE_NAME_START      = /[A-Za-z_\u0080-\u{10ffff}]+/
+    RE_NON_PRINTABLE   = /[\u0000-\u0008\u000b\u000e-\u001f\u007f]+/
+    RE_NUMBER_DECIMAL  = /\.[0-9]+/
+    RE_NUMBER_EXPONENT = /[Ee][+-]?[0-9]+/
+    RE_NUMBER_SIGN     = /[+-]/
+
+    RE_NUMBER_STR = /\A
+      (?<sign> [+-]?)
+      (?<integer> [0-9]*)
+      (?:\.
+        (?<fractional> [0-9]*)
+      )?
+      (?:[Ee]
+        (?<exponent_sign> [+-]?)
+        (?<exponent> [0-9]*)
+      )?
+    \z/x
+
+    RE_QUOTED_URL_START    = /\A[\n\u0009\u0020]?["']/
+    RE_UNICODE_RANGE_START = /\+(?:[0-9A-Fa-f]|\?)/
+    RE_UNICODE_RANGE_END   = /-[0-9A-Fa-f]/
+    RE_WHITESPACE          = /[\n\u0009\u0020]+/
+    RE_WHITESPACE_ANCHORED = /\A[\n\u0009\u0020]+\z/
+
+    # -- Class Methods ---------------------------------------------------------
+
+    # Tokenizes the given _input_ as a CSS string and returns an array of
+    # tokens.
+    #
+    # See {#initialize} for _options_.
+    def self.tokenize(input, options = {})
+      Tokenizer.new(input, options).tokenize
+    end
+
+    # -- Instance Methods ------------------------------------------------------
+
+    # Initializes a new Tokenizer.
+    #
+    # Options:
+    #
+    #   * **:preserve_comments** - If `true`, comments will be preserved as
+    #     `:comment` tokens.
+    #
+    #   * **:preserve_hacks** - If `true`, certain non-standard browser hacks
+    #     such as the IE "*" hack will be preserved even though they violate
+    #     CSS 3 syntax rules.
+    #
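+    # A rough sketch of the output for a tiny input (token keys come from
+    # {#create_token} below):
+    #
+    #   Crass::Tokenizer.tokenize('p{}')
+    #   # => [{:node => :ident, :pos => 0, :raw => "p", :value => "p"},
+    #   #     {:node => :"{",   :pos => 1, :raw => "{"},
+    #   #     {:node => :"}",   :pos => 2, :raw => "}"}]
+    #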
+    def initialize(input, options = {})
+      @s       = Scanner.new(preprocess(input))
+      @options = options
+    end
+
+    # Consumes a token and returns the token that was consumed.
+    #
+    # 4.3.1. http://dev.w3.org/csswg/css-syntax/#consume-a-token
+    def consume
+      return nil if @s.eos?
+
+      @s.mark
+
+      # Consume comments.
+      if comment_token = consume_comments
+        if @options[:preserve_comments]
+          return comment_token
+        else
+          return consume
+        end
+      end
+
+      # Consume whitespace.
+      return create_token(:whitespace) if @s.scan(RE_WHITESPACE)
+
+      char = @s.consume
+
+      case char.to_sym
+      when :'"'
+        consume_string
+
+      when :'#'
+        if @s.peek =~ RE_NAME || valid_escape?(@s.peek(2))
+          create_token(:hash,
+            :type  => start_identifier?(@s.peek(3)) ? :id : :unrestricted,
+            :value => consume_name)
+        else
+          create_token(:delim, :value => char)
+        end
+
+      when :'$'
+        if @s.peek == '='
+          @s.consume
+          create_token(:suffix_match)
+        else
+          create_token(:delim, :value => char)
+        end
+
+      when :"'"
+        consume_string
+
+      when :'('
+        create_token(:'(')
+
+      when :')'
+        create_token(:')')
+
+      when :*
+        if @s.peek == '='
+          @s.consume
+          create_token(:substring_match)
+
+        # Non-standard: Preserve the IE * hack.
+        elsif @options[:preserve_hacks] && @s.peek =~ RE_NAME_START
+          @s.reconsume
+          consume_ident
+
+        else
+          create_token(:delim, :value => char)
+        end
+
+      when :+
+        if start_number?
+          @s.reconsume
+          consume_numeric
+        else
+          create_token(:delim, :value => char)
+        end
+
+      when :','
+        create_token(:comma)
+
+      when :-
+        nextTwoChars   = @s.peek(2)
+        nextThreeChars = char + nextTwoChars
+
+        if start_number?(nextThreeChars)
+          @s.reconsume
+          consume_numeric
+        elsif nextTwoChars == '->'
+          @s.consume
+          @s.consume
+          create_token(:cdc)
+        elsif start_identifier?(nextThreeChars)
+          @s.reconsume
+          consume_ident
+        else
+          create_token(:delim, :value => char)
+        end
+
+      when :'.'
+        if start_number?
+          @s.reconsume
+          consume_numeric
+        else
+          create_token(:delim, :value => char)
+        end
+
+      when :':'
+        create_token(:colon)
+
+      when :';'
+        create_token(:semicolon)
+
+      when :<
+        if @s.peek(3) == '!--'
+          @s.consume
+          @s.consume
+          @s.consume
+
+          create_token(:cdo)
+        else
+          create_token(:delim, :value => char)
+        end
+
+      when :'@'
+        if start_identifier?(@s.peek(3))
+          create_token(:at_keyword, :value => consume_name)
+        else
+          create_token(:delim, :value => char)
+        end
+
+      when :'['
+        create_token(:'[')
+
+      when :'\\'
+        if valid_escape?
+          @s.reconsume
+          consume_ident
+        else
+          # Parse error.
+          create_token(:delim,
+            :error => true,
+            :value => char)
+        end
+
+      when :']'
+        create_token(:']')
+
+      when :'^'
+        if @s.peek == '='
+          @s.consume
+          create_token(:prefix_match)
+        else
+          create_token(:delim, :value => char)
+        end
+
+      when :'{'
+        create_token(:'{')
+
+      when :'}'
+        create_token(:'}')
+
+      when :U, :u
+        if @s.peek(2) =~ RE_UNICODE_RANGE_START
+          @s.consume
+          consume_unicode_range
+        else
+          @s.reconsume
+          consume_ident
+        end
+
+      when :|
+        case @s.peek
+        when '='
+          @s.consume
+          create_token(:dash_match)
+
+        when '|'
+          @s.consume
+          create_token(:column)
+
+        else
+          create_token(:delim, :value => char)
+        end
+
+      when :~
+        if @s.peek == '='
+          @s.consume
+          create_token(:include_match)
+        else
+          create_token(:delim, :value => char)
+        end
+
+      else
+        case char
+        when RE_DIGIT
+          @s.reconsume
+          consume_numeric
+
+        when RE_NAME_START
+          @s.reconsume
+          consume_ident
+
+        else
+          create_token(:delim, :value => char)
+        end
+      end
+    end
+
+    # Consumes the remnants of a bad URL and returns the consumed text.
+    #
+    # 4.3.15. http://dev.w3.org/csswg/css-syntax/#consume-the-remnants-of-a-bad-url
+    def consume_bad_url
+      text = ''
+
+      until @s.eos?
+        if valid_escape?
+          text << consume_escaped
+        elsif valid_escape?(@s.peek(2))
+          @s.consume
+          text << consume_escaped
+        else
+          char = @s.consume
+
+          if char == ')'
+            break
+          else
+            text << char
+          end
+        end
+      end
+
+      text
+    end
+
+    # Consumes comments and returns them, or `nil` if no comments were consumed.
+    #
+    # 4.3.2. http://dev.w3.org/csswg/css-syntax/#consume-comments
+    def consume_comments
+      if @s.peek(2) == '/*'
+        @s.consume
+        @s.consume
+
+        if text = @s.scan_until(RE_COMMENT_CLOSE)
+          text.slice!(-2, 2)
+        else
+          # Parse error.
+          text = @s.consume_rest
+        end
+
+        return create_token(:comment, :value => text)
+      end
+
+      nil
+    end
+
+    # Consumes an escaped code point and returns its unescaped value.
+    #
+    # This method assumes that the `\` has already been consumed, and that the
+    # next character in the input has already been verified not to be a newline
+    # or EOF.
+    #
+    # 4.3.8. http://dev.w3.org/csswg/css-syntax/#consume-an-escaped-code-point
+    def consume_escaped
+      return "\ufffd" if @s.eos?
+
+      if hex_str = @s.scan(RE_HEX)
+        @s.consume if @s.peek =~ RE_WHITESPACE
+
+        codepoint = hex_str.hex
+
+        if codepoint == 0 ||
+            codepoint.between?(0xD800, 0xDFFF) ||
+            codepoint > 0x10FFFF
+
+          return "\ufffd"
+        else
+          return codepoint.chr(Encoding::UTF_8)
+        end
+      end
+
+      @s.consume
+    end
+
+    # Consumes an ident-like token and returns it.
+    #
+    # 4.3.4. http://dev.w3.org/csswg/css-syntax/#consume-an-ident-like-token
+    def consume_ident
+      value = consume_name
+
+      if @s.peek == '('
+        @s.consume
+
+        if value.downcase == 'url'
+          @s.consume while @s.peek(2) =~ RE_WHITESPACE_ANCHORED
+
+          if @s.peek(2) =~ RE_QUOTED_URL_START
+            create_token(:function, :value => value)
+          else
+            consume_url
+          end
+        else
+          create_token(:function, :value => value)
+        end
+      else
+        create_token(:ident, :value => value)
+      end
+    end
+
+    # Consumes a name and returns it.
+    #
+    # 4.3.12. http://dev.w3.org/csswg/css-syntax/#consume-a-name
+    def consume_name
+      result = ''
+
+      until @s.eos?
+        if match = @s.scan(RE_NAME)
+          result << match
+          next
+        end
+
+        char = @s.consume
+
+        if valid_escape?
+          result << consume_escaped
+
+        # Non-standard: IE * hack
+        elsif char == '*' && @options[:preserve_hacks]
+          result << @s.consume
+
+        else
+          @s.reconsume
+          return result
+        end
+      end
+
+      result
+    end
+
+    # Consumes a number and returns a 3-element array containing the number's
+    # original representation, its numeric value, and its type (either
+    # `:integer` or `:number`).
+    #
+    # 4.3.13. http://dev.w3.org/csswg/css-syntax/#consume-a-number
+    def consume_number
+      repr = ''
+      type = :integer
+
+      repr << @s.consume if @s.peek =~ RE_NUMBER_SIGN
+      repr << (@s.scan(RE_DIGIT) || '')
+
+      if match = @s.scan(RE_NUMBER_DECIMAL)
+        repr << match
+        type = :number
+      end
+
+      if match = @s.scan(RE_NUMBER_EXPONENT)
+        repr << match
+        type = :number
+      end
+
+      [repr, convert_string_to_number(repr), type]
+    end
+
+    # Consumes a numeric token and returns it.
+    #
+    # 4.3.3. http://dev.w3.org/csswg/css-syntax/#consume-a-numeric-token
+    def consume_numeric
+      number = consume_number
+
+      if start_identifier?(@s.peek(3))
+        create_token(:dimension,
+          :repr  => number[0],
+          :type  => number[2],
+          :unit  => consume_name,
+          :value => number[1])
+
+      elsif @s.peek == '%'
+        @s.consume
+
+        create_token(:percentage,
+          :repr  => number[0],
+          :type  => number[2],
+          :value => number[1])
+
+      else
+        create_token(:number,
+          :repr  => number[0],
+          :type  => number[2],
+          :value => number[1])
+      end
+    end
+
+    # Consumes a string token that ends at the given character, and returns the
+    # token.
+    #
+    # 4.3.5. http://dev.w3.org/csswg/css-syntax/#consume-a-string-token
+    def consume_string(ending = nil)
+      ending = @s.current if ending.nil?
+      value  = ''
+
+      until @s.eos?
+        case char = @s.consume
+        when ending
+          break
+
+        when "\n"
+          # Parse error.
+          @s.reconsume
+          return create_token(:bad_string,
+            :error => true,
+            :value => value)
+
+        when '\\'
+          case @s.peek
+          when ''
+            # End of the input, so do nothing.
+            next
+
+          when "\n"
+            @s.consume
+
+          else
+            value << consume_escaped
+          end
+
+        else
+          value << char
+        end
+      end
+
+      create_token(:string, :value => value)
+    end
+
+    # Consumes a Unicode range token and returns it. Assumes the initial "u+" or
+    # "U+" has already been consumed.
+    #
+    # 4.3.7. http://dev.w3.org/csswg/css-syntax/#consume-a-unicode-range-token
+    def consume_unicode_range
+      value = @s.scan(RE_HEX) || ''
+
+      while value.length < 6
+        break unless @s.peek == '?'
+        value << @s.consume
+      end
+
+      range = {}
+
+      if value.include?('?')
+        range[:start] = value.gsub('?', '0').hex
+        range[:end]   = value.gsub('?', 'F').hex
+        return create_token(:unicode_range, range)
+      end
+
+      range[:start] = value.hex
+
+      if @s.peek(2) =~ RE_UNICODE_RANGE_END
+        @s.consume
+        range[:end] = (@s.scan(RE_HEX) || '').hex
+      else
+        range[:end] = range[:start]
+      end
+
+      create_token(:unicode_range, range)
+    end
+
+    # Consumes a URL token and returns it. Assumes the original "url(" has
+    # already been consumed.
+    #
+    # 4.3.6. http://dev.w3.org/csswg/css-syntax/#consume-a-url-token
+    def consume_url
+      value = ''
+
+      @s.scan(RE_WHITESPACE)
+
+      until @s.eos?
+        case char = @s.consume
+          when ')'
+            break
+
+          when RE_WHITESPACE
+            @s.scan(RE_WHITESPACE)
+
+            if @s.eos? || @s.peek == ')'
+              @s.consume
+              break
+            else
+              return create_token(:bad_url, :value => value + consume_bad_url)
+            end
+
+          when '"', "'", '(', RE_NON_PRINTABLE
+            # Parse error.
+            return create_token(:bad_url,
+              :error => true,
+              :value => value + consume_bad_url)
+
+          when '\\'
+            if valid_escape?
+              value << consume_escaped
+            else
+              # Parse error.
+              return create_token(:bad_url,
+                :error => true,
+                :value => value + consume_bad_url
+              )
+            end
+
+          else
+            value << char
+        end
+      end
+
+      create_token(:url, :value => value)
+    end
+
+    # Converts a valid CSS number string into a number and returns the number.
+    #
+    # 4.3.14. http://dev.w3.org/csswg/css-syntax/#convert-a-string-to-a-number
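+    #
+    # For example, "+1.5e2" decomposes into s = 1, i = 1, f = 5, d = 1,
+    # t = 1, e = 2, so the result is 1 * (1 + 5 * 10**-1) * 10**(1 * 2) = 150.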
+    def convert_string_to_number(str)
+      matches = RE_NUMBER_STR.match(str)
+
+      s = matches[:sign] == '-' ? -1 : 1
+      i = matches[:integer].to_i
+      f = matches[:fractional].to_i
+      d = matches[:fractional] ? matches[:fractional].length : 0
+      t = matches[:exponent_sign] == '-' ? -1 : 1
+      e = matches[:exponent].to_i
+
+      # I know this looks nutty, but it's exactly what's defined in the spec,
+      # and it works.
+      s * (i + f * 10**-d) * 10**(t * e)
+    end
+
+    # Creates and returns a new token with the given _properties_.
+    def create_token(type, properties = {})
+      {
+        :node => type,
+        :pos  => @s.marker,
+        :raw  => @s.marked
+      }.merge!(properties)
+    end
+
+    # Preprocesses _input_ to prepare it for the tokenizer.
+    #
+    # 3.3. http://dev.w3.org/csswg/css-syntax/#input-preprocessing
+    def preprocess(input)
+      input = input.to_s.encode('UTF-8',
+        :invalid => :replace,
+        :undef   => :replace)
+
+      input.gsub!(/(?:\r\n|[\r\f])/, "\n")
+      input.gsub!("\u0000", "\ufffd")
+      input
+    end
+
+    # Returns `true` if the given three-character _text_ would start an
+    # identifier. If _text_ is `nil`, the current and next two characters in the
+    # input stream will be checked, but will not be consumed.
+    #
+    # 4.3.10. http://dev.w3.org/csswg/css-syntax/#would-start-an-identifier
+    def start_identifier?(text = nil)
+      text = @s.current + @s.peek(2) if text.nil?
+
+      case text[0]
+      when '-'
+        nextChar = text[1]
+        !!(nextChar == '-' || nextChar =~ RE_NAME_START || valid_escape?(text[1, 2]))
+
+      when RE_NAME_START
+        true
+
+      when '\\'
+        valid_escape?(text[0, 2])
+
+      else
+        false
+      end
+    end
+
+    # Returns `true` if the given three-character _text_ would start a number.
+    # If _text_ is `nil`, the current and next two characters in the input
+    # stream will be checked, but will not be consumed.
+    #
+    # 4.3.11. http://dev.w3.org/csswg/css-syntax/#starts-with-a-number
+    def start_number?(text = nil)
+      text = @s.current + @s.peek(2) if text.nil?
+
+      case text[0]
+      when '+', '-'
+        !!(text[1] =~ RE_DIGIT || (text[1] == '.' && text[2] =~ RE_DIGIT))
+
+      when '.'
+        !!(text[1] =~ RE_DIGIT)
+
+      when RE_DIGIT
+        true
+
+      else
+        false
+      end
+    end
+
+    # Tokenizes the input stream and returns an array of tokens.
+    def tokenize
+      @s.reset
+
+      tokens = []
+
+      while token = consume
+        tokens << token
+      end
+
+      tokens
+    end
+
+    # Returns `true` if the given two-character _text_ is the beginning of a
+    # valid escape sequence. If _text_ is `nil`, the current and next character
+    # in the input stream will be checked, but will not be consumed.
+    #
+    # 4.3.9. http://dev.w3.org/csswg/css-syntax/#starts-with-a-valid-escape
+    def valid_escape?(text = nil)
+      text = @s.current + @s.peek if text.nil?
+      !!(text[0] == '\\' && text[1] != "\n")
+    end
+  end
+
+end
diff --color -rubN rails-html-sanitizer-1.0.3-ori/lib/crass/token-scanner.rb rails-html-sanitizer-1.0.3/lib/crass/token-scanner.rb
--- rails-html-sanitizer-1.0.3-ori/lib/crass/token-scanner.rb	1970-01-01 01:00:00.000000000 +0100
+++ rails-html-sanitizer-1.0.3/lib/crass/token-scanner.rb	2015-04-18 06:16:46.000000000 +0200
@@ -0,0 +1,50 @@
+# encoding: utf-8
+
+module Crass
+
+  # Like {Scanner}, but for tokens!
+  class TokenScanner
+    attr_reader :current, :pos, :tokens
+
+    def initialize(tokens)
+      @tokens = tokens.to_a
+      reset
+    end
+
+    # Executes the given block, collects all tokens that are consumed during its
+    # execution, and returns them.
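+    #
+    # A small sketch (t1, t2 and t3 stand for arbitrary token hashes):
+    #
+    #   scanner = Crass::TokenScanner.new([t1, t2, t3])
+    #   scanner.collect { 2.times { scanner.consume } }
+    #   # => [t1, t2]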
+    def collect
+      start = @pos
+      yield
+      @tokens[start...@pos] || []
+    end
+
+    # Consumes the next token and returns it, advancing the pointer. Returns
+    # `nil` if there is no next token.
+    def consume
+      @current = @tokens[@pos]
+      @pos += 1 if @current
+      @current
+    end
+
+    # Returns the next token without consuming it, or `nil` if there is no next
+    # token.
+    def peek
+      @tokens[@pos]
+    end
+
+    # Reconsumes the current token, moving the pointer back one position.
+    #
+    # http://www.w3.org/TR/2013/WD-css-syntax-3-20130919/#reconsume-the-current-input-token
+    def reconsume
+      @pos -= 1 if @pos > 0
+    end
+
+    # Resets the pointer to the first token in the list.
+    def reset
+      @current = nil
+      @pos     = 0
+    end
+  end
+
+end
diff --color -rubN rails-html-sanitizer-1.0.3-ori/lib/crass/version.rb rails-html-sanitizer-1.0.3/lib/crass/version.rb
--- rails-html-sanitizer-1.0.3-ori/lib/crass/version.rb	1970-01-01 01:00:00.000000000 +0100
+++ rails-html-sanitizer-1.0.3/lib/crass/version.rb	2015-04-18 06:16:46.000000000 +0200
@@ -0,0 +1,5 @@
+# encoding: utf-8
+
+module Crass
+  VERSION = '1.0.2'
+end
diff --color -rubN rails-html-sanitizer-1.0.3-ori/lib/crass.rb rails-html-sanitizer-1.0.3/lib/crass.rb
--- rails-html-sanitizer-1.0.3-ori/lib/crass.rb	1970-01-01 01:00:00.000000000 +0100
+++ rails-html-sanitizer-1.0.3/lib/crass.rb	2015-04-18 06:16:46.000000000 +0200
@@ -0,0 +1,22 @@
+# encoding: utf-8
+require_relative 'crass/parser'
+
+# A CSS parser based on the CSS Syntax Module Level 3 spec.
+module Crass
+
+  # Parses _input_ as a CSS stylesheet and returns a parse tree.
+  #
+  # See {Tokenizer#initialize} for _options_.
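+  #
+  # A minimal sketch:
+  #
+  #   Crass.parse('p { font-size: 16px }')
+  #   # => an array of :style_rule (and :whitespace) nodes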
+  def self.parse(input, options = {})
+    Parser.parse_stylesheet(input, options)
+  end
+
+  # Parses _input_ as a string of CSS properties (such as the contents of an
+  # HTML element's `style` attribute) and returns a parse tree.
+  #
+  # See {Tokenizer#initialize} for _options_.
+  def self.parse_properties(input, options = {})
+    Parser.parse_properties(input, options)
+  end
+
+end
diff --color -rubN rails-html-sanitizer-1.0.3-ori/lib/loofah/html5/scrub.rb rails-html-sanitizer-1.0.3/lib/loofah/html5/scrub.rb
--- rails-html-sanitizer-1.0.3-ori/lib/loofah/html5/scrub.rb	1970-01-01 01:00:00.000000000 +0100
+++ rails-html-sanitizer-1.0.3/lib/loofah/html5/scrub.rb	2023-08-29 12:50:26.371689045 +0200
@@ -0,0 +1,26 @@
+require "crass"
+
+module Loofah
+  module HTML5
+    module Scrub
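+      # Scrubs the value of an attribute that is allowed to contain a local
+      # reference (e.g. an SVG `fill`): the value is parsed with Crass and only
+      # fragment-only url() values such as `url(#foo)`, plus hashes, idents and
+      # strings, are kept, so values referencing external URLs are dropped.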
+      def scrub_attribute_that_allows_local_ref(attr_node)
+        return unless attr_node.value
+
+        nodes = Crass::Parser.new(attr_node.value).parse_component_values
+
+        values = nodes.map do |node|
+          case node[:node]
+          when :url
+            if node[:value].start_with?("#")
+              node[:raw]
+            end
+          when :hash, :ident, :string
+            node[:raw]
+          end
+        end.compact
+
+        attr_node.value = values.join(" ")
+      end
+    end
+  end
+end
diff --color -rubN rails-html-sanitizer-1.0.3-ori/lib/rails/html/scrubbers.rb rails-html-sanitizer-1.0.3/lib/rails/html/scrubbers.rb
--- rails-html-sanitizer-1.0.3-ori/lib/rails/html/scrubbers.rb	2023-08-28 14:25:04.825458612 +0200
+++ rails-html-sanitizer-1.0.3/lib/rails/html/scrubbers.rb	2023-08-28 14:41:54.377457394 +0200
@@ -60,9 +60,9 @@
       end
 
       def scrub(node)
-        if node.cdata?
-          text = node.document.create_text_node node.text
-          node.replace text
+        if Loofah::HTML5::Scrub.cdata_needs_escaping?(node)
+          replacement = Loofah::HTML5::Scrub.cdata_escape(node)
+          node.replace(replacement)
           return CONTINUE
         end
         return CONTINUE if skip_node?(node)
@@ -138,14 +138,10 @@
                     end
 
         if Loofah::HTML5::WhiteList::ATTR_VAL_IS_URI.include?(attr_name)
-          # this block lifted nearly verbatim from HTML5 sanitization
-          val_unescaped = CGI.unescapeHTML(attr_node.value).gsub(Loofah::HTML5::Scrub::CONTROL_CHARACTERS,'').downcase
-          if val_unescaped =~ /^[a-z0-9][-+.a-z0-9]*:/ && ! Loofah::HTML5::WhiteList::ALLOWED_PROTOCOLS.include?(val_unescaped.split(Loofah::HTML5::WhiteList::PROTOCOL_SEPARATOR)[0])
-            attr_node.remove
-          end
+          return if Loofah::HTML5::Scrub.scrub_uri_attribute(attr_node)
         end
         if Loofah::HTML5::WhiteList::SVG_ATTR_VAL_ALLOWS_REF.include?(attr_name)
-          attr_node.value = attr_node.value.gsub(/url\s*\(\s*[^#\s][^)]+?\)/m, ' ') if attr_node.value
+          Loofah::HTML5::Scrub.scrub_attribute_that_allows_local_ref(attr_node)
         end
         if Loofah::HTML5::WhiteList::SVG_ALLOW_LOCAL_HREF.include?(node.name) && attr_name == 'xlink:href' && attr_node.value =~ /^\s*[^#\s].*/m
           attr_node.remove
diff --color -rubN rails-html-sanitizer-1.0.3-ori/test/sanitizer_test.rb rails-html-sanitizer-1.0.3/test/sanitizer_test.rb
--- rails-html-sanitizer-1.0.3-ori/test/sanitizer_test.rb	2023-08-28 14:25:04.825458612 +0200
+++ rails-html-sanitizer-1.0.3/test/sanitizer_test.rb	2023-08-28 16:39:45.544550156 +0200
@@ -54,6 +54,7 @@
 
   def test_strip_tags_with_quote
     input = '<" <img src="trollface.gif" onload="alert(1)"> hi'
+    expected = libxml_2_9_14_recovery_lt? ? %{&lt;"  hi} : %{ hi}
-    assert_equal ' hi', full_sanitize(input)
+    assert_equal(expected, full_sanitize(input))
   end
 
@@ -79,11 +80,16 @@
   end
 
   def test_strip_cdata
-    assert_equal "This has a ]]&gt; here.", full_sanitize("This has a <![CDATA[<section>]]> here.")
+    input = "This has a <![CDATA[<section>]]> here."
+    expected = libxml_2_9_14_recovery_lt_bang? ? %{This has a &lt;![CDATA[]]&gt; here.} : %{This has a ]]&gt; here.}
+    assert_equal(expected, full_sanitize(input))
   end
 
   def test_strip_unclosed_cdata
-    assert_equal "This has an unclosed ]] here...", full_sanitize("This has an unclosed <![CDATA[<section>]] here...")
+    input = "This has an unclosed <![CDATA[<section>]] here..."
+    expected = libxml_2_9_14_recovery_lt_bang? ? %{This has an unclosed &lt;![CDATA[]] here...} : %{This has an unclosed ]] here...}
+    assert_equal(expected, full_sanitize(input))
   end
 
   def test_strip_blank_string
@@ -434,11 +440,15 @@
   end
 
   def test_should_sanitize_cdata_section
-    assert_sanitized "<![CDATA[<span>section</span>]]>", "section]]&gt;"
+     input = "<![CDATA[<span>section</span>]]>"
+     expected = libxml_2_9_14_recovery_lt_bang? ? %{&lt;![CDATA[<span>section</span>]]&gt;} : %{section]]&gt;}
+     assert_sanitized(input, expected)
   end
 
   def test_should_sanitize_unterminated_cdata_section
-    assert_sanitized "<![CDATA[<span>neverending...", "neverending..."
+     input = "<![CDATA[<span>neverending..."
+     expected = libxml_2_9_14_recovery_lt_bang? ? %{&lt;![CDATA[<span>neverending...</span>} : %{neverending...}
+     assert_sanitized(input, expected)
   end
 
   def test_should_not_mangle_urls_with_ampersand
@@ -482,7 +492,7 @@
     assert_equal %(<a data-foo="foo">foo</a>), white_list_sanitize(text, attributes: ['data-foo'])
   end
 
-protected
+  protected
 
   def xpath_sanitize(input, options = {})
     XpathRemovalTestSanitizer.new.sanitize(input, options)
@@ -527,4 +537,119 @@
   ensure
     Rails::Html::WhiteListSanitizer.allowed_attributes = old_attributes
   end
+
+   def test_mediatype_text_html_disallowed
+     input = %q(<img src="data:text/html;base64,PHNjcmlwdD5hbGVydCgnWFNTJyk8L3NjcmlwdD4=">)
+     expected = %q(<img>)
+     actual = safe_list_sanitize(input)
+     assert_equal(expected, actual)
+
+     input = %q(<img src="DATA:text/html;base64,PHNjcmlwdD5hbGVydCgnWFNTJyk8L3NjcmlwdD4=">)
+     expected = %q(<img>)
+     actual = safe_list_sanitize(input)
+     assert_equal(expected, actual)
+   end
+
+   def test_mediatype_image_svg_xml_disallowed
+     input = %q(<img src="data:image/svg+xml;base64,PHNjcmlwdD5hbGVydCgnWFNTJyk8L3NjcmlwdD4=">)
+     expected = %q(<img>)
+     actual = safe_list_sanitize(input)
+     assert_equal(expected, actual)
+
+     input = %q(<img src="DATA:image/svg+xml;base64,PHNjcmlwdD5hbGVydCgnWFNTJyk8L3NjcmlwdD4=">)
+     expected = %q(<img>)
+     actual = safe_list_sanitize(input)
+     assert_equal(expected, actual)
+   end
+
+   def test_mediatype_other_disallowed
+     input = %q(<a href="data:foo;base64,PHNjcmlwdD5hbGVydCgnWFNTJyk8L3NjcmlwdD4=">foo</a>)
+     expected = %q(<a>foo</a>)
+     actual = safe_list_sanitize(input)
+     assert_equal(expected, actual)
+
+     input = %q(<a href="DATA:foo;base64,PHNjcmlwdD5hbGVydCgnWFNTJyk8L3NjcmlwdD4=">foo</a>)
+     expected = %q(<a>foo</a>)
+     actual = safe_list_sanitize(input)
+     assert_equal(expected, actual)
+   end
+
+   def test_scrubbing_svg_attr_values_that_allow_ref
+     input = %Q(<div fill="yellow url(http://bad.com/) #fff">hey</div>)
+     expected = %Q(<div fill="yellow #fff">hey</div>)
+     actual = scope_allowed_attributes %w(fill) do
+       safe_list_sanitize(input)
+     end
+
+     assert_equal(expected, actual)
+   end
+
+   def test_style_with_css_payload
+     input, tags = "<style>div > span { background: \"red\"; }</style>", ["style"]
+     expected = "<style>div &gt; span { background: \"red\"; }</style>"
+     actual = safe_list_sanitize(input, tags: tags)
+
+     assert_equal(expected, actual)
+   end
+
+   def test_combination_of_select_and_style_with_css_payload
+     input, tags = "<select><style>div > span { background: \"red\"; }</style></select>", ["select", "style"]
+     expected = "<select><style>div &gt; span { background: \"red\"; }</style></select>"
+     actual = safe_list_sanitize(input, tags: tags)
+
+     assert_equal(expected, actual)
+   end
+
+   def test_combination_of_select_and_style_with_script_payload
+     input, tags = "<select><style><script>alert(1)</script></style></select>", ["select", "style"]
+     expected = "<select><style>&lt;script&gt;alert(1)&lt;/script&gt;</style></select>"
+     actual = safe_list_sanitize(input, tags: tags)
+
+     assert_equal(expected, actual)
+   end
+
+   def test_combination_of_svg_and_style_with_script_payload
+     input, tags = "<svg><style><script>alert(1)</script></style></svg>", ["svg", "style"]
+     expected = "<svg><style>&lt;script&gt;alert(1)&lt;/script&gt;</style></svg>"
+     actual = safe_list_sanitize(input, tags: tags)
+
+     assert_equal(expected, actual)
+   end
+
+   def test_combination_of_math_and_style_with_img_payload
+     input, tags = "<math><style><img src=x onerror=alert(1)></style></math>", ["math", "style"]
+     expected = "<math><style>&lt;img src=x onerror=alert(1)&gt;</style></math>"
+     actual = safe_list_sanitize(input, tags: tags)
+
+     assert_equal(expected, actual)
+
+     input, tags = "<math><style><img src=x onerror=alert(1)></style></math>", ["math", "style", "img"]
+     expected = "<math><style>&lt;img src=x onerror=alert(1)&gt;</style></math>"
+     actual = safe_list_sanitize(input, tags: tags)
+
+     assert_equal(expected, actual)
+   end
+
+   def test_combination_of_svg_and_style_with_img_payload
+     input, tags = "<svg><style><img src=x onerror=alert(1)></style></svg>", ["svg", "style"]
+     expected = "<svg><style>&lt;img src=x onerror=alert(1)&gt;</style></svg>"
+     actual = safe_list_sanitize(input, tags: tags)
+
+     assert_equal(expected, actual)
+
+     input, tags = "<svg><style><img src=x onerror=alert(1)></style></svg>", ["svg", "style", "img"]
+     expected = "<svg><style>&lt;img src=x onerror=alert(1)&gt;</style></svg>"
+     actual = safe_list_sanitize(input, tags: tags)
+
+     assert_equal(expected, actual)
+   end
+
+   def libxml_2_9_14_recovery_lt?
+     # changed in 2.9.14, see https://github.com/sparklemotion/nokogiri/releases/tag/v1.13.5
+     Nokogiri.method(:uses_libxml?).arity == -1 && Nokogiri.uses_libxml?(">= 2.9.14")
+   end
+
+   def libxml_2_9_14_recovery_lt_bang?
+     # changed in 2.9.14, see https://github.com/sparklemotion/nokogiri/releases/tag/v1.13.5
+     # then reverted in 2.10.0, see https://gitlab.gnome.org/GNOME/libxml2/-/issues/380
+     Nokogiri.method(:uses_libxml?).arity == -1 && Nokogiri.uses_libxml?("= 2.9.14")
+   end
 end