File rubygem-activesupport-2.1.2_CVE-2009-3009_activesupport.patch of Package rubygem-activesupport-2_1

From e3db21fe4f54539be7fc212167553665970a955f Mon Sep 17 00:00:00 2001
From: Manfred Stienstra <manfred@fngtps.com>
Date: Tue, 1 Sep 2009 20:16:11 +0200
Subject: [PATCH] Add methods for string verification and encoding cleanup code.

Signed-off-by: Michael Koziarski <michael@koziarski.com>
---
 activesupport/lib/active_support/multibyte.rb      |   18 ++++
 .../multibyte/handlers/utf8_handler.rb             |   13 +--
 .../lib/active_support/multibyte/utils.rb          |   39 +++++++
 activesupport/test/multibyte_utils_test.rb         |  106 ++++++++++++++++++++
 4 files changed, 165 insertions(+), 11 deletions(-)
 create mode 100644 activesupport/lib/active_support/multibyte/utils.rb
 create mode 100644 activesupport/test/multibyte_utils_test.rb

Index: lib/active_support/multibyte.rb
===================================================================
--- lib/active_support/multibyte.rb.orig	1970-01-01 01:00:00.000000000 +0100
+++ lib/active_support/multibyte.rb	2009-09-16 15:10:56.135032606 +0200
@@ -3,7 +3,25 @@ module ActiveSupport
     DEFAULT_NORMALIZATION_FORM = :kc
     NORMALIZATIONS_FORMS = [:c, :kc, :d, :kd]
     UNICODE_VERSION = '5.0.0'
+
+    # Regular expressions that describe valid byte sequences for a single character, keyed by encoding name
+    VALID_CHARACTER = {
+      # Borrowed from the Kconv library by Shinji KONO - (also as seen on the W3C site)
+      'UTF-8' => /\A(?:
+                 [\x00-\x7f]                                         |
+                 [\xc2-\xdf] [\x80-\xbf]                             |
+                 \xe0        [\xa0-\xbf] [\x80-\xbf]                 |
+                 [\xe1-\xef] [\x80-\xbf] [\x80-\xbf]                 |
+                 \xf0        [\x90-\xbf] [\x80-\xbf] [\x80-\xbf]     |
+                 [\xf1-\xf3] [\x80-\xbf] [\x80-\xbf] [\x80-\xbf]     |
+                 \xf4        [\x80-\x8f] [\x80-\xbf] [\x80-\xbf])\z /xn,
+      # Quick check for valid Shift-JIS characters, disregards the odd-even pairing (no blanks inside [...]: /x keeps them literal there)
+      'Shift_JIS' => /\A(?:
+                 [\x00-\x7e\xa1-\xdf]                                      |
+                 [\x81-\x9f\xe0-\xef] [\x40-\x7e\x80-\x9e\x9f-\xfc])\z /xn
+    }
   end
 end
 
 require 'active_support/multibyte/chars'
+require 'active_support/multibyte/utils'
\ No newline at end of file
Index: lib/active_support/multibyte/handlers/utf8_handler.rb
===================================================================
--- lib/active_support/multibyte/handlers/utf8_handler.rb.orig	1970-01-01 01:00:00.000000000 +0100
+++ lib/active_support/multibyte/handlers/utf8_handler.rb	2009-09-16 15:10:56.155045731 +0200
@@ -100,16 +100,7 @@ module ActiveSupport::Multibyte::Handler
     # between little and big endian. This is not an issue in utf-8, so it must be ignored.
     UNICODE_LEADERS_AND_TRAILERS = UNICODE_WHITESPACE + [65279] # ZERO-WIDTH NO-BREAK SPACE aka BOM
     
-    # Borrowed from the Kconv library by Shinji KONO - (also as seen on the W3C site)
-     UTF8_PAT = /\A(?:
-                   [\x00-\x7f]                                     |
-                   [\xc2-\xdf] [\x80-\xbf]                         |
-                   \xe0        [\xa0-\xbf] [\x80-\xbf]             |
-                   [\xe1-\xef] [\x80-\xbf] [\x80-\xbf]             |
-                   \xf0        [\x90-\xbf] [\x80-\xbf] [\x80-\xbf] |
-                   [\xf1-\xf3] [\x80-\xbf] [\x80-\xbf] [\x80-\xbf] |
-                   \xf4        [\x80-\x8f] [\x80-\xbf] [\x80-\xbf]
-                  )*\z/xn
+    UTF8_PAT = ActiveSupport::Multibyte::VALID_CHARACTER['UTF-8']
     
     # Returns a regular expression pattern that matches the passed Unicode codepoints
     def self.codepoints_to_pattern(array_of_codepoints) #:nodoc:
@@ -357,7 +348,7 @@ module ActiveSupport::Multibyte::Handler
       # Replaces all the non-utf-8 bytes by their iso-8859-1 or cp1252 equivalent resulting in a valid utf-8 string
       def tidy_bytes(str)
         str.split(//u).map do |c|
-          if !UTF8_PAT.match(c)
+          if !ActiveSupport::Multibyte::VALID_CHARACTER['UTF-8'].match(c)
             n = c.unpack('C')[0]
             n < 128 ? n.chr :
             n < 160 ? [UCD.cp1252[n] || n].pack('U') :
Index: lib/active_support/multibyte/utils.rb
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ lib/active_support/multibyte/utils.rb	2009-09-16 15:10:56.175045422 +0200
@@ -0,0 +1,39 @@
+module ActiveSupport #:nodoc:
+ module Multibyte #:nodoc:
+   # Returns the VALID_CHARACTER expression for the encoding named by $KCODE ('UTF8' or 'SJIS'), or nil for any other $KCODE
+   def self.valid_character
+     case $KCODE
+     when 'UTF8'
+       VALID_CHARACTER['UTF-8']
+     when 'SJIS'
+       VALID_CHARACTER['Shift_JIS']
+     end
+   end
+
+   # Returns false if any character of +string+ is invalid for the current $KCODE, true otherwise (always true when valid_character is nil)
+   def self.verify(string)
+     if expression = valid_character
+       for c in string.split(//)
+         return false unless expression.match(c)
+       end
+     end
+     true
+   end
+
+   # Raises ActiveSupport::Multibyte::Handlers::EncodingError unless verify(string) is true; returns nil otherwise
+   def self.verify!(string)
+     raise ActiveSupport::Multibyte::Handlers::EncodingError.new("Found characters with invalid encoding") unless verify(string)
+   end
+
+   # Returns a copy of +string+ without characters invalid for the current $KCODE; returns +string+ itself when valid_character is nil
+   def self.clean(string)
+     if expression = valid_character
+       string.split(//).select do |c|
+         expression.match(c)
+       end.join
+     else
+       string
+     end
+   end
+ end
+end
\ No newline at end of file
Index: test/multibyte_utils_test.rb
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ test/multibyte_utils_test.rb	2009-09-16 15:10:56.200060604 +0200
@@ -0,0 +1,106 @@
+require 'abstract_unit'
+
+class MultibyteUtilsTest < Test::Unit::TestCase
+
+  def test_valid_character_returns_an_expression_for_the_current_encoding
+    with_kcode('None') do
+      assert_nil ActiveSupport::Multibyte.valid_character
+    end
+    with_kcode('UTF8') do
+      assert_equal ActiveSupport::Multibyte::VALID_CHARACTER['UTF-8'], ActiveSupport::Multibyte.valid_character
+    end
+    with_kcode('SJIS') do
+      assert_equal ActiveSupport::Multibyte::VALID_CHARACTER['Shift_JIS'], ActiveSupport::Multibyte.valid_character
+    end
+  end
+
+  def test_verify_verifies_ASCII_strings_are_properly_encoded
+    with_kcode('None') do
+      examples.each do |example|
+        assert ActiveSupport::Multibyte.verify(example)
+      end
+    end
+  end
+
+  def test_verify_verifies_UTF_8_strings_are_properly_encoded
+    with_kcode('UTF8') do
+      assert ActiveSupport::Multibyte.verify(example('valid UTF-8'))
+      assert !ActiveSupport::Multibyte.verify(example('invalid UTF-8'))
+    end
+  end
+
+  def test_verify_verifies_Shift_JIS_strings_are_properly_encoded
+    with_kcode('SJIS') do
+      assert ActiveSupport::Multibyte.verify(example('valid Shift-JIS'))
+      assert !ActiveSupport::Multibyte.verify(example('invalid Shift-JIS'))
+    end
+  end
+
+  def test_verify_bang_raises_an_exception_when_it_finds_an_invalid_character
+    with_kcode('UTF8') do
+      assert_raises(ActiveSupport::Multibyte::Handlers::EncodingError) do
+        ActiveSupport::Multibyte.verify!(example('invalid UTF-8'))
+      end
+    end
+  end
+
+  def test_verify_bang_doesnt_raise_an_exception_when_the_encoding_is_valid
+    with_kcode('UTF8') do
+      assert_nothing_raised do
+        ActiveSupport::Multibyte.verify!(example('valid UTF-8'))
+      end
+    end
+  end
+
+  def test_clean_leaves_ASCII_strings_intact
+    with_kcode('None') do
+      [
+        'word', "\270\236\010\210\245"
+      ].each do |string|
+        assert_equal string, ActiveSupport::Multibyte.clean(string)
+      end
+    end
+  end
+
+  def test_clean_cleans_invalid_characters_from_UTF_8_encoded_strings
+    with_kcode('UTF8') do
+      cleaned_utf8 = [8].pack('C*')
+      assert_equal example('valid UTF-8'), ActiveSupport::Multibyte.clean(example('valid UTF-8'))
+      assert_equal cleaned_utf8, ActiveSupport::Multibyte.clean(example('invalid UTF-8'))
+    end
+  end
+
+  def test_clean_cleans_invalid_characters_from_Shift_JIS_encoded_strings
+    with_kcode('SJIS') do
+      cleaned_sjis = [184, 0, 136, 165].pack('C*')
+      assert_equal example('valid Shift-JIS'), ActiveSupport::Multibyte.clean(example('valid Shift-JIS'))
+      assert_equal cleaned_sjis, ActiveSupport::Multibyte.clean(example('invalid Shift-JIS'))
+    end
+  end
+
+  private
+
+  STRINGS = {
+    'valid ASCII'       => [65, 83, 67, 73, 73].pack('C*'),
+    'invalid ASCII'     => [128].pack('C*'),
+    'valid UTF-8'       => [227, 129, 147, 227, 129, 171, 227, 129, 161, 227, 130, 143].pack('C*'),
+    'invalid UTF-8'     => [184, 158, 8, 136, 165].pack('C*'),
+    'valid Shift-JIS'   => [131, 122, 129, 91, 131, 128].pack('C*'),
+    'invalid Shift-JIS' => [184, 158, 8, 0, 255, 136, 165].pack('C*')
+  }
+
+  def example(key)
+    STRINGS[key]
+  end
+
+  def examples
+    STRINGS.values
+  end
+
+  def with_kcode(code)
+    before, $KCODE = $KCODE, code # right-hand side is evaluated first, so +before+ holds the old value
+    yield
+  ensure # restore $KCODE even when the block raises (e.g. a failing assertion), so it cannot leak into other tests
+    $KCODE = before
+  end
+end
\ No newline at end of file
Places

File rubygem-activesupport-2.1.2_CVE-2009-3009_activesupport.patch of Package rubygem-activesupport-2_1

Places