File sort-keycompare-mb.patch of Package coreutils

Subject: sort i18n: make sure to NUL-terminate the sort keys

Fixes http://bugs.gnu.org/18540

* src/sort.c (keycompare_mb): Use only the keys themselves, temporarily
NUL-terminated in place.  Patched analogously to the non-multibyte upstream patch:
  http://git.sv.gnu.org/cgit/coreutils.git/commit/?id=b877ea4b3e

* tests/misc/sort.pl (23): Add test, taken from upstream patch:
  http://git.sv.gnu.org/cgit/coreutils.git/commit/?id=4d8c4dfc21
(11a): Exempt from the multibyte (MB) run: the collation order in the
French locale would provoke a false positive there, because the strcoll()
comparison between the trailing blank and tabs gives a different result.
(11b): Likewise.

---
 src/sort.c         |   22 ++++++++++++++++++++--
 tests/misc/sort.pl |    5 +++++
 2 files changed, 25 insertions(+), 2 deletions(-)

Index: src/sort.c
===================================================================
--- src/sort.c.orig
+++ src/sort.c
@@ -3236,6 +3236,9 @@ keycompare_mb (const struct line *a, con
       size_t lena = lima <= texta ? 0 : lima - texta;
       size_t lenb = limb <= textb ? 0 : limb - textb;
 
+      char enda IF_LINT (= 0);
+      char endb IF_LINT (= 0);
+
       char const *translate = key->translate;
       bool const *ignore = key->ignore;
 
@@ -3253,6 +3256,12 @@ keycompare_mb (const struct line *a, con
           texta = copy_a; textb = copy_b;
           lena = new_len_a; lenb = new_len_b;
         }
+      else
+        {
+          /* Use the keys in-place, temporarily null-terminated.  */
+          enda = texta[lena]; texta[lena] = '\0';
+          endb = textb[lenb]; textb[lenb] = '\0';
+        }
 
       if (key->random)
         diff = compare_random (texta, lena, textb, lenb);
@@ -3276,13 +3285,22 @@ keycompare_mb (const struct line *a, con
         diff = 1;
       else if (hard_LC_COLLATE && !folding)
         {
-          diff = xmemcoll0 (texta, lena, textb, lenb);
+          diff = xmemcoll0 (texta, lena + 1, textb, lenb + 1);
         }
       else
-        diff = memcmp (texta, textb, MIN (lena + 1,lenb + 1));
+        {
+          diff = memcmp (texta, textb, MIN (lena, lenb));
+          if (diff == 0)
+            diff = lena < lenb ? -1 : lena != lenb;
+        }
 
       if (ignore || translate)
         free (texta);
+      else
+        {
+          texta[lena] = enda;
+          textb[lenb] = endb;
+        }
 
       if (diff)
         goto not_equal;
Index: tests/misc/sort.pl
===================================================================
--- tests/misc/sort.pl.orig
+++ tests/misc/sort.pl
@@ -322,6 +322,10 @@ my @Tests =
 ["22a", '-k 2,2fd -k 1,1r', {IN=>"3 b\n4 B\n"}, {OUT=>"4 B\n3 b\n"}],
 ["22b", '-k 2,2d  -k 1,1r', {IN=>"3 b\n4 b\n"}, {OUT=>"4 b\n3 b\n"}],
 
+# This fails in Fedora 20, per Göran Uddeborg in: http://bugs.gnu.org/18540
+["23", '-s -k1,1 -t/', {IN=>"a b/x\na-b-c/x\n"}, {OUT=>"a b/x\na-b-c/x\n"},
+ {ENV => "LC_ALL=$mb_locale"}],
+
 ["no-file1", 'no-file', {EXIT=>2}, {ERR=>$no_file}],
 # This test failed until 1.22f.  Sort didn't give an error.
 # From Will Edgington.
@@ -446,6 +450,7 @@ if ($mb_locale ne 'C')
         #disable several failing tests until investigation, disable all tests with envvars set
         next if (grep {ref $_ eq 'HASH' && exists $_->{ENV}} (@new_t));
         next if ($test_name =~ "18g" or $test_name =~ "sort-numeric" or $test_name =~ "08[ab]" or $test_name =~ "03[def]" or $test_name =~ "h4" or $test_name =~ "n1" or $test_name =~ "2[01]a");
+        next if ($test_name =~ "11[ab]"); # avoid FP: expected result differs to MB result due to collation rules.
         push @new, ["$test_name-mb", @new_t, {ENV => "LC_ALL=$mb_locale"}];
        }
     push @Tests, @new;
openSUSE Build Service is sponsored by