File gconv-assertion-iso-2022-jp.patch of Package glibc.21693

From 7d88c6142c6efc160c0ee5e4f85cde382c072888 Mon Sep 17 00:00:00 2001
From: Florian Weimer <fweimer@redhat.com>
Date: Wed, 27 Jan 2021 13:36:12 +0100
Subject: [PATCH] gconv: Fix assertion failure in ISO-2022-JP-3 module (bug
 27256)

The conversion loop to the internal encoding does not follow
the interface contract that __GCONV_FULL_OUTPUT is only returned
after the internal wchar_t buffer has been filled completely.  This
is enforced by the first of the two asserts in iconv/skeleton.c:

	      /* We must run out of output buffer space in this
		 rerun.  */
	      assert (outbuf == outerr);
	      assert (nstatus == __GCONV_FULL_OUTPUT);

This commit solves this issue by queuing a second wide character
which cannot be written immediately in the state variable, like
other converters already do (e.g., BIG5-HKSCS or TSCII).

Reported-by: Tavis Ormandy <taviso@gmail.com>
---
 iconvdata/Makefile        |   4 +-
 iconvdata/bug-iconv14.c   | 127 ++++++++++++++++++++++++++++++++++++++
 iconvdata/iso-2022-jp-3.c |  67 ++++++++++++++------
 3 files changed, 178 insertions(+), 20 deletions(-)
 create mode 100644 iconvdata/bug-iconv14.c

Index: glibc-2.26/iconvdata/Makefile
===================================================================
--- glibc-2.26.orig/iconvdata/Makefile
+++ glibc-2.26/iconvdata/Makefile
@@ -316,6 +316,8 @@ $(objpfx)bug-iconv10.out: $(objpfx)gconv
 			  $(addprefix $(objpfx),$(modules.so))
 $(objpfx)bug-iconv12.out: $(objpfx)gconv-modules \
 			  $(addprefix $(objpfx),$(modules.so))
+$(objpfx)bug-iconv14.out: $(objpfx)gconv-modules \
+			  $(addprefix $(objpfx),$(modules.so))
 
 $(objpfx)iconv-test.out: run-iconv-test.sh $(objpfx)gconv-modules \
 			 $(addprefix $(objpfx),$(modules.so)) \
Index: glibc-2.26/iconvdata/iso-2022-jp-3.c
===================================================================
--- glibc-2.26.orig/iconvdata/iso-2022-jp-3.c
+++ glibc-2.26/iconvdata/iso-2022-jp-3.c
@@ -67,23 +67,34 @@ enum
   CURRENT_SEL_MASK = 7 << 3
 };
 
-/* During UCS-4 to ISO-2022-JP-3 conversion, the COUNT element of the state
-   also contains the last two bytes to be output, shifted by 6 bits, and a
-   one-bit indicator whether they must be preceded by the shift sequence,
-   in bit 22.  */
+/* During UCS-4 to ISO-2022-JP-3 conversion, the COUNT element of the
+   state also contains the last two bytes to be output, shifted by 6
+   bits, and a one-bit indicator whether they must be preceded by the
+   shift sequence, in bit 22.  During ISO-2022-JP-3 to UCS-4
+   conversion, COUNT may also contain a non-zero pending wide
+   character, shifted by six bits.  This happens for certain inputs in
+   JISX0213_1_2004_set and JISX0213_2_set if the second wide character
+   in a combining sequence cannot be written because the buffer is
+   full.  */
 
 /* Since this is a stateful encoding we have to provide code which resets
    the output state to the initial state.  This has to be done during the
    flushing.  */
 #define EMIT_SHIFT_TO_INIT \
-  if ((data->__statep->__count & ~7) != ASCII_set)			      \
+  if (data->__statep->__count != ASCII_set)			      \
     {									      \
       if (FROM_DIRECTION)						      \
 	{								      \
-	  /* It's easy, we don't have to emit anything, we just reset the     \
-	     state for the input.  */					      \
-	  data->__statep->__count &= 7;					      \
-	  data->__statep->__count |= ASCII_set;				      \
+	  if (__glibc_likely (outbuf + 4 <= outend))			      \
+	    {								      \
+	      /* Write out the last character.  */			      \
+	      *((uint32_t *) outbuf) = data->__statep->__count >> 6;	      \
+	      outbuf += sizeof (uint32_t);				      \
+	      data->__statep->__count = ASCII_set;			\
+	    }								      \
+	  else								      \
+	    /* We don't have enough room in the output buffer.  */	      \
+	    status = __GCONV_FULL_OUTPUT;				      \
 	}								      \
       else								      \
 	{								      \
@@ -151,7 +162,21 @@ enum
 #define LOOPFCT			FROM_LOOP
 #define BODY \
   {									      \
-    uint32_t ch = *inptr;						      \
+    uint32_t ch;							      \
+									      \
+    /* Output any pending character.  */				      \
+    ch = set >> 6;							      \
+    if (__glibc_unlikely (ch != 0))					      \
+      {									      \
+	put32 (outptr, ch);						      \
+	outptr += 4;							      \
+	/* Remove the pending character, but preserve state bits.  */	      \
+	set &= (1 << 6) - 1;						      \
+	continue;							      \
+      }									      \
+									      \
+    /* Otherwise read the next input byte.  */				      \
+    ch = *inptr;							      \
 									      \
     /* Recognize escape sequences.  */					      \
     if (__glibc_unlikely (ch == ESC))					      \
@@ -297,21 +322,25 @@ enum
 	    uint32_t u1 = __jisx0213_to_ucs_combining[ch - 1][0];	      \
 	    uint32_t u2 = __jisx0213_to_ucs_combining[ch - 1][1];	      \
 									      \
+	    inptr += 2;							      \
+									      \
+	    put32 (outptr, u1);						      \
+	    outptr += 4;						      \
+									      \
 	    /* See whether we have room for two characters.  */		      \
-	    if (outptr + 8 <= outend)					      \
+	    if (outptr + 4 <= outend)					      \
 	      {								      \
-		inptr += 2;						      \
-		put32 (outptr, u1);					      \
-		outptr += 4;						      \
 		put32 (outptr, u2);					      \
 		outptr += 4;						      \
 		continue;						      \
 	      }								      \
-	    else							      \
-	      {								      \
-		result = __GCONV_FULL_OUTPUT;				      \
-		break;							      \
-	      }								      \
+									      \
+	    /* Otherwise store only the first character now, and	      \
+	       put the second one into the queue.  */			      \
+	    set |= u2 << 6;						      \
+	    /* Tell the caller why we terminate the loop.  */		      \
+	    result = __GCONV_FULL_OUTPUT;				      \
+	    break;							      \
 	  }								      \
 									      \
 	inptr += 2;							      \
openSUSE Build Service is sponsored by