File icu-CVE-2016-6293-2017-7867-2017-7868-2017-14952-2017-15422-2017-17484.patch of Package icu
diff -Nura icu/source/common/cmemory.h icu_new/source/common/cmemory.h
--- icu/source/common/cmemory.h 2013-10-05 04:49:16.000000000 +0800
+++ icu_new/source/common/cmemory.h 2018-05-04 18:08:23.333412933 +0800
@@ -59,6 +59,14 @@
#endif /* U_DEBUG */
+/**
+ * \def UPRV_LENGTHOF
+ * Convenience macro to determine the length of a fixed array at compile-time.
+ * @param array A fixed length array (a true array object; a pointer or array-typed function parameter gives a wrong result)
+ * @return The length of the array, in elements, as an int32_t; the expansion is fully parenthesized so it is safe inside any larger expression
+ * @internal
+ */
+#define UPRV_LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))
#define uprv_memset(buffer, mark, size) U_STANDARD_CPP_NAMESPACE memset(buffer, mark, size)
#define uprv_memcmp(buffer1, buffer2, size) U_STANDARD_CPP_NAMESPACE memcmp(buffer1, buffer2,size)
diff -Nura icu/source/common/ucnv_err.c icu_new/source/common/ucnv_err.c
--- icu/source/common/ucnv_err.c 2013-10-05 04:49:16.000000000 +0800
+++ icu_new/source/common/ucnv_err.c 2018-05-04 18:08:23.333412933 +0800
@@ -50,6 +50,76 @@
#define UCNV_PRV_ESCAPE_CSS2 'S'
#define UCNV_PRV_STOP_ON_ILLEGAL 'i'
+/*
+ * IS_DEFAULT_IGNORABLE_CODE_POINT
+ * This is to check if a code point has the default ignorable unicode property.
+ * As such, this list needs to be updated if the ignorable code point list ever
+ * changes.
+ * To avoid dependency on other code, this list is hard coded here.
+ * When an ignorable code point is found and is unmappable, the default callbacks
+ * will ignore them. The macro evaluates its argument many times, so pass a side-effect-free expression.
+ * (c == 0x00AD) || \ (Latin-1 Punctuation and Symbols)
+ * (c == 0x034F) || \ (Combining Diacritical Marks Grapheme Joiner)
+ * (c == 0x061C) || \ (Arabic Format Character)
+ * (c == 0x115F) || \ (Hangul Jamo Old Initial Consonants)
+ * (c == 0x1160) || \ (Hangul Jamo Medial Vowels)
+ * (0x17B4 <= c && c <= 0x17B5) || \ (Khmer Inherent Vowels)
+ * (0x180B <= c && c <= 0x180E) || \ (Mongolian Format Controls)
+ * (0x200B <= c && c <= 0x200F) || \ (General Punctuation Format Characters)
+ * (0x202A <= c && c <= 0x202E) || \ (General Punctuation Format Characters)
+ * (c == 0x2060) || \ (General Punctuation Format Characters)
+ * (0x2066 <= c && c <= 0x2069) || \ (General Punctuation Format Characters)
+ * (0x2061 <= c && c <= 0x2064) || \ (General Punctuation Invisible Operators)
+ * (0x206A <= c && c <= 0x206F) || \ (General Punctuation Deprecated)
+ * (c == 0x3164) || \ (Hangul Compatibility Jamo)
+ * (0x0FE00 <= c && c <= 0x0FE0F) || \ (Variation Selectors)
+ * (c == 0x0FEFF) || \ (Arabic Presentation Forms B)
+ * (c == 0x0FFA0) || \ (Halfwidth and Fullwidth Forms)
+ * (0x01BCA0 <= c && c <= 0x01BCA3) || \ (Shorthand Format Controls)
+ * (0x01D173 <= c && c <= 0x01D17A) || \ (Musical Symbols)
+ * (c == 0x0E0001) || \ (Tag Identifiers)
+ * (0x0E0020 <= c && c <= 0x0E007F) || \ (Tag Components)
+ * (0x0E0100 <= c && c <= 0x0E01EF) || \ (Variation Selectors Supplement)
+ * (c == 0x2065) || \ (Unassigned)
+ * (0x0FFF0 <= c && c <= 0x0FFF8) || \ (Unassigned)
+ * (c == 0x0E0000) || \ (Unassigned)
+ * (0x0E0002 <= c && c <= 0x0E001F) || \ (Unassigned)
+ * (0x0E0080 <= c && c <= 0x0E00FF) || \ (Unassigned)
+ * (0x0E01F0 <= c && c <= 0x0E0FFF) \ (Unassigned)
+ */
+
+#define IS_DEFAULT_IGNORABLE_CODE_POINT(c) (\
+ ((c) == 0x00AD) || \
+ ((c) == 0x034F) || \
+ ((c) == 0x061C) || \
+ ((c) == 0x115F) || \
+ ((c) == 0x1160) || \
+ (0x17B4 <= (c) && (c) <= 0x17B5) || \
+ (0x180B <= (c) && (c) <= 0x180E) || \
+ (0x200B <= (c) && (c) <= 0x200F) || \
+ (0x202A <= (c) && (c) <= 0x202E) || \
+ ((c) == 0x2060) || \
+ (0x2066 <= (c) && (c) <= 0x2069) || \
+ (0x2061 <= (c) && (c) <= 0x2064) || \
+ (0x206A <= (c) && (c) <= 0x206F) || \
+ ((c) == 0x3164) || \
+ (0x0FE00 <= (c) && (c) <= 0x0FE0F) || \
+ ((c) == 0x0FEFF) || \
+ ((c) == 0x0FFA0) || \
+ (0x01BCA0 <= (c) && (c) <= 0x01BCA3) || \
+ (0x01D173 <= (c) && (c) <= 0x01D17A) || \
+ ((c) == 0x0E0001) || \
+ (0x0E0020 <= (c) && (c) <= 0x0E007F) || \
+ (0x0E0100 <= (c) && (c) <= 0x0E01EF) || \
+ ((c) == 0x2065) || \
+ (0x0FFF0 <= (c) && (c) <= 0x0FFF8) || \
+ ((c) == 0x0E0000) || \
+ (0x0E0002 <= (c) && (c) <= 0x0E001F) || \
+ (0x0E0080 <= (c) && (c) <= 0x0E00FF) || \
+ (0x0E01F0 <= (c) && (c) <= 0x0E0FFF) \
+ )
+
+
/*Function Pointer STOPS at the ILLEGAL_SEQUENCE */
U_CAPI void U_EXPORT2
UCNV_FROM_U_CALLBACK_STOP (
@@ -61,6 +131,13 @@
UConverterCallbackReason reason,
UErrorCode * err)
{
+ if (reason == UCNV_UNASSIGNED && IS_DEFAULT_IGNORABLE_CODE_POINT(codePoint))
+ {
+ /*
+ * Skip if the codepoint has unicode property of default ignorable.
+ */
+ *err = U_ZERO_ERROR;
+ }
/* the caller must have set the error code accordingly */
return;
}
@@ -92,7 +169,14 @@
{
if (reason <= UCNV_IRREGULAR)
{
- if (context == NULL || (*((char*)context) == UCNV_PRV_STOP_ON_ILLEGAL && reason == UCNV_UNASSIGNED))
+ if (reason == UCNV_UNASSIGNED && IS_DEFAULT_IGNORABLE_CODE_POINT(codePoint))
+ {
+ /*
+ * Skip if the codepoint has unicode property of default ignorable.
+ */
+ *err = U_ZERO_ERROR;
+ }
+ else if (context == NULL || (*((char*)context) == UCNV_PRV_STOP_ON_ILLEGAL && reason == UCNV_UNASSIGNED))
{
*err = U_ZERO_ERROR;
}
@@ -113,7 +197,14 @@
{
if (reason <= UCNV_IRREGULAR)
{
- if (context == NULL || (*((char*)context) == UCNV_PRV_STOP_ON_ILLEGAL && reason == UCNV_UNASSIGNED))
+ if (reason == UCNV_UNASSIGNED && IS_DEFAULT_IGNORABLE_CODE_POINT(codePoint))
+ {
+ /*
+ * Skip if the codepoint has unicode property of default ignorable.
+ */
+ *err = U_ZERO_ERROR;
+ }
+ else if (context == NULL || (*((char*)context) == UCNV_PRV_STOP_ON_ILLEGAL && reason == UCNV_UNASSIGNED))
{
*err = U_ZERO_ERROR;
ucnv_cbFromUWriteSub(fromArgs, 0, err);
@@ -155,6 +246,14 @@
{
return;
}
+ else if (reason == UCNV_UNASSIGNED && IS_DEFAULT_IGNORABLE_CODE_POINT(codePoint))
+ {
+ /*
+ * Skip if the codepoint has unicode property of default ignorable.
+ */
+ *err = U_ZERO_ERROR;
+ return;
+ }
ucnv_setFromUCallBack (fromArgs->converter,
(UConverterFromUCallback) UCNV_FROM_U_CALLBACK_SUBSTITUTE,
diff -Nura icu/source/common/ucnv_u8.c icu_new/source/common/ucnv_u8.c
--- icu/source/common/ucnv_u8.c 2013-10-05 04:49:18.000000000 +0800
+++ icu_new/source/common/ucnv_u8.c 2018-05-04 18:08:23.333412933 +0800
@@ -26,9 +26,11 @@
#include "unicode/utf.h"
#include "unicode/utf8.h"
#include "unicode/utf16.h"
+#include "uassert.h"
#include "ucnv_bld.h"
#include "ucnv_cnv.h"
#include "cmemory.h"
+#include "ustr_imp.h"
/* Prototypes --------------------------------------------------------------- */
@@ -748,7 +750,7 @@
utf8_offsets[7]={ 0, 0, 0x3080, 0xE2080, 0x3C82080 };
/* "Convert" UTF-8 to UTF-8: Validate and copy. Modified from ucnv_DBCSFromUTF8(). */
-static void
+static void U_CALLCONV
ucnv_UTF8FromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
UConverterToUnicodeArgs *pToUArgs,
UErrorCode *pErrorCode) {
@@ -793,39 +795,37 @@
*pErrorCode=U_USING_DEFAULT_WARNING;
return;
} else {
- /*
- * Use a single counter for source and target, counting the minimum of
- * the source length and the target capacity.
- * As a result, the source length is checked only once per multi-byte
- * character instead of twice.
- *
- * Make sure that the last byte sequence is complete, or else
- * stop just before it.
- * (The longest legal byte sequence has 3 trail bytes.)
- * Count oldToULength (number of source bytes from a previous buffer)
- * into the source length but reduce the source index by toULimit
- * while going back over trail bytes in order to not go back into
- * the bytes that will be read for finishing a partial
- * sequence from the previous buffer.
- * Let the standard converter handle edge cases.
- */
- int32_t i;
-
+ // Use a single counter for source and target, counting the minimum of
+ // the source length and the target capacity.
+ // Let the standard converter handle edge cases.
+ const uint8_t *limit=sourceLimit;
if(count>targetCapacity) {
+ limit-=(count-targetCapacity);
count=targetCapacity;
}
- i=0;
- while(i<3 && i<(count-toULimit)) {
- b=source[count-oldToULength-i-1];
- if(U8_IS_TRAIL(b)) {
- ++i;
- } else {
- if(i<U8_COUNT_TRAIL_BYTES(b)) {
- /* stop converting before the lead byte if there are not enough trail bytes for it */
- count-=i+1;
+ // The conversion loop checks count>0 only once per 1/2/3-byte character.
+ // If the buffer ends with a truncated 2- or 3-byte sequence,
+ // then we reduce the count to stop before that,
+ // and collect the remaining bytes after the conversion loop.
+ {
+ // Do not go back into the bytes that will be read for finishing a partial
+ // sequence from the previous buffer.
+ int32_t length=count-toULimit;
+ if(length>0) {
+ uint8_t b1=*(limit-1);
+ if(U8_IS_SINGLE(b1)) {
+ // common ASCII character
+ } else if(U8_IS_TRAIL(b1) && length>=2) {
+ uint8_t b2=*(limit-2);
+ if(0xe0<=b2 && b2<0xf0 && U8_IS_VALID_LEAD3_AND_T1(b2, b1)) {
+ // truncated 3-byte sequence
+ count-=2;
+ }
+ } else if(0xc2<=b1 && b1<0xf0) {
+ // truncated 2- or 3-byte sequence
+ --count;
}
- break;
}
}
}
@@ -940,7 +940,7 @@
}
/* copy the legal byte sequence to the target */
- {
+ if(count>=toULength) {
int8_t i;
for(i=0; i<oldToULength; ++i) {
@@ -951,9 +951,18 @@
*target++=*source++;
}
count-=toULength;
+ } else {
+ // A supplementary character that does not fit into the target.
+ // Let the standard converter handle this.
+ source-=(toULength-oldToULength);
+ pToUArgs->source=(char *)source;
+ pFromUArgs->target=(char *)target;
+ *pErrorCode=U_USING_DEFAULT_WARNING;
+ return;
}
}
}
+ U_ASSERT(count>=0);
if(U_SUCCESS(*pErrorCode) && source<sourceLimit) {
if(target==(const uint8_t *)pFromUArgs->targetLimit) {
diff -Nura icu/source/common/uloc.cpp icu_new/source/common/uloc.cpp
--- icu/source/common/uloc.cpp 2013-10-05 04:49:26.000000000 +0800
+++ icu_new/source/common/uloc.cpp 2018-05-04 18:08:23.334412945 +0800
@@ -46,6 +46,8 @@
#include <stdio.h> /* for sprintf */
+using namespace icu;
+
/* ### Declarations **************************************************/
/* Locale stuff from locid.cpp */
@@ -2239,7 +2241,7 @@
typedef struct {
float q;
int32_t dummy; /* to avoid uninitialized memory copy from qsort */
- char *locale;
+ char locale[ULOC_FULLNAME_CAPACITY+1];
} _acceptLangItem;
static int32_t U_CALLCONV
@@ -2281,9 +2283,7 @@
UEnumeration* availableLocales,
UErrorCode *status)
{
- _acceptLangItem *j;
- _acceptLangItem smallBuffer[30];
- char **strs;
+ MaybeStackArray<_acceptLangItem, 4> items; // Struct for collecting items.
char tmp[ULOC_FULLNAME_CAPACITY +1];
int32_t n = 0;
const char *itemEnd;
@@ -2293,11 +2293,7 @@
int32_t res;
int32_t i;
int32_t l = (int32_t)uprv_strlen(httpAcceptLanguage);
- int32_t jSize;
- char *tempstr; /* Use for null pointer check */
-
- j = smallBuffer;
- jSize = sizeof(smallBuffer)/sizeof(smallBuffer[0]);
+
if(U_FAILURE(*status)) {
return -1;
}
@@ -2325,27 +2321,29 @@
while(isspace(*t)) {
t++;
}
- j[n].q = (float)_uloc_strtod(t,NULL);
+ items[n].q = (float)_uloc_strtod(t,NULL);
} else {
/* no semicolon - it's 1.0 */
- j[n].q = 1.0f;
+ items[n].q = 1.0f;
paramEnd = itemEnd;
}
- j[n].dummy=0;
+ items[n].dummy=0;
/* eat spaces prior to semi */
for(t=(paramEnd-1);(paramEnd>s)&&isspace(*t);t--)
;
- /* Check for null pointer from uprv_strndup */
- tempstr = uprv_strndup(s,(int32_t)((t+1)-s));
- if (tempstr == NULL) {
- *status = U_MEMORY_ALLOCATION_ERROR;
- return -1;
- }
- j[n].locale = tempstr;
- uloc_canonicalize(j[n].locale,tmp,sizeof(tmp)/sizeof(tmp[0]),status);
- if(strcmp(j[n].locale,tmp)) {
- uprv_free(j[n].locale);
- j[n].locale=uprv_strdup(tmp);
+ int32_t slen = ((t+1)-s);
+ if(slen > ULOC_FULLNAME_CAPACITY) {
+ *status = U_BUFFER_OVERFLOW_ERROR;
+ return -1; // too big
+ }
+ uprv_strncpy(items[n].locale, s, slen);
+ items[n].locale[slen]=0; // terminate
+ int32_t clen = uloc_canonicalize(items[n].locale, tmp, UPRV_LENGTHOF(tmp)-1, status);
+ if(U_FAILURE(*status)) return -1;
+ if((clen!=slen) || (uprv_strncmp(items[n].locale, tmp, slen))) {
+ // canonicalization had an effect- copy back
+ uprv_strncpy(items[n].locale, tmp, clen);
+ items[n].locale[clen] = 0; // terminate
}
#if defined(ULOC_DEBUG)
/*fprintf(stderr,"%d: s <%s> q <%g>\n", n, j[n].locale, j[n].q);*/
@@ -2355,42 +2353,22 @@
while(*s==',') { /* eat duplicate commas */
s++;
}
- if(n>=jSize) {
- if(j==smallBuffer) { /* overflowed the small buffer. */
- j = static_cast<_acceptLangItem *>(uprv_malloc(sizeof(j[0])*(jSize*2)));
- if(j!=NULL) {
- uprv_memcpy(j,smallBuffer,sizeof(j[0])*jSize);
- }
-#if defined(ULOC_DEBUG)
- fprintf(stderr,"malloced at size %d\n", jSize);
-#endif
- } else {
- j = static_cast<_acceptLangItem *>(uprv_realloc(j, sizeof(j[0])*jSize*2));
-#if defined(ULOC_DEBUG)
- fprintf(stderr,"re-alloced at size %d\n", jSize);
-#endif
- }
- jSize *= 2;
- if(j==NULL) {
+ if(n>=items.getCapacity()) { // If we need more items
+ if(NULL == items.resize(items.getCapacity()*2, items.getCapacity())) {
*status = U_MEMORY_ALLOCATION_ERROR;
return -1;
}
- }
- }
- uprv_sortArray(j, n, sizeof(j[0]), uloc_acceptLanguageCompare, NULL, TRUE, status);
- if(U_FAILURE(*status)) {
- if(j != smallBuffer) {
#if defined(ULOC_DEBUG)
- fprintf(stderr,"freeing j %p\n", j);
+ fprintf(stderr,"malloced at size %d\n", items.getCapacity());
#endif
- uprv_free(j);
}
+ }
+ uprv_sortArray(items.getAlias(), n, sizeof(items[0]), uloc_acceptLanguageCompare, NULL, TRUE, status);
+ if(U_FAILURE(*status)) {
return -1;
}
- strs = static_cast<char **>(uprv_malloc((size_t)(sizeof(strs[0])*n)));
- /* Check for null pointer */
- if (strs == NULL) {
- uprv_free(j); /* Free to avoid memory leak */
+ LocalMemory<const char*> strs(NULL);
+ if (strs.allocateInsteadAndReset(n) == NULL) {
*status = U_MEMORY_ALLOCATION_ERROR;
return -1;
}
@@ -2398,20 +2376,10 @@
#if defined(ULOC_DEBUG)
/*fprintf(stderr,"%d: s <%s> q <%g>\n", i, j[i].locale, j[i].q);*/
#endif
- strs[i]=j[i].locale;
+ strs[i]=items[i].locale;
}
res = uloc_acceptLanguage(result, resultAvailable, outResult,
- (const char**)strs, n, availableLocales, status);
- for(i=0;i<n;i++) {
- uprv_free(strs[i]);
- }
- uprv_free(strs);
- if(j != smallBuffer) {
-#if defined(ULOC_DEBUG)
- fprintf(stderr,"freeing j %p\n", j);
-#endif
- uprv_free(j);
- }
+ strs.getAlias(), n, availableLocales, status);
return res;
}
diff -Nura icu/source/common/unicode/utf8.h icu_new/source/common/unicode/utf8.h
--- icu/source/common/unicode/utf8.h 2013-10-05 04:49:08.000000000 +0800
+++ icu_new/source/common/unicode/utf8.h 2018-05-04 18:08:23.334412945 +0800
@@ -106,6 +106,40 @@
#define U8_MASK_LEAD_BYTE(leadByte, countTrailBytes) ((leadByte)&=(1<<(6-(countTrailBytes)))-1)
/**
+ * Internal bit vector for 3-byte UTF-8 validity check, for use in U8_IS_VALID_LEAD3_AND_T1.
+ * Each bit indicates whether one lead byte + first trail byte pair starts a valid sequence.
+ * Lead byte E0..EF bits 3..0 are used as byte index,
+ * first trail byte bits 7..5 are used as bit index into that byte. Entries: E0 -> 0x20 (trail A0..BF only), ED -> 0x10 (trail 80..9F only), every other lead -> 0x30 (trail 80..BF).
+ * @see U8_IS_VALID_LEAD3_AND_T1
+ * @internal
+ */
+#define U8_LEAD3_T1_BITS "\x20\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x10\x30\x30"
+
+/**
+ * Internal 3-byte UTF-8 validity check.
+ * Non-zero if lead byte E0..EF and first trail byte 00..FF start a valid sequence. Only the low 4 bits of the lead byte are examined, so the caller must already know the lead is in E0..EF.
+ * @internal
+ */
+#define U8_IS_VALID_LEAD3_AND_T1(lead, t1) (U8_LEAD3_T1_BITS[(lead)&0xf]&(1<<((uint8_t)(t1)>>5)))
+
+/**
+ * Internal bit vector for 4-byte UTF-8 validity check, for use in U8_IS_VALID_LEAD4_AND_T1.
+ * Each bit indicates whether one lead byte + first trail byte pair starts a valid sequence.
+ * First trail byte bits 7..4 are used as byte index,
+ * lead byte F0..F4 bits 2..0 are used as bit index into that byte. Entries: trail 8x -> 0x1E (leads F1..F4), trail 9x..Bx -> 0x0F (leads F0..F3), every other trail byte -> 0.
+ * @see U8_IS_VALID_LEAD4_AND_T1
+ * @internal
+ */
+#define U8_LEAD4_T1_BITS "\x00\x00\x00\x00\x00\x00\x00\x00\x1E\x0F\x0F\x0F\x00\x00\x00\x00"
+
+/**
+ * Internal 4-byte UTF-8 validity check.
+ * Non-zero if lead byte F0..F4 and first trail byte 00..FF start a valid sequence. Only the low 3 bits of the lead byte are examined (bits 5..7 are never set in the table), so the caller must already know the lead is F0 or above.
+ * @internal
+ */
+#define U8_IS_VALID_LEAD4_AND_T1(lead, t1) (U8_LEAD4_T1_BITS[(uint8_t)(t1)>>4]&(1<<((lead)&7)))
+
+/**
* Function for handling "next code point" with error-checking.
*
* This is internal since it is not meant to be called directly by external clients;
diff -Nura icu/source/common/utext.cpp icu_new/source/common/utext.cpp
--- icu/source/common/utext.cpp 2013-10-05 04:49:22.000000000 +0800
+++ icu_new/source/common/utext.cpp 2018-05-04 18:08:23.335412957 +0800
@@ -831,9 +831,15 @@
//------------------------------------------------------------------------------
// Chunk size.
-// Must be less than 85, because of byte mapping from UChar indexes to native indexes.
-// Worst case is three native bytes to one UChar. (Supplemenaries are 4 native bytes
-// to two UChars.)
+// Must be less than 42 (256/6), because of byte mapping from UChar indexes to native indexes.
+// Worst case there are six UTF-8 bytes per UChar.
+// obsolete 6 byte form fd + 5 trails maps to fffd
+// obsolete 5 byte form fc + 4 trails maps to fffd
+// non-shortest 4 byte forms maps to fffd
+// normal supplementaries map to a pair of utf-16, two utf8 bytes per utf-16 unit
+// mapToUChars array size must allow for the worst case, 6.
+// This could be brought down to 4, by treating fd and fc as pure illegal,
+// rather than obsolete lead bytes. But that is not compatible with the utf-8 access macros.
//
enum { UTF8_TEXT_CHUNK_SIZE=32 };
@@ -873,7 +879,7 @@
// Requires two extra slots,
// one for a supplementary starting in the last normal position,
// and one for an entry for the buffer limit position.
- uint8_t mapToUChars[UTF8_TEXT_CHUNK_SIZE*3+6]; // Map native offset from bufNativeStart to
+ uint8_t mapToUChars[UTF8_TEXT_CHUNK_SIZE*6+6]; // Map native offset from bufNativeStart to
// correspoding offset in filled part of buf.
int32_t align;
};
@@ -1016,6 +1022,7 @@
// Requested index is in this buffer.
u8b = (UTF8Buf *)ut->p; // the current buffer
mapIndex = ix - u8b->toUCharsMapStart;
+ U_ASSERT(mapIndex < (int32_t)sizeof(UTF8Buf::mapToUChars));
ut->chunkOffset = u8b->mapToUChars[mapIndex] - u8b->bufStartIdx;
return TRUE;
@@ -1282,6 +1289,10 @@
// Can only do this if the incoming index is somewhere in the interior of the string.
// If index is at the end, there is no character there to look at.
if (ix != ut->b) {
+ // Note: this function will only move the index back if it is on a trail byte
+ // and there is a preceding lead byte and the sequence from the lead
+ // through this trail could be part of a valid UTF-8 sequence
+ // Otherwise the index remains unchanged.
U8_SET_CP_START(s8, 0, ix);
}
@@ -1295,7 +1306,10 @@
UChar *buf = u8b->buf;
uint8_t *mapToNative = u8b->mapToNative;
uint8_t *mapToUChars = u8b->mapToUChars;
- int32_t toUCharsMapStart = ix - (UTF8_TEXT_CHUNK_SIZE*3 + 1);
+ int32_t toUCharsMapStart = ix - sizeof(UTF8Buf::mapToUChars) + 1;
+ // Note that toUCharsMapStart can be negative. Happens when the remaining
+ // text from current position to the beginning is less than the buffer size.
+ // + 1 because mapToUChars must have a slot at the end for the bufNativeLimit entry.
int32_t destIx = UTF8_TEXT_CHUNK_SIZE+2; // Start in the overflow region
// at end of buffer to leave room
// for a surrogate pair at the
@@ -1322,6 +1336,7 @@
if (c<0x80) {
// Special case ASCII range for speed.
buf[destIx] = (UChar)c;
+ U_ASSERT(toUCharsMapStart <= srcIx);
mapToUChars[srcIx - toUCharsMapStart] = (uint8_t)destIx;
mapToNative[destIx] = (uint8_t)(srcIx - toUCharsMapStart);
} else {
@@ -1351,6 +1366,7 @@
do {
mapToUChars[sIx-- - toUCharsMapStart] = (uint8_t)destIx;
} while (sIx >= srcIx);
+ U_ASSERT(toUCharsMapStart <= (srcIx+1));
// Set native indexing limit to be the current position.
// We are processing a non-ascii, non-native-indexing char now;
@@ -1525,6 +1541,7 @@
U_ASSERT(index>=ut->chunkNativeStart+ut->nativeIndexingLimit);
U_ASSERT(index<=ut->chunkNativeLimit);
int32_t mapIndex = index - u8b->toUCharsMapStart;
+ U_ASSERT(mapIndex < (int32_t)sizeof(UTF8Buf::mapToUChars));
int32_t offset = u8b->mapToUChars[mapIndex] - u8b->bufStartIdx;
U_ASSERT(offset>=0 && offset<=ut->chunkLength);
return offset;
diff -Nura icu/source/i18n/gregoimp.cpp icu_new/source/i18n/gregoimp.cpp
--- icu/source/i18n/gregoimp.cpp 2013-10-05 04:48:52.000000000 +0800
+++ icu_new/source/i18n/gregoimp.cpp 2018-05-04 18:08:23.335412957 +0800
@@ -29,6 +29,11 @@
numerator / denominator : ((numerator + 1) / denominator) - 1;
}
+int64_t ClockMath::floorDivide(int64_t numerator, int64_t denominator) { // 64-bit overload of the int32_t floorDivide; same formula
+ return (numerator >= 0) ? // non-negative numerator: truncating division is used as-is
+ numerator / denominator : ((numerator + 1) / denominator) - 1; // negative numerator: bias by one so truncation rounds toward -infinity. NOTE(review): this yields the floor only for a positive denominator.
+}
+
int32_t ClockMath::floorDivide(double numerator, int32_t denominator,
int32_t& remainder) {
double quotient;
diff -Nura icu/source/i18n/gregoimp.h icu_new/source/i18n/gregoimp.h
--- icu/source/i18n/gregoimp.h 2013-10-05 04:48:56.000000000 +0800
+++ icu_new/source/i18n/gregoimp.h 2018-05-04 18:08:23.335412957 +0800
@@ -39,6 +39,17 @@
static int32_t floorDivide(int32_t numerator, int32_t denominator);
/**
+ * Divide two integers, returning the floor of the quotient.
+ * Unlike the built-in division, this is mathematically
+ * well-behaved. E.g., <code>-1/4</code> => 0 but
+ * <code>floorDivide(-1,4)</code> => -1.
+ * @param numerator the numerator
+ * @param denominator a divisor which must be != 0
+ * @return the floor of the quotient
+ */
+ static int64_t floorDivide(int64_t numerator, int64_t denominator);
+
+ /**
* Divide two numbers, returning the floor of the quotient.
* Unlike the built-in division, this is mathematically
* well-behaved. E.g., <code>-1/4</code> => 0 but
diff -Nura icu/source/i18n/persncal.cpp icu_new/source/i18n/persncal.cpp
--- icu/source/i18n/persncal.cpp 2013-10-05 04:48:52.000000000 +0800
+++ icu_new/source/i18n/persncal.cpp 2018-05-04 18:08:23.335412957 +0800
@@ -211,7 +211,7 @@
int32_t year, month, dayOfMonth, dayOfYear;
int32_t daysSinceEpoch = julianDay - PERSIAN_EPOCH;
- year = 1 + ClockMath::floorDivide(33 * daysSinceEpoch + 3, 12053);
+ year = 1 + (int32_t)ClockMath::floorDivide(33 * (int64_t)daysSinceEpoch + 3, (int64_t)12053);
int32_t farvardin1 = 365 * (year - 1) + ClockMath::floorDivide(8 * year + 21, 33);
dayOfYear = (daysSinceEpoch - farvardin1); // 0-based
diff -Nura icu/source/i18n/zonemeta.cpp icu_new/source/i18n/zonemeta.cpp
--- icu/source/i18n/zonemeta.cpp 2013-10-05 04:48:44.000000000 +0800
+++ icu_new/source/i18n/zonemeta.cpp 2018-05-04 18:08:23.335412957 +0800
@@ -685,7 +685,6 @@
mzMappings = new UVector(deleteOlsonToMetaMappingEntry, NULL, status);
if (U_FAILURE(status)) {
delete mzMappings;
- deleteOlsonToMetaMappingEntry(entry);
uprv_free(entry);
break;
}
diff -Nura icu/source/test/cintltst/cloctst.c icu_new/source/test/cintltst/cloctst.c
--- icu/source/test/cintltst/cloctst.c 2013-10-05 04:47:36.000000000 +0800
+++ icu_new/source/test/cintltst/cloctst.c 2018-05-04 18:08:23.336412969 +0800
@@ -2687,16 +2687,20 @@
const char *icuSet; /**< ? */
const char *expect; /**< The expected locale result */
UAcceptResult res; /**< The expected error code */
+ UErrorCode expectStatus; /**< expected status */
} tests[] = {
- /*0*/{ 0, NULL, "mt_MT", ULOC_ACCEPT_VALID },
- /*1*/{ 1, NULL, "en", ULOC_ACCEPT_VALID },
- /*2*/{ 2, NULL, "en", ULOC_ACCEPT_FALLBACK },
- /*3*/{ 3, NULL, "", ULOC_ACCEPT_FAILED },
- /*4*/{ 4, NULL, "es", ULOC_ACCEPT_VALID },
-
- /*5*/{ 5, NULL, "en", ULOC_ACCEPT_VALID }, /* XF */
- /*6*/{ 6, NULL, "ja", ULOC_ACCEPT_FALLBACK }, /* XF */
- /*7*/{ 7, NULL, "zh", ULOC_ACCEPT_FALLBACK }, /* XF */
+ /*0*/{ 0, NULL, "mt_MT", ULOC_ACCEPT_VALID, U_ZERO_ERROR},
+ /*1*/{ 1, NULL, "en", ULOC_ACCEPT_VALID, U_ZERO_ERROR},
+ /*2*/{ 2, NULL, "en", ULOC_ACCEPT_FALLBACK, U_ZERO_ERROR},
+ /*3*/{ 3, NULL, "", ULOC_ACCEPT_FAILED, U_ZERO_ERROR},
+ /*4*/{ 4, NULL, "es", ULOC_ACCEPT_VALID, U_ZERO_ERROR},
+ /*5*/{ 5, NULL, "en", ULOC_ACCEPT_VALID, U_ZERO_ERROR}, /* XF */
+ /*6*/{ 6, NULL, "ja", ULOC_ACCEPT_FALLBACK, U_ZERO_ERROR}, /* XF */
+ /*7*/{ 7, NULL, "zh", ULOC_ACCEPT_FALLBACK, U_ZERO_ERROR}, /* XF */
+ /*8*/{ 8, NULL, "", ULOC_ACCEPT_FAILED, U_ZERO_ERROR }, /* */
+ /*9*/{ 9, NULL, "", ULOC_ACCEPT_FAILED, U_ZERO_ERROR }, /* */
+ /*10*/{10, NULL, "", ULOC_ACCEPT_FAILED, U_BUFFER_OVERFLOW_ERROR }, /* */
+ /*11*/{11, NULL, "", ULOC_ACCEPT_FAILED, U_BUFFER_OVERFLOW_ERROR }, /* */
};
const int32_t numTests = sizeof(tests)/sizeof(tests[0]);
static const char *http[] = {
@@ -2711,11 +2715,26 @@
"xxx-yyy;q=.01, xxx-yyy;q=.01, xxx-yyy;q=.01, xxx-yyy;q=.01, xxx-yyy;q=.01, "
"xxx-yyy;q=.01, xxx-yyy;q=.01, xxx-yyy;q=.01, xxx-yyy;q=.01, xxx-yyy;q=.01, "
"xxx-yyy;q=.01, xxx-yyy;q=.01, xxx-yyy;q=.01, xx-yy;q=.1, "
- "es",
-
+ "es",
/*5*/ "zh-xx;q=0.9, en;q=0.6",
/*6*/ "ja-JA",
/*7*/ "zh-xx;q=0.9",
+ /*08*/ "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
+ "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
+ "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
+ "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA", // 156
+ /*09*/ "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
+ "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
+ "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
+ "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAB", // 157 (this hits U_STRING_NOT_TERMINATED_WARNING )
+ /*10*/ "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
+ "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
+ "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
+ "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABC", // 158
+ /*11*/ "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
+ "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
+ "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
+ "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA", // 163 bytes
};
for(i=0;i<numTests;i++) {
@@ -2730,6 +2749,10 @@
(void)rc; /* Suppress set but not used warning. */
uenum_close(available);
log_verbose(" got %s, %s [%s]\n", tmp[0]?tmp:"(EMPTY)", acceptResult(outResult), u_errorName(status));
+ if(status != tests[i].expectStatus) {
+ log_err_status(status, "FAIL: expected status %s but got %s\n", u_errorName(tests[i].expectStatus), u_errorName(status));
+ } else if(U_SUCCESS(tests[i].expectStatus)) {
+ /* don't check content if expected failure */
if(outResult != tests[i].res) {
log_err_status(status, "FAIL: #%d: expected outResult of %s but got %s\n", i,
acceptResult( tests[i].res),
@@ -2741,6 +2764,7 @@
log_err_status(status, "FAIL: #%d: expected %s but got %s\n", i, tests[i].expect, tmp);
log_info("test #%d: http[%s], ICU[%s], expect %s, %s\n",
i, http[tests[i].httpSet], tests[i].icuSet, tests[i].expect, acceptResult(tests[i].res));
+ }
}
}
}
diff -Nura icu/source/test/intltest/calregts.cpp icu_new/source/test/intltest/calregts.cpp
--- icu/source/test/intltest/calregts.cpp 2013-10-05 04:47:58.000000000 +0800
+++ icu_new/source/test/intltest/calregts.cpp 2018-05-04 18:08:23.337412982 +0800
@@ -10,6 +10,7 @@
#include "calregts.h"
+#include "unicode/calendar.h"
#include "unicode/gregocal.h"
#include "unicode/simpletz.h"
#include "unicode/smpdtfmt.h"
@@ -88,6 +89,7 @@
CASE(48,TestT8596);
CASE(49,Test9019);
CASE(50,TestT9452);
+ CASE(51,TestPersianCalOverflow); // must directly follow index 50: a gap would hit default (name = ""), which ends test enumeration before this test is reached
default: name = ""; break;
}
}
@@ -2944,4 +2946,34 @@
}
}
+/**
+ * @bug ticket 13454 - regression test: for Julian days around 67023582 the Persian calendar's MONTH and DATE fields must stay within their reported maxima.
+ */
+void CalendarRegressionTest::TestPersianCalOverflow(void) {
+ const char* localeID = "bs_Cyrl@calendar=persian";
+ UErrorCode status = U_ZERO_ERROR;
+ Calendar* cal = Calendar::createInstance(Locale(localeID), status);
+ if(U_FAILURE(status)) {
+ dataerrln("FAIL: Calendar::createInstance for localeID %s: %s", localeID, u_errorName(status));
+ } else {
+ int32_t maxMonth = cal->getMaximum(UCAL_MONTH); // upper bound the computed month is checked against
+ int32_t maxDayOfMonth = cal->getMaximum(UCAL_DATE); // upper bound the computed day-of-month is checked against
+ int32_t jd, month, dayOfMonth;
+ for (jd = 67023580; jd <= 67023584; jd++) { // year 178171, int32_t overflow if jd >= 67023582; the range straddles that boundary
+ status = U_ZERO_ERROR;
+ cal->clear();
+ cal->set(UCAL_JULIAN_DAY, jd);
+ month = cal->get(UCAL_MONTH, status);
+ dayOfMonth = cal->get(UCAL_DATE, status);
+ if ( U_FAILURE(status) ) {
+ errln("FAIL: Calendar->get MONTH/DATE for localeID %s, julianDay %d, status %s\n", localeID, jd, u_errorName(status));
+ } else if (month > maxMonth || dayOfMonth > maxDayOfMonth) { // only the upper bounds are verified here
+ errln("FAIL: localeID %s, julianDay %d; maxMonth %d, got month %d; maxDayOfMonth %d, got dayOfMonth %d\n",
+ localeID, jd, maxMonth, month, maxDayOfMonth, dayOfMonth);
+ }
+ }
+ delete cal;
+ }
+}
+
#endif /* #if !UCONFIG_NO_FORMATTING */
diff -Nura icu/source/test/intltest/calregts.h icu_new/source/test/intltest/calregts.h
--- icu/source/test/intltest/calregts.h 2013-10-05 04:47:56.000000000 +0800
+++ icu_new/source/test/intltest/calregts.h 2018-05-04 18:08:23.337412982 +0800
@@ -75,6 +75,7 @@
void TestT8596(void);
void Test9019(void);
void TestT9452(void);
+ void TestPersianCalOverflow(void);
void printdate(GregorianCalendar *cal, const char *string);
void dowTest(UBool lenient) ;
diff -Nura icu/source/test/intltest/convtest.cpp icu_new/source/test/intltest/convtest.cpp
--- icu/source/test/intltest/convtest.cpp 2013-10-05 04:47:56.000000000 +0800
+++ icu_new/source/test/intltest/convtest.cpp 2018-05-07 15:54:38.740421990 +0800
@@ -6,7 +6,7 @@
*
*******************************************************************************
* file name: convtest.cpp
-* encoding: US-ASCII
+* encoding: UTF-8
* tab size: 8 (not used)
* indentation:4
*
@@ -35,13 +35,13 @@
#include "unicode/uniset.h"
#include "unicode/ustring.h"
#include "unicode/ures.h"
+#include "unicode/utf16.h"
#include "convtest.h"
+#include "cmemory.h"
#include "unicode/tstdtmod.h"
#include <string.h>
#include <stdlib.h>
-#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
-
enum {
// characters used in test data for callbacks
SUB_CB='?',
@@ -66,19 +66,16 @@
void
ConversionTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) {
if (exec) logln("TestSuite ConversionTest: ");
- switch (index) {
+ TESTCASE_AUTO_BEGIN;
#if !UCONFIG_NO_FILE_IO
- case 0: name="TestToUnicode"; if (exec) TestToUnicode(); break;
- case 1: name="TestFromUnicode"; if (exec) TestFromUnicode(); break;
- case 2: name="TestGetUnicodeSet"; if (exec) TestGetUnicodeSet(); break;
-#else
- case 0:
- case 1:
- case 2: name="skip"; break;
+ TESTCASE_AUTO(TestToUnicode);
+ TESTCASE_AUTO(TestFromUnicode);
+ TESTCASE_AUTO(TestGetUnicodeSet);
#endif
- case 3: name="TestGetUnicodeSet2"; if (exec) TestGetUnicodeSet2(); break;
- default: name=""; break; //needed to end loop
- }
+ TESTCASE_AUTO(TestGetUnicodeSet2);
+ TESTCASE_AUTO(TestDefaultIgnorableCallback);
+ TESTCASE_AUTO(TestUTF8ToUTF8Overflow);
+ TESTCASE_AUTO_END;
}
// test data interface ----------------------------------------------------- ***
@@ -289,7 +286,7 @@
// read a substitution string, separated by an equal sign
p=s.getBuffer()+index+1;
length=s.length()-(index+1);
- if(length<0 || length>=LENGTHOF(cc.subString)) {
+ if(length<0 || length>=UPRV_LENGTHOF(cc.subString)) {
errorCode=U_ILLEGAL_ARGUMENT_ERROR;
} else {
u_memcpy(cc.subString, p, length);
@@ -443,7 +440,7 @@
if(!diffSet.isEmpty()) {
diffSet.toPattern(s, TRUE);
if(s.length()>100) {
- s.replace(100, 0x7fffffff, ellipsis, LENGTHOF(ellipsis));
+ s.replace(100, 0x7fffffff, ellipsis, UPRV_LENGTHOF(ellipsis));
}
errln("error: ucnv_getUnicodeSet(\"%s\") is missing items - conversion/getUnicodeSet test case %d",
charset, i);
@@ -455,7 +452,7 @@
if(!diffSet.isEmpty()) {
diffSet.toPattern(s, TRUE);
if(s.length()>100) {
- s.replace(100, 0x7fffffff, ellipsis, LENGTHOF(ellipsis));
+ s.replace(100, 0x7fffffff, ellipsis, UPRV_LENGTHOF(ellipsis));
}
errln("error: ucnv_getUnicodeSet(\"%s\") contains unexpected items - conversion/getUnicodeSet test case %d",
charset, i);
@@ -553,7 +550,7 @@
LocalUConverterPointer cnv;
char buffer[1024];
int32_t i;
- for(i=0; i<LENGTHOF(cnvNames); ++i) {
+ for(i=0; i<UPRV_LENGTHOF(cnvNames); ++i) {
UErrorCode errorCode=U_ZERO_ERROR;
cnv.adoptInstead(cnv_open(cnvNames[i], errorCode));
if(U_FAILURE(errorCode)) {
@@ -623,7 +620,7 @@
if(!diffSet.isEmpty()) {
diffSet.toPattern(out, TRUE);
if(out.length()>100) {
- out.replace(100, 0x7fffffff, ellipsis, LENGTHOF(ellipsis));
+ out.replace(100, 0x7fffffff, ellipsis, UPRV_LENGTHOF(ellipsis));
}
errln("error: ucnv_getUnicodeSet(\"%s\") is missing items - which set: %d",
cnvNames[i], which);
@@ -635,7 +632,7 @@
if(!diffSet.isEmpty()) {
diffSet.toPattern(out, TRUE);
if(out.length()>100) {
- out.replace(100, 0x7fffffff, ellipsis, LENGTHOF(ellipsis));
+ out.replace(100, 0x7fffffff, ellipsis, UPRV_LENGTHOF(ellipsis));
}
errln("error: ucnv_getUnicodeSet(\"%s\") contains unexpected items - which set: %d",
cnvNames[i], which);
@@ -648,6 +645,151 @@
delete [] s0;
}
+// Test that all code points which have the Default_Ignorable_Code_Point property are ignored if they have no mapping.
+// If there are any failures, the hard coded list (IS_DEFAULT_IGNORABLE_CODE_POINT) in ucnv_err.c should be updated.
+void
+ConversionTest::TestDefaultIgnorableCallback() {
+    UErrorCode status = U_ZERO_ERROR;
+    const char *cnv_name = "euc-jp-2007";
+    const char *pattern_ignorable = "[:Default_Ignorable_Code_Point:]";
+    const char *pattern_not_ignorable = "[:^Default_Ignorable_Code_Point:]";
+
+    // Owning smart pointers are used for the sets and the converter so that
+    // they are released on every return path, including the early error
+    // returns below.
+    LocalPointer<UnicodeSet> set_ignorable(new UnicodeSet(pattern_ignorable, status));
+    if (U_FAILURE(status)) {
+        dataerrln("Unable to create Unicodeset: %s - %s\n", pattern_ignorable, u_errorName(status));
+        return;
+    }
+
+    LocalPointer<UnicodeSet> set_not_ignorable(new UnicodeSet(pattern_not_ignorable, status));
+    if (U_FAILURE(status)) {
+        dataerrln("Unable to create Unicodeset: %s - %s\n", pattern_not_ignorable, u_errorName(status));
+        return;
+    }
+
+    LocalUConverterPointer cnv(cnv_open(cnv_name, status));
+    if (U_FAILURE(status)) {
+        dataerrln("Unable to open converter: %s - %s\n", cnv_name, u_errorName(status));
+        return;
+    }
+
+    // set callback for the converter
+    ucnv_setFromUCallBack(cnv.getAlias(), UCNV_FROM_U_CALLBACK_SUBSTITUTE, NULL, NULL, NULL, &status);
+
+    UChar32 input[1];
+    char output[10];
+    int32_t outputLength;
+
+    // test default ignorables are ignored
+    int size = set_ignorable->size();
+    for (int i = 0; i < size; i++) {
+        status = U_ZERO_ERROR;
+        outputLength= 0;
+
+        input[0] = set_ignorable->charAt(i);
+
+        outputLength = ucnv_fromUChars(cnv.getAlias(), output, 10, UnicodeString::fromUTF32(input, 1).getTerminatedBuffer(), -1, &status);
+        if (U_FAILURE(status) || outputLength != 0) {
+            errln("Ignorable code point: U+%04X not skipped as expected - %s", input[0], u_errorName(status));
+        }
+    }
+
+    // test non-ignorables are not ignored (only the status is checked, not the output length)
+    size = set_not_ignorable->size();
+    for (int i = 0; i < size; i++) {
+        status = U_ZERO_ERROR;
+        outputLength= 0;
+
+        input[0] = set_not_ignorable->charAt(i);
+
+        // U+0000 would look like an empty string to the NUL-terminated (-1 length) call below.
+        if (input[0] == 0) {
+            continue;
+        }
+
+        ucnv_fromUChars(cnv.getAlias(), output, 10, UnicodeString::fromUTF32(input, 1).getTerminatedBuffer(), -1, &status);
+        if (U_FAILURE(status)) {
+            errln("Non-ignorable code point: U+%04X skipped unexpectedly - %s", input[0], u_errorName(status));
+        }
+    }
+}
+
+void
+ConversionTest::TestUTF8ToUTF8Overflow() { // UTF-8 to UTF-8 ucnv_convertEx() with a target limit shorter than the text: checks the overflow report, the byte count, and that nothing is written past the limit
+ IcuTestErrorCode errorCode(*this, "TestUTF8ToUTF8Overflow");
+ LocalUConverterPointer cnv1(ucnv_open("UTF-8", errorCode));
+ LocalUConverterPointer cnv2(ucnv_open("UTF-8", errorCode));
+ static const char *text = "aä"; // ä: 2 bytes
+ const char *source = text;
+ const char *sourceLimit = text + strlen(text);
+ char result[20];
+ char *target = result;
+ const char *targetLimit = result + sizeof(result); // full-capacity limit, used for the "convert the rest" calls
+ UChar buffer16[20]; // pivot buffer handed to ucnv_convertEx()
+ UChar *pivotSource = buffer16;
+ UChar *pivotTarget = buffer16;
+ const UChar *pivotLimit = buffer16 + UPRV_LENGTHOF(buffer16);
+
+ // Convert with insufficient target capacity.
+ result[2] = 5; // sentinel at the position of the shortened limit (result + 2) used below
+ ucnv_convertEx(cnv2.getAlias(), cnv1.getAlias(),
+ &target, result + 2, &source, sourceLimit,
+ buffer16, &pivotSource, &pivotTarget, pivotLimit,
+ FALSE, FALSE, errorCode);
+ assertEquals("overflow", U_BUFFER_OVERFLOW_ERROR, errorCode.reset()); // reset() also clears the error for the next call
+ int32_t length = (int32_t)(target - result);
+ assertEquals("number of bytes written", 2, length);
+ assertEquals("next byte not clobbered", 5, result[2]); // the sentinel must be unchanged
+
+ // Convert the rest and flush.
+ ucnv_convertEx(cnv2.getAlias(), cnv1.getAlias(),
+ &target, targetLimit, &source, sourceLimit,
+ buffer16, &pivotSource, &pivotTarget, pivotLimit,
+ FALSE, TRUE, errorCode);
+
+ assertSuccess("UTF-8->UTF-8", errorCode);
+ length = (int32_t)(target - result);
+ assertEquals("3 bytes", 3, length);
+ if (length == 3) { // compare contents only when the length matches
+ assertTrue("result same as input", memcmp(text, result, length) == 0);
+ }
+
+ ucnv_reset(cnv1.getAlias()); // start the second scenario from reset converters and a cleared result buffer
+ ucnv_reset(cnv2.getAlias());
+ memset(result, 0, sizeof(result));
+ static const char *text2 = "a🚲"; // U+1F6B2 bicycle: 4 bytes
+ source = text2;
+ sourceLimit = text2 + strlen(text2);
+ target = result;
+ pivotSource = pivotTarget = buffer16;
+
+ // Convert with insufficient target capacity.
+ result[3] = 5; // sentinel at the position of the shortened limit (result + 3) used below
+ ucnv_convertEx(cnv2.getAlias(), cnv1.getAlias(),
+ &target, result + 3, &source, sourceLimit,
+ buffer16, &pivotSource, &pivotTarget, pivotLimit,
+ FALSE, FALSE, errorCode);
+ assertEquals("text2 overflow", U_BUFFER_OVERFLOW_ERROR, errorCode.reset());
+ length = (int32_t)(target - result);
+ assertEquals("text2 number of bytes written", 3, length);
+ assertEquals("text2 next byte not clobbered", 5, result[3]); // the sentinel must be unchanged
+
+ // Convert the rest and flush.
+ ucnv_convertEx(cnv2.getAlias(), cnv1.getAlias(),
+ &target, targetLimit, &source, sourceLimit,
+ buffer16, &pivotSource, &pivotTarget, pivotLimit,
+ FALSE, TRUE, errorCode);
+
+ assertSuccess("text2 UTF-8->UTF-8", errorCode);
+ length = (int32_t)(target - result);
+ assertEquals("text2 5 bytes", 5, length);
+ if (length == 5) { // compare contents only when the length matches
+ assertTrue("text2 result same as input", memcmp(text2, result, length) == 0);
+ }
+}
+
// open testdata or ICU data converter ------------------------------------- ***
UConverter *
@@ -949,6 +1091,7 @@
// open the converter
IcuTestErrorCode errorCode(*this, "ToUnicodeCase");
LocalUConverterPointer cnv(cnv_open(cc.charset, errorCode));
+ // with no data, the above crashes with "pointer being freed was not allocated" for charset "x11-compound-text", see #13078
if(errorCode.isFailure()) {
errcheckln(errorCode, "toUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_open() failed - %s",
cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, errorCode.errorName());
@@ -990,7 +1133,7 @@
int32_t i, step;
ok=TRUE;
- for(i=0; i<LENGTHOF(steps) && ok; ++i) {
+ for(i=0; i<UPRV_LENGTHOF(steps) && ok; ++i) {
step=steps[i].step;
if(step<0 && !cc.finalFlush) {
// skip ucnv_getNextUChar() if !finalFlush because
@@ -1002,12 +1145,12 @@
cc.offsets=NULL;
}
else {
- memset(resultOffsets, -1, LENGTHOF(resultOffsets));
+ memset(resultOffsets, -1, UPRV_LENGTHOF(resultOffsets));
}
- memset(result, -1, LENGTHOF(result));
+ memset(result, -1, UPRV_LENGTHOF(result));
errorCode.reset();
resultLength=stepToUnicode(cc, cnv.getAlias(),
- result, LENGTHOF(result),
+ result, UPRV_LENGTHOF(result),
step==0 ? resultOffsets : NULL,
step, errorCode);
ok=checkToUnicode(
@@ -1037,7 +1180,7 @@
errorCode.reset();
resultLength=ucnv_toUChars(cnv.getAlias(),
- result, LENGTHOF(result),
+ result, UPRV_LENGTHOF(result),
(const char *)cc.bytes, cc.bytesLength,
errorCode);
ok=checkToUnicode(
@@ -1184,7 +1327,7 @@
targetLimit=resultLimit;
flush=cc.finalFlush;
- pivotLimit=pivotBuffer+LENGTHOF(pivotBuffer);
+ pivotLimit=pivotBuffer+UPRV_LENGTHOF(pivotBuffer);
} else {
// start with empty partial buffers
sourceLimit=source;
@@ -1403,7 +1546,7 @@
// convert unicode to utf8
char utf8[256];
cc.utf8=utf8;
- u_strToUTF8(utf8, LENGTHOF(utf8), &cc.utf8Length,
+ u_strToUTF8(utf8, UPRV_LENGTHOF(utf8), &cc.utf8Length,
cc.unicode, cc.unicodeLength,
&errorCode);
if(U_FAILURE(errorCode)) {
@@ -1430,13 +1573,13 @@
int32_t i, step;
ok=TRUE;
- for(i=0; i<LENGTHOF(steps) && ok; ++i) {
+ for(i=0; i<UPRV_LENGTHOF(steps) && ok; ++i) {
step=steps[i].step;
- memset(resultOffsets, -1, LENGTHOF(resultOffsets));
- memset(result, -1, LENGTHOF(result));
+ memset(resultOffsets, -1, UPRV_LENGTHOF(resultOffsets));
+ memset(result, -1, UPRV_LENGTHOF(result));
errorCode=U_ZERO_ERROR;
resultLength=stepFromUnicode(cc, cnv,
- result, LENGTHOF(result),
+ result, UPRV_LENGTHOF(result),
step==0 ? resultOffsets : NULL,
step, &errorCode);
ok=checkFromUnicode(
@@ -1465,7 +1608,7 @@
if(cc.utf8Length>=0) {
errorCode=U_ZERO_ERROR;
resultLength=stepFromUTF8(cc, utf8Cnv, cnv,
- result, LENGTHOF(result),
+ result, UPRV_LENGTHOF(result),
step, &errorCode);
ok=checkFromUnicode(
cc, cnv, steps[i].utf8Name,
@@ -1488,7 +1631,7 @@
errorCode=U_ZERO_ERROR;
resultLength=ucnv_fromUChars(cnv,
- result, LENGTHOF(result),
+ result, UPRV_LENGTHOF(result),
cc.unicode, cc.unicodeLength,
&errorCode);
ok=checkFromUnicode(
@@ -1537,7 +1680,7 @@
msg=NULL;
errorCode=U_ZERO_ERROR;
- resultInvalidLength=LENGTHOF(resultInvalidUChars);
+ resultInvalidLength=UPRV_LENGTHOF(resultInvalidUChars);
ucnv_getInvalidUChars(cnv, resultInvalidUChars, &resultInvalidLength, &errorCode);
if(U_FAILURE(errorCode)) {
errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) ucnv_getInvalidUChars() failed - %s",
diff -Nura icu/source/test/intltest/convtest.h icu_new/source/test/intltest/convtest.h
--- icu/source/test/intltest/convtest.h 2013-10-05 04:47:50.000000000 +0800
+++ icu_new/source/test/intltest/convtest.h 2018-05-04 18:08:23.337412982 +0800
@@ -6,7 +6,7 @@
*
*******************************************************************************
* file name: convtest.h
- * encoding: US-ASCII
+ * encoding: UTF-8
* tab size: 8 (not used)
* indentation:4
*
@@ -73,6 +73,8 @@
void TestFromUnicode();
void TestGetUnicodeSet();
void TestGetUnicodeSet2();
+ void TestDefaultIgnorableCallback();
+ void TestUTF8ToUTF8Overflow();
private:
UBool
diff -Nura icu/source/test/intltest/utxttest.cpp icu_new/source/test/intltest/utxttest.cpp
--- icu/source/test/intltest/utxttest.cpp 2013-10-05 04:47:58.000000000 +0800
+++ icu_new/source/test/intltest/utxttest.cpp 2018-05-04 18:08:23.338412994 +0800
@@ -57,6 +57,8 @@
if (exec) Ticket5560(); break;
case 4: name = "Ticket6847";
if (exec) Ticket6847(); break;
+ case 5: name = "Ticket12888"; // NOTE(review): next consecutive index after case 4; with 8, index 5 hit default (empty name), which presumably ends test enumeration - confirm against the intltest driver
+ if (exec) Ticket12888(); break;
default: name = ""; break;
}
}
@@ -1452,3 +1454,62 @@
utext_close(ut);
}
+// Ticket 12888: bad handling of illegal utf-8 containing many instances of the archaic, now illegal,
+// six byte utf-8 forms. Original implementation had an assumption that
+// there would be at most three utf-8 bytes per UTF-16 code unit.
+// The five and six byte sequences map to a single replacement character.
+
+void UTextTest::Ticket12888() {
+    const char *badString =
+ "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
+ "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
+ "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
+ "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
+ "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
+ "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
+ "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
+ "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
+ "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
+ "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
+ "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
+ "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
+ "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
+ "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
+ "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
+ "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
+ "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
+ "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
+ "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80"
+ "\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80\xfd\x80\x80\x80\x80\x80";
+
+    const int32_t expectedLength = (int32_t)strlen(badString);
+    UErrorCode status = U_ZERO_ERROR;
+    LocalUTextPointer ut(utext_openUTF8(NULL, badString, -1, &status));
+    TEST_SUCCESS(status);
+    // Iterate forward until the sentinel; only the final native index is checked afterwards.
+    while (utext_next32(ut.getAlias()) != U_SENTINEL) {
+    }
+    int32_t endIdx = utext_getNativeIndex(ut.getAlias());
+    if (endIdx != expectedLength) {
+        // Both values are int32_t so that they match the %d conversions (strlen() returns size_t).
+        errln("%s:%d expected=%d, actual=%d", __FILE__, __LINE__, expectedLength, endIdx);
+        return;
+    }
+
+    // Walk backward: every step must yield U+FFFD and move the native index back by exactly 6 bytes.
+    for (int32_t prevIndex = endIdx; prevIndex>0;) {
+        UChar32 c = utext_previous32(ut.getAlias());
+        int32_t currentIndex = utext_getNativeIndex(ut.getAlias());
+        if (c != 0xfffd) {
+            errln("%s:%d (expected, actual, index) = (%d, %d, %d)\n",
+                __FILE__, __LINE__, 0xfffd, c, currentIndex);
+            break;
+        }
+        if (currentIndex != prevIndex - 6) {
+            errln("%s:%d: wrong index. Expected, actual = %d, %d",
+                __FILE__, __LINE__, prevIndex - 6, currentIndex);
+            break;
+        }
+        prevIndex = currentIndex;
+    }
+}
diff -Nura icu/source/test/intltest/utxttest.h icu_new/source/test/intltest/utxttest.h
--- icu/source/test/intltest/utxttest.h 2013-10-05 04:47:56.000000000 +0800
+++ icu_new/source/test/intltest/utxttest.h 2018-05-04 18:08:23.338412994 +0800
@@ -33,6 +33,7 @@
void FreezeTest();
void Ticket5560();
void Ticket6847();
+ void Ticket12888();
private:
struct m { // Map between native indices & code points.