File icu-CVE-2025-5222-shim06_9e4365c.patch of Package icu
From 9e4365c9e6ae95aad618af4bf0ae690c5d43d441 Mon Sep 17 00:00:00 2001
From: Markus Scherer <markus.icu@gmail.com>
Date: Fri, 28 Mar 2014 22:03:03 +0000
Subject: [PATCH] ICU-10810 genrb: preflight strings on final parse tree, not while building the tree
X-SVN-Rev: 35544
---
icu4c/source/tools/genrb/parse.cpp | 42 ++++--
icu4c/source/tools/genrb/reslist.c | 222 +++++++++++++++++------------
icu4c/source/tools/genrb/reslist.h | 11 +-
3 files changed, 160 insertions(+), 115 deletions(-)
--- a/source/tools/genrb/parse.cpp
+++ b/source/tools/genrb/parse.cpp
@@ -886,7 +886,11 @@
return NULL;
}
- if (uprv_strcmp(subtag, "Version") == 0)
+ if (result == NULL)
+ {
+ // Ignore the parsed resources, continue parsing.
+ }
+ else if (uprv_strcmp(subtag, "Version") == 0)
{
char ver[40];
int32_t length = member->u.fString.fLength;
@@ -900,13 +904,7 @@
u_versionFromString(version, ver);
table_add(result, member, line, status);
-
- }
- else if (uprv_strcmp(subtag, "Override") == 0)
- {
- // UBool override = (u_strncmp(member->u.fString.fChars, trueValue, u_strlen(trueValue)) == 0);
- table_add(result, member, line, status);
-
+ member = NULL;
}
else if(uprv_strcmp(subtag, "%%CollationBin")==0)
{
@@ -1013,12 +1011,17 @@
#endif
/* in order to achieve smaller data files, we can direct genrb */
/* to omit collation rules */
- if(state->omitCollationRules) {
- bundle_closeString(state->bundle, member);
- } else {
+ if(!state->omitCollationRules) {
table_add(result, member, line, status);
+ member = NULL;
}
}
+ else // Just copy non-special items.
+ {
+ table_add(result, member, line, status);
+ member = NULL;
+ }
+ res_close(member); // TODO: use LocalPointer
if (U_FAILURE(*status))
{
res_close(result);
@@ -1031,6 +1034,11 @@
return NULL;
}
+static UBool
+keepCollationType(const char *type) {
+ return gIncludeUnihanColl || uprv_strcmp(type, "unihan") != 0;
+}
+
static struct SResource *
parseCollationElements(ParseState* state, char *tag, uint32_t startline, UBool newCollation, UErrorCode *status)
{
@@ -1110,9 +1118,14 @@
/* then, we cannot handle aliases */
if(token == TOK_OPEN_BRACE) {
token = getToken(state, &tokenValue, &comment, &line, status);
- collationRes = table_open(state->bundle, subtag, NULL, status);
- collationRes = addCollation(state, collationRes, subtag, startline, status); /* need to parse the collation data regardless */
- if (gIncludeUnihanColl || uprv_strcmp(subtag, "unihan") != 0) {
+ if (keepCollationType(subtag)) {
+ collationRes = table_open(state->bundle, subtag, NULL, status);
+ } else {
+ collationRes = NULL;
+ }
+ // need to parse the collation data regardless
+ collationRes = addCollation(state, collationRes, subtag, startline, status);
+ if (collationRes != NULL) {
table_add(result, collationRes, startline, status);
}
} else if(token == TOK_COLON) { /* right now, we'll just try to see if we have aliases */
--- a/source/tools/genrb/reslist.c
+++ b/source/tools/genrb/reslist.c
@@ -1,7 +1,7 @@
/*
*******************************************************************************
*
-* Copyright (C) 2000-2012, International Business Machines
+* Copyright (C) 2000-2014, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
@@ -25,6 +25,7 @@
#include "uarrsort.h"
#include "uelement.h"
+#include "uhash.h"
#include "uinvchar.h"
#include "ustr_imp.h"
#include "unicode/utf16.h"
@@ -105,6 +106,19 @@
/* Writing Functions */
/*
+ * Preflight strings.
+ * Find duplicates and count the total number of string code units
+ * so that they can be written first to the 16-bit array,
+ * for minimal string and container storage.
+ *
+ * We walk the final parse tree, rather than collecting this information while building it,
+ * so that we need not deal with changes to the parse tree (especially removing resources).
+ */
+static void
+res_preflightStrings(struct SRBRoot *bundle, struct SResource *res, UHashtable *stringSet,
+ UErrorCode *status);
+
+/*
* type_write16() functions write resource values into f16BitUnits
* and determine the resource item word, if possible.
*/
@@ -112,6 +126,92 @@
res_write16(struct SRBRoot *bundle, struct SResource *res,
UErrorCode *status);
+static void
+string_preflightStrings(struct SRBRoot *bundle, struct SResource *res, UHashtable *stringSet,
+ UErrorCode *status) {
+ res->u.fString.fSame = uhash_get(stringSet, res);
+ if (res->u.fString.fSame != NULL) {
+ return; /* This is a duplicate of an earlier-visited string. */
+ }
+ /* Put this string into the set for finding duplicates. */
+ uhash_put(stringSet, res, res, status);
+
+ if (bundle->fStringsForm != STRINGS_UTF16_V1) {
+ const UChar *s = res->u.fString.fChars;
+ int32_t len = res->u.fString.fLength;
+ if (len <= MAX_IMPLICIT_STRING_LENGTH && !U16_IS_TRAIL(s[0]) && len == u_strlen(s)) {
+ /*
+ * This string will be stored without an explicit length.
+ * Runtime will detect !U16_IS_TRAIL(s[0]) and call u_strlen().
+ */
+ res->u.fString.fNumCharsForLength = 0;
+ } else if (len <= 0x3ee) {
+ res->u.fString.fNumCharsForLength = 1;
+ } else if (len <= 0xfffff) {
+ res->u.fString.fNumCharsForLength = 2;
+ } else {
+ res->u.fString.fNumCharsForLength = 3;
+ }
+ bundle->f16BitUnitsLength += res->u.fString.fNumCharsForLength + len + 1; /* +1 for the NUL */
+ }
+}
+
+static void
+array_preflightStrings(struct SRBRoot *bundle, struct SResource *res, UHashtable *stringSet,
+ UErrorCode *status) {
+ struct SResource *current;
+
+ if (U_FAILURE(*status)) {
+ return;
+ }
+ for (current = res->u.fArray.fFirst; current != NULL; current = current->fNext) {
+ res_preflightStrings(bundle, current, stringSet, status);
+ }
+}
+
+static void
+table_preflightStrings(struct SRBRoot *bundle, struct SResource *res, UHashtable *stringSet,
+ UErrorCode *status) {
+ struct SResource *current;
+
+ if (U_FAILURE(*status)) {
+ return;
+ }
+ for (current = res->u.fTable.fFirst; current != NULL; current = current->fNext) {
+ res_preflightStrings(bundle, current, stringSet, status);
+ }
+}
+
+static void
+res_preflightStrings(struct SRBRoot *bundle, struct SResource *res, UHashtable *stringSet,
+ UErrorCode *status) {
+ if (U_FAILURE(*status) || res == NULL) {
+ return;
+ }
+ if (res->fRes != RES_BOGUS) {
+ /*
+ * The resource item word was already precomputed, which means
+ * no further data needs to be written.
+ * This might be an integer, or an empty string/binary/etc.
+ */
+ return;
+ }
+ switch (res->fType) {
+ case URES_STRING:
+ string_preflightStrings(bundle, res, stringSet, status);
+ break;
+ case URES_ARRAY:
+ array_preflightStrings(bundle, res, stringSet, status);
+ break;
+ case URES_TABLE:
+ table_preflightStrings(bundle, res, stringSet, status);
+ break;
+ default:
+ /* Neither a string nor a container. */
+ break;
+ }
+}
+
/*
* type_preWrite() functions calculate ("preflight") and advance the *byteOffset
* by the size of their data in the binary file and
@@ -221,10 +321,7 @@
struct SResource *same;
if ((same = res->u.fString.fSame) != NULL) {
/* This is a duplicate. */
- if (same->fRes == RES_BOGUS) {
- /* The original has not been visited yet. */
- string_write16(bundle, same, status);
- }
+ assert(same->fRes != RES_BOGUS && same->fWritten);
res->fRes = same->fRes;
res->fWritten = same->fWritten;
}
@@ -900,98 +997,42 @@
FALSE);
}
-struct SResource *string_open(struct SRBRoot *bundle, const char *tag, const UChar *value, int32_t len, const struct UString* comment, UErrorCode *status) {
+static struct SResource *
+stringbase_open(struct SRBRoot *bundle, const char *tag, int8_t type,
+ const UChar *value, int32_t len, const struct UString* comment,
+ UErrorCode *status) {
struct SResource *res = res_open(bundle, tag, comment, status);
if (U_FAILURE(*status)) {
return NULL;
}
- res->fType = URES_STRING;
+ res->fType = type;
if (len == 0 && gFormatVersion > 1) {
res->u.fString.fChars = &gEmptyString;
- res->fRes = 0;
+ res->fRes = URES_MAKE_EMPTY_RESOURCE(type);
res->fWritten = TRUE;
return res;
}
res->u.fString.fLength = len;
-
- if (gFormatVersion > 1) {
- /* check for duplicates */
- res->u.fString.fChars = (UChar *)value;
- if (bundle->fStringSet == NULL) {
- UErrorCode localStatus = U_ZERO_ERROR; /* if failure: just don't detect dups */
- bundle->fStringSet = uhash_open(string_hash, string_comp, string_comp, &localStatus);
- } else {
- res->u.fString.fSame = uhash_get(bundle->fStringSet, res);
- }
- }
- if (res->u.fString.fSame == NULL) {
- /* this is a new string */
- res->u.fString.fChars = (UChar *) uprv_malloc(sizeof(UChar) * (len + 1));
-
- if (res->u.fString.fChars == NULL) {
- *status = U_MEMORY_ALLOCATION_ERROR;
- uprv_free(res);
- return NULL;
- }
-
- uprv_memcpy(res->u.fString.fChars, value, sizeof(UChar) * len);
- res->u.fString.fChars[len] = 0;
- if (bundle->fStringSet != NULL) {
- /* put it into the set for finding duplicates */
- uhash_put(bundle->fStringSet, res, res, status);
- }
-
- if (bundle->fStringsForm != STRINGS_UTF16_V1) {
- if (len <= MAX_IMPLICIT_STRING_LENGTH && !U16_IS_TRAIL(value[0]) && len == u_strlen(value)) {
- /*
- * This string will be stored without an explicit length.
- * Runtime will detect !U16_IS_TRAIL(value[0]) and call u_strlen().
- */
- res->u.fString.fNumCharsForLength = 0;
- } else if (len <= 0x3ee) {
- res->u.fString.fNumCharsForLength = 1;
- } else if (len <= 0xfffff) {
- res->u.fString.fNumCharsForLength = 2;
- } else {
- res->u.fString.fNumCharsForLength = 3;
- }
- bundle->f16BitUnitsLength += res->u.fString.fNumCharsForLength + len + 1; /* +1 for the NUL */
- }
- } else {
- /* this is a duplicate of fSame */
- struct SResource *same = res->u.fString.fSame;
- res->u.fString.fChars = same->u.fString.fChars;
- }
- return res;
-}
-
-/* TODO: make alias_open and string_open use the same code */
-struct SResource *alias_open(struct SRBRoot *bundle, const char *tag, UChar *value, int32_t len, const struct UString* comment, UErrorCode *status) {
- struct SResource *res = res_open(bundle, tag, comment, status);
- if (U_FAILURE(*status)) {
- return NULL;
- }
- res->fType = URES_ALIAS;
- if (len == 0 && gFormatVersion > 1) {
- res->u.fString.fChars = &gEmptyString;
- res->fRes = URES_MAKE_EMPTY_RESOURCE(URES_ALIAS);
- res->fWritten = TRUE;
- return res;
- }
-
- res->u.fString.fLength = len;
- res->u.fString.fChars = (UChar *) uprv_malloc(sizeof(UChar) * (len + 1));
+ res->u.fString.fChars = (UChar *) uprv_malloc(sizeof(UChar) * (len + 1));
if (res->u.fString.fChars == NULL) {
*status = U_MEMORY_ALLOCATION_ERROR;
uprv_free(res);
return NULL;
}
- uprv_memcpy(res->u.fString.fChars, value, sizeof(UChar) * (len + 1));
+ uprv_memcpy(res->u.fString.fChars, value, sizeof(UChar) * len);
+ res->u.fString.fChars[len] = 0;
return res;
}
+struct SResource *string_open(struct SRBRoot *bundle, const char *tag, const UChar *value, int32_t len, const struct UString* comment, UErrorCode *status) {
+ return stringbase_open(bundle, tag, URES_STRING, value, len, comment, status);
+}
+
+struct SResource *alias_open(struct SRBRoot *bundle, const char *tag, UChar *value, int32_t len, const struct UString* comment, UErrorCode *status) {
+ return stringbase_open(bundle, tag, URES_ALIAS, value, len, comment, status);
+}
struct SResource* intvector_open(struct SRBRoot *bundle, const char *tag, const struct UString* comment, UErrorCode *status) {
struct SResource *res = res_open(bundle, tag, comment, status);
@@ -1142,9 +1183,7 @@
static void string_close(struct SResource *string) {
if (string->u.fString.fChars != NULL &&
- string->u.fString.fChars != &gEmptyString &&
- string->u.fString.fSame == NULL
- ) {
+ string->u.fString.fChars != &gEmptyString) {
uprv_free(string->u.fString.fChars);
string->u.fString.fChars =NULL;
}
@@ -1218,18 +1257,10 @@
uprv_free(bundle->fLocale);
uprv_free(bundle->fKeys);
uprv_free(bundle->fKeyMap);
- uhash_close(bundle->fStringSet);
uprv_free(bundle->f16BitUnits);
uprv_free(bundle);
}
-void bundle_closeString(struct SRBRoot *bundle, struct SResource *string) {
- if (bundle->fStringSet != NULL) {
- uhash_remove(bundle->fStringSet, string);
- }
- string_close(string);
-}
-
/* Adding Functions */
void table_add(struct SResource *table, struct SResource *res, int linenumber, UErrorCode *status) {
struct SResource *current = NULL;
@@ -1664,14 +1695,22 @@
static void
bundle_compactStrings(struct SRBRoot *bundle, UErrorCode *status) {
+ UHashtable *stringSet;
+ if (gFormatVersion > 1) {
+ stringSet = uhash_open(string_hash, string_comp, string_comp, status);
+ res_preflightStrings(bundle, bundle->fRoot, stringSet, status);
+ } else {
+ stringSet = NULL;
+ }
if (U_FAILURE(*status)) {
+ uhash_close(stringSet);
return;
}
switch(bundle->fStringsForm) {
case STRINGS_UTF16_V2:
if (bundle->f16BitUnitsLength > 0) {
struct SResource **array;
- int32_t count = uhash_count(bundle->fStringSet);
+ int32_t count = uhash_count(stringSet);
int32_t i, pos;
/*
* Allocate enough space for the initial NUL and the UTF-16 v2 strings,
@@ -1685,6 +1724,7 @@
uprv_free(bundle->f16BitUnits);
bundle->f16BitUnits = NULL;
uprv_free(array);
+ uhash_close(stringSet);
*status = U_MEMORY_ALLOCATION_ERROR;
return;
}
@@ -1694,7 +1734,7 @@
utf16Length = 1;
++bundle->f16BitUnitsLength;
for (pos = -1, i = 0; i < count; ++i) {
- array[i] = (struct SResource *)uhash_nextElement(bundle->fStringSet, &pos)->key.pointer;
+ array[i] = (struct SResource *)uhash_nextElement(stringSet, &pos)->key.pointer;
}
/* Sort the strings so that each one is immediately followed by all of its suffixes. */
uprv_sortArray(array, count, (int32_t)sizeof(struct SResource **),
@@ -1769,4 +1809,5 @@
default:
break;
}
+ uhash_close(stringSet);
}
--- a/source/tools/genrb/reslist.h
+++ b/source/tools/genrb/reslist.h
@@ -1,7 +1,7 @@
/*
*******************************************************************************
*
-* Copyright (C) 2000-2011, International Business Machines
+* Copyright (C) 2000-2014, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
@@ -29,7 +29,6 @@
#include "cstring.h"
#include "unewdata.h"
#include "ustr.h"
-#include "uhash.h"
U_CDECL_BEGIN
@@ -54,7 +53,6 @@
int32_t fKeysCount;
int32_t fLocalKeyLimit; /* key offset < limit fits into URES_TABLE */
- UHashtable *fStringSet;
uint16_t *f16BitUnits;
int32_t f16BitUnitsCapacity;
int32_t f16BitUnitsLength;
@@ -132,13 +130,6 @@
struct SResource *string_open(struct SRBRoot *bundle, const char *tag, const UChar *value, int32_t len, const struct UString* comment, UErrorCode *status);
-/**
- * Remove a string from a bundle and close (delete) it.
- * The string must not have been added to a table or array yet.
- * This function only undoes what string_open() did.
- */
-void bundle_closeString(struct SRBRoot *bundle, struct SResource *string);
-
struct SResource *alias_open(struct SRBRoot *bundle, const char *tag, UChar *value, int32_t len, const struct UString* comment, UErrorCode *status);
struct SResIntVector {