File icu-CVE-2025-5222-shim06_9e4365c.patch of Package icu

From 9e4365c9e6ae95aad618af4bf0ae690c5d43d441 Mon Sep 17 00:00:00 2001
From: Markus Scherer <markus.icu@gmail.com>
Date: Fri, 28 Mar 2014 22:03:03 +0000
Subject: [PATCH] ICU-10810 genrb: preflight strings on final parse tree, not
 while building the tree

X-SVN-Rev: 35544
---
 icu4c/source/tools/genrb/parse.cpp |  42 ++++--
 icu4c/source/tools/genrb/reslist.c | 222 +++++++++++++++++------------
 icu4c/source/tools/genrb/reslist.h |  11 +-
 3 files changed, 160 insertions(+), 115 deletions(-)

--- a/source/tools/genrb/parse.cpp
+++ b/source/tools/genrb/parse.cpp
@@ -886,7 +886,11 @@
             return NULL;
         }
 
-        if (uprv_strcmp(subtag, "Version") == 0)
+        if (result == NULL)
+        {
+            // Ignore the parsed resources, continue parsing.
+        }
+        else if (uprv_strcmp(subtag, "Version") == 0)
         {
             char     ver[40];
             int32_t length = member->u.fString.fLength;
@@ -900,13 +904,7 @@
             u_versionFromString(version, ver);
 
             table_add(result, member, line, status);
-
-        }
-        else if (uprv_strcmp(subtag, "Override") == 0)
-        {
-            // UBool override = (u_strncmp(member->u.fString.fChars, trueValue, u_strlen(trueValue)) == 0);
-            table_add(result, member, line, status);
-
+            member = NULL;
         }
         else if(uprv_strcmp(subtag, "%%CollationBin")==0)
         {
@@ -1013,12 +1011,17 @@
 #endif
             /* in order to achieve smaller data files, we can direct genrb */
             /* to omit collation rules */
-            if(state->omitCollationRules) {
-                bundle_closeString(state->bundle, member);
-            } else {
+            if(!state->omitCollationRules) {
                 table_add(result, member, line, status);
+                member = NULL;
             }
         }
+        else  // Just copy non-special items.
+        {
+            table_add(result, member, line, status);
+            member = NULL;
+        }
+        res_close(member);  // TODO: use LocalPointer
         if (U_FAILURE(*status))
         {
             res_close(result);
@@ -1031,6 +1034,11 @@
     return NULL;
 }
 
+static UBool
+keepCollationType(const char *type) {
+    return gIncludeUnihanColl || uprv_strcmp(type, "unihan") != 0;
+}
+
 static struct SResource *
 parseCollationElements(ParseState* state, char *tag, uint32_t startline, UBool newCollation, UErrorCode *status)
 {
@@ -1110,9 +1118,14 @@
                 /* then, we cannot handle aliases */
                 if(token == TOK_OPEN_BRACE) {
                     token = getToken(state, &tokenValue, &comment, &line, status);
-                    collationRes = table_open(state->bundle, subtag, NULL, status);
-                    collationRes = addCollation(state, collationRes, subtag, startline, status); /* need to parse the collation data regardless */
-                    if (gIncludeUnihanColl || uprv_strcmp(subtag, "unihan") != 0) {
+                    if (keepCollationType(subtag)) {
+                        collationRes = table_open(state->bundle, subtag, NULL, status);
+                    } else {
+                        collationRes = NULL;
+                    }
+                    // need to parse the collation data regardless
+                    collationRes = addCollation(state, collationRes, subtag, startline, status);
+                    if (collationRes != NULL) {
                         table_add(result, collationRes, startline, status);
                     }
                 } else if(token == TOK_COLON) { /* right now, we'll just try to see if we have aliases */

--- a/source/tools/genrb/reslist.c
+++ b/source/tools/genrb/reslist.c
@@ -1,7 +1,7 @@
 /*
 *******************************************************************************
 *
-*   Copyright (C) 2000-2012, International Business Machines
+*   Copyright (C) 2000-2014, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *
 *******************************************************************************
@@ -25,6 +25,7 @@
 
 #include "uarrsort.h"
 #include "uelement.h"
+#include "uhash.h"
 #include "uinvchar.h"
 #include "ustr_imp.h"
 #include "unicode/utf16.h"
@@ -105,6 +106,19 @@
 /* Writing Functions */
 
 /*
+ * Preflight strings.
+ * Find duplicates and count the total number of string code units
+ * so that they can be written first to the 16-bit array,
+ * for minimal string and container storage.
+ *
+ * We walk the final parse tree, rather than collecting this information while building it,
+ * so that we need not deal with changes to the parse tree (especially removing resources).
+ */
+static void
+res_preflightStrings(struct SRBRoot *bundle, struct SResource *res, UHashtable *stringSet,
+                     UErrorCode *status);
+
+/*
  * type_write16() functions write resource values into f16BitUnits
  * and determine the resource item word, if possible.
  */
@@ -112,6 +126,92 @@
 res_write16(struct SRBRoot *bundle, struct SResource *res,
             UErrorCode *status);
 
+static void
+string_preflightStrings(struct SRBRoot *bundle, struct SResource *res, UHashtable *stringSet,
+                        UErrorCode *status) {
+    res->u.fString.fSame = uhash_get(stringSet, res);
+    if (res->u.fString.fSame != NULL) {
+        return;  /* This is a duplicate of an earlier-visited string. */
+    }
+    /* Put this string into the set for finding duplicates. */
+    uhash_put(stringSet, res, res, status);
+
+    if (bundle->fStringsForm != STRINGS_UTF16_V1) {
+        const UChar *s = res->u.fString.fChars;
+        int32_t len = res->u.fString.fLength;
+        if (len <= MAX_IMPLICIT_STRING_LENGTH && !U16_IS_TRAIL(s[0]) && len == u_strlen(s)) {
+            /*
+             * This string will be stored without an explicit length.
+             * Runtime will detect !U16_IS_TRAIL(s[0]) and call u_strlen().
+             */
+            res->u.fString.fNumCharsForLength = 0;
+        } else if (len <= 0x3ee) {
+            res->u.fString.fNumCharsForLength = 1;
+        } else if (len <= 0xfffff) {
+            res->u.fString.fNumCharsForLength = 2;
+        } else {
+            res->u.fString.fNumCharsForLength = 3;
+        }
+        bundle->f16BitUnitsLength += res->u.fString.fNumCharsForLength + len + 1;  /* +1 for the NUL */
+    }
+}
+
+static void
+array_preflightStrings(struct SRBRoot *bundle, struct SResource *res, UHashtable *stringSet,
+                       UErrorCode *status) {
+    struct SResource *current;
+
+    if (U_FAILURE(*status)) {
+        return;
+    }
+    for (current = res->u.fArray.fFirst; current != NULL; current = current->fNext) {
+        res_preflightStrings(bundle, current, stringSet, status);
+    }
+}
+
+static void
+table_preflightStrings(struct SRBRoot *bundle, struct SResource *res, UHashtable *stringSet,
+                       UErrorCode *status) {
+    struct SResource *current;
+
+    if (U_FAILURE(*status)) {
+        return;
+    }
+    for (current = res->u.fTable.fFirst; current != NULL; current = current->fNext) {
+        res_preflightStrings(bundle, current, stringSet, status);
+    }
+}
+
+static void
+res_preflightStrings(struct SRBRoot *bundle, struct SResource *res, UHashtable *stringSet,
+                     UErrorCode *status) {
+    if (U_FAILURE(*status) || res == NULL) {
+        return;
+    }
+    if (res->fRes != RES_BOGUS) {
+        /*
+         * The resource item word was already precomputed, which means
+         * no further data needs to be written.
+         * This might be an integer, or an empty string/binary/etc.
+         */
+        return;
+    }
+    switch (res->fType) {
+    case URES_STRING:
+        string_preflightStrings(bundle, res, stringSet, status);
+        break;
+    case URES_ARRAY:
+        array_preflightStrings(bundle, res, stringSet, status);
+        break;
+    case URES_TABLE:
+        table_preflightStrings(bundle, res, stringSet, status);
+        break;
+    default:
+        /* Neither a string nor a container. */
+        break;
+    }
+}
+
 /*
  * type_preWrite() functions calculate ("preflight") and advance the *byteOffset
  * by the size of their data in the binary file and
@@ -221,10 +321,7 @@
     struct SResource *same;
     if ((same = res->u.fString.fSame) != NULL) {
         /* This is a duplicate. */
-        if (same->fRes == RES_BOGUS) {
-            /* The original has not been visited yet. */
-            string_write16(bundle, same, status);
-        }
+        assert(same->fRes != RES_BOGUS && same->fWritten);
         res->fRes = same->fRes;
         res->fWritten = same->fWritten;
     }
@@ -900,98 +997,42 @@
                              FALSE);
 }
 
-struct SResource *string_open(struct SRBRoot *bundle, const char *tag, const UChar *value, int32_t len, const struct UString* comment, UErrorCode *status) {
+static struct SResource *
+stringbase_open(struct SRBRoot *bundle, const char *tag, int8_t type,
+                const UChar *value, int32_t len, const struct UString* comment,
+                UErrorCode *status) {
     struct SResource *res = res_open(bundle, tag, comment, status);
     if (U_FAILURE(*status)) {
         return NULL;
     }
-    res->fType = URES_STRING;
+    res->fType = type;
 
     if (len == 0 && gFormatVersion > 1) {
         res->u.fString.fChars = &gEmptyString;
-        res->fRes = 0;
+        res->fRes = URES_MAKE_EMPTY_RESOURCE(type);
         res->fWritten = TRUE;
         return res;
     }
 
     res->u.fString.fLength = len;
-
-    if (gFormatVersion > 1) {
-        /* check for duplicates */
-        res->u.fString.fChars  = (UChar *)value;
-        if (bundle->fStringSet == NULL) {
-            UErrorCode localStatus = U_ZERO_ERROR;  /* if failure: just don't detect dups */
-            bundle->fStringSet = uhash_open(string_hash, string_comp, string_comp, &localStatus);
-        } else {
-            res->u.fString.fSame = uhash_get(bundle->fStringSet, res);
-        }
-    }
-    if (res->u.fString.fSame == NULL) {
-        /* this is a new string */
-        res->u.fString.fChars = (UChar *) uprv_malloc(sizeof(UChar) * (len + 1));
-
-        if (res->u.fString.fChars == NULL) {
-            *status = U_MEMORY_ALLOCATION_ERROR;
-            uprv_free(res);
-            return NULL;
-        }
-
-        uprv_memcpy(res->u.fString.fChars, value, sizeof(UChar) * len);
-        res->u.fString.fChars[len] = 0;
-        if (bundle->fStringSet != NULL) {
-            /* put it into the set for finding duplicates */
-            uhash_put(bundle->fStringSet, res, res, status);
-        }
-
-        if (bundle->fStringsForm != STRINGS_UTF16_V1) {
-            if (len <= MAX_IMPLICIT_STRING_LENGTH && !U16_IS_TRAIL(value[0]) && len == u_strlen(value)) {
-                /*
-                 * This string will be stored without an explicit length.
-                 * Runtime will detect !U16_IS_TRAIL(value[0]) and call u_strlen().
-                 */
-                res->u.fString.fNumCharsForLength = 0;
-            } else if (len <= 0x3ee) {
-                res->u.fString.fNumCharsForLength = 1;
-            } else if (len <= 0xfffff) {
-                res->u.fString.fNumCharsForLength = 2;
-            } else {
-                res->u.fString.fNumCharsForLength = 3;
-            }
-            bundle->f16BitUnitsLength += res->u.fString.fNumCharsForLength + len + 1;  /* +1 for the NUL */
-        }
-    } else {
-        /* this is a duplicate of fSame */
-        struct SResource *same = res->u.fString.fSame;
-        res->u.fString.fChars = same->u.fString.fChars;
-    }
-    return res;
-}
-
-/* TODO: make alias_open and string_open use the same code */
-struct SResource *alias_open(struct SRBRoot *bundle, const char *tag, UChar *value, int32_t len, const struct UString* comment, UErrorCode *status) {
-    struct SResource *res = res_open(bundle, tag, comment, status);
-    if (U_FAILURE(*status)) {
-        return NULL;
-    }
-    res->fType = URES_ALIAS;
-    if (len == 0 && gFormatVersion > 1) {
-        res->u.fString.fChars = &gEmptyString;
-        res->fRes = URES_MAKE_EMPTY_RESOURCE(URES_ALIAS);
-        res->fWritten = TRUE;
-        return res;
-    }
-
-    res->u.fString.fLength = len;
-    res->u.fString.fChars  = (UChar *) uprv_malloc(sizeof(UChar) * (len + 1));
+    res->u.fString.fChars = (UChar *) uprv_malloc(sizeof(UChar) * (len + 1));
     if (res->u.fString.fChars == NULL) {
         *status = U_MEMORY_ALLOCATION_ERROR;
         uprv_free(res);
         return NULL;
     }
-    uprv_memcpy(res->u.fString.fChars, value, sizeof(UChar) * (len + 1));
+    uprv_memcpy(res->u.fString.fChars, value, sizeof(UChar) * len);
+    res->u.fString.fChars[len] = 0;
     return res;
 }
 
+struct SResource *string_open(struct SRBRoot *bundle, const char *tag, const UChar *value, int32_t len, const struct UString* comment, UErrorCode *status) {
+    return stringbase_open(bundle, tag, URES_STRING, value, len, comment, status);
+}
+
+struct SResource *alias_open(struct SRBRoot *bundle, const char *tag, UChar *value, int32_t len, const struct UString* comment, UErrorCode *status) {
+    return stringbase_open(bundle, tag, URES_ALIAS, value, len, comment, status);
+}
 
 struct SResource* intvector_open(struct SRBRoot *bundle, const char *tag, const struct UString* comment, UErrorCode *status) {
     struct SResource *res = res_open(bundle, tag, comment, status);
@@ -1142,9 +1183,7 @@
 
 static void string_close(struct SResource *string) {
     if (string->u.fString.fChars != NULL &&
-        string->u.fString.fChars != &gEmptyString &&
-        string->u.fString.fSame == NULL
-    ) {
+             string->u.fString.fChars != &gEmptyString) {
         uprv_free(string->u.fString.fChars);
         string->u.fString.fChars =NULL;
     }
@@ -1218,18 +1257,10 @@
     uprv_free(bundle->fLocale);
     uprv_free(bundle->fKeys);
     uprv_free(bundle->fKeyMap);
-    uhash_close(bundle->fStringSet);
     uprv_free(bundle->f16BitUnits);
     uprv_free(bundle);
 }
 
-void bundle_closeString(struct SRBRoot *bundle, struct SResource *string) {
-    if (bundle->fStringSet != NULL) {
-        uhash_remove(bundle->fStringSet, string);
-    }
-    string_close(string);
-}
-
 /* Adding Functions */
 void table_add(struct SResource *table, struct SResource *res, int linenumber, UErrorCode *status) {
     struct SResource *current = NULL;
@@ -1664,14 +1695,22 @@
 
 static void
 bundle_compactStrings(struct SRBRoot *bundle, UErrorCode *status) {
+    UHashtable *stringSet;
+    if (gFormatVersion > 1) {
+        stringSet = uhash_open(string_hash, string_comp, string_comp, status);
+        res_preflightStrings(bundle, bundle->fRoot, stringSet, status);
+    } else {
+        stringSet = NULL;
+    }
     if (U_FAILURE(*status)) {
+        uhash_close(stringSet);
         return;
     }
     switch(bundle->fStringsForm) {
     case STRINGS_UTF16_V2:
         if (bundle->f16BitUnitsLength > 0) {
             struct SResource **array;
-            int32_t count = uhash_count(bundle->fStringSet);
+            int32_t count = uhash_count(stringSet);
             int32_t i, pos;
             /*
              * Allocate enough space for the initial NUL and the UTF-16 v2 strings,
@@ -1685,6 +1724,7 @@
                 uprv_free(bundle->f16BitUnits);
                 bundle->f16BitUnits = NULL;
                 uprv_free(array);
+                uhash_close(stringSet);
                 *status = U_MEMORY_ALLOCATION_ERROR;
                 return;
             }
@@ -1694,7 +1734,7 @@
             utf16Length = 1;
             ++bundle->f16BitUnitsLength;
             for (pos = -1, i = 0; i < count; ++i) {
-                array[i] = (struct SResource *)uhash_nextElement(bundle->fStringSet, &pos)->key.pointer;
+                array[i] = (struct SResource *)uhash_nextElement(stringSet, &pos)->key.pointer;
             }
             /* Sort the strings so that each one is immediately followed by all of its suffixes. */
             uprv_sortArray(array, count, (int32_t)sizeof(struct SResource **),
@@ -1769,4 +1809,5 @@
     default:
         break;
     }
+    uhash_close(stringSet);
 }

--- a/source/tools/genrb/reslist.h
+++ b/source/tools/genrb/reslist.h
@@ -1,7 +1,7 @@
 /*
 *******************************************************************************
 *
-*   Copyright (C) 2000-2011, International Business Machines
+*   Copyright (C) 2000-2014, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *
 *******************************************************************************
@@ -29,7 +29,6 @@
 #include "cstring.h"
 #include "unewdata.h"
 #include "ustr.h"
-#include "uhash.h"
 
 U_CDECL_BEGIN
 
@@ -54,7 +53,6 @@
   int32_t fKeysCount;
   int32_t fLocalKeyLimit; /* key offset < limit fits into URES_TABLE */
 
-  UHashtable *fStringSet;
   uint16_t *f16BitUnits;
   int32_t f16BitUnitsCapacity;
   int32_t f16BitUnitsLength;
@@ -132,13 +130,6 @@
 
 struct SResource *string_open(struct SRBRoot *bundle, const char *tag, const UChar *value, int32_t len, const struct UString* comment, UErrorCode *status);
 
-/**
- * Remove a string from a bundle and close (delete) it.
- * The string must not have been added to a table or array yet.
- * This function only undoes what string_open() did.
- */
-void bundle_closeString(struct SRBRoot *bundle, struct SResource *string);
-
 struct SResource *alias_open(struct SRBRoot *bundle, const char *tag, UChar *value, int32_t len, const struct UString* comment, UErrorCode *status);
 
 struct SResIntVector {
openSUSE Build Service is sponsored by