File CVE-2018-21232-2.patch of Package re2c.30052

From 7b5643476bd99c994c4f51b8143f942982d85521 Mon Sep 17 00:00:00 2001
From: Ulya Trofimovich <skvadrik@gmail.com>
Date: Wed, 22 Apr 2020 22:37:24 +0100
Subject: [PATCH] Rewrite recursion into iteration (fixed tags computation).

This is to avoid stack overflow on large RE (especially on instrumented
builds that have larger stack frames, like AddressSanitizer).

Partial fix for #219 "overflow-1.re test fails on system with small stack".

Upstream-Status: Backport:
https://github.com/skvadrik/re2c/commit/7b5643476bd99c994c4f51b8143f942982d85521

CVE: CVE-2018-21232

Signed-off-by: Davide Gardenal <davide.gardenal@huawei.com>
---
diff --git a/src/re/tag.cc b/src/re/tag.cc
--- a/src/re/tag.cc	(revision e58939b34bb4c37cd990f82dc286f21cb405743e)
+++ b/src/re/tag.cc	(date 1646986908580)
@@ -6,7 +6,7 @@
 {
 
 const size_t Tag::RIGHTMOST = std::numeric_limits<size_t>::max();
-const size_t Tag::VARDIST = std::numeric_limits<size_t>::max();
+const uint32_t Tag::VARDIST = std::numeric_limits<uint32_t>::max();
 const size_t Tag::FICTIVE = Tag::RIGHTMOST - 1;
 
 } // namespace re2c


diff --git a/src/re/tag.h b/src/re/tag.h
--- a/src/re/tag.h	(revision e58939b34bb4c37cd990f82dc286f21cb405743e)
+++ b/src/re/tag.h	(date 1646986922376)
@@ -19,7 +19,7 @@
 struct Tag
 {
 	static const size_t RIGHTMOST;
-	static const size_t VARDIST;
+    static const uint32_t VARDIST;
 	static const size_t FICTIVE;
 
 	const std::string *name;


diff --git a/src/re/fixed_tags.cc b/src/re/fixed_tags.cc
--- a/src/re/fixed_tags.cc	(revision e58939b34bb4c37cd990f82dc286f21cb405743e)
+++ b/src/re/fixed_tags.cc	(date 1646991137317)
@@ -7,78 +7,131 @@
 #include "src/re/tag.h"
 
 namespace re2c {
+namespace {
 
 /* note [fixed and variable tags]
  *
- * If distance between two tags is constant (equal for all strings that
- * match the given regexp), then lexer only needs to track one of them:
- * the second tag equals the first tag plus static offset.
+ * If distance between two tags is constant (equal for all strings that match
+ * the given regexp), then lexer only needs to track one of them: the second
+ * tag equals the first tag plus static offset.
  *
- * However, this optimization is applied only to tags in top-level
- * concatenation, because other tags may be uninitialized and we don't
- * want to mess with conditional calculation of fixed tags.
- *
+ * This optimization is applied only to tags in top-level concatenation,
+ * because in other cases the base tag may be NULL, and the calculation of
+ * the fixed tag value is not as simple as subtracting a fixed offset.
  * Furthermore, fixed tags are fobidden with generic API because it cannot
- * express fixed offsets.
- *
- * Tags with history also cannot be fixed.
+ * express fixed offsets. M-tags (with history) also cannot be fixed.
  *
  * Another special case is fictive tags (those that exist only to impose
- * hierarchical laws of POSIX disambiguation). We treat them as fixed
- * in order to suppress code generation.
+ * hierarchical laws of POSIX disambiguation). We treat them as fixed in order
+ * to suppress code generation.
  */
 
-static void find_fixed_tags(RE *re, std::vector<Tag> &tags,
-	size_t &dist, size_t &base, bool toplevel)
+struct StackItem {
+    RE       *re;       // current sub-RE
+    uint32_t  dist;     // distance backup for alternative, unused for other RE
+    uint8_t   succ;     // index of the next successor to be visited
+    bool      toplevel; // if this sub-RE is in top-level concatenation
+};
+
+static void find_fixed_tags(RESpec &spec, std::vector<StackItem> &stack, RE *re0)
 {
-	switch (re->type) {
-		case RE::NIL: break;
-		case RE::SYM:
-			if (dist != Tag::VARDIST) ++dist;
-			break;
-		case RE::ALT: {
-			size_t d1 = dist, d2 = dist;
-			find_fixed_tags(re->alt.re1, tags, d1, base, false);
-			find_fixed_tags(re->alt.re2, tags, d2, base, false);
-			dist = (d1 == d2) ? d1 : Tag::VARDIST;
-			break;
-		}
-		case RE::CAT:
-			find_fixed_tags(re->cat.re2, tags, dist, base, toplevel);
-			find_fixed_tags(re->cat.re1, tags, dist, base, toplevel);
-			break;
-		case RE::ITER:
-			find_fixed_tags(re->iter.re, tags, dist, base, false);
-			dist = Tag::VARDIST;
-			break;
-		case RE::TAG: {
-			// see note [fixed and variable tags]
-			Tag &tag = tags[re->tag.idx];
-			if (fictive(tag)) {
-				tag.base = tag.dist = 0;
-			} else if (toplevel && dist != Tag::VARDIST && !history(tag)) {
-				tag.base = base;
-				tag.dist = dist;
-			} else if (toplevel) {
-				base = re->tag.idx;
-				dist = 0;
-			}
-			if (trailing(tag)) dist = 0;
-			break;
-		}
-	}
+    static const uint32_t VARDIST = Tag::VARDIST;
+    bool toplevel = spec.opts->input_api != INPUT_CUSTOM;
+
+    // base tag, initially the fake "rightmost tag" (the end of RE)
+    size_t base = Tag::RIGHTMOST;
+
+    // the distance to the nearest top-level tag to the right (base tag)
+    uint32_t dist = 0;
+
+    const StackItem i0 = {re0, VARDIST, 0, toplevel};
+    stack.push_back(i0);
+
+    while (!stack.empty()) {
+        const StackItem i = stack.back();
+        stack.pop_back();
+        RE *re = i.re;
+
+        if (re->type == RE::SYM) {
+            if (dist != VARDIST) ++dist;
+        }
+        else if (re->type == RE::ALT) {
+            if (i.succ == 0) {
+                // save the current distance on stack (from the alternative end
+                // to base) and recurse into the left sub-RE
+                StackItem k = {re, dist, 1, i.toplevel};
+                stack.push_back(k);
+                StackItem j = {re->alt.re1, VARDIST, 0, false};
+                stack.push_back(j);
+            }
+            else if (i.succ == 1) {
+                // save the current distance on stack (from the left sub-RE to
+                // base), reset distance to the distance popped from stack (from
+                // the alternative end to base), recurse into the right sub-RE
+                StackItem k = {re, dist, 2, i.toplevel};
+                stack.push_back(k);
+                StackItem j = {re->alt.re2, VARDIST, 0, false};
+                stack.push_back(j);
+                dist = i.dist;
+            }
+            else {
+                // both sub-RE visited, compare the distance on stack (from the
+                // left sub-RE to base) to the current distance (from the right
+                // sub-RE to base), if not equal set variable distance
+                dist = (i.dist == dist) ? i.dist : VARDIST;
+            }
+        }
+        else if (re->type == RE::ITER) {
+            if (i.succ == 0) {
+                // recurse into the sub-RE
+                StackItem k = {re, VARDIST, 1, i.toplevel};
+                stack.push_back(k);
+                StackItem j = {re->iter.re, VARDIST, 0, false};
+                stack.push_back(j);
+            }
+            else {
+                // sub-RE visited, assume unknown number of iterations
+                // TODO: find precise distance for fixed repetition counter
+                dist = VARDIST;
+            }
+        }
+        else if (re->type == RE::CAT) {
+            // the right sub-RE is pushed on stack after the left sub-RE and
+            // visited earlier (because distance is computed from right to left)
+            StackItem j1 = {re->cat.re1, VARDIST, 0, i.toplevel};
+            stack.push_back(j1);
+            StackItem j2 = {re->cat.re2, VARDIST, 0, i.toplevel};
+            stack.push_back(j2);
+        }
+        else if (re->type == RE::TAG) {
+            // see note [fixed and variable tags]
+            Tag &tag = spec.tags[re->tag.idx];
+            if (fictive(tag)) {
+                tag.base = tag.dist = 0;
+            }
+            else if (i.toplevel && dist != VARDIST && !history(tag)) {
+                tag.base = base;
+                tag.dist = dist;
+            }
+            else if (i.toplevel) {
+                base = re->tag.idx;
+                dist = 0;
+            }
+            if (trailing(tag)) {
+                dist = 0;
+            }
+        }
+    }
 }
+
+} // anonymous namespace
 
-void find_fixed_tags(RESpec &spec)
-{
-	const bool generic = spec.opts->input_api == INPUT_CUSTOM;
-	std::vector<RE*>::iterator
-		i = spec.res.begin(),
-		e = spec.res.end();
-	for (; i != e; ++i) {
-		size_t base = Tag::RIGHTMOST, dist = 0;
-		find_fixed_tags(*i, spec.tags, dist, base, !generic);
-	}
-}
+    void find_fixed_tags(RESpec &spec)
+    {
+        std::vector<StackItem> stack;
+        for (std::vector<RE*>::iterator i = spec.res.begin(); i != spec.res.end(); ++i) {
+            find_fixed_tags(spec, stack, *i);
+        }
+    }
 
-} // namespace re2c
+} // namespace re2c
\ No newline at end of file
openSUSE Build Service is sponsored by