File simplify-utils.json.find_json-function.patch of Package salt

From 7cbb68f36824161743f4cc60d8920e2cea039e5e Mon Sep 17 00:00:00 2001
From: Marek Czernek <marek.czernek@suse.com>
Date: Fri, 9 Jan 2026 16:49:19 +0100
Subject: [PATCH] Simplify utils.json.find_json function

The previous implementation computed all combinations of potential JSON
documents and tried to `json.loads()` them. That resulted in num({) *
num(}) tries, which could take hours on large inputs.

The approach implemented with this change simplifies the work we do: we
only look for opening '{' and '[' characters, and try to parse the rest
of the input string with JSONDecoder.raw_decode. This method ignores
extraneous data at the end and is faster than doing it ourselves in
Python.

Co-authored-by: Alexander Graul <agraul@suse.com>
---
 changelog/68258.fixed.md              |  1 +
 salt/utils/json.py                    | 80 ++++++---------------------
 tests/pytests/unit/utils/test_json.py |  5 --
 tests/unit/utils/test_json.py         | 12 ++++
 4 files changed, 31 insertions(+), 67 deletions(-)
 create mode 100644 changelog/68258.fixed.md

diff --git a/changelog/68258.fixed.md b/changelog/68258.fixed.md
new file mode 100644
index 0000000000..a9afeccef7
--- /dev/null
+++ b/changelog/68258.fixed.md
@@ -0,0 +1 @@
+Simplified and sped up `utils.json.find_json` function
diff --git a/salt/utils/json.py b/salt/utils/json.py
index 26cb38cdbe..1605e75f9f 100644
--- a/salt/utils/json.py
+++ b/salt/utils/json.py
@@ -2,7 +2,7 @@
 Functions to work with JSON
 """
 
-
+import contextlib
 import json
 import logging
 
@@ -25,69 +25,25 @@ def __split(raw):
     return raw.splitlines()
 
 
-def find_json(raw):
-    """
-    Pass in a raw string and load the json when it starts. This allows for a
-    string to start with garbage and end with json but be cleanly loaded
-    """
-    ret = {}
-    lines = __split(raw)
-    lengths = list(map(len, lines))
-    starts = []
-    ends = []
-
-    # Search for possible starts end ends of the json fragments
-    for ind, _ in enumerate(lines):
-        line = lines[ind].lstrip()
-        line = line[0] if line else line
-        if line == "{" or line == "[":
-            starts.append((ind, line))
-        if line == "}" or line == "]":
-            ends.append((ind, line))
-
-    # List all the possible pairs of starts and ends,
-    # and fill the length of each block to sort by size after
-    starts_ends = []
-    for start, start_br in starts:
-        for end, end_br in reversed(ends):
-            if end > start and (
-                (start_br == "{" and end_br == "}")
-                or (start_br == "[" and end_br == "]")
-            ):
-                starts_ends.append((start, end, sum(lengths[start : end + 1])))
-
-    # Iterate through all the possible pairs starting from the largest
-    starts_ends.sort(key=lambda x: (x[2], x[1] - x[0], x[0]), reverse=True)
-    for start, end, _ in starts_ends:
-        working = "\n".join(lines[start : end + 1])
-        try:
-            ret = json.loads(working)
-            return ret
-        except ValueError:
-            pass
-        # Try filtering non-JSON text right after the last closing curly brace
-        end_str = lines[end].lstrip()[0]
-        working = "\n".join(lines[start : end]) + end_str
-        try:
-            ret = json.loads(working)
-            return ret
-        except ValueError:
-            continue
+def find_json(s: str):
+    """Pass in a string and load JSON within it.
 
-    # Fall back to old implementation for backward compatibility
-    # excpecting json after the text
-    for ind, _ in enumerate(lines):
-        working = "\n".join(lines[ind:])
-        try:
-            ret = json.loads(working)
-        except ValueError:
-            continue
-        if ret:
-            return ret
+    The string may contain non-JSON text before and after the JSON document.
 
-    if not ret:
-        # Not json, raise an error
-        raise ValueError
+    Raises ValueError if no valid JSON was found.
+    """
+    decoder = json.JSONDecoder()
+
+    # We look for the beginning of JSON objects / arrays and let raw_decode() handle
+    # extraneous data at the end.
+    for idx, char in enumerate(s):
+        if char == "{" or char == "[":
+            # JSONDecodeErrors are expected on stray '{'/'[' in the non-JSON part
+            with contextlib.suppress(json.JSONDecodeError):
+                data, _ = decoder.raw_decode(s[idx:])
+                return data
+
+    raise ValueError
 
 
 def import_json():
diff --git a/tests/pytests/unit/utils/test_json.py b/tests/pytests/unit/utils/test_json.py
index 72b1023003..f7aed28b42 100644
--- a/tests/pytests/unit/utils/test_json.py
+++ b/tests/pytests/unit/utils/test_json.py
@@ -107,11 +107,6 @@ def test_find_json():
     ret = salt.utils.json.find_json(garbage_around_json)
     assert ret == expected_ret
 
-    # Now pre-pend small json and re-test
-    small_json_pre_json = f"{test_small_json}{test_sample_json}"
-    ret = salt.utils.json.find_json(small_json_pre_json)
-    assert ret == expected_ret
-
     # Now post-pend small json and re-test
     small_json_post_json = f"{test_sample_json}{test_small_json}"
     ret = salt.utils.json.find_json(small_json_post_json)
diff --git a/tests/unit/utils/test_json.py b/tests/unit/utils/test_json.py
index 5ea409a705..f5dcc1f72d 100644
--- a/tests/unit/utils/test_json.py
+++ b/tests/unit/utils/test_json.py
@@ -49,6 +49,18 @@ class JSONTestCase(TestCase):
         )
     )
 
+    def test_find_json_unbalanced_brace_in_string(self):
+        test_sample_json = '{"title": "I like curly braces like this one:{"}'
+        expected_ret = {"title": "I like curly braces like this one:{"}
+        ret = salt.utils.json.find_json(test_sample_json)
+        self.assertDictEqual(ret, expected_ret)
+
+    def test_find_json_unbalanced_square_bracket_in_string(self):
+        test_sample_json = '{"title": "I like square brackets like this one:["}'
+        expected_ret = {"title": "I like square brackets like this one:["}
+        ret = salt.utils.json.find_json(test_sample_json)
+        self.assertDictEqual(ret, expected_ret)
+
     def test_find_json(self):
         test_sample_json = """
                             {
-- 
2.52.0
Places

File simplify-utils.json.find_json-function.patch of Package salt

Places