File simplify-utils.json.find_json-function.patch of Package salt
From 7cbb68f36824161743f4cc60d8920e2cea039e5e Mon Sep 17 00:00:00 2001
From: Marek Czernek <marek.czernek@suse.com>
Date: Fri, 9 Jan 2026 16:49:19 +0100
Subject: [PATCH] Simplify utils.json.find_json function
The previous implementation computed all combinations of potential JSON
documents and tried to `json.loads()` them. That resulted in num({) *
num(}) tries, which could take hours on large inputs.
The approach implemented with this change simplifies the work we do: we
only look for opening '{' and '[' characters, and try to parse the rest
of the input string with JSONDecoder.raw_decode. This method ignores
extraneous data at the end and is faster than doing it ourselves in
Python.
Co-authored-by: Alexander Graul <agraul@suse.com>
---
changelog/68258.fixed.md | 1 +
salt/utils/json.py | 80 ++++++---------------------
tests/pytests/unit/utils/test_json.py | 5 --
tests/unit/utils/test_json.py | 12 ++++
4 files changed, 31 insertions(+), 67 deletions(-)
create mode 100644 changelog/68258.fixed.md
diff --git a/changelog/68258.fixed.md b/changelog/68258.fixed.md
new file mode 100644
index 0000000000..a9afeccef7
--- /dev/null
+++ b/changelog/68258.fixed.md
@@ -0,0 +1 @@
+Simplified and sped up `utils.json.find_json` function
diff --git a/salt/utils/json.py b/salt/utils/json.py
index 26cb38cdbe..1605e75f9f 100644
--- a/salt/utils/json.py
+++ b/salt/utils/json.py
@@ -2,7 +2,7 @@
Functions to work with JSON
"""
-
+import contextlib
import json
import logging
@@ -25,69 +25,25 @@ def __split(raw):
return raw.splitlines()
-def find_json(raw):
- """
- Pass in a raw string and load the json when it starts. This allows for a
- string to start with garbage and end with json but be cleanly loaded
- """
- ret = {}
- lines = __split(raw)
- lengths = list(map(len, lines))
- starts = []
- ends = []
-
- # Search for possible starts end ends of the json fragments
- for ind, _ in enumerate(lines):
- line = lines[ind].lstrip()
- line = line[0] if line else line
- if line == "{" or line == "[":
- starts.append((ind, line))
- if line == "}" or line == "]":
- ends.append((ind, line))
-
- # List all the possible pairs of starts and ends,
- # and fill the length of each block to sort by size after
- starts_ends = []
- for start, start_br in starts:
- for end, end_br in reversed(ends):
- if end > start and (
- (start_br == "{" and end_br == "}")
- or (start_br == "[" and end_br == "]")
- ):
- starts_ends.append((start, end, sum(lengths[start : end + 1])))
-
- # Iterate through all the possible pairs starting from the largest
- starts_ends.sort(key=lambda x: (x[2], x[1] - x[0], x[0]), reverse=True)
- for start, end, _ in starts_ends:
- working = "\n".join(lines[start : end + 1])
- try:
- ret = json.loads(working)
- return ret
- except ValueError:
- pass
- # Try filtering non-JSON text right after the last closing curly brace
- end_str = lines[end].lstrip()[0]
- working = "\n".join(lines[start : end]) + end_str
- try:
- ret = json.loads(working)
- return ret
- except ValueError:
- continue
+def find_json(s: str):
+ """Pass in a string and load JSON within it.
- # Fall back to old implementation for backward compatibility
- # excpecting json after the text
- for ind, _ in enumerate(lines):
- working = "\n".join(lines[ind:])
- try:
- ret = json.loads(working)
- except ValueError:
- continue
- if ret:
- return ret
+ The string may contain non-JSON text before and after the JSON document.
- if not ret:
- # Not json, raise an error
- raise ValueError
+ Raises ValueError if no valid JSON was found.
+ """
+ decoder = json.JSONDecoder()
+
+ # We look for the beginning of JSON objects / arrays and let raw_decode() handle
+ # extraneous data at the end.
+ for idx, char in enumerate(s):
+ if char == "{" or char == "[":
+ # JSONDecodeErrors are expected on stray '{'/'[' in the non-JSON part
+ with contextlib.suppress(json.JSONDecodeError):
+ data, _ = decoder.raw_decode(s[idx:])
+ return data
+
+ raise ValueError
def import_json():
diff --git a/tests/pytests/unit/utils/test_json.py b/tests/pytests/unit/utils/test_json.py
index 72b1023003..f7aed28b42 100644
--- a/tests/pytests/unit/utils/test_json.py
+++ b/tests/pytests/unit/utils/test_json.py
@@ -107,11 +107,6 @@ def test_find_json():
ret = salt.utils.json.find_json(garbage_around_json)
assert ret == expected_ret
- # Now pre-pend small json and re-test
- small_json_pre_json = f"{test_small_json}{test_sample_json}"
- ret = salt.utils.json.find_json(small_json_pre_json)
- assert ret == expected_ret
-
# Now post-pend small json and re-test
small_json_post_json = f"{test_sample_json}{test_small_json}"
ret = salt.utils.json.find_json(small_json_post_json)
diff --git a/tests/unit/utils/test_json.py b/tests/unit/utils/test_json.py
index 5ea409a705..f5dcc1f72d 100644
--- a/tests/unit/utils/test_json.py
+++ b/tests/unit/utils/test_json.py
@@ -49,6 +49,18 @@ class JSONTestCase(TestCase):
)
)
+ def test_find_json_unbalanced_brace_in_string(self):
+ test_sample_json = '{"title": "I like curly braces like this one:{"}'
+ expected_ret = {"title": "I like curly braces like this one:{"}
+ ret = salt.utils.json.find_json(test_sample_json)
+ self.assertDictEqual(ret, expected_ret)
+
+ def test_find_json_unbalanced_square_bracket_in_string(self):
+ test_sample_json = '{"title": "I like square brackets like this one:["}'
+ expected_ret = {"title": "I like square brackets like this one:["}
+ ret = salt.utils.json.find_json(test_sample_json)
+ self.assertDictEqual(ret, expected_ret)
+
def test_find_json(self):
test_sample_json = """
{
--
2.52.0