File 1351-stdlib-Fix-O-n-2-algorithm-in-erl_eval-extended_pars.patch of Package erlang
From ea2976161a90df7eae11635a3e32afce7928649c Mon Sep 17 00:00:00 2001
From: Paul Guyot <pguyot@kallisys.net>
Date: Sat, 1 Nov 2025 16:53:33 +0100
Subject: [PATCH] stdlib: Fix O(n^2) algorithm in
`erl_eval:extended_parse_exprs/1`
`erl_eval:extended_parse_exprs/1` exhibited O(n^2) time complexity in the
number of tokens passed, with both ERTS and AtomVM. The reason seems
to be related to the
`try <expr> of ... <recurse> catch _:_ -> <recurse otherwise> end.` pattern,
where `<expr>` would usually fail.
By replacing `<expr>` (in this case `unscannable/1`) with an expression
that usually succeeds, the measured complexity is back to O(n), as it
should be for this algorithm.
The script to evaluate the complexity can be found here:
https://gist.github.com/pguyot/1aa53791a819709f147e2ad55aadb279
With OTP 28.1.1:
```
=== Results Summary ===
Size | Tokens | Avg (ms) | Min (ms) | Max (ms) | StdDev | Ratio
-----|--------|----------|----------|----------|--------|-------
512 | 1026 | 4 | 3 | 4 | 1 | 0.31/0.5
1024 | 2050 | 13 | 12 | 13 | 0 | 0.27/0.5
2048 | 4098 | 49 | 48 | 49 | 1 | 0.26/0.5
4096 | 8194 | 188 | 187 | 189 | 1 | 0.25/0.5
8192 | 16386 | 739 | 736 | 743 | 2 | -
=== Complexity Analysis ===
Expected behavior for doubling size:
- O(n): 2x time
- O(n^2): 4x time
- O(n^3): 8x time
Size 512 -> 1024: time ratio 3.25 (between O(n) and O(n^2))
Size 1024 -> 2048: time ratio 3.77 (between O(n) and O(n^2))
Size 2048 -> 4096: time ratio 3.84 (between O(n) and O(n^2))
Size 4096 -> 8192: time ratio 3.93 (between O(n) and O(n^2))
```
With this change, it is both much faster and exhibits O(n) behavior.
```
=== Results Summary ===
Size | Tokens | Avg (ms) | Min (ms) | Max (ms) | StdDev | Ratio
-----|--------|----------|----------|----------|--------|-------
512 | 1026 | 1 | 1 | 1 | 0 | 0.50/0.5
1024 | 2050 | 2 | 1 | 3 | 1 | 0.50/0.5
2048 | 4098 | 4 | 3 | 5 | 0 | 0.50/0.5
4096 | 8194 | 8 | 7 | 10 | 1 | 0.50/0.5
8192 | 16386 | 16 | 14 | 19 | 2 | -
=== Complexity Analysis ===
Expected behavior for doubling size:
- O(n): 2x time
- O(n^2): 4x time
- O(n^3): 8x time
Size 512 -> 1024: time ratio 2.00 (approximately O(n))
Size 1024 -> 2048: time ratio 2.00 (approximately O(n))
Size 2048 -> 4096: time ratio 2.00 (approximately O(n))
Size 4096 -> 8192: time ratio 2.00 (approximately O(n))
```
Signed-off-by: Paul Guyot <pguyot@kallisys.net>
---
lib/stdlib/src/erl_eval.erl | 18 ++++++++++++------
1 file changed, 12 insertions(+), 6 deletions(-)
diff --git a/lib/stdlib/src/erl_eval.erl b/lib/stdlib/src/erl_eval.erl
index 1b35c8da80..acdbe6b44e 100644
--- a/lib/stdlib/src/erl_eval.erl
+++ b/lib/stdlib/src/erl_eval.erl
@@ -2012,11 +2012,15 @@ tokens_fixup([T|Ts]=Ts0) ->
end.
token_fixup(Ts) ->
- {AnnoL, NewTs, FixupTag} = unscannable(Ts),
- String = lists:append([erl_anno:text(A) || A <- AnnoL]),
- _ = (fixup_fun(FixupTag))(String),
- NewAnno = erl_anno:set_text(fixup_text(FixupTag), hd(AnnoL)),
- {{string, NewAnno, String}, NewTs}.
+ case unscannable(Ts) of
+ {AnnoL, NewTs, FixupTag} ->
+ String = lists:append([erl_anno:text(A) || A <- AnnoL]),
+ _ = (fixup_fun(FixupTag))(String),
+ NewAnno = erl_anno:set_text(fixup_text(FixupTag), hd(AnnoL)),
+ {{string, NewAnno, String}, NewTs};
+ false ->
+ {hd(Ts), tl(Ts)}
+ end.
unscannable([{'#', A1}, {var, A2, 'Fun'}, {'<', A3}, {atom, A4, _},
{'.', A5}, {float, A6, _}, {'>', A7}|Ts]) ->
@@ -2033,7 +2037,9 @@ unscannable([{'#', A1}, {var, A2, 'Port'}, {'<', A3}, {float, A4, _},
{[A1, A2, A3, A4, A5], Ts, port};
unscannable([{'#', A1}, {var, A2, 'Ref'}, {'<', A3}, {float, A4, _},
{'.', A5}, {float, A6, _}, {'>', A7}|Ts]) ->
- {[A1, A2, A3, A4, A5, A6, A7], Ts, reference}.
+ {[A1, A2, A3, A4, A5, A6, A7], Ts, reference};
+unscannable(_) ->
+ false.
expr_fixup(Expr0) ->
{Expr, Bs, _} = expr_fixup(Expr0, erl_eval:new_bindings(), 1),
--
2.51.0