File 8302-Add-json-streaming-API.patch of Package erlang
From 30cc826feba66b34e1b288af712e505d53483bb2 Mon Sep 17 00:00:00 2001
From: Dan Gudmundsson <dgud@erlang.org>
Date: Thu, 29 Feb 2024 13:24:13 +0100
Subject: [PATCH 2/3] Add json streaming API
Add a separate API for streaming data. This is needed to make numbers work as expected, since
there is no way of knowing whether a number is complete or continues in the next packet.
Allow the user to call decode_continue(NewBin, State) to complete the parsing.
We also need an 'end_of_input' argument to let the user signal that there is no more data, in
case the stream contained only an integer or is an incomplete JSON object.
---
lib/stdlib/src/json.erl | 318 ++++++++++++++++++++++-----------
lib/stdlib/src/json.hrl | 2 +-
lib/stdlib/test/json_SUITE.erl | 69 ++++++-
3 files changed, 281 insertions(+), 108 deletions(-)
diff --git a/lib/stdlib/src/json.erl b/lib/stdlib/src/json.erl
index 924b30a87c..6d7561d871 100644
--- a/lib/stdlib/src/json.erl
+++ b/lib/stdlib/src/json.erl
@@ -49,7 +49,7 @@ standards. The decoder is tested using [JSONTestSuite](https://github.com/nst/JS
-export_type([encoder/0, encode_value/0]).
-export([
- decode/1, decode/3
+ decode/1, decode/3, decode_start/3, decode_continue/2
]).
-export_type([
from_binary_fun/0,
@@ -60,7 +60,8 @@ standards. The decoder is tested using [JSONTestSuite](https://github.com/nst/JS
object_push_fun/0,
object_finish_fun/0,
decoders/0,
- decode_value/0
+ decode_value/0,
+ continuation_state/0
]).
-compile(warn_missing_spec).
@@ -360,7 +361,7 @@ escape_binary(<<Byte, Rest/binary>>, Acc, Orig, Skip0, Len) when ?is_ascii_escap
escape_binary(<<Byte, Rest/binary>>, Acc, Orig, Skip, Len) ->
case element(Byte - 127, utf8s0()) of
?UTF8_REJECT -> invalid_byte(Orig, Skip + Len);
- %% all accept cases are ASCII, already covred above
+ %% all accept cases are ASCII, already covered above
State -> escape_binary_utf8(Rest, Acc, Orig, Skip, Len, State)
end;
escape_binary(_, _Acc, Orig, 0, _Len) ->
@@ -379,7 +380,7 @@ escape_binary_utf8(<<Byte, Rest/binary>>, Acc, Orig, Skip, Len, State0) ->
State -> escape_binary_utf8(Rest, Acc, Orig, Skip, Len + 1, State)
end;
escape_binary_utf8(_, _Acc, Orig, Skip, Len, _State) ->
- unexpected(Orig, Skip + Len + 1).
+ unexpected_utf8(Orig, Skip + Len + 1).
escape_all(Bin) -> escape_all_ascii(Bin, [$"], Bin, 0, 0).
@@ -565,6 +566,8 @@ error_info(Skip) ->
-type stack() :: [?ARRAY | ?OBJECT | binary() | acc()].
-type decode() :: #decode{}.
+-opaque continuation_state() :: tuple().
+
-type decode_value() ::
integer()
| float()
@@ -602,8 +605,16 @@ Supports basic data mapping:
-spec decode(binary()) -> decode_value().
decode(Binary) when is_binary(Binary) ->
case value(Binary, Binary, 0, ok, [], #decode{}) of
- {Result, _Acc, <<>>} -> Result;
- {_, _, Rest} -> unexpected(Rest, 0)
+ {Result, _Acc, <<>>} ->
+ Result;
+ {_, _, Rest} ->
+ invalid_byte(Rest, 0);
+ {continue, {_Bin, _Acc, [], _Decode, {number, Number}}} ->
+ Number;
+ {continue, {_, _, _, _, {float_error, Token, Skip}}} ->
+ unexpected_sequence(Token, Skip);
+ {continue, _} ->
+ error(unexpected_end)
end.
-doc """
@@ -634,9 +645,9 @@ implementations used by the `decode/1` function:
## Errors
-* `error(unexpected_end)` if `Binary` contains incomplete JSON value
* `error({invalid_byte, Byte})` if `Binary` contains unexpected byte or invalid UTF-8 byte
* `error({invalid_sequence, Bytes})` if `Binary` contains invalid UTF-8 escape
+* `error(unexpected_end)` if `Binary` contains incomplete JSON value
## Example
@@ -649,11 +660,80 @@ Decoding object keys as atoms:
```
""".
-spec decode(binary(), dynamic(), decoders()) ->
- {Result :: dynamic(), Acc :: dynamic(), binary()}.
-decode(Binary, Acc, Decoders) when is_binary(Binary) ->
+ {Result :: dynamic(), Acc :: dynamic(), binary()}.
+decode(Binary, Acc0, Decoders) when is_binary(Binary) ->
+ Decode = maps:fold(fun parse_decoder/3, #decode{}, Decoders),
+ case value(Binary, Binary, 0, Acc0, [], Decode) of
+ {continue, {_Bin, Acc, [], _Decode, {number, Val}}} ->
+ {Val, Acc, <<>>};
+ {continue, {_, _, _, _, {float_error, Token, Skip}}} ->
+ unexpected_sequence(Token, Skip);
+ {continue, _} ->
+ error(unexpected_end);
+ Result ->
+ Result
+ end.
+
+-doc """
+Begin parsing a stream of bytes of a JSON value.
+
+Similar to `decode/3` but returns when a complete JSON value can be parsed or
+returns `{continue, State}` for incomplete data,
+the `State` can be fed to the `decode_continue/2` function when more data is available.
+""".
+-spec decode_start(binary(), dynamic(), decoders()) ->
+ {Result :: dynamic(), Acc :: dynamic(), binary()} | {continue, continuation_state()}.
+decode_start(Binary, Acc, Decoders) when is_binary(Binary) ->
Decode = maps:fold(fun parse_decoder/3, #decode{}, Decoders),
value(Binary, Binary, 0, Acc, [], Decode).
+-doc """
+Continue parsing a stream of bytes of a JSON value.
+
+Similar to `decode_start/3`, if the function returns `{continue, State}` and
+there is no more data, use `end_of_input` instead of a binary.
+
+```erlang
+> {continue, State} = json:decode_start(<<"{\"foo\":">>, ok, #{}).
+> json:decode_continue(<<"1}">>, State).
+{#{foo => 1},ok,<<>>}
+```
+```erlang
+> {continue, State} = json:decode_start(<<"123">>, ok, #{}).
+> json:decode_continue(end_of_input, State).
+{123,ok,<<>>}
+```
+""".
+-spec decode_continue(binary() | end_of_input, Opaque::term()) ->
+ {Result :: dynamic(), Acc :: dynamic(), binary()} | {continue, continuation_state()}.
+decode_continue(end_of_input, State) ->
+ case State of
+ {_, Acc, [], _Decode, {number, Val}} ->
+ {Val, Acc, <<>>};
+ {_, _, _, _, {float_error, Token, Skip}} ->
+ unexpected_sequence(Token, Skip);
+ _ ->
+ error(unexpected_end)
+ end;
+decode_continue(Cont, {Rest, Acc, Stack, #decode{} = Decode, FuncData}) when is_binary(Cont) ->
+ Binary = <<Rest/binary, Cont/binary>>,
+ case FuncData of
+ value ->
+ value(Binary, Binary, 0, Acc, Stack, Decode);
+ {number, _} ->
+ value(Binary, Binary, 0, Acc, Stack, Decode);
+ {float_error, _Token, _Skip} ->
+ value(Binary, Binary, 0, Acc, Stack, Decode);
+ {array_push, Val} ->
+ array_push(Binary, Binary, 0, Acc, Stack, Decode, Val);
+ {object_value, Key} ->
+ object_value(Binary, Binary, 0, Acc, Stack, Decode, Key);
+ {object_push, Value, Key} ->
+ object_push(Binary, Binary, 0, Acc, Stack, Decode, Value, Key);
+ object_key ->
+ object_key(Binary, Binary, 0, Acc, Stack, Decode)
+ end.
+
parse_decoder(array_start, Fun, Decode) when is_function(Fun, 1) ->
Decode#decode{array_start = Fun};
parse_decoder(array_push, Fun, Decode) when is_function(Fun, 2) ->
@@ -692,44 +772,46 @@ value(<<$n, Rest/bits>>, Original, Skip, Acc, Stack, Decode) ->
value(<<$", Rest/bits>>, Original, Skip, Acc, Stack, Decode) ->
string(Rest, Original, Skip + 1, Acc, Stack, Decode);
value(<<$[, Rest/bits>>, Original, Skip, Acc, Stack, Decode) ->
- array_start(Rest, Original, Skip, Acc, Stack, Decode);
+ array_start(Rest, Original, Skip, Acc, Stack, Decode, 1);
value(<<${, Rest/bits>>, Original, Skip, Acc, Stack, Decode) ->
- object_start(Rest, Original, Skip, Acc, Stack, Decode);
+ object_start(Rest, Original, Skip, Acc, Stack, Decode, 1);
value(<<Byte, _/bits>>, Original, Skip, _Acc, _Stack, _Decode) when ?is_ascii_plain(Byte) ->
%% this clause is effecively the same as the last one, but necessary to
%% force compiler to emit a jump table dispatch, rather than binary search
invalid_byte(Original, Skip);
-value(_, Original, Skip, _Acc, _Stack, _Decode) ->
- unexpected(Original, Skip).
+value(_, Original, Skip, Acc, Stack, Decode) ->
+ unexpected(Original, Skip, Acc, Stack, Decode, 0, 0, value).
true(<<"rue", Rest/bits>>, Original, Skip, Acc, Stack, Decode) ->
- continue(Rest, Original, Skip + 4, Acc, Stack, Decode, true);
-true(_Rest, Original, Skip, _Acc, _Stack, _Decode) ->
- unexpected(Original, Skip + 1).
+ continue(Rest, Original, Skip+4, Acc, Stack, Decode, true);
+true(_Rest, Original, Skip, Acc, Stack, Decode) ->
+ unexpected(Original, Skip, Acc, Stack, Decode, 1, 3, value).
false(<<"alse", Rest/bits>>, Original, Skip, Acc, Stack, Decode) ->
- continue(Rest, Original, Skip + 5, Acc, Stack, Decode, false);
-false(_Rest, Original, Skip, _Acc, _Stack, _Decode) ->
- unexpected(Original, Skip + 1).
+ continue(Rest, Original, Skip+5, Acc, Stack, Decode, false);
+false(_Rest, Original, Skip, Acc, Stack, Decode) ->
+ unexpected(Original, Skip, Acc, Stack, Decode, 1, 4, value).
null(<<"ull", Rest/bits>>, Original, Skip, Acc, Stack, Decode) ->
- continue(Rest, Original, Skip + 4, Acc, Stack, Decode, Decode#decode.null);
-null(_Rest, Original, Skip, _Acc, _Stack, _Decode) ->
- unexpected(Original, Skip + 1).
+ continue(Rest, Original, Skip+4, Acc, Stack, Decode, Decode#decode.null);
+null(_Rest, Original, Skip, Acc, Stack, Decode) ->
+ unexpected(Original, Skip, Acc, Stack, Decode, 1, 3, value).
number_minus(<<$0, Rest/bits>>, Original, Skip, Acc, Stack, Decode) ->
number_zero(Rest, Original, Skip, Acc, Stack, Decode, 2);
number_minus(<<Num, Rest/bits>>, Original, Skip, Acc, Stack, Decode) when ?is_1_to_9(Num) ->
number(Rest, Original, Skip, Acc, Stack, Decode, 2);
-number_minus(_Rest, Original, Skip, _Acc, _Stack, _Decode) ->
- unexpected(Original, Skip + 1).
+number_minus(_Rest, Original, Skip, Acc, Stack, Decode) ->
+ unexpected(Original, Skip, Acc, Stack, Decode, 1, 0, value).
number_zero(<<$., Rest/bits>>, Original, Skip, Acc, Stack, Decode, Len) ->
number_frac(Rest, Original, Skip, Acc, Stack, Decode, Len + 1);
number_zero(<<E, Rest/bits>>, Original, Skip, Acc, Stack, Decode, Len) when E =:= $E; E =:= $e ->
number_exp_copy(Rest, Original, Skip, Acc, Stack, Decode, Len + 1, <<"0">>);
+number_zero(<<>>, Original, Skip, Acc, Stack, Decode, Len) ->
+ unexpected(Original, Skip, Acc, Stack, Decode, Len, 0, {number, 0});
number_zero(Rest, Original, Skip, Acc, Stack, Decode, Len) ->
- continue(Rest, Original, Skip + Len, Acc, Stack, Decode, 0).
+ continue(Rest, Original, Skip+Len, Acc, Stack, Decode, 0).
number(<<Num, Rest/bits>>, Original, Skip, Acc, Stack, Decode, Len) when ?is_0_to_9(Num) ->
number(Rest, Original, Skip, Acc, Stack, Decode, Len + 1);
@@ -738,14 +820,17 @@ number(<<$., Rest/bits>>, Original, Skip, Acc, Stack, Decode, Len) ->
number(<<E, Rest/bits>>, Original, Skip, Acc, Stack, Decode, Len) when E =:= $E; E =:= $e ->
Prefix = binary_part(Original, Skip, Len),
number_exp_copy(Rest, Original, Skip, Acc, Stack, Decode, Len + 1, Prefix);
+number(<<>>, Original, Skip, Acc, Stack, Decode, Len) ->
+ Int = (Decode#decode.integer)(binary_part(Original, Skip, Len)),
+ unexpected(Original, Skip, Acc, Stack, Decode, Len, 0, {number, Int});
number(Rest, Original, Skip, Acc, Stack, Decode, Len) ->
Int = (Decode#decode.integer)(binary_part(Original, Skip, Len)),
- continue(Rest, Original, Skip + Len, Acc, Stack, Decode, Int).
+ continue(Rest, Original, Skip+Len, Acc, Stack, Decode, Int).
number_frac(<<Byte, Rest/bits>>, Original, Skip, Acc, Stack, Decode, Len) when ?is_0_to_9(Byte) ->
number_frac_cont(Rest, Original, Skip, Acc, Stack, Decode, Len + 1);
-number_frac(_, Original, Skip, _Acc, _Stack, _Decode, Len) ->
- unexpected(Original, Skip + Len).
+number_frac(_, Original, Skip, Acc, Stack, Decode, Len) ->
+ unexpected(Original, Skip, Acc, Stack, Decode, Len, 0, value).
number_frac_cont(<<Byte, Rest/bits>>, Original, Skip, Acc, Stack, Decode, Len) when ?is_0_to_9(Byte) ->
number_frac_cont(Rest, Original, Skip, Acc, Stack, Decode, Len + 1);
@@ -755,10 +840,16 @@ number_frac_cont(Rest, Original, Skip, Acc, Stack, Decode, Len) ->
Token = binary_part(Original, Skip, Len),
float_decode(Rest, Original, Skip, Acc, Stack, Decode, Len, Token).
+float_decode(<<>>, Original, Skip, Acc, Stack, Decode, Len, Token) ->
+ try (Decode#decode.float)(Token) of
+ Float -> unexpected(Original, Skip, Acc, Stack, Decode, Len, 0, {number, Float})
+ catch
+ _:_ -> unexpected(Original, Skip, Acc, Stack, Decode, Len, 0, {float_error, Token, Skip})
+ end;
float_decode(<<Rest/bits>>, Original, Skip, Acc, Stack, Decode, Len, Token) ->
try (Decode#decode.float)(Token) of
Float ->
- continue(Rest, Original, Skip + Len, Acc, Stack, Decode, Float)
+ continue(Rest, Original, Skip+Len, Acc, Stack, Decode, Float)
catch
_:_ -> unexpected_sequence(Token, Skip)
end.
@@ -767,13 +858,13 @@ number_exp(<<Byte, Rest/bits>>, Original, Skip, Acc, Stack, Decode, Len) when ?i
number_exp_cont(Rest, Original, Skip, Acc, Stack, Decode, Len + 1);
number_exp(<<Sign, Rest/bits>>, Original, Skip, Acc, Stack, Decode, Len) when Sign =:= $+; Sign =:= $- ->
number_exp_sign(Rest, Original, Skip, Acc, Stack, Decode, Len + 1);
-number_exp(_, Original, Skip, _Acc, _Stack, _Decode, Len) ->
- unexpected(Original, Skip + Len).
+number_exp(_, Original, Skip, Acc, Stack, Decode, Len) ->
+ unexpected(Original, Skip, Acc, Stack, Decode, Len, 0, value).
number_exp_sign(<<Byte, Rest/bits>>, Original, Skip, Acc, Stack, Decode, Len) when ?is_0_to_9(Byte) ->
number_exp_cont(Rest, Original, Skip, Acc, Stack, Decode, Len + 1);
-number_exp_sign(_, Original, Skip, _Acc, _Stack, _Decode, Len) ->
- unexpected(Original, Skip + Len).
+number_exp_sign(_, Original, Skip, Acc, Stack, Decode, Len) ->
+ unexpected(Original, Skip, Acc, Stack, Decode, Len, 0, value).
number_exp_cont(<<Byte, Rest/bits>>, Original, Skip, Acc, Stack, Decode, Len) when ?is_0_to_9(Byte) ->
number_exp_cont(Rest, Original, Skip, Acc, Stack, Decode, Len + 1);
@@ -785,13 +876,13 @@ number_exp_copy(<<Byte, Rest/bits>>, Original, Skip, Acc, Stack, Decode, Len, Pr
number_exp_cont(Rest, Original, Skip, Acc, Stack, Decode, Len, Prefix, 1);
number_exp_copy(<<Sign, Rest/bits>>, Original, Skip, Acc, Stack, Decode, Len, Prefix) when Sign =:= $+; Sign =:= $- ->
number_exp_sign(Rest, Original, Skip, Acc, Stack, Decode, Len, Prefix, 1);
-number_exp_copy(_, Original, Skip, _Acc, _Stack, _Decode, Len, _Prefix) ->
- unexpected(Original, Skip + Len).
+number_exp_copy(_, Original, Skip, Acc, Stack, Decode, Len, _Prefix) ->
+ unexpected(Original, Skip, Acc, Stack, Decode, Len, 0, value).
number_exp_sign(<<Byte, Rest/bits>>, Original, Skip, Acc, Stack, Decode, Len, Prefix, ExpLen) when ?is_0_to_9(Byte) ->
number_exp_cont(Rest, Original, Skip, Acc, Stack, Decode, Len, Prefix, ExpLen + 1);
-number_exp_sign(_, Original, Skip, _Acc, _Stack, _Decode, Len, _Prefix, ExpLen) ->
- unexpected(Original, Skip + Len + ExpLen).
+number_exp_sign(_, Original, Skip, Acc, Stack, Decode, Len, _Prefix, ExpLen) ->
+ unexpected(Original, Skip, Acc, Stack, Decode, Len + ExpLen, 0, value).
number_exp_cont(<<Byte, Rest/bits>>, Original, Skip, Acc, Stack, Decode, Len, Prefix, ExpLen) when ?is_0_to_9(Byte) ->
number_exp_cont(Rest, Original, Skip, Acc, Stack, Decode, Len, Prefix, ExpLen + 1);
@@ -817,7 +908,7 @@ string(<<Byte, Rest/bits>>, Orig, Skip, Acc, Stack, Decode, Len) when ?is_ascii_
string(<<$\\, Rest/bits>>, Orig, Skip, Acc, Stack, Decode, Len) ->
Part = binary_part(Orig, Skip, Len),
SAcc = <<>>,
- unescape(Rest, Orig, Skip, Acc, Stack, Decode, Len, <<SAcc/binary, Part/binary>>);
+ unescape(Rest, Orig, Skip, Acc, Stack, Decode, Skip-1, Len, <<SAcc/binary, Part/binary>>);
string(<<$", Rest/bits>>, Orig, Skip0, Acc, Stack, Decode, Len) ->
Value = binary_part(Orig, Skip0, Len),
Skip = Skip0 + Len + 1,
@@ -833,8 +924,8 @@ string(<<Byte, Rest/bytes>>, Orig, Skip, Acc, Stack, Decode, Len) ->
%% all accept cases are ASCII, already covered above
State -> string_utf8(Rest, Orig, Skip, Acc, Stack, Decode, Len, State)
end;
-string(_, Orig, Skip, _Acc, _Stack, _Decode, Len) ->
- unexpected(Orig, Skip + Len).
+string(_, Orig, Skip, Acc, Stack, Decode, Len) ->
+ unexpected(Orig, Skip-1, Acc, Stack, Decode, Len + 1, 0, value).
string_utf8(<<Byte, Rest/binary>>, Orig, Skip, Acc, Stack, Decode, Len, State0) ->
Type = element(Byte + 1, utf8t()),
@@ -843,24 +934,24 @@ string_utf8(<<Byte, Rest/binary>>, Orig, Skip, Acc, Stack, Decode, Len, State0)
?UTF8_REJECT -> invalid_byte(Orig, Skip + Len + 1);
State -> string_utf8(Rest, Orig, Skip, Acc, Stack, Decode, Len + 1, State)
end;
-string_utf8(_, Orig, Skip, _Acc, _Stack, _Decode, Len, _State0) ->
- unexpected(Orig, Skip + Len + 1).
+string_utf8(_, Orig, Skip, Acc, Stack, Decode, Len, _State0) ->
+ unexpected(Orig, Skip-1, Acc, Stack, Decode, Len + 2, 0, value).
-string_ascii(Binary, Original, Skip, Acc, Stack, Decode, Len, SAcc) ->
+string_ascii(Binary, Original, Skip, Acc, Stack, Decode, Start, Len, SAcc) ->
case Binary of
<<B1, B2, B3, B4, B5, B6, B7, B8, Rest/binary>> when ?are_all_ascii_plain(B1, B2, B3, B4, B5, B6, B7, B8) ->
- string_ascii(Rest, Original, Skip, Acc, Stack, Decode, Len + 8, SAcc);
+ string_ascii(Rest, Original, Skip, Acc, Stack, Decode, Start, Len + 8, SAcc);
Other ->
- string(Other, Original, Skip, Acc, Stack, Decode, Len, SAcc)
+ string(Other, Original, Skip, Acc, Stack, Decode, Start, Len, SAcc)
end.
--spec string(binary(), binary(), integer(), acc(), stack(), decode(), integer(), binary()) -> dynamic().
-string(<<Byte, Rest/bits>>, Orig, Skip, Acc, Stack, Decode, Len, SAcc) when ?is_ascii_plain(Byte) ->
- string(Rest, Orig, Skip, Acc, Stack, Decode, Len + 1, SAcc);
-string(<<$\\, Rest/bits>>, Orig, Skip, Acc, Stack, Decode, Len, SAcc) ->
+-spec string(binary(), binary(), integer(), acc(), stack(), decode(), integer(), integer(), binary()) -> dynamic().
+string(<<Byte, Rest/bits>>, Orig, Skip, Acc, Stack, Decode, Start, Len, SAcc) when ?is_ascii_plain(Byte) ->
+ string(Rest, Orig, Skip, Acc, Stack, Decode, Start, Len + 1, SAcc);
+string(<<$\\, Rest/bits>>, Orig, Skip, Acc, Stack, Decode, Start, Len, SAcc) ->
Part = binary_part(Orig, Skip, Len),
- unescape(Rest, Orig, Skip, Acc, Stack, Decode, Len, <<SAcc/binary, Part/binary>>);
-string(<<$", Rest/bits>>, Orig, Skip0, Acc, Stack, Decode, Len, SAcc) ->
+ unescape(Rest, Orig, Skip, Acc, Stack, Decode, Start, Len, <<SAcc/binary, Part/binary>>);
+string(<<$", Rest/bits>>, Orig, Skip0, Acc, Stack, Decode, _Start, Len, SAcc) ->
Part = binary_part(Orig, Skip0, Len),
Value = <<SAcc/binary, Part/binary>>,
Skip = Skip0 + Len + 1,
@@ -868,28 +959,30 @@ string(<<$", Rest/bits>>, Orig, Skip0, Acc, Stack, Decode, Len, SAcc) ->
undefined -> continue(Rest, Orig, Skip, Acc, Stack, Decode, Value);
Fun -> continue(Rest, Orig, Skip, Acc, Stack, Decode, Fun(Value))
end;
-string(<<Byte, _/bits>>, Orig, Skip, _Acc, _Stack, _Decode, Len, _SAcc) when ?is_ascii_escape(Byte) ->
+string(<<Byte, _/bits>>, Orig, Skip, _Acc, _Stack, _Decode, _Start, Len, _SAcc) when ?is_ascii_escape(Byte) ->
invalid_byte(Orig, Skip + Len);
-string(<<Byte, Rest/bytes>>, Orig, Skip, Acc, Stack, Decode, Len, SAcc) ->
+string(<<Byte, Rest/bytes>>, Orig, Skip, Acc, Stack, Decode, Start, Len, SAcc) ->
case element(Byte - 127, utf8s0()) of
?UTF8_REJECT -> invalid_byte(Orig, Skip + Len);
- %% all accept cases are ASCII, already covred above
- State -> string_utf8(Rest, Orig, Skip, Acc, Stack, Decode, Len, SAcc, State)
+ %% all accept cases are ASCII, already covered above
+ State -> string_utf8(Rest, Orig, Skip, Acc, Stack, Decode, Start, Len, SAcc, State)
end;
-string(_, Orig, Skip, _Acc, _Stack, _Decode, Len, _SAcc) ->
- unexpected(Orig, Skip + Len).
+string(_, Orig, Skip, Acc, Stack, Decode, Start, Len, _SAcc) ->
+ Extra = Skip - Start,
+ unexpected(Orig, Start, Acc, Stack, Decode, Len+Extra, 0, value).
-string_utf8(<<Byte, Rest/binary>>, Orig, Skip, Acc, Stack, Decode, Len, SAcc, State0) ->
+string_utf8(<<Byte, Rest/binary>>, Orig, Skip, Acc, Stack, Decode, Start, Len, SAcc, State0) ->
Type = element(Byte + 1, utf8t()),
case element(State0 + Type, utf8s()) of
- ?UTF8_ACCEPT -> string_ascii(Rest, Orig, Skip, Acc, Stack, Decode, Len + 2, SAcc);
+ ?UTF8_ACCEPT -> string_ascii(Rest, Orig, Skip, Acc, Stack, Decode, Start, Len + 2, SAcc);
?UTF8_REJECT -> invalid_byte(Orig, Skip + Len + 1);
- State -> string_utf8(Rest, Orig, Skip, Acc, Stack, Decode, Len + 1, SAcc, State)
+ State -> string_utf8(Rest, Orig, Skip, Acc, Stack, Decode, Start, Len + 1, SAcc, State)
end;
-string_utf8(_, Orig, Skip, _Acc, _Stack, _Decode, Len, _SAcc, _State0) ->
- unexpected(Orig, Skip + Len + 1).
+string_utf8(_, Orig, Skip, Acc, Stack, Decode, Start, Len, _SAcc, _State0) ->
+ Extra = Skip - Start,
+ unexpected(Orig, Start, Acc, Stack, Decode, Len + 1 + Extra, 0, value).
-unescape(<<Byte, Rest/bits>>, Original, Skip, Acc, Stack, Decode, Len, SAcc) ->
+unescape(<<Byte, Rest/bits>>, Original, Skip, Acc, Stack, Decode, Start, Len, SAcc) ->
Val =
case Byte of
$b -> $\b;
@@ -904,20 +997,21 @@ unescape(<<Byte, Rest/bits>>, Original, Skip, Acc, Stack, Decode, Len, SAcc) ->
_ -> error
end,
case Val of
- unicode -> unescapeu(Rest, Original, Skip, Acc, Stack, Decode, Len, SAcc);
- error -> unexpected(Original, Skip + Len + 1);
- Int -> string_ascii(Rest, Original, Skip + Len + 2, Acc, Stack, Decode, 0, <<SAcc/binary, Int>>)
+ unicode -> unescapeu(Rest, Original, Skip, Acc, Stack, Decode, Start, Len, SAcc);
+ error -> invalid_byte(Original, Skip+Len+1);
+ Int -> string_ascii(Rest, Original, Skip + Len + 2, Acc, Stack, Decode, Start, 0, <<SAcc/binary, Int>>)
end;
-unescape(_, Original, Skip, _Acc, _Stack, _Decode, Len, _SAcc) ->
- unexpected(Original, Skip + Len + 1).
+unescape(_, Original, Skip, Acc, Stack, Decode, Start, Len, _SAcc) ->
+ Extra = Skip - Start,
+ unexpected(Original, Start, Acc, Stack, Decode, Len + 1 + Extra, 0, value).
-unescapeu(<<E1, E2, E3, E4, Rest/bits>>, Original, Skip, Acc, Stack, Decode, Len, SAcc) ->
+unescapeu(<<E1, E2, E3, E4, Rest/bits>>, Original, Skip, Acc, Stack, Decode, Start, Len, SAcc) ->
try hex_to_int(E1, E2, E3, E4) of
CP when CP >= 16#D800, CP =< 16#DBFF ->
- unescape_surrogate(Rest, Original, Skip, Acc, Stack, Decode, Len, SAcc, CP);
+ unescape_surrogate(Rest, Original, Skip, Acc, Stack, Decode, Start, Len, SAcc, CP);
CP ->
try <<SAcc/binary, CP/utf8>> of
- SAcc1 -> string_ascii(Rest, Original, Skip + Len + 6, Acc, Stack, Decode, 0, SAcc1)
+ SAcc1 -> string_ascii(Rest, Original, Skip + Len + 6, Acc, Stack, Decode, Start, 0, SAcc1)
catch
_:_ -> unexpected_sequence(binary_part(Original, Skip + Len, 6), Skip + Len)
end
@@ -925,15 +1019,16 @@ unescapeu(<<E1, E2, E3, E4, Rest/bits>>, Original, Skip, Acc, Stack, Decode, Len
_:_ ->
unexpected_sequence(binary_part(Original, Skip + Len, 6), Skip + Len)
end;
-unescapeu(_, Original, Skip, _Acc, _Stack, _Decode, Len, _SAcc) ->
- unexpected(Original, Skip + Len + 2).
+unescapeu(_Rest, Original, Skip, Acc, Stack, Decode, Start, Len, _SAcc) ->
+ Extra = Skip - Start,
+ unexpected(Original, Start, Acc, Stack, Decode, Len + 2 + Extra, 4, value).
-unescape_surrogate(<<"\\u", E1, E2, E3, E4, Rest/bits>>, Original, Skip, Acc, Stack, Decode, Len, SAcc, Hi) ->
+unescape_surrogate(<<"\\u", E1, E2, E3, E4, Rest/bits>>, Original, Skip, Acc, Stack, Decode, Start, Len, SAcc, Hi) ->
try hex_to_int(E1, E2, E3, E4) of
Lo when Lo >= 16#DC00, Lo =< 16#DFFF ->
CP = 16#10000 + ((Hi band 16#3FF) bsl 10) + (Lo band 16#3FF),
try <<SAcc/binary, CP/utf8>> of
- SAcc1 -> string_ascii(Rest, Original, Skip + Len + 12, Acc, Stack, Decode, 0, SAcc1)
+ SAcc1 -> string_ascii(Rest, Original, Skip + Len + 12, Acc, Stack, Decode, Start, 0, SAcc1)
catch
_:_ -> unexpected_sequence(binary_part(Original, Skip + Len, 12), Skip + Len)
end;
@@ -942,8 +1037,9 @@ unescape_surrogate(<<"\\u", E1, E2, E3, E4, Rest/bits>>, Original, Skip, Acc, St
catch
_:_ -> unexpected_sequence(binary_part(Original, Skip + Len, 12), Skip + Len)
end;
-unescape_surrogate(_, Original, Skip, _Acc, _Stack, _Decode, Len, _SAcc, _Hi) ->
- unexpected(Original, Skip + Len + 6).
+unescape_surrogate(_Rest, Original, Skip, Acc, Stack, Decode, Start, Len, _SAcc, _Hi) ->
+ Extra = Skip - Start,
+ unexpected(Original, Start, Acc, Stack, Decode, Len + 6 + Extra, 5, value).
%% erlfmt-ignore
%% this is a macro instead of an inlined function - compiler refused to inline
@@ -958,9 +1054,9 @@ unescape_surrogate(_, Original, Skip, _Acc, _Stack, _Decode, Len, _SAcc, _Hi) ->
hex_to_int(H1, H2, H3, H4) ->
?hex_digit(H4) + 16 * (?hex_digit(H3) + 16 * (?hex_digit(H2) + 16 * ?hex_digit(H1))).
-array_start(<<Byte, Rest/bits>>, Original, Skip, Acc, Stack, Decode) when ?is_ws(Byte) ->
- array_start(Rest, Original, Skip + 1, Acc, Stack, Decode);
-array_start(<<"]", Rest/bits>>, Original, Skip, Acc, Stack, Decode) ->
+array_start(<<Byte, Rest/bits>>, Original, Skip, Acc, Stack, Decode, Len) when ?is_ws(Byte) ->
+ array_start(Rest, Original, Skip, Acc, Stack, Decode, Len+1);
+array_start(<<"]", Rest/bits>>, Original, Skip, Acc, Stack, Decode, Len) ->
{Value, NewAcc} =
case {Decode#decode.array_start, Decode#decode.array_finish} of
{undefined, undefined} -> {[], Acc};
@@ -968,12 +1064,14 @@ array_start(<<"]", Rest/bits>>, Original, Skip, Acc, Stack, Decode) ->
{undefined, Finish} -> Finish([], Acc);
{Start, Finish} -> Finish(Start(Acc), Acc)
end,
- continue(Rest, Original, Skip + 2, NewAcc, Stack, Decode, Value);
-array_start(Rest, Original, Skip0, OldAcc, Stack, Decode) ->
- Skip = Skip0 + 1,
+ continue(Rest, Original, Skip+Len+1, NewAcc, Stack, Decode, Value);
+array_start(<<>>, Original, Skip, Acc, Stack, Decode, Len) ->
+ %% Handles empty array [] in continuation mode
+ unexpected(Original, Skip, Acc, Stack, Decode, Len, 0, value);
+array_start(Rest, Original, Skip, OldAcc, Stack, Decode, Len) ->
case Decode#decode.array_start of
- undefined -> value(Rest, Original, Skip, [], [?ARRAY, OldAcc | Stack], Decode);
- Fun -> value(Rest, Original, Skip, Fun(OldAcc), [?ARRAY, OldAcc | Stack], Decode)
+ undefined -> value(Rest, Original, Skip+Len, [], [?ARRAY, OldAcc | Stack], Decode);
+ Fun -> value(Rest, Original, Skip+Len, Fun(OldAcc), [?ARRAY, OldAcc | Stack], Decode)
end.
array_push(<<Byte, Rest/bits>>, Original, Skip, Acc, Stack, Decode, Value) when ?is_ws(Byte) ->
@@ -997,12 +1095,13 @@ array_push(<<$,, Rest/bits>>, Original, Skip0, Acc, Stack, Decode, Value) ->
undefined -> value(Rest, Original, Skip, [Value | Acc], Stack, Decode);
Fun -> value(Rest, Original, Skip, Fun(Value, Acc), Stack, Decode)
end;
-array_push(_, Original, Skip, _Acc, _Stack, _Decode, _Value) ->
- unexpected(Original, Skip).
+array_push(_, Original, Skip, Acc, Stack, Decode, Value) ->
+ unexpected(Original, Skip, Acc, Stack, Decode, 0, 0, {?FUNCTION_NAME, Value}).
+
-object_start(<<Byte, Rest/bits>>, Original, Skip, Acc, Stack, Decode) when ?is_ws(Byte) ->
- object_start(Rest, Original, Skip + 1, Acc, Stack, Decode);
-object_start(<<"}", Rest/bits>>, Original, Skip, Acc, Stack, Decode) ->
+object_start(<<Byte, Rest/bits>>, Original, Skip, Acc, Stack, Decode, Len) when ?is_ws(Byte) ->
+ object_start(Rest, Original, Skip, Acc, Stack, Decode, Len+1);
+object_start(<<"}", Rest/bits>>, Original, Skip, Acc, Stack, Decode, Len) ->
{Value, NewAcc} =
case {Decode#decode.object_start, Decode#decode.object_finish} of
{undefined, undefined} -> {#{}, Acc};
@@ -1010,10 +1109,10 @@ object_start(<<"}", Rest/bits>>, Original, Skip, Acc, Stack, Decode) ->
{undefined, Finish} -> Finish([], Acc);
{Start, Finish} -> Finish(Start(Acc), Acc)
end,
- continue(Rest, Original, Skip + 2, NewAcc, Stack, Decode, Value);
-object_start(<<$", Rest/bits>>, Original, Skip0, OldAcc, Stack0, Decode) ->
+ continue(Rest, Original, Skip+Len+1, NewAcc, Stack, Decode, Value);
+object_start(<<$", Rest/bits>>, Original, Skip0, OldAcc, Stack0, Decode, Len) ->
Stack = [?OBJECT, OldAcc | Stack0],
- Skip = Skip0 + 2,
+ Skip = Skip0 + Len + 1,
case Decode#decode.object_start of
undefined ->
string(Rest, Original, Skip, [], Stack, Decode);
@@ -1021,15 +1120,15 @@ object_start(<<$", Rest/bits>>, Original, Skip0, OldAcc, Stack0, Decode) ->
Acc = Fun(OldAcc),
string(Rest, Original, Skip, Acc, Stack, Decode)
end;
-object_start(_, Original, Skip, _Acc, _Stack, _Decode) ->
- unexpected(Original, Skip + 1).
+object_start(_, Original, Skip, Acc, Stack, Decode, Len) ->
+ unexpected(Original, Skip, Acc, Stack, Decode, Len, 0, value).
object_value(<<Byte, Rest/bits>>, Original, Skip, Acc, Stack, Decode, Key) when ?is_ws(Byte) ->
object_value(Rest, Original, Skip + 1, Acc, Stack, Decode, Key);
object_value(<<$:, Rest/bits>>, Original, Skip, Acc, Stack, Decode, Key) ->
value(Rest, Original, Skip + 1, Acc, [Key | Stack], Decode);
-object_value(_, Original, Skip, _Acc, _Stack, _Decode, _Key) ->
- unexpected(Original, Skip).
+object_value(_, Original, Skip, Acc, Stack, Decode, Key) ->
+ unexpected(Original, Skip, Acc, Stack, Decode, 0, 0, {?FUNCTION_NAME, Key}).
object_push(<<Byte, Rest/bits>>, Original, Skip, Acc, Stack, Decode, Value, Key) when ?is_ws(Byte) ->
object_push(Rest, Original, Skip + 1, Acc, Stack, Decode, Value, Key);
@@ -1051,15 +1150,15 @@ object_push(<<$,, Rest/bits>>, Original, Skip, Acc0, Stack, Decode, Value, Key)
undefined -> object_key(Rest, Original, Skip + 1, [{Key, Value} | Acc0], Stack, Decode);
Fun -> object_key(Rest, Original, Skip + 1, Fun(Key, Value, Acc0), Stack, Decode)
end;
-object_push(_, Original, Skip, _Acc, _Stack, _Decode, _Value, _Key) ->
- unexpected(Original, Skip).
+object_push(_, Original, Skip, Acc, Stack, Decode, Value, Key) ->
+ unexpected(Original, Skip, Acc, Stack, Decode, 0, 0, {?FUNCTION_NAME, Value, Key}).
object_key(<<Byte, Rest/bits>>, Original, Skip, Acc, Stack, Decode) when ?is_ws(Byte) ->
object_key(Rest, Original, Skip + 1, Acc, Stack, Decode);
object_key(<<$", Rest/bits>>, Original, Skip, Acc, Stack, Decode) ->
string(Rest, Original, Skip + 1, Acc, Stack, Decode);
-object_key(_, Original, Skip, _Acc, _Stack, _Decode) ->
- unexpected(Original, Skip).
+object_key(_, Original, Skip, Acc, Stack, Decode) ->
+ unexpected(Original, Skip, Acc, Stack, Decode, 0, 0, ?FUNCTION_NAME).
continue(<<Rest/bits>>, Original, Skip, Acc, Stack0, Decode, Value) ->
case Stack0 of
@@ -1074,12 +1173,23 @@ terminate(<<Byte, Rest/bits>>, Original, Skip, Acc, Value) when ?is_ws(Byte) ->
terminate(<<Rest/bits>>, _Original, _Skip, Acc, Value) ->
{Value, Acc, Rest}.
--spec unexpected(binary(), non_neg_integer()) -> no_return().
-unexpected(Original, Skip) when byte_size(Original) =:= Skip ->
+-spec unexpected_utf8(binary(), non_neg_integer()) -> no_return().
+unexpected_utf8(Original, Skip) when byte_size(Original) =:= Skip ->
error(unexpected_end);
-unexpected(Original, Skip) ->
+unexpected_utf8(Original, Skip) ->
invalid_byte(Original, Skip).
+unexpected(Original, Skip, Acc, Stack, Decode, Pos, Len, FuncData) ->
+ RequiredSize = Skip+Pos+Len,
+ OrigSize = byte_size(Original),
+ case OrigSize =< RequiredSize of
+ true ->
+ <<_:Skip/binary, Rest/binary>> = Original,
+ {continue, {Rest, Acc, Stack, Decode, FuncData}};
+ false ->
+ invalid_byte(Original, Skip+Pos)
+ end.
+
-spec unexpected_sequence(binary(), non_neg_integer()) -> no_return().
unexpected_sequence(Value, Skip) ->
error({unexpected_sequence, Value}, none, error_info(Skip)).
diff --git a/lib/stdlib/src/json.hrl b/lib/stdlib/src/json.hrl
index 0c8943c8e5..ae2bb26295 100644
--- a/lib/stdlib/src/json.hrl
+++ b/lib/stdlib/src/json.hrl
@@ -25,7 +25,7 @@
%% of values. They'll generate an efficient "jump table",
%% which gets to the correct clause in one go, rather
%% than going through a set of comparisons.
-%% However, this might not always be the bext way (see is_0_to_9),
+%% However, this might not always be the best way (see is_0_to_9),
%% so as always with any performance work - measure, don't guess!
-define(is_1_to_9(X),
diff --git a/lib/stdlib/test/json_SUITE.erl b/lib/stdlib/test/json_SUITE.erl
index ae0b299bfe..6c4c5e13d6 100644
--- a/lib/stdlib/test/json_SUITE.erl
+++ b/lib/stdlib/test/json_SUITE.erl
@@ -45,6 +45,7 @@
test_decode_objects/1,
test_decode_whitespace/1,
test_decode_api/1,
+ test_decode_api_stream/1,
test_json_test_suite/1,
counterexamples/1,
property_string_roundtrip/1,
@@ -88,7 +89,8 @@ groups() ->
test_decode_arrays,
test_decode_objects,
test_decode_whitespace,
- test_decode_api
+ test_decode_api,
+ test_decode_api_stream
]},
{properties, [parallel], [
property_string_roundtrip,
@@ -296,7 +298,7 @@ test_decode_atoms(_Config) ->
test_decode_numbers(_Config) ->
?assertError(unexpected_end, decode(<<"-">>)),
?assertError({invalid_byte, $-}, decode(<<"--1">>)),
- ?assertError({invalid_byte, $1}, decode(<<"01">>)),
+ ?assertError({invalid_byte, $1}, json:decode(<<"01">>)),
?assertError({invalid_byte, $.}, decode(<<".1">>)),
?assertError(unexpected_end, decode(<<"1.">>)),
?assertError(unexpected_end, decode(<<"1e">>)),
@@ -405,6 +407,7 @@ test_decode_strings(_Config) ->
test_decode_arrays(_Config) ->
?assertError(unexpected_end, decode(<<"[">>)),
?assertError({invalid_byte, $,}, decode(<<"[,">>)),
+ ?assertError({invalid_byte, $]}, decode(<<" ]">>)),
?assertError(unexpected_end, decode(<<"[1,">>)),
?assertEqual([], decode(<<"[]">>)),
@@ -541,7 +544,67 @@ set_history(Ty, Acc, Res) ->
put(history, [Entry | History]),
Res.
-decode(Bin) -> json:decode(Bin).
+test_decode_api_stream(_Config) ->
+ Types = ~#{"types": [[], {}, true, false, null, {"foo": "baz"}],
+ "numbers": [1, -10, 0.0, -0.0, 2.0, -2.0, 31e2, 31e-2, 0.31e2, -0.31e2, 0.13e-2],
+ "strings": ["three", "åäö", "mixed_Ω"],
+ "escaped": ["\\n", "\\u2603", "\\ud834\\uDD1E", "\\n\xc3\xb1"]
+ }#,
+ ok = stream_decode(Types),
+
+ Multiple = ~#12345 1.30 "String1" -0.31e2\n["an array"]12345#,
+ ok = multi_stream_decode(Multiple),
+ ok.
+
+
+decode(Bin) ->
+ try json:decode(Bin) of
+ Result ->
+ {Res, [], <<>>} = byte_loop(Bin),
+ ?assertEqual(Result, Res, "Stream decode failed"),
+ Result
+ catch Class:Reason:ST ->
+ ?assertError(Reason, byte_loop(Bin)),
+ erlang:raise(Class, Reason, ST)
+ end.
+
+stream_decode(Str) ->
+ {R1, [], <<>>} = byte_loop(Str),
+ case json:decode(Str) of
+ R1 ->
+ ok;
+ R2 ->
+ io:format("~p ~p~n",[R1,R2]),
+ error
+ end.
+
+multi_stream_decode(<<>>) ->
+ ok;
+multi_stream_decode(Strs) ->
+ {R1, [], ContBin} = byte_loop(Strs),
+ case json:decode(Strs, [], #{}) of
+ {R1, [], ContBin} ->
+ multi_stream_decode(ContBin);
+ Other ->
+ io:format("~p '~ts'~n~p~n", [R1,ContBin, Other]),
+ error
+ end.
+
+byte_loop(Bin) ->
+ {continue, State} = json:decode_start(<<>>, [], #{}),
+ byte_loop(Bin, State, []).
+
+byte_loop(<<Byte, Rest/binary>>, State0, Bytes) ->
+ %% io:format("cont with '~s' ~p~n",[lists:reverse([Byte|Bytes]), State0]),
+ case json:decode_continue(<<Byte>>, State0) of
+ {continue, State} ->
+ byte_loop(Rest, State, [Byte|Bytes]);
+ {Result, [], <<>>} ->
+ %% trim to match the binary in return value
+ {Result, [], string:trim(Rest, leading)}
+ end;
+byte_loop(<<>>, State, _Bytes) ->
+ json:decode_continue(end_of_input, State).
%%
%% JSON SUITE tests
--
2.35.3