File 2351-Binary-replacement-with-a-function.patch of Package erlang
From 6ec2a71d83f270fc19df5ecb9a1081ea961c7360 Mon Sep 17 00:00:00 2001
From: Jan Uhlig <juhlig@hnc-agency.org>
Date: Fri, 25 Aug 2023 15:55:46 +0200
Subject: [PATCH] Binary replacement with a function
With this change, `binary:replace/3,4` also accepts a function for the
Replacement argument, for cases when more complex processing is
required to generate a replacement. The given function will be
called with the match as argument, and the returned binary will
be inserted into the result.
Co-authored-by: Maria Scott <maria-12648430@hnc-agency.org>
---
lib/stdlib/doc/src/binary.xml | 71 ++++++++++++++++---------
lib/stdlib/src/binary.erl | 32 +++++------
lib/stdlib/test/binary_module_SUITE.erl | 23 ++++++++
lib/stdlib/test/binref.erl | 8 ++-
4 files changed, 86 insertions(+), 48 deletions(-)
diff --git a/lib/stdlib/doc/src/binary.xml b/lib/stdlib/doc/src/binary.xml
index 5ed7babc7b..b3badc9424 100644
--- a/lib/stdlib/doc/src/binary.xml
+++ b/lib/stdlib/doc/src/binary.xml
@@ -574,41 +574,60 @@ store(Binary, GBSet) ->
<name name="replace" arity="4" since="OTP R14B"/>
<fsummary>Replace bytes in a binary according to a pattern.</fsummary>
<type_desc variable="OnePos">An integer() =< byte_size(<anno>Replacement</anno>)
- </type_desc>
+ if <anno>Replacement</anno> is given as a binary.</type_desc>
<desc>
- <p>Constructs a new binary by replacing the parts in
- <c><anno>Subject</anno></c> matching <c><anno>Pattern</anno></c> with
- the content of <c><anno>Replacement</anno></c>.</p>
-
- <p>If the matching subpart of <c><anno>Subject</anno></c> giving raise
- to the replacement is to be inserted in the result, option
- <c>{insert_replaced, <anno>InsPos</anno>}</c> inserts the matching part
- into <c><anno>Replacement</anno></c> at the specified position (or
- positions) before inserting <c><anno>Replacement</anno></c> into
- <c><anno>Subject</anno></c>.</p>
+ <p>Constructs a new binary by replacing the parts in
+ <c><anno>Subject</anno></c> matching <c><anno>Pattern</anno></c> with
+ <c><anno>Replacement</anno></c> if given as a literal <c>binary()</c>
+ or with the result of applying <c><anno>Replacement</anno></c> to a matching
+ subpart if given as a <c>fun</c>.</p>
+
+ <p>If <c><anno>Replacement</anno></c> is given as a <c>binary()</c> and the
+ matching subpart of <c><anno>Subject</anno></c> giving raise
+ to the replacement is to be inserted in the result, option
+ <c>{insert_replaced, <anno>InsPos</anno>}</c> inserts the matching part
+ into <c><anno>Replacement</anno></c> at the specified position (or
+ positions) before inserting <c><anno>Replacement</anno></c> into
+ <c><anno>Subject</anno></c>. If <c><anno>Replacement</anno></c> is given
+ as a <c>fun</c> instead, this option is ignored.</p>
- <p><em>Example:</em></p>
+ <p>If any position specified in <c><anno>InsPos</anno></c> > size
+ of the replacement binary, a <c>badarg</c> exception is raised.</p>
-<code>
-1> binary:replace(<<"abcde">>,<<"b">>,<<"[]">>, [{insert_replaced,1}]).
+ <p>Options <c>global</c> and <c>{scope, part()}</c> work as for
+ <seemfa marker="#split/3"><c>split/3</c></seemfa>.
+ The return type is always a <c>binary()</c>.</p>
+
+ <p>For a description of <c><anno>Pattern</anno></c>, see
+ <seemfa marker="#compile_pattern/1"><c>compile_pattern/1</c></seemfa>.</p>
+ <p><em>Examples:</em></p>
+
+<pre>
+1> <input>binary:replace(<<"abcde">>, [<<"b">>, <<"d">>], <<"X">>, []).</input>
+<<"aXcde">>
+
+2> <input>binary:replace(<<"abcde">>, [<<"b">>, <<"d">>], <<"X">>, [global]).</input>
+<<"aXcXe">>
+
+3> <input>binary:replace(<<"abcde">>, <<"b">>, <<"[]">>, [{insert_replaced, 1}]).</input>
<<"a[b]cde">>
-2> binary:replace(<<"abcde">>,[<<"b">>,<<"d">>],<<"[]">>,[global,{insert_replaced,1}]).
+
+4> <input>binary:replace(<<"abcde">>, [<<"b">>, <<"d">>], <<"[]">>, [global, {insert_replaced, 1}]).</input>
<<"a[b]c[d]e">>
-3> binary:replace(<<"abcde">>,[<<"b">>,<<"d">>],<<"[]">>,[global,{insert_replaced,[1,1]}]).
+
+5> <input>binary:replace(<<"abcde">>, [<<"b">>, <<"d">>], <<"[]">>, [global, {insert_replaced, [1, 1]}]).</input>
<<"a[bb]c[dd]e">>
-4> binary:replace(<<"abcde">>,[<<"b">>,<<"d">>],<<"[-]">>,[global,{insert_replaced,[1,2]}]).
-<<"a[b-b]c[d-d]e">></code>
- <p>If any position specified in <c><anno>InsPos</anno></c> > size
- of the replacement binary, a <c>badarg</c> exception is raised.</p>
+6> <input>binary:replace(<<"abcde">>, [<<"b">>, <<"d">>], <<"[-]">>, [global, {insert_replaced, [1, 2]}]).</input>
+<<"a[b-b]c[d-d]e">>
- <p>Options <c>global</c> and <c>{scope, part()}</c> work as for
- <seemfa marker="#split/3"><c>split/3</c></seemfa>.
- The return type is always a <c>binary()</c>.</p>
+7> <input>binary:replace(<<"abcde">>, [<<"b">>, <<"d">>], fun(M) -> <<$[, M/binary, $]>> end, []).</input>
+<<"a[b]cde">>
+
+8> <input>binary:replace(<<"abcde">>, [<<"b">>, <<"d">>], fun(M) -> <<$[, M/binary, $]>> end, [global]).</input>
+<<"a[b]c[d]e">>
+</pre>
- <p>For a description of <c><anno>Pattern</anno></c>, see
- <seemfa marker="#compile_pattern/1"><c>compile_pattern/1</c></seemfa>.
- </p>
</desc>
</func>
diff --git a/lib/stdlib/src/binary.erl b/lib/stdlib/src/binary.erl
index 52e6cbda0a..8b3be311ee 100644
--- a/lib/stdlib/src/binary.erl
+++ b/lib/stdlib/src/binary.erl
@@ -276,7 +276,7 @@ split(_, _, _) ->
Subject :: binary(),
Pattern :: PatternBinary | [PatternBinary,...] | cp(),
PatternBinary :: nonempty_binary(),
- Replacement :: binary(),
+ Replacement :: binary() | fun((binary()) -> binary()),
Result :: binary().
replace(H,N,R) ->
@@ -291,7 +291,7 @@ replace(H,N,R) ->
Subject :: binary(),
Pattern :: PatternBinary | [PatternBinary,...] | cp(),
PatternBinary :: nonempty_binary(),
- Replacement :: binary(),
+ Replacement :: binary() | fun((binary()) -> binary()),
Options :: [Option],
Option :: global | {scope, part()} | {insert_replaced, InsPos},
InsPos :: OnePos | [ OnePos ],
@@ -300,7 +300,7 @@ replace(H,N,R) ->
replace(Haystack,Needles,Replacement,Options) ->
try
- true = is_binary(Replacement), % Make badarg instead of function clause
+ true = is_binary(Replacement) orelse is_function(Replacement, 1), % Make badarg instead of function clause
{Part,Global,Insert} = get_opts_replace(Options,{no,false,[]}),
Moptlist = case Part of
no ->
@@ -317,13 +317,17 @@ replace(Haystack,Needles,Replacement,Options) ->
Match -> [Match]
end
end,
- ReplList = case Insert of
+ ReplList = case is_function(Replacement, 1) orelse Insert of
+ true ->
+ Replacement;
[] ->
- Replacement;
+ fun(_) -> Replacement end;
Y when is_integer(Y) ->
- splitat(Replacement,0,[Y]);
+ <<ReplFront:Y/binary, ReplRear/binary>> = Replacement,
+ fun(M) -> [ReplFront, M, ReplRear] end;
Li when is_list(Li) ->
- splitat(Replacement,0,lists:sort(Li))
+ Splits = splitat(Replacement,0,lists:sort(Li)),
+ fun(M) -> lists:join(M, Splits) end
end,
erlang:iolist_to_binary(do_replace(Haystack,MList,ReplList,0))
catch
@@ -337,19 +341,7 @@ replace(Haystack,Needles,Replacement,Options) ->
do_replace(H,[],_,N) ->
[binary:part(H,{N,byte_size(H)-N})];
do_replace(H,[{A,B}|T],Replacement,N) ->
- [binary:part(H,{N,A-N}),
- if
- is_list(Replacement) ->
- do_insert(Replacement, binary:part(H,{A,B}));
- true ->
- Replacement
- end
- | do_replace(H,T,Replacement,A+B)].
-
-do_insert([X],_) ->
- [X];
-do_insert([H|T],R) ->
- [H,R|do_insert(T,R)].
+ [binary:part(H,{N,A-N}), Replacement(binary:part(H, {A, B})) | do_replace(H,T,Replacement,A+B)].
splitat(H,N,[]) ->
[binary:part(H,{N,byte_size(H)-N})];
diff --git a/lib/stdlib/test/binary_module_SUITE.erl b/lib/stdlib/test/binary_module_SUITE.erl
index 954efae9b7..8127b93819 100644
--- a/lib/stdlib/test/binary_module_SUITE.erl
+++ b/lib/stdlib/test/binary_module_SUITE.erl
@@ -495,28 +495,51 @@ do_interesting(Module) ->
[] = binary:split(<<>>, <<",">>, [global,trim]),
[] = binary:split(<<>>, <<",">>, [global,trim_all]),
+ ReplaceFn = fun(Match) -> << <<(B + 1)>> || <<B>> <= Match >> end,
badarg = ?MASK_ERROR(
Module:replace(<<1,2,3,4,5,6,7,8>>,
[<<4,5>>,<<7>>,<<8>>],<<99>>,
[global,trim,{scope,{0,5}}])),
+ badarg = ?MASK_ERROR(
+ Module:replace(<<1,2,3,4,5,6,7,8>>,
+ [<<4,5>>,<<7>>,<<8>>],ReplaceFn,
+ [global,trim,{scope,{0,5}}])),
<<1,2,3,99,6,7,8>> = Module:replace(<<1,2,3,4,5,6,7,8>>,
[<<4,5>>,<<7>>,<<8>>],<<99>>,[]),
+ <<1,2,3,5,6,6,7,8>> = Module:replace(<<1,2,3,4,5,6,7,8>>,
+ [<<4,5>>,<<7>>,<<8>>],ReplaceFn,[]),
<<1,2,3,99,6,99,99>> = Module:replace(<<1,2,3,4,5,6,7,8>>,
[<<4,5>>,<<7>>,<<8>>],<<99>>,
[global]),
+ <<1,2,3,5,6,6,8,9>> = Module:replace(<<1,2,3,4,5,6,7,8>>,
+ [<<4,5>>,<<7>>,<<8>>],ReplaceFn,
+ [global]),
<<1,2,3,99,6,7,8>> = Module:replace(<<1,2,3,4,5,6,7,8>>,
[<<4,5>>,<<7>>,<<8>>],<<99>>,
[global,{scope,{0,5}}]),
+ <<1,2,3,5,6,6,7,8>> = Module:replace(<<1,2,3,4,5,6,7,8>>,
+ [<<4,5>>,<<7>>,<<8>>],ReplaceFn,
+ [global,{scope,{0,5}}]),
<<1,2,3,99,6,7,8>> = Module:replace(<<1,2,3,4,5,6,7,8>>,
[<<4,5>>,<<7>>,<<8>>],<<99>>,
[global,{scope,{0,5}}]),
+ <<1,2,3,5,6,6,7,8>> = Module:replace(<<1,2,3,4,5,6,7,8>>,
+ [<<4,5>>,<<7>>,<<8>>],ReplaceFn,
+ [global,{scope,{0,5}}]),
<<1,2,3,99,6,7,8>> = Module:replace(<<1,2,3,4,5,6,7,8>>,
[<<4,5>>,<<7>>,<<8>>],<<99>>,
[global,{scope,{0,5}}]),
+ <<1,2,3,5,6,6,7,8>> = Module:replace(<<1,2,3,4,5,6,7,8>>,
+ [<<4,5>>,<<7>>,<<8>>],ReplaceFn,
+ [global,{scope,{0,5}}]),
badarg = ?MASK_ERROR(Module:replace(<<1,2,3,4,5,6,7,8>>,
[<<4,5>>,<<7>>,<<8>>],<<99>>,
[global,{scope,{0,5}},
{insert,1}])),
+ badarg = ?MASK_ERROR(Module:replace(<<1,2,3,4,5,6,7,8>>,
+ [<<4,5>>,<<7>>,<<8>>],ReplaceFn,
+ [global,{scope,{0,5}},
+ {insert,1}])),
<<1,2,3,99,4,5,6,7,8>> = Module:replace(<<1,2,3,4,5,6,7,8>>,
[<<4,5>>,<<7>>,<<8>>],<<99>>,
[global,{scope,{0,5}},
diff --git a/lib/stdlib/test/binref.erl b/lib/stdlib/test/binref.erl
index deb1ede4df..c92a716dba 100644
--- a/lib/stdlib/test/binref.erl
+++ b/lib/stdlib/test/binref.erl
@@ -228,7 +228,7 @@ replace(Haystack,Needles0,Replacement,Options) ->
true ->
exit(badtype)
end,
- true = is_binary(Replacement), % Make badarg instead of function clause
+ true = is_binary(Replacement) orelse is_function(Replacement, 1), % Make badarg instead of function clause
{Part,Global,Insert} = get_opts_replace(Options,{nomatch,false,[]}),
{Start,End,NewStack} =
case Part of
@@ -254,7 +254,9 @@ replace(Haystack,Needles0,Replacement,Options) ->
[X]
end
end,
- ReplList = case Insert of
+ ReplList = case is_binary(Replacement) andalso Insert of
+ false ->
+ Replacement;
[] ->
Replacement;
Y when is_integer(Y) ->
@@ -274,6 +276,8 @@ do_replace(H,[],_,N) ->
do_replace(H,[{A,B}|T],Replacement,N) ->
[part(H,{N,A-N}),
if
+ is_function(Replacement) ->
+ Replacement(part(H, {A, B}));
is_list(Replacement) ->
do_insert(Replacement, part(H,{A,B}));
true ->
--
2.35.3