File 3421-stdlib-add-quote-unquote-functions-in-uri_string.patch of Package erlang
From 3d980d91c8fc81b02cd5cf85595bbf569ff9652e Mon Sep 17 00:00:00 2001
From: Jakub Witczak <kuba@erlang.org>
Date: Wed, 9 Feb 2022 16:34:37 +0100
Subject: [PATCH 1/2] stdlib: add quote, unquote functions in uri_string
- functions for percent encoding and decoding user data
- intended as utility functions for preparing URI components
---
lib/common_test/test/erl2html2_SUITE.erl | 4 +-
lib/inets/src/http_lib/http_uri.erl | 4 +-
lib/inets/src/http_server/httpd_util.erl | 11 +-
lib/inets/src/inets_app/inets.app.src | 2 +-
lib/stdlib/doc/src/uri_string.xml | 79 ++++++++++++-
lib/stdlib/doc/src/uri_string_usage.xml | 11 +-
lib/stdlib/src/uri_string.erl | 30 ++++-
lib/stdlib/test/uri_string_SUITE.erl | 139 ++++++++++++++++++++++-
8 files changed, 258 insertions(+), 22 deletions(-)
diff --git a/lib/common_test/test/erl2html2_SUITE.erl b/lib/common_test/test/erl2html2_SUITE.erl
index b2336ff0bc..bac028350a 100644
--- a/lib/common_test/test/erl2html2_SUITE.erl
+++ b/lib/common_test/test/erl2html2_SUITE.erl
@@ -1,7 +1,7 @@
%%
%% %CopyrightBegin%
%%
-%% Copyright Ericsson AB 2012-2017. All Rights Reserved.
+%% Copyright Ericsson AB 2012-2022. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
@@ -262,7 +262,7 @@ sax_event({startElement,_Uri,"a",_QN,Attrs},{Exports,LastExprFuncs,PrevLine}) ->
{match,[FStr,EndStr]} =
re:run(Name,"^(.*)-(last_expr|[0-9]+)$",
[{capture,all_but_first,list}]),
- F = list_to_atom(http_uri:decode(FStr)),
+ F = list_to_atom(uri_string:unquote(FStr)),
case EndStr of
"last_expr" ->
true = lists:member(F,LastExprFuncs),
diff --git a/lib/inets/src/http_lib/http_uri.erl b/lib/inets/src/http_lib/http_uri.erl
index 5799163271..5795280635 100644
--- a/lib/inets/src/http_lib/http_uri.erl
+++ b/lib/inets/src/http_lib/http_uri.erl
@@ -64,8 +64,8 @@
-deprecated({parse, 1, "use uri_string functions instead"}).
-deprecated({parse, 2, "use uri_string functions instead"}).
--deprecated({encode, 1, "use uri_string functions instead"}).
--deprecated({decode, 1, "use uri_string functions instead"}).
+-deprecated({encode, 1, "use uri_string:quote function instead"}).
+-deprecated({decode, 1, "use uri_string:unquote function instead"}).
-deprecated({scheme_defaults, 0, "use uri_string functions instead"}).
diff --git a/lib/inets/src/http_server/httpd_util.erl b/lib/inets/src/http_server/httpd_util.erl
index 70d5e776bb..c1f273af79 100644
--- a/lib/inets/src/http_server/httpd_util.erl
+++ b/lib/inets/src/http_server/httpd_util.erl
@@ -1,7 +1,7 @@
%%
%% %CopyrightBegin%
%%
-%% Copyright Ericsson AB 1997-2021. All Rights Reserved.
+%% Copyright Ericsson AB 1997-2022. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
@@ -398,10 +398,11 @@ month(12) -> "Dec".
%% decode_hex
decode_hex(URI) ->
- http_uri:decode(URI).
+ uri_string:unquote(URI).
encode_hex(URI) ->
- http_uri:encode(URI).
+ SafeChars = "!$()*", %% characters not encoded by deprecated http_uri:encode/1
+ uri_string:quote(URI, SafeChars).
%% flatlength
flatlength(List) ->
diff --git a/lib/stdlib/doc/src/uri_string.xml b/lib/stdlib/doc/src/uri_string.xml
index 4fe24e656f..08fc09e150 100644
--- a/lib/stdlib/doc/src/uri_string.xml
+++ b/lib/stdlib/doc/src/uri_string.xml
@@ -4,7 +4,7 @@
<erlref>
<header>
<copyright>
- <year>2017</year><year>2020</year>
+ <year>2017</year><year>2022</year>
<holder>Ericsson AB. All Rights Reserved.</holder>
</copyright>
<legalnotice>
@@ -84,9 +84,14 @@
<item>Dissecting form-urlencoded query strings into a list of key-value pairs<br></br>
<seemfa marker="#dissect_query/1"><c>dissect_query/1</c></seemfa>
</item>
- <item>Decoding percent-encoded triplets<br></br>
+ <item>Decoding percent-encoded triplets in URI map or a specific component of URI<br></br>
<seemfa marker="#percent_decode/1"><c>percent_decode/1</c></seemfa>
</item>
+ <item>Preparing and retrieving application specific data included in URI components<br></br>
+ <seemfa marker="#quote/1"><c>quote/1</c></seemfa>
+ <seemfa marker="#quote/2"><c>quote/2</c></seemfa>
+ <seemfa marker="#unquote/1"><c>unquote/1</c></seemfa>
+ </item>
</list>
<p>There are four different encodings present during the handling of URIs:</p>
<list type="bulleted">
@@ -110,6 +115,15 @@
character encoding and it is usually defined by the protocol or surrounding text. This library
takes the same assumption, binary and percent-encoding are handled as one configuration unit,
they cannot be set to different values.</p>
+ <p>Quoting functions are intended to be used by URI producing application
+ during component preparation or retrieval phase to avoid conflicts between
+ data and characters used in URI syntax. Quoting functions use percent
+ encoding, but with different rules than for example during execution of
+ <c>recompose/1</c>. It is user responsibility to provide quoting
+ functions with application data only and using their output to combine an
+ URI component.<br></br>Quoting functions can for instance be used for constructing a path
+ component with a segment containing '/' character which should not collide with
+ '/' used as general delimiter in path component.</p>
</description>
<datatypes>
@@ -357,6 +371,48 @@
</desc>
</func>
+ <func>
+ <name name="quote" arity="1" since="OTP 25.0"/>
+ <fsummary>Percent encode characters out of unreserved set.</fsummary>
+ <desc>
+ <p>Replaces characters out of unreserved set with their percent encoded equivalents.</p>
+ <p>Unreserved characters defined in
+ <url href="https://www.ietf.org/rfc/rfc3986.txt">RFC 3986</url> are not quoted.</p>
+ <p><em>Example:</em></p>
+ <pre>
+1> <input><![CDATA[uri_string:quote("SomeId/04").]]></input>
+<![CDATA["SomeId%2F04"]]>
+2> <input><![CDATA[uri_string:quote(<<"SomeId/04">>).]]></input>
+<![CDATA[<<"SomeId%2F04">>]]>
+ </pre>
+
+ <warning><p>Function is not aware about any URI component context and
+ should not be used on whole URI. If applied more than once on the same
+ data, might produce unexpected results.</p></warning>
+ </desc>
+ </func>
+
+ <func>
+ <name name="quote" arity="2" since="OTP 25.0"/>
+ <fsummary>Percent encode characters out of unreserved set and user defined
+ safe list.</fsummary>
+ <desc>
+ <p>Same as <c>quote/1</c>, but <c><anno>Safe</anno></c> allows user to
+ provide a list of characters to be protected from encoding.</p>
+ <p><em>Example:</em></p>
+ <pre>
+1> <input><![CDATA[uri_string:quote("SomeId/04", "/").]]></input>
+<![CDATA["SomeId/04"]]>
+2> <input><![CDATA[uri_string:quote(<<"SomeId/04">>, "/").]]></input>
+<![CDATA[<<"SomeId/04">>]]>
+ </pre>
+
+ <warning><p>Function is not aware about any URI component context and
+ should not be used on whole URI. If applied more than once on the same
+ data, might produce unexpected results.</p></warning>
+ </desc>
+ </func>
+
<func>
<name name="recompose" arity="1" since="OTP 21.0"/>
<fsummary>Recompose URI.</fsummary>
@@ -445,5 +501,24 @@
</desc>
</func>
+ <func>
+ <name name="unquote" arity="1" since="OTP 25.0"/>
+ <fsummary>Percent decode characters.</fsummary>
+ <desc>
+ <p>Percent decode characters.</p>
+
+ <p><em>Example:</em></p>
+ <pre>
+1> <input><![CDATA[uri_string:unquote("SomeId%2F04").]]></input>
+<![CDATA["SomeId/04"]]>
+2> <input><![CDATA[uri_string:unquote(<<"SomeId%2F04">>).]]></input>
+<![CDATA[<<"SomeId/04">>]]>
+ </pre>
+
+ <warning><p>Function is not aware about any URI component context and
+ should not be used on whole URI. If applied more than once on the same
+ data, might produce unexpected results.</p></warning>
+ </desc>
+ </func>
</funcs>
</erlref>
diff --git a/lib/stdlib/doc/src/uri_string_usage.xml b/lib/stdlib/doc/src/uri_string_usage.xml
index 72851096b7..31e2c23c45 100644
--- a/lib/stdlib/doc/src/uri_string_usage.xml
+++ b/lib/stdlib/doc/src/uri_string_usage.xml
@@ -4,8 +4,7 @@
<chapter>
<header>
<copyright>
- <year>2020</year>
- <year>2020</year>
+ <year>2022</year>
<holder>Ericsson AB. All Rights Reserved.</holder>
</copyright>
<legalnotice>
@@ -294,12 +293,14 @@
and it happens when converting a
<seetype marker="uri_string#uri_map"><c>uri_map()</c></seetype>
into a <seetype marker="uri_string#uri_string"><c>uri_string()</c></seetype>.
- There is no equivalent to a raw percent-encoding function as percent-encoding
- shall be applied on the component level using different sets of allowed characters.
- Applying percent-encoding directly on an input URI would not be safe just as in
+ Applying any percent-encoding directly on an input URI would not be safe just as in
the case of
<seemfa marker="uri_string#percent_decode/1"><c>uri_string:percent_decode/1</c></seemfa>,
the output could be an invalid URI.
+ Quoting functions allow users to perform raw percent encoding and decoding
+ on application data which cannot be handled automatically by
+ <c>uri_string:recompose/1</c>. For example in scenario when user would
+ need to use '/' or sub-delimeter as data rather than delimeter in a path component.
</p>
</note>
</section>
diff --git a/lib/stdlib/src/uri_string.erl b/lib/stdlib/src/uri_string.erl
index 05b66c12d6..dbbb835036 100644
--- a/lib/stdlib/src/uri_string.erl
+++ b/lib/stdlib/src/uri_string.erl
@@ -1,7 +1,7 @@
%%
%% %CopyrightBegin%
%%
-%% Copyright Ericsson AB 2017-2020. All Rights Reserved.
+%% Copyright Ericsson AB 2017-2022. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
@@ -237,7 +237,10 @@
recompose/1,
resolve/2,
resolve/3,
- transcode/2]).
+ transcode/2,
+ quote/1,
+ quote/2,
+ unquote/1]).
-export_type([error/0,
uri_map/0,
uri_string/0]).
@@ -518,6 +521,29 @@ percent_decode(URI) when is_list(URI) orelse
is_binary(URI) ->
raw_decode(URI).
+-spec quote(Data) -> QuotedData when
+ Data :: unicode:chardata(),
+ QuotedData :: unicode:chardata().
+quote(D) ->
+ encode(D, fun is_unreserved/1).
+
+-spec quote(Data, Safe) -> QuotedData when
+ Data :: unicode:chardata(),
+ Safe :: string(),
+ QuotedData :: unicode:chardata().
+quote(D, Safe) ->
+ UnreservedOrSafe =
+ fun(C) ->
+ is_unreserved(C) orelse lists:member(C, Safe)
+ end,
+ encode(D, UnreservedOrSafe).
+
+-spec unquote(QuotedData) -> Data when
+ QuotedData :: unicode:chardata(),
+ Data :: unicode:chardata().
+unquote(D) ->
+ raw_decode(D).
+
%%-------------------------------------------------------------------------
%% Functions for working with the query part of a URI as a list
%% of key/value pairs.
diff --git a/lib/stdlib/test/uri_string_SUITE.erl b/lib/stdlib/test/uri_string_SUITE.erl
index 04d7a23eab..6a6bff2688 100644
--- a/lib/stdlib/test/uri_string_SUITE.erl
+++ b/lib/stdlib/test/uri_string_SUITE.erl
@@ -1,7 +1,7 @@
%%
%% %CopyrightBegin%
%%
-%% Copyright Ericsson AB 2008-2020. All Rights Reserved.
+%% Copyright Ericsson AB 2008-2022. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
@@ -20,6 +20,8 @@
-module(uri_string_SUITE).
-include_lib("common_test/include/ct.hrl").
+-compile({nowarn_deprecated_function, [{http_uri, encode, 1}]}).
+-compile({nowarn_deprecated_function, [{http_uri, decode, 1}]}).
-export([all/0, suite/0,groups/0,
normalize/1, normalize_map/1, normalize_return_map/1, normalize_negative/1,
@@ -53,7 +55,8 @@
interop_query_latin1/1, interop_query_utf8/1,
regression_parse/1, regression_recompose/1, regression_normalize/1,
recompose_host_relative_path/1,
- recompose_host_absolute_path/1
+ recompose_host_absolute_path/1,
+ quote/1
]).
@@ -148,7 +151,8 @@ all() ->
regression_recompose,
regression_normalize,
recompose_host_relative_path,
- recompose_host_absolute_path
+ recompose_host_absolute_path,
+ quote
].
groups() ->
@@ -1354,3 +1358,132 @@ recompose_host_absolute_path(_Config) ->
path => [<<"/f">>,<<"oo">>]}),
ok.
+%%-------------------------------------------------------------------------
+%% Quote tests
+%%-------------------------------------------------------------------------
+quote(_Config) ->
+ TestQuote =
+ fun(Unquoted, Quoted) ->
+ Quoted = uri_string:quote(Unquoted)
+ end,
+
+ [TestQuote(U, Q) || #{unquoted := U, quoted := Q} <- get_quote_data()],
+ [TestQuote(U, Q) || #{unquoted_b := U, quoted_b := Q} <- get_quote_data()],
+
+ Head = fun([H | _]) -> H;
+ (<<H, _/binary>>) -> H
+ end,
+
+ TestQuoteUnquote =
+ fun(Unquoted) ->
+ %% case below should be removed when functions used are removed
+ case Head(Unquoted) =< 127 of
+ true ->
+ Unquoted = http_uri:decode(http_uri:encode(Unquoted));
+ _ ->
+ ok
+ end,
+ Unquoted = uri_string:unquote(uri_string:quote(Unquoted))
+ end,
+ [TestQuoteUnquote(U) || #{unquoted := U} <- get_quote_data()],
+ [TestQuoteUnquote(U) || #{unquoted_b := U} <- get_quote_data()],
+
+ TestQuoteWithSafeList =
+ fun(Unquoted, Quoted) ->
+ Safe = "!$()*", %% characters not encoded by old http_uri:encode
+ Result = uri_string:quote(Unquoted, Safe),
+ %% case below should be removed when function used are removed
+ case Head(Unquoted) =< 127 of
+ true ->
+ Result = http_uri:encode(Unquoted);
+ _ ->
+ ok
+ end,
+ case lists:member(Head(Unquoted), Safe) of
+ true ->
+ Unquoted = Result;
+ false ->
+ Quoted = Result
+ end
+ end,
+ [TestQuoteWithSafeList(U, Q) || #{unquoted := U, quoted := Q} <- get_quote_data()],
+ [TestQuoteWithSafeList(U, Q) || #{unquoted_b := U, quoted_b := Q} <- get_quote_data()],
+
+
+ ComposePath = fun (PathSegments, Safe) ->
+ lists:join("/", [uri_string:quote(S, Safe) ||
+ S <- PathSegments])
+ end,
+
+ %% / used as data see GH-5368
+ ExampleURI1 = "https://internal.api.com/devices/Ethernet0%2F4",
+ ExampleURI1 = uri_string:recompose(
+ #{scheme => "https",
+ host => "internal.api.com",
+ path => ComposePath(["devices", "Ethernet0/4"],
+ "")}),
+
+ %% sub-delims as data
+ %% in this example ComposePath must treat sub-delims and '%' as safe character
+ %% to avoid re-encoding encoded characters
+ ExampleURI2 = "yeti://localhost/folder/file.txt,version=1%2C1",
+ ExampleURI2 = uri_string:recompose(
+ #{scheme => "yeti",
+ host => "localhost",
+ path => ComposePath(["folder", "file.txt,version=" ++
+ uri_string:quote("1,1")],
+ ",=%")}),
+
+ %% percent character as data
+ ExampleURI3 = "yeti://localhost/folder/file_with_%25.txt",
+ ExampleURI3 = uri_string:recompose(
+ #{scheme => "yeti",
+ host => "localhost",
+ path => ComposePath(["folder", "file_with_" ++
+ uri_string:quote("%") ++ ".txt"],
+ "%")}),
+ ok.
+
+get_quote_data() ->
+ [%% reserved/gen-delims
+ #{unquoted => ":", quoted => "%3A", unquoted_b =><<":">>, quoted_b=> <<"%3A">>},
+ #{unquoted => "/", quoted => "%2F", unquoted_b =><<"/">>, quoted_b=> <<"%2F">>},
+ #{unquoted => "?", quoted => "%3F", unquoted_b =><<"?">>, quoted_b=> <<"%3F">>},
+ #{unquoted => "#", quoted => "%23", unquoted_b =><<"#">>, quoted_b=> <<"%23">>},
+ #{unquoted => "[", quoted => "%5B", unquoted_b =><<"[">>, quoted_b=> <<"%5B">>},
+ #{unquoted => "]", quoted => "%5D", unquoted_b =><<"]">>, quoted_b=> <<"%5D">>},
+ #{unquoted => "@", quoted => "%40", unquoted_b =><<"@">>, quoted_b=> <<"%40">>},
+ %% reserved/sub-delims
+ #{unquoted => "!", quoted => "%21", unquoted_b =><<"!">>, quoted_b=> <<"%21">>},
+ #{unquoted => "$", quoted => "%24", unquoted_b =><<"$">>, quoted_b=> <<"%24">>},
+ #{unquoted => "&", quoted => "%26", unquoted_b =><<"&">>, quoted_b=> <<"%26">>},
+ #{unquoted => "'", quoted => "%27", unquoted_b =><<"'">>, quoted_b=> <<"%27">>},
+ #{unquoted => "(", quoted => "%28", unquoted_b =><<"(">>, quoted_b=> <<"%28">>},
+ #{unquoted => ")", quoted => "%29", unquoted_b =><<")">>, quoted_b=> <<"%29">>},
+ #{unquoted => "*", quoted => "%2A", unquoted_b =><<"*">>, quoted_b=> <<"%2A">>},
+ #{unquoted => "+", quoted => "%2B", unquoted_b =><<"+">>, quoted_b=> <<"%2B">>},
+ #{unquoted => ",", quoted => "%2C", unquoted_b =><<",">>, quoted_b=> <<"%2C">>},
+ #{unquoted => ";", quoted => "%3B", unquoted_b =><<";">>, quoted_b=> <<"%3B">>},
+ #{unquoted => "=", quoted => "%3D", unquoted_b =><<"=">>, quoted_b=> <<"%3D">>},
+ %% other not unreserved
+ #{unquoted => "<", quoted => "%3C", unquoted_b =><<"<">>, quoted_b=> <<"%3C">>},
+ #{unquoted => ">", quoted => "%3E", unquoted_b =><<">">>, quoted_b=> <<"%3E">>},
+ #{unquoted => "\"", quoted => "%22", unquoted_b =><<"\"">>, quoted_b=> <<"%22">>},
+ #{unquoted => "{", quoted => "%7B", unquoted_b =><<"{">>, quoted_b=> <<"%7B">>},
+ #{unquoted => "}", quoted => "%7D", unquoted_b =><<"}">>, quoted_b=> <<"%7D">>},
+ #{unquoted => "|", quoted => "%7C", unquoted_b =><<"|">>, quoted_b=> <<"%7C">>},
+ #{unquoted => "\\", quoted => "%5C", unquoted_b =><<"\\">>, quoted_b=> <<"%5C">>},
+ #{unquoted => "^", quoted => "%5E", unquoted_b =><<"^">>, quoted_b=> <<"%5E">>},
+ #{unquoted => "%", quoted => "%25", unquoted_b =><<"%">>, quoted_b=> <<"%25">>},
+ #{unquoted => " ", quoted => "%20", unquoted_b =><<" ">>, quoted_b=> <<"%20">>},
+ %% non-ASCII
+ #{unquoted => "örebro", quoted => "%C3%B6rebro",
+ unquoted_b =><<"örebro"/utf8>>, quoted_b=> <<"%C3%B6rebro">>},
+ #{unquoted => "Łódź", quoted => "%C5%81%C3%B3d%C5%BA",
+ unquoted_b =><<"Łódź"/utf8>>, quoted_b=> <<"%C5%81%C3%B3d%C5%BA">>},
+ %% unreserved non alpha, non digit characters
+ #{unquoted => "-", quoted => "-", unquoted_b =><<"-">>, quoted_b=> <<"-">>},
+ #{unquoted => ".", quoted => ".", unquoted_b =><<".">>, quoted_b=> <<".">>},
+ #{unquoted => "_", quoted => "_", unquoted_b =><<"_">>, quoted_b=> <<"_">>},
+ #{unquoted => "~", quoted => "~", unquoted_b =><<"~">>, quoted_b=> <<"~">>}
+ ].
--
2.34.1