File 3421-stdlib-add-quote-unquote-functions-in-uri_string.patch of Package erlang

From 3d980d91c8fc81b02cd5cf85595bbf569ff9652e Mon Sep 17 00:00:00 2001
From: Jakub Witczak <kuba@erlang.org>
Date: Wed, 9 Feb 2022 16:34:37 +0100
Subject: [PATCH 1/2] stdlib: add quote, unquote functions in uri_string

- functions for percent encoding and decoding user data
- intended as utility functions for preparing URI components
---
 lib/common_test/test/erl2html2_SUITE.erl |   4 +-
 lib/inets/src/http_lib/http_uri.erl      |   4 +-
 lib/inets/src/http_server/httpd_util.erl |  11 +-
 lib/inets/src/inets_app/inets.app.src    |   2 +-
 lib/stdlib/doc/src/uri_string.xml        |  79 ++++++++++++-
 lib/stdlib/doc/src/uri_string_usage.xml  |  11 +-
 lib/stdlib/src/uri_string.erl            |  30 ++++-
 lib/stdlib/test/uri_string_SUITE.erl     | 139 ++++++++++++++++++++++-
 8 files changed, 258 insertions(+), 22 deletions(-)

diff --git a/lib/common_test/test/erl2html2_SUITE.erl b/lib/common_test/test/erl2html2_SUITE.erl
index b2336ff0bc..bac028350a 100644
--- a/lib/common_test/test/erl2html2_SUITE.erl
+++ b/lib/common_test/test/erl2html2_SUITE.erl
@@ -1,7 +1,7 @@
 %%
 %% %CopyrightBegin%
 %%
-%% Copyright Ericsson AB 2012-2017. All Rights Reserved.
+%% Copyright Ericsson AB 2012-2022. All Rights Reserved.
 %%
 %% Licensed under the Apache License, Version 2.0 (the "License");
 %% you may not use this file except in compliance with the License.
@@ -262,7 +262,7 @@ sax_event({startElement,_Uri,"a",_QN,Attrs},{Exports,LastExprFuncs,PrevLine}) ->
 	    {match,[FStr,EndStr]} =
 		 re:run(Name,"^(.*)-(last_expr|[0-9]+)$",
 			[{capture,all_but_first,list}]),
-	    F = list_to_atom(http_uri:decode(FStr)),
+	    F = list_to_atom(uri_string:unquote(FStr)),
 	    case EndStr of
 		"last_expr" ->
 		    true = lists:member(F,LastExprFuncs),
diff --git a/lib/inets/src/http_lib/http_uri.erl b/lib/inets/src/http_lib/http_uri.erl
index 5799163271..5795280635 100644
--- a/lib/inets/src/http_lib/http_uri.erl
+++ b/lib/inets/src/http_lib/http_uri.erl
@@ -64,8 +64,8 @@
 
 -deprecated({parse, 1, "use uri_string functions instead"}).
 -deprecated({parse, 2, "use uri_string functions instead"}).
--deprecated({encode, 1, "use uri_string functions instead"}).
--deprecated({decode, 1, "use uri_string functions instead"}).
+-deprecated({encode, 1, "use uri_string:quote function instead"}).
+-deprecated({decode, 1, "use uri_string:unquote function instead"}).
 -deprecated({scheme_defaults, 0, "use uri_string functions instead"}).
  
 
diff --git a/lib/inets/src/http_server/httpd_util.erl b/lib/inets/src/http_server/httpd_util.erl
index 70d5e776bb..c1f273af79 100644
--- a/lib/inets/src/http_server/httpd_util.erl
+++ b/lib/inets/src/http_server/httpd_util.erl
@@ -1,7 +1,7 @@
 %%
 %% %CopyrightBegin%
 %%
-%% Copyright Ericsson AB 1997-2021. All Rights Reserved.
+%% Copyright Ericsson AB 1997-2022. All Rights Reserved.
 %%
 %% Licensed under the Apache License, Version 2.0 (the "License");
 %% you may not use this file except in compliance with the License.
@@ -398,10 +398,11 @@ month(12) -> "Dec".
 %% decode_hex
 
 decode_hex(URI) ->
-    http_uri:decode(URI).
+    uri_string:unquote(URI).
 
 encode_hex(URI) ->
-    http_uri:encode(URI).
+    SafeChars = "!$()*", %% characters not encoded by deprecated http_uri:encode/1
+    uri_string:quote(URI, SafeChars).
 
 %% flatlength
 flatlength(List) ->
diff --git a/lib/stdlib/doc/src/uri_string.xml b/lib/stdlib/doc/src/uri_string.xml
index 4fe24e656f..08fc09e150 100644
--- a/lib/stdlib/doc/src/uri_string.xml
+++ b/lib/stdlib/doc/src/uri_string.xml
@@ -4,7 +4,7 @@
 <erlref>
   <header>
     <copyright>
-      <year>2017</year><year>2020</year>
+      <year>2017</year><year>2022</year>
       <holder>Ericsson AB. All Rights Reserved.</holder>
     </copyright>
     <legalnotice>
@@ -84,9 +84,14 @@
       <item>Dissecting form-urlencoded query strings into a list of key-value pairs<br></br>
       <seemfa marker="#dissect_query/1"><c>dissect_query/1</c></seemfa>
       </item>
-      <item>Decoding percent-encoded triplets<br></br>
+      <item>Decoding percent-encoded triplets in URI map or a specific component of URI<br></br>
       <seemfa marker="#percent_decode/1"><c>percent_decode/1</c></seemfa>
       </item>
+      <item>Preparing and retrieving application specific data included in URI components<br></br>
+      <seemfa marker="#quote/1"><c>quote/1</c></seemfa>
+      <seemfa marker="#quote/2"><c>quote/2</c></seemfa>
+      <seemfa marker="#unquote/1"><c>unquote/1</c></seemfa>
+      </item>
     </list>
     <p>There are four different encodings present during the handling of URIs:</p>
     <list type="bulleted">
@@ -110,6 +115,15 @@
     character encoding and it is usually defined by the protocol or surrounding text. This library
     takes the same assumption, binary and percent-encoding are handled as one configuration unit,
     they cannot be set to different values.</p>
+    <p>Quoting functions are intended to be used by URI producing application
+    during component preparation or retrieval phase to avoid conflicts between
+    data and characters used in URI syntax. Quoting functions use percent
+    encoding, but with different rules than for example during execution of
+    <c>recompose/1</c>. It is user responsibility to provide quoting
+    functions with application data only and using their output to combine an
+    URI component.<br></br>Quoting functions can for instance be used for constructing a path
+    component with a segment containing '/' character which should not collide with
+    '/' used as general delimiter in path component.</p>
   </description>
 
   <datatypes>
@@ -357,6 +371,48 @@
       </desc>
     </func>
 
+    <func>
+      <name name="quote" arity="1" since="OTP 25.0"/>
+      <fsummary>Percent encode characters out of unreserved set.</fsummary>
+      <desc>
+        <p>Replaces characters out of unreserved set with their percent encoded equivalents.</p>
+        <p>Unreserved characters defined in
+        <url href="https://www.ietf.org/rfc/rfc3986.txt">RFC 3986</url> are not quoted.</p>
+        <p><em>Example:</em></p>
+        <pre>
+1> <input><![CDATA[uri_string:quote("SomeId/04").]]></input>
+<![CDATA["SomeId%2F04"]]>
+2> <input><![CDATA[uri_string:quote(<<"SomeId/04">>).]]></input>
+<![CDATA[<<"SomeId%2F04">>]]>
+	</pre>
+
+        <warning><p>Function is not aware about any URI component context and
+        should not be used on whole URI. If applied more than once on the same
+        data, might produce unexpected results.</p></warning>
+      </desc>
+    </func>
+
+    <func>
+      <name name="quote" arity="2" since="OTP 25.0"/>
+      <fsummary>Percent encode characters out of unreserved set and user defined
+      safe list.</fsummary>
+      <desc>
+        <p>Same as <c>quote/1</c>, but <c><anno>Safe</anno></c> allows user to
+        provide a list of characters to be protected from encoding.</p>
+        <p><em>Example:</em></p>
+        <pre>
+1> <input><![CDATA[uri_string:quote("SomeId/04", "/").]]></input>
+<![CDATA["SomeId/04"]]>
+2> <input><![CDATA[uri_string:quote(<<"SomeId/04">>, "/").]]></input>
+<![CDATA[<<"SomeId/04">>]]>
+	</pre>
+
+        <warning><p>Function is not aware about any URI component context and
+        should not be used on whole URI. If applied more than once on the same
+        data, might produce unexpected results.</p></warning>
+      </desc>
+    </func>
+
     <func>
       <name name="recompose" arity="1" since="OTP 21.0"/>
       <fsummary>Recompose URI.</fsummary>
@@ -445,5 +501,24 @@
       </desc>
     </func>
 
+    <func>
+      <name name="unquote" arity="1" since="OTP 25.0"/>
+      <fsummary>Percent decode characters.</fsummary>
+      <desc>
+        <p>Percent decode characters.</p>
+
+        <p><em>Example:</em></p>
+        <pre>
+1> <input><![CDATA[uri_string:unquote("SomeId%2F04").]]></input>
+<![CDATA["SomeId/04"]]>
+2> <input><![CDATA[uri_string:unquote(<<"SomeId%2F04">>).]]></input>
+<![CDATA[<<"SomeId/04">>]]>
+	</pre>
+
+        <warning><p>Function is not aware about any URI component context and
+        should not be used on whole URI. If applied more than once on the same
+        data, might produce unexpected results.</p></warning>
+      </desc>
+    </func>
   </funcs>
 </erlref>
diff --git a/lib/stdlib/doc/src/uri_string_usage.xml b/lib/stdlib/doc/src/uri_string_usage.xml
index 72851096b7..31e2c23c45 100644
--- a/lib/stdlib/doc/src/uri_string_usage.xml
+++ b/lib/stdlib/doc/src/uri_string_usage.xml
@@ -4,8 +4,7 @@
 <chapter>
   <header>
     <copyright>
-      <year>2020</year>
-      <year>2020</year>
+      <year>2022</year>
       <holder>Ericsson AB. All Rights Reserved.</holder>
     </copyright>
     <legalnotice>
@@ -294,12 +293,14 @@
     and it happens when converting a
     <seetype marker="uri_string#uri_map"><c>uri_map()</c></seetype>
     into a <seetype marker="uri_string#uri_string"><c>uri_string()</c></seetype>.
-    There is no equivalent to a raw percent-encoding function as percent-encoding
-    shall be applied on the component level using different sets of allowed characters.
-    Applying percent-encoding directly on an input URI would not be safe just as in
+    Applying any percent-encoding directly on an input URI would not be safe just as in
     the case of
     <seemfa marker="uri_string#percent_decode/1"><c>uri_string:percent_decode/1</c></seemfa>,
     the output could be an invalid URI.
+    Quoting functions allow users to perform raw percent encoding and decoding
+    on application data which cannot be handled automatically by
+    <c>uri_string:recompose/1</c>. For example in scenario when user would
+    need to use '/' or sub-delimeter as data rather than delimeter in a path component.
     </p>
     </note>
   </section>
diff --git a/lib/stdlib/src/uri_string.erl b/lib/stdlib/src/uri_string.erl
index 05b66c12d6..dbbb835036 100644
--- a/lib/stdlib/src/uri_string.erl
+++ b/lib/stdlib/src/uri_string.erl
@@ -1,7 +1,7 @@
 %%
 %% %CopyrightBegin%
 %%
-%% Copyright Ericsson AB 2017-2020. All Rights Reserved.
+%% Copyright Ericsson AB 2017-2022. All Rights Reserved.
 %%
 %% Licensed under the Apache License, Version 2.0 (the "License");
 %% you may not use this file except in compliance with the License.
@@ -237,7 +237,10 @@
          recompose/1,
          resolve/2,
          resolve/3,
-         transcode/2]).
+         transcode/2,
+         quote/1,
+         quote/2,
+         unquote/1]).
 -export_type([error/0,
               uri_map/0,
               uri_string/0]).
@@ -518,6 +521,29 @@ percent_decode(URI) when is_list(URI) orelse
                          is_binary(URI) ->
     raw_decode(URI).
 
+-spec quote(Data) -> QuotedData when
+      Data :: unicode:chardata(),
+      QuotedData :: unicode:chardata().
+quote(D) ->
+    encode(D, fun is_unreserved/1).
+
+-spec quote(Data, Safe) -> QuotedData when
+      Data :: unicode:chardata(),
+      Safe :: string(),
+      QuotedData :: unicode:chardata().
+quote(D, Safe) ->
+    UnreservedOrSafe =
+        fun(C) ->
+                is_unreserved(C) orelse lists:member(C, Safe)
+        end,
+    encode(D, UnreservedOrSafe).
+
+-spec unquote(QuotedData) -> Data when
+      QuotedData :: unicode:chardata(),
+      Data :: unicode:chardata().
+unquote(D) ->
+    raw_decode(D).
+
 %%-------------------------------------------------------------------------
 %% Functions for working with the query part of a URI as a list
 %% of key/value pairs.
diff --git a/lib/stdlib/test/uri_string_SUITE.erl b/lib/stdlib/test/uri_string_SUITE.erl
index 04d7a23eab..6a6bff2688 100644
--- a/lib/stdlib/test/uri_string_SUITE.erl
+++ b/lib/stdlib/test/uri_string_SUITE.erl
@@ -1,7 +1,7 @@
 %%
 %% %CopyrightBegin%
 %%
-%% Copyright Ericsson AB 2008-2020. All Rights Reserved.
+%% Copyright Ericsson AB 2008-2022. All Rights Reserved.
 %%
 %% Licensed under the Apache License, Version 2.0 (the "License");
 %% you may not use this file except in compliance with the License.
@@ -20,6 +20,8 @@
 -module(uri_string_SUITE).
 
 -include_lib("common_test/include/ct.hrl").
+-compile({nowarn_deprecated_function, [{http_uri, encode, 1}]}).
+-compile({nowarn_deprecated_function, [{http_uri, decode, 1}]}).
 
 -export([all/0, suite/0,groups/0,
          normalize/1, normalize_map/1, normalize_return_map/1, normalize_negative/1,
@@ -53,7 +55,8 @@
          interop_query_latin1/1, interop_query_utf8/1,
          regression_parse/1, regression_recompose/1, regression_normalize/1,
          recompose_host_relative_path/1,
-         recompose_host_absolute_path/1
+         recompose_host_absolute_path/1,
+         quote/1
         ]).
 
 
@@ -148,7 +151,8 @@ all() ->
      regression_recompose,
      regression_normalize,
      recompose_host_relative_path,
-     recompose_host_absolute_path
+     recompose_host_absolute_path,
+     quote
     ].
 
 groups() ->
@@ -1354,3 +1358,132 @@ recompose_host_absolute_path(_Config) ->
                                path => [<<"/f">>,<<"oo">>]}),
     ok.
 
+%%-------------------------------------------------------------------------
+%% Quote tests
+%%-------------------------------------------------------------------------
+quote(_Config) ->
+    TestQuote =
+        fun(Unquoted, Quoted) ->
+                Quoted = uri_string:quote(Unquoted)
+        end,
+
+    [TestQuote(U, Q) || #{unquoted := U, quoted := Q} <- get_quote_data()],
+    [TestQuote(U, Q) || #{unquoted_b := U, quoted_b := Q} <- get_quote_data()],
+
+    Head = fun([H | _]) -> H;
+               (<<H, _/binary>>) -> H
+           end,
+
+    TestQuoteUnquote =
+        fun(Unquoted) ->
+                %% case below should be removed when functions used are removed
+                case Head(Unquoted) =< 127 of
+                    true ->
+                        Unquoted = http_uri:decode(http_uri:encode(Unquoted));
+                    _ ->
+                        ok
+                end,
+                Unquoted = uri_string:unquote(uri_string:quote(Unquoted))
+        end,
+    [TestQuoteUnquote(U) || #{unquoted := U} <- get_quote_data()],
+    [TestQuoteUnquote(U) || #{unquoted_b := U} <- get_quote_data()],
+
+    TestQuoteWithSafeList =
+        fun(Unquoted, Quoted) ->
+                Safe = "!$()*", %% characters not encoded by old http_uri:encode
+                Result = uri_string:quote(Unquoted, Safe),
+                %% case below should be removed when function used are removed
+                case Head(Unquoted) =< 127 of
+                    true ->
+                        Result = http_uri:encode(Unquoted);
+                    _ ->
+                        ok
+                end,
+                case lists:member(Head(Unquoted), Safe) of
+                    true ->
+                        Unquoted = Result;
+                    false ->
+                        Quoted = Result
+                end
+        end,
+    [TestQuoteWithSafeList(U, Q) || #{unquoted := U, quoted := Q} <- get_quote_data()],
+    [TestQuoteWithSafeList(U, Q) || #{unquoted_b := U, quoted_b := Q} <- get_quote_data()],
+
+
+    ComposePath = fun (PathSegments, Safe) ->
+                          lists:join("/", [uri_string:quote(S, Safe) ||
+                                              S <- PathSegments])
+                  end,
+
+    %% / used as data see GH-5368
+    ExampleURI1 = "https://internal.api.com/devices/Ethernet0%2F4",
+    ExampleURI1 = uri_string:recompose(
+                    #{scheme => "https",
+                      host => "internal.api.com",
+                      path => ComposePath(["devices", "Ethernet0/4"],
+                                          "")}),
+
+    %% sub-delims as data
+    %% in this example ComposePath must treat sub-delims and '%' as safe character
+    %% to avoid re-encoding encoded characters
+    ExampleURI2 = "yeti://localhost/folder/file.txt,version=1%2C1",
+    ExampleURI2 = uri_string:recompose(
+                    #{scheme => "yeti",
+                      host => "localhost",
+                      path => ComposePath(["folder", "file.txt,version=" ++
+                                           uri_string:quote("1,1")],
+                                          ",=%")}),
+
+    %% percent character as data
+    ExampleURI3 = "yeti://localhost/folder/file_with_%25.txt",
+    ExampleURI3 = uri_string:recompose(
+                    #{scheme => "yeti",
+                      host => "localhost",
+                      path => ComposePath(["folder", "file_with_" ++
+                                               uri_string:quote("%") ++ ".txt"],
+                                          "%")}),
+    ok.
+
+get_quote_data() ->
+    [%% reserved/gen-delims
+     #{unquoted => ":", quoted => "%3A", unquoted_b =><<":">>, quoted_b=> <<"%3A">>},
+     #{unquoted => "/", quoted => "%2F", unquoted_b =><<"/">>, quoted_b=> <<"%2F">>},
+     #{unquoted => "?", quoted => "%3F", unquoted_b =><<"?">>, quoted_b=> <<"%3F">>},
+     #{unquoted => "#", quoted => "%23", unquoted_b =><<"#">>, quoted_b=> <<"%23">>},
+     #{unquoted => "[", quoted => "%5B", unquoted_b =><<"[">>, quoted_b=> <<"%5B">>},
+     #{unquoted => "]", quoted => "%5D", unquoted_b =><<"]">>, quoted_b=> <<"%5D">>},
+     #{unquoted => "@", quoted => "%40", unquoted_b =><<"@">>, quoted_b=> <<"%40">>},
+     %% reserved/sub-delims
+     #{unquoted => "!", quoted => "%21", unquoted_b =><<"!">>, quoted_b=> <<"%21">>},
+     #{unquoted => "$", quoted => "%24", unquoted_b =><<"$">>, quoted_b=> <<"%24">>},
+     #{unquoted => "&", quoted => "%26", unquoted_b =><<"&">>, quoted_b=> <<"%26">>},
+     #{unquoted => "'", quoted => "%27", unquoted_b =><<"'">>, quoted_b=> <<"%27">>},
+     #{unquoted => "(", quoted => "%28", unquoted_b =><<"(">>, quoted_b=> <<"%28">>},
+     #{unquoted => ")", quoted => "%29", unquoted_b =><<")">>, quoted_b=> <<"%29">>},
+     #{unquoted => "*", quoted => "%2A", unquoted_b =><<"*">>, quoted_b=> <<"%2A">>},
+     #{unquoted => "+", quoted => "%2B", unquoted_b =><<"+">>, quoted_b=> <<"%2B">>},
+     #{unquoted => ",", quoted => "%2C", unquoted_b =><<",">>, quoted_b=> <<"%2C">>},
+     #{unquoted => ";", quoted => "%3B", unquoted_b =><<";">>, quoted_b=> <<"%3B">>},
+     #{unquoted => "=", quoted => "%3D", unquoted_b =><<"=">>, quoted_b=> <<"%3D">>},
+     %% other not unreserved
+     #{unquoted => "<", quoted => "%3C", unquoted_b =><<"<">>, quoted_b=> <<"%3C">>},
+     #{unquoted => ">", quoted => "%3E", unquoted_b =><<">">>, quoted_b=> <<"%3E">>},
+     #{unquoted => "\"", quoted => "%22", unquoted_b =><<"\"">>, quoted_b=> <<"%22">>},
+     #{unquoted => "{", quoted => "%7B", unquoted_b =><<"{">>, quoted_b=> <<"%7B">>},
+     #{unquoted => "}", quoted => "%7D", unquoted_b =><<"}">>, quoted_b=> <<"%7D">>},
+     #{unquoted => "|", quoted => "%7C", unquoted_b =><<"|">>, quoted_b=> <<"%7C">>},
+     #{unquoted => "\\", quoted => "%5C", unquoted_b =><<"\\">>, quoted_b=> <<"%5C">>},
+     #{unquoted => "^", quoted => "%5E", unquoted_b =><<"^">>, quoted_b=> <<"%5E">>},
+     #{unquoted => "%", quoted => "%25", unquoted_b =><<"%">>, quoted_b=> <<"%25">>},
+     #{unquoted => " ", quoted => "%20", unquoted_b =><<" ">>, quoted_b=> <<"%20">>},
+     %% non-ASCII
+     #{unquoted => "örebro", quoted => "%C3%B6rebro",
+       unquoted_b =><<"örebro"/utf8>>, quoted_b=> <<"%C3%B6rebro">>},
+     #{unquoted => "Łódź", quoted => "%C5%81%C3%B3d%C5%BA",
+       unquoted_b =><<"Łódź"/utf8>>, quoted_b=> <<"%C5%81%C3%B3d%C5%BA">>},
+     %% unreserved non alpha, non digit characters
+     #{unquoted => "-", quoted => "-", unquoted_b =><<"-">>, quoted_b=> <<"-">>},
+     #{unquoted => ".", quoted => ".", unquoted_b =><<".">>, quoted_b=> <<".">>},
+     #{unquoted => "_", quoted => "_", unquoted_b =><<"_">>, quoted_b=> <<"_">>},
+     #{unquoted => "~", quoted => "~", unquoted_b =><<"~">>, quoted_b=> <<"~">>}
+    ].
-- 
2.34.1

openSUSE Build Service is sponsored by