Sign Up
Log In
Log In
or
Sign Up
Places
All Projects
Status Monitor
Collapse sidebar
home:Ledest:erlang:23
erlang
2841-Initial-implementation-of-EEP-50.patch
Overview
Repositories
Revisions
Requests
Users
Attributes
Meta
File 2841-Initial-implementation-of-EEP-50.patch of Package erlang
From e66941e8d7c47b973dff94c0308ea85a6be1958e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Valim?= <jose.valim@dashbit.co> Date: Thu, 12 Nov 2020 13:27:00 +0100 Subject: [PATCH] Initial implementation of EEP 50 This pull request adds support for maps in the sets module. According to my [benchmarks][bench], using maps is faster in the huge majority of cases, sometimes by multiple orders of magnitude, and in the few cases it is slower, it is by less than 10%. [bench]: https://github.com/josevalim/sets_bench --- lib/stdlib/doc/src/sets.xml | 30 +++++ lib/stdlib/src/sets.erl | 187 ++++++++++++++++++++++++------ lib/stdlib/test/sets_SUITE.erl | 56 +++++++-- lib/stdlib/test/sets_test_lib.erl | 16 ++- 4 files changed, 241 insertions(+), 48 deletions(-) diff --git a/lib/stdlib/doc/src/sets.xml b/lib/stdlib/doc/src/sets.xml index 291425c35b..8f19175d98 100644 --- a/lib/stdlib/doc/src/sets.xml +++ b/lib/stdlib/doc/src/sets.xml @@ -44,6 +44,20 @@ that while this module considers two elements as different if they do not match (<c>=:=</c>), <c>ordsets</c> considers two elements as different if and only if they do not compare equal (<c>==</c>).</p> + + <p>Erlang/OTP 24.0 introduced a new internal representation for sets + which is more performant. Developers can use this new representation + by passing the <c>{version, 2}</c> flag to <seemfa marker="#new/1"><c>new/1</c></seemfa> + and <seemfa marker="#from_list/2"><c>from_list/2</c></seemfa>, such + as <c>sets:new([{version, 2}])</c>. This new representation will + become the default in future Erlang/OTP versions. Functions that + work on two sets, such as <seemfa marker="#union/2"><c>union/2</c></seemfa> + and similar, will work with sets of different versions. In such cases, + there is no guarantee about the version of the returned set. Explicit + conversion from the old version to the new one can be done with + <c>sets:from_list(sets:to_list(Old), [{version,2}])</c>. 
+ </p> + </description> <datatypes> @@ -103,6 +117,14 @@ </desc> </func> + <func> + <name name="from_list" arity="2" since="OTP 24.0"/> + <fsummary>Convert a list into a <c>Set</c> at the given version.</fsummary> + <desc> + <p>Returns a set of the elements in <c><anno>List</anno></c> at the given version.</p> + </desc> + </func> + <func> <name name="intersection" arity="1" since=""/> <fsummary>Return the intersection of a list of <c>Sets</c>.</fsummary> @@ -174,6 +196,14 @@ </desc> </func> + <func> + <name name="new" arity="1" since="OTP 24.0"/> + <fsummary>Return an empty set at the given version.</fsummary> + <desc> + <p>Returns a new empty set at the given version.</p> + </desc> + </func> + <func> <name name="size" arity="1" since=""/> <fsummary>Return the number of elements in a set.</fsummary> diff --git a/lib/stdlib/src/sets.erl b/lib/stdlib/src/sets.erl index 7ee3454efa..344976cd4f 100644 --- a/lib/stdlib/src/sets.erl +++ b/lib/stdlib/src/sets.erl @@ -18,10 +18,13 @@ %% %CopyrightEnd% %% -%% We use the dynamic hashing techniques by Per-Åke Larsson as -%% described in "The Design and Implementation of Dynamic Hashing for -%% Sets and Tables in Icon" by Griswold and Townsend. Much of the -%% terminology comes from that paper as well. +%% The new version 2 has moved to use maps under the hood whenever a +%% map is given. + +%% The previous version (version 1) uses the dynamic hashing techniques +%% by Per-Åke Larsson as described in "The Design and Implementation +%% of Dynamic Hashing for Sets and Tables in Icon" by Griswold and +%% Townsend. Much of the terminology comes from that paper as well. %% The segments are all of the same fixed size and we just keep %% increasing the size of the top tuple as the table grows. At the @@ -44,9 +47,15 @@ -export([is_disjoint/2]). -export([subtract/2,is_subset/2]). -export([fold/3,filter/2]). +-export([new/1, from_list/2]). -export_type([set/0, set/1]). +%% This is the value used when sets are represented as maps. 
+%% We use an empty list instead of an atom as it is cheaper +%% to serialize. +-define(VALUE, []). + %% Note: mk_seg/1 must be changed too if seg_size is changed. -define(seg_size, 16). -define(max_seg, 32). @@ -54,6 +63,7 @@ -define(contract_load, 3). -define(exp_size, ?seg_size * ?expand_load). -define(con_size, ?seg_size * ?contract_load). +-compile({no_auto_import,[size/1]}). %%------------------------------------------------------------------------------ @@ -74,7 +84,7 @@ -type set() :: set(_). --opaque set(Element) :: #set{segs :: segs(Element)}. +-opaque set(Element) :: #set{segs :: segs(Element)} | #{Element => ?VALUE}. %%------------------------------------------------------------------------------ @@ -84,10 +94,41 @@ new() -> Empty = mk_seg(?seg_size), #set{empty = Empty, segs = {Empty}}. +-spec new([{version, 1..2}]) -> set(). +new([{version, 2}]) -> + #{}; +new(Opts) -> + case proplists:get_value(version, Opts, 1) of + 1 -> new(); + 2 -> new([{version, 2}]) + end. + +%% from_list([Elem]) -> Set. +%% Build a set from the elements in List. +-spec from_list(List) -> Set when + List :: [Element], + Set :: set(Element). +from_list(Ls) -> + lists:foldl(fun (E, S) -> add_element(E, S) end, new(), Ls). + +-spec from_list(List, [{version, 1..2}]) -> Set when + List :: [Element], + Set :: set(Element). +from_list(Ls, [{version, 2}]) -> + maps:from_list([{K,?VALUE}||K<-Ls]); +from_list(Ls, Opts) -> + case proplists:get_value(version, Opts, 1) of + 1 -> from_list(Ls); + 2 -> from_list(Ls, [{version, 2}]) + end. + +%%------------------------------------------------------------------------------ + %% is_set(Set) -> boolean(). %% Return 'true' if Set is a set of elements, else 'false'. -spec is_set(Set) -> boolean() when Set :: term(). +is_set(#{}) -> true; is_set(#set{}) -> true; is_set(_) -> false. @@ -95,35 +136,36 @@ is_set(_) -> false. %% Return the number of elements in Set. -spec size(Set) -> non_neg_integer() when Set :: set(). -size(S) -> S#set.size. 
+size(#{}=S) -> map_size(S); +size(#set{size=Size}) -> Size. %% is_empty(Set) -> boolean(). %% Return 'true' if Set is an empty set, otherwise 'false'. -spec is_empty(Set) -> boolean() when Set :: set(). -is_empty(S) -> S#set.size=:=0. +is_empty(#{}=S) -> map_size(S)=:=0; +is_empty(#set{size=Size}) -> Size=:=0. %% to_list(Set) -> [Elem]. %% Return the elements in Set as a list. -spec to_list(Set) -> List when Set :: set(Element), List :: [Element]. -to_list(S) -> +to_list(#{}=S) -> + maps:keys(S); +to_list(#set{} = S) -> fold(fun (Elem, List) -> [Elem|List] end, [], S). -%% from_list([Elem]) -> Set. -%% Build a set from the elements in List. --spec from_list(List) -> Set when - List :: [Element], - Set :: set(Element). -from_list(L) -> - lists:foldl(fun (E, S) -> add_element(E, S) end, new(), L). - %% is_element(Element, Set) -> boolean(). %% Return 'true' if Element is an element of Set, else 'false'. -spec is_element(Element, Set) -> boolean() when Set :: set(Element). -is_element(E, S) -> +is_element(E, #{}=S) -> + case S of + #{E := _} -> true; + _ -> false + end; +is_element(E, #set{}=S) -> Slot = get_slot(S, E), Bkt = get_bucket(S, Slot), lists:member(E, Bkt). @@ -133,7 +175,9 @@ is_element(E, S) -> -spec add_element(Element, Set1) -> Set2 when Set1 :: set(Element), Set2 :: set(Element). -add_element(E, S0) -> +add_element(E, #{}=S) -> + S#{E=>?VALUE}; +add_element(E, #set{}=S0) -> Slot = get_slot(S0, E), Bkt = get_bucket(S0, Slot), case lists:member(E, Bkt) of @@ -149,7 +193,9 @@ add_element(E, S0) -> -spec del_element(Element, Set1) -> Set2 when Set1 :: set(Element), Set2 :: set(Element). -del_element(E, S0) -> +del_element(E, #{}=S) -> + maps:remove(E, S); +del_element(E, #set{}=S0) -> Slot = get_slot(S0, E), Bkt = get_bucket(S0, Slot), case lists:member(E, Bkt) of @@ -180,10 +226,15 @@ update_bucket(Set, Slot, NewBucket) -> Set1 :: set(Element), Set2 :: set(Element), Set3 :: set(Element). 
-union(S1, S2) when S1#set.size < S2#set.size -> - fold(fun (E, S) -> add_element(E, S) end, S2, S1); +union(#{}=S1, #{}=S2) -> + maps:merge(S1,S2); union(S1, S2) -> - fold(fun (E, S) -> add_element(E, S) end, S1, S2). + case size(S1) < size(S2) of + true -> + fold(fun (E, S) -> add_element(E, S) end, S2, S1); + false -> + fold(fun (E, S) -> add_element(E, S) end, S1, S2) + end. %% union([Set]) -> Set %% Return the union of the list of sets. @@ -206,10 +257,15 @@ union1(S1, []) -> S1. Set1 :: set(Element), Set2 :: set(Element), Set3 :: set(Element). -intersection(S1, S2) when S1#set.size < S2#set.size -> - filter(fun (E) -> is_element(E, S2) end, S1); +intersection(#{}=S1, #{}=S2) -> + maps:intersect(S1, S2); intersection(S1, S2) -> - filter(fun (E) -> is_element(E, S1) end, S2). + case size(S1) < size(S2) of + true -> + filter(fun (E) -> is_element(E, S2) end, S1); + false -> + filter(fun (E) -> is_element(E, S1) end, S2) + end. %% intersection([Set]) -> Set. %% Return the intersection of the list of sets. @@ -230,14 +286,35 @@ intersection1(S1, []) -> S1. -spec is_disjoint(Set1, Set2) -> boolean() when Set1 :: set(Element), Set2 :: set(Element). -is_disjoint(S1, S2) when S1#set.size < S2#set.size -> - fold(fun (_, false) -> false; - (E, true) -> not is_element(E, S2) - end, true, S1); +is_disjoint(#{}=S1, #{}=S2) -> + if + map_size(S1) < map_size(S2) -> + is_disjoint_1(S2, maps:iterator(S1)); + true -> + is_disjoint_1(S1, maps:iterator(S2)) + end; is_disjoint(S1, S2) -> - fold(fun (_, false) -> false; - (E, true) -> not is_element(E, S1) - end, true, S2). + case size(S1) < size(S2) of + true -> + fold(fun (_, false) -> false; + (E, true) -> not is_element(E, S2) + end, true, S1); + false -> + fold(fun (_, false) -> false; + (E, true) -> not is_element(E, S1) + end, true, S2) + end. + +is_disjoint_1(Set, Iter) -> + case maps:next(Iter) of + {K, _, NextIter} -> + case Set of + #{K := _} -> false; + #{} -> is_disjoint_1(Set, NextIter) + end; + none -> + true + end. 
%% subtract(Set1, Set2) -> Set. %% Return all and only the elements of Set1 which are not also in @@ -255,9 +332,28 @@ subtract(S1, S2) -> -spec is_subset(Set1, Set2) -> boolean() when Set1 :: set(Element), Set2 :: set(Element). + +is_subset(#{}=S1, #{}=S2) -> + if + map_size(S1) > map_size(S2) -> + false; + true -> + is_subset_1(S2, maps:iterator(S1)) + end; is_subset(S1, S2) -> fold(fun (E, Sub) -> Sub andalso is_element(E, S2) end, true, S1). +is_subset_1(Set, Iter) -> + case maps:next(Iter) of + {K, _, NextIter} -> + case Set of + #{K := _} -> is_subset_1(Set, NextIter); + #{} -> false + end; + none -> + true + end. + %% fold(Fun, Accumulator, Set) -> Accumulator. %% Fold function Fun over all elements in Set and return Accumulator. -spec fold(Function, Acc0, Set) -> Acc1 when @@ -267,7 +363,16 @@ is_subset(S1, S2) -> Acc1 :: Acc, AccIn :: Acc, AccOut :: Acc. -fold(F, Acc, D) -> fold_set(F, Acc, D). +fold(F, Acc, #{}=D) -> fold_1(F, Acc, maps:iterator(D)); +fold(F, Acc, #set{}=D) -> fold_set(F, Acc, D). + +fold_1(Fun, Acc, Iter) -> + case maps:next(Iter) of + {K, _, NextIter} -> + fold_1(Fun, Fun(K,Acc), NextIter); + none -> + Acc + end. %% filter(Fun, Set) -> Set. %% Filter Set with Fun. @@ -275,7 +380,21 @@ fold(F, Acc, D) -> fold_set(F, Acc, D). Pred :: fun((Element) -> boolean()), Set1 :: set(Element), Set2 :: set(Element). -filter(F, D) -> filter_set(F, D). +filter(F, #{}=D) -> maps:from_list(filter_1(F, maps:iterator(D))); +filter(F, #set{}=D) -> filter_set(F, D). + +filter_1(Fun, Iter) -> + case maps:next(Iter) of + {K, _, NextIter} -> + case Fun(K) of + true -> + [{K,?VALUE} | filter_1(Fun, NextIter)]; + false -> + filter_1(Fun, NextIter) + end; + none -> + [] + end. %% get_slot(Hashdb, Key) -> Slot. %% Get the slot. 
First hash on the new range, if we hit a bucket diff --git a/lib/stdlib/test/sets_SUITE.erl b/lib/stdlib/test/sets_SUITE.erl index 2c1b388d52..140c2e4b43 100644 --- a/lib/stdlib/test/sets_SUITE.erl +++ b/lib/stdlib/test/sets_SUITE.erl @@ -28,7 +28,7 @@ init_per_testcase/2,end_per_testcase/2, create/1,add_element/1,del_element/1, subtract/1,intersection/1,union/1,is_subset/1, - is_set/1,is_empty/1,fold/1,filter/1, + is_disjoint/1,is_set/1,is_empty/1,fold/1,filter/1, take_smallest/1,take_largest/1, iterate/1]). -include_lib("common_test/include/ct.hrl"). @@ -48,7 +48,7 @@ suite() -> all() -> [create, add_element, del_element, subtract, intersection, union, is_subset, is_set, fold, filter, - take_smallest, take_largest, iterate, is_empty]. + take_smallest, take_largest, iterate, is_empty, is_disjoint]. groups() -> []. @@ -123,7 +123,7 @@ del_element(Config) when is_list(Config) -> del_element_1(List, M) -> S0 = M(from_list, List), Empty = foldl(fun(El, Set) -> M(del_element, {El,Set}) end, S0, List), - Empty = M(empty, []), + true = M(equal, {Empty,M(empty, [])}), true = M(is_empty, Empty), S1 = foldl(fun(El, Set) -> M(add_element, {El,Set}) @@ -299,6 +299,22 @@ is_subset_1(List, M) -> ], res_to_set(Res, M, 0, []). +is_disjoint(Config) when is_list(Config) -> + test_all([{1,132},{253,270},{299,311}], fun is_disjoint_1/2). + +is_disjoint_1(List, M) -> + S = M(from_list, List), + Empty = M(empty, []), + + true = M(is_disjoint, {Empty,Empty}), + true = M(is_disjoint, {Empty,S}), + true = M(is_disjoint, {S,Empty}), + false = M(is_disjoint, {S,S}), + + true = M(is_disjoint, {M(singleton, make_ref()),S}), + true = M(is_disjoint, {S,M(singleton, make_ref())}), + S. + check_subset(X, Y, M) -> check_one_subset(Y, X, M), check_one_subset(X, Y, M). 
@@ -481,13 +497,37 @@ iterate_set_1(M, {E, I}, R) -> sets_mods() -> Ordsets = sets_test_lib:new(ordsets, fun(X, Y) -> X == Y end), - Sets = sets_test_lib:new(sets, fun(X, Y) -> - lists:sort(sets:to_list(X)) == - lists:sort(sets:to_list(Y)) end), + + NewSets = sets_test_lib:new(sets, fun(X, Y) -> X == Y end, + fun() -> sets:new([{version,2}]) end, + fun(X) -> sets:from_list(X, [{version,2}]) end), + + MixSets = sets_test_lib:new(sets, fun(X, Y) -> + lists:sort(sets:to_list(X)) == + lists:sort(sets:to_list(Y)) end, + fun mixed_new/0, fun mixed_from_list/1), + + OldSets = sets_test_lib:new(sets, fun(X, Y) -> + lists:sort(sets:to_list(X)) == + lists:sort(sets:to_list(Y)) end, + fun sets:new/0, fun sets:from_list/1), + Gb = sets_test_lib:new(gb_sets, fun(X, Y) -> - gb_sets:to_list(X) == + gb_sets:to_list(X) == gb_sets:to_list(Y) end), - [Ordsets,Sets,Gb]. + [Ordsets,OldSets,MixSets,NewSets,Gb]. + +mixed_new() -> + case erlang:erase(sets_type) of + undefined -> erlang:put(sets_type, deprecated), sets:new([{version,2}]); + deprecated -> sets:new() + end. + +mixed_from_list(L) -> + case erlang:erase(sets_type) of + undefined -> erlang:put(sets_type, deprecated), sets:from_list(L, [{version,2}]); + deprecated -> sets:from_list(L) + end. test_all(Tester) -> Res = [begin diff --git a/lib/stdlib/test/sets_test_lib.erl b/lib/stdlib/test/sets_test_lib.erl index e4d476ba54..076e310206 100644 --- a/lib/stdlib/test/sets_test_lib.erl +++ b/lib/stdlib/test/sets_test_lib.erl @@ -20,18 +20,22 @@ -module(sets_test_lib). --export([new/2]). +-export([new/2, new/4]). new(Mod, Eq) -> + new(Mod, Eq, fun Mod:new/0, fun Mod:from_list/1). 
+ +new(Mod, Eq, New, FromList) -> fun (add_element, {El,S}) -> add_element(Mod, El, S); (del_element, {El,S}) -> del_element(Mod, El, S); - (empty, []) -> Mod:new(); + (empty, []) -> New(); (equal, {S1,S2}) -> Eq(S1, S2); (filter, {F,S}) -> filter(Mod, F, S); (fold, {F,A,S}) -> fold(Mod, F, A, S); - (from_list, L) -> Mod:from_list(L); + (from_list, L) -> FromList(L); (intersection, {S1,S2}) -> intersection(Mod, Eq, S1, S2); (intersection, Ss) -> intersection(Mod, Eq, Ss); + (is_disjoint, {S,Set}) -> Mod:is_disjoint(S, Set); (is_empty, S) -> Mod:is_empty(S); (is_set, S) -> Mod:is_set(S); (is_subset, {S,Set}) -> is_subset(Mod, Eq, S, Set); @@ -39,7 +43,7 @@ new(Mod, Eq) -> (iterator_from, {Start, S}) -> Mod:iterator_from(Start, S); (module, []) -> Mod; (next, I) -> Mod:next(I); - (singleton, E) -> singleton(Mod, E); + (singleton, E) -> singleton(Mod, FromList, E); (size, S) -> Mod:size(S); (subtract, {S1,S2}) -> subtract(Mod, S1, S2); (to_list, S) -> Mod:to_list(S); @@ -47,10 +51,10 @@ new(Mod, Eq) -> (union, Ss) -> union(Mod, Eq, Ss) end. -singleton(Mod, E) -> +singleton(Mod, FromList, E) -> case erlang:function_exported(Mod, singleton, 1) of true -> Mod:singleton(E); - false -> Mod:from_list([E]) + false -> FromList([E]) end. add_element(Mod, El, S0) -> -- 2.26.2
Locations
Projects
Search
Status Monitor
Help
OpenBuildService.org
Documentation
API Documentation
Code of Conduct
Contact
Support
@OBShq
Terms
openSUSE Build Service is sponsored by
The Open Build Service is an
openSUSE project
.
Sign Up
Log In
Places
Places
All Projects
Status Monitor