File 3271-Optimize-is_subset-and-is_disjoint-in-cerl_sets.patch of Package erlang
From 7e42e6a63d6876b6a03f50f571e5e850c9b771e8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jos=C3=A9=20Valim?= <jose.valim@plataformatec.com.br>
Date: Fri, 19 Jul 2019 23:25:16 +0200
Subject: [PATCH] Optimize is_subset and is_disjoint in cerl_sets
The new implementation use maps iterators and are more
perfomant in banchmarks by roughly 10%. More importantly,
the iterators approach allow us to short-circuit and abort
early.
fold and filter have also been changed to use iterators.
We could simply delegate to the maps' functions, but
inlining the implementation allows us to skip a double
anonymous function dispatch.
---
lib/compiler/src/cerl_sets.erl | 65 ++++++++++++++++++++++++++++++++++--------
1 file changed, 53 insertions(+), 12 deletions(-)
diff --git a/lib/compiler/src/cerl_sets.erl b/lib/compiler/src/cerl_sets.erl
index f489baf238..84e488fc55 100644
--- a/lib/compiler/src/cerl_sets.erl
+++ b/lib/compiler/src/cerl_sets.erl
@@ -153,14 +153,21 @@ intersection1(S1, []) -> S1.
Set1 :: set(Element),
Set2 :: set(Element).
-is_disjoint(S1, S2) when map_size(S1) < map_size(S2) ->
- fold(fun (_, false) -> false;
- (E, true) -> not is_element(E, S2)
- end, true, S1);
+is_disjoint(S1, S2) when map_size(S1) > map_size(S2) ->
+ is_disjoint_1(S1, maps:iterator(S2));
is_disjoint(S1, S2) ->
- fold(fun (_, false) -> false;
- (E, true) -> not is_element(E, S1)
- end, true, S2).
+ is_disjoint_1(S2, maps:iterator(S1)).
+
+is_disjoint_1(Set, Iter) ->
+ case maps:next(Iter) of
+ {K, _, NextIter} ->
+ case Set of
+ #{K := _} -> false;
+ #{} -> is_disjoint_1(Set, NextIter)
+ end;
+ none ->
+ true
+ end.
%% subtract(Set1, Set2) -> Set.
%% Return all and only the elements of Set1 which are not also in
@@ -180,8 +187,21 @@ subtract(S1, S2) ->
Set1 :: set(Element),
Set2 :: set(Element).
+is_subset(S1, S2) when map_size(S1) > map_size(S2) ->
+ false;
is_subset(S1, S2) ->
- fold(fun (E, Sub) -> Sub andalso is_element(E, S2) end, true, S1).
+ is_subset_1(S2, maps:iterator(S1)).
+
+is_subset_1(Set, Iter) ->
+ case maps:next(Iter) of
+ {K, _, NextIter} ->
+ case Set of
+ #{K := _} -> is_subset_1(Set, NextIter);
+ #{} -> false
+ end;
+ none ->
+ true
+ end.
%% fold(Fun, Accumulator, Set) -> Accumulator.
%% Fold function Fun over all elements in Set and return Accumulator.
@@ -193,8 +213,16 @@ is_subset(S1, S2) ->
AccIn :: Acc,
AccOut :: Acc.
-fold(F, Init, D) ->
- lists:foldl(fun(E,Acc) -> F(E,Acc) end,Init,maps:keys(D)).
+fold(Fun, Init, Set) ->
+ fold_1(Fun, Init, maps:iterator(Set)).
+
+fold_1(Fun, Acc, Iter) ->
+ case maps:next(Iter) of
+ {K, _, NextIter} ->
+ fold_1(Fun, Fun(K,Acc), NextIter);
+ none ->
+ Acc
+ end.
%% filter(Fun, Set) -> Set.
%% Filter Set with Fun.
@@ -203,5 +231,18 @@ fold(F, Init, D) ->
Set1 :: set(Element),
Set2 :: set(Element).
-filter(F, D) ->
- maps:filter(fun(K,_) -> F(K) end, D).
+filter(Fun, Set) ->
+ maps:from_list(filter_1(Fun, maps:iterator(Set))).
+
+filter_1(Fun, Iter) ->
+ case maps:next(Iter) of
+ {K, _, NextIter} ->
+ case Fun(K) of
+ true ->
+ [{K,ok} | filter_1(Fun, NextIter)];
+ false ->
+ filter_1(Fun, NextIter)
+ end;
+ none ->
+ []
+ end.
--
2.16.4