File 3493-Optimize-ets-operations.patch of Package erlang
From 03c893ef92bbc9cafed48941c2c419580147c1f6 Mon Sep 17 00:00:00 2001
From: Raimo Niskanen <raimo@erlang.org>
Date: Fri, 9 Apr 2021 13:04:59 +0200
Subject: [PATCH 3/4] Optimize ets operations
Use match specifications and ets:select_delete/2 to traverse and
delete in one pass. Reduce the number of passes for alloc_entry
through one pass to get all time stamps, calculate deletion
criteria, then one pass to delete.
---
lib/kernel/src/inet_db.erl | 173 +++++++++++++++++++++++--------------
1 file changed, 107 insertions(+), 66 deletions(-)
diff --git a/lib/kernel/src/inet_db.erl b/lib/kernel/src/inet_db.erl
index 7cf95dda8a..47f6258c81 100644
--- a/lib/kernel/src/inet_db.erl
+++ b/lib/kernel/src/inet_db.erl
@@ -1193,7 +1193,7 @@ handle_cast(_Msg, State) ->
-spec handle_info(term(), state()) -> {'noreply', state()}.
handle_info(refresh_timeout, State) ->
- do_refresh_cache(State#state.cache),
+ _ = delete_expired(State#state.cache, times()),
{noreply, State#state{cache_timer = init_timer()}};
handle_info(_Info, State) ->
@@ -1598,7 +1598,7 @@ is_reqname(clear_cache) -> true;
is_reqname(clear_hosts) -> true;
is_reqname(_) -> false.
-%% Add a resource record to the cache if there is space left.
+%% Add a resource record to the cache if there is a cache.
%% If the cache is full this function first deletes old entries,
%% i.e. entries with the oldest access time.
%%
@@ -1643,7 +1643,20 @@ times() ->
%% ETS match expressions
%%
--compile({inline, [dns_rr_match/3, dns_rr_match/4]}).
+-compile(
+ {inline,
+ [dns_rr_match_tm_ttl_cnt/3, dns_rr_match_cnt/1,
+ dns_rr_match/3, dns_rr_match/4]}).
+%%
+dns_rr_match_tm_ttl_cnt(TM, TTL, Cnt) ->
+ #dns_rr{
+ domain = '_', class = '_', type = '_', data = '_',
+ cnt = Cnt, tm = TM, ttl = TTL, bm = '_', func = '_'}.
+dns_rr_match_cnt(Cnt) ->
+ #dns_rr{
+ domain = '_', class = '_', type = '_', data = '_',
+ cnt = Cnt, tm = '_', ttl = '_', bm = '_', func = '_'}.
+%%
dns_rr_match(Domain, Class, Type) ->
#dns_rr{
domain = Domain, class = Class, type = Type, data = '_',
@@ -1656,6 +1669,7 @@ dns_rr_match(Domain, Class, Type, Data) ->
%% RR creation
-compile({inline, [dns_rr_add/5]}).
+%%
dns_rr_add(Domain, Class, Type, TTL, Data) ->
#dns_rr{
domain = Domain, class = Class, type = Type,
@@ -1798,81 +1812,98 @@ cache_refresh() ->
end.
%% Delete all entries with expired TTL.
-%% Returns the access time of the entry with the oldest access time
-%% in the cache.
-do_refresh_cache(CacheDb) ->
- Now = times(),
- [OldestT | RRsToDelete] =
- ets:foldl(
- fun (#dns_rr{tm = TM, ttl = TTL} = RR, [T | RRs])
- when TM + TTL < Now -> % Expired - delete it
- [T, RR | RRs];
- (#dns_rr{cnt = C}, [T | RRs])
- when C < T -> % Older than previously oldest - use this cnt
- [C | RRs];
- (#dns_rr{}, T_RRs) -> % Keep - do not delete
- T_RRs
- end, [Now], CacheDb),
- _ = [ets:delete_object(CacheDb, RR) || RR <- RRsToDelete],
- OldestT.
+%% Returns the number of deleted entries.
+%%
+delete_expired(CacheDb, TM) ->
+ ets:select_delete(
+ CacheDb,
+ [{dns_rr_match_tm_ttl_cnt('$1', '$2', '_'), [],
+ %% Delete all with tm + ttl < TM
+ [{'<', {'+', '$1', '$2'}, {const, TM}}]}]).
%% -------------------------------------------------------------------
%% Allocate room for a new entry in the cache.
+%%
%% Deletes entries with expired TTL and all entries with latest
-%% access time older than
-%% trunc((TM - OldestTM) * 0.3) + OldestTM from the cache if it
-%% is full. Does not delete more than 10% of the entries in the cache
+%% access time older than trunc((TM - OldestTM) / 3) + OldestTM
+%% from the cache if it is full.
+%%
+%% Does not delete more than 1/10 of the entries in the cache
%% though, unless they there deleted due to expired TTL.
-%% Returns: true if space for a new entry otherwise false.
+%% Returns: true if space for a new entry otherwise false
+%% (true if we have a cache since we always make room for new).
%% -------------------------------------------------------------------
alloc_entry(Db, CacheDb, TM) ->
- CurSize = ets:info(CacheDb, size),
- case ets:lookup_element(Db, cache_size, 2) of
- Size when Size =< CurSize, Size > 0 ->
- alloc_entry(CacheDb, CurSize, TM, trunc(Size * 0.1) + 1);
- Size when Size =< 0 ->
+ Size = ets:lookup_element(Db, cache_size, 2),
+ if
+ Size =< 0 ->
false;
- _Size ->
- true
- end.
-
-alloc_entry(CacheDb, OldSize, TM, N) ->
- OldestTM = do_refresh_cache(CacheDb), % Delete timedout entries
- case ets:info(CacheDb, size) of
- OldSize ->
- %% No entrys timedout
- delete_n_oldest(CacheDb, TM, OldestTM, N);
- _ ->
- true
+ true ->
+ CurSize = ets:info(CacheDb, size),
+ if
+ Size =< CurSize ->
+ N = ((Size - 1) div 10) + 1,
+ _ = delete_oldest(CacheDb, TM, N),
+ true;
+ true ->
+ true
+ end
end.
-delete_n_oldest(CacheDb, TM, OldestTM, N) ->
- DelTM = trunc((TM - OldestTM) * 0.3) + OldestTM,
- delete_older(CacheDb, DelTM, N) =/= 0.
-
-%% Delete entries with latest access time older than TM.
-%% Delete max N number of entries.
-%% Return the number of deleted entries.
+%% This deletion should always give some room since
+%% it removes a percentage of the oldest entries.
+%%
+%% Fetch all cnt times, sort them, calculate a limit
+%% as the earliest of the time 1/3 from the oldest to now,
+%% and the 1/10 oldest entry,.
+%%
+%% Delete all entries with a cnt time older than that,
+%% and all expired (tm + ttl < now).
%%
-delete_older(CacheDb, TM, N) ->
- RRsToDelete =
- ets:foldl(
- fun (#dns_rr{cnt = C} = RR, RRs)
- when C =< TM -> % Candidate for deletion
- [RR | RRs];
- (#dns_rr{}, RRs) ->
- RRs
- end, [], CacheDb),
- N - delete_older_n(CacheDb, lists:keysort(#dns_rr.cnt, RRsToDelete), N).
-
-delete_older_n(_CacheDb, RRs, 0) when is_list(RRs) ->
- 0;
-delete_older_n(_CacheDb, [], N) ->
- N;
-delete_older_n(CacheDb, [RR | RRs], N) ->
- ets:delete_object(CacheDb, RR),
- delete_older_n(CacheDb, RRs, N - 1).
+delete_oldest(CacheDb, TM, N) ->
+ case
+ lists:sort(
+ ets:select(
+ CacheDb,
+ %% All cnt vals
+ [{dns_rr_match_cnt('$1'), [], ['$1']}]))
+ %% That could be space optimized by using ets:select/3
+ %% with a limit, and storing the returned times in
+ %% gb_sets with size limitation of N. Then we would
+ %% never have to sort the whole list and find
+ %% the N:th element, but instead take the smallest
+ %% and largest elements from gb_sets.
+ %%
+ %% The size of the whole list is, however, already
+ %% much smaller than all table entries, so is is
+ %% unclear how much of an improvement that would be.
+ %%
+ %% Note that since gb_sets does not store duplicate
+ %% times, that will not work nicely if there are
+ %% many duplicate times, which is not unlikely
+ %% given the second resolution. Therefore it is
+ %% possible that gb_trees and storing the number
+ %% of occurences for a cnt time might be needed,
+ %% so insertion gets more complicated and slower,
+ %% and we need our own concept of set size.
+ %%
+ of
+ [] -> % Empty table, this should not happen,
+ 0;
+ [OldestTM | _] = TMs ->
+ DelTM_A = ((TM - OldestTM) div 3) + OldestTM,
+ DelTM_B = lists_nth(N, TMs, DelTM_A), % N:th cnt time
+ DelTM = min(DelTM_A, DelTM_B),
+ %%
+ ets:select_delete(
+ CacheDb,
+ [{dns_rr_match_tm_ttl_cnt('$1', '$2', '$3'), [],
+ %% RRs with cnt =< DelTM or tm + ttl < TM
+ [{'orelse',
+ {'=<', '$3', {const, DelTM}},
+ {'<', {'+', '$1', '$2'}, {const, TM}}}]}])
+ end.
%% as lists:delete/2, but delete all exact matches
@@ -1893,3 +1924,13 @@ lists_keydelete(K, N, [T|Ts]) when element(N, T) =:= K ->
lists_keydelete(K, N, Ts);
lists_keydelete(K, N, [X|Ts]) ->
[X|lists_keydelete(K, N, Ts)].
+
+%% as lists:nth/2 but return Default for out of bounds
+lists_nth(0, List, Default) when is_list(List) ->
+ Default;
+lists_nth(1, [H | _], _Default) ->
+ H;
+lists_nth(_N, [], Default) ->
+ Default;
+lists_nth(N, [_ | T], Default) ->
+ lists_nth(N - 1, T, Default).
--
2.26.2