File 4251-Honour-call-timeout-combined-with-servfail_retry_tim.patch of Package erlang
From d97c1673a6a0d8fc8c18817c2a551e1d28bc4ea4 Mon Sep 17 00:00:00 2001
From: Raimo Niskanen <raimo@erlang.org>
Date: Wed, 3 Feb 2021 16:43:59 +0100
Subject: [PATCH] Honour call timeout combined with servfail_retry_timeout
---
lib/kernel/doc/src/inet_res.xml | 12 +-
lib/kernel/src/inet_res.erl | 212 +++++++++++++++++++++-----------
2 files changed, 148 insertions(+), 76 deletions(-)
diff --git a/lib/kernel/doc/src/inet_res.xml b/lib/kernel/doc/src/inet_res.xml
index f50915a2e2..690d1c61d1 100644
--- a/lib/kernel/doc/src/inet_res.xml
+++ b/lib/kernel/doc/src/inet_res.xml
@@ -4,7 +4,7 @@
<erlref>
<header>
<copyright>
- <year>2009</year><year>2020</year>
+ <year>2009</year><year>2021</year>
<holder>Ericsson AB. All Rights Reserved.</holder>
</copyright>
<legalnotice>
@@ -75,9 +75,13 @@
<marker id="servfail_retry_timeout"/>
<p>But before all name servers are tried again, there is a
(user configurable) timeout, <c>servfail_retry_timeout</c>.
- The point of this is to prevent the new query to be handled
- by to the servfail cache (a client that is to eager will
- actually only get what is in the servfail cache). </p>
+ The point of this is to prevent the new query to be handled by
+ a server's servfail cache (a client that is to eager will
+ actually only get what is in the servfail cache).
+ If there is too little time left
+ of the resolver call's timeout to do a retry,
+ the resolver call may return
+ before the call's timeout has expired. </p>
<p>For queries not using the <c>search</c> list,
if the query to all <c>nameservers</c> results in
diff --git a/lib/kernel/src/inet_res.erl b/lib/kernel/src/inet_res.erl
index e03cf76994..e323ad0caf 100644
--- a/lib/kernel/src/inet_res.erl
+++ b/lib/kernel/src/inet_res.erl
@@ -1,7 +1,7 @@
%%
%% %CopyrightBegin%
%%
-%% Copyright Ericsson AB 1997-2020. All Rights Reserved.
+%% Copyright Ericsson AB 1997-2021. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
@@ -696,15 +696,15 @@ udp_send(#sock{inet=I}, {A,B,C,D}=IP, Port, Buffer)
udp_recv(#sock{inet6=I}, {A,B,C,D,E,F,G,H}=IP, Port, Timeout, Decode)
when ?ip6(A,B,C,D,E,F,G,H), ?port(Port), 0 =< Timeout ->
- do_udp_recv(I, IP, Port, Timeout, Decode, deadline(Timeout), Timeout);
+ do_udp_recv(I, IP, Port, Timeout, Decode, time(Timeout), Timeout);
udp_recv(#sock{inet=I}, {A,B,C,D}=IP, Port, Timeout, Decode)
when ?ip(A,B,C,D), ?port(Port), 0 =< Timeout ->
- do_udp_recv(I, IP, Port, Timeout, Decode, deadline(Timeout), Timeout).
+ do_udp_recv(I, IP, Port, Timeout, Decode, time(Timeout), Timeout).
-do_udp_recv(_I, _IP, _Port, 0, _Decode, _Deadline, PollCnt)
+do_udp_recv(_I, _IP, _Port, 0, _Decode, _Time, PollCnt)
when PollCnt =< 0 ->
timeout;
-do_udp_recv(I, IP, Port, Timeout, Decode, Deadline, PollCnt) ->
+do_udp_recv(I, IP, Port, Timeout, Decode, Time, PollCnt) ->
case gen_udp:recv(I, 0, Timeout) of
{ok,Reply} ->
case Decode(Reply) of
@@ -720,10 +720,10 @@ do_udp_recv(I, IP, Port, Timeout, Decode, Deadline, PollCnt) ->
%% an infinite loop here.
%%
do_udp_recv(
- I, IP, Port, Timeout, Decode, Deadline, PollCnt-50);
+ I, IP, Port, Timeout, Decode, Time, PollCnt-50);
false ->
- T = timeout(Deadline),
- do_udp_recv(I, IP, Port, T, Decode, Deadline, PollCnt);
+ do_udp_recv(
+ I, IP, Port, timeout(Time), Decode, Time, PollCnt);
Result ->
Result
end;
@@ -767,46 +767,45 @@ do_query(#q{options=#options{retry=Retry}}=Q, NSs, Timer) ->
%% so a failure will be a timeout,
%% unless a name server says otherwise
Reason = timeout,
+ %% Verify that the nameservers list contains only 2-tuples
+ %% to protect our internal servfail_retry mechanism from surprises
+ lists:all(
+ fun (NS) when tuple_size(NS) =:= 2 -> true;
+ (_) -> false
+ end, NSs) orelse
+ erlang:error(badarg, [Q,NSs,Timer]),
query_retries(Q, NSs, Timer, Retry, 0, #sock{}, Reason).
-%% Loop until out of name servers or retries
+%% Loop until out of retries or name servers
%%
-query_retries(_Q, _NSs, _Timer, Retry, Retry, S, Reason) ->
+query_retries(_Q, _NSs, _Timer, Retry, I, S, Reason) when Retry =:= I ->
query_retries_error(S, Reason);
query_retries(_Q, [], _Timer, _Retry, _I, S, Reason) ->
query_retries_error(S, Reason);
-query_retries(Q, NSs, Timer, Retry, I, S_0, Reason) ->
- servfail_retry_wait(Q, I),
- query_nss(Q, NSs, Timer, Retry, I, S_0, Reason, NSs).
-
-servfail_retry_wait(_Q, 0) ->
- ok;
-servfail_retry_wait(#q{options = #options{servfail_retry_timeout = T}}, _)
- when (T > 0) ->
- receive after T -> ok end;
-servfail_retry_wait(_, _) ->
- ok.
+query_retries(Q, NSs, Timer, Retry, I, S, Reason) ->
+ query_nss(Q, NSs, Timer, Retry, I, S, Reason, []).
-
-%% Loop for all name servers, for each:
+%% For each name server:
%% If EDNS is enabled, try that first,
%% and for selected failures fall back to plain DNS.
%%
-query_nss(Q, NSs, Timer, Retry, I, S, Reason, []) ->
+query_nss(Q, [], Timer, Retry, I, S, Reason, RetryNSs) ->
%% End of name servers list, do a new retry
- query_retries(Q, NSs, Timer, Retry, I+1, S, Reason);
-query_nss(#q{edns = undefined}=Q, NSs, Timer, Retry, I, S, Reason, TryNSs) ->
- query_nss_dns(Q, NSs, Timer, Retry, I, S, Reason, TryNSs);
-query_nss(Q, NSs, Timer, Retry, I, S, Reason, TryNSs) ->
- query_nss_edns(Q, NSs, Timer, Retry, I, S, Reason, TryNSs).
+ %% with the remaining name servers
+ query_retries(Q, lists:reverse(RetryNSs), Timer, Retry, I+1, S, Reason);
+query_nss(#q{edns = undefined}=Q, NSs, Timer, Retry, I, S, Reason, RetryNSs) ->
+ query_nss_dns(Q, NSs, Timer, Retry, I, S, Reason, RetryNSs);
+query_nss(Q, NSs, Timer, Retry, I, S, Reason, RetryNSs) ->
+ query_nss_edns(Q, NSs, Timer, Retry, I, S, Reason, RetryNSs).
query_nss_edns(
#q{options =
#options{
udp_payload_size = PSz}=Options,
edns = {Id,Buffer}}=Q,
- NSs, Timer, Retry, I, S_0, Reason, [{IP,Port}=NS|TryNSs]=TryNSs_0) ->
+ [NsSpec|NSs], Timer, Retry, I, S_0, Reason, RetryNSs) ->
%%
+ {IP,Port} = NS = servfail_retry_wait(NsSpec),
{S,Result} =
query_ns(
S_0, Id, Buffer, IP, Port, Timer, Retry, I, Options, PSz),
@@ -816,19 +815,25 @@ query_nss_edns(
E =:= notimp;
E =:= servfail;
E =:= badvers ->
- %% The server did not like that,
- %% ignore that error and try plain DNS
+ %% The server did not like that.
+ %% Ignore that error and try plain DNS.
+ %%
+ %% We ignore the servfail_retry_timeout here,
+ %% assuming that if the servfail was due to us using EDNS,
+ %% a DNS query might work, therefore we do not
+ %% count this failure as a try.
query_nss_dns(
- Q, NSs, Timer, Retry, I, S, Reason, TryNSs_0);
+ Q, [NS|NSs], Timer, Retry, I, S, Reason, RetryNSs);
_ ->
query_nss_result(
- Q, NSs, Timer, Retry, I, S, Reason, TryNSs, NS, Result)
+ Q, NSs, Timer, Retry, I, S, Reason, RetryNSs, NS, Result)
end.
query_nss_dns(
#q{dns = Qdns}=Q_0,
- NSs, Timer, Retry, I, S_0, Reason, [{IP,Port}=NS|TryNSs]) ->
+ [NsSpec|NSs], Timer, Retry, I, S_0, Reason, RetryNSs) ->
%%
+ {IP,Port} = NS = servfail_retry_wait(NsSpec),
#q{options = Options,
dns = {Id,Buffer}}=Q =
if
@@ -839,51 +844,89 @@ query_nss_dns(
query_ns(
S_0, Id, Buffer, IP, Port, Timer, Retry, I, Options, ?PACKETSZ),
query_nss_result(
- Q, NSs, Timer, Retry, I, S, Reason, TryNSs, NS, Result).
+ Q, NSs, Timer, Retry, I, S, Reason, RetryNSs, NS, Result).
-query_nss_result(Q, NSs, Timer, Retry, I, S, Reason, TryNSs, NS, Result) ->
+
+%% servfail_retry NsSpec handling.
+%%
+%% A NsSpec is either a NS = {IP, Port},
+%% or for a servfail_retry_timeout, the nameserver wrapped
+%% in a tuple with the earliest time to contact
+%% the nameserver again.
+%%
+%% When unwrapping; wait until it is time before returning
+%% the nameserver.
+
+%% Wrap with retry time
+servfail_retry_time(RetryTimeout, NS) ->
+ {servfail_retry, time(RetryTimeout), NS}.
+
+%% Unwrap and wait
+servfail_retry_wait(NsSpec) ->
+ case NsSpec of
+ {servfail_retry, Time, NS} ->
+ wait(timeout(Time)),
+ NS;
+ {_IP,_Port} = NS->
+ NS
+ end.
+
+
+query_nss_result(Q, NSs, Timer, Retry, I, S, Reason, RetryNSs, NS, Result) ->
case Result of
{ok,_} ->
_ = udp_close(S),
Result;
timeout -> % Out of total time timeout
query_retries_error(S, Reason); % The best reason we have
- {error,timeout} -> % Query timeout
- %% Try next server, may retry this server later
- query_nss(Q, NSs, Timer, Retry, I, S, Reason, TryNSs);
- {error,{nxdomain,_}=NewReason} ->
- query_retries_error(S, NewReason); % Definite answer
+ {error,{nxdomain=E,_}} ->
+ query_retries_error(S, E); % Definite answer
{error,{E,_}=NewReason}
when E =:= qfmterror;
E =:= notimp;
E =:= refused;
- E =:= badvers ->
+ E =:= badvers;
+ E =:= unknown ->
%% The server did not like that.
- %% Remove this server from retry list since
+ %% Do not retry this server since
%% it will not answer differently on the next retry.
- NewNSs = lists:delete(NS, NSs),
- query_nss(Q, NewNSs, Timer, Retry, I, S, NewReason, TryNSs);
+ query_nss(Q, NSs, Timer, Retry, I, S, NewReason, RetryNSs);
{error,E=NewReason}
when E =:= formerr;
E =:= enetunreach;
E =:= econnrefused ->
%% Could not decode answer, or network problem.
- %% Remove this server from retry list.
- NewNSs = lists:delete(NS, NSs),
- query_nss(Q, NewNSs, Timer, Retry, I, S, NewReason, TryNSs);
+ %% Do not retry this server.
+ query_nss(Q, NSs, Timer, Retry, I, S, NewReason, RetryNSs);
+ {error,timeout} -> % Query timeout
+ %% Try next server, may retry this server
+ query_nss(Q, NSs, Timer, Retry, I, S, Reason, [NS|RetryNSs]);
+ {error,{servfail,_}=NewReason} ->
+ RetryTimeout = Q#q.options#options.servfail_retry_timeout,
+ case inet:timeout(RetryTimeout, Timer) of
+ RetryTimeout ->
+ NsSpec = servfail_retry_time(RetryTimeout, NS),
+ query_nss(
+ Q, NSs, Timer, Retry, I, S, NewReason,
+ [NsSpec|RetryNSs]);
+ _ ->
+ %% No time for a new retry with this server
+ %% - do not retry this server
+ query_nss(
+ Q, NSs, Timer, Retry, I, S, NewReason, RetryNSs)
+ end;
{error,NewReason} ->
- %% Try next server, may retry this server later
- query_nss(Q, NSs, Timer, Retry, I, S, NewReason, TryNSs)
+ %% NewReason =
+ %% {error,badid} |
+ %% {error,{noquery,Msg}} |
+ %% {error,OtherSocketError}
+ %% Try next server, may retry this server
+ query_nss(Q, NSs, Timer, Retry, I, S, NewReason, [NS|RetryNSs])
end.
query_retries_error(S, Reason) ->
_ = udp_close(S),
- case Reason of
- {nxdomain, _} ->
- {error, nxdomain};
- _ ->
- {error, Reason}
- end.
+ {error, Reason}.
query_ns(S0, Id, Buffer, IP, Port, Timer, Retry, I,
@@ -892,18 +935,22 @@ query_ns(S0, Id, Buffer, IP, Port, Timer, Retry, I,
case UseVC orelse iolist_size(Buffer) > PSz of
true ->
TcpTimeout = inet:timeout(Tm*5, Timer),
- {S0,query_tcp(TcpTimeout, Id, Buffer, IP, Port, Verbose)};
+ {S0,
+ query_tcp(TcpTimeout, Id, Buffer, IP, Port, Verbose)};
false ->
case udp_open(S0, IP) of
{ok,S} ->
- Timeout =
+ UdpTimeout =
inet:timeout( (Tm * (1 bsl I)) div Retry, Timer),
- case query_udp(
- S, Id, Buffer, IP, Port, Timeout, Verbose) of
+ case
+ query_udp(
+ S, Id, Buffer, IP, Port, UdpTimeout, Verbose)
+ of
{ok,#dns_rec{header=H}} when H#dns_header.tc ->
TcpTimeout = inet:timeout(Tm*5, Timer),
- {S, query_tcp(
- TcpTimeout, Id, Buffer, IP, Port, Verbose)};
+ {S,
+ query_tcp(
+ TcpTimeout, Id, Buffer, IP, Port, Verbose)};
{error, econnrefused} = Err ->
ok = udp_close(S),
{#sock{}, Err};
@@ -1017,10 +1064,10 @@ decode_answer(Answer, Id, Verbose) ->
?NOTIMP -> {error,{notimp,Msg}};
?REFUSED -> {error,{refused,Msg}};
?BADVERS -> {error,{badvers,Msg}};
- _ -> {error,{unknown,Msg}}
+ _ -> {error,{unknown,Msg}}
end;
- Error ->
- ?verbose(Verbose, "Got reply: ~p~n", [Error]),
+ {error, formerr} = Error ->
+ ?verbose(Verbose, "Got reply: decode format error~n", []),
Error
end.
@@ -1098,11 +1145,32 @@ dns_msg(Msg) ->
{Type,dns_msg(Fields)}
end.
--compile({inline, [deadline/1, timeout/1]}).
-deadline(Timeout) -> % When is the deadline? [ms]
+
+
+-compile({inline, [time/1, timeout/1, wait/1]}).
+
+%% What Time is the Timeout? [ms]
+%%
+time(Timeout) ->
erlang:monotonic_time(1000) + Timeout.
-timeout(Deadline) -> % How long to deadline? [ms] >= 0
- case Deadline - erlang:monotonic_time(1000) of
- Timeout when 0 =< Timeout -> Timeout;
- _ -> 0
+
+%% How long Timeout to Time? [ms] >= 0
+%%
+timeout(Time) ->
+ TimeNow = erlang:monotonic_time(1000),
+ if
+ TimeNow < Time ->
+ Time - TimeNow;
+ true ->
+ 0
+ end.
+
+%% receive after Timeout but do not yield for 0
+%%
+wait(0) ->
+ ok;
+wait(Timeout) ->
+ receive
+ after Timeout ->
+ ok
end.
--
2.26.2