File 7771-erts-kernel-reuseaddr-reuseport-exclusiveaddruse-sup.patch of Package erlang
From 27258e92db76fdb8d3daa8ed610edbc9ac58d752 Mon Sep 17 00:00:00 2001
From: Rickard Green <rickard@erlang.org>
Date: Fri, 25 Nov 2022 00:48:47 +0100
Subject: [PATCH] [erts, kernel] reuseaddr/reuseport/exclusiveaddruse
support/fixes
* Introduce socket option 'reuseport' which may or may not have
load balancing.
* Introduce socket option 'reuseport_lb' with load balancing.
* Introduce socket option 'exclusiveaddruse'. This socket option is
Windows specific and will silently be ignored on other systems.
* Change behavior on Windows so that SO_REUSEADDR is only set if
both 'reusaddr' and 'reuseport' have been set. This since
SO_REUSEADDR on Windows behaves as BSD behaves if both SO_REUSEADDR
and SO_REUSEPORT have been set.
---
erts/emulator/drivers/common/inet_drv.c | 346 +++++++++++++++++++++---
erts/preloaded/ebin/prim_inet.beam | Bin 102548 -> 102796 bytes
erts/preloaded/src/erts.app.src | 2 +-
erts/preloaded/src/prim_inet.erl | 9 +
lib/kernel/doc/src/inet.xml | 110 +++++++-
lib/kernel/src/gen_sctp.erl | 6 +
lib/kernel/src/gen_tcp.erl | 6 +
lib/kernel/src/gen_udp.erl | 6 +
lib/kernel/src/inet.erl | 10 +-
lib/kernel/src/inet_int.hrl | 3 +
lib/kernel/src/kernel.app.src | 2 +-
lib/kernel/test/inet_sockopt_SUITE.erl | 54 +++-
12 files changed, 498 insertions(+), 56 deletions(-)
diff --git a/erts/emulator/drivers/common/inet_drv.c b/erts/emulator/drivers/common/inet_drv.c
index ec5f9a655c..538f9133a3 100644
--- a/erts/emulator/drivers/common/inet_drv.c
+++ b/erts/emulator/drivers/common/inet_drv.c
@@ -862,6 +862,9 @@ static size_t my_strnlen(const char *s, size_t maxlen)
#define UDP_OPT_ADD_MEMBERSHIP 14 /* add an IP group membership */
#define UDP_OPT_DROP_MEMBERSHIP 15 /* drop an IP group membership */
#define INET_OPT_IPV6_V6ONLY 16 /* IPv6 only socket, no mapped v4 addrs */
+#define INET_OPT_REUSEPORT 17 /* enable/disable local port reuse */
+#define INET_OPT_REUSEPORT_LB 18 /* enable/disable local port reuse */
+#define INET_OPT_EXCLUSIVEADDRUSE 19 /* windows specific exclusive addr */
/* LOPT is local options */
#define INET_LOPT_BUFFER 20 /* min buffer size hint */
#define INET_LOPT_HEADER 21 /* list header size */
@@ -1184,6 +1187,10 @@ typedef struct {
(affect how to interpret hsz) */
int exitf; /* exit port on close or not */
int deliver; /* Delivery mode, TERM or PORT */
+#ifdef __WIN32__
+ /* placed here in order to not consume more memory in 64-bit case... */
+ int bsd_compat; /* State for Windows compatibility with BSD */
+#endif
ErlDrvTermData caller; /* recipient of sync reply */
ErlDrvTermData busy_caller; /* recipient of sync reply when caller busy.
@@ -1536,6 +1543,9 @@ static ErlDrvTermData am_linger;
static ErlDrvTermData am_recbuf;
static ErlDrvTermData am_sndbuf;
static ErlDrvTermData am_reuseaddr;
+static ErlDrvTermData am_reuseport;
+static ErlDrvTermData am_reuseport_lb;
+static ErlDrvTermData am_exclusiveaddruse;
static ErlDrvTermData am_dontroute;
static ErlDrvTermData am_priority;
static ErlDrvTermData am_recvtos;
@@ -4171,6 +4181,9 @@ static void inet_init_sctp(void) {
INIT_ATOM(recbuf);
INIT_ATOM(sndbuf);
INIT_ATOM(reuseaddr);
+ INIT_ATOM(reuseport);
+ INIT_ATOM(reuseport_lb);
+ INIT_ATOM(exclusiveaddruse);
INIT_ATOM(dontroute);
INIT_ATOM(priority);
INIT_ATOM(recvtos);
@@ -6584,8 +6597,6 @@ int inet_setopt(int fd,
return res;
}
-
-
/* set socket options:
** return -1 on error
** 0 if ok
@@ -6634,6 +6645,10 @@ static int inet_set_opts(inet_descriptor* desc, char* ptr, int len)
while(len >= 5) {
int recv_cmsgflags;
+#ifdef __WIN32__
+ int bsd_compat_set = 0;
+ int bsd_compat_unset = 0;
+#endif
opt = *ptr++;
ival = get_int32(ptr);
@@ -6913,41 +6928,82 @@ static int inet_set_opts(inet_descriptor* desc, char* ptr, int len)
desc->delimiter = (char)ival;
continue;
+ case INET_OPT_EXCLUSIVEADDRUSE:
+ DDBG(desc,
+ ("INET-DRV-DBG[%d][" SOCKET_FSTR ",%T] "
+ "inet_set_opts(reuseaddr) -> %s\r\n",
+ __LINE__, desc->s, driver_caller(desc->port), B2S(ival)) );
+#ifdef __WIN32__
+ type = SO_EXCLUSIVEADDRUSE;
+#else
+ continue;
+#endif
+ break;
+
+ case INET_OPT_REUSEPORT_LB:
+ DDBG(desc,
+ ("INET-DRV-DBG[%d][" SOCKET_FSTR ",%T] "
+ "inet_set_opts(reuseport_lb) -> %s\r\n",
+ __LINE__, desc->s, driver_caller(desc->port), B2S(ival)) );
+#if defined(__WIN32__)
+ continue;
+#elif defined(__linux__) && defined(SO_REUSEPORT)
+ /* SO_REUSEPORT is load balancing on linux */
+ type = SO_REUSEPORT;
+ break;
+#elif defined(SO_REUSEPORT_LB)
+ type = SO_REUSEPORT_LB;
+ break;
+#else
+ continue;
+#endif
+
+ case INET_OPT_REUSEPORT:
+ DDBG(desc,
+ ("INET-DRV-DBG[%d][" SOCKET_FSTR ",%T] "
+ "inet_set_opts(reuseport) -> %s\r\n",
+ __LINE__, desc->s, driver_caller(desc->port), B2S(ival)) );
+#if defined(__WIN32__)
+ /* fall through to INET_OPT_REUSEADDR */
+#elif defined(SO_REUSEPORT)
+ type = SO_REUSEPORT;
+ break;
+#else
+ continue;
+#endif
+
case INET_OPT_REUSEADDR:
+ DDBG(desc,
+ ("INET-DRV-DBG[%d][" SOCKET_FSTR ",%T] "
+ "inet_set_opts(reuseaddr) -> %s\r\n",
+ __LINE__, desc->s, driver_caller(desc->port), B2S(ival)) );
#ifdef __WIN32__
- /* The behaviour changed in Windows Server 2003.
- * Now it works as the combo of `SO_REUSEADDR` and
- * `SO_REUSEPORT` does on *BSD.
- */
- if (desc->stype != SOCK_DGRAM) {
+ {
+ int old_ra, new_ra, compat;
/*
- * We refuse usage of SO_REUSEADDR on non-UDP sockets since it
- * mostly (perhaps only) opens up for socket collisions and
- * probably hasn't got any useful use-cases. There are useful
- * use-cases for multicast sockets, though. For more
- * information see:
- * https://learn.microsoft.com/en-us/windows/win32/winsock/using-so-reuseaddr-and-so-exclusiveaddruse
- *
- * Prior to OTP 25 we also refused to use SO_REUSEADDR on any
- * sockets on Windows. See
- * 2a6ac6f3f027fcab6d607599e82714e930d9fde2
- *
- * We certainly do not want to use it for the Erlang
- * distribution TCP sockets since we can end up reusing our
- * own active sockets as demonstrated in the issue:
- * https://github.com/erlang/otp/issues/6461
- *
- * We probably want to expose the Windows specific
- * SO_EXCLUSIVEADDRUSE in the API as well...
+ * We only set SO_REUSEADDR on Windows if both 'reuseaddr' and
+ * 'reuseport' has been passed as options, since SO_REUSEADDR
+ * on Windows behaves like SO_REUSEADDR|SO_REUSEPORT does on BSD.
*/
- continue;
+ compat = desc->bsd_compat;
+ old_ra = ((compat & (INET_OPT_REUSEADDR | INET_OPT_REUSEPORT))
+ == (INET_OPT_REUSEADDR | INET_OPT_REUSEPORT));
+ if (ival) {
+ bsd_compat_set = opt;
+ compat |= opt;
+ }
+ else {
+ bsd_compat_unset = opt;
+ compat &= ~opt;
+ }
+ new_ra = ((compat & (INET_OPT_REUSEADDR | INET_OPT_REUSEPORT))
+ == (INET_OPT_REUSEADDR | INET_OPT_REUSEPORT));
+ desc->bsd_compat = compat;
+ if (old_ra == new_ra)
+ continue;
}
#endif
type = SO_REUSEADDR;
- DDBG(desc,
- ("INET-DRV-DBG[%d][" SOCKET_FSTR ",%T] "
- "inet_set_opts(reuseaddr) -> %s\r\n",
- __LINE__, desc->s, driver_caller(desc->port), B2S(ival)) );
break;
case INET_OPT_KEEPALIVE: type = SO_KEEPALIVE;
@@ -7493,9 +7549,18 @@ static int inet_set_opts(inet_descriptor* desc, char* ptr, int len)
"inet_set_opts -> set opt result: %d\r\n",
__LINE__, desc->s, driver_caller(desc->port), res) );
- if (res == 0) desc->recv_cmsgflags = recv_cmsgflags;
- if (propagate && res != 0) {
- return -1;
+ if (res == 0) {
+ desc->recv_cmsgflags = recv_cmsgflags;
+ }
+ else {
+#ifdef __WIN32__
+ if (bsd_compat_set)
+ desc->bsd_compat &= ~bsd_compat_set;
+ if (bsd_compat_unset)
+ desc->bsd_compat |= bsd_compat_unset;
+#endif
+ if (propagate)
+ return -1;
}
}
@@ -7639,6 +7704,10 @@ static int sctp_set_opts(inet_descriptor* desc, char* ptr, int len)
int recv_cmsgflags;
/* Get the Erlang-encoded option type -- always 1 byte: */
int eopt;
+#ifdef __WIN32__
+ int bsd_compat_set = 0;
+ int bsd_compat_unset = 0;
+#endif
eopt = *curr;
curr++;
@@ -7882,13 +7951,99 @@ static int sctp_set_opts(inet_descriptor* desc, char* ptr, int len)
break;
}
+
+ case INET_OPT_EXCLUSIVEADDRUSE:
+ DDBG(desc,
+ ("INET-DRV-DBG[%d][" SOCKET_FSTR ",%T] "
+ "sctp_set_opts -> EXCLUSIVEADDRUSE\r\n",
+ __LINE__, desc->s, driver_caller(desc->port)) );
+#ifdef __WIN32__
+ arg.ival= get_int32 (curr); curr += 4;
+ proto = SOL_SOCKET;
+ type = SO_EXCLUSIVEADDRUSE;
+ arg_ptr = (char*) (&arg.ival);
+ arg_sz = sizeof ( arg.ival);
+ break;
+#else
+ continue;
+#endif
+
+ case INET_OPT_REUSEPORT_LB:
+ {
+ DDBG(desc,
+ ("INET-DRV-DBG[%d][" SOCKET_FSTR ",%T] "
+ "sctp_set_opts -> REUSEPORT_LB\r\n",
+ __LINE__, desc->s, driver_caller(desc->port)) );
+#if defined(__WIN32__)
+ continue;
+#elif defined(SO_REUSEPORT_LB) || (defined(__linux__) && defined(SO_REUSEPORT))
+ arg.ival= get_int32 (curr); curr += 4;
+ proto = SOL_SOCKET;
+#if defined(__linux__)
+ /* SO_REUSEPORT is load balancing on linux */
+ type = SO_REUSEPORT;
+#else
+ type = SO_REUSEPORT_LB;
+#endif
+ arg_ptr = (char*) (&arg.ival);
+ arg_sz = sizeof ( arg.ival);
+ break;
+#else
+ continue;
+#endif
+ }
+
+ case INET_OPT_REUSEPORT:
+ {
+ DDBG(desc,
+ ("INET-DRV-DBG[%d][" SOCKET_FSTR ",%T] "
+ "sctp_set_opts -> REUSEPORT\r\n",
+ __LINE__, desc->s, driver_caller(desc->port)) );
+#if defined(__WIN32__)
+ /* fall through to INET_OPT_REUSEADDR */
+#elif defined(SO_REUSEPORT)
+ arg.ival= get_int32 (curr); curr += 4;
+ proto = SOL_SOCKET;
+ type = SO_REUSEPORT;
+ arg_ptr = (char*) (&arg.ival);
+ arg_sz = sizeof ( arg.ival);
+ break;
+#else
+ continue;
+#endif
+ }
case INET_OPT_REUSEADDR:
{
DDBG(desc,
("INET-DRV-DBG[%d][" SOCKET_FSTR ",%T] "
"sctp_set_opts -> REUSEADDR\r\n",
__LINE__, desc->s, driver_caller(desc->port)) );
-
+#ifdef __WIN32__
+ {
+ int old_ra, new_ra, compat;
+ /*
+ * We only set SO_REUSEADDR on Windows if both 'reuseaddr' and
+ * 'reuseport' has been passed as options, since SO_REUSEADDR
+ * on Windows behaves like SO_REUSEADDR|SO_REUSEPORT does on BSD.
+ */
+ compat = desc->bsd_compat;
+ old_ra = ((compat & (INET_OPT_REUSEADDR | INET_OPT_REUSEPORT))
+ == (INET_OPT_REUSEADDR | INET_OPT_REUSEPORT));
+ if (ival) {
+ bsd_compat_set = opt;
+ compat |= opt;
+ }
+ else {
+ bsd_compat_unset = opt;
+ compat &= ~opt;
+ }
+ new_ra = ((compat & (INET_OPT_REUSEADDR | INET_OPT_REUSEPORT))
+ == (INET_OPT_REUSEADDR | INET_OPT_REUSEPORT));
+ desc->bsd_compat = compat;
+ if (old_ra == new_ra)
+ continue;
+ }
+#endif
arg.ival= get_int32 (curr); curr += 4;
proto = SOL_SOCKET;
type = SO_REUSEADDR;
@@ -8421,7 +8576,17 @@ static int sctp_set_opts(inet_descriptor* desc, char* ptr, int len)
/* The return values of "sock_setopt" can only be 0 or -1: */
ASSERT(res == 0 || res == -1);
- if (res == 0) desc->recv_cmsgflags = recv_cmsgflags;
+ if (res == 0) {
+ desc->recv_cmsgflags = recv_cmsgflags;
+ }
+#ifdef __WIN32__
+ else {
+ if (bsd_compat_set)
+ desc->bsd_compat &= ~bsd_compat_set;
+ if (bsd_compat_unset)
+ desc->bsd_compat |= bsd_compat_unset;
+ }
+#endif
if (res == -1)
{ /* Got an error, DO NOT continue with other options. However, on
Solaris 10, we DO allow SO_SNDBUF and SO_RCVBUF to fail, assu-
@@ -8770,9 +8935,55 @@ static ErlDrvSSizeT inet_fill_opts(inet_descriptor* desc,
TRUNCATE_TO(0,ptr);
continue;
#endif
- case INET_OPT_REUSEADDR:
- type = SO_REUSEADDR;
+ case INET_OPT_REUSEADDR: {
+#if defined(__WIN32__)
+ int res = !!(desc->bsd_compat & INET_OPT_REUSEADDR);
+ *ptr++ = opt;
+ put_int32(res, ptr);
+ continue;
+#else
+ type = SO_REUSEADDR;
+#endif
+ break;
+ }
+ case INET_OPT_REUSEPORT_LB: {
+#if defined(__linux__) && defined(SO_REUSEPORT)
+ /* SO_REUSEPORT is load balancing on linux */
+ type = SO_REUSEPORT;
+ break;
+#elif defined(SO_REUSEPORT_LB)
+ type = SO_REUSEPORT_LB;
+ break;
+#else
+ *ptr++ = opt;
+ put_int32(0, ptr);
+ continue;
+#endif
+ }
+ case INET_OPT_REUSEPORT: {
+#if defined(__WIN32__)
+ int res = !!(desc->bsd_compat & INET_OPT_REUSEPORT);
+ *ptr++ = opt;
+ put_int32(res, ptr);
+ continue;
+#elif defined(SO_REUSEPORT)
+ type = SO_REUSEPORT;
break;
+#else
+ *ptr++ = opt;
+ put_int32(0, ptr);
+ continue;
+#endif
+ }
+ case INET_OPT_EXCLUSIVEADDRUSE:
+#ifdef __WIN32__
+ type = SO_EXCLUSIVEADDRUSE;
+ break;
+#else
+ *ptr++ = opt;
+ put_int32(0, ptr);
+ continue;
+#endif
case INET_OPT_KEEPALIVE:
type = SO_KEEPALIVE;
break;
@@ -9351,7 +9562,6 @@ static ErlDrvSSizeT sctp_fill_opts(inet_descriptor* desc,
/* The following options just return an integer value: */
case INET_OPT_RCVBUF :
case INET_OPT_SNDBUF :
- case INET_OPT_REUSEADDR:
case INET_OPT_DONTROUTE:
case INET_OPT_PRIORITY :
case INET_OPT_TOS :
@@ -9361,6 +9571,10 @@ static ErlDrvSSizeT sctp_fill_opts(inet_descriptor* desc,
case SCTP_OPT_AUTOCLOSE:
case SCTP_OPT_MAXSEG :
/* The following options return true or false: */
+ case INET_OPT_REUSEADDR:
+ case INET_OPT_REUSEPORT:
+ case INET_OPT_REUSEPORT_LB:
+ case INET_OPT_EXCLUSIVEADDRUSE:
case SCTP_OPT_NODELAY :
case SCTP_OPT_DISABLE_FRAGMENTS:
case SCTP_OPT_I_WANT_MAPPED_V4_ADDR:
@@ -9391,13 +9605,66 @@ static ErlDrvSSizeT sctp_fill_opts(inet_descriptor* desc,
tag = am_sndbuf;
break;
}
+ case INET_OPT_EXCLUSIVEADDRUSE:
+ {
+#if defined(__WIN32__)
+ proto = SOL_SOCKET;
+ type = SO_EXCLUSIVEADDRUSE;
+ is_int = 0;
+ tag = am_exclusiveaddruse;
+ break;
+#else
+ continue;
+#endif
+ }
+ case INET_OPT_REUSEPORT_LB:
+ {
+#if defined(SO_REUSEPORT_LB) || (defined(__linux__) && defined(SO_REUSEPORT))
+ proto = SOL_SOCKET;
+#if defined(__linux__)
+ /* SO_REUSEPORT is load balancing on linux */
+ type = SO_REUSEPORT;
+#else
+ type = SO_REUSEPORT_LB;
+#endif
+ is_int = 0;
+ tag = am_reuseport_lb;
+ break;
+#else
+ continue;
+#endif
+ }
+ case INET_OPT_REUSEPORT:
+ {
+#if defined(__WIN32__)
+ res = !!(desc->bsd_compat & INET_OPT_REUSEPORT);
+ is_int = 0;
+ tag = am_reuseaddr;
+ goto form_result;
+#elif defined(SO_REUSEPORT)
+ proto = SOL_SOCKET;
+ type = SO_REUSEPORT;
+ is_int = 0;
+ tag = am_reuseport;
+ break;
+#else
+ continue;
+#endif
+ }
case INET_OPT_REUSEADDR:
{
+#if defined(__WIN32__)
+ res = !!(desc->bsd_compat & INET_OPT_REUSEADDR);
+ is_int = 0;
+ tag = am_reuseaddr;
+ goto form_result;
+#else
proto = SOL_SOCKET;
type = SO_REUSEADDR;
is_int = 0;
tag = am_reuseaddr;
break;
+#endif
}
case INET_OPT_DONTROUTE:
{
@@ -9556,6 +9823,9 @@ static ErlDrvSSizeT sctp_fill_opts(inet_descriptor* desc,
}
if (sock_getopt (desc->s, proto, type, &res, &sz) < 0) continue;
/* Form the result: */
+#ifdef __WIN32__
+form_result:
+#endif
PLACE_FOR(spec, i, LOAD_ATOM_CNT +
(is_int ? LOAD_INT_CNT : LOAD_BOOL_CNT) +
LOAD_TUPLE_CNT);
diff --git a/erts/preloaded/src/prim_inet.erl b/erts/preloaded/src/prim_inet.erl
index c6274f89d6..6bb912a57a 100644
--- a/erts/preloaded/src/prim_inet.erl
+++ b/erts/preloaded/src/prim_inet.erl
@@ -1513,6 +1513,9 @@ is_sockopt_val(Opt, Val) ->
%% Socket options processing: Encoding option NAMES:
%%
enc_opt(reuseaddr) -> ?INET_OPT_REUSEADDR;
+enc_opt(reuseport) -> ?INET_OPT_REUSEPORT;
+enc_opt(reuseport_lb) -> ?INET_OPT_REUSEPORT_LB;
+enc_opt(exclusiveaddruse) -> ?INET_OPT_EXCLUSIVEADDRUSE;
enc_opt(keepalive) -> ?INET_OPT_KEEPALIVE;
enc_opt(dontroute) -> ?INET_OPT_DONTROUTE;
enc_opt(linger) -> ?INET_OPT_LINGER;
@@ -1581,6 +1584,9 @@ enc_opt(sctp_get_peer_addr_info) -> ?SCTP_OPT_GET_PEER_ADDR_INFO.
%% Decoding option NAMES:
%%
dec_opt(?INET_OPT_REUSEADDR) -> reuseaddr;
+dec_opt(?INET_OPT_REUSEPORT) -> reuseport;
+dec_opt(?INET_OPT_REUSEPORT_LB) -> reuseport_lb;
+dec_opt(?INET_OPT_EXCLUSIVEADDRUSE) -> exclusiveaddruse;
dec_opt(?INET_OPT_KEEPALIVE) -> keepalive;
dec_opt(?INET_OPT_DONTROUTE) -> dontroute;
dec_opt(?INET_OPT_LINGER) -> linger;
@@ -1664,6 +1670,9 @@ type_opt(_, Opt) ->
%% Types of option values, by option name:
%%
type_opt_1(reuseaddr) -> bool;
+type_opt_1(reuseport) -> bool;
+type_opt_1(reuseport_lb) -> bool;
+type_opt_1(exclusiveaddruse) -> bool;
type_opt_1(keepalive) -> bool;
type_opt_1(dontroute) -> bool;
type_opt_1(linger) -> {bool,int};
diff --git a/lib/kernel/doc/src/inet.xml b/lib/kernel/doc/src/inet.xml
index 0dac35f9a9..b7a20c9d11 100644
--- a/lib/kernel/doc/src/inet.xml
+++ b/lib/kernel/doc/src/inet.xml
@@ -1092,6 +1092,22 @@ get_tcpi_sacked(Sock) ->
<seemfa marker="gen_tcp#shutdown/2"><c>gen_tcp:shutdown/2</c></seemfa>
to shut down the write side.</p>
</item>
+ <tag><c>{exclusiveaddruse, Boolean}</c>
+ <marker id="option-exclusiveaddruse"/></tag>
+ <item>
+ <p>
+ Enables/disables exclusive address/port usage on Windows. That
+ is, by enabling this option you can prevent other sockets from
+ binding to the same address/port. By default this option is
+ disabled. That is, other sockets may use the same address/port
+ by setting <seeerl marker="#option-reuseaddr"><c>{reuseaddr,
+ true}</c></seeerl> in combination with
+ <seeerl marker="#option-reuseport"><c>{reuseport,
+ true}</c></seeerl> unless <c>{exclusiveaddruse, true}</c>
+ has been set on <c><anno>Socket</anno></c>. On non-Windows
+ systems this option is silently ignored.
+ </p>
+ </item>
<tag><c>{header, Size}</c></tag>
<item>
<p>This option is only meaningful if option <c>binary</c>
@@ -1566,18 +1582,94 @@ setcap cap_sys_admin,cap_sys_ptrace,cap_dac_read_search+epi beam.smp</code>
option.
</p>
</item>
- <tag><c>{reuseaddr, Boolean}</c></tag>
+ <tag><c>{reuseaddr, Boolean}</c><marker id="option-reuseaddr"/></tag>
+ <item>
+ <p>
+ Allows or disallows reuse of local address. By default, reuse
+ is disallowed.
+ </p>
+ <note>
+ <p>
+ On windows <c>{reuseaddr, true}</c> will have no effect unless
+ also <seeerl marker="#option-reuseport"><c>{reuseport,
+ true}</c></seeerl> is set. If both are set, the
+ <c>SO_REUSEADDR</c> Windows socket option will be enabled.
+ This since setting <c>SO_REUSEADDR</c> on Windows more or less
+ has the same behavior as setting both <c>SO_REUSEADDR</c> and
+ <c>SO_REUSEPORT</c> on BSD. This behavior was introduced as
+ of OTP 26.0.
+ </p>
+ <p>
+ Between OTP 25.0 and up to the predecessor of OTP 26.0,
+ the <c>SO_REUSEADDR</c> option was set on Windows if
+ <c>{reuseaddr, true}</c> was set. Prior to OTP 25.0, the
+ <c>{reuseaddr, true}</c> option on Windows was silently
+ ignored.
+ </p>
+ <p>
+ See also the
+ <seeerl marker="#option-exclusiveaddruse"><c>exclusiveaddruse</c></seeerl>
+ option.
+ </p>
+ </note>
+ </item>
+ <tag><c>{reuseport, Boolean}</c><marker id="option-reuseport"/></tag>
+ <item>
+ <p>
+ Allows or disallows reuse of local port which <i>may or may not</i>
+ have load balancing depending on the underlying OS. By default,
+ reuse is disallowed. See also
+ <seeerl marker="#option-reuseport_lb"><c>reuseport_lb</c></seeerl>.
+ </p>
+ <note>
+ <p>
+ On windows <c>{reuseport, true}</c> will have no effect unless
+ also <seeerl marker="#option-reuseaddr"><c>{reuseaddr,
+ true}</c></seeerl> is set. If both are set, the
+ <c>SO_REUSEADDR</c> Windows socket option will be enabled.
+ This since setting <c>SO_REUSEADDR</c> on Windows more or less
+ has the same behavior as setting both <c>SO_REUSEADDR</c> and
+ <c>SO_REUSEPORT</c> on BSD. The <c>reuseport</c> option was
+ introduced as of OTP 26.0.
+ </p>
+ <p>
+ See also the
+ <seeerl marker="#option-exclusiveaddruse"><c>exclusiveaddruse</c></seeerl>
+ option.
+ </p>
+ </note>
+ <note>
+ <p>
+ <c>reuseport</c> <i>may or may not</i> be the same underlying
+ option as
+ <seeerl marker="#option-reuseport_lb"><c>reuseport_lb</c></seeerl>
+ depending on the underlying OS. They, for example, are on Linux.
+ When they are the same underlying option, operating on both may
+ cause them to interact in surprising ways. For example,
+ by enabling <c>reuseport</c> and then disabling
+ <c>reuseport_lb</c> both will end up being disabled.
+ </p>
+ </note>
+ </item>
+ <tag><c>{reuseport_lb, Boolean}</c><marker id="option-reuseport_lb"/></tag>
<item>
<p>
- Allows or disallows local reuse of address. By
- default, reuse is disallowed.
+ Allows or disallows reuse of local port <i>with</i> load balancing.
+ By default, reuse is disallowed. See also
+ <seeerl marker="#option-reuseport"><c>reuseport</c></seeerl>.
</p>
- <note><p>
- On Windows this option will be ignored unless
- <c><anno>Socket</anno></c> is an UDP socket. This since the
- behavior of <c>reuseaddr</c> is very different on Windows
- compared to other system.
- </p></note>
+ <note>
+ <p>
+ <c>reuseport_lb</c> <i>may or may not</i> be the same underlying
+ option as
+ <seeerl marker="#option-reuseport"><c>reuseport</c></seeerl>
+ depending on the underlying OS. They, for example, are on Linux.
+ When they are the same underlying option, operating on both may
+ cause them to interact in surprising ways. For example,
+ by enabling <c>reuseport_lb</c> and then disabling
+ <c>reuseport</c> both will end up being disabled.
+ </p>
+ </note>
</item>
<tag><c>{send_timeout, Integer}</c></tag>
<item>
diff --git a/lib/kernel/src/gen_sctp.erl b/lib/kernel/src/gen_sctp.erl
index f53a9dca3e..d02b9a2379 100644
--- a/lib/kernel/src/gen_sctp.erl
+++ b/lib/kernel/src/gen_sctp.erl
@@ -58,6 +58,7 @@
{buffer, non_neg_integer()} |
{debug, boolean()} |
{dontroute, boolean()} |
+ {exclusiveaddruse, boolean()} |
{high_msgq_watermark, pos_integer()} |
{linger, {boolean(), non_neg_integer()}} |
{low_msgq_watermark, pos_integer()} |
@@ -65,6 +66,8 @@
{priority, non_neg_integer()} |
{recbuf, non_neg_integer()} |
{reuseaddr, boolean()} |
+ {reuseport, boolean()} |
+ {reuseport_lb, boolean()} |
{ipv6_v6only, boolean()} |
{sndbuf, non_neg_integer()} |
{sctp_autoclose, non_neg_integer()} |
@@ -84,6 +87,7 @@
buffer |
debug |
dontroute |
+ exclusiveaddruse |
high_msgq_watermark |
linger |
low_msgq_watermark |
@@ -91,6 +95,8 @@
priority |
recbuf |
reuseaddr |
+ reuseport |
+ reuseport_lb |
ipv6_v6only |
sctp_autoclose |
sctp_disable_fragments |
diff --git a/lib/kernel/src/gen_tcp.erl b/lib/kernel/src/gen_tcp.erl
index df0e8b1b13..ca4975a493 100644
--- a/lib/kernel/src/gen_tcp.erl
+++ b/lib/kernel/src/gen_tcp.erl
@@ -43,6 +43,7 @@
{deliver, port | term} |
{dontroute, boolean()} |
{exit_on_close, boolean()} |
+ {exclusiveaddruse, boolean()} |
{header, non_neg_integer()} |
{high_msgq_watermark, pos_integer()} |
{high_watermark, non_neg_integer()} |
@@ -63,6 +64,8 @@
ValueBin :: binary()} |
{recbuf, non_neg_integer()} |
{reuseaddr, boolean()} |
+ {reuseport, boolean()} |
+ {reuseport_lb, boolean()} |
{send_timeout, non_neg_integer() | infinity} |
{send_timeout_close, boolean()} |
{show_econnreset, boolean()} |
@@ -84,6 +87,7 @@
deliver |
dontroute |
exit_on_close |
+ exclusiveaddruse |
header |
high_msgq_watermark |
high_watermark |
@@ -103,6 +107,8 @@
(ValueBin :: binary())} |
recbuf |
reuseaddr |
+ reuseport |
+ reuseport_lb |
send_timeout |
send_timeout_close |
show_econnreset |
diff --git a/lib/kernel/src/gen_udp.erl b/lib/kernel/src/gen_udp.erl
index c652a7663f..9debede39b 100644
--- a/lib/kernel/src/gen_udp.erl
+++ b/lib/kernel/src/gen_udp.erl
@@ -38,6 +38,7 @@
{deliver, port | term} |
{dontroute, boolean()} |
{drop_membership, membership()} |
+ {exclusiveaddruse, boolean()} |
{header, non_neg_integer()} |
{high_msgq_watermark, pos_integer()} |
{low_msgq_watermark, pos_integer()} |
@@ -53,6 +54,8 @@
{read_packets, non_neg_integer()} |
{recbuf, non_neg_integer()} |
{reuseaddr, boolean()} |
+ {reuseport, boolean()} |
+ {reuseport_lb, boolean()} |
{sndbuf, non_neg_integer()} |
{tos, non_neg_integer()} |
{tclass, non_neg_integer()} |
@@ -68,6 +71,7 @@
debug |
deliver |
dontroute |
+ exclusiveaddruse |
header |
high_msgq_watermark |
low_msgq_watermark |
@@ -84,6 +88,8 @@
read_packets |
recbuf |
reuseaddr |
+ reuseport |
+ reuseport_lb |
sndbuf |
tos |
tclass |
diff --git a/lib/kernel/src/inet.erl b/lib/kernel/src/inet.erl
index d836f6b367..4ca59a92ff 100644
--- a/lib/kernel/src/inet.erl
+++ b/lib/kernel/src/inet.erl
@@ -953,9 +953,9 @@ stats() ->
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
connect_options() ->
[debug,
- tos, tclass, priority, reuseaddr, keepalive, linger, nodelay,
- sndbuf, recbuf,
- recvtos, recvtclass, ttl, recvttl,
+ tos, tclass, priority, reuseaddr, reuseport, reuseport_lb,
+ exclusiveaddruse, keepalive,
+ linger, nodelay, sndbuf, recbuf, recvtos, recvtclass, ttl, recvttl,
header, active, packet, packet_size, buffer, mode, deliver, line_delimiter,
exit_on_close, high_watermark, low_watermark, high_msgq_watermark,
low_msgq_watermark, send_timeout, send_timeout_close, delay_send, raw,
@@ -1044,8 +1044,8 @@ con_add(Name, Val, #connect_opts{} = R, Opts, AllOpts) ->
listen_options() ->
[debug,
tos, tclass,
- priority, reuseaddr, keepalive, linger, sndbuf, recbuf, nodelay,
- recvtos, recvtclass, ttl, recvttl,
+ priority, reuseaddr, reuseport, reuseport_lb, exclusiveaddruse, keepalive,
+ linger, sndbuf, recbuf, nodelay, recvtos, recvtclass, ttl, recvttl,
header, active, packet, buffer, mode, deliver, backlog, ipv6_v6only,
exit_on_close, high_watermark, low_watermark, high_msgq_watermark,
low_msgq_watermark, send_timeout, send_timeout_close, delay_send,
diff --git a/lib/kernel/src/inet_int.hrl b/lib/kernel/src/inet_int.hrl
index f4e16c6a76..2f50f2c23c 100644
--- a/lib/kernel/src/inet_int.hrl
+++ b/lib/kernel/src/inet_int.hrl
@@ -135,6 +135,9 @@
-define(UDP_OPT_ADD_MEMBERSHIP, 14).
-define(UDP_OPT_DROP_MEMBERSHIP, 15).
-define(INET_OPT_IPV6_V6ONLY, 16).
+-define(INET_OPT_REUSEPORT, 17).
+-define(INET_OPT_REUSEPORT_LB, 18).
+-define(INET_OPT_EXCLUSIVEADDRUSE, 19).
% "Local" options: codes start from 20:
-define(INET_LOPT_BUFFER, 20).
-define(INET_LOPT_HEADER, 21).
diff --git a/lib/kernel/test/inet_sockopt_SUITE.erl b/lib/kernel/test/inet_sockopt_SUITE.erl
index ff24b2f0c3..0cba8ef445 100644
--- a/lib/kernel/test/inet_sockopt_SUITE.erl
+++ b/lib/kernel/test/inet_sockopt_SUITE.erl
@@ -863,9 +863,14 @@ make_check_fun(Type,Element) ->
%% {OptionName,Value1,Value2,Mandatory,Changeable}
all_listen_options() ->
+ OsType = os:type(),
+ OsVersion = os:version(),
[{tos,0,1,false,true},
{priority,0,1,false,true},
- {reuseaddr,false,true,false,true},
+ {reuseaddr,false,true,mandatory_reuseaddr(OsType,OsVersion),true},
+ {reuseport,false,true,mandatory_reuseport(OsType,OsVersion),true},
+ {reuseport_lb,false,true,mandatory_reuseport_lb(OsType,OsVersion),true},
+ {exclusiveaddruse,false,true,mandatory_exclusiveaddruse(OsType,OsVersion),true},
{keepalive,false,true,true,true},
{linger, {false,10}, {true,10},true,true},
{sndbuf,2048,4096,false,true},
@@ -888,9 +893,14 @@ all_listen_options() ->
{packet_size,0,4,true,true}
].
all_connect_options() ->
+ OsType = os:type(),
+ OsVersion = os:version(),
[{tos,0,1,false,true},
{priority,0,1,false,true},
- {reuseaddr,false,true,false,true},
+ {reuseaddr,false,true,mandatory_reuseaddr(OsType,OsVersion),true},
+ {reuseport,false,true,mandatory_reuseport(OsType,OsVersion),true},
+ {reuseport_lb,false,true,mandatory_reuseport_lb(OsType,OsVersion),true},
+ {exclusiveaddruse,false,true,mandatory_exclusiveaddruse(OsType,OsVersion),true},
{keepalive,false,true,true,true},
{linger, {false,10}, {true,10},true,true},
{sndbuf,2048,4096,false,true},
@@ -914,6 +924,46 @@ all_connect_options() ->
].
+%% Mandatory on a lot of system other than those listed below. Please add more...
+mandatory_reuseaddr({unix, linux}, _OsVersion) ->
+ true;
+mandatory_reuseaddr({unix, freebsd}, _OsVersion) ->
+ true;
+mandatory_reuseaddr({unix, darwin}, _OsVersion) ->
+ true;
+mandatory_reuseaddr({win32, _}, _OsVersion) ->
+ true; %% reuseaddr and reuseport are emulated by the inet-driver
+mandatory_reuseaddr(_OsType, _OsVersion) ->
+ false.
+
+%% Mandatory an a lot of system other than those listed below. Please add more...
+mandatory_reuseport({win32, _}, _OsVersion) ->
+ true; %% reuseaddr and reuseport are emulated by the inet-driver
+mandatory_reuseport({unix, linux}, {X,Y,_Z}) when X > 4 orelse X == 4 andalso Y >= 6 ->
+ true;
+mandatory_reuseport({unix, freebsd}, {X,Y,_Z}) when X > 11 orelse X == 11 andalso Y >= 4 ->
+ %% I know that it is available on 11.4, but it may be available earlier...
+ true;
+mandatory_reuseport({unix, darwin}, {X,Y,_Z}) when X > 26 orelse X == 26 andalso Y >= 6 ->
+ %% I know that it is available on 26.6, but it may be available earlier...
+ true;
+mandatory_reuseport(_OsType, _OsVersion) ->
+ false.
+
+%% Perhaps mandatory an system other than those listed below. Please add more...
+mandatory_reuseport_lb({unix, linux}, {X,Y,_Z}) when X > 4 orelse X == 4 andalso Y >= 6 ->
+ true;
+mandatory_reuseport_lb({unix, freebsd}, {X,Y,_Z}) when X > 13 orelse X == 13 andalso Y >= 1 ->
+ %% I know that it is available on 13.1, but it may be available earlier...
+ true;
+mandatory_reuseport_lb(_OsType, _OsVersion) ->
+ false.
+
+mandatory_exclusiveaddruse({win32, _}, {X,Y,_Z}) when X > 5 orelse X == 5 andalso Y >= 2 ->
+ true;
+mandatory_exclusiveaddruse(_OsType, _OsVersion) ->
+ false.
+
create_socketpair(ListenOptions,ConnectOptions) ->
{ok,LS}=gen_tcp:listen(0,ListenOptions),
{ok,Port}=inet:port(LS),
--
2.35.3