File 2228-implement-SO_BINDTODEVICE-for-inet-protocols.patch of Package erlang

From 308841d8c99907a364d6876ed9375507153729eb Mon Sep 17 00:00:00 2001
From: Andreas Schultz <aschultz@tpip.net>
Date: Thu, 2 Feb 2017 11:17:17 +0100
Subject: [PATCH 1/2] implement SO_BINDTODEVICE for inet protocols

bind to device is needed to properly support VRF-Lite under
Linux (see [1] for details).

[1]: https://www.kernel.org/doc/Documentation/networking/vrf.txt
---
 erts/emulator/drivers/common/inet_drv.c | 97 ++++++++++++++++++++++++++++++++-
 erts/preloaded/src/prim_inet.erl        |  3 +
 lib/kernel/doc/src/inet.xml             | 26 +++++++++
 lib/kernel/src/inet.erl                 |  8 +--
 lib/kernel/src/inet_int.hrl             |  1 +
 lib/kernel/test/inet_SUITE.erl          | 70 +++++++++++++++++++++++-
 6 files changed, 198 insertions(+), 7 deletions(-)

diff --git a/erts/emulator/drivers/common/inet_drv.c b/erts/emulator/drivers/common/inet_drv.c
index 0fe5183b4..90114b77f 100644
--- a/erts/emulator/drivers/common/inet_drv.c
+++ b/erts/emulator/drivers/common/inet_drv.c
@@ -591,7 +591,7 @@ static int my_strncasecmp(const char *s1, const char *s2, size_t n)
 
 #include "packet_parser.h"
 
-#ifdef HAVE_SYS_UN_H
+#if defined(HAVE_SYS_UN_H) || defined(SO_BINDTODEVICE)
 
 /* strnlen doesn't exist everywhere */
 static size_t my_strnlen(const char *s, size_t maxlen)
@@ -602,6 +602,10 @@ static size_t my_strnlen(const char *s, size_t maxlen)
     return i;
 }
 
+#endif
+
+#if defined(HAVE_SYS_UN_H)
+
 /* Check that some character in the buffer != '\0' */
 static int is_nonzero(const char *s, size_t n)
 {
@@ -778,6 +782,7 @@ static int is_nonzero(const char *s, size_t n)
 #define INET_LOPT_TCP_SHOW_ECONNRESET 39  /* tell user about incoming RST */
 #define INET_LOPT_LINE_DELIM        40  /* Line delimiting char */
 #define INET_OPT_TCLASS             41  /* IPv6 transport class */
+#define INET_OPT_BIND_TO_DEVICE     42  /* get/set network device the socket is bound to */
 /* SCTP options: a separate range, from 100: */
 #define SCTP_OPT_RTOINFO		100
 #define SCTP_OPT_ASSOCINFO		101
@@ -1334,6 +1339,7 @@ static ErlDrvTermData am_tos;
 static ErlDrvTermData am_tclass;
 static ErlDrvTermData am_ipv6_v6only;
 static ErlDrvTermData am_netns;
+static ErlDrvTermData am_bind_to_device;
 #endif
 
 static char str_eafnosupport[] = "eafnosupport";
@@ -3725,6 +3731,7 @@ static void inet_init_sctp(void) {
     INIT_ATOM(tclass);
     INIT_ATOM(ipv6_v6only);
     INIT_ATOM(netns);
+    INIT_ATOM(bind_to_device);
     
     /* Option names */
     INIT_ATOM(sctp_rtoinfo);
@@ -5946,6 +5953,9 @@ static int inet_set_opts(inet_descriptor* desc, char* ptr, int len)
     int ival;
     char* arg_ptr;
     int arg_sz;
+#ifdef SO_BINDTODEVICE
+    char ifname[IFNAMSIZ];
+#endif
     enum PacketParseType old_htype = desc->htype;
     int old_active = desc->active;
     int propagate; /* Set to 1 if failure to set this option
@@ -6331,6 +6341,29 @@ static int inet_set_opts(inet_descriptor* desc, char* ptr, int len)
 	    len -= arg_sz;
 	    break;
 
+#ifdef SO_BINDTODEVICE
+	case INET_OPT_BIND_TO_DEVICE:
+	    if (ival < 0) return -1;
+	    if (len < ival) return -1;
+	    if (ival > sizeof(ifname)) {
+		return -1;
+	    }
+	    memcpy(ifname, ptr, ival);
+	    ifname[ival] = '\0';
+	    ptr += ival;
+	    len -= ival;
+
+	    proto = SOL_SOCKET;
+	    type = SO_BINDTODEVICE;
+	    arg_ptr = (char*)&ifname;
+	    arg_sz = sizeof(ifname);
+	    propagate = 1; /* We do want to know if this fails */
+
+	    DEBUGF(("inet_set_opts(%ld): s=%d, SO_BINDTODEVICE=%s\r\n",
+		    (long)desc->port, desc->s, ifname));
+	    break;
+#endif
+
 	default:
 	    return -1;
 	}
@@ -6463,6 +6496,9 @@ static int sctp_set_opts(inet_descriptor* desc, char* ptr, int len)
 #	ifdef SCTP_DELAYED_ACK_TIME
 	struct sctp_assoc_value     av; /* Not in SOLARIS10 */
 #	endif
+#	ifdef SO_BINDTODEVICE
+	char ifname[IFNAMSIZ];
+#	endif
     }
     arg;
 
@@ -6702,6 +6738,23 @@ static int sctp_set_opts(inet_descriptor* desc, char* ptr, int len)
 	    continue; /* Option not supported -- ignore it */
 #       endif
 
+#ifdef SO_BINDTODEVICE
+	case INET_OPT_BIND_TO_DEVICE:
+	    arg_sz = get_int32(curr);			curr += 4;
+	    CHKLEN(curr, arg_sz);
+	    if (arg_sz >= sizeof(arg.ifname))
+		return -1;
+	    memcpy(arg.ifname, curr, arg_sz);
+	    arg.ifname[arg_sz] = '\0';
+	    curr += arg_sz;
+
+	    proto   = SOL_SOCKET;
+	    type    = SO_BINDTODEVICE;
+	    arg_ptr = (char*) (&arg.ifname);
+	    arg_sz  = sizeof  ( arg.ifname);
+	    break;
+#endif
+
 	case SCTP_OPT_AUTOCLOSE:
 	{
 	    arg.ival= get_int32 (curr);	  curr += 4;
@@ -6967,6 +7020,9 @@ static ErlDrvSSizeT inet_fill_opts(inet_descriptor* desc,
     ErlDrvSizeT dest_used = 0;
     ErlDrvSizeT dest_allocated = destlen;
     char *orig_dest = *dest;
+#ifdef SO_BINDTODEVICE
+    char ifname[IFNAMSIZ];
+#endif
 
     /* Ptr is a name parameter */ 
 #define RETURN_ERROR()				\
@@ -7302,6 +7358,26 @@ static ErlDrvSSizeT inet_fill_opts(inet_descriptor* desc,
 		put_int32(arg_sz,ptr);
 		continue;
 	    }
+
+#ifdef SO_BINDTODEVICE
+	case INET_OPT_BIND_TO_DEVICE:
+	    arg_sz = sizeof(ifname);
+	    TRUNCATE_TO(0,ptr);
+	    PLACE_FOR(5 + arg_sz,ptr);
+	    arg_ptr = ptr + 5;
+	    if (IS_SOCKET_ERROR(sock_getopt(desc->s,SOL_SOCKET,SO_BINDTODEVICE,
+						arg_ptr,&arg_sz))) {
+		    TRUNCATE_TO(0,ptr);
+		    continue;
+		}
+	    arg_sz = my_strnlen(arg_ptr, arg_sz);
+	    TRUNCATE_TO(arg_sz + 5,ptr);
+	    *ptr++ = opt;
+	    put_int32(arg_sz,ptr);
+	    ptr += arg_sz;
+	    continue;
+#endif
+
 	default:
 	    RETURN_ERROR();
 	}
@@ -7583,6 +7659,25 @@ static ErlDrvSSizeT sctp_fill_opts(inet_descriptor* desc,
 	    i = LOAD_TUPLE	(spec, i, 2);
 	    break;
 	}
+
+#ifdef SO_BINDTODEVICE
+	/* The following option returns a binary:   */
+	case INET_OPT_BIND_TO_DEVICE: {
+	    char ifname[IFNAMSIZ];
+	    unsigned int  sz = sizeof(ifname);
+
+	    if (sock_getopt(desc->s, SOL_SOCKET, SO_BINDTODEVICE,
+			    &ifname, &sz) < 0) continue;
+	    /* Fill in the response: */
+	    PLACE_FOR(spec, i,
+		      LOAD_ATOM_CNT + LOAD_BUF2BINARY_CNT + LOAD_TUPLE_CNT);
+	    i = LOAD_ATOM (spec, i, am_bind_to_device);
+	    i = LOAD_BUF2BINARY(spec, i, ifname, my_strnlen(ifname, sz));
+	    i = LOAD_TUPLE (spec, i, 2);
+	    break;
+	}
+#endif
+
 	/* The following options just return an integer value: */
 	case INET_OPT_RCVBUF   :
 	case INET_OPT_SNDBUF   :
diff --git a/erts/preloaded/src/prim_inet.erl b/erts/preloaded/src/prim_inet.erl
index 61f727e8a..19f8b0d87 100644
--- a/erts/preloaded/src/prim_inet.erl
+++ b/erts/preloaded/src/prim_inet.erl
@@ -1234,6 +1234,7 @@ enc_opt(netns)           -> ?INET_LOPT_NETNS;
 enc_opt(show_econnreset) -> ?INET_LOPT_TCP_SHOW_ECONNRESET;
 enc_opt(line_delimiter)  -> ?INET_LOPT_LINE_DELIM;
 enc_opt(raw)             -> ?INET_OPT_RAW;
+enc_opt(bind_to_device)  -> ?INET_OPT_BIND_TO_DEVICE;
 % Names of SCTP opts:
 enc_opt(sctp_rtoinfo)	 	   -> ?SCTP_OPT_RTOINFO;
 enc_opt(sctp_associnfo)	 	   -> ?SCTP_OPT_ASSOCINFO;
@@ -1294,6 +1295,7 @@ dec_opt(?INET_LOPT_NETNS)           -> netns;
 dec_opt(?INET_LOPT_TCP_SHOW_ECONNRESET) -> show_econnreset;
 dec_opt(?INET_LOPT_LINE_DELIM)      -> line_delimiter;
 dec_opt(?INET_OPT_RAW)              -> raw;
+dec_opt(?INET_OPT_BIND_TO_DEVICE) -> bind_to_device;
 dec_opt(I) when is_integer(I)     -> undefined.
 
 
@@ -1395,6 +1397,7 @@ type_opt_1(packet_size)     -> uint;
 type_opt_1(read_packets)    -> uint;
 type_opt_1(netns)           -> binary;
 type_opt_1(show_econnreset) -> bool;
+type_opt_1(bind_to_device)  -> binary;
 %% 
 %% SCTP options (to be set). If the type is a record type, the corresponding
 %% record signature is returned, otherwise, an "elementary" type tag 
diff --git a/lib/kernel/doc/src/inet.xml b/lib/kernel/doc/src/inet.xml
index 076e50cd1..947e4d456 100644
--- a/lib/kernel/doc/src/inet.xml
+++ b/lib/kernel/doc/src/inet.xml
@@ -897,6 +897,32 @@ setcap cap_sys_admin,cap_sys_ptrace,cap_dac_read_search+epi beam.smp</code>
 	      <seealso marker="file#native_name_encoding/0"><c>file:native_name_encoding/0</c></seealso>.</p></item>
 	    </list>
           </item>
+	  <tag><c>{bind_to_device, Ifname :: binary()}</c></tag>
+	  <item>
+	    <p>Binds a socket to a specific network interface. This option
+	      must be used in a function call that creates a socket, that is,
+	      <seealso marker="gen_tcp#connect/3"><c>gen_tcp:connect/3,4</c></seealso>,
+	      <seealso marker="gen_tcp#listen/2"><c>gen_tcp:listen/2</c></seealso>,
+	      <seealso marker="gen_udp#open/1"><c>gen_udp:open/1,2</c></seealso>, or
+	      <seealso marker="gen_sctp#open/0"><c>gen_sctp:open/0,1,2</c></seealso>.</p>
+	    <p>Unlike <seealso marker="#getifaddrs/0"><c>getifaddrs/0</c></seealso>, Ifname
+	      is encoded a binary. In the unlikely case that a system is using
+	      non-7-bit-ASCII characters in network device names, special care
+	      has to be taken when encoding this argument.</p>
+	    <p>This option uses the Linux-specific socket option
+	      <c>SO_BINDTODEVICE</c>, such as in Linux kernel 2.0.30 or later,
+	      and therefore only exists when the runtime system
+	      is compiled for such an operating system.</p>
+	    <p>Before Linux 3.8, this socket option could be set, but could not retrieved
+	      with <seealso marker="#getopts/2"><c>getopts/2</c></seealso>. Since Linux 3.8,
+	      it is readable.</p>
+	    <p>The virtual machine also needs elevated privileges, either
+	      running as superuser or (for Linux) having capability
+	    <c>CAP_NET_RAW</c>.</p>
+	    <p>The primary use case for this option is to bind sockets into
+	      <url href="http://www.kernel.org/doc/Documentation/networking/vrf.txt">Linux VRF instances</url>.
+	    </p>
+	  </item>
           <tag><c>list</c></tag>
           <item>
             <p>Received <c>Packet</c> is delivered as a list.</p>
diff --git a/lib/kernel/src/inet.erl b/lib/kernel/src/inet.erl
index f5c13ecdd..5be790b7d 100644
--- a/lib/kernel/src/inet.erl
+++ b/lib/kernel/src/inet.erl
@@ -702,7 +702,7 @@ connect_options() ->
      header, active, packet, packet_size, buffer, mode, deliver, line_delimiter,
      exit_on_close, high_watermark, low_watermark, high_msgq_watermark,
      low_msgq_watermark, send_timeout, send_timeout_close, delay_send, raw,
-     show_econnreset].
+     show_econnreset, bind_to_device].
     
 connect_options(Opts, Mod) ->
     BaseOpts = 
@@ -770,7 +770,7 @@ listen_options() ->
      header, active, packet, buffer, mode, deliver, backlog, ipv6_v6only,
      exit_on_close, high_watermark, low_watermark, high_msgq_watermark,
      low_msgq_watermark, send_timeout, send_timeout_close, delay_send,
-     packet_size, raw, show_econnreset].
+     packet_size, raw, show_econnreset, bind_to_device].
 
 listen_options(Opts, Mod) ->
     BaseOpts = 
@@ -850,7 +850,7 @@ udp_options() ->
      deliver, ipv6_v6only,
      broadcast, dontroute, multicast_if, multicast_ttl, multicast_loop,
      add_membership, drop_membership, read_packets,raw,
-     high_msgq_watermark, low_msgq_watermark].
+     high_msgq_watermark, low_msgq_watermark, bind_to_device].
 
 
 udp_options(Opts, Mod) ->
@@ -919,6 +919,7 @@ sctp_options() ->
 [   % The following are generic inet options supported for SCTP sockets:
     mode, active, buffer, tos, tclass, priority, dontroute, reuseaddr, linger, sndbuf,
     recbuf, ipv6_v6only, high_msgq_watermark, low_msgq_watermark,
+    bind_to_device,
 
     % Other options are SCTP-specific (though they may be similar to their
     % TCP and UDP counter-parts):
@@ -1055,7 +1056,6 @@ binary2filename(Bin) ->
 	    Bin
     end.
 
-
 translate_ip(any,      inet) -> {0,0,0,0};
 translate_ip(loopback, inet) -> {127,0,0,1};
 translate_ip(any,      inet6) -> {0,0,0,0,0,0,0,0};
diff --git a/lib/kernel/src/inet_int.hrl b/lib/kernel/src/inet_int.hrl
index 4e8f59a3b..e6cd48935 100644
--- a/lib/kernel/src/inet_int.hrl
+++ b/lib/kernel/src/inet_int.hrl
@@ -154,6 +154,7 @@
 -define(INET_LOPT_TCP_SHOW_ECONNRESET, 39).
 -define(INET_LOPT_LINE_DELIM,     40).
 -define(INET_OPT_TCLASS,          41).
+-define(INET_OPT_BIND_TO_DEVICE,  42).
 % Specific SCTP options: separate range:
 -define(SCTP_OPT_RTOINFO,	 	100).
 -define(SCTP_OPT_ASSOCINFO,	 	101).
diff --git a/lib/kernel/test/inet_SUITE.erl b/lib/kernel/test/inet_SUITE.erl
index f60c13d2e..86f6b95fb 100644
--- a/lib/kernel/test/inet_SUITE.erl
+++ b/lib/kernel/test/inet_SUITE.erl
@@ -40,7 +40,8 @@
 	 lookup_bad_search_option/1,
 	 getif/1,
 	 getif_ifr_name_overflow/1,getservbyname_overflow/1, getifaddrs/1,
-	 parse_strict_address/1, simple_netns/1, simple_netns_open/1]).
+	 parse_strict_address/1, simple_netns/1, simple_netns_open/1,
+         simple_bind_to_device/1, simple_bind_to_device_open/1]).
 
 -export([get_hosts/1, get_ipv6_hosts/1, parse_hosts/1, parse_address/1,
 	 kill_gethost/0, parallell_gethost/0, test_netns/0]).
@@ -58,7 +59,8 @@ all() ->
      gethostnative_debug_level, gethostnative_soft_restart,
      lookup_bad_search_option,
      getif, getif_ifr_name_overflow, getservbyname_overflow,
-     getifaddrs, parse_strict_address, simple_netns, simple_netns_open].
+     getifaddrs, parse_strict_address, simple_netns, simple_netns_open,
+     simple_bind_to_device, simple_bind_to_device_open].
 
 groups() -> 
     [{parse, [], [parse_hosts, parse_address]}].
@@ -1247,3 +1249,67 @@ cmd(CmdString) ->
     io:put_chars(["# ",CmdString,io_lib:nl()]),
     io:put_chars([os:cmd(CmdString++" ; echo '  =>' $?")]),
     ok.
+
+-define(CAP_NET_RAW, 13).        %% from /usr/include/linux/capability.h
+
+can_bind_to_device({unix, linux}, {Major, _, _})
+  when Major > 2 ->
+    Status = os:cmd("cat /proc/self/status | grep CapEff"),
+    [_, CapEffStr] = string:tokens(Status, [$\n, $\t]),
+    CapEff = list_to_integer(CapEffStr, 16),
+    if CapEff band (1 bsl ?CAP_NET_RAW) =/= 0 ->
+            ok;
+       true ->
+            {skip,"insufficient capabilities, CAP_NET_RAW not granted"}
+    end;
+can_bind_to_device(_OS, _Version) ->
+    {skip,"socket option bind_to_device not supported on this OS or version"}.
+
+simple_bind_to_device(Config) when is_list(Config) ->
+    case can_bind_to_device(os:type(), os:version()) of
+        ok ->
+            {ok,U} = gen_udp:open(0),
+            jog_bind_to_device_opt(U),
+            ok = gen_udp:close(U),
+            %%
+            {ok,L} = gen_tcp:listen(0, []),
+            jog_bind_to_device_opt(L),
+            ok = gen_tcp:close(L),
+            %%
+            case gen_sctp:open() of
+                {ok,S} ->
+                    jog_bind_to_device_opt(S),
+                    ok = gen_sctp:close(S);
+                {error,eprotonosupport} ->
+                    ok
+            end;
+        Other ->
+            Other
+    end.
+
+%% Smoke test bind_to_device support.
+simple_bind_to_device_open(Config) when is_list(Config) ->
+    case can_bind_to_device(os:type(), os:version()) of
+        ok ->
+            {ok,U} = gen_udp:open(0, [binary,{bind_to_device,<<"lo">>},inet]),
+            ok = gen_udp:close(U),
+            {ok,T} = gen_tcp:listen(0, [binary,{bind_to_device,<<"lo">>},inet]),
+            ok = gen_tcp:close(T),
+
+            case gen_sctp:open(0, [binary,{bind_to_device,<<"lo">>},inet]) of
+                {ok,S} ->
+                    ok = gen_sctp:close(S);
+                {error,eprotonosupport} ->
+                    ok
+            end;
+        Other ->
+            Other
+    end.
+
+jog_bind_to_device_opt(S) ->
+    %% This is just jogging the option mechanics
+    ok = inet:setopts(S, [{bind_to_device,<<>>}]),
+    {ok,[{bind_to_device,<<>>}]} = inet:getopts(S, [bind_to_device]),
+    ok = inet:setopts(S, [{bind_to_device,<<"lo">>}]),
+    {ok,[{bind_to_device,<<"lo">>}]} = inet:getopts(S, [bind_to_device]),
+    ok.
-- 
2.12.2

openSUSE Build Service is sponsored by