File libibverbs-Add-receive-flow-steering-support.patch of Package libibverbs

From db47da4a0b79fb3c20a744db2f70ab6b32c8a7eb Mon Sep 17 00:00:00 2001
From: Matan Barak <matanb@mellanox.com>
Date: Thu, 6 Feb 2014 14:20:33 +0200
Subject: [PATCH 6/6] Add receive flow steering support

The RDMA stack allows for applications to create IB_QPT_RAW_PACKET
QPs, which receive plain Ethernet packets, specifically packets that
don't carry any QPN to be matched by the receiving side. Applications
using these QPs must be provided with a method to program some steering
rule with the HW so packets arriving at the local port can be routed to them.

In a similar manner, when the device supports flow steering, IB UD QPs
created by IPoIB allow user-space applications to steer specific TCP/IP
flows to their QPs.

This patch adds ibv_create_flow(), which allows providing a flow specification
for a QP.  When there's a match between the specification and a received packet,
the packet is forwarded to that QP, in the same way one uses ibv_attach_mcast()
for IB UD multicast handling.

Flow specifications are provided as instances of struct ibv_flow_spec_yyy,
which describe L2, L3 and L4 headers.  Currently specs for Ethernet, IPv4,
TCP and UDP are defined.  Flow specs are made of values and masks.

The input to ibv_create_flow() is a struct ibv_flow_attr, which contains
a few mandatory control elements and optional flow specs.

 struct ibv_flow_attr {
	uint32_t comp_mask;
	enum ibv_flow_attr_type type;
	uint16_t size;
	uint16_t priority;
	uint8_t  num_of_specs;
	uint8_t  port;
	uint32_t flags;
	/* Following are the optional layers according to user request
	 * struct ibv_flow_spec_xxx [L2]
	 * struct ibv_flow_spec_yyy [L3/L4]
	 */
 };

These flow specs are defined and used in a way which allows adding new spec
types without kernel/user ABI change, just with a little API enhancement which
defines the newly added spec.

The flow spec structures are defined with TLV (Type-Length-Value) entries, which
allows calling ibv_create_flow() with a variable-length list of optional specs.

For the actual processing of ibv_flow_attr the kernel uses the mandatory
num_of_specs and size fields along with the TLV nature of
the specs.

The returned value from ibv_create_flow() is a struct ibv_flow, which contains
a handle provided by the kernel to be used when calling ibv_destroy_flow().

The ibv_flow_attr_type enum supports usage of flow steering for promiscuous
and sniffer purposes:

    IBV_FLOW_ATTR_NORMAL - "regular" rule, steering according to rule specification

    IBV_FLOW_ATTR_ALL_DEFAULT - default unicast and multicast rule, receive
        all Ethernet traffic which isn't steered to any QP

    IBV_FLOW_ATTR_MC_DEFAULT - same as IBV_FLOW_ATTR_ALL_DEFAULT but only for multicast

ALL_DEFAULT and MC_DEFAULT rule options are valid only for the Ethernet link type.

Signed-off-by: Hadar Hen Zion <hadarh@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Matan Barak <matanb@mellanox.com>
---
 include/infiniband/driver.h   |    4 +
 include/infiniband/kern-abi.h |   99 ++++++++++++++++++++++++++++++++
 include/infiniband/verbs.h    |  128 +++++++++++++++++++++++++++++++++++++++++-
 src/cmd.c                     |  105 ++++++++++++++++++++++++++++++++++
 src/device.c                  |    4 +
 src/libibverbs.map            |    2 
 6 files changed, 340 insertions(+), 2 deletions(-)

Index: libibverbs-1.1.7/include/infiniband/driver.h
===================================================================
--- libibverbs-1.1.7.orig/include/infiniband/driver.h	2014-03-05 10:36:36.000000000 +0100
+++ libibverbs-1.1.7/include/infiniband/driver.h	2014-08-29 12:50:51.649871589 +0200
@@ -194,6 +194,10 @@ int ibv_cmd_destroy_ah(struct ibv_ah *ah
 int ibv_cmd_attach_mcast(struct ibv_qp *qp, const union ibv_gid *gid, uint16_t lid);
 int ibv_cmd_detach_mcast(struct ibv_qp *qp, const union ibv_gid *gid, uint16_t lid);
 
+struct ibv_flow *ibv_cmd_create_flow(struct ibv_qp *qp,
+				     struct ibv_flow_attr *flow_attr);
+int ibv_cmd_destroy_flow(struct ibv_flow *flow_id);
+
 int ibv_dontfork_range(void *base, size_t size);
 int ibv_dofork_range(void *base, size_t size);
 
Index: libibverbs-1.1.7/include/infiniband/kern-abi.h
===================================================================
--- libibverbs-1.1.7.orig/include/infiniband/kern-abi.h	2014-03-05 10:36:36.000000000 +0100
+++ libibverbs-1.1.7/include/infiniband/kern-abi.h	2014-08-29 12:51:28.751407756 +0200
@@ -102,6 +102,13 @@ enum {
 #define IB_USER_VERBS_CMD_FLAG_EXTENDED		0x80ul
 
 
+enum {
+	IB_USER_VERBS_CMD_CREATE_FLOW = (IB_USER_VERBS_CMD_FLAG_EXTENDED <<
+					 IB_USER_VERBS_CMD_FLAGS_SHIFT) +
+					IB_USER_VERBS_CMD_THRESHOLD,
+	IB_USER_VERBS_CMD_DESTROY_FLOW
+};
+
 /*
  * Make sure that all structs defined in this file remain laid out so
  * that they pack the same way on 32-bit and 64-bit architectures (to
@@ -676,6 +683,76 @@ struct ibv_kern_send_wr {
 	} qp_type;
 };
 
+struct ibv_kern_eth_filter {
+	__u8  dst_mac[6];
+	__u8  src_mac[6];
+	__u16  ether_type;
+	__u16  vlan_tag;
+};
+
+struct ibv_kern_spec_eth {
+	__u32 type;
+	__u16  size;
+	__u16 reserved;
+	struct ibv_kern_eth_filter val;
+	struct ibv_kern_eth_filter mask;
+};
+
+struct ibv_kern_ipv4_filter {
+	__u32 src_ip;
+	__u32 dst_ip;
+};
+
+struct ibv_kern_spec_ipv4 {
+	__u32  type;
+	__u16  size;
+	__u16 reserved;
+	struct ibv_kern_ipv4_filter val;
+	struct ibv_kern_ipv4_filter mask;
+};
+
+struct ibv_kern_tcp_udp_filter {
+	__u16 dst_port;
+	__u16 src_port;
+};
+
+struct ibv_kern_spec_tcp_udp {
+	__u32  type;
+	__u16  size;
+	__u16 reserved;
+	struct ibv_kern_tcp_udp_filter val;
+	struct ibv_kern_tcp_udp_filter mask;
+};
+
+
+struct ibv_kern_spec {
+	union {
+		struct {
+			__u32 type;
+			__u16 size;
+			__u16 reserved;
+		} hdr;
+		struct ibv_kern_spec_eth eth;
+		struct ibv_kern_spec_ipv4 ipv4;
+		struct ibv_kern_spec_tcp_udp tcp_udp;
+	};
+
+};
+
+struct ibv_kern_flow_attr {
+	__u32 type;
+	__u16 size;
+	__u16 priority;
+	__u8 num_of_specs;
+	__u8 reserved[2];
+	__u8 port;
+	__u32 flags;
+	/* Following are the optional layers according to user request
+	 * struct ibv_kern_flow_spec_xxx
+	 * struct ibv_kern_flow_spec_yyy
+	 */
+};
+
 struct ibv_post_send {
 	__u32 command;
 	__u16 in_words;
@@ -763,6 +840,24 @@ struct ibv_attach_mcast {
 	__u64 driver_data[0];
 };
 
+struct ibv_create_flow  {
+	struct ex_hdr hdr;
+	__u32 comp_mask;
+	__u32 qp_handle;
+	struct ibv_kern_flow_attr flow_attr;
+};
+
+struct ibv_create_flow_resp {
+	__u32 comp_mask;
+	__u32 flow_handle;
+};
+
+struct ibv_destroy_flow  {
+	struct ex_hdr hdr;
+	__u32 comp_mask;
+	__u32 flow_handle;
+};
+
 struct ibv_detach_mcast {
 	__u32 command;
 	__u16 in_words;
@@ -904,7 +999,9 @@ enum {
 	IB_USER_VERBS_CMD_OPEN_XRCD_V2 = -1,
 	IB_USER_VERBS_CMD_CLOSE_XRCD_V2 = -1,
 	IB_USER_VERBS_CMD_CREATE_XSRQ_V2 = -1,
-	IB_USER_VERBS_CMD_OPEN_QP_V2 = -1
+	IB_USER_VERBS_CMD_OPEN_QP_V2 = -1,
+	IB_USER_VERBS_CMD_CREATE_FLOW_V2 = -1,
+	IB_USER_VERBS_CMD_DESTROY_FLOW_V2 = -1
 };
 
 struct ibv_modify_srq_v3 {
Index: libibverbs-1.1.7/include/infiniband/verbs.h
===================================================================
--- libibverbs-1.1.7.orig/include/infiniband/verbs.h	2014-08-29 12:50:51.644871652 +0200
+++ libibverbs-1.1.7/include/infiniband/verbs.h	2014-08-29 12:50:51.667871364 +0200
@@ -115,7 +115,8 @@ enum ibv_device_cap_flags {
 	IBV_DEVICE_RC_RNR_NAK_GEN	= 1 << 12,
 	IBV_DEVICE_SRQ_RESIZE		= 1 << 13,
 	IBV_DEVICE_N_NOTIFY_CQ		= 1 << 14,
-	IBV_DEVICE_XRC			= 1 << 20
+	IBV_DEVICE_XRC			= 1 << 20,
+	IBV_DEVICE_MANAGED_FLOW_STEERING = 1 << 29
 };
 
 enum ibv_atomic_cap {
@@ -965,8 +966,113 @@ enum verbs_context_mask {
 	VERBS_CONTEXT_RESERVED  = 1 << 4
 };
 
+enum ibv_flow_flags {
+	IBV_FLOW_ATTR_FLAGS_ALLOW_LOOP_BACK = 1,
+};
+
+enum ibv_flow_attr_type {
+	/* steering according to rule specifications */
+	IBV_FLOW_ATTR_NORMAL		= 0x0,
+	/* default unicast and multicast rule -
+	 * receive all Eth traffic which isn't steered to any QP
+	 */
+	IBV_FLOW_ATTR_ALL_DEFAULT	= 0x1,
+	/* default multicast rule -
+	 * receive all Eth multicast traffic which isn't steered to any QP
+	 */
+	IBV_FLOW_ATTR_MC_DEFAULT	= 0x2,
+};
+
+enum ibv_flow_spec_type {
+	IBV_FLOW_SPEC_ETH	= 0x20,
+	IBV_FLOW_SPEC_IPV4	= 0x30,
+	IBV_FLOW_SPEC_TCP	= 0x40,
+	IBV_FLOW_SPEC_UDP	= 0x41,
+};
+
+struct ibv_flow_eth_filter {
+	uint8_t		dst_mac[6];
+	uint8_t		src_mac[6];
+	uint16_t	ether_type;
+	/*
+	 * same layout as 802.1q: prio 3, cfi 1, vlan id 12
+	 */
+	uint16_t	vlan_tag;
+};
+
+struct ibv_flow_spec_eth {
+	enum ibv_flow_spec_type  type;
+	uint16_t  size;
+	struct ibv_flow_eth_filter val;
+	struct ibv_flow_eth_filter mask;
+};
+
+struct ibv_flow_ipv4_filter {
+	uint32_t src_ip;
+	uint32_t dst_ip;
+};
+
+struct ibv_flow_spec_ipv4 {
+	enum ibv_flow_spec_type  type;
+	uint16_t  size;
+	struct ibv_flow_ipv4_filter val;
+	struct ibv_flow_ipv4_filter mask;
+};
+
+struct ibv_flow_tcp_udp_filter {
+	uint16_t dst_port;
+	uint16_t src_port;
+};
+
+struct ibv_flow_spec_tcp_udp {
+	enum ibv_flow_spec_type  type;
+	uint16_t  size;
+	struct ibv_flow_tcp_udp_filter val;
+	struct ibv_flow_tcp_udp_filter mask;
+};
+
+struct ibv_flow_spec {
+	union {
+		struct {
+			enum ibv_flow_spec_type	type;
+			uint16_t		size;
+		} hdr;
+		struct ibv_flow_spec_eth eth;
+		struct ibv_flow_spec_ipv4 ipv4;
+		struct ibv_flow_spec_tcp_udp tcp_udp;
+	};
+};
+
+struct ibv_flow_attr {
+	uint32_t comp_mask;
+	enum ibv_flow_attr_type type;
+	uint16_t size;
+	uint16_t priority;
+	uint8_t num_of_specs;
+	uint8_t port;
+	uint32_t flags;
+	/* Following are the optional layers according to user request
+	 * struct ibv_flow_spec_xxx [L2]
+	 * struct ibv_flow_spec_yyy [L3/L4]
+	 */
+};
+
+struct ibv_flow {
+	uint32_t	   comp_mask;
+	struct ibv_context *context;
+	uint32_t	   handle;
+};
+
 struct verbs_context {
 	/*  "grows up" - new fields go here */
+	int (*drv_ibv_destroy_flow) (struct ibv_flow *flow);
+	int (*lib_ibv_destroy_flow) (struct ibv_flow *flow);
+	struct ibv_flow * (*drv_ibv_create_flow) (struct ibv_qp *qp,
+						  struct ibv_flow_attr
+						  *flow_attr);
+	struct ibv_flow * (*lib_ibv_create_flow) (struct ibv_qp *qp,
+						  struct ibv_flow_attr
+						  *flow_attr);
 	int (*drv_query_port_ex)(struct ibv_context *context, uint8_t port_num,
 				 struct ibv_port_attr_ex *port_attr);
 	int (*lib_query_port_ex)(struct ibv_context *context, uint8_t port_num,
@@ -1156,6 +1262,26 @@ struct ibv_pd *ibv_alloc_pd(struct ibv_c
  */
 int ibv_dealloc_pd(struct ibv_pd *pd);
 
+static inline struct ibv_flow *ibv_create_flow(struct ibv_qp *qp,
+					       struct ibv_flow_attr *flow)
+{
+	struct verbs_context *vctx = verbs_get_ctx_op(qp->context,
+						      lib_ibv_create_flow);
+	if (!vctx || !vctx->lib_ibv_create_flow)
+		return NULL;
+
+	return vctx->lib_ibv_create_flow(qp, flow);
+}
+
+static inline int ibv_destroy_flow(struct ibv_flow *flow_id)
+{
+	struct verbs_context *vctx = verbs_get_ctx_op(flow_id->context,
+						      lib_ibv_destroy_flow);
+	if (!vctx || !vctx->lib_ibv_destroy_flow)
+		return -ENOSYS;
+	return vctx->lib_ibv_destroy_flow(flow_id);
+}
+
 /**
  * ibv_open_xrcd - Open an extended connection domain
  */
Index: libibverbs-1.1.7/src/cmd.c
===================================================================
--- libibverbs-1.1.7.orig/src/cmd.c	2014-03-05 10:36:36.000000000 +0100
+++ libibverbs-1.1.7/src/cmd.c	2014-08-29 12:50:51.667871364 +0200
@@ -1268,3 +1268,108 @@ int ibv_cmd_detach_mcast(struct ibv_qp *
 
 	return 0;
 }
+
+static int ib_spec_to_kern_spec(struct ibv_flow_spec *ib_spec,
+				struct ibv_kern_spec *kern_spec)
+{
+	kern_spec->hdr.type = ib_spec->hdr.type;
+	/* Per type: set the kernel spec size, then copy the val and mask filters. */
+	switch (ib_spec->hdr.type) {
+	case IBV_FLOW_SPEC_ETH:
+		kern_spec->eth.size = sizeof(struct ibv_kern_spec_eth);
+		memcpy(&kern_spec->eth.val, &ib_spec->eth.val,
+		       sizeof(struct ibv_flow_eth_filter));
+		memcpy(&kern_spec->eth.mask, &ib_spec->eth.mask,
+		       sizeof(struct ibv_flow_eth_filter));
+		break;
+	case IBV_FLOW_SPEC_IPV4:
+		kern_spec->ipv4.size = sizeof(struct ibv_kern_spec_ipv4);
+		memcpy(&kern_spec->ipv4.val, &ib_spec->ipv4.val,
+		       sizeof(struct ibv_flow_ipv4_filter));
+		memcpy(&kern_spec->ipv4.mask, &ib_spec->ipv4.mask,
+		       sizeof(struct ibv_flow_ipv4_filter));
+		break;
+	case IBV_FLOW_SPEC_TCP:
+	case IBV_FLOW_SPEC_UDP:
+		kern_spec->tcp_udp.size = sizeof(struct ibv_kern_spec_tcp_udp);
+		memcpy(&kern_spec->tcp_udp.val, &ib_spec->tcp_udp.val,
+		       sizeof(struct ibv_flow_tcp_udp_filter)); /* not the ipv4 size */
+		memcpy(&kern_spec->tcp_udp.mask, &ib_spec->tcp_udp.mask,
+		       sizeof(struct ibv_flow_tcp_udp_filter));
+		break;
+	default:
+		return -EINVAL;	/* unrecognized spec type */
+	}
+	return 0;
+}
+
+struct ibv_flow *ibv_cmd_create_flow(struct ibv_qp *qp,
+				     struct ibv_flow_attr *flow_attr)
+{
+	struct ibv_create_flow *cmd;
+	struct ibv_create_flow_resp resp;
+	struct ibv_flow *flow_id;
+	size_t cmd_size;
+	size_t written_size;
+	int i, err;
+	void *kern_spec;
+	void *ib_spec;
+
+	cmd_size = sizeof(*cmd) + (flow_attr->num_of_specs *
+				  sizeof(struct ibv_kern_spec));
+	cmd = alloca(cmd_size);	/* sized as if every spec were the full union */
+	flow_id = calloc(1, sizeof(*flow_id));	/* zeroed: comp_mask is never set below */
+	if (!flow_id)
+		return NULL;
+	memset(cmd, 0, cmd_size);
+
+	cmd->qp_handle = qp->handle;
+
+	cmd->flow_attr.type = flow_attr->type;
+	cmd->flow_attr.priority = flow_attr->priority;
+	cmd->flow_attr.num_of_specs = flow_attr->num_of_specs;
+	cmd->flow_attr.port = flow_attr->port;
+	cmd->flow_attr.flags = flow_attr->flags;
+
+	kern_spec = cmd + 1;	/* kernel specs are written right after the command */
+	ib_spec = flow_attr + 1;	/* user specs are read right after flow_attr */
+	for (i = 0; i < flow_attr->num_of_specs; i++) {
+		err = ib_spec_to_kern_spec(ib_spec, kern_spec);
+		if (err)
+			goto err;
+		cmd->flow_attr.size +=
+			((struct ibv_kern_spec *)kern_spec)->hdr.size;
+		kern_spec += ((struct ibv_kern_spec *)kern_spec)->hdr.size;
+		ib_spec += ((struct ibv_flow_spec *)ib_spec)->hdr.size;
+	}
+
+	written_size = sizeof(*cmd) + cmd->flow_attr.size; /* only the bytes filled in */
+	IBV_INIT_CMD_RESP_EX_VCMD(cmd, written_size, written_size, CREATE_FLOW,
+				  &resp, sizeof(resp));
+	if (write(qp->context->cmd_fd, cmd, written_size) != written_size)
+		goto err;
+
+	VALGRIND_MAKE_MEM_DEFINED(&resp, sizeof(resp));
+
+	flow_id->context = qp->context;
+	flow_id->handle = resp.flow_handle;
+	return flow_id;	/* caller owns it; ibv_cmd_destroy_flow() frees it */
+err:
+	free(flow_id);
+	return NULL;
+}
+
+int ibv_cmd_destroy_flow(struct ibv_flow *flow_id)
+{
+	struct ibv_destroy_flow cmd;
+	int ret = 0;
+
+	memset(&cmd, 0, sizeof(cmd));
+	IBV_INIT_CMD_EX(&cmd, sizeof(cmd), DESTROY_FLOW);
+	cmd.flow_handle = flow_id->handle;
+
+	if (write(flow_id->context->cmd_fd, &cmd, sizeof(cmd)) != sizeof(cmd))
+		ret = errno;	/* a failed write is reported as a positive errno */
+	free(flow_id);	/* flow_id is released even when the command failed */
+	return ret;
+}
Index: libibverbs-1.1.7/src/device.c
===================================================================
--- libibverbs-1.1.7.orig/src/device.c	2014-08-29 12:50:51.644871652 +0200
+++ libibverbs-1.1.7/src/device.c	2014-08-29 12:50:51.667871364 +0200
@@ -171,6 +171,10 @@ struct ibv_context *__ibv_open_device(st
 		 */
 		 context_ex->lib_query_port_ex =
 			 context_ex->drv_query_port_ex;
+		 context_ex->lib_ibv_create_flow =
+			 context_ex->drv_ibv_create_flow;
+		 context_ex->lib_ibv_destroy_flow =
+			 context_ex->drv_ibv_destroy_flow;
 	}
 
 	context->device = device;
Index: libibverbs-1.1.7/src/libibverbs.map
===================================================================
--- libibverbs-1.1.7.orig/src/libibverbs.map	2014-03-05 10:36:36.000000000 +0100
+++ libibverbs-1.1.7/src/libibverbs.map	2014-08-29 12:50:51.667871364 +0200
@@ -64,6 +64,8 @@ IBVERBS_1.0 {
 		ibv_cmd_destroy_ah;
 		ibv_cmd_attach_mcast;
 		ibv_cmd_detach_mcast;
+		ibv_cmd_create_flow;
+		ibv_cmd_destroy_flow;
 		ibv_copy_qp_attr_from_kern;
 		ibv_copy_path_rec_from_kern;
 		ibv_copy_path_rec_to_kern;
openSUSE Build Service is sponsored by