File libibverbs-Add-receive-flow-steering-support.patch of Package libibverbs
From db47da4a0b79fb3c20a744db2f70ab6b32c8a7eb Mon Sep 17 00:00:00 2001
From: Matan Barak <matanb@mellanox.com>
Date: Thu, 6 Feb 2014 14:20:33 +0200
Subject: [PATCH 6/6] Add receive flow steering support
The RDMA stack allows for applications to create IB_QPT_RAW_PACKET
QPs, which receive plain Ethernet packets, specifically packets that
don't carry any QPN to be matched by the receiving side. Applications
using these QPs must be provided with a method to program some steering
rule with the HW so packets arriving at the local port can be routed to them.
In a similar manner, when the device supports flow steering, IB UD QPs
created by IPoIB allow user-space applications to steer specific TCP/IP
flows to their QPs.
This patch adds ibv_create_flow(), which allows providing a flow specification
for a QP. When there's a match between the specification and a received packet,
the packet is forwarded to that QP, in the same way one uses ibv_attach_mcast()
for IB UD multicast handling.
Flow specifications are provided as instances of struct ibv_flow_spec_yyy,
which describes L2, L3 and L4 headers. Currently specs for Ethernet, IPv4,
TCP and UDP are defined. Flow specs are made of values and masks.
The input to ibv_create_flow() is a struct ibv_flow_attr, which contains
a few mandatory control elements and optional flow specs.
struct ibv_flow_attr {
uint32_t comp_mask;
enum ibv_flow_attr_type type;
uint16_t size;
uint16_t priority;
uint8_t num_of_specs;
uint8_t port;
uint32_t flags;
/* Following are the optional layers according to user request
* struct ibv_flow_spec_xxx [L2]
* struct ibv_flow_spec_yyy [L3/L4]
*/
};
These flow specs are defined and used in a way which allows adding new spec
types without kernel/user ABI change, just with a little API enhancement which
defines the newly added spec.
The flow spec structures are defined with TLV (Type-Length-Value) entries, which
allows calling ibv_create_flow() with a variable-length list of optional specs.
For the actual processing of ibv_flow_attr the kernel uses the number
of specs and the size mandatory fields along with the TLV nature of
the specs.
The returned value from ibv_create_flow() is a struct ibv_flow, which contains
a handle provided by the kernel to be used when calling ibv_destroy_flow().
The ibv_flow_attr_type enum supports usage of flow steering for promiscuous
and sniffer purposes:
IBV_FLOW_ATTR_NORMAL - "regular" rule, steering according to rule specification
IBV_FLOW_ATTR_ALL_DEFAULT - default unicast and multicast rule, receive
all Ethernet traffic which isn't steered to any QP
IBV_FLOW_ATTR_MC_DEFAULT - same as IBV_FLOW_ATTR_ALL_DEFAULT but only for multicast
The ALL_DEFAULT and MC_DEFAULT rule options are valid only for the Ethernet link type.
Signed-off-by: Hadar Hen Zion <hadarh@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Matan Barak <matanb@mellanox.com>
---
include/infiniband/driver.h | 4 +
include/infiniband/kern-abi.h | 99 ++++++++++++++++++++++++++++++++
include/infiniband/verbs.h | 128 +++++++++++++++++++++++++++++++++++++++++-
src/cmd.c | 105 ++++++++++++++++++++++++++++++++++
src/device.c | 4 +
src/libibverbs.map | 2
6 files changed, 340 insertions(+), 2 deletions(-)
Index: libibverbs-1.1.7/include/infiniband/driver.h
===================================================================
--- libibverbs-1.1.7.orig/include/infiniband/driver.h 2014-03-05 10:36:36.000000000 +0100
+++ libibverbs-1.1.7/include/infiniband/driver.h 2014-08-29 12:50:51.649871589 +0200
@@ -194,6 +194,10 @@ int ibv_cmd_destroy_ah(struct ibv_ah *ah
int ibv_cmd_attach_mcast(struct ibv_qp *qp, const union ibv_gid *gid, uint16_t lid);
int ibv_cmd_detach_mcast(struct ibv_qp *qp, const union ibv_gid *gid, uint16_t lid);
+struct ibv_flow *ibv_cmd_create_flow(struct ibv_qp *qp,
+ struct ibv_flow_attr *flow_attr);
+int ibv_cmd_destroy_flow(struct ibv_flow *flow_id);
+
int ibv_dontfork_range(void *base, size_t size);
int ibv_dofork_range(void *base, size_t size);
Index: libibverbs-1.1.7/include/infiniband/kern-abi.h
===================================================================
--- libibverbs-1.1.7.orig/include/infiniband/kern-abi.h 2014-03-05 10:36:36.000000000 +0100
+++ libibverbs-1.1.7/include/infiniband/kern-abi.h 2014-08-29 12:51:28.751407756 +0200
@@ -102,6 +102,13 @@ enum {
#define IB_USER_VERBS_CMD_FLAG_EXTENDED 0x80ul
+enum { /* opcodes of the extended flow steering commands */
+ IB_USER_VERBS_CMD_CREATE_FLOW = (IB_USER_VERBS_CMD_FLAG_EXTENDED <<
+ IB_USER_VERBS_CMD_FLAGS_SHIFT) +
+ IB_USER_VERBS_CMD_THRESHOLD, /* extended flag shifted into place, plus the threshold value */
+ IB_USER_VERBS_CMD_DESTROY_FLOW /* no initializer: CREATE_FLOW + 1 */
+};
+
/*
* Make sure that all structs defined in this file remain laid out so
* that they pack the same way on 32-bit and 64-bit architectures (to
@@ -676,6 +683,76 @@ struct ibv_kern_send_wr {
} qp_type;
};
+struct ibv_kern_eth_filter { /* Ethernet match fields, kernel command layout; ib_spec_to_kern_spec() fills it with a raw memcpy from struct ibv_flow_eth_filter */
+ __u8 dst_mac[6];
+ __u8 src_mac[6];
+ __u16 ether_type;
+ __u16 vlan_tag;
+};
+
+struct ibv_kern_spec_eth { /* kernel-side Ethernet flow spec: type/size header, then value and mask */
+ __u32 type; /* copied from the user spec's hdr.type */
+ __u16 size; /* set to sizeof(struct ibv_kern_spec_eth) by ib_spec_to_kern_spec() */
+ __u16 reserved;
+ struct ibv_kern_eth_filter val;
+ struct ibv_kern_eth_filter mask;
+};
+
+struct ibv_kern_ipv4_filter { /* IPv4 address match fields, kernel command layout; filled by memcpy from struct ibv_flow_ipv4_filter */
+ __u32 src_ip;
+ __u32 dst_ip;
+};
+
+struct ibv_kern_spec_ipv4 { /* kernel-side IPv4 flow spec: type/size header, then value and mask */
+ __u32 type; /* copied from the user spec's hdr.type */
+ __u16 size; /* set to sizeof(struct ibv_kern_spec_ipv4) by ib_spec_to_kern_spec() */
+ __u16 reserved;
+ struct ibv_kern_ipv4_filter val;
+ struct ibv_kern_ipv4_filter mask;
+};
+
+struct ibv_kern_tcp_udp_filter { /* TCP/UDP port match fields, kernel command layout */
+ __u16 dst_port;
+ __u16 src_port;
+};
+
+struct ibv_kern_spec_tcp_udp { /* kernel-side spec used for both IBV_FLOW_SPEC_TCP and IBV_FLOW_SPEC_UDP */
+ __u32 type; /* copied from the user spec's hdr.type, which distinguishes TCP from UDP */
+ __u16 size; /* set to sizeof(struct ibv_kern_spec_tcp_udp) by ib_spec_to_kern_spec() */
+ __u16 reserved;
+ struct ibv_kern_tcp_udp_filter val;
+ struct ibv_kern_tcp_udp_filter mask;
+};
+
+
+struct ibv_kern_spec { /* any kernel-side flow spec; its sizeof is the per-spec upper bound used when sizing the create command */
+ union {
+ struct { /* common leading fields, same order as in every concrete spec below */
+ __u32 type;
+ __u16 size; /* ibv_cmd_create_flow() advances by this many bytes to the next kernel spec */
+ __u16 reserved;
+ } hdr;
+ struct ibv_kern_spec_eth eth;
+ struct ibv_kern_spec_ipv4 ipv4;
+ struct ibv_kern_spec_tcp_udp tcp_udp;
+ };
+
+};
+
+struct ibv_kern_flow_attr { /* kernel-side flow attributes; the converted specs follow immediately after it in the command buffer */
+ __u32 type; /* copied from ibv_flow_attr.type */
+ __u16 size; /* ibv_cmd_create_flow() accumulates the sizes of the trailing kernel specs here */
+ __u16 priority;
+ __u8 num_of_specs;
+ __u8 reserved[2];
+ __u8 port;
+ __u32 flags;
+ /* Following are the optional layers according to user request
+ * struct ibv_kern_flow_spec_xxx
+ * struct ibv_kern_flow_spec_yyy
+ */
+};
+
struct ibv_post_send {
__u32 command;
__u16 in_words;
@@ -763,6 +840,24 @@ struct ibv_attach_mcast {
__u64 driver_data[0];
};
+struct ibv_create_flow { /* CREATE_FLOW command as written to the context's cmd_fd; variable-length specs trail flow_attr */
+ struct ex_hdr hdr; /* extended command header, filled by IBV_INIT_CMD_RESP_EX_VCMD() */
+ __u32 comp_mask; /* left zero by ibv_cmd_create_flow() */
+ __u32 qp_handle; /* handle of the QP the rule is attached to */
+ struct ibv_kern_flow_attr flow_attr;
+};
+
+struct ibv_create_flow_resp { /* response buffer handed to the kernel with the CREATE_FLOW command */
+ __u32 comp_mask;
+ __u32 flow_handle; /* stored into ibv_flow.handle and later sent back by DESTROY_FLOW */
+};
+
+struct ibv_destroy_flow { /* DESTROY_FLOW command as written to the context's cmd_fd */
+ struct ex_hdr hdr; /* extended command header, filled by IBV_INIT_CMD_EX() */
+ __u32 comp_mask; /* left zero by ibv_cmd_destroy_flow() */
+ __u32 flow_handle; /* handle taken from the struct ibv_flow being destroyed */
+};
+
struct ibv_detach_mcast {
__u32 command;
__u16 in_words;
@@ -904,7 +999,9 @@ enum {
IB_USER_VERBS_CMD_OPEN_XRCD_V2 = -1,
IB_USER_VERBS_CMD_CLOSE_XRCD_V2 = -1,
IB_USER_VERBS_CMD_CREATE_XSRQ_V2 = -1,
- IB_USER_VERBS_CMD_OPEN_QP_V2 = -1
+ IB_USER_VERBS_CMD_OPEN_QP_V2 = -1,
+ IB_USER_VERBS_CMD_CREATE_FLOW_V2 = -1,
+ IB_USER_VERBS_CMD_DESTROY_FLOW_V2 = -1
};
struct ibv_modify_srq_v3 {
Index: libibverbs-1.1.7/include/infiniband/verbs.h
===================================================================
--- libibverbs-1.1.7.orig/include/infiniband/verbs.h 2014-08-29 12:50:51.644871652 +0200
+++ libibverbs-1.1.7/include/infiniband/verbs.h 2014-08-29 12:50:51.667871364 +0200
@@ -115,7 +115,8 @@ enum ibv_device_cap_flags {
IBV_DEVICE_RC_RNR_NAK_GEN = 1 << 12,
IBV_DEVICE_SRQ_RESIZE = 1 << 13,
IBV_DEVICE_N_NOTIFY_CQ = 1 << 14,
- IBV_DEVICE_XRC = 1 << 20
+ IBV_DEVICE_XRC = 1 << 20,
+ IBV_DEVICE_MANAGED_FLOW_STEERING = 1 << 29
};
enum ibv_atomic_cap {
@@ -965,8 +966,113 @@ enum verbs_context_mask {
VERBS_CONTEXT_RESERVED = 1 << 4
};
+enum ibv_flow_flags { /* presumably values for ibv_flow_attr.flags -- TODO confirm against the kernel ABI */
+ IBV_FLOW_ATTR_FLAGS_ALLOW_LOOP_BACK = 1,
+};
+
+enum ibv_flow_attr_type { /* rule kind carried in ibv_flow_attr.type; the two DEFAULT kinds are valid only for Ethernet link type */
+ /* steering according to rule specifications */
+ IBV_FLOW_ATTR_NORMAL = 0x0,
+ /* default unicast and multicast rule -
+ * receive all Eth traffic which isn't steered to any QP
+ */
+ IBV_FLOW_ATTR_ALL_DEFAULT = 0x1,
+ /* default multicast rule -
+ * receive all Eth multicast traffic which isn't steered to any QP
+ */
+ IBV_FLOW_ATTR_MC_DEFAULT = 0x2,
+};
+
+enum ibv_flow_spec_type { /* tag in each spec's hdr.type; ib_spec_to_kern_spec() rejects any other value with -EINVAL */
+ IBV_FLOW_SPEC_ETH = 0x20,
+ IBV_FLOW_SPEC_IPV4 = 0x30,
+ IBV_FLOW_SPEC_TCP = 0x40, /* TCP and UDP share struct ibv_flow_spec_tcp_udp */
+ IBV_FLOW_SPEC_UDP = 0x41,
+};
+
+struct ibv_flow_eth_filter { /* user-facing Ethernet match fields; copied byte-for-byte into struct ibv_kern_eth_filter */
+ uint8_t dst_mac[6];
+ uint8_t src_mac[6];
+ uint16_t ether_type;
+ /*
+ * same layout as 802.1q: prio 3, cfi 1, vlan id 12
+ */
+ uint16_t vlan_tag;
+};
+
+struct ibv_flow_spec_eth { /* user-facing Ethernet flow spec: value plus mask */
+ enum ibv_flow_spec_type type;
+ uint16_t size; /* ibv_cmd_create_flow() advances by this many bytes to reach the next user spec */
+ struct ibv_flow_eth_filter val;
+ struct ibv_flow_eth_filter mask;
+};
+
+struct ibv_flow_ipv4_filter { /* user-facing IPv4 address match fields; copied byte-for-byte into struct ibv_kern_ipv4_filter */
+ uint32_t src_ip;
+ uint32_t dst_ip;
+};
+
+struct ibv_flow_spec_ipv4 { /* user-facing IPv4 flow spec: value plus mask */
+ enum ibv_flow_spec_type type;
+ uint16_t size; /* ibv_cmd_create_flow() advances by this many bytes to reach the next user spec */
+ struct ibv_flow_ipv4_filter val;
+ struct ibv_flow_ipv4_filter mask;
+};
+
+struct ibv_flow_tcp_udp_filter { /* user-facing TCP/UDP port match fields */
+ uint16_t dst_port;
+ uint16_t src_port;
+};
+
+struct ibv_flow_spec_tcp_udp { /* user-facing spec for IBV_FLOW_SPEC_TCP and IBV_FLOW_SPEC_UDP: value plus mask */
+ enum ibv_flow_spec_type type;
+ uint16_t size; /* ibv_cmd_create_flow() advances by this many bytes to reach the next user spec */
+ struct ibv_flow_tcp_udp_filter val;
+ struct ibv_flow_tcp_udp_filter mask;
+};
+
+struct ibv_flow_spec { /* any user-facing flow spec; hdr.type selects which union member is meaningful */
+ union {
+ struct { /* common leading fields shared by every concrete spec */
+ enum ibv_flow_spec_type type;
+ uint16_t size;
+ } hdr;
+ struct ibv_flow_spec_eth eth;
+ struct ibv_flow_spec_ipv4 ipv4;
+ struct ibv_flow_spec_tcp_udp tcp_udp;
+ };
+};
+
+struct ibv_flow_attr { /* input to ibv_create_flow(); num_of_specs flow specs must follow it contiguously in memory */
+ uint32_t comp_mask; /* not read by ibv_cmd_create_flow() in this patch */
+ enum ibv_flow_attr_type type;
+ uint16_t size; /* not read by ibv_cmd_create_flow(); the kernel-side size is recomputed from the specs */
+ uint16_t priority;
+ uint8_t num_of_specs; /* number of trailing specs that ibv_cmd_create_flow() converts */
+ uint8_t port;
+ uint32_t flags;
+ /* Following are the optional layers according to user request
+ * struct ibv_flow_spec_xxx [L2]
+ * struct ibv_flow_spec_yyy [L3/L4]
+ */
+};
+
+struct ibv_flow { /* returned by ibv_create_flow(); malloc'ed by ibv_cmd_create_flow() and freed by ibv_cmd_destroy_flow() */
+ uint32_t comp_mask; /* NOTE(review): not initialized by ibv_cmd_create_flow() -- confirm whether callers read it */
+ struct ibv_context *context; /* context of the QP the flow was created on */
+ uint32_t handle; /* kernel flow handle from the create response */
+};
+
struct verbs_context {
/* "grows up" - new fields go here */
+ int (*drv_ibv_destroy_flow) (struct ibv_flow *flow);
+ int (*lib_ibv_destroy_flow) (struct ibv_flow *flow);
+ struct ibv_flow * (*drv_ibv_create_flow) (struct ibv_qp *qp,
+ struct ibv_flow_attr
+ *flow_attr);
+ struct ibv_flow * (*lib_ibv_create_flow) (struct ibv_qp *qp,
+ struct ibv_flow_attr
+ *flow_attr);
int (*drv_query_port_ex)(struct ibv_context *context, uint8_t port_num,
struct ibv_port_attr_ex *port_attr);
int (*lib_query_port_ex)(struct ibv_context *context, uint8_t port_num,
@@ -1156,6 +1262,26 @@ struct ibv_pd *ibv_alloc_pd(struct ibv_c
*/
int ibv_dealloc_pd(struct ibv_pd *pd);
+/**
+ * ibv_create_flow - Attach a flow steering rule to a QP
+ * @qp: QP that matching packets are forwarded to
+ * @flow: rule attributes, immediately followed in memory by
+ *        flow->num_of_specs flow specs
+ *
+ * Dispatches to the lib_ibv_create_flow op of the QP's extended context.
+ * Returns the new flow, or NULL on failure.  When the context has no such
+ * op, errno is set to ENOSYS so that callers can tell "not supported" apart
+ * from other failures, matching the -ENOSYS reported by ibv_destroy_flow().
+ */
+static inline struct ibv_flow *ibv_create_flow(struct ibv_qp *qp,
+					       struct ibv_flow_attr *flow)
+{
+	struct verbs_context *vctx = verbs_get_ctx_op(qp->context,
+						      lib_ibv_create_flow);
+	if (!vctx || !vctx->lib_ibv_create_flow) {
+		errno = ENOSYS;
+		return NULL;
+	}
+
+	return vctx->lib_ibv_create_flow(qp, flow);
+}
+
+/**
+ * ibv_destroy_flow - Remove a flow steering rule
+ * @flow_id: flow previously returned by ibv_create_flow()
+ *
+ * Dispatches to the lib_ibv_destroy_flow op of the flow's extended context
+ * and returns its result, or -ENOSYS when the context has no such op.
+ */
+static inline int ibv_destroy_flow(struct ibv_flow *flow_id)
+{
+	struct verbs_context *vctx;
+
+	vctx = verbs_get_ctx_op(flow_id->context, lib_ibv_destroy_flow);
+	if (vctx && vctx->lib_ibv_destroy_flow)
+		return vctx->lib_ibv_destroy_flow(flow_id);
+
+	return -ENOSYS;
+}
+
/**
* ibv_open_xrcd - Open an extended connection domain
*/
Index: libibverbs-1.1.7/src/cmd.c
===================================================================
--- libibverbs-1.1.7.orig/src/cmd.c 2014-03-05 10:36:36.000000000 +0100
+++ libibverbs-1.1.7/src/cmd.c 2014-08-29 12:50:51.667871364 +0200
@@ -1268,3 +1268,108 @@ int ibv_cmd_detach_mcast(struct ibv_qp *
return 0;
}
+
+/*
+ * Convert one user-space flow spec into its kernel ABI representation.
+ *
+ * Copies the spec type, stores the kernel-side size for that type and
+ * copies the value and mask filters.  The kernel filter structs are filled
+ * with a raw memcpy, so they must keep the same layout as the user-space
+ * filter structs.
+ *
+ * Returns 0 on success, or -EINVAL when the spec type is not one of
+ * IBV_FLOW_SPEC_ETH, IBV_FLOW_SPEC_IPV4, IBV_FLOW_SPEC_TCP or
+ * IBV_FLOW_SPEC_UDP.
+ */
+static int ib_spec_to_kern_spec(struct ibv_flow_spec *ib_spec,
+				struct ibv_kern_spec *kern_spec)
+{
+	kern_spec->hdr.type = ib_spec->hdr.type;
+
+	switch (ib_spec->hdr.type) {
+	case IBV_FLOW_SPEC_ETH:
+		kern_spec->eth.size = sizeof(struct ibv_kern_spec_eth);
+		memcpy(&kern_spec->eth.val, &ib_spec->eth.val,
+		       sizeof(struct ibv_flow_eth_filter));
+		memcpy(&kern_spec->eth.mask, &ib_spec->eth.mask,
+		       sizeof(struct ibv_flow_eth_filter));
+		break;
+	case IBV_FLOW_SPEC_IPV4:
+		kern_spec->ipv4.size = sizeof(struct ibv_kern_spec_ipv4);
+		memcpy(&kern_spec->ipv4.val, &ib_spec->ipv4.val,
+		       sizeof(struct ibv_flow_ipv4_filter));
+		memcpy(&kern_spec->ipv4.mask, &ib_spec->ipv4.mask,
+		       sizeof(struct ibv_flow_ipv4_filter));
+		break;
+	case IBV_FLOW_SPEC_TCP:
+	case IBV_FLOW_SPEC_UDP:
+		kern_spec->tcp_udp.size = sizeof(struct ibv_kern_spec_tcp_udp);
+		/*
+		 * Copy exactly one TCP/UDP filter.  The IPv4 filter is twice
+		 * as large, so using its size here would read and write past
+		 * val into the adjacent mask member.
+		 */
+		memcpy(&kern_spec->tcp_udp.val, &ib_spec->tcp_udp.val,
+		       sizeof(struct ibv_flow_tcp_udp_filter));
+		memcpy(&kern_spec->tcp_udp.mask, &ib_spec->tcp_udp.mask,
+		       sizeof(struct ibv_flow_tcp_udp_filter));
+		break;
+	default:
+		return -EINVAL;
+	}
+	return 0;
+}
+
+/*
+ * Build and issue the extended CREATE_FLOW command for a QP.
+ *
+ * @qp:        QP the rule is attached to; its context's cmd_fd receives the
+ *             command.
+ * @flow_attr: rule attributes, immediately followed in memory by
+ *             flow_attr->num_of_specs flow specs.  Each spec's hdr.size is
+ *             used to find the next one.
+ *
+ * Every user spec is converted to its kernel layout and appended after the
+ * command header; only the bytes actually used are written to the kernel.
+ *
+ * Returns a malloc'ed struct ibv_flow carrying the kernel's flow handle
+ * (released by ibv_cmd_destroy_flow()), or NULL on failure.  On an
+ * unsupported spec type errno is set to EINVAL.
+ *
+ * NOTE(review): the comp_mask member of the returned flow is not
+ * initialized here -- confirm whether any caller reads it.
+ */
+struct ibv_flow *ibv_cmd_create_flow(struct ibv_qp *qp,
+				     struct ibv_flow_attr *flow_attr)
+{
+	struct ibv_create_flow *cmd;
+	struct ibv_create_flow_resp resp;
+	struct ibv_flow *flow_id;
+	struct ibv_kern_spec *kern_spec;
+	struct ibv_flow_spec *ib_spec;
+	size_t cmd_size;
+	size_t written_size;
+	int i, err;
+
+	flow_id = malloc(sizeof(*flow_id));
+	if (!flow_id)
+		return NULL;
+
+	/* Upper bound: every spec occupies a full struct ibv_kern_spec. */
+	cmd_size = sizeof(*cmd) + (flow_attr->num_of_specs *
+				  sizeof(struct ibv_kern_spec));
+	cmd = alloca(cmd_size);
+	memset(cmd, 0, cmd_size);
+
+	cmd->qp_handle = qp->handle;
+
+	cmd->flow_attr.type = flow_attr->type;
+	cmd->flow_attr.priority = flow_attr->priority;
+	cmd->flow_attr.num_of_specs = flow_attr->num_of_specs;
+	cmd->flow_attr.port = flow_attr->port;
+	cmd->flow_attr.flags = flow_attr->flags;
+
+	/*
+	 * Specs are variable-length records packed back to back, so both
+	 * cursors advance by the byte size stored in the record header.
+	 * The arithmetic goes through char * because arithmetic on void *
+	 * is a GNU extension rather than standard C.
+	 */
+	kern_spec = (struct ibv_kern_spec *)(cmd + 1);
+	ib_spec = (struct ibv_flow_spec *)(flow_attr + 1);
+	for (i = 0; i < flow_attr->num_of_specs; i++) {
+		err = ib_spec_to_kern_spec(ib_spec, kern_spec);
+		if (err) {
+			/* The helper returns a negative errno value. */
+			errno = -err;
+			goto err;
+		}
+		cmd->flow_attr.size += kern_spec->hdr.size;
+		kern_spec = (struct ibv_kern_spec *)
+			((char *)kern_spec + kern_spec->hdr.size);
+		ib_spec = (struct ibv_flow_spec *)
+			((char *)ib_spec + ib_spec->hdr.size);
+	}
+
+	/* Send only the header plus the specs actually emitted. */
+	written_size = sizeof(*cmd) + cmd->flow_attr.size;
+	IBV_INIT_CMD_RESP_EX_VCMD(cmd, written_size, written_size, CREATE_FLOW,
+				  &resp, sizeof(resp));
+	if (write(qp->context->cmd_fd, cmd, written_size) != written_size)
+		goto err;
+
+	VALGRIND_MAKE_MEM_DEFINED(&resp, sizeof(resp));
+
+	flow_id->context = qp->context;
+	flow_id->handle = resp.flow_handle;
+	return flow_id;
+err:
+	/* Keep the failure reason visible to the caller across free(). */
+	err = errno;
+	free(flow_id);
+	errno = err;
+	return NULL;
+}
+
+/*
+ * Issue the extended DESTROY_FLOW command for a flow and release it.
+ *
+ * Returns 0 when the whole command was written to the context's cmd_fd,
+ * otherwise the errno value observed after the write.  The flow object is
+ * freed in both cases, so the caller must not touch it afterwards.
+ */
+int ibv_cmd_destroy_flow(struct ibv_flow *flow_id)
+{
+	struct ibv_destroy_flow cmd;
+	int status;
+
+	memset(&cmd, 0, sizeof(cmd));
+	IBV_INIT_CMD_EX(&cmd, sizeof(cmd), DESTROY_FLOW);
+	cmd.flow_handle = flow_id->handle;
+
+	if (write(flow_id->context->cmd_fd, &cmd, sizeof(cmd)) == sizeof(cmd))
+		status = 0;
+	else
+		status = errno;
+
+	free(flow_id);
+	return status;
+}
Index: libibverbs-1.1.7/src/device.c
===================================================================
--- libibverbs-1.1.7.orig/src/device.c 2014-08-29 12:50:51.644871652 +0200
+++ libibverbs-1.1.7/src/device.c 2014-08-29 12:50:51.667871364 +0200
@@ -171,6 +171,10 @@ struct ibv_context *__ibv_open_device(st
*/
context_ex->lib_query_port_ex =
context_ex->drv_query_port_ex;
+ context_ex->lib_ibv_create_flow =
+ context_ex->drv_ibv_create_flow;
+ context_ex->lib_ibv_destroy_flow =
+ context_ex->drv_ibv_destroy_flow;
}
context->device = device;
Index: libibverbs-1.1.7/src/libibverbs.map
===================================================================
--- libibverbs-1.1.7.orig/src/libibverbs.map 2014-03-05 10:36:36.000000000 +0100
+++ libibverbs-1.1.7/src/libibverbs.map 2014-08-29 12:50:51.667871364 +0200
@@ -64,6 +64,8 @@ IBVERBS_1.0 {
ibv_cmd_destroy_ah;
ibv_cmd_attach_mcast;
ibv_cmd_detach_mcast;
+ ibv_cmd_create_flow;
+ ibv_cmd_destroy_flow;
ibv_copy_qp_attr_from_kern;
ibv_copy_path_rec_from_kern;
ibv_copy_path_rec_to_kern;