summaryrefslogtreecommitdiff
path: root/include/rdma
diff options
context:
space:
mode:
authorAndré Fabian Silva Delgado <emulatorman@parabola.nu>2015-08-05 17:04:01 -0300
committerAndré Fabian Silva Delgado <emulatorman@parabola.nu>2015-08-05 17:04:01 -0300
commit57f0f512b273f60d52568b8c6b77e17f5636edc0 (patch)
tree5e910f0e82173f4ef4f51111366a3f1299037a7b /include/rdma
Initial import
Diffstat (limited to 'include/rdma')
-rw-r--r--include/rdma/ib.h89
-rw-r--r--include/rdma/ib_addr.h311
-rw-r--r--include/rdma/ib_cache.h132
-rw-r--r--include/rdma/ib_cm.h605
-rw-r--r--include/rdma/ib_fmr_pool.h93
-rw-r--r--include/rdma/ib_mad.h680
-rw-r--r--include/rdma/ib_marshall.h53
-rw-r--r--include/rdma/ib_pack.h268
-rw-r--r--include/rdma/ib_pma.h156
-rw-r--r--include/rdma/ib_sa.h431
-rw-r--r--include/rdma/ib_smi.h128
-rw-r--r--include/rdma/ib_umem.h109
-rw-r--r--include/rdma/ib_umem_odp.h160
-rw-r--r--include/rdma/ib_verbs.h2671
-rw-r--r--include/rdma/iw_cm.h251
-rw-r--r--include/rdma/iw_portmap.h224
-rw-r--r--include/rdma/rdma_cm.h383
-rw-r--r--include/rdma/rdma_cm_ib.h54
-rw-r--r--include/rdma/rdma_netlink.h80
19 files changed, 6878 insertions, 0 deletions
diff --git a/include/rdma/ib.h b/include/rdma/ib.h
new file mode 100644
index 000000000..cf8f9e700
--- /dev/null
+++ b/include/rdma/ib.h
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2010 Intel Corporation. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#if !defined(_RDMA_IB_H)
+#define _RDMA_IB_H
+
+#include <linux/types.h>
+
+struct ib_addr {
+ union {
+ __u8 uib_addr8[16];
+ __be16 uib_addr16[8];
+ __be32 uib_addr32[4];
+ __be64 uib_addr64[2];
+ } ib_u;
+#define sib_addr8 ib_u.uib_addr8
+#define sib_addr16 ib_u.uib_addr16
+#define sib_addr32 ib_u.uib_addr32
+#define sib_addr64 ib_u.uib_addr64
+#define sib_raw ib_u.uib_addr8
+#define sib_subnet_prefix ib_u.uib_addr64[0]
+#define sib_interface_id ib_u.uib_addr64[1]
+};
+
+static inline int ib_addr_any(const struct ib_addr *a)
+{
+ return ((a->sib_addr64[0] | a->sib_addr64[1]) == 0);
+}
+
+static inline int ib_addr_loopback(const struct ib_addr *a)
+{
+ return ((a->sib_addr32[0] | a->sib_addr32[1] |
+ a->sib_addr32[2] | (a->sib_addr32[3] ^ htonl(1))) == 0);
+}
+
+static inline void ib_addr_set(struct ib_addr *addr,
+ __be32 w1, __be32 w2, __be32 w3, __be32 w4)
+{
+ addr->sib_addr32[0] = w1;
+ addr->sib_addr32[1] = w2;
+ addr->sib_addr32[2] = w3;
+ addr->sib_addr32[3] = w4;
+}
+
+static inline int ib_addr_cmp(const struct ib_addr *a1, const struct ib_addr *a2)
+{
+ return memcmp(a1, a2, sizeof(struct ib_addr));
+}
+
+struct sockaddr_ib {
+ unsigned short int sib_family; /* AF_IB */
+ __be16 sib_pkey;
+ __be32 sib_flowinfo;
+ struct ib_addr sib_addr;
+ __be64 sib_sid;
+ __be64 sib_sid_mask;
+ __u64 sib_scope_id;
+};
+
+#endif /* _RDMA_IB_H */
diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h
new file mode 100644
index 000000000..ac54c27a2
--- /dev/null
+++ b/include/rdma/ib_addr.h
@@ -0,0 +1,311 @@
+/*
+ * Copyright (c) 2005 Voltaire Inc. All rights reserved.
+ * Copyright (c) 2005 Intel Corporation. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#if !defined(IB_ADDR_H)
+#define IB_ADDR_H
+
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <linux/if_arp.h>
+#include <linux/netdevice.h>
+#include <linux/inetdevice.h>
+#include <linux/socket.h>
+#include <linux/if_vlan.h>
+#include <net/ipv6.h>
+#include <net/if_inet6.h>
+#include <net/ip.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_pack.h>
+#include <net/ipv6.h>
+
+struct rdma_addr_client {
+ atomic_t refcount;
+ struct completion comp;
+};
+
+/**
+ * rdma_addr_register_client - Register an address client.
+ */
+void rdma_addr_register_client(struct rdma_addr_client *client);
+
+/**
+ * rdma_addr_unregister_client - Deregister an address client.
+ * @client: Client object to deregister.
+ */
+void rdma_addr_unregister_client(struct rdma_addr_client *client);
+
+struct rdma_dev_addr {
+ unsigned char src_dev_addr[MAX_ADDR_LEN];
+ unsigned char dst_dev_addr[MAX_ADDR_LEN];
+ unsigned char broadcast[MAX_ADDR_LEN];
+ unsigned short dev_type;
+ int bound_dev_if;
+ enum rdma_transport_type transport;
+};
+
+/**
+ * rdma_translate_ip - Translate a local IP address to an RDMA hardware
+ * address.
+ */
+int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr,
+ u16 *vlan_id);
+
+/**
+ * rdma_resolve_ip - Resolve source and destination IP addresses to
+ * RDMA hardware addresses.
+ * @client: Address client associated with request.
+ * @src_addr: An optional source address to use in the resolution. If a
+ * source address is not provided, a usable address will be returned via
+ * the callback.
+ * @dst_addr: The destination address to resolve.
+ * @addr: A reference to a data location that will receive the resolved
+ * addresses. The data location must remain valid until the callback has
+ * been invoked.
+ * @timeout_ms: Amount of time to wait for the address resolution to complete.
+ * @callback: Call invoked once address resolution has completed, timed out,
+ * or been canceled. A status of 0 indicates success.
+ * @context: User-specified context associated with the call.
+ */
+int rdma_resolve_ip(struct rdma_addr_client *client,
+ struct sockaddr *src_addr, struct sockaddr *dst_addr,
+ struct rdma_dev_addr *addr, int timeout_ms,
+ void (*callback)(int status, struct sockaddr *src_addr,
+ struct rdma_dev_addr *addr, void *context),
+ void *context);
+
+void rdma_addr_cancel(struct rdma_dev_addr *addr);
+
+int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev,
+ const unsigned char *dst_dev_addr);
+
+int rdma_addr_size(struct sockaddr *addr);
+
+int rdma_addr_find_smac_by_sgid(union ib_gid *sgid, u8 *smac, u16 *vlan_id);
+int rdma_addr_find_dmac_by_grh(union ib_gid *sgid, union ib_gid *dgid, u8 *smac,
+ u16 *vlan_id);
+
+static inline u16 ib_addr_get_pkey(struct rdma_dev_addr *dev_addr)
+{
+ return ((u16)dev_addr->broadcast[8] << 8) | (u16)dev_addr->broadcast[9];
+}
+
+static inline void ib_addr_set_pkey(struct rdma_dev_addr *dev_addr, u16 pkey)
+{
+ dev_addr->broadcast[8] = pkey >> 8;
+ dev_addr->broadcast[9] = (unsigned char) pkey;
+}
+
+static inline void ib_addr_get_mgid(struct rdma_dev_addr *dev_addr,
+ union ib_gid *gid)
+{
+ memcpy(gid, dev_addr->broadcast + 4, sizeof *gid);
+}
+
+static inline int rdma_addr_gid_offset(struct rdma_dev_addr *dev_addr)
+{
+ return dev_addr->dev_type == ARPHRD_INFINIBAND ? 4 : 0;
+}
+
+static inline u16 rdma_vlan_dev_vlan_id(const struct net_device *dev)
+{
+ return dev->priv_flags & IFF_802_1Q_VLAN ?
+ vlan_dev_vlan_id(dev) : 0xffff;
+}
+
+static inline int rdma_ip2gid(struct sockaddr *addr, union ib_gid *gid)
+{
+ switch (addr->sa_family) {
+ case AF_INET:
+ ipv6_addr_set_v4mapped(((struct sockaddr_in *)
+ addr)->sin_addr.s_addr,
+ (struct in6_addr *)gid);
+ break;
+ case AF_INET6:
+ memcpy(gid->raw, &((struct sockaddr_in6 *)addr)->sin6_addr, 16);
+ break;
+ default:
+ return -EINVAL;
+ }
+ return 0;
+}
+
+/* Important - sockaddr should be a union of sockaddr_in and sockaddr_in6 */
+static inline void rdma_gid2ip(struct sockaddr *out, union ib_gid *gid)
+{
+ if (ipv6_addr_v4mapped((struct in6_addr *)gid)) {
+ struct sockaddr_in *out_in = (struct sockaddr_in *)out;
+ memset(out_in, 0, sizeof(*out_in));
+ out_in->sin_family = AF_INET;
+ memcpy(&out_in->sin_addr.s_addr, gid->raw + 12, 4);
+ } else {
+ struct sockaddr_in6 *out_in = (struct sockaddr_in6 *)out;
+ memset(out_in, 0, sizeof(*out_in));
+ out_in->sin6_family = AF_INET6;
+ memcpy(&out_in->sin6_addr.s6_addr, gid->raw, 16);
+ }
+}
+
+static inline void iboe_addr_get_sgid(struct rdma_dev_addr *dev_addr,
+ union ib_gid *gid)
+{
+ struct net_device *dev;
+ struct in_device *ip4;
+
+ dev = dev_get_by_index(&init_net, dev_addr->bound_dev_if);
+ if (dev) {
+ ip4 = (struct in_device *)dev->ip_ptr;
+ if (ip4 && ip4->ifa_list && ip4->ifa_list->ifa_address)
+ ipv6_addr_set_v4mapped(ip4->ifa_list->ifa_address,
+ (struct in6_addr *)gid);
+ dev_put(dev);
+ }
+}
+
+static inline void rdma_addr_get_sgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid)
+{
+ if (dev_addr->transport == RDMA_TRANSPORT_IB &&
+ dev_addr->dev_type != ARPHRD_INFINIBAND)
+ iboe_addr_get_sgid(dev_addr, gid);
+ else
+ memcpy(gid, dev_addr->src_dev_addr +
+ rdma_addr_gid_offset(dev_addr), sizeof *gid);
+}
+
+static inline void rdma_addr_set_sgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid)
+{
+ memcpy(dev_addr->src_dev_addr + rdma_addr_gid_offset(dev_addr), gid, sizeof *gid);
+}
+
+static inline void rdma_addr_get_dgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid)
+{
+ memcpy(gid, dev_addr->dst_dev_addr + rdma_addr_gid_offset(dev_addr), sizeof *gid);
+}
+
+static inline void rdma_addr_set_dgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid)
+{
+ memcpy(dev_addr->dst_dev_addr + rdma_addr_gid_offset(dev_addr), gid, sizeof *gid);
+}
+
+static inline enum ib_mtu iboe_get_mtu(int mtu)
+{
+ /*
+ * reduce IB headers from effective IBoE MTU. 28 stands for
+ * atomic header which is the biggest possible header after BTH
+ */
+ mtu = mtu - IB_GRH_BYTES - IB_BTH_BYTES - 28;
+
+ if (mtu >= ib_mtu_enum_to_int(IB_MTU_4096))
+ return IB_MTU_4096;
+ else if (mtu >= ib_mtu_enum_to_int(IB_MTU_2048))
+ return IB_MTU_2048;
+ else if (mtu >= ib_mtu_enum_to_int(IB_MTU_1024))
+ return IB_MTU_1024;
+ else if (mtu >= ib_mtu_enum_to_int(IB_MTU_512))
+ return IB_MTU_512;
+ else if (mtu >= ib_mtu_enum_to_int(IB_MTU_256))
+ return IB_MTU_256;
+ else
+ return 0;
+}
+
+static inline int iboe_get_rate(struct net_device *dev)
+{
+ struct ethtool_cmd cmd;
+ u32 speed;
+ int err;
+
+ rtnl_lock();
+ err = __ethtool_get_settings(dev, &cmd);
+ rtnl_unlock();
+ if (err)
+ return IB_RATE_PORT_CURRENT;
+
+ speed = ethtool_cmd_speed(&cmd);
+ if (speed >= 40000)
+ return IB_RATE_40_GBPS;
+ else if (speed >= 30000)
+ return IB_RATE_30_GBPS;
+ else if (speed >= 20000)
+ return IB_RATE_20_GBPS;
+ else if (speed >= 10000)
+ return IB_RATE_10_GBPS;
+ else
+ return IB_RATE_PORT_CURRENT;
+}
+
+static inline int rdma_link_local_addr(struct in6_addr *addr)
+{
+ if (addr->s6_addr32[0] == htonl(0xfe800000) &&
+ addr->s6_addr32[1] == 0)
+ return 1;
+
+ return 0;
+}
+
+static inline void rdma_get_ll_mac(struct in6_addr *addr, u8 *mac)
+{
+ memcpy(mac, &addr->s6_addr[8], 3);
+ memcpy(mac + 3, &addr->s6_addr[13], 3);
+ mac[0] ^= 2;
+}
+
+static inline int rdma_is_multicast_addr(struct in6_addr *addr)
+{
+ return addr->s6_addr[0] == 0xff;
+}
+
+static inline void rdma_get_mcast_mac(struct in6_addr *addr, u8 *mac)
+{
+ int i;
+
+ mac[0] = 0x33;
+ mac[1] = 0x33;
+ for (i = 2; i < 6; ++i)
+ mac[i] = addr->s6_addr[i + 10];
+}
+
+static inline u16 rdma_get_vlan_id(union ib_gid *dgid)
+{
+ u16 vid;
+
+ vid = dgid->raw[11] << 8 | dgid->raw[12];
+ return vid < 0x1000 ? vid : 0xffff;
+}
+
+static inline struct net_device *rdma_vlan_dev_real_dev(const struct net_device *dev)
+{
+ return dev->priv_flags & IFF_802_1Q_VLAN ?
+ vlan_dev_real_dev(dev) : NULL;
+}
+
+#endif /* IB_ADDR_H */
diff --git a/include/rdma/ib_cache.h b/include/rdma/ib_cache.h
new file mode 100644
index 000000000..ad9a3c280
--- /dev/null
+++ b/include/rdma/ib_cache.h
@@ -0,0 +1,132 @@
+/*
+ * Copyright (c) 2004 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005 Intel Corporation. All rights reserved.
+ * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _IB_CACHE_H
+#define _IB_CACHE_H
+
+#include <rdma/ib_verbs.h>
+
+/**
+ * ib_get_cached_gid - Returns a cached GID table entry
+ * @device: The device to query.
+ * @port_num: The port number of the device to query.
+ * @index: The index into the cached GID table to query.
+ * @gid: The GID value found at the specified index.
+ *
+ * ib_get_cached_gid() fetches the specified GID table entry stored in
+ * the local software cache.
+ */
+int ib_get_cached_gid(struct ib_device *device,
+ u8 port_num,
+ int index,
+ union ib_gid *gid);
+
+/**
+ * ib_find_cached_gid - Returns the port number and GID table index where
+ * a specified GID value occurs.
+ * @device: The device to query.
+ * @gid: The GID value to search for.
+ * @port_num: The port number of the device where the GID value was found.
+ * @index: The index into the cached GID table where the GID was found. This
+ * parameter may be NULL.
+ *
+ * ib_find_cached_gid() searches for the specified GID value in
+ * the local software cache.
+ */
+int ib_find_cached_gid(struct ib_device *device,
+ union ib_gid *gid,
+ u8 *port_num,
+ u16 *index);
+
+/**
+ * ib_get_cached_pkey - Returns a cached PKey table entry
+ * @device: The device to query.
+ * @port_num: The port number of the device to query.
+ * @index: The index into the cached PKey table to query.
+ * @pkey: The PKey value found at the specified index.
+ *
+ * ib_get_cached_pkey() fetches the specified PKey table entry stored in
+ * the local software cache.
+ */
+int ib_get_cached_pkey(struct ib_device *device_handle,
+ u8 port_num,
+ int index,
+ u16 *pkey);
+
+/**
+ * ib_find_cached_pkey - Returns the PKey table index where a specified
+ * PKey value occurs.
+ * @device: The device to query.
+ * @port_num: The port number of the device to search for the PKey.
+ * @pkey: The PKey value to search for.
+ * @index: The index into the cached PKey table where the PKey was found.
+ *
+ * ib_find_cached_pkey() searches the specified PKey table in
+ * the local software cache.
+ */
+int ib_find_cached_pkey(struct ib_device *device,
+ u8 port_num,
+ u16 pkey,
+ u16 *index);
+
+/**
+ * ib_find_exact_cached_pkey - Returns the PKey table index where a specified
+ * PKey value occurs. Comparison uses the FULL 16 bits (incl membership bit)
+ * @device: The device to query.
+ * @port_num: The port number of the device to search for the PKey.
+ * @pkey: The PKey value to search for.
+ * @index: The index into the cached PKey table where the PKey was found.
+ *
+ * ib_find_exact_cached_pkey() searches the specified PKey table in
+ * the local software cache.
+ */
+int ib_find_exact_cached_pkey(struct ib_device *device,
+ u8 port_num,
+ u16 pkey,
+ u16 *index);
+
+/**
+ * ib_get_cached_lmc - Returns a cached lmc table entry
+ * @device: The device to query.
+ * @port_num: The port number of the device to query.
+ * @lmc: The lmc value for the specified port for that device.
+ *
+ * ib_get_cached_lmc() fetches the specified lmc table entry stored in
+ * the local software cache.
+ */
+int ib_get_cached_lmc(struct ib_device *device,
+ u8 port_num,
+ u8 *lmc);
+
+#endif /* _IB_CACHE_H */
diff --git a/include/rdma/ib_cm.h b/include/rdma/ib_cm.h
new file mode 100644
index 000000000..39ed2d2fb
--- /dev/null
+++ b/include/rdma/ib_cm.h
@@ -0,0 +1,605 @@
+/*
+ * Copyright (c) 2004, 2005 Intel Corporation. All rights reserved.
+ * Copyright (c) 2004 Topspin Corporation. All rights reserved.
+ * Copyright (c) 2004 Voltaire Corporation. All rights reserved.
+ * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#if !defined(IB_CM_H)
+#define IB_CM_H
+
+#include <rdma/ib_mad.h>
+#include <rdma/ib_sa.h>
+
+/* ib_cm and ib_user_cm modules share /sys/class/infiniband_cm */
+extern struct class cm_class;
+
+enum ib_cm_state {
+ IB_CM_IDLE,
+ IB_CM_LISTEN,
+ IB_CM_REQ_SENT,
+ IB_CM_REQ_RCVD,
+ IB_CM_MRA_REQ_SENT,
+ IB_CM_MRA_REQ_RCVD,
+ IB_CM_REP_SENT,
+ IB_CM_REP_RCVD,
+ IB_CM_MRA_REP_SENT,
+ IB_CM_MRA_REP_RCVD,
+ IB_CM_ESTABLISHED,
+ IB_CM_DREQ_SENT,
+ IB_CM_DREQ_RCVD,
+ IB_CM_TIMEWAIT,
+ IB_CM_SIDR_REQ_SENT,
+ IB_CM_SIDR_REQ_RCVD
+};
+
+enum ib_cm_lap_state {
+ IB_CM_LAP_UNINIT,
+ IB_CM_LAP_IDLE,
+ IB_CM_LAP_SENT,
+ IB_CM_LAP_RCVD,
+ IB_CM_MRA_LAP_SENT,
+ IB_CM_MRA_LAP_RCVD,
+};
+
+enum ib_cm_event_type {
+ IB_CM_REQ_ERROR,
+ IB_CM_REQ_RECEIVED,
+ IB_CM_REP_ERROR,
+ IB_CM_REP_RECEIVED,
+ IB_CM_RTU_RECEIVED,
+ IB_CM_USER_ESTABLISHED,
+ IB_CM_DREQ_ERROR,
+ IB_CM_DREQ_RECEIVED,
+ IB_CM_DREP_RECEIVED,
+ IB_CM_TIMEWAIT_EXIT,
+ IB_CM_MRA_RECEIVED,
+ IB_CM_REJ_RECEIVED,
+ IB_CM_LAP_ERROR,
+ IB_CM_LAP_RECEIVED,
+ IB_CM_APR_RECEIVED,
+ IB_CM_SIDR_REQ_ERROR,
+ IB_CM_SIDR_REQ_RECEIVED,
+ IB_CM_SIDR_REP_RECEIVED
+};
+
+enum ib_cm_data_size {
+ IB_CM_REQ_PRIVATE_DATA_SIZE = 92,
+ IB_CM_MRA_PRIVATE_DATA_SIZE = 222,
+ IB_CM_REJ_PRIVATE_DATA_SIZE = 148,
+ IB_CM_REP_PRIVATE_DATA_SIZE = 196,
+ IB_CM_RTU_PRIVATE_DATA_SIZE = 224,
+ IB_CM_DREQ_PRIVATE_DATA_SIZE = 220,
+ IB_CM_DREP_PRIVATE_DATA_SIZE = 224,
+ IB_CM_REJ_ARI_LENGTH = 72,
+ IB_CM_LAP_PRIVATE_DATA_SIZE = 168,
+ IB_CM_APR_PRIVATE_DATA_SIZE = 148,
+ IB_CM_APR_INFO_LENGTH = 72,
+ IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE = 216,
+ IB_CM_SIDR_REP_PRIVATE_DATA_SIZE = 136,
+ IB_CM_SIDR_REP_INFO_LENGTH = 72,
+ /* compare done u32 at a time */
+ IB_CM_COMPARE_SIZE = (64 / sizeof(u32))
+};
+
+struct ib_cm_id;
+
+struct ib_cm_req_event_param {
+ struct ib_cm_id *listen_id;
+ u8 port;
+
+ struct ib_sa_path_rec *primary_path;
+ struct ib_sa_path_rec *alternate_path;
+
+ __be64 remote_ca_guid;
+ u32 remote_qkey;
+ u32 remote_qpn;
+ enum ib_qp_type qp_type;
+
+ u32 starting_psn;
+ u8 responder_resources;
+ u8 initiator_depth;
+ unsigned int local_cm_response_timeout:5;
+ unsigned int flow_control:1;
+ unsigned int remote_cm_response_timeout:5;
+ unsigned int retry_count:3;
+ unsigned int rnr_retry_count:3;
+ unsigned int srq:1;
+};
+
+struct ib_cm_rep_event_param {
+ __be64 remote_ca_guid;
+ u32 remote_qkey;
+ u32 remote_qpn;
+ u32 starting_psn;
+ u8 responder_resources;
+ u8 initiator_depth;
+ unsigned int target_ack_delay:5;
+ unsigned int failover_accepted:2;
+ unsigned int flow_control:1;
+ unsigned int rnr_retry_count:3;
+ unsigned int srq:1;
+};
+
+enum ib_cm_rej_reason {
+ IB_CM_REJ_NO_QP = 1,
+ IB_CM_REJ_NO_EEC = 2,
+ IB_CM_REJ_NO_RESOURCES = 3,
+ IB_CM_REJ_TIMEOUT = 4,
+ IB_CM_REJ_UNSUPPORTED = 5,
+ IB_CM_REJ_INVALID_COMM_ID = 6,
+ IB_CM_REJ_INVALID_COMM_INSTANCE = 7,
+ IB_CM_REJ_INVALID_SERVICE_ID = 8,
+ IB_CM_REJ_INVALID_TRANSPORT_TYPE = 9,
+ IB_CM_REJ_STALE_CONN = 10,
+ IB_CM_REJ_RDC_NOT_EXIST = 11,
+ IB_CM_REJ_INVALID_GID = 12,
+ IB_CM_REJ_INVALID_LID = 13,
+ IB_CM_REJ_INVALID_SL = 14,
+ IB_CM_REJ_INVALID_TRAFFIC_CLASS = 15,
+ IB_CM_REJ_INVALID_HOP_LIMIT = 16,
+ IB_CM_REJ_INVALID_PACKET_RATE = 17,
+ IB_CM_REJ_INVALID_ALT_GID = 18,
+ IB_CM_REJ_INVALID_ALT_LID = 19,
+ IB_CM_REJ_INVALID_ALT_SL = 20,
+ IB_CM_REJ_INVALID_ALT_TRAFFIC_CLASS = 21,
+ IB_CM_REJ_INVALID_ALT_HOP_LIMIT = 22,
+ IB_CM_REJ_INVALID_ALT_PACKET_RATE = 23,
+ IB_CM_REJ_PORT_CM_REDIRECT = 24,
+ IB_CM_REJ_PORT_REDIRECT = 25,
+ IB_CM_REJ_INVALID_MTU = 26,
+ IB_CM_REJ_INSUFFICIENT_RESP_RESOURCES = 27,
+ IB_CM_REJ_CONSUMER_DEFINED = 28,
+ IB_CM_REJ_INVALID_RNR_RETRY = 29,
+ IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID = 30,
+ IB_CM_REJ_INVALID_CLASS_VERSION = 31,
+ IB_CM_REJ_INVALID_FLOW_LABEL = 32,
+ IB_CM_REJ_INVALID_ALT_FLOW_LABEL = 33
+};
+
+struct ib_cm_rej_event_param {
+ enum ib_cm_rej_reason reason;
+ void *ari;
+ u8 ari_length;
+};
+
+struct ib_cm_mra_event_param {
+ u8 service_timeout;
+};
+
+struct ib_cm_lap_event_param {
+ struct ib_sa_path_rec *alternate_path;
+};
+
+enum ib_cm_apr_status {
+ IB_CM_APR_SUCCESS,
+ IB_CM_APR_INVALID_COMM_ID,
+ IB_CM_APR_UNSUPPORTED,
+ IB_CM_APR_REJECT,
+ IB_CM_APR_REDIRECT,
+ IB_CM_APR_IS_CURRENT,
+ IB_CM_APR_INVALID_QPN_EECN,
+ IB_CM_APR_INVALID_LID,
+ IB_CM_APR_INVALID_GID,
+ IB_CM_APR_INVALID_FLOW_LABEL,
+ IB_CM_APR_INVALID_TCLASS,
+ IB_CM_APR_INVALID_HOP_LIMIT,
+ IB_CM_APR_INVALID_PACKET_RATE,
+ IB_CM_APR_INVALID_SL
+};
+
+struct ib_cm_apr_event_param {
+ enum ib_cm_apr_status ap_status;
+ void *apr_info;
+ u8 info_len;
+};
+
+struct ib_cm_sidr_req_event_param {
+ struct ib_cm_id *listen_id;
+ u8 port;
+ u16 pkey;
+};
+
+enum ib_cm_sidr_status {
+ IB_SIDR_SUCCESS,
+ IB_SIDR_UNSUPPORTED,
+ IB_SIDR_REJECT,
+ IB_SIDR_NO_QP,
+ IB_SIDR_REDIRECT,
+ IB_SIDR_UNSUPPORTED_VERSION
+};
+
+struct ib_cm_sidr_rep_event_param {
+ enum ib_cm_sidr_status status;
+ u32 qkey;
+ u32 qpn;
+ void *info;
+ u8 info_len;
+};
+
+struct ib_cm_event {
+ enum ib_cm_event_type event;
+ union {
+ struct ib_cm_req_event_param req_rcvd;
+ struct ib_cm_rep_event_param rep_rcvd;
+ /* No data for RTU received events. */
+ struct ib_cm_rej_event_param rej_rcvd;
+ struct ib_cm_mra_event_param mra_rcvd;
+ struct ib_cm_lap_event_param lap_rcvd;
+ struct ib_cm_apr_event_param apr_rcvd;
+ /* No data for DREQ/DREP received events. */
+ struct ib_cm_sidr_req_event_param sidr_req_rcvd;
+ struct ib_cm_sidr_rep_event_param sidr_rep_rcvd;
+ enum ib_wc_status send_status;
+ } param;
+
+ void *private_data;
+};
+
+#define CM_REQ_ATTR_ID cpu_to_be16(0x0010)
+#define CM_MRA_ATTR_ID cpu_to_be16(0x0011)
+#define CM_REJ_ATTR_ID cpu_to_be16(0x0012)
+#define CM_REP_ATTR_ID cpu_to_be16(0x0013)
+#define CM_RTU_ATTR_ID cpu_to_be16(0x0014)
+#define CM_DREQ_ATTR_ID cpu_to_be16(0x0015)
+#define CM_DREP_ATTR_ID cpu_to_be16(0x0016)
+#define CM_SIDR_REQ_ATTR_ID cpu_to_be16(0x0017)
+#define CM_SIDR_REP_ATTR_ID cpu_to_be16(0x0018)
+#define CM_LAP_ATTR_ID cpu_to_be16(0x0019)
+#define CM_APR_ATTR_ID cpu_to_be16(0x001A)
+
+/**
+ * ib_cm_handler - User-defined callback to process communication events.
+ * @cm_id: Communication identifier associated with the reported event.
+ * @event: Information about the communication event.
+ *
+ * IB_CM_REQ_RECEIVED and IB_CM_SIDR_REQ_RECEIVED communication events
+ * generated as a result of listen requests result in the allocation of a
+ * new @cm_id. The new @cm_id is returned to the user through this callback.
+ * Clients are responsible for destroying the new @cm_id. For peer-to-peer
+ * IB_CM_REQ_RECEIVED and all other events, the returned @cm_id corresponds
+ * to a user's existing communication identifier.
+ *
+ * Users may not call ib_destroy_cm_id while in the context of this callback;
+ * however, returning a non-zero value instructs the communication manager to
+ * destroy the @cm_id after the callback completes.
+ */
+typedef int (*ib_cm_handler)(struct ib_cm_id *cm_id,
+ struct ib_cm_event *event);
+
+struct ib_cm_id {
+ ib_cm_handler cm_handler;
+ void *context;
+ struct ib_device *device;
+ __be64 service_id;
+ __be64 service_mask;
+ enum ib_cm_state state; /* internal CM/debug use */
+ enum ib_cm_lap_state lap_state; /* internal CM/debug use */
+ __be32 local_id;
+ __be32 remote_id;
+ u32 remote_cm_qpn; /* 1 unless redirected */
+};
+
+/**
+ * ib_create_cm_id - Allocate a communication identifier.
+ * @device: Device associated with the cm_id. All related communication will
+ * be associated with the specified device.
+ * @cm_handler: Callback invoked to notify the user of CM events.
+ * @context: User specified context associated with the communication
+ * identifier.
+ *
+ * Communication identifiers are used to track connection states, service
+ * ID resolution requests, and listen requests.
+ */
+struct ib_cm_id *ib_create_cm_id(struct ib_device *device,
+ ib_cm_handler cm_handler,
+ void *context);
+
+/**
+ * ib_destroy_cm_id - Destroy a connection identifier.
+ * @cm_id: Connection identifier to destroy.
+ *
+ * This call blocks until the connection identifier is destroyed.
+ */
+void ib_destroy_cm_id(struct ib_cm_id *cm_id);
+
+#define IB_SERVICE_ID_AGN_MASK cpu_to_be64(0xFF00000000000000ULL)
+#define IB_CM_ASSIGN_SERVICE_ID cpu_to_be64(0x0200000000000000ULL)
+#define IB_CMA_SERVICE_ID cpu_to_be64(0x0000000001000000ULL)
+#define IB_CMA_SERVICE_ID_MASK cpu_to_be64(0xFFFFFFFFFF000000ULL)
+#define IB_SDP_SERVICE_ID cpu_to_be64(0x0000000000010000ULL)
+#define IB_SDP_SERVICE_ID_MASK cpu_to_be64(0xFFFFFFFFFFFF0000ULL)
+
+struct ib_cm_compare_data {
+ u32 data[IB_CM_COMPARE_SIZE];
+ u32 mask[IB_CM_COMPARE_SIZE];
+};
+
+/**
+ * ib_cm_listen - Initiates listening on the specified service ID for
+ * connection and service ID resolution requests.
+ * @cm_id: Connection identifier associated with the listen request.
+ * @service_id: Service identifier matched against incoming connection
+ * and service ID resolution requests. The service ID should be specified
+ * network-byte order. If set to IB_CM_ASSIGN_SERVICE_ID, the CM will
+ * assign a service ID to the caller.
+ * @service_mask: Mask applied to service ID used to listen across a
+ * range of service IDs. If set to 0, the service ID is matched
+ * exactly. This parameter is ignored if %service_id is set to
+ * IB_CM_ASSIGN_SERVICE_ID.
+ * @compare_data: This parameter is optional. It specifies data that must
+ * appear in the private data of a connection request for the specified
+ * listen request.
+ */
+int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id, __be64 service_mask,
+ struct ib_cm_compare_data *compare_data);
+
+struct ib_cm_req_param {
+ struct ib_sa_path_rec *primary_path;
+ struct ib_sa_path_rec *alternate_path;
+ __be64 service_id;
+ u32 qp_num;
+ enum ib_qp_type qp_type;
+ u32 starting_psn;
+ const void *private_data;
+ u8 private_data_len;
+ u8 peer_to_peer;
+ u8 responder_resources;
+ u8 initiator_depth;
+ u8 remote_cm_response_timeout;
+ u8 flow_control;
+ u8 local_cm_response_timeout;
+ u8 retry_count;
+ u8 rnr_retry_count;
+ u8 max_cm_retries;
+ u8 srq;
+};
+
+/**
+ * ib_send_cm_req - Sends a connection request to the remote node.
+ * @cm_id: Connection identifier that will be associated with the
+ * connection request.
+ * @param: Connection request information needed to establish the
+ * connection.
+ */
+int ib_send_cm_req(struct ib_cm_id *cm_id,
+ struct ib_cm_req_param *param);
+
+struct ib_cm_rep_param {
+ u32 qp_num;
+ u32 starting_psn;
+ const void *private_data;
+ u8 private_data_len;
+ u8 responder_resources;
+ u8 initiator_depth;
+ u8 failover_accepted;
+ u8 flow_control;
+ u8 rnr_retry_count;
+ u8 srq;
+};
+
+/**
+ * ib_send_cm_rep - Sends a connection reply in response to a connection
+ * request.
+ * @cm_id: Connection identifier that will be associated with the
+ * connection request.
+ * @param: Connection reply information needed to establish the
+ * connection.
+ */
+int ib_send_cm_rep(struct ib_cm_id *cm_id,
+ struct ib_cm_rep_param *param);
+
+/**
+ * ib_send_cm_rtu - Sends a connection ready to use message in response
+ * to a connection reply message.
+ * @cm_id: Connection identifier associated with the connection request.
+ * @private_data: Optional user-defined private data sent with the
+ * ready to use message.
+ * @private_data_len: Size of the private data buffer, in bytes.
+ */
+int ib_send_cm_rtu(struct ib_cm_id *cm_id,
+ const void *private_data,
+ u8 private_data_len);
+
+/**
+ * ib_send_cm_dreq - Sends a disconnection request for an existing
+ * connection.
+ * @cm_id: Connection identifier associated with the connection being
+ * released.
+ * @private_data: Optional user-defined private data sent with the
+ * disconnection request message.
+ * @private_data_len: Size of the private data buffer, in bytes.
+ */
+int ib_send_cm_dreq(struct ib_cm_id *cm_id,
+ const void *private_data,
+ u8 private_data_len);
+
+/**
+ * ib_send_cm_drep - Sends a disconnection reply to a disconnection request.
+ * @cm_id: Connection identifier associated with the connection being
+ * released.
+ * @private_data: Optional user-defined private data sent with the
+ * disconnection reply message.
+ * @private_data_len: Size of the private data buffer, in bytes.
+ *
+ * If the cm_id is in the correct state, the CM will transition the connection
+ * to the timewait state, even if an error occurs sending the DREP message.
+ */
+int ib_send_cm_drep(struct ib_cm_id *cm_id,
+ const void *private_data,
+ u8 private_data_len);
+
+/**
+ * ib_cm_notify - Notifies the CM of an event reported to the consumer.
+ * @cm_id: Connection identifier to transition to established.
+ * @event: Type of event.
+ *
+ * This routine should be invoked by users to notify the CM of relevant
+ * communication events. Events that should be reported to the CM and
+ * when to report them are:
+ *
+ * IB_EVENT_COMM_EST - Used when a message is received on a connected
+ * QP before an RTU has been received.
+ * IB_EVENT_PATH_MIG - Notifies the CM that the connection has failed over
+ * to the alternate path.
+ */
+int ib_cm_notify(struct ib_cm_id *cm_id, enum ib_event_type event);
+
+/**
+ * ib_send_cm_rej - Sends a connection rejection message to the
+ * remote node.
+ * @cm_id: Connection identifier associated with the connection being
+ * rejected.
+ * @reason: Reason for the connection request rejection.
+ * @ari: Optional additional rejection information.
+ * @ari_length: Size of the additional rejection information, in bytes.
+ * @private_data: Optional user-defined private data sent with the
+ * rejection message.
+ * @private_data_len: Size of the private data buffer, in bytes.
+ */
+int ib_send_cm_rej(struct ib_cm_id *cm_id,
+ enum ib_cm_rej_reason reason,
+ void *ari,
+ u8 ari_length,
+ const void *private_data,
+ u8 private_data_len);
+
+#define IB_CM_MRA_FLAG_DELAY 0x80 /* Send MRA only after a duplicate msg */
+
+/**
+ * ib_send_cm_mra - Sends a message receipt acknowledgement to a connection
+ * message.
+ * @cm_id: Connection identifier associated with the connection message.
+ * @service_timeout: The lower 5-bits specify the maximum time required for
+ * the sender to reply to the connection message. The upper 3-bits
+ * specify additional control flags.
+ * @private_data: Optional user-defined private data sent with the
+ * message receipt acknowledgement.
+ * @private_data_len: Size of the private data buffer, in bytes.
+ */
+int ib_send_cm_mra(struct ib_cm_id *cm_id,
+ u8 service_timeout,
+ const void *private_data,
+ u8 private_data_len);
+
+/**
+ * ib_send_cm_lap - Sends a load alternate path request.
+ * @cm_id: Connection identifier associated with the load alternate path
+ * message.
+ * @alternate_path: A path record that identifies the alternate path to
+ * load.
+ * @private_data: Optional user-defined private data sent with the
+ * load alternate path message.
+ * @private_data_len: Size of the private data buffer, in bytes.
+ */
+int ib_send_cm_lap(struct ib_cm_id *cm_id,
+ struct ib_sa_path_rec *alternate_path,
+ const void *private_data,
+ u8 private_data_len);
+
+/**
+ * ib_cm_init_qp_attr - Initializes the QP attributes for use in transitioning
+ * to a specified QP state.
+ * @cm_id: Communication identifier associated with the QP attributes to
+ * initialize.
+ * @qp_attr: On input, specifies the desired QP state. On output, the
+ * mandatory and desired optional attributes will be set in order to
+ * modify the QP to the specified state.
+ * @qp_attr_mask: The QP attribute mask that may be used to transition the
+ * QP to the specified state.
+ *
+ * Users must set the @qp_attr->qp_state to the desired QP state. This call
+ * will set all required attributes for the given transition, along with
+ * known optional attributes. Users may override the attributes returned from
+ * this call before calling ib_modify_qp.
+ */
+int ib_cm_init_qp_attr(struct ib_cm_id *cm_id,
+ struct ib_qp_attr *qp_attr,
+ int *qp_attr_mask);
+
+/**
+ * ib_send_cm_apr - Sends an alternate path response message in response to
+ * a load alternate path request.
+ * @cm_id: Connection identifier associated with the alternate path response.
+ * @status: Reply status sent with the alternate path response.
+ * @info: Optional additional information sent with the alternate path
+ * response.
+ * @info_length: Size of the additional information, in bytes.
+ * @private_data: Optional user-defined private data sent with the
+ * alternate path response message.
+ * @private_data_len: Size of the private data buffer, in bytes.
+ */
+int ib_send_cm_apr(struct ib_cm_id *cm_id,
+ enum ib_cm_apr_status status,
+ void *info,
+ u8 info_length,
+ const void *private_data,
+ u8 private_data_len);
+
+struct ib_cm_sidr_req_param {
+ struct ib_sa_path_rec *path;
+ __be64 service_id;
+ int timeout_ms;
+ const void *private_data;
+ u8 private_data_len;
+ u8 max_cm_retries;
+};
+
+/**
+ * ib_send_cm_sidr_req - Sends a service ID resolution request to the
+ * remote node.
+ * @cm_id: Communication identifier that will be associated with the
+ * service ID resolution request.
+ * @param: Service ID resolution request information.
+ */
+int ib_send_cm_sidr_req(struct ib_cm_id *cm_id,
+ struct ib_cm_sidr_req_param *param);
+
+struct ib_cm_sidr_rep_param {
+ u32 qp_num;
+ u32 qkey;
+ enum ib_cm_sidr_status status;
+ const void *info;
+ u8 info_length;
+ const void *private_data;
+ u8 private_data_len;
+};
+
+/**
+ * ib_send_cm_sidr_rep - Sends a service ID resolution reply to the
+ * remote node.
+ * @cm_id: Communication identifier associated with the received service ID
+ * resolution request.
+ * @param: Service ID resolution reply information.
+ */
+int ib_send_cm_sidr_rep(struct ib_cm_id *cm_id,
+ struct ib_cm_sidr_rep_param *param);
+
+#endif /* IB_CM_H */
diff --git a/include/rdma/ib_fmr_pool.h b/include/rdma/ib_fmr_pool.h
new file mode 100644
index 000000000..f62b842e6
--- /dev/null
+++ b/include/rdma/ib_fmr_pool.h
@@ -0,0 +1,93 @@
+/*
+ * Copyright (c) 2004 Topspin Corporation. All rights reserved.
+ * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#if !defined(IB_FMR_POOL_H)
+#define IB_FMR_POOL_H
+
+#include <rdma/ib_verbs.h>
+
+struct ib_fmr_pool;
+
+/**
+ * struct ib_fmr_pool_param - Parameters for creating FMR pool
+ * @max_pages_per_fmr:Maximum number of pages per map request.
+ * @page_shift: Log2 of sizeof "pages" mapped by this fmr
+ * @access:Access flags for FMRs in pool.
+ * @pool_size:Number of FMRs to allocate for pool.
+ * @dirty_watermark:Flush is triggered when @dirty_watermark dirty
+ * FMRs are present.
+ * @flush_function:Callback called when unmapped FMRs are flushed and
+ * more FMRs are possibly available for mapping
+ * @flush_arg:Context passed to user's flush function.
+ * @cache:If set, FMRs may be reused after unmapping for identical map
+ * requests.
+ */
+struct ib_fmr_pool_param {
+ int max_pages_per_fmr;
+ int page_shift;
+ enum ib_access_flags access;
+ int pool_size;
+ int dirty_watermark;
+ void (*flush_function)(struct ib_fmr_pool *pool,
+ void *arg);
+ void *flush_arg;
+ unsigned cache:1;
+};
+
+struct ib_pool_fmr {
+ struct ib_fmr *fmr;
+ struct ib_fmr_pool *pool;
+ struct list_head list;
+ struct hlist_node cache_node;
+ int ref_count;
+ int remap_count;
+ u64 io_virtual_address;
+ int page_list_len;
+ u64 page_list[0];
+};
+
+struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd,
+ struct ib_fmr_pool_param *params);
+
+void ib_destroy_fmr_pool(struct ib_fmr_pool *pool);
+
+int ib_flush_fmr_pool(struct ib_fmr_pool *pool);
+
+struct ib_pool_fmr *ib_fmr_pool_map_phys(struct ib_fmr_pool *pool_handle,
+ u64 *page_list,
+ int list_len,
+ u64 io_virtual_address);
+
+int ib_fmr_pool_unmap(struct ib_pool_fmr *fmr);
+
+#endif /* IB_FMR_POOL_H */
diff --git a/include/rdma/ib_mad.h b/include/rdma/ib_mad.h
new file mode 100644
index 000000000..9bb99e983
--- /dev/null
+++ b/include/rdma/ib_mad.h
@@ -0,0 +1,680 @@
+/*
+ * Copyright (c) 2004 Mellanox Technologies Ltd. All rights reserved.
+ * Copyright (c) 2004 Infinicon Corporation. All rights reserved.
+ * Copyright (c) 2004 Intel Corporation. All rights reserved.
+ * Copyright (c) 2004 Topspin Corporation. All rights reserved.
+ * Copyright (c) 2004-2006 Voltaire Corporation. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#if !defined(IB_MAD_H)
+#define IB_MAD_H
+
+#include <linux/list.h>
+
+#include <rdma/ib_verbs.h>
+#include <uapi/rdma/ib_user_mad.h>
+
+/* Management base version */
+#define IB_MGMT_BASE_VERSION 1
+
+/* Management classes */
+#define IB_MGMT_CLASS_SUBN_LID_ROUTED 0x01
+#define IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE 0x81
+#define IB_MGMT_CLASS_SUBN_ADM 0x03
+#define IB_MGMT_CLASS_PERF_MGMT 0x04
+#define IB_MGMT_CLASS_BM 0x05
+#define IB_MGMT_CLASS_DEVICE_MGMT 0x06
+#define IB_MGMT_CLASS_CM 0x07
+#define IB_MGMT_CLASS_SNMP 0x08
+#define IB_MGMT_CLASS_DEVICE_ADM 0x10
+#define IB_MGMT_CLASS_BOOT_MGMT 0x11
+#define IB_MGMT_CLASS_BIS 0x12
+#define IB_MGMT_CLASS_CONG_MGMT 0x21
+#define IB_MGMT_CLASS_VENDOR_RANGE2_START 0x30
+#define IB_MGMT_CLASS_VENDOR_RANGE2_END 0x4F
+
+#define IB_OPENIB_OUI (0x001405)
+
+/* Management methods */
+#define IB_MGMT_METHOD_GET 0x01
+#define IB_MGMT_METHOD_SET 0x02
+#define IB_MGMT_METHOD_GET_RESP 0x81
+#define IB_MGMT_METHOD_SEND 0x03
+#define IB_MGMT_METHOD_TRAP 0x05
+#define IB_MGMT_METHOD_REPORT 0x06
+#define IB_MGMT_METHOD_REPORT_RESP 0x86
+#define IB_MGMT_METHOD_TRAP_REPRESS 0x07
+
+#define IB_MGMT_METHOD_RESP 0x80
+#define IB_BM_ATTR_MOD_RESP cpu_to_be32(1)
+
+#define IB_MGMT_MAX_METHODS 128
+
+/* MAD Status field bit masks */
+#define IB_MGMT_MAD_STATUS_SUCCESS 0x0000
+#define IB_MGMT_MAD_STATUS_BUSY 0x0001
+#define IB_MGMT_MAD_STATUS_REDIRECT_REQD 0x0002
+#define IB_MGMT_MAD_STATUS_BAD_VERSION 0x0004
+#define IB_MGMT_MAD_STATUS_UNSUPPORTED_METHOD 0x0008
+#define IB_MGMT_MAD_STATUS_UNSUPPORTED_METHOD_ATTRIB 0x000c
+#define IB_MGMT_MAD_STATUS_INVALID_ATTRIB_VALUE 0x001c
+
+/* RMPP information */
+#define IB_MGMT_RMPP_VERSION 1
+
+#define IB_MGMT_RMPP_TYPE_DATA 1
+#define IB_MGMT_RMPP_TYPE_ACK 2
+#define IB_MGMT_RMPP_TYPE_STOP 3
+#define IB_MGMT_RMPP_TYPE_ABORT 4
+
+#define IB_MGMT_RMPP_FLAG_ACTIVE 1
+#define IB_MGMT_RMPP_FLAG_FIRST (1<<1)
+#define IB_MGMT_RMPP_FLAG_LAST (1<<2)
+
+#define IB_MGMT_RMPP_NO_RESPTIME 0x1F
+
+#define IB_MGMT_RMPP_STATUS_SUCCESS 0
+#define IB_MGMT_RMPP_STATUS_RESX 1
+#define IB_MGMT_RMPP_STATUS_ABORT_MIN 118
+#define IB_MGMT_RMPP_STATUS_T2L 118
+#define IB_MGMT_RMPP_STATUS_BAD_LEN 119
+#define IB_MGMT_RMPP_STATUS_BAD_SEG 120
+#define IB_MGMT_RMPP_STATUS_BADT 121
+#define IB_MGMT_RMPP_STATUS_W2S 122
+#define IB_MGMT_RMPP_STATUS_S2B 123
+#define IB_MGMT_RMPP_STATUS_BAD_STATUS 124
+#define IB_MGMT_RMPP_STATUS_UNV 125
+#define IB_MGMT_RMPP_STATUS_TMR 126
+#define IB_MGMT_RMPP_STATUS_UNSPEC 127
+#define IB_MGMT_RMPP_STATUS_ABORT_MAX 127
+
+#define IB_QP0 0
+#define IB_QP1 cpu_to_be32(1)
+#define IB_QP1_QKEY 0x80010000
+#define IB_QP_SET_QKEY 0x80000000
+
+#define IB_DEFAULT_PKEY_PARTIAL 0x7FFF
+#define IB_DEFAULT_PKEY_FULL 0xFFFF
+
+enum {
+ IB_MGMT_MAD_HDR = 24,
+ IB_MGMT_MAD_DATA = 232,
+ IB_MGMT_RMPP_HDR = 36,
+ IB_MGMT_RMPP_DATA = 220,
+ IB_MGMT_VENDOR_HDR = 40,
+ IB_MGMT_VENDOR_DATA = 216,
+ IB_MGMT_SA_HDR = 56,
+ IB_MGMT_SA_DATA = 200,
+ IB_MGMT_DEVICE_HDR = 64,
+ IB_MGMT_DEVICE_DATA = 192,
+};
+
+struct ib_mad_hdr {
+ u8 base_version;
+ u8 mgmt_class;
+ u8 class_version;
+ u8 method;
+ __be16 status;
+ __be16 class_specific;
+ __be64 tid;
+ __be16 attr_id;
+ __be16 resv;
+ __be32 attr_mod;
+};
+
+struct ib_rmpp_hdr {
+ u8 rmpp_version;
+ u8 rmpp_type;
+ u8 rmpp_rtime_flags;
+ u8 rmpp_status;
+ __be32 seg_num;
+ __be32 paylen_newwin;
+};
+
+typedef u64 __bitwise ib_sa_comp_mask;
+
+#define IB_SA_COMP_MASK(n) ((__force ib_sa_comp_mask) cpu_to_be64(1ull << (n)))
+
+/*
+ * ib_sa_hdr and ib_sa_mad structures must be packed because they have
+ * 64-bit fields that are only 32-bit aligned. 64-bit architectures will
+ * lay them out wrong otherwise. (And unfortunately they are sent on
+ * the wire so we can't change the layout)
+ */
+struct ib_sa_hdr {
+ __be64 sm_key;
+ __be16 attr_offset;
+ __be16 reserved;
+ ib_sa_comp_mask comp_mask;
+} __attribute__ ((packed));
+
+struct ib_mad {
+ struct ib_mad_hdr mad_hdr;
+ u8 data[IB_MGMT_MAD_DATA];
+};
+
+struct ib_rmpp_mad {
+ struct ib_mad_hdr mad_hdr;
+ struct ib_rmpp_hdr rmpp_hdr;
+ u8 data[IB_MGMT_RMPP_DATA];
+};
+
+struct ib_sa_mad {
+ struct ib_mad_hdr mad_hdr;
+ struct ib_rmpp_hdr rmpp_hdr;
+ struct ib_sa_hdr sa_hdr;
+ u8 data[IB_MGMT_SA_DATA];
+} __attribute__ ((packed));
+
+struct ib_vendor_mad {
+ struct ib_mad_hdr mad_hdr;
+ struct ib_rmpp_hdr rmpp_hdr;
+ u8 reserved;
+ u8 oui[3];
+ u8 data[IB_MGMT_VENDOR_DATA];
+};
+
+struct ib_class_port_info {
+ u8 base_version;
+ u8 class_version;
+ __be16 capability_mask;
+ u8 reserved[3];
+ u8 resp_time_value;
+ u8 redirect_gid[16];
+ __be32 redirect_tcslfl;
+ __be16 redirect_lid;
+ __be16 redirect_pkey;
+ __be32 redirect_qp;
+ __be32 redirect_qkey;
+ u8 trap_gid[16];
+ __be32 trap_tcslfl;
+ __be16 trap_lid;
+ __be16 trap_pkey;
+ __be32 trap_hlqp;
+ __be32 trap_qkey;
+};
+
+/**
+ * ib_mad_send_buf - MAD data buffer and work request for sends.
+ * @next: A pointer used to chain together MADs for posting.
+ * @mad: References an allocated MAD data buffer for MADs that do not have
+ * RMPP active. For MADs using RMPP, references the common and management
+ * class specific headers.
+ * @mad_agent: MAD agent that allocated the buffer.
+ * @ah: The address handle to use when sending the MAD.
+ * @context: User-controlled context fields.
+ * @hdr_len: Indicates the size of the data header of the MAD. This length
+ * includes the common MAD, RMPP, and class specific headers.
+ * @data_len: Indicates the total size of user-transferred data.
+ * @seg_count: The number of RMPP segments allocated for this send.
+ * @seg_size: Size of each RMPP segment.
+ * @timeout_ms: Time to wait for a response.
+ * @retries: Number of times to retry a request for a response. For MADs
+ * using RMPP, this applies per window. On completion, returns the number
+ * of retries needed to complete the transfer.
+ *
+ * Users are responsible for initializing the MAD buffer itself, with the
+ * exception of any RMPP header. Additional segment buffer space allocated
+ * beyond data_len is padding.
+ */
+struct ib_mad_send_buf {
+ struct ib_mad_send_buf *next;
+ void *mad;
+ struct ib_mad_agent *mad_agent;
+ struct ib_ah *ah;
+ void *context[2];
+ int hdr_len;
+ int data_len;
+ int seg_count;
+ int seg_size;
+ int timeout_ms;
+ int retries;
+};
+
+/**
+ * ib_response_mad - Returns if the specified MAD has been generated in
+ * response to a sent request or trap.
+ */
+int ib_response_mad(struct ib_mad *mad);
+
+/**
+ * ib_get_rmpp_resptime - Returns the RMPP response time.
+ * @rmpp_hdr: An RMPP header.
+ */
+static inline u8 ib_get_rmpp_resptime(struct ib_rmpp_hdr *rmpp_hdr)
+{
+ return rmpp_hdr->rmpp_rtime_flags >> 3;
+}
+
+/**
+ * ib_get_rmpp_flags - Returns the RMPP flags.
+ * @rmpp_hdr: An RMPP header.
+ */
+static inline u8 ib_get_rmpp_flags(struct ib_rmpp_hdr *rmpp_hdr)
+{
+ return rmpp_hdr->rmpp_rtime_flags & 0x7;
+}
+
+/**
+ * ib_set_rmpp_resptime - Sets the response time in an RMPP header.
+ * @rmpp_hdr: An RMPP header.
+ * @rtime: The response time to set.
+ */
+static inline void ib_set_rmpp_resptime(struct ib_rmpp_hdr *rmpp_hdr, u8 rtime)
+{
+ rmpp_hdr->rmpp_rtime_flags = ib_get_rmpp_flags(rmpp_hdr) | (rtime << 3);
+}
+
+/**
+ * ib_set_rmpp_flags - Sets the flags in an RMPP header.
+ * @rmpp_hdr: An RMPP header.
+ * @flags: The flags to set.
+ */
+static inline void ib_set_rmpp_flags(struct ib_rmpp_hdr *rmpp_hdr, u8 flags)
+{
+ rmpp_hdr->rmpp_rtime_flags = (rmpp_hdr->rmpp_rtime_flags & 0xF8) |
+ (flags & 0x7);
+}
+
+struct ib_mad_agent;
+struct ib_mad_send_wc;
+struct ib_mad_recv_wc;
+
+/**
+ * ib_mad_send_handler - callback handler for a sent MAD.
+ * @mad_agent: MAD agent that sent the MAD.
+ * @mad_send_wc: Send work completion information on the sent MAD.
+ */
+typedef void (*ib_mad_send_handler)(struct ib_mad_agent *mad_agent,
+ struct ib_mad_send_wc *mad_send_wc);
+
+/**
+ * ib_mad_snoop_handler - Callback handler for snooping sent MADs.
+ * @mad_agent: MAD agent that snooped the MAD.
+ * @send_wr: Work request information on the sent MAD.
+ * @mad_send_wc: Work completion information on the sent MAD. Valid
+ * only for snooping that occurs on a send completion.
+ *
+ * Clients snooping MADs should not modify data referenced by the @send_wr
+ * or @mad_send_wc.
+ */
+typedef void (*ib_mad_snoop_handler)(struct ib_mad_agent *mad_agent,
+ struct ib_mad_send_buf *send_buf,
+ struct ib_mad_send_wc *mad_send_wc);
+
+/**
+ * ib_mad_recv_handler - callback handler for a received MAD.
+ * @mad_agent: MAD agent requesting the received MAD.
+ * @mad_recv_wc: Received work completion information on the received MAD.
+ *
+ * MADs received in response to a send request operation will be handed to
+ * the user before the send operation completes. All data buffers given
+ * to registered agents through this routine are owned by the receiving
+ * client, except for snooping agents. Clients snooping MADs should not
+ * modify the data referenced by @mad_recv_wc.
+ */
+typedef void (*ib_mad_recv_handler)(struct ib_mad_agent *mad_agent,
+ struct ib_mad_recv_wc *mad_recv_wc);
+
+/**
+ * ib_mad_agent - Used to track MAD registration with the access layer.
+ * @device: Reference to device registration is on.
+ * @qp: Reference to QP used for sending and receiving MADs.
+ * @mr: Memory region for system memory usable for DMA.
+ * @recv_handler: Callback handler for a received MAD.
+ * @send_handler: Callback handler for a sent MAD.
+ * @snoop_handler: Callback handler for snooped sent MADs.
+ * @context: User-specified context associated with this registration.
+ * @hi_tid: Access layer assigned transaction ID for this client.
+ * Unsolicited MADs sent by this client will have the upper 32-bits
+ * of their TID set to this value.
+ * @flags: registration flags
+ * @port_num: Port number on which QP is registered
+ * @rmpp_version: If set, indicates the RMPP version used by this agent.
+ */
+enum {
+ IB_MAD_USER_RMPP = IB_USER_MAD_USER_RMPP,
+};
+struct ib_mad_agent {
+ struct ib_device *device;
+ struct ib_qp *qp;
+ struct ib_mr *mr;
+ ib_mad_recv_handler recv_handler;
+ ib_mad_send_handler send_handler;
+ ib_mad_snoop_handler snoop_handler;
+ void *context;
+ u32 hi_tid;
+ u32 flags;
+ u8 port_num;
+ u8 rmpp_version;
+};
+
+/**
+ * ib_mad_send_wc - MAD send completion information.
+ * @send_buf: Send MAD data buffer associated with the send MAD request.
+ * @status: Completion status.
+ * @vendor_err: Optional vendor error information returned with a failed
+ * request.
+ */
+struct ib_mad_send_wc {
+ struct ib_mad_send_buf *send_buf;
+ enum ib_wc_status status;
+ u32 vendor_err;
+};
+
+/**
+ * ib_mad_recv_buf - received MAD buffer information.
+ * @list: Reference to next data buffer for a received RMPP MAD.
+ * @grh: References a data buffer containing the global route header.
+ * The data refereced by this buffer is only valid if the GRH is
+ * valid.
+ * @mad: References the start of the received MAD.
+ */
+struct ib_mad_recv_buf {
+ struct list_head list;
+ struct ib_grh *grh;
+ struct ib_mad *mad;
+};
+
+/**
+ * ib_mad_recv_wc - received MAD information.
+ * @wc: Completion information for the received data.
+ * @recv_buf: Specifies the location of the received data buffer(s).
+ * @rmpp_list: Specifies a list of RMPP reassembled received MAD buffers.
+ * @mad_len: The length of the received MAD, without duplicated headers.
+ *
+ * For received response, the wr_id contains a pointer to the ib_mad_send_buf
+ * for the corresponding send request.
+ */
+struct ib_mad_recv_wc {
+ struct ib_wc *wc;
+ struct ib_mad_recv_buf recv_buf;
+ struct list_head rmpp_list;
+ int mad_len;
+};
+
+/**
+ * ib_mad_reg_req - MAD registration request
+ * @mgmt_class: Indicates which management class of MADs should be receive
+ * by the caller. This field is only required if the user wishes to
+ * receive unsolicited MADs, otherwise it should be 0.
+ * @mgmt_class_version: Indicates which version of MADs for the given
+ * management class to receive.
+ * @oui: Indicates IEEE OUI when mgmt_class is a vendor class
+ * in the range from 0x30 to 0x4f. Otherwise not used.
+ * @method_mask: The caller will receive unsolicited MADs for any method
+ * where @method_mask = 1.
+ *
+ */
+struct ib_mad_reg_req {
+ u8 mgmt_class;
+ u8 mgmt_class_version;
+ u8 oui[3];
+ DECLARE_BITMAP(method_mask, IB_MGMT_MAX_METHODS);
+};
+
+/**
+ * ib_register_mad_agent - Register to send/receive MADs.
+ * @device: The device to register with.
+ * @port_num: The port on the specified device to use.
+ * @qp_type: Specifies which QP to access. Must be either
+ * IB_QPT_SMI or IB_QPT_GSI.
+ * @mad_reg_req: Specifies which unsolicited MADs should be received
+ * by the caller. This parameter may be NULL if the caller only
+ * wishes to receive solicited responses.
+ * @rmpp_version: If set, indicates that the client will send
+ * and receive MADs that contain the RMPP header for the given version.
+ * If set to 0, indicates that RMPP is not used by this client.
+ * @send_handler: The completion callback routine invoked after a send
+ * request has completed.
+ * @recv_handler: The completion callback routine invoked for a received
+ * MAD.
+ * @context: User specified context associated with the registration.
+ * @registration_flags: Registration flags to set for this agent
+ */
+struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
+ u8 port_num,
+ enum ib_qp_type qp_type,
+ struct ib_mad_reg_req *mad_reg_req,
+ u8 rmpp_version,
+ ib_mad_send_handler send_handler,
+ ib_mad_recv_handler recv_handler,
+ void *context,
+ u32 registration_flags);
+
+enum ib_mad_snoop_flags {
+ /*IB_MAD_SNOOP_POSTED_SENDS = 1,*/
+ /*IB_MAD_SNOOP_RMPP_SENDS = (1<<1),*/
+ IB_MAD_SNOOP_SEND_COMPLETIONS = (1<<2),
+ /*IB_MAD_SNOOP_RMPP_SEND_COMPLETIONS = (1<<3),*/
+ IB_MAD_SNOOP_RECVS = (1<<4)
+ /*IB_MAD_SNOOP_RMPP_RECVS = (1<<5),*/
+ /*IB_MAD_SNOOP_REDIRECTED_QPS = (1<<6)*/
+};
+
+/**
+ * ib_register_mad_snoop - Register to snoop sent and received MADs.
+ * @device: The device to register with.
+ * @port_num: The port on the specified device to use.
+ * @qp_type: Specifies which QP traffic to snoop. Must be either
+ * IB_QPT_SMI or IB_QPT_GSI.
+ * @mad_snoop_flags: Specifies information where snooping occurs.
+ * @send_handler: The callback routine invoked for a snooped send.
+ * @recv_handler: The callback routine invoked for a snooped receive.
+ * @context: User specified context associated with the registration.
+ */
+struct ib_mad_agent *ib_register_mad_snoop(struct ib_device *device,
+ u8 port_num,
+ enum ib_qp_type qp_type,
+ int mad_snoop_flags,
+ ib_mad_snoop_handler snoop_handler,
+ ib_mad_recv_handler recv_handler,
+ void *context);
+
+/**
+ * ib_unregister_mad_agent - Unregisters a client from using MAD services.
+ * @mad_agent: Corresponding MAD registration request to deregister.
+ *
+ * After invoking this routine, MAD services are no longer usable by the
+ * client on the associated QP.
+ */
+int ib_unregister_mad_agent(struct ib_mad_agent *mad_agent);
+
+/**
+ * ib_post_send_mad - Posts MAD(s) to the send queue of the QP associated
+ * with the registered client.
+ * @send_buf: Specifies the information needed to send the MAD(s).
+ * @bad_send_buf: Specifies the MAD on which an error was encountered. This
+ * parameter is optional if only a single MAD is posted.
+ *
+ * Sent MADs are not guaranteed to complete in the order that they were posted.
+ *
+ * If the MAD requires RMPP, the data buffer should contain a single copy
+ * of the common MAD, RMPP, and class specific headers, followed by the class
+ * defined data. If the class defined data would not divide evenly into
+ * RMPP segments, then space must be allocated at the end of the referenced
+ * buffer for any required padding. To indicate the amount of class defined
+ * data being transferred, the paylen_newwin field in the RMPP header should
+ * be set to the size of the class specific header plus the amount of class
+ * defined data being transferred. The paylen_newwin field should be
+ * specified in network-byte order.
+ */
+int ib_post_send_mad(struct ib_mad_send_buf *send_buf,
+ struct ib_mad_send_buf **bad_send_buf);
+
+
+/**
+ * ib_free_recv_mad - Returns data buffers used to receive a MAD.
+ * @mad_recv_wc: Work completion information for a received MAD.
+ *
+ * Clients receiving MADs through their ib_mad_recv_handler must call this
+ * routine to return the work completion buffers to the access layer.
+ */
+void ib_free_recv_mad(struct ib_mad_recv_wc *mad_recv_wc);
+
+/**
+ * ib_cancel_mad - Cancels an outstanding send MAD operation.
+ * @mad_agent: Specifies the registration associated with sent MAD.
+ * @send_buf: Indicates the MAD to cancel.
+ *
+ * MADs will be returned to the user through the corresponding
+ * ib_mad_send_handler.
+ */
+void ib_cancel_mad(struct ib_mad_agent *mad_agent,
+ struct ib_mad_send_buf *send_buf);
+
+/**
+ * ib_modify_mad - Modifies an outstanding send MAD operation.
+ * @mad_agent: Specifies the registration associated with sent MAD.
+ * @send_buf: Indicates the MAD to modify.
+ * @timeout_ms: New timeout value for sent MAD.
+ *
+ * This call will reset the timeout value for a sent MAD to the specified
+ * value.
+ */
+int ib_modify_mad(struct ib_mad_agent *mad_agent,
+ struct ib_mad_send_buf *send_buf, u32 timeout_ms);
+
+/**
+ * ib_redirect_mad_qp - Registers a QP for MAD services.
+ * @qp: Reference to a QP that requires MAD services.
+ * @rmpp_version: If set, indicates that the client will send
+ * and receive MADs that contain the RMPP header for the given version.
+ * If set to 0, indicates that RMPP is not used by this client.
+ * @send_handler: The completion callback routine invoked after a send
+ * request has completed.
+ * @recv_handler: The completion callback routine invoked for a received
+ * MAD.
+ * @context: User specified context associated with the registration.
+ *
+ * Use of this call allows clients to use MAD services, such as RMPP,
+ * on user-owned QPs. After calling this routine, users may send
+ * MADs on the specified QP by calling ib_mad_post_send.
+ */
+struct ib_mad_agent *ib_redirect_mad_qp(struct ib_qp *qp,
+ u8 rmpp_version,
+ ib_mad_send_handler send_handler,
+ ib_mad_recv_handler recv_handler,
+ void *context);
+
+/**
+ * ib_process_mad_wc - Processes a work completion associated with a
+ * MAD sent or received on a redirected QP.
+ * @mad_agent: Specifies the registered MAD service using the redirected QP.
+ * @wc: References a work completion associated with a sent or received
+ * MAD segment.
+ *
+ * This routine is used to complete or continue processing on a MAD request.
+ * If the work completion is associated with a send operation, calling
+ * this routine is required to continue an RMPP transfer or to wait for a
+ * corresponding response, if it is a request. If the work completion is
+ * associated with a receive operation, calling this routine is required to
+ * process an inbound or outbound RMPP transfer, or to match a response MAD
+ * with its corresponding request.
+ */
+int ib_process_mad_wc(struct ib_mad_agent *mad_agent,
+ struct ib_wc *wc);
+
+/**
+ * ib_create_send_mad - Allocate and initialize a data buffer and work request
+ * for sending a MAD.
+ * @mad_agent: Specifies the registered MAD service to associate with the MAD.
+ * @remote_qpn: Specifies the QPN of the receiving node.
+ * @pkey_index: Specifies which PKey the MAD will be sent using. This field
+ * is valid only if the remote_qpn is QP 1.
+ * @rmpp_active: Indicates if the send will enable RMPP.
+ * @hdr_len: Indicates the size of the data header of the MAD. This length
+ * should include the common MAD header, RMPP header, plus any class
+ * specific header.
+ * @data_len: Indicates the size of any user-transferred data. The call will
+ * automatically adjust the allocated buffer size to account for any
+ * additional padding that may be necessary.
+ * @gfp_mask: GFP mask used for the memory allocation.
+ *
+ * This routine allocates a MAD for sending. The returned MAD send buffer
+ * will reference a data buffer usable for sending a MAD, along
+ * with an initialized work request structure. Users may modify the returned
+ * MAD data buffer before posting the send.
+ *
+ * The returned MAD header, class specific headers, and any padding will be
+ * cleared. Users are responsible for initializing the common MAD header,
+ * any class specific header, and MAD data area.
+ * If @rmpp_active is set, the RMPP header will be initialized for sending.
+ */
+struct ib_mad_send_buf *ib_create_send_mad(struct ib_mad_agent *mad_agent,
+ u32 remote_qpn, u16 pkey_index,
+ int rmpp_active,
+ int hdr_len, int data_len,
+ gfp_t gfp_mask);
+
+/**
+ * ib_is_mad_class_rmpp - returns whether given management class
+ * supports RMPP.
+ * @mgmt_class: management class
+ *
+ * This routine returns whether the management class supports RMPP.
+ */
+int ib_is_mad_class_rmpp(u8 mgmt_class);
+
+/**
+ * ib_get_mad_data_offset - returns the data offset for a given
+ * management class.
+ * @mgmt_class: management class
+ *
+ * This routine returns the data offset in the MAD for the management
+ * class requested.
+ */
+int ib_get_mad_data_offset(u8 mgmt_class);
+
+/**
+ * ib_get_rmpp_segment - returns the data buffer for a given RMPP segment.
+ * @send_buf: Previously allocated send data buffer.
+ * @seg_num: number of segment to return
+ *
+ * This routine returns a pointer to the data buffer of an RMPP MAD.
+ * Users must provide synchronization to @send_buf around this call.
+ */
+void *ib_get_rmpp_segment(struct ib_mad_send_buf *send_buf, int seg_num);
+
+/**
+ * ib_free_send_mad - Returns data buffers used to send a MAD.
+ * @send_buf: Previously allocated send data buffer.
+ */
+void ib_free_send_mad(struct ib_mad_send_buf *send_buf);
+
+/**
+ * ib_mad_kernel_rmpp_agent - Returns if the agent is performing RMPP.
+ * @agent: the agent in question
+ * @return: true if agent is performing rmpp, false otherwise.
+ */
+int ib_mad_kernel_rmpp_agent(struct ib_mad_agent *agent);
+
+#endif /* IB_MAD_H */
diff --git a/include/rdma/ib_marshall.h b/include/rdma/ib_marshall.h
new file mode 100644
index 000000000..db037205c
--- /dev/null
+++ b/include/rdma/ib_marshall.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2005-2006 Intel Corporation. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#if !defined(IB_USER_MARSHALL_H)
+#define IB_USER_MARSHALL_H
+
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_sa.h>
+#include <rdma/ib_user_verbs.h>
+#include <rdma/ib_user_sa.h>
+
+void ib_copy_qp_attr_to_user(struct ib_uverbs_qp_attr *dst,
+ struct ib_qp_attr *src);
+
+void ib_copy_ah_attr_to_user(struct ib_uverbs_ah_attr *dst,
+ struct ib_ah_attr *src);
+
+void ib_copy_path_rec_to_user(struct ib_user_path_rec *dst,
+ struct ib_sa_path_rec *src);
+
+void ib_copy_path_rec_from_user(struct ib_sa_path_rec *dst,
+ struct ib_user_path_rec *src);
+
+#endif /* IB_USER_MARSHALL_H */
diff --git a/include/rdma/ib_pack.h b/include/rdma/ib_pack.h
new file mode 100644
index 000000000..b1f7592e0
--- /dev/null
+++ b/include/rdma/ib_pack.h
@@ -0,0 +1,268 @@
+/*
+ * Copyright (c) 2004 Topspin Corporation. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef IB_PACK_H
+#define IB_PACK_H
+
+#include <rdma/ib_verbs.h>
+#include <uapi/linux/if_ether.h>
+
+enum {
+ IB_LRH_BYTES = 8,
+ IB_ETH_BYTES = 14,
+ IB_VLAN_BYTES = 4,
+ IB_GRH_BYTES = 40,
+ IB_BTH_BYTES = 12,
+ IB_DETH_BYTES = 8
+};
+
+struct ib_field {
+ size_t struct_offset_bytes;
+ size_t struct_size_bytes;
+ int offset_words;
+ int offset_bits;
+ int size_bits;
+ char *field_name;
+};
+
+#define RESERVED \
+ .field_name = "reserved"
+
+/*
+ * This macro cleans up the definitions of constants for BTH opcodes.
+ * It is used to define constants such as IB_OPCODE_UD_SEND_ONLY,
+ * which becomes IB_OPCODE_UD + IB_OPCODE_SEND_ONLY, and this gives
+ * the correct value.
+ *
+ * In short, user code should use the constants defined using the
+ * macro rather than worrying about adding together other constants.
+*/
+#define IB_OPCODE(transport, op) \
+ IB_OPCODE_ ## transport ## _ ## op = \
+ IB_OPCODE_ ## transport + IB_OPCODE_ ## op
+
+enum {
+ /* transport types -- just used to define real constants */
+ IB_OPCODE_RC = 0x00,
+ IB_OPCODE_UC = 0x20,
+ IB_OPCODE_RD = 0x40,
+ IB_OPCODE_UD = 0x60,
+
+ /* operations -- just used to define real constants */
+ IB_OPCODE_SEND_FIRST = 0x00,
+ IB_OPCODE_SEND_MIDDLE = 0x01,
+ IB_OPCODE_SEND_LAST = 0x02,
+ IB_OPCODE_SEND_LAST_WITH_IMMEDIATE = 0x03,
+ IB_OPCODE_SEND_ONLY = 0x04,
+ IB_OPCODE_SEND_ONLY_WITH_IMMEDIATE = 0x05,
+ IB_OPCODE_RDMA_WRITE_FIRST = 0x06,
+ IB_OPCODE_RDMA_WRITE_MIDDLE = 0x07,
+ IB_OPCODE_RDMA_WRITE_LAST = 0x08,
+ IB_OPCODE_RDMA_WRITE_LAST_WITH_IMMEDIATE = 0x09,
+ IB_OPCODE_RDMA_WRITE_ONLY = 0x0a,
+ IB_OPCODE_RDMA_WRITE_ONLY_WITH_IMMEDIATE = 0x0b,
+ IB_OPCODE_RDMA_READ_REQUEST = 0x0c,
+ IB_OPCODE_RDMA_READ_RESPONSE_FIRST = 0x0d,
+ IB_OPCODE_RDMA_READ_RESPONSE_MIDDLE = 0x0e,
+ IB_OPCODE_RDMA_READ_RESPONSE_LAST = 0x0f,
+ IB_OPCODE_RDMA_READ_RESPONSE_ONLY = 0x10,
+ IB_OPCODE_ACKNOWLEDGE = 0x11,
+ IB_OPCODE_ATOMIC_ACKNOWLEDGE = 0x12,
+ IB_OPCODE_COMPARE_SWAP = 0x13,
+ IB_OPCODE_FETCH_ADD = 0x14,
+
+ /* real constants follow -- see comment about above IB_OPCODE()
+ macro for more details */
+
+ /* RC */
+ IB_OPCODE(RC, SEND_FIRST),
+ IB_OPCODE(RC, SEND_MIDDLE),
+ IB_OPCODE(RC, SEND_LAST),
+ IB_OPCODE(RC, SEND_LAST_WITH_IMMEDIATE),
+ IB_OPCODE(RC, SEND_ONLY),
+ IB_OPCODE(RC, SEND_ONLY_WITH_IMMEDIATE),
+ IB_OPCODE(RC, RDMA_WRITE_FIRST),
+ IB_OPCODE(RC, RDMA_WRITE_MIDDLE),
+ IB_OPCODE(RC, RDMA_WRITE_LAST),
+ IB_OPCODE(RC, RDMA_WRITE_LAST_WITH_IMMEDIATE),
+ IB_OPCODE(RC, RDMA_WRITE_ONLY),
+ IB_OPCODE(RC, RDMA_WRITE_ONLY_WITH_IMMEDIATE),
+ IB_OPCODE(RC, RDMA_READ_REQUEST),
+ IB_OPCODE(RC, RDMA_READ_RESPONSE_FIRST),
+ IB_OPCODE(RC, RDMA_READ_RESPONSE_MIDDLE),
+ IB_OPCODE(RC, RDMA_READ_RESPONSE_LAST),
+ IB_OPCODE(RC, RDMA_READ_RESPONSE_ONLY),
+ IB_OPCODE(RC, ACKNOWLEDGE),
+ IB_OPCODE(RC, ATOMIC_ACKNOWLEDGE),
+ IB_OPCODE(RC, COMPARE_SWAP),
+ IB_OPCODE(RC, FETCH_ADD),
+
+ /* UC */
+ IB_OPCODE(UC, SEND_FIRST),
+ IB_OPCODE(UC, SEND_MIDDLE),
+ IB_OPCODE(UC, SEND_LAST),
+ IB_OPCODE(UC, SEND_LAST_WITH_IMMEDIATE),
+ IB_OPCODE(UC, SEND_ONLY),
+ IB_OPCODE(UC, SEND_ONLY_WITH_IMMEDIATE),
+ IB_OPCODE(UC, RDMA_WRITE_FIRST),
+ IB_OPCODE(UC, RDMA_WRITE_MIDDLE),
+ IB_OPCODE(UC, RDMA_WRITE_LAST),
+ IB_OPCODE(UC, RDMA_WRITE_LAST_WITH_IMMEDIATE),
+ IB_OPCODE(UC, RDMA_WRITE_ONLY),
+ IB_OPCODE(UC, RDMA_WRITE_ONLY_WITH_IMMEDIATE),
+
+ /* RD */
+ IB_OPCODE(RD, SEND_FIRST),
+ IB_OPCODE(RD, SEND_MIDDLE),
+ IB_OPCODE(RD, SEND_LAST),
+ IB_OPCODE(RD, SEND_LAST_WITH_IMMEDIATE),
+ IB_OPCODE(RD, SEND_ONLY),
+ IB_OPCODE(RD, SEND_ONLY_WITH_IMMEDIATE),
+ IB_OPCODE(RD, RDMA_WRITE_FIRST),
+ IB_OPCODE(RD, RDMA_WRITE_MIDDLE),
+ IB_OPCODE(RD, RDMA_WRITE_LAST),
+ IB_OPCODE(RD, RDMA_WRITE_LAST_WITH_IMMEDIATE),
+ IB_OPCODE(RD, RDMA_WRITE_ONLY),
+ IB_OPCODE(RD, RDMA_WRITE_ONLY_WITH_IMMEDIATE),
+ IB_OPCODE(RD, RDMA_READ_REQUEST),
+ IB_OPCODE(RD, RDMA_READ_RESPONSE_FIRST),
+ IB_OPCODE(RD, RDMA_READ_RESPONSE_MIDDLE),
+ IB_OPCODE(RD, RDMA_READ_RESPONSE_LAST),
+ IB_OPCODE(RD, RDMA_READ_RESPONSE_ONLY),
+ IB_OPCODE(RD, ACKNOWLEDGE),
+ IB_OPCODE(RD, ATOMIC_ACKNOWLEDGE),
+ IB_OPCODE(RD, COMPARE_SWAP),
+ IB_OPCODE(RD, FETCH_ADD),
+
+ /* UD */
+ IB_OPCODE(UD, SEND_ONLY),
+ IB_OPCODE(UD, SEND_ONLY_WITH_IMMEDIATE)
+};
+
+enum {
+ IB_LNH_RAW = 0,
+ IB_LNH_IP = 1,
+ IB_LNH_IBA_LOCAL = 2,
+ IB_LNH_IBA_GLOBAL = 3
+};
+
+struct ib_unpacked_lrh {
+ u8 virtual_lane;
+ u8 link_version;
+ u8 service_level;
+ u8 link_next_header;
+ __be16 destination_lid;
+ __be16 packet_length;
+ __be16 source_lid;
+};
+
+struct ib_unpacked_grh {
+ u8 ip_version;
+ u8 traffic_class;
+ __be32 flow_label;
+ __be16 payload_length;
+ u8 next_header;
+ u8 hop_limit;
+ union ib_gid source_gid;
+ union ib_gid destination_gid;
+};
+
+struct ib_unpacked_bth {
+ u8 opcode;
+ u8 solicited_event;
+ u8 mig_req;
+ u8 pad_count;
+ u8 transport_header_version;
+ __be16 pkey;
+ __be32 destination_qpn;
+ u8 ack_req;
+ __be32 psn;
+};
+
+struct ib_unpacked_deth {
+ __be32 qkey;
+ __be32 source_qpn;
+};
+
+struct ib_unpacked_eth {
+ u8 dmac_h[4];
+ u8 dmac_l[2];
+ u8 smac_h[2];
+ u8 smac_l[4];
+ __be16 type;
+};
+
+struct ib_unpacked_vlan {
+ __be16 tag;
+ __be16 type;
+};
+
+struct ib_ud_header {
+ int lrh_present;
+ struct ib_unpacked_lrh lrh;
+ int eth_present;
+ struct ib_unpacked_eth eth;
+ int vlan_present;
+ struct ib_unpacked_vlan vlan;
+ int grh_present;
+ struct ib_unpacked_grh grh;
+ struct ib_unpacked_bth bth;
+ struct ib_unpacked_deth deth;
+ int immediate_present;
+ __be32 immediate_data;
+};
+
+void ib_pack(const struct ib_field *desc,
+ int desc_len,
+ void *structure,
+ void *buf);
+
+void ib_unpack(const struct ib_field *desc,
+ int desc_len,
+ void *buf,
+ void *structure);
+
+void ib_ud_header_init(int payload_bytes,
+ int lrh_present,
+ int eth_present,
+ int vlan_present,
+ int grh_present,
+ int immediate_present,
+ struct ib_ud_header *header);
+
+int ib_ud_header_pack(struct ib_ud_header *header,
+ void *buf);
+
+int ib_ud_header_unpack(void *buf,
+ struct ib_ud_header *header);
+
+#endif /* IB_PACK_H */
diff --git a/include/rdma/ib_pma.h b/include/rdma/ib_pma.h
new file mode 100644
index 000000000..a5889f188
--- /dev/null
+++ b/include/rdma/ib_pma.h
@@ -0,0 +1,156 @@
+/*
+ * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation.
+ * All rights reserved.
+ * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#if !defined(IB_PMA_H)
+#define IB_PMA_H
+
+#include <rdma/ib_mad.h>
+
+/*
+ * PMA class portinfo capability mask bits
+ */
+#define IB_PMA_CLASS_CAP_ALLPORTSELECT cpu_to_be16(1 << 8)
+#define IB_PMA_CLASS_CAP_EXT_WIDTH cpu_to_be16(1 << 9)
+#define IB_PMA_CLASS_CAP_XMIT_WAIT cpu_to_be16(1 << 12)
+
+#define IB_PMA_CLASS_PORT_INFO cpu_to_be16(0x0001)
+#define IB_PMA_PORT_SAMPLES_CONTROL cpu_to_be16(0x0010)
+#define IB_PMA_PORT_SAMPLES_RESULT cpu_to_be16(0x0011)
+#define IB_PMA_PORT_COUNTERS cpu_to_be16(0x0012)
+#define IB_PMA_PORT_COUNTERS_EXT cpu_to_be16(0x001D)
+#define IB_PMA_PORT_SAMPLES_RESULT_EXT cpu_to_be16(0x001E)
+
+struct ib_pma_mad {
+ struct ib_mad_hdr mad_hdr;
+ u8 reserved[40];
+ u8 data[192];
+} __packed;
+
+struct ib_pma_portsamplescontrol {
+ u8 opcode;
+ u8 port_select;
+ u8 tick;
+ u8 counter_width; /* resv: 7:3, counter width: 2:0 */
+ __be32 counter_mask0_9; /* 2, 10 3-bit fields */
+ __be16 counter_mask10_14; /* 1, 5 3-bit fields */
+ u8 sample_mechanisms;
+ u8 sample_status; /* only lower 2 bits */
+ __be64 option_mask;
+ __be64 vendor_mask;
+ __be32 sample_start;
+ __be32 sample_interval;
+ __be16 tag;
+ __be16 counter_select[15];
+ __be32 reserved1;
+ __be64 samples_only_option_mask;
+ __be32 reserved2[28];
+};
+
+struct ib_pma_portsamplesresult {
+ __be16 tag;
+ __be16 sample_status; /* only lower 2 bits */
+ __be32 counter[15];
+};
+
+struct ib_pma_portsamplesresult_ext {
+ __be16 tag;
+ __be16 sample_status; /* only lower 2 bits */
+ __be32 extended_width; /* only upper 2 bits */
+ __be64 counter[15];
+};
+
+struct ib_pma_portcounters {
+ u8 reserved;
+ u8 port_select;
+ __be16 counter_select;
+ __be16 symbol_error_counter;
+ u8 link_error_recovery_counter;
+ u8 link_downed_counter;
+ __be16 port_rcv_errors;
+ __be16 port_rcv_remphys_errors;
+ __be16 port_rcv_switch_relay_errors;
+ __be16 port_xmit_discards;
+ u8 port_xmit_constraint_errors;
+ u8 port_rcv_constraint_errors;
+ u8 reserved1;
+ u8 link_overrun_errors; /* LocalLink: 7:4, BufferOverrun: 3:0 */
+ __be16 reserved2;
+ __be16 vl15_dropped;
+ __be32 port_xmit_data;
+ __be32 port_rcv_data;
+ __be32 port_xmit_packets;
+ __be32 port_rcv_packets;
+ __be32 port_xmit_wait;
+} __packed;
+
+
+#define IB_PMA_SEL_SYMBOL_ERROR cpu_to_be16(0x0001)
+#define IB_PMA_SEL_LINK_ERROR_RECOVERY cpu_to_be16(0x0002)
+#define IB_PMA_SEL_LINK_DOWNED cpu_to_be16(0x0004)
+#define IB_PMA_SEL_PORT_RCV_ERRORS cpu_to_be16(0x0008)
+#define IB_PMA_SEL_PORT_RCV_REMPHYS_ERRORS cpu_to_be16(0x0010)
+#define IB_PMA_SEL_PORT_XMIT_DISCARDS cpu_to_be16(0x0040)
+#define IB_PMA_SEL_LOCAL_LINK_INTEGRITY_ERRORS cpu_to_be16(0x0200)
+#define IB_PMA_SEL_EXCESSIVE_BUFFER_OVERRUNS cpu_to_be16(0x0400)
+#define IB_PMA_SEL_PORT_VL15_DROPPED cpu_to_be16(0x0800)
+#define IB_PMA_SEL_PORT_XMIT_DATA cpu_to_be16(0x1000)
+#define IB_PMA_SEL_PORT_RCV_DATA cpu_to_be16(0x2000)
+#define IB_PMA_SEL_PORT_XMIT_PACKETS cpu_to_be16(0x4000)
+#define IB_PMA_SEL_PORT_RCV_PACKETS cpu_to_be16(0x8000)
+
+struct ib_pma_portcounters_ext {
+ u8 reserved;
+ u8 port_select;
+ __be16 counter_select;
+ __be32 reserved1;
+ __be64 port_xmit_data;
+ __be64 port_rcv_data;
+ __be64 port_xmit_packets;
+ __be64 port_rcv_packets;
+ __be64 port_unicast_xmit_packets;
+ __be64 port_unicast_rcv_packets;
+ __be64 port_multicast_xmit_packets;
+ __be64 port_multicast_rcv_packets;
+} __packed;
+
+#define IB_PMA_SELX_PORT_XMIT_DATA cpu_to_be16(0x0001)
+#define IB_PMA_SELX_PORT_RCV_DATA cpu_to_be16(0x0002)
+#define IB_PMA_SELX_PORT_XMIT_PACKETS cpu_to_be16(0x0004)
+#define IB_PMA_SELX_PORT_RCV_PACKETS cpu_to_be16(0x0008)
+#define IB_PMA_SELX_PORT_UNI_XMIT_PACKETS cpu_to_be16(0x0010)
+#define IB_PMA_SELX_PORT_UNI_RCV_PACKETS cpu_to_be16(0x0020)
+#define IB_PMA_SELX_PORT_MULTI_XMIT_PACKETS cpu_to_be16(0x0040)
+#define IB_PMA_SELX_PORT_MULTI_RCV_PACKETS cpu_to_be16(0x0080)
+
+#endif /* IB_PMA_H */
diff --git a/include/rdma/ib_sa.h b/include/rdma/ib_sa.h
new file mode 100644
index 000000000..7e071a6ab
--- /dev/null
+++ b/include/rdma/ib_sa.h
@@ -0,0 +1,431 @@
+/*
+ * Copyright (c) 2004 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005 Voltaire, Inc. All rights reserved.
+ * Copyright (c) 2006 Intel Corporation. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef IB_SA_H
+#define IB_SA_H
+
+#include <linux/completion.h>
+#include <linux/compiler.h>
+
+#include <linux/atomic.h>
+
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_mad.h>
+
+enum {
+ IB_SA_CLASS_VERSION = 2, /* IB spec version 1.1/1.2 */
+
+ IB_SA_METHOD_GET_TABLE = 0x12,
+ IB_SA_METHOD_GET_TABLE_RESP = 0x92,
+ IB_SA_METHOD_DELETE = 0x15,
+ IB_SA_METHOD_DELETE_RESP = 0x95,
+ IB_SA_METHOD_GET_MULTI = 0x14,
+ IB_SA_METHOD_GET_MULTI_RESP = 0x94,
+ IB_SA_METHOD_GET_TRACE_TBL = 0x13
+};
+
+enum {
+ IB_SA_ATTR_CLASS_PORTINFO = 0x01,
+ IB_SA_ATTR_NOTICE = 0x02,
+ IB_SA_ATTR_INFORM_INFO = 0x03,
+ IB_SA_ATTR_NODE_REC = 0x11,
+ IB_SA_ATTR_PORT_INFO_REC = 0x12,
+ IB_SA_ATTR_SL2VL_REC = 0x13,
+ IB_SA_ATTR_SWITCH_REC = 0x14,
+ IB_SA_ATTR_LINEAR_FDB_REC = 0x15,
+ IB_SA_ATTR_RANDOM_FDB_REC = 0x16,
+ IB_SA_ATTR_MCAST_FDB_REC = 0x17,
+ IB_SA_ATTR_SM_INFO_REC = 0x18,
+ IB_SA_ATTR_LINK_REC = 0x20,
+ IB_SA_ATTR_GUID_INFO_REC = 0x30,
+ IB_SA_ATTR_SERVICE_REC = 0x31,
+ IB_SA_ATTR_PARTITION_REC = 0x33,
+ IB_SA_ATTR_PATH_REC = 0x35,
+ IB_SA_ATTR_VL_ARB_REC = 0x36,
+ IB_SA_ATTR_MC_MEMBER_REC = 0x38,
+ IB_SA_ATTR_TRACE_REC = 0x39,
+ IB_SA_ATTR_MULTI_PATH_REC = 0x3a,
+ IB_SA_ATTR_SERVICE_ASSOC_REC = 0x3b,
+ IB_SA_ATTR_INFORM_INFO_REC = 0xf3
+};
+
+enum ib_sa_selector {
+ IB_SA_GT = 0,
+ IB_SA_LT = 1,
+ IB_SA_EQ = 2,
+ /*
+ * The meaning of "best" depends on the attribute: for
+ * example, for MTU best will return the largest available
+ * MTU, while for packet life time, best will return the
+ * smallest available life time.
+ */
+ IB_SA_BEST = 3
+};
+
+/*
+ * Structures for SA records are named "struct ib_sa_xxx_rec." No
+ * attempt is made to pack structures to match the physical layout of
+ * SA records in SA MADs; all packing and unpacking is handled by the
+ * SA query code.
+ *
+ * For a record with structure ib_sa_xxx_rec, the naming convention
+ * for the component mask value for field yyy is IB_SA_XXX_REC_YYY (we
+ * never use different abbreviations or otherwise change the spelling
+ * of xxx/yyy between ib_sa_xxx_rec.yyy and IB_SA_XXX_REC_YYY).
+ *
+ * Reserved rows are indicated with comments to help maintainability.
+ */
+
+#define IB_SA_PATH_REC_SERVICE_ID (IB_SA_COMP_MASK( 0) |\
+ IB_SA_COMP_MASK( 1))
+#define IB_SA_PATH_REC_DGID IB_SA_COMP_MASK( 2)
+#define IB_SA_PATH_REC_SGID IB_SA_COMP_MASK( 3)
+#define IB_SA_PATH_REC_DLID IB_SA_COMP_MASK( 4)
+#define IB_SA_PATH_REC_SLID IB_SA_COMP_MASK( 5)
+#define IB_SA_PATH_REC_RAW_TRAFFIC IB_SA_COMP_MASK( 6)
+/* reserved: 7 */
+#define IB_SA_PATH_REC_FLOW_LABEL IB_SA_COMP_MASK( 8)
+#define IB_SA_PATH_REC_HOP_LIMIT IB_SA_COMP_MASK( 9)
+#define IB_SA_PATH_REC_TRAFFIC_CLASS IB_SA_COMP_MASK(10)
+#define IB_SA_PATH_REC_REVERSIBLE IB_SA_COMP_MASK(11)
+#define IB_SA_PATH_REC_NUMB_PATH IB_SA_COMP_MASK(12)
+#define IB_SA_PATH_REC_PKEY IB_SA_COMP_MASK(13)
+#define IB_SA_PATH_REC_QOS_CLASS IB_SA_COMP_MASK(14)
+#define IB_SA_PATH_REC_SL IB_SA_COMP_MASK(15)
+#define IB_SA_PATH_REC_MTU_SELECTOR IB_SA_COMP_MASK(16)
+#define IB_SA_PATH_REC_MTU IB_SA_COMP_MASK(17)
+#define IB_SA_PATH_REC_RATE_SELECTOR IB_SA_COMP_MASK(18)
+#define IB_SA_PATH_REC_RATE IB_SA_COMP_MASK(19)
+#define IB_SA_PATH_REC_PACKET_LIFE_TIME_SELECTOR IB_SA_COMP_MASK(20)
+#define IB_SA_PATH_REC_PACKET_LIFE_TIME IB_SA_COMP_MASK(21)
+#define IB_SA_PATH_REC_PREFERENCE IB_SA_COMP_MASK(22)
+
+struct ib_sa_path_rec {
+ __be64 service_id;
+ union ib_gid dgid;
+ union ib_gid sgid;
+ __be16 dlid;
+ __be16 slid;
+ int raw_traffic;
+ /* reserved */
+ __be32 flow_label;
+ u8 hop_limit;
+ u8 traffic_class;
+ int reversible;
+ u8 numb_path;
+ __be16 pkey;
+ __be16 qos_class;
+ u8 sl;
+ u8 mtu_selector;
+ u8 mtu;
+ u8 rate_selector;
+ u8 rate;
+ u8 packet_life_time_selector;
+ u8 packet_life_time;
+ u8 preference;
+ u8 smac[ETH_ALEN];
+ u8 dmac[ETH_ALEN];
+ u16 vlan_id;
+};
+
+#define IB_SA_MCMEMBER_REC_MGID IB_SA_COMP_MASK( 0)
+#define IB_SA_MCMEMBER_REC_PORT_GID IB_SA_COMP_MASK( 1)
+#define IB_SA_MCMEMBER_REC_QKEY IB_SA_COMP_MASK( 2)
+#define IB_SA_MCMEMBER_REC_MLID IB_SA_COMP_MASK( 3)
+#define IB_SA_MCMEMBER_REC_MTU_SELECTOR IB_SA_COMP_MASK( 4)
+#define IB_SA_MCMEMBER_REC_MTU IB_SA_COMP_MASK( 5)
+#define IB_SA_MCMEMBER_REC_TRAFFIC_CLASS IB_SA_COMP_MASK( 6)
+#define IB_SA_MCMEMBER_REC_PKEY IB_SA_COMP_MASK( 7)
+#define IB_SA_MCMEMBER_REC_RATE_SELECTOR IB_SA_COMP_MASK( 8)
+#define IB_SA_MCMEMBER_REC_RATE IB_SA_COMP_MASK( 9)
+#define IB_SA_MCMEMBER_REC_PACKET_LIFE_TIME_SELECTOR IB_SA_COMP_MASK(10)
+#define IB_SA_MCMEMBER_REC_PACKET_LIFE_TIME IB_SA_COMP_MASK(11)
+#define IB_SA_MCMEMBER_REC_SL IB_SA_COMP_MASK(12)
+#define IB_SA_MCMEMBER_REC_FLOW_LABEL IB_SA_COMP_MASK(13)
+#define IB_SA_MCMEMBER_REC_HOP_LIMIT IB_SA_COMP_MASK(14)
+#define IB_SA_MCMEMBER_REC_SCOPE IB_SA_COMP_MASK(15)
+#define IB_SA_MCMEMBER_REC_JOIN_STATE IB_SA_COMP_MASK(16)
+#define IB_SA_MCMEMBER_REC_PROXY_JOIN IB_SA_COMP_MASK(17)
+
+struct ib_sa_mcmember_rec {
+ union ib_gid mgid;
+ union ib_gid port_gid;
+ __be32 qkey;
+ __be16 mlid;
+ u8 mtu_selector;
+ u8 mtu;
+ u8 traffic_class;
+ __be16 pkey;
+ u8 rate_selector;
+ u8 rate;
+ u8 packet_life_time_selector;
+ u8 packet_life_time;
+ u8 sl;
+ __be32 flow_label;
+ u8 hop_limit;
+ u8 scope;
+ u8 join_state;
+ int proxy_join;
+};
+
+/* Service Record Component Mask Sec 15.2.5.14 Ver 1.1 */
+#define IB_SA_SERVICE_REC_SERVICE_ID IB_SA_COMP_MASK( 0)
+#define IB_SA_SERVICE_REC_SERVICE_GID IB_SA_COMP_MASK( 1)
+#define IB_SA_SERVICE_REC_SERVICE_PKEY IB_SA_COMP_MASK( 2)
+/* reserved: 3 */
+#define IB_SA_SERVICE_REC_SERVICE_LEASE IB_SA_COMP_MASK( 4)
+#define IB_SA_SERVICE_REC_SERVICE_KEY IB_SA_COMP_MASK( 5)
+#define IB_SA_SERVICE_REC_SERVICE_NAME IB_SA_COMP_MASK( 6)
+#define IB_SA_SERVICE_REC_SERVICE_DATA8_0 IB_SA_COMP_MASK( 7)
+#define IB_SA_SERVICE_REC_SERVICE_DATA8_1 IB_SA_COMP_MASK( 8)
+#define IB_SA_SERVICE_REC_SERVICE_DATA8_2 IB_SA_COMP_MASK( 9)
+#define IB_SA_SERVICE_REC_SERVICE_DATA8_3 IB_SA_COMP_MASK(10)
+#define IB_SA_SERVICE_REC_SERVICE_DATA8_4 IB_SA_COMP_MASK(11)
+#define IB_SA_SERVICE_REC_SERVICE_DATA8_5 IB_SA_COMP_MASK(12)
+#define IB_SA_SERVICE_REC_SERVICE_DATA8_6 IB_SA_COMP_MASK(13)
+#define IB_SA_SERVICE_REC_SERVICE_DATA8_7 IB_SA_COMP_MASK(14)
+#define IB_SA_SERVICE_REC_SERVICE_DATA8_8 IB_SA_COMP_MASK(15)
+#define IB_SA_SERVICE_REC_SERVICE_DATA8_9 IB_SA_COMP_MASK(16)
+#define IB_SA_SERVICE_REC_SERVICE_DATA8_10 IB_SA_COMP_MASK(17)
+#define IB_SA_SERVICE_REC_SERVICE_DATA8_11 IB_SA_COMP_MASK(18)
+#define IB_SA_SERVICE_REC_SERVICE_DATA8_12 IB_SA_COMP_MASK(19)
+#define IB_SA_SERVICE_REC_SERVICE_DATA8_13 IB_SA_COMP_MASK(20)
+#define IB_SA_SERVICE_REC_SERVICE_DATA8_14 IB_SA_COMP_MASK(21)
+#define IB_SA_SERVICE_REC_SERVICE_DATA8_15 IB_SA_COMP_MASK(22)
+#define IB_SA_SERVICE_REC_SERVICE_DATA16_0 IB_SA_COMP_MASK(23)
+#define IB_SA_SERVICE_REC_SERVICE_DATA16_1 IB_SA_COMP_MASK(24)
+#define IB_SA_SERVICE_REC_SERVICE_DATA16_2 IB_SA_COMP_MASK(25)
+#define IB_SA_SERVICE_REC_SERVICE_DATA16_3 IB_SA_COMP_MASK(26)
+#define IB_SA_SERVICE_REC_SERVICE_DATA16_4 IB_SA_COMP_MASK(27)
+#define IB_SA_SERVICE_REC_SERVICE_DATA16_5 IB_SA_COMP_MASK(28)
+#define IB_SA_SERVICE_REC_SERVICE_DATA16_6 IB_SA_COMP_MASK(29)
+#define IB_SA_SERVICE_REC_SERVICE_DATA16_7 IB_SA_COMP_MASK(30)
+#define IB_SA_SERVICE_REC_SERVICE_DATA32_0 IB_SA_COMP_MASK(31)
+#define IB_SA_SERVICE_REC_SERVICE_DATA32_1 IB_SA_COMP_MASK(32)
+#define IB_SA_SERVICE_REC_SERVICE_DATA32_2 IB_SA_COMP_MASK(33)
+#define IB_SA_SERVICE_REC_SERVICE_DATA32_3 IB_SA_COMP_MASK(34)
+#define IB_SA_SERVICE_REC_SERVICE_DATA64_0 IB_SA_COMP_MASK(35)
+#define IB_SA_SERVICE_REC_SERVICE_DATA64_1 IB_SA_COMP_MASK(36)
+
+#define IB_DEFAULT_SERVICE_LEASE 0xFFFFFFFF
+
+struct ib_sa_service_rec {
+ u64 id;
+ union ib_gid gid;
+ __be16 pkey;
+ /* reserved */
+ u32 lease;
+ u8 key[16];
+ u8 name[64];
+ u8 data8[16];
+ u16 data16[8];
+ u32 data32[4];
+ u64 data64[2];
+};
+
+#define IB_SA_GUIDINFO_REC_LID IB_SA_COMP_MASK(0)
+#define IB_SA_GUIDINFO_REC_BLOCK_NUM IB_SA_COMP_MASK(1)
+#define IB_SA_GUIDINFO_REC_RES1 IB_SA_COMP_MASK(2)
+#define IB_SA_GUIDINFO_REC_RES2 IB_SA_COMP_MASK(3)
+#define IB_SA_GUIDINFO_REC_GID0 IB_SA_COMP_MASK(4)
+#define IB_SA_GUIDINFO_REC_GID1 IB_SA_COMP_MASK(5)
+#define IB_SA_GUIDINFO_REC_GID2 IB_SA_COMP_MASK(6)
+#define IB_SA_GUIDINFO_REC_GID3 IB_SA_COMP_MASK(7)
+#define IB_SA_GUIDINFO_REC_GID4 IB_SA_COMP_MASK(8)
+#define IB_SA_GUIDINFO_REC_GID5 IB_SA_COMP_MASK(9)
+#define IB_SA_GUIDINFO_REC_GID6 IB_SA_COMP_MASK(10)
+#define IB_SA_GUIDINFO_REC_GID7 IB_SA_COMP_MASK(11)
+
+struct ib_sa_guidinfo_rec {
+ __be16 lid;
+ u8 block_num;
+ /* reserved */
+ u8 res1;
+ __be32 res2;
+ u8 guid_info_list[64];
+};
+
+struct ib_sa_client {
+ atomic_t users;
+ struct completion comp;
+};
+
+/**
+ * ib_sa_register_client - Register an SA client.
+ */
+void ib_sa_register_client(struct ib_sa_client *client);
+
+/**
+ * ib_sa_unregister_client - Deregister an SA client.
+ * @client: Client object to deregister.
+ */
+void ib_sa_unregister_client(struct ib_sa_client *client);
+
+struct ib_sa_query;
+
+void ib_sa_cancel_query(int id, struct ib_sa_query *query);
+
+int ib_sa_path_rec_get(struct ib_sa_client *client,
+ struct ib_device *device, u8 port_num,
+ struct ib_sa_path_rec *rec,
+ ib_sa_comp_mask comp_mask,
+ int timeout_ms, gfp_t gfp_mask,
+ void (*callback)(int status,
+ struct ib_sa_path_rec *resp,
+ void *context),
+ void *context,
+ struct ib_sa_query **query);
+
+int ib_sa_service_rec_query(struct ib_sa_client *client,
+ struct ib_device *device, u8 port_num,
+ u8 method,
+ struct ib_sa_service_rec *rec,
+ ib_sa_comp_mask comp_mask,
+ int timeout_ms, gfp_t gfp_mask,
+ void (*callback)(int status,
+ struct ib_sa_service_rec *resp,
+ void *context),
+ void *context,
+ struct ib_sa_query **sa_query);
+
+struct ib_sa_multicast {
+ struct ib_sa_mcmember_rec rec;
+ ib_sa_comp_mask comp_mask;
+ int (*callback)(int status,
+ struct ib_sa_multicast *multicast);
+ void *context;
+};
+
+/**
+ * ib_sa_join_multicast - Initiates a join request to the specified multicast
+ * group.
+ * @client: SA client
+ * @device: Device associated with the multicast group.
+ * @port_num: Port on the specified device to associate with the multicast
+ * group.
+ * @rec: SA multicast member record specifying group attributes.
+ * @comp_mask: Component mask indicating which group attributes of %rec are
+ * valid.
+ * @gfp_mask: GFP mask for memory allocations.
+ * @callback: User callback invoked once the join operation completes.
+ * @context: User specified context stored with the ib_sa_multicast structure.
+ *
+ * This call initiates a multicast join request with the SA for the specified
+ * multicast group. If the join operation is started successfully, it returns
+ * an ib_sa_multicast structure that is used to track the multicast operation.
+ * Users must free this structure by calling ib_free_multicast, even if the
+ * join operation later fails. (The callback status is non-zero.)
+ *
+ * If the join operation fails; status will be non-zero, with the following
+ * failures possible:
+ * -ETIMEDOUT: The request timed out.
+ * -EIO: An error occurred sending the query.
+ * -EINVAL: The MCMemberRecord values differed from the existing group's.
+ * -ENETRESET: Indicates that an fatal error has occurred on the multicast
+ * group, and the user must rejoin the group to continue using it.
+ */
+struct ib_sa_multicast *ib_sa_join_multicast(struct ib_sa_client *client,
+ struct ib_device *device, u8 port_num,
+ struct ib_sa_mcmember_rec *rec,
+ ib_sa_comp_mask comp_mask, gfp_t gfp_mask,
+ int (*callback)(int status,
+ struct ib_sa_multicast
+ *multicast),
+ void *context);
+
+/**
+ * ib_free_multicast - Frees the multicast tracking structure, and releases
+ * any reference on the multicast group.
+ * @multicast: Multicast tracking structure allocated by ib_join_multicast.
+ *
+ * This call blocks until the multicast identifier is destroyed. It may
+ * not be called from within the multicast callback; however, returning a non-
+ * zero value from the callback will result in destroying the multicast
+ * tracking structure.
+ */
+void ib_sa_free_multicast(struct ib_sa_multicast *multicast);
+
+/**
+ * ib_get_mcmember_rec - Looks up a multicast member record by its MGID and
+ * returns it if found.
+ * @device: Device associated with the multicast group.
+ * @port_num: Port on the specified device to associate with the multicast
+ * group.
+ * @mgid: MGID of multicast group.
+ * @rec: Location to copy SA multicast member record.
+ */
+int ib_sa_get_mcmember_rec(struct ib_device *device, u8 port_num,
+ union ib_gid *mgid, struct ib_sa_mcmember_rec *rec);
+
+/**
+ * ib_init_ah_from_mcmember - Initialize address handle attributes based on
+ * an SA multicast member record.
+ */
+int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num,
+ struct ib_sa_mcmember_rec *rec,
+ struct ib_ah_attr *ah_attr);
+
+/**
+ * ib_init_ah_from_path - Initialize address handle attributes based on an SA
+ * path record.
+ */
+int ib_init_ah_from_path(struct ib_device *device, u8 port_num,
+ struct ib_sa_path_rec *rec,
+ struct ib_ah_attr *ah_attr);
+
+/**
+ * ib_sa_pack_path - Conert a path record from struct ib_sa_path_rec
+ * to IB MAD wire format.
+ */
+void ib_sa_pack_path(struct ib_sa_path_rec *rec, void *attribute);
+
+/**
+ * ib_sa_unpack_path - Convert a path record from MAD format to struct
+ * ib_sa_path_rec.
+ */
+void ib_sa_unpack_path(void *attribute, struct ib_sa_path_rec *rec);
+
+/* Support GuidInfoRecord */
+int ib_sa_guid_info_rec_query(struct ib_sa_client *client,
+ struct ib_device *device, u8 port_num,
+ struct ib_sa_guidinfo_rec *rec,
+ ib_sa_comp_mask comp_mask, u8 method,
+ int timeout_ms, gfp_t gfp_mask,
+ void (*callback)(int status,
+ struct ib_sa_guidinfo_rec *resp,
+ void *context),
+ void *context,
+ struct ib_sa_query **sa_query);
+
+#endif /* IB_SA_H */
diff --git a/include/rdma/ib_smi.h b/include/rdma/ib_smi.h
new file mode 100644
index 000000000..98b9086d7
--- /dev/null
+++ b/include/rdma/ib_smi.h
@@ -0,0 +1,128 @@
+/*
+ * Copyright (c) 2004 Mellanox Technologies Ltd. All rights reserved.
+ * Copyright (c) 2004 Infinicon Corporation. All rights reserved.
+ * Copyright (c) 2004 Intel Corporation. All rights reserved.
+ * Copyright (c) 2004 Topspin Corporation. All rights reserved.
+ * Copyright (c) 2004 Voltaire Corporation. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#if !defined(IB_SMI_H)
+#define IB_SMI_H
+
+#include <rdma/ib_mad.h>
+
+#define IB_SMP_DATA_SIZE 64
+#define IB_SMP_MAX_PATH_HOPS 64
+
+struct ib_smp {
+ u8 base_version;
+ u8 mgmt_class;
+ u8 class_version;
+ u8 method;
+ __be16 status;
+ u8 hop_ptr;
+ u8 hop_cnt;
+ __be64 tid;
+ __be16 attr_id;
+ __be16 resv;
+ __be32 attr_mod;
+ __be64 mkey;
+ __be16 dr_slid;
+ __be16 dr_dlid;
+ u8 reserved[28];
+ u8 data[IB_SMP_DATA_SIZE];
+ u8 initial_path[IB_SMP_MAX_PATH_HOPS];
+ u8 return_path[IB_SMP_MAX_PATH_HOPS];
+} __attribute__ ((packed));
+
+#define IB_SMP_DIRECTION cpu_to_be16(0x8000)
+
+/* Subnet management attributes */
+#define IB_SMP_ATTR_NOTICE cpu_to_be16(0x0002)
+#define IB_SMP_ATTR_NODE_DESC cpu_to_be16(0x0010)
+#define IB_SMP_ATTR_NODE_INFO cpu_to_be16(0x0011)
+#define IB_SMP_ATTR_SWITCH_INFO cpu_to_be16(0x0012)
+#define IB_SMP_ATTR_GUID_INFO cpu_to_be16(0x0014)
+#define IB_SMP_ATTR_PORT_INFO cpu_to_be16(0x0015)
+#define IB_SMP_ATTR_PKEY_TABLE cpu_to_be16(0x0016)
+#define IB_SMP_ATTR_SL_TO_VL_TABLE cpu_to_be16(0x0017)
+#define IB_SMP_ATTR_VL_ARB_TABLE cpu_to_be16(0x0018)
+#define IB_SMP_ATTR_LINEAR_FORWARD_TABLE cpu_to_be16(0x0019)
+#define IB_SMP_ATTR_RANDOM_FORWARD_TABLE cpu_to_be16(0x001A)
+#define IB_SMP_ATTR_MCAST_FORWARD_TABLE cpu_to_be16(0x001B)
+#define IB_SMP_ATTR_SM_INFO cpu_to_be16(0x0020)
+#define IB_SMP_ATTR_VENDOR_DIAG cpu_to_be16(0x0030)
+#define IB_SMP_ATTR_LED_INFO cpu_to_be16(0x0031)
+#define IB_SMP_ATTR_VENDOR_MASK cpu_to_be16(0xFF00)
+
+struct ib_port_info {
+ __be64 mkey;
+ __be64 gid_prefix;
+ __be16 lid;
+ __be16 sm_lid;
+ __be32 cap_mask;
+ __be16 diag_code;
+ __be16 mkey_lease_period;
+ u8 local_port_num;
+ u8 link_width_enabled;
+ u8 link_width_supported;
+ u8 link_width_active;
+ u8 linkspeed_portstate; /* 4 bits, 4 bits */
+ u8 portphysstate_linkdown; /* 4 bits, 4 bits */
+ u8 mkeyprot_resv_lmc; /* 2 bits, 3, 3 */
+ u8 linkspeedactive_enabled; /* 4 bits, 4 bits */
+ u8 neighbormtu_mastersmsl; /* 4 bits, 4 bits */
+ u8 vlcap_inittype; /* 4 bits, 4 bits */
+ u8 vl_high_limit;
+ u8 vl_arb_high_cap;
+ u8 vl_arb_low_cap;
+ u8 inittypereply_mtucap; /* 4 bits, 4 bits */
+ u8 vlstallcnt_hoqlife; /* 3 bits, 5 bits */
+ u8 operationalvl_pei_peo_fpi_fpo; /* 4 bits, 1, 1, 1, 1 */
+ __be16 mkey_violations;
+ __be16 pkey_violations;
+ __be16 qkey_violations;
+ u8 guid_cap;
+ u8 clientrereg_resv_subnetto; /* 1 bit, 2 bits, 5 */
+ u8 resv_resptimevalue; /* 3 bits, 5 bits */
+ u8 localphyerrors_overrunerrors; /* 4 bits, 4 bits */
+ __be16 max_credit_hint;
+ u8 resv;
+ u8 link_roundtrip_latency[3];
+};
+
+static inline u8
+ib_get_smp_direction(struct ib_smp *smp)
+{
+ return ((smp->status & IB_SMP_DIRECTION) == IB_SMP_DIRECTION);
+}
+
+#endif /* IB_SMI_H */
diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h
new file mode 100644
index 000000000..2d83cfd7e
--- /dev/null
+++ b/include/rdma/ib_umem.h
@@ -0,0 +1,109 @@
+/*
+ * Copyright (c) 2007 Cisco Systems. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef IB_UMEM_H
+#define IB_UMEM_H
+
+#include <linux/list.h>
+#include <linux/scatterlist.h>
+#include <linux/workqueue.h>
+
+struct ib_ucontext;
+struct ib_umem_odp;
+
+struct ib_umem {
+ struct ib_ucontext *context;
+ size_t length;
+ unsigned long address;
+ int page_size;
+ int writable;
+ int hugetlb;
+ struct work_struct work;
+ struct pid *pid;
+ struct mm_struct *mm;
+ unsigned long diff;
+ struct ib_umem_odp *odp_data;
+ struct sg_table sg_head;
+ int nmap;
+ int npages;
+};
+
+/* Returns the offset of the umem start relative to the first page. */
+static inline int ib_umem_offset(struct ib_umem *umem)
+{
+ return umem->address & ((unsigned long)umem->page_size - 1);
+}
+
+/* Returns the first page of an ODP umem. */
+static inline unsigned long ib_umem_start(struct ib_umem *umem)
+{
+ return umem->address - ib_umem_offset(umem);
+}
+
+/* Returns the address of the page after the last one of an ODP umem. */
+static inline unsigned long ib_umem_end(struct ib_umem *umem)
+{
+ return PAGE_ALIGN(umem->address + umem->length);
+}
+
+static inline size_t ib_umem_num_pages(struct ib_umem *umem)
+{
+ return (ib_umem_end(umem) - ib_umem_start(umem)) >> PAGE_SHIFT;
+}
+
+#ifdef CONFIG_INFINIBAND_USER_MEM
+
+struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
+ size_t size, int access, int dmasync);
+void ib_umem_release(struct ib_umem *umem);
+int ib_umem_page_count(struct ib_umem *umem);
+int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offset,
+ size_t length);
+
+#else /* CONFIG_INFINIBAND_USER_MEM */
+
+#include <linux/err.h>
+
+static inline struct ib_umem *ib_umem_get(struct ib_ucontext *context,
+ unsigned long addr, size_t size,
+ int access, int dmasync) {
+ return ERR_PTR(-EINVAL);
+}
+static inline void ib_umem_release(struct ib_umem *umem) { }
+static inline int ib_umem_page_count(struct ib_umem *umem) { return 0; }
+static inline int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offset,
+ size_t length) {
+ return -EINVAL;
+}
+#endif /* CONFIG_INFINIBAND_USER_MEM */
+
+#endif /* IB_UMEM_H */
diff --git a/include/rdma/ib_umem_odp.h b/include/rdma/ib_umem_odp.h
new file mode 100644
index 000000000..3da0b1670
--- /dev/null
+++ b/include/rdma/ib_umem_odp.h
@@ -0,0 +1,160 @@
+/*
+ * Copyright (c) 2014 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef IB_UMEM_ODP_H
+#define IB_UMEM_ODP_H
+
+#include <rdma/ib_umem.h>
+#include <rdma/ib_verbs.h>
+#include <linux/interval_tree.h>
+
+struct umem_odp_node {
+ u64 __subtree_last;
+ struct rb_node rb;
+};
+
+struct ib_umem_odp {
+ /*
+ * An array of the pages included in the on-demand paging umem.
+ * Indices of pages that are currently not mapped into the device will
+ * contain NULL.
+ */
+ struct page **page_list;
+ /*
+ * An array of the same size as page_list, with DMA addresses mapped
+ * for pages the pages in page_list. The lower two bits designate
+ * access permissions. See ODP_READ_ALLOWED_BIT and
+ * ODP_WRITE_ALLOWED_BIT.
+ */
+ dma_addr_t *dma_list;
+ /*
+ * The umem_mutex protects the page_list and dma_list fields of an ODP
+ * umem, allowing only a single thread to map/unmap pages. The mutex
+ * also protects access to the mmu notifier counters.
+ */
+ struct mutex umem_mutex;
+ void *private; /* for the HW driver to use. */
+
+ /* When false, use the notifier counter in the ucontext struct. */
+ bool mn_counters_active;
+ int notifiers_seq;
+ int notifiers_count;
+
+ /* A linked list of umems that don't have private mmu notifier
+ * counters yet. */
+ struct list_head no_private_counters;
+ struct ib_umem *umem;
+
+ /* Tree tracking */
+ struct umem_odp_node interval_tree;
+
+ struct completion notifier_completion;
+ int dying;
+};
+
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+
+int ib_umem_odp_get(struct ib_ucontext *context, struct ib_umem *umem);
+
+void ib_umem_odp_release(struct ib_umem *umem);
+
+/*
+ * The lower 2 bits of the DMA address signal the R/W permissions for
+ * the entry. To upgrade the permissions, provide the appropriate
+ * bitmask to the map_dma_pages function.
+ *
+ * Be aware that upgrading a mapped address might result in change of
+ * the DMA address for the page.
+ */
+#define ODP_READ_ALLOWED_BIT (1<<0ULL)
+#define ODP_WRITE_ALLOWED_BIT (1<<1ULL)
+
+#define ODP_DMA_ADDR_MASK (~(ODP_READ_ALLOWED_BIT | ODP_WRITE_ALLOWED_BIT))
+
+int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 start_offset, u64 bcnt,
+ u64 access_mask, unsigned long current_seq);
+
+void ib_umem_odp_unmap_dma_pages(struct ib_umem *umem, u64 start_offset,
+ u64 bound);
+
+void rbt_ib_umem_insert(struct umem_odp_node *node, struct rb_root *root);
+void rbt_ib_umem_remove(struct umem_odp_node *node, struct rb_root *root);
+typedef int (*umem_call_back)(struct ib_umem *item, u64 start, u64 end,
+ void *cookie);
+/*
+ * Call the callback on each ib_umem in the range. Returns the logical or of
+ * the return values of the functions called.
+ */
+int rbt_ib_umem_for_each_in_range(struct rb_root *root, u64 start, u64 end,
+ umem_call_back cb, void *cookie);
+
+struct umem_odp_node *rbt_ib_umem_iter_first(struct rb_root *root,
+ u64 start, u64 last);
+struct umem_odp_node *rbt_ib_umem_iter_next(struct umem_odp_node *node,
+ u64 start, u64 last);
+
+static inline int ib_umem_mmu_notifier_retry(struct ib_umem *item,
+ unsigned long mmu_seq)
+{
+ /*
+ * This code is strongly based on the KVM code from
+ * mmu_notifier_retry. Should be called with
+ * the relevant locks taken (item->odp_data->umem_mutex
+ * and the ucontext umem_mutex semaphore locked for read).
+ */
+
+ /* Do not allow page faults while the new ib_umem hasn't seen a state
+ * with zero notifiers yet, and doesn't have its own valid set of
+ * private counters. */
+ if (!item->odp_data->mn_counters_active)
+ return 1;
+
+ if (unlikely(item->odp_data->notifiers_count))
+ return 1;
+ if (item->odp_data->notifiers_seq != mmu_seq)
+ return 1;
+ return 0;
+}
+
+#else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
+
+static inline int ib_umem_odp_get(struct ib_ucontext *context,
+ struct ib_umem *umem)
+{
+ return -EINVAL;
+}
+
+static inline void ib_umem_odp_release(struct ib_umem *umem) {}
+
+#endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
+
+#endif /* IB_UMEM_ODP_H */
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
new file mode 100644
index 000000000..65994a19e
--- /dev/null
+++ b/include/rdma/ib_verbs.h
@@ -0,0 +1,2671 @@
+/*
+ * Copyright (c) 2004 Mellanox Technologies Ltd. All rights reserved.
+ * Copyright (c) 2004 Infinicon Corporation. All rights reserved.
+ * Copyright (c) 2004 Intel Corporation. All rights reserved.
+ * Copyright (c) 2004 Topspin Corporation. All rights reserved.
+ * Copyright (c) 2004 Voltaire Corporation. All rights reserved.
+ * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright (c) 2005, 2006, 2007 Cisco Systems. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#if !defined(IB_VERBS_H)
+#define IB_VERBS_H
+
+#include <linux/types.h>
+#include <linux/device.h>
+#include <linux/mm.h>
+#include <linux/dma-mapping.h>
+#include <linux/kref.h>
+#include <linux/list.h>
+#include <linux/rwsem.h>
+#include <linux/scatterlist.h>
+#include <linux/workqueue.h>
+#include <uapi/linux/if_ether.h>
+
+#include <linux/atomic.h>
+#include <linux/mmu_notifier.h>
+#include <asm/uaccess.h>
+
+extern struct workqueue_struct *ib_wq;
+
+union ib_gid {
+ u8 raw[16];
+ struct {
+ __be64 subnet_prefix;
+ __be64 interface_id;
+ } global;
+};
+
+enum rdma_node_type {
+ /* IB values map to NodeInfo:NodeType. */
+ RDMA_NODE_IB_CA = 1,
+ RDMA_NODE_IB_SWITCH,
+ RDMA_NODE_IB_ROUTER,
+ RDMA_NODE_RNIC,
+ RDMA_NODE_USNIC,
+ RDMA_NODE_USNIC_UDP,
+};
+
+enum rdma_transport_type {
+ RDMA_TRANSPORT_IB,
+ RDMA_TRANSPORT_IWARP,
+ RDMA_TRANSPORT_USNIC,
+ RDMA_TRANSPORT_USNIC_UDP
+};
+
+__attribute_const__ enum rdma_transport_type
+rdma_node_get_transport(enum rdma_node_type node_type);
+
+enum rdma_link_layer {
+ IB_LINK_LAYER_UNSPECIFIED,
+ IB_LINK_LAYER_INFINIBAND,
+ IB_LINK_LAYER_ETHERNET,
+};
+
+enum ib_device_cap_flags {
+ IB_DEVICE_RESIZE_MAX_WR = 1,
+ IB_DEVICE_BAD_PKEY_CNTR = (1<<1),
+ IB_DEVICE_BAD_QKEY_CNTR = (1<<2),
+ IB_DEVICE_RAW_MULTI = (1<<3),
+ IB_DEVICE_AUTO_PATH_MIG = (1<<4),
+ IB_DEVICE_CHANGE_PHY_PORT = (1<<5),
+ IB_DEVICE_UD_AV_PORT_ENFORCE = (1<<6),
+ IB_DEVICE_CURR_QP_STATE_MOD = (1<<7),
+ IB_DEVICE_SHUTDOWN_PORT = (1<<8),
+ IB_DEVICE_INIT_TYPE = (1<<9),
+ IB_DEVICE_PORT_ACTIVE_EVENT = (1<<10),
+ IB_DEVICE_SYS_IMAGE_GUID = (1<<11),
+ IB_DEVICE_RC_RNR_NAK_GEN = (1<<12),
+ IB_DEVICE_SRQ_RESIZE = (1<<13),
+ IB_DEVICE_N_NOTIFY_CQ = (1<<14),
+ IB_DEVICE_LOCAL_DMA_LKEY = (1<<15),
+ IB_DEVICE_RESERVED = (1<<16), /* old SEND_W_INV */
+ IB_DEVICE_MEM_WINDOW = (1<<17),
+ /*
+ * Devices should set IB_DEVICE_UD_IP_SUM if they support
+ * insertion of UDP and TCP checksum on outgoing UD IPoIB
+ * messages and can verify the validity of checksum for
+ * incoming messages. Setting this flag implies that the
+ * IPoIB driver may set NETIF_F_IP_CSUM for datagram mode.
+ */
+ IB_DEVICE_UD_IP_CSUM = (1<<18),
+ IB_DEVICE_UD_TSO = (1<<19),
+ IB_DEVICE_XRC = (1<<20),
+ IB_DEVICE_MEM_MGT_EXTENSIONS = (1<<21),
+ IB_DEVICE_BLOCK_MULTICAST_LOOPBACK = (1<<22),
+ IB_DEVICE_MEM_WINDOW_TYPE_2A = (1<<23),
+ IB_DEVICE_MEM_WINDOW_TYPE_2B = (1<<24),
+ IB_DEVICE_MANAGED_FLOW_STEERING = (1<<29),
+ IB_DEVICE_SIGNATURE_HANDOVER = (1<<30),
+ IB_DEVICE_ON_DEMAND_PAGING = (1<<31),
+};
+
+enum ib_signature_prot_cap {
+ IB_PROT_T10DIF_TYPE_1 = 1,
+ IB_PROT_T10DIF_TYPE_2 = 1 << 1,
+ IB_PROT_T10DIF_TYPE_3 = 1 << 2,
+};
+
+enum ib_signature_guard_cap {
+ IB_GUARD_T10DIF_CRC = 1,
+ IB_GUARD_T10DIF_CSUM = 1 << 1,
+};
+
+enum ib_atomic_cap {
+ IB_ATOMIC_NONE,
+ IB_ATOMIC_HCA,
+ IB_ATOMIC_GLOB
+};
+
+enum ib_odp_general_cap_bits {
+ IB_ODP_SUPPORT = 1 << 0,
+};
+
+enum ib_odp_transport_cap_bits {
+ IB_ODP_SUPPORT_SEND = 1 << 0,
+ IB_ODP_SUPPORT_RECV = 1 << 1,
+ IB_ODP_SUPPORT_WRITE = 1 << 2,
+ IB_ODP_SUPPORT_READ = 1 << 3,
+ IB_ODP_SUPPORT_ATOMIC = 1 << 4,
+};
+
+struct ib_odp_caps {
+ uint64_t general_caps;
+ struct {
+ uint32_t rc_odp_caps;
+ uint32_t uc_odp_caps;
+ uint32_t ud_odp_caps;
+ } per_transport_caps;
+};
+
+struct ib_device_attr {
+ u64 fw_ver;
+ __be64 sys_image_guid;
+ u64 max_mr_size;
+ u64 page_size_cap;
+ u32 vendor_id;
+ u32 vendor_part_id;
+ u32 hw_ver;
+ int max_qp;
+ int max_qp_wr;
+ int device_cap_flags;
+ int max_sge;
+ int max_sge_rd;
+ int max_cq;
+ int max_cqe;
+ int max_mr;
+ int max_pd;
+ int max_qp_rd_atom;
+ int max_ee_rd_atom;
+ int max_res_rd_atom;
+ int max_qp_init_rd_atom;
+ int max_ee_init_rd_atom;
+ enum ib_atomic_cap atomic_cap;
+ enum ib_atomic_cap masked_atomic_cap;
+ int max_ee;
+ int max_rdd;
+ int max_mw;
+ int max_raw_ipv6_qp;
+ int max_raw_ethy_qp;
+ int max_mcast_grp;
+ int max_mcast_qp_attach;
+ int max_total_mcast_qp_attach;
+ int max_ah;
+ int max_fmr;
+ int max_map_per_fmr;
+ int max_srq;
+ int max_srq_wr;
+ int max_srq_sge;
+ unsigned int max_fast_reg_page_list_len;
+ u16 max_pkeys;
+ u8 local_ca_ack_delay;
+ int sig_prot_cap;
+ int sig_guard_cap;
+ struct ib_odp_caps odp_caps;
+};
+
+enum ib_mtu {
+ IB_MTU_256 = 1,
+ IB_MTU_512 = 2,
+ IB_MTU_1024 = 3,
+ IB_MTU_2048 = 4,
+ IB_MTU_4096 = 5
+};
+
+static inline int ib_mtu_enum_to_int(enum ib_mtu mtu)
+{
+ switch (mtu) {
+ case IB_MTU_256: return 256;
+ case IB_MTU_512: return 512;
+ case IB_MTU_1024: return 1024;
+ case IB_MTU_2048: return 2048;
+ case IB_MTU_4096: return 4096;
+ default: return -1;
+ }
+}
+
+enum ib_port_state {
+ IB_PORT_NOP = 0,
+ IB_PORT_DOWN = 1,
+ IB_PORT_INIT = 2,
+ IB_PORT_ARMED = 3,
+ IB_PORT_ACTIVE = 4,
+ IB_PORT_ACTIVE_DEFER = 5
+};
+
+enum ib_port_cap_flags {
+ IB_PORT_SM = 1 << 1,
+ IB_PORT_NOTICE_SUP = 1 << 2,
+ IB_PORT_TRAP_SUP = 1 << 3,
+ IB_PORT_OPT_IPD_SUP = 1 << 4,
+ IB_PORT_AUTO_MIGR_SUP = 1 << 5,
+ IB_PORT_SL_MAP_SUP = 1 << 6,
+ IB_PORT_MKEY_NVRAM = 1 << 7,
+ IB_PORT_PKEY_NVRAM = 1 << 8,
+ IB_PORT_LED_INFO_SUP = 1 << 9,
+ IB_PORT_SM_DISABLED = 1 << 10,
+ IB_PORT_SYS_IMAGE_GUID_SUP = 1 << 11,
+ IB_PORT_PKEY_SW_EXT_PORT_TRAP_SUP = 1 << 12,
+ IB_PORT_EXTENDED_SPEEDS_SUP = 1 << 14,
+ IB_PORT_CM_SUP = 1 << 16,
+ IB_PORT_SNMP_TUNNEL_SUP = 1 << 17,
+ IB_PORT_REINIT_SUP = 1 << 18,
+ IB_PORT_DEVICE_MGMT_SUP = 1 << 19,
+ IB_PORT_VENDOR_CLASS_SUP = 1 << 20,
+ IB_PORT_DR_NOTICE_SUP = 1 << 21,
+ IB_PORT_CAP_MASK_NOTICE_SUP = 1 << 22,
+ IB_PORT_BOOT_MGMT_SUP = 1 << 23,
+ IB_PORT_LINK_LATENCY_SUP = 1 << 24,
+ IB_PORT_CLIENT_REG_SUP = 1 << 25,
+ IB_PORT_IP_BASED_GIDS = 1 << 26
+};
+
+enum ib_port_width {
+ IB_WIDTH_1X = 1,
+ IB_WIDTH_4X = 2,
+ IB_WIDTH_8X = 4,
+ IB_WIDTH_12X = 8
+};
+
+static inline int ib_width_enum_to_int(enum ib_port_width width)
+{
+ switch (width) {
+ case IB_WIDTH_1X: return 1;
+ case IB_WIDTH_4X: return 4;
+ case IB_WIDTH_8X: return 8;
+ case IB_WIDTH_12X: return 12;
+ default: return -1;
+ }
+}
+
+enum ib_port_speed {
+ IB_SPEED_SDR = 1,
+ IB_SPEED_DDR = 2,
+ IB_SPEED_QDR = 4,
+ IB_SPEED_FDR10 = 8,
+ IB_SPEED_FDR = 16,
+ IB_SPEED_EDR = 32
+};
+
+struct ib_protocol_stats {
+ /* TBD... */
+};
+
+struct iw_protocol_stats {
+ u64 ipInReceives;
+ u64 ipInHdrErrors;
+ u64 ipInTooBigErrors;
+ u64 ipInNoRoutes;
+ u64 ipInAddrErrors;
+ u64 ipInUnknownProtos;
+ u64 ipInTruncatedPkts;
+ u64 ipInDiscards;
+ u64 ipInDelivers;
+ u64 ipOutForwDatagrams;
+ u64 ipOutRequests;
+ u64 ipOutDiscards;
+ u64 ipOutNoRoutes;
+ u64 ipReasmTimeout;
+ u64 ipReasmReqds;
+ u64 ipReasmOKs;
+ u64 ipReasmFails;
+ u64 ipFragOKs;
+ u64 ipFragFails;
+ u64 ipFragCreates;
+ u64 ipInMcastPkts;
+ u64 ipOutMcastPkts;
+ u64 ipInBcastPkts;
+ u64 ipOutBcastPkts;
+
+ u64 tcpRtoAlgorithm;
+ u64 tcpRtoMin;
+ u64 tcpRtoMax;
+ u64 tcpMaxConn;
+ u64 tcpActiveOpens;
+ u64 tcpPassiveOpens;
+ u64 tcpAttemptFails;
+ u64 tcpEstabResets;
+ u64 tcpCurrEstab;
+ u64 tcpInSegs;
+ u64 tcpOutSegs;
+ u64 tcpRetransSegs;
+ u64 tcpInErrs;
+ u64 tcpOutRsts;
+};
+
+union rdma_protocol_stats {
+ struct ib_protocol_stats ib;
+ struct iw_protocol_stats iw;
+};
+
+struct ib_port_attr {
+ enum ib_port_state state;
+ enum ib_mtu max_mtu;
+ enum ib_mtu active_mtu;
+ int gid_tbl_len;
+ u32 port_cap_flags;
+ u32 max_msg_sz;
+ u32 bad_pkey_cntr;
+ u32 qkey_viol_cntr;
+ u16 pkey_tbl_len;
+ u16 lid;
+ u16 sm_lid;
+ u8 lmc;
+ u8 max_vl_num;
+ u8 sm_sl;
+ u8 subnet_timeout;
+ u8 init_type_reply;
+ u8 active_width;
+ u8 active_speed;
+ u8 phys_state;
+};
+
+enum ib_device_modify_flags {
+ IB_DEVICE_MODIFY_SYS_IMAGE_GUID = 1 << 0,
+ IB_DEVICE_MODIFY_NODE_DESC = 1 << 1
+};
+
+struct ib_device_modify {
+ u64 sys_image_guid;
+ char node_desc[64];
+};
+
+enum ib_port_modify_flags {
+ IB_PORT_SHUTDOWN = 1,
+ IB_PORT_INIT_TYPE = (1<<2),
+ IB_PORT_RESET_QKEY_CNTR = (1<<3)
+};
+
+struct ib_port_modify {
+ u32 set_port_cap_mask;
+ u32 clr_port_cap_mask;
+ u8 init_type;
+};
+
+enum ib_event_type {
+ IB_EVENT_CQ_ERR,
+ IB_EVENT_QP_FATAL,
+ IB_EVENT_QP_REQ_ERR,
+ IB_EVENT_QP_ACCESS_ERR,
+ IB_EVENT_COMM_EST,
+ IB_EVENT_SQ_DRAINED,
+ IB_EVENT_PATH_MIG,
+ IB_EVENT_PATH_MIG_ERR,
+ IB_EVENT_DEVICE_FATAL,
+ IB_EVENT_PORT_ACTIVE,
+ IB_EVENT_PORT_ERR,
+ IB_EVENT_LID_CHANGE,
+ IB_EVENT_PKEY_CHANGE,
+ IB_EVENT_SM_CHANGE,
+ IB_EVENT_SRQ_ERR,
+ IB_EVENT_SRQ_LIMIT_REACHED,
+ IB_EVENT_QP_LAST_WQE_REACHED,
+ IB_EVENT_CLIENT_REREGISTER,
+ IB_EVENT_GID_CHANGE,
+};
+
+struct ib_event {
+ struct ib_device *device;
+ union {
+ struct ib_cq *cq;
+ struct ib_qp *qp;
+ struct ib_srq *srq;
+ u8 port_num;
+ } element;
+ enum ib_event_type event;
+};
+
+struct ib_event_handler {
+ struct ib_device *device;
+ void (*handler)(struct ib_event_handler *, struct ib_event *);
+ struct list_head list;
+};
+
+#define INIT_IB_EVENT_HANDLER(_ptr, _device, _handler) \
+ do { \
+ (_ptr)->device = _device; \
+ (_ptr)->handler = _handler; \
+ INIT_LIST_HEAD(&(_ptr)->list); \
+ } while (0)
+
+struct ib_global_route {
+ union ib_gid dgid;
+ u32 flow_label;
+ u8 sgid_index;
+ u8 hop_limit;
+ u8 traffic_class;
+};
+
+struct ib_grh {
+ __be32 version_tclass_flow;
+ __be16 paylen;
+ u8 next_hdr;
+ u8 hop_limit;
+ union ib_gid sgid;
+ union ib_gid dgid;
+};
+
+enum {
+ IB_MULTICAST_QPN = 0xffffff
+};
+
+#define IB_LID_PERMISSIVE cpu_to_be16(0xFFFF)
+
+enum ib_ah_flags {
+ IB_AH_GRH = 1
+};
+
+enum ib_rate {
+ IB_RATE_PORT_CURRENT = 0,
+ IB_RATE_2_5_GBPS = 2,
+ IB_RATE_5_GBPS = 5,
+ IB_RATE_10_GBPS = 3,
+ IB_RATE_20_GBPS = 6,
+ IB_RATE_30_GBPS = 4,
+ IB_RATE_40_GBPS = 7,
+ IB_RATE_60_GBPS = 8,
+ IB_RATE_80_GBPS = 9,
+ IB_RATE_120_GBPS = 10,
+ IB_RATE_14_GBPS = 11,
+ IB_RATE_56_GBPS = 12,
+ IB_RATE_112_GBPS = 13,
+ IB_RATE_168_GBPS = 14,
+ IB_RATE_25_GBPS = 15,
+ IB_RATE_100_GBPS = 16,
+ IB_RATE_200_GBPS = 17,
+ IB_RATE_300_GBPS = 18
+};
+
+/**
+ * ib_rate_to_mult - Convert the IB rate enum to a multiple of the
+ * base rate of 2.5 Gbit/sec. For example, IB_RATE_5_GBPS will be
+ * converted to 2, since 5 Gbit/sec is 2 * 2.5 Gbit/sec.
+ * @rate: rate to convert.
+ */
+__attribute_const__ int ib_rate_to_mult(enum ib_rate rate);
+
+/**
+ * ib_rate_to_mbps - Convert the IB rate enum to Mbps.
+ * For example, IB_RATE_2_5_GBPS will be converted to 2500.
+ * @rate: rate to convert.
+ */
+__attribute_const__ int ib_rate_to_mbps(enum ib_rate rate);
+
+enum ib_mr_create_flags {
+ IB_MR_SIGNATURE_EN = 1,
+};
+
+/**
+ * ib_mr_init_attr - Memory region init attributes passed to routine
+ * ib_create_mr.
+ * @max_reg_descriptors: max number of registration descriptors that
+ * may be used with registration work requests.
+ * @flags: MR creation flags bit mask.
+ */
+struct ib_mr_init_attr {
+ int max_reg_descriptors;
+ u32 flags;
+};
+
+/**
+ * Signature types
+ * IB_SIG_TYPE_NONE: Unprotected.
+ * IB_SIG_TYPE_T10_DIF: Type T10-DIF
+ */
+enum ib_signature_type {
+ IB_SIG_TYPE_NONE,
+ IB_SIG_TYPE_T10_DIF,
+};
+
+/**
+ * Signature T10-DIF block-guard types
+ * IB_T10DIF_CRC: Corresponds to T10-PI mandated CRC checksum rules.
+ * IB_T10DIF_CSUM: Corresponds to IP checksum rules.
+ */
+enum ib_t10_dif_bg_type {
+ IB_T10DIF_CRC,
+ IB_T10DIF_CSUM
+};
+
+/**
+ * struct ib_t10_dif_domain - Parameters specific for T10-DIF
+ * domain.
+ * @bg_type: T10-DIF block guard type (CRC|CSUM)
+ * @pi_interval: protection information interval.
+ * @bg: seed of guard computation.
+ * @app_tag: application tag of guard block
+ * @ref_tag: initial guard block reference tag.
+ * @ref_remap: Indicate wethear the reftag increments each block
+ * @app_escape: Indicate to skip block check if apptag=0xffff
+ * @ref_escape: Indicate to skip block check if reftag=0xffffffff
+ * @apptag_check_mask: check bitmask of application tag.
+ */
+struct ib_t10_dif_domain {
+ enum ib_t10_dif_bg_type bg_type;
+ u16 pi_interval;
+ u16 bg;
+ u16 app_tag;
+ u32 ref_tag;
+ bool ref_remap;
+ bool app_escape;
+ bool ref_escape;
+ u16 apptag_check_mask;
+};
+
+/**
+ * struct ib_sig_domain - Parameters for signature domain
+ * @sig_type: specific signauture type
+ * @sig: union of all signature domain attributes that may
+ * be used to set domain layout.
+ */
+struct ib_sig_domain {
+ enum ib_signature_type sig_type;
+ union {
+ struct ib_t10_dif_domain dif;
+ } sig;
+};
+
+/**
+ * struct ib_sig_attrs - Parameters for signature handover operation
+ * @check_mask: bitmask for signature byte check (8 bytes)
+ * @mem: memory domain layout desciptor.
+ * @wire: wire domain layout desciptor.
+ */
+struct ib_sig_attrs {
+ u8 check_mask;
+ struct ib_sig_domain mem;
+ struct ib_sig_domain wire;
+};
+
+enum ib_sig_err_type {
+ IB_SIG_BAD_GUARD,
+ IB_SIG_BAD_REFTAG,
+ IB_SIG_BAD_APPTAG,
+};
+
+/**
+ * struct ib_sig_err - signature error descriptor
+ */
+struct ib_sig_err {
+ enum ib_sig_err_type err_type;
+ u32 expected;
+ u32 actual;
+ u64 sig_err_offset;
+ u32 key;
+};
+
+enum ib_mr_status_check {
+ IB_MR_CHECK_SIG_STATUS = 1,
+};
+
+/**
+ * struct ib_mr_status - Memory region status container
+ *
+ * @fail_status: Bitmask of MR checks status. For each
+ * failed check a corresponding status bit is set.
+ * @sig_err: Additional info for IB_MR_CEHCK_SIG_STATUS
+ * failure.
+ */
+struct ib_mr_status {
+ u32 fail_status;
+ struct ib_sig_err sig_err;
+};
+
+/**
+ * mult_to_ib_rate - Convert a multiple of 2.5 Gbit/sec to an IB rate
+ * enum.
+ * @mult: multiple to convert.
+ */
+__attribute_const__ enum ib_rate mult_to_ib_rate(int mult);
+
+struct ib_ah_attr {
+ struct ib_global_route grh;
+ u16 dlid;
+ u8 sl;
+ u8 src_path_bits;
+ u8 static_rate;
+ u8 ah_flags;
+ u8 port_num;
+ u8 dmac[ETH_ALEN];
+ u16 vlan_id;
+};
+
+enum ib_wc_status {
+ IB_WC_SUCCESS,
+ IB_WC_LOC_LEN_ERR,
+ IB_WC_LOC_QP_OP_ERR,
+ IB_WC_LOC_EEC_OP_ERR,
+ IB_WC_LOC_PROT_ERR,
+ IB_WC_WR_FLUSH_ERR,
+ IB_WC_MW_BIND_ERR,
+ IB_WC_BAD_RESP_ERR,
+ IB_WC_LOC_ACCESS_ERR,
+ IB_WC_REM_INV_REQ_ERR,
+ IB_WC_REM_ACCESS_ERR,
+ IB_WC_REM_OP_ERR,
+ IB_WC_RETRY_EXC_ERR,
+ IB_WC_RNR_RETRY_EXC_ERR,
+ IB_WC_LOC_RDD_VIOL_ERR,
+ IB_WC_REM_INV_RD_REQ_ERR,
+ IB_WC_REM_ABORT_ERR,
+ IB_WC_INV_EECN_ERR,
+ IB_WC_INV_EEC_STATE_ERR,
+ IB_WC_FATAL_ERR,
+ IB_WC_RESP_TIMEOUT_ERR,
+ IB_WC_GENERAL_ERR
+};
+
+enum ib_wc_opcode {
+ IB_WC_SEND,
+ IB_WC_RDMA_WRITE,
+ IB_WC_RDMA_READ,
+ IB_WC_COMP_SWAP,
+ IB_WC_FETCH_ADD,
+ IB_WC_BIND_MW,
+ IB_WC_LSO,
+ IB_WC_LOCAL_INV,
+ IB_WC_FAST_REG_MR,
+ IB_WC_MASKED_COMP_SWAP,
+ IB_WC_MASKED_FETCH_ADD,
+/*
+ * Set value of IB_WC_RECV so consumers can test if a completion is a
+ * receive by testing (opcode & IB_WC_RECV).
+ */
+ IB_WC_RECV = 1 << 7,
+ IB_WC_RECV_RDMA_WITH_IMM
+};
+
+enum ib_wc_flags {
+ IB_WC_GRH = 1,
+ IB_WC_WITH_IMM = (1<<1),
+ IB_WC_WITH_INVALIDATE = (1<<2),
+ IB_WC_IP_CSUM_OK = (1<<3),
+ IB_WC_WITH_SMAC = (1<<4),
+ IB_WC_WITH_VLAN = (1<<5),
+};
+
+struct ib_wc {
+ u64 wr_id;
+ enum ib_wc_status status;
+ enum ib_wc_opcode opcode;
+ u32 vendor_err;
+ u32 byte_len;
+ struct ib_qp *qp;
+ union {
+ __be32 imm_data;
+ u32 invalidate_rkey;
+ } ex;
+ u32 src_qp;
+ int wc_flags;
+ u16 pkey_index;
+ u16 slid;
+ u8 sl;
+ u8 dlid_path_bits;
+ u8 port_num; /* valid only for DR SMPs on switches */
+ u8 smac[ETH_ALEN];
+ u16 vlan_id;
+};
+
+enum ib_cq_notify_flags {
+ IB_CQ_SOLICITED = 1 << 0,
+ IB_CQ_NEXT_COMP = 1 << 1,
+ IB_CQ_SOLICITED_MASK = IB_CQ_SOLICITED | IB_CQ_NEXT_COMP,
+ IB_CQ_REPORT_MISSED_EVENTS = 1 << 2,
+};
+
+enum ib_srq_type {
+ IB_SRQT_BASIC,
+ IB_SRQT_XRC
+};
+
+enum ib_srq_attr_mask {
+ IB_SRQ_MAX_WR = 1 << 0,
+ IB_SRQ_LIMIT = 1 << 1,
+};
+
+struct ib_srq_attr {
+ u32 max_wr;
+ u32 max_sge;
+ u32 srq_limit;
+};
+
+struct ib_srq_init_attr {
+ void (*event_handler)(struct ib_event *, void *);
+ void *srq_context;
+ struct ib_srq_attr attr;
+ enum ib_srq_type srq_type;
+
+ union {
+ struct {
+ struct ib_xrcd *xrcd;
+ struct ib_cq *cq;
+ } xrc;
+ } ext;
+};
+
+struct ib_qp_cap {
+ u32 max_send_wr;
+ u32 max_recv_wr;
+ u32 max_send_sge;
+ u32 max_recv_sge;
+ u32 max_inline_data;
+};
+
+enum ib_sig_type {
+ IB_SIGNAL_ALL_WR,
+ IB_SIGNAL_REQ_WR
+};
+
+enum ib_qp_type {
+ /*
+ * IB_QPT_SMI and IB_QPT_GSI have to be the first two entries
+ * here (and in that order) since the MAD layer uses them as
+ * indices into a 2-entry table.
+ */
+ IB_QPT_SMI,
+ IB_QPT_GSI,
+
+ IB_QPT_RC,
+ IB_QPT_UC,
+ IB_QPT_UD,
+ IB_QPT_RAW_IPV6,
+ IB_QPT_RAW_ETHERTYPE,
+ IB_QPT_RAW_PACKET = 8,
+ IB_QPT_XRC_INI = 9,
+ IB_QPT_XRC_TGT,
+ IB_QPT_MAX,
+ /* Reserve a range for qp types internal to the low level driver.
+ * These qp types will not be visible at the IB core layer, so the
+ * IB_QPT_MAX usages should not be affected in the core layer
+ */
+ IB_QPT_RESERVED1 = 0x1000,
+ IB_QPT_RESERVED2,
+ IB_QPT_RESERVED3,
+ IB_QPT_RESERVED4,
+ IB_QPT_RESERVED5,
+ IB_QPT_RESERVED6,
+ IB_QPT_RESERVED7,
+ IB_QPT_RESERVED8,
+ IB_QPT_RESERVED9,
+ IB_QPT_RESERVED10,
+};
+
+enum ib_qp_create_flags {
+ IB_QP_CREATE_IPOIB_UD_LSO = 1 << 0,
+ IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK = 1 << 1,
+ IB_QP_CREATE_NETIF_QP = 1 << 5,
+ IB_QP_CREATE_SIGNATURE_EN = 1 << 6,
+ IB_QP_CREATE_USE_GFP_NOIO = 1 << 7,
+ /* reserve bits 26-31 for low level drivers' internal use */
+ IB_QP_CREATE_RESERVED_START = 1 << 26,
+ IB_QP_CREATE_RESERVED_END = 1 << 31,
+};
+
+
+/*
+ * Note: users may not call ib_close_qp or ib_destroy_qp from the event_handler
+ * callback to destroy the passed in QP.
+ */
+
+struct ib_qp_init_attr {
+ void (*event_handler)(struct ib_event *, void *);
+ void *qp_context;
+ struct ib_cq *send_cq;
+ struct ib_cq *recv_cq;
+ struct ib_srq *srq;
+ struct ib_xrcd *xrcd; /* XRC TGT QPs only */
+ struct ib_qp_cap cap;
+ enum ib_sig_type sq_sig_type;
+ enum ib_qp_type qp_type;
+ enum ib_qp_create_flags create_flags;
+ u8 port_num; /* special QP types only */
+};
+
+struct ib_qp_open_attr {
+ void (*event_handler)(struct ib_event *, void *);
+ void *qp_context;
+ u32 qp_num;
+ enum ib_qp_type qp_type;
+};
+
+enum ib_rnr_timeout {
+ IB_RNR_TIMER_655_36 = 0,
+ IB_RNR_TIMER_000_01 = 1,
+ IB_RNR_TIMER_000_02 = 2,
+ IB_RNR_TIMER_000_03 = 3,
+ IB_RNR_TIMER_000_04 = 4,
+ IB_RNR_TIMER_000_06 = 5,
+ IB_RNR_TIMER_000_08 = 6,
+ IB_RNR_TIMER_000_12 = 7,
+ IB_RNR_TIMER_000_16 = 8,
+ IB_RNR_TIMER_000_24 = 9,
+ IB_RNR_TIMER_000_32 = 10,
+ IB_RNR_TIMER_000_48 = 11,
+ IB_RNR_TIMER_000_64 = 12,
+ IB_RNR_TIMER_000_96 = 13,
+ IB_RNR_TIMER_001_28 = 14,
+ IB_RNR_TIMER_001_92 = 15,
+ IB_RNR_TIMER_002_56 = 16,
+ IB_RNR_TIMER_003_84 = 17,
+ IB_RNR_TIMER_005_12 = 18,
+ IB_RNR_TIMER_007_68 = 19,
+ IB_RNR_TIMER_010_24 = 20,
+ IB_RNR_TIMER_015_36 = 21,
+ IB_RNR_TIMER_020_48 = 22,
+ IB_RNR_TIMER_030_72 = 23,
+ IB_RNR_TIMER_040_96 = 24,
+ IB_RNR_TIMER_061_44 = 25,
+ IB_RNR_TIMER_081_92 = 26,
+ IB_RNR_TIMER_122_88 = 27,
+ IB_RNR_TIMER_163_84 = 28,
+ IB_RNR_TIMER_245_76 = 29,
+ IB_RNR_TIMER_327_68 = 30,
+ IB_RNR_TIMER_491_52 = 31
+};
+
+enum ib_qp_attr_mask {
+ IB_QP_STATE = 1,
+ IB_QP_CUR_STATE = (1<<1),
+ IB_QP_EN_SQD_ASYNC_NOTIFY = (1<<2),
+ IB_QP_ACCESS_FLAGS = (1<<3),
+ IB_QP_PKEY_INDEX = (1<<4),
+ IB_QP_PORT = (1<<5),
+ IB_QP_QKEY = (1<<6),
+ IB_QP_AV = (1<<7),
+ IB_QP_PATH_MTU = (1<<8),
+ IB_QP_TIMEOUT = (1<<9),
+ IB_QP_RETRY_CNT = (1<<10),
+ IB_QP_RNR_RETRY = (1<<11),
+ IB_QP_RQ_PSN = (1<<12),
+ IB_QP_MAX_QP_RD_ATOMIC = (1<<13),
+ IB_QP_ALT_PATH = (1<<14),
+ IB_QP_MIN_RNR_TIMER = (1<<15),
+ IB_QP_SQ_PSN = (1<<16),
+ IB_QP_MAX_DEST_RD_ATOMIC = (1<<17),
+ IB_QP_PATH_MIG_STATE = (1<<18),
+ IB_QP_CAP = (1<<19),
+ IB_QP_DEST_QPN = (1<<20),
+ IB_QP_SMAC = (1<<21),
+ IB_QP_ALT_SMAC = (1<<22),
+ IB_QP_VID = (1<<23),
+ IB_QP_ALT_VID = (1<<24),
+};
+
+enum ib_qp_state {
+ IB_QPS_RESET,
+ IB_QPS_INIT,
+ IB_QPS_RTR,
+ IB_QPS_RTS,
+ IB_QPS_SQD,
+ IB_QPS_SQE,
+ IB_QPS_ERR
+};
+
+enum ib_mig_state {
+ IB_MIG_MIGRATED,
+ IB_MIG_REARM,
+ IB_MIG_ARMED
+};
+
+enum ib_mw_type {
+ IB_MW_TYPE_1 = 1,
+ IB_MW_TYPE_2 = 2
+};
+
+struct ib_qp_attr {
+ enum ib_qp_state qp_state;
+ enum ib_qp_state cur_qp_state;
+ enum ib_mtu path_mtu;
+ enum ib_mig_state path_mig_state;
+ u32 qkey;
+ u32 rq_psn;
+ u32 sq_psn;
+ u32 dest_qp_num;
+ int qp_access_flags;
+ struct ib_qp_cap cap;
+ struct ib_ah_attr ah_attr;
+ struct ib_ah_attr alt_ah_attr;
+ u16 pkey_index;
+ u16 alt_pkey_index;
+ u8 en_sqd_async_notify;
+ u8 sq_draining;
+ u8 max_rd_atomic;
+ u8 max_dest_rd_atomic;
+ u8 min_rnr_timer;
+ u8 port_num;
+ u8 timeout;
+ u8 retry_cnt;
+ u8 rnr_retry;
+ u8 alt_port_num;
+ u8 alt_timeout;
+ u8 smac[ETH_ALEN];
+ u8 alt_smac[ETH_ALEN];
+ u16 vlan_id;
+ u16 alt_vlan_id;
+};
+
+enum ib_wr_opcode {
+ IB_WR_RDMA_WRITE,
+ IB_WR_RDMA_WRITE_WITH_IMM,
+ IB_WR_SEND,
+ IB_WR_SEND_WITH_IMM,
+ IB_WR_RDMA_READ,
+ IB_WR_ATOMIC_CMP_AND_SWP,
+ IB_WR_ATOMIC_FETCH_AND_ADD,
+ IB_WR_LSO,
+ IB_WR_SEND_WITH_INV,
+ IB_WR_RDMA_READ_WITH_INV,
+ IB_WR_LOCAL_INV,
+ IB_WR_FAST_REG_MR,
+ IB_WR_MASKED_ATOMIC_CMP_AND_SWP,
+ IB_WR_MASKED_ATOMIC_FETCH_AND_ADD,
+ IB_WR_BIND_MW,
+ IB_WR_REG_SIG_MR,
+ /* reserve values for low level drivers' internal use.
+ * These values will not be used at all in the ib core layer.
+ */
+ IB_WR_RESERVED1 = 0xf0,
+ IB_WR_RESERVED2,
+ IB_WR_RESERVED3,
+ IB_WR_RESERVED4,
+ IB_WR_RESERVED5,
+ IB_WR_RESERVED6,
+ IB_WR_RESERVED7,
+ IB_WR_RESERVED8,
+ IB_WR_RESERVED9,
+ IB_WR_RESERVED10,
+};
+
+enum ib_send_flags {
+ IB_SEND_FENCE = 1,
+ IB_SEND_SIGNALED = (1<<1),
+ IB_SEND_SOLICITED = (1<<2),
+ IB_SEND_INLINE = (1<<3),
+ IB_SEND_IP_CSUM = (1<<4),
+
+ /* reserve bits 26-31 for low level drivers' internal use */
+ IB_SEND_RESERVED_START = (1 << 26),
+ IB_SEND_RESERVED_END = (1 << 31),
+};
+
+struct ib_sge {
+ u64 addr;
+ u32 length;
+ u32 lkey;
+};
+
+struct ib_fast_reg_page_list {
+ struct ib_device *device;
+ u64 *page_list;
+ unsigned int max_page_list_len;
+};
+
+/**
+ * struct ib_mw_bind_info - Parameters for a memory window bind operation.
+ * @mr: A memory region to bind the memory window to.
+ * @addr: The address where the memory window should begin.
+ * @length: The length of the memory window, in bytes.
+ * @mw_access_flags: Access flags from enum ib_access_flags for the window.
+ *
+ * This struct contains the shared parameters for type 1 and type 2
+ * memory window bind operations.
+ */
+struct ib_mw_bind_info {
+ struct ib_mr *mr;
+ u64 addr;
+ u64 length;
+ int mw_access_flags;
+};
+
+struct ib_send_wr {
+ struct ib_send_wr *next;
+ u64 wr_id;
+ struct ib_sge *sg_list;
+ int num_sge;
+ enum ib_wr_opcode opcode;
+ int send_flags;
+ union {
+ __be32 imm_data;
+ u32 invalidate_rkey;
+ } ex;
+ union {
+ struct {
+ u64 remote_addr;
+ u32 rkey;
+ } rdma;
+ struct {
+ u64 remote_addr;
+ u64 compare_add;
+ u64 swap;
+ u64 compare_add_mask;
+ u64 swap_mask;
+ u32 rkey;
+ } atomic;
+ struct {
+ struct ib_ah *ah;
+ void *header;
+ int hlen;
+ int mss;
+ u32 remote_qpn;
+ u32 remote_qkey;
+ u16 pkey_index; /* valid for GSI only */
+ u8 port_num; /* valid for DR SMPs on switch only */
+ } ud;
+ struct {
+ u64 iova_start;
+ struct ib_fast_reg_page_list *page_list;
+ unsigned int page_shift;
+ unsigned int page_list_len;
+ u32 length;
+ int access_flags;
+ u32 rkey;
+ } fast_reg;
+ struct {
+ struct ib_mw *mw;
+ /* The new rkey for the memory window. */
+ u32 rkey;
+ struct ib_mw_bind_info bind_info;
+ } bind_mw;
+ struct {
+ struct ib_sig_attrs *sig_attrs;
+ struct ib_mr *sig_mr;
+ int access_flags;
+ struct ib_sge *prot;
+ } sig_handover;
+ } wr;
+ u32 xrc_remote_srq_num; /* XRC TGT QPs only */
+};
+
+struct ib_recv_wr {
+ struct ib_recv_wr *next;
+ u64 wr_id;
+ struct ib_sge *sg_list;
+ int num_sge;
+};
+
+enum ib_access_flags {
+ IB_ACCESS_LOCAL_WRITE = 1,
+ IB_ACCESS_REMOTE_WRITE = (1<<1),
+ IB_ACCESS_REMOTE_READ = (1<<2),
+ IB_ACCESS_REMOTE_ATOMIC = (1<<3),
+ IB_ACCESS_MW_BIND = (1<<4),
+ IB_ZERO_BASED = (1<<5),
+ IB_ACCESS_ON_DEMAND = (1<<6),
+};
+
+struct ib_phys_buf {
+ u64 addr;
+ u64 size;
+};
+
+struct ib_mr_attr {
+ struct ib_pd *pd;
+ u64 device_virt_addr;
+ u64 size;
+ int mr_access_flags;
+ u32 lkey;
+ u32 rkey;
+};
+
+enum ib_mr_rereg_flags {
+ IB_MR_REREG_TRANS = 1,
+ IB_MR_REREG_PD = (1<<1),
+ IB_MR_REREG_ACCESS = (1<<2),
+ IB_MR_REREG_SUPPORTED = ((IB_MR_REREG_ACCESS << 1) - 1)
+};
+
+/**
+ * struct ib_mw_bind - Parameters for a type 1 memory window bind operation.
+ * @wr_id: Work request id.
+ * @send_flags: Flags from ib_send_flags enum.
+ * @bind_info: More parameters of the bind operation.
+ */
+struct ib_mw_bind {
+ u64 wr_id;
+ int send_flags;
+ struct ib_mw_bind_info bind_info;
+};
+
+struct ib_fmr_attr {
+ int max_pages;
+ int max_maps;
+ u8 page_shift;
+};
+
+struct ib_umem;
+
+struct ib_ucontext {
+ struct ib_device *device;
+ struct list_head pd_list;
+ struct list_head mr_list;
+ struct list_head mw_list;
+ struct list_head cq_list;
+ struct list_head qp_list;
+ struct list_head srq_list;
+ struct list_head ah_list;
+ struct list_head xrcd_list;
+ struct list_head rule_list;
+ int closing;
+
+ struct pid *tgid;
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+ struct rb_root umem_tree;
+ /*
+ * Protects .umem_rbroot and tree, as well as odp_mrs_count and
+ * mmu notifiers registration.
+ */
+ struct rw_semaphore umem_rwsem;
+ void (*invalidate_range)(struct ib_umem *umem,
+ unsigned long start, unsigned long end);
+
+ struct mmu_notifier mn;
+ atomic_t notifier_count;
+ /* A list of umems that don't have private mmu notifier counters yet. */
+ struct list_head no_private_counters;
+ int odp_mrs_count;
+#endif
+};
+
+struct ib_uobject {
+ u64 user_handle; /* handle given to us by userspace */
+ struct ib_ucontext *context; /* associated user context */
+ void *object; /* containing object */
+ struct list_head list; /* link to context's list */
+ int id; /* index into kernel idr */
+ struct kref ref;
+ struct rw_semaphore mutex; /* protects .live */
+ int live;
+};
+
+struct ib_udata {
+ const void __user *inbuf;
+ void __user *outbuf;
+ size_t inlen;
+ size_t outlen;
+};
+
+struct ib_pd {
+ struct ib_device *device;
+ struct ib_uobject *uobject;
+ atomic_t usecnt; /* count all resources */
+};
+
+struct ib_xrcd {
+ struct ib_device *device;
+ atomic_t usecnt; /* count all exposed resources */
+ struct inode *inode;
+
+ struct mutex tgt_qp_mutex;
+ struct list_head tgt_qp_list;
+};
+
+struct ib_ah {
+ struct ib_device *device;
+ struct ib_pd *pd;
+ struct ib_uobject *uobject;
+};
+
+typedef void (*ib_comp_handler)(struct ib_cq *cq, void *cq_context);
+
+struct ib_cq {
+ struct ib_device *device;
+ struct ib_uobject *uobject;
+ ib_comp_handler comp_handler;
+ void (*event_handler)(struct ib_event *, void *);
+ void *cq_context;
+ int cqe;
+ atomic_t usecnt; /* count number of work queues */
+};
+
+struct ib_srq {
+ struct ib_device *device;
+ struct ib_pd *pd;
+ struct ib_uobject *uobject;
+ void (*event_handler)(struct ib_event *, void *);
+ void *srq_context;
+ enum ib_srq_type srq_type;
+ atomic_t usecnt;
+
+ union {
+ struct {
+ struct ib_xrcd *xrcd;
+ struct ib_cq *cq;
+ u32 srq_num;
+ } xrc;
+ } ext;
+};
+
+struct ib_qp {
+ struct ib_device *device;
+ struct ib_pd *pd;
+ struct ib_cq *send_cq;
+ struct ib_cq *recv_cq;
+ struct ib_srq *srq;
+ struct ib_xrcd *xrcd; /* XRC TGT QPs only */
+ struct list_head xrcd_list;
+ /* count times opened, mcast attaches, flow attaches */
+ atomic_t usecnt;
+ struct list_head open_list;
+ struct ib_qp *real_qp;
+ struct ib_uobject *uobject;
+ void (*event_handler)(struct ib_event *, void *);
+ void *qp_context;
+ u32 qp_num;
+ enum ib_qp_type qp_type;
+};
+
+struct ib_mr {
+ struct ib_device *device;
+ struct ib_pd *pd;
+ struct ib_uobject *uobject;
+ u32 lkey;
+ u32 rkey;
+ atomic_t usecnt; /* count number of MWs */
+};
+
+struct ib_mw {
+ struct ib_device *device;
+ struct ib_pd *pd;
+ struct ib_uobject *uobject;
+ u32 rkey;
+ enum ib_mw_type type;
+};
+
+struct ib_fmr {
+ struct ib_device *device;
+ struct ib_pd *pd;
+ struct list_head list;
+ u32 lkey;
+ u32 rkey;
+};
+
+/* Supported steering options */
+enum ib_flow_attr_type {
+ /* steering according to rule specifications */
+ IB_FLOW_ATTR_NORMAL = 0x0,
+ /* default unicast and multicast rule -
+ * receive all Eth traffic which isn't steered to any QP
+ */
+ IB_FLOW_ATTR_ALL_DEFAULT = 0x1,
+ /* default multicast rule -
+ * receive all Eth multicast traffic which isn't steered to any QP
+ */
+ IB_FLOW_ATTR_MC_DEFAULT = 0x2,
+ /* sniffer rule - receive all port traffic */
+ IB_FLOW_ATTR_SNIFFER = 0x3
+};
+
+/* Supported steering header types */
+enum ib_flow_spec_type {
+ /* L2 headers*/
+ IB_FLOW_SPEC_ETH = 0x20,
+ IB_FLOW_SPEC_IB = 0x22,
+ /* L3 header*/
+ IB_FLOW_SPEC_IPV4 = 0x30,
+ /* L4 headers*/
+ IB_FLOW_SPEC_TCP = 0x40,
+ IB_FLOW_SPEC_UDP = 0x41
+};
+#define IB_FLOW_SPEC_LAYER_MASK 0xF0
+#define IB_FLOW_SPEC_SUPPORT_LAYERS 4
+
+/* Flow steering rule priority is set according to it's domain.
+ * Lower domain value means higher priority.
+ */
+enum ib_flow_domain {
+ IB_FLOW_DOMAIN_USER,
+ IB_FLOW_DOMAIN_ETHTOOL,
+ IB_FLOW_DOMAIN_RFS,
+ IB_FLOW_DOMAIN_NIC,
+ IB_FLOW_DOMAIN_NUM /* Must be last */
+};
+
+struct ib_flow_eth_filter {
+ u8 dst_mac[6];
+ u8 src_mac[6];
+ __be16 ether_type;
+ __be16 vlan_tag;
+};
+
+struct ib_flow_spec_eth {
+ enum ib_flow_spec_type type;
+ u16 size;
+ struct ib_flow_eth_filter val;
+ struct ib_flow_eth_filter mask;
+};
+
+struct ib_flow_ib_filter {
+ __be16 dlid;
+ __u8 sl;
+};
+
+struct ib_flow_spec_ib {
+ enum ib_flow_spec_type type;
+ u16 size;
+ struct ib_flow_ib_filter val;
+ struct ib_flow_ib_filter mask;
+};
+
+struct ib_flow_ipv4_filter {
+ __be32 src_ip;
+ __be32 dst_ip;
+};
+
+struct ib_flow_spec_ipv4 {
+ enum ib_flow_spec_type type;
+ u16 size;
+ struct ib_flow_ipv4_filter val;
+ struct ib_flow_ipv4_filter mask;
+};
+
+struct ib_flow_tcp_udp_filter {
+ __be16 dst_port;
+ __be16 src_port;
+};
+
+struct ib_flow_spec_tcp_udp {
+ enum ib_flow_spec_type type;
+ u16 size;
+ struct ib_flow_tcp_udp_filter val;
+ struct ib_flow_tcp_udp_filter mask;
+};
+
+union ib_flow_spec {
+ struct {
+ enum ib_flow_spec_type type;
+ u16 size;
+ };
+ struct ib_flow_spec_eth eth;
+ struct ib_flow_spec_ib ib;
+ struct ib_flow_spec_ipv4 ipv4;
+ struct ib_flow_spec_tcp_udp tcp_udp;
+};
+
+struct ib_flow_attr {
+ enum ib_flow_attr_type type;
+ u16 size;
+ u16 priority;
+ u32 flags;
+ u8 num_of_specs;
+ u8 port;
+ /* Following are the optional layers according to user request
+ * struct ib_flow_spec_xxx
+ * struct ib_flow_spec_yyy
+ */
+};
+
+struct ib_flow {
+ struct ib_qp *qp;
+ struct ib_uobject *uobject;
+};
+
+struct ib_mad;
+struct ib_grh;
+
+enum ib_process_mad_flags {
+ IB_MAD_IGNORE_MKEY = 1,
+ IB_MAD_IGNORE_BKEY = 2,
+ IB_MAD_IGNORE_ALL = IB_MAD_IGNORE_MKEY | IB_MAD_IGNORE_BKEY
+};
+
+enum ib_mad_result {
+ IB_MAD_RESULT_FAILURE = 0, /* (!SUCCESS is the important flag) */
+ IB_MAD_RESULT_SUCCESS = 1 << 0, /* MAD was successfully processed */
+ IB_MAD_RESULT_REPLY = 1 << 1, /* Reply packet needs to be sent */
+ IB_MAD_RESULT_CONSUMED = 1 << 2 /* Packet consumed: stop processing */
+};
+
+#define IB_DEVICE_NAME_MAX 64
+
+struct ib_cache {
+ rwlock_t lock;
+ struct ib_event_handler event_handler;
+ struct ib_pkey_cache **pkey_cache;
+ struct ib_gid_cache **gid_cache;
+ u8 *lmc_cache;
+};
+
+struct ib_dma_mapping_ops {
+ int (*mapping_error)(struct ib_device *dev,
+ u64 dma_addr);
+ u64 (*map_single)(struct ib_device *dev,
+ void *ptr, size_t size,
+ enum dma_data_direction direction);
+ void (*unmap_single)(struct ib_device *dev,
+ u64 addr, size_t size,
+ enum dma_data_direction direction);
+ u64 (*map_page)(struct ib_device *dev,
+ struct page *page, unsigned long offset,
+ size_t size,
+ enum dma_data_direction direction);
+ void (*unmap_page)(struct ib_device *dev,
+ u64 addr, size_t size,
+ enum dma_data_direction direction);
+ int (*map_sg)(struct ib_device *dev,
+ struct scatterlist *sg, int nents,
+ enum dma_data_direction direction);
+ void (*unmap_sg)(struct ib_device *dev,
+ struct scatterlist *sg, int nents,
+ enum dma_data_direction direction);
+ void (*sync_single_for_cpu)(struct ib_device *dev,
+ u64 dma_handle,
+ size_t size,
+ enum dma_data_direction dir);
+ void (*sync_single_for_device)(struct ib_device *dev,
+ u64 dma_handle,
+ size_t size,
+ enum dma_data_direction dir);
+ void *(*alloc_coherent)(struct ib_device *dev,
+ size_t size,
+ u64 *dma_handle,
+ gfp_t flag);
+ void (*free_coherent)(struct ib_device *dev,
+ size_t size, void *cpu_addr,
+ u64 dma_handle);
+};
+
+struct iw_cm_verbs;
+
+struct ib_device {
+ struct device *dma_device;
+
+ char name[IB_DEVICE_NAME_MAX];
+
+ struct list_head event_handler_list;
+ spinlock_t event_handler_lock;
+
+ spinlock_t client_data_lock;
+ struct list_head core_list;
+ struct list_head client_data_list;
+
+ struct ib_cache cache;
+ int *pkey_tbl_len;
+ int *gid_tbl_len;
+
+ int num_comp_vectors;
+
+ struct iw_cm_verbs *iwcm;
+
+ int (*get_protocol_stats)(struct ib_device *device,
+ union rdma_protocol_stats *stats);
+ int (*query_device)(struct ib_device *device,
+ struct ib_device_attr *device_attr);
+ int (*query_port)(struct ib_device *device,
+ u8 port_num,
+ struct ib_port_attr *port_attr);
+ enum rdma_link_layer (*get_link_layer)(struct ib_device *device,
+ u8 port_num);
+ int (*query_gid)(struct ib_device *device,
+ u8 port_num, int index,
+ union ib_gid *gid);
+ int (*query_pkey)(struct ib_device *device,
+ u8 port_num, u16 index, u16 *pkey);
+ int (*modify_device)(struct ib_device *device,
+ int device_modify_mask,
+ struct ib_device_modify *device_modify);
+ int (*modify_port)(struct ib_device *device,
+ u8 port_num, int port_modify_mask,
+ struct ib_port_modify *port_modify);
+ struct ib_ucontext * (*alloc_ucontext)(struct ib_device *device,
+ struct ib_udata *udata);
+ int (*dealloc_ucontext)(struct ib_ucontext *context);
+ int (*mmap)(struct ib_ucontext *context,
+ struct vm_area_struct *vma);
+ struct ib_pd * (*alloc_pd)(struct ib_device *device,
+ struct ib_ucontext *context,
+ struct ib_udata *udata);
+ int (*dealloc_pd)(struct ib_pd *pd);
+ struct ib_ah * (*create_ah)(struct ib_pd *pd,
+ struct ib_ah_attr *ah_attr);
+ int (*modify_ah)(struct ib_ah *ah,
+ struct ib_ah_attr *ah_attr);
+ int (*query_ah)(struct ib_ah *ah,
+ struct ib_ah_attr *ah_attr);
+ int (*destroy_ah)(struct ib_ah *ah);
+ struct ib_srq * (*create_srq)(struct ib_pd *pd,
+ struct ib_srq_init_attr *srq_init_attr,
+ struct ib_udata *udata);
+ int (*modify_srq)(struct ib_srq *srq,
+ struct ib_srq_attr *srq_attr,
+ enum ib_srq_attr_mask srq_attr_mask,
+ struct ib_udata *udata);
+ int (*query_srq)(struct ib_srq *srq,
+ struct ib_srq_attr *srq_attr);
+ int (*destroy_srq)(struct ib_srq *srq);
+ int (*post_srq_recv)(struct ib_srq *srq,
+ struct ib_recv_wr *recv_wr,
+ struct ib_recv_wr **bad_recv_wr);
+ struct ib_qp * (*create_qp)(struct ib_pd *pd,
+ struct ib_qp_init_attr *qp_init_attr,
+ struct ib_udata *udata);
+ int (*modify_qp)(struct ib_qp *qp,
+ struct ib_qp_attr *qp_attr,
+ int qp_attr_mask,
+ struct ib_udata *udata);
+ int (*query_qp)(struct ib_qp *qp,
+ struct ib_qp_attr *qp_attr,
+ int qp_attr_mask,
+ struct ib_qp_init_attr *qp_init_attr);
+ int (*destroy_qp)(struct ib_qp *qp);
+ int (*post_send)(struct ib_qp *qp,
+ struct ib_send_wr *send_wr,
+ struct ib_send_wr **bad_send_wr);
+ int (*post_recv)(struct ib_qp *qp,
+ struct ib_recv_wr *recv_wr,
+ struct ib_recv_wr **bad_recv_wr);
+ struct ib_cq * (*create_cq)(struct ib_device *device, int cqe,
+ int comp_vector,
+ struct ib_ucontext *context,
+ struct ib_udata *udata);
+ int (*modify_cq)(struct ib_cq *cq, u16 cq_count,
+ u16 cq_period);
+ int (*destroy_cq)(struct ib_cq *cq);
+ int (*resize_cq)(struct ib_cq *cq, int cqe,
+ struct ib_udata *udata);
+ int (*poll_cq)(struct ib_cq *cq, int num_entries,
+ struct ib_wc *wc);
+ int (*peek_cq)(struct ib_cq *cq, int wc_cnt);
+ int (*req_notify_cq)(struct ib_cq *cq,
+ enum ib_cq_notify_flags flags);
+ int (*req_ncomp_notif)(struct ib_cq *cq,
+ int wc_cnt);
+ struct ib_mr * (*get_dma_mr)(struct ib_pd *pd,
+ int mr_access_flags);
+ struct ib_mr * (*reg_phys_mr)(struct ib_pd *pd,
+ struct ib_phys_buf *phys_buf_array,
+ int num_phys_buf,
+ int mr_access_flags,
+ u64 *iova_start);
+ struct ib_mr * (*reg_user_mr)(struct ib_pd *pd,
+ u64 start, u64 length,
+ u64 virt_addr,
+ int mr_access_flags,
+ struct ib_udata *udata);
+ int (*rereg_user_mr)(struct ib_mr *mr,
+ int flags,
+ u64 start, u64 length,
+ u64 virt_addr,
+ int mr_access_flags,
+ struct ib_pd *pd,
+ struct ib_udata *udata);
+ int (*query_mr)(struct ib_mr *mr,
+ struct ib_mr_attr *mr_attr);
+ int (*dereg_mr)(struct ib_mr *mr);
+ int (*destroy_mr)(struct ib_mr *mr);
+ struct ib_mr * (*create_mr)(struct ib_pd *pd,
+ struct ib_mr_init_attr *mr_init_attr);
+ struct ib_mr * (*alloc_fast_reg_mr)(struct ib_pd *pd,
+ int max_page_list_len);
+ struct ib_fast_reg_page_list * (*alloc_fast_reg_page_list)(struct ib_device *device,
+ int page_list_len);
+ void (*free_fast_reg_page_list)(struct ib_fast_reg_page_list *page_list);
+ int (*rereg_phys_mr)(struct ib_mr *mr,
+ int mr_rereg_mask,
+ struct ib_pd *pd,
+ struct ib_phys_buf *phys_buf_array,
+ int num_phys_buf,
+ int mr_access_flags,
+ u64 *iova_start);
+ struct ib_mw * (*alloc_mw)(struct ib_pd *pd,
+ enum ib_mw_type type);
+ int (*bind_mw)(struct ib_qp *qp,
+ struct ib_mw *mw,
+ struct ib_mw_bind *mw_bind);
+ int (*dealloc_mw)(struct ib_mw *mw);
+ struct ib_fmr * (*alloc_fmr)(struct ib_pd *pd,
+ int mr_access_flags,
+ struct ib_fmr_attr *fmr_attr);
+ int (*map_phys_fmr)(struct ib_fmr *fmr,
+ u64 *page_list, int list_len,
+ u64 iova);
+ int (*unmap_fmr)(struct list_head *fmr_list);
+ int (*dealloc_fmr)(struct ib_fmr *fmr);
+ int (*attach_mcast)(struct ib_qp *qp,
+ union ib_gid *gid,
+ u16 lid);
+ int (*detach_mcast)(struct ib_qp *qp,
+ union ib_gid *gid,
+ u16 lid);
+ int (*process_mad)(struct ib_device *device,
+ int process_mad_flags,
+ u8 port_num,
+ struct ib_wc *in_wc,
+ struct ib_grh *in_grh,
+ struct ib_mad *in_mad,
+ struct ib_mad *out_mad);
+ struct ib_xrcd * (*alloc_xrcd)(struct ib_device *device,
+ struct ib_ucontext *ucontext,
+ struct ib_udata *udata);
+ int (*dealloc_xrcd)(struct ib_xrcd *xrcd);
+ struct ib_flow * (*create_flow)(struct ib_qp *qp,
+ struct ib_flow_attr
+ *flow_attr,
+ int domain);
+ int (*destroy_flow)(struct ib_flow *flow_id);
+ int (*check_mr_status)(struct ib_mr *mr, u32 check_mask,
+ struct ib_mr_status *mr_status);
+
+ struct ib_dma_mapping_ops *dma_ops;
+
+ struct module *owner;
+ struct device dev;
+ struct kobject *ports_parent;
+ struct list_head port_list;
+
+ enum {
+ IB_DEV_UNINITIALIZED,
+ IB_DEV_REGISTERED,
+ IB_DEV_UNREGISTERED
+ } reg_state;
+
+ int uverbs_abi_ver;
+ u64 uverbs_cmd_mask;
+ u64 uverbs_ex_cmd_mask;
+
+ char node_desc[64];
+ __be64 node_guid;
+ u32 local_dma_lkey;
+ u8 node_type;
+ u8 phys_port_cnt;
+};
+
+struct ib_client {
+ char *name;
+ void (*add) (struct ib_device *);
+ void (*remove)(struct ib_device *);
+
+ struct list_head list;
+};
+
+struct ib_device *ib_alloc_device(size_t size);
+void ib_dealloc_device(struct ib_device *device);
+
+int ib_register_device(struct ib_device *device,
+ int (*port_callback)(struct ib_device *,
+ u8, struct kobject *));
+void ib_unregister_device(struct ib_device *device);
+
+int ib_register_client (struct ib_client *client);
+void ib_unregister_client(struct ib_client *client);
+
+void *ib_get_client_data(struct ib_device *device, struct ib_client *client);
+void ib_set_client_data(struct ib_device *device, struct ib_client *client,
+ void *data);
+
+static inline int ib_copy_from_udata(void *dest, struct ib_udata *udata, size_t len)
+{
+ return copy_from_user(dest, udata->inbuf, len) ? -EFAULT : 0;
+}
+
+static inline int ib_copy_to_udata(struct ib_udata *udata, void *src, size_t len)
+{
+ return copy_to_user(udata->outbuf, src, len) ? -EFAULT : 0;
+}
+
+/**
+ * ib_modify_qp_is_ok - Check that the supplied attribute mask
+ * contains all required attributes and no attributes not allowed for
+ * the given QP state transition.
+ * @cur_state: Current QP state
+ * @next_state: Next QP state
+ * @type: QP type
+ * @mask: Mask of supplied QP attributes
+ * @ll : link layer of port
+ *
+ * This function is a helper function that a low-level driver's
+ * modify_qp method can use to validate the consumer's input. It
+ * checks that cur_state and next_state are valid QP states, that a
+ * transition from cur_state to next_state is allowed by the IB spec,
+ * and that the attribute mask supplied is allowed for the transition.
+ */
+int ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state,
+ enum ib_qp_type type, enum ib_qp_attr_mask mask,
+ enum rdma_link_layer ll);
+
+int ib_register_event_handler (struct ib_event_handler *event_handler);
+int ib_unregister_event_handler(struct ib_event_handler *event_handler);
+void ib_dispatch_event(struct ib_event *event);
+
+int ib_query_device(struct ib_device *device,
+ struct ib_device_attr *device_attr);
+
+int ib_query_port(struct ib_device *device,
+ u8 port_num, struct ib_port_attr *port_attr);
+
+enum rdma_link_layer rdma_port_get_link_layer(struct ib_device *device,
+ u8 port_num);
+
+int ib_query_gid(struct ib_device *device,
+ u8 port_num, int index, union ib_gid *gid);
+
+int ib_query_pkey(struct ib_device *device,
+ u8 port_num, u16 index, u16 *pkey);
+
+int ib_modify_device(struct ib_device *device,
+ int device_modify_mask,
+ struct ib_device_modify *device_modify);
+
+int ib_modify_port(struct ib_device *device,
+ u8 port_num, int port_modify_mask,
+ struct ib_port_modify *port_modify);
+
+int ib_find_gid(struct ib_device *device, union ib_gid *gid,
+ u8 *port_num, u16 *index);
+
+int ib_find_pkey(struct ib_device *device,
+ u8 port_num, u16 pkey, u16 *index);
+
+/**
+ * ib_alloc_pd - Allocates an unused protection domain.
+ * @device: The device on which to allocate the protection domain.
+ *
+ * A protection domain object provides an association between QPs, shared
+ * receive queues, address handles, memory regions, and memory windows.
+ */
+struct ib_pd *ib_alloc_pd(struct ib_device *device);
+
+/**
+ * ib_dealloc_pd - Deallocates a protection domain.
+ * @pd: The protection domain to deallocate.
+ */
+int ib_dealloc_pd(struct ib_pd *pd);
+
+/**
+ * ib_create_ah - Creates an address handle for the given address vector.
+ * @pd: The protection domain associated with the address handle.
+ * @ah_attr: The attributes of the address vector.
+ *
+ * The address handle is used to reference a local or global destination
+ * in all UD QP post sends.
+ */
+struct ib_ah *ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr);
+
+/**
+ * ib_init_ah_from_wc - Initializes address handle attributes from a
+ * work completion.
+ * @device: Device on which the received message arrived.
+ * @port_num: Port on which the received message arrived.
+ * @wc: Work completion associated with the received message.
+ * @grh: References the received global route header. This parameter is
+ * ignored unless the work completion indicates that the GRH is valid.
+ * @ah_attr: Returned attributes that can be used when creating an address
+ * handle for replying to the message.
+ */
+int ib_init_ah_from_wc(struct ib_device *device, u8 port_num, struct ib_wc *wc,
+ struct ib_grh *grh, struct ib_ah_attr *ah_attr);
+
+/**
+ * ib_create_ah_from_wc - Creates an address handle associated with the
+ * sender of the specified work completion.
+ * @pd: The protection domain associated with the address handle.
+ * @wc: Work completion information associated with a received message.
+ * @grh: References the received global route header. This parameter is
+ * ignored unless the work completion indicates that the GRH is valid.
+ * @port_num: The outbound port number to associate with the address.
+ *
+ * The address handle is used to reference a local or global destination
+ * in all UD QP post sends.
+ */
+struct ib_ah *ib_create_ah_from_wc(struct ib_pd *pd, struct ib_wc *wc,
+ struct ib_grh *grh, u8 port_num);
+
+/**
+ * ib_modify_ah - Modifies the address vector associated with an address
+ * handle.
+ * @ah: The address handle to modify.
+ * @ah_attr: The new address vector attributes to associate with the
+ * address handle.
+ */
+int ib_modify_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr);
+
+/**
+ * ib_query_ah - Queries the address vector associated with an address
+ * handle.
+ * @ah: The address handle to query.
+ * @ah_attr: The address vector attributes associated with the address
+ * handle.
+ */
+int ib_query_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr);
+
+/**
+ * ib_destroy_ah - Destroys an address handle.
+ * @ah: The address handle to destroy.
+ */
+int ib_destroy_ah(struct ib_ah *ah);
+
+/**
+ * ib_create_srq - Creates a SRQ associated with the specified protection
+ * domain.
+ * @pd: The protection domain associated with the SRQ.
+ * @srq_init_attr: A list of initial attributes required to create the
+ * SRQ. If SRQ creation succeeds, then the attributes are updated to
+ * the actual capabilities of the created SRQ.
+ *
+ * srq_attr->max_wr and srq_attr->max_sge are read the determine the
+ * requested size of the SRQ, and set to the actual values allocated
+ * on return. If ib_create_srq() succeeds, then max_wr and max_sge
+ * will always be at least as large as the requested values.
+ */
+struct ib_srq *ib_create_srq(struct ib_pd *pd,
+ struct ib_srq_init_attr *srq_init_attr);
+
+/**
+ * ib_modify_srq - Modifies the attributes for the specified SRQ.
+ * @srq: The SRQ to modify.
+ * @srq_attr: On input, specifies the SRQ attributes to modify. On output,
+ * the current values of selected SRQ attributes are returned.
+ * @srq_attr_mask: A bit-mask used to specify which attributes of the SRQ
+ * are being modified.
+ *
+ * The mask may contain IB_SRQ_MAX_WR to resize the SRQ and/or
+ * IB_SRQ_LIMIT to set the SRQ's limit and request notification when
+ * the number of receives queued drops below the limit.
+ */
+int ib_modify_srq(struct ib_srq *srq,
+ struct ib_srq_attr *srq_attr,
+ enum ib_srq_attr_mask srq_attr_mask);
+
+/**
+ * ib_query_srq - Returns the attribute list and current values for the
+ * specified SRQ.
+ * @srq: The SRQ to query.
+ * @srq_attr: The attributes of the specified SRQ.
+ */
+int ib_query_srq(struct ib_srq *srq,
+ struct ib_srq_attr *srq_attr);
+
+/**
+ * ib_destroy_srq - Destroys the specified SRQ.
+ * @srq: The SRQ to destroy.
+ */
+int ib_destroy_srq(struct ib_srq *srq);
+
+/**
+ * ib_post_srq_recv - Posts a list of work requests to the specified SRQ.
+ * @srq: The SRQ to post the work request on.
+ * @recv_wr: A list of work requests to post on the receive queue.
+ * @bad_recv_wr: On an immediate failure, this parameter will reference
+ * the work request that failed to be posted on the QP.
+ */
+static inline int ib_post_srq_recv(struct ib_srq *srq,
+ struct ib_recv_wr *recv_wr,
+ struct ib_recv_wr **bad_recv_wr)
+{
+ return srq->device->post_srq_recv(srq, recv_wr, bad_recv_wr);
+}
+
+/**
+ * ib_create_qp - Creates a QP associated with the specified protection
+ * domain.
+ * @pd: The protection domain associated with the QP.
+ * @qp_init_attr: A list of initial attributes required to create the
+ * QP. If QP creation succeeds, then the attributes are updated to
+ * the actual capabilities of the created QP.
+ */
+struct ib_qp *ib_create_qp(struct ib_pd *pd,
+ struct ib_qp_init_attr *qp_init_attr);
+
+/**
+ * ib_modify_qp - Modifies the attributes for the specified QP and then
+ * transitions the QP to the given state.
+ * @qp: The QP to modify.
+ * @qp_attr: On input, specifies the QP attributes to modify. On output,
+ * the current values of selected QP attributes are returned.
+ * @qp_attr_mask: A bit-mask used to specify which attributes of the QP
+ * are being modified.
+ */
+int ib_modify_qp(struct ib_qp *qp,
+ struct ib_qp_attr *qp_attr,
+ int qp_attr_mask);
+
+/**
+ * ib_query_qp - Returns the attribute list and current values for the
+ * specified QP.
+ * @qp: The QP to query.
+ * @qp_attr: The attributes of the specified QP.
+ * @qp_attr_mask: A bit-mask used to select specific attributes to query.
+ * @qp_init_attr: Additional attributes of the selected QP.
+ *
+ * The qp_attr_mask may be used to limit the query to gathering only the
+ * selected attributes.
+ */
+int ib_query_qp(struct ib_qp *qp,
+ struct ib_qp_attr *qp_attr,
+ int qp_attr_mask,
+ struct ib_qp_init_attr *qp_init_attr);
+
+/**
+ * ib_destroy_qp - Destroys the specified QP.
+ * @qp: The QP to destroy.
+ */
+int ib_destroy_qp(struct ib_qp *qp);
+
+/**
+ * ib_open_qp - Obtain a reference to an existing sharable QP.
+ * @xrcd - XRC domain
+ * @qp_open_attr: Attributes identifying the QP to open.
+ *
+ * Returns a reference to a sharable QP.
+ */
+struct ib_qp *ib_open_qp(struct ib_xrcd *xrcd,
+ struct ib_qp_open_attr *qp_open_attr);
+
+/**
+ * ib_close_qp - Release an external reference to a QP.
+ * @qp: The QP handle to release
+ *
+ * The opened QP handle is released by the caller. The underlying
+ * shared QP is not destroyed until all internal references are released.
+ */
+int ib_close_qp(struct ib_qp *qp);
+
+/**
+ * ib_post_send - Posts a list of work requests to the send queue of
+ * the specified QP.
+ * @qp: The QP to post the work request on.
+ * @send_wr: A list of work requests to post on the send queue.
+ * @bad_send_wr: On an immediate failure, this parameter will reference
+ * the work request that failed to be posted on the QP.
+ *
+ * While IBA Vol. 1 section 11.4.1.1 specifies that if an immediate
+ * error is returned, the QP state shall not be affected,
+ * ib_post_send() will return an immediate error after queueing any
+ * earlier work requests in the list.
+ */
+static inline int ib_post_send(struct ib_qp *qp,
+ struct ib_send_wr *send_wr,
+ struct ib_send_wr **bad_send_wr)
+{
+ return qp->device->post_send(qp, send_wr, bad_send_wr);
+}
+
+/**
+ * ib_post_recv - Posts a list of work requests to the receive queue of
+ * the specified QP.
+ * @qp: The QP to post the work request on.
+ * @recv_wr: A list of work requests to post on the receive queue.
+ * @bad_recv_wr: On an immediate failure, this parameter will reference
+ * the work request that failed to be posted on the QP.
+ */
+static inline int ib_post_recv(struct ib_qp *qp,
+ struct ib_recv_wr *recv_wr,
+ struct ib_recv_wr **bad_recv_wr)
+{
+ return qp->device->post_recv(qp, recv_wr, bad_recv_wr);
+}
+
+/**
+ * ib_create_cq - Creates a CQ on the specified device.
+ * @device: The device on which to create the CQ.
+ * @comp_handler: A user-specified callback that is invoked when a
+ * completion event occurs on the CQ.
+ * @event_handler: A user-specified callback that is invoked when an
+ * asynchronous event not associated with a completion occurs on the CQ.
+ * @cq_context: Context associated with the CQ returned to the user via
+ * the associated completion and event handlers.
+ * @cqe: The minimum size of the CQ.
+ * @comp_vector - Completion vector used to signal completion events.
+ * Must be >= 0 and < context->num_comp_vectors.
+ *
+ * Users can examine the cq structure to determine the actual CQ size.
+ */
+struct ib_cq *ib_create_cq(struct ib_device *device,
+ ib_comp_handler comp_handler,
+ void (*event_handler)(struct ib_event *, void *),
+ void *cq_context, int cqe, int comp_vector);
+
+/**
+ * ib_resize_cq - Modifies the capacity of the CQ.
+ * @cq: The CQ to resize.
+ * @cqe: The minimum size of the CQ.
+ *
+ * Users can examine the cq structure to determine the actual CQ size.
+ */
+int ib_resize_cq(struct ib_cq *cq, int cqe);
+
+/**
+ * ib_modify_cq - Modifies moderation params of the CQ
+ * @cq: The CQ to modify.
+ * @cq_count: number of CQEs that will trigger an event
+ * @cq_period: max period of time in usec before triggering an event
+ *
+ */
+int ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period);
+
+/**
+ * ib_destroy_cq - Destroys the specified CQ.
+ * @cq: The CQ to destroy.
+ */
+int ib_destroy_cq(struct ib_cq *cq);
+
+/**
+ * ib_poll_cq - poll a CQ for completion(s)
+ * @cq:the CQ being polled
+ * @num_entries:maximum number of completions to return
+ * @wc:array of at least @num_entries &struct ib_wc where completions
+ * will be returned
+ *
+ * Poll a CQ for (possibly multiple) completions. If the return value
+ * is < 0, an error occurred. If the return value is >= 0, it is the
+ * number of completions returned. If the return value is
+ * non-negative and < num_entries, then the CQ was emptied.
+ */
+static inline int ib_poll_cq(struct ib_cq *cq, int num_entries,
+ struct ib_wc *wc)
+{
+ return cq->device->poll_cq(cq, num_entries, wc);
+}
+
+/**
+ * ib_peek_cq - Returns the number of unreaped completions currently
+ * on the specified CQ.
+ * @cq: The CQ to peek.
+ * @wc_cnt: A minimum number of unreaped completions to check for.
+ *
+ * If the number of unreaped completions is greater than or equal to wc_cnt,
+ * this function returns wc_cnt, otherwise, it returns the actual number of
+ * unreaped completions.
+ */
+int ib_peek_cq(struct ib_cq *cq, int wc_cnt);
+
+/**
+ * ib_req_notify_cq - Request completion notification on a CQ.
+ * @cq: The CQ to generate an event for.
+ * @flags:
+ * Must contain exactly one of %IB_CQ_SOLICITED or %IB_CQ_NEXT_COMP
+ * to request an event on the next solicited event or next work
+ * completion at any type, respectively. %IB_CQ_REPORT_MISSED_EVENTS
+ * may also be |ed in to request a hint about missed events, as
+ * described below.
+ *
+ * Return Value:
+ * < 0 means an error occurred while requesting notification
+ * == 0 means notification was requested successfully, and if
+ * IB_CQ_REPORT_MISSED_EVENTS was passed in, then no events
+ * were missed and it is safe to wait for another event. In
+ * this case is it guaranteed that any work completions added
+ * to the CQ since the last CQ poll will trigger a completion
+ * notification event.
+ * > 0 is only returned if IB_CQ_REPORT_MISSED_EVENTS was passed
+ * in. It means that the consumer must poll the CQ again to
+ * make sure it is empty to avoid missing an event because of a
+ * race between requesting notification and an entry being
+ * added to the CQ. This return value means it is possible
+ * (but not guaranteed) that a work completion has been added
+ * to the CQ since the last poll without triggering a
+ * completion notification event.
+ */
+static inline int ib_req_notify_cq(struct ib_cq *cq,
+ enum ib_cq_notify_flags flags)
+{
+ return cq->device->req_notify_cq(cq, flags);
+}
+
+/**
+ * ib_req_ncomp_notif - Request completion notification when there are
+ * at least the specified number of unreaped completions on the CQ.
+ * @cq: The CQ to generate an event for.
+ * @wc_cnt: The number of unreaped completions that should be on the
+ * CQ before an event is generated.
+ */
+static inline int ib_req_ncomp_notif(struct ib_cq *cq, int wc_cnt)
+{
+ return cq->device->req_ncomp_notif ?
+ cq->device->req_ncomp_notif(cq, wc_cnt) :
+ -ENOSYS;
+}
+
+/**
+ * ib_get_dma_mr - Returns a memory region for system memory that is
+ * usable for DMA.
+ * @pd: The protection domain associated with the memory region.
+ * @mr_access_flags: Specifies the memory access rights.
+ *
+ * Note that the ib_dma_*() functions defined below must be used
+ * to create/destroy addresses used with the Lkey or Rkey returned
+ * by ib_get_dma_mr().
+ */
+struct ib_mr *ib_get_dma_mr(struct ib_pd *pd, int mr_access_flags);
+
+/**
+ * ib_dma_mapping_error - check a DMA addr for error
+ * @dev: The device for which the dma_addr was created
+ * @dma_addr: The DMA address to check
+ */
+static inline int ib_dma_mapping_error(struct ib_device *dev, u64 dma_addr)
+{
+ if (dev->dma_ops)
+ return dev->dma_ops->mapping_error(dev, dma_addr);
+ return dma_mapping_error(dev->dma_device, dma_addr);
+}
+
+/**
+ * ib_dma_map_single - Map a kernel virtual address to DMA address
+ * @dev: The device for which the dma_addr is to be created
+ * @cpu_addr: The kernel virtual address
+ * @size: The size of the region in bytes
+ * @direction: The direction of the DMA
+ */
+static inline u64 ib_dma_map_single(struct ib_device *dev,
+ void *cpu_addr, size_t size,
+ enum dma_data_direction direction)
+{
+ if (dev->dma_ops)
+ return dev->dma_ops->map_single(dev, cpu_addr, size, direction);
+ return dma_map_single(dev->dma_device, cpu_addr, size, direction);
+}
+
+/**
+ * ib_dma_unmap_single - Destroy a mapping created by ib_dma_map_single()
+ * @dev: The device for which the DMA address was created
+ * @addr: The DMA address
+ * @size: The size of the region in bytes
+ * @direction: The direction of the DMA
+ */
+static inline void ib_dma_unmap_single(struct ib_device *dev,
+ u64 addr, size_t size,
+ enum dma_data_direction direction)
+{
+ if (dev->dma_ops)
+ dev->dma_ops->unmap_single(dev, addr, size, direction);
+ else
+ dma_unmap_single(dev->dma_device, addr, size, direction);
+}
+
+static inline u64 ib_dma_map_single_attrs(struct ib_device *dev,
+ void *cpu_addr, size_t size,
+ enum dma_data_direction direction,
+ struct dma_attrs *attrs)
+{
+ return dma_map_single_attrs(dev->dma_device, cpu_addr, size,
+ direction, attrs);
+}
+
+static inline void ib_dma_unmap_single_attrs(struct ib_device *dev,
+ u64 addr, size_t size,
+ enum dma_data_direction direction,
+ struct dma_attrs *attrs)
+{
+ return dma_unmap_single_attrs(dev->dma_device, addr, size,
+ direction, attrs);
+}
+
+/**
+ * ib_dma_map_page - Map a physical page to DMA address
+ * @dev: The device for which the dma_addr is to be created
+ * @page: The page to be mapped
+ * @offset: The offset within the page
+ * @size: The size of the region in bytes
+ * @direction: The direction of the DMA
+ */
+static inline u64 ib_dma_map_page(struct ib_device *dev,
+ struct page *page,
+ unsigned long offset,
+ size_t size,
+ enum dma_data_direction direction)
+{
+ if (dev->dma_ops)
+ return dev->dma_ops->map_page(dev, page, offset, size, direction);
+ return dma_map_page(dev->dma_device, page, offset, size, direction);
+}
+
+/**
+ * ib_dma_unmap_page - Destroy a mapping created by ib_dma_map_page()
+ * @dev: The device for which the DMA address was created
+ * @addr: The DMA address
+ * @size: The size of the region in bytes
+ * @direction: The direction of the DMA
+ */
+static inline void ib_dma_unmap_page(struct ib_device *dev,
+ u64 addr, size_t size,
+ enum dma_data_direction direction)
+{
+ if (dev->dma_ops)
+ dev->dma_ops->unmap_page(dev, addr, size, direction);
+ else
+ dma_unmap_page(dev->dma_device, addr, size, direction);
+}
+
+/**
+ * ib_dma_map_sg - Map a scatter/gather list to DMA addresses
+ * @dev: The device for which the DMA addresses are to be created
+ * @sg: The array of scatter/gather entries
+ * @nents: The number of scatter/gather entries
+ * @direction: The direction of the DMA
+ */
+static inline int ib_dma_map_sg(struct ib_device *dev,
+ struct scatterlist *sg, int nents,
+ enum dma_data_direction direction)
+{
+ if (dev->dma_ops)
+ return dev->dma_ops->map_sg(dev, sg, nents, direction);
+ return dma_map_sg(dev->dma_device, sg, nents, direction);
+}
+
+/**
+ * ib_dma_unmap_sg - Unmap a scatter/gather list of DMA addresses
+ * @dev: The device for which the DMA addresses were created
+ * @sg: The array of scatter/gather entries
+ * @nents: The number of scatter/gather entries
+ * @direction: The direction of the DMA
+ */
+static inline void ib_dma_unmap_sg(struct ib_device *dev,
+ struct scatterlist *sg, int nents,
+ enum dma_data_direction direction)
+{
+ if (dev->dma_ops)
+ dev->dma_ops->unmap_sg(dev, sg, nents, direction);
+ else
+ dma_unmap_sg(dev->dma_device, sg, nents, direction);
+}
+
+static inline int ib_dma_map_sg_attrs(struct ib_device *dev,
+ struct scatterlist *sg, int nents,
+ enum dma_data_direction direction,
+ struct dma_attrs *attrs)
+{
+ return dma_map_sg_attrs(dev->dma_device, sg, nents, direction, attrs);
+}
+
+static inline void ib_dma_unmap_sg_attrs(struct ib_device *dev,
+ struct scatterlist *sg, int nents,
+ enum dma_data_direction direction,
+ struct dma_attrs *attrs)
+{
+ dma_unmap_sg_attrs(dev->dma_device, sg, nents, direction, attrs);
+}
+/**
+ * ib_sg_dma_address - Return the DMA address from a scatter/gather entry
+ * @dev: The device for which the DMA addresses were created
+ * @sg: The scatter/gather entry
+ *
+ * Note: this function is obsolete. To do: change all occurrences of
+ * ib_sg_dma_address() into sg_dma_address().
+ */
+static inline u64 ib_sg_dma_address(struct ib_device *dev,
+ struct scatterlist *sg)
+{
+ return sg_dma_address(sg);
+}
+
+/**
+ * ib_sg_dma_len - Return the DMA length from a scatter/gather entry
+ * @dev: The device for which the DMA addresses were created
+ * @sg: The scatter/gather entry
+ *
+ * Note: this function is obsolete. To do: change all occurrences of
+ * ib_sg_dma_len() into sg_dma_len().
+ */
+static inline unsigned int ib_sg_dma_len(struct ib_device *dev,
+ struct scatterlist *sg)
+{
+ return sg_dma_len(sg);
+}
+
+/**
+ * ib_dma_sync_single_for_cpu - Prepare DMA region to be accessed by CPU
+ * @dev: The device for which the DMA address was created
+ * @addr: The DMA address
+ * @size: The size of the region in bytes
+ * @dir: The direction of the DMA
+ */
+static inline void ib_dma_sync_single_for_cpu(struct ib_device *dev,
+ u64 addr,
+ size_t size,
+ enum dma_data_direction dir)
+{
+ if (dev->dma_ops)
+ dev->dma_ops->sync_single_for_cpu(dev, addr, size, dir);
+ else
+ dma_sync_single_for_cpu(dev->dma_device, addr, size, dir);
+}
+
+/**
+ * ib_dma_sync_single_for_device - Prepare DMA region to be accessed by device
+ * @dev: The device for which the DMA address was created
+ * @addr: The DMA address
+ * @size: The size of the region in bytes
+ * @dir: The direction of the DMA
+ */
+static inline void ib_dma_sync_single_for_device(struct ib_device *dev,
+ u64 addr,
+ size_t size,
+ enum dma_data_direction dir)
+{
+ if (dev->dma_ops)
+ dev->dma_ops->sync_single_for_device(dev, addr, size, dir);
+ else
+ dma_sync_single_for_device(dev->dma_device, addr, size, dir);
+}
+
+/**
+ * ib_dma_alloc_coherent - Allocate memory and map it for DMA
+ * @dev: The device for which the DMA address is requested
+ * @size: The size of the region to allocate in bytes
+ * @dma_handle: A pointer for returning the DMA address of the region
+ * @flag: memory allocator flags
+ */
+static inline void *ib_dma_alloc_coherent(struct ib_device *dev,
+ size_t size,
+ u64 *dma_handle,
+ gfp_t flag)
+{
+ if (dev->dma_ops)
+ return dev->dma_ops->alloc_coherent(dev, size, dma_handle, flag);
+ else {
+ dma_addr_t handle;
+ void *ret;
+
+ ret = dma_alloc_coherent(dev->dma_device, size, &handle, flag);
+ *dma_handle = handle;
+ return ret;
+ }
+}
+
+/**
+ * ib_dma_free_coherent - Free memory allocated by ib_dma_alloc_coherent()
+ * @dev: The device for which the DMA addresses were allocated
+ * @size: The size of the region
+ * @cpu_addr: the address returned by ib_dma_alloc_coherent()
+ * @dma_handle: the DMA address returned by ib_dma_alloc_coherent()
+ */
+static inline void ib_dma_free_coherent(struct ib_device *dev,
+ size_t size, void *cpu_addr,
+ u64 dma_handle)
+{
+ if (dev->dma_ops)
+ dev->dma_ops->free_coherent(dev, size, cpu_addr, dma_handle);
+ else
+ dma_free_coherent(dev->dma_device, size, cpu_addr, dma_handle);
+}
+
+/**
+ * ib_reg_phys_mr - Prepares a virtually addressed memory region for use
+ * by an HCA.
+ * @pd: The protection domain associated assigned to the registered region.
+ * @phys_buf_array: Specifies a list of physical buffers to use in the
+ * memory region.
+ * @num_phys_buf: Specifies the size of the phys_buf_array.
+ * @mr_access_flags: Specifies the memory access rights.
+ * @iova_start: The offset of the region's starting I/O virtual address.
+ */
+struct ib_mr *ib_reg_phys_mr(struct ib_pd *pd,
+ struct ib_phys_buf *phys_buf_array,
+ int num_phys_buf,
+ int mr_access_flags,
+ u64 *iova_start);
+
+/**
+ * ib_rereg_phys_mr - Modifies the attributes of an existing memory region.
+ * Conceptually, this call performs the functions deregister memory region
+ * followed by register physical memory region. Where possible,
+ * resources are reused instead of deallocated and reallocated.
+ * @mr: The memory region to modify.
+ * @mr_rereg_mask: A bit-mask used to indicate which of the following
+ * properties of the memory region are being modified.
+ * @pd: If %IB_MR_REREG_PD is set in mr_rereg_mask, this field specifies
+ * the new protection domain to associated with the memory region,
+ * otherwise, this parameter is ignored.
+ * @phys_buf_array: If %IB_MR_REREG_TRANS is set in mr_rereg_mask, this
+ * field specifies a list of physical buffers to use in the new
+ * translation, otherwise, this parameter is ignored.
+ * @num_phys_buf: If %IB_MR_REREG_TRANS is set in mr_rereg_mask, this
+ * field specifies the size of the phys_buf_array, otherwise, this
+ * parameter is ignored.
+ * @mr_access_flags: If %IB_MR_REREG_ACCESS is set in mr_rereg_mask, this
+ * field specifies the new memory access rights, otherwise, this
+ * parameter is ignored.
+ * @iova_start: The offset of the region's starting I/O virtual address.
+ */
+int ib_rereg_phys_mr(struct ib_mr *mr,
+ int mr_rereg_mask,
+ struct ib_pd *pd,
+ struct ib_phys_buf *phys_buf_array,
+ int num_phys_buf,
+ int mr_access_flags,
+ u64 *iova_start);
+
+/**
+ * ib_query_mr - Retrieves information about a specific memory region.
+ * @mr: The memory region to retrieve information about.
+ * @mr_attr: The attributes of the specified memory region.
+ */
+int ib_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr);
+
+/**
+ * ib_dereg_mr - Deregisters a memory region and removes it from the
+ * HCA translation table.
+ * @mr: The memory region to deregister.
+ *
+ * This function can fail, if the memory region has memory windows bound to it.
+ */
+int ib_dereg_mr(struct ib_mr *mr);
+
+
+/**
+ * ib_create_mr - Allocates a memory region that may be used for
+ * signature handover operations.
+ * @pd: The protection domain associated with the region.
+ * @mr_init_attr: memory region init attributes.
+ */
+struct ib_mr *ib_create_mr(struct ib_pd *pd,
+ struct ib_mr_init_attr *mr_init_attr);
+
+/**
+ * ib_destroy_mr - Destroys a memory region that was created using
+ * ib_create_mr and removes it from HW translation tables.
+ * @mr: The memory region to destroy.
+ *
+ * This function can fail, if the memory region has memory windows bound to it.
+ */
+int ib_destroy_mr(struct ib_mr *mr);
+
+/**
+ * ib_alloc_fast_reg_mr - Allocates memory region usable with the
+ * IB_WR_FAST_REG_MR send work request.
+ * @pd: The protection domain associated with the region.
+ * @max_page_list_len: requested max physical buffer list length to be
+ * used with fast register work requests for this MR.
+ */
+struct ib_mr *ib_alloc_fast_reg_mr(struct ib_pd *pd, int max_page_list_len);
+
+/**
+ * ib_alloc_fast_reg_page_list - Allocates a page list array
+ * @device - ib device pointer.
+ * @page_list_len - size of the page list array to be allocated.
+ *
+ * This allocates and returns a struct ib_fast_reg_page_list * and a
+ * page_list array that is at least page_list_len in size. The actual
+ * size is returned in max_page_list_len. The caller is responsible
+ * for initializing the contents of the page_list array before posting
+ * a send work request with the IB_WC_FAST_REG_MR opcode.
+ *
+ * The page_list array entries must be translated using one of the
+ * ib_dma_*() functions just like the addresses passed to
+ * ib_map_phys_fmr(). Once the ib_post_send() is issued, the struct
+ * ib_fast_reg_page_list must not be modified by the caller until the
+ * IB_WC_FAST_REG_MR work request completes.
+ */
+struct ib_fast_reg_page_list *ib_alloc_fast_reg_page_list(
+ struct ib_device *device, int page_list_len);
+
+/**
+ * ib_free_fast_reg_page_list - Deallocates a previously allocated
+ * page list array.
+ * @page_list - struct ib_fast_reg_page_list pointer to be deallocated.
+ */
+void ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list);
+
+/**
+ * ib_update_fast_reg_key - updates the key portion of the fast_reg MR
+ * R_Key and L_Key.
+ * @mr - struct ib_mr pointer to be updated.
+ * @newkey - new key to be used.
+ */
+static inline void ib_update_fast_reg_key(struct ib_mr *mr, u8 newkey)
+{
+ mr->lkey = (mr->lkey & 0xffffff00) | newkey;
+ mr->rkey = (mr->rkey & 0xffffff00) | newkey;
+}
+
+/**
+ * ib_inc_rkey - increments the key portion of the given rkey. Can be used
+ * for calculating a new rkey for type 2 memory windows.
+ * @rkey - the rkey to increment.
+ */
+static inline u32 ib_inc_rkey(u32 rkey)
+{
+ const u32 mask = 0x000000ff;
+ return ((rkey + 1) & mask) | (rkey & ~mask);
+}
+
+/**
+ * ib_alloc_mw - Allocates a memory window.
+ * @pd: The protection domain associated with the memory window.
+ * @type: The type of the memory window (1 or 2).
+ */
+struct ib_mw *ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type);
+
+/**
+ * ib_bind_mw - Posts a work request to the send queue of the specified
+ * QP, which binds the memory window to the given address range and
+ * remote access attributes.
+ * @qp: QP to post the bind work request on.
+ * @mw: The memory window to bind.
+ * @mw_bind: Specifies information about the memory window, including
+ * its address range, remote access rights, and associated memory region.
+ *
+ * If there is no immediate error, the function will update the rkey member
+ * of the mw parameter to its new value. The bind operation can still fail
+ * asynchronously.
+ */
+static inline int ib_bind_mw(struct ib_qp *qp,
+ struct ib_mw *mw,
+ struct ib_mw_bind *mw_bind)
+{
+ /* XXX reference counting in corresponding MR? */
+ return mw->device->bind_mw ?
+ mw->device->bind_mw(qp, mw, mw_bind) :
+ -ENOSYS;
+}
+
+/**
+ * ib_dealloc_mw - Deallocates a memory window.
+ * @mw: The memory window to deallocate.
+ */
+int ib_dealloc_mw(struct ib_mw *mw);
+
+/**
+ * ib_alloc_fmr - Allocates a unmapped fast memory region.
+ * @pd: The protection domain associated with the unmapped region.
+ * @mr_access_flags: Specifies the memory access rights.
+ * @fmr_attr: Attributes of the unmapped region.
+ *
+ * A fast memory region must be mapped before it can be used as part of
+ * a work request.
+ */
+struct ib_fmr *ib_alloc_fmr(struct ib_pd *pd,
+ int mr_access_flags,
+ struct ib_fmr_attr *fmr_attr);
+
+/**
+ * ib_map_phys_fmr - Maps a list of physical pages to a fast memory region.
+ * @fmr: The fast memory region to associate with the pages.
+ * @page_list: An array of physical pages to map to the fast memory region.
+ * @list_len: The number of pages in page_list.
+ * @iova: The I/O virtual address to use with the mapped region.
+ */
+static inline int ib_map_phys_fmr(struct ib_fmr *fmr,
+ u64 *page_list, int list_len,
+ u64 iova)
+{
+ return fmr->device->map_phys_fmr(fmr, page_list, list_len, iova);
+}
+
+/**
+ * ib_unmap_fmr - Removes the mapping from a list of fast memory regions.
+ * @fmr_list: A linked list of fast memory regions to unmap.
+ */
+int ib_unmap_fmr(struct list_head *fmr_list);
+
+/**
+ * ib_dealloc_fmr - Deallocates a fast memory region.
+ * @fmr: The fast memory region to deallocate.
+ */
+int ib_dealloc_fmr(struct ib_fmr *fmr);
+
+/**
+ * ib_attach_mcast - Attaches the specified QP to a multicast group.
+ * @qp: QP to attach to the multicast group. The QP must be type
+ * IB_QPT_UD.
+ * @gid: Multicast group GID.
+ * @lid: Multicast group LID in host byte order.
+ *
+ * In order to send and receive multicast packets, subnet
+ * administration must have created the multicast group and configured
+ * the fabric appropriately. The port associated with the specified
+ * QP must also be a member of the multicast group.
+ */
+int ib_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid);
+
+/**
+ * ib_detach_mcast - Detaches the specified QP from a multicast group.
+ * @qp: QP to detach from the multicast group.
+ * @gid: Multicast group GID.
+ * @lid: Multicast group LID in host byte order.
+ */
+int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid);
+
+/**
+ * ib_alloc_xrcd - Allocates an XRC domain.
+ * @device: The device on which to allocate the XRC domain.
+ */
+struct ib_xrcd *ib_alloc_xrcd(struct ib_device *device);
+
+/**
+ * ib_dealloc_xrcd - Deallocates an XRC domain.
+ * @xrcd: The XRC domain to deallocate.
+ */
+int ib_dealloc_xrcd(struct ib_xrcd *xrcd);
+
+struct ib_flow *ib_create_flow(struct ib_qp *qp,
+ struct ib_flow_attr *flow_attr, int domain);
+int ib_destroy_flow(struct ib_flow *flow_id);
+
+static inline int ib_check_mr_access(int flags)
+{
+ /*
+ * Local write permission is required if remote write or
+ * remote atomic permission is also requested.
+ */
+ if (flags & (IB_ACCESS_REMOTE_ATOMIC | IB_ACCESS_REMOTE_WRITE) &&
+ !(flags & IB_ACCESS_LOCAL_WRITE))
+ return -EINVAL;
+
+ return 0;
+}
+
+/**
+ * ib_check_mr_status: lightweight check of MR status.
+ * This routine may provide status checks on a selected
+ * ib_mr. first use is for signature status check.
+ *
+ * @mr: A memory region.
+ * @check_mask: Bitmask of which checks to perform from
+ * ib_mr_status_check enumeration.
+ * @mr_status: The container of relevant status checks.
+ * failed checks will be indicated in the status bitmask
+ * and the relevant info shall be in the error item.
+ */
+int ib_check_mr_status(struct ib_mr *mr, u32 check_mask,
+ struct ib_mr_status *mr_status);
+
+#endif /* IB_VERBS_H */
diff --git a/include/rdma/iw_cm.h b/include/rdma/iw_cm.h
new file mode 100644
index 000000000..1017e0bdf
--- /dev/null
+++ b/include/rdma/iw_cm.h
@@ -0,0 +1,251 @@
+/*
+ * Copyright (c) 2005 Network Appliance, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef IW_CM_H
+#define IW_CM_H
+
+#include <linux/in.h>
+#include <rdma/ib_cm.h>
+
+struct iw_cm_id;
+
+enum iw_cm_event_type {
+ IW_CM_EVENT_CONNECT_REQUEST = 1, /* connect request received */
+ IW_CM_EVENT_CONNECT_REPLY, /* reply from active connect request */
+ IW_CM_EVENT_ESTABLISHED, /* passive side accept successful */
+ IW_CM_EVENT_DISCONNECT, /* orderly shutdown */
+ IW_CM_EVENT_CLOSE /* close complete */
+};
+
+struct iw_cm_event {
+ enum iw_cm_event_type event;
+ int status;
+ struct sockaddr_storage local_addr;
+ struct sockaddr_storage remote_addr;
+ void *private_data;
+ void *provider_data;
+ u8 private_data_len;
+ u8 ord;
+ u8 ird;
+};
+
+/**
+ * iw_cm_handler - Function to be called by the IW CM when delivering events
+ * to the client.
+ *
+ * @cm_id: The IW CM identifier associated with the event.
+ * @event: Pointer to the event structure.
+ */
+typedef int (*iw_cm_handler)(struct iw_cm_id *cm_id,
+ struct iw_cm_event *event);
+
+/**
+ * iw_event_handler - Function called by the provider when delivering provider
+ * events to the IW CM. Returns either 0 indicating the event was processed
+ * or -errno if the event could not be processed.
+ *
+ * @cm_id: The IW CM identifier associated with the event.
+ * @event: Pointer to the event structure.
+ */
+typedef int (*iw_event_handler)(struct iw_cm_id *cm_id,
+ struct iw_cm_event *event);
+
+struct iw_cm_id {
+ iw_cm_handler cm_handler; /* client callback function */
+ void *context; /* client cb context */
+ struct ib_device *device;
+ struct sockaddr_storage local_addr;
+ struct sockaddr_storage remote_addr;
+ void *provider_data; /* provider private data */
+ iw_event_handler event_handler; /* cb for provider
+ events */
+ /* Used by provider to add and remove refs on IW cm_id */
+ void (*add_ref)(struct iw_cm_id *);
+ void (*rem_ref)(struct iw_cm_id *);
+};
+
+struct iw_cm_conn_param {
+ const void *private_data;
+ u16 private_data_len;
+ u32 ord;
+ u32 ird;
+ u32 qpn;
+};
+
+struct iw_cm_verbs {
+ void (*add_ref)(struct ib_qp *qp);
+
+ void (*rem_ref)(struct ib_qp *qp);
+
+ struct ib_qp * (*get_qp)(struct ib_device *device,
+ int qpn);
+
+ int (*connect)(struct iw_cm_id *cm_id,
+ struct iw_cm_conn_param *conn_param);
+
+ int (*accept)(struct iw_cm_id *cm_id,
+ struct iw_cm_conn_param *conn_param);
+
+ int (*reject)(struct iw_cm_id *cm_id,
+ const void *pdata, u8 pdata_len);
+
+ int (*create_listen)(struct iw_cm_id *cm_id,
+ int backlog);
+
+ int (*destroy_listen)(struct iw_cm_id *cm_id);
+};
+
+/**
+ * iw_create_cm_id - Create an IW CM identifier.
+ *
+ * @device: The IB device on which to create the IW CM identier.
+ * @event_handler: User callback invoked to report events associated with the
+ * returned IW CM identifier.
+ * @context: User specified context associated with the id.
+ */
+struct iw_cm_id *iw_create_cm_id(struct ib_device *device,
+ iw_cm_handler cm_handler, void *context);
+
+/**
+ * iw_destroy_cm_id - Destroy an IW CM identifier.
+ *
+ * @cm_id: The previously created IW CM identifier to destroy.
+ *
+ * The client can assume that no events will be delivered for the CM ID after
+ * this function returns.
+ */
+void iw_destroy_cm_id(struct iw_cm_id *cm_id);
+
+/**
+ * iw_cm_bind_qp - Unbind the specified IW CM identifier and QP
+ *
+ * @cm_id: The IW CM idenfier to unbind from the QP.
+ * @qp: The QP
+ *
+ * This is called by the provider when destroying the QP to ensure
+ * that any references held by the IWCM are released. It may also
+ * be called by the IWCM when destroying a CM_ID to that any
+ * references held by the provider are released.
+ */
+void iw_cm_unbind_qp(struct iw_cm_id *cm_id, struct ib_qp *qp);
+
+/**
+ * iw_cm_get_qp - Return the ib_qp associated with a QPN
+ *
+ * @ib_device: The IB device
+ * @qpn: The queue pair number
+ */
+struct ib_qp *iw_cm_get_qp(struct ib_device *device, int qpn);
+
+/**
+ * iw_cm_listen - Listen for incoming connection requests on the
+ * specified IW CM id.
+ *
+ * @cm_id: The IW CM identifier.
+ * @backlog: The maximum number of outstanding un-accepted inbound listen
+ * requests to queue.
+ *
+ * The source address and port number are specified in the IW CM identifier
+ * structure.
+ */
+int iw_cm_listen(struct iw_cm_id *cm_id, int backlog);
+
+/**
+ * iw_cm_accept - Called to accept an incoming connect request.
+ *
+ * @cm_id: The IW CM identifier associated with the connection request.
+ * @iw_param: Pointer to a structure containing connection establishment
+ * parameters.
+ *
+ * The specified cm_id will have been provided in the event data for a
+ * CONNECT_REQUEST event. Subsequent events related to this connection will be
+ * delivered to the specified IW CM identifier prior and may occur prior to
+ * the return of this function. If this function returns a non-zero value, the
+ * client can assume that no events will be delivered to the specified IW CM
+ * identifier.
+ */
+int iw_cm_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param);
+
+/**
+ * iw_cm_reject - Reject an incoming connection request.
+ *
+ * @cm_id: Connection identifier associated with the request.
+ * @private_daa: Pointer to data to deliver to the remote peer as part of the
+ * reject message.
+ * @private_data_len: The number of bytes in the private_data parameter.
+ *
+ * The client can assume that no events will be delivered to the specified IW
+ * CM identifier following the return of this function. The private_data
+ * buffer is available for reuse when this function returns.
+ */
+int iw_cm_reject(struct iw_cm_id *cm_id, const void *private_data,
+ u8 private_data_len);
+
+/**
+ * iw_cm_connect - Called to request a connection to a remote peer.
+ *
+ * @cm_id: The IW CM identifier for the connection.
+ * @iw_param: Pointer to a structure containing connection establishment
+ * parameters.
+ *
+ * Events may be delivered to the specified IW CM identifier prior to the
+ * return of this function. If this function returns a non-zero value, the
+ * client can assume that no events will be delivered to the specified IW CM
+ * identifier.
+ */
+int iw_cm_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param);
+
+/**
+ * iw_cm_disconnect - Close the specified connection.
+ *
+ * @cm_id: The IW CM identifier to close.
+ * @abrupt: If 0, the connection will be closed gracefully, otherwise, the
+ * connection will be reset.
+ *
+ * The IW CM identifier is still active until the IW_CM_EVENT_CLOSE event is
+ * delivered.
+ */
+int iw_cm_disconnect(struct iw_cm_id *cm_id, int abrupt);
+
+/**
+ * iw_cm_init_qp_attr - Called to initialize the attributes of the QP
+ * associated with a IW CM identifier.
+ *
+ * @cm_id: The IW CM identifier associated with the QP
+ * @qp_attr: Pointer to the QP attributes structure.
+ * @qp_attr_mask: Pointer to a bit vector specifying which QP attributes are
+ * valid.
+ */
+int iw_cm_init_qp_attr(struct iw_cm_id *cm_id, struct ib_qp_attr *qp_attr,
+ int *qp_attr_mask);
+
+#endif /* IW_CM_H */
diff --git a/include/rdma/iw_portmap.h b/include/rdma/iw_portmap.h
new file mode 100644
index 000000000..fda31673a
--- /dev/null
+++ b/include/rdma/iw_portmap.h
@@ -0,0 +1,224 @@
+/*
+ * Copyright (c) 2014 Intel Corporation. All rights reserved.
+ * Copyright (c) 2014 Chelsio, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef _IW_PORTMAP_H
+#define _IW_PORTMAP_H
+
+#define IWPM_ULIBNAME_SIZE 32
+#define IWPM_DEVNAME_SIZE 32
+#define IWPM_IFNAME_SIZE 16
+#define IWPM_IPADDR_SIZE 16
+
+enum {
+ IWPM_INVALID_NLMSG_ERR = 10,
+ IWPM_CREATE_MAPPING_ERR,
+ IWPM_DUPLICATE_MAPPING_ERR,
+ IWPM_UNKNOWN_MAPPING_ERR,
+ IWPM_CLIENT_DEV_INFO_ERR,
+ IWPM_USER_LIB_INFO_ERR,
+ IWPM_REMOTE_QUERY_REJECT
+};
+
+struct iwpm_dev_data {
+ char dev_name[IWPM_DEVNAME_SIZE];
+ char if_name[IWPM_IFNAME_SIZE];
+};
+
+struct iwpm_sa_data {
+ struct sockaddr_storage loc_addr;
+ struct sockaddr_storage mapped_loc_addr;
+ struct sockaddr_storage rem_addr;
+ struct sockaddr_storage mapped_rem_addr;
+};
+
+/**
+ * iwpm_init - Allocate resources for the iwarp port mapper
+ *
+ * Should be called when network interface goes up.
+ */
+int iwpm_init(u8);
+
+/**
+ * iwpm_exit - Deallocate resources for the iwarp port mapper
+ *
+ * Should be called when network interface goes down.
+ */
+int iwpm_exit(u8);
+
+/**
+ * iwpm_valid_pid - Check if the userspace iwarp port mapper pid is valid
+ *
+ * Returns true if the pid is greater than zero, otherwise returns false
+ */
+int iwpm_valid_pid(void);
+
+/**
+ * iwpm_register_pid - Send a netlink query to userspace
+ * to get the iwarp port mapper pid
+ * @pm_msg: Contains driver info to send to the userspace port mapper
+ * @nl_client: The index of the netlink client
+ */
+int iwpm_register_pid(struct iwpm_dev_data *pm_msg, u8 nl_client);
+
+/**
+ * iwpm_add_mapping - Send a netlink add mapping request to
+ * the userspace port mapper
+ * @pm_msg: Contains the local ip/tcp address info to send
+ * @nl_client: The index of the netlink client
+ *
+ * If the request is successful, the pm_msg stores
+ * the port mapper response (mapped address info)
+ */
+int iwpm_add_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client);
+
+/**
+ * iwpm_add_and_query_mapping - Send a netlink add and query mapping request
+ * to the userspace port mapper
+ * @pm_msg: Contains the local and remote ip/tcp address info to send
+ * @nl_client: The index of the netlink client
+ *
+ * If the request is successful, the pm_msg stores the
+ * port mapper response (mapped local and remote address info)
+ */
+int iwpm_add_and_query_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client);
+
+/**
+ * iwpm_remove_mapping - Send a netlink remove mapping request
+ * to the userspace port mapper
+ *
+ * @local_addr: Local ip/tcp address to remove
+ * @nl_client: The index of the netlink client
+ */
+int iwpm_remove_mapping(struct sockaddr_storage *local_addr, u8 nl_client);
+
+/**
+ * iwpm_register_pid_cb - Process the port mapper response to
+ * iwpm_register_pid query
+ * @skb:
+ * @cb: Contains the received message (payload and netlink header)
+ *
+ * If successful, the function receives the userspace port mapper pid
+ * which is used in future communication with the port mapper
+ */
+int iwpm_register_pid_cb(struct sk_buff *, struct netlink_callback *);
+
+/**
+ * iwpm_add_mapping_cb - Process the port mapper response to
+ * iwpm_add_mapping request
+ * @skb:
+ * @cb: Contains the received message (payload and netlink header)
+ */
+int iwpm_add_mapping_cb(struct sk_buff *, struct netlink_callback *);
+
+/**
+ * iwpm_add_and_query_mapping_cb - Process the port mapper response to
+ * iwpm_add_and_query_mapping request
+ * @skb:
+ * @cb: Contains the received message (payload and netlink header)
+ */
+int iwpm_add_and_query_mapping_cb(struct sk_buff *, struct netlink_callback *);
+
+/**
+ * iwpm_remote_info_cb - Process remote connecting peer address info, which
+ * the port mapper has received from the connecting peer
+ *
+ * @cb: Contains the received message (payload and netlink header)
+ *
+ * Stores the IPv4/IPv6 address info in a hash table
+ */
+int iwpm_remote_info_cb(struct sk_buff *, struct netlink_callback *);
+
+/**
+ * iwpm_mapping_error_cb - Process port mapper notification for error
+ *
+ * @skb:
+ * @cb: Contains the received message (payload and netlink header)
+ */
+int iwpm_mapping_error_cb(struct sk_buff *, struct netlink_callback *);
+
+/**
+ * iwpm_mapping_info_cb - Process a notification that the userspace
+ * port mapper daemon is started
+ * @skb:
+ * @cb: Contains the received message (payload and netlink header)
+ *
+ * Using the received port mapper pid, send all the local mapping
+ * info records to the userspace port mapper
+ */
+int iwpm_mapping_info_cb(struct sk_buff *, struct netlink_callback *);
+
+/**
+ * iwpm_ack_mapping_info_cb - Process the port mapper ack for
+ * the provided local mapping info records
+ * @skb:
+ * @cb: Contains the received message (payload and netlink header)
+ */
+int iwpm_ack_mapping_info_cb(struct sk_buff *, struct netlink_callback *);
+
+/**
+ * iwpm_get_remote_info - Get the remote connecting peer address info
+ *
+ * @mapped_loc_addr: Mapped local address of the listening peer
+ * @mapped_rem_addr: Mapped remote address of the connecting peer
+ * @remote_addr: To store the remote address of the connecting peer
+ * @nl_client: The index of the netlink client
+ *
+ * The remote address info is retrieved and provided to the client in
+ * the remote_addr. After that it is removed from the hash table
+ */
+int iwpm_get_remote_info(struct sockaddr_storage *mapped_loc_addr,
+ struct sockaddr_storage *mapped_rem_addr,
+ struct sockaddr_storage *remote_addr, u8 nl_client);
+
+/**
+ * iwpm_create_mapinfo - Store local and mapped IPv4/IPv6 address
+ * info in a hash table
+ * @local_addr: Local ip/tcp address
+ * @mapped_addr: Mapped local ip/tcp address
+ * @nl_client: The index of the netlink client
+ */
+int iwpm_create_mapinfo(struct sockaddr_storage *local_addr,
+ struct sockaddr_storage *mapped_addr, u8 nl_client);
+
+/**
+ * iwpm_remove_mapinfo - Remove local and mapped IPv4/IPv6 address
+ * info from the hash table
+ * @local_addr: Local ip/tcp address
+ * @mapped_addr: Mapped local ip/tcp address
+ *
+ * Returns err code if mapping info is not found in the hash table,
+ * otherwise returns 0
+ */
+int iwpm_remove_mapinfo(struct sockaddr_storage *local_addr,
+ struct sockaddr_storage *mapped_addr);
+
+#endif /* _IW_PORTMAP_H */
diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h
new file mode 100644
index 000000000..1ed2088dc
--- /dev/null
+++ b/include/rdma/rdma_cm.h
@@ -0,0 +1,383 @@
+/*
+ * Copyright (c) 2005 Voltaire Inc. All rights reserved.
+ * Copyright (c) 2005 Intel Corporation. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#if !defined(RDMA_CM_H)
+#define RDMA_CM_H
+
+#include <linux/socket.h>
+#include <linux/in6.h>
+#include <rdma/ib_addr.h>
+#include <rdma/ib_sa.h>
+
+/*
+ * Upon receiving a device removal event, users must destroy the associated
+ * RDMA identifier and release all resources allocated with the device.
+ */
+enum rdma_cm_event_type {
+ RDMA_CM_EVENT_ADDR_RESOLVED,
+ RDMA_CM_EVENT_ADDR_ERROR,
+ RDMA_CM_EVENT_ROUTE_RESOLVED,
+ RDMA_CM_EVENT_ROUTE_ERROR,
+ RDMA_CM_EVENT_CONNECT_REQUEST,
+ RDMA_CM_EVENT_CONNECT_RESPONSE,
+ RDMA_CM_EVENT_CONNECT_ERROR,
+ RDMA_CM_EVENT_UNREACHABLE,
+ RDMA_CM_EVENT_REJECTED,
+ RDMA_CM_EVENT_ESTABLISHED,
+ RDMA_CM_EVENT_DISCONNECTED,
+ RDMA_CM_EVENT_DEVICE_REMOVAL,
+ RDMA_CM_EVENT_MULTICAST_JOIN,
+ RDMA_CM_EVENT_MULTICAST_ERROR,
+ RDMA_CM_EVENT_ADDR_CHANGE,
+ RDMA_CM_EVENT_TIMEWAIT_EXIT
+};
+
+enum rdma_port_space {
+ RDMA_PS_SDP = 0x0001,
+ RDMA_PS_IPOIB = 0x0002,
+ RDMA_PS_IB = 0x013F,
+ RDMA_PS_TCP = 0x0106,
+ RDMA_PS_UDP = 0x0111,
+};
+
+#define RDMA_IB_IP_PS_MASK 0xFFFFFFFFFFFF0000ULL
+#define RDMA_IB_IP_PS_TCP 0x0000000001060000ULL
+#define RDMA_IB_IP_PS_UDP 0x0000000001110000ULL
+#define RDMA_IB_IP_PS_IB 0x00000000013F0000ULL
+
+struct rdma_addr {
+ struct sockaddr_storage src_addr;
+ struct sockaddr_storage dst_addr;
+ struct rdma_dev_addr dev_addr;
+};
+
+struct rdma_route {
+ struct rdma_addr addr;
+ struct ib_sa_path_rec *path_rec;
+ int num_paths;
+};
+
+struct rdma_conn_param {
+ const void *private_data;
+ u8 private_data_len;
+ u8 responder_resources;
+ u8 initiator_depth;
+ u8 flow_control;
+ u8 retry_count; /* ignored when accepting */
+ u8 rnr_retry_count;
+ /* Fields below ignored if a QP is created on the rdma_cm_id. */
+ u8 srq;
+ u32 qp_num;
+ u32 qkey;
+};
+
+struct rdma_ud_param {
+ const void *private_data;
+ u8 private_data_len;
+ struct ib_ah_attr ah_attr;
+ u32 qp_num;
+ u32 qkey;
+};
+
+struct rdma_cm_event {
+ enum rdma_cm_event_type event;
+ int status;
+ union {
+ struct rdma_conn_param conn;
+ struct rdma_ud_param ud;
+ } param;
+};
+
+enum rdma_cm_state {
+ RDMA_CM_IDLE,
+ RDMA_CM_ADDR_QUERY,
+ RDMA_CM_ADDR_RESOLVED,
+ RDMA_CM_ROUTE_QUERY,
+ RDMA_CM_ROUTE_RESOLVED,
+ RDMA_CM_CONNECT,
+ RDMA_CM_DISCONNECT,
+ RDMA_CM_ADDR_BOUND,
+ RDMA_CM_LISTEN,
+ RDMA_CM_DEVICE_REMOVAL,
+ RDMA_CM_DESTROYING
+};
+
+struct rdma_cm_id;
+
+/**
+ * rdma_cm_event_handler - Callback used to report user events.
+ *
+ * Notes: Users may not call rdma_destroy_id from this callback to destroy
+ * the passed in id, or a corresponding listen id. Returning a
+ * non-zero value from the callback will destroy the passed in id.
+ */
+typedef int (*rdma_cm_event_handler)(struct rdma_cm_id *id,
+ struct rdma_cm_event *event);
+
+struct rdma_cm_id {
+ struct ib_device *device;
+ void *context;
+ struct ib_qp *qp;
+ rdma_cm_event_handler event_handler;
+ struct rdma_route route;
+ enum rdma_port_space ps;
+ enum ib_qp_type qp_type;
+ u8 port_num;
+};
+
+/**
+ * rdma_create_id - Create an RDMA identifier.
+ *
+ * @event_handler: User callback invoked to report events associated with the
+ * returned rdma_id.
+ * @context: User specified context associated with the id.
+ * @ps: RDMA port space.
+ * @qp_type: type of queue pair associated with the id.
+ */
+struct rdma_cm_id *rdma_create_id(rdma_cm_event_handler event_handler,
+ void *context, enum rdma_port_space ps,
+ enum ib_qp_type qp_type);
+
+/**
+ * rdma_destroy_id - Destroys an RDMA identifier.
+ *
+ * @id: RDMA identifier.
+ *
+ * Note: calling this function has the effect of canceling in-flight
+ * asynchronous operations associated with the id.
+ */
+void rdma_destroy_id(struct rdma_cm_id *id);
+
+/**
+ * rdma_bind_addr - Bind an RDMA identifier to a source address and
+ * associated RDMA device, if needed.
+ *
+ * @id: RDMA identifier.
+ * @addr: Local address information. Wildcard values are permitted.
+ *
+ * This associates a source address with the RDMA identifier before calling
+ * rdma_listen. If a specific local address is given, the RDMA identifier will
+ * be bound to a local RDMA device.
+ */
+int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr);
+
+/**
+ * rdma_resolve_addr - Resolve destination and optional source addresses
+ * from IP addresses to an RDMA address. If successful, the specified
+ * rdma_cm_id will be bound to a local device.
+ *
+ * @id: RDMA identifier.
+ * @src_addr: Source address information. This parameter may be NULL.
+ * @dst_addr: Destination address information.
+ * @timeout_ms: Time to wait for resolution to complete.
+ */
+int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
+ struct sockaddr *dst_addr, int timeout_ms);
+
+/**
+ * rdma_resolve_route - Resolve the RDMA address bound to the RDMA identifier
+ * into route information needed to establish a connection.
+ *
+ * This is called on the client side of a connection.
+ * Users must have first called rdma_resolve_addr to resolve a dst_addr
+ * into an RDMA address before calling this routine.
+ */
+int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms);
+
+/**
+ * rdma_create_qp - Allocate a QP and associate it with the specified RDMA
+ * identifier.
+ *
+ * QPs allocated to an rdma_cm_id will automatically be transitioned by the CMA
+ * through their states.
+ */
+int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd,
+ struct ib_qp_init_attr *qp_init_attr);
+
+/**
+ * rdma_destroy_qp - Deallocate the QP associated with the specified RDMA
+ * identifier.
+ *
+ * Users must destroy any QP associated with an RDMA identifier before
+ * destroying the RDMA ID.
+ */
+void rdma_destroy_qp(struct rdma_cm_id *id);
+
+/**
+ * rdma_init_qp_attr - Initializes the QP attributes for use in transitioning
+ * to a specified QP state.
+ * @id: Communication identifier associated with the QP attributes to
+ * initialize.
+ * @qp_attr: On input, specifies the desired QP state. On output, the
+ * mandatory and desired optional attributes will be set in order to
+ * modify the QP to the specified state.
+ * @qp_attr_mask: The QP attribute mask that may be used to transition the
+ * QP to the specified state.
+ *
+ * Users must set the @qp_attr->qp_state to the desired QP state. This call
+ * will set all required attributes for the given transition, along with
+ * known optional attributes. Users may override the attributes returned from
+ * this call before calling ib_modify_qp.
+ *
+ * Users that wish to have their QP automatically transitioned through its
+ * states can associate a QP with the rdma_cm_id by calling rdma_create_qp().
+ */
+int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr,
+ int *qp_attr_mask);
+
+/**
+ * rdma_connect - Initiate an active connection request.
+ * @id: Connection identifier to connect.
+ * @conn_param: Connection information used for connected QPs.
+ *
+ * Users must have resolved a route for the rdma_cm_id to connect with
+ * by having called rdma_resolve_route before calling this routine.
+ *
+ * This call will either connect to a remote QP or obtain remote QP
+ * information for unconnected rdma_cm_id's. The actual operation is
+ * based on the rdma_cm_id's port space.
+ */
+int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param);
+
+/**
+ * rdma_listen - This function is called by the passive side to
+ * listen for incoming connection requests.
+ *
+ * Users must have bound the rdma_cm_id to a local address by calling
+ * rdma_bind_addr before calling this routine.
+ */
+int rdma_listen(struct rdma_cm_id *id, int backlog);
+
+/**
+ * rdma_accept - Called to accept a connection request or response.
+ * @id: Connection identifier associated with the request.
+ * @conn_param: Information needed to establish the connection. This must be
+ * provided if accepting a connection request. If accepting a connection
+ * response, this parameter must be NULL.
+ *
+ * Typically, this routine is only called by the listener to accept a connection
+ * request. It must also be called on the active side of a connection if the
+ * user is performing their own QP transitions.
+ *
+ * In the case of error, a reject message is sent to the remote side and the
+ * state of the qp associated with the id is modified to error, such that any
+ * previously posted receive buffers would be flushed.
+ */
+int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param);
+
+/**
+ * rdma_notify - Notifies the RDMA CM of an asynchronous event that has
+ * occurred on the connection.
+ * @id: Connection identifier to transition to established.
+ * @event: Asynchronous event.
+ *
+ * This routine should be invoked by users to notify the CM of relevant
+ * communication events. Events that should be reported to the CM and
+ * when to report them are:
+ *
+ * IB_EVENT_COMM_EST - Used when a message is received on a connected
+ * QP before an RTU has been received.
+ */
+int rdma_notify(struct rdma_cm_id *id, enum ib_event_type event);
+
+/**
+ * rdma_reject - Called to reject a connection request or response.
+ */
+int rdma_reject(struct rdma_cm_id *id, const void *private_data,
+ u8 private_data_len);
+
+/**
+ * rdma_disconnect - This function disconnects the associated QP and
+ * transitions it into the error state.
+ */
+int rdma_disconnect(struct rdma_cm_id *id);
+
+/**
+ * rdma_join_multicast - Join the multicast group specified by the given
+ * address.
+ * @id: Communication identifier associated with the request.
+ * @addr: Multicast address identifying the group to join.
+ * @context: User-defined context associated with the join request, returned
+ * to the user through the private_data pointer in multicast events.
+ */
+int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
+ void *context);
+
+/**
+ * rdma_leave_multicast - Leave the multicast group specified by the given
+ * address.
+ */
+void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr);
+
+/**
+ * rdma_set_service_type - Set the type of service associated with a
+ * connection identifier.
+ * @id: Communication identifier to associated with service type.
+ * @tos: Type of service.
+ *
+ * The type of service is interpretted as a differentiated service
+ * field (RFC 2474). The service type should be specified before
+ * performing route resolution, as existing communication on the
+ * connection identifier may be unaffected. The type of service
+ * requested may not be supported by the network to all destinations.
+ */
+void rdma_set_service_type(struct rdma_cm_id *id, int tos);
+
+/**
+ * rdma_set_reuseaddr - Allow the reuse of local addresses when binding
+ * the rdma_cm_id.
+ * @id: Communication identifier to configure.
+ * @reuse: Value indicating if the bound address is reusable.
+ *
+ * Reuse must be set before an address is bound to the id.
+ */
+int rdma_set_reuseaddr(struct rdma_cm_id *id, int reuse);
+
+/**
+ * rdma_set_afonly - Specify that listens are restricted to the
+ * bound address family only.
+ * @id: Communication identifer to configure.
+ * @afonly: Value indicating if listens are restricted.
+ *
+ * Must be set before identifier is in the listening state.
+ */
+int rdma_set_afonly(struct rdma_cm_id *id, int afonly);
+
+ /**
+ * rdma_get_service_id - Return the IB service ID for a specified address.
+ * @id: Communication identifier associated with the address.
+ * @addr: Address for the service ID.
+ */
+__be64 rdma_get_service_id(struct rdma_cm_id *id, struct sockaddr *addr);
+
+#endif /* RDMA_CM_H */
diff --git a/include/rdma/rdma_cm_ib.h b/include/rdma/rdma_cm_ib.h
new file mode 100644
index 000000000..2389c3b45
--- /dev/null
+++ b/include/rdma/rdma_cm_ib.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2006 Intel Corporation. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#if !defined(RDMA_CM_IB_H)
+#define RDMA_CM_IB_H
+
+#include <rdma/rdma_cm.h>
+
+/**
+ * rdma_set_ib_paths - Manually sets the path records used to establish a
+ * connection.
+ * @id: Connection identifier associated with the request.
+ * @path_rec: Reference to the path record
+ *
+ * This call permits a user to specify routing information for rdma_cm_id's
+ * bound to Infiniband devices. It is called on the client side of a
+ * connection and replaces the call to rdma_resolve_route.
+ */
+int rdma_set_ib_paths(struct rdma_cm_id *id,
+ struct ib_sa_path_rec *path_rec, int num_paths);
+
+/* Global qkey for UDP QPs and multicast groups. */
+#define RDMA_UDP_QKEY 0x01234567
+
+#endif /* RDMA_CM_IB_H */
diff --git a/include/rdma/rdma_netlink.h b/include/rdma/rdma_netlink.h
new file mode 100644
index 000000000..0790882e0
--- /dev/null
+++ b/include/rdma/rdma_netlink.h
@@ -0,0 +1,80 @@
+#ifndef _RDMA_NETLINK_H
+#define _RDMA_NETLINK_H
+
+
+#include <linux/netlink.h>
+#include <uapi/rdma/rdma_netlink.h>
+
+struct ibnl_client_cbs {
+ int (*dump)(struct sk_buff *skb, struct netlink_callback *nlcb);
+ struct module *module;
+};
+
+int ibnl_init(void);
+void ibnl_cleanup(void);
+
+/**
+ * Add a a client to the list of IB netlink exporters.
+ * @index: Index of the added client
+ * @nops: Number of supported ops by the added client.
+ * @cb_table: A table for op->callback
+ *
+ * Returns 0 on success or a negative error code.
+ */
+int ibnl_add_client(int index, int nops,
+ const struct ibnl_client_cbs cb_table[]);
+
+/**
+ * Remove a client from IB netlink.
+ * @index: Index of the removed IB client.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+int ibnl_remove_client(int index);
+
+/**
+ * Put a new message in a supplied skb.
+ * @skb: The netlink skb.
+ * @nlh: Pointer to put the header of the new netlink message.
+ * @seq: The message sequence number.
+ * @len: The requested message length to allocate.
+ * @client: Calling IB netlink client.
+ * @op: message content op.
+ * Returns the allocated buffer on success and NULL on failure.
+ */
+void *ibnl_put_msg(struct sk_buff *skb, struct nlmsghdr **nlh, int seq,
+ int len, int client, int op, int flags);
+/**
+ * Put a new attribute in a supplied skb.
+ * @skb: The netlink skb.
+ * @nlh: Header of the netlink message to append the attribute to.
+ * @len: The length of the attribute data.
+ * @data: The attribute data to put.
+ * @type: The attribute type.
+ * Returns the 0 and a negative error code on failure.
+ */
+int ibnl_put_attr(struct sk_buff *skb, struct nlmsghdr *nlh,
+ int len, void *data, int type);
+
+/**
+ * Send the supplied skb to a specific userspace PID.
+ * @skb: The netlink skb
+ * @nlh: Header of the netlink message to send
+ * @pid: Userspace netlink process ID
+ * Returns 0 on success or a negative error code.
+ */
+int ibnl_unicast(struct sk_buff *skb, struct nlmsghdr *nlh,
+ __u32 pid);
+
+/**
+ * Send the supplied skb to a netlink group.
+ * @skb: The netlink skb
+ * @nlh: Header of the netlink message to send
+ * @group: Netlink group ID
+ * @flags: allocation flags
+ * Returns 0 on success or a negative error code.
+ */
+int ibnl_multicast(struct sk_buff *skb, struct nlmsghdr *nlh,
+ unsigned int group, gfp_t flags);
+
+#endif /* _RDMA_NETLINK_H */