author     André Fabian Silva Delgado <emulatorman@parabola.nu>  2016-01-20 14:01:31 -0300
committer  André Fabian Silva Delgado <emulatorman@parabola.nu>  2016-01-20 14:01:31 -0300
commit     b4b7ff4b08e691656c9d77c758fc355833128ac0 (patch)
tree       82fcb00e6b918026dc9f2d1f05ed8eee83874cc0
parent     35acfa0fc609f2a2cd95cef4a6a9c3a5c38f1778 (diff)
Linux-libre 4.4-gnupck-4.4-gnu
Diffstat (limited to 'drivers/infiniband/core')
-rw-r--r--  drivers/infiniband/core/addr.c              |  20
-rw-r--r--  drivers/infiniband/core/agent.c             |   2
-rw-r--r--  drivers/infiniband/core/cache.c             | 112
-rw-r--r--  drivers/infiniband/core/cm.c                |  40
-rw-r--r--  drivers/infiniband/core/cma.c               | 194
-rw-r--r--  drivers/infiniband/core/core_priv.h         |   9
-rw-r--r--  drivers/infiniband/core/device.c            |  19
-rw-r--r--  drivers/infiniband/core/mad.c               |  47
-rw-r--r--  drivers/infiniband/core/mad_priv.h          |   2
-rw-r--r--  drivers/infiniband/core/multicast.c         |   3
-rw-r--r--  drivers/infiniband/core/sa_query.c          |  53
-rw-r--r--  drivers/infiniband/core/sysfs.c             |   2
-rw-r--r--  drivers/infiniband/core/ucma.c              |   5
-rw-r--r--  drivers/infiniband/core/uverbs.h            |   1
-rw-r--r--  drivers/infiniband/core/uverbs_cmd.c        | 421
-rw-r--r--  drivers/infiniband/core/uverbs_main.c       |   1
-rw-r--r--  drivers/infiniband/core/uverbs_marshall.c   |   4
-rw-r--r--  drivers/infiniband/core/verbs.c             | 296
18 files changed, 820 insertions, 411 deletions
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index 746cdf56b..34b1adad0 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -128,7 +128,7 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr,
int ret = -EADDRNOTAVAIL;
if (dev_addr->bound_dev_if) {
- dev = dev_get_by_index(&init_net, dev_addr->bound_dev_if);
+ dev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
if (!dev)
return -ENODEV;
ret = rdma_copy_addr(dev_addr, dev, NULL);
@@ -138,7 +138,7 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr,
switch (addr->sa_family) {
case AF_INET:
- dev = ip_dev_find(&init_net,
+ dev = ip_dev_find(dev_addr->net,
((struct sockaddr_in *) addr)->sin_addr.s_addr);
if (!dev)
@@ -149,12 +149,11 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr,
*vlan_id = rdma_vlan_dev_vlan_id(dev);
dev_put(dev);
break;
-
#if IS_ENABLED(CONFIG_IPV6)
case AF_INET6:
rcu_read_lock();
- for_each_netdev_rcu(&init_net, dev) {
- if (ipv6_chk_addr(&init_net,
+ for_each_netdev_rcu(dev_addr->net, dev) {
+ if (ipv6_chk_addr(dev_addr->net,
&((struct sockaddr_in6 *) addr)->sin6_addr,
dev, 1)) {
ret = rdma_copy_addr(dev_addr, dev, NULL);
@@ -236,7 +235,7 @@ static int addr4_resolve(struct sockaddr_in *src_in,
fl4.daddr = dst_ip;
fl4.saddr = src_ip;
fl4.flowi4_oif = addr->bound_dev_if;
- rt = ip_route_output_key(&init_net, &fl4);
+ rt = ip_route_output_key(addr->net, &fl4);
if (IS_ERR(rt)) {
ret = PTR_ERR(rt);
goto out;
@@ -278,12 +277,12 @@ static int addr6_resolve(struct sockaddr_in6 *src_in,
fl6.saddr = src_in->sin6_addr;
fl6.flowi6_oif = addr->bound_dev_if;
- dst = ip6_route_output(&init_net, NULL, &fl6);
+ dst = ip6_route_output(addr->net, NULL, &fl6);
if ((ret = dst->error))
goto put;
if (ipv6_addr_any(&fl6.saddr)) {
- ret = ipv6_dev_get_saddr(&init_net, ip6_dst_idev(dst)->dev,
+ ret = ipv6_dev_get_saddr(addr->net, ip6_dst_idev(dst)->dev,
&fl6.daddr, 0, &fl6.saddr);
if (ret)
goto put;
@@ -458,7 +457,7 @@ static void resolve_cb(int status, struct sockaddr *src_addr,
}
int rdma_addr_find_dmac_by_grh(const union ib_gid *sgid, const union ib_gid *dgid,
- u8 *dmac, u16 *vlan_id)
+ u8 *dmac, u16 *vlan_id, int if_index)
{
int ret = 0;
struct rdma_dev_addr dev_addr;
@@ -476,6 +475,8 @@ int rdma_addr_find_dmac_by_grh(const union ib_gid *sgid, const union ib_gid *dgi
rdma_gid2ip(&dgid_addr._sockaddr, dgid);
memset(&dev_addr, 0, sizeof(dev_addr));
+ dev_addr.bound_dev_if = if_index;
+ dev_addr.net = &init_net;
ctx.addr = &dev_addr;
init_completion(&ctx.comp);
@@ -510,6 +511,7 @@ int rdma_addr_find_smac_by_sgid(union ib_gid *sgid, u8 *smac, u16 *vlan_id)
rdma_gid2ip(&gid_addr._sockaddr, sgid);
memset(&dev_addr, 0, sizeof(dev_addr));
+ dev_addr.net = &init_net;
ret = rdma_translate_ip(&gid_addr._sockaddr, &dev_addr, vlan_id);
if (ret)
return ret;
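
The extra if_index argument lets GRH-based MAC resolution run against the netdev the connection is bound to, rather than whatever interface matched first. A minimal caller sketch under that assumption; resolve_peer_mac() is a hypothetical wrapper, not part of the patch:

#include <rdma/ib_addr.h>

/* Hypothetical wrapper: resolve the peer MAC for a GRH-routed packet,
 * scoped to the bound interface index (0 = no binding). */
static int resolve_peer_mac(const union ib_gid *sgid,
			    const union ib_gid *dgid,
			    int bound_if_index, u8 *dmac)
{
	u16 vlan_id;

	/* Note: the device lookup itself still runs in &init_net here. */
	return rdma_addr_find_dmac_by_grh(sgid, dgid, dmac, &vlan_id,
					  bound_if_index);
}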
diff --git a/drivers/infiniband/core/agent.c b/drivers/infiniband/core/agent.c
index 042904030..4fa524dfb 100644
--- a/drivers/infiniband/core/agent.c
+++ b/drivers/infiniband/core/agent.c
@@ -126,7 +126,7 @@ void agent_send_response(const struct ib_mad_hdr *mad_hdr, const struct ib_grh *
mad_send_wr = container_of(send_buf,
struct ib_mad_send_wr_private,
send_buf);
- mad_send_wr->send_wr.wr.ud.port_num = port_num;
+ mad_send_wr->send_wr.port_num = port_num;
}
if (ib_post_send_mad(send_buf, NULL)) {
diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
index 87471ef37..89bebeada 100644
--- a/drivers/infiniband/core/cache.c
+++ b/drivers/infiniband/core/cache.c
@@ -409,10 +409,10 @@ static int ib_cache_gid_find(struct ib_device *ib_dev,
mask, port, index);
}
-int ib_cache_gid_find_by_port(struct ib_device *ib_dev,
- const union ib_gid *gid,
- u8 port, struct net_device *ndev,
- u16 *index)
+int ib_find_cached_gid_by_port(struct ib_device *ib_dev,
+ const union ib_gid *gid,
+ u8 port, struct net_device *ndev,
+ u16 *index)
{
int local_index;
struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
@@ -438,6 +438,82 @@ int ib_cache_gid_find_by_port(struct ib_device *ib_dev,
return -ENOENT;
}
+EXPORT_SYMBOL(ib_find_cached_gid_by_port);
+
+/**
+ * ib_find_gid_by_filter - Returns the GID table index where a specified
+ * GID value occurs
+ * @device: The device to query.
+ * @gid: The GID value to search for.
+ * @port_num: The port number of the device on which the GID value is
+ * searched.
+ * @filter: The filter function is executed on any matching GID in the table.
+ * If the filter function returns true, the corresponding index is returned;
+ * otherwise the search continues with the next GID table entry. It is
+ * guaranteed that while filter runs, the ndev field is valid and the
+ * structure won't change. filter runs in an atomic context and must not
+ * be NULL.
+ * @index: The index into the cached GID table where the GID was found. This
+ * parameter may be NULL.
+ *
+ * ib_cache_gid_find_by_filter() searches the port's GID table for the
+ * specified GID value for which the filter function returns true.
+ * This function is only supported on RoCE ports.
+ *
+ */
+static int ib_cache_gid_find_by_filter(struct ib_device *ib_dev,
+ const union ib_gid *gid,
+ u8 port,
+ bool (*filter)(const union ib_gid *,
+ const struct ib_gid_attr *,
+ void *),
+ void *context,
+ u16 *index)
+{
+ struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
+ struct ib_gid_table *table;
+ unsigned int i;
+ bool found = false;
+
+ if (!ports_table)
+ return -EOPNOTSUPP;
+
+ if (port < rdma_start_port(ib_dev) ||
+ port > rdma_end_port(ib_dev) ||
+ !rdma_protocol_roce(ib_dev, port))
+ return -EPROTONOSUPPORT;
+
+ table = ports_table[port - rdma_start_port(ib_dev)];
+
+ for (i = 0; i < table->sz; i++) {
+ struct ib_gid_attr attr;
+ unsigned long flags;
+
+ read_lock_irqsave(&table->data_vec[i].lock, flags);
+ if (table->data_vec[i].props & GID_TABLE_ENTRY_INVALID)
+ goto next;
+
+ if (memcmp(gid, &table->data_vec[i].gid, sizeof(*gid)))
+ goto next;
+
+ memcpy(&attr, &table->data_vec[i].attr, sizeof(attr));
+
+ if (filter(gid, &attr, context))
+ found = true;
+
+next:
+ read_unlock_irqrestore(&table->data_vec[i].lock, flags);
+
+ if (found)
+ break;
+ }
+
+ if (!found)
+ return -ENOENT;
+
+ if (index)
+ *index = i;
+ return 0;
+}
static struct ib_gid_table *alloc_gid_table(int sz)
{
@@ -649,24 +725,44 @@ static int gid_table_setup_one(struct ib_device *ib_dev)
int ib_get_cached_gid(struct ib_device *device,
u8 port_num,
int index,
- union ib_gid *gid)
+ union ib_gid *gid,
+ struct ib_gid_attr *gid_attr)
{
if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device))
return -EINVAL;
- return __ib_cache_gid_get(device, port_num, index, gid, NULL);
+ return __ib_cache_gid_get(device, port_num, index, gid, gid_attr);
}
EXPORT_SYMBOL(ib_get_cached_gid);
int ib_find_cached_gid(struct ib_device *device,
const union ib_gid *gid,
+ struct net_device *ndev,
u8 *port_num,
u16 *index)
{
- return ib_cache_gid_find(device, gid, NULL, port_num, index);
+ return ib_cache_gid_find(device, gid, ndev, port_num, index);
}
EXPORT_SYMBOL(ib_find_cached_gid);
+int ib_find_gid_by_filter(struct ib_device *device,
+ const union ib_gid *gid,
+ u8 port_num,
+ bool (*filter)(const union ib_gid *gid,
+ const struct ib_gid_attr *,
+ void *),
+ void *context, u16 *index)
+{
+ /* Only RoCE GID table supports filter function */
+ if (!rdma_cap_roce_gid_table(device, port_num) && filter)
+ return -EPROTONOSUPPORT;
+
+ return ib_cache_gid_find_by_filter(device, gid,
+ port_num, filter,
+ context, index);
+}
+EXPORT_SYMBOL(ib_find_gid_by_filter);
+
int ib_get_cached_pkey(struct ib_device *device,
u8 port_num,
int index,
@@ -845,7 +941,7 @@ static void ib_cache_update(struct ib_device *device,
if (!use_roce_gid_table) {
for (i = 0; i < gid_cache->table_len; ++i) {
ret = ib_query_gid(device, port, i,
- gid_cache->table + i);
+ gid_cache->table + i, NULL);
if (ret) {
printk(KERN_WARNING "ib_query_gid failed (%d) for %s (index %d)\n",
ret, device->name, i);
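
A filter handed to the new ib_find_gid_by_filter() runs atomically under the GID entry lock, so it may only inspect the snapshot it is given. A sketch of a VLAN-matching filter; the callback and context names are illustrative, not from the patch:

#include <rdma/ib_cache.h>
#include <linux/if_vlan.h>

struct vlan_match {
	u16 vlan_id;
};

/* Runs in atomic context under the entry lock; attr->ndev is valid
 * for the duration of the call. */
static bool gid_vlan_filter(const union ib_gid *gid,
			    const struct ib_gid_attr *attr, void *ctx)
{
	struct vlan_match *m = ctx;

	return is_vlan_dev(attr->ndev) &&
	       vlan_dev_vlan_id(attr->ndev) == m->vlan_id;
}

/* usage: ib_find_gid_by_filter(dev, gid, port, gid_vlan_filter, &m, &index) */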
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 4f918b929..0a26dd6d9 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -179,8 +179,6 @@ struct cm_av {
struct ib_ah_attr ah_attr;
u16 pkey_index;
u8 timeout;
- u8 valid;
- u8 smac[ETH_ALEN];
};
struct cm_work {
@@ -361,17 +359,21 @@ static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av)
unsigned long flags;
int ret;
u8 p;
+ struct net_device *ndev = ib_get_ndev_from_path(path);
read_lock_irqsave(&cm.device_lock, flags);
list_for_each_entry(cm_dev, &cm.device_list, list) {
if (!ib_find_cached_gid(cm_dev->ib_device, &path->sgid,
- &p, NULL)) {
+ ndev, &p, NULL)) {
port = cm_dev->port[p-1];
break;
}
}
read_unlock_irqrestore(&cm.device_lock, flags);
+ if (ndev)
+ dev_put(ndev);
+
if (!port)
return -EINVAL;
@@ -384,9 +386,7 @@ static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av)
ib_init_ah_from_path(cm_dev->ib_device, port->port_num, path,
&av->ah_attr);
av->timeout = path->packet_life_time + 1;
- memcpy(av->smac, path->smac, sizeof(av->smac));
- av->valid = 1;
return 0;
}
@@ -1639,11 +1639,11 @@ static int cm_req_handler(struct cm_work *work)
cm_format_paths_from_req(req_msg, &work->path[0], &work->path[1]);
memcpy(work->path[0].dmac, cm_id_priv->av.ah_attr.dmac, ETH_ALEN);
- work->path[0].vlan_id = cm_id_priv->av.ah_attr.vlan_id;
ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av);
if (ret) {
ib_get_cached_gid(work->port->cm_dev->ib_device,
- work->port->port_num, 0, &work->path[0].sgid);
+ work->port->port_num, 0, &work->path[0].sgid,
+ NULL);
ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_GID,
&work->path[0].sgid, sizeof work->path[0].sgid,
NULL, 0);
@@ -3618,32 +3618,6 @@ static int cm_init_qp_rtr_attr(struct cm_id_private *cm_id_priv,
*qp_attr_mask = IB_QP_STATE | IB_QP_AV | IB_QP_PATH_MTU |
IB_QP_DEST_QPN | IB_QP_RQ_PSN;
qp_attr->ah_attr = cm_id_priv->av.ah_attr;
- if (!cm_id_priv->av.valid) {
- spin_unlock_irqrestore(&cm_id_priv->lock, flags);
- return -EINVAL;
- }
- if (cm_id_priv->av.ah_attr.vlan_id != 0xffff) {
- qp_attr->vlan_id = cm_id_priv->av.ah_attr.vlan_id;
- *qp_attr_mask |= IB_QP_VID;
- }
- if (!is_zero_ether_addr(cm_id_priv->av.smac)) {
- memcpy(qp_attr->smac, cm_id_priv->av.smac,
- sizeof(qp_attr->smac));
- *qp_attr_mask |= IB_QP_SMAC;
- }
- if (cm_id_priv->alt_av.valid) {
- if (cm_id_priv->alt_av.ah_attr.vlan_id != 0xffff) {
- qp_attr->alt_vlan_id =
- cm_id_priv->alt_av.ah_attr.vlan_id;
- *qp_attr_mask |= IB_QP_ALT_VID;
- }
- if (!is_zero_ether_addr(cm_id_priv->alt_av.smac)) {
- memcpy(qp_attr->alt_smac,
- cm_id_priv->alt_av.smac,
- sizeof(qp_attr->alt_smac));
- *qp_attr_mask |= IB_QP_ALT_SMAC;
- }
- }
qp_attr->path_mtu = cm_id_priv->path_mtu;
qp_attr->dest_qp_num = be32_to_cpu(cm_id_priv->remote_qpn);
qp_attr->rq_psn = be32_to_cpu(cm_id_priv->rq_psn);
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 36b12d560..2d762a2ec 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -44,6 +44,8 @@
#include <linux/module.h>
#include <net/route.h>
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
#include <net/tcp.h>
#include <net/ipv6.h>
#include <net/ip_fib.h>
@@ -86,7 +88,7 @@ static const char * const cma_events[] = {
[RDMA_CM_EVENT_TIMEWAIT_EXIT] = "timewait exit",
};
-const char *rdma_event_msg(enum rdma_cm_event_type event)
+const char *__attribute_const__ rdma_event_msg(enum rdma_cm_event_type event)
{
size_t index = event;
@@ -110,22 +112,33 @@ static LIST_HEAD(dev_list);
static LIST_HEAD(listen_any_list);
static DEFINE_MUTEX(lock);
static struct workqueue_struct *cma_wq;
-static DEFINE_IDR(tcp_ps);
-static DEFINE_IDR(udp_ps);
-static DEFINE_IDR(ipoib_ps);
-static DEFINE_IDR(ib_ps);
+static int cma_pernet_id;
-static struct idr *cma_idr(enum rdma_port_space ps)
+struct cma_pernet {
+ struct idr tcp_ps;
+ struct idr udp_ps;
+ struct idr ipoib_ps;
+ struct idr ib_ps;
+};
+
+static struct cma_pernet *cma_pernet(struct net *net)
+{
+ return net_generic(net, cma_pernet_id);
+}
+
+static struct idr *cma_pernet_idr(struct net *net, enum rdma_port_space ps)
{
+ struct cma_pernet *pernet = cma_pernet(net);
+
switch (ps) {
case RDMA_PS_TCP:
- return &tcp_ps;
+ return &pernet->tcp_ps;
case RDMA_PS_UDP:
- return &udp_ps;
+ return &pernet->udp_ps;
case RDMA_PS_IPOIB:
- return &ipoib_ps;
+ return &pernet->ipoib_ps;
case RDMA_PS_IB:
- return &ib_ps;
+ return &pernet->ib_ps;
default:
return NULL;
}
@@ -145,24 +158,25 @@ struct rdma_bind_list {
unsigned short port;
};
-static int cma_ps_alloc(enum rdma_port_space ps,
+static int cma_ps_alloc(struct net *net, enum rdma_port_space ps,
struct rdma_bind_list *bind_list, int snum)
{
- struct idr *idr = cma_idr(ps);
+ struct idr *idr = cma_pernet_idr(net, ps);
return idr_alloc(idr, bind_list, snum, snum + 1, GFP_KERNEL);
}
-static struct rdma_bind_list *cma_ps_find(enum rdma_port_space ps, int snum)
+static struct rdma_bind_list *cma_ps_find(struct net *net,
+ enum rdma_port_space ps, int snum)
{
- struct idr *idr = cma_idr(ps);
+ struct idr *idr = cma_pernet_idr(net, ps);
return idr_find(idr, snum);
}
-static void cma_ps_remove(enum rdma_port_space ps, int snum)
+static void cma_ps_remove(struct net *net, enum rdma_port_space ps, int snum)
{
- struct idr *idr = cma_idr(ps);
+ struct idr *idr = cma_pernet_idr(net, ps);
idr_remove(idr, snum);
}
@@ -427,10 +441,11 @@ static int cma_translate_addr(struct sockaddr *addr, struct rdma_dev_addr *dev_a
}
static inline int cma_validate_port(struct ib_device *device, u8 port,
- union ib_gid *gid, int dev_type)
+ union ib_gid *gid, int dev_type,
+ int bound_if_index)
{
- u8 found_port;
int ret = -ENODEV;
+ struct net_device *ndev = NULL;
if ((dev_type == ARPHRD_INFINIBAND) && !rdma_protocol_ib(device, port))
return ret;
@@ -438,9 +453,13 @@ static inline int cma_validate_port(struct ib_device *device, u8 port,
if ((dev_type != ARPHRD_INFINIBAND) && rdma_protocol_ib(device, port))
return ret;
- ret = ib_find_cached_gid(device, gid, &found_port, NULL);
- if (port != found_port)
- return -ENODEV;
+ if (dev_type == ARPHRD_ETHER)
+ ndev = dev_get_by_index(&init_net, bound_if_index);
+
+ ret = ib_find_cached_gid_by_port(device, gid, port, ndev, NULL);
+
+ if (ndev)
+ dev_put(ndev);
return ret;
}
@@ -472,7 +491,8 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv,
&iboe_gid : &gid;
ret = cma_validate_port(cma_dev->device, port, gidp,
- dev_addr->dev_type);
+ dev_addr->dev_type,
+ dev_addr->bound_dev_if);
if (!ret) {
id_priv->id.port_num = port;
goto out;
@@ -490,7 +510,8 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv,
&iboe_gid : &gid;
ret = cma_validate_port(cma_dev->device, port, gidp,
- dev_addr->dev_type);
+ dev_addr->dev_type,
+ dev_addr->bound_dev_if);
if (!ret) {
id_priv->id.port_num = port;
goto out;
@@ -531,7 +552,9 @@ static int cma_resolve_ib_dev(struct rdma_id_private *id_priv)
if (ib_find_cached_pkey(cur_dev->device, p, pkey, &index))
continue;
- for (i = 0; !ib_get_cached_gid(cur_dev->device, p, i, &gid); i++) {
+ for (i = 0; !ib_get_cached_gid(cur_dev->device, p, i,
+ &gid, NULL);
+ i++) {
if (!memcmp(&gid, dgid, sizeof(gid))) {
cma_dev = cur_dev;
sgid = gid;
@@ -577,7 +600,8 @@ static int cma_disable_callback(struct rdma_id_private *id_priv,
return 0;
}
-struct rdma_cm_id *rdma_create_id(rdma_cm_event_handler event_handler,
+struct rdma_cm_id *rdma_create_id(struct net *net,
+ rdma_cm_event_handler event_handler,
void *context, enum rdma_port_space ps,
enum ib_qp_type qp_type)
{
@@ -601,6 +625,7 @@ struct rdma_cm_id *rdma_create_id(rdma_cm_event_handler event_handler,
INIT_LIST_HEAD(&id_priv->listen_list);
INIT_LIST_HEAD(&id_priv->mc_list);
get_random_bytes(&id_priv->seq_num, sizeof id_priv->seq_num);
+ id_priv->id.route.addr.dev_addr.net = get_net(net);
return &id_priv->id;
}
@@ -718,18 +743,12 @@ static int cma_modify_qp_rtr(struct rdma_id_private *id_priv,
goto out;
ret = ib_query_gid(id_priv->id.device, id_priv->id.port_num,
- qp_attr.ah_attr.grh.sgid_index, &sgid);
+ qp_attr.ah_attr.grh.sgid_index, &sgid, NULL);
if (ret)
goto out;
BUG_ON(id_priv->cma_dev->device != id_priv->id.device);
- if (rdma_protocol_roce(id_priv->id.device, id_priv->id.port_num)) {
- ret = rdma_addr_find_smac_by_sgid(&sgid, qp_attr.smac, NULL);
-
- if (ret)
- goto out;
- }
if (conn_param)
qp_attr.max_dest_rd_atomic = conn_param->responder_resources;
ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
@@ -1107,10 +1126,7 @@ static bool validate_ipv4_net_dev(struct net_device *net_dev,
rcu_read_lock();
err = fib_lookup(dev_net(net_dev), &fl4, &res, 0);
- if (err)
- return false;
-
- ret = FIB_RES_DEV(res) == net_dev;
+ ret = err == 0 && FIB_RES_DEV(res) == net_dev;
rcu_read_unlock();
return ret;
@@ -1249,18 +1265,20 @@ static bool cma_protocol_roce(const struct rdma_cm_id *id)
return cma_protocol_roce_dev_port(device, port_num);
}
-static bool cma_match_net_dev(const struct rdma_id_private *id_priv,
- const struct net_device *net_dev)
+static bool cma_match_net_dev(const struct rdma_cm_id *id,
+ const struct net_device *net_dev,
+ u8 port_num)
{
- const struct rdma_addr *addr = &id_priv->id.route.addr;
+ const struct rdma_addr *addr = &id->route.addr;
if (!net_dev)
/* This request is an AF_IB request or a RoCE request */
- return addr->src_addr.ss_family == AF_IB ||
- cma_protocol_roce(&id_priv->id);
+ return (!id->port_num || id->port_num == port_num) &&
+ (addr->src_addr.ss_family == AF_IB ||
+ cma_protocol_roce_dev_port(id->device, port_num));
return !addr->dev_addr.bound_dev_if ||
- (net_eq(dev_net(net_dev), &init_net) &&
+ (net_eq(dev_net(net_dev), addr->dev_addr.net) &&
addr->dev_addr.bound_dev_if == net_dev->ifindex);
}
@@ -1279,13 +1297,13 @@ static struct rdma_id_private *cma_find_listener(
hlist_for_each_entry(id_priv, &bind_list->owners, node) {
if (cma_match_private_data(id_priv, ib_event->private_data)) {
if (id_priv->id.device == cm_id->device &&
- cma_match_net_dev(id_priv, net_dev))
+ cma_match_net_dev(&id_priv->id, net_dev, req->port))
return id_priv;
list_for_each_entry(id_priv_dev,
&id_priv->listen_list,
listen_list) {
if (id_priv_dev->id.device == cm_id->device &&
- cma_match_net_dev(id_priv_dev, net_dev))
+ cma_match_net_dev(&id_priv_dev->id, net_dev, req->port))
return id_priv_dev;
}
}
@@ -1321,7 +1339,8 @@ static struct rdma_id_private *cma_id_from_event(struct ib_cm_id *cm_id,
}
}
- bind_list = cma_ps_find(rdma_ps_from_service_id(req.service_id),
+ bind_list = cma_ps_find(*net_dev ? dev_net(*net_dev) : &init_net,
+ rdma_ps_from_service_id(req.service_id),
cma_port_from_service_id(req.service_id));
id_priv = cma_find_listener(bind_list, cm_id, ib_event, &req, *net_dev);
if (IS_ERR(id_priv) && *net_dev) {
@@ -1392,6 +1411,7 @@ static void cma_cancel_operation(struct rdma_id_private *id_priv,
static void cma_release_port(struct rdma_id_private *id_priv)
{
struct rdma_bind_list *bind_list = id_priv->bind_list;
+ struct net *net = id_priv->id.route.addr.dev_addr.net;
if (!bind_list)
return;
@@ -1399,7 +1419,7 @@ static void cma_release_port(struct rdma_id_private *id_priv)
mutex_lock(&lock);
hlist_del(&id_priv->node);
if (hlist_empty(&bind_list->owners)) {
- cma_ps_remove(bind_list->ps, bind_list->port);
+ cma_ps_remove(net, bind_list->ps, bind_list->port);
kfree(bind_list);
}
mutex_unlock(&lock);
@@ -1458,6 +1478,7 @@ void rdma_destroy_id(struct rdma_cm_id *id)
cma_deref_id(id_priv->id.context);
kfree(id_priv->id.route.path_rec);
+ put_net(id_priv->id.route.addr.dev_addr.net);
kfree(id_priv);
}
EXPORT_SYMBOL(rdma_destroy_id);
@@ -1588,7 +1609,8 @@ static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id,
ib_event->param.req_rcvd.primary_path->service_id;
int ret;
- id = rdma_create_id(listen_id->event_handler, listen_id->context,
+ id = rdma_create_id(listen_id->route.addr.dev_addr.net,
+ listen_id->event_handler, listen_id->context,
listen_id->ps, ib_event->param.req_rcvd.qp_type);
if (IS_ERR(id))
return NULL;
@@ -1643,9 +1665,10 @@ static struct rdma_id_private *cma_new_udp_id(struct rdma_cm_id *listen_id,
struct rdma_id_private *id_priv;
struct rdma_cm_id *id;
const sa_family_t ss_family = listen_id->route.addr.src_addr.ss_family;
+ struct net *net = listen_id->route.addr.dev_addr.net;
int ret;
- id = rdma_create_id(listen_id->event_handler, listen_id->context,
+ id = rdma_create_id(net, listen_id->event_handler, listen_id->context,
listen_id->ps, IB_QPT_UD);
if (IS_ERR(id))
return NULL;
@@ -1882,7 +1905,8 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
return -ECONNABORTED;
/* Create a new RDMA id for the new IW CM ID */
- new_cm_id = rdma_create_id(listen_id->id.event_handler,
+ new_cm_id = rdma_create_id(listen_id->id.route.addr.dev_addr.net,
+ listen_id->id.event_handler,
listen_id->id.context,
RDMA_PS_TCP, IB_QPT_RC);
if (IS_ERR(new_cm_id)) {
@@ -2010,12 +2034,13 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv,
{
struct rdma_id_private *dev_id_priv;
struct rdma_cm_id *id;
+ struct net *net = id_priv->id.route.addr.dev_addr.net;
int ret;
if (cma_family(id_priv) == AF_IB && !rdma_cap_ib_cm(cma_dev->device, 1))
return;
- id = rdma_create_id(cma_listen_handler, id_priv, id_priv->id.ps,
+ id = rdma_create_id(net, cma_listen_handler, id_priv, id_priv->id.ps,
id_priv->id.qp_type);
if (IS_ERR(id))
return;
@@ -2294,16 +2319,17 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
route->num_paths = 1;
- if (addr->dev_addr.bound_dev_if)
+ if (addr->dev_addr.bound_dev_if) {
ndev = dev_get_by_index(&init_net, addr->dev_addr.bound_dev_if);
+ route->path_rec->net = &init_net;
+ route->path_rec->ifindex = addr->dev_addr.bound_dev_if;
+ }
if (!ndev) {
ret = -ENODEV;
goto err2;
}
- route->path_rec->vlan_id = rdma_vlan_dev_vlan_id(ndev);
memcpy(route->path_rec->dmac, addr->dev_addr.dst_dev_addr, ETH_ALEN);
- memcpy(route->path_rec->smac, ndev->dev_addr, ndev->addr_len);
rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
&route->path_rec->sgid);
@@ -2426,7 +2452,7 @@ static int cma_bind_loopback(struct rdma_id_private *id_priv)
p = 1;
port_found:
- ret = ib_get_cached_gid(cma_dev->device, p, 0, &gid);
+ ret = ib_get_cached_gid(cma_dev->device, p, 0, &gid, NULL);
if (ret)
goto out;
@@ -2688,7 +2714,8 @@ static int cma_alloc_port(enum rdma_port_space ps,
if (!bind_list)
return -ENOMEM;
- ret = cma_ps_alloc(ps, bind_list, snum);
+ ret = cma_ps_alloc(id_priv->id.route.addr.dev_addr.net, ps, bind_list,
+ snum);
if (ret < 0)
goto err;
@@ -2707,13 +2734,14 @@ static int cma_alloc_any_port(enum rdma_port_space ps,
static unsigned int last_used_port;
int low, high, remaining;
unsigned int rover;
+ struct net *net = id_priv->id.route.addr.dev_addr.net;
- inet_get_local_port_range(&init_net, &low, &high);
+ inet_get_local_port_range(net, &low, &high);
remaining = (high - low) + 1;
rover = prandom_u32() % remaining + low;
retry:
if (last_used_port != rover &&
- !cma_ps_find(ps, (unsigned short)rover)) {
+ !cma_ps_find(net, ps, (unsigned short)rover)) {
int ret = cma_alloc_port(ps, id_priv, rover);
/*
* Remember previously used port number in order to avoid
@@ -2779,7 +2807,7 @@ static int cma_use_port(enum rdma_port_space ps,
if (snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE))
return -EACCES;
- bind_list = cma_ps_find(ps, snum);
+ bind_list = cma_ps_find(id_priv->id.route.addr.dev_addr.net, ps, snum);
if (!bind_list) {
ret = cma_alloc_port(ps, id_priv, snum);
} else {
@@ -2971,8 +2999,11 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
if (addr->sa_family == AF_INET)
id_priv->afonly = 1;
#if IS_ENABLED(CONFIG_IPV6)
- else if (addr->sa_family == AF_INET6)
- id_priv->afonly = init_net.ipv6.sysctl.bindv6only;
+ else if (addr->sa_family == AF_INET6) {
+ struct net *net = id_priv->id.route.addr.dev_addr.net;
+
+ id_priv->afonly = net->ipv6.sysctl.bindv6only;
+ }
#endif
}
ret = cma_get_port(id_priv);
@@ -3777,6 +3808,7 @@ static int cma_netdev_change(struct net_device *ndev, struct rdma_id_private *id
dev_addr = &id_priv->id.route.addr.dev_addr;
if ((dev_addr->bound_dev_if == ndev->ifindex) &&
+ (net_eq(dev_net(ndev), dev_addr->net)) &&
memcmp(dev_addr->src_dev_addr, ndev->dev_addr, ndev->addr_len)) {
printk(KERN_INFO "RDMA CM addr change for ndev %s used by id %p\n",
ndev->name, &id_priv->id);
@@ -3802,9 +3834,6 @@ static int cma_netdev_callback(struct notifier_block *self, unsigned long event,
struct rdma_id_private *id_priv;
int ret = NOTIFY_DONE;
- if (dev_net(ndev) != &init_net)
- return NOTIFY_DONE;
-
if (event != NETDEV_BONDING_FAILOVER)
return NOTIFY_DONE;
@@ -3999,6 +4028,35 @@ static const struct ibnl_client_cbs cma_cb_table[] = {
.module = THIS_MODULE },
};
+static int cma_init_net(struct net *net)
+{
+ struct cma_pernet *pernet = cma_pernet(net);
+
+ idr_init(&pernet->tcp_ps);
+ idr_init(&pernet->udp_ps);
+ idr_init(&pernet->ipoib_ps);
+ idr_init(&pernet->ib_ps);
+
+ return 0;
+}
+
+static void cma_exit_net(struct net *net)
+{
+ struct cma_pernet *pernet = cma_pernet(net);
+
+ idr_destroy(&pernet->tcp_ps);
+ idr_destroy(&pernet->udp_ps);
+ idr_destroy(&pernet->ipoib_ps);
+ idr_destroy(&pernet->ib_ps);
+}
+
+static struct pernet_operations cma_pernet_operations = {
+ .init = cma_init_net,
+ .exit = cma_exit_net,
+ .id = &cma_pernet_id,
+ .size = sizeof(struct cma_pernet),
+};
+
static int __init cma_init(void)
{
int ret;
@@ -4007,6 +4065,10 @@ static int __init cma_init(void)
if (!cma_wq)
return -ENOMEM;
+ ret = register_pernet_subsys(&cma_pernet_operations);
+ if (ret)
+ goto err_wq;
+
ib_sa_register_client(&sa_client);
rdma_addr_register_client(&addr_client);
register_netdevice_notifier(&cma_nb);
@@ -4024,6 +4086,7 @@ err:
unregister_netdevice_notifier(&cma_nb);
rdma_addr_unregister_client(&addr_client);
ib_sa_unregister_client(&sa_client);
+err_wq:
destroy_workqueue(cma_wq);
return ret;
}
@@ -4035,11 +4098,8 @@ static void __exit cma_cleanup(void)
unregister_netdevice_notifier(&cma_nb);
rdma_addr_unregister_client(&addr_client);
ib_sa_unregister_client(&sa_client);
+ unregister_pernet_subsys(&cma_pernet_operations);
destroy_workqueue(cma_wq);
- idr_destroy(&tcp_ps);
- idr_destroy(&udp_ps);
- idr_destroy(&ipoib_ps);
- idr_destroy(&ib_ps);
}
module_init(cma_init);
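
The port-space IDRs move from file-scope globals into per-namespace storage keyed by cma_pernet_id, following the standard pernet-subsys pattern. A self-contained sketch of the same mechanism with illustrative names (demo_*), not the cma ones:

#include <linux/idr.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>

static int demo_pernet_id;

struct demo_pernet {
	struct idr ports;
};

static int demo_init_net(struct net *net)
{
	struct demo_pernet *pn = net_generic(net, demo_pernet_id);

	idr_init(&pn->ports);	/* one table per namespace */
	return 0;
}

static void demo_exit_net(struct net *net)
{
	struct demo_pernet *pn = net_generic(net, demo_pernet_id);

	idr_destroy(&pn->ports);
}

static struct pernet_operations demo_pernet_ops = {
	.init = demo_init_net,
	.exit = demo_exit_net,
	.id   = &demo_pernet_id,
	.size = sizeof(struct demo_pernet),
};

/* register_pernet_subsys(&demo_pernet_ops) allocates one demo_pernet per
 * struct net; net_generic(net, demo_pernet_id) retrieves it later. */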
diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h
index 70bb36ebb..5cf6eb716 100644
--- a/drivers/infiniband/core/core_priv.h
+++ b/drivers/infiniband/core/core_priv.h
@@ -46,8 +46,8 @@ void ib_device_unregister_sysfs(struct ib_device *device);
void ib_cache_setup(void);
void ib_cache_cleanup(void);
-int ib_resolve_eth_l2_attrs(struct ib_qp *qp,
- struct ib_qp_attr *qp_attr, int *qp_attr_mask);
+int ib_resolve_eth_dmac(struct ib_qp *qp,
+ struct ib_qp_attr *qp_attr, int *qp_attr_mask);
typedef void (*roce_netdev_callback)(struct ib_device *device, u8 port,
struct net_device *idev, void *cookie);
@@ -65,11 +65,6 @@ void ib_enum_all_roce_netdevs(roce_netdev_filter filter,
roce_netdev_callback cb,
void *cookie);
-int ib_cache_gid_find_by_port(struct ib_device *ib_dev,
- const union ib_gid *gid,
- u8 port, struct net_device *ndev,
- u16 *index);
-
enum ib_cache_gid_default_mode {
IB_CACHE_GID_DEFAULT_MODE_SET,
IB_CACHE_GID_DEFAULT_MODE_DELETE
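
The rename from ib_resolve_eth_l2_attrs() to ib_resolve_eth_dmac() reflects the narrowed job: with vlan_id/smac dropped from the AH attributes, only the destination MAC still needs resolving in the core. A simplified sketch of the call-site shape used by the modify-QP paths (error handling condensed; this is an outline, not the exact kernel code):

static int modify_qp_with_resolved_dmac(struct ib_qp *qp,
					struct ib_qp_attr *attr,
					int attr_mask)
{
	int ret;

	/* Fills attr->ah_attr.dmac on RoCE ports; no-op otherwise. */
	ret = ib_resolve_eth_dmac(qp, attr, &attr_mask);
	if (ret)
		return ret;

	return qp->device->modify_qp(qp, attr, attr_mask, NULL);
}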
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 17639117a..179e8134d 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -672,14 +672,20 @@ EXPORT_SYMBOL(ib_query_port);
* @port_num:Port number to query
* @index:GID table index to query
* @gid:Returned GID
+ * @attr: Returned GID attributes related to this GID index (only in RoCE).
+ * NULL means ignore.
*
* ib_query_gid() fetches the specified GID table entry.
*/
int ib_query_gid(struct ib_device *device,
- u8 port_num, int index, union ib_gid *gid)
+ u8 port_num, int index, union ib_gid *gid,
+ struct ib_gid_attr *attr)
{
if (rdma_cap_roce_gid_table(device, port_num))
- return ib_get_cached_gid(device, port_num, index, gid);
+ return ib_get_cached_gid(device, port_num, index, gid, attr);
+
+ if (attr)
+ return -EINVAL;
return device->query_gid(device, port_num, index, gid);
}
@@ -819,27 +825,28 @@ EXPORT_SYMBOL(ib_modify_port);
* a specified GID value occurs.
* @device: The device to query.
* @gid: The GID value to search for.
+ * @ndev: The ndev related to the GID to search for.
* @port_num: The port number of the device where the GID value was found.
* @index: The index into the GID table where the GID was found. This
* parameter may be NULL.
*/
int ib_find_gid(struct ib_device *device, union ib_gid *gid,
- u8 *port_num, u16 *index)
+ struct net_device *ndev, u8 *port_num, u16 *index)
{
union ib_gid tmp_gid;
int ret, port, i;
for (port = rdma_start_port(device); port <= rdma_end_port(device); ++port) {
if (rdma_cap_roce_gid_table(device, port)) {
- if (!ib_cache_gid_find_by_port(device, gid, port,
- NULL, index)) {
+ if (!ib_find_cached_gid_by_port(device, gid, port,
+ ndev, index)) {
*port_num = port;
return 0;
}
}
for (i = 0; i < device->port_immutable[port].gid_tbl_len; ++i) {
- ret = ib_query_gid(device, port, i, &tmp_gid);
+ ret = ib_query_gid(device, port, i, &tmp_gid, NULL);
if (ret)
return ret;
if (!memcmp(&tmp_gid, gid, sizeof *gid)) {
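
Since GID cache entries are now qualified by their netdev and can carry attributes, a RoCE-aware read takes the extra parameters explicitly. A hedged sketch; read_gid_entry() is illustrative:

#include <rdma/ib_verbs.h>

/* Read one GID entry; attributes are only defined for RoCE GID
 * tables, and a non-NULL attr on other ports now yields -EINVAL. */
static int read_gid_entry(struct ib_device *device, u8 port, int index,
			  union ib_gid *gid, struct ib_gid_attr *attr)
{
	if (!rdma_cap_roce_gid_table(device, port))
		attr = NULL;	/* only the GID itself is available */

	return ib_query_gid(device, port, index, gid, attr);
}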
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index 4b5c72311..2281de122 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -752,7 +752,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
struct ib_device *device = mad_agent_priv->agent.device;
u8 port_num;
struct ib_wc mad_wc;
- struct ib_send_wr *send_wr = &mad_send_wr->send_wr;
+ struct ib_ud_wr *send_wr = &mad_send_wr->send_wr;
size_t mad_size = port_mad_size(mad_agent_priv->qp_info->port_priv);
u16 out_mad_pkey_index = 0;
u16 drslid;
@@ -761,7 +761,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
if (rdma_cap_ib_switch(device) &&
smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
- port_num = send_wr->wr.ud.port_num;
+ port_num = send_wr->port_num;
else
port_num = mad_agent_priv->agent.port_num;
@@ -832,9 +832,9 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
}
build_smp_wc(mad_agent_priv->agent.qp,
- send_wr->wr_id, drslid,
- send_wr->wr.ud.pkey_index,
- send_wr->wr.ud.port_num, &mad_wc);
+ send_wr->wr.wr_id, drslid,
+ send_wr->pkey_index,
+ send_wr->port_num, &mad_wc);
if (opa && smp->base_version == OPA_MGMT_BASE_VERSION) {
mad_wc.byte_len = mad_send_wr->send_buf.hdr_len
@@ -894,7 +894,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
local->mad_send_wr = mad_send_wr;
if (opa) {
- local->mad_send_wr->send_wr.wr.ud.pkey_index = out_mad_pkey_index;
+ local->mad_send_wr->send_wr.pkey_index = out_mad_pkey_index;
local->return_wc_byte_len = mad_size;
}
/* Reference MAD agent until send side of local completion handled */
@@ -1039,14 +1039,14 @@ struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent,
mad_send_wr->sg_list[1].lkey = mad_agent->qp->pd->local_dma_lkey;
- mad_send_wr->send_wr.wr_id = (unsigned long) mad_send_wr;
- mad_send_wr->send_wr.sg_list = mad_send_wr->sg_list;
- mad_send_wr->send_wr.num_sge = 2;
- mad_send_wr->send_wr.opcode = IB_WR_SEND;
- mad_send_wr->send_wr.send_flags = IB_SEND_SIGNALED;
- mad_send_wr->send_wr.wr.ud.remote_qpn = remote_qpn;
- mad_send_wr->send_wr.wr.ud.remote_qkey = IB_QP_SET_QKEY;
- mad_send_wr->send_wr.wr.ud.pkey_index = pkey_index;
+ mad_send_wr->send_wr.wr.wr_id = (unsigned long) mad_send_wr;
+ mad_send_wr->send_wr.wr.sg_list = mad_send_wr->sg_list;
+ mad_send_wr->send_wr.wr.num_sge = 2;
+ mad_send_wr->send_wr.wr.opcode = IB_WR_SEND;
+ mad_send_wr->send_wr.wr.send_flags = IB_SEND_SIGNALED;
+ mad_send_wr->send_wr.remote_qpn = remote_qpn;
+ mad_send_wr->send_wr.remote_qkey = IB_QP_SET_QKEY;
+ mad_send_wr->send_wr.pkey_index = pkey_index;
if (rmpp_active) {
ret = alloc_send_rmpp_list(mad_send_wr, mad_size, gfp_mask);
@@ -1151,7 +1151,7 @@ int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr)
/* Set WR ID to find mad_send_wr upon completion */
qp_info = mad_send_wr->mad_agent_priv->qp_info;
- mad_send_wr->send_wr.wr_id = (unsigned long)&mad_send_wr->mad_list;
+ mad_send_wr->send_wr.wr.wr_id = (unsigned long)&mad_send_wr->mad_list;
mad_send_wr->mad_list.mad_queue = &qp_info->send_queue;
mad_agent = mad_send_wr->send_buf.mad_agent;
@@ -1179,7 +1179,7 @@ int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr)
spin_lock_irqsave(&qp_info->send_queue.lock, flags);
if (qp_info->send_queue.count < qp_info->send_queue.max_active) {
- ret = ib_post_send(mad_agent->qp, &mad_send_wr->send_wr,
+ ret = ib_post_send(mad_agent->qp, &mad_send_wr->send_wr.wr,
&bad_send_wr);
list = &qp_info->send_queue.list;
} else {
@@ -1244,7 +1244,7 @@ int ib_post_send_mad(struct ib_mad_send_buf *send_buf,
* request associated with the completion
*/
next_send_buf = send_buf->next;
- mad_send_wr->send_wr.wr.ud.ah = send_buf->ah;
+ mad_send_wr->send_wr.ah = send_buf->ah;
if (((struct ib_mad_hdr *) send_buf->mad)->mgmt_class ==
IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
@@ -1811,6 +1811,11 @@ static int validate_mad(const struct ib_mad_hdr *mad_hdr,
if (qp_num == 0)
valid = 1;
} else {
+ /* CM attributes other than ClassPortInfo only use Send method */
+ if ((mad_hdr->mgmt_class == IB_MGMT_CLASS_CM) &&
+ (mad_hdr->attr_id != IB_MGMT_CLASSPORTINFO_ATTR_ID) &&
+ (mad_hdr->method != IB_MGMT_METHOD_SEND))
+ goto out;
/* Filter GSI packets sent to QP0 */
if (qp_num != 0)
valid = 1;
@@ -1877,7 +1882,7 @@ static inline int rcv_has_same_gid(const struct ib_mad_agent_private *mad_agent_
((1 << lmc) - 1)));
} else {
if (ib_get_cached_gid(device, port_num,
- attr.grh.sgid_index, &sgid))
+ attr.grh.sgid_index, &sgid, NULL))
return 0;
return !memcmp(sgid.raw, rwc->recv_buf.grh->dgid.raw,
16);
@@ -2457,7 +2462,7 @@ retry:
ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc);
if (queued_send_wr) {
- ret = ib_post_send(qp_info->qp, &queued_send_wr->send_wr,
+ ret = ib_post_send(qp_info->qp, &queued_send_wr->send_wr.wr,
&bad_send_wr);
if (ret) {
dev_err(&port_priv->device->dev,
@@ -2515,7 +2520,7 @@ static void mad_error_handler(struct ib_mad_port_private *port_priv,
struct ib_send_wr *bad_send_wr;
mad_send_wr->retry = 0;
- ret = ib_post_send(qp_info->qp, &mad_send_wr->send_wr,
+ ret = ib_post_send(qp_info->qp, &mad_send_wr->send_wr.wr,
&bad_send_wr);
if (ret)
ib_mad_send_done_handler(port_priv, wc);
@@ -2713,7 +2718,7 @@ static void local_completions(struct work_struct *work)
build_smp_wc(recv_mad_agent->agent.qp,
(unsigned long) local->mad_send_wr,
be16_to_cpu(IB_LID_PERMISSIVE),
- local->mad_send_wr->send_wr.wr.ud.pkey_index,
+ local->mad_send_wr->send_wr.pkey_index,
recv_mad_agent->agent.port_num, &wc);
local->mad_priv->header.recv_wc.wc = &wc;
diff --git a/drivers/infiniband/core/mad_priv.h b/drivers/infiniband/core/mad_priv.h
index 4a4f7aad0..990698a6a 100644
--- a/drivers/infiniband/core/mad_priv.h
+++ b/drivers/infiniband/core/mad_priv.h
@@ -123,7 +123,7 @@ struct ib_mad_send_wr_private {
struct ib_mad_send_buf send_buf;
u64 header_mapping;
u64 payload_mapping;
- struct ib_send_wr send_wr;
+ struct ib_ud_wr send_wr;
struct ib_sge sg_list[IB_MAD_SEND_REQ_MAX_SG];
__be64 tid;
unsigned long timeout;
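
This follows the kernel-wide work-request split: protocol-specific requests embed struct ib_send_wr as their first member, are posted through &wr, and are recovered with container_of()-style helpers such as ud_wr(), as the uverbs post_send path below does. A sketch of posting a UD send in the new layout; the function and its parameters are illustrative:

#include <rdma/ib_verbs.h>

static int post_ud_send(struct ib_qp *qp, struct ib_ah *ah,
			u32 remote_qpn, u32 remote_qkey,
			struct ib_sge *sge)
{
	struct ib_ud_wr wr = {
		.wr.opcode	= IB_WR_SEND,
		.wr.send_flags	= IB_SEND_SIGNALED,
		.wr.sg_list	= sge,
		.wr.num_sge	= 1,
		.ah		= ah,
		.remote_qpn	= remote_qpn,
		.remote_qkey	= remote_qkey,
	};
	struct ib_send_wr *bad_wr;

	/* The embedded ib_send_wr is what gets posted; on error,
	 * ud_wr(bad_wr) recovers the containing ib_ud_wr. */
	return ib_post_send(qp, &wr.wr, &bad_wr);
}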
diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c
index d38d8b2b2..bb6685fb0 100644
--- a/drivers/infiniband/core/multicast.c
+++ b/drivers/infiniband/core/multicast.c
@@ -729,7 +729,8 @@ int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num,
u16 gid_index;
u8 p;
- ret = ib_find_cached_gid(device, &rec->port_gid, &p, &gid_index);
+ ret = ib_find_cached_gid(device, &rec->port_gid,
+ NULL, &p, &gid_index);
if (ret)
return ret;
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index 8c014b33d..a95a32ba5 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -512,7 +512,7 @@ static int ib_nl_get_path_rec_attrs_len(ib_sa_comp_mask comp_mask)
return len;
}
-static int ib_nl_send_msg(struct ib_sa_query *query)
+static int ib_nl_send_msg(struct ib_sa_query *query, gfp_t gfp_mask)
{
struct sk_buff *skb = NULL;
struct nlmsghdr *nlh;
@@ -526,7 +526,7 @@ static int ib_nl_send_msg(struct ib_sa_query *query)
if (len <= 0)
return -EMSGSIZE;
- skb = nlmsg_new(len, GFP_KERNEL);
+ skb = nlmsg_new(len, gfp_mask);
if (!skb)
return -ENOMEM;
@@ -544,7 +544,7 @@ static int ib_nl_send_msg(struct ib_sa_query *query)
/* Repair the nlmsg header length */
nlmsg_end(skb, nlh);
- ret = ibnl_multicast(skb, nlh, RDMA_NL_GROUP_LS, GFP_KERNEL);
+ ret = ibnl_multicast(skb, nlh, RDMA_NL_GROUP_LS, gfp_mask);
if (!ret)
ret = len;
else
@@ -553,7 +553,7 @@ static int ib_nl_send_msg(struct ib_sa_query *query)
return ret;
}
-static int ib_nl_make_request(struct ib_sa_query *query)
+static int ib_nl_make_request(struct ib_sa_query *query, gfp_t gfp_mask)
{
unsigned long flags;
unsigned long delay;
@@ -562,25 +562,27 @@ static int ib_nl_make_request(struct ib_sa_query *query)
INIT_LIST_HEAD(&query->list);
query->seq = (u32)atomic_inc_return(&ib_nl_sa_request_seq);
+ /* Put the request on the list first.*/
spin_lock_irqsave(&ib_nl_request_lock, flags);
- ret = ib_nl_send_msg(query);
- if (ret <= 0) {
- ret = -EIO;
- goto request_out;
- } else {
- ret = 0;
- }
-
delay = msecs_to_jiffies(sa_local_svc_timeout_ms);
query->timeout = delay + jiffies;
list_add_tail(&query->list, &ib_nl_request_list);
/* Start the timeout if this is the only request */
if (ib_nl_request_list.next == &query->list)
queue_delayed_work(ib_nl_wq, &ib_nl_timed_work, delay);
-
-request_out:
spin_unlock_irqrestore(&ib_nl_request_lock, flags);
+ ret = ib_nl_send_msg(query, gfp_mask);
+ if (ret <= 0) {
+ ret = -EIO;
+ /* Remove the request */
+ spin_lock_irqsave(&ib_nl_request_lock, flags);
+ list_del(&query->list);
+ spin_unlock_irqrestore(&ib_nl_request_lock, flags);
+ } else {
+ ret = 0;
+ }
+
return ret;
}
@@ -1007,26 +1009,29 @@ int ib_init_ah_from_path(struct ib_device *device, u8 port_num,
force_grh = rdma_cap_eth_ah(device, port_num);
if (rec->hop_limit > 1 || force_grh) {
+ struct net_device *ndev = ib_get_ndev_from_path(rec);
+
ah_attr->ah_flags = IB_AH_GRH;
ah_attr->grh.dgid = rec->dgid;
- ret = ib_find_cached_gid(device, &rec->sgid, &port_num,
+ ret = ib_find_cached_gid(device, &rec->sgid, ndev, &port_num,
&gid_index);
- if (ret)
+ if (ret) {
+ if (ndev)
+ dev_put(ndev);
return ret;
+ }
ah_attr->grh.sgid_index = gid_index;
ah_attr->grh.flow_label = be32_to_cpu(rec->flow_label);
ah_attr->grh.hop_limit = rec->hop_limit;
ah_attr->grh.traffic_class = rec->traffic_class;
+ if (ndev)
+ dev_put(ndev);
}
if (force_grh) {
memcpy(ah_attr->dmac, rec->dmac, ETH_ALEN);
- ah_attr->vlan_id = rec->vlan_id;
- } else {
- ah_attr->vlan_id = 0xffff;
}
-
return 0;
}
EXPORT_SYMBOL(ib_init_ah_from_path);
@@ -1083,7 +1088,7 @@ static void init_mad(struct ib_sa_mad *mad, struct ib_mad_agent *agent)
static int send_mad(struct ib_sa_query *query, int timeout_ms, gfp_t gfp_mask)
{
- bool preload = !!(gfp_mask & __GFP_WAIT);
+ bool preload = gfpflags_allow_blocking(gfp_mask);
unsigned long flags;
int ret, id;
@@ -1105,7 +1110,7 @@ static int send_mad(struct ib_sa_query *query, int timeout_ms, gfp_t gfp_mask)
if (query->flags & IB_SA_ENABLE_LOCAL_SERVICE) {
if (!ibnl_chk_listeners(RDMA_NL_GROUP_LS)) {
- if (!ib_nl_make_request(query))
+ if (!ib_nl_make_request(query, gfp_mask))
return id;
}
ib_sa_disable_local_svc(query);
@@ -1150,9 +1155,9 @@ static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query,
ib_unpack(path_rec_table, ARRAY_SIZE(path_rec_table),
mad->data, &rec);
- rec.vlan_id = 0xffff;
+ rec.net = NULL;
+ rec.ifindex = 0;
memset(rec.dmac, 0, ETH_ALEN);
- memset(rec.smac, 0, ETH_ALEN);
query->callback(status, &rec, query->context);
} else
query->callback(status, NULL, query->context);
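
The reordering in ib_nl_make_request() closes a race: the request is now published on ib_nl_request_list before the netlink message is sent, so a fast response cannot arrive for a request that is not yet findable; on send failure the entry is removed again. The general shape of that pattern, sketched with hypothetical types:

#include <linux/list.h>
#include <linux/spinlock.h>

struct nl_req {
	struct list_head list;
};

static int make_request(struct nl_req *req, spinlock_t *lock,
			struct list_head *pending,
			int (*send)(struct nl_req *, gfp_t), gfp_t gfp)
{
	unsigned long flags;

	/* Publish first: the response handler can now find the request. */
	spin_lock_irqsave(lock, flags);
	list_add_tail(&req->list, pending);
	spin_unlock_irqrestore(lock, flags);

	if (send(req, gfp) <= 0) {
		/* Send failed: unpublish before reporting the error. */
		spin_lock_irqsave(lock, flags);
		list_del(&req->list);
		spin_unlock_irqrestore(lock, flags);
		return -EIO;
	}
	return 0;
}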
diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
index 34cdd74b0..b1f37d409 100644
--- a/drivers/infiniband/core/sysfs.c
+++ b/drivers/infiniband/core/sysfs.c
@@ -289,7 +289,7 @@ static ssize_t show_port_gid(struct ib_port *p, struct port_attribute *attr,
union ib_gid gid;
ssize_t ret;
- ret = ib_query_gid(p->ibdev, p->port_num, tab_attr->index, &gid);
+ ret = ib_query_gid(p->ibdev, p->port_num, tab_attr->index, &gid, NULL);
if (ret)
return ret;
diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
index 30467d10d..8b5a934e1 100644
--- a/drivers/infiniband/core/ucma.c
+++ b/drivers/infiniband/core/ucma.c
@@ -42,6 +42,7 @@
#include <linux/slab.h>
#include <linux/sysctl.h>
#include <linux/module.h>
+#include <linux/nsproxy.h>
#include <rdma/rdma_user_cm.h>
#include <rdma/ib_marshall.h>
@@ -472,7 +473,8 @@ static ssize_t ucma_create_id(struct ucma_file *file, const char __user *inbuf,
return -ENOMEM;
ctx->uid = cmd.uid;
- ctx->cm_id = rdma_create_id(ucma_event_handler, ctx, cmd.ps, qp_type);
+ ctx->cm_id = rdma_create_id(current->nsproxy->net_ns,
+ ucma_event_handler, ctx, cmd.ps, qp_type);
if (IS_ERR(ctx->cm_id)) {
ret = PTR_ERR(ctx->cm_id);
goto err1;
@@ -1211,7 +1213,6 @@ static int ucma_set_ib_path(struct ucma_context *ctx,
return -EINVAL;
memset(&sa_path, 0, sizeof(sa_path));
- sa_path.vlan_id = 0xffff;
ib_sa_unpack_path(path_data->path_rec, &sa_path);
ret = rdma_set_ib_paths(ctx->cm_id, &sa_path, 1);
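
rdma_create_id() now takes the network namespace as an explicit first argument and holds a reference to it (get_net()) until rdma_destroy_id(); ucma passes the calling process's namespace, while kernel consumers typically pass &init_net. A minimal consumer sketch under that assumption:

#include <rdma/rdma_cm.h>
#include <net/net_namespace.h>

static struct rdma_cm_id *make_listener(rdma_cm_event_handler handler,
					void *context)
{
	/* A kernel-side user pinned to the initial namespace. */
	return rdma_create_id(&init_net, handler, context,
			      RDMA_PS_TCP, IB_QPT_RC);
}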
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index 3863d33c2..94bbd8c15 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -272,5 +272,6 @@ IB_UVERBS_DECLARE_EX_CMD(create_flow);
IB_UVERBS_DECLARE_EX_CMD(destroy_flow);
IB_UVERBS_DECLARE_EX_CMD(query_device);
IB_UVERBS_DECLARE_EX_CMD(create_cq);
+IB_UVERBS_DECLARE_EX_CMD(create_qp);
#endif /* UVERBS_H */
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index be4cb9f04..1c02deab0 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -62,9 +62,11 @@ static struct uverbs_lock_class rule_lock_class = { .name = "RULE-uobj" };
* The ib_uobject locking scheme is as follows:
*
* - ib_uverbs_idr_lock protects the uverbs idrs themselves, so it
- * needs to be held during all idr operations. When an object is
+ * needs to be held during all idr write operations. When an object is
* looked up, a reference must be taken on the object's kref before
- * dropping this lock.
+ * dropping this lock. For read operations, the rcu_read_lock()
+ * and rcu_write_lock() but similarly the kref reference is grabbed
+ * before the rcu_read_unlock().
*
* - Each object also has an rwsem. This rwsem must be held for
* reading while an operation that uses the object is performed.
@@ -96,7 +98,7 @@ static void init_uobj(struct ib_uobject *uobj, u64 user_handle,
static void release_uobj(struct kref *kref)
{
- kfree(container_of(kref, struct ib_uobject, ref));
+ kfree_rcu(container_of(kref, struct ib_uobject, ref), rcu);
}
static void put_uobj(struct ib_uobject *uobj)
@@ -145,7 +147,7 @@ static struct ib_uobject *__idr_get_uobj(struct idr *idr, int id,
{
struct ib_uobject *uobj;
- spin_lock(&ib_uverbs_idr_lock);
+ rcu_read_lock();
uobj = idr_find(idr, id);
if (uobj) {
if (uobj->context == context)
@@ -153,7 +155,7 @@ static struct ib_uobject *__idr_get_uobj(struct idr *idr, int id,
else
uobj = NULL;
}
- spin_unlock(&ib_uverbs_idr_lock);
+ rcu_read_unlock();
return uobj;
}
@@ -1478,7 +1480,7 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof(cmd)))
return -EFAULT;
- INIT_UDATA(&ucore, buf, cmd.response, sizeof(cmd), sizeof(resp));
+ INIT_UDATA(&ucore, buf, (unsigned long)cmd.response, sizeof(cmd), sizeof(resp));
INIT_UDATA(&uhw, buf + sizeof(cmd),
(unsigned long)cmd.response + sizeof(resp),
@@ -1741,66 +1743,65 @@ ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file,
return in_len;
}
-ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
- const char __user *buf, int in_len,
- int out_len)
-{
- struct ib_uverbs_create_qp cmd;
- struct ib_uverbs_create_qp_resp resp;
- struct ib_udata udata;
- struct ib_uqp_object *obj;
- struct ib_device *device;
- struct ib_pd *pd = NULL;
- struct ib_xrcd *xrcd = NULL;
- struct ib_uobject *uninitialized_var(xrcd_uobj);
- struct ib_cq *scq = NULL, *rcq = NULL;
- struct ib_srq *srq = NULL;
- struct ib_qp *qp;
- struct ib_qp_init_attr attr;
- int ret;
-
- if (out_len < sizeof resp)
- return -ENOSPC;
-
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
+static int create_qp(struct ib_uverbs_file *file,
+ struct ib_udata *ucore,
+ struct ib_udata *uhw,
+ struct ib_uverbs_ex_create_qp *cmd,
+ size_t cmd_sz,
+ int (*cb)(struct ib_uverbs_file *file,
+ struct ib_uverbs_ex_create_qp_resp *resp,
+ struct ib_udata *udata),
+ void *context)
+{
+ struct ib_uqp_object *obj;
+ struct ib_device *device;
+ struct ib_pd *pd = NULL;
+ struct ib_xrcd *xrcd = NULL;
+ struct ib_uobject *uninitialized_var(xrcd_uobj);
+ struct ib_cq *scq = NULL, *rcq = NULL;
+ struct ib_srq *srq = NULL;
+ struct ib_qp *qp;
+ char *buf;
+ struct ib_qp_init_attr attr;
+ struct ib_uverbs_ex_create_qp_resp resp;
+ int ret;
- if (cmd.qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW))
+ if (cmd->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW))
return -EPERM;
- INIT_UDATA(&udata, buf + sizeof cmd,
- (unsigned long) cmd.response + sizeof resp,
- in_len - sizeof cmd, out_len - sizeof resp);
-
obj = kzalloc(sizeof *obj, GFP_KERNEL);
if (!obj)
return -ENOMEM;
- init_uobj(&obj->uevent.uobject, cmd.user_handle, file->ucontext, &qp_lock_class);
+ init_uobj(&obj->uevent.uobject, cmd->user_handle, file->ucontext,
+ &qp_lock_class);
down_write(&obj->uevent.uobject.mutex);
- if (cmd.qp_type == IB_QPT_XRC_TGT) {
- xrcd = idr_read_xrcd(cmd.pd_handle, file->ucontext, &xrcd_uobj);
+ if (cmd->qp_type == IB_QPT_XRC_TGT) {
+ xrcd = idr_read_xrcd(cmd->pd_handle, file->ucontext,
+ &xrcd_uobj);
if (!xrcd) {
ret = -EINVAL;
goto err_put;
}
device = xrcd->device;
} else {
- if (cmd.qp_type == IB_QPT_XRC_INI) {
- cmd.max_recv_wr = cmd.max_recv_sge = 0;
+ if (cmd->qp_type == IB_QPT_XRC_INI) {
+ cmd->max_recv_wr = 0;
+ cmd->max_recv_sge = 0;
} else {
- if (cmd.is_srq) {
- srq = idr_read_srq(cmd.srq_handle, file->ucontext);
+ if (cmd->is_srq) {
+ srq = idr_read_srq(cmd->srq_handle,
+ file->ucontext);
if (!srq || srq->srq_type != IB_SRQT_BASIC) {
ret = -EINVAL;
goto err_put;
}
}
- if (cmd.recv_cq_handle != cmd.send_cq_handle) {
- rcq = idr_read_cq(cmd.recv_cq_handle, file->ucontext, 0);
+ if (cmd->recv_cq_handle != cmd->send_cq_handle) {
+ rcq = idr_read_cq(cmd->recv_cq_handle,
+ file->ucontext, 0);
if (!rcq) {
ret = -EINVAL;
goto err_put;
@@ -1808,9 +1809,9 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
}
}
- scq = idr_read_cq(cmd.send_cq_handle, file->ucontext, !!rcq);
+ scq = idr_read_cq(cmd->send_cq_handle, file->ucontext, !!rcq);
rcq = rcq ?: scq;
- pd = idr_read_pd(cmd.pd_handle, file->ucontext);
+ pd = idr_read_pd(cmd->pd_handle, file->ucontext);
if (!pd || !scq) {
ret = -EINVAL;
goto err_put;
@@ -1825,31 +1826,49 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
attr.recv_cq = rcq;
attr.srq = srq;
attr.xrcd = xrcd;
- attr.sq_sig_type = cmd.sq_sig_all ? IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR;
- attr.qp_type = cmd.qp_type;
+ attr.sq_sig_type = cmd->sq_sig_all ? IB_SIGNAL_ALL_WR :
+ IB_SIGNAL_REQ_WR;
+ attr.qp_type = cmd->qp_type;
attr.create_flags = 0;
- attr.cap.max_send_wr = cmd.max_send_wr;
- attr.cap.max_recv_wr = cmd.max_recv_wr;
- attr.cap.max_send_sge = cmd.max_send_sge;
- attr.cap.max_recv_sge = cmd.max_recv_sge;
- attr.cap.max_inline_data = cmd.max_inline_data;
+ attr.cap.max_send_wr = cmd->max_send_wr;
+ attr.cap.max_recv_wr = cmd->max_recv_wr;
+ attr.cap.max_send_sge = cmd->max_send_sge;
+ attr.cap.max_recv_sge = cmd->max_recv_sge;
+ attr.cap.max_inline_data = cmd->max_inline_data;
obj->uevent.events_reported = 0;
INIT_LIST_HEAD(&obj->uevent.event_list);
INIT_LIST_HEAD(&obj->mcast_list);
- if (cmd.qp_type == IB_QPT_XRC_TGT)
+ if (cmd_sz >= offsetof(typeof(*cmd), create_flags) +
+ sizeof(cmd->create_flags))
+ attr.create_flags = cmd->create_flags;
+
+ if (attr.create_flags & ~IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) {
+ ret = -EINVAL;
+ goto err_put;
+ }
+
+ buf = (void *)cmd + sizeof(*cmd);
+ if (cmd_sz > sizeof(*cmd))
+ if (!(buf[0] == 0 && !memcmp(buf, buf + 1,
+ cmd_sz - sizeof(*cmd) - 1))) {
+ ret = -EINVAL;
+ goto err_put;
+ }
+
+ if (cmd->qp_type == IB_QPT_XRC_TGT)
qp = ib_create_qp(pd, &attr);
else
- qp = device->create_qp(pd, &attr, &udata);
+ qp = device->create_qp(pd, &attr, uhw);
if (IS_ERR(qp)) {
ret = PTR_ERR(qp);
goto err_put;
}
- if (cmd.qp_type != IB_QPT_XRC_TGT) {
+ if (cmd->qp_type != IB_QPT_XRC_TGT) {
qp->real_qp = qp;
qp->device = device;
qp->pd = pd;
@@ -1875,19 +1894,20 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
goto err_destroy;
memset(&resp, 0, sizeof resp);
- resp.qpn = qp->qp_num;
- resp.qp_handle = obj->uevent.uobject.id;
- resp.max_recv_sge = attr.cap.max_recv_sge;
- resp.max_send_sge = attr.cap.max_send_sge;
- resp.max_recv_wr = attr.cap.max_recv_wr;
- resp.max_send_wr = attr.cap.max_send_wr;
- resp.max_inline_data = attr.cap.max_inline_data;
+ resp.base.qpn = qp->qp_num;
+ resp.base.qp_handle = obj->uevent.uobject.id;
+ resp.base.max_recv_sge = attr.cap.max_recv_sge;
+ resp.base.max_send_sge = attr.cap.max_send_sge;
+ resp.base.max_recv_wr = attr.cap.max_recv_wr;
+ resp.base.max_send_wr = attr.cap.max_send_wr;
+ resp.base.max_inline_data = attr.cap.max_inline_data;
- if (copy_to_user((void __user *) (unsigned long) cmd.response,
- &resp, sizeof resp)) {
- ret = -EFAULT;
- goto err_copy;
- }
+ resp.response_length = offsetof(typeof(resp), response_length) +
+ sizeof(resp.response_length);
+
+ ret = cb(file, &resp, ucore);
+ if (ret)
+ goto err_cb;
if (xrcd) {
obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object,
@@ -1913,9 +1933,8 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
up_write(&obj->uevent.uobject.mutex);
- return in_len;
-
-err_copy:
+ return 0;
+err_cb:
idr_remove_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject);
err_destroy:
@@ -1937,6 +1956,113 @@ err_put:
return ret;
}
+static int ib_uverbs_create_qp_cb(struct ib_uverbs_file *file,
+ struct ib_uverbs_ex_create_qp_resp *resp,
+ struct ib_udata *ucore)
+{
+ if (ib_copy_to_udata(ucore, &resp->base, sizeof(resp->base)))
+ return -EFAULT;
+
+ return 0;
+}
+
+ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
+ struct ib_device *ib_dev,
+ const char __user *buf, int in_len,
+ int out_len)
+{
+ struct ib_uverbs_create_qp cmd;
+ struct ib_uverbs_ex_create_qp cmd_ex;
+ struct ib_udata ucore;
+ struct ib_udata uhw;
+ ssize_t resp_size = sizeof(struct ib_uverbs_create_qp_resp);
+ int err;
+
+ if (out_len < resp_size)
+ return -ENOSPC;
+
+ if (copy_from_user(&cmd, buf, sizeof(cmd)))
+ return -EFAULT;
+
+ INIT_UDATA(&ucore, buf, (unsigned long)cmd.response, sizeof(cmd),
+ resp_size);
+ INIT_UDATA(&uhw, buf + sizeof(cmd),
+ (unsigned long)cmd.response + resp_size,
+ in_len - sizeof(cmd), out_len - resp_size);
+
+ memset(&cmd_ex, 0, sizeof(cmd_ex));
+ cmd_ex.user_handle = cmd.user_handle;
+ cmd_ex.pd_handle = cmd.pd_handle;
+ cmd_ex.send_cq_handle = cmd.send_cq_handle;
+ cmd_ex.recv_cq_handle = cmd.recv_cq_handle;
+ cmd_ex.srq_handle = cmd.srq_handle;
+ cmd_ex.max_send_wr = cmd.max_send_wr;
+ cmd_ex.max_recv_wr = cmd.max_recv_wr;
+ cmd_ex.max_send_sge = cmd.max_send_sge;
+ cmd_ex.max_recv_sge = cmd.max_recv_sge;
+ cmd_ex.max_inline_data = cmd.max_inline_data;
+ cmd_ex.sq_sig_all = cmd.sq_sig_all;
+ cmd_ex.qp_type = cmd.qp_type;
+ cmd_ex.is_srq = cmd.is_srq;
+
+ err = create_qp(file, &ucore, &uhw, &cmd_ex,
+ offsetof(typeof(cmd_ex), is_srq) +
+ sizeof(cmd.is_srq), ib_uverbs_create_qp_cb,
+ NULL);
+
+ if (err)
+ return err;
+
+ return in_len;
+}
+
+static int ib_uverbs_ex_create_qp_cb(struct ib_uverbs_file *file,
+ struct ib_uverbs_ex_create_qp_resp *resp,
+ struct ib_udata *ucore)
+{
+ if (ib_copy_to_udata(ucore, resp, resp->response_length))
+ return -EFAULT;
+
+ return 0;
+}
+
+int ib_uverbs_ex_create_qp(struct ib_uverbs_file *file,
+ struct ib_device *ib_dev,
+ struct ib_udata *ucore,
+ struct ib_udata *uhw)
+{
+ struct ib_uverbs_ex_create_qp_resp resp;
+ struct ib_uverbs_ex_create_qp cmd = {0};
+ int err;
+
+ if (ucore->inlen < (offsetof(typeof(cmd), comp_mask) +
+ sizeof(cmd.comp_mask)))
+ return -EINVAL;
+
+ err = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen));
+ if (err)
+ return err;
+
+ if (cmd.comp_mask)
+ return -EINVAL;
+
+ if (cmd.reserved)
+ return -EINVAL;
+
+ if (ucore->outlen < (offsetof(typeof(resp), response_length) +
+ sizeof(resp.response_length)))
+ return -ENOSPC;
+
+ err = create_qp(file, ucore, uhw, &cmd,
+ min(ucore->inlen, sizeof(cmd)),
+ ib_uverbs_ex_create_qp_cb, NULL);
+
+ if (err)
+ return err;
+
+ return 0;
+}
+
ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file,
struct ib_device *ib_dev,
const char __user *buf, int in_len, int out_len)
@@ -2221,7 +2347,7 @@ ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file,
attr->alt_ah_attr.port_num = cmd.alt_dest.port_num;
if (qp->real_qp == qp) {
- ret = ib_resolve_eth_l2_attrs(qp, attr, &cmd.attr_mask);
+ ret = ib_resolve_eth_dmac(qp, attr, &cmd.attr_mask);
if (ret)
goto release_qp;
ret = qp->device->modify_qp(qp, attr,
@@ -2303,6 +2429,12 @@ ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file,
return in_len;
}
+static void *alloc_wr(size_t wr_size, __u32 num_sge)
+{
+ return kmalloc(ALIGN(wr_size, sizeof (struct ib_sge)) +
+ num_sge * sizeof (struct ib_sge), GFP_KERNEL);
+}
+
ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
struct ib_device *ib_dev,
const char __user *buf, int in_len,
@@ -2316,6 +2448,7 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
int i, sg_ind;
int is_ud;
ssize_t ret = -EINVAL;
+ size_t next_size;
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
@@ -2351,14 +2484,87 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
goto out_put;
}
- next = kmalloc(ALIGN(sizeof *next, sizeof (struct ib_sge)) +
- user_wr->num_sge * sizeof (struct ib_sge),
- GFP_KERNEL);
- if (!next) {
- ret = -ENOMEM;
+ if (is_ud) {
+ struct ib_ud_wr *ud;
+
+ if (user_wr->opcode != IB_WR_SEND &&
+ user_wr->opcode != IB_WR_SEND_WITH_IMM) {
+ ret = -EINVAL;
+ goto out_put;
+ }
+
+ next_size = sizeof(*ud);
+ ud = alloc_wr(next_size, user_wr->num_sge);
+ if (!ud) {
+ ret = -ENOMEM;
+ goto out_put;
+ }
+
+ ud->ah = idr_read_ah(user_wr->wr.ud.ah, file->ucontext);
+ if (!ud->ah) {
+ kfree(ud);
+ ret = -EINVAL;
+ goto out_put;
+ }
+ ud->remote_qpn = user_wr->wr.ud.remote_qpn;
+ ud->remote_qkey = user_wr->wr.ud.remote_qkey;
+
+ next = &ud->wr;
+ } else if (user_wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM ||
+ user_wr->opcode == IB_WR_RDMA_WRITE ||
+ user_wr->opcode == IB_WR_RDMA_READ) {
+ struct ib_rdma_wr *rdma;
+
+ next_size = sizeof(*rdma);
+ rdma = alloc_wr(next_size, user_wr->num_sge);
+ if (!rdma) {
+ ret = -ENOMEM;
+ goto out_put;
+ }
+
+ rdma->remote_addr = user_wr->wr.rdma.remote_addr;
+ rdma->rkey = user_wr->wr.rdma.rkey;
+
+ next = &rdma->wr;
+ } else if (user_wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
+ user_wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD) {
+ struct ib_atomic_wr *atomic;
+
+ next_size = sizeof(*atomic);
+ atomic = alloc_wr(next_size, user_wr->num_sge);
+ if (!atomic) {
+ ret = -ENOMEM;
+ goto out_put;
+ }
+
+ atomic->remote_addr = user_wr->wr.atomic.remote_addr;
+ atomic->compare_add = user_wr->wr.atomic.compare_add;
+ atomic->swap = user_wr->wr.atomic.swap;
+ atomic->rkey = user_wr->wr.atomic.rkey;
+
+ next = &atomic->wr;
+ } else if (user_wr->opcode == IB_WR_SEND ||
+ user_wr->opcode == IB_WR_SEND_WITH_IMM ||
+ user_wr->opcode == IB_WR_SEND_WITH_INV) {
+ next_size = sizeof(*next);
+ next = alloc_wr(next_size, user_wr->num_sge);
+ if (!next) {
+ ret = -ENOMEM;
+ goto out_put;
+ }
+ } else {
+ ret = -EINVAL;
goto out_put;
}
+ if (user_wr->opcode == IB_WR_SEND_WITH_IMM ||
+ user_wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) {
+ next->ex.imm_data =
+ (__be32 __force) user_wr->ex.imm_data;
+ } else if (user_wr->opcode == IB_WR_SEND_WITH_INV) {
+ next->ex.invalidate_rkey = user_wr->ex.invalidate_rkey;
+ }
+
if (!last)
wr = next;
else
@@ -2371,63 +2577,9 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
next->opcode = user_wr->opcode;
next->send_flags = user_wr->send_flags;
- if (is_ud) {
- if (next->opcode != IB_WR_SEND &&
- next->opcode != IB_WR_SEND_WITH_IMM) {
- ret = -EINVAL;
- goto out_put;
- }
-
- next->wr.ud.ah = idr_read_ah(user_wr->wr.ud.ah,
- file->ucontext);
- if (!next->wr.ud.ah) {
- ret = -EINVAL;
- goto out_put;
- }
- next->wr.ud.remote_qpn = user_wr->wr.ud.remote_qpn;
- next->wr.ud.remote_qkey = user_wr->wr.ud.remote_qkey;
- if (next->opcode == IB_WR_SEND_WITH_IMM)
- next->ex.imm_data =
- (__be32 __force) user_wr->ex.imm_data;
- } else {
- switch (next->opcode) {
- case IB_WR_RDMA_WRITE_WITH_IMM:
- next->ex.imm_data =
- (__be32 __force) user_wr->ex.imm_data;
- case IB_WR_RDMA_WRITE:
- case IB_WR_RDMA_READ:
- next->wr.rdma.remote_addr =
- user_wr->wr.rdma.remote_addr;
- next->wr.rdma.rkey =
- user_wr->wr.rdma.rkey;
- break;
- case IB_WR_SEND_WITH_IMM:
- next->ex.imm_data =
- (__be32 __force) user_wr->ex.imm_data;
- break;
- case IB_WR_SEND_WITH_INV:
- next->ex.invalidate_rkey =
- user_wr->ex.invalidate_rkey;
- break;
- case IB_WR_ATOMIC_CMP_AND_SWP:
- case IB_WR_ATOMIC_FETCH_AND_ADD:
- next->wr.atomic.remote_addr =
- user_wr->wr.atomic.remote_addr;
- next->wr.atomic.compare_add =
- user_wr->wr.atomic.compare_add;
- next->wr.atomic.swap = user_wr->wr.atomic.swap;
- next->wr.atomic.rkey = user_wr->wr.atomic.rkey;
- case IB_WR_SEND:
- break;
- default:
- ret = -EINVAL;
- goto out_put;
- }
- }
-
if (next->num_sge) {
next->sg_list = (void *) next +
- ALIGN(sizeof *next, sizeof (struct ib_sge));
+ ALIGN(next_size, sizeof(struct ib_sge));
if (copy_from_user(next->sg_list,
buf + sizeof cmd +
cmd.wr_count * cmd.wqe_size +
@@ -2458,8 +2610,8 @@ out_put:
put_qp_read(qp);
while (wr) {
- if (is_ud && wr->wr.ud.ah)
- put_ah_read(wr->wr.ud.ah);
+ if (is_ud && ud_wr(wr)->ah)
+ put_ah_read(ud_wr(wr)->ah);
next = wr->next;
kfree(wr);
wr = next;
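The unwind above can no longer reach into a union inside struct ib_send_wr; it downcasts with ud_wr() instead. Each opcode-specific request (ib_ud_wr, ib_rdma_wr, ib_atomic_wr) embeds a struct ib_send_wr named wr as its first member, and the accessors in include/rdma/ib_verbs.h recover the container via container_of(), essentially:

	static inline struct ib_ud_wr *ud_wr(struct ib_send_wr *wr)
	{
		/* "wr" is the embedded struct ib_send_wr member. */
		return container_of(wr, struct ib_ud_wr, wr);
	}

rdma_wr() and atomic_wr() follow the same pattern, which is why post_send can build each container once and then chain only the embedded generic WRs.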
@@ -2698,7 +2850,6 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,
attr.grh.sgid_index = cmd.attr.grh.sgid_index;
attr.grh.hop_limit = cmd.attr.grh.hop_limit;
attr.grh.traffic_class = cmd.attr.grh.traffic_class;
- attr.vlan_id = 0;
memset(&attr.dmac, 0, sizeof(attr.dmac));
memcpy(attr.grh.dgid.raw, cmd.attr.grh.dgid, 16);
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index c29a660c7..e3ef28861 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -127,6 +127,7 @@ static int (*uverbs_ex_cmd_table[])(struct ib_uverbs_file *file,
[IB_USER_VERBS_EX_CMD_DESTROY_FLOW] = ib_uverbs_ex_destroy_flow,
[IB_USER_VERBS_EX_CMD_QUERY_DEVICE] = ib_uverbs_ex_query_device,
[IB_USER_VERBS_EX_CMD_CREATE_CQ] = ib_uverbs_ex_create_cq,
+ [IB_USER_VERBS_EX_CMD_CREATE_QP] = ib_uverbs_ex_create_qp,
};
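Adding the handler to uverbs_ex_cmd_table is all the registration the new verb needs: ib_uverbs_write() recognizes the extended-command flag in the user header, splits the buffer into core and driver-private ib_udata regions, and indexes this table. A rough sketch of that dispatch, with header parsing and bounds checks elided (ex_cmd_in_range() is a hypothetical stand-in for those checks):

	/* Sketch only: the real checks live in ib_uverbs_write(). */
	if (ex_cmd_in_range(command) && uverbs_ex_cmd_table[command])
		return uverbs_ex_cmd_table[command](file, ib_dev, &ucore, &uhw);
	return -EINVAL;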
static void ib_uverbs_add_one(struct ib_device *device);
diff --git a/drivers/infiniband/core/uverbs_marshall.c b/drivers/infiniband/core/uverbs_marshall.c
index abd972474..7d2f14c9b 100644
--- a/drivers/infiniband/core/uverbs_marshall.c
+++ b/drivers/infiniband/core/uverbs_marshall.c
@@ -141,8 +141,8 @@ void ib_copy_path_rec_from_user(struct ib_sa_path_rec *dst,
dst->preference = src->preference;
dst->packet_life_time_selector = src->packet_life_time_selector;
- memset(dst->smac, 0, sizeof(dst->smac));
memset(dst->dmac, 0, sizeof(dst->dmac));
- dst->vlan_id = 0xffff;
+ dst->net = NULL;
+ dst->ifindex = 0;
}
EXPORT_SYMBOL(ib_copy_path_rec_from_user);
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index e1f2c9887..545906dec 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -41,6 +41,9 @@
#include <linux/export.h>
#include <linux/string.h>
#include <linux/slab.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <net/addrconf.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_cache.h>
@@ -70,7 +73,7 @@ static const char * const ib_events[] = {
[IB_EVENT_GID_CHANGE] = "GID changed",
};
-const char *ib_event_msg(enum ib_event_type event)
+const char *__attribute_const__ ib_event_msg(enum ib_event_type event)
{
size_t index = event;
@@ -104,7 +107,7 @@ static const char * const wc_statuses[] = {
[IB_WC_GENERAL_ERR] = "general error",
};
-const char *ib_wc_status_msg(enum ib_wc_status status)
+const char *__attribute_const__ ib_wc_status_msg(enum ib_wc_status status)
{
size_t index = status;
@@ -308,6 +311,35 @@ struct ib_ah *ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
}
EXPORT_SYMBOL(ib_create_ah);
+struct find_gid_index_context {
+ u16 vlan_id;
+};
+
+static bool find_gid_index(const union ib_gid *gid,
+ const struct ib_gid_attr *gid_attr,
+ void *context)
+{
+ struct find_gid_index_context *ctx =
+ (struct find_gid_index_context *)context;
+
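+	/* Accept only GID table entries whose netdev agrees with the
+	 * requested VLAN state: an untagged request (vlan_id == 0xffff)
+	 * must not match a VLAN device, and a tagged request must match
+	 * a VLAN device carrying exactly that VLAN ID.
+	 */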
+ if ((!!(ctx->vlan_id != 0xffff) == !is_vlan_dev(gid_attr->ndev)) ||
+ (is_vlan_dev(gid_attr->ndev) &&
+ vlan_dev_vlan_id(gid_attr->ndev) != ctx->vlan_id))
+ return false;
+
+ return true;
+}
+
+static int get_sgid_index_from_eth(struct ib_device *device, u8 port_num,
+ u16 vlan_id, const union ib_gid *sgid,
+ u16 *gid_index)
+{
+ struct find_gid_index_context context = {.vlan_id = vlan_id};
+
+ return ib_find_gid_by_filter(device, sgid, port_num, find_gid_index,
+ &context, gid_index);
+}
+
int ib_init_ah_from_wc(struct ib_device *device, u8 port_num,
const struct ib_wc *wc, const struct ib_grh *grh,
struct ib_ah_attr *ah_attr)
@@ -318,21 +350,30 @@ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num,
memset(ah_attr, 0, sizeof *ah_attr);
if (rdma_cap_eth_ah(device, port_num)) {
+ u16 vlan_id = wc->wc_flags & IB_WC_WITH_VLAN ?
+ wc->vlan_id : 0xffff;
+
if (!(wc->wc_flags & IB_WC_GRH))
return -EPROTOTYPE;
- if (wc->wc_flags & IB_WC_WITH_SMAC &&
- wc->wc_flags & IB_WC_WITH_VLAN) {
- memcpy(ah_attr->dmac, wc->smac, ETH_ALEN);
- ah_attr->vlan_id = wc->vlan_id;
- } else {
+ if (!(wc->wc_flags & IB_WC_WITH_SMAC) ||
+ !(wc->wc_flags & IB_WC_WITH_VLAN)) {
ret = rdma_addr_find_dmac_by_grh(&grh->dgid, &grh->sgid,
- ah_attr->dmac, &ah_attr->vlan_id);
+ ah_attr->dmac,
+ wc->wc_flags & IB_WC_WITH_VLAN ?
+ NULL : &vlan_id,
+ 0);
if (ret)
return ret;
}
- } else {
- ah_attr->vlan_id = 0xffff;
+
+ ret = get_sgid_index_from_eth(device, port_num, vlan_id,
+ &grh->dgid, &gid_index);
+ if (ret)
+ return ret;
+
+ if (wc->wc_flags & IB_WC_WITH_SMAC)
+ memcpy(ah_attr->dmac, wc->smac, ETH_ALEN);
}
ah_attr->dlid = wc->slid;
@@ -344,10 +385,13 @@ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num,
ah_attr->ah_flags = IB_AH_GRH;
ah_attr->grh.dgid = grh->sgid;
- ret = ib_find_cached_gid(device, &grh->dgid, &port_num,
- &gid_index);
- if (ret)
- return ret;
+ if (!rdma_cap_eth_ah(device, port_num)) {
+ ret = ib_find_cached_gid_by_port(device, &grh->dgid,
+ port_num, NULL,
+ &gid_index);
+ if (ret)
+ return ret;
+ }
ah_attr->grh.sgid_index = (u8) gid_index;
flow_class = be32_to_cpu(grh->version_tclass_flow);
@@ -617,9 +661,7 @@ EXPORT_SYMBOL(ib_create_qp);
static const struct {
int valid;
enum ib_qp_attr_mask req_param[IB_QPT_MAX];
- enum ib_qp_attr_mask req_param_add_eth[IB_QPT_MAX];
enum ib_qp_attr_mask opt_param[IB_QPT_MAX];
- enum ib_qp_attr_mask opt_param_add_eth[IB_QPT_MAX];
} qp_state_table[IB_QPS_ERR + 1][IB_QPS_ERR + 1] = {
[IB_QPS_RESET] = {
[IB_QPS_RESET] = { .valid = 1 },
@@ -700,12 +742,6 @@ static const struct {
IB_QP_MAX_DEST_RD_ATOMIC |
IB_QP_MIN_RNR_TIMER),
},
- .req_param_add_eth = {
- [IB_QPT_RC] = (IB_QP_SMAC),
- [IB_QPT_UC] = (IB_QP_SMAC),
- [IB_QPT_XRC_INI] = (IB_QP_SMAC),
- [IB_QPT_XRC_TGT] = (IB_QP_SMAC)
- },
.opt_param = {
[IB_QPT_UD] = (IB_QP_PKEY_INDEX |
IB_QP_QKEY),
@@ -726,21 +762,7 @@ static const struct {
[IB_QPT_GSI] = (IB_QP_PKEY_INDEX |
IB_QP_QKEY),
},
- .opt_param_add_eth = {
- [IB_QPT_RC] = (IB_QP_ALT_SMAC |
- IB_QP_VID |
- IB_QP_ALT_VID),
- [IB_QPT_UC] = (IB_QP_ALT_SMAC |
- IB_QP_VID |
- IB_QP_ALT_VID),
- [IB_QPT_XRC_INI] = (IB_QP_ALT_SMAC |
- IB_QP_VID |
- IB_QP_ALT_VID),
- [IB_QPT_XRC_TGT] = (IB_QP_ALT_SMAC |
- IB_QP_VID |
- IB_QP_ALT_VID)
- }
- }
+ },
},
[IB_QPS_RTR] = {
[IB_QPS_RESET] = { .valid = 1 },
@@ -962,13 +984,6 @@ int ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state,
req_param = qp_state_table[cur_state][next_state].req_param[type];
opt_param = qp_state_table[cur_state][next_state].opt_param[type];
- if (ll == IB_LINK_LAYER_ETHERNET) {
- req_param |= qp_state_table[cur_state][next_state].
- req_param_add_eth[type];
- opt_param |= qp_state_table[cur_state][next_state].
- opt_param_add_eth[type];
- }
-
if ((mask & req_param) != req_param)
return 0;
@@ -979,40 +994,52 @@ int ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state,
}
EXPORT_SYMBOL(ib_modify_qp_is_ok);
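With the Ethernet-only attribute tables removed, validity is decided purely by the two mask checks above: every bit in req_param must be present in mask, and mask must not contain anything outside req_param | opt_param. As a worked example, a caller validating the usual RESET to INIT transition of an RC QP passes the three attributes the table requires for that transition:

	/* Illustrative call; IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_ACCESS_FLAGS
	 * is the req_param entry for RESET->INIT on an RC QP. */
	if (!ib_modify_qp_is_ok(IB_QPS_RESET, IB_QPS_INIT, IB_QPT_RC,
				IB_QP_PKEY_INDEX | IB_QP_PORT |
				IB_QP_ACCESS_FLAGS,
				IB_LINK_LAYER_INFINIBAND))
		return -EINVAL;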
-int ib_resolve_eth_l2_attrs(struct ib_qp *qp,
- struct ib_qp_attr *qp_attr, int *qp_attr_mask)
+int ib_resolve_eth_dmac(struct ib_qp *qp,
+ struct ib_qp_attr *qp_attr, int *qp_attr_mask)
{
int ret = 0;
- union ib_gid sgid;
- if ((*qp_attr_mask & IB_QP_AV) &&
- (rdma_cap_eth_ah(qp->device, qp_attr->ah_attr.port_num))) {
- ret = ib_query_gid(qp->device, qp_attr->ah_attr.port_num,
- qp_attr->ah_attr.grh.sgid_index, &sgid);
- if (ret)
- goto out;
+ if (*qp_attr_mask & IB_QP_AV) {
+ if (qp_attr->ah_attr.port_num < rdma_start_port(qp->device) ||
+ qp_attr->ah_attr.port_num > rdma_end_port(qp->device))
+ return -EINVAL;
+
+ if (!rdma_cap_eth_ah(qp->device, qp_attr->ah_attr.port_num))
+ return 0;
+
if (rdma_link_local_addr((struct in6_addr *)qp_attr->ah_attr.grh.dgid.raw)) {
- rdma_get_ll_mac((struct in6_addr *)qp_attr->ah_attr.grh.dgid.raw, qp_attr->ah_attr.dmac);
- rdma_get_ll_mac((struct in6_addr *)sgid.raw, qp_attr->smac);
- if (!(*qp_attr_mask & IB_QP_VID))
- qp_attr->vlan_id = rdma_get_vlan_id(&sgid);
+ rdma_get_ll_mac((struct in6_addr *)qp_attr->ah_attr.grh.dgid.raw,
+ qp_attr->ah_attr.dmac);
} else {
- ret = rdma_addr_find_dmac_by_grh(&sgid, &qp_attr->ah_attr.grh.dgid,
- qp_attr->ah_attr.dmac, &qp_attr->vlan_id);
- if (ret)
- goto out;
- ret = rdma_addr_find_smac_by_sgid(&sgid, qp_attr->smac, NULL);
- if (ret)
+ union ib_gid sgid;
+ struct ib_gid_attr sgid_attr;
+ int ifindex;
+
+ ret = ib_query_gid(qp->device,
+ qp_attr->ah_attr.port_num,
+ qp_attr->ah_attr.grh.sgid_index,
+ &sgid, &sgid_attr);
+
+ if (ret || !sgid_attr.ndev) {
+ if (!ret)
+ ret = -ENXIO;
goto out;
+ }
+
+ ifindex = sgid_attr.ndev->ifindex;
+
+ ret = rdma_addr_find_dmac_by_grh(&sgid,
+ &qp_attr->ah_attr.grh.dgid,
+ qp_attr->ah_attr.dmac,
+ NULL, ifindex);
+
+ dev_put(sgid_attr.ndev);
}
- *qp_attr_mask |= IB_QP_SMAC;
- if (qp_attr->vlan_id < 0xFFFF)
- *qp_attr_mask |= IB_QP_VID;
}
out:
return ret;
}
-EXPORT_SYMBOL(ib_resolve_eth_l2_attrs);
+EXPORT_SYMBOL(ib_resolve_eth_dmac);
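The link-local branch above needs no resolution step because a link-local GID embeds the port MAC as a modified EUI-64 interface identifier. The helper it calls, rdma_get_ll_mac() from include/rdma/ib_addr.h, simply reverses that expansion (skip the ff:fe filler bytes and flip the universal/local bit back):

	static inline void rdma_get_ll_mac(struct in6_addr *addr, u8 *mac)
	{
		memcpy(mac, &addr->s6_addr[8], 3);	/* top 3 MAC bytes */
		memcpy(mac + 3, &addr->s6_addr[13], 3);	/* bottom 3 MAC bytes */
		mac[0] ^= 2;				/* undo the u/l bit flip */
	}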
int ib_modify_qp(struct ib_qp *qp,
@@ -1021,7 +1048,7 @@ int ib_modify_qp(struct ib_qp *qp,
{
int ret;
- ret = ib_resolve_eth_l2_attrs(qp, qp_attr, &qp_attr_mask);
+ ret = ib_resolve_eth_dmac(qp, qp_attr, &qp_attr_mask);
if (ret)
return ret;
@@ -1253,31 +1280,6 @@ struct ib_mr *ib_alloc_mr(struct ib_pd *pd,
}
EXPORT_SYMBOL(ib_alloc_mr);
-struct ib_fast_reg_page_list *ib_alloc_fast_reg_page_list(struct ib_device *device,
- int max_page_list_len)
-{
- struct ib_fast_reg_page_list *page_list;
-
- if (!device->alloc_fast_reg_page_list)
- return ERR_PTR(-ENOSYS);
-
- page_list = device->alloc_fast_reg_page_list(device, max_page_list_len);
-
- if (!IS_ERR(page_list)) {
- page_list->device = device;
- page_list->max_page_list_len = max_page_list_len;
- }
-
- return page_list;
-}
-EXPORT_SYMBOL(ib_alloc_fast_reg_page_list);
-
-void ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list)
-{
- page_list->device->free_fast_reg_page_list(page_list);
-}
-EXPORT_SYMBOL(ib_free_fast_reg_page_list);
-
/* Memory windows */
struct ib_mw *ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type)
@@ -1469,3 +1471,111 @@ int ib_check_mr_status(struct ib_mr *mr, u32 check_mask,
mr->device->check_mr_status(mr, check_mask, mr_status) : -ENOSYS;
}
EXPORT_SYMBOL(ib_check_mr_status);
+
+/**
+ * ib_map_mr_sg() - Map the largest prefix of a DMA-mapped SG list
+ * and set it as the memory region's mapping.
+ * @mr: memory region
+ * @sg: dma mapped scatterlist
+ * @sg_nents: number of entries in sg
+ * @page_size: page vector desired page size
+ *
+ * Constraints:
+ * - The first sg element is allowed to have an offset.
+ * - Each sg element must be aligned to page_size (or physically
+ *   contiguous with the previous element). If an sg element has a
+ *   non-contiguous offset, the mapping prefix will not include it.
+ * - The last sg element is allowed to have a length less than page_size.
+ * - If the total byte length of the sg list exceeds
+ *   mr->max_num_sg * page_size, only the first max_num_sg entries
+ *   will be mapped.
+ *
+ * Returns the number of sg elements that were mapped to the memory region.
+ *
+ * After this completes successfully, the memory region
+ * is ready for registration.
+ */
+int ib_map_mr_sg(struct ib_mr *mr,
+ struct scatterlist *sg,
+ int sg_nents,
+ unsigned int page_size)
+{
+ if (unlikely(!mr->device->map_mr_sg))
+ return -ENOSYS;
+
+ mr->page_size = page_size;
+
+ return mr->device->map_mr_sg(mr, sg, sg_nents);
+}
+EXPORT_SYMBOL(ib_map_mr_sg);
+
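For context, a hedged sketch of the calling sequence this API expects; dev, pd, sgl and nents are assumed to come from the caller, and the MR still has to be registered on the QP with an IB_WR_REG_MR work request before use:

	static int map_sgl_to_mr(struct ib_device *dev, struct ib_pd *pd,
				 struct scatterlist *sgl, int nents)
	{
		struct ib_mr *mr;
		int n;

		mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, nents);
		if (IS_ERR(mr))
			return PTR_ERR(mr);

		n = ib_dma_map_sg(dev, sgl, nents, DMA_TO_DEVICE);
		if (!n) {
			ib_dereg_mr(mr);
			return -ENOMEM;
		}

		/* May map only an aligned prefix; the return value says how far. */
		n = ib_map_mr_sg(mr, sgl, n, PAGE_SIZE);
		if (n < 0) {
			ib_dma_unmap_sg(dev, sgl, nents, DMA_TO_DEVICE);
			ib_dereg_mr(mr);
			return n;
		}

		return n;	/* number of sg entries actually mapped */
	}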
+/**
+ * ib_sg_to_pages() - Convert the largest prefix of a sg list
+ * to a page vector
+ * @mr: memory region
+ * @sgl: dma mapped scatterlist
+ * @sg_nents: number of entries in sgl
+ * @set_page: driver page assignment function pointer
+ *
+ * Core service helper for drivers to convert the largest
+ * prefix of a given sg list to a page vector. The converted
+ * prefix is the longest one that meets the requirements of
+ * ib_map_mr_sg().
+ *
+ * Returns the number of sg elements that were assigned to
+ * a page vector.
+ */
+int ib_sg_to_pages(struct ib_mr *mr,
+ struct scatterlist *sgl,
+ int sg_nents,
+ int (*set_page)(struct ib_mr *, u64))
+{
+ struct scatterlist *sg;
+ u64 last_end_dma_addr = 0, last_page_addr = 0;
+ unsigned int last_page_off = 0;
+ u64 page_mask = ~((u64)mr->page_size - 1);
+ int i, ret;
+
+ mr->iova = sg_dma_address(&sgl[0]);
+ mr->length = 0;
+
+ for_each_sg(sgl, sg, sg_nents, i) {
+ u64 dma_addr = sg_dma_address(sg);
+ unsigned int dma_len = sg_dma_len(sg);
+ u64 end_dma_addr = dma_addr + dma_len;
+ u64 page_addr = dma_addr & page_mask;
+
+ /*
+ * For the second and later elements, check whether either the
+ * end of element i-1 or the start of element i is not aligned
+ * on a page boundary.
+ */
+ if (i && (last_page_off != 0 || page_addr != dma_addr)) {
+ /* Stop mapping if there is a gap. */
+ if (last_end_dma_addr != dma_addr)
+ break;
+
+ /*
+ * Coalesce this element with the last. If it is small
+ * enough just update mr->length. Otherwise start
+ * mapping from the next page.
+ */
+ goto next_page;
+ }
+
+ do {
+ ret = set_page(mr, page_addr);
+ if (unlikely(ret < 0))
+ return i ? : ret;
+next_page:
+ page_addr += mr->page_size;
+ } while (page_addr < end_dma_addr);
+
+ mr->length += dma_len;
+ last_end_dma_addr = end_dma_addr;
+ last_page_addr = end_dma_addr & page_mask;
+ last_page_off = end_dma_addr & ~page_mask;
+ }
+
+ return i;
+}
+EXPORT_SYMBOL(ib_sg_to_pages);
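On the provider side, a driver typically implements its map_mr_sg hook by resetting a per-MR page collector and letting ib_sg_to_pages() drive a per-page callback. A sketch under assumed names (my_mr, to_my_mr() and the pages array are hypothetical driver state, not part of this patch):

	static int my_set_page(struct ib_mr *ibmr, u64 addr)
	{
		struct my_mr *mr = to_my_mr(ibmr);

		if (unlikely(mr->npages == mr->max_pages))
			return -ENOMEM;	/* stops ib_sg_to_pages() early */

		mr->pages[mr->npages++] = addr;
		return 0;
	}

	static int my_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
				int sg_nents)
	{
		struct my_mr *mr = to_my_mr(ibmr);

		mr->npages = 0;
		return ib_sg_to_pages(ibmr, sg, sg_nents, my_set_page);
	}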