diff options
author | André Fabian Silva Delgado <emulatorman@parabola.nu> | 2016-01-20 14:01:31 -0300 |
---|---|---|
committer | André Fabian Silva Delgado <emulatorman@parabola.nu> | 2016-01-20 14:01:31 -0300 |
commit | b4b7ff4b08e691656c9d77c758fc355833128ac0 (patch) | |
tree | 82fcb00e6b918026dc9f2d1f05ed8eee83874cc0 /drivers/infiniband/core | |
parent | 35acfa0fc609f2a2cd95cef4a6a9c3a5c38f1778 (diff) |
Linux-libre 4.4-gnupck-4.4-gnu
Diffstat (limited to 'drivers/infiniband/core')
-rw-r--r-- | drivers/infiniband/core/addr.c | 20 | ||||
-rw-r--r-- | drivers/infiniband/core/agent.c | 2 | ||||
-rw-r--r-- | drivers/infiniband/core/cache.c | 112 | ||||
-rw-r--r-- | drivers/infiniband/core/cm.c | 40 | ||||
-rw-r--r-- | drivers/infiniband/core/cma.c | 194 | ||||
-rw-r--r-- | drivers/infiniband/core/core_priv.h | 9 | ||||
-rw-r--r-- | drivers/infiniband/core/device.c | 19 | ||||
-rw-r--r-- | drivers/infiniband/core/mad.c | 47 | ||||
-rw-r--r-- | drivers/infiniband/core/mad_priv.h | 2 | ||||
-rw-r--r-- | drivers/infiniband/core/multicast.c | 3 | ||||
-rw-r--r-- | drivers/infiniband/core/sa_query.c | 53 | ||||
-rw-r--r-- | drivers/infiniband/core/sysfs.c | 2 | ||||
-rw-r--r-- | drivers/infiniband/core/ucma.c | 5 | ||||
-rw-r--r-- | drivers/infiniband/core/uverbs.h | 1 | ||||
-rw-r--r-- | drivers/infiniband/core/uverbs_cmd.c | 421 | ||||
-rw-r--r-- | drivers/infiniband/core/uverbs_main.c | 1 | ||||
-rw-r--r-- | drivers/infiniband/core/uverbs_marshall.c | 4 | ||||
-rw-r--r-- | drivers/infiniband/core/verbs.c | 296 |
18 files changed, 820 insertions, 411 deletions
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index 746cdf56b..34b1adad0 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -128,7 +128,7 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr, int ret = -EADDRNOTAVAIL; if (dev_addr->bound_dev_if) { - dev = dev_get_by_index(&init_net, dev_addr->bound_dev_if); + dev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if); if (!dev) return -ENODEV; ret = rdma_copy_addr(dev_addr, dev, NULL); @@ -138,7 +138,7 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr, switch (addr->sa_family) { case AF_INET: - dev = ip_dev_find(&init_net, + dev = ip_dev_find(dev_addr->net, ((struct sockaddr_in *) addr)->sin_addr.s_addr); if (!dev) @@ -149,12 +149,11 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr, *vlan_id = rdma_vlan_dev_vlan_id(dev); dev_put(dev); break; - #if IS_ENABLED(CONFIG_IPV6) case AF_INET6: rcu_read_lock(); - for_each_netdev_rcu(&init_net, dev) { - if (ipv6_chk_addr(&init_net, + for_each_netdev_rcu(dev_addr->net, dev) { + if (ipv6_chk_addr(dev_addr->net, &((struct sockaddr_in6 *) addr)->sin6_addr, dev, 1)) { ret = rdma_copy_addr(dev_addr, dev, NULL); @@ -236,7 +235,7 @@ static int addr4_resolve(struct sockaddr_in *src_in, fl4.daddr = dst_ip; fl4.saddr = src_ip; fl4.flowi4_oif = addr->bound_dev_if; - rt = ip_route_output_key(&init_net, &fl4); + rt = ip_route_output_key(addr->net, &fl4); if (IS_ERR(rt)) { ret = PTR_ERR(rt); goto out; @@ -278,12 +277,12 @@ static int addr6_resolve(struct sockaddr_in6 *src_in, fl6.saddr = src_in->sin6_addr; fl6.flowi6_oif = addr->bound_dev_if; - dst = ip6_route_output(&init_net, NULL, &fl6); + dst = ip6_route_output(addr->net, NULL, &fl6); if ((ret = dst->error)) goto put; if (ipv6_addr_any(&fl6.saddr)) { - ret = ipv6_dev_get_saddr(&init_net, ip6_dst_idev(dst)->dev, + ret = ipv6_dev_get_saddr(addr->net, ip6_dst_idev(dst)->dev, &fl6.daddr, 0, &fl6.saddr); if (ret) goto put; @@ -458,7 +457,7 @@ static void resolve_cb(int status, struct sockaddr *src_addr, } int rdma_addr_find_dmac_by_grh(const union ib_gid *sgid, const union ib_gid *dgid, - u8 *dmac, u16 *vlan_id) + u8 *dmac, u16 *vlan_id, int if_index) { int ret = 0; struct rdma_dev_addr dev_addr; @@ -476,6 +475,8 @@ int rdma_addr_find_dmac_by_grh(const union ib_gid *sgid, const union ib_gid *dgi rdma_gid2ip(&dgid_addr._sockaddr, dgid); memset(&dev_addr, 0, sizeof(dev_addr)); + dev_addr.bound_dev_if = if_index; + dev_addr.net = &init_net; ctx.addr = &dev_addr; init_completion(&ctx.comp); @@ -510,6 +511,7 @@ int rdma_addr_find_smac_by_sgid(union ib_gid *sgid, u8 *smac, u16 *vlan_id) rdma_gid2ip(&gid_addr._sockaddr, sgid); memset(&dev_addr, 0, sizeof(dev_addr)); + dev_addr.net = &init_net; ret = rdma_translate_ip(&gid_addr._sockaddr, &dev_addr, vlan_id); if (ret) return ret; diff --git a/drivers/infiniband/core/agent.c b/drivers/infiniband/core/agent.c index 042904030..4fa524dfb 100644 --- a/drivers/infiniband/core/agent.c +++ b/drivers/infiniband/core/agent.c @@ -126,7 +126,7 @@ void agent_send_response(const struct ib_mad_hdr *mad_hdr, const struct ib_grh * mad_send_wr = container_of(send_buf, struct ib_mad_send_wr_private, send_buf); - mad_send_wr->send_wr.wr.ud.port_num = port_num; + mad_send_wr->send_wr.port_num = port_num; } if (ib_post_send_mad(send_buf, NULL)) { diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index 87471ef37..89bebeada 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -409,10 +409,10 @@ static int ib_cache_gid_find(struct ib_device *ib_dev, mask, port, index); } -int ib_cache_gid_find_by_port(struct ib_device *ib_dev, - const union ib_gid *gid, - u8 port, struct net_device *ndev, - u16 *index) +int ib_find_cached_gid_by_port(struct ib_device *ib_dev, + const union ib_gid *gid, + u8 port, struct net_device *ndev, + u16 *index) { int local_index; struct ib_gid_table **ports_table = ib_dev->cache.gid_cache; @@ -438,6 +438,82 @@ int ib_cache_gid_find_by_port(struct ib_device *ib_dev, return -ENOENT; } +EXPORT_SYMBOL(ib_find_cached_gid_by_port); + +/** + * ib_find_gid_by_filter - Returns the GID table index where a specified + * GID value occurs + * @device: The device to query. + * @gid: The GID value to search for. + * @port_num: The port number of the device where the GID value could be + * searched. + * @filter: The filter function is executed on any matching GID in the table. + * If the filter function returns true, the corresponding index is returned, + * otherwise, we continue searching the GID table. It's guaranteed that + * while filter is executed, ndev field is valid and the structure won't + * change. filter is executed in an atomic context. filter must not be NULL. + * @index: The index into the cached GID table where the GID was found. This + * parameter may be NULL. + * + * ib_cache_gid_find_by_filter() searches for the specified GID value + * of which the filter function returns true in the port's GID table. + * This function is only supported on RoCE ports. + * + */ +static int ib_cache_gid_find_by_filter(struct ib_device *ib_dev, + const union ib_gid *gid, + u8 port, + bool (*filter)(const union ib_gid *, + const struct ib_gid_attr *, + void *), + void *context, + u16 *index) +{ + struct ib_gid_table **ports_table = ib_dev->cache.gid_cache; + struct ib_gid_table *table; + unsigned int i; + bool found = false; + + if (!ports_table) + return -EOPNOTSUPP; + + if (port < rdma_start_port(ib_dev) || + port > rdma_end_port(ib_dev) || + !rdma_protocol_roce(ib_dev, port)) + return -EPROTONOSUPPORT; + + table = ports_table[port - rdma_start_port(ib_dev)]; + + for (i = 0; i < table->sz; i++) { + struct ib_gid_attr attr; + unsigned long flags; + + read_lock_irqsave(&table->data_vec[i].lock, flags); + if (table->data_vec[i].props & GID_TABLE_ENTRY_INVALID) + goto next; + + if (memcmp(gid, &table->data_vec[i].gid, sizeof(*gid))) + goto next; + + memcpy(&attr, &table->data_vec[i].attr, sizeof(attr)); + + if (filter(gid, &attr, context)) + found = true; + +next: + read_unlock_irqrestore(&table->data_vec[i].lock, flags); + + if (found) + break; + } + + if (!found) + return -ENOENT; + + if (index) + *index = i; + return 0; +} static struct ib_gid_table *alloc_gid_table(int sz) { @@ -649,24 +725,44 @@ static int gid_table_setup_one(struct ib_device *ib_dev) int ib_get_cached_gid(struct ib_device *device, u8 port_num, int index, - union ib_gid *gid) + union ib_gid *gid, + struct ib_gid_attr *gid_attr) { if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device)) return -EINVAL; - return __ib_cache_gid_get(device, port_num, index, gid, NULL); + return __ib_cache_gid_get(device, port_num, index, gid, gid_attr); } EXPORT_SYMBOL(ib_get_cached_gid); int ib_find_cached_gid(struct ib_device *device, const union ib_gid *gid, + struct net_device *ndev, u8 *port_num, u16 *index) { - return ib_cache_gid_find(device, gid, NULL, port_num, index); + return ib_cache_gid_find(device, gid, ndev, port_num, index); } EXPORT_SYMBOL(ib_find_cached_gid); +int ib_find_gid_by_filter(struct ib_device *device, + const union ib_gid *gid, + u8 port_num, + bool (*filter)(const union ib_gid *gid, + const struct ib_gid_attr *, + void *), + void *context, u16 *index) +{ + /* Only RoCE GID table supports filter function */ + if (!rdma_cap_roce_gid_table(device, port_num) && filter) + return -EPROTONOSUPPORT; + + return ib_cache_gid_find_by_filter(device, gid, + port_num, filter, + context, index); +} +EXPORT_SYMBOL(ib_find_gid_by_filter); + int ib_get_cached_pkey(struct ib_device *device, u8 port_num, int index, @@ -845,7 +941,7 @@ static void ib_cache_update(struct ib_device *device, if (!use_roce_gid_table) { for (i = 0; i < gid_cache->table_len; ++i) { ret = ib_query_gid(device, port, i, - gid_cache->table + i); + gid_cache->table + i, NULL); if (ret) { printk(KERN_WARNING "ib_query_gid failed (%d) for %s (index %d)\n", ret, device->name, i); diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 4f918b929..0a26dd6d9 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -179,8 +179,6 @@ struct cm_av { struct ib_ah_attr ah_attr; u16 pkey_index; u8 timeout; - u8 valid; - u8 smac[ETH_ALEN]; }; struct cm_work { @@ -361,17 +359,21 @@ static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av) unsigned long flags; int ret; u8 p; + struct net_device *ndev = ib_get_ndev_from_path(path); read_lock_irqsave(&cm.device_lock, flags); list_for_each_entry(cm_dev, &cm.device_list, list) { if (!ib_find_cached_gid(cm_dev->ib_device, &path->sgid, - &p, NULL)) { + ndev, &p, NULL)) { port = cm_dev->port[p-1]; break; } } read_unlock_irqrestore(&cm.device_lock, flags); + if (ndev) + dev_put(ndev); + if (!port) return -EINVAL; @@ -384,9 +386,7 @@ static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av) ib_init_ah_from_path(cm_dev->ib_device, port->port_num, path, &av->ah_attr); av->timeout = path->packet_life_time + 1; - memcpy(av->smac, path->smac, sizeof(av->smac)); - av->valid = 1; return 0; } @@ -1639,11 +1639,11 @@ static int cm_req_handler(struct cm_work *work) cm_format_paths_from_req(req_msg, &work->path[0], &work->path[1]); memcpy(work->path[0].dmac, cm_id_priv->av.ah_attr.dmac, ETH_ALEN); - work->path[0].vlan_id = cm_id_priv->av.ah_attr.vlan_id; ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av); if (ret) { ib_get_cached_gid(work->port->cm_dev->ib_device, - work->port->port_num, 0, &work->path[0].sgid); + work->port->port_num, 0, &work->path[0].sgid, + NULL); ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_GID, &work->path[0].sgid, sizeof work->path[0].sgid, NULL, 0); @@ -3618,32 +3618,6 @@ static int cm_init_qp_rtr_attr(struct cm_id_private *cm_id_priv, *qp_attr_mask = IB_QP_STATE | IB_QP_AV | IB_QP_PATH_MTU | IB_QP_DEST_QPN | IB_QP_RQ_PSN; qp_attr->ah_attr = cm_id_priv->av.ah_attr; - if (!cm_id_priv->av.valid) { - spin_unlock_irqrestore(&cm_id_priv->lock, flags); - return -EINVAL; - } - if (cm_id_priv->av.ah_attr.vlan_id != 0xffff) { - qp_attr->vlan_id = cm_id_priv->av.ah_attr.vlan_id; - *qp_attr_mask |= IB_QP_VID; - } - if (!is_zero_ether_addr(cm_id_priv->av.smac)) { - memcpy(qp_attr->smac, cm_id_priv->av.smac, - sizeof(qp_attr->smac)); - *qp_attr_mask |= IB_QP_SMAC; - } - if (cm_id_priv->alt_av.valid) { - if (cm_id_priv->alt_av.ah_attr.vlan_id != 0xffff) { - qp_attr->alt_vlan_id = - cm_id_priv->alt_av.ah_attr.vlan_id; - *qp_attr_mask |= IB_QP_ALT_VID; - } - if (!is_zero_ether_addr(cm_id_priv->alt_av.smac)) { - memcpy(qp_attr->alt_smac, - cm_id_priv->alt_av.smac, - sizeof(qp_attr->alt_smac)); - *qp_attr_mask |= IB_QP_ALT_SMAC; - } - } qp_attr->path_mtu = cm_id_priv->path_mtu; qp_attr->dest_qp_num = be32_to_cpu(cm_id_priv->remote_qpn); qp_attr->rq_psn = be32_to_cpu(cm_id_priv->rq_psn); diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 36b12d560..2d762a2ec 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -44,6 +44,8 @@ #include <linux/module.h> #include <net/route.h> +#include <net/net_namespace.h> +#include <net/netns/generic.h> #include <net/tcp.h> #include <net/ipv6.h> #include <net/ip_fib.h> @@ -86,7 +88,7 @@ static const char * const cma_events[] = { [RDMA_CM_EVENT_TIMEWAIT_EXIT] = "timewait exit", }; -const char *rdma_event_msg(enum rdma_cm_event_type event) +const char *__attribute_const__ rdma_event_msg(enum rdma_cm_event_type event) { size_t index = event; @@ -110,22 +112,33 @@ static LIST_HEAD(dev_list); static LIST_HEAD(listen_any_list); static DEFINE_MUTEX(lock); static struct workqueue_struct *cma_wq; -static DEFINE_IDR(tcp_ps); -static DEFINE_IDR(udp_ps); -static DEFINE_IDR(ipoib_ps); -static DEFINE_IDR(ib_ps); +static int cma_pernet_id; -static struct idr *cma_idr(enum rdma_port_space ps) +struct cma_pernet { + struct idr tcp_ps; + struct idr udp_ps; + struct idr ipoib_ps; + struct idr ib_ps; +}; + +static struct cma_pernet *cma_pernet(struct net *net) +{ + return net_generic(net, cma_pernet_id); +} + +static struct idr *cma_pernet_idr(struct net *net, enum rdma_port_space ps) { + struct cma_pernet *pernet = cma_pernet(net); + switch (ps) { case RDMA_PS_TCP: - return &tcp_ps; + return &pernet->tcp_ps; case RDMA_PS_UDP: - return &udp_ps; + return &pernet->udp_ps; case RDMA_PS_IPOIB: - return &ipoib_ps; + return &pernet->ipoib_ps; case RDMA_PS_IB: - return &ib_ps; + return &pernet->ib_ps; default: return NULL; } @@ -145,24 +158,25 @@ struct rdma_bind_list { unsigned short port; }; -static int cma_ps_alloc(enum rdma_port_space ps, +static int cma_ps_alloc(struct net *net, enum rdma_port_space ps, struct rdma_bind_list *bind_list, int snum) { - struct idr *idr = cma_idr(ps); + struct idr *idr = cma_pernet_idr(net, ps); return idr_alloc(idr, bind_list, snum, snum + 1, GFP_KERNEL); } -static struct rdma_bind_list *cma_ps_find(enum rdma_port_space ps, int snum) +static struct rdma_bind_list *cma_ps_find(struct net *net, + enum rdma_port_space ps, int snum) { - struct idr *idr = cma_idr(ps); + struct idr *idr = cma_pernet_idr(net, ps); return idr_find(idr, snum); } -static void cma_ps_remove(enum rdma_port_space ps, int snum) +static void cma_ps_remove(struct net *net, enum rdma_port_space ps, int snum) { - struct idr *idr = cma_idr(ps); + struct idr *idr = cma_pernet_idr(net, ps); idr_remove(idr, snum); } @@ -427,10 +441,11 @@ static int cma_translate_addr(struct sockaddr *addr, struct rdma_dev_addr *dev_a } static inline int cma_validate_port(struct ib_device *device, u8 port, - union ib_gid *gid, int dev_type) + union ib_gid *gid, int dev_type, + int bound_if_index) { - u8 found_port; int ret = -ENODEV; + struct net_device *ndev = NULL; if ((dev_type == ARPHRD_INFINIBAND) && !rdma_protocol_ib(device, port)) return ret; @@ -438,9 +453,13 @@ static inline int cma_validate_port(struct ib_device *device, u8 port, if ((dev_type != ARPHRD_INFINIBAND) && rdma_protocol_ib(device, port)) return ret; - ret = ib_find_cached_gid(device, gid, &found_port, NULL); - if (port != found_port) - return -ENODEV; + if (dev_type == ARPHRD_ETHER) + ndev = dev_get_by_index(&init_net, bound_if_index); + + ret = ib_find_cached_gid_by_port(device, gid, port, ndev, NULL); + + if (ndev) + dev_put(ndev); return ret; } @@ -472,7 +491,8 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv, &iboe_gid : &gid; ret = cma_validate_port(cma_dev->device, port, gidp, - dev_addr->dev_type); + dev_addr->dev_type, + dev_addr->bound_dev_if); if (!ret) { id_priv->id.port_num = port; goto out; @@ -490,7 +510,8 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv, &iboe_gid : &gid; ret = cma_validate_port(cma_dev->device, port, gidp, - dev_addr->dev_type); + dev_addr->dev_type, + dev_addr->bound_dev_if); if (!ret) { id_priv->id.port_num = port; goto out; @@ -531,7 +552,9 @@ static int cma_resolve_ib_dev(struct rdma_id_private *id_priv) if (ib_find_cached_pkey(cur_dev->device, p, pkey, &index)) continue; - for (i = 0; !ib_get_cached_gid(cur_dev->device, p, i, &gid); i++) { + for (i = 0; !ib_get_cached_gid(cur_dev->device, p, i, + &gid, NULL); + i++) { if (!memcmp(&gid, dgid, sizeof(gid))) { cma_dev = cur_dev; sgid = gid; @@ -577,7 +600,8 @@ static int cma_disable_callback(struct rdma_id_private *id_priv, return 0; } -struct rdma_cm_id *rdma_create_id(rdma_cm_event_handler event_handler, +struct rdma_cm_id *rdma_create_id(struct net *net, + rdma_cm_event_handler event_handler, void *context, enum rdma_port_space ps, enum ib_qp_type qp_type) { @@ -601,6 +625,7 @@ struct rdma_cm_id *rdma_create_id(rdma_cm_event_handler event_handler, INIT_LIST_HEAD(&id_priv->listen_list); INIT_LIST_HEAD(&id_priv->mc_list); get_random_bytes(&id_priv->seq_num, sizeof id_priv->seq_num); + id_priv->id.route.addr.dev_addr.net = get_net(net); return &id_priv->id; } @@ -718,18 +743,12 @@ static int cma_modify_qp_rtr(struct rdma_id_private *id_priv, goto out; ret = ib_query_gid(id_priv->id.device, id_priv->id.port_num, - qp_attr.ah_attr.grh.sgid_index, &sgid); + qp_attr.ah_attr.grh.sgid_index, &sgid, NULL); if (ret) goto out; BUG_ON(id_priv->cma_dev->device != id_priv->id.device); - if (rdma_protocol_roce(id_priv->id.device, id_priv->id.port_num)) { - ret = rdma_addr_find_smac_by_sgid(&sgid, qp_attr.smac, NULL); - - if (ret) - goto out; - } if (conn_param) qp_attr.max_dest_rd_atomic = conn_param->responder_resources; ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask); @@ -1107,10 +1126,7 @@ static bool validate_ipv4_net_dev(struct net_device *net_dev, rcu_read_lock(); err = fib_lookup(dev_net(net_dev), &fl4, &res, 0); - if (err) - return false; - - ret = FIB_RES_DEV(res) == net_dev; + ret = err == 0 && FIB_RES_DEV(res) == net_dev; rcu_read_unlock(); return ret; @@ -1249,18 +1265,20 @@ static bool cma_protocol_roce(const struct rdma_cm_id *id) return cma_protocol_roce_dev_port(device, port_num); } -static bool cma_match_net_dev(const struct rdma_id_private *id_priv, - const struct net_device *net_dev) +static bool cma_match_net_dev(const struct rdma_cm_id *id, + const struct net_device *net_dev, + u8 port_num) { - const struct rdma_addr *addr = &id_priv->id.route.addr; + const struct rdma_addr *addr = &id->route.addr; if (!net_dev) /* This request is an AF_IB request or a RoCE request */ - return addr->src_addr.ss_family == AF_IB || - cma_protocol_roce(&id_priv->id); + return (!id->port_num || id->port_num == port_num) && + (addr->src_addr.ss_family == AF_IB || + cma_protocol_roce_dev_port(id->device, port_num)); return !addr->dev_addr.bound_dev_if || - (net_eq(dev_net(net_dev), &init_net) && + (net_eq(dev_net(net_dev), addr->dev_addr.net) && addr->dev_addr.bound_dev_if == net_dev->ifindex); } @@ -1279,13 +1297,13 @@ static struct rdma_id_private *cma_find_listener( hlist_for_each_entry(id_priv, &bind_list->owners, node) { if (cma_match_private_data(id_priv, ib_event->private_data)) { if (id_priv->id.device == cm_id->device && - cma_match_net_dev(id_priv, net_dev)) + cma_match_net_dev(&id_priv->id, net_dev, req->port)) return id_priv; list_for_each_entry(id_priv_dev, &id_priv->listen_list, listen_list) { if (id_priv_dev->id.device == cm_id->device && - cma_match_net_dev(id_priv_dev, net_dev)) + cma_match_net_dev(&id_priv_dev->id, net_dev, req->port)) return id_priv_dev; } } @@ -1321,7 +1339,8 @@ static struct rdma_id_private *cma_id_from_event(struct ib_cm_id *cm_id, } } - bind_list = cma_ps_find(rdma_ps_from_service_id(req.service_id), + bind_list = cma_ps_find(*net_dev ? dev_net(*net_dev) : &init_net, + rdma_ps_from_service_id(req.service_id), cma_port_from_service_id(req.service_id)); id_priv = cma_find_listener(bind_list, cm_id, ib_event, &req, *net_dev); if (IS_ERR(id_priv) && *net_dev) { @@ -1392,6 +1411,7 @@ static void cma_cancel_operation(struct rdma_id_private *id_priv, static void cma_release_port(struct rdma_id_private *id_priv) { struct rdma_bind_list *bind_list = id_priv->bind_list; + struct net *net = id_priv->id.route.addr.dev_addr.net; if (!bind_list) return; @@ -1399,7 +1419,7 @@ static void cma_release_port(struct rdma_id_private *id_priv) mutex_lock(&lock); hlist_del(&id_priv->node); if (hlist_empty(&bind_list->owners)) { - cma_ps_remove(bind_list->ps, bind_list->port); + cma_ps_remove(net, bind_list->ps, bind_list->port); kfree(bind_list); } mutex_unlock(&lock); @@ -1458,6 +1478,7 @@ void rdma_destroy_id(struct rdma_cm_id *id) cma_deref_id(id_priv->id.context); kfree(id_priv->id.route.path_rec); + put_net(id_priv->id.route.addr.dev_addr.net); kfree(id_priv); } EXPORT_SYMBOL(rdma_destroy_id); @@ -1588,7 +1609,8 @@ static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id, ib_event->param.req_rcvd.primary_path->service_id; int ret; - id = rdma_create_id(listen_id->event_handler, listen_id->context, + id = rdma_create_id(listen_id->route.addr.dev_addr.net, + listen_id->event_handler, listen_id->context, listen_id->ps, ib_event->param.req_rcvd.qp_type); if (IS_ERR(id)) return NULL; @@ -1643,9 +1665,10 @@ static struct rdma_id_private *cma_new_udp_id(struct rdma_cm_id *listen_id, struct rdma_id_private *id_priv; struct rdma_cm_id *id; const sa_family_t ss_family = listen_id->route.addr.src_addr.ss_family; + struct net *net = listen_id->route.addr.dev_addr.net; int ret; - id = rdma_create_id(listen_id->event_handler, listen_id->context, + id = rdma_create_id(net, listen_id->event_handler, listen_id->context, listen_id->ps, IB_QPT_UD); if (IS_ERR(id)) return NULL; @@ -1882,7 +1905,8 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id, return -ECONNABORTED; /* Create a new RDMA id for the new IW CM ID */ - new_cm_id = rdma_create_id(listen_id->id.event_handler, + new_cm_id = rdma_create_id(listen_id->id.route.addr.dev_addr.net, + listen_id->id.event_handler, listen_id->id.context, RDMA_PS_TCP, IB_QPT_RC); if (IS_ERR(new_cm_id)) { @@ -2010,12 +2034,13 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv, { struct rdma_id_private *dev_id_priv; struct rdma_cm_id *id; + struct net *net = id_priv->id.route.addr.dev_addr.net; int ret; if (cma_family(id_priv) == AF_IB && !rdma_cap_ib_cm(cma_dev->device, 1)) return; - id = rdma_create_id(cma_listen_handler, id_priv, id_priv->id.ps, + id = rdma_create_id(net, cma_listen_handler, id_priv, id_priv->id.ps, id_priv->id.qp_type); if (IS_ERR(id)) return; @@ -2294,16 +2319,17 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv) route->num_paths = 1; - if (addr->dev_addr.bound_dev_if) + if (addr->dev_addr.bound_dev_if) { ndev = dev_get_by_index(&init_net, addr->dev_addr.bound_dev_if); + route->path_rec->net = &init_net; + route->path_rec->ifindex = addr->dev_addr.bound_dev_if; + } if (!ndev) { ret = -ENODEV; goto err2; } - route->path_rec->vlan_id = rdma_vlan_dev_vlan_id(ndev); memcpy(route->path_rec->dmac, addr->dev_addr.dst_dev_addr, ETH_ALEN); - memcpy(route->path_rec->smac, ndev->dev_addr, ndev->addr_len); rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr, &route->path_rec->sgid); @@ -2426,7 +2452,7 @@ static int cma_bind_loopback(struct rdma_id_private *id_priv) p = 1; port_found: - ret = ib_get_cached_gid(cma_dev->device, p, 0, &gid); + ret = ib_get_cached_gid(cma_dev->device, p, 0, &gid, NULL); if (ret) goto out; @@ -2688,7 +2714,8 @@ static int cma_alloc_port(enum rdma_port_space ps, if (!bind_list) return -ENOMEM; - ret = cma_ps_alloc(ps, bind_list, snum); + ret = cma_ps_alloc(id_priv->id.route.addr.dev_addr.net, ps, bind_list, + snum); if (ret < 0) goto err; @@ -2707,13 +2734,14 @@ static int cma_alloc_any_port(enum rdma_port_space ps, static unsigned int last_used_port; int low, high, remaining; unsigned int rover; + struct net *net = id_priv->id.route.addr.dev_addr.net; - inet_get_local_port_range(&init_net, &low, &high); + inet_get_local_port_range(net, &low, &high); remaining = (high - low) + 1; rover = prandom_u32() % remaining + low; retry: if (last_used_port != rover && - !cma_ps_find(ps, (unsigned short)rover)) { + !cma_ps_find(net, ps, (unsigned short)rover)) { int ret = cma_alloc_port(ps, id_priv, rover); /* * Remember previously used port number in order to avoid @@ -2779,7 +2807,7 @@ static int cma_use_port(enum rdma_port_space ps, if (snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE)) return -EACCES; - bind_list = cma_ps_find(ps, snum); + bind_list = cma_ps_find(id_priv->id.route.addr.dev_addr.net, ps, snum); if (!bind_list) { ret = cma_alloc_port(ps, id_priv, snum); } else { @@ -2971,8 +2999,11 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr) if (addr->sa_family == AF_INET) id_priv->afonly = 1; #if IS_ENABLED(CONFIG_IPV6) - else if (addr->sa_family == AF_INET6) - id_priv->afonly = init_net.ipv6.sysctl.bindv6only; + else if (addr->sa_family == AF_INET6) { + struct net *net = id_priv->id.route.addr.dev_addr.net; + + id_priv->afonly = net->ipv6.sysctl.bindv6only; + } #endif } ret = cma_get_port(id_priv); @@ -3777,6 +3808,7 @@ static int cma_netdev_change(struct net_device *ndev, struct rdma_id_private *id dev_addr = &id_priv->id.route.addr.dev_addr; if ((dev_addr->bound_dev_if == ndev->ifindex) && + (net_eq(dev_net(ndev), dev_addr->net)) && memcmp(dev_addr->src_dev_addr, ndev->dev_addr, ndev->addr_len)) { printk(KERN_INFO "RDMA CM addr change for ndev %s used by id %p\n", ndev->name, &id_priv->id); @@ -3802,9 +3834,6 @@ static int cma_netdev_callback(struct notifier_block *self, unsigned long event, struct rdma_id_private *id_priv; int ret = NOTIFY_DONE; - if (dev_net(ndev) != &init_net) - return NOTIFY_DONE; - if (event != NETDEV_BONDING_FAILOVER) return NOTIFY_DONE; @@ -3999,6 +4028,35 @@ static const struct ibnl_client_cbs cma_cb_table[] = { .module = THIS_MODULE }, }; +static int cma_init_net(struct net *net) +{ + struct cma_pernet *pernet = cma_pernet(net); + + idr_init(&pernet->tcp_ps); + idr_init(&pernet->udp_ps); + idr_init(&pernet->ipoib_ps); + idr_init(&pernet->ib_ps); + + return 0; +} + +static void cma_exit_net(struct net *net) +{ + struct cma_pernet *pernet = cma_pernet(net); + + idr_destroy(&pernet->tcp_ps); + idr_destroy(&pernet->udp_ps); + idr_destroy(&pernet->ipoib_ps); + idr_destroy(&pernet->ib_ps); +} + +static struct pernet_operations cma_pernet_operations = { + .init = cma_init_net, + .exit = cma_exit_net, + .id = &cma_pernet_id, + .size = sizeof(struct cma_pernet), +}; + static int __init cma_init(void) { int ret; @@ -4007,6 +4065,10 @@ static int __init cma_init(void) if (!cma_wq) return -ENOMEM; + ret = register_pernet_subsys(&cma_pernet_operations); + if (ret) + goto err_wq; + ib_sa_register_client(&sa_client); rdma_addr_register_client(&addr_client); register_netdevice_notifier(&cma_nb); @@ -4024,6 +4086,7 @@ err: unregister_netdevice_notifier(&cma_nb); rdma_addr_unregister_client(&addr_client); ib_sa_unregister_client(&sa_client); +err_wq: destroy_workqueue(cma_wq); return ret; } @@ -4035,11 +4098,8 @@ static void __exit cma_cleanup(void) unregister_netdevice_notifier(&cma_nb); rdma_addr_unregister_client(&addr_client); ib_sa_unregister_client(&sa_client); + unregister_pernet_subsys(&cma_pernet_operations); destroy_workqueue(cma_wq); - idr_destroy(&tcp_ps); - idr_destroy(&udp_ps); - idr_destroy(&ipoib_ps); - idr_destroy(&ib_ps); } module_init(cma_init); diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h index 70bb36ebb..5cf6eb716 100644 --- a/drivers/infiniband/core/core_priv.h +++ b/drivers/infiniband/core/core_priv.h @@ -46,8 +46,8 @@ void ib_device_unregister_sysfs(struct ib_device *device); void ib_cache_setup(void); void ib_cache_cleanup(void); -int ib_resolve_eth_l2_attrs(struct ib_qp *qp, - struct ib_qp_attr *qp_attr, int *qp_attr_mask); +int ib_resolve_eth_dmac(struct ib_qp *qp, + struct ib_qp_attr *qp_attr, int *qp_attr_mask); typedef void (*roce_netdev_callback)(struct ib_device *device, u8 port, struct net_device *idev, void *cookie); @@ -65,11 +65,6 @@ void ib_enum_all_roce_netdevs(roce_netdev_filter filter, roce_netdev_callback cb, void *cookie); -int ib_cache_gid_find_by_port(struct ib_device *ib_dev, - const union ib_gid *gid, - u8 port, struct net_device *ndev, - u16 *index); - enum ib_cache_gid_default_mode { IB_CACHE_GID_DEFAULT_MODE_SET, IB_CACHE_GID_DEFAULT_MODE_DELETE diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 17639117a..179e8134d 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -672,14 +672,20 @@ EXPORT_SYMBOL(ib_query_port); * @port_num:Port number to query * @index:GID table index to query * @gid:Returned GID + * @attr: Returned GID attributes related to this GID index (only in RoCE). + * NULL means ignore. * * ib_query_gid() fetches the specified GID table entry. */ int ib_query_gid(struct ib_device *device, - u8 port_num, int index, union ib_gid *gid) + u8 port_num, int index, union ib_gid *gid, + struct ib_gid_attr *attr) { if (rdma_cap_roce_gid_table(device, port_num)) - return ib_get_cached_gid(device, port_num, index, gid); + return ib_get_cached_gid(device, port_num, index, gid, attr); + + if (attr) + return -EINVAL; return device->query_gid(device, port_num, index, gid); } @@ -819,27 +825,28 @@ EXPORT_SYMBOL(ib_modify_port); * a specified GID value occurs. * @device: The device to query. * @gid: The GID value to search for. + * @ndev: The ndev related to the GID to search for. * @port_num: The port number of the device where the GID value was found. * @index: The index into the GID table where the GID was found. This * parameter may be NULL. */ int ib_find_gid(struct ib_device *device, union ib_gid *gid, - u8 *port_num, u16 *index) + struct net_device *ndev, u8 *port_num, u16 *index) { union ib_gid tmp_gid; int ret, port, i; for (port = rdma_start_port(device); port <= rdma_end_port(device); ++port) { if (rdma_cap_roce_gid_table(device, port)) { - if (!ib_cache_gid_find_by_port(device, gid, port, - NULL, index)) { + if (!ib_find_cached_gid_by_port(device, gid, port, + ndev, index)) { *port_num = port; return 0; } } for (i = 0; i < device->port_immutable[port].gid_tbl_len; ++i) { - ret = ib_query_gid(device, port, i, &tmp_gid); + ret = ib_query_gid(device, port, i, &tmp_gid, NULL); if (ret) return ret; if (!memcmp(&tmp_gid, gid, sizeof *gid)) { diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index 4b5c72311..2281de122 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -752,7 +752,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, struct ib_device *device = mad_agent_priv->agent.device; u8 port_num; struct ib_wc mad_wc; - struct ib_send_wr *send_wr = &mad_send_wr->send_wr; + struct ib_ud_wr *send_wr = &mad_send_wr->send_wr; size_t mad_size = port_mad_size(mad_agent_priv->qp_info->port_priv); u16 out_mad_pkey_index = 0; u16 drslid; @@ -761,7 +761,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, if (rdma_cap_ib_switch(device) && smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) - port_num = send_wr->wr.ud.port_num; + port_num = send_wr->port_num; else port_num = mad_agent_priv->agent.port_num; @@ -832,9 +832,9 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, } build_smp_wc(mad_agent_priv->agent.qp, - send_wr->wr_id, drslid, - send_wr->wr.ud.pkey_index, - send_wr->wr.ud.port_num, &mad_wc); + send_wr->wr.wr_id, drslid, + send_wr->pkey_index, + send_wr->port_num, &mad_wc); if (opa && smp->base_version == OPA_MGMT_BASE_VERSION) { mad_wc.byte_len = mad_send_wr->send_buf.hdr_len @@ -894,7 +894,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, local->mad_send_wr = mad_send_wr; if (opa) { - local->mad_send_wr->send_wr.wr.ud.pkey_index = out_mad_pkey_index; + local->mad_send_wr->send_wr.pkey_index = out_mad_pkey_index; local->return_wc_byte_len = mad_size; } /* Reference MAD agent until send side of local completion handled */ @@ -1039,14 +1039,14 @@ struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent, mad_send_wr->sg_list[1].lkey = mad_agent->qp->pd->local_dma_lkey; - mad_send_wr->send_wr.wr_id = (unsigned long) mad_send_wr; - mad_send_wr->send_wr.sg_list = mad_send_wr->sg_list; - mad_send_wr->send_wr.num_sge = 2; - mad_send_wr->send_wr.opcode = IB_WR_SEND; - mad_send_wr->send_wr.send_flags = IB_SEND_SIGNALED; - mad_send_wr->send_wr.wr.ud.remote_qpn = remote_qpn; - mad_send_wr->send_wr.wr.ud.remote_qkey = IB_QP_SET_QKEY; - mad_send_wr->send_wr.wr.ud.pkey_index = pkey_index; + mad_send_wr->send_wr.wr.wr_id = (unsigned long) mad_send_wr; + mad_send_wr->send_wr.wr.sg_list = mad_send_wr->sg_list; + mad_send_wr->send_wr.wr.num_sge = 2; + mad_send_wr->send_wr.wr.opcode = IB_WR_SEND; + mad_send_wr->send_wr.wr.send_flags = IB_SEND_SIGNALED; + mad_send_wr->send_wr.remote_qpn = remote_qpn; + mad_send_wr->send_wr.remote_qkey = IB_QP_SET_QKEY; + mad_send_wr->send_wr.pkey_index = pkey_index; if (rmpp_active) { ret = alloc_send_rmpp_list(mad_send_wr, mad_size, gfp_mask); @@ -1151,7 +1151,7 @@ int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr) /* Set WR ID to find mad_send_wr upon completion */ qp_info = mad_send_wr->mad_agent_priv->qp_info; - mad_send_wr->send_wr.wr_id = (unsigned long)&mad_send_wr->mad_list; + mad_send_wr->send_wr.wr.wr_id = (unsigned long)&mad_send_wr->mad_list; mad_send_wr->mad_list.mad_queue = &qp_info->send_queue; mad_agent = mad_send_wr->send_buf.mad_agent; @@ -1179,7 +1179,7 @@ int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr) spin_lock_irqsave(&qp_info->send_queue.lock, flags); if (qp_info->send_queue.count < qp_info->send_queue.max_active) { - ret = ib_post_send(mad_agent->qp, &mad_send_wr->send_wr, + ret = ib_post_send(mad_agent->qp, &mad_send_wr->send_wr.wr, &bad_send_wr); list = &qp_info->send_queue.list; } else { @@ -1244,7 +1244,7 @@ int ib_post_send_mad(struct ib_mad_send_buf *send_buf, * request associated with the completion */ next_send_buf = send_buf->next; - mad_send_wr->send_wr.wr.ud.ah = send_buf->ah; + mad_send_wr->send_wr.ah = send_buf->ah; if (((struct ib_mad_hdr *) send_buf->mad)->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) { @@ -1811,6 +1811,11 @@ static int validate_mad(const struct ib_mad_hdr *mad_hdr, if (qp_num == 0) valid = 1; } else { + /* CM attributes other than ClassPortInfo only use Send method */ + if ((mad_hdr->mgmt_class == IB_MGMT_CLASS_CM) && + (mad_hdr->attr_id != IB_MGMT_CLASSPORTINFO_ATTR_ID) && + (mad_hdr->method != IB_MGMT_METHOD_SEND)) + goto out; /* Filter GSI packets sent to QP0 */ if (qp_num != 0) valid = 1; @@ -1877,7 +1882,7 @@ static inline int rcv_has_same_gid(const struct ib_mad_agent_private *mad_agent_ ((1 << lmc) - 1))); } else { if (ib_get_cached_gid(device, port_num, - attr.grh.sgid_index, &sgid)) + attr.grh.sgid_index, &sgid, NULL)) return 0; return !memcmp(sgid.raw, rwc->recv_buf.grh->dgid.raw, 16); @@ -2457,7 +2462,7 @@ retry: ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc); if (queued_send_wr) { - ret = ib_post_send(qp_info->qp, &queued_send_wr->send_wr, + ret = ib_post_send(qp_info->qp, &queued_send_wr->send_wr.wr, &bad_send_wr); if (ret) { dev_err(&port_priv->device->dev, @@ -2515,7 +2520,7 @@ static void mad_error_handler(struct ib_mad_port_private *port_priv, struct ib_send_wr *bad_send_wr; mad_send_wr->retry = 0; - ret = ib_post_send(qp_info->qp, &mad_send_wr->send_wr, + ret = ib_post_send(qp_info->qp, &mad_send_wr->send_wr.wr, &bad_send_wr); if (ret) ib_mad_send_done_handler(port_priv, wc); @@ -2713,7 +2718,7 @@ static void local_completions(struct work_struct *work) build_smp_wc(recv_mad_agent->agent.qp, (unsigned long) local->mad_send_wr, be16_to_cpu(IB_LID_PERMISSIVE), - local->mad_send_wr->send_wr.wr.ud.pkey_index, + local->mad_send_wr->send_wr.pkey_index, recv_mad_agent->agent.port_num, &wc); local->mad_priv->header.recv_wc.wc = &wc; diff --git a/drivers/infiniband/core/mad_priv.h b/drivers/infiniband/core/mad_priv.h index 4a4f7aad0..990698a6a 100644 --- a/drivers/infiniband/core/mad_priv.h +++ b/drivers/infiniband/core/mad_priv.h @@ -123,7 +123,7 @@ struct ib_mad_send_wr_private { struct ib_mad_send_buf send_buf; u64 header_mapping; u64 payload_mapping; - struct ib_send_wr send_wr; + struct ib_ud_wr send_wr; struct ib_sge sg_list[IB_MAD_SEND_REQ_MAX_SG]; __be64 tid; unsigned long timeout; diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c index d38d8b2b2..bb6685fb0 100644 --- a/drivers/infiniband/core/multicast.c +++ b/drivers/infiniband/core/multicast.c @@ -729,7 +729,8 @@ int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num, u16 gid_index; u8 p; - ret = ib_find_cached_gid(device, &rec->port_gid, &p, &gid_index); + ret = ib_find_cached_gid(device, &rec->port_gid, + NULL, &p, &gid_index); if (ret) return ret; diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index 8c014b33d..a95a32ba5 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -512,7 +512,7 @@ static int ib_nl_get_path_rec_attrs_len(ib_sa_comp_mask comp_mask) return len; } -static int ib_nl_send_msg(struct ib_sa_query *query) +static int ib_nl_send_msg(struct ib_sa_query *query, gfp_t gfp_mask) { struct sk_buff *skb = NULL; struct nlmsghdr *nlh; @@ -526,7 +526,7 @@ static int ib_nl_send_msg(struct ib_sa_query *query) if (len <= 0) return -EMSGSIZE; - skb = nlmsg_new(len, GFP_KERNEL); + skb = nlmsg_new(len, gfp_mask); if (!skb) return -ENOMEM; @@ -544,7 +544,7 @@ static int ib_nl_send_msg(struct ib_sa_query *query) /* Repair the nlmsg header length */ nlmsg_end(skb, nlh); - ret = ibnl_multicast(skb, nlh, RDMA_NL_GROUP_LS, GFP_KERNEL); + ret = ibnl_multicast(skb, nlh, RDMA_NL_GROUP_LS, gfp_mask); if (!ret) ret = len; else @@ -553,7 +553,7 @@ static int ib_nl_send_msg(struct ib_sa_query *query) return ret; } -static int ib_nl_make_request(struct ib_sa_query *query) +static int ib_nl_make_request(struct ib_sa_query *query, gfp_t gfp_mask) { unsigned long flags; unsigned long delay; @@ -562,25 +562,27 @@ static int ib_nl_make_request(struct ib_sa_query *query) INIT_LIST_HEAD(&query->list); query->seq = (u32)atomic_inc_return(&ib_nl_sa_request_seq); + /* Put the request on the list first.*/ spin_lock_irqsave(&ib_nl_request_lock, flags); - ret = ib_nl_send_msg(query); - if (ret <= 0) { - ret = -EIO; - goto request_out; - } else { - ret = 0; - } - delay = msecs_to_jiffies(sa_local_svc_timeout_ms); query->timeout = delay + jiffies; list_add_tail(&query->list, &ib_nl_request_list); /* Start the timeout if this is the only request */ if (ib_nl_request_list.next == &query->list) queue_delayed_work(ib_nl_wq, &ib_nl_timed_work, delay); - -request_out: spin_unlock_irqrestore(&ib_nl_request_lock, flags); + ret = ib_nl_send_msg(query, gfp_mask); + if (ret <= 0) { + ret = -EIO; + /* Remove the request */ + spin_lock_irqsave(&ib_nl_request_lock, flags); + list_del(&query->list); + spin_unlock_irqrestore(&ib_nl_request_lock, flags); + } else { + ret = 0; + } + return ret; } @@ -1007,26 +1009,29 @@ int ib_init_ah_from_path(struct ib_device *device, u8 port_num, force_grh = rdma_cap_eth_ah(device, port_num); if (rec->hop_limit > 1 || force_grh) { + struct net_device *ndev = ib_get_ndev_from_path(rec); + ah_attr->ah_flags = IB_AH_GRH; ah_attr->grh.dgid = rec->dgid; - ret = ib_find_cached_gid(device, &rec->sgid, &port_num, + ret = ib_find_cached_gid(device, &rec->sgid, ndev, &port_num, &gid_index); - if (ret) + if (ret) { + if (ndev) + dev_put(ndev); return ret; + } ah_attr->grh.sgid_index = gid_index; ah_attr->grh.flow_label = be32_to_cpu(rec->flow_label); ah_attr->grh.hop_limit = rec->hop_limit; ah_attr->grh.traffic_class = rec->traffic_class; + if (ndev) + dev_put(ndev); } if (force_grh) { memcpy(ah_attr->dmac, rec->dmac, ETH_ALEN); - ah_attr->vlan_id = rec->vlan_id; - } else { - ah_attr->vlan_id = 0xffff; } - return 0; } EXPORT_SYMBOL(ib_init_ah_from_path); @@ -1083,7 +1088,7 @@ static void init_mad(struct ib_sa_mad *mad, struct ib_mad_agent *agent) static int send_mad(struct ib_sa_query *query, int timeout_ms, gfp_t gfp_mask) { - bool preload = !!(gfp_mask & __GFP_WAIT); + bool preload = gfpflags_allow_blocking(gfp_mask); unsigned long flags; int ret, id; @@ -1105,7 +1110,7 @@ static int send_mad(struct ib_sa_query *query, int timeout_ms, gfp_t gfp_mask) if (query->flags & IB_SA_ENABLE_LOCAL_SERVICE) { if (!ibnl_chk_listeners(RDMA_NL_GROUP_LS)) { - if (!ib_nl_make_request(query)) + if (!ib_nl_make_request(query, gfp_mask)) return id; } ib_sa_disable_local_svc(query); @@ -1150,9 +1155,9 @@ static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query, ib_unpack(path_rec_table, ARRAY_SIZE(path_rec_table), mad->data, &rec); - rec.vlan_id = 0xffff; + rec.net = NULL; + rec.ifindex = 0; memset(rec.dmac, 0, ETH_ALEN); - memset(rec.smac, 0, ETH_ALEN); query->callback(status, &rec, query->context); } else query->callback(status, NULL, query->context); diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index 34cdd74b0..b1f37d409 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -289,7 +289,7 @@ static ssize_t show_port_gid(struct ib_port *p, struct port_attribute *attr, union ib_gid gid; ssize_t ret; - ret = ib_query_gid(p->ibdev, p->port_num, tab_attr->index, &gid); + ret = ib_query_gid(p->ibdev, p->port_num, tab_attr->index, &gid, NULL); if (ret) return ret; diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index 30467d10d..8b5a934e1 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -42,6 +42,7 @@ #include <linux/slab.h> #include <linux/sysctl.h> #include <linux/module.h> +#include <linux/nsproxy.h> #include <rdma/rdma_user_cm.h> #include <rdma/ib_marshall.h> @@ -472,7 +473,8 @@ static ssize_t ucma_create_id(struct ucma_file *file, const char __user *inbuf, return -ENOMEM; ctx->uid = cmd.uid; - ctx->cm_id = rdma_create_id(ucma_event_handler, ctx, cmd.ps, qp_type); + ctx->cm_id = rdma_create_id(current->nsproxy->net_ns, + ucma_event_handler, ctx, cmd.ps, qp_type); if (IS_ERR(ctx->cm_id)) { ret = PTR_ERR(ctx->cm_id); goto err1; @@ -1211,7 +1213,6 @@ static int ucma_set_ib_path(struct ucma_context *ctx, return -EINVAL; memset(&sa_path, 0, sizeof(sa_path)); - sa_path.vlan_id = 0xffff; ib_sa_unpack_path(path_data->path_rec, &sa_path); ret = rdma_set_ib_paths(ctx->cm_id, &sa_path, 1); diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index 3863d33c2..94bbd8c15 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -272,5 +272,6 @@ IB_UVERBS_DECLARE_EX_CMD(create_flow); IB_UVERBS_DECLARE_EX_CMD(destroy_flow); IB_UVERBS_DECLARE_EX_CMD(query_device); IB_UVERBS_DECLARE_EX_CMD(create_cq); +IB_UVERBS_DECLARE_EX_CMD(create_qp); #endif /* UVERBS_H */ diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index be4cb9f04..1c02deab0 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -62,9 +62,11 @@ static struct uverbs_lock_class rule_lock_class = { .name = "RULE-uobj" }; * The ib_uobject locking scheme is as follows: * * - ib_uverbs_idr_lock protects the uverbs idrs themselves, so it - * needs to be held during all idr operations. When an object is + * needs to be held during all idr write operations. When an object is * looked up, a reference must be taken on the object's kref before - * dropping this lock. + * dropping this lock. For read operations, the rcu_read_lock() + * and rcu_write_lock() but similarly the kref reference is grabbed + * before the rcu_read_unlock(). * * - Each object also has an rwsem. This rwsem must be held for * reading while an operation that uses the object is performed. @@ -96,7 +98,7 @@ static void init_uobj(struct ib_uobject *uobj, u64 user_handle, static void release_uobj(struct kref *kref) { - kfree(container_of(kref, struct ib_uobject, ref)); + kfree_rcu(container_of(kref, struct ib_uobject, ref), rcu); } static void put_uobj(struct ib_uobject *uobj) @@ -145,7 +147,7 @@ static struct ib_uobject *__idr_get_uobj(struct idr *idr, int id, { struct ib_uobject *uobj; - spin_lock(&ib_uverbs_idr_lock); + rcu_read_lock(); uobj = idr_find(idr, id); if (uobj) { if (uobj->context == context) @@ -153,7 +155,7 @@ static struct ib_uobject *__idr_get_uobj(struct idr *idr, int id, else uobj = NULL; } - spin_unlock(&ib_uverbs_idr_lock); + rcu_read_unlock(); return uobj; } @@ -1478,7 +1480,7 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof(cmd))) return -EFAULT; - INIT_UDATA(&ucore, buf, cmd.response, sizeof(cmd), sizeof(resp)); + INIT_UDATA(&ucore, buf, (unsigned long)cmd.response, sizeof(cmd), sizeof(resp)); INIT_UDATA(&uhw, buf + sizeof(cmd), (unsigned long)cmd.response + sizeof(resp), @@ -1741,66 +1743,65 @@ ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file, return in_len; } -ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file, - struct ib_device *ib_dev, - const char __user *buf, int in_len, - int out_len) -{ - struct ib_uverbs_create_qp cmd; - struct ib_uverbs_create_qp_resp resp; - struct ib_udata udata; - struct ib_uqp_object *obj; - struct ib_device *device; - struct ib_pd *pd = NULL; - struct ib_xrcd *xrcd = NULL; - struct ib_uobject *uninitialized_var(xrcd_uobj); - struct ib_cq *scq = NULL, *rcq = NULL; - struct ib_srq *srq = NULL; - struct ib_qp *qp; - struct ib_qp_init_attr attr; - int ret; - - if (out_len < sizeof resp) - return -ENOSPC; - - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; +static int create_qp(struct ib_uverbs_file *file, + struct ib_udata *ucore, + struct ib_udata *uhw, + struct ib_uverbs_ex_create_qp *cmd, + size_t cmd_sz, + int (*cb)(struct ib_uverbs_file *file, + struct ib_uverbs_ex_create_qp_resp *resp, + struct ib_udata *udata), + void *context) +{ + struct ib_uqp_object *obj; + struct ib_device *device; + struct ib_pd *pd = NULL; + struct ib_xrcd *xrcd = NULL; + struct ib_uobject *uninitialized_var(xrcd_uobj); + struct ib_cq *scq = NULL, *rcq = NULL; + struct ib_srq *srq = NULL; + struct ib_qp *qp; + char *buf; + struct ib_qp_init_attr attr; + struct ib_uverbs_ex_create_qp_resp resp; + int ret; - if (cmd.qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW)) + if (cmd->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW)) return -EPERM; - INIT_UDATA(&udata, buf + sizeof cmd, - (unsigned long) cmd.response + sizeof resp, - in_len - sizeof cmd, out_len - sizeof resp); - obj = kzalloc(sizeof *obj, GFP_KERNEL); if (!obj) return -ENOMEM; - init_uobj(&obj->uevent.uobject, cmd.user_handle, file->ucontext, &qp_lock_class); + init_uobj(&obj->uevent.uobject, cmd->user_handle, file->ucontext, + &qp_lock_class); down_write(&obj->uevent.uobject.mutex); - if (cmd.qp_type == IB_QPT_XRC_TGT) { - xrcd = idr_read_xrcd(cmd.pd_handle, file->ucontext, &xrcd_uobj); + if (cmd->qp_type == IB_QPT_XRC_TGT) { + xrcd = idr_read_xrcd(cmd->pd_handle, file->ucontext, + &xrcd_uobj); if (!xrcd) { ret = -EINVAL; goto err_put; } device = xrcd->device; } else { - if (cmd.qp_type == IB_QPT_XRC_INI) { - cmd.max_recv_wr = cmd.max_recv_sge = 0; + if (cmd->qp_type == IB_QPT_XRC_INI) { + cmd->max_recv_wr = 0; + cmd->max_recv_sge = 0; } else { - if (cmd.is_srq) { - srq = idr_read_srq(cmd.srq_handle, file->ucontext); + if (cmd->is_srq) { + srq = idr_read_srq(cmd->srq_handle, + file->ucontext); if (!srq || srq->srq_type != IB_SRQT_BASIC) { ret = -EINVAL; goto err_put; } } - if (cmd.recv_cq_handle != cmd.send_cq_handle) { - rcq = idr_read_cq(cmd.recv_cq_handle, file->ucontext, 0); + if (cmd->recv_cq_handle != cmd->send_cq_handle) { + rcq = idr_read_cq(cmd->recv_cq_handle, + file->ucontext, 0); if (!rcq) { ret = -EINVAL; goto err_put; @@ -1808,9 +1809,9 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file, } } - scq = idr_read_cq(cmd.send_cq_handle, file->ucontext, !!rcq); + scq = idr_read_cq(cmd->send_cq_handle, file->ucontext, !!rcq); rcq = rcq ?: scq; - pd = idr_read_pd(cmd.pd_handle, file->ucontext); + pd = idr_read_pd(cmd->pd_handle, file->ucontext); if (!pd || !scq) { ret = -EINVAL; goto err_put; @@ -1825,31 +1826,49 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file, attr.recv_cq = rcq; attr.srq = srq; attr.xrcd = xrcd; - attr.sq_sig_type = cmd.sq_sig_all ? IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR; - attr.qp_type = cmd.qp_type; + attr.sq_sig_type = cmd->sq_sig_all ? IB_SIGNAL_ALL_WR : + IB_SIGNAL_REQ_WR; + attr.qp_type = cmd->qp_type; attr.create_flags = 0; - attr.cap.max_send_wr = cmd.max_send_wr; - attr.cap.max_recv_wr = cmd.max_recv_wr; - attr.cap.max_send_sge = cmd.max_send_sge; - attr.cap.max_recv_sge = cmd.max_recv_sge; - attr.cap.max_inline_data = cmd.max_inline_data; + attr.cap.max_send_wr = cmd->max_send_wr; + attr.cap.max_recv_wr = cmd->max_recv_wr; + attr.cap.max_send_sge = cmd->max_send_sge; + attr.cap.max_recv_sge = cmd->max_recv_sge; + attr.cap.max_inline_data = cmd->max_inline_data; obj->uevent.events_reported = 0; INIT_LIST_HEAD(&obj->uevent.event_list); INIT_LIST_HEAD(&obj->mcast_list); - if (cmd.qp_type == IB_QPT_XRC_TGT) + if (cmd_sz >= offsetof(typeof(*cmd), create_flags) + + sizeof(cmd->create_flags)) + attr.create_flags = cmd->create_flags; + + if (attr.create_flags & ~IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) { + ret = -EINVAL; + goto err_put; + } + + buf = (void *)cmd + sizeof(*cmd); + if (cmd_sz > sizeof(*cmd)) + if (!(buf[0] == 0 && !memcmp(buf, buf + 1, + cmd_sz - sizeof(*cmd) - 1))) { + ret = -EINVAL; + goto err_put; + } + + if (cmd->qp_type == IB_QPT_XRC_TGT) qp = ib_create_qp(pd, &attr); else - qp = device->create_qp(pd, &attr, &udata); + qp = device->create_qp(pd, &attr, uhw); if (IS_ERR(qp)) { ret = PTR_ERR(qp); goto err_put; } - if (cmd.qp_type != IB_QPT_XRC_TGT) { + if (cmd->qp_type != IB_QPT_XRC_TGT) { qp->real_qp = qp; qp->device = device; qp->pd = pd; @@ -1875,19 +1894,20 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file, goto err_destroy; memset(&resp, 0, sizeof resp); - resp.qpn = qp->qp_num; - resp.qp_handle = obj->uevent.uobject.id; - resp.max_recv_sge = attr.cap.max_recv_sge; - resp.max_send_sge = attr.cap.max_send_sge; - resp.max_recv_wr = attr.cap.max_recv_wr; - resp.max_send_wr = attr.cap.max_send_wr; - resp.max_inline_data = attr.cap.max_inline_data; + resp.base.qpn = qp->qp_num; + resp.base.qp_handle = obj->uevent.uobject.id; + resp.base.max_recv_sge = attr.cap.max_recv_sge; + resp.base.max_send_sge = attr.cap.max_send_sge; + resp.base.max_recv_wr = attr.cap.max_recv_wr; + resp.base.max_send_wr = attr.cap.max_send_wr; + resp.base.max_inline_data = attr.cap.max_inline_data; - if (copy_to_user((void __user *) (unsigned long) cmd.response, - &resp, sizeof resp)) { - ret = -EFAULT; - goto err_copy; - } + resp.response_length = offsetof(typeof(resp), response_length) + + sizeof(resp.response_length); + + ret = cb(file, &resp, ucore); + if (ret) + goto err_cb; if (xrcd) { obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object, @@ -1913,9 +1933,8 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file, up_write(&obj->uevent.uobject.mutex); - return in_len; - -err_copy: + return 0; +err_cb: idr_remove_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject); err_destroy: @@ -1937,6 +1956,113 @@ err_put: return ret; } +static int ib_uverbs_create_qp_cb(struct ib_uverbs_file *file, + struct ib_uverbs_ex_create_qp_resp *resp, + struct ib_udata *ucore) +{ + if (ib_copy_to_udata(ucore, &resp->base, sizeof(resp->base))) + return -EFAULT; + + return 0; +} + +ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file, + struct ib_device *ib_dev, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_create_qp cmd; + struct ib_uverbs_ex_create_qp cmd_ex; + struct ib_udata ucore; + struct ib_udata uhw; + ssize_t resp_size = sizeof(struct ib_uverbs_create_qp_resp); + int err; + + if (out_len < resp_size) + return -ENOSPC; + + if (copy_from_user(&cmd, buf, sizeof(cmd))) + return -EFAULT; + + INIT_UDATA(&ucore, buf, (unsigned long)cmd.response, sizeof(cmd), + resp_size); + INIT_UDATA(&uhw, buf + sizeof(cmd), + (unsigned long)cmd.response + resp_size, + in_len - sizeof(cmd), out_len - resp_size); + + memset(&cmd_ex, 0, sizeof(cmd_ex)); + cmd_ex.user_handle = cmd.user_handle; + cmd_ex.pd_handle = cmd.pd_handle; + cmd_ex.send_cq_handle = cmd.send_cq_handle; + cmd_ex.recv_cq_handle = cmd.recv_cq_handle; + cmd_ex.srq_handle = cmd.srq_handle; + cmd_ex.max_send_wr = cmd.max_send_wr; + cmd_ex.max_recv_wr = cmd.max_recv_wr; + cmd_ex.max_send_sge = cmd.max_send_sge; + cmd_ex.max_recv_sge = cmd.max_recv_sge; + cmd_ex.max_inline_data = cmd.max_inline_data; + cmd_ex.sq_sig_all = cmd.sq_sig_all; + cmd_ex.qp_type = cmd.qp_type; + cmd_ex.is_srq = cmd.is_srq; + + err = create_qp(file, &ucore, &uhw, &cmd_ex, + offsetof(typeof(cmd_ex), is_srq) + + sizeof(cmd.is_srq), ib_uverbs_create_qp_cb, + NULL); + + if (err) + return err; + + return in_len; +} + +static int ib_uverbs_ex_create_qp_cb(struct ib_uverbs_file *file, + struct ib_uverbs_ex_create_qp_resp *resp, + struct ib_udata *ucore) +{ + if (ib_copy_to_udata(ucore, resp, resp->response_length)) + return -EFAULT; + + return 0; +} + +int ib_uverbs_ex_create_qp(struct ib_uverbs_file *file, + struct ib_device *ib_dev, + struct ib_udata *ucore, + struct ib_udata *uhw) +{ + struct ib_uverbs_ex_create_qp_resp resp; + struct ib_uverbs_ex_create_qp cmd = {0}; + int err; + + if (ucore->inlen < (offsetof(typeof(cmd), comp_mask) + + sizeof(cmd.comp_mask))) + return -EINVAL; + + err = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen)); + if (err) + return err; + + if (cmd.comp_mask) + return -EINVAL; + + if (cmd.reserved) + return -EINVAL; + + if (ucore->outlen < (offsetof(typeof(resp), response_length) + + sizeof(resp.response_length))) + return -ENOSPC; + + err = create_qp(file, ucore, uhw, &cmd, + min(ucore->inlen, sizeof(cmd)), + ib_uverbs_ex_create_qp_cb, NULL); + + if (err) + return err; + + return 0; +} + ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file, struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) @@ -2221,7 +2347,7 @@ ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file, attr->alt_ah_attr.port_num = cmd.alt_dest.port_num; if (qp->real_qp == qp) { - ret = ib_resolve_eth_l2_attrs(qp, attr, &cmd.attr_mask); + ret = ib_resolve_eth_dmac(qp, attr, &cmd.attr_mask); if (ret) goto release_qp; ret = qp->device->modify_qp(qp, attr, @@ -2303,6 +2429,12 @@ ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file, return in_len; } +static void *alloc_wr(size_t wr_size, __u32 num_sge) +{ + return kmalloc(ALIGN(wr_size, sizeof (struct ib_sge)) + + num_sge * sizeof (struct ib_sge), GFP_KERNEL); +}; + ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file, struct ib_device *ib_dev, const char __user *buf, int in_len, @@ -2316,6 +2448,7 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file, int i, sg_ind; int is_ud; ssize_t ret = -EINVAL; + size_t next_size; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; @@ -2351,14 +2484,87 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file, goto out_put; } - next = kmalloc(ALIGN(sizeof *next, sizeof (struct ib_sge)) + - user_wr->num_sge * sizeof (struct ib_sge), - GFP_KERNEL); - if (!next) { - ret = -ENOMEM; + if (is_ud) { + struct ib_ud_wr *ud; + + if (user_wr->opcode != IB_WR_SEND && + user_wr->opcode != IB_WR_SEND_WITH_IMM) { + ret = -EINVAL; + goto out_put; + } + + next_size = sizeof(*ud); + ud = alloc_wr(next_size, user_wr->num_sge); + if (!ud) { + ret = -ENOMEM; + goto out_put; + } + + ud->ah = idr_read_ah(user_wr->wr.ud.ah, file->ucontext); + if (!ud->ah) { + kfree(ud); + ret = -EINVAL; + goto out_put; + } + ud->remote_qpn = user_wr->wr.ud.remote_qpn; + ud->remote_qkey = user_wr->wr.ud.remote_qkey; + + next = &ud->wr; + } else if (user_wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM || + user_wr->opcode == IB_WR_RDMA_WRITE || + user_wr->opcode == IB_WR_RDMA_READ) { + struct ib_rdma_wr *rdma; + + next_size = sizeof(*rdma); + rdma = alloc_wr(next_size, user_wr->num_sge); + if (!rdma) { + ret = -ENOMEM; + goto out_put; + } + + rdma->remote_addr = user_wr->wr.rdma.remote_addr; + rdma->rkey = user_wr->wr.rdma.rkey; + + next = &rdma->wr; + } else if (user_wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP || + user_wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD) { + struct ib_atomic_wr *atomic; + + next_size = sizeof(*atomic); + atomic = alloc_wr(next_size, user_wr->num_sge); + if (!atomic) { + ret = -ENOMEM; + goto out_put; + } + + atomic->remote_addr = user_wr->wr.atomic.remote_addr; + atomic->compare_add = user_wr->wr.atomic.compare_add; + atomic->swap = user_wr->wr.atomic.swap; + atomic->rkey = user_wr->wr.atomic.rkey; + + next = &atomic->wr; + } else if (user_wr->opcode == IB_WR_SEND || + user_wr->opcode == IB_WR_SEND_WITH_IMM || + user_wr->opcode == IB_WR_SEND_WITH_INV) { + next_size = sizeof(*next); + next = alloc_wr(next_size, user_wr->num_sge); + if (!next) { + ret = -ENOMEM; + goto out_put; + } + } else { + ret = -EINVAL; goto out_put; } + if (user_wr->opcode == IB_WR_SEND_WITH_IMM || + user_wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) { + next->ex.imm_data = + (__be32 __force) user_wr->ex.imm_data; + } else if (user_wr->opcode == IB_WR_SEND_WITH_INV) { + next->ex.invalidate_rkey = user_wr->ex.invalidate_rkey; + } + if (!last) wr = next; else @@ -2371,63 +2577,9 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file, next->opcode = user_wr->opcode; next->send_flags = user_wr->send_flags; - if (is_ud) { - if (next->opcode != IB_WR_SEND && - next->opcode != IB_WR_SEND_WITH_IMM) { - ret = -EINVAL; - goto out_put; - } - - next->wr.ud.ah = idr_read_ah(user_wr->wr.ud.ah, - file->ucontext); - if (!next->wr.ud.ah) { - ret = -EINVAL; - goto out_put; - } - next->wr.ud.remote_qpn = user_wr->wr.ud.remote_qpn; - next->wr.ud.remote_qkey = user_wr->wr.ud.remote_qkey; - if (next->opcode == IB_WR_SEND_WITH_IMM) - next->ex.imm_data = - (__be32 __force) user_wr->ex.imm_data; - } else { - switch (next->opcode) { - case IB_WR_RDMA_WRITE_WITH_IMM: - next->ex.imm_data = - (__be32 __force) user_wr->ex.imm_data; - case IB_WR_RDMA_WRITE: - case IB_WR_RDMA_READ: - next->wr.rdma.remote_addr = - user_wr->wr.rdma.remote_addr; - next->wr.rdma.rkey = - user_wr->wr.rdma.rkey; - break; - case IB_WR_SEND_WITH_IMM: - next->ex.imm_data = - (__be32 __force) user_wr->ex.imm_data; - break; - case IB_WR_SEND_WITH_INV: - next->ex.invalidate_rkey = - user_wr->ex.invalidate_rkey; - break; - case IB_WR_ATOMIC_CMP_AND_SWP: - case IB_WR_ATOMIC_FETCH_AND_ADD: - next->wr.atomic.remote_addr = - user_wr->wr.atomic.remote_addr; - next->wr.atomic.compare_add = - user_wr->wr.atomic.compare_add; - next->wr.atomic.swap = user_wr->wr.atomic.swap; - next->wr.atomic.rkey = user_wr->wr.atomic.rkey; - case IB_WR_SEND: - break; - default: - ret = -EINVAL; - goto out_put; - } - } - if (next->num_sge) { next->sg_list = (void *) next + - ALIGN(sizeof *next, sizeof (struct ib_sge)); + ALIGN(next_size, sizeof(struct ib_sge)); if (copy_from_user(next->sg_list, buf + sizeof cmd + cmd.wr_count * cmd.wqe_size + @@ -2458,8 +2610,8 @@ out_put: put_qp_read(qp); while (wr) { - if (is_ud && wr->wr.ud.ah) - put_ah_read(wr->wr.ud.ah); + if (is_ud && ud_wr(wr)->ah) + put_ah_read(ud_wr(wr)->ah); next = wr->next; kfree(wr); wr = next; @@ -2698,7 +2850,6 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file, attr.grh.sgid_index = cmd.attr.grh.sgid_index; attr.grh.hop_limit = cmd.attr.grh.hop_limit; attr.grh.traffic_class = cmd.attr.grh.traffic_class; - attr.vlan_id = 0; memset(&attr.dmac, 0, sizeof(attr.dmac)); memcpy(attr.grh.dgid.raw, cmd.attr.grh.dgid, 16); diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index c29a660c7..e3ef28861 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -127,6 +127,7 @@ static int (*uverbs_ex_cmd_table[])(struct ib_uverbs_file *file, [IB_USER_VERBS_EX_CMD_DESTROY_FLOW] = ib_uverbs_ex_destroy_flow, [IB_USER_VERBS_EX_CMD_QUERY_DEVICE] = ib_uverbs_ex_query_device, [IB_USER_VERBS_EX_CMD_CREATE_CQ] = ib_uverbs_ex_create_cq, + [IB_USER_VERBS_EX_CMD_CREATE_QP] = ib_uverbs_ex_create_qp, }; static void ib_uverbs_add_one(struct ib_device *device); diff --git a/drivers/infiniband/core/uverbs_marshall.c b/drivers/infiniband/core/uverbs_marshall.c index abd972474..7d2f14c9b 100644 --- a/drivers/infiniband/core/uverbs_marshall.c +++ b/drivers/infiniband/core/uverbs_marshall.c @@ -141,8 +141,8 @@ void ib_copy_path_rec_from_user(struct ib_sa_path_rec *dst, dst->preference = src->preference; dst->packet_life_time_selector = src->packet_life_time_selector; - memset(dst->smac, 0, sizeof(dst->smac)); memset(dst->dmac, 0, sizeof(dst->dmac)); - dst->vlan_id = 0xffff; + dst->net = NULL; + dst->ifindex = 0; } EXPORT_SYMBOL(ib_copy_path_rec_from_user); diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index e1f2c9887..545906dec 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -41,6 +41,9 @@ #include <linux/export.h> #include <linux/string.h> #include <linux/slab.h> +#include <linux/in.h> +#include <linux/in6.h> +#include <net/addrconf.h> #include <rdma/ib_verbs.h> #include <rdma/ib_cache.h> @@ -70,7 +73,7 @@ static const char * const ib_events[] = { [IB_EVENT_GID_CHANGE] = "GID changed", }; -const char *ib_event_msg(enum ib_event_type event) +const char *__attribute_const__ ib_event_msg(enum ib_event_type event) { size_t index = event; @@ -104,7 +107,7 @@ static const char * const wc_statuses[] = { [IB_WC_GENERAL_ERR] = "general error", }; -const char *ib_wc_status_msg(enum ib_wc_status status) +const char *__attribute_const__ ib_wc_status_msg(enum ib_wc_status status) { size_t index = status; @@ -308,6 +311,35 @@ struct ib_ah *ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr) } EXPORT_SYMBOL(ib_create_ah); +struct find_gid_index_context { + u16 vlan_id; +}; + +static bool find_gid_index(const union ib_gid *gid, + const struct ib_gid_attr *gid_attr, + void *context) +{ + struct find_gid_index_context *ctx = + (struct find_gid_index_context *)context; + + if ((!!(ctx->vlan_id != 0xffff) == !is_vlan_dev(gid_attr->ndev)) || + (is_vlan_dev(gid_attr->ndev) && + vlan_dev_vlan_id(gid_attr->ndev) != ctx->vlan_id)) + return false; + + return true; +} + +static int get_sgid_index_from_eth(struct ib_device *device, u8 port_num, + u16 vlan_id, const union ib_gid *sgid, + u16 *gid_index) +{ + struct find_gid_index_context context = {.vlan_id = vlan_id}; + + return ib_find_gid_by_filter(device, sgid, port_num, find_gid_index, + &context, gid_index); +} + int ib_init_ah_from_wc(struct ib_device *device, u8 port_num, const struct ib_wc *wc, const struct ib_grh *grh, struct ib_ah_attr *ah_attr) @@ -318,21 +350,30 @@ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num, memset(ah_attr, 0, sizeof *ah_attr); if (rdma_cap_eth_ah(device, port_num)) { + u16 vlan_id = wc->wc_flags & IB_WC_WITH_VLAN ? + wc->vlan_id : 0xffff; + if (!(wc->wc_flags & IB_WC_GRH)) return -EPROTOTYPE; - if (wc->wc_flags & IB_WC_WITH_SMAC && - wc->wc_flags & IB_WC_WITH_VLAN) { - memcpy(ah_attr->dmac, wc->smac, ETH_ALEN); - ah_attr->vlan_id = wc->vlan_id; - } else { + if (!(wc->wc_flags & IB_WC_WITH_SMAC) || + !(wc->wc_flags & IB_WC_WITH_VLAN)) { ret = rdma_addr_find_dmac_by_grh(&grh->dgid, &grh->sgid, - ah_attr->dmac, &ah_attr->vlan_id); + ah_attr->dmac, + wc->wc_flags & IB_WC_WITH_VLAN ? + NULL : &vlan_id, + 0); if (ret) return ret; } - } else { - ah_attr->vlan_id = 0xffff; + + ret = get_sgid_index_from_eth(device, port_num, vlan_id, + &grh->dgid, &gid_index); + if (ret) + return ret; + + if (wc->wc_flags & IB_WC_WITH_SMAC) + memcpy(ah_attr->dmac, wc->smac, ETH_ALEN); } ah_attr->dlid = wc->slid; @@ -344,10 +385,13 @@ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num, ah_attr->ah_flags = IB_AH_GRH; ah_attr->grh.dgid = grh->sgid; - ret = ib_find_cached_gid(device, &grh->dgid, &port_num, - &gid_index); - if (ret) - return ret; + if (!rdma_cap_eth_ah(device, port_num)) { + ret = ib_find_cached_gid_by_port(device, &grh->dgid, + port_num, NULL, + &gid_index); + if (ret) + return ret; + } ah_attr->grh.sgid_index = (u8) gid_index; flow_class = be32_to_cpu(grh->version_tclass_flow); @@ -617,9 +661,7 @@ EXPORT_SYMBOL(ib_create_qp); static const struct { int valid; enum ib_qp_attr_mask req_param[IB_QPT_MAX]; - enum ib_qp_attr_mask req_param_add_eth[IB_QPT_MAX]; enum ib_qp_attr_mask opt_param[IB_QPT_MAX]; - enum ib_qp_attr_mask opt_param_add_eth[IB_QPT_MAX]; } qp_state_table[IB_QPS_ERR + 1][IB_QPS_ERR + 1] = { [IB_QPS_RESET] = { [IB_QPS_RESET] = { .valid = 1 }, @@ -700,12 +742,6 @@ static const struct { IB_QP_MAX_DEST_RD_ATOMIC | IB_QP_MIN_RNR_TIMER), }, - .req_param_add_eth = { - [IB_QPT_RC] = (IB_QP_SMAC), - [IB_QPT_UC] = (IB_QP_SMAC), - [IB_QPT_XRC_INI] = (IB_QP_SMAC), - [IB_QPT_XRC_TGT] = (IB_QP_SMAC) - }, .opt_param = { [IB_QPT_UD] = (IB_QP_PKEY_INDEX | IB_QP_QKEY), @@ -726,21 +762,7 @@ static const struct { [IB_QPT_GSI] = (IB_QP_PKEY_INDEX | IB_QP_QKEY), }, - .opt_param_add_eth = { - [IB_QPT_RC] = (IB_QP_ALT_SMAC | - IB_QP_VID | - IB_QP_ALT_VID), - [IB_QPT_UC] = (IB_QP_ALT_SMAC | - IB_QP_VID | - IB_QP_ALT_VID), - [IB_QPT_XRC_INI] = (IB_QP_ALT_SMAC | - IB_QP_VID | - IB_QP_ALT_VID), - [IB_QPT_XRC_TGT] = (IB_QP_ALT_SMAC | - IB_QP_VID | - IB_QP_ALT_VID) - } - } + }, }, [IB_QPS_RTR] = { [IB_QPS_RESET] = { .valid = 1 }, @@ -962,13 +984,6 @@ int ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state, req_param = qp_state_table[cur_state][next_state].req_param[type]; opt_param = qp_state_table[cur_state][next_state].opt_param[type]; - if (ll == IB_LINK_LAYER_ETHERNET) { - req_param |= qp_state_table[cur_state][next_state]. - req_param_add_eth[type]; - opt_param |= qp_state_table[cur_state][next_state]. - opt_param_add_eth[type]; - } - if ((mask & req_param) != req_param) return 0; @@ -979,40 +994,52 @@ int ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state, } EXPORT_SYMBOL(ib_modify_qp_is_ok); -int ib_resolve_eth_l2_attrs(struct ib_qp *qp, - struct ib_qp_attr *qp_attr, int *qp_attr_mask) +int ib_resolve_eth_dmac(struct ib_qp *qp, + struct ib_qp_attr *qp_attr, int *qp_attr_mask) { int ret = 0; - union ib_gid sgid; - if ((*qp_attr_mask & IB_QP_AV) && - (rdma_cap_eth_ah(qp->device, qp_attr->ah_attr.port_num))) { - ret = ib_query_gid(qp->device, qp_attr->ah_attr.port_num, - qp_attr->ah_attr.grh.sgid_index, &sgid); - if (ret) - goto out; + if (*qp_attr_mask & IB_QP_AV) { + if (qp_attr->ah_attr.port_num < rdma_start_port(qp->device) || + qp_attr->ah_attr.port_num > rdma_end_port(qp->device)) + return -EINVAL; + + if (!rdma_cap_eth_ah(qp->device, qp_attr->ah_attr.port_num)) + return 0; + if (rdma_link_local_addr((struct in6_addr *)qp_attr->ah_attr.grh.dgid.raw)) { - rdma_get_ll_mac((struct in6_addr *)qp_attr->ah_attr.grh.dgid.raw, qp_attr->ah_attr.dmac); - rdma_get_ll_mac((struct in6_addr *)sgid.raw, qp_attr->smac); - if (!(*qp_attr_mask & IB_QP_VID)) - qp_attr->vlan_id = rdma_get_vlan_id(&sgid); + rdma_get_ll_mac((struct in6_addr *)qp_attr->ah_attr.grh.dgid.raw, + qp_attr->ah_attr.dmac); } else { - ret = rdma_addr_find_dmac_by_grh(&sgid, &qp_attr->ah_attr.grh.dgid, - qp_attr->ah_attr.dmac, &qp_attr->vlan_id); - if (ret) - goto out; - ret = rdma_addr_find_smac_by_sgid(&sgid, qp_attr->smac, NULL); - if (ret) + union ib_gid sgid; + struct ib_gid_attr sgid_attr; + int ifindex; + + ret = ib_query_gid(qp->device, + qp_attr->ah_attr.port_num, + qp_attr->ah_attr.grh.sgid_index, + &sgid, &sgid_attr); + + if (ret || !sgid_attr.ndev) { + if (!ret) + ret = -ENXIO; goto out; + } + + ifindex = sgid_attr.ndev->ifindex; + + ret = rdma_addr_find_dmac_by_grh(&sgid, + &qp_attr->ah_attr.grh.dgid, + qp_attr->ah_attr.dmac, + NULL, ifindex); + + dev_put(sgid_attr.ndev); } - *qp_attr_mask |= IB_QP_SMAC; - if (qp_attr->vlan_id < 0xFFFF) - *qp_attr_mask |= IB_QP_VID; } out: return ret; } -EXPORT_SYMBOL(ib_resolve_eth_l2_attrs); +EXPORT_SYMBOL(ib_resolve_eth_dmac); int ib_modify_qp(struct ib_qp *qp, @@ -1021,7 +1048,7 @@ int ib_modify_qp(struct ib_qp *qp, { int ret; - ret = ib_resolve_eth_l2_attrs(qp, qp_attr, &qp_attr_mask); + ret = ib_resolve_eth_dmac(qp, qp_attr, &qp_attr_mask); if (ret) return ret; @@ -1253,31 +1280,6 @@ struct ib_mr *ib_alloc_mr(struct ib_pd *pd, } EXPORT_SYMBOL(ib_alloc_mr); -struct ib_fast_reg_page_list *ib_alloc_fast_reg_page_list(struct ib_device *device, - int max_page_list_len) -{ - struct ib_fast_reg_page_list *page_list; - - if (!device->alloc_fast_reg_page_list) - return ERR_PTR(-ENOSYS); - - page_list = device->alloc_fast_reg_page_list(device, max_page_list_len); - - if (!IS_ERR(page_list)) { - page_list->device = device; - page_list->max_page_list_len = max_page_list_len; - } - - return page_list; -} -EXPORT_SYMBOL(ib_alloc_fast_reg_page_list); - -void ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list) -{ - page_list->device->free_fast_reg_page_list(page_list); -} -EXPORT_SYMBOL(ib_free_fast_reg_page_list); - /* Memory windows */ struct ib_mw *ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type) @@ -1469,3 +1471,111 @@ int ib_check_mr_status(struct ib_mr *mr, u32 check_mask, mr->device->check_mr_status(mr, check_mask, mr_status) : -ENOSYS; } EXPORT_SYMBOL(ib_check_mr_status); + +/** + * ib_map_mr_sg() - Map the largest prefix of a dma mapped SG list + * and set it the memory region. + * @mr: memory region + * @sg: dma mapped scatterlist + * @sg_nents: number of entries in sg + * @page_size: page vector desired page size + * + * Constraints: + * - The first sg element is allowed to have an offset. + * - Each sg element must be aligned to page_size (or physically + * contiguous to the previous element). In case an sg element has a + * non contiguous offset, the mapping prefix will not include it. + * - The last sg element is allowed to have length less than page_size. + * - If sg_nents total byte length exceeds the mr max_num_sge * page_size + * then only max_num_sg entries will be mapped. + * + * Returns the number of sg elements that were mapped to the memory region. + * + * After this completes successfully, the memory region + * is ready for registration. + */ +int ib_map_mr_sg(struct ib_mr *mr, + struct scatterlist *sg, + int sg_nents, + unsigned int page_size) +{ + if (unlikely(!mr->device->map_mr_sg)) + return -ENOSYS; + + mr->page_size = page_size; + + return mr->device->map_mr_sg(mr, sg, sg_nents); +} +EXPORT_SYMBOL(ib_map_mr_sg); + +/** + * ib_sg_to_pages() - Convert the largest prefix of a sg list + * to a page vector + * @mr: memory region + * @sgl: dma mapped scatterlist + * @sg_nents: number of entries in sg + * @set_page: driver page assignment function pointer + * + * Core service helper for drivers to convert the largest + * prefix of given sg list to a page vector. The sg list + * prefix converted is the prefix that meet the requirements + * of ib_map_mr_sg. + * + * Returns the number of sg elements that were assigned to + * a page vector. + */ +int ib_sg_to_pages(struct ib_mr *mr, + struct scatterlist *sgl, + int sg_nents, + int (*set_page)(struct ib_mr *, u64)) +{ + struct scatterlist *sg; + u64 last_end_dma_addr = 0, last_page_addr = 0; + unsigned int last_page_off = 0; + u64 page_mask = ~((u64)mr->page_size - 1); + int i, ret; + + mr->iova = sg_dma_address(&sgl[0]); + mr->length = 0; + + for_each_sg(sgl, sg, sg_nents, i) { + u64 dma_addr = sg_dma_address(sg); + unsigned int dma_len = sg_dma_len(sg); + u64 end_dma_addr = dma_addr + dma_len; + u64 page_addr = dma_addr & page_mask; + + /* + * For the second and later elements, check whether either the + * end of element i-1 or the start of element i is not aligned + * on a page boundary. + */ + if (i && (last_page_off != 0 || page_addr != dma_addr)) { + /* Stop mapping if there is a gap. */ + if (last_end_dma_addr != dma_addr) + break; + + /* + * Coalesce this element with the last. If it is small + * enough just update mr->length. Otherwise start + * mapping from the next page. + */ + goto next_page; + } + + do { + ret = set_page(mr, page_addr); + if (unlikely(ret < 0)) + return i ? : ret; +next_page: + page_addr += mr->page_size; + } while (page_addr < end_dma_addr); + + mr->length += dma_len; + last_end_dma_addr = end_dma_addr; + last_page_addr = end_dma_addr & page_mask; + last_page_off = end_dma_addr & ~page_mask; + } + + return i; +} +EXPORT_SYMBOL(ib_sg_to_pages); |