diff options
Diffstat (limited to 'include/rdma')
-rw-r--r-- | include/rdma/ib_addr.h | 14 | ||||
-rw-r--r-- | include/rdma/ib_mad.h | 4 | ||||
-rw-r--r-- | include/rdma/ib_verbs.h | 48 | ||||
-rw-r--r-- | include/rdma/iw_cm.h | 6 | ||||
-rw-r--r-- | include/rdma/opa_port_info.h | 2 | ||||
-rw-r--r-- | include/rdma/rdma_vt.h | 481 | ||||
-rw-r--r-- | include/rdma/rdmavt_cq.h | 99 | ||||
-rw-r--r-- | include/rdma/rdmavt_mr.h | 139 | ||||
-rw-r--r-- | include/rdma/rdmavt_qp.h | 446 |
9 files changed, 1225 insertions, 14 deletions
diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h index c34c90024..931a47ba4 100644 --- a/include/rdma/ib_addr.h +++ b/include/rdma/ib_addr.h @@ -262,24 +262,22 @@ static inline enum ib_mtu iboe_get_mtu(int mtu) static inline int iboe_get_rate(struct net_device *dev) { - struct ethtool_cmd cmd; - u32 speed; + struct ethtool_link_ksettings cmd; int err; rtnl_lock(); - err = __ethtool_get_settings(dev, &cmd); + err = __ethtool_get_link_ksettings(dev, &cmd); rtnl_unlock(); if (err) return IB_RATE_PORT_CURRENT; - speed = ethtool_cmd_speed(&cmd); - if (speed >= 40000) + if (cmd.base.speed >= 40000) return IB_RATE_40_GBPS; - else if (speed >= 30000) + else if (cmd.base.speed >= 30000) return IB_RATE_30_GBPS; - else if (speed >= 20000) + else if (cmd.base.speed >= 20000) return IB_RATE_20_GBPS; - else if (speed >= 10000) + else if (cmd.base.speed >= 10000) return IB_RATE_10_GBPS; else return IB_RATE_PORT_CURRENT; diff --git a/include/rdma/ib_mad.h b/include/rdma/ib_mad.h index 0ff049bd9..37dd534cb 100644 --- a/include/rdma/ib_mad.h +++ b/include/rdma/ib_mad.h @@ -424,11 +424,11 @@ typedef void (*ib_mad_send_handler)(struct ib_mad_agent *mad_agent, /** * ib_mad_snoop_handler - Callback handler for snooping sent MADs. * @mad_agent: MAD agent that snooped the MAD. - * @send_wr: Work request information on the sent MAD. + * @send_buf: send MAD data buffer. * @mad_send_wc: Work completion information on the sent MAD. Valid * only for snooping that occurs on a send completion. * - * Clients snooping MADs should not modify data referenced by the @send_wr + * Clients snooping MADs should not modify data referenced by the @send_buf * or @mad_send_wc. */ typedef void (*ib_mad_snoop_handler)(struct ib_mad_agent *mad_agent, diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 284b00c8f..fb2cef4e9 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -56,6 +56,7 @@ #include <linux/string.h> #include <linux/slab.h> +#include <linux/if_link.h> #include <linux/atomic.h> #include <linux/mmu_notifier.h> #include <asm/uaccess.h> @@ -97,6 +98,11 @@ enum rdma_node_type { RDMA_NODE_USNIC_UDP, }; +enum { + /* set the local administered indication */ + IB_SA_WELL_KNOWN_GUID = BIT_ULL(57) | 2, +}; + enum rdma_transport_type { RDMA_TRANSPORT_IB, RDMA_TRANSPORT_IWARP, @@ -212,6 +218,8 @@ enum ib_device_cap_flags { IB_DEVICE_MANAGED_FLOW_STEERING = (1 << 29), IB_DEVICE_SIGNATURE_HANDOVER = (1 << 30), IB_DEVICE_ON_DEMAND_PAGING = (1 << 31), + IB_DEVICE_SG_GAPS_REG = (1ULL << 32), + IB_DEVICE_VIRTUAL_FUNCTION = ((u64)1 << 33), }; enum ib_signature_prot_cap { @@ -273,7 +281,7 @@ struct ib_device_attr { u32 hw_ver; int max_qp; int max_qp_wr; - int device_cap_flags; + u64 device_cap_flags; int max_sge; int max_sge_rd; int max_cq; @@ -489,6 +497,7 @@ union rdma_protocol_stats { | RDMA_CORE_CAP_OPA_MAD) struct ib_port_attr { + u64 subnet_prefix; enum ib_port_state state; enum ib_mtu max_mtu; enum ib_mtu active_mtu; @@ -508,6 +517,7 @@ struct ib_port_attr { u8 active_width; u8 active_speed; u8 phys_state; + bool grh_required; }; enum ib_device_modify_flags { @@ -613,6 +623,7 @@ enum { }; #define IB_LID_PERMISSIVE cpu_to_be16(0xFFFF) +#define IB_MULTICAST_LID_BASE cpu_to_be16(0xC000) enum ib_ah_flags { IB_AH_GRH = 1 @@ -662,10 +673,15 @@ __attribute_const__ int ib_rate_to_mbps(enum ib_rate rate); * @IB_MR_TYPE_SIGNATURE: memory region that is used for * signature operations (data-integrity * capable regions) + * @IB_MR_TYPE_SG_GAPS: memory region that is capable to + * register any arbitrary sg lists (without + * the normal mr constraints - see + * ib_map_mr_sg) */ enum ib_mr_type { IB_MR_TYPE_MEM_REG, IB_MR_TYPE_SIGNATURE, + IB_MR_TYPE_SG_GAPS, }; /** @@ -1487,6 +1503,11 @@ enum ib_flow_domain { IB_FLOW_DOMAIN_NUM /* Must be last */ }; +enum ib_flow_flags { + IB_FLOW_ATTR_FLAGS_DONT_TRAP = 1UL << 1, /* Continue match, no steal */ + IB_FLOW_ATTR_FLAGS_RESERVED = 1UL << 2 /* Must be last */ +}; + struct ib_flow_eth_filter { u8 dst_mac[6]; u8 src_mac[6]; @@ -1808,7 +1829,8 @@ struct ib_device { struct scatterlist *sg, int sg_nents); struct ib_mw * (*alloc_mw)(struct ib_pd *pd, - enum ib_mw_type type); + enum ib_mw_type type, + struct ib_udata *udata); int (*dealloc_mw)(struct ib_mw *mw); struct ib_fmr * (*alloc_fmr)(struct ib_pd *pd, int mr_access_flags, @@ -1846,6 +1868,16 @@ struct ib_device { int (*check_mr_status)(struct ib_mr *mr, u32 check_mask, struct ib_mr_status *mr_status); void (*disassociate_ucontext)(struct ib_ucontext *ibcontext); + void (*drain_rq)(struct ib_qp *qp); + void (*drain_sq)(struct ib_qp *qp); + int (*set_vf_link_state)(struct ib_device *device, int vf, u8 port, + int state); + int (*get_vf_config)(struct ib_device *device, int vf, u8 port, + struct ifla_vf_info *ivf); + int (*get_vf_stats)(struct ib_device *device, int vf, u8 port, + struct ifla_vf_stats *stats); + int (*set_vf_guid)(struct ib_device *device, int vf, u8 port, u64 guid, + int type); struct ib_dma_mapping_ops *dma_ops; @@ -2289,6 +2321,15 @@ int ib_query_gid(struct ib_device *device, u8 port_num, int index, union ib_gid *gid, struct ib_gid_attr *attr); +int ib_set_vf_link_state(struct ib_device *device, int vf, u8 port, + int state); +int ib_get_vf_config(struct ib_device *device, int vf, u8 port, + struct ifla_vf_info *info); +int ib_get_vf_stats(struct ib_device *device, int vf, u8 port, + struct ifla_vf_stats *stats); +int ib_set_vf_guid(struct ib_device *device, int vf, u8 port, u64 guid, + int type); + int ib_query_pkey(struct ib_device *device, u8 port_num, u16 index, u16 *pkey); @@ -3094,4 +3135,7 @@ int ib_sg_to_pages(struct ib_mr *mr, int sg_nents, int (*set_page)(struct ib_mr *, u64)); +void ib_drain_rq(struct ib_qp *qp); +void ib_drain_sq(struct ib_qp *qp); +void ib_drain_qp(struct ib_qp *qp); #endif /* IB_VERBS_H */ diff --git a/include/rdma/iw_cm.h b/include/rdma/iw_cm.h index 036bd2772..6d0065c32 100644 --- a/include/rdma/iw_cm.h +++ b/include/rdma/iw_cm.h @@ -83,8 +83,10 @@ struct iw_cm_id { iw_cm_handler cm_handler; /* client callback function */ void *context; /* client cb context */ struct ib_device *device; - struct sockaddr_storage local_addr; + struct sockaddr_storage local_addr; /* local addr */ struct sockaddr_storage remote_addr; + struct sockaddr_storage m_local_addr; /* nmapped local addr */ + struct sockaddr_storage m_remote_addr; /* nmapped rem addr */ void *provider_data; /* provider private data */ iw_event_handler event_handler; /* cb for provider events */ @@ -92,6 +94,7 @@ struct iw_cm_id { void (*add_ref)(struct iw_cm_id *); void (*rem_ref)(struct iw_cm_id *); u8 tos; + bool mapped; }; struct iw_cm_conn_param { @@ -123,6 +126,7 @@ struct iw_cm_verbs { int backlog); int (*destroy_listen)(struct iw_cm_id *cm_id); + char ifname[IFNAMSIZ]; }; /** diff --git a/include/rdma/opa_port_info.h b/include/rdma/opa_port_info.h index a0fa975cd..2b95c2c33 100644 --- a/include/rdma/opa_port_info.h +++ b/include/rdma/opa_port_info.h @@ -97,7 +97,7 @@ #define OPA_LINKDOWN_REASON_WIDTH_POLICY 41 /* 42-48 reserved */ #define OPA_LINKDOWN_REASON_DISCONNECTED 49 -#define OPA_LINKDOWN_REASONLOCAL_MEDIA_NOT_INSTALLED 50 +#define OPA_LINKDOWN_REASON_LOCAL_MEDIA_NOT_INSTALLED 50 #define OPA_LINKDOWN_REASON_NOT_INSTALLED 51 #define OPA_LINKDOWN_REASON_CHASSIS_CONFIG 52 /* 53 reserved */ diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h new file mode 100644 index 000000000..a8696551a --- /dev/null +++ b/include/rdma/rdma_vt.h @@ -0,0 +1,481 @@ +#ifndef DEF_RDMA_VT_H +#define DEF_RDMA_VT_H + +/* + * Copyright(c) 2016 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +/* + * Structure that low level drivers will populate in order to register with the + * rdmavt layer. + */ + +#include <linux/spinlock.h> +#include <linux/list.h> +#include <linux/hash.h> +#include <rdma/ib_verbs.h> +#include <rdma/rdmavt_mr.h> +#include <rdma/rdmavt_qp.h> + +#define RVT_MAX_PKEY_VALUES 16 + +struct rvt_ibport { + struct rvt_qp __rcu *qp[2]; + struct ib_mad_agent *send_agent; /* agent for SMI (traps) */ + struct rb_root mcast_tree; + spinlock_t lock; /* protect changes in this struct */ + + /* non-zero when timer is set */ + unsigned long mkey_lease_timeout; + unsigned long trap_timeout; + __be64 gid_prefix; /* in network order */ + __be64 mkey; + u64 tid; + u32 port_cap_flags; + u32 pma_sample_start; + u32 pma_sample_interval; + __be16 pma_counter_select[5]; + u16 pma_tag; + u16 mkey_lease_period; + u16 sm_lid; + u8 sm_sl; + u8 mkeyprot; + u8 subnet_timeout; + u8 vl_high_limit; + + /* + * Driver is expected to keep these up to date. These + * counters are informational only and not required to be + * completely accurate. + */ + u64 n_rc_resends; + u64 n_seq_naks; + u64 n_rdma_seq; + u64 n_rnr_naks; + u64 n_other_naks; + u64 n_loop_pkts; + u64 n_pkt_drops; + u64 n_vl15_dropped; + u64 n_rc_timeouts; + u64 n_dmawait; + u64 n_unaligned; + u64 n_rc_dupreq; + u64 n_rc_seqnak; + u16 pkey_violations; + u16 qkey_violations; + u16 mkey_violations; + + /* Hot-path per CPU counters to avoid cacheline trading to update */ + u64 z_rc_acks; + u64 z_rc_qacks; + u64 z_rc_delayed_comp; + u64 __percpu *rc_acks; + u64 __percpu *rc_qacks; + u64 __percpu *rc_delayed_comp; + + void *priv; /* driver private data */ + + /* + * The pkey table is allocated and maintained by the driver. Drivers + * need to have access to this before registering with rdmav. However + * rdmavt will need access to it so drivers need to proviee this during + * the attach port API call. + */ + u16 *pkey_table; + + struct rvt_ah *sm_ah; +}; + +#define RVT_CQN_MAX 16 /* maximum length of cq name */ + +/* + * Things that are driver specific, module parameters in hfi1 and qib + */ +struct rvt_driver_params { + struct ib_device_attr props; + + /* + * Anything driver specific that is not covered by props + * For instance special module parameters. Goes here. + */ + unsigned int lkey_table_size; + unsigned int qp_table_size; + int qpn_start; + int qpn_inc; + int qpn_res_start; + int qpn_res_end; + int nports; + int npkeys; + u8 qos_shift; + char cq_name[RVT_CQN_MAX]; + int node; + int max_rdma_atomic; + int psn_mask; + int psn_shift; + int psn_modify_mask; + u32 core_cap_flags; + u32 max_mad_size; +}; + +/* Protection domain */ +struct rvt_pd { + struct ib_pd ibpd; + int user; /* non-zero if created from user space */ +}; + +/* Address handle */ +struct rvt_ah { + struct ib_ah ibah; + struct ib_ah_attr attr; + atomic_t refcount; + u8 vl; + u8 log_pmtu; +}; + +struct rvt_dev_info; +struct rvt_swqe; +struct rvt_driver_provided { + /* + * Which functions are required depends on which verbs rdmavt is + * providing and which verbs the driver is overriding. See + * check_support() for details. + */ + + /* Passed to ib core registration. Callback to create syfs files */ + int (*port_callback)(struct ib_device *, u8, struct kobject *); + + /* + * Returns a string to represent the device for which is being + * registered. This is primarily used for error and debug messages on + * the console. + */ + const char * (*get_card_name)(struct rvt_dev_info *rdi); + + /* + * Returns a pointer to the undelying hardware's PCI device. This is + * used to display information as to what hardware is being referenced + * in an output message + */ + struct pci_dev * (*get_pci_dev)(struct rvt_dev_info *rdi); + + /* + * Allocate a private queue pair data structure for driver specific + * information which is opaque to rdmavt. + */ + void * (*qp_priv_alloc)(struct rvt_dev_info *rdi, struct rvt_qp *qp, + gfp_t gfp); + + /* + * Free the driver's private qp structure. + */ + void (*qp_priv_free)(struct rvt_dev_info *rdi, struct rvt_qp *qp); + + /* + * Inform the driver the particular qp in quesiton has been reset so + * that it can clean up anything it needs to. + */ + void (*notify_qp_reset)(struct rvt_qp *qp); + + /* + * Give the driver a notice that there is send work to do. It is up to + * the driver to generally push the packets out, this just queues the + * work with the driver. There are two variants here. The no_lock + * version requires the s_lock not to be held. The other assumes the + * s_lock is held. + */ + void (*schedule_send)(struct rvt_qp *qp); + void (*schedule_send_no_lock)(struct rvt_qp *qp); + + /* + * Sometimes rdmavt needs to kick the driver's send progress. That is + * done by this call back. + */ + void (*do_send)(struct rvt_qp *qp); + + /* + * Get a path mtu from the driver based on qp attributes. + */ + int (*get_pmtu_from_attr)(struct rvt_dev_info *rdi, struct rvt_qp *qp, + struct ib_qp_attr *attr); + + /* + * Notify driver that it needs to flush any outstanding IO requests that + * are waiting on a qp. + */ + void (*flush_qp_waiters)(struct rvt_qp *qp); + + /* + * Notify driver to stop its queue of sending packets. Nothing else + * should be posted to the queue pair after this has been called. + */ + void (*stop_send_queue)(struct rvt_qp *qp); + + /* + * Have the drivr drain any in progress operations + */ + void (*quiesce_qp)(struct rvt_qp *qp); + + /* + * Inform the driver a qp has went to error state. + */ + void (*notify_error_qp)(struct rvt_qp *qp); + + /* + * Get an MTU for a qp. + */ + u32 (*mtu_from_qp)(struct rvt_dev_info *rdi, struct rvt_qp *qp, + u32 pmtu); + /* + * Convert an mtu to a path mtu + */ + int (*mtu_to_path_mtu)(u32 mtu); + + /* + * Get the guid of a port in big endian byte order + */ + int (*get_guid_be)(struct rvt_dev_info *rdi, struct rvt_ibport *rvp, + int guid_index, __be64 *guid); + + /* + * Query driver for the state of the port. + */ + int (*query_port_state)(struct rvt_dev_info *rdi, u8 port_num, + struct ib_port_attr *props); + + /* + * Tell driver to shutdown a port + */ + int (*shut_down_port)(struct rvt_dev_info *rdi, u8 port_num); + + /* Tell driver to send a trap for changed port capabilities */ + void (*cap_mask_chg)(struct rvt_dev_info *rdi, u8 port_num); + + /* + * The following functions can be safely ignored completely. Any use of + * these is checked for NULL before blindly calling. Rdmavt should also + * be functional if drivers omit these. + */ + + /* Called to inform the driver that all qps should now be freed. */ + unsigned (*free_all_qps)(struct rvt_dev_info *rdi); + + /* Driver specific AH validation */ + int (*check_ah)(struct ib_device *, struct ib_ah_attr *); + + /* Inform the driver a new AH has been created */ + void (*notify_new_ah)(struct ib_device *, struct ib_ah_attr *, + struct rvt_ah *); + + /* Let the driver pick the next queue pair number*/ + int (*alloc_qpn)(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt, + enum ib_qp_type type, u8 port_num, gfp_t gfp); + + /* Determine if its safe or allowed to modify the qp */ + int (*check_modify_qp)(struct rvt_qp *qp, struct ib_qp_attr *attr, + int attr_mask, struct ib_udata *udata); + + /* Driver specific QP modification/notification-of */ + void (*modify_qp)(struct rvt_qp *qp, struct ib_qp_attr *attr, + int attr_mask, struct ib_udata *udata); + + /* Driver specific work request checking */ + int (*check_send_wqe)(struct rvt_qp *qp, struct rvt_swqe *wqe); + + /* Notify driver a mad agent has been created */ + void (*notify_create_mad_agent)(struct rvt_dev_info *rdi, int port_idx); + + /* Notify driver a mad agent has been removed */ + void (*notify_free_mad_agent)(struct rvt_dev_info *rdi, int port_idx); + +}; + +struct rvt_dev_info { + struct ib_device ibdev; /* Keep this first. Nothing above here */ + + /* + * Prior to calling for registration the driver will be responsible for + * allocating space for this structure. + * + * The driver will also be responsible for filling in certain members of + * dparms.props. The driver needs to fill in dparms exactly as it would + * want values reported to a ULP. This will be returned to the caller + * in rdmavt's device. The driver should also therefore refrain from + * modifying this directly after registration with rdmavt. + */ + + /* Driver specific properties */ + struct rvt_driver_params dparms; + + struct rvt_mregion __rcu *dma_mr; + struct rvt_lkey_table lkey_table; + + /* Driver specific helper functions */ + struct rvt_driver_provided driver_f; + + /* Internal use */ + int n_pds_allocated; + spinlock_t n_pds_lock; /* Protect pd allocated count */ + + int n_ahs_allocated; + spinlock_t n_ahs_lock; /* Protect ah allocated count */ + + u32 n_srqs_allocated; + spinlock_t n_srqs_lock; /* Protect srqs allocated count */ + + int flags; + struct rvt_ibport **ports; + + /* QP */ + struct rvt_qp_ibdev *qp_dev; + u32 n_qps_allocated; /* number of QPs allocated for device */ + u32 n_rc_qps; /* number of RC QPs allocated for device */ + u32 busy_jiffies; /* timeout scaling based on RC QP count */ + spinlock_t n_qps_lock; /* protect qps, rc qps and busy jiffy counts */ + + /* memory maps */ + struct list_head pending_mmaps; + spinlock_t mmap_offset_lock; /* protect mmap_offset */ + u32 mmap_offset; + spinlock_t pending_lock; /* protect pending mmap list */ + + /* CQ */ + struct kthread_worker *worker; /* per device cq worker */ + u32 n_cqs_allocated; /* number of CQs allocated for device */ + spinlock_t n_cqs_lock; /* protect count of in use cqs */ + + /* Multicast */ + u32 n_mcast_grps_allocated; /* number of mcast groups allocated */ + spinlock_t n_mcast_grps_lock; + +}; + +static inline struct rvt_pd *ibpd_to_rvtpd(struct ib_pd *ibpd) +{ + return container_of(ibpd, struct rvt_pd, ibpd); +} + +static inline struct rvt_ah *ibah_to_rvtah(struct ib_ah *ibah) +{ + return container_of(ibah, struct rvt_ah, ibah); +} + +static inline struct rvt_dev_info *ib_to_rvt(struct ib_device *ibdev) +{ + return container_of(ibdev, struct rvt_dev_info, ibdev); +} + +static inline struct rvt_srq *ibsrq_to_rvtsrq(struct ib_srq *ibsrq) +{ + return container_of(ibsrq, struct rvt_srq, ibsrq); +} + +static inline struct rvt_qp *ibqp_to_rvtqp(struct ib_qp *ibqp) +{ + return container_of(ibqp, struct rvt_qp, ibqp); +} + +static inline unsigned rvt_get_npkeys(struct rvt_dev_info *rdi) +{ + /* + * All ports have same number of pkeys. + */ + return rdi->dparms.npkeys; +} + +/* + * Return the indexed PKEY from the port PKEY table. + */ +static inline u16 rvt_get_pkey(struct rvt_dev_info *rdi, + int port_index, + unsigned index) +{ + if (index >= rvt_get_npkeys(rdi)) + return 0; + else + return rdi->ports[port_index]->pkey_table[index]; +} + +/** + * rvt_lookup_qpn - return the QP with the given QPN + * @ibp: the ibport + * @qpn: the QP number to look up + * + * The caller must hold the rcu_read_lock(), and keep the lock until + * the returned qp is no longer in use. + */ +/* TODO: Remove this and put in rdmavt/qp.h when no longer needed by drivers */ +static inline struct rvt_qp *rvt_lookup_qpn(struct rvt_dev_info *rdi, + struct rvt_ibport *rvp, + u32 qpn) __must_hold(RCU) +{ + struct rvt_qp *qp = NULL; + + if (unlikely(qpn <= 1)) { + qp = rcu_dereference(rvp->qp[qpn]); + } else { + u32 n = hash_32(qpn, rdi->qp_dev->qp_table_bits); + + for (qp = rcu_dereference(rdi->qp_dev->qp_table[n]); qp; + qp = rcu_dereference(qp->next)) + if (qp->ibqp.qp_num == qpn) + break; + } + return qp; +} + +struct rvt_dev_info *rvt_alloc_device(size_t size, int nports); +int rvt_register_device(struct rvt_dev_info *rvd); +void rvt_unregister_device(struct rvt_dev_info *rvd); +int rvt_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr); +int rvt_init_port(struct rvt_dev_info *rdi, struct rvt_ibport *port, + int port_index, u16 *pkey_table); +int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge, + u32 len, u64 vaddr, u32 rkey, int acc); +int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd, + struct rvt_sge *isge, struct ib_sge *sge, int acc); +struct rvt_mcast *rvt_mcast_find(struct rvt_ibport *ibp, union ib_gid *mgid); + +#endif /* DEF_RDMA_VT_H */ diff --git a/include/rdma/rdmavt_cq.h b/include/rdma/rdmavt_cq.h new file mode 100644 index 000000000..51fd00b24 --- /dev/null +++ b/include/rdma/rdmavt_cq.h @@ -0,0 +1,99 @@ +#ifndef DEF_RDMAVT_INCCQ_H +#define DEF_RDMAVT_INCCQ_H + +/* + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2016 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Copyright(c) 2015 Intel Corporation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#include <linux/kthread.h> +#include <rdma/ib_user_verbs.h> + +/* + * Define an ib_cq_notify value that is not valid so we know when CQ + * notifications are armed. + */ +#define RVT_CQ_NONE (IB_CQ_NEXT_COMP + 1) + +/* + * This structure is used to contain the head pointer, tail pointer, + * and completion queue entries as a single memory allocation so + * it can be mmap'ed into user space. + */ +struct rvt_cq_wc { + u32 head; /* index of next entry to fill */ + u32 tail; /* index of next ib_poll_cq() entry */ + union { + /* these are actually size ibcq.cqe + 1 */ + struct ib_uverbs_wc uqueue[0]; + struct ib_wc kqueue[0]; + }; +}; + +/* + * The completion queue structure. + */ +struct rvt_cq { + struct ib_cq ibcq; + struct kthread_work comptask; + spinlock_t lock; /* protect changes in this struct */ + u8 notify; + u8 triggered; + struct rvt_dev_info *rdi; + struct rvt_cq_wc *queue; + struct rvt_mmap_info *ip; +}; + +static inline struct rvt_cq *ibcq_to_rvtcq(struct ib_cq *ibcq) +{ + return container_of(ibcq, struct rvt_cq, ibcq); +} + +void rvt_cq_enter(struct rvt_cq *cq, struct ib_wc *entry, bool solicited); + +#endif /* DEF_RDMAVT_INCCQH */ diff --git a/include/rdma/rdmavt_mr.h b/include/rdma/rdmavt_mr.h new file mode 100644 index 000000000..5edffdca8 --- /dev/null +++ b/include/rdma/rdmavt_mr.h @@ -0,0 +1,139 @@ +#ifndef DEF_RDMAVT_INCMR_H +#define DEF_RDMAVT_INCMR_H + +/* + * Copyright(c) 2016 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +/* + * For Memory Regions. This stuff should probably be moved into rdmavt/mr.h once + * drivers no longer need access to the MR directly. + */ + +/* + * A segment is a linear region of low physical memory. + * Used by the verbs layer. + */ +struct rvt_seg { + void *vaddr; + size_t length; +}; + +/* The number of rvt_segs that fit in a page. */ +#define RVT_SEGSZ (PAGE_SIZE / sizeof(struct rvt_seg)) + +struct rvt_segarray { + struct rvt_seg segs[RVT_SEGSZ]; +}; + +struct rvt_mregion { + struct ib_pd *pd; /* shares refcnt of ibmr.pd */ + u64 user_base; /* User's address for this region */ + u64 iova; /* IB start address of this region */ + size_t length; + u32 lkey; + u32 offset; /* offset (bytes) to start of region */ + int access_flags; + u32 max_segs; /* number of rvt_segs in all the arrays */ + u32 mapsz; /* size of the map array */ + u8 page_shift; /* 0 - non unform/non powerof2 sizes */ + u8 lkey_published; /* in global table */ + struct completion comp; /* complete when refcount goes to zero */ + atomic_t refcount; + struct rvt_segarray *map[0]; /* the segments */ +}; + +#define RVT_MAX_LKEY_TABLE_BITS 23 + +struct rvt_lkey_table { + spinlock_t lock; /* protect changes in this struct */ + u32 next; /* next unused index (speeds search) */ + u32 gen; /* generation count */ + u32 max; /* size of the table */ + struct rvt_mregion __rcu **table; +}; + +/* + * These keep track of the copy progress within a memory region. + * Used by the verbs layer. + */ +struct rvt_sge { + struct rvt_mregion *mr; + void *vaddr; /* kernel virtual address of segment */ + u32 sge_length; /* length of the SGE */ + u32 length; /* remaining length of the segment */ + u16 m; /* current index: mr->map[m] */ + u16 n; /* current index: mr->map[m]->segs[n] */ +}; + +struct rvt_sge_state { + struct rvt_sge *sg_list; /* next SGE to be used if any */ + struct rvt_sge sge; /* progress state for the current SGE */ + u32 total_len; + u8 num_sge; +}; + +static inline void rvt_put_mr(struct rvt_mregion *mr) +{ + if (unlikely(atomic_dec_and_test(&mr->refcount))) + complete(&mr->comp); +} + +static inline void rvt_get_mr(struct rvt_mregion *mr) +{ + atomic_inc(&mr->refcount); +} + +static inline void rvt_put_ss(struct rvt_sge_state *ss) +{ + while (ss->num_sge) { + rvt_put_mr(ss->sge.mr); + if (--ss->num_sge) + ss->sge = *ss->sg_list++; + } +} + +#endif /* DEF_RDMAVT_INCMRH */ diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h new file mode 100644 index 000000000..497e59065 --- /dev/null +++ b/include/rdma/rdmavt_qp.h @@ -0,0 +1,446 @@ +#ifndef DEF_RDMAVT_INCQP_H +#define DEF_RDMAVT_INCQP_H + +/* + * Copyright(c) 2016 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#include <rdma/rdma_vt.h> +#include <rdma/ib_pack.h> +#include <rdma/ib_verbs.h> +/* + * Atomic bit definitions for r_aflags. + */ +#define RVT_R_WRID_VALID 0 +#define RVT_R_REWIND_SGE 1 + +/* + * Bit definitions for r_flags. + */ +#define RVT_R_REUSE_SGE 0x01 +#define RVT_R_RDMAR_SEQ 0x02 +#define RVT_R_RSP_NAK 0x04 +#define RVT_R_RSP_SEND 0x08 +#define RVT_R_COMM_EST 0x10 + +/* + * Bit definitions for s_flags. + * + * RVT_S_SIGNAL_REQ_WR - set if QP send WRs contain completion signaled + * RVT_S_BUSY - send tasklet is processing the QP + * RVT_S_TIMER - the RC retry timer is active + * RVT_S_ACK_PENDING - an ACK is waiting to be sent after RDMA read/atomics + * RVT_S_WAIT_FENCE - waiting for all prior RDMA read or atomic SWQEs + * before processing the next SWQE + * RVT_S_WAIT_RDMAR - waiting for a RDMA read or atomic SWQE to complete + * before processing the next SWQE + * RVT_S_WAIT_RNR - waiting for RNR timeout + * RVT_S_WAIT_SSN_CREDIT - waiting for RC credits to process next SWQE + * RVT_S_WAIT_DMA - waiting for send DMA queue to drain before generating + * next send completion entry not via send DMA + * RVT_S_WAIT_PIO - waiting for a send buffer to be available + * RVT_S_WAIT_PIO_DRAIN - waiting for a qp to drain pio packets + * RVT_S_WAIT_TX - waiting for a struct verbs_txreq to be available + * RVT_S_WAIT_DMA_DESC - waiting for DMA descriptors to be available + * RVT_S_WAIT_KMEM - waiting for kernel memory to be available + * RVT_S_WAIT_PSN - waiting for a packet to exit the send DMA queue + * RVT_S_WAIT_ACK - waiting for an ACK packet before sending more requests + * RVT_S_SEND_ONE - send one packet, request ACK, then wait for ACK + * RVT_S_ECN - a BECN was queued to the send engine + */ +#define RVT_S_SIGNAL_REQ_WR 0x0001 +#define RVT_S_BUSY 0x0002 +#define RVT_S_TIMER 0x0004 +#define RVT_S_RESP_PENDING 0x0008 +#define RVT_S_ACK_PENDING 0x0010 +#define RVT_S_WAIT_FENCE 0x0020 +#define RVT_S_WAIT_RDMAR 0x0040 +#define RVT_S_WAIT_RNR 0x0080 +#define RVT_S_WAIT_SSN_CREDIT 0x0100 +#define RVT_S_WAIT_DMA 0x0200 +#define RVT_S_WAIT_PIO 0x0400 +#define RVT_S_WAIT_PIO_DRAIN 0x0800 +#define RVT_S_WAIT_TX 0x1000 +#define RVT_S_WAIT_DMA_DESC 0x2000 +#define RVT_S_WAIT_KMEM 0x4000 +#define RVT_S_WAIT_PSN 0x8000 +#define RVT_S_WAIT_ACK 0x10000 +#define RVT_S_SEND_ONE 0x20000 +#define RVT_S_UNLIMITED_CREDIT 0x40000 +#define RVT_S_AHG_VALID 0x80000 +#define RVT_S_AHG_CLEAR 0x100000 +#define RVT_S_ECN 0x200000 + +/* + * Wait flags that would prevent any packet type from being sent. + */ +#define RVT_S_ANY_WAIT_IO (RVT_S_WAIT_PIO | RVT_S_WAIT_TX | \ + RVT_S_WAIT_DMA_DESC | RVT_S_WAIT_KMEM) + +/* + * Wait flags that would prevent send work requests from making progress. + */ +#define RVT_S_ANY_WAIT_SEND (RVT_S_WAIT_FENCE | RVT_S_WAIT_RDMAR | \ + RVT_S_WAIT_RNR | RVT_S_WAIT_SSN_CREDIT | RVT_S_WAIT_DMA | \ + RVT_S_WAIT_PSN | RVT_S_WAIT_ACK) + +#define RVT_S_ANY_WAIT (RVT_S_ANY_WAIT_IO | RVT_S_ANY_WAIT_SEND) + +/* Number of bits to pay attention to in the opcode for checking qp type */ +#define RVT_OPCODE_QP_MASK 0xE0 + +/* Flags for checking QP state (see ib_rvt_state_ops[]) */ +#define RVT_POST_SEND_OK 0x01 +#define RVT_POST_RECV_OK 0x02 +#define RVT_PROCESS_RECV_OK 0x04 +#define RVT_PROCESS_SEND_OK 0x08 +#define RVT_PROCESS_NEXT_SEND_OK 0x10 +#define RVT_FLUSH_SEND 0x20 +#define RVT_FLUSH_RECV 0x40 +#define RVT_PROCESS_OR_FLUSH_SEND \ + (RVT_PROCESS_SEND_OK | RVT_FLUSH_SEND) + +/* + * Send work request queue entry. + * The size of the sg_list is determined when the QP is created and stored + * in qp->s_max_sge. + */ +struct rvt_swqe { + union { + struct ib_send_wr wr; /* don't use wr.sg_list */ + struct ib_ud_wr ud_wr; + struct ib_reg_wr reg_wr; + struct ib_rdma_wr rdma_wr; + struct ib_atomic_wr atomic_wr; + }; + u32 psn; /* first packet sequence number */ + u32 lpsn; /* last packet sequence number */ + u32 ssn; /* send sequence number */ + u32 length; /* total length of data in sg_list */ + struct rvt_sge sg_list[0]; +}; + +/* + * Receive work request queue entry. + * The size of the sg_list is determined when the QP (or SRQ) is created + * and stored in qp->r_rq.max_sge (or srq->rq.max_sge). + */ +struct rvt_rwqe { + u64 wr_id; + u8 num_sge; + struct ib_sge sg_list[0]; +}; + +/* + * This structure is used to contain the head pointer, tail pointer, + * and receive work queue entries as a single memory allocation so + * it can be mmap'ed into user space. + * Note that the wq array elements are variable size so you can't + * just index into the array to get the N'th element; + * use get_rwqe_ptr() instead. + */ +struct rvt_rwq { + u32 head; /* new work requests posted to the head */ + u32 tail; /* receives pull requests from here. */ + struct rvt_rwqe wq[0]; +}; + +struct rvt_rq { + struct rvt_rwq *wq; + u32 size; /* size of RWQE array */ + u8 max_sge; + /* protect changes in this struct */ + spinlock_t lock ____cacheline_aligned_in_smp; +}; + +/* + * This structure is used by rvt_mmap() to validate an offset + * when an mmap() request is made. The vm_area_struct then uses + * this as its vm_private_data. + */ +struct rvt_mmap_info { + struct list_head pending_mmaps; + struct ib_ucontext *context; + void *obj; + __u64 offset; + struct kref ref; + unsigned size; +}; + +#define RVT_MAX_RDMA_ATOMIC 16 + +/* + * This structure holds the information that the send tasklet needs + * to send a RDMA read response or atomic operation. + */ +struct rvt_ack_entry { + u8 opcode; + u8 sent; + u32 psn; + u32 lpsn; + union { + struct rvt_sge rdma_sge; + u64 atomic_data; + }; +}; + +#define RC_QP_SCALING_INTERVAL 5 + +/* + * Variables prefixed with s_ are for the requester (sender). + * Variables prefixed with r_ are for the responder (receiver). + * Variables prefixed with ack_ are for responder replies. + * + * Common variables are protected by both r_rq.lock and s_lock in that order + * which only happens in modify_qp() or changing the QP 'state'. + */ +struct rvt_qp { + struct ib_qp ibqp; + void *priv; /* Driver private data */ + /* read mostly fields above and below */ + struct ib_ah_attr remote_ah_attr; + struct ib_ah_attr alt_ah_attr; + struct rvt_qp __rcu *next; /* link list for QPN hash table */ + struct rvt_swqe *s_wq; /* send work queue */ + struct rvt_mmap_info *ip; + + unsigned long timeout_jiffies; /* computed from timeout */ + + enum ib_mtu path_mtu; + int srate_mbps; /* s_srate (below) converted to Mbit/s */ + pid_t pid; /* pid for user mode QPs */ + u32 remote_qpn; + u32 qkey; /* QKEY for this QP (for UD or RD) */ + u32 s_size; /* send work queue size */ + u32 s_ahgpsn; /* set to the psn in the copy of the header */ + + u16 pmtu; /* decoded from path_mtu */ + u8 log_pmtu; /* shift for pmtu */ + u8 state; /* QP state */ + u8 allowed_ops; /* high order bits of allowed opcodes */ + u8 qp_access_flags; + u8 alt_timeout; /* Alternate path timeout for this QP */ + u8 timeout; /* Timeout for this QP */ + u8 s_srate; + u8 s_mig_state; + u8 port_num; + u8 s_pkey_index; /* PKEY index to use */ + u8 s_alt_pkey_index; /* Alternate path PKEY index to use */ + u8 r_max_rd_atomic; /* max number of RDMA read/atomic to receive */ + u8 s_max_rd_atomic; /* max number of RDMA read/atomic to send */ + u8 s_retry_cnt; /* number of times to retry */ + u8 s_rnr_retry_cnt; + u8 r_min_rnr_timer; /* retry timeout value for RNR NAKs */ + u8 s_max_sge; /* size of s_wq->sg_list */ + u8 s_draining; + + /* start of read/write fields */ + atomic_t refcount ____cacheline_aligned_in_smp; + wait_queue_head_t wait; + + struct rvt_ack_entry s_ack_queue[RVT_MAX_RDMA_ATOMIC + 1] + ____cacheline_aligned_in_smp; + struct rvt_sge_state s_rdma_read_sge; + + spinlock_t r_lock ____cacheline_aligned_in_smp; /* used for APM */ + u32 r_psn; /* expected rcv packet sequence number */ + unsigned long r_aflags; + u64 r_wr_id; /* ID for current receive WQE */ + u32 r_ack_psn; /* PSN for next ACK or atomic ACK */ + u32 r_len; /* total length of r_sge */ + u32 r_rcv_len; /* receive data len processed */ + u32 r_msn; /* message sequence number */ + + u8 r_state; /* opcode of last packet received */ + u8 r_flags; + u8 r_head_ack_queue; /* index into s_ack_queue[] */ + + struct list_head rspwait; /* link for waiting to respond */ + + struct rvt_sge_state r_sge; /* current receive data */ + struct rvt_rq r_rq; /* receive work queue */ + + /* post send line */ + spinlock_t s_hlock ____cacheline_aligned_in_smp; + u32 s_head; /* new entries added here */ + u32 s_next_psn; /* PSN for next request */ + u32 s_avail; /* number of entries avail */ + u32 s_ssn; /* SSN of tail entry */ + + spinlock_t s_lock ____cacheline_aligned_in_smp; + u32 s_flags; + struct rvt_sge_state *s_cur_sge; + struct rvt_swqe *s_wqe; + struct rvt_sge_state s_sge; /* current send request data */ + struct rvt_mregion *s_rdma_mr; + u32 s_cur_size; /* size of send packet in bytes */ + u32 s_len; /* total length of s_sge */ + u32 s_rdma_read_len; /* total length of s_rdma_read_sge */ + u32 s_last_psn; /* last response PSN processed */ + u32 s_sending_psn; /* lowest PSN that is being sent */ + u32 s_sending_hpsn; /* highest PSN that is being sent */ + u32 s_psn; /* current packet sequence number */ + u32 s_ack_rdma_psn; /* PSN for sending RDMA read responses */ + u32 s_ack_psn; /* PSN for acking sends and RDMA writes */ + u32 s_tail; /* next entry to process */ + u32 s_cur; /* current work queue entry */ + u32 s_acked; /* last un-ACK'ed entry */ + u32 s_last; /* last completed entry */ + u32 s_lsn; /* limit sequence number (credit) */ + u16 s_hdrwords; /* size of s_hdr in 32 bit words */ + u16 s_rdma_ack_cnt; + s8 s_ahgidx; + u8 s_state; /* opcode of last packet sent */ + u8 s_ack_state; /* opcode of packet to ACK */ + u8 s_nak_state; /* non-zero if NAK is pending */ + u8 r_nak_state; /* non-zero if NAK is pending */ + u8 s_retry; /* requester retry counter */ + u8 s_rnr_retry; /* requester RNR retry counter */ + u8 s_num_rd_atomic; /* number of RDMA read/atomic pending */ + u8 s_tail_ack_queue; /* index into s_ack_queue[] */ + + struct rvt_sge_state s_ack_rdma_sge; + struct timer_list s_timer; + + /* + * This sge list MUST be last. Do not add anything below here. + */ + struct rvt_sge r_sg_list[0] /* verified SGEs */ + ____cacheline_aligned_in_smp; +}; + +struct rvt_srq { + struct ib_srq ibsrq; + struct rvt_rq rq; + struct rvt_mmap_info *ip; + /* send signal when number of RWQEs < limit */ + u32 limit; +}; + +#define RVT_QPN_MAX BIT(24) +#define RVT_QPNMAP_ENTRIES (RVT_QPN_MAX / PAGE_SIZE / BITS_PER_BYTE) +#define RVT_BITS_PER_PAGE (PAGE_SIZE * BITS_PER_BYTE) +#define RVT_BITS_PER_PAGE_MASK (RVT_BITS_PER_PAGE - 1) +#define RVT_QPN_MASK 0xFFFFFF + +/* + * QPN-map pages start out as NULL, they get allocated upon + * first use and are never deallocated. This way, + * large bitmaps are not allocated unless large numbers of QPs are used. + */ +struct rvt_qpn_map { + void *page; +}; + +struct rvt_qpn_table { + spinlock_t lock; /* protect changes to the qp table */ + unsigned flags; /* flags for QP0/1 allocated for each port */ + u32 last; /* last QP number allocated */ + u32 nmaps; /* size of the map table */ + u16 limit; + u8 incr; + /* bit map of free QP numbers other than 0/1 */ + struct rvt_qpn_map map[RVT_QPNMAP_ENTRIES]; +}; + +struct rvt_qp_ibdev { + u32 qp_table_size; + u32 qp_table_bits; + struct rvt_qp __rcu **qp_table; + spinlock_t qpt_lock; /* qptable lock */ + struct rvt_qpn_table qpn_table; +}; + +/* + * There is one struct rvt_mcast for each multicast GID. + * All attached QPs are then stored as a list of + * struct rvt_mcast_qp. + */ +struct rvt_mcast_qp { + struct list_head list; + struct rvt_qp *qp; +}; + +struct rvt_mcast { + struct rb_node rb_node; + union ib_gid mgid; + struct list_head qp_list; + wait_queue_head_t wait; + atomic_t refcount; + int n_attached; +}; + +/* + * Since struct rvt_swqe is not a fixed size, we can't simply index into + * struct rvt_qp.s_wq. This function does the array index computation. + */ +static inline struct rvt_swqe *rvt_get_swqe_ptr(struct rvt_qp *qp, + unsigned n) +{ + return (struct rvt_swqe *)((char *)qp->s_wq + + (sizeof(struct rvt_swqe) + + qp->s_max_sge * + sizeof(struct rvt_sge)) * n); +} + +/* + * Since struct rvt_rwqe is not a fixed size, we can't simply index into + * struct rvt_rwq.wq. This function does the array index computation. + */ +static inline struct rvt_rwqe *rvt_get_rwqe_ptr(struct rvt_rq *rq, unsigned n) +{ + return (struct rvt_rwqe *) + ((char *)rq->wq->wq + + (sizeof(struct rvt_rwqe) + + rq->max_sge * sizeof(struct ib_sge)) * n); +} + +extern const int ib_rvt_state_ops[]; + +struct rvt_dev_info; +int rvt_error_qp(struct rvt_qp *qp, enum ib_wc_status err); + +#endif /* DEF_RDMAVT_INCQP_H */ |