summaryrefslogtreecommitdiff
path: root/include/rdma/ib_verbs.h
diff options
context:
space:
mode:
Diffstat (limited to 'include/rdma/ib_verbs.h')
-rw-r--r--include/rdma/ib_verbs.h249
1 files changed, 116 insertions, 133 deletions
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index b0f898e3b..7845fae6f 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -48,6 +48,7 @@
#include <linux/rwsem.h>
#include <linux/scatterlist.h>
#include <linux/workqueue.h>
+#include <linux/socket.h>
#include <uapi/linux/if_ether.h>
#include <linux/atomic.h>
@@ -64,6 +65,12 @@ union ib_gid {
} global;
};
+extern union ib_gid zgid;
+
+struct ib_gid_attr {
+ struct net_device *ndev;
+};
+
enum rdma_node_type {
/* IB values map to NodeInfo:NodeType. */
RDMA_NODE_IB_CA = 1,
@@ -284,7 +291,7 @@ enum ib_port_cap_flags {
IB_PORT_BOOT_MGMT_SUP = 1 << 23,
IB_PORT_LINK_LATENCY_SUP = 1 << 24,
IB_PORT_CLIENT_REG_SUP = 1 << 25,
- IB_PORT_IP_BASED_GIDS = 1 << 26
+ IB_PORT_IP_BASED_GIDS = 1 << 26,
};
enum ib_port_width {
@@ -556,20 +563,18 @@ __attribute_const__ int ib_rate_to_mult(enum ib_rate rate);
*/
__attribute_const__ int ib_rate_to_mbps(enum ib_rate rate);
-enum ib_mr_create_flags {
- IB_MR_SIGNATURE_EN = 1,
-};
/**
- * ib_mr_init_attr - Memory region init attributes passed to routine
- * ib_create_mr.
- * @max_reg_descriptors: max number of registration descriptors that
- * may be used with registration work requests.
- * @flags: MR creation flags bit mask.
+ * enum ib_mr_type - memory region type
+ * @IB_MR_TYPE_MEM_REG: memory region that is used for
+ * normal registration
+ * @IB_MR_TYPE_SIGNATURE: memory region that is used for
+ * signature operations (data-integrity
+ * capable regions)
*/
-struct ib_mr_init_attr {
- int max_reg_descriptors;
- u32 flags;
+enum ib_mr_type {
+ IB_MR_TYPE_MEM_REG,
+ IB_MR_TYPE_SIGNATURE,
};
/**
@@ -1252,9 +1257,11 @@ struct ib_udata {
};
struct ib_pd {
+ u32 local_dma_lkey;
struct ib_device *device;
struct ib_uobject *uobject;
atomic_t usecnt; /* count all resources */
+ struct ib_mr *local_mr;
};
struct ib_xrcd {
@@ -1488,7 +1495,7 @@ struct ib_cache {
rwlock_t lock;
struct ib_event_handler event_handler;
struct ib_pkey_cache **pkey_cache;
- struct ib_gid_cache **gid_cache;
+ struct ib_gid_table **gid_cache;
u8 *lmc_cache;
};
@@ -1550,6 +1557,8 @@ struct ib_device {
spinlock_t client_data_lock;
struct list_head core_list;
+ /* Access to the client_data_list is protected by the client_data_lock
+ * spinlock and the lists_rwsem read-write semaphore */
struct list_head client_data_list;
struct ib_cache cache;
@@ -1572,9 +1581,47 @@ struct ib_device {
struct ib_port_attr *port_attr);
enum rdma_link_layer (*get_link_layer)(struct ib_device *device,
u8 port_num);
+ /* When calling get_netdev, the HW vendor's driver should return the
+ * net device of device @device at port @port_num or NULL if such
+ * a net device doesn't exist. The vendor driver should call dev_hold
+ * on this net device. The HW vendor's device driver must guarantee
+ * that this function returns NULL before the net device reaches
+ * NETDEV_UNREGISTER_FINAL state.
+ */
+ struct net_device *(*get_netdev)(struct ib_device *device,
+ u8 port_num);
int (*query_gid)(struct ib_device *device,
u8 port_num, int index,
union ib_gid *gid);
+ /* When calling add_gid, the HW vendor's driver should
+ * add the gid of device @device at gid index @index of
+ * port @port_num to be @gid. Meta-info of that gid (for example,
+ * the network device related to this gid is available
+ * at @attr. @context allows the HW vendor driver to store extra
+ * information together with a GID entry. The HW vendor may allocate
+ * memory to contain this information and store it in @context when a
+ * new GID entry is written to. Params are consistent until the next
+ * call of add_gid or delete_gid. The function should return 0 on
+ * success or error otherwise. The function could be called
+ * concurrently for different ports. This function is only called
+ * when roce_gid_table is used.
+ */
+ int (*add_gid)(struct ib_device *device,
+ u8 port_num,
+ unsigned int index,
+ const union ib_gid *gid,
+ const struct ib_gid_attr *attr,
+ void **context);
+ /* When calling del_gid, the HW vendor's driver should delete the
+ * gid of device @device at gid index @index of port @port_num.
+ * Upon the deletion of a GID entry, the HW vendor must free any
+ * allocated memory. The caller will clear @context afterwards.
+ * This function is only called when roce_gid_table is used.
+ */
+ int (*del_gid)(struct ib_device *device,
+ u8 port_num,
+ unsigned int index,
+ void **context);
int (*query_pkey)(struct ib_device *device,
u8 port_num, u16 index, u16 *pkey);
int (*modify_device)(struct ib_device *device,
@@ -1668,11 +1715,9 @@ struct ib_device {
int (*query_mr)(struct ib_mr *mr,
struct ib_mr_attr *mr_attr);
int (*dereg_mr)(struct ib_mr *mr);
- int (*destroy_mr)(struct ib_mr *mr);
- struct ib_mr * (*create_mr)(struct ib_pd *pd,
- struct ib_mr_init_attr *mr_init_attr);
- struct ib_mr * (*alloc_fast_reg_mr)(struct ib_pd *pd,
- int max_page_list_len);
+ struct ib_mr * (*alloc_mr)(struct ib_pd *pd,
+ enum ib_mr_type mr_type,
+ u32 max_num_sg);
struct ib_fast_reg_page_list * (*alloc_fast_reg_page_list)(struct ib_device *device,
int page_list_len);
void (*free_fast_reg_page_list)(struct ib_fast_reg_page_list *page_list);
@@ -1724,6 +1769,7 @@ struct ib_device {
int (*destroy_flow)(struct ib_flow *flow_id);
int (*check_mr_status)(struct ib_mr *mr, u32 check_mask,
struct ib_mr_status *mr_status);
+ void (*disassociate_ucontext)(struct ib_ucontext *ibcontext);
struct ib_dma_mapping_ops *dma_ops;
@@ -1761,8 +1807,30 @@ struct ib_device {
struct ib_client {
char *name;
void (*add) (struct ib_device *);
- void (*remove)(struct ib_device *);
-
+ void (*remove)(struct ib_device *, void *client_data);
+
+ /* Returns the net_dev belonging to this ib_client and matching the
+ * given parameters.
+ * @dev: An RDMA device that the net_dev use for communication.
+ * @port: A physical port number on the RDMA device.
+ * @pkey: P_Key that the net_dev uses if applicable.
+ * @gid: A GID that the net_dev uses to communicate.
+ * @addr: An IP address the net_dev is configured with.
+ * @client_data: The device's client data set by ib_set_client_data().
+ *
+ * An ib_client that implements a net_dev on top of RDMA devices
+ * (such as IP over IB) should implement this callback, allowing the
+ * rdma_cm module to find the right net_dev for a given request.
+ *
+ * The caller is responsible for calling dev_put on the returned
+ * netdev. */
+ struct net_device *(*get_net_dev_by_params)(
+ struct ib_device *dev,
+ u8 port,
+ u16 pkey,
+ const union ib_gid *gid,
+ const struct sockaddr *addr,
+ void *client_data);
struct list_head list;
};
@@ -2071,34 +2139,6 @@ static inline bool rdma_cap_eth_ah(const struct ib_device *device, u8 port_num)
}
/**
- * rdma_cap_read_multi_sge - Check if the port of device has the capability
- * RDMA Read Multiple Scatter-Gather Entries.
- * @device: Device to check
- * @port_num: Port number to check
- *
- * iWARP has a restriction that RDMA READ requests may only have a single
- * Scatter/Gather Entry (SGE) in the work request.
- *
- * NOTE: although the linux kernel currently assumes all devices are either
- * single SGE RDMA READ devices or identical SGE maximums for RDMA READs and
- * WRITEs, according to Tom Talpey, this is not accurate. There are some
- * devices out there that support more than a single SGE on RDMA READ
- * requests, but do not support the same number of SGEs as they do on
- * RDMA WRITE requests. The linux kernel would need rearchitecting to
- * support these imbalanced READ/WRITE SGEs allowed devices. So, for now,
- * suffice with either the device supports the same READ/WRITE SGEs, or
- * it only gets one READ sge.
- *
- * Return: true for any device that allows more than one SGE in RDMA READ
- * requests.
- */
-static inline bool rdma_cap_read_multi_sge(struct ib_device *device,
- u8 port_num)
-{
- return !(device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_PROT_IWARP);
-}
-
-/**
* rdma_max_mad_size - Return the max MAD size required by this RDMA Port.
*
* @device: Device
@@ -2115,6 +2155,26 @@ static inline size_t rdma_max_mad_size(const struct ib_device *device, u8 port_n
return device->port_immutable[port_num].max_mad_size;
}
+/**
+ * rdma_cap_roce_gid_table - Check if the port of device uses roce_gid_table
+ * @device: Device to check
+ * @port_num: Port number to check
+ *
+ * RoCE GID table mechanism manages the various GIDs for a device.
+ *
+ * NOTE: if allocating the port's GID table has failed, this call will still
+ * return true, but any RoCE GID table API will fail.
+ *
+ * Return: true if the port uses RoCE GID table mechanism in order to manage
+ * its GIDs.
+ */
+static inline bool rdma_cap_roce_gid_table(const struct ib_device *device,
+ u8 port_num)
+{
+ return rdma_protocol_roce(device, port_num) &&
+ device->add_gid && device->del_gid;
+}
+
int ib_query_gid(struct ib_device *device,
u8 port_num, int index, union ib_gid *gid);
@@ -2135,20 +2195,9 @@ int ib_find_gid(struct ib_device *device, union ib_gid *gid,
int ib_find_pkey(struct ib_device *device,
u8 port_num, u16 pkey, u16 *index);
-/**
- * ib_alloc_pd - Allocates an unused protection domain.
- * @device: The device on which to allocate the protection domain.
- *
- * A protection domain object provides an association between QPs, shared
- * receive queues, address handles, memory regions, and memory windows.
- */
struct ib_pd *ib_alloc_pd(struct ib_device *device);
-/**
- * ib_dealloc_pd - Deallocates a protection domain.
- * @pd: The protection domain to deallocate.
- */
-int ib_dealloc_pd(struct ib_pd *pd);
+void ib_dealloc_pd(struct ib_pd *pd);
/**
* ib_create_ah - Creates an address handle for the given address vector.
@@ -2760,52 +2809,6 @@ static inline void ib_dma_free_coherent(struct ib_device *dev,
}
/**
- * ib_reg_phys_mr - Prepares a virtually addressed memory region for use
- * by an HCA.
- * @pd: The protection domain associated assigned to the registered region.
- * @phys_buf_array: Specifies a list of physical buffers to use in the
- * memory region.
- * @num_phys_buf: Specifies the size of the phys_buf_array.
- * @mr_access_flags: Specifies the memory access rights.
- * @iova_start: The offset of the region's starting I/O virtual address.
- */
-struct ib_mr *ib_reg_phys_mr(struct ib_pd *pd,
- struct ib_phys_buf *phys_buf_array,
- int num_phys_buf,
- int mr_access_flags,
- u64 *iova_start);
-
-/**
- * ib_rereg_phys_mr - Modifies the attributes of an existing memory region.
- * Conceptually, this call performs the functions deregister memory region
- * followed by register physical memory region. Where possible,
- * resources are reused instead of deallocated and reallocated.
- * @mr: The memory region to modify.
- * @mr_rereg_mask: A bit-mask used to indicate which of the following
- * properties of the memory region are being modified.
- * @pd: If %IB_MR_REREG_PD is set in mr_rereg_mask, this field specifies
- * the new protection domain to associated with the memory region,
- * otherwise, this parameter is ignored.
- * @phys_buf_array: If %IB_MR_REREG_TRANS is set in mr_rereg_mask, this
- * field specifies a list of physical buffers to use in the new
- * translation, otherwise, this parameter is ignored.
- * @num_phys_buf: If %IB_MR_REREG_TRANS is set in mr_rereg_mask, this
- * field specifies the size of the phys_buf_array, otherwise, this
- * parameter is ignored.
- * @mr_access_flags: If %IB_MR_REREG_ACCESS is set in mr_rereg_mask, this
- * field specifies the new memory access rights, otherwise, this
- * parameter is ignored.
- * @iova_start: The offset of the region's starting I/O virtual address.
- */
-int ib_rereg_phys_mr(struct ib_mr *mr,
- int mr_rereg_mask,
- struct ib_pd *pd,
- struct ib_phys_buf *phys_buf_array,
- int num_phys_buf,
- int mr_access_flags,
- u64 *iova_start);
-
-/**
* ib_query_mr - Retrieves information about a specific memory region.
* @mr: The memory region to retrieve information about.
* @mr_attr: The attributes of the specified memory region.
@@ -2821,33 +2824,9 @@ int ib_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr);
*/
int ib_dereg_mr(struct ib_mr *mr);
-
-/**
- * ib_create_mr - Allocates a memory region that may be used for
- * signature handover operations.
- * @pd: The protection domain associated with the region.
- * @mr_init_attr: memory region init attributes.
- */
-struct ib_mr *ib_create_mr(struct ib_pd *pd,
- struct ib_mr_init_attr *mr_init_attr);
-
-/**
- * ib_destroy_mr - Destroys a memory region that was created using
- * ib_create_mr and removes it from HW translation tables.
- * @mr: The memory region to destroy.
- *
- * This function can fail, if the memory region has memory windows bound to it.
- */
-int ib_destroy_mr(struct ib_mr *mr);
-
-/**
- * ib_alloc_fast_reg_mr - Allocates memory region usable with the
- * IB_WR_FAST_REG_MR send work request.
- * @pd: The protection domain associated with the region.
- * @max_page_list_len: requested max physical buffer list length to be
- * used with fast register work requests for this MR.
- */
-struct ib_mr *ib_alloc_fast_reg_mr(struct ib_pd *pd, int max_page_list_len);
+struct ib_mr *ib_alloc_mr(struct ib_pd *pd,
+ enum ib_mr_type mr_type,
+ u32 max_num_sg);
/**
* ib_alloc_fast_reg_page_list - Allocates a page list array
@@ -3040,4 +3019,8 @@ static inline int ib_check_mr_access(int flags)
int ib_check_mr_status(struct ib_mr *mr, u32 check_mask,
struct ib_mr_status *mr_status);
+struct net_device *ib_get_net_dev_by_params(struct ib_device *dev, u8 port,
+ u16 pkey, const union ib_gid *gid,
+ const struct sockaddr *addr);
+
#endif /* IB_VERBS_H */