From 57f0f512b273f60d52568b8c6b77e17f5636edc0 Mon Sep 17 00:00:00 2001 From: André Fabian Silva Delgado Date: Wed, 5 Aug 2015 17:04:01 -0300 Subject: Initial import --- .../lustre/include/linux/lnet/api-support.h | 44 + drivers/staging/lustre/include/linux/lnet/api.h | 217 +++++ .../staging/lustre/include/linux/lnet/lib-lnet.h | 883 +++++++++++++++++++++ .../staging/lustre/include/linux/lnet/lib-types.h | 760 ++++++++++++++++++ .../lustre/include/linux/lnet/linux/api-support.h | 42 + .../lustre/include/linux/lnet/linux/lib-lnet.h | 71 ++ .../lustre/include/linux/lnet/linux/lib-types.h | 45 ++ .../staging/lustre/include/linux/lnet/linux/lnet.h | 56 ++ .../lustre/include/linux/lnet/lnet-sysctl.h | 49 ++ drivers/staging/lustre/include/linux/lnet/lnet.h | 51 ++ .../staging/lustre/include/linux/lnet/lnetctl.h | 80 ++ drivers/staging/lustre/include/linux/lnet/lnetst.h | 491 ++++++++++++ drivers/staging/lustre/include/linux/lnet/ptllnd.h | 93 +++ .../lustre/include/linux/lnet/ptllnd_wire.h | 119 +++ .../staging/lustre/include/linux/lnet/socklnd.h | 103 +++ drivers/staging/lustre/include/linux/lnet/types.h | 492 ++++++++++++ 16 files changed, 3596 insertions(+) create mode 100644 drivers/staging/lustre/include/linux/lnet/api-support.h create mode 100644 drivers/staging/lustre/include/linux/lnet/api.h create mode 100644 drivers/staging/lustre/include/linux/lnet/lib-lnet.h create mode 100644 drivers/staging/lustre/include/linux/lnet/lib-types.h create mode 100644 drivers/staging/lustre/include/linux/lnet/linux/api-support.h create mode 100644 drivers/staging/lustre/include/linux/lnet/linux/lib-lnet.h create mode 100644 drivers/staging/lustre/include/linux/lnet/linux/lib-types.h create mode 100644 drivers/staging/lustre/include/linux/lnet/linux/lnet.h create mode 100644 drivers/staging/lustre/include/linux/lnet/lnet-sysctl.h create mode 100644 drivers/staging/lustre/include/linux/lnet/lnet.h create mode 100644 drivers/staging/lustre/include/linux/lnet/lnetctl.h create mode 
100644 drivers/staging/lustre/include/linux/lnet/lnetst.h create mode 100644 drivers/staging/lustre/include/linux/lnet/ptllnd.h create mode 100644 drivers/staging/lustre/include/linux/lnet/ptllnd_wire.h create mode 100644 drivers/staging/lustre/include/linux/lnet/socklnd.h create mode 100644 drivers/staging/lustre/include/linux/lnet/types.h (limited to 'drivers/staging/lustre/include/linux/lnet') diff --git a/drivers/staging/lustre/include/linux/lnet/api-support.h b/drivers/staging/lustre/include/linux/lnet/api-support.h new file mode 100644 index 000000000..8f7fa28b5 --- /dev/null +++ b/drivers/staging/lustre/include/linux/lnet/api-support.h @@ -0,0 +1,44 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. 
+ */ + +#ifndef __LNET_API_SUPPORT_H__ +#define __LNET_API_SUPPORT_H__ + +#include "linux/api-support.h" + +#include "../libcfs/libcfs.h" +#include "types.h" +#include "lnet.h" + +#endif diff --git a/drivers/staging/lustre/include/linux/lnet/api.h b/drivers/staging/lustre/include/linux/lnet/api.h new file mode 100644 index 000000000..cd8651757 --- /dev/null +++ b/drivers/staging/lustre/include/linux/lnet/api.h @@ -0,0 +1,217 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + */ + +#ifndef __LNET_API_H__ +#define __LNET_API_H__ + +/** \defgroup lnet LNet + * + * The Lustre Networking subsystem. + * + * LNet is an asynchronous message-passing API, which provides an unreliable + * connectionless service that can't guarantee any order. 
It supports OFA IB, + * TCP/IP, and Cray Portals, and routes between heterogeneous networks. + * + * LNet can run both in OS kernel space and in userspace as a library. + * @{ + */ + +#include "../lnet/types.h" + +/** \defgroup lnet_init_fini Initialization and cleanup + * The LNet must be properly initialized before any LNet calls can be made. + * @{ */ +int LNetInit(void); +void LNetFini(void); + +int LNetNIInit(lnet_pid_t requested_pid); +int LNetNIFini(void); +/** @} lnet_init_fini */ + +/** \defgroup lnet_addr LNet addressing and basic types + * + * Addressing scheme and basic data types of LNet. + * + * The LNet API is memory-oriented, so LNet must be able to address not only + * end-points but also memory regions within a process address space. + * An ::lnet_nid_t addresses an end-point. An ::lnet_pid_t identifies a process + * in a node. A portal represents an opening in the address space of a + * process. Match bits are the criteria used to identify a region of memory inside a + * portal, and offset specifies an offset within the memory region. + * + * LNet creates a table of portals for each process during initialization. + * This table has MAX_PORTALS entries and its size can't be dynamically + * changed. A portal stays empty until the owning process starts to add + * memory regions to it. A portal is sometimes called an index because + * it's an entry in the portals table of a process. + * + * \see LNetMEAttach + * @{ */ +int LNetGetId(unsigned int index, lnet_process_id_t *id); +int LNetDist(lnet_nid_t nid, lnet_nid_t *srcnid, __u32 *order); +void LNetSnprintHandle(char *str, int str_len, lnet_handle_any_t handle); + +/** @} lnet_addr */ + +/** \defgroup lnet_me Match entries + * + * A match entry (abbreviated as ME) describes a set of criteria to accept + * incoming requests. + * + * A portal is essentially a match list plus a set of attributes. A match + * list is a chain of MEs. Each ME includes a pointer to a memory descriptor + * and a set of match criteria.
The match criteria can be used to reject + * incoming requests based on process ID or the match bits provided in the + * request. MEs can be dynamically inserted into a match list by LNetMEAttach() + * and LNetMEInsert(), and removed from its list by LNetMEUnlink(). + * @{ */ +int LNetMEAttach(unsigned int portal, + lnet_process_id_t match_id_in, + __u64 match_bits_in, + __u64 ignore_bits_in, + lnet_unlink_t unlink_in, + lnet_ins_pos_t pos_in, + lnet_handle_me_t *handle_out); + +int LNetMEInsert(lnet_handle_me_t current_in, + lnet_process_id_t match_id_in, + __u64 match_bits_in, + __u64 ignore_bits_in, + lnet_unlink_t unlink_in, + lnet_ins_pos_t position_in, + lnet_handle_me_t *handle_out); + +int LNetMEUnlink(lnet_handle_me_t current_in); +/** @} lnet_me */ + +/** \defgroup lnet_md Memory descriptors + * + * A memory descriptor contains information about a region of a user's + * memory (either in kernel or user space) and optionally points to an + * event queue where information about the operations performed on the + * memory descriptor are recorded. Memory descriptor is abbreviated as + * MD and can be used interchangeably with the memory region it describes. + * + * The LNet API provides two operations to create MDs: LNetMDAttach() + * and LNetMDBind(); one operation to unlink and release the resources + * associated with a MD: LNetMDUnlink(). + * @{ */ +int LNetMDAttach(lnet_handle_me_t current_in, + lnet_md_t md_in, + lnet_unlink_t unlink_in, + lnet_handle_md_t *handle_out); + +int LNetMDBind(lnet_md_t md_in, + lnet_unlink_t unlink_in, + lnet_handle_md_t *handle_out); + +int LNetMDUnlink(lnet_handle_md_t md_in); +/** @} lnet_md */ + +/** \defgroup lnet_eq Events and event queues + * + * Event queues (abbreviated as EQ) are used to log operations performed on + * local MDs. In particular, they signal the completion of a data transmission + * into or out of a MD. 
They can also be used to hold acknowledgments for + * completed PUT operations and indicate when a MD has been unlinked. Multiple + * MDs can share a single EQ. An EQ may have an optional event handler + * associated with it. If an event handler exists, it will be run for each + * event that is deposited into the EQ. + * + * In addition to the lnet_handle_eq_t, the LNet API defines two types + * associated with events: The ::lnet_event_kind_t defines the kinds of events + * that can be stored in an EQ. The lnet_event_t defines a structure that + * holds the information about an event. + * + * There are five functions for dealing with EQs: LNetEQAlloc() is used to + * create an EQ and allocate the resources needed, while LNetEQFree() + * releases these resources and frees the EQ. LNetEQGet() retrieves the next + * event from an EQ, and LNetEQWait() can be used to block a process until + * an EQ has at least one event. LNetEQPoll() can be used to test or wait + * on multiple EQs. + * @{ */ +int LNetEQAlloc(unsigned int count_in, + lnet_eq_handler_t handler, + lnet_handle_eq_t *handle_out); + +int LNetEQFree(lnet_handle_eq_t eventq_in); + +int LNetEQGet(lnet_handle_eq_t eventq_in, + lnet_event_t *event_out); + +int LNetEQWait(lnet_handle_eq_t eventq_in, + lnet_event_t *event_out); + +int LNetEQPoll(lnet_handle_eq_t *eventqs_in, + int neq_in, + int timeout_ms, + lnet_event_t *event_out, + int *which_eq_out); +/** @} lnet_eq */ + +/** \defgroup lnet_data Data movement operations + * + * The LNet API provides two data movement operations: LNetPut() + * and LNetGet().
+ * @{ */ +int LNetPut(lnet_nid_t self, + lnet_handle_md_t md_in, + lnet_ack_req_t ack_req_in, + lnet_process_id_t target_in, + unsigned int portal_in, + __u64 match_bits_in, + unsigned int offset_in, + __u64 hdr_data_in); + +int LNetGet(lnet_nid_t self, + lnet_handle_md_t md_in, + lnet_process_id_t target_in, + unsigned int portal_in, + __u64 match_bits_in, + unsigned int offset_in); +/** @} lnet_data */ + +/** \defgroup lnet_misc Miscellaneous operations. + * Miscellaneous operations. + * @{ */ + +int LNetSetLazyPortal(int portal); +int LNetClearLazyPortal(int portal); +int LNetCtl(unsigned int cmd, void *arg); +int LNetSetAsync(lnet_process_id_t id, int nasync); + +/** @} lnet_misc */ + +/** @} lnet */ +#endif diff --git a/drivers/staging/lustre/include/linux/lnet/lib-lnet.h b/drivers/staging/lustre/include/linux/lnet/lib-lnet.h new file mode 100644 index 000000000..0038d29a3 --- /dev/null +++ b/drivers/staging/lustre/include/linux/lnet/lib-lnet.h @@ -0,0 +1,883 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. 
+ * + * GPL HEADER END + */ +/* + * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2012, Intel Corporation. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + * + * lnet/include/lnet/lib-lnet.h + * + * Top level include for library side routines + */ + +#ifndef __LNET_LIB_LNET_H__ +#define __LNET_LIB_LNET_H__ + +#include "linux/lib-lnet.h" +#include "../libcfs/libcfs.h" +#include "types.h" +#include "lnet.h" +#include "lib-types.h" + +extern lnet_t the_lnet; /* THE network */ + +#if defined(LNET_USE_LIB_FREELIST) +/* 1 CPT, simplify implementation... */ +# define LNET_CPT_MAX_BITS 0 + +#else /* KERNEL and no freelist */ + +# if (BITS_PER_LONG == 32) +/* 2 CPTs, allowing more CPTs might make us under memory pressure */ +# define LNET_CPT_MAX_BITS 1 + +# else /* 64-bit system */ +/* + * 256 CPTs for thousands of CPUs, allowing more CPTs might make us + * under risk of consuming all lh_cookie. 
+ */ +# define LNET_CPT_MAX_BITS 8 +# endif /* BITS_PER_LONG == 32 */ +#endif + +/* max allowed CPT number */ +#define LNET_CPT_MAX (1 << LNET_CPT_MAX_BITS) + +#define LNET_CPT_NUMBER (the_lnet.ln_cpt_number) +#define LNET_CPT_BITS (the_lnet.ln_cpt_bits) +#define LNET_CPT_MASK ((1ULL << LNET_CPT_BITS) - 1) + +/** exclusive lock */ +#define LNET_LOCK_EX CFS_PERCPT_LOCK_EX + +static inline int lnet_is_wire_handle_none(lnet_handle_wire_t *wh) +{ + return (wh->wh_interface_cookie == LNET_WIRE_HANDLE_COOKIE_NONE && + wh->wh_object_cookie == LNET_WIRE_HANDLE_COOKIE_NONE); +} + +static inline int lnet_md_exhausted(lnet_libmd_t *md) +{ + return (md->md_threshold == 0 || + ((md->md_options & LNET_MD_MAX_SIZE) != 0 && + md->md_offset + md->md_max_size > md->md_length)); +} + +static inline int lnet_md_unlinkable(lnet_libmd_t *md) +{ + /* Should unlink md when its refcount is 0 and either: + * - md has been flagged for deletion (by auto unlink or + * LNetM[DE]Unlink, in the latter case md may not be exhausted). + * - auto unlink is on and md is exhausted. + */ + if (md->md_refcount != 0) + return 0; + + if ((md->md_flags & LNET_MD_FLAG_ZOMBIE) != 0) + return 1; + + return ((md->md_flags & LNET_MD_FLAG_AUTO_UNLINK) != 0 && + lnet_md_exhausted(md)); +} + +#define lnet_cpt_table() (the_lnet.ln_cpt_table) +#define lnet_cpt_current() cfs_cpt_current(the_lnet.ln_cpt_table, 1) + +static inline int +lnet_cpt_of_cookie(__u64 cookie) +{ + unsigned int cpt = (cookie >> LNET_COOKIE_TYPE_BITS) & LNET_CPT_MASK; + + /* LNET_CPT_NUMBER doesn't have to be a power of 2, which means we can + * get an illegal cpt from an invalid cookie */ + return cpt < LNET_CPT_NUMBER ?
cpt : cpt % LNET_CPT_NUMBER; +} + +static inline void +lnet_res_lock(int cpt) +{ + cfs_percpt_lock(the_lnet.ln_res_lock, cpt); +} + +static inline void +lnet_res_unlock(int cpt) +{ + cfs_percpt_unlock(the_lnet.ln_res_lock, cpt); +} + +static inline int +lnet_res_lock_current(void) +{ + int cpt = lnet_cpt_current(); + + lnet_res_lock(cpt); + return cpt; +} + +static inline void +lnet_net_lock(int cpt) +{ + cfs_percpt_lock(the_lnet.ln_net_lock, cpt); +} + +static inline void +lnet_net_unlock(int cpt) +{ + cfs_percpt_unlock(the_lnet.ln_net_lock, cpt); +} + +static inline int +lnet_net_lock_current(void) +{ + int cpt = lnet_cpt_current(); + + lnet_net_lock(cpt); + return cpt; +} + +#define LNET_LOCK() lnet_net_lock(LNET_LOCK_EX) +#define LNET_UNLOCK() lnet_net_unlock(LNET_LOCK_EX) + +#define lnet_ptl_lock(ptl) spin_lock(&(ptl)->ptl_lock) +#define lnet_ptl_unlock(ptl) spin_unlock(&(ptl)->ptl_lock) +#define lnet_eq_wait_lock() spin_lock(&the_lnet.ln_eq_wait_lock) +#define lnet_eq_wait_unlock() spin_unlock(&the_lnet.ln_eq_wait_lock) +#define lnet_ni_lock(ni) spin_lock(&(ni)->ni_lock) +#define lnet_ni_unlock(ni) spin_unlock(&(ni)->ni_lock) +#define LNET_MUTEX_LOCK(m) mutex_lock(m) +#define LNET_MUTEX_UNLOCK(m) mutex_unlock(m) + +#define MAX_PORTALS 64 + +/* these are only used by code with LNET_USE_LIB_FREELIST, but we still + * exported them to !LNET_USE_LIB_FREELIST for easy implementation */ +#define LNET_FL_MAX_MES 2048 +#define LNET_FL_MAX_MDS 2048 +#define LNET_FL_MAX_EQS 512 +#define LNET_FL_MAX_MSGS 2048 /* Outstanding messages */ + +#ifdef LNET_USE_LIB_FREELIST + +int lnet_freelist_init(lnet_freelist_t *fl, int n, int size); +void lnet_freelist_fini(lnet_freelist_t *fl); + +static inline void * +lnet_freelist_alloc(lnet_freelist_t *fl) +{ + /* ALWAYS called with liblock held */ + lnet_freeobj_t *o; + + if (list_empty(&fl->fl_list)) + return NULL; + + o = list_entry(fl->fl_list.next, lnet_freeobj_t, fo_list); + list_del(&o->fo_list); + return (void 
*)&o->fo_contents; +} + +static inline void +lnet_freelist_free(lnet_freelist_t *fl, void *obj) +{ + /* ALWAYS called with liblock held */ + lnet_freeobj_t *o = list_entry(obj, lnet_freeobj_t, fo_contents); + + list_add(&o->fo_list, &fl->fl_list); +} + +static inline lnet_eq_t * +lnet_eq_alloc(void) +{ + /* NEVER called with resource lock held */ + struct lnet_res_container *rec = &the_lnet.ln_eq_container; + lnet_eq_t *eq; + + LASSERT(LNET_CPT_NUMBER == 1); + + lnet_res_lock(0); + eq = (lnet_eq_t *)lnet_freelist_alloc(&rec->rec_freelist); + lnet_res_unlock(0); + + return eq; +} + +static inline void +lnet_eq_free_locked(lnet_eq_t *eq) +{ + /* ALWAYS called with resource lock held */ + struct lnet_res_container *rec = &the_lnet.ln_eq_container; + + LASSERT(LNET_CPT_NUMBER == 1); + lnet_freelist_free(&rec->rec_freelist, eq); +} + +static inline void +lnet_eq_free(lnet_eq_t *eq) +{ + lnet_res_lock(0); + lnet_eq_free_locked(eq); + lnet_res_unlock(0); +} + +static inline lnet_libmd_t * +lnet_md_alloc(lnet_md_t *umd) +{ + /* NEVER called with resource lock held */ + struct lnet_res_container *rec = the_lnet.ln_md_containers[0]; + lnet_libmd_t *md; + + LASSERT(LNET_CPT_NUMBER == 1); + + lnet_res_lock(0); + md = (lnet_libmd_t *)lnet_freelist_alloc(&rec->rec_freelist); + lnet_res_unlock(0); + + if (md != NULL) + INIT_LIST_HEAD(&md->md_list); + + return md; +} + +static inline void +lnet_md_free_locked(lnet_libmd_t *md) +{ + /* ALWAYS called with resource lock held */ + struct lnet_res_container *rec = the_lnet.ln_md_containers[0]; + + LASSERT(LNET_CPT_NUMBER == 1); + lnet_freelist_free(&rec->rec_freelist, md); +} + +static inline void +lnet_md_free(lnet_libmd_t *md) +{ + lnet_res_lock(0); + lnet_md_free_locked(md); + lnet_res_unlock(0); +} + +static inline lnet_me_t * +lnet_me_alloc(void) +{ + /* NEVER called with resource lock held */ + struct lnet_res_container *rec = the_lnet.ln_me_containers[0]; + lnet_me_t *me; + + LASSERT(LNET_CPT_NUMBER == 1); + + lnet_res_lock(0); 
+ me = (lnet_me_t *)lnet_freelist_alloc(&rec->rec_freelist); + lnet_res_unlock(0); + + return me; +} + +static inline void +lnet_me_free_locked(lnet_me_t *me) +{ + /* ALWAYS called with resource lock held */ + struct lnet_res_container *rec = the_lnet.ln_me_containers[0]; + + LASSERT(LNET_CPT_NUMBER == 1); + lnet_freelist_free(&rec->rec_freelist, me); +} + +static inline void +lnet_me_free(lnet_me_t *me) +{ + lnet_res_lock(0); + lnet_me_free_locked(me); + lnet_res_unlock(0); +} + +static inline lnet_msg_t * +lnet_msg_alloc(void) +{ + /* NEVER called with network lock held */ + struct lnet_msg_container *msc = the_lnet.ln_msg_containers[0]; + lnet_msg_t *msg; + + LASSERT(LNET_CPT_NUMBER == 1); + + lnet_net_lock(0); + msg = (lnet_msg_t *)lnet_freelist_alloc(&msc->msc_freelist); + lnet_net_unlock(0); + + if (msg != NULL) { + /* NULL pointers, clear flags etc */ + memset(msg, 0, sizeof(*msg)); + } + return msg; +} + +static inline void +lnet_msg_free_locked(lnet_msg_t *msg) +{ + /* ALWAYS called with network lock held */ + struct lnet_msg_container *msc = the_lnet.ln_msg_containers[0]; + + LASSERT(LNET_CPT_NUMBER == 1); + LASSERT(!msg->msg_onactivelist); + lnet_freelist_free(&msc->msc_freelist, msg); +} + +static inline void +lnet_msg_free(lnet_msg_t *msg) +{ + lnet_net_lock(0); + lnet_msg_free_locked(msg); + lnet_net_unlock(0); +} + +#else /* !LNET_USE_LIB_FREELIST */ + +static inline lnet_eq_t * +lnet_eq_alloc(void) +{ + /* NEVER called with liblock held */ + lnet_eq_t *eq; + + LIBCFS_ALLOC(eq, sizeof(*eq)); + return eq; +} + +static inline void +lnet_eq_free(lnet_eq_t *eq) +{ + /* ALWAYS called with resource lock held */ + LIBCFS_FREE(eq, sizeof(*eq)); +} + +static inline lnet_libmd_t * +lnet_md_alloc(lnet_md_t *umd) +{ + /* NEVER called with liblock held */ + lnet_libmd_t *md; + unsigned int size; + unsigned int niov; + + if ((umd->options & LNET_MD_KIOV) != 0) { + niov = umd->length; + size = offsetof(lnet_libmd_t, md_iov.kiov[niov]); + } else { + niov = 
((umd->options & LNET_MD_IOVEC) != 0) ? + umd->length : 1; + size = offsetof(lnet_libmd_t, md_iov.iov[niov]); + } + + LIBCFS_ALLOC(md, size); + + if (md != NULL) { + /* Set here in case of early free */ + md->md_options = umd->options; + md->md_niov = niov; + INIT_LIST_HEAD(&md->md_list); + } + + return md; +} + +static inline void +lnet_md_free(lnet_libmd_t *md) +{ + /* ALWAYS called with resource lock held */ + unsigned int size; + + if ((md->md_options & LNET_MD_KIOV) != 0) + size = offsetof(lnet_libmd_t, md_iov.kiov[md->md_niov]); + else + size = offsetof(lnet_libmd_t, md_iov.iov[md->md_niov]); + + LIBCFS_FREE(md, size); +} + +static inline lnet_me_t * +lnet_me_alloc(void) +{ + /* NEVER called with liblock held */ + lnet_me_t *me; + + LIBCFS_ALLOC(me, sizeof(*me)); + return me; +} + +static inline void +lnet_me_free(lnet_me_t *me) +{ + /* ALWAYS called with resource lock held */ + LIBCFS_FREE(me, sizeof(*me)); +} + +static inline lnet_msg_t * +lnet_msg_alloc(void) +{ + /* NEVER called with liblock held */ + lnet_msg_t *msg; + + LIBCFS_ALLOC(msg, sizeof(*msg)); + + /* no need to zero, LIBCFS_ALLOC does for us */ + return msg; +} + +static inline void +lnet_msg_free(lnet_msg_t *msg) +{ + /* ALWAYS called with network lock held */ + LASSERT(!msg->msg_onactivelist); + LIBCFS_FREE(msg, sizeof(*msg)); +} + +#define lnet_eq_free_locked(eq) lnet_eq_free(eq) +#define lnet_md_free_locked(md) lnet_md_free(md) +#define lnet_me_free_locked(me) lnet_me_free(me) +#define lnet_msg_free_locked(msg) lnet_msg_free(msg) + +#endif /* LNET_USE_LIB_FREELIST */ + +lnet_libhandle_t *lnet_res_lh_lookup(struct lnet_res_container *rec, + __u64 cookie); +void lnet_res_lh_initialize(struct lnet_res_container *rec, + lnet_libhandle_t *lh); +static inline void +lnet_res_lh_invalidate(lnet_libhandle_t *lh) +{ + /* ALWAYS called with resource lock held */ + /* NB: cookie is still useful, don't reset it */ + list_del(&lh->lh_hash_chain); +} + +static inline void +lnet_eq2handle(lnet_handle_eq_t 
*handle, lnet_eq_t *eq) +{ + if (eq == NULL) { + LNetInvalidateHandle(handle); + return; + } + + handle->cookie = eq->eq_lh.lh_cookie; +} + +static inline lnet_eq_t * +lnet_handle2eq(lnet_handle_eq_t *handle) +{ + /* ALWAYS called with resource lock held */ + lnet_libhandle_t *lh; + + lh = lnet_res_lh_lookup(&the_lnet.ln_eq_container, handle->cookie); + if (lh == NULL) + return NULL; + + return lh_entry(lh, lnet_eq_t, eq_lh); +} + +static inline void +lnet_md2handle(lnet_handle_md_t *handle, lnet_libmd_t *md) +{ + handle->cookie = md->md_lh.lh_cookie; +} + +static inline lnet_libmd_t * +lnet_handle2md(lnet_handle_md_t *handle) +{ + /* ALWAYS called with resource lock held */ + lnet_libhandle_t *lh; + int cpt; + + cpt = lnet_cpt_of_cookie(handle->cookie); + lh = lnet_res_lh_lookup(the_lnet.ln_md_containers[cpt], + handle->cookie); + if (lh == NULL) + return NULL; + + return lh_entry(lh, lnet_libmd_t, md_lh); +} + +static inline lnet_libmd_t * +lnet_wire_handle2md(lnet_handle_wire_t *wh) +{ + /* ALWAYS called with resource lock held */ + lnet_libhandle_t *lh; + int cpt; + + if (wh->wh_interface_cookie != the_lnet.ln_interface_cookie) + return NULL; + + cpt = lnet_cpt_of_cookie(wh->wh_object_cookie); + lh = lnet_res_lh_lookup(the_lnet.ln_md_containers[cpt], + wh->wh_object_cookie); + if (lh == NULL) + return NULL; + + return lh_entry(lh, lnet_libmd_t, md_lh); +} + +static inline void +lnet_me2handle(lnet_handle_me_t *handle, lnet_me_t *me) +{ + handle->cookie = me->me_lh.lh_cookie; +} + +static inline lnet_me_t * +lnet_handle2me(lnet_handle_me_t *handle) +{ + /* ALWAYS called with resource lock held */ + lnet_libhandle_t *lh; + int cpt; + + cpt = lnet_cpt_of_cookie(handle->cookie); + lh = lnet_res_lh_lookup(the_lnet.ln_me_containers[cpt], + handle->cookie); + if (lh == NULL) + return NULL; + + return lh_entry(lh, lnet_me_t, me_lh); +} + +static inline void +lnet_peer_addref_locked(lnet_peer_t *lp) +{ + LASSERT(lp->lp_refcount > 0); + lp->lp_refcount++; +} + +void 
lnet_destroy_peer_locked(lnet_peer_t *lp); + +static inline void +lnet_peer_decref_locked(lnet_peer_t *lp) +{ + LASSERT(lp->lp_refcount > 0); + lp->lp_refcount--; + if (lp->lp_refcount == 0) + lnet_destroy_peer_locked(lp); +} + +static inline int +lnet_isrouter(lnet_peer_t *lp) +{ + return lp->lp_rtr_refcount != 0; +} + +static inline void +lnet_ni_addref_locked(lnet_ni_t *ni, int cpt) +{ + LASSERT(cpt >= 0 && cpt < LNET_CPT_NUMBER); + LASSERT(*ni->ni_refs[cpt] >= 0); + + (*ni->ni_refs[cpt])++; +} + +static inline void +lnet_ni_addref(lnet_ni_t *ni) +{ + lnet_net_lock(0); + lnet_ni_addref_locked(ni, 0); + lnet_net_unlock(0); +} + +static inline void +lnet_ni_decref_locked(lnet_ni_t *ni, int cpt) +{ + LASSERT(cpt >= 0 && cpt < LNET_CPT_NUMBER); + LASSERT(*ni->ni_refs[cpt] > 0); + + (*ni->ni_refs[cpt])--; +} + +static inline void +lnet_ni_decref(lnet_ni_t *ni) +{ + lnet_net_lock(0); + lnet_ni_decref_locked(ni, 0); + lnet_net_unlock(0); +} + +void lnet_ni_free(lnet_ni_t *ni); + +static inline int +lnet_nid2peerhash(lnet_nid_t nid) +{ + return hash_long(nid, LNET_PEER_HASH_BITS); +} + +static inline struct list_head * +lnet_net2rnethash(__u32 net) +{ + return &the_lnet.ln_remote_nets_hash[(LNET_NETNUM(net) + + LNET_NETTYP(net)) & + ((1U << the_lnet.ln_remote_nets_hbits) - 1)]; +} + +extern lnd_t the_lolnd; +extern int avoid_asym_router_failure; + +int lnet_cpt_of_nid_locked(lnet_nid_t nid); +int lnet_cpt_of_nid(lnet_nid_t nid); +lnet_ni_t *lnet_nid2ni_locked(lnet_nid_t nid, int cpt); +lnet_ni_t *lnet_net2ni_locked(__u32 net, int cpt); +lnet_ni_t *lnet_net2ni(__u32 net); + +int lnet_notify(lnet_ni_t *ni, lnet_nid_t peer, int alive, unsigned long when); +void lnet_notify_locked(lnet_peer_t *lp, int notifylnd, int alive, + unsigned long when); +int lnet_add_route(__u32 net, unsigned int hops, lnet_nid_t gateway_nid, + unsigned int priority); +int lnet_check_routes(void); +int lnet_del_route(__u32 net, lnet_nid_t gw_nid); +void lnet_destroy_routes(void); +int 
lnet_get_route(int idx, __u32 *net, __u32 *hops, + lnet_nid_t *gateway, __u32 *alive, __u32 *priority); +void lnet_proc_init(void); +void lnet_proc_fini(void); +int lnet_rtrpools_alloc(int im_a_router); +void lnet_rtrpools_free(void); +lnet_remotenet_t *lnet_find_net_locked(__u32 net); + +int lnet_islocalnid(lnet_nid_t nid); +int lnet_islocalnet(__u32 net); + +void lnet_msg_attach_md(lnet_msg_t *msg, lnet_libmd_t *md, + unsigned int offset, unsigned int mlen); +void lnet_msg_detach_md(lnet_msg_t *msg, int status); +void lnet_build_unlink_event(lnet_libmd_t *md, lnet_event_t *ev); +void lnet_build_msg_event(lnet_msg_t *msg, lnet_event_kind_t ev_type); +void lnet_msg_commit(lnet_msg_t *msg, int cpt); +void lnet_msg_decommit(lnet_msg_t *msg, int cpt, int status); + +void lnet_eq_enqueue_event(lnet_eq_t *eq, lnet_event_t *ev); +void lnet_prep_send(lnet_msg_t *msg, int type, lnet_process_id_t target, + unsigned int offset, unsigned int len); +int lnet_send(lnet_nid_t nid, lnet_msg_t *msg, lnet_nid_t rtr_nid); +void lnet_return_tx_credits_locked(lnet_msg_t *msg); +void lnet_return_rx_credits_locked(lnet_msg_t *msg); + +/* portals functions */ +/* portals attributes */ +static inline int +lnet_ptl_is_lazy(lnet_portal_t *ptl) +{ + return !!(ptl->ptl_options & LNET_PTL_LAZY); +} + +static inline int +lnet_ptl_is_unique(lnet_portal_t *ptl) +{ + return !!(ptl->ptl_options & LNET_PTL_MATCH_UNIQUE); +} + +static inline int +lnet_ptl_is_wildcard(lnet_portal_t *ptl) +{ + return !!(ptl->ptl_options & LNET_PTL_MATCH_WILDCARD); +} + +static inline void +lnet_ptl_setopt(lnet_portal_t *ptl, int opt) +{ + ptl->ptl_options |= opt; +} + +static inline void +lnet_ptl_unsetopt(lnet_portal_t *ptl, int opt) +{ + ptl->ptl_options &= ~opt; +} + +/* match-table functions */ +struct list_head *lnet_mt_match_head(struct lnet_match_table *mtable, + lnet_process_id_t id, __u64 mbits); +struct lnet_match_table *lnet_mt_of_attach(unsigned int index, + lnet_process_id_t id, __u64 mbits, + __u64 
ignore_bits, + lnet_ins_pos_t pos); +int lnet_mt_match_md(struct lnet_match_table *mtable, + struct lnet_match_info *info, struct lnet_msg *msg); + +/* portals match/attach functions */ +void lnet_ptl_attach_md(lnet_me_t *me, lnet_libmd_t *md, + struct list_head *matches, struct list_head *drops); +void lnet_ptl_detach_md(lnet_me_t *me, lnet_libmd_t *md); +int lnet_ptl_match_md(struct lnet_match_info *info, struct lnet_msg *msg); + +/* initialize and finalize portals */ +int lnet_portals_create(void); +void lnet_portals_destroy(void); + +/* message functions */ +int lnet_parse(lnet_ni_t *ni, lnet_hdr_t *hdr, + lnet_nid_t fromnid, void *private, int rdma_req); +void lnet_recv(lnet_ni_t *ni, void *private, lnet_msg_t *msg, int delayed, + unsigned int offset, unsigned int mlen, unsigned int rlen); +lnet_msg_t *lnet_create_reply_msg(lnet_ni_t *ni, lnet_msg_t *get_msg); +void lnet_set_reply_msg_len(lnet_ni_t *ni, lnet_msg_t *msg, unsigned int len); +void lnet_finalize(lnet_ni_t *ni, lnet_msg_t *msg, int rc); +void lnet_drop_delayed_msg_list(struct list_head *head, char *reason); +void lnet_recv_delayed_msg_list(struct list_head *head); + +int lnet_msg_container_setup(struct lnet_msg_container *container, int cpt); +void lnet_msg_container_cleanup(struct lnet_msg_container *container); +void lnet_msg_containers_destroy(void); +int lnet_msg_containers_create(void); + +char *lnet_msgtyp2str(int type); +void lnet_print_hdr(lnet_hdr_t *hdr); +int lnet_fail_nid(lnet_nid_t nid, unsigned int threshold); + +void lnet_counters_get(lnet_counters_t *counters); +void lnet_counters_reset(void); + +unsigned int lnet_iov_nob(unsigned int niov, struct kvec *iov); +int lnet_extract_iov(int dst_niov, struct kvec *dst, + int src_niov, struct kvec *src, + unsigned int offset, unsigned int len); + +unsigned int lnet_kiov_nob(unsigned int niov, lnet_kiov_t *iov); +int lnet_extract_kiov(int dst_niov, lnet_kiov_t *dst, + int src_niov, lnet_kiov_t *src, + unsigned int offset, unsigned int len);
+
+void lnet_copy_iov2iov(unsigned int ndiov, struct kvec *diov,
+		       unsigned int doffset,
+		       unsigned int nsiov, struct kvec *siov,
+		       unsigned int soffset, unsigned int nob);
+void lnet_copy_kiov2iov(unsigned int niov, struct kvec *iov,
+			unsigned int iovoffset,
+			unsigned int nkiov, lnet_kiov_t *kiov,
+			unsigned int kiovoffset, unsigned int nob);
+void lnet_copy_iov2kiov(unsigned int nkiov, lnet_kiov_t *kiov,
+			unsigned int kiovoffset,
+			unsigned int niov, struct kvec *iov,
+			unsigned int iovoffset, unsigned int nob);
+void lnet_copy_kiov2kiov(unsigned int ndkiov, lnet_kiov_t *dkiov,
+			 unsigned int doffset,
+			 unsigned int nskiov, lnet_kiov_t *skiov,
+			 unsigned int soffset, unsigned int nob);
+
+/*
+ * Flat-buffer convenience wrappers.
+ *
+ * Each helper below describes one contiguous buffer as a single-entry
+ * struct kvec and hands it, unchanged otherwise, to the matching
+ * fragment-list routine declared above.  The kvec is initialised with
+ * designated initialisers so the helpers do not depend on the order of
+ * the members inside struct kvec.
+ *
+ * NOTE(review): the flat length arrives as a signed int; callers are
+ * presumably expected to pass a non-negative value - confirm.
+ */
+
+/*
+ * Wrap the flat destination (dest, dlen) in a one-entry kvec and call
+ * lnet_copy_iov2iov() with the source fragment list siov[nsiov].
+ * doffset, soffset and nob are forwarded as given.
+ */
+static inline void
+lnet_copy_iov2flat(int dlen, void *dest, unsigned int doffset,
+		   unsigned int nsiov, struct kvec *siov, unsigned int soffset,
+		   unsigned int nob)
+{
+	struct kvec diov = { .iov_base = dest, .iov_len = dlen };
+
+	lnet_copy_iov2iov(1, &diov, doffset, nsiov, siov, soffset, nob);
+}
+
+/*
+ * Wrap the flat destination (dest, dlen) in a one-entry kvec and call
+ * lnet_copy_kiov2iov() with the source page-fragment list skiov[nsiov].
+ * doffset, soffset and nob are forwarded as given.
+ */
+static inline void
+lnet_copy_kiov2flat(int dlen, void *dest, unsigned int doffset,
+		    unsigned int nsiov, lnet_kiov_t *skiov,
+		    unsigned int soffset, unsigned int nob)
+{
+	struct kvec diov = { .iov_base = dest, .iov_len = dlen };
+
+	lnet_copy_kiov2iov(1, &diov, doffset, nsiov, skiov, soffset, nob);
+}
+
+/*
+ * Wrap the flat source (src, slen) in a one-entry kvec and call
+ * lnet_copy_iov2iov() with the destination fragment list diov[ndiov].
+ * doffset, soffset and nob are forwarded as given.
+ */
+static inline void
+lnet_copy_flat2iov(unsigned int ndiov, struct kvec *diov, unsigned int doffset,
+		   int slen, void *src, unsigned int soffset, unsigned int nob)
+{
+	struct kvec siov = { .iov_base = src, .iov_len = slen };
+
+	lnet_copy_iov2iov(ndiov, diov, doffset, 1, &siov, soffset, nob);
+}
+
+/*
+ * Wrap the flat source (src, slen) in a one-entry kvec and call
+ * lnet_copy_iov2kiov() with the destination page-fragment list
+ * dkiov[ndiov].  doffset, soffset and nob are forwarded as given.
+ */
+static inline void
+lnet_copy_flat2kiov(unsigned int ndiov, lnet_kiov_t *dkiov,
+		    unsigned int doffset, int slen, void *src,
+		    unsigned int soffset, unsigned int nob)
+{
+	struct kvec siov = { .iov_base = src, .iov_len = slen };
+
+	lnet_copy_iov2kiov(ndiov, dkiov, doffset, 1, &siov, soffset, nob);
+}
+
+void lnet_me_unlink(lnet_me_t *me);
+ +void lnet_md_unlink(lnet_libmd_t *md); +void lnet_md_deconstruct(lnet_libmd_t *lmd, lnet_md_t *umd); + +void lnet_register_lnd(lnd_t *lnd); +void lnet_unregister_lnd(lnd_t *lnd); +int lnet_set_ip_niaddr(lnet_ni_t *ni); + +int lnet_connect(struct socket **sockp, lnet_nid_t peer_nid, + __u32 local_ip, __u32 peer_ip, int peer_port); +void lnet_connect_console_error(int rc, lnet_nid_t peer_nid, + __u32 peer_ip, int port); +int lnet_count_acceptor_nis(void); +int lnet_acceptor_timeout(void); +int lnet_acceptor_port(void); + +int lnet_count_acceptor_nis(void); +int lnet_acceptor_port(void); + +int lnet_acceptor_start(void); +void lnet_acceptor_stop(void); + +void lnet_get_tunables(void); +int lnet_peers_start_down(void); +int lnet_peer_buffer_credits(lnet_ni_t *ni); + +int lnet_router_checker_start(void); +void lnet_router_checker_stop(void); +void lnet_router_ni_update_locked(lnet_peer_t *gw, __u32 net); +void lnet_swap_pinginfo(lnet_ping_info_t *info); + +int lnet_ping_target_init(void); +void lnet_ping_target_fini(void); +int lnet_ping(lnet_process_id_t id, int timeout_ms, + lnet_process_id_t *ids, int n_ids); + +int lnet_parse_ip2nets(char **networksp, char *ip2nets); +int lnet_parse_routes(char *route_str, int *im_a_router); +int lnet_parse_networks(struct list_head *nilist, char *networks); + +int lnet_nid2peer_locked(lnet_peer_t **lpp, lnet_nid_t nid, int cpt); +lnet_peer_t *lnet_find_peer_locked(struct lnet_peer_table *ptable, + lnet_nid_t nid); +void lnet_peer_tables_cleanup(void); +void lnet_peer_tables_destroy(void); +int lnet_peer_tables_create(void); +void lnet_debug_peer(lnet_nid_t nid); + +static inline void lnet_peer_set_alive(lnet_peer_t *lp) +{ + lp->lp_last_alive = lp->lp_last_query = get_seconds(); + if (!lp->lp_alive) + lnet_notify_locked(lp, 0, 1, lp->lp_last_alive); +} + + +#endif diff --git a/drivers/staging/lustre/include/linux/lnet/lib-types.h b/drivers/staging/lustre/include/linux/lnet/lib-types.h new file mode 100644 index 
000000000..50537668f --- /dev/null +++ b/drivers/staging/lustre/include/linux/lnet/lib-types.h @@ -0,0 +1,760 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2012, Intel Corporation. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. 
+ * + * lnet/include/lnet/lib-types.h + * + * Types used by the library side routines that do not need to be + * exposed to the user application + */ + +#ifndef __LNET_LIB_TYPES_H__ +#define __LNET_LIB_TYPES_H__ + +#include "linux/lib-types.h" + +#include "../libcfs/libcfs.h" +#include +#include "types.h" + +#define WIRE_ATTR __attribute__((packed)) + +/* Packed version of lnet_process_id_t to transfer via network */ +typedef struct { + lnet_nid_t nid; + lnet_pid_t pid; /* node id / process id */ +} WIRE_ATTR lnet_process_id_packed_t; + +/* The wire handle's interface cookie only matches one network interface in + * one epoch (i.e. new cookie when the interface restarts or the node + * reboots). The object cookie only matches one object on that interface + * during that object's lifetime (i.e. no cookie re-use). */ +typedef struct { + __u64 wh_interface_cookie; + __u64 wh_object_cookie; +} WIRE_ATTR lnet_handle_wire_t; + +typedef enum { + LNET_MSG_ACK = 0, + LNET_MSG_PUT, + LNET_MSG_GET, + LNET_MSG_REPLY, + LNET_MSG_HELLO, +} lnet_msg_type_t; + +/* The variant fields of the portals message header are aligned on an 8 + * byte boundary in the message header. Note that all types used in these + * wire structs MUST be fixed size and the smaller types are placed at the + * end. 
*/ +typedef struct lnet_ack { + lnet_handle_wire_t dst_wmd; + __u64 match_bits; + __u32 mlength; +} WIRE_ATTR lnet_ack_t; + +typedef struct lnet_put { + lnet_handle_wire_t ack_wmd; + __u64 match_bits; + __u64 hdr_data; + __u32 ptl_index; + __u32 offset; +} WIRE_ATTR lnet_put_t; + +typedef struct lnet_get { + lnet_handle_wire_t return_wmd; + __u64 match_bits; + __u32 ptl_index; + __u32 src_offset; + __u32 sink_length; +} WIRE_ATTR lnet_get_t; + +typedef struct lnet_reply { + lnet_handle_wire_t dst_wmd; +} WIRE_ATTR lnet_reply_t; + +typedef struct lnet_hello { + __u64 incarnation; + __u32 type; +} WIRE_ATTR lnet_hello_t; + +typedef struct { + lnet_nid_t dest_nid; + lnet_nid_t src_nid; + lnet_pid_t dest_pid; + lnet_pid_t src_pid; + __u32 type; /* lnet_msg_type_t */ + __u32 payload_length; /* payload data to follow */ + /*<------__u64 aligned------->*/ + union { + lnet_ack_t ack; + lnet_put_t put; + lnet_get_t get; + lnet_reply_t reply; + lnet_hello_t hello; + } msg; +} WIRE_ATTR lnet_hdr_t; + +/* A HELLO message contains a magic number and protocol version + * code in the header's dest_nid, the peer's NID in the src_nid, and + * LNET_MSG_HELLO in the type field. All other common fields are zero + * (including payload_length; i.e. no payload). + * This is for use by byte-stream LNDs (e.g. TCP/IP) to check the peer is + * running the same protocol and to find out its NID. These LNDs should + * exchange HELLO messages when a connection is first established. Individual + * LNDs can put whatever else they fancy in lnet_hdr_t::msg.
+ */ +typedef struct { + __u32 magic; /* LNET_PROTO_TCP_MAGIC */ + __u16 version_major; /* increment on incompatible change */ + __u16 version_minor; /* increment on compatible change */ +} WIRE_ATTR lnet_magicversion_t; + +/* PROTO MAGIC for LNDs */ +#define LNET_PROTO_IB_MAGIC 0x0be91b91 +#define LNET_PROTO_RA_MAGIC 0x0be91b92 +#define LNET_PROTO_QSW_MAGIC 0x0be91b93 +#define LNET_PROTO_GNI_MAGIC 0xb00fbabe /* ask Kim */ +#define LNET_PROTO_TCP_MAGIC 0xeebc0ded +#define LNET_PROTO_PTL_MAGIC 0x50746C4E /* 'PtlN' unique magic */ +#define LNET_PROTO_MX_MAGIC 0x4d583130 /* 'MX10'! */ +#define LNET_PROTO_ACCEPTOR_MAGIC 0xacce7100 +#define LNET_PROTO_PING_MAGIC 0x70696E67 /* 'ping' */ + +/* Placeholder for a future "unified" protocol across all LNDs */ +/* Current LNDs that receive a request with this magic will respond with a + * "stub" reply using their current protocol */ +#define LNET_PROTO_MAGIC 0x45726963 /* ! */ + +#define LNET_PROTO_TCP_VERSION_MAJOR 1 +#define LNET_PROTO_TCP_VERSION_MINOR 0 + +/* Acceptor connection request */ +typedef struct { + __u32 acr_magic; /* PTL_ACCEPTOR_PROTO_MAGIC */ + __u32 acr_version; /* protocol version */ + __u64 acr_nid; /* target NID */ +} WIRE_ATTR lnet_acceptor_connreq_t; + +#define LNET_PROTO_ACCEPTOR_VERSION 1 + +/* forward refs */ +struct lnet_libmd; + +typedef struct lnet_msg { + struct list_head msg_activelist; + struct list_head msg_list; /* Q for credits/MD */ + + lnet_process_id_t msg_target; + /* where is it from, it's only for building event */ + lnet_nid_t msg_from; + __u32 msg_type; + + /* committed for sending */ + unsigned int msg_tx_committed:1; + /* CPT # this message committed for sending */ + unsigned int msg_tx_cpt:15; + /* committed for receiving */ + unsigned int msg_rx_committed:1; + /* CPT # this message committed for receiving */ + unsigned int msg_rx_cpt:15; + /* queued for tx credit */ + unsigned int msg_tx_delayed:1; + /* queued for RX buffer */ + unsigned int msg_rx_delayed:1; + /* ready for 
pending on RX delay list */ + unsigned int msg_rx_ready_delay:1; + + unsigned int msg_vmflush:1; /* VM trying to free memory */ + unsigned int msg_target_is_router:1; /* sending to a router */ + unsigned int msg_routing:1; /* being forwarded */ + unsigned int msg_ack:1; /* ack on finalize (PUT) */ + unsigned int msg_sending:1; /* outgoing message */ + unsigned int msg_receiving:1; /* being received */ + unsigned int msg_txcredit:1; /* taken an NI send credit */ + unsigned int msg_peertxcredit:1; /* taken a peer send credit */ + unsigned int msg_rtrcredit:1; /* taken a global router credit */ + unsigned int msg_peerrtrcredit:1; /* taken a peer router credit */ + unsigned int msg_onactivelist:1; /* on the activelist */ + + struct lnet_peer *msg_txpeer; /* peer I'm sending to */ + struct lnet_peer *msg_rxpeer; /* peer I received from */ + + void *msg_private; + struct lnet_libmd *msg_md; + + unsigned int msg_len; + unsigned int msg_wanted; + unsigned int msg_offset; + unsigned int msg_niov; + struct kvec *msg_iov; + lnet_kiov_t *msg_kiov; + + lnet_event_t msg_ev; + lnet_hdr_t msg_hdr; +} lnet_msg_t; + +typedef struct lnet_libhandle { + struct list_head lh_hash_chain; + __u64 lh_cookie; +} lnet_libhandle_t; + +#define lh_entry(ptr, type, member) \ + ((type *)((char *)(ptr)-(char *)(&((type *)0)->member))) + +typedef struct lnet_eq { + struct list_head eq_list; + lnet_libhandle_t eq_lh; + lnet_seq_t eq_enq_seq; + lnet_seq_t eq_deq_seq; + unsigned int eq_size; + lnet_eq_handler_t eq_callback; + lnet_event_t *eq_events; + int **eq_refs; /* percpt refcount for EQ */ +} lnet_eq_t; + +typedef struct lnet_me { + struct list_head me_list; + lnet_libhandle_t me_lh; + lnet_process_id_t me_match_id; + unsigned int me_portal; + unsigned int me_pos; /* hash offset in mt_hash */ + __u64 me_match_bits; + __u64 me_ignore_bits; + lnet_unlink_t me_unlink; + struct lnet_libmd *me_md; +} lnet_me_t; + +typedef struct lnet_libmd { + struct list_head md_list; + lnet_libhandle_t md_lh; + 
lnet_me_t *md_me; + char *md_start; + unsigned int md_offset; + unsigned int md_length; + unsigned int md_max_size; + int md_threshold; + int md_refcount; + unsigned int md_options; + unsigned int md_flags; + void *md_user_ptr; + lnet_eq_t *md_eq; + unsigned int md_niov; /* # frags */ + union { + struct kvec iov[LNET_MAX_IOV]; + lnet_kiov_t kiov[LNET_MAX_IOV]; + } md_iov; +} lnet_libmd_t; + +#define LNET_MD_FLAG_ZOMBIE (1 << 0) +#define LNET_MD_FLAG_AUTO_UNLINK (1 << 1) +#define LNET_MD_FLAG_ABORTED (1 << 2) + +#ifdef LNET_USE_LIB_FREELIST +typedef struct { + void *fl_objs; /* single contiguous array of objects */ + int fl_nobjs; /* the number of them */ + int fl_objsize; /* the size (including overhead) of each of them */ + struct list_head fl_list; /* where they are enqueued */ +} lnet_freelist_t; + +typedef struct { + struct list_head fo_list; /* enqueue on fl_list */ + void *fo_contents; /* aligned contents */ +} lnet_freeobj_t; +#endif + +typedef struct { + /* info about peers we are trying to fail */ + struct list_head tp_list; /* ln_test_peers */ + lnet_nid_t tp_nid; /* matching nid */ + unsigned int tp_threshold; /* # failures to simulate */ +} lnet_test_peer_t; + +#define LNET_COOKIE_TYPE_MD 1 +#define LNET_COOKIE_TYPE_ME 2 +#define LNET_COOKIE_TYPE_EQ 3 +#define LNET_COOKIE_TYPE_BITS 2 +#define LNET_COOKIE_MASK ((1ULL << LNET_COOKIE_TYPE_BITS) - 1ULL) + +struct lnet_ni; /* forward ref */ + +typedef struct lnet_lnd { + /* fields managed by portals */ + struct list_head lnd_list; /* stash in the LND table */ + int lnd_refcount; /* # active instances */ + + /* fields initialised by the LND */ + unsigned int lnd_type; + + int (*lnd_startup)(struct lnet_ni *ni); + void (*lnd_shutdown)(struct lnet_ni *ni); + int (*lnd_ctl)(struct lnet_ni *ni, unsigned int cmd, void *arg); + + /* In data movement APIs below, payload buffers are described as a set + * of 'niov' fragments which are... 
+ * EITHER + * in virtual memory (struct kvec *iov != NULL) + * OR + * in pages (kernel only: lnet_kiov_t *kiov != NULL). + * The LND may NOT overwrite these fragment descriptors. + * An 'offset' may specify a byte offset within the set of + * fragments to start from. + */ + + /* Start sending a preformatted message. 'private' is NULL for PUT and + * GET messages; otherwise this is a response to an incoming message + * and 'private' is the 'private' passed to lnet_parse(). Return + * non-zero for immediate failure, otherwise complete later with + * lnet_finalize() */ + int (*lnd_send)(struct lnet_ni *ni, void *private, lnet_msg_t *msg); + + /* Start receiving 'mlen' bytes of payload data, skipping the following + * 'rlen' - 'mlen' bytes. 'private' is the 'private' passed to + * lnet_parse(). Return non-zero for immediate failure, otherwise + * complete later with lnet_finalize(). This also gives back a receive + * credit if the LND does flow control. */ + int (*lnd_recv)(struct lnet_ni *ni, void *private, lnet_msg_t *msg, + int delayed, unsigned int niov, + struct kvec *iov, lnet_kiov_t *kiov, + unsigned int offset, unsigned int mlen, unsigned int rlen); + + /* lnet_parse() has had to delay processing of this message + * (e.g. waiting for a forwarding buffer or send credits). Give the + * LND a chance to free urgently needed resources. If called, return 0 + * for success and do NOT give back a receive credit; that has to wait + * until lnd_recv() gets called. On failure return < 0 and + * release resources; lnd_recv() will not be called.
*/ + int (*lnd_eager_recv)(struct lnet_ni *ni, void *private, lnet_msg_t *msg, + void **new_privatep); + + /* notification of peer health */ + void (*lnd_notify)(struct lnet_ni *ni, lnet_nid_t peer, int alive); + + /* query of peer aliveness */ + void (*lnd_query)(struct lnet_ni *ni, lnet_nid_t peer, unsigned long *when); + + /* accept a new connection */ + int (*lnd_accept)(struct lnet_ni *ni, struct socket *sock); + +} lnd_t; + +#define LNET_NI_STATUS_UP 0x15aac0de +#define LNET_NI_STATUS_DOWN 0xdeadface +#define LNET_NI_STATUS_INVALID 0x00000000 +typedef struct { + lnet_nid_t ns_nid; + __u32 ns_status; + __u32 ns_unused; +} WIRE_ATTR lnet_ni_status_t; + +struct lnet_tx_queue { + int tq_credits; /* # tx credits free */ + int tq_credits_min; /* lowest it's been */ + int tq_credits_max; /* total # tx credits */ + struct list_head tq_delayed; /* delayed TXs */ +}; + +#define LNET_MAX_INTERFACES 16 + +typedef struct lnet_ni { + spinlock_t ni_lock; + struct list_head ni_list; /* chain on ln_nis */ + struct list_head ni_cptlist; /* chain on ln_nis_cpt */ + int ni_maxtxcredits; /* # tx credits */ + /* # per-peer send credits */ + int ni_peertxcredits; + /* # per-peer router buffer credits */ + int ni_peerrtrcredits; + /* seconds to consider peer dead */ + int ni_peertimeout; + int ni_ncpts; /* number of CPTs */ + __u32 *ni_cpts; /* bond NI on some CPTs */ + lnet_nid_t ni_nid; /* interface's NID */ + void *ni_data; /* instance-specific data */ + lnd_t *ni_lnd; /* procedural interface */ + struct lnet_tx_queue **ni_tx_queues; /* percpt TX queues */ + int **ni_refs; /* percpt reference count */ + long ni_last_alive; /* when I was last alive */ + lnet_ni_status_t *ni_status; /* my health status */ + /* equivalent interfaces to use */ + char *ni_interfaces[LNET_MAX_INTERFACES]; +} lnet_ni_t; + +#define LNET_PROTO_PING_MATCHBITS 0x8000000000000000LL + +/* NB: value of these features equal to LNET_PROTO_PING_VERSION_x + * of old LNet, so there shouldn't be any compatibility 
issue */ +#define LNET_PING_FEAT_INVAL (0) /* no feature */ +#define LNET_PING_FEAT_BASE (1 << 0) /* just a ping */ +#define LNET_PING_FEAT_NI_STATUS (1 << 1) /* return NI status */ + +#define LNET_PING_FEAT_MASK (LNET_PING_FEAT_BASE | \ + LNET_PING_FEAT_NI_STATUS) + +typedef struct { + __u32 pi_magic; + __u32 pi_features; + lnet_pid_t pi_pid; + __u32 pi_nnis; + lnet_ni_status_t pi_ni[0]; +} WIRE_ATTR lnet_ping_info_t; + +/* router checker data, per router */ +#define LNET_MAX_RTR_NIS 16 +#define LNET_PINGINFO_SIZE offsetof(lnet_ping_info_t, pi_ni[LNET_MAX_RTR_NIS]) +typedef struct { + /* chain on the_lnet.ln_zombie_rcd or ln_deathrow_rcd */ + struct list_head rcd_list; + lnet_handle_md_t rcd_mdh; /* ping buffer MD */ + struct lnet_peer *rcd_gateway; /* reference to gateway */ + lnet_ping_info_t *rcd_pinginfo; /* ping buffer */ +} lnet_rc_data_t; + +typedef struct lnet_peer { + struct list_head lp_hashlist; /* chain on peer hash */ + struct list_head lp_txq; /* messages blocking for tx credits */ + struct list_head lp_rtrq; /* messages blocking for router credits */ + struct list_head lp_rtr_list; /* chain on router list */ + int lp_txcredits; /* # tx credits available */ + int lp_mintxcredits; /* low water mark */ + int lp_rtrcredits; /* # router credits */ + int lp_minrtrcredits; /* low water mark */ + unsigned int lp_alive:1; /* alive/dead? */ + unsigned int lp_notify:1; /* notification outstanding? */ + unsigned int lp_notifylnd:1; /* outstanding notification for LND? 
*/ + unsigned int lp_notifying:1; /* some thread is handling notification */ + unsigned int lp_ping_notsent; /* SEND event outstanding from ping */ + int lp_alive_count; /* # times router went dead<->alive */ + long lp_txqnob; /* bytes queued for sending */ + unsigned long lp_timestamp; /* time of last aliveness news */ + unsigned long lp_ping_timestamp; /* time of last ping attempt */ + unsigned long lp_ping_deadline; /* != 0 if ping reply expected */ + unsigned long lp_last_alive; /* when I was last alive */ + unsigned long lp_last_query; /* when lp_ni was queried last time */ + lnet_ni_t *lp_ni; /* interface peer is on */ + lnet_nid_t lp_nid; /* peer's NID */ + int lp_refcount; /* # refs */ + int lp_cpt; /* CPT this peer attached on */ + /* # refs from lnet_route_t::lr_gateway */ + int lp_rtr_refcount; + /* returned RC ping features */ + unsigned int lp_ping_feats; + struct list_head lp_routes; /* routers on this peer */ + lnet_rc_data_t *lp_rcd; /* router checker state */ +} lnet_peer_t; + +/* peer hash size */ +#define LNET_PEER_HASH_BITS 9 +#define LNET_PEER_HASH_SIZE (1 << LNET_PEER_HASH_BITS) + +/* peer hash table */ +struct lnet_peer_table { + int pt_version; /* /proc validity stamp */ + int pt_number; /* # peers extant */ + struct list_head pt_deathrow; /* zombie peers */ + struct list_head *pt_hash; /* NID->peer hash */ +}; + +/* peer aliveness is enabled only on routers for peers in a network where the + * lnet_ni_t::ni_peertimeout has been set to a positive value */ +#define lnet_peer_aliveness_enabled(lp) (the_lnet.ln_routing != 0 && \ + (lp)->lp_ni->ni_peertimeout > 0) + +typedef struct { + struct list_head lr_list; /* chain on net */ + struct list_head lr_gwlist; /* chain on gateway */ + lnet_peer_t *lr_gateway; /* router node */ + __u32 lr_net; /* remote network number */ + int lr_seq; /* sequence for round-robin */ + unsigned int lr_downis; /* number of down NIs */ + unsigned int lr_hops; /* how far I am */ + unsigned int lr_priority; /* route 
priority */ +} lnet_route_t; + +#define LNET_REMOTE_NETS_HASH_DEFAULT (1U << 7) +#define LNET_REMOTE_NETS_HASH_MAX (1U << 16) +#define LNET_REMOTE_NETS_HASH_SIZE (1 << the_lnet.ln_remote_nets_hbits) + +typedef struct { + struct list_head lrn_list; /* chain on ln_remote_nets_hash */ + struct list_head lrn_routes; /* routes to me */ + __u32 lrn_net; /* my net number */ +} lnet_remotenet_t; + +typedef struct { + struct list_head rbp_bufs; /* my free buffer pool */ + struct list_head rbp_msgs; /* messages blocking for a buffer */ + int rbp_npages; /* # pages in each buffer */ + int rbp_nbuffers; /* # buffers */ + int rbp_credits; /* # free buffers / blocked messages */ + int rbp_mincredits; /* low water mark */ +} lnet_rtrbufpool_t; + +typedef struct { + struct list_head rb_list; /* chain on rbp_bufs */ + lnet_rtrbufpool_t *rb_pool; /* owning pool */ + lnet_kiov_t rb_kiov[0]; /* the buffer space */ +} lnet_rtrbuf_t; + +typedef struct { + __u32 msgs_alloc; + __u32 msgs_max; + __u32 errors; + __u32 send_count; + __u32 recv_count; + __u32 route_count; + __u32 drop_count; + __u64 send_length; + __u64 recv_length; + __u64 route_length; + __u64 drop_length; +} WIRE_ATTR lnet_counters_t; + +#define LNET_PEER_HASHSIZE 503 /* prime! 
*/ + +#define LNET_NRBPOOLS 3 /* # different router buffer pools */ + +enum { + /* Didn't match anything */ + LNET_MATCHMD_NONE = (1 << 0), + /* Matched OK */ + LNET_MATCHMD_OK = (1 << 1), + /* Must be discarded */ + LNET_MATCHMD_DROP = (1 << 2), + /* match and buffer is exhausted */ + LNET_MATCHMD_EXHAUSTED = (1 << 3), + /* match or drop */ + LNET_MATCHMD_FINISH = (LNET_MATCHMD_OK | LNET_MATCHMD_DROP), +}; + +/* Options for lnet_portal_t::ptl_options */ +#define LNET_PTL_LAZY (1 << 0) +#define LNET_PTL_MATCH_UNIQUE (1 << 1) /* unique match, for RDMA */ +#define LNET_PTL_MATCH_WILDCARD (1 << 2) /* wildcard match, request portal */ + +/* parameter for matching operations (GET, PUT) */ +struct lnet_match_info { + __u64 mi_mbits; + lnet_process_id_t mi_id; + unsigned int mi_opc; + unsigned int mi_portal; + unsigned int mi_rlength; + unsigned int mi_roffset; +}; + +/* ME hash of RDMA portal */ +#define LNET_MT_HASH_BITS 8 +#define LNET_MT_HASH_SIZE (1 << LNET_MT_HASH_BITS) +#define LNET_MT_HASH_MASK (LNET_MT_HASH_SIZE - 1) +/* we allocate (LNET_MT_HASH_SIZE + 1) entries for lnet_match_table::mt_hash, + * the last entry is reserved for MEs with ignore-bits */ +#define LNET_MT_HASH_IGNORE LNET_MT_HASH_SIZE +/* __u64 has 2^6 bits, so need 2^(LNET_MT_HASH_BITS - LNET_MT_BITS_U64) which + * is 4 __u64s as bit-map, and add an extra __u64 (only use one bit) for the + * ME-list with ignore-bits, which is mtable::mt_hash[LNET_MT_HASH_IGNORE] */ +#define LNET_MT_BITS_U64 6 /* 2^6 bits */ +#define LNET_MT_EXHAUSTED_BITS (LNET_MT_HASH_BITS - LNET_MT_BITS_U64) +#define LNET_MT_EXHAUSTED_BMAP ((1 << LNET_MT_EXHAUSTED_BITS) + 1) + +/* portal match table */ +struct lnet_match_table { + /* reserved for upcoming patches, CPU partition ID */ + unsigned int mt_cpt; + unsigned int mt_portal; /* portal index */ + /* match table is set as "enabled" if there's non-exhausted MD + * attached on mt_mhash, it's only valid for wildcard portal */ + unsigned int mt_enabled; + /* bitmap to flag 
whether MEs on mt_hash are exhausted or not */ + __u64 mt_exhausted[LNET_MT_EXHAUSTED_BMAP]; + struct list_head *mt_mhash; /* matching hash */ +}; + +/* these are only useful for wildcard portal */ +/* Turn off message rotor for wildcard portals */ +#define LNET_PTL_ROTOR_OFF 0 +/* round-robin dispatch all PUT messages for wildcard portals */ +#define LNET_PTL_ROTOR_ON 1 +/* round-robin dispatch routed PUT message for wildcard portals */ +#define LNET_PTL_ROTOR_RR_RT 2 +/* dispatch routed PUT message by hashing source NID for wildcard portals */ +#define LNET_PTL_ROTOR_HASH_RT 3 + +typedef struct lnet_portal { + spinlock_t ptl_lock; + unsigned int ptl_index; /* portal ID, reserved */ + /* flags on this portal: lazy, unique... */ + unsigned int ptl_options; + /* list of messages which are stealing buffer */ + struct list_head ptl_msg_stealing; + /* messages blocking for MD */ + struct list_head ptl_msg_delayed; + /* Match table for each CPT */ + struct lnet_match_table **ptl_mtables; + /* spread rotor of incoming "PUT" */ + unsigned int ptl_rotor; + /* # active entries for this portal */ + int ptl_mt_nmaps; + /* array of active entries' cpu-partition-id */ + int ptl_mt_maps[0]; +} lnet_portal_t; + +#define LNET_LH_HASH_BITS 12 +#define LNET_LH_HASH_SIZE (1ULL << LNET_LH_HASH_BITS) +#define LNET_LH_HASH_MASK (LNET_LH_HASH_SIZE - 1) + +/* resource container (ME, MD, EQ) */ +struct lnet_res_container { + unsigned int rec_type; /* container type */ + __u64 rec_lh_cookie; /* cookie generator */ + struct list_head rec_active; /* active resource list */ + struct list_head *rec_lh_hash; /* handle hash */ +#ifdef LNET_USE_LIB_FREELIST + lnet_freelist_t rec_freelist; /* freelist for resources */ +#endif +}; + +/* message container */ +struct lnet_msg_container { + int msc_init; /* initialized or not */ + /* max # threads finalizing */ + int msc_nfinalizers; + /* msgs waiting to complete finalizing */ + struct list_head msc_finalizing; + struct list_head msc_active; /* active 
message list */ + /* threads doing finalization */ + void **msc_finalizers; +#ifdef LNET_USE_LIB_FREELIST + lnet_freelist_t msc_freelist; /* freelist for messages */ +#endif +}; + +/* Router Checker states */ +#define LNET_RC_STATE_SHUTDOWN 0 /* not started */ +#define LNET_RC_STATE_RUNNING 1 /* started up OK */ +#define LNET_RC_STATE_STOPPING 2 /* telling thread to stop */ + +typedef struct { + /* CPU partition table of LNet */ + struct cfs_cpt_table *ln_cpt_table; + /* number of CPTs in ln_cpt_table */ + unsigned int ln_cpt_number; + unsigned int ln_cpt_bits; + + /* protect LNet resources (ME/MD/EQ) */ + struct cfs_percpt_lock *ln_res_lock; + /* # portals */ + int ln_nportals; + /* the vector of portals */ + lnet_portal_t **ln_portals; + /* percpt ME containers */ + struct lnet_res_container **ln_me_containers; + /* percpt MD container */ + struct lnet_res_container **ln_md_containers; + + /* Event Queue container */ + struct lnet_res_container ln_eq_container; + wait_queue_head_t ln_eq_waitq; + spinlock_t ln_eq_wait_lock; + unsigned int ln_remote_nets_hbits; + + /* protect NI, peer table, credits, routers, rtrbuf... 
*/ + struct cfs_percpt_lock *ln_net_lock; + /* percpt message containers for active/finalizing/freed message */ + struct lnet_msg_container **ln_msg_containers; + lnet_counters_t **ln_counters; + struct lnet_peer_table **ln_peer_tables; + /* failure simulation */ + struct list_head ln_test_peers; + + struct list_head ln_nis; /* LND instances */ + /* NIs bond on specific CPT(s) */ + struct list_head ln_nis_cpt; + /* dying LND instances */ + struct list_head ln_nis_zombie; + lnet_ni_t *ln_loni; /* the loopback NI */ + /* NI to wait for events in */ + lnet_ni_t *ln_eq_waitni; + + /* remote networks with routes to them */ + struct list_head *ln_remote_nets_hash; + /* validity stamp */ + __u64 ln_remote_nets_version; + /* list of all known routers */ + struct list_head ln_routers; + /* validity stamp */ + __u64 ln_routers_version; + /* percpt router buffer pools */ + lnet_rtrbufpool_t **ln_rtrpools; + + lnet_handle_md_t ln_ping_target_md; + lnet_handle_eq_t ln_ping_target_eq; + lnet_ping_info_t *ln_ping_info; + + /* router checker startup/shutdown state */ + int ln_rc_state; + /* router checker's event queue */ + lnet_handle_eq_t ln_rc_eqh; + /* rcd still pending on net */ + struct list_head ln_rcd_deathrow; + /* rcd ready for free */ + struct list_head ln_rcd_zombie; + /* serialise startup/shutdown */ + struct semaphore ln_rc_signal; + + struct mutex ln_api_mutex; + struct mutex ln_lnd_mutex; + int ln_init; /* LNetInit() called? */ + /* Have I called LNetNIInit myself? */ + int ln_niinit_self; + /* LNetNIInit/LNetNIFini counter */ + int ln_refcount; + /* shutdown in progress */ + int ln_shutdown; + + int ln_routing; /* am I a router? 
*/ + lnet_pid_t ln_pid; /* requested pid */ + /* uniquely identifies this ni in this epoch */ + __u64 ln_interface_cookie; + /* registered LNDs */ + struct list_head ln_lnds; + + /* space for network names */ + char *ln_network_tokens; + int ln_network_tokens_nob; + /* test protocol compatibility flags */ + int ln_testprotocompat; + +} lnet_t; + +#endif diff --git a/drivers/staging/lustre/include/linux/lnet/linux/api-support.h b/drivers/staging/lustre/include/linux/lnet/linux/api-support.h new file mode 100644 index 000000000..e237ad6af --- /dev/null +++ b/drivers/staging/lustre/include/linux/lnet/linux/api-support.h @@ -0,0 +1,42 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. 
+ */ + +#ifndef __LINUX_API_SUPPORT_H__ +#define __LINUX_API_SUPPORT_H__ + +#ifndef __LNET_API_SUPPORT_H__ +#error Do not #include this file directly. #include instead +#endif + +#endif diff --git a/drivers/staging/lustre/include/linux/lnet/linux/lib-lnet.h b/drivers/staging/lustre/include/linux/lnet/linux/lib-lnet.h new file mode 100644 index 000000000..0f8f04d1e --- /dev/null +++ b/drivers/staging/lustre/include/linux/lnet/linux/lib-lnet.h @@ -0,0 +1,71 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + */ + +#ifndef __LNET_LINUX_LIB_LNET_H__ +#define __LNET_LINUX_LIB_LNET_H__ + +#ifndef __LNET_LIB_LNET_H__ +#error Do not #include this file directly. 
#include instead +#endif + +# include +# include +# include +#include "../../libcfs/libcfs.h" + +static inline __u64 +lnet_page2phys(struct page *p) +{ + /* compiler optimizer will elide unused branches */ + + switch (sizeof(typeof(page_to_phys(p)))) { + case 4: + /* page_to_phys returns a 32 bit physical address. This must + * be a 32 bit machine with <= 4G memory and we must ensure we + * don't sign extend when converting to 64 bits. */ + return (unsigned long)page_to_phys(p); + + case 8: + /* page_to_phys returns a 64 bit physical address :) */ + return page_to_phys(p); + + default: + LBUG(); + return 0; + } +} + +#define LNET_ROUTER + +#endif /* __LNET_LINUX_LIB_LNET_H__ */ diff --git a/drivers/staging/lustre/include/linux/lnet/linux/lib-types.h b/drivers/staging/lustre/include/linux/lnet/linux/lib-types.h new file mode 100644 index 000000000..669e8c038 --- /dev/null +++ b/drivers/staging/lustre/include/linux/lnet/linux/lib-types.h @@ -0,0 +1,45 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. 
+ * + * GPL HEADER END + */ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + */ + +#ifndef __LNET_LINUX_LIB_TYPES_H__ +#define __LNET_LINUX_LIB_TYPES_H__ + +#ifndef __LNET_LIB_TYPES_H__ +#error Do not #include this file directly. #include instead +#endif + +# include +# include + +#endif diff --git a/drivers/staging/lustre/include/linux/lnet/linux/lnet.h b/drivers/staging/lustre/include/linux/lnet/linux/lnet.h new file mode 100644 index 000000000..1e888f1ef --- /dev/null +++ b/drivers/staging/lustre/include/linux/lnet/linux/lnet.h @@ -0,0 +1,56 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2011, Intel Corporation. 
+ */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + */ + +#ifndef __LNET_LINUX_LNET_H__ +#define __LNET_LINUX_LNET_H__ + +#ifndef __LNET_H__ +#error Do not #include this file directly. #include instead +#endif + +/* + * lnet.h + * + * User application interface file + */ + +#include +#include + +#define cfs_tcp_sendpage(sk, page, offset, size, flags) \ + tcp_sendpage(sk, page, offset, size, flags) + +#endif diff --git a/drivers/staging/lustre/include/linux/lnet/lnet-sysctl.h b/drivers/staging/lustre/include/linux/lnet/lnet-sysctl.h new file mode 100644 index 000000000..2dee1b97f --- /dev/null +++ b/drivers/staging/lustre/include/linux/lnet/lnet-sysctl.h @@ -0,0 +1,49 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. 
+ */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + */ + +#ifndef __LNET_SYSCTL_H__ +#define __LNET_SYSCTL_H__ + +#if defined(CONFIG_SYSCTL) + +#define CTL_KRANAL 201 +#define CTL_O2IBLND 205 +#define CTL_PTLLND 206 +#define CTL_QSWNAL 207 +#define CTL_SOCKLND 208 +#define CTL_GNILND 210 + +#endif + +#endif diff --git a/drivers/staging/lustre/include/linux/lnet/lnet.h b/drivers/staging/lustre/include/linux/lnet/lnet.h new file mode 100644 index 000000000..75c0ab919 --- /dev/null +++ b/drivers/staging/lustre/include/linux/lnet/lnet.h @@ -0,0 +1,51 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. 
+ */ + +#ifndef __LNET_H__ +#define __LNET_H__ + +/* + * lnet.h + * + * User application interface file + */ +#include "linux/lnet.h" + +#include "types.h" +#include "api.h" + +#define LNET_NIDSTR_COUNT 1024 /* # of nidstrings */ +#define LNET_NIDSTR_SIZE 32 /* size of each one (see below for usage) */ + +#endif diff --git a/drivers/staging/lustre/include/linux/lnet/lnetctl.h b/drivers/staging/lustre/include/linux/lnet/lnetctl.h new file mode 100644 index 000000000..98181d389 --- /dev/null +++ b/drivers/staging/lustre/include/linux/lnet/lnetctl.h @@ -0,0 +1,80 @@ +/* + * This file is part of Portals, http://www.sf.net/projects/lustre/ + * + * Portals is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Portals is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Portals; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ * + * header for libptlctl.a + */ +#ifndef _PTLCTL_H_ +#define _PTLCTL_H_ + +#include "../libcfs/libcfs.h" +#include "types.h" + +#define LNET_DEV_ID 0 +#define LNET_DEV_PATH "/dev/lnet" +#define LNET_DEV_MAJOR 10 +#define LNET_DEV_MINOR 240 +#define OBD_DEV_ID 1 +#define OBD_DEV_NAME "obd" +#define OBD_DEV_PATH "/dev/" OBD_DEV_NAME +#define OBD_DEV_MAJOR 10 +#define OBD_DEV_MINOR 241 +#define SMFS_DEV_ID 2 +#define SMFS_DEV_PATH "/dev/snapdev" +#define SMFS_DEV_MAJOR 10 +#define SMFS_DEV_MINOR 242 + +int ptl_initialize(int argc, char **argv); +int jt_ptl_network(int argc, char **argv); +int jt_ptl_list_nids(int argc, char **argv); +int jt_ptl_which_nid(int argc, char **argv); +int jt_ptl_print_interfaces(int argc, char **argv); +int jt_ptl_add_interface(int argc, char **argv); +int jt_ptl_del_interface(int argc, char **argv); +int jt_ptl_print_peers(int argc, char **argv); +int jt_ptl_add_peer(int argc, char **argv); +int jt_ptl_del_peer(int argc, char **argv); +int jt_ptl_print_connections(int argc, char **argv); +int jt_ptl_disconnect(int argc, char **argv); +int jt_ptl_push_connection(int argc, char **argv); +int jt_ptl_print_active_txs(int argc, char **argv); +int jt_ptl_ping(int argc, char **argv); +int jt_ptl_mynid(int argc, char **argv); +int jt_ptl_add_uuid(int argc, char **argv); +int jt_ptl_add_uuid_old(int argc, char **argv); /* backwards compatibility */ +int jt_ptl_close_uuid(int argc, char **argv); +int jt_ptl_del_uuid(int argc, char **argv); +int jt_ptl_add_route(int argc, char **argv); +int jt_ptl_del_route(int argc, char **argv); +int jt_ptl_notify_router(int argc, char **argv); +int jt_ptl_print_routes(int argc, char **argv); +int jt_ptl_fail_nid(int argc, char **argv); +int jt_ptl_lwt(int argc, char **argv); +int jt_ptl_testprotocompat(int argc, char **argv); +int jt_ptl_memhog(int argc, char **argv); + +int dbg_initialize(int argc, char **argv); +int jt_dbg_filter(int argc, char **argv); +int jt_dbg_show(int argc, char **argv); +int 
jt_dbg_list(int argc, char **argv); +int jt_dbg_debug_kernel(int argc, char **argv); +int jt_dbg_debug_daemon(int argc, char **argv); +int jt_dbg_debug_file(int argc, char **argv); +int jt_dbg_clear_debug_buf(int argc, char **argv); +int jt_dbg_mark_debug_buf(int argc, char **argv); +int jt_dbg_modules(int argc, char **argv); +int jt_dbg_panic(int argc, char **argv); + +#endif diff --git a/drivers/staging/lustre/include/linux/lnet/lnetst.h b/drivers/staging/lustre/include/linux/lnet/lnetst.h new file mode 100644 index 000000000..885f708d4 --- /dev/null +++ b/drivers/staging/lustre/include/linux/lnet/lnetst.h @@ -0,0 +1,491 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2011, Intel Corporation. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. 
+ * + * lnet/include/lnet/lnetst.h + * + * Author: Liang Zhen + */ + +#ifndef __LNET_ST_H__ +#define __LNET_ST_H__ + +#include "../libcfs/libcfs.h" +#include "lnet.h" +#include "lib-types.h" + +#define LST_FEAT_NONE (0) +#define LST_FEAT_BULK_LEN (1 << 0) /* enable variable page size */ + +#define LST_FEATS_EMPTY (LST_FEAT_NONE) +#define LST_FEATS_MASK (LST_FEAT_NONE | LST_FEAT_BULK_LEN) + +#define LST_NAME_SIZE 32 /* max name buffer length */ + +#define LSTIO_DEBUG 0xC00 /* debug */ +#define LSTIO_SESSION_NEW 0xC01 /* create session */ +#define LSTIO_SESSION_END 0xC02 /* end session */ +#define LSTIO_SESSION_INFO 0xC03 /* query session */ +#define LSTIO_GROUP_ADD 0xC10 /* add group */ +#define LSTIO_GROUP_LIST 0xC11 /* list all groups in session */ +#define LSTIO_GROUP_INFO 0xC12 /* query default information of specified group */ +#define LSTIO_GROUP_DEL 0xC13 /* delete group */ +#define LSTIO_NODES_ADD 0xC14 /* add nodes to specified group */ +#define LSTIO_GROUP_UPDATE 0xC15 /* update group */ +#define LSTIO_BATCH_ADD 0xC20 /* add batch */ +#define LSTIO_BATCH_START 0xC21 /* start batch */ +#define LSTIO_BATCH_STOP 0xC22 /* stop batch */ +#define LSTIO_BATCH_DEL 0xC23 /* delete batch */ +#define LSTIO_BATCH_LIST 0xC24 /* show all batches in the session */ +#define LSTIO_BATCH_INFO 0xC25 /* show detail of specified batch */ +#define LSTIO_TEST_ADD 0xC26 /* add test (to batch) */ +#define LSTIO_BATCH_QUERY 0xC27 /* query batch status */ +#define LSTIO_STAT_QUERY 0xC30 /* get stats */ + +typedef struct { + lnet_nid_t ses_nid; /* nid of console node */ + __u64 ses_stamp; /* time stamp */ +} lst_sid_t; /*** session id */ + +extern lst_sid_t LST_INVALID_SID; + +typedef struct { + __u64 bat_id; /* unique id in session */ +} lst_bid_t; /*** batch id (group of tests) */ + +/* Status of test node */ +#define LST_NODE_ACTIVE 0x1 /* node in this session */ +#define LST_NODE_BUSY 0x2 /* node is taken by other session */ +#define LST_NODE_DOWN 0x4 /* node is down */ +#define
LST_NODE_UNKNOWN 0x8 /* node not in session */ + +typedef struct { + lnet_process_id_t nde_id; /* id of node */ + int nde_state; /* state of node */ +} lstcon_node_ent_t; /*** node entry, for list_group command */ + +typedef struct { + int nle_nnode; /* # of nodes */ + int nle_nactive; /* # of active nodes */ + int nle_nbusy; /* # of busy nodes */ + int nle_ndown; /* # of down nodes */ + int nle_nunknown; /* # of unknown nodes */ +} lstcon_ndlist_ent_t; /*** node_list entry, for list_batch command */ + +typedef struct { + int tse_type; /* test type */ + int tse_loop; /* loop count */ + int tse_concur; /* concurrency of test */ +} lstcon_test_ent_t; /*** test summary entry, for list_batch command */ + +typedef struct { + int bae_state; /* batch status */ + int bae_timeout; /* batch timeout */ + int bae_ntest; /* # of tests in the batch */ +} lstcon_batch_ent_t; /*** batch summary entry, for list_batch command */ + +typedef struct { + lstcon_ndlist_ent_t tbe_cli_nle; /* client (group) node_list entry */ + lstcon_ndlist_ent_t tbe_srv_nle; /* server (group) node_list entry */ + union { + lstcon_test_ent_t tbe_test; /* test entry */ + lstcon_batch_ent_t tbe_batch; /* batch entry */ + } u; +} lstcon_test_batch_ent_t; /*** test/batch verbose information entry, + *** for list_batch command */ + +typedef struct { + struct list_head rpe_link; /* link chain */ + lnet_process_id_t rpe_peer; /* peer's id */ + struct timeval rpe_stamp; /* time stamp of RPC */ + int rpe_state; /* peer's state */ + int rpe_rpc_errno; /* RPC errno */ + + lst_sid_t rpe_sid; /* peer's session id */ + int rpe_fwk_errno; /* framework errno */ + int rpe_priv[4]; /* private data */ + char rpe_payload[0]; /* private reply payload */ +} lstcon_rpc_ent_t; + +typedef struct { + int trs_rpc_stat[4]; /* RPCs stat (0: total, 1: success, 2: failure, 3: reserved) */ + int trs_rpc_errno; /* RPC errno */ + int trs_fwk_stat[8]; /* framework stat */ + int trs_fwk_errno; /* errno of the first remote error */ + void
*trs_fwk_private; /* private framework stat */ +} lstcon_trans_stat_t; + +static inline int +lstcon_rpc_stat_total(lstcon_trans_stat_t *stat, int inc) +{ + return inc ? ++stat->trs_rpc_stat[0] : stat->trs_rpc_stat[0]; +} + +static inline int +lstcon_rpc_stat_success(lstcon_trans_stat_t *stat, int inc) +{ + return inc ? ++stat->trs_rpc_stat[1] : stat->trs_rpc_stat[1]; +} + +static inline int +lstcon_rpc_stat_failure(lstcon_trans_stat_t *stat, int inc) +{ + return inc ? ++stat->trs_rpc_stat[2] : stat->trs_rpc_stat[2]; +} + +static inline int +lstcon_sesop_stat_success(lstcon_trans_stat_t *stat, int inc) +{ + return inc ? ++stat->trs_fwk_stat[0] : stat->trs_fwk_stat[0]; +} + +static inline int +lstcon_sesop_stat_failure(lstcon_trans_stat_t *stat, int inc) +{ + return inc ? ++stat->trs_fwk_stat[1] : stat->trs_fwk_stat[1]; +} + +static inline int +lstcon_sesqry_stat_active(lstcon_trans_stat_t *stat, int inc) +{ + return inc ? ++stat->trs_fwk_stat[0] : stat->trs_fwk_stat[0]; +} + +static inline int +lstcon_sesqry_stat_busy(lstcon_trans_stat_t *stat, int inc) +{ + return inc ? ++stat->trs_fwk_stat[1] : stat->trs_fwk_stat[1]; +} + +static inline int +lstcon_sesqry_stat_unknown(lstcon_trans_stat_t *stat, int inc) +{ + return inc ? ++stat->trs_fwk_stat[2] : stat->trs_fwk_stat[2]; +} + +static inline int +lstcon_tsbop_stat_success(lstcon_trans_stat_t *stat, int inc) +{ + return inc ? ++stat->trs_fwk_stat[0] : stat->trs_fwk_stat[0]; +} + +static inline int +lstcon_tsbop_stat_failure(lstcon_trans_stat_t *stat, int inc) +{ + return inc ? ++stat->trs_fwk_stat[1] : stat->trs_fwk_stat[1]; +} + +static inline int +lstcon_tsbqry_stat_idle(lstcon_trans_stat_t *stat, int inc) +{ + return inc ? ++stat->trs_fwk_stat[0] : stat->trs_fwk_stat[0]; +} + +static inline int +lstcon_tsbqry_stat_run(lstcon_trans_stat_t *stat, int inc) +{ + return inc ? 
++stat->trs_fwk_stat[1] : stat->trs_fwk_stat[1]; +} + +static inline int +lstcon_tsbqry_stat_failure(lstcon_trans_stat_t *stat, int inc) +{ + return inc ? ++stat->trs_fwk_stat[2] : stat->trs_fwk_stat[2]; +} + +static inline int +lstcon_statqry_stat_success(lstcon_trans_stat_t *stat, int inc) +{ + return inc ? ++stat->trs_fwk_stat[0] : stat->trs_fwk_stat[0]; +} + +static inline int +lstcon_statqry_stat_failure(lstcon_trans_stat_t *stat, int inc) +{ + return inc ? ++stat->trs_fwk_stat[1] : stat->trs_fwk_stat[1]; +} + +/* create a session */ +typedef struct { + int lstio_ses_key; /* IN: local key */ + int lstio_ses_timeout; /* IN: session timeout */ + int lstio_ses_force; /* IN: force create ? */ + /** IN: session features */ + unsigned lstio_ses_feats; + lst_sid_t *lstio_ses_idp; /* OUT: session id */ + int lstio_ses_nmlen; /* IN: name length */ + char *lstio_ses_namep; /* IN: session name */ +} lstio_session_new_args_t; + +/* query current session */ +typedef struct { + lst_sid_t *lstio_ses_idp; /* OUT: session id */ + int *lstio_ses_keyp; /* OUT: local key */ + /** OUT: session features */ + unsigned *lstio_ses_featp; + lstcon_ndlist_ent_t *lstio_ses_ndinfo; /* OUT: */ + int lstio_ses_nmlen; /* IN: name length */ + char *lstio_ses_namep; /* OUT: session name */ +} lstio_session_info_args_t; + +/* delete a session */ +typedef struct { + int lstio_ses_key; /* IN: session key */ +} lstio_session_end_args_t; + +#define LST_OPC_SESSION 1 +#define LST_OPC_GROUP 2 +#define LST_OPC_NODES 3 +#define LST_OPC_BATCHCLI 4 +#define LST_OPC_BATCHSRV 5 + +typedef struct { + int lstio_dbg_key; /* IN: session key */ + int lstio_dbg_type; /* IN: debug session|batch|group|nodes list */ + int lstio_dbg_flags; /* IN: reserved debug flags */ + int lstio_dbg_timeout; /* IN: timeout of debug */ + + int lstio_dbg_nmlen; /* IN: len of name */ + char *lstio_dbg_namep; /* IN: name of group|batch */ + int lstio_dbg_count; /* IN: # of test nodes to debug */ + lnet_process_id_t *lstio_dbg_idsp; /*
IN: id of test nodes */ + struct list_head *lstio_dbg_resultp; /* OUT: list head of result buffer */ +} lstio_debug_args_t; + +typedef struct { + int lstio_grp_key; /* IN: session key */ + int lstio_grp_nmlen; /* IN: name length */ + char *lstio_grp_namep; /* IN: group name */ +} lstio_group_add_args_t; + +typedef struct { + int lstio_grp_key; /* IN: session key */ + int lstio_grp_nmlen; /* IN: name length */ + char *lstio_grp_namep; /* IN: group name */ +} lstio_group_del_args_t; + +#define LST_GROUP_CLEAN 1 /* remove inactive nodes in the group */ +#define LST_GROUP_REFRESH 2 /* refresh inactive nodes in the group */ +#define LST_GROUP_RMND 3 /* delete nodes from the group */ + +typedef struct { + int lstio_grp_key; /* IN: session key */ + int lstio_grp_opc; /* IN: OPC */ + int lstio_grp_args; /* IN: arguments */ + int lstio_grp_nmlen; /* IN: name length */ + char *lstio_grp_namep; /* IN: group name */ + int lstio_grp_count; /* IN: # of nodes id */ + lnet_process_id_t *lstio_grp_idsp; /* IN: array of nodes */ + struct list_head *lstio_grp_resultp; /* OUT: list head of result buffer */ +} lstio_group_update_args_t; + +typedef struct { + int lstio_grp_key; /* IN: session key */ + int lstio_grp_nmlen; /* IN: name length */ + char *lstio_grp_namep; /* IN: group name */ + int lstio_grp_count; /* IN: # of nodes */ + /** OUT: session features */ + unsigned *lstio_grp_featp; + lnet_process_id_t *lstio_grp_idsp; /* IN: nodes */ + struct list_head *lstio_grp_resultp; /* OUT: list head of result buffer */ +} lstio_group_nodes_args_t; + +typedef struct { + int lstio_grp_key; /* IN: session key */ + int lstio_grp_idx; /* IN: group idx */ + int lstio_grp_nmlen; /* IN: name len */ + char *lstio_grp_namep; /* OUT: name */ +} lstio_group_list_args_t; + +typedef struct { + int lstio_grp_key; /* IN: session key */ + int lstio_grp_nmlen; /* IN: name len */ + char *lstio_grp_namep; /* IN: name */ + lstcon_ndlist_ent_t *lstio_grp_entp; /* OUT: description of group */ + + int 
*lstio_grp_idxp; /* IN/OUT: node index */ + int *lstio_grp_ndentp; /* IN/OUT: # of nodent */ + lstcon_node_ent_t *lstio_grp_dentsp; /* OUT: nodent array */ +} lstio_group_info_args_t; + +#define LST_DEFAULT_BATCH "batch" /* default batch name */ + +typedef struct { + int lstio_bat_key; /* IN: session key */ + int lstio_bat_nmlen; /* IN: name length */ + char *lstio_bat_namep; /* IN: batch name */ +} lstio_batch_add_args_t; + +typedef struct { + int lstio_bat_key; /* IN: session key */ + int lstio_bat_nmlen; /* IN: name length */ + char *lstio_bat_namep; /* IN: batch name */ +} lstio_batch_del_args_t; + +typedef struct { + int lstio_bat_key; /* IN: session key */ + int lstio_bat_timeout; /* IN: timeout for the batch */ + int lstio_bat_nmlen; /* IN: name length */ + char *lstio_bat_namep; /* IN: batch name */ + struct list_head *lstio_bat_resultp; /* OUT: list head of result buffer */ +} lstio_batch_run_args_t; + +typedef struct { + int lstio_bat_key; /* IN: session key */ + int lstio_bat_force; /* IN: abort unfinished test RPC */ + int lstio_bat_nmlen; /* IN: name length */ + char *lstio_bat_namep; /* IN: batch name */ + struct list_head *lstio_bat_resultp; /* OUT: list head of result buffer */ +} lstio_batch_stop_args_t; + +typedef struct { + int lstio_bat_key; /* IN: session key */ + int lstio_bat_testidx; /* IN: test index */ + int lstio_bat_client; /* IN: is test client? 
*/ + int lstio_bat_timeout; /* IN: timeout for waiting */ + int lstio_bat_nmlen; /* IN: name length */ + char *lstio_bat_namep; /* IN: batch name */ + struct list_head *lstio_bat_resultp; /* OUT: list head of result buffer */ +} lstio_batch_query_args_t; + +typedef struct { + int lstio_bat_key; /* IN: session key */ + int lstio_bat_idx; /* IN: index */ + int lstio_bat_nmlen; /* IN: name length */ + char *lstio_bat_namep; /* IN: batch name */ +} lstio_batch_list_args_t; + +typedef struct { + int lstio_bat_key; /* IN: session key */ + int lstio_bat_nmlen; /* IN: name length */ + char *lstio_bat_namep; /* IN: name */ + int lstio_bat_server; /* IN: query server or not */ + int lstio_bat_testidx; /* IN: test index */ + lstcon_test_batch_ent_t *lstio_bat_entp; /* OUT: batch ent */ + + int *lstio_bat_idxp; /* IN/OUT: index of node */ + int *lstio_bat_ndentp; /* IN/OUT: # of nodent */ + lstcon_node_ent_t *lstio_bat_dentsp; /* array of nodent */ +} lstio_batch_info_args_t; + +/* add stat in session */ +typedef struct { + int lstio_sta_key; /* IN: session key */ + int lstio_sta_timeout; /* IN: timeout for stat request */ + int lstio_sta_nmlen; /* IN: group name length */ + char *lstio_sta_namep; /* IN: group name */ + int lstio_sta_count; /* IN: # of pid */ + lnet_process_id_t *lstio_sta_idsp; /* IN: pid */ + struct list_head *lstio_sta_resultp; /* OUT: list head of result buffer */ +} lstio_stat_args_t; + +typedef enum { + LST_TEST_BULK = 1, + LST_TEST_PING = 2 +} lst_test_type_t; + +/* create a test in a batch */ +#define LST_MAX_CONCUR 1024 /* Max concurrency of test */ + +typedef struct { + int lstio_tes_key; /* IN: session key */ + int lstio_tes_bat_nmlen; /* IN: batch name len */ + char *lstio_tes_bat_name; /* IN: batch name */ + int lstio_tes_type; /* IN: test type */ + int lstio_tes_oneside; /* IN: one sided test */ + int lstio_tes_loop; /* IN: loop count */ + int lstio_tes_concur; /* IN: concurrency */ + + int lstio_tes_dist; /* IN: node distribution in destination 
groups */ + int lstio_tes_span; /* IN: node span in destination groups */ + int lstio_tes_sgrp_nmlen; /* IN: source group name length */ + char *lstio_tes_sgrp_name; /* IN: group name */ + int lstio_tes_dgrp_nmlen; /* IN: destination group name length */ + char *lstio_tes_dgrp_name; /* IN: group name */ + + int lstio_tes_param_len; /* IN: param buffer len */ + void *lstio_tes_param; /* IN: parameter for specified test: + lstio_bulk_param_t, + lstio_ping_param_t, + ... more */ + int *lstio_tes_retp; /* OUT: private returned value */ + struct list_head *lstio_tes_resultp; /* OUT: list head of result buffer */ +} lstio_test_args_t; + +typedef enum { + LST_BRW_READ = 1, + LST_BRW_WRITE = 2 +} lst_brw_type_t; + +typedef enum { + LST_BRW_CHECK_NONE = 1, + LST_BRW_CHECK_SIMPLE = 2, + LST_BRW_CHECK_FULL = 3 +} lst_brw_flags_t; + +typedef struct { + int blk_opc; /* bulk operation code */ + int blk_size; /* size (bytes) */ + int blk_time; /* time of running the test*/ + int blk_flags; /* reserved flags */ +} lst_test_bulk_param_t; + +typedef struct { + int png_size; /* size of ping message */ + int png_time; /* time */ + int png_loop; /* loop */ + int png_flags; /* reserved flags */ +} lst_test_ping_param_t; + +/* more tests */ +typedef struct { + __u32 errors; + __u32 rpcs_sent; + __u32 rpcs_rcvd; + __u32 rpcs_dropped; + __u32 rpcs_expired; + __u64 bulk_get; + __u64 bulk_put; +} WIRE_ATTR srpc_counters_t; + +typedef struct { + /** milliseconds since current session started */ + __u32 running_ms; + __u32 active_batches; + __u32 zombie_sessions; + __u32 brw_errors; + __u32 ping_errors; +} WIRE_ATTR sfw_counters_t; + +#endif diff --git a/drivers/staging/lustre/include/linux/lnet/ptllnd.h b/drivers/staging/lustre/include/linux/lnet/ptllnd.h new file mode 100644 index 000000000..c91d65329 --- /dev/null +++ b/drivers/staging/lustre/include/linux/lnet/ptllnd.h @@ -0,0 +1,93 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + * + * lnet/include/lnet/ptllnd.h + * + * Author: PJ Kirner + */ + +/* + * The PTLLND was designed to support Portals with + * Lustre and non-lustre UNLINK semantics. + * However for now the two targets are Cray Portals + * on the XT3 and Lustre Portals (for testing) both + * have Lustre UNLINK semantics, so this is defined + * by default. 
+ */ +#define LUSTRE_PORTALS_UNLINK_SEMANTICS + +#ifdef _USING_LUSTRE_PORTALS_ + +/* NIDs are 64-bits on Lustre Portals */ +#define FMT_NID "%llu" +#define FMT_PID "%d" + +/* When using Lustre Portals Lustre completion semantics are implicit */ +#define PTL_MD_LUSTRE_COMPLETION_SEMANTICS 0 + +#else /* _USING_CRAY_PORTALS_ */ + +/* NIDs are integers on Cray Portals */ +#define FMT_NID "%u" +#define FMT_PID "%d" + +/* When using Cray Portals this is defined in the Cray Portals Header*/ +/*#define PTL_MD_LUSTRE_COMPLETION_SEMANTICS */ + +/* Can compare handles directly on Cray Portals */ +#define PtlHandleIsEqual(a, b) ((a) == (b)) + +/* Different error types on Cray Portals*/ +#define ptl_err_t ptl_ni_fail_t + +/* + * The Cray Portals has no maximum number of IOVs. The + * maximum is limited only by memory and size of the + * int parameters (2^31-1). + * Lustre only really requires the underlying + * implementation to support at least LNET_MAX_IOV, + * so for Cray portals we can safely just use that + * value here. + * + */ +#define PTL_MD_MAX_IOV LNET_MAX_IOV + +#endif + +#define FMT_PTLID "ptlid:"FMT_PID"-"FMT_NID + +/* Align incoming small request messages to an 8 byte boundary if this is + * supported to avoid alignment issues on some architectures */ +#ifndef PTL_MD_LOCAL_ALIGN8 +# define PTL_MD_LOCAL_ALIGN8 0 +#endif diff --git a/drivers/staging/lustre/include/linux/lnet/ptllnd_wire.h b/drivers/staging/lustre/include/linux/lnet/ptllnd_wire.h new file mode 100644 index 000000000..808f37b64 --- /dev/null +++ b/drivers/staging/lustre/include/linux/lnet/ptllnd_wire.h @@ -0,0 +1,119 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation.
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + * + * lnet/include/lnet/ptllnd_wire.h + * + * Author: PJ Kirner + */ + +/* Minimum buffer size that any peer will post to receive ptllnd messages */ +#define PTLLND_MIN_BUFFER_SIZE 256 + +/************************************************************************ + * Tunable defaults that {u,k}lnds/ptllnd should have in common. + */ + +#define PTLLND_PORTAL 9 /* The same portal PTLRPC used when talking to cray portals */ +#define PTLLND_PID 9 /* The Portals PID */ +#define PTLLND_PEERCREDITS 8 /* concurrent sends to 1 peer */ + +/* Default buffer size for kernel ptllnds (guaranteed eager) */ +#define PTLLND_MAX_KLND_MSG_SIZE 512 + +/* Default buffer size for catamount ptllnds (not guaranteed eager) - large + * enough to avoid RDMA for anything sent while control is not in liblustre */ +#define PTLLND_MAX_ULND_MSG_SIZE 512 + +/************************************************************************ + * Portals LND Wire message format. + * These are sent in sender's byte order (i.e. receiver flips).
+ */ + +#define PTL_RESERVED_MATCHBITS 0x100 /* below this value is reserved + * above is for bulk data transfer */ +#define LNET_MSG_MATCHBITS 0 /* the value for the message channel */ + +typedef struct { + lnet_hdr_t kptlim_hdr; /* portals header */ + char kptlim_payload[0]; /* piggy-backed payload */ +} WIRE_ATTR kptl_immediate_msg_t; + +typedef struct { + lnet_hdr_t kptlrm_hdr; /* portals header */ + __u64 kptlrm_matchbits; /* matchbits */ +} WIRE_ATTR kptl_rdma_msg_t; + +typedef struct { + __u64 kptlhm_matchbits; /* matchbits */ + __u32 kptlhm_max_msg_size; /* max message size */ +} WIRE_ATTR kptl_hello_msg_t; + +typedef struct { + /* First 2 fields fixed FOR ALL TIME */ + __u32 ptlm_magic; /* I'm a Portals LND message */ + __u16 ptlm_version; /* this is my version number */ + __u8 ptlm_type; /* the message type */ + __u8 ptlm_credits; /* returned credits */ + __u32 ptlm_nob; /* # bytes in whole message */ + __u32 ptlm_cksum; /* checksum (0 == no checksum) */ + __u64 ptlm_srcnid; /* sender's NID */ + __u64 ptlm_srcstamp; /* sender's incarnation */ + __u64 ptlm_dstnid; /* destination's NID */ + __u64 ptlm_dststamp; /* destination's incarnation */ + __u32 ptlm_srcpid; /* sender's PID */ + __u32 ptlm_dstpid; /* destination's PID */ + + union { + kptl_immediate_msg_t immediate; + kptl_rdma_msg_t rdma; + kptl_hello_msg_t hello; + } WIRE_ATTR ptlm_u; + +} kptl_msg_t; + +/* kptl_msg_t::ptlm_credits is only a __u8 */ +#define PTLLND_MSG_MAX_CREDITS ((typeof(((kptl_msg_t *)0)->ptlm_credits)) - 1) + +#define PTLLND_MSG_MAGIC LNET_PROTO_PTL_MAGIC +#define PTLLND_MSG_VERSION 0x04 + +#define PTLLND_RDMA_OK 0x00 +#define PTLLND_RDMA_FAIL 0x01 + +#define PTLLND_MSG_TYPE_INVALID 0x00 +#define PTLLND_MSG_TYPE_PUT 0x01 +#define PTLLND_MSG_TYPE_GET 0x02 +#define PTLLND_MSG_TYPE_IMMEDIATE 0x03 /* No bulk data xfer*/ +#define PTLLND_MSG_TYPE_NOOP 0x04 +#define PTLLND_MSG_TYPE_HELLO 0x05 +#define PTLLND_MSG_TYPE_NAK 0x06 diff --git 
a/drivers/staging/lustre/include/linux/lnet/socklnd.h b/drivers/staging/lustre/include/linux/lnet/socklnd.h new file mode 100644 index 000000000..389038b12 --- /dev/null +++ b/drivers/staging/lustre/include/linux/lnet/socklnd.h @@ -0,0 +1,103 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. 
+ * + * lnet/include/lnet/socklnd.h + * + * #defines shared between socknal implementation and utilities + */ +#ifndef __LNET_LNET_SOCKLND_H__ +#define __LNET_LNET_SOCKLND_H__ + +#include "types.h" +#include "lib-types.h" + +#define SOCKLND_CONN_NONE (-1) +#define SOCKLND_CONN_ANY 0 +#define SOCKLND_CONN_CONTROL 1 +#define SOCKLND_CONN_BULK_IN 2 +#define SOCKLND_CONN_BULK_OUT 3 +#define SOCKLND_CONN_NTYPES 4 + +#define SOCKLND_CONN_ACK SOCKLND_CONN_BULK_IN + +typedef struct { + __u32 kshm_magic; /* magic number of socklnd message */ + __u32 kshm_version; /* version of socklnd message */ + lnet_nid_t kshm_src_nid; /* sender's nid */ + lnet_nid_t kshm_dst_nid; /* destination nid */ + lnet_pid_t kshm_src_pid; /* sender's pid */ + lnet_pid_t kshm_dst_pid; /* destination pid */ + __u64 kshm_src_incarnation; /* sender's incarnation */ + __u64 kshm_dst_incarnation; /* destination's incarnation */ + __u32 kshm_ctype; /* connection type */ + __u32 kshm_nips; /* # IP addrs */ + __u32 kshm_ips[0]; /* IP addrs */ +} WIRE_ATTR ksock_hello_msg_t; + +typedef struct { + lnet_hdr_t ksnm_hdr; /* lnet hdr */ + + /* + * ksnm_payload is removed because of winnt compiler's limitation: + * zero-sized array can only be placed at the tail of [nested] + * structure definitions. 
lnet payload will be stored just after + * the body of structure ksock_lnet_msg_t + */ +} WIRE_ATTR ksock_lnet_msg_t; + +typedef struct { + __u32 ksm_type; /* type of socklnd message */ + __u32 ksm_csum; /* checksum if != 0 */ + __u64 ksm_zc_cookies[2]; /* Zero-Copy request/ACK cookie */ + union { + ksock_lnet_msg_t lnetmsg; /* lnet message, it's empty if it's NOOP */ + } WIRE_ATTR ksm_u; +} WIRE_ATTR ksock_msg_t; + +static inline void +socklnd_init_msg(ksock_msg_t *msg, int type) +{ + msg->ksm_csum = 0; + msg->ksm_type = type; + msg->ksm_zc_cookies[0] = msg->ksm_zc_cookies[1] = 0; +} + +#define KSOCK_MSG_NOOP 0xc0 /* ksm_u empty */ +#define KSOCK_MSG_LNET 0xc1 /* lnet msg */ + +/* We need to know this number to parse hello msg from ksocklnd in + * other LND (usocklnd, for example) */ +#define KSOCK_PROTO_V2 2 +#define KSOCK_PROTO_V3 3 + +#endif diff --git a/drivers/staging/lustre/include/linux/lnet/types.h b/drivers/staging/lustre/include/linux/lnet/types.h new file mode 100644 index 000000000..68d8139a2 --- /dev/null +++ b/drivers/staging/lustre/include/linux/lnet/types.h @@ -0,0 +1,492 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2012, Intel Corporation. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + */ + +#ifndef __LNET_TYPES_H__ +#define __LNET_TYPES_H__ + +/** \addtogroup lnet + * @{ */ + +#include "../libcfs/libcfs.h" + +/** \addtogroup lnet_addr + * @{ */ + +/** Portal reserved for LNet's own use. + * \see lustre/include/lustre/lustre_idl.h for Lustre portal assignments. + */ +#define LNET_RESERVED_PORTAL 0 + +/** + * Address of an end-point in an LNet network. + * + * A node can have multiple end-points and hence multiple addresses. + * An LNet network can be a simple network (e.g. tcp0) or a network of + * LNet networks connected by LNet routers. Therefore an end-point address + * has two parts: network ID, and address within a network. + * + * \see LNET_NIDNET, LNET_NIDADDR, and LNET_MKNID. + */ +typedef __u64 lnet_nid_t; +/** + * ID of a process in a node. Shortened as PID to distinguish from + * lnet_process_id_t, the global process ID. 
+ */ +typedef __u32 lnet_pid_t; + +/** wildcard NID that matches any end-point address */ +#define LNET_NID_ANY ((lnet_nid_t) -1) +/** wildcard PID that matches any lnet_pid_t */ +#define LNET_PID_ANY ((lnet_pid_t) -1) + +#define LNET_PID_RESERVED 0xf0000000 /* reserved bits in PID */ +#define LNET_PID_USERFLAG 0x80000000 /* set in userspace peers */ + +#define LNET_TIME_FOREVER (-1) + +/** + * Objects maintained by the LNet are accessed through handles. Handle types + * have names of the form lnet_handle_xx_t, where xx is one of the two letter + * object type codes ('eq' for event queue, 'md' for memory descriptor, and + * 'me' for match entry). + * Each type of object is given a unique handle type to enhance type checking. + * The type lnet_handle_any_t can be used when a generic handle is needed. + * Every handle value can be converted into a value of type lnet_handle_any_t + * without loss of information. + */ +typedef struct { + __u64 cookie; +} lnet_handle_any_t; + +typedef lnet_handle_any_t lnet_handle_eq_t; +typedef lnet_handle_any_t lnet_handle_md_t; +typedef lnet_handle_any_t lnet_handle_me_t; + +#define LNET_WIRE_HANDLE_COOKIE_NONE (-1) + +/** + * Invalidate handle \a h. + */ +static inline void LNetInvalidateHandle(lnet_handle_any_t *h) +{ + h->cookie = LNET_WIRE_HANDLE_COOKIE_NONE; +} + +/** + * Compare handles \a h1 and \a h2. + * + * \return 1 if handles are equal, 0 if otherwise. + */ +static inline int LNetHandleIsEqual(lnet_handle_any_t h1, lnet_handle_any_t h2) +{ + return h1.cookie == h2.cookie; +} + +/** + * Check whether handle \a h is invalid. + * + * \return 1 if handle is invalid, 0 if valid. + */ +static inline int LNetHandleIsInvalid(lnet_handle_any_t h) +{ + return LNET_WIRE_HANDLE_COOKIE_NONE == h.cookie; +} + +/** + * Global process ID. 
+ */ +typedef struct { + /** node id */ + lnet_nid_t nid; + /** process id */ + lnet_pid_t pid; +} lnet_process_id_t; +/** @} lnet_addr */ + +/** \addtogroup lnet_me + * @{ */ + +/** + * Specifies whether the match entry or memory descriptor should be unlinked + * automatically (LNET_UNLINK) or not (LNET_RETAIN). + */ +typedef enum { + LNET_RETAIN = 0, + LNET_UNLINK +} lnet_unlink_t; + +/** + * Values of the type lnet_ins_pos_t are used to control where a new match + * entry is inserted. The value LNET_INS_BEFORE is used to insert the new + * entry before the current entry or before the head of the list. The value + * LNET_INS_AFTER is used to insert the new entry after the current entry + * or after the last item in the list. + */ +typedef enum { + /** insert ME before current position or head of the list */ + LNET_INS_BEFORE, + /** insert ME after current position or tail of the list */ + LNET_INS_AFTER, + /** attach ME at tail of local CPU partition ME list */ + LNET_INS_LOCAL +} lnet_ins_pos_t; + +/** @} lnet_me */ + +/** \addtogroup lnet_md + * @{ */ + +/** + * Defines the visible parts of a memory descriptor. Values of this type + * are used to initialize memory descriptors. + */ +typedef struct { + /** + * Specify the memory region associated with the memory descriptor. + * If the options field has: + * - LNET_MD_KIOV bit set: The start field points to the starting + * address of an array of lnet_kiov_t and the length field specifies + * the number of entries in the array. The length can't be bigger + * than LNET_MAX_IOV. The lnet_kiov_t is used to describe page-based + * fragments that are not necessarily mapped in virtual memory. + * - LNET_MD_IOVEC bit set: The start field points to the starting + * address of an array of struct iovec and the length field specifies + * the number of entries in the array. The length can't be bigger + * than LNET_MAX_IOV. The struct iovec is used to describe fragments + * that have virtual addresses. 
+ * - Otherwise: The memory region is contiguous. The start field + * specifies the starting address for the memory region and the + * length field specifies its length. + * + * When the memory region is fragmented, all fragments but the first + * one must start on page boundary, and all but the last must end on + * page boundary. + */ + void *start; + unsigned int length; + /** + * Specifies the maximum number of operations that can be performed + * on the memory descriptor. An operation is any action that could + * possibly generate an event. In the usual case, the threshold value + * is decremented for each operation on the MD. When the threshold + * drops to zero, the MD becomes inactive and does not respond to + * operations. A threshold value of LNET_MD_THRESH_INF indicates that + * there is no bound on the number of operations that may be applied + * to a MD. + */ + int threshold; + /** + * Specifies the largest incoming request that the memory descriptor + * should respond to. When the unused portion of a MD (length - + * local offset) falls below this value, the MD becomes inactive and + * does not respond to further operations. This value is only used + * if the LNET_MD_MAX_SIZE option is set. + */ + int max_size; + /** + * Specifies the behavior of the memory descriptor. A bitwise OR + * of the following values can be used: + * - LNET_MD_OP_PUT: The LNet PUT operation is allowed on this MD. + * - LNET_MD_OP_GET: The LNet GET operation is allowed on this MD. + * - LNET_MD_MANAGE_REMOTE: The offset used in accessing the memory + * region is provided by the incoming request. By default, the + * offset is maintained locally. When maintained locally, the + * offset is incremented by the length of the request so that + * the next operation (PUT or GET) will access the next part of + * the memory region. Note that only one offset variable exists + * per memory descriptor. 
If both PUT and GET operations are + * performed on a memory descriptor, the offset is updated each time. + * - LNET_MD_TRUNCATE: The length provided in the incoming request can + * be reduced to match the memory available in the region (determined + * by subtracting the offset from the length of the memory region). + * By default, if the length in the incoming operation is greater + * than the amount of memory available, the operation is rejected. + * - LNET_MD_ACK_DISABLE: An acknowledgment should not be sent for + * incoming PUT operations, even if requested. By default, + * acknowledgments are sent for PUT operations that request an + * acknowledgment. Acknowledgments are never sent for GET operations. + * The data sent in the REPLY serves as an implicit acknowledgment. + * - LNET_MD_KIOV: The start and length fields specify an array of + * lnet_kiov_t. + * - LNET_MD_IOVEC: The start and length fields specify an array of + * struct iovec. + * - LNET_MD_MAX_SIZE: The max_size field is valid. + * + * Note: + * - LNET_MD_KIOV or LNET_MD_IOVEC allows for a scatter/gather + * capability for memory descriptors. They can't be both set. + * - When LNET_MD_MAX_SIZE is set, the total length of the memory + * region (i.e. sum of all fragment lengths) must not be less than + * \a max_size. + */ + unsigned int options; + /** + * A user-specified value that is associated with the memory + * descriptor. The value does not need to be a pointer, but must fit + * in the space used by a pointer. This value is recorded in events + * associated with operations on this MD. + */ + void *user_ptr; + /** + * A handle for the event queue used to log the operations performed on + * the memory region. If this argument is a NULL handle (i.e. nullified + * by LNetInvalidateHandle()), operations performed on this memory + * descriptor are not logged. + */ + lnet_handle_eq_t eq_handle; +} lnet_md_t; + +/* Max Transfer Unit (minimum supported everywhere). + * CAVEAT EMPTOR, with multinet (i.e. 
routers forwarding between networks) + * these limits are system wide and not interface-local. */ +#define LNET_MTU_BITS 20 +#define LNET_MTU (1 << LNET_MTU_BITS) + +/** limit on the number of fragments in discontiguous MDs */ +#define LNET_MAX_IOV 256 + +/* Max payload size */ +# define LNET_MAX_PAYLOAD CONFIG_LNET_MAX_PAYLOAD +# if (LNET_MAX_PAYLOAD < LNET_MTU) +# error "LNET_MAX_PAYLOAD too small - error in configure --with-max-payload-mb" +# else +# if (LNET_MAX_PAYLOAD > (PAGE_SIZE * LNET_MAX_IOV)) +/* PAGE_SIZE is a constant: check with cpp! */ +# error "LNET_MAX_PAYLOAD too large - error in configure --with-max-payload-mb" +# endif +# endif + +/** + * Options for the MD structure. See lnet_md_t::options. + */ +#define LNET_MD_OP_PUT (1 << 0) +/** See lnet_md_t::options. */ +#define LNET_MD_OP_GET (1 << 1) +/** See lnet_md_t::options. */ +#define LNET_MD_MANAGE_REMOTE (1 << 2) +/* unused (1 << 3) */ +/** See lnet_md_t::options. */ +#define LNET_MD_TRUNCATE (1 << 4) +/** See lnet_md_t::options. */ +#define LNET_MD_ACK_DISABLE (1 << 5) +/** See lnet_md_t::options. */ +#define LNET_MD_IOVEC (1 << 6) +/** See lnet_md_t::options. */ +#define LNET_MD_MAX_SIZE (1 << 7) +/** See lnet_md_t::options. */ +#define LNET_MD_KIOV (1 << 8) + +/* For compatibility with Cray Portals */ +#define LNET_MD_PHYS 0 + +/** Infinite threshold on MD operations. See lnet_md_t::threshold */ +#define LNET_MD_THRESH_INF (-1) + +/* NB lustre portals uses struct iovec internally! */ +typedef struct iovec lnet_md_iovec_t; + +/** + * A page-based fragment of a MD. + */ +typedef struct { + /** Pointer to the page where the fragment resides */ + struct page *kiov_page; + /** Length in bytes of the fragment */ + unsigned int kiov_len; + /** + * Starting offset of the fragment within the page. Note that the + * end of the fragment must not pass the end of the page; i.e., + * kiov_len + kiov_offset <= PAGE_CACHE_SIZE. 
+ */ + unsigned int kiov_offset; +} lnet_kiov_t; +/** @} lnet_md */ + +/** \addtogroup lnet_eq + * @{ */ + +/** + * Six types of events can be logged in an event queue. + */ +typedef enum { + /** An incoming GET operation has completed on the MD. */ + LNET_EVENT_GET = 1, + /** + * An incoming PUT operation has completed on the MD. The + * underlying layers will not alter the memory (on behalf of this + * operation) once this event has been logged. + */ + LNET_EVENT_PUT, + /** + * A REPLY operation has completed. This event is logged after the + * data (if any) from the REPLY has been written into the MD. + */ + LNET_EVENT_REPLY, + /** An acknowledgment has been received. */ + LNET_EVENT_ACK, + /** + * An outgoing send (PUT or GET) operation has completed. This event + * is logged after the entire buffer has been sent and it is safe for + * the caller to reuse the buffer. + * + * Note: + * - The LNET_EVENT_SEND doesn't guarantee message delivery. It can + * happen even when the message has not yet been put out on wire. + * - It's unsafe to assume that in an outgoing GET operation + * the LNET_EVENT_SEND event would happen before the + * LNET_EVENT_REPLY event. The same holds for LNET_EVENT_SEND and + * LNET_EVENT_ACK events in an outgoing PUT operation. + */ + LNET_EVENT_SEND, + /** + * A MD has been unlinked. Note that LNetMDUnlink() does not + * necessarily trigger an LNET_EVENT_UNLINK event. + * \see LNetMDUnlink + */ + LNET_EVENT_UNLINK, +} lnet_event_kind_t; + +#define LNET_SEQ_BASETYPE long +typedef unsigned LNET_SEQ_BASETYPE lnet_seq_t; +#define LNET_SEQ_GT(a, b) (((signed LNET_SEQ_BASETYPE)((a) - (b))) > 0) + +/** + * Information about an event on a MD. + */ +typedef struct { + /** The identifier (nid, pid) of the target. */ + lnet_process_id_t target; + /** The identifier (nid, pid) of the initiator. */ + lnet_process_id_t initiator; + /** + * The NID of the immediate sender. 
If the request has been forwarded + * by routers, this is the NID of the last hop; otherwise it's the + * same as the initiator. + */ + lnet_nid_t sender; + /** Indicates the type of the event. */ + lnet_event_kind_t type; + /** The portal table index specified in the request */ + unsigned int pt_index; + /** A copy of the match bits specified in the request. */ + __u64 match_bits; + /** The length (in bytes) specified in the request. */ + unsigned int rlength; + /** + * The length (in bytes) of the data that was manipulated by the + * operation. For truncated operations, the manipulated length will be + * the number of bytes specified by the MD (possibly with an offset, + * see lnet_md_t). For all other operations, the manipulated length + * will be the length of the requested operation, i.e. rlength. + */ + unsigned int mlength; + /** + * The handle to the MD associated with the event. The handle may be + * invalid if the MD has been unlinked. + */ + lnet_handle_md_t md_handle; + /** + * A snapshot of the state of the MD immediately after the event has + * been processed. In particular, the threshold field in md will + * reflect the value of the threshold after the operation occurred. + */ + lnet_md_t md; + /** + * 64 bits of out-of-band user data. Only valid for LNET_EVENT_PUT. + * \see LNetPut + */ + __u64 hdr_data; + /** + * Indicates the completion status of the operation. It's 0 for + * successful operations, otherwise it's an error code. + */ + int status; + /** + * Indicates whether the MD has been unlinked. Note that: + * - An event with unlinked set is the last event on the MD. + * - This field is also set for an explicit LNET_EVENT_UNLINK event. + * \see LNetMDUnlink + */ + int unlinked; + /** + * The displacement (in bytes) into the memory region that the + * operation used. The offset can be determined by the operation for + * a remote managed MD or by the local MD. 
+ * \see lnet_md_t::options + */ + unsigned int offset; + /** + * The sequence number for this event. Sequence numbers are unique + * to each event. + */ + volatile lnet_seq_t sequence; +} lnet_event_t; + +/** + * Event queue handler function type. + * + * The EQ handler runs for each event that is deposited into the EQ. The + * handler is supplied with a pointer to the event that triggered the + * handler invocation. + * + * The handler must not block, must be reentrant, and must not call any LNet + * API functions. It should return as quickly as possible. + */ +typedef void (*lnet_eq_handler_t)(lnet_event_t *event); +#define LNET_EQ_HANDLER_NONE NULL +/** @} lnet_eq */ + +/** \addtogroup lnet_data + * @{ */ + +/** + * Specify whether an acknowledgment should be sent by target when the PUT + * operation completes (i.e., when the data has been written to a MD of the + * target process). + * + * \see lnet_md_t::options for the discussion on LNET_MD_ACK_DISABLE by which + * acknowledgments can be disabled for a MD. + */ +typedef enum { + /** Request an acknowledgment */ + LNET_ACK_REQ, + /** Request that no acknowledgment should be generated. */ + LNET_NOACK_REQ +} lnet_ack_req_t; +/** @} lnet_data */ + +/** @} lnet */ +#endif -- cgit v1.2.3-54-g00ecf