From 863981e96738983919de841ec669e157e6bdaeb0 Mon Sep 17 00:00:00 2001 From: AndrĂ© Fabian Silva Delgado Date: Sun, 11 Sep 2016 04:34:46 -0300 Subject: Linux-libre 4.7.1-gnu --- .../staging/lustre/include/linux/libcfs/libcfs.h | 51 +- .../lustre/include/linux/libcfs/libcfs_cpu.h | 79 + .../lustre/include/linux/libcfs/libcfs_crypto.h | 136 +- .../lustre/include/linux/libcfs/libcfs_debug.h | 18 +- .../lustre/include/linux/libcfs/libcfs_fail.h | 15 +- .../lustre/include/linux/libcfs/libcfs_hash.h | 4 +- .../lustre/include/linux/libcfs/libcfs_ioctl.h | 161 +- .../lustre/include/linux/libcfs/libcfs_prim.h | 31 +- .../lustre/include/linux/libcfs/libcfs_private.h | 75 - .../lustre/include/linux/libcfs/libcfs_workitem.h | 12 +- .../lustre/include/linux/libcfs/linux/libcfs.h | 2 +- .../lustre/include/linux/libcfs/linux/linux-cpu.h | 2 +- .../lustre/include/linux/libcfs/linux/linux-mem.h | 80 - .../lustre/include/linux/libcfs/linux/linux-time.h | 4 +- .../staging/lustre/include/linux/lnet/lib-dlc.h | 29 +- .../staging/lustre/include/linux/lnet/lib-lnet.h | 9 +- .../staging/lustre/include/linux/lnet/lib-types.h | 2 + .../staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c | 405 +++- .../staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h | 134 +- .../staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c | 101 +- .../lustre/lnet/klnds/o2iblnd/o2iblnd_modparams.c | 139 +- .../staging/lustre/lnet/klnds/socklnd/socklnd.c | 1 - .../lustre/lnet/klnds/socklnd/socklnd_lib.c | 3 - drivers/staging/lustre/lnet/libcfs/debug.c | 126 +- drivers/staging/lustre/lnet/libcfs/fail.c | 3 + drivers/staging/lustre/lnet/libcfs/hash.c | 6 +- drivers/staging/lustre/lnet/libcfs/libcfs_lock.c | 54 +- drivers/staging/lustre/lnet/libcfs/libcfs_mem.c | 28 - .../staging/lustre/lnet/libcfs/linux/linux-cpu.c | 9 +- .../lustre/lnet/libcfs/linux/linux-crypto.c | 283 ++- .../lustre/lnet/libcfs/linux/linux-module.c | 154 +- .../staging/lustre/lnet/libcfs/linux/linux-prim.c | 31 - drivers/staging/lustre/lnet/libcfs/module.c | 132 +- drivers/staging/lustre/lnet/libcfs/tracefile.c | 17 +- drivers/staging/lustre/lnet/libcfs/workitem.c | 12 +- drivers/staging/lustre/lnet/lnet/api-ni.c | 143 +- drivers/staging/lustre/lnet/lnet/config.c | 3 + drivers/staging/lustre/lnet/lnet/lib-move.c | 10 +- drivers/staging/lustre/lnet/lnet/module.c | 7 +- drivers/staging/lustre/lnet/selftest/brw_test.c | 82 +- drivers/staging/lustre/lnet/selftest/conctl.c | 52 +- drivers/staging/lustre/lnet/selftest/conrpc.c | 215 +- drivers/staging/lustre/lnet/selftest/conrpc.h | 40 +- drivers/staging/lustre/lnet/selftest/console.c | 282 +-- drivers/staging/lustre/lnet/selftest/console.h | 47 +- drivers/staging/lustre/lnet/selftest/framework.c | 270 +-- drivers/staging/lustre/lnet/selftest/ping_test.c | 44 +- drivers/staging/lustre/lnet/selftest/rpc.c | 133 +- drivers/staging/lustre/lnet/selftest/rpc.h | 156 +- drivers/staging/lustre/lnet/selftest/selftest.h | 204 +- drivers/staging/lustre/lnet/selftest/timer.c | 12 +- drivers/staging/lustre/lustre/fid/fid_request.c | 12 +- drivers/staging/lustre/lustre/fld/fld_cache.c | 3 +- drivers/staging/lustre/lustre/fld/fld_internal.h | 9 +- drivers/staging/lustre/lustre/fld/fld_request.c | 94 +- drivers/staging/lustre/lustre/include/cl_object.h | 978 ++------- drivers/staging/lustre/lustre/include/lclient.h | 408 ---- drivers/staging/lustre/lustre/include/linux/obd.h | 125 -- drivers/staging/lustre/lustre/include/lu_object.h | 75 +- .../lustre/lustre/include/lustre/lustre_idl.h | 112 +- .../lustre/lustre/include/lustre/lustre_user.h | 54 +- drivers/staging/lustre/lustre/include/lustre_cfg.h | 2 +- .../staging/lustre/lustre/include/lustre_disk.h | 2 - drivers/staging/lustre/lustre/include/lustre_dlm.h | 14 +- .../lustre/lustre/include/lustre_dlm_flags.h | 120 +- drivers/staging/lustre/lustre/include/lustre_fid.h | 22 +- .../staging/lustre/lustre/include/lustre_import.h | 2 +- drivers/staging/lustre/lustre/include/lustre_lib.h | 60 +- drivers/staging/lustre/lustre/include/lustre_mdc.h | 18 + drivers/staging/lustre/lustre/include/lustre_net.h | 4 +- .../staging/lustre/lustre/include/lustre_param.h | 1 + .../lustre/lustre/include/lustre_req_layout.h | 3 +- drivers/staging/lustre/lustre/include/obd.h | 77 +- drivers/staging/lustre/lustre/include/obd_cksum.h | 1 + drivers/staging/lustre/lustre/include/obd_class.h | 5 +- .../staging/lustre/lustre/include/obd_support.h | 4 + drivers/staging/lustre/lustre/lclient/glimpse.c | 270 --- drivers/staging/lustre/lustre/lclient/lcommon_cl.c | 1203 ----------- .../staging/lustre/lustre/lclient/lcommon_misc.c | 200 -- drivers/staging/lustre/lustre/ldlm/l_lock.c | 4 +- drivers/staging/lustre/lustre/ldlm/ldlm_extent.c | 4 +- drivers/staging/lustre/lustre/ldlm/ldlm_flock.c | 30 +- drivers/staging/lustre/lustre/ldlm/ldlm_internal.h | 19 +- drivers/staging/lustre/lustre/ldlm/ldlm_lib.c | 14 +- drivers/staging/lustre/lustre/ldlm/ldlm_lock.c | 115 +- drivers/staging/lustre/lustre/ldlm/ldlm_lockd.c | 28 +- drivers/staging/lustre/lustre/ldlm/ldlm_request.c | 163 +- drivers/staging/lustre/lustre/ldlm/ldlm_resource.c | 19 +- drivers/staging/lustre/lustre/llite/Makefile | 5 +- drivers/staging/lustre/lustre/llite/dcache.c | 15 +- drivers/staging/lustre/lustre/llite/dir.c | 99 +- drivers/staging/lustre/lustre/llite/file.c | 277 +-- drivers/staging/lustre/lustre/llite/glimpse.c | 255 +++ drivers/staging/lustre/lustre/llite/lcommon_cl.c | 327 +++ drivers/staging/lustre/lustre/llite/lcommon_misc.c | 201 ++ drivers/staging/lustre/lustre/llite/llite_close.c | 71 +- .../staging/lustre/lustre/llite/llite_internal.h | 274 +-- drivers/staging/lustre/lustre/llite/llite_lib.c | 176 +- drivers/staging/lustre/lustre/llite/llite_mmap.c | 48 +- drivers/staging/lustre/lustre/llite/llite_nfs.c | 29 +- drivers/staging/lustre/lustre/llite/lloop.c | 3 +- drivers/staging/lustre/lustre/llite/lproc_llite.c | 33 +- drivers/staging/lustre/lustre/llite/namei.c | 143 +- drivers/staging/lustre/lustre/llite/rw.c | 367 ++-- drivers/staging/lustre/lustre/llite/rw26.c | 318 ++- drivers/staging/lustre/lustre/llite/statahead.c | 17 +- drivers/staging/lustre/lustre/llite/super25.c | 14 +- drivers/staging/lustre/lustre/llite/symlink.c | 10 +- drivers/staging/lustre/lustre/llite/vvp_dev.c | 270 ++- drivers/staging/lustre/lustre/llite/vvp_internal.h | 332 ++- drivers/staging/lustre/lustre/llite/vvp_io.c | 928 +++++---- drivers/staging/lustre/lustre/llite/vvp_lock.c | 53 +- drivers/staging/lustre/lustre/llite/vvp_object.c | 141 +- drivers/staging/lustre/lustre/llite/vvp_page.c | 211 +- drivers/staging/lustre/lustre/llite/vvp_req.c | 121 ++ drivers/staging/lustre/lustre/llite/xattr.c | 45 +- drivers/staging/lustre/lustre/llite/xattr_cache.c | 1 - drivers/staging/lustre/lustre/lmv/lmv_internal.h | 3 - drivers/staging/lustre/lustre/lmv/lmv_obd.c | 182 +- .../staging/lustre/lustre/lov/lov_cl_internal.h | 105 +- drivers/staging/lustre/lustre/lov/lov_dev.c | 15 +- drivers/staging/lustre/lustre/lov/lov_ea.c | 5 - drivers/staging/lustre/lustre/lov/lov_internal.h | 34 +- drivers/staging/lustre/lustre/lov/lov_io.c | 246 +-- drivers/staging/lustre/lustre/lov/lov_lock.c | 996 +-------- drivers/staging/lustre/lustre/lov/lov_merge.c | 11 + drivers/staging/lustre/lustre/lov/lov_obd.c | 26 +- drivers/staging/lustre/lustre/lov/lov_object.c | 54 +- drivers/staging/lustre/lustre/lov/lov_offset.c | 12 + drivers/staging/lustre/lustre/lov/lov_pack.c | 8 +- drivers/staging/lustre/lustre/lov/lov_page.c | 183 +- drivers/staging/lustre/lustre/lov/lov_pool.c | 62 +- drivers/staging/lustre/lustre/lov/lov_request.c | 11 +- drivers/staging/lustre/lustre/lov/lovsub_dev.c | 9 +- drivers/staging/lustre/lustre/lov/lovsub_lock.c | 386 +--- drivers/staging/lustre/lustre/lov/lovsub_object.c | 7 +- drivers/staging/lustre/lustre/lov/lovsub_page.c | 4 +- drivers/staging/lustre/lustre/mdc/lproc_mdc.c | 8 +- drivers/staging/lustre/lustre/mdc/mdc_lib.c | 24 +- drivers/staging/lustre/lustre/mdc/mdc_locks.c | 5 +- drivers/staging/lustre/lustre/mdc/mdc_request.c | 26 +- drivers/staging/lustre/lustre/mgc/mgc_request.c | 12 +- drivers/staging/lustre/lustre/obdclass/cl_io.c | 430 ++-- drivers/staging/lustre/lustre/obdclass/cl_lock.c | 2152 +------------------- drivers/staging/lustre/lustre/obdclass/cl_object.c | 303 ++- drivers/staging/lustre/lustre/obdclass/cl_page.c | 659 +----- drivers/staging/lustre/lustre/obdclass/class_obd.c | 5 +- drivers/staging/lustre/lustre/obdclass/debug.c | 4 +- drivers/staging/lustre/lustre/obdclass/genops.c | 1 - .../lustre/lustre/obdclass/linux/linux-module.c | 4 +- drivers/staging/lustre/lustre/obdclass/llog.c | 1 - .../lustre/lustre/obdclass/lprocfs_status.c | 72 +- drivers/staging/lustre/lustre/obdclass/lu_object.c | 9 +- .../staging/lustre/lustre/obdclass/lustre_peer.c | 3 +- .../staging/lustre/lustre/obdclass/obd_config.c | 26 +- drivers/staging/lustre/lustre/obdclass/obd_mount.c | 15 +- drivers/staging/lustre/lustre/obdclass/obdo.c | 3 +- .../staging/lustre/lustre/obdecho/echo_client.c | 173 +- drivers/staging/lustre/lustre/osc/lproc_osc.c | 68 +- drivers/staging/lustre/lustre/osc/osc_cache.c | 531 ++++- .../staging/lustre/lustre/osc/osc_cl_internal.h | 159 +- drivers/staging/lustre/lustre/osc/osc_internal.h | 27 +- drivers/staging/lustre/lustre/osc/osc_io.c | 283 +-- drivers/staging/lustre/lustre/osc/osc_lock.c | 1698 ++++++--------- drivers/staging/lustre/lustre/osc/osc_object.c | 38 +- drivers/staging/lustre/lustre/osc/osc_page.c | 544 +++-- drivers/staging/lustre/lustre/osc/osc_request.c | 423 ++-- drivers/staging/lustre/lustre/ptlrpc/client.c | 11 +- drivers/staging/lustre/lustre/ptlrpc/events.c | 1 - drivers/staging/lustre/lustre/ptlrpc/import.c | 12 +- drivers/staging/lustre/lustre/ptlrpc/layout.c | 31 +- .../staging/lustre/lustre/ptlrpc/lproc_ptlrpc.c | 11 +- drivers/staging/lustre/lustre/ptlrpc/nrs.c | 7 +- .../staging/lustre/lustre/ptlrpc/pack_generic.c | 3 - drivers/staging/lustre/lustre/ptlrpc/ptlrpcd.c | 21 +- drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c | 14 +- drivers/staging/lustre/lustre/ptlrpc/sec_plain.c | 2 +- drivers/staging/lustre/lustre/ptlrpc/service.c | 52 +- drivers/staging/lustre/lustre/ptlrpc/wiretest.c | 12 +- 179 files changed, 9401 insertions(+), 13947 deletions(-) delete mode 100644 drivers/staging/lustre/include/linux/libcfs/linux/linux-mem.h delete mode 100644 drivers/staging/lustre/lustre/include/lclient.h delete mode 100644 drivers/staging/lustre/lustre/include/linux/obd.h delete mode 100644 drivers/staging/lustre/lustre/lclient/glimpse.c delete mode 100644 drivers/staging/lustre/lustre/lclient/lcommon_cl.c delete mode 100644 drivers/staging/lustre/lustre/lclient/lcommon_misc.c create mode 100644 drivers/staging/lustre/lustre/llite/glimpse.c create mode 100644 drivers/staging/lustre/lustre/llite/lcommon_cl.c create mode 100644 drivers/staging/lustre/lustre/llite/lcommon_misc.c create mode 100644 drivers/staging/lustre/lustre/llite/vvp_req.c (limited to 'drivers/staging/lustre') diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs.h b/drivers/staging/lustre/include/linux/libcfs/libcfs.h index 40af75c42..4141afb10 100644 --- a/drivers/staging/lustre/include/linux/libcfs/libcfs.h +++ b/drivers/staging/lustre/include/linux/libcfs/libcfs.h @@ -59,42 +59,13 @@ #define LNET_ACCEPTOR_MIN_RESERVED_PORT 512 #define LNET_ACCEPTOR_MAX_RESERVED_PORT 1023 -/* - * libcfs pseudo device operations - * - * It's just draft now. - */ - -struct cfs_psdev_file { - unsigned long off; - void *private_data; - unsigned long reserved1; - unsigned long reserved2; -}; - -struct cfs_psdev_ops { - int (*p_open)(unsigned long, void *); - int (*p_close)(unsigned long, void *); - int (*p_read)(struct cfs_psdev_file *, char *, unsigned long); - int (*p_write)(struct cfs_psdev_file *, char *, unsigned long); - int (*p_ioctl)(struct cfs_psdev_file *, unsigned long, void __user *); -}; - -/* - * Drop into debugger, if possible. Implementation is provided by platform. - */ - -void cfs_enter_debugger(void); - /* * Defined by platform */ -int unshare_fs_struct(void); sigset_t cfs_block_allsigs(void); sigset_t cfs_block_sigs(unsigned long sigs); sigset_t cfs_block_sigsinv(unsigned long sigs); void cfs_restore_sigs(sigset_t); -int cfs_signal_pending(void); void cfs_clear_sigpending(void); /* @@ -117,7 +88,25 @@ void cfs_get_random_bytes(void *buf, int size); #include "libcfs_workitem.h" #include "libcfs_hash.h" #include "libcfs_fail.h" -#include "libcfs_crypto.h" + +struct libcfs_ioctl_handler { + struct list_head item; + int (*handle_ioctl)(unsigned int cmd, struct libcfs_ioctl_hdr *hdr); +}; + +#define DECLARE_IOCTL_HANDLER(ident, func) \ + struct libcfs_ioctl_handler ident = { \ + .item = LIST_HEAD_INIT(ident.item), \ + .handle_ioctl = func \ + } + +int libcfs_register_ioctl(struct libcfs_ioctl_handler *hand); +int libcfs_deregister_ioctl(struct libcfs_ioctl_handler *hand); + +int libcfs_ioctl_getdata(struct libcfs_ioctl_hdr **hdr_pp, + const struct libcfs_ioctl_hdr __user *uparam); +int libcfs_ioctl_data_adjust(struct libcfs_ioctl_data *data); +int libcfs_ioctl(unsigned long cmd, void __user *arg); /* container_of depends on "likely" which is defined in libcfs_private.h */ static inline void *__container_of(void *ptr, unsigned long shift) @@ -143,8 +132,6 @@ extern struct miscdevice libcfs_dev; extern char lnet_upcall[1024]; extern char lnet_debug_log_upcall[1024]; -extern struct cfs_psdev_ops libcfs_psdev_ops; - extern struct cfs_wi_sched *cfs_sched_rehash; struct lnet_debugfs_symlink_def { diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs_cpu.h b/drivers/staging/lustre/include/linux/libcfs/libcfs_cpu.h index 9e62c5971..81d8079e3 100644 --- a/drivers/staging/lustre/include/linux/libcfs/libcfs_cpu.h +++ b/drivers/staging/lustre/include/linux/libcfs/libcfs_cpu.h @@ -203,6 +203,85 @@ int cfs_cpt_spread_node(struct cfs_cpt_table *cptab, int cpt); */ int cfs_cpu_ht_nsiblings(int cpu); +/* + * allocate per-cpu-partition data, returned value is an array of pointers, + * variable can be indexed by CPU ID. + * cptab != NULL: size of array is number of CPU partitions + * cptab == NULL: size of array is number of HW cores + */ +void *cfs_percpt_alloc(struct cfs_cpt_table *cptab, unsigned int size); +/* + * destory per-cpu-partition variable + */ +void cfs_percpt_free(void *vars); +int cfs_percpt_number(void *vars); + +#define cfs_percpt_for_each(var, i, vars) \ + for (i = 0; i < cfs_percpt_number(vars) && \ + ((var) = (vars)[i]) != NULL; i++) + +/* + * percpu partition lock + * + * There are some use-cases like this in Lustre: + * . each CPU partition has it's own private data which is frequently changed, + * and mostly by the local CPU partition. + * . all CPU partitions share some global data, these data are rarely changed. + * + * LNet is typical example. + * CPU partition lock is designed for this kind of use-cases: + * . each CPU partition has it's own private lock + * . change on private data just needs to take the private lock + * . read on shared data just needs to take _any_ of private locks + * . change on shared data needs to take _all_ private locks, + * which is slow and should be really rare. + */ +enum { + CFS_PERCPT_LOCK_EX = -1, /* negative */ +}; + +struct cfs_percpt_lock { + /* cpu-partition-table for this lock */ + struct cfs_cpt_table *pcl_cptab; + /* exclusively locked */ + unsigned int pcl_locked; + /* private lock table */ + spinlock_t **pcl_locks; +}; + +/* return number of private locks */ +#define cfs_percpt_lock_num(pcl) cfs_cpt_number(pcl->pcl_cptab) + +/* + * create a cpu-partition lock based on CPU partition table \a cptab, + * each private lock has extra \a psize bytes padding data + */ +struct cfs_percpt_lock *cfs_percpt_lock_create(struct cfs_cpt_table *cptab, + struct lock_class_key *keys); +/* destroy a cpu-partition lock */ +void cfs_percpt_lock_free(struct cfs_percpt_lock *pcl); + +/* lock private lock \a index of \a pcl */ +void cfs_percpt_lock(struct cfs_percpt_lock *pcl, int index); + +/* unlock private lock \a index of \a pcl */ +void cfs_percpt_unlock(struct cfs_percpt_lock *pcl, int index); + +#define CFS_PERCPT_LOCK_KEYS 256 + +/* NB: don't allocate keys dynamically, lockdep needs them to be in ".data" */ +#define cfs_percpt_lock_alloc(cptab) \ +({ \ + static struct lock_class_key ___keys[CFS_PERCPT_LOCK_KEYS]; \ + struct cfs_percpt_lock *___lk; \ + \ + if (cfs_cpt_number(cptab) > CFS_PERCPT_LOCK_KEYS) \ + ___lk = cfs_percpt_lock_create(cptab, NULL); \ + else \ + ___lk = cfs_percpt_lock_create(cptab, ___keys); \ + ___lk; \ +}) + /** * iterate over all CPU partitions in \a cptab */ diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs_crypto.h b/drivers/staging/lustre/include/linux/libcfs/libcfs_crypto.h index e8663697e..02be7d760 100644 --- a/drivers/staging/lustre/include/linux/libcfs/libcfs_crypto.h +++ b/drivers/staging/lustre/include/linux/libcfs/libcfs_crypto.h @@ -46,7 +46,8 @@ enum cfs_crypto_hash_alg { CFS_HASH_ALG_SHA384, CFS_HASH_ALG_SHA512, CFS_HASH_ALG_CRC32C, - CFS_HASH_ALG_MAX + CFS_HASH_ALG_MAX, + CFS_HASH_ALG_UNKNOWN = 0xff }; static struct cfs_crypto_hash_type hash_types[] = { @@ -59,11 +60,22 @@ static struct cfs_crypto_hash_type hash_types[] = { [CFS_HASH_ALG_SHA256] = { "sha256", 0, 32 }, [CFS_HASH_ALG_SHA384] = { "sha384", 0, 48 }, [CFS_HASH_ALG_SHA512] = { "sha512", 0, 64 }, + [CFS_HASH_ALG_MAX] = { NULL, 0, 64 }, }; -/** Return pointer to type of hash for valid hash algorithm identifier */ +/* Maximum size of hash_types[].cht_size */ +#define CFS_CRYPTO_HASH_DIGESTSIZE_MAX 64 + +/** + * Return hash algorithm information for the specified algorithm identifier + * + * Hash information includes algorithm name, initial seed, hash size. + * + * \retval cfs_crypto_hash_type for valid ID (CFS_HASH_ALG_*) + * \retval NULL for unknown algorithm identifier + */ static inline const struct cfs_crypto_hash_type * - cfs_crypto_hash_type(unsigned char hash_alg) +cfs_crypto_hash_type(enum cfs_crypto_hash_alg hash_alg) { struct cfs_crypto_hash_type *ht; @@ -75,8 +87,16 @@ static inline const struct cfs_crypto_hash_type * return NULL; } -/** Return hash name for valid hash algorithm identifier or "unknown" */ -static inline const char *cfs_crypto_hash_name(unsigned char hash_alg) +/** + * Return hash name for hash algorithm identifier + * + * \param[in] hash_alg hash alrgorithm id (CFS_HASH_ALG_*) + * + * \retval string name of known hash algorithm + * \retval "unknown" if hash algorithm is unknown + */ +static inline const char * +cfs_crypto_hash_name(enum cfs_crypto_hash_alg hash_alg) { const struct cfs_crypto_hash_type *ht; @@ -86,8 +106,15 @@ static inline const char *cfs_crypto_hash_name(unsigned char hash_alg) return "unknown"; } -/** Return digest size for valid algorithm identifier or 0 */ -static inline int cfs_crypto_hash_digestsize(unsigned char hash_alg) +/** + * Return digest size for hash algorithm type + * + * \param[in] hash_alg hash alrgorithm id (CFS_HASH_ALG_*) + * + * \retval hash algorithm digest size in bytes + * \retval 0 if hash algorithm type is unknown + */ +static inline int cfs_crypto_hash_digestsize(enum cfs_crypto_hash_alg hash_alg) { const struct cfs_crypto_hash_type *ht; @@ -97,36 +124,24 @@ static inline int cfs_crypto_hash_digestsize(unsigned char hash_alg) return 0; } -/** Return hash identifier for valid hash algorithm name or 0xFF */ +/** + * Find hash algorithm ID for the specified algorithm name + * + * \retval hash algorithm ID for valid ID (CFS_HASH_ALG_*) + * \retval CFS_HASH_ALG_UNKNOWN for unknown algorithm name + */ static inline unsigned char cfs_crypto_hash_alg(const char *algname) { - unsigned char i; + enum cfs_crypto_hash_alg hash_alg; - for (i = 0; i < CFS_HASH_ALG_MAX; i++) - if (!strcmp(hash_types[i].cht_name, algname)) - break; - return (i == CFS_HASH_ALG_MAX ? 0xFF : i); + for (hash_alg = 0; hash_alg < CFS_HASH_ALG_MAX; hash_alg++) + if (strcmp(hash_types[hash_alg].cht_name, algname) == 0) + return hash_alg; + + return CFS_HASH_ALG_UNKNOWN; } -/** Calculate hash digest for buffer. - * @param alg id of hash algorithm - * @param buf buffer of data - * @param buf_len buffer len - * @param key initial value for algorithm, if it is NULL, - * default initial value should be used. - * @param key_len len of initial value - * @param hash [out] pointer to hash, if it is NULL, hash_len is - * set to valid digest size in bytes, retval -ENOSPC. - * @param hash_len [in,out] size of hash buffer - * @returns status of operation - * @retval -EINVAL if buf, buf_len, hash_len or alg_id is invalid - * @retval -ENODEV if this algorithm is unsupported - * @retval -ENOSPC if pointer to hash is NULL, or hash_len less than - * digest size - * @retval 0 for success - * @retval < 0 other errors from lower layers. - */ -int cfs_crypto_hash_digest(unsigned char alg, +int cfs_crypto_hash_digest(enum cfs_crypto_hash_alg hash_alg, const void *buf, unsigned int buf_len, unsigned char *key, unsigned int key_len, unsigned char *hash, unsigned int *hash_len); @@ -134,66 +149,17 @@ int cfs_crypto_hash_digest(unsigned char alg, /* cfs crypto hash descriptor */ struct cfs_crypto_hash_desc; -/** Allocate and initialize descriptor for hash algorithm. - * @param alg algorithm id - * @param key initial value for algorithm, if it is NULL, - * default initial value should be used. - * @param key_len len of initial value - * @returns pointer to descriptor of hash instance - * @retval ERR_PTR(error) when errors occurred. - */ -struct cfs_crypto_hash_desc* - cfs_crypto_hash_init(unsigned char alg, - unsigned char *key, unsigned int key_len); - -/** Update digest by part of data. - * @param desc hash descriptor - * @param page data page - * @param offset data offset - * @param len data len - * @returns status of operation - * @retval 0 for success. - */ +struct cfs_crypto_hash_desc * +cfs_crypto_hash_init(enum cfs_crypto_hash_alg hash_alg, + unsigned char *key, unsigned int key_len); int cfs_crypto_hash_update_page(struct cfs_crypto_hash_desc *desc, struct page *page, unsigned int offset, unsigned int len); - -/** Update digest by part of data. - * @param desc hash descriptor - * @param buf pointer to data buffer - * @param buf_len size of data at buffer - * @returns status of operation - * @retval 0 for success. - */ int cfs_crypto_hash_update(struct cfs_crypto_hash_desc *desc, const void *buf, unsigned int buf_len); - -/** Finalize hash calculation, copy hash digest to buffer, destroy hash - * descriptor. - * @param desc hash descriptor - * @param hash buffer pointer to store hash digest - * @param hash_len pointer to hash buffer size, if NULL - * destroy hash descriptor - * @returns status of operation - * @retval -ENOSPC if hash is NULL, or *hash_len less than - * digest size - * @retval 0 for success - * @retval < 0 other errors from lower layers. - */ int cfs_crypto_hash_final(struct cfs_crypto_hash_desc *desc, unsigned char *hash, unsigned int *hash_len); -/** - * Register crypto hash algorithms - */ int cfs_crypto_register(void); - -/** - * Unregister - */ void cfs_crypto_unregister(void); - -/** Return hash speed in Mbytes per second for valid hash algorithm - * identifier. If test was unsuccessful -1 would be returned. - */ -int cfs_crypto_hash_speed(unsigned char hash_alg); +int cfs_crypto_hash_speed(enum cfs_crypto_hash_alg hash_alg); #endif diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs_debug.h b/drivers/staging/lustre/include/linux/libcfs/libcfs_debug.h index 98430e710..455c54d0d 100644 --- a/drivers/staging/lustre/include/linux/libcfs/libcfs_debug.h +++ b/drivers/staging/lustre/include/linux/libcfs/libcfs_debug.h @@ -85,7 +85,6 @@ struct ptldebug_header { #define PH_FLAG_FIRST_RECORD 1 /* Debugging subsystems (32 bits, non-overlapping) */ -/* keep these in sync with lnet/utils/debug.c and lnet/libcfs/debug.c */ #define S_UNDEFINED 0x00000001 #define S_MDC 0x00000002 #define S_MDS 0x00000004 @@ -118,10 +117,14 @@ struct ptldebug_header { #define S_MGS 0x20000000 #define S_FID 0x40000000 /* b_new_cmd */ #define S_FLD 0x80000000 /* b_new_cmd */ -/* keep these in sync with lnet/utils/debug.c and lnet/libcfs/debug.c */ + +#define LIBCFS_DEBUG_SUBSYS_NAMES { \ + "undefined", "mdc", "mds", "osc", "ost", "class", "log", \ + "llite", "rpc", "mgmt", "lnet", "lnd", "pinger", "filter", "", \ + "echo", "ldlm", "lov", "lquota", "osd", "lfsck", "", "", "lmv", \ + "", "sec", "gss", "", "mgc", "mgs", "fid", "fld", NULL } /* Debugging masks (32 bits, non-overlapping) */ -/* keep these in sync with lnet/utils/debug.c and lnet/libcfs/debug.c */ #define D_TRACE 0x00000001 /* ENTRY/EXIT markers */ #define D_INODE 0x00000002 #define D_SUPER 0x00000004 @@ -151,9 +154,14 @@ struct ptldebug_header { #define D_QUOTA 0x04000000 #define D_SEC 0x08000000 #define D_LFSCK 0x10000000 /* For both OI scrub and LFSCK */ -/* keep these in sync with lnet/{utils,libcfs}/debug.c */ +#define D_HSM 0x20000000 -#define D_HSM D_TRACE +#define LIBCFS_DEBUG_MASKS_NAMES { \ + "trace", "inode", "super", "ext2", "malloc", "cache", "info", \ + "ioctl", "neterror", "net", "warning", "buffs", "other", \ + "dentry", "nettrace", "page", "dlmtrace", "error", "emerg", \ + "ha", "rpctrace", "vfstrace", "reada", "mmap", "config", \ + "console", "quota", "sec", "lfsck", "hsm", NULL } #define D_CANTMASK (D_ERROR | D_EMERG | D_WARNING | D_CONSOLE) diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs_fail.h b/drivers/staging/lustre/include/linux/libcfs/libcfs_fail.h index aa69c6a33..2e008bffc 100644 --- a/drivers/staging/lustre/include/linux/libcfs/libcfs_fail.h +++ b/drivers/staging/lustre/include/linux/libcfs/libcfs_fail.h @@ -38,6 +38,7 @@ extern unsigned long cfs_fail_loc; extern unsigned int cfs_fail_val; +extern int cfs_fail_err; extern wait_queue_head_t cfs_race_waitq; extern int cfs_race_state; @@ -70,9 +71,14 @@ enum { #define CFS_FAIL_RAND 0x08000000 /* fail 1/N of the times */ #define CFS_FAIL_USR1 0x04000000 /* user flag */ -#define CFS_FAIL_PRECHECK(id) (cfs_fail_loc && \ - (cfs_fail_loc & CFS_FAIL_MASK_LOC) == \ - ((id) & CFS_FAIL_MASK_LOC)) +#define CFS_FAULT 0x02000000 /* match any CFS_FAULT_CHECK */ + +static inline bool CFS_FAIL_PRECHECK(__u32 id) +{ + return cfs_fail_loc != 0 && + ((cfs_fail_loc & CFS_FAIL_MASK_LOC) == (id & CFS_FAIL_MASK_LOC) || + (cfs_fail_loc & id & CFS_FAULT)); +} static inline int cfs_fail_check_set(__u32 id, __u32 value, int set, int quiet) @@ -144,6 +150,9 @@ static inline int cfs_fail_timeout_set(__u32 id, __u32 value, int ms, int set) #define CFS_FAIL_TIMEOUT_MS_ORSET(id, value, ms) \ cfs_fail_timeout_set(id, value, ms, CFS_FAIL_LOC_ORSET) +#define CFS_FAULT_CHECK(id) \ + CFS_FAIL_CHECK(CFS_FAULT | (id)) + /* The idea here is to synchronise two threads to force a race. The * first thread that calls this with a matching fail_loc is put to * sleep. The next thread that calls with the same fail_loc wakes up diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs_hash.h b/drivers/staging/lustre/include/linux/libcfs/libcfs_hash.h index c3f2332fa..119986bc7 100644 --- a/drivers/staging/lustre/include/linux/libcfs/libcfs_hash.h +++ b/drivers/staging/lustre/include/linux/libcfs/libcfs_hash.h @@ -245,7 +245,7 @@ struct cfs_hash { /** # of iterators (caller of cfs_hash_for_each_*) */ __u32 hs_iterators; /** rehash workitem */ - cfs_workitem_t hs_rehash_wi; + struct cfs_workitem hs_rehash_wi; /** refcount on this hash table */ atomic_t hs_refcount; /** rehash buckets-table */ @@ -262,7 +262,7 @@ struct cfs_hash { /** bits when we found the max depth */ unsigned int hs_dep_bits; /** workitem to output max depth */ - cfs_workitem_t hs_dep_wi; + struct cfs_workitem hs_dep_wi; #endif /** name of htable */ char hs_name[0]; diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs_ioctl.h b/drivers/staging/lustre/include/linux/libcfs/libcfs_ioctl.h index 5ca99bd6f..4b9102bd9 100644 --- a/drivers/staging/lustre/include/linux/libcfs/libcfs_ioctl.h +++ b/drivers/staging/lustre/include/linux/libcfs/libcfs_ioctl.h @@ -34,13 +34,16 @@ * libcfs/include/libcfs/libcfs_ioctl.h * * Low-level ioctl data structures. Kernel ioctl functions declared here, - * and user space functions are in libcfsutil_ioctl.h. + * and user space functions are in libcfs/util/ioctl.h. * */ #ifndef __LIBCFS_IOCTL_H__ #define __LIBCFS_IOCTL_H__ +#include +#include + #define LIBCFS_IOCTL_VERSION 0x0001000a #define LIBCFS_IOCTL_VERSION2 0x0001000b @@ -49,6 +52,9 @@ struct libcfs_ioctl_hdr { __u32 ioc_version; }; +/** max size to copy from userspace */ +#define LIBCFS_IOC_DATA_MAX (128 * 1024) + struct libcfs_ioctl_data { struct libcfs_ioctl_hdr ioc_hdr; @@ -73,67 +79,48 @@ struct libcfs_ioctl_data { char ioc_bulk[0]; }; -#define ioc_priority ioc_u32[0] - struct libcfs_debug_ioctl_data { struct libcfs_ioctl_hdr hdr; unsigned int subs; unsigned int debug; }; -#define LIBCFS_IOC_INIT(data) \ -do { \ - memset(&data, 0, sizeof(data)); \ - data.ioc_version = LIBCFS_IOCTL_VERSION; \ - data.ioc_len = sizeof(data); \ -} while (0) - -struct libcfs_ioctl_handler { - struct list_head item; - int (*handle_ioctl)(unsigned int cmd, struct libcfs_ioctl_hdr *hdr); -}; - -#define DECLARE_IOCTL_HANDLER(ident, func) \ - struct libcfs_ioctl_handler ident = { \ - /* .item = */ LIST_HEAD_INIT(ident.item), \ - /* .handle_ioctl = */ func \ - } +/* 'f' ioctls are defined in lustre_ioctl.h and lustre_user.h except for: */ +#define LIBCFS_IOC_DEBUG_MASK _IOWR('f', 250, long) +#define IOCTL_LIBCFS_TYPE long -/* FIXME check conflict with lustre_lib.h */ -#define LIBCFS_IOC_DEBUG_MASK _IOWR('f', 250, long) - -#define IOC_LIBCFS_TYPE 'e' -#define IOC_LIBCFS_MIN_NR 30 +#define IOC_LIBCFS_TYPE ('e') +#define IOC_LIBCFS_MIN_NR 30 /* libcfs ioctls */ -#define IOC_LIBCFS_PANIC _IOWR('e', 30, long) -#define IOC_LIBCFS_CLEAR_DEBUG _IOWR('e', 31, long) -#define IOC_LIBCFS_MARK_DEBUG _IOWR('e', 32, long) -#define IOC_LIBCFS_MEMHOG _IOWR('e', 36, long) +/* IOC_LIBCFS_PANIC obsolete in 2.8.0, was _IOWR('e', 30, IOCTL_LIBCFS_TYPE) */ +#define IOC_LIBCFS_CLEAR_DEBUG _IOWR('e', 31, IOCTL_LIBCFS_TYPE) +#define IOC_LIBCFS_MARK_DEBUG _IOWR('e', 32, IOCTL_LIBCFS_TYPE) +/* IOC_LIBCFS_MEMHOG obsolete in 2.8.0, was _IOWR('e', 36, IOCTL_LIBCFS_TYPE) */ /* lnet ioctls */ -#define IOC_LIBCFS_GET_NI _IOWR('e', 50, long) -#define IOC_LIBCFS_FAIL_NID _IOWR('e', 51, long) -#define IOC_LIBCFS_NOTIFY_ROUTER _IOWR('e', 55, long) -#define IOC_LIBCFS_UNCONFIGURE _IOWR('e', 56, long) -/* #define IOC_LIBCFS_PORTALS_COMPATIBILITY _IOWR('e', 57, long) */ -#define IOC_LIBCFS_LNET_DIST _IOWR('e', 58, long) -#define IOC_LIBCFS_CONFIGURE _IOWR('e', 59, long) -#define IOC_LIBCFS_TESTPROTOCOMPAT _IOWR('e', 60, long) -#define IOC_LIBCFS_PING _IOWR('e', 61, long) -/* #define IOC_LIBCFS_DEBUG_PEER _IOWR('e', 62, long) */ -#define IOC_LIBCFS_LNETST _IOWR('e', 63, long) -#define IOC_LIBCFS_LNET_FAULT _IOWR('e', 64, long) +#define IOC_LIBCFS_GET_NI _IOWR('e', 50, IOCTL_LIBCFS_TYPE) +#define IOC_LIBCFS_FAIL_NID _IOWR('e', 51, IOCTL_LIBCFS_TYPE) +#define IOC_LIBCFS_NOTIFY_ROUTER _IOWR('e', 55, IOCTL_LIBCFS_TYPE) +#define IOC_LIBCFS_UNCONFIGURE _IOWR('e', 56, IOCTL_LIBCFS_TYPE) +/* IOC_LIBCFS_PORTALS_COMPATIBILITY _IOWR('e', 57, IOCTL_LIBCFS_TYPE) */ +#define IOC_LIBCFS_LNET_DIST _IOWR('e', 58, IOCTL_LIBCFS_TYPE) +#define IOC_LIBCFS_CONFIGURE _IOWR('e', 59, IOCTL_LIBCFS_TYPE) +#define IOC_LIBCFS_TESTPROTOCOMPAT _IOWR('e', 60, IOCTL_LIBCFS_TYPE) +#define IOC_LIBCFS_PING _IOWR('e', 61, IOCTL_LIBCFS_TYPE) +/* IOC_LIBCFS_DEBUG_PEER _IOWR('e', 62, IOCTL_LIBCFS_TYPE) */ +#define IOC_LIBCFS_LNETST _IOWR('e', 63, IOCTL_LIBCFS_TYPE) +#define IOC_LIBCFS_LNET_FAULT _IOWR('e', 64, IOCTL_LIBCFS_TYPE) /* lnd ioctls */ -#define IOC_LIBCFS_REGISTER_MYNID _IOWR('e', 70, long) -#define IOC_LIBCFS_CLOSE_CONNECTION _IOWR('e', 71, long) -#define IOC_LIBCFS_PUSH_CONNECTION _IOWR('e', 72, long) -#define IOC_LIBCFS_GET_CONN _IOWR('e', 73, long) -#define IOC_LIBCFS_DEL_PEER _IOWR('e', 74, long) -#define IOC_LIBCFS_ADD_PEER _IOWR('e', 75, long) -#define IOC_LIBCFS_GET_PEER _IOWR('e', 76, long) +#define IOC_LIBCFS_REGISTER_MYNID _IOWR('e', 70, IOCTL_LIBCFS_TYPE) +#define IOC_LIBCFS_CLOSE_CONNECTION _IOWR('e', 71, IOCTL_LIBCFS_TYPE) +#define IOC_LIBCFS_PUSH_CONNECTION _IOWR('e', 72, IOCTL_LIBCFS_TYPE) +#define IOC_LIBCFS_GET_CONN _IOWR('e', 73, IOCTL_LIBCFS_TYPE) +#define IOC_LIBCFS_DEL_PEER _IOWR('e', 74, IOCTL_LIBCFS_TYPE) +#define IOC_LIBCFS_ADD_PEER _IOWR('e', 75, IOCTL_LIBCFS_TYPE) +#define IOC_LIBCFS_GET_PEER _IOWR('e', 76, IOCTL_LIBCFS_TYPE) /* ioctl 77 is free for use */ -#define IOC_LIBCFS_ADD_INTERFACE _IOWR('e', 78, long) -#define IOC_LIBCFS_DEL_INTERFACE _IOWR('e', 79, long) -#define IOC_LIBCFS_GET_INTERFACE _IOWR('e', 80, long) +#define IOC_LIBCFS_ADD_INTERFACE _IOWR('e', 78, IOCTL_LIBCFS_TYPE) +#define IOC_LIBCFS_DEL_INTERFACE _IOWR('e', 79, IOCTL_LIBCFS_TYPE) +#define IOC_LIBCFS_GET_INTERFACE _IOWR('e', 80, IOCTL_LIBCFS_TYPE) /* * DLC Specific IOCTL numbers. @@ -155,76 +142,4 @@ struct libcfs_ioctl_handler { #define IOC_LIBCFS_GET_LNET_STATS _IOWR(IOC_LIBCFS_TYPE, 91, IOCTL_CONFIG_SIZE) #define IOC_LIBCFS_MAX_NR 91 -static inline int libcfs_ioctl_packlen(struct libcfs_ioctl_data *data) -{ - int len = sizeof(*data); - - len += cfs_size_round(data->ioc_inllen1); - len += cfs_size_round(data->ioc_inllen2); - return len; -} - -static inline bool libcfs_ioctl_is_invalid(struct libcfs_ioctl_data *data) -{ - if (data->ioc_hdr.ioc_len > (1 << 30)) { - CERROR("LIBCFS ioctl: ioc_len larger than 1<<30\n"); - return 1; - } - if (data->ioc_inllen1 > (1<<30)) { - CERROR("LIBCFS ioctl: ioc_inllen1 larger than 1<<30\n"); - return 1; - } - if (data->ioc_inllen2 > (1<<30)) { - CERROR("LIBCFS ioctl: ioc_inllen2 larger than 1<<30\n"); - return 1; - } - if (data->ioc_inlbuf1 && !data->ioc_inllen1) { - CERROR("LIBCFS ioctl: inlbuf1 pointer but 0 length\n"); - return 1; - } - if (data->ioc_inlbuf2 && !data->ioc_inllen2) { - CERROR("LIBCFS ioctl: inlbuf2 pointer but 0 length\n"); - return 1; - } - if (data->ioc_pbuf1 && !data->ioc_plen1) { - CERROR("LIBCFS ioctl: pbuf1 pointer but 0 length\n"); - return 1; - } - if (data->ioc_pbuf2 && !data->ioc_plen2) { - CERROR("LIBCFS ioctl: pbuf2 pointer but 0 length\n"); - return 1; - } - if (data->ioc_plen1 && !data->ioc_pbuf1) { - CERROR("LIBCFS ioctl: plen1 nonzero but no pbuf1 pointer\n"); - return 1; - } - if (data->ioc_plen2 && !data->ioc_pbuf2) { - CERROR("LIBCFS ioctl: plen2 nonzero but no pbuf2 pointer\n"); - return 1; - } - if ((__u32)libcfs_ioctl_packlen(data) != data->ioc_hdr.ioc_len) { - CERROR("LIBCFS ioctl: packlen != ioc_len\n"); - return 1; - } - if (data->ioc_inllen1 && - data->ioc_bulk[data->ioc_inllen1 - 1] != '\0') { - CERROR("LIBCFS ioctl: inlbuf1 not 0 terminated\n"); - return 1; - } - if (data->ioc_inllen2 && - data->ioc_bulk[cfs_size_round(data->ioc_inllen1) + - data->ioc_inllen2 - 1] != '\0') { - CERROR("LIBCFS ioctl: inlbuf2 not 0 terminated\n"); - return 1; - } - return 0; -} - -int libcfs_register_ioctl(struct libcfs_ioctl_handler *hand); -int libcfs_deregister_ioctl(struct libcfs_ioctl_handler *hand); -int libcfs_ioctl_getdata_len(const struct libcfs_ioctl_hdr __user *arg, - __u32 *buf_len); -int libcfs_ioctl_popdata(void __user *arg, void *buf, int size); -int libcfs_ioctl_data_adjust(struct libcfs_ioctl_data *data); - #endif /* __LIBCFS_IOCTL_H__ */ diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs_prim.h b/drivers/staging/lustre/include/linux/libcfs/libcfs_prim.h index 082fe6de9..ac4e8cfe6 100644 --- a/drivers/staging/lustre/include/linux/libcfs/libcfs_prim.h +++ b/drivers/staging/lustre/include/linux/libcfs/libcfs_prim.h @@ -40,21 +40,32 @@ #ifndef __LIBCFS_PRIM_H__ #define __LIBCFS_PRIM_H__ -void add_wait_queue_exclusive_head(wait_queue_head_t *, wait_queue_t *); - /* * Memory */ -#ifndef memory_pressure_get -#define memory_pressure_get() (0) -#endif -#ifndef memory_pressure_set -#define memory_pressure_set() do {} while (0) -#endif -#ifndef memory_pressure_clr -#define memory_pressure_clr() do {} while (0) +#if BITS_PER_LONG == 32 +/* limit to lowmem on 32-bit systems */ +#define NUM_CACHEPAGES \ + min(totalram_pages, 1UL << (30 - PAGE_SHIFT) * 3 / 4) +#else +#define NUM_CACHEPAGES totalram_pages #endif +static inline unsigned int memory_pressure_get(void) +{ + return current->flags & PF_MEMALLOC; +} + +static inline void memory_pressure_set(void) +{ + current->flags |= PF_MEMALLOC; +} + +static inline void memory_pressure_clr(void) +{ + current->flags &= ~PF_MEMALLOC; +} + static inline int cfs_memory_pressure_get_and_set(void) { int old = memory_pressure_get(); diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs_private.h b/drivers/staging/lustre/include/linux/libcfs/libcfs_private.h index 13335437c..2fd2a9690 100644 --- a/drivers/staging/lustre/include/linux/libcfs/libcfs_private.h +++ b/drivers/staging/lustre/include/linux/libcfs/libcfs_private.h @@ -181,25 +181,6 @@ int libcfs_debug_cleanup(void); int libcfs_debug_clear_buffer(void); int libcfs_debug_mark_buffer(const char *text); -/* - * allocate per-cpu-partition data, returned value is an array of pointers, - * variable can be indexed by CPU ID. - * cptable != NULL: size of array is number of CPU partitions - * cptable == NULL: size of array is number of HW cores - */ -void *cfs_percpt_alloc(struct cfs_cpt_table *cptab, unsigned int size); -/* - * destroy per-cpu-partition variable - */ -void cfs_percpt_free(void *vars); -int cfs_percpt_number(void *vars); -void *cfs_percpt_current(void *vars); -void *cfs_percpt_index(void *vars, int idx); - -#define cfs_percpt_for_each(var, i, vars) \ - for (i = 0; i < cfs_percpt_number(vars) && \ - ((var) = (vars)[i]) != NULL; i++) - /* * allocate a variable array, returned value is an array of pointers. * Caller can specify length of array by count. @@ -302,62 +283,6 @@ do { \ #define CFS_ALLOC_PTR(ptr) LIBCFS_ALLOC(ptr, sizeof(*(ptr))) #define CFS_FREE_PTR(ptr) LIBCFS_FREE(ptr, sizeof(*(ptr))) -/* - * percpu partition lock - * - * There are some use-cases like this in Lustre: - * . each CPU partition has it's own private data which is frequently changed, - * and mostly by the local CPU partition. - * . all CPU partitions share some global data, these data are rarely changed. - * - * LNet is typical example. - * CPU partition lock is designed for this kind of use-cases: - * . each CPU partition has it's own private lock - * . change on private data just needs to take the private lock - * . read on shared data just needs to take _any_ of private locks - * . change on shared data needs to take _all_ private locks, - * which is slow and should be really rare. - */ - -enum { - CFS_PERCPT_LOCK_EX = -1, /* negative */ -}; - -struct cfs_percpt_lock { - /* cpu-partition-table for this lock */ - struct cfs_cpt_table *pcl_cptab; - /* exclusively locked */ - unsigned int pcl_locked; - /* private lock table */ - spinlock_t **pcl_locks; -}; - -/* return number of private locks */ -static inline int -cfs_percpt_lock_num(struct cfs_percpt_lock *pcl) -{ - return cfs_cpt_number(pcl->pcl_cptab); -} - -/* - * create a cpu-partition lock based on CPU partition table \a cptab, - * each private lock has extra \a psize bytes padding data - */ -struct cfs_percpt_lock *cfs_percpt_lock_alloc(struct cfs_cpt_table *cptab); -/* destroy a cpu-partition lock */ -void cfs_percpt_lock_free(struct cfs_percpt_lock *pcl); - -/* lock private lock \a index of \a pcl */ -void cfs_percpt_lock(struct cfs_percpt_lock *pcl, int index); -/* unlock private lock \a index of \a pcl */ -void cfs_percpt_unlock(struct cfs_percpt_lock *pcl, int index); -/* create percpt (atomic) refcount based on @cptab */ -atomic_t **cfs_percpt_atomic_alloc(struct cfs_cpt_table *cptab, int val); -/* destroy percpt refcount */ -void cfs_percpt_atomic_free(atomic_t **refs); -/* return sum of all percpu refs */ -int cfs_percpt_atomic_summary(atomic_t **refs); - /** Compile-time assertion. * Check an invariant described by a constant expression at compile time by diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs_workitem.h b/drivers/staging/lustre/include/linux/libcfs/libcfs_workitem.h index 5cc64f327..f9b20c5ac 100644 --- a/drivers/staging/lustre/include/linux/libcfs/libcfs_workitem.h +++ b/drivers/staging/lustre/include/linux/libcfs/libcfs_workitem.h @@ -73,7 +73,7 @@ int cfs_wi_sched_create(char *name, struct cfs_cpt_table *cptab, int cpt, struct cfs_workitem; typedef int (*cfs_wi_action_t) (struct cfs_workitem *); -typedef struct cfs_workitem { +struct cfs_workitem { /** chain on runq or rerunq */ struct list_head wi_list; /** working function */ @@ -84,10 +84,10 @@ typedef struct cfs_workitem { unsigned short wi_running:1; /** scheduled */ unsigned short wi_scheduled:1; -} cfs_workitem_t; +}; static inline void -cfs_wi_init(cfs_workitem_t *wi, void *data, cfs_wi_action_t action) +cfs_wi_init(struct cfs_workitem *wi, void *data, cfs_wi_action_t action) { INIT_LIST_HEAD(&wi->wi_list); @@ -97,9 +97,9 @@ cfs_wi_init(cfs_workitem_t *wi, void *data, cfs_wi_action_t action) wi->wi_action = action; } -void cfs_wi_schedule(struct cfs_wi_sched *sched, cfs_workitem_t *wi); -int cfs_wi_deschedule(struct cfs_wi_sched *sched, cfs_workitem_t *wi); -void cfs_wi_exit(struct cfs_wi_sched *sched, cfs_workitem_t *wi); +void cfs_wi_schedule(struct cfs_wi_sched *sched, struct cfs_workitem *wi); +int cfs_wi_deschedule(struct cfs_wi_sched *sched, struct cfs_workitem *wi); +void cfs_wi_exit(struct cfs_wi_sched *sched, struct cfs_workitem *wi); int cfs_wi_startup(void); void cfs_wi_shutdown(void); diff --git a/drivers/staging/lustre/include/linux/libcfs/linux/libcfs.h b/drivers/staging/lustre/include/linux/libcfs/linux/libcfs.h index d94b26616..a268ef7aa 100644 --- a/drivers/staging/lustre/include/linux/libcfs/linux/libcfs.h +++ b/drivers/staging/lustre/include/linux/libcfs/linux/libcfs.h @@ -60,6 +60,7 @@ #include #include #include +#include #include #include #include @@ -83,7 +84,6 @@ #include #include "linux-cpu.h" #include "linux-time.h" -#include "linux-mem.h" #define LUSTRE_TRACE_SIZE (THREAD_SIZE >> 5) diff --git a/drivers/staging/lustre/include/linux/libcfs/linux/linux-cpu.h b/drivers/staging/lustre/include/linux/libcfs/linux/linux-cpu.h index c04979ae0..f63cb47bc 100644 --- a/drivers/staging/lustre/include/linux/libcfs/linux/linux-cpu.h +++ b/drivers/staging/lustre/include/linux/libcfs/linux/linux-cpu.h @@ -23,7 +23,7 @@ * This file is part of Lustre, http://www.lustre.org/ * Lustre is a trademark of Sun Microsystems, Inc. * - * libcfs/include/libcfs/linux/linux-mem.h + * libcfs/include/libcfs/linux/linux-cpu.h * * Basic library routines. * diff --git a/drivers/staging/lustre/include/linux/libcfs/linux/linux-mem.h b/drivers/staging/lustre/include/linux/libcfs/linux/linux-mem.h deleted file mode 100644 index 837eb2274..000000000 --- a/drivers/staging/lustre/include/linux/libcfs/linux/linux-mem.h +++ /dev/null @@ -1,80 +0,0 @@ -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. - * - * GPL HEADER END - */ -/* - * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * libcfs/include/libcfs/linux/linux-mem.h - * - * Basic library routines. - */ - -#ifndef __LIBCFS_LINUX_CFS_MEM_H__ -#define __LIBCFS_LINUX_CFS_MEM_H__ - -#ifndef __LIBCFS_LIBCFS_H__ -#error Do not #include this file directly. #include instead -#endif - -#include -#include -#include -#include -#include -#include - -#ifndef HAVE_LIBCFS_CPT -/* Need this for cfs_cpt_table */ -#include "../libcfs_cpu.h" -#endif - -#define CFS_PAGE_MASK (~((__u64)PAGE_SIZE-1)) -#define page_index(p) ((p)->index) - -#define memory_pressure_get() (current->flags & PF_MEMALLOC) -#define memory_pressure_set() do { current->flags |= PF_MEMALLOC; } while (0) -#define memory_pressure_clr() do { current->flags &= ~PF_MEMALLOC; } while (0) - -#if BITS_PER_LONG == 32 -/* limit to lowmem on 32-bit systems */ -#define NUM_CACHEPAGES \ - min(totalram_pages, 1UL << (30 - PAGE_SHIFT) * 3 / 4) -#else -#define NUM_CACHEPAGES totalram_pages -#endif - -#define DECL_MMSPACE mm_segment_t __oldfs -#define MMSPACE_OPEN \ - do { __oldfs = get_fs(); set_fs(get_ds()); } while (0) -#define MMSPACE_CLOSE set_fs(__oldfs) - -#endif /* __LINUX_CFS_MEM_H__ */ diff --git a/drivers/staging/lustre/include/linux/libcfs/linux/linux-time.h b/drivers/staging/lustre/include/linux/libcfs/linux/linux-time.h index ed8764b11..7656b09b8 100644 --- a/drivers/staging/lustre/include/linux/libcfs/linux/linux-time.h +++ b/drivers/staging/lustre/include/linux/libcfs/linux/linux-time.h @@ -70,12 +70,12 @@ static inline unsigned long cfs_time_current(void) static inline long cfs_time_seconds(int seconds) { - return ((long)seconds) * HZ; + return ((long)seconds) * msecs_to_jiffies(MSEC_PER_SEC); } static inline long cfs_duration_sec(long d) { - return d / HZ; + return d / msecs_to_jiffies(MSEC_PER_SEC); } #define cfs_time_current_64 get_jiffies_64 diff --git a/drivers/staging/lustre/include/linux/lnet/lib-dlc.h b/drivers/staging/lustre/include/linux/lnet/lib-dlc.h index 84a19e96e..6ce9accb9 100644 --- a/drivers/staging/lustre/include/linux/lnet/lib-dlc.h +++ b/drivers/staging/lustre/include/linux/lnet/lib-dlc.h @@ -37,10 +37,37 @@ #define LNET_MAX_SHOW_NUM_CPT 128 #define LNET_UNDEFINED_HOPS ((__u32) -1) +struct lnet_ioctl_config_lnd_cmn_tunables { + __u32 lct_version; + __u32 lct_peer_timeout; + __u32 lct_peer_tx_credits; + __u32 lct_peer_rtr_credits; + __u32 lct_max_tx_credits; +}; + +struct lnet_ioctl_config_o2iblnd_tunables { + __u32 lnd_version; + __u32 lnd_peercredits_hiw; + __u32 lnd_map_on_demand; + __u32 lnd_concurrent_sends; + __u32 lnd_fmr_pool_size; + __u32 lnd_fmr_flush_trigger; + __u32 lnd_fmr_cache; + __u32 pad; +}; + +struct lnet_ioctl_config_lnd_tunables { + struct lnet_ioctl_config_lnd_cmn_tunables lt_cmn; + union { + struct lnet_ioctl_config_o2iblnd_tunables lt_o2ib; + } lt_tun_u; +}; + struct lnet_ioctl_net_config { char ni_interfaces[LNET_MAX_INTERFACES][LNET_MAX_STR_LEN]; __u32 ni_status; __u32 ni_cpts[LNET_MAX_SHOW_NUM_CPT]; + char cfg_bulk[0]; }; #define LNET_TINY_BUF_IDX 0 @@ -81,7 +108,7 @@ struct lnet_ioctl_config_data { __s32 net_peer_rtr_credits; __s32 net_max_tx_credits; __u32 net_cksum_algo; - __u32 net_pad; + __u32 net_interface_count; } cfg_net; struct { __u32 buf_enable; diff --git a/drivers/staging/lustre/include/linux/lnet/lib-lnet.h b/drivers/staging/lustre/include/linux/lnet/lib-lnet.h index dfc0208dc..513a8225f 100644 --- a/drivers/staging/lustre/include/linux/lnet/lib-lnet.h +++ b/drivers/staging/lustre/include/linux/lnet/lib-lnet.h @@ -463,10 +463,6 @@ int lnet_del_route(__u32 net, lnet_nid_t gw_nid); void lnet_destroy_routes(void); int lnet_get_route(int idx, __u32 *net, __u32 *hops, lnet_nid_t *gateway, __u32 *alive, __u32 *priority); -int lnet_get_net_config(int idx, __u32 *cpt_count, __u64 *nid, - int *peer_timeout, int *peer_tx_credits, - int *peer_rtr_cr, int *max_tx_credits, - struct lnet_ioctl_net_config *net_config); int lnet_get_rtr_pool_cfg(int idx, struct lnet_ioctl_pool_cfg *pool_cfg); void lnet_router_debugfs_init(void); @@ -478,9 +474,8 @@ int lnet_rtrpools_enable(void); void lnet_rtrpools_disable(void); void lnet_rtrpools_free(int keep_pools); lnet_remotenet_t *lnet_find_net_locked(__u32 net); -int lnet_dyn_add_ni(lnet_pid_t requested_pid, char *nets, - __s32 peer_timeout, __s32 peer_cr, __s32 peer_buf_cr, - __s32 credits); +int lnet_dyn_add_ni(lnet_pid_t requested_pid, + struct lnet_ioctl_config_data *conf); int lnet_dyn_del_ni(__u32 net); int lnet_clear_lazy_portal(struct lnet_ni *ni, int portal, char *reason); diff --git a/drivers/staging/lustre/include/linux/lnet/lib-types.h b/drivers/staging/lustre/include/linux/lnet/lib-types.h index 29c72f8c2..24c4a08e6 100644 --- a/drivers/staging/lustre/include/linux/lnet/lib-types.h +++ b/drivers/staging/lustre/include/linux/lnet/lib-types.h @@ -273,6 +273,8 @@ typedef struct lnet_ni { int **ni_refs; /* percpt reference count */ time64_t ni_last_alive;/* when I was last alive */ lnet_ni_status_t *ni_status; /* my health status */ + /* per NI LND tunables */ + struct lnet_ioctl_config_lnd_tunables *ni_lnd_tunables; /* equivalent interfaces to use */ char *ni_interfaces[LNET_MAX_INTERFACES]; } lnet_ni_t; diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c index 0d32e6541..6c59f2ff2 100644 --- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c +++ b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c @@ -335,8 +335,8 @@ int kiblnd_create_peer(lnet_ni_t *ni, kib_peer_t **peerp, lnet_nid_t nid) peer->ibp_nid = nid; peer->ibp_error = 0; peer->ibp_last_alive = 0; - peer->ibp_max_frags = IBLND_CFG_RDMA_FRAGS; - peer->ibp_queue_depth = *kiblnd_tunables.kib_peertxcredits; + peer->ibp_max_frags = kiblnd_cfg_rdma_frags(peer->ibp_ni); + peer->ibp_queue_depth = ni->ni_peertxcredits; atomic_set(&peer->ibp_refcount, 1); /* 1 ref for caller */ INIT_LIST_HEAD(&peer->ibp_list); /* not in the peer table yet */ @@ -1283,65 +1283,86 @@ static void kiblnd_map_tx_pool(kib_tx_pool_t *tpo) } } -struct ib_mr *kiblnd_find_rd_dma_mr(kib_hca_dev_t *hdev, kib_rdma_desc_t *rd, +struct ib_mr *kiblnd_find_rd_dma_mr(struct lnet_ni *ni, kib_rdma_desc_t *rd, int negotiated_nfrags) { - __u16 nfrags = (negotiated_nfrags != -1) ? - negotiated_nfrags : *kiblnd_tunables.kib_map_on_demand; + kib_net_t *net = ni->ni_data; + kib_hca_dev_t *hdev = net->ibn_dev->ibd_hdev; + struct lnet_ioctl_config_o2iblnd_tunables *tunables; + __u16 nfrags; + int mod; + + tunables = &ni->ni_lnd_tunables->lt_tun_u.lt_o2ib; + mod = tunables->lnd_map_on_demand; + nfrags = (negotiated_nfrags != -1) ? negotiated_nfrags : mod; LASSERT(hdev->ibh_mrs); - if (*kiblnd_tunables.kib_map_on_demand > 0 && - nfrags <= rd->rd_nfrags) + if (mod > 0 && nfrags <= rd->rd_nfrags) return NULL; return hdev->ibh_mrs; } -static void kiblnd_destroy_fmr_pool(kib_fmr_pool_t *pool) +static void kiblnd_destroy_fmr_pool(kib_fmr_pool_t *fpo) { - LASSERT(!pool->fpo_map_count); + LASSERT(!fpo->fpo_map_count); - if (pool->fpo_fmr_pool) - ib_destroy_fmr_pool(pool->fpo_fmr_pool); + if (fpo->fpo_is_fmr) { + if (fpo->fmr.fpo_fmr_pool) + ib_destroy_fmr_pool(fpo->fmr.fpo_fmr_pool); + } else { + struct kib_fast_reg_descriptor *frd, *tmp; + int i = 0; + + list_for_each_entry_safe(frd, tmp, &fpo->fast_reg.fpo_pool_list, + frd_list) { + list_del(&frd->frd_list); + ib_dereg_mr(frd->frd_mr); + LIBCFS_FREE(frd, sizeof(*frd)); + i++; + } + if (i < fpo->fast_reg.fpo_pool_size) + CERROR("FastReg pool still has %d regions registered\n", + fpo->fast_reg.fpo_pool_size - i); + } - if (pool->fpo_hdev) - kiblnd_hdev_decref(pool->fpo_hdev); + if (fpo->fpo_hdev) + kiblnd_hdev_decref(fpo->fpo_hdev); - LIBCFS_FREE(pool, sizeof(*pool)); + LIBCFS_FREE(fpo, sizeof(*fpo)); } static void kiblnd_destroy_fmr_pool_list(struct list_head *head) { - kib_fmr_pool_t *pool; + kib_fmr_pool_t *fpo, *tmp; - while (!list_empty(head)) { - pool = list_entry(head->next, kib_fmr_pool_t, fpo_list); - list_del(&pool->fpo_list); - kiblnd_destroy_fmr_pool(pool); + list_for_each_entry_safe(fpo, tmp, head, fpo_list) { + list_del(&fpo->fpo_list); + kiblnd_destroy_fmr_pool(fpo); } } -static int kiblnd_fmr_pool_size(int ncpts) +static int +kiblnd_fmr_pool_size(struct lnet_ioctl_config_o2iblnd_tunables *tunables, + int ncpts) { - int size = *kiblnd_tunables.kib_fmr_pool_size / ncpts; + int size = tunables->lnd_fmr_pool_size / ncpts; return max(IBLND_FMR_POOL, size); } -static int kiblnd_fmr_flush_trigger(int ncpts) +static int +kiblnd_fmr_flush_trigger(struct lnet_ioctl_config_o2iblnd_tunables *tunables, + int ncpts) { - int size = *kiblnd_tunables.kib_fmr_flush_trigger / ncpts; + int size = tunables->lnd_fmr_flush_trigger / ncpts; return max(IBLND_FMR_POOL_FLUSH, size); } -static int kiblnd_create_fmr_pool(kib_fmr_poolset_t *fps, - kib_fmr_pool_t **pp_fpo) +static int kiblnd_alloc_fmr_pool(kib_fmr_poolset_t *fps, kib_fmr_pool_t *fpo) { - /* FMR pool for RDMA */ - kib_dev_t *dev = fps->fps_net->ibn_dev; - kib_fmr_pool_t *fpo; struct ib_fmr_pool_param param = { .max_pages_per_fmr = LNET_MAX_PAYLOAD / PAGE_SIZE, .page_shift = PAGE_SHIFT, @@ -1351,7 +1372,78 @@ static int kiblnd_create_fmr_pool(kib_fmr_poolset_t *fps, .dirty_watermark = fps->fps_flush_trigger, .flush_function = NULL, .flush_arg = NULL, - .cache = !!*kiblnd_tunables.kib_fmr_cache}; + .cache = !!fps->fps_cache }; + int rc = 0; + + fpo->fmr.fpo_fmr_pool = ib_create_fmr_pool(fpo->fpo_hdev->ibh_pd, + ¶m); + if (IS_ERR(fpo->fmr.fpo_fmr_pool)) { + rc = PTR_ERR(fpo->fmr.fpo_fmr_pool); + if (rc != -ENOSYS) + CERROR("Failed to create FMR pool: %d\n", rc); + else + CERROR("FMRs are not supported\n"); + } + + return rc; +} + +static int kiblnd_alloc_freg_pool(kib_fmr_poolset_t *fps, kib_fmr_pool_t *fpo) +{ + struct kib_fast_reg_descriptor *frd, *tmp; + int i, rc; + + INIT_LIST_HEAD(&fpo->fast_reg.fpo_pool_list); + fpo->fast_reg.fpo_pool_size = 0; + for (i = 0; i < fps->fps_pool_size; i++) { + LIBCFS_CPT_ALLOC(frd, lnet_cpt_table(), fps->fps_cpt, + sizeof(*frd)); + if (!frd) { + CERROR("Failed to allocate a new fast_reg descriptor\n"); + rc = -ENOMEM; + goto out; + } + + frd->frd_mr = ib_alloc_mr(fpo->fpo_hdev->ibh_pd, + IB_MR_TYPE_MEM_REG, + LNET_MAX_PAYLOAD / PAGE_SIZE); + if (IS_ERR(frd->frd_mr)) { + rc = PTR_ERR(frd->frd_mr); + CERROR("Failed to allocate ib_alloc_mr: %d\n", rc); + frd->frd_mr = NULL; + goto out_middle; + } + + frd->frd_valid = true; + + list_add_tail(&frd->frd_list, &fpo->fast_reg.fpo_pool_list); + fpo->fast_reg.fpo_pool_size++; + } + + return 0; + +out_middle: + if (frd->frd_mr) + ib_dereg_mr(frd->frd_mr); + LIBCFS_FREE(frd, sizeof(*frd)); + +out: + list_for_each_entry_safe(frd, tmp, &fpo->fast_reg.fpo_pool_list, + frd_list) { + list_del(&frd->frd_list); + ib_dereg_mr(frd->frd_mr); + LIBCFS_FREE(frd, sizeof(*frd)); + } + + return rc; +} + +static int kiblnd_create_fmr_pool(kib_fmr_poolset_t *fps, + kib_fmr_pool_t **pp_fpo) +{ + kib_dev_t *dev = fps->fps_net->ibn_dev; + struct ib_device_attr *dev_attr; + kib_fmr_pool_t *fpo; int rc; LIBCFS_CPT_ALLOC(fpo, lnet_cpt_table(), fps->fps_cpt, sizeof(*fpo)); @@ -1359,22 +1451,41 @@ static int kiblnd_create_fmr_pool(kib_fmr_poolset_t *fps, return -ENOMEM; fpo->fpo_hdev = kiblnd_current_hdev(dev); - - fpo->fpo_fmr_pool = ib_create_fmr_pool(fpo->fpo_hdev->ibh_pd, ¶m); - if (IS_ERR(fpo->fpo_fmr_pool)) { - rc = PTR_ERR(fpo->fpo_fmr_pool); - CERROR("Failed to create FMR pool: %d\n", rc); - - kiblnd_hdev_decref(fpo->fpo_hdev); - LIBCFS_FREE(fpo, sizeof(*fpo)); - return rc; + dev_attr = &fpo->fpo_hdev->ibh_ibdev->attrs; + + /* Check for FMR or FastReg support */ + fpo->fpo_is_fmr = 0; + if (fpo->fpo_hdev->ibh_ibdev->alloc_fmr && + fpo->fpo_hdev->ibh_ibdev->dealloc_fmr && + fpo->fpo_hdev->ibh_ibdev->map_phys_fmr && + fpo->fpo_hdev->ibh_ibdev->unmap_fmr) { + LCONSOLE_INFO("Using FMR for registration\n"); + fpo->fpo_is_fmr = 1; + } else if (dev_attr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) { + LCONSOLE_INFO("Using FastReg for registration\n"); + } else { + rc = -ENOSYS; + LCONSOLE_ERROR_MSG(rc, "IB device does not support FMRs nor FastRegs, can't register memory\n"); + goto out_fpo; } + if (fpo->fpo_is_fmr) + rc = kiblnd_alloc_fmr_pool(fps, fpo); + else + rc = kiblnd_alloc_freg_pool(fps, fpo); + if (rc) + goto out_fpo; + fpo->fpo_deadline = cfs_time_shift(IBLND_POOL_DEADLINE); - fpo->fpo_owner = fps; + fpo->fpo_owner = fps; *pp_fpo = fpo; return 0; + +out_fpo: + kiblnd_hdev_decref(fpo->fpo_hdev); + LIBCFS_FREE(fpo, sizeof(*fpo)); + return rc; } static void kiblnd_fail_fmr_poolset(kib_fmr_poolset_t *fps, @@ -1407,9 +1518,10 @@ static void kiblnd_fini_fmr_poolset(kib_fmr_poolset_t *fps) } } -static int kiblnd_init_fmr_poolset(kib_fmr_poolset_t *fps, int cpt, - kib_net_t *net, int pool_size, - int flush_trigger) +static int +kiblnd_init_fmr_poolset(kib_fmr_poolset_t *fps, int cpt, int ncpts, + kib_net_t *net, + struct lnet_ioctl_config_o2iblnd_tunables *tunables) { kib_fmr_pool_t *fpo; int rc; @@ -1418,8 +1530,11 @@ static int kiblnd_init_fmr_poolset(kib_fmr_poolset_t *fps, int cpt, fps->fps_net = net; fps->fps_cpt = cpt; - fps->fps_pool_size = pool_size; - fps->fps_flush_trigger = flush_trigger; + + fps->fps_pool_size = kiblnd_fmr_pool_size(tunables, ncpts); + fps->fps_flush_trigger = kiblnd_fmr_flush_trigger(tunables, ncpts); + fps->fps_cache = tunables->lnd_fmr_cache; + spin_lock_init(&fps->fps_lock); INIT_LIST_HEAD(&fps->fps_pool_list); INIT_LIST_HEAD(&fps->fps_failed_pool_list); @@ -1440,25 +1555,64 @@ static int kiblnd_fmr_pool_is_idle(kib_fmr_pool_t *fpo, unsigned long now) return cfs_time_aftereq(now, fpo->fpo_deadline); } +static int +kiblnd_map_tx_pages(kib_tx_t *tx, kib_rdma_desc_t *rd) +{ + __u64 *pages = tx->tx_pages; + kib_hca_dev_t *hdev; + int npages; + int size; + int i; + + hdev = tx->tx_pool->tpo_hdev; + + for (i = 0, npages = 0; i < rd->rd_nfrags; i++) { + for (size = 0; size < rd->rd_frags[i].rf_nob; + size += hdev->ibh_page_size) { + pages[npages++] = (rd->rd_frags[i].rf_addr & + hdev->ibh_page_mask) + size; + } + } + + return npages; +} + void kiblnd_fmr_pool_unmap(kib_fmr_t *fmr, int status) { LIST_HEAD(zombies); kib_fmr_pool_t *fpo = fmr->fmr_pool; - kib_fmr_poolset_t *fps = fpo->fpo_owner; + kib_fmr_poolset_t *fps; unsigned long now = cfs_time_current(); kib_fmr_pool_t *tmp; int rc; - rc = ib_fmr_pool_unmap(fmr->fmr_pfmr); - LASSERT(!rc); + if (!fpo) + return; - if (status) { - rc = ib_flush_fmr_pool(fpo->fpo_fmr_pool); - LASSERT(!rc); - } + fps = fpo->fpo_owner; + if (fpo->fpo_is_fmr) { + if (fmr->fmr_pfmr) { + rc = ib_fmr_pool_unmap(fmr->fmr_pfmr); + LASSERT(!rc); + fmr->fmr_pfmr = NULL; + } + + if (status) { + rc = ib_flush_fmr_pool(fpo->fmr.fpo_fmr_pool); + LASSERT(!rc); + } + } else { + struct kib_fast_reg_descriptor *frd = fmr->fmr_frd; + if (frd) { + frd->frd_valid = false; + spin_lock(&fps->fps_lock); + list_add_tail(&frd->frd_list, &fpo->fast_reg.fpo_pool_list); + spin_unlock(&fps->fps_lock); + fmr->fmr_frd = NULL; + } + } fmr->fmr_pool = NULL; - fmr->fmr_pfmr = NULL; spin_lock(&fps->fps_lock); fpo->fpo_map_count--; /* decref the pool */ @@ -1479,11 +1633,15 @@ void kiblnd_fmr_pool_unmap(kib_fmr_t *fmr, int status) kiblnd_destroy_fmr_pool_list(&zombies); } -int kiblnd_fmr_pool_map(kib_fmr_poolset_t *fps, __u64 *pages, int npages, - __u64 iov, kib_fmr_t *fmr) +int kiblnd_fmr_pool_map(kib_fmr_poolset_t *fps, kib_tx_t *tx, + kib_rdma_desc_t *rd, __u32 nob, __u64 iov, + kib_fmr_t *fmr) { - struct ib_pool_fmr *pfmr; + __u64 *pages = tx->tx_pages; + bool is_rx = (rd != tx->tx_rd); + bool tx_pages_mapped = 0; kib_fmr_pool_t *fpo; + int npages = 0; __u64 version; int rc; @@ -1493,21 +1651,95 @@ int kiblnd_fmr_pool_map(kib_fmr_poolset_t *fps, __u64 *pages, int npages, list_for_each_entry(fpo, &fps->fps_pool_list, fpo_list) { fpo->fpo_deadline = cfs_time_shift(IBLND_POOL_DEADLINE); fpo->fpo_map_count++; - spin_unlock(&fps->fps_lock); - pfmr = ib_fmr_pool_map_phys(fpo->fpo_fmr_pool, - pages, npages, iov); - if (likely(!IS_ERR(pfmr))) { - fmr->fmr_pool = fpo; - fmr->fmr_pfmr = pfmr; - return 0; + if (fpo->fpo_is_fmr) { + struct ib_pool_fmr *pfmr; + + spin_unlock(&fps->fps_lock); + + if (!tx_pages_mapped) { + npages = kiblnd_map_tx_pages(tx, rd); + tx_pages_mapped = 1; + } + + pfmr = ib_fmr_pool_map_phys(fpo->fmr.fpo_fmr_pool, + pages, npages, iov); + if (likely(!IS_ERR(pfmr))) { + fmr->fmr_key = is_rx ? pfmr->fmr->rkey : + pfmr->fmr->lkey; + fmr->fmr_frd = NULL; + fmr->fmr_pfmr = pfmr; + fmr->fmr_pool = fpo; + return 0; + } + rc = PTR_ERR(pfmr); + } else { + if (!list_empty(&fpo->fast_reg.fpo_pool_list)) { + struct kib_fast_reg_descriptor *frd; + struct ib_reg_wr *wr; + struct ib_mr *mr; + int n; + + frd = list_first_entry(&fpo->fast_reg.fpo_pool_list, + struct kib_fast_reg_descriptor, + frd_list); + list_del(&frd->frd_list); + spin_unlock(&fps->fps_lock); + + mr = frd->frd_mr; + + if (!frd->frd_valid) { + __u32 key = is_rx ? mr->rkey : mr->lkey; + struct ib_send_wr *inv_wr; + + inv_wr = &frd->frd_inv_wr; + memset(inv_wr, 0, sizeof(*inv_wr)); + inv_wr->opcode = IB_WR_LOCAL_INV; + inv_wr->wr_id = IBLND_WID_MR; + inv_wr->ex.invalidate_rkey = key; + + /* Bump the key */ + key = ib_inc_rkey(key); + ib_update_fast_reg_key(mr, key); + } + + n = ib_map_mr_sg(mr, tx->tx_frags, + tx->tx_nfrags, NULL, PAGE_SIZE); + if (unlikely(n != tx->tx_nfrags)) { + CERROR("Failed to map mr %d/%d elements\n", + n, tx->tx_nfrags); + return n < 0 ? n : -EINVAL; + } + + mr->iova = iov; + + /* Prepare FastReg WR */ + wr = &frd->frd_fastreg_wr; + memset(wr, 0, sizeof(*wr)); + wr->wr.opcode = IB_WR_REG_MR; + wr->wr.wr_id = IBLND_WID_MR; + wr->wr.num_sge = 0; + wr->wr.send_flags = 0; + wr->mr = mr; + wr->key = is_rx ? mr->rkey : mr->lkey; + wr->access = (IB_ACCESS_LOCAL_WRITE | + IB_ACCESS_REMOTE_WRITE); + + fmr->fmr_key = is_rx ? mr->rkey : mr->lkey; + fmr->fmr_frd = frd; + fmr->fmr_pfmr = NULL; + fmr->fmr_pool = fpo; + return 0; + } + spin_unlock(&fps->fps_lock); + rc = -EBUSY; } spin_lock(&fps->fps_lock); fpo->fpo_map_count--; - if (PTR_ERR(pfmr) != -EAGAIN) { + if (rc != -EAGAIN) { spin_unlock(&fps->fps_lock); - return PTR_ERR(pfmr); + return rc; } /* EAGAIN and ... */ @@ -1932,25 +2164,28 @@ static void kiblnd_net_fini_pools(kib_net_t *net) } } -static int kiblnd_net_init_pools(kib_net_t *net, __u32 *cpts, int ncpts) +static int kiblnd_net_init_pools(kib_net_t *net, lnet_ni_t *ni, __u32 *cpts, + int ncpts) { + struct lnet_ioctl_config_o2iblnd_tunables *tunables; unsigned long flags; int cpt; - int rc = 0; + int rc; int i; + tunables = &ni->ni_lnd_tunables->lt_tun_u.lt_o2ib; + read_lock_irqsave(&kiblnd_data.kib_global_lock, flags); - if (!*kiblnd_tunables.kib_map_on_demand) { + if (!tunables->lnd_map_on_demand) { read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); goto create_tx_pool; } read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); - if (*kiblnd_tunables.kib_fmr_pool_size < - *kiblnd_tunables.kib_ntx / 4) { + if (tunables->lnd_fmr_pool_size < *kiblnd_tunables.kib_ntx / 4) { CERROR("Can't set fmr pool size (%d) < ntx / 4(%d)\n", - *kiblnd_tunables.kib_fmr_pool_size, + tunables->lnd_fmr_pool_size, *kiblnd_tunables.kib_ntx / 4); rc = -EINVAL; goto failed; @@ -1965,8 +2200,11 @@ static int kiblnd_net_init_pools(kib_net_t *net, __u32 *cpts, int ncpts) /* * premapping can fail if ibd_nmr > 1, so we always create * FMR pool and map-on-demand if premapping failed + * + * cfs_precpt_alloc is creating an array of struct kib_fmr_poolset + * The number of struct kib_fmr_poolsets create is equal to the + * number of CPTs that exist, i.e net->ibn_fmr_ps[cpt]. */ - net->ibn_fmr_ps = cfs_percpt_alloc(lnet_cpt_table(), sizeof(kib_fmr_poolset_t)); if (!net->ibn_fmr_ps) { @@ -1977,9 +2215,8 @@ static int kiblnd_net_init_pools(kib_net_t *net, __u32 *cpts, int ncpts) for (i = 0; i < ncpts; i++) { cpt = !cpts ? i : cpts[i]; - rc = kiblnd_init_fmr_poolset(net->ibn_fmr_ps[cpt], cpt, net, - kiblnd_fmr_pool_size(ncpts), - kiblnd_fmr_flush_trigger(ncpts)); + rc = kiblnd_init_fmr_poolset(net->ibn_fmr_ps[cpt], cpt, ncpts, + net, tunables); if (rc) { CERROR("Can't initialize FMR pool for CPT %d: %d\n", cpt, rc); @@ -1991,6 +2228,11 @@ static int kiblnd_net_init_pools(kib_net_t *net, __u32 *cpts, int ncpts) LASSERT(i == ncpts); create_tx_pool: + /* + * cfs_precpt_alloc is creating an array of struct kib_tx_poolset + * The number of struct kib_tx_poolsets create is equal to the + * number of CPTs that exist, i.e net->ibn_tx_ps[cpt]. + */ net->ibn_tx_ps = cfs_percpt_alloc(lnet_cpt_table(), sizeof(kib_tx_poolset_t)); if (!net->ibn_tx_ps) { @@ -2694,10 +2936,9 @@ static int kiblnd_startup(lnet_ni_t *ni) net->ibn_incarnation = tv.tv_sec * USEC_PER_SEC + tv.tv_nsec / NSEC_PER_USEC; - ni->ni_peertimeout = *kiblnd_tunables.kib_peertimeout; - ni->ni_maxtxcredits = *kiblnd_tunables.kib_credits; - ni->ni_peertxcredits = *kiblnd_tunables.kib_peertxcredits; - ni->ni_peerrtrcredits = *kiblnd_tunables.kib_peerrtrcredits; + rc = kiblnd_tunables_setup(ni); + if (rc) + goto net_failed; if (ni->ni_interfaces[0]) { /* Use the IPoIB interface specified in 'networks=' */ @@ -2736,7 +2977,7 @@ static int kiblnd_startup(lnet_ni_t *ni) if (rc) goto failed; - rc = kiblnd_net_init_pools(net, ni->ni_cpts, ni->ni_ncpts); + rc = kiblnd_net_init_pools(net, ni, ni->ni_cpts, ni->ni_ncpts); if (rc) { CERROR("Failed to initialize NI pools: %d\n", rc); goto failed; @@ -2779,8 +3020,6 @@ static void __exit ko2iblnd_exit(void) static int __init ko2iblnd_init(void) { - int rc; - CLASSERT(sizeof(kib_msg_t) <= IBLND_MSG_SIZE); CLASSERT(offsetof(kib_msg_t, ibm_u.get.ibgm_rd.rd_frags[IBLND_MAX_RDMA_FRAGS]) @@ -2789,9 +3028,7 @@ static int __init ko2iblnd_init(void) ibm_u.putack.ibpam_rd.rd_frags[IBLND_MAX_RDMA_FRAGS]) <= IBLND_MSG_SIZE); - rc = kiblnd_tunables_init(); - if (rc) - return rc; + kiblnd_tunables_init(); lnet_register_lnd(&the_o2iblnd); diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h index bfcbdd167..b22984fd9 100644 --- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h +++ b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h @@ -87,22 +87,10 @@ typedef struct { int *kib_timeout; /* comms timeout (seconds) */ int *kib_keepalive; /* keepalive timeout (seconds) */ int *kib_ntx; /* # tx descs */ - int *kib_credits; /* # concurrent sends */ - int *kib_peertxcredits; /* # concurrent sends to 1 peer */ - int *kib_peerrtrcredits; /* # per-peer router buffer credits */ - int *kib_peercredits_hiw; /* # when eagerly to return credits */ - int *kib_peertimeout; /* seconds to consider peer dead */ char **kib_default_ipif; /* default IPoIB interface */ int *kib_retry_count; int *kib_rnr_retry_count; - int *kib_concurrent_sends; /* send work queue sizing */ int *kib_ib_mtu; /* IB MTU */ - int *kib_map_on_demand; /* map-on-demand if RD has more */ - /* fragments than this value, 0 */ - /* disable map-on-demand */ - int *kib_fmr_pool_size; /* # FMRs in pool */ - int *kib_fmr_flush_trigger; /* When to trigger FMR flush */ - int *kib_fmr_cache; /* enable FMR pool cache? */ int *kib_require_priv_port; /* accept only privileged ports */ int *kib_use_priv_port; /* use privileged port for active connect */ int *kib_nscheds; /* # threads on each CPT */ @@ -116,43 +104,21 @@ extern kib_tunables_t kiblnd_tunables; #define IBLND_CREDITS_DEFAULT 8 /* default # of peer credits */ #define IBLND_CREDITS_MAX ((typeof(((kib_msg_t *) 0)->ibm_credits)) - 1) /* Max # of peer credits */ -#define IBLND_MSG_QUEUE_SIZE(v) ((v) == IBLND_MSG_VERSION_1 ? \ - IBLND_MSG_QUEUE_SIZE_V1 : \ - *kiblnd_tunables.kib_peertxcredits) /* # messages/RDMAs in-flight */ -#define IBLND_CREDITS_HIGHWATER(v) ((v) == IBLND_MSG_VERSION_1 ? \ - IBLND_CREDIT_HIGHWATER_V1 : \ - *kiblnd_tunables.kib_peercredits_hiw) /* when eagerly to return credits */ +/* when eagerly to return credits */ +#define IBLND_CREDITS_HIGHWATER(t, v) ((v) == IBLND_MSG_VERSION_1 ? \ + IBLND_CREDIT_HIGHWATER_V1 : \ + t->lnd_peercredits_hiw) #define kiblnd_rdma_create_id(cb, dev, ps, qpt) rdma_create_id(&init_net, \ cb, dev, \ ps, qpt) -static inline int -kiblnd_concurrent_sends_v1(void) -{ - if (*kiblnd_tunables.kib_concurrent_sends > IBLND_MSG_QUEUE_SIZE_V1 * 2) - return IBLND_MSG_QUEUE_SIZE_V1 * 2; - - if (*kiblnd_tunables.kib_concurrent_sends < IBLND_MSG_QUEUE_SIZE_V1 / 2) - return IBLND_MSG_QUEUE_SIZE_V1 / 2; - - return *kiblnd_tunables.kib_concurrent_sends; -} - -#define IBLND_CONCURRENT_SENDS(v) ((v) == IBLND_MSG_VERSION_1 ? \ - kiblnd_concurrent_sends_v1() : \ - *kiblnd_tunables.kib_concurrent_sends) /* 2 OOB shall suffice for 1 keepalive and 1 returning credits */ #define IBLND_OOB_CAPABLE(v) ((v) != IBLND_MSG_VERSION_1) #define IBLND_OOB_MSGS(v) (IBLND_OOB_CAPABLE(v) ? 2 : 0) #define IBLND_MSG_SIZE (4 << 10) /* max size of queued messages (inc hdr) */ #define IBLND_MAX_RDMA_FRAGS LNET_MAX_IOV /* max # of fragments supported */ -#define IBLND_CFG_RDMA_FRAGS (*kiblnd_tunables.kib_map_on_demand ? \ - *kiblnd_tunables.kib_map_on_demand : \ - IBLND_MAX_RDMA_FRAGS) /* max # of fragments configured by user */ -#define IBLND_RDMA_FRAGS(v) ((v) == IBLND_MSG_VERSION_1 ? \ - IBLND_MAX_RDMA_FRAGS : IBLND_CFG_RDMA_FRAGS) /************************/ /* derived constants... */ @@ -171,7 +137,8 @@ kiblnd_concurrent_sends_v1(void) /* WRs and CQEs (per connection) */ #define IBLND_RECV_WRS(c) IBLND_RX_MSGS(c) #define IBLND_SEND_WRS(c) \ - ((c->ibc_max_frags + 1) * IBLND_CONCURRENT_SENDS(c->ibc_version)) + ((c->ibc_max_frags + 1) * kiblnd_concurrent_sends(c->ibc_version, \ + c->ibc_peer->ibp_ni)) #define IBLND_CQ_ENTRIES(c) (IBLND_RECV_WRS(c) + IBLND_SEND_WRS(c)) struct kib_hca_dev; @@ -286,24 +253,44 @@ typedef struct { int fps_cpt; /* CPT id */ int fps_pool_size; int fps_flush_trigger; + int fps_cache; int fps_increasing; /* is allocating new pool */ unsigned long fps_next_retry; /* time stamp for retry if*/ /* failed to allocate */ } kib_fmr_poolset_t; +struct kib_fast_reg_descriptor { /* For fast registration */ + struct list_head frd_list; + struct ib_send_wr frd_inv_wr; + struct ib_reg_wr frd_fastreg_wr; + struct ib_mr *frd_mr; + bool frd_valid; +}; + typedef struct { struct list_head fpo_list; /* chain on pool list */ struct kib_hca_dev *fpo_hdev; /* device for this pool */ kib_fmr_poolset_t *fpo_owner; /* owner of this pool */ - struct ib_fmr_pool *fpo_fmr_pool; /* IB FMR pool */ + union { + struct { + struct ib_fmr_pool *fpo_fmr_pool; /* IB FMR pool */ + } fmr; + struct { /* For fast registration */ + struct list_head fpo_pool_list; + int fpo_pool_size; + } fast_reg; + }; unsigned long fpo_deadline; /* deadline of this pool */ int fpo_failed; /* fmr pool is failed */ int fpo_map_count; /* # of mapped FMR */ + int fpo_is_fmr; } kib_fmr_pool_t; typedef struct { - struct ib_pool_fmr *fmr_pfmr; /* IB pool fmr */ - kib_fmr_pool_t *fmr_pool; /* pool of FMR */ + kib_fmr_pool_t *fmr_pool; /* pool of FMR */ + struct ib_pool_fmr *fmr_pfmr; /* IB pool fmr */ + struct kib_fast_reg_descriptor *fmr_frd; + u32 fmr_key; } kib_fmr_t; typedef struct kib_net { @@ -615,6 +602,48 @@ extern kib_data_t kiblnd_data; void kiblnd_hdev_destroy(kib_hca_dev_t *hdev); +int kiblnd_msg_queue_size(int version, struct lnet_ni *ni); + +/* max # of fragments configured by user */ +static inline int +kiblnd_cfg_rdma_frags(struct lnet_ni *ni) +{ + struct lnet_ioctl_config_o2iblnd_tunables *tunables; + int mod; + + tunables = &ni->ni_lnd_tunables->lt_tun_u.lt_o2ib; + mod = tunables->lnd_map_on_demand; + return mod ? mod : IBLND_MAX_RDMA_FRAGS; +} + +static inline int +kiblnd_rdma_frags(int version, struct lnet_ni *ni) +{ + return version == IBLND_MSG_VERSION_1 ? + IBLND_MAX_RDMA_FRAGS : + kiblnd_cfg_rdma_frags(ni); +} + +static inline int +kiblnd_concurrent_sends(int version, struct lnet_ni *ni) +{ + struct lnet_ioctl_config_o2iblnd_tunables *tunables; + int concurrent_sends; + + tunables = &ni->ni_lnd_tunables->lt_tun_u.lt_o2ib; + concurrent_sends = tunables->lnd_concurrent_sends; + + if (version == IBLND_MSG_VERSION_1) { + if (concurrent_sends > IBLND_MSG_QUEUE_SIZE_V1 * 2) + return IBLND_MSG_QUEUE_SIZE_V1 * 2; + + if (concurrent_sends < IBLND_MSG_QUEUE_SIZE_V1 / 2) + return IBLND_MSG_QUEUE_SIZE_V1 / 2; + } + + return concurrent_sends; +} + static inline void kiblnd_hdev_addref_locked(kib_hca_dev_t *hdev) { @@ -737,10 +766,14 @@ kiblnd_send_keepalive(kib_conn_t *conn) static inline int kiblnd_need_noop(kib_conn_t *conn) { + struct lnet_ioctl_config_o2iblnd_tunables *tunables; + lnet_ni_t *ni = conn->ibc_peer->ibp_ni; + LASSERT(conn->ibc_state >= IBLND_CONN_ESTABLISHED); + tunables = &ni->ni_lnd_tunables->lt_tun_u.lt_o2ib; if (conn->ibc_outstanding_credits < - IBLND_CREDITS_HIGHWATER(conn->ibc_version) && + IBLND_CREDITS_HIGHWATER(tunables, conn->ibc_version) && !kiblnd_send_keepalive(conn)) return 0; /* No need to send NOOP */ @@ -799,7 +832,8 @@ kiblnd_queue2str(kib_conn_t *conn, struct list_head *q) #define IBLND_WID_TX 1 #define IBLND_WID_RX 2 #define IBLND_WID_RDMA 3 -#define IBLND_WID_MASK 3UL +#define IBLND_WID_MR 4 +#define IBLND_WID_MASK 7UL static inline __u64 kiblnd_ptr2wreqid(void *ptr, int type) @@ -947,20 +981,20 @@ static inline unsigned int kiblnd_sg_dma_len(struct ib_device *dev, #define KIBLND_CONN_PARAM(e) ((e)->param.conn.private_data) #define KIBLND_CONN_PARAM_LEN(e) ((e)->param.conn.private_data_len) -struct ib_mr *kiblnd_find_rd_dma_mr(kib_hca_dev_t *hdev, - kib_rdma_desc_t *rd, +struct ib_mr *kiblnd_find_rd_dma_mr(struct lnet_ni *ni, kib_rdma_desc_t *rd, int negotiated_nfrags); void kiblnd_map_rx_descs(kib_conn_t *conn); void kiblnd_unmap_rx_descs(kib_conn_t *conn); void kiblnd_pool_free_node(kib_pool_t *pool, struct list_head *node); struct list_head *kiblnd_pool_alloc_node(kib_poolset_t *ps); -int kiblnd_fmr_pool_map(kib_fmr_poolset_t *fps, __u64 *pages, - int npages, __u64 iov, kib_fmr_t *fmr); +int kiblnd_fmr_pool_map(kib_fmr_poolset_t *fps, kib_tx_t *tx, + kib_rdma_desc_t *rd, __u32 nob, __u64 iov, + kib_fmr_t *fmr); void kiblnd_fmr_pool_unmap(kib_fmr_t *fmr, int status); -int kiblnd_tunables_init(void); -void kiblnd_tunables_fini(void); +int kiblnd_tunables_setup(struct lnet_ni *ni); +void kiblnd_tunables_init(void); int kiblnd_connd(void *arg); int kiblnd_scheduler(void *arg); diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c index 2323e8d3a..845e49a52 100644 --- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c +++ b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c @@ -561,36 +561,23 @@ kiblnd_kvaddr_to_page(unsigned long vaddr) } static int -kiblnd_fmr_map_tx(kib_net_t *net, kib_tx_t *tx, kib_rdma_desc_t *rd, int nob) +kiblnd_fmr_map_tx(kib_net_t *net, kib_tx_t *tx, kib_rdma_desc_t *rd, __u32 nob) { kib_hca_dev_t *hdev; - __u64 *pages = tx->tx_pages; kib_fmr_poolset_t *fps; - int npages; - int size; int cpt; int rc; - int i; LASSERT(tx->tx_pool); LASSERT(tx->tx_pool->tpo_pool.po_owner); hdev = tx->tx_pool->tpo_hdev; - - for (i = 0, npages = 0; i < rd->rd_nfrags; i++) { - for (size = 0; size < rd->rd_frags[i].rf_nob; - size += hdev->ibh_page_size) { - pages[npages++] = (rd->rd_frags[i].rf_addr & - hdev->ibh_page_mask) + size; - } - } - cpt = tx->tx_pool->tpo_pool.po_owner->ps_cpt; fps = net->ibn_fmr_ps[cpt]; - rc = kiblnd_fmr_pool_map(fps, pages, npages, 0, &tx->fmr); + rc = kiblnd_fmr_pool_map(fps, tx, rd, nob, 0, &tx->fmr); if (rc) { - CERROR("Can't map %d pages: %d\n", npages, rc); + CERROR("Can't map %u bytes: %d\n", nob, rc); return rc; } @@ -598,8 +585,7 @@ kiblnd_fmr_map_tx(kib_net_t *net, kib_tx_t *tx, kib_rdma_desc_t *rd, int nob) * If rd is not tx_rd, it's going to get sent to a peer, who will need * the rkey */ - rd->rd_key = (rd != tx->tx_rd) ? tx->fmr.fmr_pfmr->fmr->rkey : - tx->fmr.fmr_pfmr->fmr->lkey; + rd->rd_key = tx->fmr.fmr_key; rd->rd_frags[0].rf_addr &= ~hdev->ibh_page_mask; rd->rd_frags[0].rf_nob = nob; rd->rd_nfrags = 1; @@ -613,10 +599,8 @@ static void kiblnd_unmap_tx(lnet_ni_t *ni, kib_tx_t *tx) LASSERT(net); - if (net->ibn_fmr_ps && tx->fmr.fmr_pfmr) { + if (net->ibn_fmr_ps) kiblnd_fmr_pool_unmap(&tx->fmr, tx->tx_status); - tx->fmr.fmr_pfmr = NULL; - } if (tx->tx_nfrags) { kiblnd_dma_unmap_sg(tx->tx_pool->tpo_hdev->ibh_ibdev, @@ -628,8 +612,8 @@ static void kiblnd_unmap_tx(lnet_ni_t *ni, kib_tx_t *tx) static int kiblnd_map_tx(lnet_ni_t *ni, kib_tx_t *tx, kib_rdma_desc_t *rd, int nfrags) { - kib_hca_dev_t *hdev = tx->tx_pool->tpo_hdev; kib_net_t *net = ni->ni_data; + kib_hca_dev_t *hdev = net->ibn_dev->ibd_hdev; struct ib_mr *mr = NULL; __u32 nob; int i; @@ -652,7 +636,7 @@ static int kiblnd_map_tx(lnet_ni_t *ni, kib_tx_t *tx, kib_rdma_desc_t *rd, nob += rd->rd_frags[i].rf_nob; } - mr = kiblnd_find_rd_dma_mr(hdev, rd, tx->tx_conn ? + mr = kiblnd_find_rd_dma_mr(ni, rd, tx->tx_conn ? tx->tx_conn->ibc_max_frags : -1); if (mr) { /* found pre-mapping MR */ @@ -704,7 +688,7 @@ kiblnd_setup_rd_iov(lnet_ni_t *ni, kib_tx_t *tx, kib_rdma_desc_t *rd, fragnob = min(fragnob, (int)PAGE_SIZE - page_offset); sg_set_page(sg, page, fragnob, page_offset); - sg++; + sg = sg_next(sg); if (offset + fragnob < iov->iov_len) { offset += fragnob; @@ -748,7 +732,7 @@ kiblnd_setup_rd_kiov(lnet_ni_t *ni, kib_tx_t *tx, kib_rdma_desc_t *rd, sg_set_page(sg, kiov->kiov_page, fragnob, kiov->kiov_offset + offset); - sg++; + sg = sg_next(sg); offset = 0; kiov++; @@ -765,6 +749,7 @@ kiblnd_post_tx_locked(kib_conn_t *conn, kib_tx_t *tx, int credit) { kib_msg_t *msg = tx->tx_msg; kib_peer_t *peer = conn->ibc_peer; + struct lnet_ni *ni = peer->ibp_ni; int ver = conn->ibc_version; int rc; int done; @@ -780,7 +765,7 @@ kiblnd_post_tx_locked(kib_conn_t *conn, kib_tx_t *tx, int credit) LASSERT(conn->ibc_credits >= 0); LASSERT(conn->ibc_credits <= conn->ibc_queue_depth); - if (conn->ibc_nsends_posted == IBLND_CONCURRENT_SENDS(ver)) { + if (conn->ibc_nsends_posted == kiblnd_concurrent_sends(ver, ni)) { /* tx completions outstanding... */ CDEBUG(D_NET, "%s: posted enough\n", libcfs_nid2str(peer->ibp_nid)); @@ -851,14 +836,26 @@ kiblnd_post_tx_locked(kib_conn_t *conn, kib_tx_t *tx, int credit) /* close_conn will launch failover */ rc = -ENETDOWN; } else { - struct ib_send_wr *wrq = &tx->tx_wrq[tx->tx_nwrq - 1].wr; + struct kib_fast_reg_descriptor *frd = tx->fmr.fmr_frd; + struct ib_send_wr *bad = &tx->tx_wrq[tx->tx_nwrq - 1].wr; + struct ib_send_wr *wrq = &tx->tx_wrq[0].wr; + + if (frd) { + if (!frd->frd_valid) { + wrq = &frd->frd_inv_wr; + wrq->next = &frd->frd_fastreg_wr.wr; + } else { + wrq = &frd->frd_fastreg_wr.wr; + } + frd->frd_fastreg_wr.wr.next = &tx->tx_wrq[0].wr; + } - LASSERTF(wrq->wr_id == kiblnd_ptr2wreqid(tx, IBLND_WID_TX), + LASSERTF(bad->wr_id == kiblnd_ptr2wreqid(tx, IBLND_WID_TX), "bad wr_id %llx, opc %d, flags %d, peer: %s\n", - wrq->wr_id, wrq->opcode, wrq->send_flags, - libcfs_nid2str(conn->ibc_peer->ibp_nid)); - wrq = NULL; - rc = ib_post_send(conn->ibc_cmid->qp, &tx->tx_wrq->wr, &wrq); + bad->wr_id, bad->opcode, bad->send_flags, + libcfs_nid2str(conn->ibc_peer->ibp_nid)); + bad = NULL; + rc = ib_post_send(conn->ibc_cmid->qp, wrq, &bad); } conn->ibc_last_send = jiffies; @@ -919,7 +916,7 @@ kiblnd_check_sends(kib_conn_t *conn) spin_lock(&conn->ibc_lock); - LASSERT(conn->ibc_nsends_posted <= IBLND_CONCURRENT_SENDS(ver)); + LASSERT(conn->ibc_nsends_posted <= kiblnd_concurrent_sends(ver, ni)); LASSERT(!IBLND_OOB_CAPABLE(ver) || conn->ibc_noops_posted <= IBLND_OOB_MSGS(ver)); LASSERT(conn->ibc_reserved_credits >= 0); @@ -1066,7 +1063,7 @@ kiblnd_init_rdma(kib_conn_t *conn, kib_tx_t *tx, int type, kib_msg_t *ibmsg = tx->tx_msg; kib_rdma_desc_t *srcrd = tx->tx_rd; struct ib_sge *sge = &tx->tx_sge[0]; - struct ib_rdma_wr *wrq = &tx->tx_wrq[0], *next; + struct ib_rdma_wr *wrq, *next; int rc = resid; int srcidx = 0; int dstidx = 0; @@ -2333,11 +2330,11 @@ kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob) } if (reqmsg->ibm_u.connparams.ibcp_queue_depth > - IBLND_MSG_QUEUE_SIZE(version)) { + kiblnd_msg_queue_size(version, ni)) { CERROR("Can't accept conn from %s, queue depth too large: %d (<=%d wanted)\n", libcfs_nid2str(nid), reqmsg->ibm_u.connparams.ibcp_queue_depth, - IBLND_MSG_QUEUE_SIZE(version)); + kiblnd_msg_queue_size(version, ni)); if (version == IBLND_MSG_VERSION) rej.ibr_why = IBLND_REJECT_MSG_QUEUE_SIZE; @@ -2346,24 +2343,24 @@ kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob) } if (reqmsg->ibm_u.connparams.ibcp_max_frags > - IBLND_RDMA_FRAGS(version)) { + kiblnd_rdma_frags(version, ni)) { CWARN("Can't accept conn from %s (version %x): max_frags %d too large (%d wanted)\n", libcfs_nid2str(nid), version, reqmsg->ibm_u.connparams.ibcp_max_frags, - IBLND_RDMA_FRAGS(version)); + kiblnd_rdma_frags(version, ni)); if (version >= IBLND_MSG_VERSION) rej.ibr_why = IBLND_REJECT_RDMA_FRAGS; goto failed; } else if (reqmsg->ibm_u.connparams.ibcp_max_frags < - IBLND_RDMA_FRAGS(version) && !net->ibn_fmr_ps) { + kiblnd_rdma_frags(version, ni) && !net->ibn_fmr_ps) { CWARN("Can't accept conn from %s (version %x): max_frags %d incompatible without FMR pool (%d wanted)\n", libcfs_nid2str(nid), version, reqmsg->ibm_u.connparams.ibcp_max_frags, - IBLND_RDMA_FRAGS(version)); + kiblnd_rdma_frags(version, ni)); - if (version >= IBLND_MSG_VERSION) + if (version == IBLND_MSG_VERSION) rej.ibr_why = IBLND_REJECT_RDMA_FRAGS; goto failed; @@ -2524,12 +2521,13 @@ kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob) return 0; failed: - if (ni) + if (ni) { lnet_ni_decref(ni); + rej.ibr_cp.ibcp_queue_depth = kiblnd_msg_queue_size(version, ni); + rej.ibr_cp.ibcp_max_frags = kiblnd_rdma_frags(version, ni); + } rej.ibr_version = version; - rej.ibr_cp.ibcp_queue_depth = IBLND_MSG_QUEUE_SIZE(version); - rej.ibr_cp.ibcp_max_frags = IBLND_RDMA_FRAGS(version); kiblnd_reject(cmid, &rej); return -ECONNREFUSED; @@ -2580,12 +2578,15 @@ kiblnd_check_reconnect(kib_conn_t *conn, int version, reason = "Unknown"; break; - case IBLND_REJECT_RDMA_FRAGS: + case IBLND_REJECT_RDMA_FRAGS: { + struct lnet_ioctl_config_lnd_tunables *tunables; + if (!cp) { reason = "can't negotiate max frags"; goto out; } - if (!*kiblnd_tunables.kib_map_on_demand) { + tunables = peer->ibp_ni->ni_lnd_tunables; + if (!tunables->lt_tun_u.lt_o2ib.lnd_map_on_demand) { reason = "map_on_demand must be enabled"; goto out; } @@ -2597,7 +2598,7 @@ kiblnd_check_reconnect(kib_conn_t *conn, int version, peer->ibp_max_frags = frag_num; reason = "rdma fragments"; break; - + } case IBLND_REJECT_MSG_QUEUE_SIZE: if (!cp) { reason = "can't negotiate queue depth"; @@ -3430,6 +3431,12 @@ kiblnd_complete(struct ib_wc *wc) default: LBUG(); + case IBLND_WID_MR: + if (wc->status != IB_WC_SUCCESS && + wc->status != IB_WC_WR_FLUSH_ERR) + CNETERR("FastReg failed: %d\n", wc->status); + break; + case IBLND_WID_RDMA: /* * We only get RDMA completion notification if it fails. All diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_modparams.c b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_modparams.c index b4607dad3..f8fdd4ae3 100644 --- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_modparams.c +++ b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_modparams.c @@ -152,74 +152,135 @@ kib_tunables_t kiblnd_tunables = { .kib_timeout = &timeout, .kib_keepalive = &keepalive, .kib_ntx = &ntx, - .kib_credits = &credits, - .kib_peertxcredits = &peer_credits, - .kib_peercredits_hiw = &peer_credits_hiw, - .kib_peerrtrcredits = &peer_buffer_credits, - .kib_peertimeout = &peer_timeout, .kib_default_ipif = &ipif_name, .kib_retry_count = &retry_count, .kib_rnr_retry_count = &rnr_retry_count, - .kib_concurrent_sends = &concurrent_sends, .kib_ib_mtu = &ib_mtu, - .kib_map_on_demand = &map_on_demand, - .kib_fmr_pool_size = &fmr_pool_size, - .kib_fmr_flush_trigger = &fmr_flush_trigger, - .kib_fmr_cache = &fmr_cache, .kib_require_priv_port = &require_privileged_port, .kib_use_priv_port = &use_privileged_port, .kib_nscheds = &nscheds }; -int -kiblnd_tunables_init(void) +static struct lnet_ioctl_config_o2iblnd_tunables default_tunables; + +/* # messages/RDMAs in-flight */ +int kiblnd_msg_queue_size(int version, lnet_ni_t *ni) { + if (version == IBLND_MSG_VERSION_1) + return IBLND_MSG_QUEUE_SIZE_V1; + else if (ni) + return ni->ni_peertxcredits; + else + return peer_credits; +} + +int kiblnd_tunables_setup(struct lnet_ni *ni) +{ + struct lnet_ioctl_config_o2iblnd_tunables *tunables; + + /* + * if there was no tunables specified, setup the tunables to be + * defaulted + */ + if (!ni->ni_lnd_tunables) { + LIBCFS_ALLOC(ni->ni_lnd_tunables, + sizeof(*ni->ni_lnd_tunables)); + if (!ni->ni_lnd_tunables) + return -ENOMEM; + + memcpy(&ni->ni_lnd_tunables->lt_tun_u.lt_o2ib, + &default_tunables, sizeof(*tunables)); + } + tunables = &ni->ni_lnd_tunables->lt_tun_u.lt_o2ib; + + /* Current API version */ + tunables->lnd_version = 0; + if (kiblnd_translate_mtu(*kiblnd_tunables.kib_ib_mtu) < 0) { CERROR("Invalid ib_mtu %d, expected 256/512/1024/2048/4096\n", *kiblnd_tunables.kib_ib_mtu); return -EINVAL; } - if (*kiblnd_tunables.kib_peertxcredits < IBLND_CREDITS_DEFAULT) - *kiblnd_tunables.kib_peertxcredits = IBLND_CREDITS_DEFAULT; + if (!ni->ni_peertimeout) + ni->ni_peertimeout = peer_timeout; + + if (!ni->ni_maxtxcredits) + ni->ni_maxtxcredits = credits; + + if (!ni->ni_peertxcredits) + ni->ni_peertxcredits = peer_credits; - if (*kiblnd_tunables.kib_peertxcredits > IBLND_CREDITS_MAX) - *kiblnd_tunables.kib_peertxcredits = IBLND_CREDITS_MAX; + if (!ni->ni_peerrtrcredits) + ni->ni_peerrtrcredits = peer_buffer_credits; - if (*kiblnd_tunables.kib_peertxcredits > *kiblnd_tunables.kib_credits) - *kiblnd_tunables.kib_peertxcredits = *kiblnd_tunables.kib_credits; + if (ni->ni_peertxcredits < IBLND_CREDITS_DEFAULT) + ni->ni_peertxcredits = IBLND_CREDITS_DEFAULT; - if (*kiblnd_tunables.kib_peercredits_hiw < *kiblnd_tunables.kib_peertxcredits / 2) - *kiblnd_tunables.kib_peercredits_hiw = *kiblnd_tunables.kib_peertxcredits / 2; + if (ni->ni_peertxcredits > IBLND_CREDITS_MAX) + ni->ni_peertxcredits = IBLND_CREDITS_MAX; - if (*kiblnd_tunables.kib_peercredits_hiw >= *kiblnd_tunables.kib_peertxcredits) - *kiblnd_tunables.kib_peercredits_hiw = *kiblnd_tunables.kib_peertxcredits - 1; + if (ni->ni_peertxcredits > credits) + ni->ni_peertxcredits = credits; - if (*kiblnd_tunables.kib_map_on_demand < 0 || - *kiblnd_tunables.kib_map_on_demand > IBLND_MAX_RDMA_FRAGS) - *kiblnd_tunables.kib_map_on_demand = 0; /* disable map-on-demand */ + if (!tunables->lnd_peercredits_hiw) + tunables->lnd_peercredits_hiw = peer_credits_hiw; - if (*kiblnd_tunables.kib_map_on_demand == 1) - *kiblnd_tunables.kib_map_on_demand = 2; /* don't make sense to create map if only one fragment */ + if (tunables->lnd_peercredits_hiw < ni->ni_peertxcredits / 2) + tunables->lnd_peercredits_hiw = ni->ni_peertxcredits / 2; - if (!*kiblnd_tunables.kib_concurrent_sends) { - if (*kiblnd_tunables.kib_map_on_demand > 0 && - *kiblnd_tunables.kib_map_on_demand <= IBLND_MAX_RDMA_FRAGS / 8) - *kiblnd_tunables.kib_concurrent_sends = (*kiblnd_tunables.kib_peertxcredits) * 2; - else - *kiblnd_tunables.kib_concurrent_sends = (*kiblnd_tunables.kib_peertxcredits); + if (tunables->lnd_peercredits_hiw >= ni->ni_peertxcredits) + tunables->lnd_peercredits_hiw = ni->ni_peertxcredits - 1; + + if (tunables->lnd_map_on_demand < 0 || + tunables->lnd_map_on_demand > IBLND_MAX_RDMA_FRAGS) { + /* disable map-on-demand */ + tunables->lnd_map_on_demand = 0; + } + + if (tunables->lnd_map_on_demand == 1) { + /* don't make sense to create map if only one fragment */ + tunables->lnd_map_on_demand = 2; + } + + if (!tunables->lnd_concurrent_sends) { + if (tunables->lnd_map_on_demand > 0 && + tunables->lnd_map_on_demand <= IBLND_MAX_RDMA_FRAGS / 8) { + tunables->lnd_concurrent_sends = + ni->ni_peertxcredits * 2; + } else { + tunables->lnd_concurrent_sends = ni->ni_peertxcredits; + } } - if (*kiblnd_tunables.kib_concurrent_sends > *kiblnd_tunables.kib_peertxcredits * 2) - *kiblnd_tunables.kib_concurrent_sends = *kiblnd_tunables.kib_peertxcredits * 2; + if (tunables->lnd_concurrent_sends > ni->ni_peertxcredits * 2) + tunables->lnd_concurrent_sends = ni->ni_peertxcredits * 2; - if (*kiblnd_tunables.kib_concurrent_sends < *kiblnd_tunables.kib_peertxcredits / 2) - *kiblnd_tunables.kib_concurrent_sends = *kiblnd_tunables.kib_peertxcredits / 2; + if (tunables->lnd_concurrent_sends < ni->ni_peertxcredits / 2) + tunables->lnd_concurrent_sends = ni->ni_peertxcredits / 2; - if (*kiblnd_tunables.kib_concurrent_sends < *kiblnd_tunables.kib_peertxcredits) { + if (tunables->lnd_concurrent_sends < ni->ni_peertxcredits) { CWARN("Concurrent sends %d is lower than message queue size: %d, performance may drop slightly.\n", - *kiblnd_tunables.kib_concurrent_sends, *kiblnd_tunables.kib_peertxcredits); + tunables->lnd_concurrent_sends, ni->ni_peertxcredits); } + if (!tunables->lnd_fmr_pool_size) + tunables->lnd_fmr_pool_size = fmr_pool_size; + if (!tunables->lnd_fmr_flush_trigger) + tunables->lnd_fmr_flush_trigger = fmr_flush_trigger; + if (!tunables->lnd_fmr_cache) + tunables->lnd_fmr_cache = fmr_cache; + return 0; } + +void kiblnd_tunables_init(void) +{ + default_tunables.lnd_version = 0; + default_tunables.lnd_peercredits_hiw = peer_credits_hiw, + default_tunables.lnd_map_on_demand = map_on_demand; + default_tunables.lnd_concurrent_sends = concurrent_sends; + default_tunables.lnd_fmr_pool_size = fmr_pool_size; + default_tunables.lnd_fmr_flush_trigger = fmr_flush_trigger; + default_tunables.lnd_fmr_cache = fmr_cache; +} diff --git a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c index cca7b2f7f..406c0e7a5 100644 --- a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c +++ b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c @@ -2582,7 +2582,6 @@ ksocknal_debug_peerhash(lnet_ni_t *ni) } read_unlock(&ksocknal_data.ksnd_global_lock); - return; } void diff --git a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib.c b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib.c index d4ce06d0a..964b4e338 100644 --- a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib.c +++ b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib.c @@ -675,7 +675,6 @@ ksocknal_lib_set_callback(struct socket *sock, ksock_conn_t *conn) sock->sk->sk_user_data = conn; sock->sk->sk_data_ready = ksocknal_data_ready; sock->sk->sk_write_space = ksocknal_write_space; - return; } void @@ -695,8 +694,6 @@ ksocknal_lib_reset_callback(struct socket *sock, ksock_conn_t *conn) * sk_user_data is NULL. */ sock->sk->sk_user_data = NULL; - - return ; } int diff --git a/drivers/staging/lustre/lnet/libcfs/debug.c b/drivers/staging/lustre/lnet/libcfs/debug.c index c3d628bac..8c260c3d5 100644 --- a/drivers/staging/lustre/lnet/libcfs/debug.c +++ b/drivers/staging/lustre/lnet/libcfs/debug.c @@ -232,130 +232,24 @@ int libcfs_panic_in_progress; static const char * libcfs_debug_subsys2str(int subsys) { - switch (1 << subsys) { - default: + static const char *libcfs_debug_subsystems[] = LIBCFS_DEBUG_SUBSYS_NAMES; + + if (subsys >= ARRAY_SIZE(libcfs_debug_subsystems)) return NULL; - case S_UNDEFINED: - return "undefined"; - case S_MDC: - return "mdc"; - case S_MDS: - return "mds"; - case S_OSC: - return "osc"; - case S_OST: - return "ost"; - case S_CLASS: - return "class"; - case S_LOG: - return "log"; - case S_LLITE: - return "llite"; - case S_RPC: - return "rpc"; - case S_LNET: - return "lnet"; - case S_LND: - return "lnd"; - case S_PINGER: - return "pinger"; - case S_FILTER: - return "filter"; - case S_ECHO: - return "echo"; - case S_LDLM: - return "ldlm"; - case S_LOV: - return "lov"; - case S_LQUOTA: - return "lquota"; - case S_OSD: - return "osd"; - case S_LFSCK: - return "lfsck"; - case S_LMV: - return "lmv"; - case S_SEC: - return "sec"; - case S_GSS: - return "gss"; - case S_MGC: - return "mgc"; - case S_MGS: - return "mgs"; - case S_FID: - return "fid"; - case S_FLD: - return "fld"; - } + + return libcfs_debug_subsystems[subsys]; } /* libcfs_debug_token2mask() expects the returned string in lower-case */ static const char * libcfs_debug_dbg2str(int debug) { - switch (1 << debug) { - default: + static const char *libcfs_debug_masks[] = LIBCFS_DEBUG_MASKS_NAMES; + + if (debug >= ARRAY_SIZE(libcfs_debug_masks)) return NULL; - case D_TRACE: - return "trace"; - case D_INODE: - return "inode"; - case D_SUPER: - return "super"; - case D_EXT2: - return "ext2"; - case D_MALLOC: - return "malloc"; - case D_CACHE: - return "cache"; - case D_INFO: - return "info"; - case D_IOCTL: - return "ioctl"; - case D_NETERROR: - return "neterror"; - case D_NET: - return "net"; - case D_WARNING: - return "warning"; - case D_BUFFS: - return "buffs"; - case D_OTHER: - return "other"; - case D_DENTRY: - return "dentry"; - case D_NETTRACE: - return "nettrace"; - case D_PAGE: - return "page"; - case D_DLMTRACE: - return "dlmtrace"; - case D_ERROR: - return "error"; - case D_EMERG: - return "emerg"; - case D_HA: - return "ha"; - case D_RPCTRACE: - return "rpctrace"; - case D_VFSTRACE: - return "vfstrace"; - case D_READA: - return "reada"; - case D_MMAP: - return "mmap"; - case D_CONFIG: - return "config"; - case D_CONSOLE: - return "console"; - case D_QUOTA: - return "quota"; - case D_SEC: - return "sec"; - case D_LFSCK: - return "lfsck"; - } + + return libcfs_debug_masks[debug]; } int diff --git a/drivers/staging/lustre/lnet/libcfs/fail.c b/drivers/staging/lustre/lnet/libcfs/fail.c index dadaf7685..086e690bd 100644 --- a/drivers/staging/lustre/lnet/libcfs/fail.c +++ b/drivers/staging/lustre/lnet/libcfs/fail.c @@ -41,6 +41,9 @@ EXPORT_SYMBOL(cfs_fail_loc); unsigned int cfs_fail_val; EXPORT_SYMBOL(cfs_fail_val); +int cfs_fail_err; +EXPORT_SYMBOL(cfs_fail_err); + DECLARE_WAIT_QUEUE_HEAD(cfs_race_waitq); EXPORT_SYMBOL(cfs_race_waitq); diff --git a/drivers/staging/lustre/lnet/libcfs/hash.c b/drivers/staging/lustre/lnet/libcfs/hash.c index f60feb3a3..cc45ed82b 100644 --- a/drivers/staging/lustre/lnet/libcfs/hash.c +++ b/drivers/staging/lustre/lnet/libcfs/hash.c @@ -942,10 +942,10 @@ cfs_hash_buckets_realloc(struct cfs_hash *hs, struct cfs_hash_bucket **old_bkts, * @flags - CFS_HASH_REHASH enable synamic hash resizing * - CFS_HASH_SORT enable chained hash sort */ -static int cfs_hash_rehash_worker(cfs_workitem_t *wi); +static int cfs_hash_rehash_worker(struct cfs_workitem *wi); #if CFS_HASH_DEBUG_LEVEL >= CFS_HASH_DEBUG_1 -static int cfs_hash_dep_print(cfs_workitem_t *wi) +static int cfs_hash_dep_print(struct cfs_workitem *wi) { struct cfs_hash *hs = container_of(wi, struct cfs_hash, hs_dep_wi); int dep; @@ -1847,7 +1847,7 @@ cfs_hash_rehash_bd(struct cfs_hash *hs, struct cfs_hash_bd *old) } static int -cfs_hash_rehash_worker(cfs_workitem_t *wi) +cfs_hash_rehash_worker(struct cfs_workitem *wi) { struct cfs_hash *hs = container_of(wi, struct cfs_hash, hs_rehash_wi); struct cfs_hash_bucket **bkts; diff --git a/drivers/staging/lustre/lnet/libcfs/libcfs_lock.c b/drivers/staging/lustre/lnet/libcfs/libcfs_lock.c index 2de9eeae0..83543f928 100644 --- a/drivers/staging/lustre/lnet/libcfs/libcfs_lock.c +++ b/drivers/staging/lustre/lnet/libcfs/libcfs_lock.c @@ -49,7 +49,8 @@ EXPORT_SYMBOL(cfs_percpt_lock_free); * reason we always allocate cacheline-aligned memory block. */ struct cfs_percpt_lock * -cfs_percpt_lock_alloc(struct cfs_cpt_table *cptab) +cfs_percpt_lock_create(struct cfs_cpt_table *cptab, + struct lock_class_key *keys) { struct cfs_percpt_lock *pcl; spinlock_t *lock; @@ -67,12 +68,18 @@ cfs_percpt_lock_alloc(struct cfs_cpt_table *cptab) return NULL; } - cfs_percpt_for_each(lock, i, pcl->pcl_locks) + if (!keys) + CWARN("Cannot setup class key for percpt lock, you may see recursive locking warnings which are actually fake.\n"); + + cfs_percpt_for_each(lock, i, pcl->pcl_locks) { spin_lock_init(lock); + if (keys != NULL) + lockdep_set_class(lock, &keys[i]); + } return pcl; } -EXPORT_SYMBOL(cfs_percpt_lock_alloc); +EXPORT_SYMBOL(cfs_percpt_lock_create); /** * lock a CPU partition @@ -142,44 +149,3 @@ cfs_percpt_unlock(struct cfs_percpt_lock *pcl, int index) } } EXPORT_SYMBOL(cfs_percpt_unlock); - -/** free cpu-partition refcount */ -void -cfs_percpt_atomic_free(atomic_t **refs) -{ - cfs_percpt_free(refs); -} -EXPORT_SYMBOL(cfs_percpt_atomic_free); - -/** allocate cpu-partition refcount with initial value @init_val */ -atomic_t ** -cfs_percpt_atomic_alloc(struct cfs_cpt_table *cptab, int init_val) -{ - atomic_t **refs; - atomic_t *ref; - int i; - - refs = cfs_percpt_alloc(cptab, sizeof(*ref)); - if (!refs) - return NULL; - - cfs_percpt_for_each(ref, i, refs) - atomic_set(ref, init_val); - return refs; -} -EXPORT_SYMBOL(cfs_percpt_atomic_alloc); - -/** return sum of cpu-partition refs */ -int -cfs_percpt_atomic_summary(atomic_t **refs) -{ - atomic_t *ref; - int i; - int val = 0; - - cfs_percpt_for_each(ref, i, refs) - val += atomic_read(ref); - - return val; -} -EXPORT_SYMBOL(cfs_percpt_atomic_summary); diff --git a/drivers/staging/lustre/lnet/libcfs/libcfs_mem.c b/drivers/staging/lustre/lnet/libcfs/libcfs_mem.c index c5a695151..d0e81bb41 100644 --- a/drivers/staging/lustre/lnet/libcfs/libcfs_mem.c +++ b/drivers/staging/lustre/lnet/libcfs/libcfs_mem.c @@ -114,34 +114,6 @@ cfs_percpt_number(void *vars) } EXPORT_SYMBOL(cfs_percpt_number); -/* - * return memory block shadowed from current CPU - */ -void * -cfs_percpt_current(void *vars) -{ - struct cfs_var_array *arr; - int cpt; - - arr = container_of(vars, struct cfs_var_array, va_ptrs[0]); - cpt = cfs_cpt_current(arr->va_cptab, 0); - if (cpt < 0) - return NULL; - - return arr->va_ptrs[cpt]; -} - -void * -cfs_percpt_index(void *vars, int idx) -{ - struct cfs_var_array *arr; - - arr = container_of(vars, struct cfs_var_array, va_ptrs[0]); - - LASSERT(idx >= 0 && idx < arr->va_count); - return arr->va_ptrs[idx]; -} - /* * free variable array, see more detail in cfs_array_alloc */ diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c b/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c index 389fb9eee..b52518c54 100644 --- a/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c +++ b/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c @@ -755,8 +755,13 @@ cfs_cpt_table_create(int ncpt) struct cfs_cpu_partition *part; int n; - if (cpt >= ncpt) - goto failed; + /* + * Each emulated NUMA node has all allowed CPUs in + * the mask. + * End loop when all partitions have assigned CPUs. + */ + if (cpt == ncpt) + break; part = &cptab->ctb_parts[cpt]; diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-crypto.c b/drivers/staging/lustre/lnet/libcfs/linux/linux-crypto.c index 8c9377ed8..84f9b7b47 100644 --- a/drivers/staging/lustre/lnet/libcfs/linux/linux-crypto.c +++ b/drivers/staging/lustre/lnet/libcfs/linux/linux-crypto.c @@ -30,13 +30,34 @@ #include #include #include "../../../include/linux/libcfs/libcfs.h" +#include "../../../include/linux/libcfs/libcfs_crypto.h" #include "linux-crypto.h" + /** - * Array of hash algorithm speed in MByte per second + * Array of hash algorithm speed in MByte per second */ static int cfs_crypto_hash_speeds[CFS_HASH_ALG_MAX]; -static int cfs_crypto_hash_alloc(unsigned char alg_id, +/** + * Initialize the state descriptor for the specified hash algorithm. + * + * An internal routine to allocate the hash-specific state in \a hdesc for + * use with cfs_crypto_hash_digest() to compute the hash of a single message, + * though possibly in multiple chunks. The descriptor internal state should + * be freed with cfs_crypto_hash_final(). + * + * \param[in] hash_alg hash algorithm id (CFS_HASH_ALG_*) + * \param[out] type pointer to the hash description in hash_types[] + * array + * \param[in,out] hdesc hash state descriptor to be initialized + * \param[in] key initial hash value/state, NULL to use default + * value + * \param[in] key_len length of \a key + * + * \retval 0 on success + * \retval negative errno on failure + */ +static int cfs_crypto_hash_alloc(enum cfs_crypto_hash_alg hash_alg, const struct cfs_crypto_hash_type **type, struct ahash_request **req, unsigned char *key, @@ -45,11 +66,11 @@ static int cfs_crypto_hash_alloc(unsigned char alg_id, struct crypto_ahash *tfm; int err = 0; - *type = cfs_crypto_hash_type(alg_id); + *type = cfs_crypto_hash_type(hash_alg); if (!*type) { CWARN("Unsupported hash algorithm id = %d, max id is %d\n", - alg_id, CFS_HASH_ALG_MAX); + hash_alg, CFS_HASH_ALG_MAX); return -EINVAL; } tfm = crypto_alloc_ahash((*type)->cht_name, 0, CRYPTO_ALG_ASYNC); @@ -70,12 +91,6 @@ static int cfs_crypto_hash_alloc(unsigned char alg_id, ahash_request_set_callback(*req, 0, NULL, NULL); - /** Shash have different logic for initialization then digest - * shash: crypto_hash_setkey, crypto_hash_init - * digest: crypto_digest_init, crypto_digest_setkey - * Skip this function for digest, because we use shash logic at - * cfs_crypto_hash_alloc. - */ if (key) err = crypto_ahash_setkey(tfm, key, key_len); else if ((*type)->cht_key != 0) @@ -90,7 +105,7 @@ static int cfs_crypto_hash_alloc(unsigned char alg_id, CDEBUG(D_INFO, "Using crypto hash: %s (%s) speed %d MB/s\n", crypto_ahash_alg_name(tfm), crypto_ahash_driver_name(tfm), - cfs_crypto_hash_speeds[alg_id]); + cfs_crypto_hash_speeds[hash_alg]); err = crypto_ahash_init(*req); if (err) { @@ -100,7 +115,33 @@ static int cfs_crypto_hash_alloc(unsigned char alg_id, return err; } -int cfs_crypto_hash_digest(unsigned char alg_id, +/** + * Calculate hash digest for the passed buffer. + * + * This should be used when computing the hash on a single contiguous buffer. + * It combines the hash initialization, computation, and cleanup. + * + * \param[in] hash_alg id of hash algorithm (CFS_HASH_ALG_*) + * \param[in] buf data buffer on which to compute hash + * \param[in] buf_len length of \a buf in bytes + * \param[in] key initial value/state for algorithm, + * if \a key = NULL use default initial value + * \param[in] key_len length of \a key in bytes + * \param[out] hash pointer to computed hash value, + * if \a hash = NULL then \a hash_len is to digest + * size in bytes, retval -ENOSPC + * \param[in,out] hash_len size of \a hash buffer + * + * \retval -EINVAL \a buf, \a buf_len, \a hash_len, + * \a hash_alg invalid + * \retval -ENOENT \a hash_alg is unsupported + * \retval -ENOSPC \a hash is NULL, or \a hash_len less than + * digest size + * \retval 0 for success + * \retval negative errno for other errors from lower + * layers. + */ +int cfs_crypto_hash_digest(enum cfs_crypto_hash_alg hash_alg, const void *buf, unsigned int buf_len, unsigned char *key, unsigned int key_len, unsigned char *hash, unsigned int *hash_len) @@ -113,7 +154,7 @@ int cfs_crypto_hash_digest(unsigned char alg_id, if (!buf || buf_len == 0 || !hash_len) return -EINVAL; - err = cfs_crypto_hash_alloc(alg_id, &type, &req, key, key_len); + err = cfs_crypto_hash_alloc(hash_alg, &type, &req, key, key_len); if (err != 0) return err; @@ -134,15 +175,32 @@ int cfs_crypto_hash_digest(unsigned char alg_id, } EXPORT_SYMBOL(cfs_crypto_hash_digest); +/** + * Allocate and initialize desriptor for hash algorithm. + * + * This should be used to initialize a hash descriptor for multiple calls + * to a single hash function when computing the hash across multiple + * separate buffers or pages using cfs_crypto_hash_update{,_page}(). + * + * The hash descriptor should be freed with cfs_crypto_hash_final(). + * + * \param[in] hash_alg algorithm id (CFS_HASH_ALG_*) + * \param[in] key initial value/state for algorithm, if \a key = NULL + * use default initial value + * \param[in] key_len length of \a key in bytes + * + * \retval pointer to descriptor of hash instance + * \retval ERR_PTR(errno) in case of error + */ struct cfs_crypto_hash_desc * - cfs_crypto_hash_init(unsigned char alg_id, - unsigned char *key, unsigned int key_len) +cfs_crypto_hash_init(enum cfs_crypto_hash_alg hash_alg, + unsigned char *key, unsigned int key_len) { struct ahash_request *req; int err; const struct cfs_crypto_hash_type *type; - err = cfs_crypto_hash_alloc(alg_id, &type, &req, key, key_len); + err = cfs_crypto_hash_alloc(hash_alg, &type, &req, key, key_len); if (err) return ERR_PTR(err); @@ -150,6 +208,17 @@ struct cfs_crypto_hash_desc * } EXPORT_SYMBOL(cfs_crypto_hash_init); +/** + * Update hash digest computed on data within the given \a page + * + * \param[in] hdesc hash state descriptor + * \param[in] page data page on which to compute the hash + * \param[in] offset offset within \a page at which to start hash + * \param[in] len length of data on which to compute hash + * + * \retval 0 for success + * \retval negative errno on failure + */ int cfs_crypto_hash_update_page(struct cfs_crypto_hash_desc *hdesc, struct page *page, unsigned int offset, unsigned int len) @@ -158,13 +227,23 @@ int cfs_crypto_hash_update_page(struct cfs_crypto_hash_desc *hdesc, struct scatterlist sl; sg_init_table(&sl, 1); - sg_set_page(&sl, page, len, offset & ~CFS_PAGE_MASK); + sg_set_page(&sl, page, len, offset & ~PAGE_MASK); ahash_request_set_crypt(req, &sl, NULL, sl.length); return crypto_ahash_update(req); } EXPORT_SYMBOL(cfs_crypto_hash_update_page); +/** + * Update hash digest computed on the specified data + * + * \param[in] hdesc hash state descriptor + * \param[in] buf data buffer on which to compute the hash + * \param[in] buf_len length of \buf on which to compute hash + * + * \retval 0 for success + * \retval negative errno on failure + */ int cfs_crypto_hash_update(struct cfs_crypto_hash_desc *hdesc, const void *buf, unsigned int buf_len) { @@ -178,7 +257,18 @@ int cfs_crypto_hash_update(struct cfs_crypto_hash_desc *hdesc, } EXPORT_SYMBOL(cfs_crypto_hash_update); -/* If hash_len pointer is NULL - destroy descriptor. */ +/** + * Finish hash calculation, copy hash digest to buffer, clean up hash descriptor + * + * \param[in] hdesc hash descriptor + * \param[out] hash pointer to hash buffer to store hash digest + * \param[in,out] hash_len pointer to hash buffer size, if \a hdesc = NULL + * only free \a hdesc instead of computing the hash + * + * \retval 0 for success + * \retval -EOVERFLOW if hash_len is too small for the hash digest + * \retval negative errno for other errors from lower layers + */ int cfs_crypto_hash_final(struct cfs_crypto_hash_desc *hdesc, unsigned char *hash, unsigned int *hash_len) { @@ -186,99 +276,153 @@ int cfs_crypto_hash_final(struct cfs_crypto_hash_desc *hdesc, struct ahash_request *req = (void *)hdesc; int size = crypto_ahash_digestsize(crypto_ahash_reqtfm(req)); - if (!hash_len) { - crypto_free_ahash(crypto_ahash_reqtfm(req)); - ahash_request_free(req); - return 0; + if (!hash || !hash_len) { + err = 0; + goto free_ahash; } - if (!hash || *hash_len < size) { - *hash_len = size; - return -ENOSPC; + if (*hash_len < size) { + err = -EOVERFLOW; + goto free_ahash; } + ahash_request_set_crypt(req, NULL, hash, 0); err = crypto_ahash_final(req); - - if (err < 0) { - /* May be caller can fix error */ - return err; - } + if (!err) + *hash_len = size; +free_ahash: crypto_free_ahash(crypto_ahash_reqtfm(req)); ahash_request_free(req); return err; } EXPORT_SYMBOL(cfs_crypto_hash_final); -static void cfs_crypto_performance_test(unsigned char alg_id, - const unsigned char *buf, - unsigned int buf_len) +/** + * Compute the speed of specified hash function + * + * Run a speed test on the given hash algorithm on buffer of the given size. + * The speed is stored internally in the cfs_crypto_hash_speeds[] array, and + * is available through the cfs_crypto_hash_speed() function. + * + * \param[in] hash_alg hash algorithm id (CFS_HASH_ALG_*) + * \param[in] buf data buffer on which to compute the hash + * \param[in] buf_len length of \buf on which to compute hash + */ +static void cfs_crypto_performance_test(enum cfs_crypto_hash_alg hash_alg) { + int buf_len = max(PAGE_SIZE, 1048576UL); + void *buf; unsigned long start, end; int bcount, err = 0; - int sec = 1; /* do test only 1 sec */ - unsigned char hash[64]; - unsigned int hash_len = 64; - - for (start = jiffies, end = start + sec * HZ, bcount = 0; - time_before(jiffies, end); bcount++) { - err = cfs_crypto_hash_digest(alg_id, buf, buf_len, NULL, 0, - hash, &hash_len); + struct page *page; + unsigned char hash[CFS_CRYPTO_HASH_DIGESTSIZE_MAX]; + unsigned int hash_len = sizeof(hash); + + page = alloc_page(GFP_KERNEL); + if (!page) { + err = -ENOMEM; + goto out_err; + } + + buf = kmap(page); + memset(buf, 0xAD, PAGE_SIZE); + kunmap(page); + + for (start = jiffies, end = start + msecs_to_jiffies(MSEC_PER_SEC), + bcount = 0; time_before(jiffies, end); bcount++) { + struct cfs_crypto_hash_desc *hdesc; + int i; + + hdesc = cfs_crypto_hash_init(hash_alg, NULL, 0); + if (IS_ERR(hdesc)) { + err = PTR_ERR(hdesc); + break; + } + + for (i = 0; i < buf_len / PAGE_SIZE; i++) { + err = cfs_crypto_hash_update_page(hdesc, page, 0, + PAGE_SIZE); + if (err) + break; + } + + err = cfs_crypto_hash_final(hdesc, hash, &hash_len); if (err) break; } end = jiffies; - + __free_page(page); +out_err: if (err) { - cfs_crypto_hash_speeds[alg_id] = -1; - CDEBUG(D_INFO, "Crypto hash algorithm %s, err = %d\n", - cfs_crypto_hash_name(alg_id), err); + cfs_crypto_hash_speeds[hash_alg] = err; + CDEBUG(D_INFO, "Crypto hash algorithm %s test error: rc = %d\n", + cfs_crypto_hash_name(hash_alg), err); } else { unsigned long tmp; tmp = ((bcount * buf_len / jiffies_to_msecs(end - start)) * 1000) / (1024 * 1024); - cfs_crypto_hash_speeds[alg_id] = (int)tmp; + cfs_crypto_hash_speeds[hash_alg] = (int)tmp; + CDEBUG(D_CONFIG, "Crypto hash algorithm %s speed = %d MB/s\n", + cfs_crypto_hash_name(hash_alg), + cfs_crypto_hash_speeds[hash_alg]); } - CDEBUG(D_INFO, "Crypto hash algorithm %s speed = %d MB/s\n", - cfs_crypto_hash_name(alg_id), cfs_crypto_hash_speeds[alg_id]); } -int cfs_crypto_hash_speed(unsigned char hash_alg) +/** + * hash speed in Mbytes per second for valid hash algorithm + * + * Return the performance of the specified \a hash_alg that was previously + * computed using cfs_crypto_performance_test(). + * + * \param[in] hash_alg hash algorithm id (CFS_HASH_ALG_*) + * + * \retval positive speed of the hash function in MB/s + * \retval -ENOENT if \a hash_alg is unsupported + * \retval negative errno if \a hash_alg speed is unavailable + */ +int cfs_crypto_hash_speed(enum cfs_crypto_hash_alg hash_alg) { if (hash_alg < CFS_HASH_ALG_MAX) return cfs_crypto_hash_speeds[hash_alg]; - return -1; + return -ENOENT; } EXPORT_SYMBOL(cfs_crypto_hash_speed); /** - * Do performance test for all hash algorithms. + * Run the performance test for all hash algorithms. + * + * Run the cfs_crypto_performance_test() benchmark for all of the available + * hash functions using a 1MB buffer size. This is a reasonable buffer size + * for Lustre RPCs, even if the actual RPC size is larger or smaller. + * + * Since the setup cost and computation speed of various hash algorithms is + * a function of the buffer size (and possibly internal contention of offload + * engines), this speed only represents an estimate of the actual speed under + * actual usage, but is reasonable for comparing available algorithms. + * + * The actual speeds are available via cfs_crypto_hash_speed() for later + * comparison. + * + * \retval 0 on success + * \retval -ENOMEM if no memory is available for test buffer */ static int cfs_crypto_test_hashes(void) { - unsigned char i; - unsigned char *data; - unsigned int j; - /* Data block size for testing hash. Maximum - * kmalloc size for 2.6.18 kernel is 128K - */ - unsigned int data_len = 1 * 128 * 1024; - - data = kmalloc(data_len, 0); - if (!data) - return -ENOMEM; + enum cfs_crypto_hash_alg hash_alg; - for (j = 0; j < data_len; j++) - data[j] = j & 0xff; + for (hash_alg = 0; hash_alg < CFS_HASH_ALG_MAX; hash_alg++) + cfs_crypto_performance_test(hash_alg); - for (i = 0; i < CFS_HASH_ALG_MAX; i++) - cfs_crypto_performance_test(i, data, data_len); - - kfree(data); return 0; } static int adler32; +/** + * Register available hash functions + * + * \retval 0 + */ int cfs_crypto_register(void) { request_module("crc32c"); @@ -290,6 +434,9 @@ int cfs_crypto_register(void) return 0; } +/** + * Unregister previously registered hash functions + */ void cfs_crypto_unregister(void) { if (adler32 == 0) diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-module.c b/drivers/staging/lustre/lnet/libcfs/linux/linux-module.c index ebc60ac9b..d89f71ee4 100644 --- a/drivers/staging/lustre/lnet/libcfs/linux/linux-module.c +++ b/drivers/staging/lustre/lnet/libcfs/linux/linux-module.c @@ -40,10 +40,75 @@ #define LNET_MINOR 240 +static inline size_t libcfs_ioctl_packlen(struct libcfs_ioctl_data *data) +{ + size_t len = sizeof(*data); + + len += cfs_size_round(data->ioc_inllen1); + len += cfs_size_round(data->ioc_inllen2); + return len; +} + +static inline bool libcfs_ioctl_is_invalid(struct libcfs_ioctl_data *data) +{ + if (data->ioc_hdr.ioc_len > BIT(30)) { + CERROR("LIBCFS ioctl: ioc_len larger than 1<<30\n"); + return true; + } + if (data->ioc_inllen1 > BIT(30)) { + CERROR("LIBCFS ioctl: ioc_inllen1 larger than 1<<30\n"); + return true; + } + if (data->ioc_inllen2 > BIT(30)) { + CERROR("LIBCFS ioctl: ioc_inllen2 larger than 1<<30\n"); + return true; + } + if (data->ioc_inlbuf1 && !data->ioc_inllen1) { + CERROR("LIBCFS ioctl: inlbuf1 pointer but 0 length\n"); + return true; + } + if (data->ioc_inlbuf2 && !data->ioc_inllen2) { + CERROR("LIBCFS ioctl: inlbuf2 pointer but 0 length\n"); + return true; + } + if (data->ioc_pbuf1 && !data->ioc_plen1) { + CERROR("LIBCFS ioctl: pbuf1 pointer but 0 length\n"); + return true; + } + if (data->ioc_pbuf2 && !data->ioc_plen2) { + CERROR("LIBCFS ioctl: pbuf2 pointer but 0 length\n"); + return true; + } + if (data->ioc_plen1 && !data->ioc_pbuf1) { + CERROR("LIBCFS ioctl: plen1 nonzero but no pbuf1 pointer\n"); + return true; + } + if (data->ioc_plen2 && !data->ioc_pbuf2) { + CERROR("LIBCFS ioctl: plen2 nonzero but no pbuf2 pointer\n"); + return true; + } + if ((__u32)libcfs_ioctl_packlen(data) != data->ioc_hdr.ioc_len) { + CERROR("LIBCFS ioctl: packlen != ioc_len\n"); + return true; + } + if (data->ioc_inllen1 && + data->ioc_bulk[data->ioc_inllen1 - 1] != '\0') { + CERROR("LIBCFS ioctl: inlbuf1 not 0 terminated\n"); + return true; + } + if (data->ioc_inllen2 && + data->ioc_bulk[cfs_size_round(data->ioc_inllen1) + + data->ioc_inllen2 - 1] != '\0') { + CERROR("LIBCFS ioctl: inlbuf2 not 0 terminated\n"); + return true; + } + return false; +} + int libcfs_ioctl_data_adjust(struct libcfs_ioctl_data *data) { if (libcfs_ioctl_is_invalid(data)) { - CERROR("LNET: ioctl not correctly formatted\n"); + CERROR("libcfs ioctl: parameter not correctly formatted\n"); return -EINVAL; } @@ -57,68 +122,47 @@ int libcfs_ioctl_data_adjust(struct libcfs_ioctl_data *data) return 0; } -int libcfs_ioctl_getdata_len(const struct libcfs_ioctl_hdr __user *arg, - __u32 *len) +int libcfs_ioctl_getdata(struct libcfs_ioctl_hdr **hdr_pp, + const struct libcfs_ioctl_hdr __user *uhdr) { struct libcfs_ioctl_hdr hdr; + int err = 0; - if (copy_from_user(&hdr, arg, sizeof(hdr))) + if (copy_from_user(&hdr, uhdr, sizeof(hdr))) return -EFAULT; if (hdr.ioc_version != LIBCFS_IOCTL_VERSION && hdr.ioc_version != LIBCFS_IOCTL_VERSION2) { - CERROR("LNET: version mismatch expected %#x, got %#x\n", + CERROR("libcfs ioctl: version mismatch expected %#x, got %#x\n", LIBCFS_IOCTL_VERSION, hdr.ioc_version); return -EINVAL; } - *len = hdr.ioc_len; - - return 0; -} - -int libcfs_ioctl_popdata(void __user *arg, void *data, int size) -{ - if (copy_to_user(arg, data, size)) - return -EFAULT; - return 0; -} - -static int -libcfs_psdev_open(struct inode *inode, struct file *file) -{ - int rc = 0; + if (hdr.ioc_len < sizeof(struct libcfs_ioctl_data)) { + CERROR("libcfs ioctl: user buffer too small for ioctl\n"); + return -EINVAL; + } - if (!inode) + if (hdr.ioc_len > LIBCFS_IOC_DATA_MAX) { + CERROR("libcfs ioctl: user buffer is too large %d/%d\n", + hdr.ioc_len, LIBCFS_IOC_DATA_MAX); return -EINVAL; - if (libcfs_psdev_ops.p_open) - rc = libcfs_psdev_ops.p_open(0, NULL); - else - return -EPERM; - return rc; -} + } -/* called when closing /dev/device */ -static int -libcfs_psdev_release(struct inode *inode, struct file *file) -{ - int rc = 0; + LIBCFS_ALLOC(*hdr_pp, hdr.ioc_len); + if (!*hdr_pp) + return -ENOMEM; - if (!inode) - return -EINVAL; - if (libcfs_psdev_ops.p_close) - rc = libcfs_psdev_ops.p_close(0, NULL); - else - rc = -EPERM; - return rc; + if (copy_from_user(*hdr_pp, uhdr, hdr.ioc_len)) { + LIBCFS_FREE(*hdr_pp, hdr.ioc_len); + err = -EFAULT; + } + return err; } -static long libcfs_ioctl(struct file *file, - unsigned int cmd, unsigned long arg) +static long +libcfs_psdev_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { - struct cfs_psdev_file pfile; - int rc = 0; - if (!capable(CAP_SYS_ADMIN)) return -EACCES; @@ -130,26 +174,12 @@ static long libcfs_ioctl(struct file *file, return -EINVAL; } - /* Handle platform-dependent IOC requests */ - switch (cmd) { - case IOC_LIBCFS_PANIC: - if (!capable(CFS_CAP_SYS_BOOT)) - return -EPERM; - panic("debugctl-invoked panic"); - return 0; - } - - if (libcfs_psdev_ops.p_ioctl) - rc = libcfs_psdev_ops.p_ioctl(&pfile, cmd, (void __user *)arg); - else - rc = -EPERM; - return rc; + return libcfs_ioctl(cmd, (void __user *)arg); } static const struct file_operations libcfs_fops = { - .unlocked_ioctl = libcfs_ioctl, - .open = libcfs_psdev_open, - .release = libcfs_psdev_release, + .owner = THIS_MODULE, + .unlocked_ioctl = libcfs_psdev_ioctl, }; struct miscdevice libcfs_dev = { diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-prim.c b/drivers/staging/lustre/lnet/libcfs/linux/linux-prim.c index 890844602..bbe19a684 100644 --- a/drivers/staging/lustre/lnet/libcfs/linux/linux-prim.c +++ b/drivers/staging/lustre/lnet/libcfs/linux/linux-prim.c @@ -46,30 +46,6 @@ #include #endif -/** - * wait_queue_t of Linux (version < 2.6.34) is a FIFO list for exclusively - * waiting threads, which is not always desirable because all threads will - * be waken up again and again, even user only needs a few of them to be - * active most time. This is not good for performance because cache can - * be polluted by different threads. - * - * LIFO list can resolve this problem because we always wakeup the most - * recent active thread by default. - * - * NB: please don't call non-exclusive & exclusive wait on the same - * waitq if add_wait_queue_exclusive_head is used. - */ -void -add_wait_queue_exclusive_head(wait_queue_head_t *waitq, wait_queue_t *link) -{ - unsigned long flags; - - spin_lock_irqsave(&waitq->lock, flags); - __add_wait_queue_exclusive(waitq, link); - spin_unlock_irqrestore(&waitq->lock, flags); -} -EXPORT_SYMBOL(add_wait_queue_exclusive_head); - sigset_t cfs_block_allsigs(void) { @@ -128,13 +104,6 @@ cfs_restore_sigs(sigset_t old) } EXPORT_SYMBOL(cfs_restore_sigs); -int -cfs_signal_pending(void) -{ - return signal_pending(current); -} -EXPORT_SYMBOL(cfs_signal_pending); - void cfs_clear_sigpending(void) { diff --git a/drivers/staging/lustre/lnet/libcfs/module.c b/drivers/staging/lustre/lnet/libcfs/module.c index cdc640bfd..f2d041118 100644 --- a/drivers/staging/lustre/lnet/libcfs/module.c +++ b/drivers/staging/lustre/lnet/libcfs/module.c @@ -54,9 +54,6 @@ # define DEBUG_SUBSYSTEM S_LNET -#define LNET_MAX_IOCTL_BUF_LEN (sizeof(struct lnet_ioctl_net_config) + \ - sizeof(struct lnet_ioctl_config_data)) - #include "../../include/linux/libcfs/libcfs.h" #include @@ -68,20 +65,6 @@ static struct dentry *lnet_debugfs_root; -/* called when opening /dev/device */ -static int libcfs_psdev_open(unsigned long flags, void *args) -{ - try_module_get(THIS_MODULE); - return 0; -} - -/* called when closing /dev/device */ -static int libcfs_psdev_release(unsigned long flags, void *args) -{ - module_put(THIS_MODULE); - return 0; -} - static DECLARE_RWSEM(ioctl_list_sem); static LIST_HEAD(ioctl_list); @@ -115,39 +98,47 @@ int libcfs_deregister_ioctl(struct libcfs_ioctl_handler *hand) } EXPORT_SYMBOL(libcfs_deregister_ioctl); -static int libcfs_ioctl_handle(struct cfs_psdev_file *pfile, unsigned long cmd, - void __user *arg, struct libcfs_ioctl_hdr *hdr) +int libcfs_ioctl(unsigned long cmd, void __user *uparam) { struct libcfs_ioctl_data *data = NULL; - int err = -EINVAL; + struct libcfs_ioctl_hdr *hdr; + int err; + + /* 'cmd' and permissions get checked in our arch-specific caller */ + err = libcfs_ioctl_getdata(&hdr, uparam); + if (err) { + CDEBUG_LIMIT(D_ERROR, + "libcfs ioctl: data header error %d\n", err); + return err; + } - /* - * The libcfs_ioctl_data_adjust() function performs adjustment - * operations on the libcfs_ioctl_data structure to make - * it usable by the code. This doesn't need to be called - * for new data structures added. - */ if (hdr->ioc_version == LIBCFS_IOCTL_VERSION) { + /* + * The libcfs_ioctl_data_adjust() function performs adjustment + * operations on the libcfs_ioctl_data structure to make + * it usable by the code. This doesn't need to be called + * for new data structures added. + */ data = container_of(hdr, struct libcfs_ioctl_data, ioc_hdr); err = libcfs_ioctl_data_adjust(data); if (err) - return err; + goto out; } + CDEBUG(D_IOCTL, "libcfs ioctl cmd %lu\n", cmd); switch (cmd) { case IOC_LIBCFS_CLEAR_DEBUG: libcfs_debug_clear_buffer(); - return 0; - /* - * case IOC_LIBCFS_PANIC: - * Handled in arch/cfs_module.c - */ + break; + case IOC_LIBCFS_MARK_DEBUG: - if (!data->ioc_inlbuf1 || - data->ioc_inlbuf1[data->ioc_inllen1 - 1] != '\0') - return -EINVAL; + if (!data || !data->ioc_inlbuf1 || + data->ioc_inlbuf1[data->ioc_inllen1 - 1] != '\0') { + err = -EINVAL; + goto out; + } libcfs_debug_mark_buffer(data->ioc_inlbuf1); - return 0; + break; default: { struct libcfs_ioctl_handler *hand; @@ -156,67 +147,23 @@ static int libcfs_ioctl_handle(struct cfs_psdev_file *pfile, unsigned long cmd, down_read(&ioctl_list_sem); list_for_each_entry(hand, &ioctl_list, item) { err = hand->handle_ioctl(cmd, hdr); - if (err != -EINVAL) { - if (err == 0) - err = libcfs_ioctl_popdata(arg, - hdr, hdr->ioc_len); - break; + if (err == -EINVAL) + continue; + + if (!err) { + if (copy_to_user(uparam, hdr, hdr->ioc_len)) + err = -EFAULT; } + break; } up_read(&ioctl_list_sem); - break; - } - } - - return err; -} - -static int libcfs_ioctl(struct cfs_psdev_file *pfile, unsigned long cmd, - void __user *arg) -{ - struct libcfs_ioctl_hdr *hdr; - int err = 0; - __u32 buf_len; - - err = libcfs_ioctl_getdata_len(arg, &buf_len); - if (err) - return err; - - /* - * do a check here to restrict the size of the memory - * to allocate to guard against DoS attacks. - */ - if (buf_len > LNET_MAX_IOCTL_BUF_LEN) { - CERROR("LNET: user buffer exceeds kernel buffer\n"); - return -EINVAL; - } - - LIBCFS_ALLOC_GFP(hdr, buf_len, GFP_KERNEL); - if (!hdr) - return -ENOMEM; - - /* 'cmd' and permissions get checked in our arch-specific caller */ - if (copy_from_user(hdr, arg, buf_len)) { - CERROR("LNET ioctl: data error\n"); - err = -EFAULT; - goto out; + break; } } - - err = libcfs_ioctl_handle(pfile, cmd, arg, hdr); - out: - LIBCFS_FREE(hdr, buf_len); + LIBCFS_FREE(hdr, hdr->ioc_len); return err; } -struct cfs_psdev_ops libcfs_psdev_ops = { - libcfs_psdev_open, - libcfs_psdev_release, - NULL, - NULL, - libcfs_ioctl -}; - int lprocfs_call_handler(void *data, int write, loff_t *ppos, void __user *buffer, size_t *lenp, int (*handler)(void *data, int write, loff_t pos, @@ -477,6 +424,13 @@ static struct ctl_table lnet_table[] = { .mode = 0644, .proc_handler = &proc_dointvec }, + { + .procname = "fail_err", + .data = &cfs_fail_err, + .maxlen = sizeof(cfs_fail_err), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, { } }; diff --git a/drivers/staging/lustre/lnet/libcfs/tracefile.c b/drivers/staging/lustre/lnet/libcfs/tracefile.c index 244eb89ee..7739b9469 100644 --- a/drivers/staging/lustre/lnet/libcfs/tracefile.c +++ b/drivers/staging/lustre/lnet/libcfs/tracefile.c @@ -707,10 +707,9 @@ int cfs_tracefile_dump_all_pages(char *filename) struct cfs_trace_page *tage; struct cfs_trace_page *tmp; char *buf; + mm_segment_t __oldfs; int rc; - DECL_MMSPACE; - cfs_tracefile_write_lock(); filp = filp_open(filename, O_CREAT | O_EXCL | O_WRONLY | O_LARGEFILE, @@ -729,11 +728,12 @@ int cfs_tracefile_dump_all_pages(char *filename) rc = 0; goto close; } + __oldfs = get_fs(); + set_fs(get_ds()); /* ok, for now, just write the pages. in the future we'll be building * iobufs with the pages and calling generic_direct_IO */ - MMSPACE_OPEN; list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) { __LASSERT_TAGE_INVARIANT(tage); @@ -752,7 +752,7 @@ int cfs_tracefile_dump_all_pages(char *filename) list_del(&tage->linkage); cfs_tage_free(tage); } - MMSPACE_CLOSE; + set_fs(__oldfs); rc = vfs_fsync(filp, 1); if (rc) pr_err("sync returns %d\n", rc); @@ -986,13 +986,12 @@ static int tracefiled(void *arg) struct tracefiled_ctl *tctl = arg; struct cfs_trace_page *tage; struct cfs_trace_page *tmp; + mm_segment_t __oldfs; struct file *filp; char *buf; int last_loop = 0; int rc; - DECL_MMSPACE; - /* we're started late enough that we pick up init's fs context */ /* this is so broken in uml? what on earth is going on? */ @@ -1025,8 +1024,8 @@ static int tracefiled(void *arg) __LASSERT(list_empty(&pc.pc_pages)); goto end_loop; } - - MMSPACE_OPEN; + __oldfs = get_fs(); + set_fs(get_ds()); list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) { static loff_t f_pos; @@ -1051,7 +1050,7 @@ static int tracefiled(void *arg) break; } } - MMSPACE_CLOSE; + set_fs(__oldfs); filp_close(filp, NULL); put_pages_on_daemon_list(&pc); diff --git a/drivers/staging/lustre/lnet/libcfs/workitem.c b/drivers/staging/lustre/lnet/libcfs/workitem.c index c72fe00dc..92236ae59 100644 --- a/drivers/staging/lustre/lnet/libcfs/workitem.c +++ b/drivers/staging/lustre/lnet/libcfs/workitem.c @@ -111,7 +111,7 @@ cfs_wi_sched_cansleep(struct cfs_wi_sched *sched) * 1. when it returns no one shall try to schedule the workitem. */ void -cfs_wi_exit(struct cfs_wi_sched *sched, cfs_workitem_t *wi) +cfs_wi_exit(struct cfs_wi_sched *sched, struct cfs_workitem *wi) { LASSERT(!in_interrupt()); /* because we use plain spinlock */ LASSERT(!sched->ws_stopping); @@ -138,7 +138,7 @@ EXPORT_SYMBOL(cfs_wi_exit); * cancel schedule request of workitem \a wi */ int -cfs_wi_deschedule(struct cfs_wi_sched *sched, cfs_workitem_t *wi) +cfs_wi_deschedule(struct cfs_wi_sched *sched, struct cfs_workitem *wi) { int rc; @@ -179,7 +179,7 @@ EXPORT_SYMBOL(cfs_wi_deschedule); * be added, and even dynamic creation of serialised queues might be supported. */ void -cfs_wi_schedule(struct cfs_wi_sched *sched, cfs_workitem_t *wi) +cfs_wi_schedule(struct cfs_wi_sched *sched, struct cfs_workitem *wi) { LASSERT(!in_interrupt()); /* because we use plain spinlock */ LASSERT(!sched->ws_stopping); @@ -229,12 +229,12 @@ static int cfs_wi_scheduler(void *arg) while (!sched->ws_stopping) { int nloops = 0; int rc; - cfs_workitem_t *wi; + struct cfs_workitem *wi; while (!list_empty(&sched->ws_runq) && nloops < CFS_WI_RESCHED) { - wi = list_entry(sched->ws_runq.next, cfs_workitem_t, - wi_list); + wi = list_entry(sched->ws_runq.next, + struct cfs_workitem, wi_list); LASSERT(wi->wi_scheduled && !wi->wi_running); list_del_init(&wi->wi_list); diff --git a/drivers/staging/lustre/lnet/lnet/api-ni.c b/drivers/staging/lustre/lnet/lnet/api-ni.c index 876475554..fe0dbe746 100644 --- a/drivers/staging/lustre/lnet/lnet/api-ni.c +++ b/drivers/staging/lustre/lnet/lnet/api-ni.c @@ -1215,9 +1215,9 @@ lnet_shutdown_lndni(struct lnet_ni *ni) } static int -lnet_startup_lndni(struct lnet_ni *ni, __s32 peer_timeout, - __s32 peer_cr, __s32 peer_buf_cr, __s32 credits) +lnet_startup_lndni(struct lnet_ni *ni, struct lnet_ioctl_config_data *conf) { + struct lnet_ioctl_config_lnd_tunables *lnd_tunables = NULL; int rc = -EINVAL; int lnd_type; lnd_t *lnd; @@ -1275,6 +1275,21 @@ lnet_startup_lndni(struct lnet_ni *ni, __s32 peer_timeout, ni->ni_lnd = lnd; + if (conf && conf->cfg_hdr.ioc_len > sizeof(*conf)) + lnd_tunables = (struct lnet_ioctl_config_lnd_tunables *)conf->cfg_bulk; + + if (lnd_tunables) { + LIBCFS_ALLOC(ni->ni_lnd_tunables, + sizeof(*ni->ni_lnd_tunables)); + if (!ni->ni_lnd_tunables) { + mutex_unlock(&the_lnet.ln_lnd_mutex); + rc = -ENOMEM; + goto failed0; + } + memcpy(ni->ni_lnd_tunables, lnd_tunables, + sizeof(*ni->ni_lnd_tunables)); + } + rc = lnd->lnd_startup(ni); mutex_unlock(&the_lnet.ln_lnd_mutex); @@ -1292,20 +1307,28 @@ lnet_startup_lndni(struct lnet_ni *ni, __s32 peer_timeout, * If given some LND tunable parameters, parse those now to * override the values in the NI structure. */ - if (peer_buf_cr >= 0) - ni->ni_peerrtrcredits = peer_buf_cr; - if (peer_timeout >= 0) - ni->ni_peertimeout = peer_timeout; + if (conf && conf->cfg_config_u.cfg_net.net_peer_rtr_credits >= 0) { + ni->ni_peerrtrcredits = + conf->cfg_config_u.cfg_net.net_peer_rtr_credits; + } + if (conf && conf->cfg_config_u.cfg_net.net_peer_timeout >= 0) { + ni->ni_peertimeout = + conf->cfg_config_u.cfg_net.net_peer_timeout; + } /* * TODO * Note: For now, don't allow the user to change * peertxcredits as this number is used in the * IB LND to control queue depth. - * if (peer_cr != -1) - * ni->ni_peertxcredits = peer_cr; + * + * if (conf && conf->cfg_config_u.cfg_net.net_peer_tx_credits != -1) + * ni->ni_peertxcredits = + * conf->cfg_config_u.cfg_net.net_peer_tx_credits; */ - if (credits >= 0) - ni->ni_maxtxcredits = credits; + if (conf && conf->cfg_config_u.cfg_net.net_max_tx_credits >= 0) { + ni->ni_maxtxcredits = + conf->cfg_config_u.cfg_net.net_max_tx_credits; + } LASSERT(ni->ni_peertimeout <= 0 || lnd->lnd_query); @@ -1367,7 +1390,7 @@ lnet_startup_lndnis(struct list_head *nilist) while (!list_empty(nilist)) { ni = list_entry(nilist->next, lnet_ni_t, ni_list); list_del(&ni->ni_list); - rc = lnet_startup_lndni(ni, -1, -1, -1, -1); + rc = lnet_startup_lndni(ni, NULL); if (rc < 0) goto failed; @@ -1641,25 +1664,20 @@ EXPORT_SYMBOL(LNetNIFini); * parameters * * \param[in] ni network interface structure - * \param[out] cpt_count the number of cpts the ni is on - * \param[out] nid Network Interface ID - * \param[out] peer_timeout NI peer timeout - * \param[out] peer_tx_crdits NI peer transmit credits - * \param[out] peer_rtr_credits NI peer router credits - * \param[out] max_tx_credits NI max transmit credit - * \param[out] net_config Network configuration + * \param[out] config NI configuration */ static void -lnet_fill_ni_info(struct lnet_ni *ni, __u32 *cpt_count, __u64 *nid, - int *peer_timeout, int *peer_tx_credits, - int *peer_rtr_credits, int *max_tx_credits, - struct lnet_ioctl_net_config *net_config) +lnet_fill_ni_info(struct lnet_ni *ni, struct lnet_ioctl_config_data *config) { + struct lnet_ioctl_config_lnd_tunables *lnd_cfg = NULL; + struct lnet_ioctl_net_config *net_config; + size_t min_size, tunable_size = 0; int i; - if (!ni) + if (!ni || !config) return; + net_config = (struct lnet_ioctl_net_config *) config->cfg_bulk; if (!net_config) return; @@ -1675,11 +1693,11 @@ lnet_fill_ni_info(struct lnet_ni *ni, __u32 *cpt_count, __u64 *nid, sizeof(net_config->ni_interfaces[i])); } - *nid = ni->ni_nid; - *peer_timeout = ni->ni_peertimeout; - *peer_tx_credits = ni->ni_peertxcredits; - *peer_rtr_credits = ni->ni_peerrtrcredits; - *max_tx_credits = ni->ni_maxtxcredits; + config->cfg_nid = ni->ni_nid; + config->cfg_config_u.cfg_net.net_peer_timeout = ni->ni_peertimeout; + config->cfg_config_u.cfg_net.net_max_tx_credits = ni->ni_maxtxcredits; + config->cfg_config_u.cfg_net.net_peer_tx_credits = ni->ni_peertxcredits; + config->cfg_config_u.cfg_net.net_peer_rtr_credits = ni->ni_peerrtrcredits; net_config->ni_status = ni->ni_status->ns_status; @@ -1689,18 +1707,40 @@ lnet_fill_ni_info(struct lnet_ni *ni, __u32 *cpt_count, __u64 *nid, for (i = 0; i < num_cpts; i++) net_config->ni_cpts[i] = ni->ni_cpts[i]; - *cpt_count = num_cpts; + config->cfg_ncpts = num_cpts; + } + + /* + * See if user land tools sent in a newer and larger version + * of struct lnet_tunables than what the kernel uses. + */ + min_size = sizeof(*config) + sizeof(*net_config); + + if (config->cfg_hdr.ioc_len > min_size) + tunable_size = config->cfg_hdr.ioc_len - min_size; + + /* Don't copy to much data to user space */ + min_size = min(tunable_size, sizeof(*ni->ni_lnd_tunables)); + lnd_cfg = (struct lnet_ioctl_config_lnd_tunables *)net_config->cfg_bulk; + + if (ni->ni_lnd_tunables && lnd_cfg && min_size) { + memcpy(lnd_cfg, ni->ni_lnd_tunables, min_size); + config->cfg_config_u.cfg_net.net_interface_count = 1; + + /* Tell user land that kernel side has less data */ + if (tunable_size > sizeof(*ni->ni_lnd_tunables)) { + min_size = tunable_size - sizeof(ni->ni_lnd_tunables); + config->cfg_hdr.ioc_len -= min_size; + } } } -int -lnet_get_net_config(int idx, __u32 *cpt_count, __u64 *nid, int *peer_timeout, - int *peer_tx_credits, int *peer_rtr_credits, - int *max_tx_credits, - struct lnet_ioctl_net_config *net_config) +static int +lnet_get_net_config(struct lnet_ioctl_config_data *config) { struct lnet_ni *ni; struct list_head *tmp; + int idx = config->cfg_count; int cpt, i = 0; int rc = -ENOENT; @@ -1712,9 +1752,7 @@ lnet_get_net_config(int idx, __u32 *cpt_count, __u64 *nid, int *peer_timeout, ni = list_entry(tmp, lnet_ni_t, ni_list); lnet_ni_lock(ni); - lnet_fill_ni_info(ni, cpt_count, nid, peer_timeout, - peer_tx_credits, peer_rtr_credits, - max_tx_credits, net_config); + lnet_fill_ni_info(ni, config); lnet_ni_unlock(ni); rc = 0; break; @@ -1725,10 +1763,9 @@ lnet_get_net_config(int idx, __u32 *cpt_count, __u64 *nid, int *peer_timeout, } int -lnet_dyn_add_ni(lnet_pid_t requested_pid, char *nets, - __s32 peer_timeout, __s32 peer_cr, __s32 peer_buf_cr, - __s32 credits) +lnet_dyn_add_ni(lnet_pid_t requested_pid, struct lnet_ioctl_config_data *conf) { + char *nets = conf->cfg_config_u.cfg_net.net_intf; lnet_ping_info_t *pinfo; lnet_handle_md_t md_handle; struct lnet_ni *ni; @@ -1773,8 +1810,7 @@ lnet_dyn_add_ni(lnet_pid_t requested_pid, char *nets, list_del_init(&ni->ni_list); - rc = lnet_startup_lndni(ni, peer_timeout, peer_cr, - peer_buf_cr, credits); + rc = lnet_startup_lndni(ni, conf); if (rc) goto failed1; @@ -1864,6 +1900,10 @@ LNetCtl(unsigned int cmd, void *arg) int rc; unsigned long secs_passed; + BUILD_BUG_ON(LIBCFS_IOC_DATA_MAX < + sizeof(struct lnet_ioctl_net_config) + + sizeof(struct lnet_ioctl_config_data)); + switch (cmd) { case IOC_LIBCFS_GET_NI: rc = LNetGetId(data->ioc_count, &id); @@ -1918,27 +1958,14 @@ LNetCtl(unsigned int cmd, void *arg) &config->cfg_config_u.cfg_route.rtr_priority); case IOC_LIBCFS_GET_NET: { - struct lnet_ioctl_net_config *net_config; - size_t total = sizeof(*config) + sizeof(*net_config); - + size_t total = sizeof(*config) + + sizeof(struct lnet_ioctl_net_config); config = arg; if (config->cfg_hdr.ioc_len < total) return -EINVAL; - net_config = (struct lnet_ioctl_net_config *) - config->cfg_bulk; - if (!net_config) - return -EINVAL; - - return lnet_get_net_config(config->cfg_count, - &config->cfg_ncpts, - &config->cfg_nid, - &config->cfg_config_u.cfg_net.net_peer_timeout, - &config->cfg_config_u.cfg_net.net_peer_tx_credits, - &config->cfg_config_u.cfg_net.net_peer_rtr_credits, - &config->cfg_config_u.cfg_net.net_max_tx_credits, - net_config); + return lnet_get_net_config(config); } case IOC_LIBCFS_GET_LNET_STATS: { diff --git a/drivers/staging/lustre/lnet/lnet/config.c b/drivers/staging/lustre/lnet/lnet/config.c index 449069c9e..480cc9c6c 100644 --- a/drivers/staging/lustre/lnet/lnet/config.c +++ b/drivers/staging/lustre/lnet/lnet/config.c @@ -107,6 +107,9 @@ lnet_ni_free(struct lnet_ni *ni) if (ni->ni_cpts) cfs_expr_list_values_free(ni->ni_cpts, ni->ni_ncpts); + if (ni->ni_lnd_tunables) + LIBCFS_FREE(ni->ni_lnd_tunables, sizeof(*ni->ni_lnd_tunables)); + for (i = 0; i < LNET_MAX_INTERFACES && ni->ni_interfaces[i]; i++) { LIBCFS_FREE(ni->ni_interfaces[i], strlen(ni->ni_interfaces[i]) + 1); diff --git a/drivers/staging/lustre/lnet/lnet/lib-move.c b/drivers/staging/lustre/lnet/lnet/lib-move.c index f19aa9320..c5d5bedb3 100644 --- a/drivers/staging/lustre/lnet/lnet/lib-move.c +++ b/drivers/staging/lustre/lnet/lnet/lib-move.c @@ -407,7 +407,7 @@ lnet_copy_kiov2iov(unsigned int niov, struct kvec *iov, unsigned int iovoffset, LASSERT(niov > 0); LASSERT(nkiov > 0); this_nob = min(iov->iov_len - iovoffset, - (__kernel_size_t) kiov->kiov_len - kiovoffset); + (__kernel_size_t)kiov->kiov_len - kiovoffset); this_nob = min(this_nob, nob); if (!addr) @@ -477,7 +477,7 @@ lnet_copy_iov2kiov(unsigned int nkiov, lnet_kiov_t *kiov, do { LASSERT(nkiov > 0); LASSERT(niov > 0); - this_nob = min((__kernel_size_t) kiov->kiov_len - kiovoffset, + this_nob = min((__kernel_size_t)kiov->kiov_len - kiovoffset, iov->iov_len - iovoffset); this_nob = min(this_nob, nob); @@ -996,7 +996,7 @@ lnet_return_tx_credits_locked(lnet_msg_t *msg) LASSERT(msg2->msg_txpeer->lp_ni == ni); LASSERT(msg2->msg_tx_delayed); - (void) lnet_post_send_locked(msg2, 1); + (void)lnet_post_send_locked(msg2, 1); } } @@ -1019,7 +1019,7 @@ lnet_return_tx_credits_locked(lnet_msg_t *msg) LASSERT(msg2->msg_txpeer == txpeer); LASSERT(msg2->msg_tx_delayed); - (void) lnet_post_send_locked(msg2, 1); + (void)lnet_post_send_locked(msg2, 1); } } @@ -1142,7 +1142,7 @@ routing_off: lnet_msg_t, msg_list); list_del(&msg2->msg_list); - (void) lnet_post_routed_recv_locked(msg2, 1); + (void)lnet_post_routed_recv_locked(msg2, 1); } } if (rxpeer) { diff --git a/drivers/staging/lustre/lnet/lnet/module.c b/drivers/staging/lustre/lnet/lnet/module.c index 93037c116..246b5c141 100644 --- a/drivers/staging/lustre/lnet/lnet/module.c +++ b/drivers/staging/lustre/lnet/lnet/module.c @@ -108,12 +108,7 @@ lnet_dyn_configure(struct libcfs_ioctl_hdr *hdr) rc = -EINVAL; goto out_unlock; } - rc = lnet_dyn_add_ni(LNET_PID_LUSTRE, - conf->cfg_config_u.cfg_net.net_intf, - conf->cfg_config_u.cfg_net.net_peer_timeout, - conf->cfg_config_u.cfg_net.net_peer_tx_credits, - conf->cfg_config_u.cfg_net.net_peer_rtr_credits, - conf->cfg_config_u.cfg_net.net_max_tx_credits); + rc = lnet_dyn_add_ni(LNET_PID_LUSTRE, conf); out_unlock: mutex_unlock(&lnet_config_mutex); diff --git a/drivers/staging/lustre/lnet/selftest/brw_test.c b/drivers/staging/lustre/lnet/selftest/brw_test.c index dcb6e506f..a63d86c4c 100644 --- a/drivers/staging/lustre/lnet/selftest/brw_test.c +++ b/drivers/staging/lustre/lnet/selftest/brw_test.c @@ -49,10 +49,10 @@ module_param(brw_inject_errors, int, 0644); MODULE_PARM_DESC(brw_inject_errors, "# data errors to inject randomly, zero by default"); static void -brw_client_fini(sfw_test_instance_t *tsi) +brw_client_fini(struct sfw_test_instance *tsi) { - srpc_bulk_t *bulk; - sfw_test_unit_t *tsu; + struct srpc_bulk *bulk; + struct sfw_test_unit *tsu; LASSERT(tsi->tsi_is_client); @@ -67,21 +67,21 @@ brw_client_fini(sfw_test_instance_t *tsi) } static int -brw_client_init(sfw_test_instance_t *tsi) +brw_client_init(struct sfw_test_instance *tsi) { - sfw_session_t *sn = tsi->tsi_batch->bat_session; + struct sfw_session *sn = tsi->tsi_batch->bat_session; int flags; int npg; int len; int opc; - srpc_bulk_t *bulk; - sfw_test_unit_t *tsu; + struct srpc_bulk *bulk; + struct sfw_test_unit *tsu; LASSERT(sn); LASSERT(tsi->tsi_is_client); if (!(sn->sn_features & LST_FEAT_BULK_LEN)) { - test_bulk_req_t *breq = &tsi->tsi_u.bulk_v0; + struct test_bulk_req *breq = &tsi->tsi_u.bulk_v0; opc = breq->blk_opc; flags = breq->blk_flags; @@ -91,9 +91,8 @@ brw_client_init(sfw_test_instance_t *tsi) * but we have to keep it for compatibility */ len = npg * PAGE_SIZE; - } else { - test_bulk_req_v1_t *breq = &tsi->tsi_u.bulk_v1; + struct test_bulk_req_v1 *breq = &tsi->tsi_u.bulk_v1; /* * I should never get this step if it's unknown feature @@ -225,7 +224,7 @@ bad_data: } static void -brw_fill_bulk(srpc_bulk_t *bk, int pattern, __u64 magic) +brw_fill_bulk(struct srpc_bulk *bk, int pattern, __u64 magic) { int i; struct page *pg; @@ -237,7 +236,7 @@ brw_fill_bulk(srpc_bulk_t *bk, int pattern, __u64 magic) } static int -brw_check_bulk(srpc_bulk_t *bk, int pattern, __u64 magic) +brw_check_bulk(struct srpc_bulk *bk, int pattern, __u64 magic) { int i; struct page *pg; @@ -255,14 +254,14 @@ brw_check_bulk(srpc_bulk_t *bk, int pattern, __u64 magic) } static int -brw_client_prep_rpc(sfw_test_unit_t *tsu, - lnet_process_id_t dest, srpc_client_rpc_t **rpcpp) +brw_client_prep_rpc(struct sfw_test_unit *tsu, + lnet_process_id_t dest, struct srpc_client_rpc **rpcpp) { - srpc_bulk_t *bulk = tsu->tsu_private; - sfw_test_instance_t *tsi = tsu->tsu_instance; - sfw_session_t *sn = tsi->tsi_batch->bat_session; - srpc_client_rpc_t *rpc; - srpc_brw_reqst_t *req; + struct srpc_bulk *bulk = tsu->tsu_private; + struct sfw_test_instance *tsi = tsu->tsu_instance; + struct sfw_session *sn = tsi->tsi_batch->bat_session; + struct srpc_client_rpc *rpc; + struct srpc_brw_reqst *req; int flags; int npg; int len; @@ -273,15 +272,14 @@ brw_client_prep_rpc(sfw_test_unit_t *tsu, LASSERT(bulk); if (!(sn->sn_features & LST_FEAT_BULK_LEN)) { - test_bulk_req_t *breq = &tsi->tsi_u.bulk_v0; + struct test_bulk_req *breq = &tsi->tsi_u.bulk_v0; opc = breq->blk_opc; flags = breq->blk_flags; npg = breq->blk_npg; len = npg * PAGE_SIZE; - } else { - test_bulk_req_v1_t *breq = &tsi->tsi_u.bulk_v1; + struct test_bulk_req_v1 *breq = &tsi->tsi_u.bulk_v1; /* * I should never get this step if it's unknown feature @@ -299,7 +297,7 @@ brw_client_prep_rpc(sfw_test_unit_t *tsu, if (rc) return rc; - memcpy(&rpc->crpc_bulk, bulk, offsetof(srpc_bulk_t, bk_iovs[npg])); + memcpy(&rpc->crpc_bulk, bulk, offsetof(struct srpc_bulk, bk_iovs[npg])); if (opc == LST_BRW_WRITE) brw_fill_bulk(&rpc->crpc_bulk, flags, BRW_MAGIC); else @@ -315,21 +313,21 @@ brw_client_prep_rpc(sfw_test_unit_t *tsu, } static void -brw_client_done_rpc(sfw_test_unit_t *tsu, srpc_client_rpc_t *rpc) +brw_client_done_rpc(struct sfw_test_unit *tsu, struct srpc_client_rpc *rpc) { __u64 magic = BRW_MAGIC; - sfw_test_instance_t *tsi = tsu->tsu_instance; - sfw_session_t *sn = tsi->tsi_batch->bat_session; - srpc_msg_t *msg = &rpc->crpc_replymsg; - srpc_brw_reply_t *reply = &msg->msg_body.brw_reply; - srpc_brw_reqst_t *reqst = &rpc->crpc_reqstmsg.msg_body.brw_reqst; + struct sfw_test_instance *tsi = tsu->tsu_instance; + struct sfw_session *sn = tsi->tsi_batch->bat_session; + struct srpc_msg *msg = &rpc->crpc_replymsg; + struct srpc_brw_reply *reply = &msg->msg_body.brw_reply; + struct srpc_brw_reqst *reqst = &rpc->crpc_reqstmsg.msg_body.brw_reqst; LASSERT(sn); if (rpc->crpc_status) { CERROR("BRW RPC to %s failed with %d\n", libcfs_id2str(rpc->crpc_dest), rpc->crpc_status); - if (!tsi->tsi_stopping) /* rpc could have been aborted */ + if (!tsi->tsi_stopping) /* rpc could have been aborted */ atomic_inc(&sn->sn_brw_errors); return; } @@ -363,7 +361,7 @@ brw_client_done_rpc(sfw_test_unit_t *tsu, srpc_client_rpc_t *rpc) static void brw_server_rpc_done(struct srpc_server_rpc *rpc) { - srpc_bulk_t *blk = rpc->srpc_bulk; + struct srpc_bulk *blk = rpc->srpc_bulk; if (!blk) return; @@ -384,9 +382,9 @@ static int brw_bulk_ready(struct srpc_server_rpc *rpc, int status) { __u64 magic = BRW_MAGIC; - srpc_brw_reply_t *reply = &rpc->srpc_replymsg.msg_body.brw_reply; - srpc_brw_reqst_t *reqst; - srpc_msg_t *reqstmsg; + struct srpc_brw_reply *reply = &rpc->srpc_replymsg.msg_body.brw_reply; + struct srpc_brw_reqst *reqst; + struct srpc_msg *reqstmsg; LASSERT(rpc->srpc_bulk); LASSERT(rpc->srpc_reqstbuf); @@ -420,10 +418,10 @@ static int brw_server_handle(struct srpc_server_rpc *rpc) { struct srpc_service *sv = rpc->srpc_scd->scd_svc; - srpc_msg_t *replymsg = &rpc->srpc_replymsg; - srpc_msg_t *reqstmsg = &rpc->srpc_reqstbuf->buf_msg; - srpc_brw_reply_t *reply = &replymsg->msg_body.brw_reply; - srpc_brw_reqst_t *reqst = &reqstmsg->msg_body.brw_reqst; + struct srpc_msg *replymsg = &rpc->srpc_replymsg; + struct srpc_msg *reqstmsg = &rpc->srpc_reqstbuf->buf_msg; + struct srpc_brw_reply *reply = &replymsg->msg_body.brw_reply; + struct srpc_brw_reqst *reqst = &reqstmsg->msg_body.brw_reqst; int npg; int rc; @@ -459,7 +457,7 @@ brw_server_handle(struct srpc_server_rpc *rpc) if (!(reqstmsg->msg_ses_feats & LST_FEAT_BULK_LEN)) { /* compat with old version */ - if (reqst->brw_len & ~CFS_PAGE_MASK) { + if (reqst->brw_len & ~PAGE_MASK) { reply->brw_status = EINVAL; return 0; } @@ -490,7 +488,8 @@ brw_server_handle(struct srpc_server_rpc *rpc) return 0; } -sfw_test_client_ops_t brw_test_client; +struct sfw_test_client_ops brw_test_client; + void brw_init_test_client(void) { brw_test_client.tso_init = brw_client_init; @@ -499,7 +498,8 @@ void brw_init_test_client(void) brw_test_client.tso_done_rpc = brw_client_done_rpc; }; -srpc_service_t brw_test_service; +struct srpc_service brw_test_service; + void brw_init_test_service(void) { brw_test_service.sv_id = SRPC_SERVICE_BRW; diff --git a/drivers/staging/lustre/lnet/selftest/conctl.c b/drivers/staging/lustre/lnet/selftest/conctl.c index 79ee6c0bf..408c614b6 100644 --- a/drivers/staging/lustre/lnet/selftest/conctl.c +++ b/drivers/staging/lustre/lnet/selftest/conctl.c @@ -51,9 +51,9 @@ lst_session_new_ioctl(lstio_session_new_args_t *args) char *name; int rc; - if (!args->lstio_ses_idp || /* address for output sid */ - !args->lstio_ses_key || /* no key is specified */ - !args->lstio_ses_namep || /* session name */ + if (!args->lstio_ses_idp || /* address for output sid */ + !args->lstio_ses_key || /* no key is specified */ + !args->lstio_ses_namep || /* session name */ args->lstio_ses_nmlen <= 0 || args->lstio_ses_nmlen > LST_NAME_SIZE) return -EINVAL; @@ -95,11 +95,11 @@ lst_session_info_ioctl(lstio_session_info_args_t *args) { /* no checking of key */ - if (!args->lstio_ses_idp || /* address for output sid */ - !args->lstio_ses_keyp || /* address for output key */ - !args->lstio_ses_featp || /* address for output features */ - !args->lstio_ses_ndinfo || /* address for output ndinfo */ - !args->lstio_ses_namep || /* address for output name */ + if (!args->lstio_ses_idp || /* address for output sid */ + !args->lstio_ses_keyp || /* address for output key */ + !args->lstio_ses_featp || /* address for output features */ + !args->lstio_ses_ndinfo || /* address for output ndinfo */ + !args->lstio_ses_namep || /* address for output name */ args->lstio_ses_nmlen <= 0 || args->lstio_ses_nmlen > LST_NAME_SIZE) return -EINVAL; @@ -125,7 +125,7 @@ lst_debug_ioctl(lstio_debug_args_t *args) if (!args->lstio_dbg_resultp) return -EINVAL; - if (args->lstio_dbg_namep && /* name of batch/group */ + if (args->lstio_dbg_namep && /* name of batch/group */ (args->lstio_dbg_nmlen <= 0 || args->lstio_dbg_nmlen > LST_NAME_SIZE)) return -EINVAL; @@ -326,7 +326,7 @@ lst_nodes_add_ioctl(lstio_group_nodes_args_t *args) if (args->lstio_grp_key != console_session.ses_key) return -EACCES; - if (!args->lstio_grp_idsp || /* array of ids */ + if (!args->lstio_grp_idsp || /* array of ids */ args->lstio_grp_count <= 0 || !args->lstio_grp_resultp || !args->lstio_grp_featp || @@ -394,13 +394,13 @@ lst_group_info_ioctl(lstio_group_info_args_t *args) args->lstio_grp_nmlen > LST_NAME_SIZE) return -EINVAL; - if (!args->lstio_grp_entp && /* output: group entry */ - !args->lstio_grp_dentsp) /* output: node entry */ + if (!args->lstio_grp_entp && /* output: group entry */ + !args->lstio_grp_dentsp) /* output: node entry */ return -EINVAL; - if (args->lstio_grp_dentsp) { /* have node entry */ - if (!args->lstio_grp_idxp || /* node index */ - !args->lstio_grp_ndentp) /* # of node entry */ + if (args->lstio_grp_dentsp) { /* have node entry */ + if (!args->lstio_grp_idxp || /* node index */ + !args->lstio_grp_ndentp) /* # of node entry */ return -EINVAL; if (copy_from_user(&ndent, args->lstio_grp_ndentp, @@ -612,18 +612,18 @@ lst_batch_info_ioctl(lstio_batch_info_args_t *args) if (args->lstio_bat_key != console_session.ses_key) return -EACCES; - if (!args->lstio_bat_namep || /* batch name */ + if (!args->lstio_bat_namep || /* batch name */ args->lstio_bat_nmlen <= 0 || args->lstio_bat_nmlen > LST_NAME_SIZE) return -EINVAL; - if (!args->lstio_bat_entp && /* output: batch entry */ - !args->lstio_bat_dentsp) /* output: node entry */ + if (!args->lstio_bat_entp && /* output: batch entry */ + !args->lstio_bat_dentsp) /* output: node entry */ return -EINVAL; - if (args->lstio_bat_dentsp) { /* have node entry */ - if (!args->lstio_bat_idxp || /* node index */ - !args->lstio_bat_ndentp) /* # of node entry */ + if (args->lstio_bat_dentsp) { /* have node entry */ + if (!args->lstio_bat_idxp || /* node index */ + !args->lstio_bat_ndentp) /* # of node entry */ return -EINVAL; if (copy_from_user(&index, args->lstio_bat_idxp, @@ -722,18 +722,18 @@ static int lst_test_add_ioctl(lstio_test_args_t *args) if (!args->lstio_tes_resultp || !args->lstio_tes_retp || - !args->lstio_tes_bat_name || /* no specified batch */ + !args->lstio_tes_bat_name || /* no specified batch */ args->lstio_tes_bat_nmlen <= 0 || args->lstio_tes_bat_nmlen > LST_NAME_SIZE || - !args->lstio_tes_sgrp_name || /* no source group */ + !args->lstio_tes_sgrp_name || /* no source group */ args->lstio_tes_sgrp_nmlen <= 0 || args->lstio_tes_sgrp_nmlen > LST_NAME_SIZE || - !args->lstio_tes_dgrp_name || /* no target group */ + !args->lstio_tes_dgrp_name || /* no target group */ args->lstio_tes_dgrp_nmlen <= 0 || args->lstio_tes_dgrp_nmlen > LST_NAME_SIZE) return -EINVAL; - if (!args->lstio_tes_loop || /* negative is infinite */ + if (!args->lstio_tes_loop || /* negative is infinite */ args->lstio_tes_concur <= 0 || args->lstio_tes_dist <= 0 || args->lstio_tes_span <= 0) @@ -743,7 +743,7 @@ static int lst_test_add_ioctl(lstio_test_args_t *args) if (args->lstio_tes_param && (args->lstio_tes_param_len <= 0 || args->lstio_tes_param_len > - PAGE_SIZE - sizeof(lstcon_test_t))) + PAGE_SIZE - sizeof(struct lstcon_test))) return -EINVAL; LIBCFS_ALLOC(batch_name, args->lstio_tes_bat_nmlen + 1); diff --git a/drivers/staging/lustre/lnet/selftest/conrpc.c b/drivers/staging/lustre/lnet/selftest/conrpc.c index 35a227d0c..6f6875811 100644 --- a/drivers/staging/lustre/lnet/selftest/conrpc.c +++ b/drivers/staging/lustre/lnet/selftest/conrpc.c @@ -46,13 +46,13 @@ #include "conrpc.h" #include "console.h" -void lstcon_rpc_stat_reply(lstcon_rpc_trans_t *, srpc_msg_t *, - lstcon_node_t *, lstcon_trans_stat_t *); +void lstcon_rpc_stat_reply(struct lstcon_rpc_trans *, struct srpc_msg *, + struct lstcon_node *, lstcon_trans_stat_t *); static void -lstcon_rpc_done(srpc_client_rpc_t *rpc) +lstcon_rpc_done(struct srpc_client_rpc *rpc) { - lstcon_rpc_t *crpc = (lstcon_rpc_t *)rpc->crpc_priv; + struct lstcon_rpc *crpc = (struct lstcon_rpc *)rpc->crpc_priv; LASSERT(crpc && rpc == crpc->crp_rpc); LASSERT(crpc->crp_posted && !crpc->crp_finished); @@ -90,8 +90,8 @@ lstcon_rpc_done(srpc_client_rpc_t *rpc) } static int -lstcon_rpc_init(lstcon_node_t *nd, int service, unsigned feats, - int bulk_npg, int bulk_len, int embedded, lstcon_rpc_t *crpc) +lstcon_rpc_init(struct lstcon_node *nd, int service, unsigned feats, + int bulk_npg, int bulk_len, int embedded, struct lstcon_rpc *crpc) { crpc->crp_rpc = sfw_create_rpc(nd->nd_id, service, feats, bulk_npg, bulk_len, @@ -115,16 +115,16 @@ lstcon_rpc_init(lstcon_node_t *nd, int service, unsigned feats, } static int -lstcon_rpc_prep(lstcon_node_t *nd, int service, unsigned feats, - int bulk_npg, int bulk_len, lstcon_rpc_t **crpcpp) +lstcon_rpc_prep(struct lstcon_node *nd, int service, unsigned feats, + int bulk_npg, int bulk_len, struct lstcon_rpc **crpcpp) { - lstcon_rpc_t *crpc = NULL; + struct lstcon_rpc *crpc = NULL; int rc; spin_lock(&console_session.ses_rpc_lock); crpc = list_first_entry_or_null(&console_session.ses_rpc_freelist, - lstcon_rpc_t, crp_link); + struct lstcon_rpc, crp_link); if (crpc) list_del_init(&crpc->crp_link); @@ -148,9 +148,9 @@ lstcon_rpc_prep(lstcon_node_t *nd, int service, unsigned feats, } void -lstcon_rpc_put(lstcon_rpc_t *crpc) +lstcon_rpc_put(struct lstcon_rpc *crpc) { - srpc_bulk_t *bulk = &crpc->crp_rpc->crpc_bulk; + struct srpc_bulk *bulk = &crpc->crp_rpc->crpc_bulk; int i; LASSERT(list_empty(&crpc->crp_link)); @@ -183,9 +183,9 @@ lstcon_rpc_put(lstcon_rpc_t *crpc) } static void -lstcon_rpc_post(lstcon_rpc_t *crpc) +lstcon_rpc_post(struct lstcon_rpc *crpc) { - lstcon_rpc_trans_t *trans = crpc->crp_trans; + struct lstcon_rpc_trans *trans = crpc->crp_trans; LASSERT(trans); @@ -236,9 +236,9 @@ lstcon_rpc_trans_name(int transop) int lstcon_rpc_trans_prep(struct list_head *translist, int transop, - lstcon_rpc_trans_t **transpp) + struct lstcon_rpc_trans **transpp) { - lstcon_rpc_trans_t *trans; + struct lstcon_rpc_trans *trans; if (translist) { list_for_each_entry(trans, translist, tas_link) { @@ -278,26 +278,26 @@ lstcon_rpc_trans_prep(struct list_head *translist, int transop, } void -lstcon_rpc_trans_addreq(lstcon_rpc_trans_t *trans, lstcon_rpc_t *crpc) +lstcon_rpc_trans_addreq(struct lstcon_rpc_trans *trans, struct lstcon_rpc *crpc) { list_add_tail(&crpc->crp_link, &trans->tas_rpcs_list); crpc->crp_trans = trans; } void -lstcon_rpc_trans_abort(lstcon_rpc_trans_t *trans, int error) +lstcon_rpc_trans_abort(struct lstcon_rpc_trans *trans, int error) { - srpc_client_rpc_t *rpc; - lstcon_rpc_t *crpc; - lstcon_node_t *nd; + struct srpc_client_rpc *rpc; + struct lstcon_rpc *crpc; + struct lstcon_node *nd; list_for_each_entry(crpc, &trans->tas_rpcs_list, crp_link) { rpc = crpc->crp_rpc; spin_lock(&rpc->crpc_lock); - if (!crpc->crp_posted || /* not posted */ - crpc->crp_stamp) { /* rpc done or aborted already */ + if (!crpc->crp_posted || /* not posted */ + crpc->crp_stamp) { /* rpc done or aborted already */ if (!crpc->crp_stamp) { crpc->crp_stamp = cfs_time_current(); crpc->crp_status = -EINTR; @@ -326,7 +326,7 @@ lstcon_rpc_trans_abort(lstcon_rpc_trans_t *trans, int error) } static int -lstcon_rpc_trans_check(lstcon_rpc_trans_t *trans) +lstcon_rpc_trans_check(struct lstcon_rpc_trans *trans) { if (console_session.ses_shutdown && !list_empty(&trans->tas_olink)) /* Not an end session RPC */ @@ -336,9 +336,9 @@ lstcon_rpc_trans_check(lstcon_rpc_trans_t *trans) } int -lstcon_rpc_trans_postwait(lstcon_rpc_trans_t *trans, int timeout) +lstcon_rpc_trans_postwait(struct lstcon_rpc_trans *trans, int timeout) { - lstcon_rpc_t *crpc; + struct lstcon_rpc *crpc; int rc; if (list_empty(&trans->tas_rpcs_list)) @@ -386,11 +386,11 @@ lstcon_rpc_trans_postwait(lstcon_rpc_trans_t *trans, int timeout) } static int -lstcon_rpc_get_reply(lstcon_rpc_t *crpc, srpc_msg_t **msgpp) +lstcon_rpc_get_reply(struct lstcon_rpc *crpc, struct srpc_msg **msgpp) { - lstcon_node_t *nd = crpc->crp_node; - srpc_client_rpc_t *rpc = crpc->crp_rpc; - srpc_generic_reply_t *rep; + struct lstcon_node *nd = crpc->crp_node; + struct srpc_client_rpc *rpc = crpc->crp_rpc; + struct srpc_generic_reply *rep; LASSERT(nd && rpc); LASSERT(crpc->crp_stamp); @@ -423,10 +423,10 @@ lstcon_rpc_get_reply(lstcon_rpc_t *crpc, srpc_msg_t **msgpp) } void -lstcon_rpc_trans_stat(lstcon_rpc_trans_t *trans, lstcon_trans_stat_t *stat) +lstcon_rpc_trans_stat(struct lstcon_rpc_trans *trans, lstcon_trans_stat_t *stat) { - lstcon_rpc_t *crpc; - srpc_msg_t *rep; + struct lstcon_rpc *crpc; + struct srpc_msg *rep; int error; LASSERT(stat); @@ -466,17 +466,17 @@ lstcon_rpc_trans_stat(lstcon_rpc_trans_t *trans, lstcon_trans_stat_t *stat) } int -lstcon_rpc_trans_interpreter(lstcon_rpc_trans_t *trans, +lstcon_rpc_trans_interpreter(struct lstcon_rpc_trans *trans, struct list_head __user *head_up, lstcon_rpc_readent_func_t readent) { struct list_head tmp; struct list_head __user *next; lstcon_rpc_ent_t *ent; - srpc_generic_reply_t *rep; - lstcon_rpc_t *crpc; - srpc_msg_t *msg; - lstcon_node_t *nd; + struct srpc_generic_reply *rep; + struct lstcon_rpc *crpc; + struct srpc_msg *msg; + struct lstcon_node *nd; long dur; struct timeval tv; int error; @@ -520,7 +520,7 @@ lstcon_rpc_trans_interpreter(lstcon_rpc_trans_t *trans, continue; /* RPC is done */ - rep = (srpc_generic_reply_t *)&msg->msg_body.reply; + rep = (struct srpc_generic_reply *)&msg->msg_body.reply; if (copy_to_user(&ent->rpe_sid, &rep->sid, sizeof(lst_sid_t)) || copy_to_user(&ent->rpe_fwk_errno, &rep->status, @@ -531,7 +531,6 @@ lstcon_rpc_trans_interpreter(lstcon_rpc_trans_t *trans, continue; error = readent(trans->tas_opc, msg, ent); - if (error) return error; } @@ -540,11 +539,11 @@ lstcon_rpc_trans_interpreter(lstcon_rpc_trans_t *trans, } void -lstcon_rpc_trans_destroy(lstcon_rpc_trans_t *trans) +lstcon_rpc_trans_destroy(struct lstcon_rpc_trans *trans) { - srpc_client_rpc_t *rpc; - lstcon_rpc_t *crpc; - lstcon_rpc_t *tmp; + struct srpc_client_rpc *rpc; + struct lstcon_rpc *crpc; + struct lstcon_rpc *tmp; int count = 0; list_for_each_entry_safe(crpc, tmp, &trans->tas_rpcs_list, crp_link) { @@ -563,10 +562,10 @@ lstcon_rpc_trans_destroy(lstcon_rpc_trans_t *trans) } /* - * rpcs can be still not callbacked (even LNetMDUnlink is called) - * because huge timeout for inaccessible network, don't make - * user wait for them, just abandon them, they will be recycled - * in callback + * rpcs can be still not callbacked (even LNetMDUnlink is + * called) because huge timeout for inaccessible network, + * don't make user wait for them, just abandon them, they + * will be recycled in callback */ LASSERT(crpc->crp_status); @@ -593,11 +592,11 @@ lstcon_rpc_trans_destroy(lstcon_rpc_trans_t *trans) } int -lstcon_sesrpc_prep(lstcon_node_t *nd, int transop, - unsigned feats, lstcon_rpc_t **crpc) +lstcon_sesrpc_prep(struct lstcon_node *nd, int transop, + unsigned feats, struct lstcon_rpc **crpc) { - srpc_mksn_reqst_t *msrq; - srpc_rmsn_reqst_t *rsrq; + struct srpc_mksn_reqst *msrq; + struct srpc_rmsn_reqst *rsrq; int rc; switch (transop) { @@ -632,9 +631,9 @@ lstcon_sesrpc_prep(lstcon_node_t *nd, int transop, } int -lstcon_dbgrpc_prep(lstcon_node_t *nd, unsigned feats, lstcon_rpc_t **crpc) +lstcon_dbgrpc_prep(struct lstcon_node *nd, unsigned feats, struct lstcon_rpc **crpc) { - srpc_debug_reqst_t *drq; + struct srpc_debug_reqst *drq; int rc; rc = lstcon_rpc_prep(nd, SRPC_SERVICE_DEBUG, feats, 0, 0, crpc); @@ -650,11 +649,11 @@ lstcon_dbgrpc_prep(lstcon_node_t *nd, unsigned feats, lstcon_rpc_t **crpc) } int -lstcon_batrpc_prep(lstcon_node_t *nd, int transop, unsigned feats, - lstcon_tsb_hdr_t *tsb, lstcon_rpc_t **crpc) +lstcon_batrpc_prep(struct lstcon_node *nd, int transop, unsigned feats, + struct lstcon_tsb_hdr *tsb, struct lstcon_rpc **crpc) { - lstcon_batch_t *batch; - srpc_batch_reqst_t *brq; + struct lstcon_batch *batch; + struct srpc_batch_reqst *brq; int rc; rc = lstcon_rpc_prep(nd, SRPC_SERVICE_BATCH, feats, 0, 0, crpc); @@ -676,16 +675,16 @@ lstcon_batrpc_prep(lstcon_node_t *nd, int transop, unsigned feats, LASSERT(!tsb->tsb_index); - batch = (lstcon_batch_t *)tsb; + batch = (struct lstcon_batch *)tsb; brq->bar_arg = batch->bat_arg; return 0; } int -lstcon_statrpc_prep(lstcon_node_t *nd, unsigned feats, lstcon_rpc_t **crpc) +lstcon_statrpc_prep(struct lstcon_node *nd, unsigned feats, struct lstcon_rpc **crpc) { - srpc_stat_reqst_t *srq; + struct srpc_stat_reqst *srq; int rc; rc = lstcon_rpc_prep(nd, SRPC_SERVICE_QUERY_STAT, feats, 0, 0, crpc); @@ -716,12 +715,12 @@ lstcon_next_id(int idx, int nkiov, lnet_kiov_t *kiov) } static int -lstcon_dstnodes_prep(lstcon_group_t *grp, int idx, +lstcon_dstnodes_prep(struct lstcon_group *grp, int idx, int dist, int span, int nkiov, lnet_kiov_t *kiov) { lnet_process_id_packed_t *pid; - lstcon_ndlink_t *ndl; - lstcon_node_t *nd; + struct lstcon_ndlink *ndl; + struct lstcon_node *nd; int start; int end; int i = 0; @@ -770,9 +769,9 @@ lstcon_dstnodes_prep(lstcon_group_t *grp, int idx, } static int -lstcon_pingrpc_prep(lst_test_ping_param_t *param, srpc_test_reqst_t *req) +lstcon_pingrpc_prep(lst_test_ping_param_t *param, struct srpc_test_reqst *req) { - test_ping_req_t *prq = &req->tsr_u.ping; + struct test_ping_req *prq = &req->tsr_u.ping; prq->png_size = param->png_size; prq->png_flags = param->png_flags; @@ -781,9 +780,9 @@ lstcon_pingrpc_prep(lst_test_ping_param_t *param, srpc_test_reqst_t *req) } static int -lstcon_bulkrpc_v0_prep(lst_test_bulk_param_t *param, srpc_test_reqst_t *req) +lstcon_bulkrpc_v0_prep(lst_test_bulk_param_t *param, struct srpc_test_reqst *req) { - test_bulk_req_t *brq = &req->tsr_u.bulk_v0; + struct test_bulk_req *brq = &req->tsr_u.bulk_v0; brq->blk_opc = param->blk_opc; brq->blk_npg = (param->blk_size + PAGE_SIZE - 1) / @@ -794,9 +793,9 @@ lstcon_bulkrpc_v0_prep(lst_test_bulk_param_t *param, srpc_test_reqst_t *req) } static int -lstcon_bulkrpc_v1_prep(lst_test_bulk_param_t *param, srpc_test_reqst_t *req) +lstcon_bulkrpc_v1_prep(lst_test_bulk_param_t *param, struct srpc_test_reqst *req) { - test_bulk_req_v1_t *brq = &req->tsr_u.bulk_v1; + struct test_bulk_req_v1 *brq = &req->tsr_u.bulk_v1; brq->blk_opc = param->blk_opc; brq->blk_flags = param->blk_flags; @@ -807,13 +806,13 @@ lstcon_bulkrpc_v1_prep(lst_test_bulk_param_t *param, srpc_test_reqst_t *req) } int -lstcon_testrpc_prep(lstcon_node_t *nd, int transop, unsigned feats, - lstcon_test_t *test, lstcon_rpc_t **crpc) +lstcon_testrpc_prep(struct lstcon_node *nd, int transop, unsigned feats, + struct lstcon_test *test, struct lstcon_rpc **crpc) { - lstcon_group_t *sgrp = test->tes_src_grp; - lstcon_group_t *dgrp = test->tes_dst_grp; - srpc_test_reqst_t *trq; - srpc_bulk_t *bulk; + struct lstcon_group *sgrp = test->tes_src_grp; + struct lstcon_group *dgrp = test->tes_dst_grp; + struct srpc_test_reqst *trq; + struct srpc_bulk *bulk; int i; int npg = 0; int nob = 0; @@ -841,7 +840,6 @@ lstcon_testrpc_prep(lstcon_node_t *nd, int transop, unsigned feats, trq->tsr_ndest = 0; trq->tsr_loop = nmax * test->tes_dist * test->tes_concur; - } else { bulk = &(*crpc)->crp_rpc->crpc_bulk; @@ -917,10 +915,10 @@ lstcon_testrpc_prep(lstcon_node_t *nd, int transop, unsigned feats, } static int -lstcon_sesnew_stat_reply(lstcon_rpc_trans_t *trans, - lstcon_node_t *nd, srpc_msg_t *reply) +lstcon_sesnew_stat_reply(struct lstcon_rpc_trans *trans, + struct lstcon_node *nd, struct srpc_msg *reply) { - srpc_mksn_reply_t *mksn_rep = &reply->msg_body.mksn_reply; + struct srpc_mksn_reply *mksn_rep = &reply->msg_body.mksn_reply; int status = mksn_rep->mksn_status; if (!status && @@ -940,7 +938,7 @@ lstcon_sesnew_stat_reply(lstcon_rpc_trans_t *trans, if (!trans->tas_feats_updated) { spin_lock(&console_session.ses_rpc_lock); - if (!trans->tas_feats_updated) { /* recheck with lock */ + if (!trans->tas_feats_updated) { /* recheck with lock */ trans->tas_feats_updated = 1; trans->tas_features = reply->msg_ses_feats; } @@ -964,14 +962,14 @@ lstcon_sesnew_stat_reply(lstcon_rpc_trans_t *trans, } void -lstcon_rpc_stat_reply(lstcon_rpc_trans_t *trans, srpc_msg_t *msg, - lstcon_node_t *nd, lstcon_trans_stat_t *stat) +lstcon_rpc_stat_reply(struct lstcon_rpc_trans *trans, struct srpc_msg *msg, + struct lstcon_node *nd, lstcon_trans_stat_t *stat) { - srpc_rmsn_reply_t *rmsn_rep; - srpc_debug_reply_t *dbg_rep; - srpc_batch_reply_t *bat_rep; - srpc_test_reply_t *test_rep; - srpc_stat_reply_t *stat_rep; + struct srpc_rmsn_reply *rmsn_rep; + struct srpc_debug_reply *dbg_rep; + struct srpc_batch_reply *bat_rep; + struct srpc_test_reply *test_rep; + struct srpc_stat_reply *stat_rep; int rc = 0; switch (trans->tas_opc) { @@ -1085,12 +1083,12 @@ int lstcon_rpc_trans_ndlist(struct list_head *ndlist, struct list_head *translist, int transop, void *arg, lstcon_rpc_cond_func_t condition, - lstcon_rpc_trans_t **transpp) + struct lstcon_rpc_trans **transpp) { - lstcon_rpc_trans_t *trans; - lstcon_ndlink_t *ndl; - lstcon_node_t *nd; - lstcon_rpc_t *rpc; + struct lstcon_rpc_trans *trans; + struct lstcon_ndlink *ndl; + struct lstcon_node *nd; + struct lstcon_rpc *rpc; unsigned feats; int rc; @@ -1130,14 +1128,16 @@ lstcon_rpc_trans_ndlist(struct list_head *ndlist, case LST_TRANS_TSBCLIADD: case LST_TRANS_TSBSRVADD: rc = lstcon_testrpc_prep(nd, transop, feats, - (lstcon_test_t *)arg, &rpc); + (struct lstcon_test *)arg, + &rpc); break; case LST_TRANS_TSBRUN: case LST_TRANS_TSBSTOP: case LST_TRANS_TSBCLIQRY: case LST_TRANS_TSBSRVQRY: rc = lstcon_batrpc_prep(nd, transop, feats, - (lstcon_tsb_hdr_t *)arg, &rpc); + (struct lstcon_tsb_hdr *)arg, + &rpc); break; case LST_TRANS_STATQRY: rc = lstcon_statrpc_prep(nd, feats, &rpc); @@ -1170,17 +1170,18 @@ static void lstcon_rpc_pinger(void *arg) { struct stt_timer *ptimer = (struct stt_timer *)arg; - lstcon_rpc_trans_t *trans; - lstcon_rpc_t *crpc; - srpc_msg_t *rep; - srpc_debug_reqst_t *drq; - lstcon_ndlink_t *ndl; - lstcon_node_t *nd; + struct lstcon_rpc_trans *trans; + struct lstcon_rpc *crpc; + struct srpc_msg *rep; + struct srpc_debug_reqst *drq; + struct lstcon_ndlink *ndl; + struct lstcon_node *nd; int intv; int count = 0; int rc; - /* RPC pinger is a special case of transaction, + /* + * RPC pinger is a special case of transaction, * it's called by timer at 8 seconds interval. */ mutex_lock(&console_session.ses_mutex); @@ -1326,9 +1327,9 @@ lstcon_rpc_pinger_stop(void) void lstcon_rpc_cleanup_wait(void) { - lstcon_rpc_trans_t *trans; - lstcon_rpc_t *crpc; - lstcon_rpc_t *temp; + struct lstcon_rpc_trans *trans; + struct lstcon_rpc *crpc; + struct lstcon_rpc *temp; struct list_head *pacer; struct list_head zlist; @@ -1338,7 +1339,7 @@ lstcon_rpc_cleanup_wait(void) while (!list_empty(&console_session.ses_trans_list)) { list_for_each(pacer, &console_session.ses_trans_list) { - trans = list_entry(pacer, lstcon_rpc_trans_t, + trans = list_entry(pacer, struct lstcon_rpc_trans, tas_link); CDEBUG(D_NET, "Session closed, wakeup transaction %s\n", @@ -1370,7 +1371,7 @@ lstcon_rpc_cleanup_wait(void) list_for_each_entry_safe(crpc, temp, &zlist, crp_link) { list_del(&crpc->crp_link); - LIBCFS_FREE(crpc, sizeof(lstcon_rpc_t)); + LIBCFS_FREE(crpc, sizeof(struct lstcon_rpc)); } } diff --git a/drivers/staging/lustre/lnet/selftest/conrpc.h b/drivers/staging/lustre/lnet/selftest/conrpc.h index 3e7839dad..90c3385a3 100644 --- a/drivers/staging/lustre/lnet/selftest/conrpc.h +++ b/drivers/staging/lustre/lnet/selftest/conrpc.h @@ -63,9 +63,9 @@ struct lstcon_tsb_hdr; struct lstcon_test; struct lstcon_node; -typedef struct lstcon_rpc { +struct lstcon_rpc { struct list_head crp_link; /* chain on rpc transaction */ - srpc_client_rpc_t *crp_rpc; /* client rpc */ + struct srpc_client_rpc *crp_rpc; /* client rpc */ struct lstcon_node *crp_node; /* destination node */ struct lstcon_rpc_trans *crp_trans; /* conrpc transaction */ @@ -76,9 +76,9 @@ typedef struct lstcon_rpc { unsigned int crp_embedded:1; int crp_status; /* console rpc errors */ unsigned long crp_stamp; /* replied time stamp */ -} lstcon_rpc_t; +}; -typedef struct lstcon_rpc_trans { +struct lstcon_rpc_trans { struct list_head tas_olink; /* link chain on owner list */ struct list_head tas_link; /* link chain on global list */ int tas_opc; /* operation code of transaction */ @@ -87,7 +87,7 @@ typedef struct lstcon_rpc_trans { wait_queue_head_t tas_waitq; /* wait queue head */ atomic_t tas_remaining; /* # of un-scheduled rpcs */ struct list_head tas_rpcs_list; /* queued requests */ -} lstcon_rpc_trans_t; +}; #define LST_TRANS_PRIVATE 0x1000 @@ -106,35 +106,35 @@ typedef struct lstcon_rpc_trans { #define LST_TRANS_STATQRY 0x21 typedef int (*lstcon_rpc_cond_func_t)(int, struct lstcon_node *, void *); -typedef int (*lstcon_rpc_readent_func_t)(int, srpc_msg_t *, +typedef int (*lstcon_rpc_readent_func_t)(int, struct srpc_msg *, lstcon_rpc_ent_t __user *); int lstcon_sesrpc_prep(struct lstcon_node *nd, int transop, - unsigned version, lstcon_rpc_t **crpc); + unsigned version, struct lstcon_rpc **crpc); int lstcon_dbgrpc_prep(struct lstcon_node *nd, - unsigned version, lstcon_rpc_t **crpc); + unsigned version, struct lstcon_rpc **crpc); int lstcon_batrpc_prep(struct lstcon_node *nd, int transop, unsigned version, - struct lstcon_tsb_hdr *tsb, lstcon_rpc_t **crpc); + struct lstcon_tsb_hdr *tsb, struct lstcon_rpc **crpc); int lstcon_testrpc_prep(struct lstcon_node *nd, int transop, unsigned version, - struct lstcon_test *test, lstcon_rpc_t **crpc); + struct lstcon_test *test, struct lstcon_rpc **crpc); int lstcon_statrpc_prep(struct lstcon_node *nd, unsigned version, - lstcon_rpc_t **crpc); -void lstcon_rpc_put(lstcon_rpc_t *crpc); + struct lstcon_rpc **crpc); +void lstcon_rpc_put(struct lstcon_rpc *crpc); int lstcon_rpc_trans_prep(struct list_head *translist, - int transop, lstcon_rpc_trans_t **transpp); + int transop, struct lstcon_rpc_trans **transpp); int lstcon_rpc_trans_ndlist(struct list_head *ndlist, struct list_head *translist, int transop, void *arg, lstcon_rpc_cond_func_t condition, - lstcon_rpc_trans_t **transpp); -void lstcon_rpc_trans_stat(lstcon_rpc_trans_t *trans, + struct lstcon_rpc_trans **transpp); +void lstcon_rpc_trans_stat(struct lstcon_rpc_trans *trans, lstcon_trans_stat_t *stat); -int lstcon_rpc_trans_interpreter(lstcon_rpc_trans_t *trans, +int lstcon_rpc_trans_interpreter(struct lstcon_rpc_trans *trans, struct list_head __user *head_up, lstcon_rpc_readent_func_t readent); -void lstcon_rpc_trans_abort(lstcon_rpc_trans_t *trans, int error); -void lstcon_rpc_trans_destroy(lstcon_rpc_trans_t *trans); -void lstcon_rpc_trans_addreq(lstcon_rpc_trans_t *trans, lstcon_rpc_t *req); -int lstcon_rpc_trans_postwait(lstcon_rpc_trans_t *trans, int timeout); +void lstcon_rpc_trans_abort(struct lstcon_rpc_trans *trans, int error); +void lstcon_rpc_trans_destroy(struct lstcon_rpc_trans *trans); +void lstcon_rpc_trans_addreq(struct lstcon_rpc_trans *trans, struct lstcon_rpc *req); +int lstcon_rpc_trans_postwait(struct lstcon_rpc_trans *trans, int timeout); int lstcon_rpc_pinger_start(void); void lstcon_rpc_pinger_stop(void); void lstcon_rpc_cleanup_wait(void); diff --git a/drivers/staging/lustre/lnet/selftest/console.c b/drivers/staging/lustre/lnet/selftest/console.c index 1a923ea3a..a03e52d29 100644 --- a/drivers/staging/lustre/lnet/selftest/console.c +++ b/drivers/staging/lustre/lnet/selftest/console.c @@ -61,7 +61,7 @@ do { \ struct lstcon_session console_session; static void -lstcon_node_get(lstcon_node_t *nd) +lstcon_node_get(struct lstcon_node *nd) { LASSERT(nd->nd_ref >= 1); @@ -69,9 +69,9 @@ lstcon_node_get(lstcon_node_t *nd) } static int -lstcon_node_find(lnet_process_id_t id, lstcon_node_t **ndpp, int create) +lstcon_node_find(lnet_process_id_t id, struct lstcon_node **ndpp, int create) { - lstcon_ndlink_t *ndl; + struct lstcon_ndlink *ndl; unsigned int idx = LNET_NIDADDR(id.nid) % LST_GLOBAL_HASHSIZE; LASSERT(id.nid != LNET_NID_ANY); @@ -90,11 +90,11 @@ lstcon_node_find(lnet_process_id_t id, lstcon_node_t **ndpp, int create) if (!create) return -ENOENT; - LIBCFS_ALLOC(*ndpp, sizeof(lstcon_node_t) + sizeof(lstcon_ndlink_t)); + LIBCFS_ALLOC(*ndpp, sizeof(struct lstcon_node) + sizeof(struct lstcon_ndlink)); if (!*ndpp) return -ENOMEM; - ndl = (lstcon_ndlink_t *)(*ndpp + 1); + ndl = (struct lstcon_ndlink *)(*ndpp + 1); ndl->ndl_node = *ndpp; @@ -103,7 +103,7 @@ lstcon_node_find(lnet_process_id_t id, lstcon_node_t **ndpp, int create) ndl->ndl_node->nd_stamp = cfs_time_current(); ndl->ndl_node->nd_state = LST_NODE_UNKNOWN; ndl->ndl_node->nd_timeout = 0; - memset(&ndl->ndl_node->nd_ping, 0, sizeof(lstcon_rpc_t)); + memset(&ndl->ndl_node->nd_ping, 0, sizeof(struct lstcon_rpc)); /* * queued in global hash & list, no refcount is taken by @@ -117,16 +117,16 @@ lstcon_node_find(lnet_process_id_t id, lstcon_node_t **ndpp, int create) } static void -lstcon_node_put(lstcon_node_t *nd) +lstcon_node_put(struct lstcon_node *nd) { - lstcon_ndlink_t *ndl; + struct lstcon_ndlink *ndl; LASSERT(nd->nd_ref > 0); if (--nd->nd_ref > 0) return; - ndl = (lstcon_ndlink_t *)(nd + 1); + ndl = (struct lstcon_ndlink *)(nd + 1); LASSERT(!list_empty(&ndl->ndl_link)); LASSERT(!list_empty(&ndl->ndl_hlink)); @@ -135,16 +135,16 @@ lstcon_node_put(lstcon_node_t *nd) list_del(&ndl->ndl_link); list_del(&ndl->ndl_hlink); - LIBCFS_FREE(nd, sizeof(lstcon_node_t) + sizeof(lstcon_ndlink_t)); + LIBCFS_FREE(nd, sizeof(struct lstcon_node) + sizeof(struct lstcon_ndlink)); } static int lstcon_ndlink_find(struct list_head *hash, - lnet_process_id_t id, lstcon_ndlink_t **ndlpp, int create) + lnet_process_id_t id, struct lstcon_ndlink **ndlpp, int create) { unsigned int idx = LNET_NIDADDR(id.nid) % LST_NODE_HASHSIZE; - lstcon_ndlink_t *ndl; - lstcon_node_t *nd; + struct lstcon_ndlink *ndl; + struct lstcon_node *nd; int rc; if (id.nid == LNET_NID_ANY) @@ -168,7 +168,7 @@ lstcon_ndlink_find(struct list_head *hash, if (rc) return rc; - LIBCFS_ALLOC(ndl, sizeof(lstcon_ndlink_t)); + LIBCFS_ALLOC(ndl, sizeof(struct lstcon_ndlink)); if (!ndl) { lstcon_node_put(nd); return -ENOMEM; @@ -184,7 +184,7 @@ lstcon_ndlink_find(struct list_head *hash, } static void -lstcon_ndlink_release(lstcon_ndlink_t *ndl) +lstcon_ndlink_release(struct lstcon_ndlink *ndl) { LASSERT(list_empty(&ndl->ndl_link)); LASSERT(!list_empty(&ndl->ndl_hlink)); @@ -196,12 +196,12 @@ lstcon_ndlink_release(lstcon_ndlink_t *ndl) } static int -lstcon_group_alloc(char *name, lstcon_group_t **grpp) +lstcon_group_alloc(char *name, struct lstcon_group **grpp) { - lstcon_group_t *grp; + struct lstcon_group *grp; int i; - LIBCFS_ALLOC(grp, offsetof(lstcon_group_t, + LIBCFS_ALLOC(grp, offsetof(struct lstcon_group, grp_ndl_hash[LST_NODE_HASHSIZE])); if (!grp) return -ENOMEM; @@ -209,7 +209,7 @@ lstcon_group_alloc(char *name, lstcon_group_t **grpp) grp->grp_ref = 1; if (name) { if (strlen(name) > sizeof(grp->grp_name) - 1) { - LIBCFS_FREE(grp, offsetof(lstcon_group_t, + LIBCFS_FREE(grp, offsetof(struct lstcon_group, grp_ndl_hash[LST_NODE_HASHSIZE])); return -E2BIG; } @@ -229,18 +229,18 @@ lstcon_group_alloc(char *name, lstcon_group_t **grpp) } static void -lstcon_group_addref(lstcon_group_t *grp) +lstcon_group_addref(struct lstcon_group *grp) { grp->grp_ref++; } -static void lstcon_group_ndlink_release(lstcon_group_t *, lstcon_ndlink_t *); +static void lstcon_group_ndlink_release(struct lstcon_group *, struct lstcon_ndlink *); static void -lstcon_group_drain(lstcon_group_t *grp, int keep) +lstcon_group_drain(struct lstcon_group *grp, int keep) { - lstcon_ndlink_t *ndl; - lstcon_ndlink_t *tmp; + struct lstcon_ndlink *ndl; + struct lstcon_ndlink *tmp; list_for_each_entry_safe(ndl, tmp, &grp->grp_ndl_list, ndl_link) { if (!(ndl->ndl_node->nd_state & keep)) @@ -249,7 +249,7 @@ lstcon_group_drain(lstcon_group_t *grp, int keep) } static void -lstcon_group_decref(lstcon_group_t *grp) +lstcon_group_decref(struct lstcon_group *grp) { int i; @@ -264,20 +264,20 @@ lstcon_group_decref(lstcon_group_t *grp) for (i = 0; i < LST_NODE_HASHSIZE; i++) LASSERT(list_empty(&grp->grp_ndl_hash[i])); - LIBCFS_FREE(grp, offsetof(lstcon_group_t, + LIBCFS_FREE(grp, offsetof(struct lstcon_group, grp_ndl_hash[LST_NODE_HASHSIZE])); } static int -lstcon_group_find(const char *name, lstcon_group_t **grpp) +lstcon_group_find(const char *name, struct lstcon_group **grpp) { - lstcon_group_t *grp; + struct lstcon_group *grp; list_for_each_entry(grp, &console_session.ses_grp_list, grp_link) { if (strncmp(grp->grp_name, name, LST_NAME_SIZE)) continue; - lstcon_group_addref(grp); /* +1 ref for caller */ + lstcon_group_addref(grp); /* +1 ref for caller */ *grpp = grp; return 0; } @@ -286,8 +286,8 @@ lstcon_group_find(const char *name, lstcon_group_t **grpp) } static int -lstcon_group_ndlink_find(lstcon_group_t *grp, lnet_process_id_t id, - lstcon_ndlink_t **ndlpp, int create) +lstcon_group_ndlink_find(struct lstcon_group *grp, lnet_process_id_t id, + struct lstcon_ndlink **ndlpp, int create) { int rc; @@ -305,7 +305,7 @@ lstcon_group_ndlink_find(lstcon_group_t *grp, lnet_process_id_t id, } static void -lstcon_group_ndlink_release(lstcon_group_t *grp, lstcon_ndlink_t *ndl) +lstcon_group_ndlink_release(struct lstcon_group *grp, struct lstcon_ndlink *ndl) { list_del_init(&ndl->ndl_link); lstcon_ndlink_release(ndl); @@ -313,8 +313,8 @@ lstcon_group_ndlink_release(lstcon_group_t *grp, lstcon_ndlink_t *ndl) } static void -lstcon_group_ndlink_move(lstcon_group_t *old, - lstcon_group_t *new, lstcon_ndlink_t *ndl) +lstcon_group_ndlink_move(struct lstcon_group *old, + struct lstcon_group *new, struct lstcon_ndlink *ndl) { unsigned int idx = LNET_NIDADDR(ndl->ndl_node->nd_id.nid) % LST_NODE_HASHSIZE; @@ -329,21 +329,21 @@ lstcon_group_ndlink_move(lstcon_group_t *old, } static void -lstcon_group_move(lstcon_group_t *old, lstcon_group_t *new) +lstcon_group_move(struct lstcon_group *old, struct lstcon_group *new) { - lstcon_ndlink_t *ndl; + struct lstcon_ndlink *ndl; while (!list_empty(&old->grp_ndl_list)) { ndl = list_entry(old->grp_ndl_list.next, - lstcon_ndlink_t, ndl_link); + struct lstcon_ndlink, ndl_link); lstcon_group_ndlink_move(old, new, ndl); } } static int -lstcon_sesrpc_condition(int transop, lstcon_node_t *nd, void *arg) +lstcon_sesrpc_condition(int transop, struct lstcon_node *nd, void *arg) { - lstcon_group_t *grp = (lstcon_group_t *)arg; + struct lstcon_group *grp = (struct lstcon_group *)arg; switch (transop) { case LST_TRANS_SESNEW: @@ -370,10 +370,10 @@ lstcon_sesrpc_condition(int transop, lstcon_node_t *nd, void *arg) } static int -lstcon_sesrpc_readent(int transop, srpc_msg_t *msg, +lstcon_sesrpc_readent(int transop, struct srpc_msg *msg, lstcon_rpc_ent_t __user *ent_up) { - srpc_debug_reply_t *rep; + struct srpc_debug_reply *rep; switch (transop) { case LST_TRANS_SESNEW: @@ -399,13 +399,13 @@ lstcon_sesrpc_readent(int transop, srpc_msg_t *msg, } static int -lstcon_group_nodes_add(lstcon_group_t *grp, +lstcon_group_nodes_add(struct lstcon_group *grp, int count, lnet_process_id_t __user *ids_up, unsigned *featp, struct list_head __user *result_up) { - lstcon_rpc_trans_t *trans; - lstcon_ndlink_t *ndl; - lstcon_group_t *tmp; + struct lstcon_rpc_trans *trans; + struct lstcon_ndlink *ndl; + struct lstcon_group *tmp; lnet_process_id_t id; int i; int rc; @@ -466,13 +466,13 @@ lstcon_group_nodes_add(lstcon_group_t *grp, } static int -lstcon_group_nodes_remove(lstcon_group_t *grp, +lstcon_group_nodes_remove(struct lstcon_group *grp, int count, lnet_process_id_t __user *ids_up, struct list_head __user *result_up) { - lstcon_rpc_trans_t *trans; - lstcon_ndlink_t *ndl; - lstcon_group_t *tmp; + struct lstcon_rpc_trans *trans; + struct lstcon_ndlink *ndl; + struct lstcon_group *tmp; lnet_process_id_t id; int rc; int i; @@ -523,7 +523,7 @@ error: int lstcon_group_add(char *name) { - lstcon_group_t *grp; + struct lstcon_group *grp; int rc; rc = lstcon_group_find(name, &grp) ? 0 : -EEXIST; @@ -548,7 +548,7 @@ int lstcon_nodes_add(char *name, int count, lnet_process_id_t __user *ids_up, unsigned *featp, struct list_head __user *result_up) { - lstcon_group_t *grp; + struct lstcon_group *grp; int rc; LASSERT(count > 0); @@ -578,8 +578,8 @@ lstcon_nodes_add(char *name, int count, lnet_process_id_t __user *ids_up, int lstcon_group_del(char *name) { - lstcon_rpc_trans_t *trans; - lstcon_group_t *grp; + struct lstcon_rpc_trans *trans; + struct lstcon_group *grp; int rc; rc = lstcon_group_find(name, &grp); @@ -621,7 +621,7 @@ lstcon_group_del(char *name) int lstcon_group_clean(char *name, int args) { - lstcon_group_t *grp = NULL; + struct lstcon_group *grp = NULL; int rc; rc = lstcon_group_find(name, &grp); @@ -654,7 +654,7 @@ int lstcon_nodes_remove(char *name, int count, lnet_process_id_t __user *ids_up, struct list_head __user *result_up) { - lstcon_group_t *grp = NULL; + struct lstcon_group *grp = NULL; int rc; rc = lstcon_group_find(name, &grp); @@ -683,8 +683,8 @@ lstcon_nodes_remove(char *name, int count, lnet_process_id_t __user *ids_up, int lstcon_group_refresh(char *name, struct list_head __user *result_up) { - lstcon_rpc_trans_t *trans; - lstcon_group_t *grp; + struct lstcon_rpc_trans *trans; + struct lstcon_group *grp; int rc; rc = lstcon_group_find(name, &grp); @@ -725,7 +725,7 @@ lstcon_group_refresh(char *name, struct list_head __user *result_up) int lstcon_group_list(int index, int len, char __user *name_up) { - lstcon_group_t *grp; + struct lstcon_group *grp; LASSERT(index >= 0); LASSERT(name_up); @@ -733,7 +733,7 @@ lstcon_group_list(int index, int len, char __user *name_up) list_for_each_entry(grp, &console_session.ses_grp_list, grp_link) { if (!index--) { return copy_to_user(name_up, grp->grp_name, len) ? - -EFAULT : 0; + -EFAULT : 0; } } @@ -744,8 +744,8 @@ static int lstcon_nodes_getent(struct list_head *head, int *index_p, int *count_p, lstcon_node_ent_t __user *dents_up) { - lstcon_ndlink_t *ndl; - lstcon_node_t *nd; + struct lstcon_ndlink *ndl; + struct lstcon_node *nd; int count = 0; int index = 0; @@ -786,8 +786,8 @@ lstcon_group_info(char *name, lstcon_ndlist_ent_t __user *gents_p, lstcon_node_ent_t __user *dents_up) { lstcon_ndlist_ent_t *gentp; - lstcon_group_t *grp; - lstcon_ndlink_t *ndl; + struct lstcon_group *grp; + struct lstcon_ndlink *ndl; int rc; rc = lstcon_group_find(name, &grp); @@ -828,9 +828,9 @@ lstcon_group_info(char *name, lstcon_ndlist_ent_t __user *gents_p, } static int -lstcon_batch_find(const char *name, lstcon_batch_t **batpp) +lstcon_batch_find(const char *name, struct lstcon_batch **batpp) { - lstcon_batch_t *bat; + struct lstcon_batch *bat; list_for_each_entry(bat, &console_session.ses_bat_list, bat_link) { if (!strncmp(bat->bat_name, name, LST_NAME_SIZE)) { @@ -845,7 +845,7 @@ lstcon_batch_find(const char *name, lstcon_batch_t **batpp) int lstcon_batch_add(char *name) { - lstcon_batch_t *bat; + struct lstcon_batch *bat; int i; int rc; @@ -855,7 +855,7 @@ lstcon_batch_add(char *name) return rc; } - LIBCFS_ALLOC(bat, sizeof(lstcon_batch_t)); + LIBCFS_ALLOC(bat, sizeof(struct lstcon_batch)); if (!bat) { CERROR("Can't allocate descriptor for batch %s\n", name); return -ENOMEM; @@ -865,7 +865,7 @@ lstcon_batch_add(char *name) sizeof(struct list_head) * LST_NODE_HASHSIZE); if (!bat->bat_cli_hash) { CERROR("Can't allocate hash for batch %s\n", name); - LIBCFS_FREE(bat, sizeof(lstcon_batch_t)); + LIBCFS_FREE(bat, sizeof(struct lstcon_batch)); return -ENOMEM; } @@ -875,7 +875,7 @@ lstcon_batch_add(char *name) if (!bat->bat_srv_hash) { CERROR("Can't allocate hash for batch %s\n", name); LIBCFS_FREE(bat->bat_cli_hash, LST_NODE_HASHSIZE); - LIBCFS_FREE(bat, sizeof(lstcon_batch_t)); + LIBCFS_FREE(bat, sizeof(struct lstcon_batch)); return -ENOMEM; } @@ -883,7 +883,7 @@ lstcon_batch_add(char *name) if (strlen(name) > sizeof(bat->bat_name) - 1) { LIBCFS_FREE(bat->bat_srv_hash, LST_NODE_HASHSIZE); LIBCFS_FREE(bat->bat_cli_hash, LST_NODE_HASHSIZE); - LIBCFS_FREE(bat, sizeof(lstcon_batch_t)); + LIBCFS_FREE(bat, sizeof(struct lstcon_batch)); return -E2BIG; } strncpy(bat->bat_name, name, sizeof(bat->bat_name)); @@ -911,7 +911,7 @@ lstcon_batch_add(char *name) int lstcon_batch_list(int index, int len, char __user *name_up) { - lstcon_batch_t *bat; + struct lstcon_batch *bat; LASSERT(name_up); LASSERT(index >= 0); @@ -934,9 +934,9 @@ lstcon_batch_info(char *name, lstcon_test_batch_ent_t __user *ent_up, lstcon_test_batch_ent_t *entp; struct list_head *clilst; struct list_head *srvlst; - lstcon_test_t *test = NULL; - lstcon_batch_t *bat; - lstcon_ndlink_t *ndl; + struct lstcon_test *test = NULL; + struct lstcon_batch *bat; + struct lstcon_ndlink *ndl; int rc; rc = lstcon_batch_find(name, &bat); @@ -977,7 +977,6 @@ lstcon_batch_info(char *name, lstcon_test_batch_ent_t __user *ent_up, if (!test) { entp->u.tbe_batch.bae_ntest = bat->bat_ntest; entp->u.tbe_batch.bae_state = bat->bat_state; - } else { entp->u.tbe_test.tse_type = test->tes_type; entp->u.tbe_test.tse_loop = test->tes_loop; @@ -999,7 +998,7 @@ lstcon_batch_info(char *name, lstcon_test_batch_ent_t __user *ent_up, } static int -lstcon_batrpc_condition(int transop, lstcon_node_t *nd, void *arg) +lstcon_batrpc_condition(int transop, struct lstcon_node *nd, void *arg) { switch (transop) { case LST_TRANS_TSBRUN: @@ -1021,10 +1020,10 @@ lstcon_batrpc_condition(int transop, lstcon_node_t *nd, void *arg) } static int -lstcon_batch_op(lstcon_batch_t *bat, int transop, +lstcon_batch_op(struct lstcon_batch *bat, int transop, struct list_head __user *result_up) { - lstcon_rpc_trans_t *trans; + struct lstcon_rpc_trans *trans; int rc; rc = lstcon_rpc_trans_ndlist(&bat->bat_cli_list, @@ -1047,7 +1046,7 @@ lstcon_batch_op(lstcon_batch_t *bat, int transop, int lstcon_batch_run(char *name, int timeout, struct list_head __user *result_up) { - lstcon_batch_t *bat; + struct lstcon_batch *bat; int rc; if (lstcon_batch_find(name, &bat)) { @@ -1069,7 +1068,7 @@ lstcon_batch_run(char *name, int timeout, struct list_head __user *result_up) int lstcon_batch_stop(char *name, int force, struct list_head __user *result_up) { - lstcon_batch_t *bat; + struct lstcon_batch *bat; int rc; if (lstcon_batch_find(name, &bat)) { @@ -1089,17 +1088,17 @@ lstcon_batch_stop(char *name, int force, struct list_head __user *result_up) } static void -lstcon_batch_destroy(lstcon_batch_t *bat) +lstcon_batch_destroy(struct lstcon_batch *bat) { - lstcon_ndlink_t *ndl; - lstcon_test_t *test; + struct lstcon_ndlink *ndl; + struct lstcon_test *test; int i; list_del(&bat->bat_link); while (!list_empty(&bat->bat_test_list)) { test = list_entry(bat->bat_test_list.next, - lstcon_test_t, tes_link); + struct lstcon_test, tes_link); LASSERT(list_empty(&test->tes_trans_list)); list_del(&test->tes_link); @@ -1107,7 +1106,7 @@ lstcon_batch_destroy(lstcon_batch_t *bat) lstcon_group_decref(test->tes_src_grp); lstcon_group_decref(test->tes_dst_grp); - LIBCFS_FREE(test, offsetof(lstcon_test_t, + LIBCFS_FREE(test, offsetof(struct lstcon_test, tes_param[test->tes_paramlen])); } @@ -1115,7 +1114,7 @@ lstcon_batch_destroy(lstcon_batch_t *bat) while (!list_empty(&bat->bat_cli_list)) { ndl = list_entry(bat->bat_cli_list.next, - lstcon_ndlink_t, ndl_link); + struct lstcon_ndlink, ndl_link); list_del_init(&ndl->ndl_link); lstcon_ndlink_release(ndl); @@ -1123,7 +1122,7 @@ lstcon_batch_destroy(lstcon_batch_t *bat) while (!list_empty(&bat->bat_srv_list)) { ndl = list_entry(bat->bat_srv_list.next, - lstcon_ndlink_t, ndl_link); + struct lstcon_ndlink, ndl_link); list_del_init(&ndl->ndl_link); lstcon_ndlink_release(ndl); @@ -1138,19 +1137,19 @@ lstcon_batch_destroy(lstcon_batch_t *bat) sizeof(struct list_head) * LST_NODE_HASHSIZE); LIBCFS_FREE(bat->bat_srv_hash, sizeof(struct list_head) * LST_NODE_HASHSIZE); - LIBCFS_FREE(bat, sizeof(lstcon_batch_t)); + LIBCFS_FREE(bat, sizeof(struct lstcon_batch)); } static int -lstcon_testrpc_condition(int transop, lstcon_node_t *nd, void *arg) +lstcon_testrpc_condition(int transop, struct lstcon_node *nd, void *arg) { - lstcon_test_t *test; - lstcon_batch_t *batch; - lstcon_ndlink_t *ndl; + struct lstcon_test *test; + struct lstcon_batch *batch; + struct lstcon_ndlink *ndl; struct list_head *hash; struct list_head *head; - test = (lstcon_test_t *)arg; + test = (struct lstcon_test *)arg; LASSERT(test); batch = test->tes_batch; @@ -1186,10 +1185,10 @@ lstcon_testrpc_condition(int transop, lstcon_node_t *nd, void *arg) } static int -lstcon_test_nodes_add(lstcon_test_t *test, struct list_head __user *result_up) +lstcon_test_nodes_add(struct lstcon_test *test, struct list_head __user *result_up) { - lstcon_rpc_trans_t *trans; - lstcon_group_t *grp; + struct lstcon_rpc_trans *trans; + struct lstcon_group *grp; int transop; int rc; @@ -1237,7 +1236,7 @@ again: } static int -lstcon_verify_batch(const char *name, lstcon_batch_t **batch) +lstcon_verify_batch(const char *name, struct lstcon_batch **batch) { int rc; @@ -1256,10 +1255,10 @@ lstcon_verify_batch(const char *name, lstcon_batch_t **batch) } static int -lstcon_verify_group(const char *name, lstcon_group_t **grp) +lstcon_verify_group(const char *name, struct lstcon_group **grp) { int rc; - lstcon_ndlink_t *ndl; + struct lstcon_ndlink *ndl; rc = lstcon_group_find(name, grp); if (rc) { @@ -1284,11 +1283,11 @@ lstcon_test_add(char *batch_name, int type, int loop, void *param, int paramlen, int *retp, struct list_head __user *result_up) { - lstcon_test_t *test = NULL; + struct lstcon_test *test = NULL; int rc; - lstcon_group_t *src_grp = NULL; - lstcon_group_t *dst_grp = NULL; - lstcon_batch_t *batch = NULL; + struct lstcon_group *src_grp = NULL; + struct lstcon_group *dst_grp = NULL; + struct lstcon_batch *batch = NULL; /* * verify that a batch of the given name exists, and the groups @@ -1310,7 +1309,7 @@ lstcon_test_add(char *batch_name, int type, int loop, if (dst_grp->grp_userland) *retp = 1; - LIBCFS_ALLOC(test, offsetof(lstcon_test_t, tes_param[paramlen])); + LIBCFS_ALLOC(test, offsetof(struct lstcon_test, tes_param[paramlen])); if (!test) { CERROR("Can't allocate test descriptor\n"); rc = -ENOMEM; @@ -1357,7 +1356,7 @@ lstcon_test_add(char *batch_name, int type, int loop, return rc; out: if (test) - LIBCFS_FREE(test, offsetof(lstcon_test_t, tes_param[paramlen])); + LIBCFS_FREE(test, offsetof(struct lstcon_test, tes_param[paramlen])); if (dst_grp) lstcon_group_decref(dst_grp); @@ -1369,9 +1368,9 @@ out: } static int -lstcon_test_find(lstcon_batch_t *batch, int idx, lstcon_test_t **testpp) +lstcon_test_find(struct lstcon_batch *batch, int idx, struct lstcon_test **testpp) { - lstcon_test_t *test; + struct lstcon_test *test; list_for_each_entry(test, &batch->bat_test_list, tes_link) { if (idx == test->tes_hdr.tsb_index) { @@ -1384,10 +1383,10 @@ lstcon_test_find(lstcon_batch_t *batch, int idx, lstcon_test_t **testpp) } static int -lstcon_tsbrpc_readent(int transop, srpc_msg_t *msg, +lstcon_tsbrpc_readent(int transop, struct srpc_msg *msg, lstcon_rpc_ent_t __user *ent_up) { - srpc_batch_reply_t *rep = &msg->msg_body.bat_reply; + struct srpc_batch_reply *rep = &msg->msg_body.bat_reply; LASSERT(transop == LST_TRANS_TSBCLIQRY || transop == LST_TRANS_TSBSRVQRY); @@ -1404,12 +1403,12 @@ int lstcon_test_batch_query(char *name, int testidx, int client, int timeout, struct list_head __user *result_up) { - lstcon_rpc_trans_t *trans; + struct lstcon_rpc_trans *trans; struct list_head *translist; struct list_head *ndlist; - lstcon_tsb_hdr_t *hdr; - lstcon_batch_t *batch; - lstcon_test_t *test = NULL; + struct lstcon_tsb_hdr *hdr; + struct lstcon_batch *batch; + struct lstcon_test *test = NULL; int transop; int rc; @@ -1423,7 +1422,6 @@ lstcon_test_batch_query(char *name, int testidx, int client, translist = &batch->bat_trans_list; ndlist = &batch->bat_cli_list; hdr = &batch->bat_hdr; - } else { /* query specified test only */ rc = lstcon_test_find(batch, testidx, &test); @@ -1448,7 +1446,8 @@ lstcon_test_batch_query(char *name, int testidx, int client, lstcon_rpc_trans_postwait(trans, timeout); - if (!testidx && /* query a batch, not a test */ + /* query a batch, not a test */ + if (!testidx && !lstcon_rpc_stat_failure(lstcon_trans_stat(), 0) && !lstcon_tsbqry_stat_run(lstcon_trans_stat(), 0)) { /* all RPCs finished, and no active test */ @@ -1463,10 +1462,10 @@ lstcon_test_batch_query(char *name, int testidx, int client, } static int -lstcon_statrpc_readent(int transop, srpc_msg_t *msg, +lstcon_statrpc_readent(int transop, struct srpc_msg *msg, lstcon_rpc_ent_t __user *ent_up) { - srpc_stat_reply_t *rep = &msg->msg_body.stat_reply; + struct srpc_stat_reply *rep = &msg->msg_body.stat_reply; sfw_counters_t __user *sfwk_stat; srpc_counters_t __user *srpc_stat; lnet_counters_t __user *lnet_stat; @@ -1491,7 +1490,7 @@ lstcon_ndlist_stat(struct list_head *ndlist, int timeout, struct list_head __user *result_up) { struct list_head head; - lstcon_rpc_trans_t *trans; + struct lstcon_rpc_trans *trans; int rc; INIT_LIST_HEAD(&head); @@ -1516,7 +1515,7 @@ int lstcon_group_stat(char *grp_name, int timeout, struct list_head __user *result_up) { - lstcon_group_t *grp; + struct lstcon_group *grp; int rc; rc = lstcon_group_find(grp_name, &grp); @@ -1536,8 +1535,8 @@ int lstcon_nodes_stat(int count, lnet_process_id_t __user *ids_up, int timeout, struct list_head __user *result_up) { - lstcon_ndlink_t *ndl; - lstcon_group_t *tmp; + struct lstcon_ndlink *ndl; + struct lstcon_group *tmp; lnet_process_id_t id; int i; int rc; @@ -1581,7 +1580,7 @@ lstcon_debug_ndlist(struct list_head *ndlist, struct list_head *translist, int timeout, struct list_head __user *result_up) { - lstcon_rpc_trans_t *trans; + struct lstcon_rpc_trans *trans; int rc; rc = lstcon_rpc_trans_ndlist(ndlist, translist, LST_TRANS_SESQRY, @@ -1611,7 +1610,7 @@ int lstcon_batch_debug(int timeout, char *name, int client, struct list_head __user *result_up) { - lstcon_batch_t *bat; + struct lstcon_batch *bat; int rc; rc = lstcon_batch_find(name, &bat); @@ -1629,7 +1628,7 @@ int lstcon_group_debug(int timeout, char *name, struct list_head __user *result_up) { - lstcon_group_t *grp; + struct lstcon_group *grp; int rc; rc = lstcon_group_find(name, &grp); @@ -1649,8 +1648,8 @@ lstcon_nodes_debug(int timeout, struct list_head __user *result_up) { lnet_process_id_t id; - lstcon_ndlink_t *ndl; - lstcon_group_t *grp; + struct lstcon_ndlink *ndl; + struct lstcon_group *grp; int i; int rc; @@ -1749,7 +1748,7 @@ lstcon_session_new(char *name, int key, unsigned feats, if (strlen(name) > sizeof(console_session.ses_name) - 1) return -E2BIG; - strncpy(console_session.ses_name, name, + strlcpy(console_session.ses_name, name, sizeof(console_session.ses_name)); rc = lstcon_batch_add(LST_DEFAULT_BATCH); @@ -1758,7 +1757,7 @@ lstcon_session_new(char *name, int key, unsigned feats, rc = lstcon_rpc_pinger_start(); if (rc) { - lstcon_batch_t *bat = NULL; + struct lstcon_batch *bat = NULL; lstcon_batch_find(LST_DEFAULT_BATCH, &bat); lstcon_batch_destroy(bat); @@ -1782,7 +1781,7 @@ lstcon_session_info(lst_sid_t __user *sid_up, int __user *key_up, char __user *name_up, int len) { lstcon_ndlist_ent_t *entp; - lstcon_ndlink_t *ndl; + struct lstcon_ndlink *ndl; int rc = 0; if (console_session.ses_state != LST_SESSION_ACTIVE) @@ -1813,9 +1812,9 @@ lstcon_session_info(lst_sid_t __user *sid_up, int __user *key_up, int lstcon_session_end(void) { - lstcon_rpc_trans_t *trans; - lstcon_group_t *grp; - lstcon_batch_t *bat; + struct lstcon_rpc_trans *trans; + struct lstcon_group *grp; + struct lstcon_batch *bat; int rc = 0; LASSERT(console_session.ses_state == LST_SESSION_ACTIVE); @@ -1849,7 +1848,7 @@ lstcon_session_end(void) /* destroy all batches */ while (!list_empty(&console_session.ses_bat_list)) { bat = list_entry(console_session.ses_bat_list.next, - lstcon_batch_t, bat_link); + struct lstcon_batch, bat_link); lstcon_batch_destroy(bat); } @@ -1857,7 +1856,7 @@ lstcon_session_end(void) /* destroy all groups */ while (!list_empty(&console_session.ses_grp_list)) { grp = list_entry(console_session.ses_grp_list.next, - lstcon_group_t, grp_link); + struct lstcon_group, grp_link); LASSERT(grp->grp_ref == 1); lstcon_group_decref(grp); @@ -1906,12 +1905,12 @@ lstcon_session_feats_check(unsigned feats) static int lstcon_acceptor_handle(struct srpc_server_rpc *rpc) { - srpc_msg_t *rep = &rpc->srpc_replymsg; - srpc_msg_t *req = &rpc->srpc_reqstbuf->buf_msg; - srpc_join_reqst_t *jreq = &req->msg_body.join_reqst; - srpc_join_reply_t *jrep = &rep->msg_body.join_reply; - lstcon_group_t *grp = NULL; - lstcon_ndlink_t *ndl; + struct srpc_msg *rep = &rpc->srpc_replymsg; + struct srpc_msg *req = &rpc->srpc_reqstbuf->buf_msg; + struct srpc_join_reqst *jreq = &req->msg_body.join_reqst; + struct srpc_join_reply *jrep = &rep->msg_body.join_reply; + struct lstcon_group *grp = NULL; + struct lstcon_ndlink *ndl; int rc = 0; sfw_unpack_message(req); @@ -1987,7 +1986,8 @@ out: return rc; } -static srpc_service_t lstcon_acceptor_service; +static struct srpc_service lstcon_acceptor_service; + static void lstcon_init_acceptor_service(void) { /* initialize selftest console acceptor service table */ diff --git a/drivers/staging/lustre/lnet/selftest/console.h b/drivers/staging/lustre/lnet/selftest/console.h index 554f58244..becd22e41 100644 --- a/drivers/staging/lustre/lnet/selftest/console.h +++ b/drivers/staging/lustre/lnet/selftest/console.h @@ -50,22 +50,25 @@ #include "selftest.h" #include "conrpc.h" -typedef struct lstcon_node { +/* node descriptor */ +struct lstcon_node { lnet_process_id_t nd_id; /* id of the node */ int nd_ref; /* reference count */ int nd_state; /* state of the node */ int nd_timeout; /* session timeout */ unsigned long nd_stamp; /* timestamp of last replied RPC */ struct lstcon_rpc nd_ping; /* ping rpc */ -} lstcon_node_t; /* node descriptor */ +}; -typedef struct { +/* node link descriptor */ +struct lstcon_ndlink { struct list_head ndl_link; /* chain on list */ struct list_head ndl_hlink; /* chain on hash */ - lstcon_node_t *ndl_node; /* pointer to node */ -} lstcon_ndlink_t; /* node link descriptor */ + struct lstcon_node *ndl_node; /* pointer to node */ +}; -typedef struct { +/* (alias of nodes) group descriptor */ +struct lstcon_group { struct list_head grp_link; /* chain on global group list */ int grp_ref; /* reference count */ @@ -76,18 +79,19 @@ typedef struct { struct list_head grp_trans_list; /* transaction list */ struct list_head grp_ndl_list; /* nodes list */ struct list_head grp_ndl_hash[0]; /* hash table for nodes */ -} lstcon_group_t; /* (alias of nodes) group descriptor */ +}; #define LST_BATCH_IDLE 0xB0 /* idle batch */ #define LST_BATCH_RUNNING 0xB1 /* running batch */ -typedef struct lstcon_tsb_hdr { +struct lstcon_tsb_hdr { lst_bid_t tsb_id; /* batch ID */ int tsb_index; /* test index */ -} lstcon_tsb_hdr_t; +}; -typedef struct { - lstcon_tsb_hdr_t bat_hdr; /* test_batch header */ +/* (tests ) batch descriptor */ +struct lstcon_batch { + struct lstcon_tsb_hdr bat_hdr; /* test_batch header */ struct list_head bat_link; /* chain on session's batches list */ int bat_ntest; /* # of test */ int bat_state; /* state of the batch */ @@ -95,20 +99,21 @@ typedef struct { * for run, force for stop */ char bat_name[LST_NAME_SIZE];/* name of batch */ - struct list_head bat_test_list; /* list head of tests (lstcon_test_t) + struct list_head bat_test_list; /* list head of tests (struct lstcon_test) */ struct list_head bat_trans_list; /* list head of transaction */ struct list_head bat_cli_list; /* list head of client nodes - * (lstcon_node_t) */ + * (struct lstcon_node) */ struct list_head *bat_cli_hash; /* hash table of client nodes */ struct list_head bat_srv_list; /* list head of server nodes */ struct list_head *bat_srv_hash; /* hash table of server nodes */ -} lstcon_batch_t; /* (tests ) batch descriptor */ +}; -typedef struct lstcon_test { - lstcon_tsb_hdr_t tes_hdr; /* test batch header */ +/* a single test descriptor */ +struct lstcon_test { + struct lstcon_tsb_hdr tes_hdr; /* test batch header */ struct list_head tes_link; /* chain on batch's tests list */ - lstcon_batch_t *tes_batch; /* pointer to batch */ + struct lstcon_batch *tes_batch; /* pointer to batch */ int tes_type; /* type of the test, i.e: bulk, ping */ int tes_stop_onerr; /* stop on error */ @@ -120,12 +125,12 @@ typedef struct lstcon_test { int tes_cliidx; /* client index, used for RPC creating */ struct list_head tes_trans_list; /* transaction list */ - lstcon_group_t *tes_src_grp; /* group run the test */ - lstcon_group_t *tes_dst_grp; /* target group */ + struct lstcon_group *tes_src_grp; /* group run the test */ + struct lstcon_group *tes_dst_grp; /* target group */ int tes_paramlen; /* test parameter length */ char tes_param[0]; /* test parameter */ -} lstcon_test_t; /* a single test descriptor */ +}; #define LST_GLOBAL_HASHSIZE 503 /* global nodes hash table size */ #define LST_NODE_HASHSIZE 239 /* node hash table (for batch or group) */ @@ -152,7 +157,7 @@ struct lstcon_session { unsigned ses_expired:1; /* console is timedout */ __u64 ses_id_cookie; /* batch id cookie */ char ses_name[LST_NAME_SIZE];/* session name */ - lstcon_rpc_trans_t *ses_ping; /* session pinger */ + struct lstcon_rpc_trans *ses_ping; /* session pinger */ struct stt_timer ses_ping_timer; /* timer for pinger */ lstcon_trans_stat_t ses_trans_stat; /* transaction stats */ diff --git a/drivers/staging/lustre/lnet/selftest/framework.c b/drivers/staging/lustre/lnet/selftest/framework.c index e2c532399..30e4f71f1 100644 --- a/drivers/staging/lustre/lnet/selftest/framework.c +++ b/drivers/staging/lustre/lnet/selftest/framework.c @@ -109,19 +109,19 @@ static struct smoketest_framework { struct list_head fw_tests; /* registered test cases */ atomic_t fw_nzombies; /* # zombie sessions */ spinlock_t fw_lock; /* serialise */ - sfw_session_t *fw_session; /* _the_ session */ + struct sfw_session *fw_session; /* _the_ session */ int fw_shuttingdown; /* shutdown in progress */ struct srpc_server_rpc *fw_active_srpc;/* running RPC */ } sfw_data; /* forward ref's */ -int sfw_stop_batch(sfw_batch_t *tsb, int force); -void sfw_destroy_session(sfw_session_t *sn); +int sfw_stop_batch(struct sfw_batch *tsb, int force); +void sfw_destroy_session(struct sfw_session *sn); -static inline sfw_test_case_t * +static inline struct sfw_test_case * sfw_find_test_case(int id) { - sfw_test_case_t *tsc; + struct sfw_test_case *tsc; LASSERT(id <= SRPC_SERVICE_MAX_ID); LASSERT(id > SRPC_FRAMEWORK_SERVICE_MAX_ID); @@ -135,9 +135,9 @@ sfw_find_test_case(int id) } static int -sfw_register_test(srpc_service_t *service, sfw_test_client_ops_t *cliops) +sfw_register_test(struct srpc_service *service, struct sfw_test_client_ops *cliops) { - sfw_test_case_t *tsc; + struct sfw_test_case *tsc; if (sfw_find_test_case(service->sv_id)) { CERROR("Failed to register test %s (%d)\n", @@ -145,7 +145,7 @@ sfw_register_test(srpc_service_t *service, sfw_test_client_ops_t *cliops) return -EEXIST; } - LIBCFS_ALLOC(tsc, sizeof(sfw_test_case_t)); + LIBCFS_ALLOC(tsc, sizeof(struct sfw_test_case)); if (!tsc) return -ENOMEM; @@ -159,7 +159,7 @@ sfw_register_test(srpc_service_t *service, sfw_test_client_ops_t *cliops) static void sfw_add_session_timer(void) { - sfw_session_t *sn = sfw_data.fw_session; + struct sfw_session *sn = sfw_data.fw_session; struct stt_timer *timer = &sn->sn_timer; LASSERT(!sfw_data.fw_shuttingdown); @@ -177,7 +177,7 @@ sfw_add_session_timer(void) static int sfw_del_session_timer(void) { - sfw_session_t *sn = sfw_data.fw_session; + struct sfw_session *sn = sfw_data.fw_session; if (!sn || !sn->sn_timer_active) return 0; @@ -196,10 +196,10 @@ static void sfw_deactivate_session(void) __must_hold(&sfw_data.fw_lock) { - sfw_session_t *sn = sfw_data.fw_session; + struct sfw_session *sn = sfw_data.fw_session; int nactive = 0; - sfw_batch_t *tsb; - sfw_test_case_t *tsc; + struct sfw_batch *tsb; + struct sfw_test_case *tsc; if (!sn) return; @@ -226,7 +226,7 @@ __must_hold(&sfw_data.fw_lock) } if (nactive) - return; /* wait for active batches to stop */ + return; /* wait for active batches to stop */ list_del_init(&sn->sn_list); spin_unlock(&sfw_data.fw_lock); @@ -239,7 +239,7 @@ __must_hold(&sfw_data.fw_lock) static void sfw_session_expired(void *data) { - sfw_session_t *sn = data; + struct sfw_session *sn = data; spin_lock(&sfw_data.fw_lock); @@ -257,12 +257,12 @@ sfw_session_expired(void *data) } static inline void -sfw_init_session(sfw_session_t *sn, lst_sid_t sid, +sfw_init_session(struct sfw_session *sn, lst_sid_t sid, unsigned features, const char *name) { struct stt_timer *timer = &sn->sn_timer; - memset(sn, 0, sizeof(sfw_session_t)); + memset(sn, 0, sizeof(struct sfw_session)); INIT_LIST_HEAD(&sn->sn_list); INIT_LIST_HEAD(&sn->sn_batches); atomic_set(&sn->sn_refcount, 1); /* +1 for caller */ @@ -298,7 +298,7 @@ sfw_server_rpc_done(struct srpc_server_rpc *rpc) } static void -sfw_client_rpc_fini(srpc_client_rpc_t *rpc) +sfw_client_rpc_fini(struct srpc_client_rpc *rpc) { LASSERT(!rpc->crpc_bulk.bk_niov); LASSERT(list_empty(&rpc->crpc_list)); @@ -318,11 +318,11 @@ sfw_client_rpc_fini(srpc_client_rpc_t *rpc) spin_unlock(&sfw_data.fw_lock); } -static sfw_batch_t * +static struct sfw_batch * sfw_find_batch(lst_bid_t bid) { - sfw_session_t *sn = sfw_data.fw_session; - sfw_batch_t *bat; + struct sfw_session *sn = sfw_data.fw_session; + struct sfw_batch *bat; LASSERT(sn); @@ -334,11 +334,11 @@ sfw_find_batch(lst_bid_t bid) return NULL; } -static sfw_batch_t * +static struct sfw_batch * sfw_bid2batch(lst_bid_t bid) { - sfw_session_t *sn = sfw_data.fw_session; - sfw_batch_t *bat; + struct sfw_session *sn = sfw_data.fw_session; + struct sfw_batch *bat; LASSERT(sn); @@ -346,7 +346,7 @@ sfw_bid2batch(lst_bid_t bid) if (bat) return bat; - LIBCFS_ALLOC(bat, sizeof(sfw_batch_t)); + LIBCFS_ALLOC(bat, sizeof(struct sfw_batch)); if (!bat) return NULL; @@ -361,11 +361,11 @@ sfw_bid2batch(lst_bid_t bid) } static int -sfw_get_stats(srpc_stat_reqst_t *request, srpc_stat_reply_t *reply) +sfw_get_stats(struct srpc_stat_reqst *request, struct srpc_stat_reply *reply) { - sfw_session_t *sn = sfw_data.fw_session; + struct sfw_session *sn = sfw_data.fw_session; sfw_counters_t *cnt = &reply->str_fw; - sfw_batch_t *bat; + struct sfw_batch *bat; reply->str_sid = !sn ? LST_INVALID_SID : sn->sn_id; @@ -402,10 +402,10 @@ sfw_get_stats(srpc_stat_reqst_t *request, srpc_stat_reply_t *reply) } int -sfw_make_session(srpc_mksn_reqst_t *request, srpc_mksn_reply_t *reply) +sfw_make_session(struct srpc_mksn_reqst *request, struct srpc_mksn_reply *reply) { - sfw_session_t *sn = sfw_data.fw_session; - srpc_msg_t *msg = container_of(request, srpc_msg_t, + struct sfw_session *sn = sfw_data.fw_session; + struct srpc_msg *msg = container_of(request, struct srpc_msg, msg_body.mksn_reqst); int cplen = 0; @@ -438,7 +438,7 @@ sfw_make_session(srpc_mksn_reqst_t *request, srpc_mksn_reply_t *reply) /* * reject the request if it requires unknown features * NB: old version will always accept all features because it's not - * aware of srpc_msg_t::msg_ses_feats, it's a defect but it's also + * aware of srpc_msg::msg_ses_feats, it's a defect but it's also * harmless because it will return zero feature to console, and it's * console's responsibility to make sure all nodes in a session have * same feature mask. @@ -449,7 +449,7 @@ sfw_make_session(srpc_mksn_reqst_t *request, srpc_mksn_reply_t *reply) } /* brand new or create by force */ - LIBCFS_ALLOC(sn, sizeof(sfw_session_t)); + LIBCFS_ALLOC(sn, sizeof(struct sfw_session)); if (!sn) { CERROR("dropping RPC mksn under memory pressure\n"); return -ENOMEM; @@ -473,9 +473,9 @@ sfw_make_session(srpc_mksn_reqst_t *request, srpc_mksn_reply_t *reply) } static int -sfw_remove_session(srpc_rmsn_reqst_t *request, srpc_rmsn_reply_t *reply) +sfw_remove_session(struct srpc_rmsn_reqst *request, struct srpc_rmsn_reply *reply) { - sfw_session_t *sn = sfw_data.fw_session; + struct sfw_session *sn = sfw_data.fw_session; reply->rmsn_sid = !sn ? LST_INVALID_SID : sn->sn_id; @@ -505,9 +505,9 @@ sfw_remove_session(srpc_rmsn_reqst_t *request, srpc_rmsn_reply_t *reply) } static int -sfw_debug_session(srpc_debug_reqst_t *request, srpc_debug_reply_t *reply) +sfw_debug_session(struct srpc_debug_reqst *request, struct srpc_debug_reply *reply) { - sfw_session_t *sn = sfw_data.fw_session; + struct sfw_session *sn = sfw_data.fw_session; if (!sn) { reply->dbg_status = ESRCH; @@ -526,10 +526,10 @@ sfw_debug_session(srpc_debug_reqst_t *request, srpc_debug_reply_t *reply) } static void -sfw_test_rpc_fini(srpc_client_rpc_t *rpc) +sfw_test_rpc_fini(struct srpc_client_rpc *rpc) { - sfw_test_unit_t *tsu = rpc->crpc_priv; - sfw_test_instance_t *tsi = tsu->tsu_instance; + struct sfw_test_unit *tsu = rpc->crpc_priv; + struct sfw_test_instance *tsi = tsu->tsu_instance; /* Called with hold of tsi->tsi_lock */ LASSERT(list_empty(&rpc->crpc_list)); @@ -537,7 +537,7 @@ sfw_test_rpc_fini(srpc_client_rpc_t *rpc) } static inline int -sfw_test_buffers(sfw_test_instance_t *tsi) +sfw_test_buffers(struct sfw_test_instance *tsi) { struct sfw_test_case *tsc; struct srpc_service *svc; @@ -614,10 +614,10 @@ sfw_unload_test(struct sfw_test_instance *tsi) } static void -sfw_destroy_test_instance(sfw_test_instance_t *tsi) +sfw_destroy_test_instance(struct sfw_test_instance *tsi) { - srpc_client_rpc_t *rpc; - sfw_test_unit_t *tsu; + struct srpc_client_rpc *rpc; + struct sfw_test_unit *tsu; if (!tsi->tsi_is_client) goto clean; @@ -630,14 +630,14 @@ sfw_destroy_test_instance(sfw_test_instance_t *tsi) while (!list_empty(&tsi->tsi_units)) { tsu = list_entry(tsi->tsi_units.next, - sfw_test_unit_t, tsu_list); + struct sfw_test_unit, tsu_list); list_del(&tsu->tsu_list); LIBCFS_FREE(tsu, sizeof(*tsu)); } while (!list_empty(&tsi->tsi_free_rpcs)) { rpc = list_entry(tsi->tsi_free_rpcs.next, - srpc_client_rpc_t, crpc_list); + struct srpc_client_rpc, crpc_list); list_del(&rpc->crpc_list); LIBCFS_FREE(rpc, srpc_client_rpc_size(rpc)); } @@ -648,34 +648,34 @@ clean: } static void -sfw_destroy_batch(sfw_batch_t *tsb) +sfw_destroy_batch(struct sfw_batch *tsb) { - sfw_test_instance_t *tsi; + struct sfw_test_instance *tsi; LASSERT(!sfw_batch_active(tsb)); LASSERT(list_empty(&tsb->bat_list)); while (!list_empty(&tsb->bat_tests)) { tsi = list_entry(tsb->bat_tests.next, - sfw_test_instance_t, tsi_list); + struct sfw_test_instance, tsi_list); list_del_init(&tsi->tsi_list); sfw_destroy_test_instance(tsi); } - LIBCFS_FREE(tsb, sizeof(sfw_batch_t)); + LIBCFS_FREE(tsb, sizeof(struct sfw_batch)); } void -sfw_destroy_session(sfw_session_t *sn) +sfw_destroy_session(struct sfw_session *sn) { - sfw_batch_t *batch; + struct sfw_batch *batch; LASSERT(list_empty(&sn->sn_list)); LASSERT(sn != sfw_data.fw_session); while (!list_empty(&sn->sn_batches)) { batch = list_entry(sn->sn_batches.next, - sfw_batch_t, bat_list); + struct sfw_batch, bat_list); list_del_init(&batch->bat_list); sfw_destroy_batch(batch); } @@ -685,28 +685,28 @@ sfw_destroy_session(sfw_session_t *sn) } static void -sfw_unpack_addtest_req(srpc_msg_t *msg) +sfw_unpack_addtest_req(struct srpc_msg *msg) { - srpc_test_reqst_t *req = &msg->msg_body.tes_reqst; + struct srpc_test_reqst *req = &msg->msg_body.tes_reqst; LASSERT(msg->msg_type == SRPC_MSG_TEST_REQST); LASSERT(req->tsr_is_client); if (msg->msg_magic == SRPC_MSG_MAGIC) - return; /* no flipping needed */ + return; /* no flipping needed */ LASSERT(msg->msg_magic == __swab32(SRPC_MSG_MAGIC)); if (req->tsr_service == SRPC_SERVICE_BRW) { if (!(msg->msg_ses_feats & LST_FEAT_BULK_LEN)) { - test_bulk_req_t *bulk = &req->tsr_u.bulk_v0; + struct test_bulk_req *bulk = &req->tsr_u.bulk_v0; __swab32s(&bulk->blk_opc); __swab32s(&bulk->blk_npg); __swab32s(&bulk->blk_flags); } else { - test_bulk_req_v1_t *bulk = &req->tsr_u.bulk_v1; + struct test_bulk_req_v1 *bulk = &req->tsr_u.bulk_v1; __swab16s(&bulk->blk_opc); __swab16s(&bulk->blk_flags); @@ -718,7 +718,7 @@ sfw_unpack_addtest_req(srpc_msg_t *msg) } if (req->tsr_service == SRPC_SERVICE_PING) { - test_ping_req_t *ping = &req->tsr_u.ping; + struct test_ping_req *ping = &req->tsr_u.ping; __swab32s(&ping->png_size); __swab32s(&ping->png_flags); @@ -729,14 +729,14 @@ sfw_unpack_addtest_req(srpc_msg_t *msg) } static int -sfw_add_test_instance(sfw_batch_t *tsb, struct srpc_server_rpc *rpc) +sfw_add_test_instance(struct sfw_batch *tsb, struct srpc_server_rpc *rpc) { - srpc_msg_t *msg = &rpc->srpc_reqstbuf->buf_msg; - srpc_test_reqst_t *req = &msg->msg_body.tes_reqst; - srpc_bulk_t *bk = rpc->srpc_bulk; + struct srpc_msg *msg = &rpc->srpc_reqstbuf->buf_msg; + struct srpc_test_reqst *req = &msg->msg_body.tes_reqst; + struct srpc_bulk *bk = rpc->srpc_bulk; int ndest = req->tsr_ndest; - sfw_test_unit_t *tsu; - sfw_test_instance_t *tsi; + struct sfw_test_unit *tsu; + struct sfw_test_instance *tsi; int i; int rc; @@ -789,13 +789,13 @@ sfw_add_test_instance(sfw_batch_t *tsb, struct srpc_server_rpc *rpc) int j; dests = page_address(bk->bk_iovs[i / SFW_ID_PER_PAGE].kiov_page); - LASSERT(dests); /* my pages are within KVM always */ + LASSERT(dests); /* my pages are within KVM always */ id = dests[i % SFW_ID_PER_PAGE]; if (msg->msg_magic != SRPC_MSG_MAGIC) sfw_unpack_id(id); for (j = 0; j < tsi->tsi_concur; j++) { - LIBCFS_ALLOC(tsu, sizeof(sfw_test_unit_t)); + LIBCFS_ALLOC(tsu, sizeof(struct sfw_test_unit)); if (!tsu) { rc = -ENOMEM; CERROR("Can't allocate tsu for %d\n", @@ -824,11 +824,11 @@ error: } static void -sfw_test_unit_done(sfw_test_unit_t *tsu) +sfw_test_unit_done(struct sfw_test_unit *tsu) { - sfw_test_instance_t *tsi = tsu->tsu_instance; - sfw_batch_t *tsb = tsi->tsi_batch; - sfw_session_t *sn = tsb->bat_session; + struct sfw_test_instance *tsi = tsu->tsu_instance; + struct sfw_batch *tsb = tsi->tsi_batch; + struct sfw_session *sn = tsb->bat_session; LASSERT(sfw_test_active(tsi)); @@ -844,8 +844,8 @@ sfw_test_unit_done(sfw_test_unit_t *tsu) spin_lock(&sfw_data.fw_lock); - if (!atomic_dec_and_test(&tsb->bat_nactive) ||/* tsb still active */ - sn == sfw_data.fw_session) { /* sn also active */ + if (!atomic_dec_and_test(&tsb->bat_nactive) || /* tsb still active */ + sn == sfw_data.fw_session) { /* sn also active */ spin_unlock(&sfw_data.fw_lock); return; } @@ -866,10 +866,10 @@ sfw_test_unit_done(sfw_test_unit_t *tsu) } static void -sfw_test_rpc_done(srpc_client_rpc_t *rpc) +sfw_test_rpc_done(struct srpc_client_rpc *rpc) { - sfw_test_unit_t *tsu = rpc->crpc_priv; - sfw_test_instance_t *tsi = tsu->tsu_instance; + struct sfw_test_unit *tsu = rpc->crpc_priv; + struct sfw_test_instance *tsi = tsu->tsu_instance; int done = 0; tsi->tsi_ops->tso_done_rpc(tsu, rpc); @@ -900,19 +900,19 @@ sfw_test_rpc_done(srpc_client_rpc_t *rpc) } int -sfw_create_test_rpc(sfw_test_unit_t *tsu, lnet_process_id_t peer, +sfw_create_test_rpc(struct sfw_test_unit *tsu, lnet_process_id_t peer, unsigned features, int nblk, int blklen, - srpc_client_rpc_t **rpcpp) + struct srpc_client_rpc **rpcpp) { - srpc_client_rpc_t *rpc = NULL; - sfw_test_instance_t *tsi = tsu->tsu_instance; + struct srpc_client_rpc *rpc = NULL; + struct sfw_test_instance *tsi = tsu->tsu_instance; spin_lock(&tsi->tsi_lock); LASSERT(sfw_test_active(tsi)); /* pick request from buffer */ rpc = list_first_entry_or_null(&tsi->tsi_free_rpcs, - srpc_client_rpc_t, crpc_list); + struct srpc_client_rpc, crpc_list); if (rpc) { LASSERT(nblk == rpc->crpc_bulk.bk_niov); list_del_init(&rpc->crpc_list); @@ -942,11 +942,11 @@ sfw_create_test_rpc(sfw_test_unit_t *tsu, lnet_process_id_t peer, } static int -sfw_run_test(swi_workitem_t *wi) +sfw_run_test(struct swi_workitem *wi) { - sfw_test_unit_t *tsu = wi->swi_workitem.wi_data; - sfw_test_instance_t *tsi = tsu->tsu_instance; - srpc_client_rpc_t *rpc = NULL; + struct sfw_test_unit *tsu = wi->swi_workitem.wi_data; + struct sfw_test_instance *tsi = tsu->tsu_instance; + struct srpc_client_rpc *rpc = NULL; LASSERT(wi == &tsu->tsu_worker); @@ -991,11 +991,11 @@ test_done: } static int -sfw_run_batch(sfw_batch_t *tsb) +sfw_run_batch(struct sfw_batch *tsb) { - swi_workitem_t *wi; - sfw_test_unit_t *tsu; - sfw_test_instance_t *tsi; + struct swi_workitem *wi; + struct sfw_test_unit *tsu; + struct sfw_test_instance *tsi; if (sfw_batch_active(tsb)) { CDEBUG(D_NET, "Batch already active: %llu (%d)\n", @@ -1026,10 +1026,10 @@ sfw_run_batch(sfw_batch_t *tsb) } int -sfw_stop_batch(sfw_batch_t *tsb, int force) +sfw_stop_batch(struct sfw_batch *tsb, int force) { - sfw_test_instance_t *tsi; - srpc_client_rpc_t *rpc; + struct sfw_test_instance *tsi; + struct srpc_client_rpc *rpc; if (!sfw_batch_active(tsb)) { CDEBUG(D_NET, "Batch %llu inactive\n", tsb->bat_id.bat_id); @@ -1068,9 +1068,9 @@ sfw_stop_batch(sfw_batch_t *tsb, int force) } static int -sfw_query_batch(sfw_batch_t *tsb, int testidx, srpc_batch_reply_t *reply) +sfw_query_batch(struct sfw_batch *tsb, int testidx, struct srpc_batch_reply *reply) { - sfw_test_instance_t *tsi; + struct sfw_test_instance *tsi; if (testidx < 0) return -EINVAL; @@ -1115,11 +1115,11 @@ sfw_alloc_pages(struct srpc_server_rpc *rpc, int cpt, int npages, int len, static int sfw_add_test(struct srpc_server_rpc *rpc) { - sfw_session_t *sn = sfw_data.fw_session; - srpc_test_reply_t *reply = &rpc->srpc_replymsg.msg_body.tes_reply; - srpc_test_reqst_t *request; + struct sfw_session *sn = sfw_data.fw_session; + struct srpc_test_reply *reply = &rpc->srpc_replymsg.msg_body.tes_reply; + struct srpc_test_reqst *request; int rc; - sfw_batch_t *bat; + struct sfw_batch *bat; request = &rpc->srpc_reqstbuf->buf_msg.msg_body.tes_reqst; reply->tsr_sid = !sn ? LST_INVALID_SID : sn->sn_id; @@ -1183,11 +1183,11 @@ sfw_add_test(struct srpc_server_rpc *rpc) } static int -sfw_control_batch(srpc_batch_reqst_t *request, srpc_batch_reply_t *reply) +sfw_control_batch(struct srpc_batch_reqst *request, struct srpc_batch_reply *reply) { - sfw_session_t *sn = sfw_data.fw_session; + struct sfw_session *sn = sfw_data.fw_session; int rc = 0; - sfw_batch_t *bat; + struct sfw_batch *bat; reply->bar_sid = !sn ? LST_INVALID_SID : sn->sn_id; @@ -1227,8 +1227,8 @@ static int sfw_handle_server_rpc(struct srpc_server_rpc *rpc) { struct srpc_service *sv = rpc->srpc_scd->scd_svc; - srpc_msg_t *reply = &rpc->srpc_replymsg; - srpc_msg_t *request = &rpc->srpc_reqstbuf->buf_msg; + struct srpc_msg *reply = &rpc->srpc_replymsg; + struct srpc_msg *request = &rpc->srpc_reqstbuf->buf_msg; unsigned features = LST_FEATS_MASK; int rc = 0; @@ -1244,7 +1244,7 @@ sfw_handle_server_rpc(struct srpc_server_rpc *rpc) /* Remove timer to avoid racing with it or expiring active session */ if (sfw_del_session_timer()) { - CERROR("Dropping RPC (%s) from %s: racing with expiry timer.", + CERROR("dropping RPC %s from %s: racing with expiry timer\n", sv->sv_name, libcfs_id2str(rpc->srpc_peer)); spin_unlock(&sfw_data.fw_lock); return -EAGAIN; @@ -1261,7 +1261,7 @@ sfw_handle_server_rpc(struct srpc_server_rpc *rpc) if (sv->sv_id != SRPC_SERVICE_MAKE_SESSION && sv->sv_id != SRPC_SERVICE_DEBUG) { - sfw_session_t *sn = sfw_data.fw_session; + struct sfw_session *sn = sfw_data.fw_session; if (sn && sn->sn_features != request->msg_ses_feats) { @@ -1273,7 +1273,7 @@ sfw_handle_server_rpc(struct srpc_server_rpc *rpc) } } else if (request->msg_ses_feats & ~LST_FEATS_MASK) { - /** + /* * NB: at this point, old version will ignore features and * create new session anyway, so console should be able * to handle this @@ -1377,12 +1377,12 @@ sfw_bulk_ready(struct srpc_server_rpc *rpc, int status) return rc; } -srpc_client_rpc_t * +struct srpc_client_rpc * sfw_create_rpc(lnet_process_id_t peer, int service, unsigned features, int nbulkiov, int bulklen, - void (*done)(srpc_client_rpc_t *), void *priv) + void (*done)(struct srpc_client_rpc *), void *priv) { - srpc_client_rpc_t *rpc = NULL; + struct srpc_client_rpc *rpc = NULL; spin_lock(&sfw_data.fw_lock); @@ -1391,7 +1391,7 @@ sfw_create_rpc(lnet_process_id_t peer, int service, if (!nbulkiov && !list_empty(&sfw_data.fw_zombie_rpcs)) { rpc = list_entry(sfw_data.fw_zombie_rpcs.next, - srpc_client_rpc_t, crpc_list); + struct srpc_client_rpc, crpc_list); list_del(&rpc->crpc_list); srpc_init_client_rpc(rpc, peer, service, 0, 0, @@ -1415,7 +1415,7 @@ sfw_create_rpc(lnet_process_id_t peer, int service, } void -sfw_unpack_message(srpc_msg_t *msg) +sfw_unpack_message(struct srpc_msg *msg) { if (msg->msg_magic == SRPC_MSG_MAGIC) return; /* no flipping needed */ @@ -1424,7 +1424,7 @@ sfw_unpack_message(srpc_msg_t *msg) LASSERT(msg->msg_magic == __swab32(SRPC_MSG_MAGIC)); if (msg->msg_type == SRPC_MSG_STAT_REQST) { - srpc_stat_reqst_t *req = &msg->msg_body.stat_reqst; + struct srpc_stat_reqst *req = &msg->msg_body.stat_reqst; __swab32s(&req->str_type); __swab64s(&req->str_rpyid); @@ -1433,7 +1433,7 @@ sfw_unpack_message(srpc_msg_t *msg) } if (msg->msg_type == SRPC_MSG_STAT_REPLY) { - srpc_stat_reply_t *rep = &msg->msg_body.stat_reply; + struct srpc_stat_reply *rep = &msg->msg_body.stat_reply; __swab32s(&rep->str_status); sfw_unpack_sid(rep->str_sid); @@ -1444,7 +1444,7 @@ sfw_unpack_message(srpc_msg_t *msg) } if (msg->msg_type == SRPC_MSG_MKSN_REQST) { - srpc_mksn_reqst_t *req = &msg->msg_body.mksn_reqst; + struct srpc_mksn_reqst *req = &msg->msg_body.mksn_reqst; __swab64s(&req->mksn_rpyid); __swab32s(&req->mksn_force); @@ -1453,7 +1453,7 @@ sfw_unpack_message(srpc_msg_t *msg) } if (msg->msg_type == SRPC_MSG_MKSN_REPLY) { - srpc_mksn_reply_t *rep = &msg->msg_body.mksn_reply; + struct srpc_mksn_reply *rep = &msg->msg_body.mksn_reply; __swab32s(&rep->mksn_status); __swab32s(&rep->mksn_timeout); @@ -1462,7 +1462,7 @@ sfw_unpack_message(srpc_msg_t *msg) } if (msg->msg_type == SRPC_MSG_RMSN_REQST) { - srpc_rmsn_reqst_t *req = &msg->msg_body.rmsn_reqst; + struct srpc_rmsn_reqst *req = &msg->msg_body.rmsn_reqst; __swab64s(&req->rmsn_rpyid); sfw_unpack_sid(req->rmsn_sid); @@ -1470,7 +1470,7 @@ sfw_unpack_message(srpc_msg_t *msg) } if (msg->msg_type == SRPC_MSG_RMSN_REPLY) { - srpc_rmsn_reply_t *rep = &msg->msg_body.rmsn_reply; + struct srpc_rmsn_reply *rep = &msg->msg_body.rmsn_reply; __swab32s(&rep->rmsn_status); sfw_unpack_sid(rep->rmsn_sid); @@ -1478,7 +1478,7 @@ sfw_unpack_message(srpc_msg_t *msg) } if (msg->msg_type == SRPC_MSG_DEBUG_REQST) { - srpc_debug_reqst_t *req = &msg->msg_body.dbg_reqst; + struct srpc_debug_reqst *req = &msg->msg_body.dbg_reqst; __swab64s(&req->dbg_rpyid); __swab32s(&req->dbg_flags); @@ -1487,7 +1487,7 @@ sfw_unpack_message(srpc_msg_t *msg) } if (msg->msg_type == SRPC_MSG_DEBUG_REPLY) { - srpc_debug_reply_t *rep = &msg->msg_body.dbg_reply; + struct srpc_debug_reply *rep = &msg->msg_body.dbg_reply; __swab32s(&rep->dbg_nbatch); __swab32s(&rep->dbg_timeout); @@ -1496,7 +1496,7 @@ sfw_unpack_message(srpc_msg_t *msg) } if (msg->msg_type == SRPC_MSG_BATCH_REQST) { - srpc_batch_reqst_t *req = &msg->msg_body.bat_reqst; + struct srpc_batch_reqst *req = &msg->msg_body.bat_reqst; __swab32s(&req->bar_opc); __swab64s(&req->bar_rpyid); @@ -1508,7 +1508,7 @@ sfw_unpack_message(srpc_msg_t *msg) } if (msg->msg_type == SRPC_MSG_BATCH_REPLY) { - srpc_batch_reply_t *rep = &msg->msg_body.bat_reply; + struct srpc_batch_reply *rep = &msg->msg_body.bat_reply; __swab32s(&rep->bar_status); sfw_unpack_sid(rep->bar_sid); @@ -1516,7 +1516,7 @@ sfw_unpack_message(srpc_msg_t *msg) } if (msg->msg_type == SRPC_MSG_TEST_REQST) { - srpc_test_reqst_t *req = &msg->msg_body.tes_reqst; + struct srpc_test_reqst *req = &msg->msg_body.tes_reqst; __swab64s(&req->tsr_rpyid); __swab64s(&req->tsr_bulkid); @@ -1530,7 +1530,7 @@ sfw_unpack_message(srpc_msg_t *msg) } if (msg->msg_type == SRPC_MSG_TEST_REPLY) { - srpc_test_reply_t *rep = &msg->msg_body.tes_reply; + struct srpc_test_reply *rep = &msg->msg_body.tes_reply; __swab32s(&rep->tsr_status); sfw_unpack_sid(rep->tsr_sid); @@ -1538,7 +1538,7 @@ sfw_unpack_message(srpc_msg_t *msg) } if (msg->msg_type == SRPC_MSG_JOIN_REQST) { - srpc_join_reqst_t *req = &msg->msg_body.join_reqst; + struct srpc_join_reqst *req = &msg->msg_body.join_reqst; __swab64s(&req->join_rpyid); sfw_unpack_sid(req->join_sid); @@ -1546,7 +1546,7 @@ sfw_unpack_message(srpc_msg_t *msg) } if (msg->msg_type == SRPC_MSG_JOIN_REPLY) { - srpc_join_reply_t *rep = &msg->msg_body.join_reply; + struct srpc_join_reply *rep = &msg->msg_body.join_reply; __swab32s(&rep->join_status); __swab32s(&rep->join_timeout); @@ -1558,7 +1558,7 @@ sfw_unpack_message(srpc_msg_t *msg) } void -sfw_abort_rpc(srpc_client_rpc_t *rpc) +sfw_abort_rpc(struct srpc_client_rpc *rpc) { LASSERT(atomic_read(&rpc->crpc_refcount) > 0); LASSERT(rpc->crpc_service <= SRPC_FRAMEWORK_SERVICE_MAX_ID); @@ -1569,7 +1569,7 @@ sfw_abort_rpc(srpc_client_rpc_t *rpc) } void -sfw_post_rpc(srpc_client_rpc_t *rpc) +sfw_post_rpc(struct srpc_client_rpc *rpc) { spin_lock(&rpc->crpc_lock); @@ -1584,7 +1584,7 @@ sfw_post_rpc(srpc_client_rpc_t *rpc) spin_unlock(&rpc->crpc_lock); } -static srpc_service_t sfw_services[] = { +static struct srpc_service sfw_services[] = { { /* sv_id */ SRPC_SERVICE_DEBUG, /* sv_name */ "debug", @@ -1628,8 +1628,8 @@ sfw_startup(void) int i; int rc; int error; - srpc_service_t *sv; - sfw_test_case_t *tsc; + struct srpc_service *sv; + struct sfw_test_case *tsc; if (session_timeout < 0) { CERROR("Session timeout must be non-negative: %d\n", @@ -1721,8 +1721,8 @@ sfw_startup(void) void sfw_shutdown(void) { - srpc_service_t *sv; - sfw_test_case_t *tsc; + struct srpc_service *sv; + struct sfw_test_case *tsc; int i; spin_lock(&sfw_data.fw_lock); @@ -1759,10 +1759,10 @@ sfw_shutdown(void) } while (!list_empty(&sfw_data.fw_zombie_rpcs)) { - srpc_client_rpc_t *rpc; + struct srpc_client_rpc *rpc; rpc = list_entry(sfw_data.fw_zombie_rpcs.next, - srpc_client_rpc_t, crpc_list); + struct srpc_client_rpc, crpc_list); list_del(&rpc->crpc_list); LIBCFS_FREE(rpc, srpc_client_rpc_size(rpc)); @@ -1778,7 +1778,7 @@ sfw_shutdown(void) while (!list_empty(&sfw_data.fw_tests)) { tsc = list_entry(sfw_data.fw_tests.next, - sfw_test_case_t, tsc_list); + struct sfw_test_case, tsc_list); srpc_wait_service_shutdown(tsc->tsc_srv_service); diff --git a/drivers/staging/lustre/lnet/selftest/ping_test.c b/drivers/staging/lustre/lnet/selftest/ping_test.c index 81a45045e..ad26fe9dd 100644 --- a/drivers/staging/lustre/lnet/selftest/ping_test.c +++ b/drivers/staging/lustre/lnet/selftest/ping_test.c @@ -56,9 +56,9 @@ struct lst_ping_data { static struct lst_ping_data lst_ping_data; static int -ping_client_init(sfw_test_instance_t *tsi) +ping_client_init(struct sfw_test_instance *tsi) { - sfw_session_t *sn = tsi->tsi_batch->bat_session; + struct sfw_session *sn = tsi->tsi_batch->bat_session; LASSERT(tsi->tsi_is_client); LASSERT(sn && !(sn->sn_features & ~LST_FEATS_MASK)); @@ -70,9 +70,9 @@ ping_client_init(sfw_test_instance_t *tsi) } static void -ping_client_fini(sfw_test_instance_t *tsi) +ping_client_fini(struct sfw_test_instance *tsi) { - sfw_session_t *sn = tsi->tsi_batch->bat_session; + struct sfw_session *sn = tsi->tsi_batch->bat_session; int errors; LASSERT(sn); @@ -86,12 +86,12 @@ ping_client_fini(sfw_test_instance_t *tsi) } static int -ping_client_prep_rpc(sfw_test_unit_t *tsu, - lnet_process_id_t dest, srpc_client_rpc_t **rpc) +ping_client_prep_rpc(struct sfw_test_unit *tsu, lnet_process_id_t dest, + struct srpc_client_rpc **rpc) { - srpc_ping_reqst_t *req; - sfw_test_instance_t *tsi = tsu->tsu_instance; - sfw_session_t *sn = tsi->tsi_batch->bat_session; + struct srpc_ping_reqst *req; + struct sfw_test_instance *tsi = tsu->tsu_instance; + struct sfw_session *sn = tsi->tsi_batch->bat_session; struct timespec64 ts; int rc; @@ -118,18 +118,18 @@ ping_client_prep_rpc(sfw_test_unit_t *tsu, } static void -ping_client_done_rpc(sfw_test_unit_t *tsu, srpc_client_rpc_t *rpc) +ping_client_done_rpc(struct sfw_test_unit *tsu, struct srpc_client_rpc *rpc) { - sfw_test_instance_t *tsi = tsu->tsu_instance; - sfw_session_t *sn = tsi->tsi_batch->bat_session; - srpc_ping_reqst_t *reqst = &rpc->crpc_reqstmsg.msg_body.ping_reqst; - srpc_ping_reply_t *reply = &rpc->crpc_replymsg.msg_body.ping_reply; + struct sfw_test_instance *tsi = tsu->tsu_instance; + struct sfw_session *sn = tsi->tsi_batch->bat_session; + struct srpc_ping_reqst *reqst = &rpc->crpc_reqstmsg.msg_body.ping_reqst; + struct srpc_ping_reply *reply = &rpc->crpc_replymsg.msg_body.ping_reply; struct timespec64 ts; LASSERT(sn); if (rpc->crpc_status) { - if (!tsi->tsi_stopping) /* rpc could have been aborted */ + if (!tsi->tsi_stopping) /* rpc could have been aborted */ atomic_inc(&sn->sn_ping_errors); CERROR("Unable to ping %s (%d): %d\n", libcfs_id2str(rpc->crpc_dest), @@ -171,10 +171,10 @@ static int ping_server_handle(struct srpc_server_rpc *rpc) { struct srpc_service *sv = rpc->srpc_scd->scd_svc; - srpc_msg_t *reqstmsg = &rpc->srpc_reqstbuf->buf_msg; - srpc_msg_t *replymsg = &rpc->srpc_replymsg; - srpc_ping_reqst_t *req = &reqstmsg->msg_body.ping_reqst; - srpc_ping_reply_t *rep = &rpc->srpc_replymsg.msg_body.ping_reply; + struct srpc_msg *reqstmsg = &rpc->srpc_reqstbuf->buf_msg; + struct srpc_msg *replymsg = &rpc->srpc_replymsg; + struct srpc_ping_reqst *req = &reqstmsg->msg_body.ping_reqst; + struct srpc_ping_reply *rep = &rpc->srpc_replymsg.msg_body.ping_reply; LASSERT(sv->sv_id == SRPC_SERVICE_PING); @@ -210,7 +210,8 @@ ping_server_handle(struct srpc_server_rpc *rpc) return 0; } -sfw_test_client_ops_t ping_test_client; +struct sfw_test_client_ops ping_test_client; + void ping_init_test_client(void) { ping_test_client.tso_init = ping_client_init; @@ -219,7 +220,8 @@ void ping_init_test_client(void) ping_test_client.tso_done_rpc = ping_client_done_rpc; } -srpc_service_t ping_test_service; +struct srpc_service ping_test_service; + void ping_init_test_service(void) { ping_test_service.sv_id = SRPC_SERVICE_PING; diff --git a/drivers/staging/lustre/lnet/selftest/rpc.c b/drivers/staging/lustre/lnet/selftest/rpc.c index 7d7748d96..3c45a7cfa 100644 --- a/drivers/staging/lustre/lnet/selftest/rpc.c +++ b/drivers/staging/lustre/lnet/selftest/rpc.c @@ -46,19 +46,19 @@ #include "selftest.h" -typedef enum { +enum srpc_state { SRPC_STATE_NONE, SRPC_STATE_NI_INIT, SRPC_STATE_EQ_INIT, SRPC_STATE_RUNNING, SRPC_STATE_STOPPING, -} srpc_state_t; +}; static struct smoketest_rpc { spinlock_t rpc_glock; /* global lock */ - srpc_service_t *rpc_services[SRPC_SERVICE_MAX_ID + 1]; + struct srpc_service *rpc_services[SRPC_SERVICE_MAX_ID + 1]; lnet_handle_eq_t rpc_lnet_eq; /* _the_ LNet event queue */ - srpc_state_t rpc_state; + enum srpc_state rpc_state; srpc_counters_t rpc_counters; __u64 rpc_matchbits; /* matchbits counter */ } srpc_data; @@ -71,7 +71,7 @@ srpc_serv_portal(int svc_id) } /* forward ref's */ -int srpc_handle_rpc(swi_workitem_t *wi); +int srpc_handle_rpc(struct swi_workitem *wi); void srpc_get_counters(srpc_counters_t *cnt) { @@ -88,7 +88,7 @@ void srpc_set_counters(const srpc_counters_t *cnt) } static int -srpc_add_bulk_page(srpc_bulk_t *bk, struct page *pg, int i, int nob) +srpc_add_bulk_page(struct srpc_bulk *bk, struct page *pg, int i, int nob) { nob = min_t(int, nob, PAGE_SIZE); @@ -102,7 +102,7 @@ srpc_add_bulk_page(srpc_bulk_t *bk, struct page *pg, int i, int nob) } void -srpc_free_bulk(srpc_bulk_t *bk) +srpc_free_bulk(struct srpc_bulk *bk) { int i; struct page *pg; @@ -117,25 +117,25 @@ srpc_free_bulk(srpc_bulk_t *bk) __free_page(pg); } - LIBCFS_FREE(bk, offsetof(srpc_bulk_t, bk_iovs[bk->bk_niov])); + LIBCFS_FREE(bk, offsetof(struct srpc_bulk, bk_iovs[bk->bk_niov])); } -srpc_bulk_t * +struct srpc_bulk * srpc_alloc_bulk(int cpt, unsigned bulk_npg, unsigned bulk_len, int sink) { - srpc_bulk_t *bk; + struct srpc_bulk *bk; int i; LASSERT(bulk_npg > 0 && bulk_npg <= LNET_MAX_IOV); LIBCFS_CPT_ALLOC(bk, lnet_cpt_table(), cpt, - offsetof(srpc_bulk_t, bk_iovs[bulk_npg])); + offsetof(struct srpc_bulk, bk_iovs[bulk_npg])); if (!bk) { CERROR("Can't allocate descriptor for %d pages\n", bulk_npg); return NULL; } - memset(bk, 0, offsetof(srpc_bulk_t, bk_iovs[bulk_npg])); + memset(bk, 0, offsetof(struct srpc_bulk, bk_iovs[bulk_npg])); bk->bk_sink = sink; bk->bk_len = bulk_len; bk->bk_niov = bulk_npg; @@ -256,7 +256,7 @@ srpc_service_init(struct srpc_service *svc) svc->sv_shuttingdown = 0; svc->sv_cpt_data = cfs_percpt_alloc(lnet_cpt_table(), - sizeof(struct srpc_service_cd)); + sizeof(*svc->sv_cpt_data)); if (!svc->sv_cpt_data) return -ENOMEM; @@ -338,7 +338,7 @@ srpc_add_service(struct srpc_service *sv) } int -srpc_remove_service(srpc_service_t *sv) +srpc_remove_service(struct srpc_service *sv) { int id = sv->sv_id; @@ -357,7 +357,7 @@ srpc_remove_service(srpc_service_t *sv) static int srpc_post_passive_rdma(int portal, int local, __u64 matchbits, void *buf, int len, int options, lnet_process_id_t peer, - lnet_handle_md_t *mdh, srpc_event_t *ev) + lnet_handle_md_t *mdh, struct srpc_event *ev) { int rc; lnet_md_t md; @@ -396,7 +396,7 @@ srpc_post_passive_rdma(int portal, int local, __u64 matchbits, void *buf, static int srpc_post_active_rdma(int portal, __u64 matchbits, void *buf, int len, int options, lnet_process_id_t peer, lnet_nid_t self, - lnet_handle_md_t *mdh, srpc_event_t *ev) + lnet_handle_md_t *mdh, struct srpc_event *ev) { int rc; lnet_md_t md; @@ -449,7 +449,7 @@ srpc_post_active_rdma(int portal, __u64 matchbits, void *buf, int len, static int srpc_post_passive_rqtbuf(int service, int local, void *buf, int len, - lnet_handle_md_t *mdh, srpc_event_t *ev) + lnet_handle_md_t *mdh, struct srpc_event *ev) { lnet_process_id_t any = { 0 }; @@ -697,7 +697,7 @@ srpc_finish_service(struct srpc_service *sv) /* called with sv->sv_lock held */ static void -srpc_service_recycle_buffer(struct srpc_service_cd *scd, srpc_buffer_t *buf) +srpc_service_recycle_buffer(struct srpc_service_cd *scd, struct srpc_buffer *buf) __must_hold(&scd->scd_lock) { if (!scd->scd_svc->sv_shuttingdown && scd->scd_buf_adjust >= 0) { @@ -755,11 +755,11 @@ srpc_abort_service(struct srpc_service *sv) } void -srpc_shutdown_service(srpc_service_t *sv) +srpc_shutdown_service(struct srpc_service *sv) { struct srpc_service_cd *scd; struct srpc_server_rpc *rpc; - srpc_buffer_t *buf; + struct srpc_buffer *buf; int i; CDEBUG(D_NET, "Shutting down service: id %d, name %s\n", @@ -792,9 +792,9 @@ srpc_shutdown_service(srpc_service_t *sv) } static int -srpc_send_request(srpc_client_rpc_t *rpc) +srpc_send_request(struct srpc_client_rpc *rpc) { - srpc_event_t *ev = &rpc->crpc_reqstev; + struct srpc_event *ev = &rpc->crpc_reqstev; int rc; ev->ev_fired = 0; @@ -803,7 +803,7 @@ srpc_send_request(srpc_client_rpc_t *rpc) rc = srpc_post_active_rdma(srpc_serv_portal(rpc->crpc_service), rpc->crpc_service, &rpc->crpc_reqstmsg, - sizeof(srpc_msg_t), LNET_MD_OP_PUT, + sizeof(struct srpc_msg), LNET_MD_OP_PUT, rpc->crpc_dest, LNET_NID_ANY, &rpc->crpc_reqstmdh, ev); if (rc) { @@ -814,9 +814,9 @@ srpc_send_request(srpc_client_rpc_t *rpc) } static int -srpc_prepare_reply(srpc_client_rpc_t *rpc) +srpc_prepare_reply(struct srpc_client_rpc *rpc) { - srpc_event_t *ev = &rpc->crpc_replyev; + struct srpc_event *ev = &rpc->crpc_replyev; __u64 *id = &rpc->crpc_reqstmsg.msg_body.reqst.rpyid; int rc; @@ -827,7 +827,8 @@ srpc_prepare_reply(srpc_client_rpc_t *rpc) *id = srpc_next_id(); rc = srpc_post_passive_rdma(SRPC_RDMA_PORTAL, 0, *id, - &rpc->crpc_replymsg, sizeof(srpc_msg_t), + &rpc->crpc_replymsg, + sizeof(struct srpc_msg), LNET_MD_OP_PUT, rpc->crpc_dest, &rpc->crpc_replymdh, ev); if (rc) { @@ -838,10 +839,10 @@ srpc_prepare_reply(srpc_client_rpc_t *rpc) } static int -srpc_prepare_bulk(srpc_client_rpc_t *rpc) +srpc_prepare_bulk(struct srpc_client_rpc *rpc) { - srpc_bulk_t *bk = &rpc->crpc_bulk; - srpc_event_t *ev = &rpc->crpc_bulkev; + struct srpc_bulk *bk = &rpc->crpc_bulk; + struct srpc_event *ev = &rpc->crpc_bulkev; __u64 *id = &rpc->crpc_reqstmsg.msg_body.reqst.bulkid; int rc; int opt; @@ -873,8 +874,8 @@ srpc_prepare_bulk(srpc_client_rpc_t *rpc) static int srpc_do_bulk(struct srpc_server_rpc *rpc) { - srpc_event_t *ev = &rpc->srpc_ev; - srpc_bulk_t *bk = rpc->srpc_bulk; + struct srpc_event *ev = &rpc->srpc_ev; + struct srpc_bulk *bk = rpc->srpc_bulk; __u64 id = rpc->srpc_reqstbuf->buf_msg.msg_body.reqst.bulkid; int rc; int opt; @@ -903,7 +904,7 @@ srpc_server_rpc_done(struct srpc_server_rpc *rpc, int status) { struct srpc_service_cd *scd = rpc->srpc_scd; struct srpc_service *sv = scd->scd_svc; - srpc_buffer_t *buffer; + struct srpc_buffer *buffer; LASSERT(status || rpc->srpc_wi.swi_state == SWI_STATE_DONE); @@ -948,7 +949,7 @@ srpc_server_rpc_done(struct srpc_server_rpc *rpc, int status) if (!sv->sv_shuttingdown && !list_empty(&scd->scd_buf_blocked)) { buffer = list_entry(scd->scd_buf_blocked.next, - srpc_buffer_t, buf_list); + struct srpc_buffer, buf_list); list_del(&buffer->buf_list); srpc_init_server_rpc(rpc, scd, buffer); @@ -963,12 +964,12 @@ srpc_server_rpc_done(struct srpc_server_rpc *rpc, int status) /* handles an incoming RPC */ int -srpc_handle_rpc(swi_workitem_t *wi) +srpc_handle_rpc(struct swi_workitem *wi) { struct srpc_server_rpc *rpc = wi->swi_workitem.wi_data; struct srpc_service_cd *scd = rpc->srpc_scd; struct srpc_service *sv = scd->scd_svc; - srpc_event_t *ev = &rpc->srpc_ev; + struct srpc_event *ev = &rpc->srpc_ev; int rc = 0; LASSERT(wi == &rpc->srpc_wi); @@ -995,8 +996,8 @@ srpc_handle_rpc(swi_workitem_t *wi) default: LBUG(); case SWI_STATE_NEWBORN: { - srpc_msg_t *msg; - srpc_generic_reply_t *reply; + struct srpc_msg *msg; + struct srpc_generic_reply *reply; msg = &rpc->srpc_reqstbuf->buf_msg; reply = &rpc->srpc_replymsg.msg_body.reply; @@ -1077,7 +1078,7 @@ srpc_handle_rpc(swi_workitem_t *wi) static void srpc_client_rpc_expired(void *data) { - srpc_client_rpc_t *rpc = data; + struct srpc_client_rpc *rpc = data; CWARN("Client RPC expired: service %d, peer %s, timeout %d.\n", rpc->crpc_service, libcfs_id2str(rpc->crpc_dest), @@ -1096,7 +1097,7 @@ srpc_client_rpc_expired(void *data) } static void -srpc_add_client_rpc_timer(srpc_client_rpc_t *rpc) +srpc_add_client_rpc_timer(struct srpc_client_rpc *rpc) { struct stt_timer *timer = &rpc->crpc_timer; @@ -1117,7 +1118,7 @@ srpc_add_client_rpc_timer(srpc_client_rpc_t *rpc) * running on any CPU. */ static void -srpc_del_client_rpc_timer(srpc_client_rpc_t *rpc) +srpc_del_client_rpc_timer(struct srpc_client_rpc *rpc) { /* timer not planted or already exploded */ if (!rpc->crpc_timeout) @@ -1138,9 +1139,9 @@ srpc_del_client_rpc_timer(srpc_client_rpc_t *rpc) } static void -srpc_client_rpc_done(srpc_client_rpc_t *rpc, int status) +srpc_client_rpc_done(struct srpc_client_rpc *rpc, int status) { - swi_workitem_t *wi = &rpc->crpc_wi; + struct swi_workitem *wi = &rpc->crpc_wi; LASSERT(status || wi->swi_state == SWI_STATE_DONE); @@ -1175,11 +1176,11 @@ srpc_client_rpc_done(srpc_client_rpc_t *rpc, int status) /* sends an outgoing RPC */ int -srpc_send_rpc(swi_workitem_t *wi) +srpc_send_rpc(struct swi_workitem *wi) { int rc = 0; - srpc_client_rpc_t *rpc; - srpc_msg_t *reply; + struct srpc_client_rpc *rpc; + struct srpc_msg *reply; int do_bulk; LASSERT(wi); @@ -1237,7 +1238,7 @@ srpc_send_rpc(swi_workitem_t *wi) wi->swi_state = SWI_STATE_REQUEST_SENT; /* perhaps more events, fall thru */ case SWI_STATE_REQUEST_SENT: { - srpc_msg_type_t type = srpc_service2reply(rpc->crpc_service); + enum srpc_msg_type type = srpc_service2reply(rpc->crpc_service); if (!rpc->crpc_replyev.ev_fired) break; @@ -1308,15 +1309,15 @@ abort: return 0; } -srpc_client_rpc_t * +struct srpc_client_rpc * srpc_create_client_rpc(lnet_process_id_t peer, int service, int nbulkiov, int bulklen, - void (*rpc_done)(srpc_client_rpc_t *), - void (*rpc_fini)(srpc_client_rpc_t *), void *priv) + void (*rpc_done)(struct srpc_client_rpc *), + void (*rpc_fini)(struct srpc_client_rpc *), void *priv) { - srpc_client_rpc_t *rpc; + struct srpc_client_rpc *rpc; - LIBCFS_ALLOC(rpc, offsetof(srpc_client_rpc_t, + LIBCFS_ALLOC(rpc, offsetof(struct srpc_client_rpc, crpc_bulk.bk_iovs[nbulkiov])); if (!rpc) return NULL; @@ -1328,12 +1329,12 @@ srpc_create_client_rpc(lnet_process_id_t peer, int service, /* called with rpc->crpc_lock held */ void -srpc_abort_rpc(srpc_client_rpc_t *rpc, int why) +srpc_abort_rpc(struct srpc_client_rpc *rpc, int why) { LASSERT(why); - if (rpc->crpc_aborted || /* already aborted */ - rpc->crpc_closed) /* callback imminent */ + if (rpc->crpc_aborted || /* already aborted */ + rpc->crpc_closed) /* callback imminent */ return; CDEBUG(D_NET, "Aborting RPC: service %d, peer %s, state %s, why %d\n", @@ -1347,7 +1348,7 @@ srpc_abort_rpc(srpc_client_rpc_t *rpc, int why) /* called with rpc->crpc_lock held */ void -srpc_post_rpc(srpc_client_rpc_t *rpc) +srpc_post_rpc(struct srpc_client_rpc *rpc) { LASSERT(!rpc->crpc_aborted); LASSERT(srpc_data.rpc_state == SRPC_STATE_RUNNING); @@ -1363,7 +1364,7 @@ srpc_post_rpc(srpc_client_rpc_t *rpc) int srpc_send_reply(struct srpc_server_rpc *rpc) { - srpc_event_t *ev = &rpc->srpc_ev; + struct srpc_event *ev = &rpc->srpc_ev; struct srpc_msg *msg = &rpc->srpc_replymsg; struct srpc_buffer *buffer = rpc->srpc_reqstbuf; struct srpc_service_cd *scd = rpc->srpc_scd; @@ -1401,7 +1402,7 @@ srpc_send_reply(struct srpc_server_rpc *rpc) rpc->srpc_peer, rpc->srpc_self, &rpc->srpc_replymdh, ev); if (rc) - ev->ev_fired = 1; /* no more event expected */ + ev->ev_fired = 1; /* no more event expected */ return rc; } @@ -1410,13 +1411,13 @@ static void srpc_lnet_ev_handler(lnet_event_t *ev) { struct srpc_service_cd *scd; - srpc_event_t *rpcev = ev->md.user_ptr; - srpc_client_rpc_t *crpc; + struct srpc_event *rpcev = ev->md.user_ptr; + struct srpc_client_rpc *crpc; struct srpc_server_rpc *srpc; - srpc_buffer_t *buffer; - srpc_service_t *sv; - srpc_msg_t *msg; - srpc_msg_type_t type; + struct srpc_buffer *buffer; + struct srpc_service *sv; + struct srpc_msg *msg; + enum srpc_msg_type type; LASSERT(!in_interrupt()); @@ -1486,7 +1487,7 @@ srpc_lnet_ev_handler(lnet_event_t *ev) LASSERT(ev->type != LNET_EVENT_UNLINK || sv->sv_shuttingdown); - buffer = container_of(ev->md.start, srpc_buffer_t, buf_msg); + buffer = container_of(ev->md.start, struct srpc_buffer, buf_msg); buffer->buf_peer = ev->initiator; buffer->buf_self = ev->target.nid; @@ -1509,7 +1510,7 @@ srpc_lnet_ev_handler(lnet_event_t *ev) scd->scd_buf_err = 0; } - if (!scd->scd_buf_err && /* adding buffer is enabled */ + if (!scd->scd_buf_err && /* adding buffer is enabled */ !scd->scd_buf_adjust && scd->scd_buf_nposted < scd->scd_buf_low) { scd->scd_buf_adjust = max(scd->scd_buf_total / 2, @@ -1663,7 +1664,7 @@ srpc_shutdown(void) spin_lock(&srpc_data.rpc_glock); for (i = 0; i <= SRPC_SERVICE_MAX_ID; i++) { - srpc_service_t *sv = srpc_data.rpc_services[i]; + struct srpc_service *sv = srpc_data.rpc_services[i]; LASSERTF(!sv, "service not empty: id %d, name %s\n", i, sv->sv_name); diff --git a/drivers/staging/lustre/lnet/selftest/rpc.h b/drivers/staging/lustre/lnet/selftest/rpc.h index a79c315f2..c9b904cad 100644 --- a/drivers/staging/lustre/lnet/selftest/rpc.h +++ b/drivers/staging/lustre/lnet/selftest/rpc.h @@ -44,7 +44,7 @@ * * XXX: *REPLY == *REQST + 1 */ -typedef enum { +enum srpc_msg_type { SRPC_MSG_MKSN_REQST = 0, SRPC_MSG_MKSN_REPLY = 1, SRPC_MSG_RMSN_REQST = 2, @@ -63,7 +63,7 @@ typedef enum { SRPC_MSG_PING_REPLY = 15, SRPC_MSG_JOIN_REQST = 16, SRPC_MSG_JOIN_REPLY = 17, -} srpc_msg_type_t; +}; /* CAVEAT EMPTOR: * All srpc_*_reqst_t's 1st field must be matchbits of reply buffer, @@ -72,122 +72,122 @@ typedef enum { * All srpc_*_reply_t's 1st field must be a __u32 status, and 2nd field * session id if needed. */ -typedef struct { +struct srpc_generic_reqst { __u64 rpyid; /* reply buffer matchbits */ __u64 bulkid; /* bulk buffer matchbits */ -} WIRE_ATTR srpc_generic_reqst_t; +} WIRE_ATTR; -typedef struct { +struct srpc_generic_reply { __u32 status; lst_sid_t sid; -} WIRE_ATTR srpc_generic_reply_t; +} WIRE_ATTR; /* FRAMEWORK RPCs */ -typedef struct { +struct srpc_mksn_reqst { __u64 mksn_rpyid; /* reply buffer matchbits */ lst_sid_t mksn_sid; /* session id */ __u32 mksn_force; /* use brute force */ char mksn_name[LST_NAME_SIZE]; -} WIRE_ATTR srpc_mksn_reqst_t; /* make session request */ +} WIRE_ATTR; /* make session request */ -typedef struct { +struct srpc_mksn_reply { __u32 mksn_status; /* session status */ lst_sid_t mksn_sid; /* session id */ __u32 mksn_timeout; /* session timeout */ char mksn_name[LST_NAME_SIZE]; -} WIRE_ATTR srpc_mksn_reply_t; /* make session reply */ +} WIRE_ATTR; /* make session reply */ -typedef struct { +struct srpc_rmsn_reqst { __u64 rmsn_rpyid; /* reply buffer matchbits */ lst_sid_t rmsn_sid; /* session id */ -} WIRE_ATTR srpc_rmsn_reqst_t; /* remove session request */ +} WIRE_ATTR; /* remove session request */ -typedef struct { +struct srpc_rmsn_reply { __u32 rmsn_status; lst_sid_t rmsn_sid; /* session id */ -} WIRE_ATTR srpc_rmsn_reply_t; /* remove session reply */ +} WIRE_ATTR; /* remove session reply */ -typedef struct { +struct srpc_join_reqst { __u64 join_rpyid; /* reply buffer matchbits */ lst_sid_t join_sid; /* session id to join */ char join_group[LST_NAME_SIZE]; /* group name */ -} WIRE_ATTR srpc_join_reqst_t; +} WIRE_ATTR; -typedef struct { +struct srpc_join_reply { __u32 join_status; /* returned status */ lst_sid_t join_sid; /* session id */ __u32 join_timeout; /* # seconds' inactivity to * expire */ char join_session[LST_NAME_SIZE]; /* session name */ -} WIRE_ATTR srpc_join_reply_t; +} WIRE_ATTR; -typedef struct { +struct srpc_debug_reqst { __u64 dbg_rpyid; /* reply buffer matchbits */ lst_sid_t dbg_sid; /* session id */ __u32 dbg_flags; /* bitmap of debug */ -} WIRE_ATTR srpc_debug_reqst_t; +} WIRE_ATTR; -typedef struct { +struct srpc_debug_reply { __u32 dbg_status; /* returned code */ lst_sid_t dbg_sid; /* session id */ __u32 dbg_timeout; /* session timeout */ __u32 dbg_nbatch; /* # of batches in the node */ char dbg_name[LST_NAME_SIZE]; /* session name */ -} WIRE_ATTR srpc_debug_reply_t; +} WIRE_ATTR; #define SRPC_BATCH_OPC_RUN 1 #define SRPC_BATCH_OPC_STOP 2 #define SRPC_BATCH_OPC_QUERY 3 -typedef struct { +struct srpc_batch_reqst { __u64 bar_rpyid; /* reply buffer matchbits */ lst_sid_t bar_sid; /* session id */ lst_bid_t bar_bid; /* batch id */ __u32 bar_opc; /* create/start/stop batch */ __u32 bar_testidx; /* index of test */ __u32 bar_arg; /* parameters */ -} WIRE_ATTR srpc_batch_reqst_t; +} WIRE_ATTR; -typedef struct { +struct srpc_batch_reply { __u32 bar_status; /* status of request */ lst_sid_t bar_sid; /* session id */ __u32 bar_active; /* # of active tests in batch/test */ __u32 bar_time; /* remained time */ -} WIRE_ATTR srpc_batch_reply_t; +} WIRE_ATTR; -typedef struct { +struct srpc_stat_reqst { __u64 str_rpyid; /* reply buffer matchbits */ lst_sid_t str_sid; /* session id */ __u32 str_type; /* type of stat */ -} WIRE_ATTR srpc_stat_reqst_t; +} WIRE_ATTR; -typedef struct { +struct srpc_stat_reply { __u32 str_status; lst_sid_t str_sid; sfw_counters_t str_fw; srpc_counters_t str_rpc; lnet_counters_t str_lnet; -} WIRE_ATTR srpc_stat_reply_t; +} WIRE_ATTR; -typedef struct { +struct test_bulk_req { __u32 blk_opc; /* bulk operation code */ __u32 blk_npg; /* # of pages */ __u32 blk_flags; /* reserved flags */ -} WIRE_ATTR test_bulk_req_t; +} WIRE_ATTR; -typedef struct { +struct test_bulk_req_v1 { __u16 blk_opc; /* bulk operation code */ __u16 blk_flags; /* data check flags */ __u32 blk_len; /* data length */ __u32 blk_offset; /* reserved: offset */ -} WIRE_ATTR test_bulk_req_v1_t; +} WIRE_ATTR; -typedef struct { +struct test_ping_req { __u32 png_size; /* size of ping message */ __u32 png_flags; /* reserved flags */ -} WIRE_ATTR test_ping_req_t; +} WIRE_ATTR; -typedef struct { +struct srpc_test_reqst { __u64 tsr_rpyid; /* reply buffer matchbits */ __u64 tsr_bulkid; /* bulk buffer matchbits */ lst_sid_t tsr_sid; /* session id */ @@ -201,82 +201,82 @@ typedef struct { __u32 tsr_ndest; /* # of dest nodes */ union { - test_ping_req_t ping; - test_bulk_req_t bulk_v0; - test_bulk_req_v1_t bulk_v1; - } tsr_u; -} WIRE_ATTR srpc_test_reqst_t; + struct test_ping_req ping; + struct test_bulk_req bulk_v0; + struct test_bulk_req_v1 bulk_v1; + } tsr_u; +} WIRE_ATTR; -typedef struct { +struct srpc_test_reply { __u32 tsr_status; /* returned code */ lst_sid_t tsr_sid; -} WIRE_ATTR srpc_test_reply_t; +} WIRE_ATTR; /* TEST RPCs */ -typedef struct { +struct srpc_ping_reqst { __u64 pnr_rpyid; __u32 pnr_magic; __u32 pnr_seq; __u64 pnr_time_sec; __u64 pnr_time_usec; -} WIRE_ATTR srpc_ping_reqst_t; +} WIRE_ATTR; -typedef struct { +struct srpc_ping_reply { __u32 pnr_status; __u32 pnr_magic; __u32 pnr_seq; -} WIRE_ATTR srpc_ping_reply_t; +} WIRE_ATTR; -typedef struct { +struct srpc_brw_reqst { __u64 brw_rpyid; /* reply buffer matchbits */ __u64 brw_bulkid; /* bulk buffer matchbits */ __u32 brw_rw; /* read or write */ __u32 brw_len; /* bulk data len */ __u32 brw_flags; /* bulk data patterns */ -} WIRE_ATTR srpc_brw_reqst_t; /* bulk r/w request */ +} WIRE_ATTR; /* bulk r/w request */ -typedef struct { +struct srpc_brw_reply { __u32 brw_status; -} WIRE_ATTR srpc_brw_reply_t; /* bulk r/w reply */ +} WIRE_ATTR; /* bulk r/w reply */ #define SRPC_MSG_MAGIC 0xeeb0f00d #define SRPC_MSG_VERSION 1 -typedef struct srpc_msg { +struct srpc_msg { __u32 msg_magic; /* magic number */ __u32 msg_version; /* message version number */ - __u32 msg_type; /* type of message body: srpc_msg_type_t */ + __u32 msg_type; /* type of message body: srpc_msg_type */ __u32 msg_reserved0; __u32 msg_reserved1; __u32 msg_ses_feats; /* test session features */ union { - srpc_generic_reqst_t reqst; - srpc_generic_reply_t reply; - - srpc_mksn_reqst_t mksn_reqst; - srpc_mksn_reply_t mksn_reply; - srpc_rmsn_reqst_t rmsn_reqst; - srpc_rmsn_reply_t rmsn_reply; - srpc_debug_reqst_t dbg_reqst; - srpc_debug_reply_t dbg_reply; - srpc_batch_reqst_t bat_reqst; - srpc_batch_reply_t bat_reply; - srpc_stat_reqst_t stat_reqst; - srpc_stat_reply_t stat_reply; - srpc_test_reqst_t tes_reqst; - srpc_test_reply_t tes_reply; - srpc_join_reqst_t join_reqst; - srpc_join_reply_t join_reply; - - srpc_ping_reqst_t ping_reqst; - srpc_ping_reply_t ping_reply; - srpc_brw_reqst_t brw_reqst; - srpc_brw_reply_t brw_reply; + struct srpc_generic_reqst reqst; + struct srpc_generic_reply reply; + + struct srpc_mksn_reqst mksn_reqst; + struct srpc_mksn_reply mksn_reply; + struct srpc_rmsn_reqst rmsn_reqst; + struct srpc_rmsn_reply rmsn_reply; + struct srpc_debug_reqst dbg_reqst; + struct srpc_debug_reply dbg_reply; + struct srpc_batch_reqst bat_reqst; + struct srpc_batch_reply bat_reply; + struct srpc_stat_reqst stat_reqst; + struct srpc_stat_reply stat_reply; + struct srpc_test_reqst tes_reqst; + struct srpc_test_reply tes_reply; + struct srpc_join_reqst join_reqst; + struct srpc_join_reply join_reply; + + struct srpc_ping_reqst ping_reqst; + struct srpc_ping_reply ping_reply; + struct srpc_brw_reqst brw_reqst; + struct srpc_brw_reply brw_reply; } msg_body; -} WIRE_ATTR srpc_msg_t; +} WIRE_ATTR; static inline void -srpc_unpack_msg_hdr(srpc_msg_t *msg) +srpc_unpack_msg_hdr(struct srpc_msg *msg) { if (msg->msg_magic == SRPC_MSG_MAGIC) return; /* no flipping needed */ diff --git a/drivers/staging/lustre/lnet/selftest/selftest.h b/drivers/staging/lustre/lnet/selftest/selftest.h index e689ca184..4eac1c9e6 100644 --- a/drivers/staging/lustre/lnet/selftest/selftest.h +++ b/drivers/staging/lustre/lnet/selftest/selftest.h @@ -93,7 +93,7 @@ struct sfw_test_instance; /* all reply/bulk RDMAs go to this portal */ #define SRPC_RDMA_PORTAL 52 -static inline srpc_msg_type_t +static inline enum srpc_msg_type srpc_service2request(int service) { switch (service) { @@ -128,13 +128,13 @@ srpc_service2request(int service) } } -static inline srpc_msg_type_t +static inline enum srpc_msg_type srpc_service2reply(int service) { return srpc_service2request(service) + 1; } -typedef enum { +enum srpc_event_type { SRPC_BULK_REQ_RCVD = 1, /* passive bulk request(PUT sink/GET source) * received */ SRPC_BULK_PUT_SENT = 2, /* active bulk PUT sent (source) */ @@ -143,57 +143,58 @@ typedef enum { SRPC_REPLY_SENT = 5, /* outgoing reply sent */ SRPC_REQUEST_RCVD = 6, /* incoming request received */ SRPC_REQUEST_SENT = 7, /* outgoing request sent */ -} srpc_event_type_t; +}; /* RPC event */ -typedef struct { - srpc_event_type_t ev_type; /* what's up */ +struct srpc_event { + enum srpc_event_type ev_type; /* what's up */ lnet_event_kind_t ev_lnet; /* LNet event type */ int ev_fired; /* LNet event fired? */ int ev_status; /* LNet event status */ void *ev_data; /* owning server/client RPC */ -} srpc_event_t; +}; -typedef struct { +/* bulk descriptor */ +struct srpc_bulk { int bk_len; /* len of bulk data */ lnet_handle_md_t bk_mdh; int bk_sink; /* sink/source */ int bk_niov; /* # iov in bk_iovs */ lnet_kiov_t bk_iovs[0]; -} srpc_bulk_t; /* bulk descriptor */ +}; /* message buffer descriptor */ -typedef struct srpc_buffer { +struct srpc_buffer { struct list_head buf_list; /* chain on srpc_service::*_msgq */ - srpc_msg_t buf_msg; + struct srpc_msg buf_msg; lnet_handle_md_t buf_mdh; lnet_nid_t buf_self; lnet_process_id_t buf_peer; -} srpc_buffer_t; +}; struct swi_workitem; typedef int (*swi_action_t) (struct swi_workitem *); -typedef struct swi_workitem { +struct swi_workitem { struct cfs_wi_sched *swi_sched; - cfs_workitem_t swi_workitem; + struct cfs_workitem swi_workitem; swi_action_t swi_action; int swi_state; -} swi_workitem_t; +}; /* server-side state of a RPC */ struct srpc_server_rpc { /* chain on srpc_service::*_rpcq */ struct list_head srpc_list; struct srpc_service_cd *srpc_scd; - swi_workitem_t srpc_wi; - srpc_event_t srpc_ev; /* bulk/reply event */ + struct swi_workitem srpc_wi; + struct srpc_event srpc_ev; /* bulk/reply event */ lnet_nid_t srpc_self; lnet_process_id_t srpc_peer; - srpc_msg_t srpc_replymsg; + struct srpc_msg srpc_replymsg; lnet_handle_md_t srpc_replymdh; - srpc_buffer_t *srpc_reqstbuf; - srpc_bulk_t *srpc_bulk; + struct srpc_buffer *srpc_reqstbuf; + struct srpc_bulk *srpc_bulk; unsigned int srpc_aborted; /* being given up */ int srpc_status; @@ -201,14 +202,14 @@ struct srpc_server_rpc { }; /* client-side state of a RPC */ -typedef struct srpc_client_rpc { +struct srpc_client_rpc { struct list_head crpc_list; /* chain on user's lists */ spinlock_t crpc_lock; /* serialize */ int crpc_service; atomic_t crpc_refcount; int crpc_timeout; /* # seconds to wait for reply */ struct stt_timer crpc_timer; - swi_workitem_t crpc_wi; + struct swi_workitem crpc_wi; lnet_process_id_t crpc_dest; void (*crpc_done)(struct srpc_client_rpc *); @@ -221,20 +222,20 @@ typedef struct srpc_client_rpc { unsigned int crpc_closed:1; /* completed */ /* RPC events */ - srpc_event_t crpc_bulkev; /* bulk event */ - srpc_event_t crpc_reqstev; /* request event */ - srpc_event_t crpc_replyev; /* reply event */ + struct srpc_event crpc_bulkev; /* bulk event */ + struct srpc_event crpc_reqstev; /* request event */ + struct srpc_event crpc_replyev; /* reply event */ /* bulk, request(reqst), and reply exchanged on wire */ - srpc_msg_t crpc_reqstmsg; - srpc_msg_t crpc_replymsg; + struct srpc_msg crpc_reqstmsg; + struct srpc_msg crpc_replymsg; lnet_handle_md_t crpc_reqstmdh; lnet_handle_md_t crpc_replymdh; - srpc_bulk_t crpc_bulk; -} srpc_client_rpc_t; + struct srpc_bulk crpc_bulk; +}; #define srpc_client_rpc_size(rpc) \ -offsetof(srpc_client_rpc_t, crpc_bulk.bk_iovs[(rpc)->crpc_bulk.bk_niov]) +offsetof(struct srpc_client_rpc, crpc_bulk.bk_iovs[(rpc)->crpc_bulk.bk_niov]) #define srpc_client_rpc_addref(rpc) \ do { \ @@ -266,13 +267,13 @@ struct srpc_service_cd { /** backref to service */ struct srpc_service *scd_svc; /** event buffer */ - srpc_event_t scd_ev; + struct srpc_event scd_ev; /** free RPC descriptors */ struct list_head scd_rpc_free; /** in-flight RPCs */ struct list_head scd_rpc_active; /** workitem for posting buffer */ - swi_workitem_t scd_buf_wi; + struct swi_workitem scd_buf_wi; /** CPT id */ int scd_cpt; /** error code for scd_buf_wi */ @@ -306,7 +307,7 @@ struct srpc_service_cd { #define SFW_FRWK_WI_MIN 16 #define SFW_FRWK_WI_MAX 256 -typedef struct srpc_service { +struct srpc_service { int sv_id; /* service id */ const char *sv_name; /* human readable name */ int sv_wi_total; /* total server workitems */ @@ -320,9 +321,9 @@ typedef struct srpc_service { */ int (*sv_handler)(struct srpc_server_rpc *); int (*sv_bulk_ready)(struct srpc_server_rpc *, int); -} srpc_service_t; +}; -typedef struct { +struct sfw_session { struct list_head sn_list; /* chain on fw_zombie_sessions */ lst_sid_t sn_id; /* unique identifier */ unsigned int sn_timeout; /* # seconds' inactivity to expire */ @@ -335,37 +336,37 @@ typedef struct { atomic_t sn_brw_errors; atomic_t sn_ping_errors; unsigned long sn_started; -} sfw_session_t; +}; #define sfw_sid_equal(sid0, sid1) ((sid0).ses_nid == (sid1).ses_nid && \ (sid0).ses_stamp == (sid1).ses_stamp) -typedef struct { +struct sfw_batch { struct list_head bat_list; /* chain on sn_batches */ lst_bid_t bat_id; /* batch id */ int bat_error; /* error code of batch */ - sfw_session_t *bat_session; /* batch's session */ + struct sfw_session *bat_session; /* batch's session */ atomic_t bat_nactive; /* # of active tests */ struct list_head bat_tests; /* test instances */ -} sfw_batch_t; +}; -typedef struct { +struct sfw_test_client_ops { int (*tso_init)(struct sfw_test_instance *tsi); /* initialize test * client */ void (*tso_fini)(struct sfw_test_instance *tsi); /* finalize test * client */ int (*tso_prep_rpc)(struct sfw_test_unit *tsu, lnet_process_id_t dest, - srpc_client_rpc_t **rpc); /* prep a tests rpc */ + struct srpc_client_rpc **rpc); /* prep a tests rpc */ void (*tso_done_rpc)(struct sfw_test_unit *tsu, - srpc_client_rpc_t *rpc); /* done a test rpc */ -} sfw_test_client_ops_t; + struct srpc_client_rpc *rpc); /* done a test rpc */ +}; -typedef struct sfw_test_instance { +struct sfw_test_instance { struct list_head tsi_list; /* chain on batch */ int tsi_service; /* test type */ - sfw_batch_t *tsi_batch; /* batch */ - sfw_test_client_ops_t *tsi_ops; /* test client operation + struct sfw_batch *tsi_batch; /* batch */ + struct sfw_test_client_ops *tsi_ops; /* test client operation */ /* public parameter for all test units */ @@ -384,11 +385,11 @@ typedef struct sfw_test_instance { struct list_head tsi_active_rpcs; /* active rpcs */ union { - test_ping_req_t ping; /* ping parameter */ - test_bulk_req_t bulk_v0; /* bulk parameter */ - test_bulk_req_v1_t bulk_v1; /* bulk v1 parameter */ + struct test_ping_req ping; /* ping parameter */ + struct test_bulk_req bulk_v0; /* bulk parameter */ + struct test_bulk_req_v1 bulk_v1; /* bulk v1 parameter */ } tsi_u; -} sfw_test_instance_t; +}; /* XXX: trailing (PAGE_SIZE % sizeof(lnet_process_id_t)) bytes at the end of * pages are not used */ @@ -397,57 +398,58 @@ typedef struct sfw_test_instance { #define SFW_MAX_NDESTS (LNET_MAX_IOV * SFW_ID_PER_PAGE) #define sfw_id_pages(n) (((n) + SFW_ID_PER_PAGE - 1) / SFW_ID_PER_PAGE) -typedef struct sfw_test_unit { +struct sfw_test_unit { struct list_head tsu_list; /* chain on lst_test_instance */ lnet_process_id_t tsu_dest; /* id of dest node */ int tsu_loop; /* loop count of the test */ - sfw_test_instance_t *tsu_instance; /* pointer to test instance */ + struct sfw_test_instance *tsu_instance; /* pointer to test instance */ void *tsu_private; /* private data */ - swi_workitem_t tsu_worker; /* workitem of the test unit */ -} sfw_test_unit_t; + struct swi_workitem tsu_worker; /* workitem of the test unit */ +}; -typedef struct sfw_test_case { +struct sfw_test_case { struct list_head tsc_list; /* chain on fw_tests */ - srpc_service_t *tsc_srv_service; /* test service */ - sfw_test_client_ops_t *tsc_cli_ops; /* ops of test client */ -} sfw_test_case_t; + struct srpc_service *tsc_srv_service; /* test service */ + struct sfw_test_client_ops *tsc_cli_ops; /* ops of test client */ +}; -srpc_client_rpc_t * +struct srpc_client_rpc * sfw_create_rpc(lnet_process_id_t peer, int service, unsigned features, int nbulkiov, int bulklen, - void (*done)(srpc_client_rpc_t *), void *priv); -int sfw_create_test_rpc(sfw_test_unit_t *tsu, + void (*done)(struct srpc_client_rpc *), void *priv); +int sfw_create_test_rpc(struct sfw_test_unit *tsu, lnet_process_id_t peer, unsigned features, - int nblk, int blklen, srpc_client_rpc_t **rpc); -void sfw_abort_rpc(srpc_client_rpc_t *rpc); -void sfw_post_rpc(srpc_client_rpc_t *rpc); -void sfw_client_rpc_done(srpc_client_rpc_t *rpc); -void sfw_unpack_message(srpc_msg_t *msg); + int nblk, int blklen, struct srpc_client_rpc **rpc); +void sfw_abort_rpc(struct srpc_client_rpc *rpc); +void sfw_post_rpc(struct srpc_client_rpc *rpc); +void sfw_client_rpc_done(struct srpc_client_rpc *rpc); +void sfw_unpack_message(struct srpc_msg *msg); void sfw_free_pages(struct srpc_server_rpc *rpc); -void sfw_add_bulk_page(srpc_bulk_t *bk, struct page *pg, int i); +void sfw_add_bulk_page(struct srpc_bulk *bk, struct page *pg, int i); int sfw_alloc_pages(struct srpc_server_rpc *rpc, int cpt, int npages, int len, int sink); -int sfw_make_session(srpc_mksn_reqst_t *request, srpc_mksn_reply_t *reply); +int sfw_make_session(struct srpc_mksn_reqst *request, + struct srpc_mksn_reply *reply); -srpc_client_rpc_t * +struct srpc_client_rpc * srpc_create_client_rpc(lnet_process_id_t peer, int service, int nbulkiov, int bulklen, - void (*rpc_done)(srpc_client_rpc_t *), - void (*rpc_fini)(srpc_client_rpc_t *), void *priv); -void srpc_post_rpc(srpc_client_rpc_t *rpc); -void srpc_abort_rpc(srpc_client_rpc_t *rpc, int why); -void srpc_free_bulk(srpc_bulk_t *bk); -srpc_bulk_t *srpc_alloc_bulk(int cpt, unsigned bulk_npg, unsigned bulk_len, - int sink); -int srpc_send_rpc(swi_workitem_t *wi); + void (*rpc_done)(struct srpc_client_rpc *), + void (*rpc_fini)(struct srpc_client_rpc *), void *priv); +void srpc_post_rpc(struct srpc_client_rpc *rpc); +void srpc_abort_rpc(struct srpc_client_rpc *rpc, int why); +void srpc_free_bulk(struct srpc_bulk *bk); +struct srpc_bulk *srpc_alloc_bulk(int cpt, unsigned bulk_npg, + unsigned bulk_len, int sink); +int srpc_send_rpc(struct swi_workitem *wi); int srpc_send_reply(struct srpc_server_rpc *rpc); -int srpc_add_service(srpc_service_t *sv); -int srpc_remove_service(srpc_service_t *sv); -void srpc_shutdown_service(srpc_service_t *sv); -void srpc_abort_service(srpc_service_t *sv); -int srpc_finish_service(srpc_service_t *sv); -int srpc_service_add_buffers(srpc_service_t *sv, int nbuffer); -void srpc_service_remove_buffers(srpc_service_t *sv, int nbuffer); +int srpc_add_service(struct srpc_service *sv); +int srpc_remove_service(struct srpc_service *sv); +void srpc_shutdown_service(struct srpc_service *sv); +void srpc_abort_service(struct srpc_service *sv); +int srpc_finish_service(struct srpc_service *sv); +int srpc_service_add_buffers(struct srpc_service *sv, int nbuffer); +void srpc_service_remove_buffers(struct srpc_service *sv, int nbuffer); void srpc_get_counters(srpc_counters_t *cnt); void srpc_set_counters(const srpc_counters_t *cnt); @@ -461,15 +463,17 @@ srpc_serv_is_framework(struct srpc_service *svc) } static inline int -swi_wi_action(cfs_workitem_t *wi) +swi_wi_action(struct cfs_workitem *wi) { - swi_workitem_t *swi = container_of(wi, swi_workitem_t, swi_workitem); + struct swi_workitem *swi; + + swi = container_of(wi, struct swi_workitem, swi_workitem); return swi->swi_action(swi); } static inline void -swi_init_workitem(swi_workitem_t *swi, void *data, +swi_init_workitem(struct swi_workitem *swi, void *data, swi_action_t action, struct cfs_wi_sched *sched) { swi->swi_sched = sched; @@ -479,19 +483,19 @@ swi_init_workitem(swi_workitem_t *swi, void *data, } static inline void -swi_schedule_workitem(swi_workitem_t *wi) +swi_schedule_workitem(struct swi_workitem *wi) { cfs_wi_schedule(wi->swi_sched, &wi->swi_workitem); } static inline void -swi_exit_workitem(swi_workitem_t *swi) +swi_exit_workitem(struct swi_workitem *swi) { cfs_wi_exit(swi->swi_sched, &swi->swi_workitem); } static inline int -swi_deschedule_workitem(swi_workitem_t *swi) +swi_deschedule_workitem(struct swi_workitem *swi) { return cfs_wi_deschedule(swi->swi_sched, &swi->swi_workitem); } @@ -502,7 +506,7 @@ void sfw_shutdown(void); void srpc_shutdown(void); static inline void -srpc_destroy_client_rpc(srpc_client_rpc_t *rpc) +srpc_destroy_client_rpc(struct srpc_client_rpc *rpc) { LASSERT(rpc); LASSERT(!srpc_event_pending(rpc)); @@ -515,14 +519,14 @@ srpc_destroy_client_rpc(srpc_client_rpc_t *rpc) } static inline void -srpc_init_client_rpc(srpc_client_rpc_t *rpc, lnet_process_id_t peer, +srpc_init_client_rpc(struct srpc_client_rpc *rpc, lnet_process_id_t peer, int service, int nbulkiov, int bulklen, - void (*rpc_done)(srpc_client_rpc_t *), - void (*rpc_fini)(srpc_client_rpc_t *), void *priv) + void (*rpc_done)(struct srpc_client_rpc *), + void (*rpc_fini)(struct srpc_client_rpc *), void *priv) { LASSERT(nbulkiov <= LNET_MAX_IOV); - memset(rpc, 0, offsetof(srpc_client_rpc_t, + memset(rpc, 0, offsetof(struct srpc_client_rpc, crpc_bulk.bk_iovs[nbulkiov])); INIT_LIST_HEAD(&rpc->crpc_list); @@ -592,7 +596,7 @@ do { \ } while (0) static inline void -srpc_wait_service_shutdown(srpc_service_t *sv) +srpc_wait_service_shutdown(struct srpc_service *sv) { int i = 2; @@ -607,16 +611,16 @@ srpc_wait_service_shutdown(srpc_service_t *sv) } } -extern sfw_test_client_ops_t brw_test_client; +extern struct sfw_test_client_ops brw_test_client; void brw_init_test_client(void); -extern srpc_service_t brw_test_service; +extern struct srpc_service brw_test_service; void brw_init_test_service(void); -extern sfw_test_client_ops_t ping_test_client; +extern struct sfw_test_client_ops ping_test_client; void ping_init_test_client(void); -extern srpc_service_t ping_test_service; +extern struct srpc_service ping_test_service; void ping_init_test_service(void); #endif /* __SELFTEST_SELFTEST_H__ */ diff --git a/drivers/staging/lustre/lnet/selftest/timer.c b/drivers/staging/lustre/lnet/selftest/timer.c index 8be52526a..b6c4aae00 100644 --- a/drivers/staging/lustre/lnet/selftest/timer.c +++ b/drivers/staging/lustre/lnet/selftest/timer.c @@ -49,7 +49,7 @@ * sorted by increasing expiry time. The number of slots is 2**7 (128), * to cover a time period of 1024 seconds into the future before wrapping. */ -#define STTIMER_MINPOLL 3 /* log2 min poll interval (8 s) */ +#define STTIMER_MINPOLL 3 /* log2 min poll interval (8 s) */ #define STTIMER_SLOTTIME (1 << STTIMER_MINPOLL) #define STTIMER_SLOTTIMEMASK (~(STTIMER_SLOTTIME - 1)) #define STTIMER_NSLOTS (1 << 7) @@ -170,20 +170,22 @@ stt_check_timers(unsigned long *last) static int stt_timer_main(void *arg) { + int rc = 0; + cfs_block_allsigs(); while (!stt_data.stt_shuttingdown) { stt_check_timers(&stt_data.stt_prev_slot); - wait_event_timeout(stt_data.stt_waitq, - stt_data.stt_shuttingdown, - cfs_time_seconds(STTIMER_SLOTTIME)); + rc = wait_event_timeout(stt_data.stt_waitq, + stt_data.stt_shuttingdown, + cfs_time_seconds(STTIMER_SLOTTIME)); } spin_lock(&stt_data.stt_lock); stt_data.stt_nthreads--; spin_unlock(&stt_data.stt_lock); - return 0; + return rc; } static int diff --git a/drivers/staging/lustre/lustre/fid/fid_request.c b/drivers/staging/lustre/lustre/fid/fid_request.c index 39269c3c5..3a4df6264 100644 --- a/drivers/staging/lustre/lustre/fid/fid_request.c +++ b/drivers/staging/lustre/lustre/fid/fid_request.c @@ -66,6 +66,7 @@ static int seq_client_rpc(struct lu_client_seq *seq, unsigned int debug_mask; int rc; + LASSERT(exp && !IS_ERR(exp)); req = ptlrpc_request_alloc_pack(class_exp2cliimp(exp), &RQF_SEQ_QUERY, LUSTRE_MDS_VERSION, SEQ_QUERY); if (!req) @@ -101,19 +102,22 @@ static int seq_client_rpc(struct lu_client_seq *seq, req->rq_no_delay = req->rq_no_resend = 1; debug_mask = D_CONSOLE; } else { - if (seq->lcs_type == LUSTRE_SEQ_METADATA) + if (seq->lcs_type == LUSTRE_SEQ_METADATA) { + req->rq_reply_portal = MDC_REPLY_PORTAL; req->rq_request_portal = SEQ_METADATA_PORTAL; - else + } else { + req->rq_reply_portal = OSC_REPLY_PORTAL; req->rq_request_portal = SEQ_DATA_PORTAL; + } debug_mask = D_INFO; } ptlrpc_at_set_req_timeout(req); - if (seq->lcs_type == LUSTRE_SEQ_METADATA) + if (opc != SEQ_ALLOC_SUPER && seq->lcs_type == LUSTRE_SEQ_METADATA) mdc_get_rpc_lock(exp->exp_obd->u.cli.cl_rpc_lock, NULL); rc = ptlrpc_queue_wait(req); - if (seq->lcs_type == LUSTRE_SEQ_METADATA) + if (opc != SEQ_ALLOC_SUPER && seq->lcs_type == LUSTRE_SEQ_METADATA) mdc_put_rpc_lock(exp->exp_obd->u.cli.cl_rpc_lock, NULL); if (rc) goto out_req; diff --git a/drivers/staging/lustre/lustre/fld/fld_cache.c b/drivers/staging/lustre/lustre/fld/fld_cache.c index 062f388cf..5a04e99d9 100644 --- a/drivers/staging/lustre/lustre/fld/fld_cache.c +++ b/drivers/staging/lustre/lustre/fld/fld_cache.c @@ -178,8 +178,9 @@ restart_fixup: if (n_range->lsr_end <= c_range->lsr_end) { *n_range = *c_range; fld_cache_entry_delete(cache, f_curr); - } else + } else { n_range->lsr_start = c_range->lsr_end; + } } /* we could have overlap over next diff --git a/drivers/staging/lustre/lustre/fld/fld_internal.h b/drivers/staging/lustre/lustre/fld/fld_internal.h index e8a3caf20..75d6a4863 100644 --- a/drivers/staging/lustre/lustre/fld/fld_internal.h +++ b/drivers/staging/lustre/lustre/fld/fld_internal.h @@ -101,12 +101,6 @@ struct fld_cache { unsigned int fci_no_shrink:1; }; -enum fld_op { - FLD_CREATE = 0, - FLD_DELETE = 1, - FLD_LOOKUP = 2 -}; - enum { /* 4M of FLD cache will not hurt client a lot. */ FLD_SERVER_CACHE_SIZE = (4 * 0x100000), @@ -126,7 +120,8 @@ enum { extern struct lu_fld_hash fld_hash[]; int fld_client_rpc(struct obd_export *exp, - struct lu_seq_range *range, __u32 fld_op); + struct lu_seq_range *range, __u32 fld_op, + struct ptlrpc_request **reqp); extern struct lprocfs_vars fld_client_debugfs_list[]; diff --git a/drivers/staging/lustre/lustre/fld/fld_request.c b/drivers/staging/lustre/lustre/fld/fld_request.c index a3d122d85..304c0ec26 100644 --- a/drivers/staging/lustre/lustre/fld/fld_request.c +++ b/drivers/staging/lustre/lustre/fld/fld_request.c @@ -64,9 +64,9 @@ static int fld_req_avail(struct client_obd *cli, struct mdc_cache_waiter *mcw) { int rc; - client_obd_list_lock(&cli->cl_loi_list_lock); + spin_lock(&cli->cl_loi_list_lock); rc = list_empty(&mcw->mcw_entry); - client_obd_list_unlock(&cli->cl_loi_list_lock); + spin_unlock(&cli->cl_loi_list_lock); return rc; }; @@ -75,15 +75,15 @@ static void fld_enter_request(struct client_obd *cli) struct mdc_cache_waiter mcw; struct l_wait_info lwi = { 0 }; - client_obd_list_lock(&cli->cl_loi_list_lock); + spin_lock(&cli->cl_loi_list_lock); if (cli->cl_r_in_flight >= cli->cl_max_rpcs_in_flight) { list_add_tail(&mcw.mcw_entry, &cli->cl_cache_waiters); init_waitqueue_head(&mcw.mcw_waitq); - client_obd_list_unlock(&cli->cl_loi_list_lock); + spin_unlock(&cli->cl_loi_list_lock); l_wait_event(mcw.mcw_waitq, fld_req_avail(cli, &mcw), &lwi); } else { cli->cl_r_in_flight++; - client_obd_list_unlock(&cli->cl_loi_list_lock); + spin_unlock(&cli->cl_loi_list_lock); } } @@ -92,10 +92,9 @@ static void fld_exit_request(struct client_obd *cli) struct list_head *l, *tmp; struct mdc_cache_waiter *mcw; - client_obd_list_lock(&cli->cl_loi_list_lock); + spin_lock(&cli->cl_loi_list_lock); cli->cl_r_in_flight--; list_for_each_safe(l, tmp, &cli->cl_cache_waiters) { - if (cli->cl_r_in_flight >= cli->cl_max_rpcs_in_flight) { /* No free request slots anymore */ break; @@ -106,7 +105,7 @@ static void fld_exit_request(struct client_obd *cli) cli->cl_r_in_flight++; wake_up(&mcw->mcw_waitq); } - client_obd_list_unlock(&cli->cl_loi_list_lock); + spin_unlock(&cli->cl_loi_list_lock); } static int fld_rrb_hash(struct lu_client_fld *fld, u64 seq) @@ -392,55 +391,82 @@ void fld_client_fini(struct lu_client_fld *fld) EXPORT_SYMBOL(fld_client_fini); int fld_client_rpc(struct obd_export *exp, - struct lu_seq_range *range, __u32 fld_op) + struct lu_seq_range *range, __u32 fld_op, + struct ptlrpc_request **reqp) { - struct ptlrpc_request *req; + struct ptlrpc_request *req = NULL; struct lu_seq_range *prange; __u32 *op; - int rc; + int rc = 0; struct obd_import *imp; LASSERT(exp); imp = class_exp2cliimp(exp); - req = ptlrpc_request_alloc_pack(imp, &RQF_FLD_QUERY, LUSTRE_MDS_VERSION, - FLD_QUERY); - if (!req) - return -ENOMEM; - - op = req_capsule_client_get(&req->rq_pill, &RMF_FLD_OPC); - *op = fld_op; + switch (fld_op) { + case FLD_QUERY: + req = ptlrpc_request_alloc_pack(imp, &RQF_FLD_QUERY, + LUSTRE_MDS_VERSION, FLD_QUERY); + if (!req) + return -ENOMEM; + + /* + * XXX: only needed when talking to old server(< 2.6), it should + * be removed when < 2.6 server is not supported + */ + op = req_capsule_client_get(&req->rq_pill, &RMF_FLD_OPC); + *op = FLD_LOOKUP; + + if (imp->imp_connect_flags_orig & OBD_CONNECT_MDS_MDS) + req->rq_allow_replay = 1; + break; + case FLD_READ: + req = ptlrpc_request_alloc_pack(imp, &RQF_FLD_READ, + LUSTRE_MDS_VERSION, FLD_READ); + if (!req) + return -ENOMEM; + + req_capsule_set_size(&req->rq_pill, &RMF_GENERIC_DATA, + RCL_SERVER, PAGE_SIZE); + break; + default: + rc = -EINVAL; + break; + } + if (rc) + return rc; prange = req_capsule_client_get(&req->rq_pill, &RMF_FLD_MDFLD); *prange = *range; - ptlrpc_request_set_replen(req); req->rq_request_portal = FLD_REQUEST_PORTAL; req->rq_reply_portal = MDC_REPLY_PORTAL; ptlrpc_at_set_req_timeout(req); - if (fld_op == FLD_LOOKUP && - imp->imp_connect_flags_orig & OBD_CONNECT_MDS_MDS) - req->rq_allow_replay = 1; - - if (fld_op != FLD_LOOKUP) - mdc_get_rpc_lock(exp->exp_obd->u.cli.cl_rpc_lock, NULL); fld_enter_request(&exp->exp_obd->u.cli); rc = ptlrpc_queue_wait(req); fld_exit_request(&exp->exp_obd->u.cli); - if (fld_op != FLD_LOOKUP) - mdc_put_rpc_lock(exp->exp_obd->u.cli.cl_rpc_lock, NULL); if (rc) goto out_req; - prange = req_capsule_server_get(&req->rq_pill, &RMF_FLD_MDFLD); - if (!prange) { - rc = -EFAULT; - goto out_req; + if (fld_op == FLD_QUERY) { + prange = req_capsule_server_get(&req->rq_pill, &RMF_FLD_MDFLD); + if (!prange) { + rc = -EFAULT; + goto out_req; + } + *range = *prange; } - *range = *prange; + out_req: - ptlrpc_req_finished(req); + if (rc || !reqp) { + ptlrpc_req_finished(req); + req = NULL; + } + + if (reqp) + *reqp = req; + return rc; } @@ -468,7 +494,7 @@ int fld_client_lookup(struct lu_client_fld *fld, u64 seq, u32 *mds, res.lsr_start = seq; fld_range_set_type(&res, flags); - rc = fld_client_rpc(target->ft_exp, &res, FLD_LOOKUP); + rc = fld_client_rpc(target->ft_exp, &res, FLD_QUERY, NULL); if (rc == 0) { *mds = res.lsr_index; diff --git a/drivers/staging/lustre/lustre/include/cl_object.h b/drivers/staging/lustre/lustre/include/cl_object.h index fb971ded5..d4c33dd11 100644 --- a/drivers/staging/lustre/lustre/include/cl_object.h +++ b/drivers/staging/lustre/lustre/include/cl_object.h @@ -82,7 +82,6 @@ * - i_mutex * - PG_locked * - cl_object_header::coh_page_guard - * - cl_object_header::coh_lock_guard * - lu_site::ls_guard * * See the top comment in cl_object.c for the description of overall locking and @@ -98,9 +97,12 @@ * super-class definitions. */ #include "lu_object.h" +#include #include "linux/lustre_compat25.h" #include #include +#include +#include struct inode; @@ -138,7 +140,7 @@ struct cl_device_operations { * cl_req_slice_add(). * * \see osc_req_init(), lov_req_init(), lovsub_req_init() - * \see ccc_req_init() + * \see vvp_req_init() */ int (*cdo_req_init)(const struct lu_env *env, struct cl_device *dev, struct cl_req *req); @@ -147,7 +149,7 @@ struct cl_device_operations { /** * Device in the client stack. * - * \see ccc_device, lov_device, lovsub_device, osc_device + * \see vvp_device, lov_device, lovsub_device, osc_device */ struct cl_device { /** Super-class. */ @@ -243,7 +245,7 @@ enum cl_attr_valid { * be discarded from the memory, all its sub-objects are torn-down and * destroyed too. * - * \see ccc_object, lov_object, lovsub_object, osc_object + * \see vvp_object, lov_object, lovsub_object, osc_object */ struct cl_object { /** super class */ @@ -322,7 +324,7 @@ struct cl_object_operations { * to be used instead of newly created. */ int (*coo_page_init)(const struct lu_env *env, struct cl_object *obj, - struct cl_page *page, struct page *vmpage); + struct cl_page *page, pgoff_t index); /** * Initialize lock slice for this layer. Called top-to-bottom through * every object layer when a new cl_lock is instantiated. Layer @@ -383,11 +385,17 @@ struct cl_object_operations { * object. Layers are supposed to fill parts of \a lvb that will be * shipped to the glimpse originator as a glimpse result. * - * \see ccc_object_glimpse(), lovsub_object_glimpse(), + * \see vvp_object_glimpse(), lovsub_object_glimpse(), * \see osc_object_glimpse() */ int (*coo_glimpse)(const struct lu_env *env, const struct cl_object *obj, struct ost_lvb *lvb); + /** + * Object prune method. Called when the layout is going to change on + * this object, therefore each layer has to clean up their cache, + * mainly pages and locks. + */ + int (*coo_prune)(const struct lu_env *env, struct cl_object *obj); }; /** @@ -398,22 +406,6 @@ struct cl_object_header { * here. */ struct lu_object_header coh_lu; - /** \name locks - * \todo XXX move locks below to the separate cache-lines, they are - * mostly useless otherwise. - */ - /** @{ */ - /** Lock protecting page tree. */ - spinlock_t coh_page_guard; - /** Lock protecting lock list. */ - spinlock_t coh_lock_guard; - /** @} locks */ - /** Radix tree of cl_page's, cached for this object. */ - struct radix_tree_root coh_tree; - /** # of pages in radix tree. */ - unsigned long coh_pages; - /** List of cl_lock's granted for this object. */ - struct list_head coh_locks; /** * Parent object. It is assumed that an object has a well-defined @@ -460,10 +452,6 @@ struct cl_object_header { co_lu.lo_linkage) /** @} cl_object */ -#ifndef pgoff_t -#define pgoff_t unsigned long -#endif - #define CL_PAGE_EOF ((pgoff_t)~0ull) /** \addtogroup cl_page cl_page @@ -727,16 +715,10 @@ struct cl_page { atomic_t cp_ref; /** An object this page is a part of. Immutable after creation. */ struct cl_object *cp_obj; - /** Logical page index within the object. Immutable after creation. */ - pgoff_t cp_index; /** List of slices. Immutable after creation. */ struct list_head cp_layers; - /** Parent page, NULL for top-level page. Immutable after creation. */ - struct cl_page *cp_parent; - /** Lower-layer page. NULL for bottommost page. Immutable after - * creation. - */ - struct cl_page *cp_child; + /** vmpage */ + struct page *cp_vmpage; /** * Page state. This field is const to avoid accidental update, it is * modified only internally within cl_page.c. Protected by a VM lock. @@ -787,10 +769,11 @@ struct cl_page { /** * Per-layer part of cl_page. * - * \see ccc_page, lov_page, osc_page + * \see vvp_page, lov_page, osc_page */ struct cl_page_slice { struct cl_page *cpl_page; + pgoff_t cpl_index; /** * Object slice corresponding to this page slice. Immutable after * creation. @@ -804,16 +787,9 @@ struct cl_page_slice { /** * Lock mode. For the client extent locks. * - * \warning: cl_lock_mode_match() assumes particular ordering here. * \ingroup cl_lock */ enum cl_lock_mode { - /** - * Mode of a lock that protects no data, and exists only as a - * placeholder. This is used for `glimpse' requests. A phantom lock - * might get promoted to real lock at some point. - */ - CLM_PHANTOM, CLM_READ, CLM_WRITE, CLM_GROUP @@ -845,11 +821,6 @@ struct cl_page_operations { * provided by the topmost layer, see cl_page_disown0() as an example. */ - /** - * \return the underlying VM page. Optional. - */ - struct page *(*cpo_vmpage)(const struct lu_env *env, - const struct cl_page_slice *slice); /** * Called when \a io acquires this page into the exclusive * ownership. When this method returns, it is guaranteed that the is @@ -896,14 +867,6 @@ struct cl_page_operations { */ void (*cpo_export)(const struct lu_env *env, const struct cl_page_slice *slice, int uptodate); - /** - * Unmaps page from the user space (if it is mapped). - * - * \see cl_page_unmap() - * \see vvp_page_unmap() - */ - int (*cpo_unmap)(const struct lu_env *env, - const struct cl_page_slice *slice, struct cl_io *io); /** * Checks whether underlying VM page is locked (in the suitable * sense). Used for assertions. @@ -957,7 +920,7 @@ struct cl_page_operations { */ int (*cpo_is_under_lock)(const struct lu_env *env, const struct cl_page_slice *slice, - struct cl_io *io); + struct cl_io *io, pgoff_t *max); /** * Optional debugging helper. Prints given page slice. @@ -1027,26 +990,6 @@ struct cl_page_operations { */ int (*cpo_make_ready)(const struct lu_env *env, const struct cl_page_slice *slice); - /** - * Announce that this page is to be written out - * opportunistically, that is, page is dirty, it is not - * necessary to start write-out transfer right now, but - * eventually page has to be written out. - * - * Main caller of this is the write path (see - * vvp_io_commit_write()), using this method to build a - * "transfer cache" from which large transfers are then - * constructed by the req-formation engine. - * - * \todo XXX it would make sense to add page-age tracking - * semantics here, and to oblige the req-formation engine to - * send the page out not later than it is too old. - * - * \see cl_page_cache_add() - */ - int (*cpo_cache_add)(const struct lu_env *env, - const struct cl_page_slice *slice, - struct cl_io *io); } io[CRT_NR]; /** * Tell transfer engine that only [to, from] part of a page should be @@ -1098,9 +1041,8 @@ struct cl_page_operations { */ #define CL_PAGE_DEBUG(mask, env, page, format, ...) \ do { \ - LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, mask, NULL); \ - \ if (cfs_cdebug_show(mask, DEBUG_SUBSYSTEM)) { \ + LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, mask, NULL); \ cl_page_print(env, &msgdata, lu_cdebug_printer, page); \ CDEBUG(mask, format, ## __VA_ARGS__); \ } \ @@ -1111,9 +1053,8 @@ do { \ */ #define CL_PAGE_HEADER(mask, env, page, format, ...) \ do { \ - LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, mask, NULL); \ - \ if (cfs_cdebug_show(mask, DEBUG_SUBSYSTEM)) { \ + LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, mask, NULL); \ cl_page_header_print(env, &msgdata, lu_cdebug_printer, page); \ CDEBUG(mask, format, ## __VA_ARGS__); \ } \ @@ -1130,6 +1071,12 @@ static inline int __page_in_use(const struct cl_page *page, int refc) #define cl_page_in_use(pg) __page_in_use(pg, 1) #define cl_page_in_use_noref(pg) __page_in_use(pg, 0) +static inline struct page *cl_page_vmpage(struct cl_page *page) +{ + LASSERT(page->cp_vmpage); + return page->cp_vmpage; +} + /** @} cl_page */ /** \addtogroup cl_lock cl_lock @@ -1150,12 +1097,6 @@ static inline int __page_in_use(const struct cl_page *page, int refc) * (struct cl_lock) and a list of layers (struct cl_lock_slice), linked to * cl_lock::cll_layers list through cl_lock_slice::cls_linkage. * - * All locks for a given object are linked into cl_object_header::coh_locks - * list (protected by cl_object_header::coh_lock_guard spin-lock) through - * cl_lock::cll_linkage. Currently this list is not sorted in any way. We can - * sort it in starting lock offset, or use altogether different data structure - * like a tree. - * * Typical cl_lock consists of the two layers: * * - vvp_lock (vvp specific data), and @@ -1177,111 +1118,29 @@ static inline int __page_in_use(const struct cl_page *page, int refc) * * LIFE CYCLE * - * cl_lock is reference counted. When reference counter drops to 0, lock is - * placed in the cache, except when lock is in CLS_FREEING state. CLS_FREEING - * lock is destroyed when last reference is released. Referencing between - * top-lock and its sub-locks is described in the lov documentation module. - * - * STATE MACHINE - * - * Also, cl_lock is a state machine. This requires some clarification. One of - * the goals of client IO re-write was to make IO path non-blocking, or at - * least to make it easier to make it non-blocking in the future. Here - * `non-blocking' means that when a system call (read, write, truncate) - * reaches a situation where it has to wait for a communication with the - * server, it should --instead of waiting-- remember its current state and - * switch to some other work. E.g,. instead of waiting for a lock enqueue, - * client should proceed doing IO on the next stripe, etc. Obviously this is - * rather radical redesign, and it is not planned to be fully implemented at - * this time, instead we are putting some infrastructure in place, that would - * make it easier to do asynchronous non-blocking IO easier in the - * future. Specifically, where old locking code goes to sleep (waiting for - * enqueue, for example), new code returns cl_lock_transition::CLO_WAIT. When - * enqueue reply comes, its completion handler signals that lock state-machine - * is ready to transit to the next state. There is some generic code in - * cl_lock.c that sleeps, waiting for these signals. As a result, for users of - * this cl_lock.c code, it looks like locking is done in normal blocking - * fashion, and it the same time it is possible to switch to the non-blocking - * locking (simply by returning cl_lock_transition::CLO_WAIT from cl_lock.c - * functions). - * - * For a description of state machine states and transitions see enum - * cl_lock_state. - * - * There are two ways to restrict a set of states which lock might move to: - * - * - placing a "hold" on a lock guarantees that lock will not be moved - * into cl_lock_state::CLS_FREEING state until hold is released. Hold - * can be only acquired on a lock that is not in - * cl_lock_state::CLS_FREEING. All holds on a lock are counted in - * cl_lock::cll_holds. Hold protects lock from cancellation and - * destruction. Requests to cancel and destroy a lock on hold will be - * recorded, but only honored when last hold on a lock is released; - * - * - placing a "user" on a lock guarantees that lock will not leave - * cl_lock_state::CLS_NEW, cl_lock_state::CLS_QUEUING, - * cl_lock_state::CLS_ENQUEUED and cl_lock_state::CLS_HELD set of - * states, once it enters this set. That is, if a user is added onto a - * lock in a state not from this set, it doesn't immediately enforce - * lock to move to this set, but once lock enters this set it will - * remain there until all users are removed. Lock users are counted in - * cl_lock::cll_users. - * - * User is used to assure that lock is not canceled or destroyed while - * it is being enqueued, or actively used by some IO. - * - * Currently, a user always comes with a hold (cl_lock_invariant() - * checks that a number of holds is not less than a number of users). - * - * CONCURRENCY - * - * This is how lock state-machine operates. struct cl_lock contains a mutex - * cl_lock::cll_guard that protects struct fields. - * - * - mutex is taken, and cl_lock::cll_state is examined. - * - * - for every state there are possible target states where lock can move - * into. They are tried in order. Attempts to move into next state are - * done by _try() functions in cl_lock.c:cl_{enqueue,unlock,wait}_try(). - * - * - if the transition can be performed immediately, state is changed, - * and mutex is released. - * - * - if the transition requires blocking, _try() function returns - * cl_lock_transition::CLO_WAIT. Caller unlocks mutex and goes to - * sleep, waiting for possibility of lock state change. It is woken - * up when some event occurs, that makes lock state change possible - * (e.g., the reception of the reply from the server), and repeats - * the loop. - * - * Top-lock and sub-lock has separate mutexes and the latter has to be taken - * first to avoid dead-lock. - * - * To see an example of interaction of all these issues, take a look at the - * lov_cl.c:lov_lock_enqueue() function. It is called as a part of - * cl_enqueue_try(), and tries to advance top-lock to ENQUEUED state, by - * advancing state-machines of its sub-locks (lov_lock_enqueue_one()). Note - * also, that it uses trylock to grab sub-lock mutex to avoid dead-lock. It - * also has to handle CEF_ASYNC enqueue, when sub-locks enqueues have to be - * done in parallel, rather than one after another (this is used for glimpse - * locks, that cannot dead-lock). + * cl_lock is a cacheless data container for the requirements of locks to + * complete the IO. cl_lock is created before I/O starts and destroyed when the + * I/O is complete. + * + * cl_lock depends on LDLM lock to fulfill lock semantics. LDLM lock is attached + * to cl_lock at OSC layer. LDLM lock is still cacheable. * * INTERFACE AND USAGE * - * struct cl_lock_operations provide a number of call-backs that are invoked - * when events of interest occurs. Layers can intercept and handle glimpse, - * blocking, cancel ASTs and a reception of the reply from the server. + * Two major methods are supported for cl_lock: clo_enqueue and clo_cancel. A + * cl_lock is enqueued by cl_lock_request(), which will call clo_enqueue() + * methods for each layer to enqueue the lock. At the LOV layer, if a cl_lock + * consists of multiple sub cl_locks, each sub locks will be enqueued + * correspondingly. At OSC layer, the lock enqueue request will tend to reuse + * cached LDLM lock; otherwise a new LDLM lock will have to be requested from + * OST side. * - * One important difference with the old client locking model is that new - * client has a representation for the top-lock, whereas in the old code only - * sub-locks existed as real data structures and file-level locks are - * represented by "request sets" that are created and destroyed on each and - * every lock creation. + * cl_lock_cancel() must be called to release a cl_lock after use. clo_cancel() + * method will be called for each layer to release the resource held by this + * lock. At OSC layer, the reference count of LDLM lock, which is held at + * clo_enqueue time, is released. * - * Top-locks are cached, and can be found in the cache by the system calls. It - * is possible that top-lock is in cache, but some of its sub-locks were - * canceled and destroyed. In that case top-lock has to be enqueued again - * before it can be used. + * LDLM lock can only be canceled if there is no cl_lock using it. * * Overall process of the locking during IO operation is as following: * @@ -1294,7 +1153,7 @@ static inline int __page_in_use(const struct cl_page *page, int refc) * * - when all locks are acquired, IO is performed; * - * - locks are released into cache. + * - locks are released after IO is complete. * * Striping introduces major additional complexity into locking. The * fundamental problem is that it is generally unsafe to actively use (hold) @@ -1316,16 +1175,6 @@ static inline int __page_in_use(const struct cl_page *page, int refc) * buf is a part of memory mapped Lustre file, a lock or locks protecting buf * has to be held together with the usual lock on [offset, offset + count]. * - * As multi-stripe locks have to be allowed, it makes sense to cache them, so - * that, for example, a sequence of O_APPEND writes can proceed quickly - * without going down to the individual stripes to do lock matching. On the - * other hand, multi-stripe locks shouldn't be used by normal read/write - * calls. To achieve this, every layer can implement ->clo_fits_into() method, - * that is called by lock matching code (cl_lock_lookup()), and that can be - * used to selectively disable matching of certain locks for certain IOs. For - * example, lov layer implements lov_lock_fits_into() that allow multi-stripe - * locks to be matched only for truncates and O_APPEND writes. - * * Interaction with DLM * * In the expected setup, cl_lock is ultimately backed up by a collection of @@ -1356,295 +1205,27 @@ struct cl_lock_descr { __u32 cld_enq_flags; }; -#define DDESCR "%s(%d):[%lu, %lu]" +#define DDESCR "%s(%d):[%lu, %lu]:%x" #define PDESCR(descr) \ cl_lock_mode_name((descr)->cld_mode), (descr)->cld_mode, \ - (descr)->cld_start, (descr)->cld_end + (descr)->cld_start, (descr)->cld_end, (descr)->cld_enq_flags const char *cl_lock_mode_name(const enum cl_lock_mode mode); -/** - * Lock state-machine states. - * - * \htmlonly - *
- *
- * Possible state transitions:
- *
- *	      +------------------>NEW
- *	      |		    |
- *	      |		    | cl_enqueue_try()
- *	      |		    |
- *	      |    cl_unuse_try()  V
- *	      |  +--------------QUEUING (*)
- *	      |  |		 |
- *	      |  |		 | cl_enqueue_try()
- *	      |  |		 |
- *	      |  | cl_unuse_try()  V
- *    sub-lock  |  +-------------ENQUEUED (*)
- *    canceled  |  |		 |
- *	      |  |		 | cl_wait_try()
- *	      |  |		 |
- *	      |  |		(R)
- *	      |  |		 |
- *	      |  |		 V
- *	      |  |		HELD<---------+
- *	      |  |		 |	    |
- *	      |  |		 |	    | cl_use_try()
- *	      |  |  cl_unuse_try() |	    |
- *	      |  |		 |	    |
- *	      |  |		 V	 ---+
- *	      |  +------------>INTRANSIT (D) <--+
- *	      |		    |	    |
- *	      |     cl_unuse_try() |	    | cached lock found
- *	      |		    |	    | cl_use_try()
- *	      |		    |	    |
- *	      |		    V	    |
- *	      +------------------CACHED---------+
- *				   |
- *				  (C)
- *				   |
- *				   V
- *				FREEING
- *
- * Legend:
- *
- *	 In states marked with (*) transition to the same state (i.e., a loop
- *	 in the diagram) is possible.
- *
- *	 (R) is the point where Receive call-back is invoked: it allows layers
- *	 to handle arrival of lock reply.
- *
- *	 (C) is the point where Cancellation call-back is invoked.
- *
- *	 (D) is the transit state which means the lock is changing.
- *
- *	 Transition to FREEING state is possible from any other state in the
- *	 diagram in case of unrecoverable error.
- * 
- * \endhtmlonly - * - * These states are for individual cl_lock object. Top-lock and its sub-locks - * can be in the different states. Another way to say this is that we have - * nested state-machines. - * - * Separate QUEUING and ENQUEUED states are needed to support non-blocking - * operation for locks with multiple sub-locks. Imagine lock on a file F, that - * intersects 3 stripes S0, S1, and S2. To enqueue F client has to send - * enqueue to S0, wait for its completion, then send enqueue for S1, wait for - * its completion and at last enqueue lock for S2, and wait for its - * completion. In that case, top-lock is in QUEUING state while S0, S1 are - * handled, and is in ENQUEUED state after enqueue to S2 has been sent (note - * that in this case, sub-locks move from state to state, and top-lock remains - * in the same state). - */ -enum cl_lock_state { - /** - * Lock that wasn't yet enqueued - */ - CLS_NEW, - /** - * Enqueue is in progress, blocking for some intermediate interaction - * with the other side. - */ - CLS_QUEUING, - /** - * Lock is fully enqueued, waiting for server to reply when it is - * granted. - */ - CLS_ENQUEUED, - /** - * Lock granted, actively used by some IO. - */ - CLS_HELD, - /** - * This state is used to mark the lock is being used, or unused. - * We need this state because the lock may have several sublocks, - * so it's impossible to have an atomic way to bring all sublocks - * into CLS_HELD state at use case, or all sublocks to CLS_CACHED - * at unuse case. - * If a thread is referring to a lock, and it sees the lock is in this - * state, it must wait for the lock. - * See state diagram for details. - */ - CLS_INTRANSIT, - /** - * Lock granted, not used. - */ - CLS_CACHED, - /** - * Lock is being destroyed. - */ - CLS_FREEING, - CLS_NR -}; - -enum cl_lock_flags { - /** - * lock has been cancelled. This flag is never cleared once set (by - * cl_lock_cancel0()). - */ - CLF_CANCELLED = 1 << 0, - /** cancellation is pending for this lock. */ - CLF_CANCELPEND = 1 << 1, - /** destruction is pending for this lock. */ - CLF_DOOMED = 1 << 2, - /** from enqueue RPC reply upcall. */ - CLF_FROM_UPCALL = 1 << 3, -}; - -/** - * Lock closure. - * - * Lock closure is a collection of locks (both top-locks and sub-locks) that - * might be updated in a result of an operation on a certain lock (which lock - * this is a closure of). - * - * Closures are needed to guarantee dead-lock freedom in the presence of - * - * - nested state-machines (top-lock state-machine composed of sub-lock - * state-machines), and - * - * - shared sub-locks. - * - * Specifically, many operations, such as lock enqueue, wait, unlock, - * etc. start from a top-lock, and then operate on a sub-locks of this - * top-lock, holding a top-lock mutex. When sub-lock state changes as a result - * of such operation, this change has to be propagated to all top-locks that - * share this sub-lock. Obviously, no natural lock ordering (e.g., - * top-to-bottom or bottom-to-top) captures this scenario, so try-locking has - * to be used. Lock closure systematizes this try-and-repeat logic. - */ -struct cl_lock_closure { - /** - * Lock that is mutexed when closure construction is started. When - * closure in is `wait' mode (cl_lock_closure::clc_wait), mutex on - * origin is released before waiting. - */ - struct cl_lock *clc_origin; - /** - * List of enclosed locks, so far. Locks are linked here through - * cl_lock::cll_inclosure. - */ - struct list_head clc_list; - /** - * True iff closure is in a `wait' mode. This determines what - * cl_lock_enclosure() does when a lock L to be added to the closure - * is currently mutexed by some other thread. - * - * If cl_lock_closure::clc_wait is not set, then closure construction - * fails with CLO_REPEAT immediately. - * - * In wait mode, cl_lock_enclosure() waits until next attempt to build - * a closure might succeed. To this end it releases an origin mutex - * (cl_lock_closure::clc_origin), that has to be the only lock mutex - * owned by the current thread, and then waits on L mutex (by grabbing - * it and immediately releasing), before returning CLO_REPEAT to the - * caller. - */ - int clc_wait; - /** Number of locks in the closure. */ - int clc_nr; -}; - /** * Layered client lock. */ struct cl_lock { - /** Reference counter. */ - atomic_t cll_ref; /** List of slices. Immutable after creation. */ struct list_head cll_layers; - /** - * Linkage into cl_lock::cll_descr::cld_obj::coh_locks list. Protected - * by cl_lock::cll_descr::cld_obj::coh_lock_guard. - */ - struct list_head cll_linkage; - /** - * Parameters of this lock. Protected by - * cl_lock::cll_descr::cld_obj::coh_lock_guard nested within - * cl_lock::cll_guard. Modified only on lock creation and in - * cl_lock_modify(). - */ + /** lock attribute, extent, cl_object, etc. */ struct cl_lock_descr cll_descr; - /** Protected by cl_lock::cll_guard. */ - enum cl_lock_state cll_state; - /** signals state changes. */ - wait_queue_head_t cll_wq; - /** - * Recursive lock, most fields in cl_lock{} are protected by this. - * - * Locking rules: this mutex is never held across network - * communication, except when lock is being canceled. - * - * Lock ordering: a mutex of a sub-lock is taken first, then a mutex - * on a top-lock. Other direction is implemented through a - * try-lock-repeat loop. Mutices of unrelated locks can be taken only - * by try-locking. - * - * \see osc_lock_enqueue_wait(), lov_lock_cancel(), lov_sublock_wait(). - */ - struct mutex cll_guard; - struct task_struct *cll_guarder; - int cll_depth; - - /** - * the owner for INTRANSIT state - */ - struct task_struct *cll_intransit_owner; - int cll_error; - /** - * Number of holds on a lock. A hold prevents a lock from being - * canceled and destroyed. Protected by cl_lock::cll_guard. - * - * \see cl_lock_hold(), cl_lock_unhold(), cl_lock_release() - */ - int cll_holds; - /** - * Number of lock users. Valid in cl_lock_state::CLS_HELD state - * only. Lock user pins lock in CLS_HELD state. Protected by - * cl_lock::cll_guard. - * - * \see cl_wait(), cl_unuse(). - */ - int cll_users; - /** - * Flag bit-mask. Values from enum cl_lock_flags. Updates are - * protected by cl_lock::cll_guard. - */ - unsigned long cll_flags; - /** - * A linkage into a list of locks in a closure. - * - * \see cl_lock_closure - */ - struct list_head cll_inclosure; - /** - * Confict lock at queuing time. - */ - struct cl_lock *cll_conflict; - /** - * A list of references to this lock, for debugging. - */ - struct lu_ref cll_reference; - /** - * A list of holds on this lock, for debugging. - */ - struct lu_ref cll_holders; - /** - * A reference for cl_lock::cll_descr::cld_obj. For debugging. - */ - struct lu_ref_link cll_obj_ref; -#ifdef CONFIG_LOCKDEP - /* "dep_map" name is assumed by lockdep.h macros. */ - struct lockdep_map dep_map; -#endif }; /** * Per-layer part of cl_lock * - * \see ccc_lock, lov_lock, lovsub_lock, osc_lock + * \see vvp_lock, lov_lock, lovsub_lock, osc_lock */ struct cl_lock_slice { struct cl_lock *cls_lock; @@ -1657,175 +1238,37 @@ struct cl_lock_slice { struct list_head cls_linkage; }; -/** - * Possible (non-error) return values of ->clo_{enqueue,wait,unlock}(). - * - * NOTE: lov_subresult() depends on ordering here. - */ -enum cl_lock_transition { - /** operation cannot be completed immediately. Wait for state change. */ - CLO_WAIT = 1, - /** operation had to release lock mutex, restart. */ - CLO_REPEAT = 2, - /** lower layer re-enqueued. */ - CLO_REENQUEUED = 3, -}; - /** * * \see vvp_lock_ops, lov_lock_ops, lovsub_lock_ops, osc_lock_ops */ struct cl_lock_operations { - /** - * \name statemachine - * - * State machine transitions. These 3 methods are called to transfer - * lock from one state to another, as described in the commentary - * above enum #cl_lock_state. - * - * \retval 0 this layer has nothing more to do to before - * transition to the target state happens; - * - * \retval CLO_REPEAT method had to release and re-acquire cl_lock - * mutex, repeat invocation of transition method - * across all layers; - * - * \retval CLO_WAIT this layer cannot move to the target state - * immediately, as it has to wait for certain event - * (e.g., the communication with the server). It - * is guaranteed, that when the state transfer - * becomes possible, cl_lock::cll_wq wait-queue - * is signaled. Caller can wait for this event by - * calling cl_lock_state_wait(); - * - * \retval -ve failure, abort state transition, move the lock - * into cl_lock_state::CLS_FREEING state, and set - * cl_lock::cll_error. - * - * Once all layers voted to agree to transition (by returning 0), lock - * is moved into corresponding target state. All state transition - * methods are optional. - */ /** @{ */ /** * Attempts to enqueue the lock. Called top-to-bottom. * - * \see ccc_lock_enqueue(), lov_lock_enqueue(), lovsub_lock_enqueue(), + * \retval 0 this layer has enqueued the lock successfully + * \retval >0 this layer has enqueued the lock, but need to wait on + * @anchor for resources + * \retval -ve failure + * + * \see vvp_lock_enqueue(), lov_lock_enqueue(), lovsub_lock_enqueue(), * \see osc_lock_enqueue() */ int (*clo_enqueue)(const struct lu_env *env, const struct cl_lock_slice *slice, - struct cl_io *io, __u32 enqflags); + struct cl_io *io, struct cl_sync_io *anchor); /** - * Attempts to wait for enqueue result. Called top-to-bottom. - * - * \see ccc_lock_wait(), lov_lock_wait(), osc_lock_wait() - */ - int (*clo_wait)(const struct lu_env *env, - const struct cl_lock_slice *slice); - /** - * Attempts to unlock the lock. Called bottom-to-top. In addition to - * usual return values of lock state-machine methods, this can return - * -ESTALE to indicate that lock cannot be returned to the cache, and - * has to be re-initialized. - * unuse is a one-shot operation, so it must NOT return CLO_WAIT. - * - * \see ccc_lock_unuse(), lov_lock_unuse(), osc_lock_unuse() - */ - int (*clo_unuse)(const struct lu_env *env, - const struct cl_lock_slice *slice); - /** - * Notifies layer that cached lock is started being used. - * - * \pre lock->cll_state == CLS_CACHED - * - * \see lov_lock_use(), osc_lock_use() - */ - int (*clo_use)(const struct lu_env *env, - const struct cl_lock_slice *slice); - /** @} statemachine */ - /** - * A method invoked when lock state is changed (as a result of state - * transition). This is used, for example, to track when the state of - * a sub-lock changes, to propagate this change to the corresponding - * top-lock. Optional - * - * \see lovsub_lock_state() - */ - void (*clo_state)(const struct lu_env *env, - const struct cl_lock_slice *slice, - enum cl_lock_state st); - /** - * Returns true, iff given lock is suitable for the given io, idea - * being, that there are certain "unsafe" locks, e.g., ones acquired - * for O_APPEND writes, that we don't want to re-use for a normal - * write, to avoid the danger of cascading evictions. Optional. Runs - * under cl_object_header::coh_lock_guard. - * - * XXX this should take more information about lock needed by - * io. Probably lock description or something similar. - * - * \see lov_fits_into() - */ - int (*clo_fits_into)(const struct lu_env *env, - const struct cl_lock_slice *slice, - const struct cl_lock_descr *need, - const struct cl_io *io); - /** - * \name ast - * Asynchronous System Traps. All of then are optional, all are - * executed bottom-to-top. - */ - /** @{ */ - - /** - * Cancellation callback. Cancel a lock voluntarily, or under - * the request of server. + * Cancel a lock, release its DLM lock ref, while does not cancel the + * DLM lock */ void (*clo_cancel)(const struct lu_env *env, const struct cl_lock_slice *slice); - /** - * Lock weighting ast. Executed to estimate how precious this lock - * is. The sum of results across all layers is used to determine - * whether lock worth keeping in cache given present memory usage. - * - * \see osc_lock_weigh(), vvp_lock_weigh(), lovsub_lock_weigh(). - */ - unsigned long (*clo_weigh)(const struct lu_env *env, - const struct cl_lock_slice *slice); - /** @} ast */ - - /** - * \see lovsub_lock_closure() - */ - int (*clo_closure)(const struct lu_env *env, - const struct cl_lock_slice *slice, - struct cl_lock_closure *closure); - /** - * Executed bottom-to-top when lock description changes (e.g., as a - * result of server granting more generous lock than was requested). - * - * \see lovsub_lock_modify() - */ - int (*clo_modify)(const struct lu_env *env, - const struct cl_lock_slice *slice, - const struct cl_lock_descr *updated); - /** - * Notifies layers (bottom-to-top) that lock is going to be - * destroyed. Responsibility of layers is to prevent new references on - * this lock from being acquired once this method returns. - * - * This can be called multiple times due to the races. - * - * \see cl_lock_delete() - * \see osc_lock_delete(), lovsub_lock_delete() - */ - void (*clo_delete)(const struct lu_env *env, - const struct cl_lock_slice *slice); + /** @} */ /** * Destructor. Frees resources and the slice. * - * \see ccc_lock_fini(), lov_lock_fini(), lovsub_lock_fini(), + * \see vvp_lock_fini(), lov_lock_fini(), lovsub_lock_fini(), * \see osc_lock_fini() */ void (*clo_fini)(const struct lu_env *env, struct cl_lock_slice *slice); @@ -2016,7 +1459,7 @@ enum cl_io_state { * This is usually embedded into layer session data, rather than allocated * dynamically. * - * \see vvp_io, lov_io, osc_io, ccc_io + * \see vvp_io, lov_io, osc_io */ struct cl_io_slice { struct cl_io *cis_io; @@ -2031,6 +1474,8 @@ struct cl_io_slice { struct list_head cis_linkage; }; +typedef void (*cl_commit_cbt)(const struct lu_env *, struct cl_io *, + struct cl_page *); /** * Per-layer io operations. * \see vvp_io_ops, lov_io_ops, lovsub_io_ops, osc_io_ops @@ -2114,7 +1559,7 @@ struct cl_io_operations { void (*cio_fini)(const struct lu_env *env, const struct cl_io_slice *slice); } op[CIT_OP_NR]; - struct { + /** * Submit pages from \a queue->c2_qin for IO, and move * successfully submitted pages into \a queue->c2_qout. Return @@ -2127,7 +1572,15 @@ struct cl_io_operations { const struct cl_io_slice *slice, enum cl_req_type crt, struct cl_2queue *queue); - } req_op[CRT_NR]; + /** + * Queue async page for write. + * The difference between cio_submit and cio_queue is that + * cio_submit is for urgent request. + */ + int (*cio_commit_async)(const struct lu_env *env, + const struct cl_io_slice *slice, + struct cl_page_list *queue, int from, int to, + cl_commit_cbt cb); /** * Read missing page. * @@ -2139,31 +1592,6 @@ struct cl_io_operations { int (*cio_read_page)(const struct lu_env *env, const struct cl_io_slice *slice, const struct cl_page_slice *page); - /** - * Prepare write of a \a page. Called bottom-to-top by a top-level - * cl_io_operations::op[CIT_WRITE]::cio_start() to prepare page for - * get data from user-level buffer. - * - * \pre io->ci_type == CIT_WRITE - * - * \see vvp_io_prepare_write(), lov_io_prepare_write(), - * osc_io_prepare_write(). - */ - int (*cio_prepare_write)(const struct lu_env *env, - const struct cl_io_slice *slice, - const struct cl_page_slice *page, - unsigned from, unsigned to); - /** - * - * \pre io->ci_type == CIT_WRITE - * - * \see vvp_io_commit_write(), lov_io_commit_write(), - * osc_io_commit_write(). - */ - int (*cio_commit_write)(const struct lu_env *env, - const struct cl_io_slice *slice, - const struct cl_page_slice *page, - unsigned from, unsigned to); /** * Optional debugging helper. Print given io slice. */ @@ -2215,10 +1643,14 @@ enum cl_enq_flags { * for async glimpse lock. */ CEF_AGL = 0x00000020, + /** + * enqueue a lock to test DLM lock existence. + */ + CEF_PEEK = 0x00000040, /** * mask of enq_flags. */ - CEF_MASK = 0x0000003f, + CEF_MASK = 0x0000007f, }; /** @@ -2228,12 +1660,12 @@ enum cl_enq_flags { struct cl_io_lock_link { /** linkage into one of cl_lockset lists. */ struct list_head cill_linkage; - struct cl_lock_descr cill_descr; - struct cl_lock *cill_lock; + struct cl_lock cill_lock; /** optional destructor */ void (*cill_fini)(const struct lu_env *env, struct cl_io_lock_link *link); }; +#define cill_descr cill_lock.cll_descr /** * Lock-set represents a collection of locks, that io needs at a @@ -2267,8 +1699,6 @@ struct cl_io_lock_link { struct cl_lockset { /** locks to be acquired. */ struct list_head cls_todo; - /** locks currently being processed. */ - struct list_head cls_curr; /** locks acquired. */ struct list_head cls_done; }; @@ -2632,9 +2062,7 @@ struct cl_site { * and top-locks (and top-pages) are accounted here. */ struct cache_stats cs_pages; - struct cache_stats cs_locks; atomic_t cs_pages_state[CPS_NR]; - atomic_t cs_locks_state[CLS_NR]; }; int cl_site_init(struct cl_site *s, struct cl_device *top); @@ -2725,7 +2153,7 @@ static inline void cl_device_fini(struct cl_device *d) } void cl_page_slice_add(struct cl_page *page, struct cl_page_slice *slice, - struct cl_object *obj, + struct cl_object *obj, pgoff_t index, const struct cl_page_operations *ops); void cl_lock_slice_add(struct cl_lock *lock, struct cl_lock_slice *slice, struct cl_object *obj, @@ -2758,7 +2186,7 @@ int cl_object_glimpse(const struct lu_env *env, struct cl_object *obj, struct ost_lvb *lvb); int cl_conf_set(const struct lu_env *env, struct cl_object *obj, const struct cl_object_conf *conf); -void cl_object_prune(const struct lu_env *env, struct cl_object *obj); +int cl_object_prune(const struct lu_env *env, struct cl_object *obj); void cl_object_kill(const struct lu_env *env, struct cl_object *obj); /** @@ -2772,7 +2200,7 @@ static inline int cl_object_same(struct cl_object *o0, struct cl_object *o1) static inline void cl_object_page_init(struct cl_object *clob, int size) { clob->co_slice_off = cl_object_header(clob)->coh_page_bufsize; - cl_object_header(clob)->coh_page_bufsize += ALIGN(size, 8); + cl_object_header(clob)->coh_page_bufsize += cfs_size_round(size); } static inline void *cl_object_page_slice(struct cl_object *clob, @@ -2781,6 +2209,16 @@ static inline void *cl_object_page_slice(struct cl_object *clob, return (void *)((char *)page + clob->co_slice_off); } +/** + * Return refcount of cl_object. + */ +static inline int cl_object_refc(struct cl_object *clob) +{ + struct lu_object_header *header = clob->co_lu.lo_header; + + return atomic_read(&header->loh_ref); +} + /** @} cl_object */ /** \defgroup cl_page cl_page @@ -2794,28 +2232,20 @@ enum { }; /* callback of cl_page_gang_lookup() */ -typedef int (*cl_page_gang_cb_t) (const struct lu_env *, struct cl_io *, - struct cl_page *, void *); -int cl_page_gang_lookup(const struct lu_env *env, struct cl_object *obj, - struct cl_io *io, pgoff_t start, pgoff_t end, - cl_page_gang_cb_t cb, void *cbdata); -struct cl_page *cl_page_lookup(struct cl_object_header *hdr, pgoff_t index); struct cl_page *cl_page_find(const struct lu_env *env, struct cl_object *obj, pgoff_t idx, struct page *vmpage, enum cl_page_type type); -struct cl_page *cl_page_find_sub(const struct lu_env *env, - struct cl_object *obj, - pgoff_t idx, struct page *vmpage, - struct cl_page *parent); +struct cl_page *cl_page_alloc(const struct lu_env *env, + struct cl_object *o, pgoff_t ind, + struct page *vmpage, + enum cl_page_type type); void cl_page_get(struct cl_page *page); void cl_page_put(const struct lu_env *env, struct cl_page *page); void cl_page_print(const struct lu_env *env, void *cookie, lu_printer_t printer, const struct cl_page *pg); void cl_page_header_print(const struct lu_env *env, void *cookie, lu_printer_t printer, const struct cl_page *pg); -struct page *cl_page_vmpage(const struct lu_env *env, struct cl_page *page); struct cl_page *cl_vmpage_page(struct page *vmpage, struct cl_object *obj); -struct cl_page *cl_page_top(struct cl_page *page); const struct cl_page_slice *cl_page_at(const struct cl_page *page, const struct lu_device_type *dtype); @@ -2872,12 +2302,10 @@ int cl_page_flush(const struct lu_env *env, struct cl_io *io, void cl_page_discard(const struct lu_env *env, struct cl_io *io, struct cl_page *pg); void cl_page_delete(const struct lu_env *env, struct cl_page *pg); -int cl_page_unmap(const struct lu_env *env, struct cl_io *io, - struct cl_page *pg); int cl_page_is_vmlocked(const struct lu_env *env, const struct cl_page *pg); void cl_page_export(const struct lu_env *env, struct cl_page *pg, int uptodate); int cl_page_is_under_lock(const struct lu_env *env, struct cl_io *io, - struct cl_page *page); + struct cl_page *page, pgoff_t *max_index); loff_t cl_offset(const struct cl_object *obj, pgoff_t idx); pgoff_t cl_index(const struct cl_object *obj, loff_t offset); int cl_page_size(const struct cl_object *obj); @@ -2890,138 +2318,66 @@ void cl_lock_descr_print(const struct lu_env *env, void *cookie, const struct cl_lock_descr *descr); /* @} helper */ +/** + * Data structure managing a client's cached pages. A count of + * "unstable" pages is maintained, and an LRU of clean pages is + * maintained. "unstable" pages are pages pinned by the ptlrpc + * layer for recovery purposes. + */ +struct cl_client_cache { + /** + * # of users (OSCs) + */ + atomic_t ccc_users; + /** + * # of threads are doing shrinking + */ + unsigned int ccc_lru_shrinkers; + /** + * # of LRU entries available + */ + atomic_t ccc_lru_left; + /** + * List of entities(OSCs) for this LRU cache + */ + struct list_head ccc_lru; + /** + * Max # of LRU entries + */ + unsigned long ccc_lru_max; + /** + * Lock to protect ccc_lru list + */ + spinlock_t ccc_lru_lock; + /** + * # of unstable pages for this mount point + */ + atomic_t ccc_unstable_nr; + /** + * Waitq for awaiting unstable pages to reach zero. + * Used at umounting time and signaled on BRW commit + */ + wait_queue_head_t ccc_unstable_waitq; + +}; + /** @} cl_page */ /** \defgroup cl_lock cl_lock * @{ */ -struct cl_lock *cl_lock_hold(const struct lu_env *env, const struct cl_io *io, - const struct cl_lock_descr *need, - const char *scope, const void *source); -struct cl_lock *cl_lock_peek(const struct lu_env *env, const struct cl_io *io, - const struct cl_lock_descr *need, - const char *scope, const void *source); -struct cl_lock *cl_lock_request(const struct lu_env *env, struct cl_io *io, - const struct cl_lock_descr *need, - const char *scope, const void *source); -struct cl_lock *cl_lock_at_pgoff(const struct lu_env *env, - struct cl_object *obj, pgoff_t index, - struct cl_lock *except, int pending, - int canceld); -static inline struct cl_lock *cl_lock_at_page(const struct lu_env *env, - struct cl_object *obj, - struct cl_page *page, - struct cl_lock *except, - int pending, int canceld) -{ - LASSERT(cl_object_header(obj) == cl_object_header(page->cp_obj)); - return cl_lock_at_pgoff(env, obj, page->cp_index, except, - pending, canceld); -} - +int cl_lock_request(const struct lu_env *env, struct cl_io *io, + struct cl_lock *lock); +int cl_lock_init(const struct lu_env *env, struct cl_lock *lock, + const struct cl_io *io); +void cl_lock_fini(const struct lu_env *env, struct cl_lock *lock); const struct cl_lock_slice *cl_lock_at(const struct cl_lock *lock, const struct lu_device_type *dtype); - -void cl_lock_get(struct cl_lock *lock); -void cl_lock_get_trust(struct cl_lock *lock); -void cl_lock_put(const struct lu_env *env, struct cl_lock *lock); -void cl_lock_hold_add(const struct lu_env *env, struct cl_lock *lock, - const char *scope, const void *source); -void cl_lock_hold_release(const struct lu_env *env, struct cl_lock *lock, - const char *scope, const void *source); -void cl_lock_unhold(const struct lu_env *env, struct cl_lock *lock, - const char *scope, const void *source); -void cl_lock_release(const struct lu_env *env, struct cl_lock *lock, - const char *scope, const void *source); -void cl_lock_user_add(const struct lu_env *env, struct cl_lock *lock); -void cl_lock_user_del(const struct lu_env *env, struct cl_lock *lock); - -int cl_lock_is_intransit(struct cl_lock *lock); - -int cl_lock_enqueue_wait(const struct lu_env *env, struct cl_lock *lock, - int keep_mutex); - -/** \name statemachine statemachine - * Interface to lock state machine consists of 3 parts: - * - * - "try" functions that attempt to effect a state transition. If state - * transition is not possible right now (e.g., if it has to wait for some - * asynchronous event to occur), these functions return - * cl_lock_transition::CLO_WAIT. - * - * - "non-try" functions that implement synchronous blocking interface on - * top of non-blocking "try" functions. These functions repeatedly call - * corresponding "try" versions, and if state transition is not possible - * immediately, wait for lock state change. - * - * - methods from cl_lock_operations, called by "try" functions. Lock can - * be advanced to the target state only when all layers voted that they - * are ready for this transition. "Try" functions call methods under lock - * mutex. If a layer had to release a mutex, it re-acquires it and returns - * cl_lock_transition::CLO_REPEAT, causing "try" function to call all - * layers again. - * - * TRY NON-TRY METHOD FINAL STATE - * - * cl_enqueue_try() cl_enqueue() cl_lock_operations::clo_enqueue() CLS_ENQUEUED - * - * cl_wait_try() cl_wait() cl_lock_operations::clo_wait() CLS_HELD - * - * cl_unuse_try() cl_unuse() cl_lock_operations::clo_unuse() CLS_CACHED - * - * cl_use_try() NONE cl_lock_operations::clo_use() CLS_HELD - * - * @{ - */ - -int cl_wait(const struct lu_env *env, struct cl_lock *lock); -void cl_unuse(const struct lu_env *env, struct cl_lock *lock); -int cl_enqueue_try(const struct lu_env *env, struct cl_lock *lock, - struct cl_io *io, __u32 flags); -int cl_unuse_try(const struct lu_env *env, struct cl_lock *lock); -int cl_wait_try(const struct lu_env *env, struct cl_lock *lock); -int cl_use_try(const struct lu_env *env, struct cl_lock *lock, int atomic); - -/** @} statemachine */ - -void cl_lock_signal(const struct lu_env *env, struct cl_lock *lock); -int cl_lock_state_wait(const struct lu_env *env, struct cl_lock *lock); -void cl_lock_state_set(const struct lu_env *env, struct cl_lock *lock, - enum cl_lock_state state); -int cl_queue_match(const struct list_head *queue, - const struct cl_lock_descr *need); - -void cl_lock_mutex_get(const struct lu_env *env, struct cl_lock *lock); -void cl_lock_mutex_put(const struct lu_env *env, struct cl_lock *lock); -int cl_lock_is_mutexed(struct cl_lock *lock); -int cl_lock_nr_mutexed(const struct lu_env *env); -int cl_lock_discard_pages(const struct lu_env *env, struct cl_lock *lock); -int cl_lock_ext_match(const struct cl_lock_descr *has, - const struct cl_lock_descr *need); -int cl_lock_descr_match(const struct cl_lock_descr *has, - const struct cl_lock_descr *need); -int cl_lock_mode_match(enum cl_lock_mode has, enum cl_lock_mode need); -int cl_lock_modify(const struct lu_env *env, struct cl_lock *lock, - const struct cl_lock_descr *desc); - -void cl_lock_closure_init(const struct lu_env *env, - struct cl_lock_closure *closure, - struct cl_lock *origin, int wait); -void cl_lock_closure_fini(struct cl_lock_closure *closure); -int cl_lock_closure_build(const struct lu_env *env, struct cl_lock *lock, - struct cl_lock_closure *closure); -void cl_lock_disclosure(const struct lu_env *env, - struct cl_lock_closure *closure); -int cl_lock_enclosure(const struct lu_env *env, struct cl_lock *lock, - struct cl_lock_closure *closure); - +void cl_lock_release(const struct lu_env *env, struct cl_lock *lock); +int cl_lock_enqueue(const struct lu_env *env, struct cl_io *io, + struct cl_lock *lock, struct cl_sync_io *anchor); void cl_lock_cancel(const struct lu_env *env, struct cl_lock *lock); -void cl_lock_delete(const struct lu_env *env, struct cl_lock *lock); -void cl_lock_error(const struct lu_env *env, struct cl_lock *lock, int error); -void cl_locks_prune(const struct lu_env *env, struct cl_object *obj, int wait); - -unsigned long cl_lock_weigh(const struct lu_env *env, struct cl_lock *lock); /** @} cl_lock */ @@ -3050,15 +2406,14 @@ int cl_io_lock_alloc_add(const struct lu_env *env, struct cl_io *io, struct cl_lock_descr *descr); int cl_io_read_page(const struct lu_env *env, struct cl_io *io, struct cl_page *page); -int cl_io_prepare_write(const struct lu_env *env, struct cl_io *io, - struct cl_page *page, unsigned from, unsigned to); -int cl_io_commit_write(const struct lu_env *env, struct cl_io *io, - struct cl_page *page, unsigned from, unsigned to); int cl_io_submit_rw(const struct lu_env *env, struct cl_io *io, enum cl_req_type iot, struct cl_2queue *queue); int cl_io_submit_sync(const struct lu_env *env, struct cl_io *io, enum cl_req_type iot, struct cl_2queue *queue, long timeout); +int cl_io_commit_async(const struct lu_env *env, struct cl_io *io, + struct cl_page_list *queue, int from, int to, + cl_commit_cbt cb); int cl_io_is_going(const struct lu_env *env); /** @@ -3114,6 +2469,12 @@ static inline struct cl_page *cl_page_list_last(struct cl_page_list *plist) return list_entry(plist->pl_pages.prev, struct cl_page, cp_batch); } +static inline struct cl_page *cl_page_list_first(struct cl_page_list *plist) +{ + LASSERT(plist->pl_nr > 0); + return list_entry(plist->pl_pages.next, struct cl_page, cp_batch); +} + /** * Iterate over pages in a page list. */ @@ -3130,9 +2491,14 @@ void cl_page_list_init(struct cl_page_list *plist); void cl_page_list_add(struct cl_page_list *plist, struct cl_page *page); void cl_page_list_move(struct cl_page_list *dst, struct cl_page_list *src, struct cl_page *page); +void cl_page_list_move_head(struct cl_page_list *dst, struct cl_page_list *src, + struct cl_page *page); void cl_page_list_splice(struct cl_page_list *list, struct cl_page_list *head); +void cl_page_list_del(const struct lu_env *env, struct cl_page_list *plist, + struct cl_page *page); void cl_page_list_disown(const struct lu_env *env, struct cl_io *io, struct cl_page_list *plist); +void cl_page_list_fini(const struct lu_env *env, struct cl_page_list *plist); void cl_2queue_init(struct cl_2queue *queue); void cl_2queue_disown(const struct lu_env *env, @@ -3177,13 +2543,18 @@ struct cl_sync_io { atomic_t csi_barrier; /** completion to be signaled when transfer is complete. */ wait_queue_head_t csi_waitq; + /** callback to invoke when this IO is finished */ + void (*csi_end_io)(const struct lu_env *, + struct cl_sync_io *); }; -void cl_sync_io_init(struct cl_sync_io *anchor, int nrpages); -int cl_sync_io_wait(const struct lu_env *env, struct cl_io *io, - struct cl_page_list *queue, struct cl_sync_io *anchor, +void cl_sync_io_init(struct cl_sync_io *anchor, int nr, + void (*end)(const struct lu_env *, struct cl_sync_io *)); +int cl_sync_io_wait(const struct lu_env *env, struct cl_sync_io *anchor, long timeout); -void cl_sync_io_note(struct cl_sync_io *anchor, int ioret); +void cl_sync_io_note(const struct lu_env *env, struct cl_sync_io *anchor, + int ioret); +void cl_sync_io_end(const struct lu_env *env, struct cl_sync_io *anchor); /** @} cl_sync_io */ @@ -3241,6 +2612,9 @@ void *cl_env_reenter(void); void cl_env_reexit(void *cookie); void cl_env_implant(struct lu_env *env, int *refcheck); void cl_env_unplant(struct lu_env *env, int *refcheck); +unsigned int cl_env_cache_purge(unsigned int nr); +struct lu_env *cl_env_percpu_get(void); +void cl_env_percpu_put(struct lu_env *env); /** @} cl_env */ diff --git a/drivers/staging/lustre/lustre/include/lclient.h b/drivers/staging/lustre/lustre/include/lclient.h deleted file mode 100644 index 5d839a9f7..000000000 --- a/drivers/staging/lustre/lustre/include/lclient.h +++ /dev/null @@ -1,408 +0,0 @@ -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. - * - * GPL HEADER END - */ -/* - * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * Definitions shared between vvp and liblustre, and other clients in the - * future. - * - * Author: Oleg Drokin - * Author: Nikita Danilov - */ - -#ifndef LCLIENT_H -#define LCLIENT_H - -blkcnt_t dirty_cnt(struct inode *inode); - -int cl_glimpse_size0(struct inode *inode, int agl); -int cl_glimpse_lock(const struct lu_env *env, struct cl_io *io, - struct inode *inode, struct cl_object *clob, int agl); - -static inline int cl_glimpse_size(struct inode *inode) -{ - return cl_glimpse_size0(inode, 0); -} - -static inline int cl_agl(struct inode *inode) -{ - return cl_glimpse_size0(inode, 1); -} - -/** - * Locking policy for setattr. - */ -enum ccc_setattr_lock_type { - /** Locking is done by server */ - SETATTR_NOLOCK, - /** Extent lock is enqueued */ - SETATTR_EXTENT_LOCK, - /** Existing local extent lock is used */ - SETATTR_MATCH_LOCK -}; - -/** - * IO state private to vvp or slp layers. - */ -struct ccc_io { - /** super class */ - struct cl_io_slice cui_cl; - struct cl_io_lock_link cui_link; - /** - * I/O vector information to or from which read/write is going. - */ - struct iov_iter *cui_iter; - /** - * Total size for the left IO. - */ - size_t cui_tot_count; - - union { - struct { - enum ccc_setattr_lock_type cui_local_lock; - } setattr; - } u; - /** - * True iff io is processing glimpse right now. - */ - int cui_glimpse; - /** - * Layout version when this IO is initialized - */ - __u32 cui_layout_gen; - /** - * File descriptor against which IO is done. - */ - struct ll_file_data *cui_fd; - struct kiocb *cui_iocb; -}; - -/** - * True, if \a io is a normal io, False for splice_{read,write}. - * must be implemented in arch specific code. - */ -int cl_is_normalio(const struct lu_env *env, const struct cl_io *io); - -extern struct lu_context_key ccc_key; -extern struct lu_context_key ccc_session_key; - -struct ccc_thread_info { - struct cl_lock_descr cti_descr; - struct cl_io cti_io; - struct cl_attr cti_attr; -}; - -static inline struct ccc_thread_info *ccc_env_info(const struct lu_env *env) -{ - struct ccc_thread_info *info; - - info = lu_context_key_get(&env->le_ctx, &ccc_key); - LASSERT(info); - return info; -} - -static inline struct cl_attr *ccc_env_thread_attr(const struct lu_env *env) -{ - struct cl_attr *attr = &ccc_env_info(env)->cti_attr; - - memset(attr, 0, sizeof(*attr)); - return attr; -} - -static inline struct cl_io *ccc_env_thread_io(const struct lu_env *env) -{ - struct cl_io *io = &ccc_env_info(env)->cti_io; - - memset(io, 0, sizeof(*io)); - return io; -} - -struct ccc_session { - struct ccc_io cs_ios; -}; - -static inline struct ccc_session *ccc_env_session(const struct lu_env *env) -{ - struct ccc_session *ses; - - ses = lu_context_key_get(env->le_ses, &ccc_session_key); - LASSERT(ses); - return ses; -} - -static inline struct ccc_io *ccc_env_io(const struct lu_env *env) -{ - return &ccc_env_session(env)->cs_ios; -} - -/** - * ccc-private object state. - */ -struct ccc_object { - struct cl_object_header cob_header; - struct cl_object cob_cl; - struct inode *cob_inode; - - /** - * A list of dirty pages pending IO in the cache. Used by - * SOM. Protected by ll_inode_info::lli_lock. - * - * \see ccc_page::cpg_pending_linkage - */ - struct list_head cob_pending_list; - - /** - * Access this counter is protected by inode->i_sem. Now that - * the lifetime of transient pages must be covered by inode sem, - * we don't need to hold any lock.. - */ - int cob_transient_pages; - /** - * Number of outstanding mmaps on this file. - * - * \see ll_vm_open(), ll_vm_close(). - */ - atomic_t cob_mmap_cnt; - - /** - * various flags - * cob_discard_page_warned - * if pages belonging to this object are discarded when a client - * is evicted, some debug info will be printed, this flag will be set - * during processing the first discarded page, then avoid flooding - * debug message for lots of discarded pages. - * - * \see ll_dirty_page_discard_warn. - */ - unsigned int cob_discard_page_warned:1; -}; - -/** - * ccc-private page state. - */ -struct ccc_page { - struct cl_page_slice cpg_cl; - int cpg_defer_uptodate; - int cpg_ra_used; - int cpg_write_queued; - /** - * Non-empty iff this page is already counted in - * ccc_object::cob_pending_list. Protected by - * ccc_object::cob_pending_guard. This list is only used as a flag, - * that is, never iterated through, only checked for list_empty(), but - * having a list is useful for debugging. - */ - struct list_head cpg_pending_linkage; - /** VM page */ - struct page *cpg_page; -}; - -static inline struct ccc_page *cl2ccc_page(const struct cl_page_slice *slice) -{ - return container_of(slice, struct ccc_page, cpg_cl); -} - -struct ccc_device { - struct cl_device cdv_cl; - struct super_block *cdv_sb; - struct cl_device *cdv_next; -}; - -struct ccc_lock { - struct cl_lock_slice clk_cl; -}; - -struct ccc_req { - struct cl_req_slice crq_cl; -}; - -void *ccc_key_init (const struct lu_context *ctx, - struct lu_context_key *key); -void ccc_key_fini (const struct lu_context *ctx, - struct lu_context_key *key, void *data); -void *ccc_session_key_init(const struct lu_context *ctx, - struct lu_context_key *key); -void ccc_session_key_fini(const struct lu_context *ctx, - struct lu_context_key *key, void *data); - -int ccc_device_init (const struct lu_env *env, - struct lu_device *d, - const char *name, struct lu_device *next); -struct lu_device *ccc_device_fini (const struct lu_env *env, - struct lu_device *d); -struct lu_device *ccc_device_alloc(const struct lu_env *env, - struct lu_device_type *t, - struct lustre_cfg *cfg, - const struct lu_device_operations *luops, - const struct cl_device_operations *clops); -struct lu_device *ccc_device_free (const struct lu_env *env, - struct lu_device *d); -struct lu_object *ccc_object_alloc(const struct lu_env *env, - const struct lu_object_header *hdr, - struct lu_device *dev, - const struct cl_object_operations *clops, - const struct lu_object_operations *luops); - -int ccc_req_init(const struct lu_env *env, struct cl_device *dev, - struct cl_req *req); -void ccc_umount(const struct lu_env *env, struct cl_device *dev); -int ccc_global_init(struct lu_device_type *device_type); -void ccc_global_fini(struct lu_device_type *device_type); -int ccc_object_init0(const struct lu_env *env, struct ccc_object *vob, - const struct cl_object_conf *conf); -int ccc_object_init(const struct lu_env *env, struct lu_object *obj, - const struct lu_object_conf *conf); -void ccc_object_free(const struct lu_env *env, struct lu_object *obj); -int ccc_lock_init(const struct lu_env *env, struct cl_object *obj, - struct cl_lock *lock, const struct cl_io *io, - const struct cl_lock_operations *lkops); -int ccc_object_glimpse(const struct lu_env *env, - const struct cl_object *obj, struct ost_lvb *lvb); -struct page *ccc_page_vmpage(const struct lu_env *env, - const struct cl_page_slice *slice); -int ccc_page_is_under_lock(const struct lu_env *env, - const struct cl_page_slice *slice, struct cl_io *io); -int ccc_fail(const struct lu_env *env, const struct cl_page_slice *slice); -int ccc_transient_page_prep(const struct lu_env *env, - const struct cl_page_slice *slice, - struct cl_io *io); -void ccc_lock_delete(const struct lu_env *env, - const struct cl_lock_slice *slice); -void ccc_lock_fini(const struct lu_env *env, struct cl_lock_slice *slice); -int ccc_lock_enqueue(const struct lu_env *env, - const struct cl_lock_slice *slice, - struct cl_io *io, __u32 enqflags); -int ccc_lock_use(const struct lu_env *env, const struct cl_lock_slice *slice); -int ccc_lock_unuse(const struct lu_env *env, const struct cl_lock_slice *slice); -int ccc_lock_wait(const struct lu_env *env, const struct cl_lock_slice *slice); -int ccc_lock_fits_into(const struct lu_env *env, - const struct cl_lock_slice *slice, - const struct cl_lock_descr *need, - const struct cl_io *io); -void ccc_lock_state(const struct lu_env *env, - const struct cl_lock_slice *slice, - enum cl_lock_state state); - -int ccc_io_one_lock_index(const struct lu_env *env, struct cl_io *io, - __u32 enqflags, enum cl_lock_mode mode, - pgoff_t start, pgoff_t end); -int ccc_io_one_lock(const struct lu_env *env, struct cl_io *io, - __u32 enqflags, enum cl_lock_mode mode, - loff_t start, loff_t end); -void ccc_io_end(const struct lu_env *env, const struct cl_io_slice *ios); -void ccc_io_advance(const struct lu_env *env, const struct cl_io_slice *ios, - size_t nob); -void ccc_io_update_iov(const struct lu_env *env, struct ccc_io *cio, - struct cl_io *io); -int ccc_prep_size(const struct lu_env *env, struct cl_object *obj, - struct cl_io *io, loff_t start, size_t count, int *exceed); -void ccc_req_completion(const struct lu_env *env, - const struct cl_req_slice *slice, int ioret); -void ccc_req_attr_set(const struct lu_env *env, - const struct cl_req_slice *slice, - const struct cl_object *obj, - struct cl_req_attr *oa, u64 flags); - -struct lu_device *ccc2lu_dev (struct ccc_device *vdv); -struct lu_object *ccc2lu (struct ccc_object *vob); -struct ccc_device *lu2ccc_dev (const struct lu_device *d); -struct ccc_device *cl2ccc_dev (const struct cl_device *d); -struct ccc_object *lu2ccc (const struct lu_object *obj); -struct ccc_object *cl2ccc (const struct cl_object *obj); -struct ccc_lock *cl2ccc_lock (const struct cl_lock_slice *slice); -struct ccc_io *cl2ccc_io (const struct lu_env *env, - const struct cl_io_slice *slice); -struct ccc_req *cl2ccc_req (const struct cl_req_slice *slice); -struct page *cl2vm_page (const struct cl_page_slice *slice); -struct inode *ccc_object_inode(const struct cl_object *obj); -struct ccc_object *cl_inode2ccc (struct inode *inode); - -int cl_setattr_ost(struct inode *inode, const struct iattr *attr); - -int ccc_object_invariant(const struct cl_object *obj); -int cl_file_inode_init(struct inode *inode, struct lustre_md *md); -void cl_inode_fini(struct inode *inode); -int cl_local_size(struct inode *inode); - -__u16 ll_dirent_type_get(struct lu_dirent *ent); -__u64 cl_fid_build_ino(const struct lu_fid *fid, int api32); -__u32 cl_fid_build_gen(const struct lu_fid *fid); - -# define CLOBINVRNT(env, clob, expr) \ - ((void)sizeof(env), (void)sizeof(clob), (void)sizeof(!!(expr))) - -int cl_init_ea_size(struct obd_export *md_exp, struct obd_export *dt_exp); -int cl_ocd_update(struct obd_device *host, - struct obd_device *watched, - enum obd_notify_event ev, void *owner, void *data); - -struct ccc_grouplock { - struct lu_env *cg_env; - struct cl_io *cg_io; - struct cl_lock *cg_lock; - unsigned long cg_gid; -}; - -int cl_get_grouplock(struct cl_object *obj, unsigned long gid, int nonblock, - struct ccc_grouplock *cg); -void cl_put_grouplock(struct ccc_grouplock *cg); - -/** - * New interfaces to get and put lov_stripe_md from lov layer. This violates - * layering because lov_stripe_md is supposed to be a private data in lov. - * - * NB: If you find you have to use these interfaces for your new code, please - * think about it again. These interfaces may be removed in the future for - * better layering. - */ -struct lov_stripe_md *lov_lsm_get(struct cl_object *clobj); -void lov_lsm_put(struct cl_object *clobj, struct lov_stripe_md *lsm); -int lov_read_and_clear_async_rc(struct cl_object *clob); - -struct lov_stripe_md *ccc_inode_lsm_get(struct inode *inode); -void ccc_inode_lsm_put(struct inode *inode, struct lov_stripe_md *lsm); - -/** - * Data structure managing a client's cached clean pages. An LRU of - * pages is maintained, along with other statistics. - */ -struct cl_client_cache { - atomic_t ccc_users; /* # of users (OSCs) of this data */ - struct list_head ccc_lru; /* LRU list of cached clean pages */ - spinlock_t ccc_lru_lock; /* lock for list */ - atomic_t ccc_lru_left; /* # of LRU entries available */ - unsigned long ccc_lru_max; /* Max # of LRU entries possible */ - unsigned int ccc_lru_shrinkers; /* # of threads reclaiming */ -}; - -#endif /*LCLIENT_H */ diff --git a/drivers/staging/lustre/lustre/include/linux/obd.h b/drivers/staging/lustre/lustre/include/linux/obd.h deleted file mode 100644 index 3907bf4ce..000000000 --- a/drivers/staging/lustre/lustre/include/linux/obd.h +++ /dev/null @@ -1,125 +0,0 @@ -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. - * - * GPL HEADER END - */ -/* - * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - */ - -#ifndef __LINUX_OBD_H -#define __LINUX_OBD_H - -#ifndef __OBD_H -#error Do not #include this file directly. #include instead -#endif - -#include "../obd_support.h" - -#include -#include -#include /* for struct task_struct, for current.h */ -#include - -#include "../lustre_intent.h" - -struct ll_iattr { - struct iattr iattr; - unsigned int ia_attr_flags; -}; - -#define CLIENT_OBD_LIST_LOCK_DEBUG 1 - -struct client_obd_lock { - spinlock_t lock; - - unsigned long time; - struct task_struct *task; - const char *func; - int line; -}; - -static inline void __client_obd_list_lock(struct client_obd_lock *lock, - const char *func, int line) -{ - unsigned long cur = jiffies; - - while (1) { - if (spin_trylock(&lock->lock)) { - LASSERT(!lock->task); - lock->task = current; - lock->func = func; - lock->line = line; - lock->time = jiffies; - break; - } - - if (time_before(cur + 5 * HZ, jiffies) && - time_before(lock->time + 5 * HZ, jiffies)) { - struct task_struct *task = lock->task; - - if (!task) - continue; - - LCONSOLE_WARN("%s:%d: lock %p was acquired by <%s:%d:%s:%d> for %lu seconds.\n", - current->comm, current->pid, - lock, task->comm, task->pid, - lock->func, lock->line, - (jiffies - lock->time) / HZ); - LCONSOLE_WARN("====== for current process =====\n"); - dump_stack(); - LCONSOLE_WARN("====== end =======\n"); - set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(1000 * HZ); - } - cpu_relax(); - } -} - -#define client_obd_list_lock(lock) \ - __client_obd_list_lock(lock, __func__, __LINE__) - -static inline void client_obd_list_unlock(struct client_obd_lock *lock) -{ - LASSERT(lock->task); - lock->task = NULL; - lock->time = jiffies; - spin_unlock(&lock->lock); -} - -static inline void client_obd_list_lock_init(struct client_obd_lock *lock) -{ - spin_lock_init(&lock->lock); -} - -static inline void client_obd_list_lock_done(struct client_obd_lock *lock) -{} - -#endif /* __LINUX_OBD_H */ diff --git a/drivers/staging/lustre/lustre/include/lu_object.h b/drivers/staging/lustre/lustre/include/lu_object.h index 242bb1ef6..281651218 100644 --- a/drivers/staging/lustre/lustre/include/lu_object.h +++ b/drivers/staging/lustre/lustre/include/lu_object.h @@ -198,7 +198,6 @@ typedef int (*lu_printer_t)(const struct lu_env *env, * Operations specific for particular lu_object. */ struct lu_object_operations { - /** * Allocate lower-layer parts of the object by calling * lu_device_operations::ldo_object_alloc() of the corresponding @@ -656,21 +655,21 @@ static inline struct seq_server_site *lu_site2seq(const struct lu_site *s) * @{ */ -int lu_site_init (struct lu_site *s, struct lu_device *d); -void lu_site_fini (struct lu_site *s); -int lu_site_init_finish (struct lu_site *s); -void lu_stack_fini (const struct lu_env *env, struct lu_device *top); -void lu_device_get (struct lu_device *d); -void lu_device_put (struct lu_device *d); -int lu_device_init (struct lu_device *d, struct lu_device_type *t); -void lu_device_fini (struct lu_device *d); -int lu_object_header_init(struct lu_object_header *h); +int lu_site_init(struct lu_site *s, struct lu_device *d); +void lu_site_fini(struct lu_site *s); +int lu_site_init_finish(struct lu_site *s); +void lu_stack_fini(const struct lu_env *env, struct lu_device *top); +void lu_device_get(struct lu_device *d); +void lu_device_put(struct lu_device *d); +int lu_device_init(struct lu_device *d, struct lu_device_type *t); +void lu_device_fini(struct lu_device *d); +int lu_object_header_init(struct lu_object_header *h); void lu_object_header_fini(struct lu_object_header *h); -int lu_object_init (struct lu_object *o, - struct lu_object_header *h, struct lu_device *d); -void lu_object_fini (struct lu_object *o); -void lu_object_add_top (struct lu_object_header *h, struct lu_object *o); -void lu_object_add (struct lu_object *before, struct lu_object *o); +int lu_object_init(struct lu_object *o, + struct lu_object_header *h, struct lu_device *d); +void lu_object_fini(struct lu_object *o); +void lu_object_add_top(struct lu_object_header *h, struct lu_object *o); +void lu_object_add(struct lu_object *before, struct lu_object *o); /** * Helpers to initialize and finalize device types. @@ -781,9 +780,8 @@ int lu_cdebug_printer(const struct lu_env *env, */ #define LU_OBJECT_DEBUG(mask, env, object, format, ...) \ do { \ - LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, mask, NULL); \ - \ if (cfs_cdebug_show(mask, DEBUG_SUBSYSTEM)) { \ + LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, mask, NULL); \ lu_object_print(env, &msgdata, lu_cdebug_printer, object);\ CDEBUG(mask, format, ## __VA_ARGS__); \ } \ @@ -794,9 +792,8 @@ do { \ */ #define LU_OBJECT_HEADER(mask, env, object, format, ...) \ do { \ - LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, mask, NULL); \ - \ if (cfs_cdebug_show(mask, DEBUG_SUBSYSTEM)) { \ + LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, mask, NULL); \ lu_object_header_print(env, &msgdata, lu_cdebug_printer,\ (object)->lo_header); \ lu_cdebug_printer(env, &msgdata, "\n"); \ @@ -1006,6 +1003,10 @@ enum lu_context_tag { * Context for local operations */ LCT_LOCAL = 1 << 7, + /** + * session for server thread + **/ + LCT_SERVER_SESSION = BIT(8), /** * Set when at least one of keys, having values in this context has * non-NULL lu_context_key::lct_exit() method. This is used to @@ -1118,7 +1119,7 @@ struct lu_context_key { { \ type *value; \ \ - CLASSERT(PAGE_SIZE >= sizeof (*value)); \ + CLASSERT(PAGE_SIZE >= sizeof(*value)); \ \ value = kzalloc(sizeof(*value), GFP_NOFS); \ if (!value) \ @@ -1154,12 +1155,12 @@ do { \ (key)->lct_owner = THIS_MODULE; \ } while (0) -int lu_context_key_register(struct lu_context_key *key); -void lu_context_key_degister(struct lu_context_key *key); -void *lu_context_key_get (const struct lu_context *ctx, - const struct lu_context_key *key); -void lu_context_key_quiesce (struct lu_context_key *key); -void lu_context_key_revive (struct lu_context_key *key); +int lu_context_key_register(struct lu_context_key *key); +void lu_context_key_degister(struct lu_context_key *key); +void *lu_context_key_get(const struct lu_context *ctx, + const struct lu_context_key *key); +void lu_context_key_quiesce(struct lu_context_key *key); +void lu_context_key_revive(struct lu_context_key *key); /* * LU_KEY_INIT_GENERIC() has to be a macro to correctly determine an @@ -1216,21 +1217,21 @@ void lu_context_key_revive (struct lu_context_key *key); LU_TYPE_START(mod, __VA_ARGS__); \ LU_TYPE_STOP(mod, __VA_ARGS__) -int lu_context_init (struct lu_context *ctx, __u32 tags); -void lu_context_fini (struct lu_context *ctx); -void lu_context_enter (struct lu_context *ctx); -void lu_context_exit (struct lu_context *ctx); -int lu_context_refill(struct lu_context *ctx); +int lu_context_init(struct lu_context *ctx, __u32 tags); +void lu_context_fini(struct lu_context *ctx); +void lu_context_enter(struct lu_context *ctx); +void lu_context_exit(struct lu_context *ctx); +int lu_context_refill(struct lu_context *ctx); /* * Helper functions to operate on multiple keys. These are used by the default * device type operations, defined by LU_TYPE_INIT_FINI(). */ -int lu_context_key_register_many(struct lu_context_key *k, ...); +int lu_context_key_register_many(struct lu_context_key *k, ...); void lu_context_key_degister_many(struct lu_context_key *k, ...); -void lu_context_key_revive_many (struct lu_context_key *k, ...); -void lu_context_key_quiesce_many (struct lu_context_key *k, ...); +void lu_context_key_revive_many(struct lu_context_key *k, ...); +void lu_context_key_quiesce_many(struct lu_context_key *k, ...); /** * Environment. @@ -1246,9 +1247,9 @@ struct lu_env { struct lu_context *le_ses; }; -int lu_env_init (struct lu_env *env, __u32 tags); -void lu_env_fini (struct lu_env *env); -int lu_env_refill(struct lu_env *env); +int lu_env_init(struct lu_env *env, __u32 tags); +void lu_env_fini(struct lu_env *env); +int lu_env_refill(struct lu_env *env); /** @} lu_context */ diff --git a/drivers/staging/lustre/lustre/include/lustre/lustre_idl.h b/drivers/staging/lustre/lustre/include/lustre/lustre_idl.h index 5aae1d06a..9c53c1792 100644 --- a/drivers/staging/lustre/lustre/include/lustre/lustre_idl.h +++ b/drivers/staging/lustre/lustre/include/lustre/lustre_idl.h @@ -183,6 +183,12 @@ struct lu_seq_range { __u32 lsr_flags; }; +struct lu_seq_range_array { + __u32 lsra_count; + __u32 lsra_padding; + struct lu_seq_range lsra_lsr[0]; +}; + #define LU_SEQ_RANGE_MDT 0x0 #define LU_SEQ_RANGE_OST 0x1 #define LU_SEQ_RANGE_ANY 0x3 @@ -578,7 +584,7 @@ static inline __u64 ostid_seq(const struct ost_id *ostid) if (fid_seq_is_mdt0(ostid->oi.oi_seq)) return FID_SEQ_OST_MDT0; - if (fid_seq_is_default(ostid->oi.oi_seq)) + if (unlikely(fid_seq_is_default(ostid->oi.oi_seq))) return FID_SEQ_LOV_DEFAULT; if (fid_is_idif(&ostid->oi_fid)) @@ -590,9 +596,12 @@ static inline __u64 ostid_seq(const struct ost_id *ostid) /* extract OST objid from a wire ost_id (id/seq) pair */ static inline __u64 ostid_id(const struct ost_id *ostid) { - if (fid_seq_is_mdt0(ostid_seq(ostid))) + if (fid_seq_is_mdt0(ostid->oi.oi_seq)) return ostid->oi.oi_id & IDIF_OID_MASK; + if (unlikely(fid_seq_is_default(ostid->oi.oi_seq))) + return ostid->oi.oi_id; + if (fid_is_idif(&ostid->oi_fid)) return fid_idif_id(fid_seq(&ostid->oi_fid), fid_oid(&ostid->oi_fid), 0); @@ -636,12 +645,22 @@ static inline void ostid_set_seq_llog(struct ost_id *oi) */ static inline void ostid_set_id(struct ost_id *oi, __u64 oid) { - if (fid_seq_is_mdt0(ostid_seq(oi))) { + if (fid_seq_is_mdt0(oi->oi.oi_seq)) { if (oid >= IDIF_MAX_OID) { CERROR("Bad %llu to set " DOSTID "\n", oid, POSTID(oi)); return; } oi->oi.oi_id = oid; + } else if (fid_is_idif(&oi->oi_fid)) { + if (oid >= IDIF_MAX_OID) { + CERROR("Bad %llu to set "DOSTID"\n", + oid, POSTID(oi)); + return; + } + oi->oi_fid.f_seq = fid_idif_seq(oid, + fid_idif_ost_idx(&oi->oi_fid)); + oi->oi_fid.f_oid = oid; + oi->oi_fid.f_ver = oid >> 48; } else { if (oid > OBIF_MAX_OID) { CERROR("Bad %llu to set " DOSTID "\n", oid, POSTID(oi)); @@ -651,25 +670,31 @@ static inline void ostid_set_id(struct ost_id *oi, __u64 oid) } } -static inline void ostid_inc_id(struct ost_id *oi) +static inline int fid_set_id(struct lu_fid *fid, __u64 oid) { - if (fid_seq_is_mdt0(ostid_seq(oi))) { - if (unlikely(ostid_id(oi) + 1 > IDIF_MAX_OID)) { - CERROR("Bad inc "DOSTID"\n", POSTID(oi)); - return; + if (unlikely(fid_seq_is_igif(fid->f_seq))) { + CERROR("bad IGIF, "DFID"\n", PFID(fid)); + return -EBADF; + } + + if (fid_is_idif(fid)) { + if (oid >= IDIF_MAX_OID) { + CERROR("Too large OID %#llx to set IDIF "DFID"\n", + (unsigned long long)oid, PFID(fid)); + return -EBADF; } - oi->oi.oi_id++; + fid->f_seq = fid_idif_seq(oid, fid_idif_ost_idx(fid)); + fid->f_oid = oid; + fid->f_ver = oid >> 48; } else { - oi->oi_fid.f_oid++; + if (oid > OBIF_MAX_OID) { + CERROR("Too large OID %#llx to set REG "DFID"\n", + (unsigned long long)oid, PFID(fid)); + return -EBADF; + } + fid->f_oid = oid; } -} - -static inline void ostid_dec_id(struct ost_id *oi) -{ - if (fid_seq_is_mdt0(ostid_seq(oi))) - oi->oi.oi_id--; - else - oi->oi_fid.f_oid--; + return 0; } /** @@ -684,30 +709,34 @@ static inline void ostid_dec_id(struct ost_id *oi) static inline int ostid_to_fid(struct lu_fid *fid, struct ost_id *ostid, __u32 ost_idx) { + __u64 seq = ostid_seq(ostid); + if (ost_idx > 0xffff) { CERROR("bad ost_idx, "DOSTID" ost_idx:%u\n", POSTID(ostid), ost_idx); return -EBADF; } - if (fid_seq_is_mdt0(ostid_seq(ostid))) { + if (fid_seq_is_mdt0(seq)) { + __u64 oid = ostid_id(ostid); + /* This is a "legacy" (old 1.x/2.early) OST object in "group 0" * that we map into the IDIF namespace. It allows up to 2^48 * objects per OST, as this is the object namespace that has * been in production for years. This can handle create rates * of 1M objects/s/OST for 9 years, or combinations thereof. */ - if (ostid_id(ostid) >= IDIF_MAX_OID) { + if (oid >= IDIF_MAX_OID) { CERROR("bad MDT0 id, " DOSTID " ost_idx:%u\n", POSTID(ostid), ost_idx); return -EBADF; } - fid->f_seq = fid_idif_seq(ostid_id(ostid), ost_idx); + fid->f_seq = fid_idif_seq(oid, ost_idx); /* truncate to 32 bits by assignment */ - fid->f_oid = ostid_id(ostid); + fid->f_oid = oid; /* in theory, not currently used */ - fid->f_ver = ostid_id(ostid) >> 48; - } else /* if (fid_seq_is_idif(seq) || fid_seq_is_norm(seq)) */ { + fid->f_ver = oid >> 48; + } else if (likely(!fid_seq_is_default(seq))) { /* This is either an IDIF object, which identifies objects across * all OSTs, or a regular FID. The IDIF namespace maps legacy * OST objects into the FID namespace. In both cases, we just @@ -1001,8 +1030,9 @@ static inline int lu_dirent_calc_size(int namelen, __u16 attr) size = (sizeof(struct lu_dirent) + namelen + align) & ~align; size += sizeof(struct luda_type); - } else + } else { size = sizeof(struct lu_dirent) + namelen; + } return (size + 7) & ~7; } @@ -1256,6 +1286,9 @@ void lustre_swab_ptlrpc_body(struct ptlrpc_body *pb); #define OBD_CONNECT_PINGLESS 0x4000000000000ULL/* pings not required */ #define OBD_CONNECT_FLOCK_DEAD 0x8000000000000ULL/* flock deadlock detection */ #define OBD_CONNECT_DISP_STRIPE 0x10000000000000ULL/*create stripe disposition*/ +#define OBD_CONNECT_OPEN_BY_FID 0x20000000000000ULL /* open by fid won't pack + * name in request + */ /* XXX README XXX: * Please DO NOT add flag values here before first ensuring that this same @@ -1428,6 +1461,8 @@ enum obdo_flags { */ OBD_FL_RECOV_RESEND = 0x00080000, /* recoverable resent */ OBD_FL_NOSPC_BLK = 0x00100000, /* no more block space on OST */ + OBD_FL_FLUSH = 0x00200000, /* flush pages on the OST */ + OBD_FL_SHORT_IO = 0x00400000, /* short io request */ /* Note that while these checksum values are currently separate bits, * in 2.x we can actually allow all values from 1-31 if we wanted. @@ -1525,6 +1560,11 @@ static inline void lmm_oi_set_seq(struct ost_id *oi, __u64 seq) oi->oi.oi_seq = seq; } +static inline void lmm_oi_set_id(struct ost_id *oi, __u64 oid) +{ + oi->oi.oi_id = oid; +} + static inline __u64 lmm_oi_id(struct ost_id *oi) { return oi->oi.oi_id; @@ -1732,6 +1772,11 @@ void lustre_swab_obd_statfs(struct obd_statfs *os); #define OBD_BRW_MEMALLOC 0x800 /* Client runs in the "kswapd" context */ #define OBD_BRW_OVER_USRQUOTA 0x1000 /* Running out of user quota */ #define OBD_BRW_OVER_GRPQUOTA 0x2000 /* Running out of group quota */ +#define OBD_BRW_SOFT_SYNC 0x4000 /* This flag notifies the server + * that the client is running low on + * space for unstable pages; asking + * it to sync quickly + */ #define OBD_OBJECT_EOF 0xffffffffffffffffULL @@ -2436,6 +2481,7 @@ struct mdt_rec_reint { void lustre_swab_mdt_rec_reint(struct mdt_rec_reint *rr); +/* lmv structures */ struct lmv_desc { __u32 ld_tgt_count; /* how many MDS's */ __u32 ld_active_tgt_count; /* how many active */ @@ -2460,7 +2506,6 @@ struct lmv_stripe_md { struct lu_fid mea_ids[0]; }; -/* lmv structures */ #define MEA_MAGIC_LAST_CHAR 0xb2221ca1 #define MEA_MAGIC_ALL_CHARS 0xb222a11c #define MEA_MAGIC_HASH_SEGMENT 0xb222a11b @@ -2470,9 +2515,10 @@ struct lmv_stripe_md { #define MAX_HASH_HIGHEST_BIT 0x1000000000000000ULL enum fld_rpc_opc { - FLD_QUERY = 900, + FLD_QUERY = 900, + FLD_READ = 901, FLD_LAST_OPC, - FLD_FIRST_OPC = FLD_QUERY + FLD_FIRST_OPC = FLD_QUERY }; enum seq_rpc_opc { @@ -2486,6 +2532,12 @@ enum seq_op { SEQ_ALLOC_META = 1 }; +enum fld_op { + FLD_CREATE = 0, + FLD_DELETE = 1, + FLD_LOOKUP = 2, +}; + /* * LOV data structures */ @@ -2582,6 +2634,8 @@ struct ldlm_extent { __u64 gid; }; +#define LDLM_GID_ANY ((__u64)-1) + static inline int ldlm_extent_overlap(struct ldlm_extent *ex1, struct ldlm_extent *ex2) { @@ -3304,7 +3358,7 @@ struct getinfo_fid2path { char gf_path[0]; } __packed; -void lustre_swab_fid2path (struct getinfo_fid2path *gf); +void lustre_swab_fid2path(struct getinfo_fid2path *gf); enum { LAYOUT_INTENT_ACCESS = 0, diff --git a/drivers/staging/lustre/lustre/include/lustre/lustre_user.h b/drivers/staging/lustre/lustre/include/lustre/lustre_user.h index 276906e64..59ba48ac3 100644 --- a/drivers/staging/lustre/lustre/include/lustre/lustre_user.h +++ b/drivers/staging/lustre/lustre/include/lustre/lustre_user.h @@ -193,37 +193,37 @@ struct ost_id { * *INFO - set/get lov_user_mds_data */ /* see for ioctl numberss 101-150 */ -#define LL_IOC_GETFLAGS _IOR ('f', 151, long) -#define LL_IOC_SETFLAGS _IOW ('f', 152, long) -#define LL_IOC_CLRFLAGS _IOW ('f', 153, long) +#define LL_IOC_GETFLAGS _IOR('f', 151, long) +#define LL_IOC_SETFLAGS _IOW('f', 152, long) +#define LL_IOC_CLRFLAGS _IOW('f', 153, long) /* LL_IOC_LOV_SETSTRIPE: See also OBD_IOC_LOV_SETSTRIPE */ -#define LL_IOC_LOV_SETSTRIPE _IOW ('f', 154, long) +#define LL_IOC_LOV_SETSTRIPE _IOW('f', 154, long) /* LL_IOC_LOV_GETSTRIPE: See also OBD_IOC_LOV_GETSTRIPE */ -#define LL_IOC_LOV_GETSTRIPE _IOW ('f', 155, long) +#define LL_IOC_LOV_GETSTRIPE _IOW('f', 155, long) /* LL_IOC_LOV_SETEA: See also OBD_IOC_LOV_SETEA */ -#define LL_IOC_LOV_SETEA _IOW ('f', 156, long) -#define LL_IOC_RECREATE_OBJ _IOW ('f', 157, long) -#define LL_IOC_RECREATE_FID _IOW ('f', 157, struct lu_fid) -#define LL_IOC_GROUP_LOCK _IOW ('f', 158, long) -#define LL_IOC_GROUP_UNLOCK _IOW ('f', 159, long) +#define LL_IOC_LOV_SETEA _IOW('f', 156, long) +#define LL_IOC_RECREATE_OBJ _IOW('f', 157, long) +#define LL_IOC_RECREATE_FID _IOW('f', 157, struct lu_fid) +#define LL_IOC_GROUP_LOCK _IOW('f', 158, long) +#define LL_IOC_GROUP_UNLOCK _IOW('f', 159, long) /* LL_IOC_QUOTACHECK: See also OBD_IOC_QUOTACHECK */ -#define LL_IOC_QUOTACHECK _IOW ('f', 160, int) +#define LL_IOC_QUOTACHECK _IOW('f', 160, int) /* LL_IOC_POLL_QUOTACHECK: See also OBD_IOC_POLL_QUOTACHECK */ -#define LL_IOC_POLL_QUOTACHECK _IOR ('f', 161, struct if_quotacheck *) +#define LL_IOC_POLL_QUOTACHECK _IOR('f', 161, struct if_quotacheck *) /* LL_IOC_QUOTACTL: See also OBD_IOC_QUOTACTL */ #define LL_IOC_QUOTACTL _IOWR('f', 162, struct if_quotactl) #define IOC_OBD_STATFS _IOWR('f', 164, struct obd_statfs *) #define IOC_LOV_GETINFO _IOWR('f', 165, struct lov_user_mds_data *) -#define LL_IOC_FLUSHCTX _IOW ('f', 166, long) -#define LL_IOC_RMTACL _IOW ('f', 167, long) -#define LL_IOC_GETOBDCOUNT _IOR ('f', 168, long) +#define LL_IOC_FLUSHCTX _IOW('f', 166, long) +#define LL_IOC_RMTACL _IOW('f', 167, long) +#define LL_IOC_GETOBDCOUNT _IOR('f', 168, long) #define LL_IOC_LLOOP_ATTACH _IOWR('f', 169, long) #define LL_IOC_LLOOP_DETACH _IOWR('f', 170, long) #define LL_IOC_LLOOP_INFO _IOWR('f', 171, struct lu_fid) #define LL_IOC_LLOOP_DETACH_BYDEV _IOWR('f', 172, long) -#define LL_IOC_PATH2FID _IOR ('f', 173, long) +#define LL_IOC_PATH2FID _IOR('f', 173, long) #define LL_IOC_GET_CONNECT_FLAGS _IOWR('f', 174, __u64 *) -#define LL_IOC_GET_MDTIDX _IOR ('f', 175, int) +#define LL_IOC_GET_MDTIDX _IOR('f', 175, int) /* see for ioctl numbers 177-210 */ @@ -676,7 +676,12 @@ static inline const char *changelog_type2str(int type) #define CLF_UNLINK_HSM_EXISTS 0x0002 /* File has something in HSM */ /* HSM cleaning needed */ /* Flags for rename */ -#define CLF_RENAME_LAST 0x0001 /* rename unlink last hardlink of target */ +#define CLF_RENAME_LAST 0x0001 /* rename unlink last hardlink of + * target + */ +#define CLF_RENAME_LAST_EXISTS 0x0002 /* rename unlink last hardlink of target + * has an archive in backend + */ /* Flags for HSM */ /* 12b used (from high weight to low weight): @@ -833,9 +838,8 @@ struct ioc_data_version { __u64 idv_flags; /* See LL_DV_xxx */ }; -#define LL_DV_NOFLUSH 0x01 /* Do not take READ EXTENT LOCK before sampling - * version. Dirty caches are left unchanged. - */ +#define LL_DV_RD_FLUSH BIT(0) /* Flush dirty pages from clients */ +#define LL_DV_WR_FLUSH BIT(1) /* Flush all caching pages from clients */ #ifndef offsetof # define offsetof(typ, memb) ((unsigned long)((char *)&(((typ *)0)->memb))) @@ -1095,12 +1099,12 @@ struct hsm_action_list { __u32 padding1; char hal_fsname[0]; /* null-terminated */ /* struct hsm_action_item[hal_count] follows, aligned on 8-byte - * boundaries. See hai_zero + * boundaries. See hai_first */ } __packed; #ifndef HAVE_CFS_SIZE_ROUND -static inline int cfs_size_round (int val) +static inline int cfs_size_round(int val) { return (val + 7) & (~0x7); } @@ -1109,7 +1113,7 @@ static inline int cfs_size_round (int val) #endif /* Return pointer to first hai in action list */ -static inline struct hsm_action_item *hai_zero(struct hsm_action_list *hal) +static inline struct hsm_action_item *hai_first(struct hsm_action_list *hal) { return (struct hsm_action_item *)(hal->hal_fsname + cfs_size_round(strlen(hal-> \ @@ -1131,7 +1135,7 @@ static inline int hal_size(struct hsm_action_list *hal) struct hsm_action_item *hai; sz = sizeof(*hal) + cfs_size_round(strlen(hal->hal_fsname) + 1); - hai = hai_zero(hal); + hai = hai_first(hal); for (i = 0; i < hal->hal_count; i++, hai = hai_next(hai)) sz += cfs_size_round(hai->hai_len); diff --git a/drivers/staging/lustre/lustre/include/lustre_cfg.h b/drivers/staging/lustre/lustre/include/lustre_cfg.h index bb16ae980..e229e91f7 100644 --- a/drivers/staging/lustre/lustre/include/lustre_cfg.h +++ b/drivers/staging/lustre/lustre/include/lustre_cfg.h @@ -161,7 +161,7 @@ static inline void *lustre_cfg_buf(struct lustre_cfg *lcfg, int index) int offset; int bufcount; - LASSERT (index >= 0); + LASSERT(index >= 0); bufcount = lcfg->lcfg_bufcount; if (index >= bufcount) diff --git a/drivers/staging/lustre/lustre/include/lustre_disk.h b/drivers/staging/lustre/lustre/include/lustre_disk.h index 95fd36063..b36821ffb 100644 --- a/drivers/staging/lustre/lustre/include/lustre_disk.h +++ b/drivers/staging/lustre/lustre/include/lustre_disk.h @@ -130,7 +130,6 @@ struct lustre_sb_info { struct lustre_mount_data *lsi_lmd; /* mount command info */ struct ll_sb_info *lsi_llsbi; /* add'l client sbi info */ struct dt_device *lsi_dt_dev; /* dt device to access disk fs*/ - struct vfsmount *lsi_srv_mnt; /* the one server mount */ atomic_t lsi_mounts; /* references to the srv_mnt */ char lsi_svname[MTI_NAME_MAXLEN]; char lsi_osd_obdname[64]; @@ -158,7 +157,6 @@ struct lustre_sb_info { struct lustre_mount_info { char *lmi_name; struct super_block *lmi_sb; - struct vfsmount *lmi_mnt; struct list_head lmi_list_chain; }; diff --git a/drivers/staging/lustre/lustre/include/lustre_dlm.h b/drivers/staging/lustre/lustre/include/lustre_dlm.h index 8b0364f71..9cade144f 100644 --- a/drivers/staging/lustre/lustre/include/lustre_dlm.h +++ b/drivers/staging/lustre/lustre/include/lustre_dlm.h @@ -71,6 +71,7 @@ struct obd_device; */ enum ldlm_error { ELDLM_OK = 0, + ELDLM_LOCK_MATCHED = 1, ELDLM_LOCK_CHANGED = 300, ELDLM_LOCK_ABORTED = 301, @@ -269,7 +270,7 @@ struct ldlm_pool { struct completion pl_kobj_unregister; }; -typedef int (*ldlm_cancel_for_recovery)(struct ldlm_lock *lock); +typedef int (*ldlm_cancel_cbt)(struct ldlm_lock *lock); /** * LVB operations. @@ -446,8 +447,11 @@ struct ldlm_namespace { /** Limit of parallel AST RPC count. */ unsigned ns_max_parallel_ast; - /** Callback to cancel locks before replaying it during recovery. */ - ldlm_cancel_for_recovery ns_cancel_for_recovery; + /** + * Callback to check if a lock is good to be canceled by ELC or + * during recovery. + */ + ldlm_cancel_cbt ns_cancel; /** LDLM lock stats */ struct lprocfs_stats *ns_stats; @@ -479,9 +483,9 @@ static inline int ns_connect_lru_resize(struct ldlm_namespace *ns) } static inline void ns_register_cancel(struct ldlm_namespace *ns, - ldlm_cancel_for_recovery arg) + ldlm_cancel_cbt arg) { - ns->ns_cancel_for_recovery = arg; + ns->ns_cancel = arg; } struct ldlm_lock; diff --git a/drivers/staging/lustre/lustre/include/lustre_dlm_flags.h b/drivers/staging/lustre/lustre/include/lustre_dlm_flags.h index 7f2ba2ffe..e7e0c21a9 100644 --- a/drivers/staging/lustre/lustre/include/lustre_dlm_flags.h +++ b/drivers/staging/lustre/lustre/include/lustre_dlm_flags.h @@ -37,17 +37,11 @@ /** l_flags bits marked as "gone" bits */ #define LDLM_FL_GONE_MASK 0x0006004000000000ULL -/** l_flags bits marked as "hide_lock" bits */ -#define LDLM_FL_HIDE_LOCK_MASK 0x0000206400000000ULL - /** l_flags bits marked as "inherit" bits */ #define LDLM_FL_INHERIT_MASK 0x0000000000800000ULL -/** l_flags bits marked as "local_only" bits */ -#define LDLM_FL_LOCAL_ONLY_MASK 0x00FFFFFF00000000ULL - -/** l_flags bits marked as "on_wire" bits */ -#define LDLM_FL_ON_WIRE_MASK 0x00000000C08F932FULL +/** l_flags bits marked as "off_wire" bits */ +#define LDLM_FL_OFF_WIRE_MASK 0x00FFFFFF00000000ULL /** extent, mode, or resource changed */ #define LDLM_FL_LOCK_CHANGED 0x0000000000000001ULL /* bit 0 */ @@ -204,7 +198,7 @@ #define ldlm_set_cancel(_l) LDLM_SET_FLAG((_l), 1ULL << 36) #define ldlm_clear_cancel(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 36) -/** whatever it might mean */ +/** whatever it might mean -- never transmitted? */ #define LDLM_FL_LOCAL_ONLY 0x0000002000000000ULL /* bit 37 */ #define ldlm_is_local_only(_l) LDLM_TEST_FLAG((_l), 1ULL << 37) #define ldlm_set_local_only(_l) LDLM_SET_FLAG((_l), 1ULL << 37) @@ -287,18 +281,18 @@ * has canceled this lock and is waiting for rpc_lock which is taken by * the first operation. LDLM_FL_BL_AST is set by ldlm_callback_handler() in * the lock to prevent the Early Lock Cancel (ELC) code from cancelling it. - * - * LDLM_FL_BL_DONE is to be set by ldlm_cancel_callback() when lock cache is - * dropped to let ldlm_callback_handler() return EINVAL to the server. It - * is used when ELC RPC is already prepared and is waiting for rpc_lock, - * too late to send a separate CANCEL RPC. */ #define LDLM_FL_BL_AST 0x0000400000000000ULL /* bit 46 */ #define ldlm_is_bl_ast(_l) LDLM_TEST_FLAG((_l), 1ULL << 46) #define ldlm_set_bl_ast(_l) LDLM_SET_FLAG((_l), 1ULL << 46) #define ldlm_clear_bl_ast(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 46) -/** whatever it might mean */ +/** + * Set by ldlm_cancel_callback() when lock cache is dropped to let + * ldlm_callback_handler() return EINVAL to the server. It is used when + * ELC RPC is already prepared and is waiting for rpc_lock, too late to + * send a separate CANCEL RPC. + */ #define LDLM_FL_BL_DONE 0x0000800000000000ULL /* bit 47 */ #define ldlm_is_bl_done(_l) LDLM_TEST_FLAG((_l), 1ULL << 47) #define ldlm_set_bl_done(_l) LDLM_SET_FLAG((_l), 1ULL << 47) @@ -381,104 +375,16 @@ /** test for ldlm_lock flag bit set */ #define LDLM_TEST_FLAG(_l, _b) (((_l)->l_flags & (_b)) != 0) +/** multi-bit test: are any of mask bits set? */ +#define LDLM_HAVE_MASK(_l, _m) ((_l)->l_flags & LDLM_FL_##_m##_MASK) + /** set a ldlm_lock flag bit */ #define LDLM_SET_FLAG(_l, _b) ((_l)->l_flags |= (_b)) /** clear a ldlm_lock flag bit */ #define LDLM_CLEAR_FLAG(_l, _b) ((_l)->l_flags &= ~(_b)) -/** Mask of flags inherited from parent lock when doing intents. */ -#define LDLM_INHERIT_FLAGS LDLM_FL_INHERIT_MASK - -/** Mask of Flags sent in AST lock_flags to map into the receiving lock. */ -#define LDLM_AST_FLAGS LDLM_FL_AST_MASK - /** @} subgroup */ /** @} group */ -#ifdef WIRESHARK_COMPILE -static int hf_lustre_ldlm_fl_lock_changed = -1; -static int hf_lustre_ldlm_fl_block_granted = -1; -static int hf_lustre_ldlm_fl_block_conv = -1; -static int hf_lustre_ldlm_fl_block_wait = -1; -static int hf_lustre_ldlm_fl_ast_sent = -1; -static int hf_lustre_ldlm_fl_replay = -1; -static int hf_lustre_ldlm_fl_intent_only = -1; -static int hf_lustre_ldlm_fl_has_intent = -1; -static int hf_lustre_ldlm_fl_flock_deadlock = -1; -static int hf_lustre_ldlm_fl_discard_data = -1; -static int hf_lustre_ldlm_fl_no_timeout = -1; -static int hf_lustre_ldlm_fl_block_nowait = -1; -static int hf_lustre_ldlm_fl_test_lock = -1; -static int hf_lustre_ldlm_fl_cancel_on_block = -1; -static int hf_lustre_ldlm_fl_deny_on_contention = -1; -static int hf_lustre_ldlm_fl_ast_discard_data = -1; -static int hf_lustre_ldlm_fl_fail_loc = -1; -static int hf_lustre_ldlm_fl_skipped = -1; -static int hf_lustre_ldlm_fl_cbpending = -1; -static int hf_lustre_ldlm_fl_wait_noreproc = -1; -static int hf_lustre_ldlm_fl_cancel = -1; -static int hf_lustre_ldlm_fl_local_only = -1; -static int hf_lustre_ldlm_fl_failed = -1; -static int hf_lustre_ldlm_fl_canceling = -1; -static int hf_lustre_ldlm_fl_local = -1; -static int hf_lustre_ldlm_fl_lvb_ready = -1; -static int hf_lustre_ldlm_fl_kms_ignore = -1; -static int hf_lustre_ldlm_fl_cp_reqd = -1; -static int hf_lustre_ldlm_fl_cleaned = -1; -static int hf_lustre_ldlm_fl_atomic_cb = -1; -static int hf_lustre_ldlm_fl_bl_ast = -1; -static int hf_lustre_ldlm_fl_bl_done = -1; -static int hf_lustre_ldlm_fl_no_lru = -1; -static int hf_lustre_ldlm_fl_fail_notified = -1; -static int hf_lustre_ldlm_fl_destroyed = -1; -static int hf_lustre_ldlm_fl_server_lock = -1; -static int hf_lustre_ldlm_fl_res_locked = -1; -static int hf_lustre_ldlm_fl_waited = -1; -static int hf_lustre_ldlm_fl_ns_srv = -1; -static int hf_lustre_ldlm_fl_excl = -1; - -const value_string lustre_ldlm_flags_vals[] = { - {LDLM_FL_LOCK_CHANGED, "LDLM_FL_LOCK_CHANGED"}, - {LDLM_FL_BLOCK_GRANTED, "LDLM_FL_BLOCK_GRANTED"}, - {LDLM_FL_BLOCK_CONV, "LDLM_FL_BLOCK_CONV"}, - {LDLM_FL_BLOCK_WAIT, "LDLM_FL_BLOCK_WAIT"}, - {LDLM_FL_AST_SENT, "LDLM_FL_AST_SENT"}, - {LDLM_FL_REPLAY, "LDLM_FL_REPLAY"}, - {LDLM_FL_INTENT_ONLY, "LDLM_FL_INTENT_ONLY"}, - {LDLM_FL_HAS_INTENT, "LDLM_FL_HAS_INTENT"}, - {LDLM_FL_FLOCK_DEADLOCK, "LDLM_FL_FLOCK_DEADLOCK"}, - {LDLM_FL_DISCARD_DATA, "LDLM_FL_DISCARD_DATA"}, - {LDLM_FL_NO_TIMEOUT, "LDLM_FL_NO_TIMEOUT"}, - {LDLM_FL_BLOCK_NOWAIT, "LDLM_FL_BLOCK_NOWAIT"}, - {LDLM_FL_TEST_LOCK, "LDLM_FL_TEST_LOCK"}, - {LDLM_FL_CANCEL_ON_BLOCK, "LDLM_FL_CANCEL_ON_BLOCK"}, - {LDLM_FL_DENY_ON_CONTENTION, "LDLM_FL_DENY_ON_CONTENTION"}, - {LDLM_FL_AST_DISCARD_DATA, "LDLM_FL_AST_DISCARD_DATA"}, - {LDLM_FL_FAIL_LOC, "LDLM_FL_FAIL_LOC"}, - {LDLM_FL_SKIPPED, "LDLM_FL_SKIPPED"}, - {LDLM_FL_CBPENDING, "LDLM_FL_CBPENDING"}, - {LDLM_FL_WAIT_NOREPROC, "LDLM_FL_WAIT_NOREPROC"}, - {LDLM_FL_CANCEL, "LDLM_FL_CANCEL"}, - {LDLM_FL_LOCAL_ONLY, "LDLM_FL_LOCAL_ONLY"}, - {LDLM_FL_FAILED, "LDLM_FL_FAILED"}, - {LDLM_FL_CANCELING, "LDLM_FL_CANCELING"}, - {LDLM_FL_LOCAL, "LDLM_FL_LOCAL"}, - {LDLM_FL_LVB_READY, "LDLM_FL_LVB_READY"}, - {LDLM_FL_KMS_IGNORE, "LDLM_FL_KMS_IGNORE"}, - {LDLM_FL_CP_REQD, "LDLM_FL_CP_REQD"}, - {LDLM_FL_CLEANED, "LDLM_FL_CLEANED"}, - {LDLM_FL_ATOMIC_CB, "LDLM_FL_ATOMIC_CB"}, - {LDLM_FL_BL_AST, "LDLM_FL_BL_AST"}, - {LDLM_FL_BL_DONE, "LDLM_FL_BL_DONE"}, - {LDLM_FL_NO_LRU, "LDLM_FL_NO_LRU"}, - {LDLM_FL_FAIL_NOTIFIED, "LDLM_FL_FAIL_NOTIFIED"}, - {LDLM_FL_DESTROYED, "LDLM_FL_DESTROYED"}, - {LDLM_FL_SERVER_LOCK, "LDLM_FL_SERVER_LOCK"}, - {LDLM_FL_RES_LOCKED, "LDLM_FL_RES_LOCKED"}, - {LDLM_FL_WAITED, "LDLM_FL_WAITED"}, - {LDLM_FL_NS_SRV, "LDLM_FL_NS_SRV"}, - {LDLM_FL_EXCL, "LDLM_FL_EXCL"}, - { 0, NULL } -}; -#endif /* WIRESHARK_COMPILE */ + #endif /* LDLM_ALL_FLAGS_MASK */ diff --git a/drivers/staging/lustre/lustre/include/lustre_fid.h b/drivers/staging/lustre/lustre/include/lustre_fid.h index ab4a92390..12e8b585c 100644 --- a/drivers/staging/lustre/lustre/include/lustre_fid.h +++ b/drivers/staging/lustre/lustre/include/lustre_fid.h @@ -308,10 +308,10 @@ static inline int fid_seq_in_fldb(__u64 seq) fid_seq_is_root(seq) || fid_seq_is_dot(seq); } -static inline void lu_last_id_fid(struct lu_fid *fid, __u64 seq) +static inline void lu_last_id_fid(struct lu_fid *fid, __u64 seq, __u32 ost_idx) { if (fid_seq_is_mdt0(seq)) { - fid->f_seq = fid_idif_seq(0, 0); + fid->f_seq = fid_idif_seq(0, ost_idx); } else { LASSERTF(fid_seq_is_norm(seq) || fid_seq_is_echo(seq) || fid_seq_is_idif(seq), "%#llx\n", seq); @@ -498,19 +498,6 @@ static inline void ostid_build_res_name(struct ost_id *oi, } } -static inline void ostid_res_name_to_id(struct ost_id *oi, - struct ldlm_res_id *name) -{ - if (fid_seq_is_mdt0(name->name[LUSTRE_RES_ID_SEQ_OFF])) { - /* old resid */ - ostid_set_seq(oi, name->name[LUSTRE_RES_ID_VER_OID_OFF]); - ostid_set_id(oi, name->name[LUSTRE_RES_ID_SEQ_OFF]); - } else { - /* new resid */ - fid_extract_from_res_name(&oi->oi_fid, name); - } -} - /** * Return true if the resource is for the object identified by this id & group. */ @@ -546,7 +533,8 @@ static inline void ost_fid_build_resid(const struct lu_fid *fid, } static inline void ost_fid_from_resid(struct lu_fid *fid, - const struct ldlm_res_id *name) + const struct ldlm_res_id *name, + int ost_idx) { if (fid_seq_is_mdt0(name->name[LUSTRE_RES_ID_VER_OID_OFF])) { /* old resid */ @@ -554,7 +542,7 @@ static inline void ost_fid_from_resid(struct lu_fid *fid, ostid_set_seq(&oi, name->name[LUSTRE_RES_ID_VER_OID_OFF]); ostid_set_id(&oi, name->name[LUSTRE_RES_ID_SEQ_OFF]); - ostid_to_fid(fid, &oi, 0); + ostid_to_fid(fid, &oi, ost_idx); } else { /* new resid */ fid_extract_from_res_name(fid, name); diff --git a/drivers/staging/lustre/lustre/include/lustre_import.h b/drivers/staging/lustre/lustre/include/lustre_import.h index dac2d84d8..8325c82b3 100644 --- a/drivers/staging/lustre/lustre/include/lustre_import.h +++ b/drivers/staging/lustre/lustre/include/lustre_import.h @@ -109,7 +109,7 @@ static inline char *ptlrpc_import_state_name(enum lustre_imp_state state) "RECOVER", "FULL", "EVICTED", }; - LASSERT (state <= LUSTRE_IMP_EVICTED); + LASSERT(state <= LUSTRE_IMP_EVICTED); return import_state_names[state]; } diff --git a/drivers/staging/lustre/lustre/include/lustre_lib.h b/drivers/staging/lustre/lustre/include/lustre_lib.h index f2223d558..00b976766 100644 --- a/drivers/staging/lustre/lustre/include/lustre_lib.h +++ b/drivers/staging/lustre/lustre/include/lustre_lib.h @@ -280,16 +280,16 @@ static inline void obd_ioctl_freedata(char *buf, int len) #define OBD_IOC_DATA_TYPE long #define OBD_IOC_CREATE _IOWR('f', 101, OBD_IOC_DATA_TYPE) -#define OBD_IOC_DESTROY _IOW ('f', 104, OBD_IOC_DATA_TYPE) +#define OBD_IOC_DESTROY _IOW('f', 104, OBD_IOC_DATA_TYPE) #define OBD_IOC_PREALLOCATE _IOWR('f', 105, OBD_IOC_DATA_TYPE) -#define OBD_IOC_SETATTR _IOW ('f', 107, OBD_IOC_DATA_TYPE) +#define OBD_IOC_SETATTR _IOW('f', 107, OBD_IOC_DATA_TYPE) #define OBD_IOC_GETATTR _IOWR ('f', 108, OBD_IOC_DATA_TYPE) #define OBD_IOC_READ _IOWR('f', 109, OBD_IOC_DATA_TYPE) #define OBD_IOC_WRITE _IOWR('f', 110, OBD_IOC_DATA_TYPE) #define OBD_IOC_STATFS _IOWR('f', 113, OBD_IOC_DATA_TYPE) -#define OBD_IOC_SYNC _IOW ('f', 114, OBD_IOC_DATA_TYPE) +#define OBD_IOC_SYNC _IOW('f', 114, OBD_IOC_DATA_TYPE) #define OBD_IOC_READ2 _IOWR('f', 115, OBD_IOC_DATA_TYPE) #define OBD_IOC_FORMAT _IOWR('f', 116, OBD_IOC_DATA_TYPE) #define OBD_IOC_PARTITION _IOWR('f', 117, OBD_IOC_DATA_TYPE) @@ -308,13 +308,13 @@ static inline void obd_ioctl_freedata(char *buf, int len) #define OBD_IOC_GETDTNAME OBD_IOC_GETNAME #define OBD_IOC_LOV_GET_CONFIG _IOWR('f', 132, OBD_IOC_DATA_TYPE) -#define OBD_IOC_CLIENT_RECOVER _IOW ('f', 133, OBD_IOC_DATA_TYPE) -#define OBD_IOC_PING_TARGET _IOW ('f', 136, OBD_IOC_DATA_TYPE) +#define OBD_IOC_CLIENT_RECOVER _IOW('f', 133, OBD_IOC_DATA_TYPE) +#define OBD_IOC_PING_TARGET _IOW('f', 136, OBD_IOC_DATA_TYPE) #define OBD_IOC_DEC_FS_USE_COUNT _IO ('f', 139) -#define OBD_IOC_NO_TRANSNO _IOW ('f', 140, OBD_IOC_DATA_TYPE) -#define OBD_IOC_SET_READONLY _IOW ('f', 141, OBD_IOC_DATA_TYPE) -#define OBD_IOC_ABORT_RECOVERY _IOR ('f', 142, OBD_IOC_DATA_TYPE) +#define OBD_IOC_NO_TRANSNO _IOW('f', 140, OBD_IOC_DATA_TYPE) +#define OBD_IOC_SET_READONLY _IOW('f', 141, OBD_IOC_DATA_TYPE) +#define OBD_IOC_ABORT_RECOVERY _IOR('f', 142, OBD_IOC_DATA_TYPE) #define OBD_IOC_ROOT_SQUASH _IOWR('f', 143, OBD_IOC_DATA_TYPE) @@ -324,27 +324,27 @@ static inline void obd_ioctl_freedata(char *buf, int len) #define OBD_IOC_CLOSE_UUID _IOWR ('f', 147, OBD_IOC_DATA_TYPE) -#define OBD_IOC_CHANGELOG_SEND _IOW ('f', 148, OBD_IOC_DATA_TYPE) +#define OBD_IOC_CHANGELOG_SEND _IOW('f', 148, OBD_IOC_DATA_TYPE) #define OBD_IOC_GETDEVICE _IOWR ('f', 149, OBD_IOC_DATA_TYPE) #define OBD_IOC_FID2PATH _IOWR ('f', 150, OBD_IOC_DATA_TYPE) /* see also for ioctls 151-153 */ /* OBD_IOC_LOV_SETSTRIPE: See also LL_IOC_LOV_SETSTRIPE */ -#define OBD_IOC_LOV_SETSTRIPE _IOW ('f', 154, OBD_IOC_DATA_TYPE) +#define OBD_IOC_LOV_SETSTRIPE _IOW('f', 154, OBD_IOC_DATA_TYPE) /* OBD_IOC_LOV_GETSTRIPE: See also LL_IOC_LOV_GETSTRIPE */ -#define OBD_IOC_LOV_GETSTRIPE _IOW ('f', 155, OBD_IOC_DATA_TYPE) +#define OBD_IOC_LOV_GETSTRIPE _IOW('f', 155, OBD_IOC_DATA_TYPE) /* OBD_IOC_LOV_SETEA: See also LL_IOC_LOV_SETEA */ -#define OBD_IOC_LOV_SETEA _IOW ('f', 156, OBD_IOC_DATA_TYPE) +#define OBD_IOC_LOV_SETEA _IOW('f', 156, OBD_IOC_DATA_TYPE) /* see for ioctls 157-159 */ /* OBD_IOC_QUOTACHECK: See also LL_IOC_QUOTACHECK */ -#define OBD_IOC_QUOTACHECK _IOW ('f', 160, int) +#define OBD_IOC_QUOTACHECK _IOW('f', 160, int) /* OBD_IOC_POLL_QUOTACHECK: See also LL_IOC_POLL_QUOTACHECK */ -#define OBD_IOC_POLL_QUOTACHECK _IOR ('f', 161, struct if_quotacheck *) +#define OBD_IOC_POLL_QUOTACHECK _IOR('f', 161, struct if_quotacheck *) /* OBD_IOC_QUOTACTL: See also LL_IOC_QUOTACTL */ #define OBD_IOC_QUOTACTL _IOWR('f', 162, struct if_quotactl) /* see also for ioctls 163-176 */ -#define OBD_IOC_CHANGELOG_REG _IOW ('f', 177, struct obd_ioctl_data) -#define OBD_IOC_CHANGELOG_DEREG _IOW ('f', 178, struct obd_ioctl_data) -#define OBD_IOC_CHANGELOG_CLEAR _IOW ('f', 179, struct obd_ioctl_data) +#define OBD_IOC_CHANGELOG_REG _IOW('f', 177, struct obd_ioctl_data) +#define OBD_IOC_CHANGELOG_DEREG _IOW('f', 178, struct obd_ioctl_data) +#define OBD_IOC_CHANGELOG_CLEAR _IOW('f', 179, struct obd_ioctl_data) #define OBD_IOC_RECORD _IOWR('f', 180, OBD_IOC_DATA_TYPE) #define OBD_IOC_ENDRECORD _IOWR('f', 181, OBD_IOC_DATA_TYPE) #define OBD_IOC_PARSE _IOWR('f', 182, OBD_IOC_DATA_TYPE) @@ -352,7 +352,7 @@ static inline void obd_ioctl_freedata(char *buf, int len) #define OBD_IOC_PROCESS_CFG _IOWR('f', 184, OBD_IOC_DATA_TYPE) #define OBD_IOC_DUMP_LOG _IOWR('f', 185, OBD_IOC_DATA_TYPE) #define OBD_IOC_CLEAR_LOG _IOWR('f', 186, OBD_IOC_DATA_TYPE) -#define OBD_IOC_PARAM _IOW ('f', 187, OBD_IOC_DATA_TYPE) +#define OBD_IOC_PARAM _IOW('f', 187, OBD_IOC_DATA_TYPE) #define OBD_IOC_POOL _IOWR('f', 188, OBD_IOC_DATA_TYPE) #define OBD_IOC_REPLACE_NIDS _IOWR('f', 189, OBD_IOC_DATA_TYPE) @@ -522,6 +522,28 @@ struct l_wait_info { sigmask(SIGTERM) | sigmask(SIGQUIT) | \ sigmask(SIGALRM)) +/** + * wait_queue_t of Linux (version < 2.6.34) is a FIFO list for exclusively + * waiting threads, which is not always desirable because all threads will + * be waken up again and again, even user only needs a few of them to be + * active most time. This is not good for performance because cache can + * be polluted by different threads. + * + * LIFO list can resolve this problem because we always wakeup the most + * recent active thread by default. + * + * NB: please don't call non-exclusive & exclusive wait on the same + * waitq if add_wait_queue_exclusive_head is used. + */ +#define add_wait_queue_exclusive_head(waitq, link) \ +{ \ + unsigned long flags; \ + \ + spin_lock_irqsave(&((waitq)->lock), flags); \ + __add_wait_queue_exclusive(waitq, link); \ + spin_unlock_irqrestore(&((waitq)->lock), flags); \ +} + /* * wait for @condition to become true, but no longer than timeout, specified * by @info. @@ -578,7 +600,7 @@ do { \ \ if (condition) \ break; \ - if (cfs_signal_pending()) { \ + if (signal_pending(current)) { \ if (info->lwi_on_signal && \ (__timeout == 0 || __allow_intr)) { \ if (info->lwi_on_signal != LWI_ON_SIGNAL_NOOP) \ diff --git a/drivers/staging/lustre/lustre/include/lustre_mdc.h b/drivers/staging/lustre/lustre/include/lustre_mdc.h index af77eb359..f267ff8a6 100644 --- a/drivers/staging/lustre/lustre/include/lustre_mdc.h +++ b/drivers/staging/lustre/lustre/include/lustre_mdc.h @@ -64,9 +64,27 @@ struct obd_export; struct ptlrpc_request; struct obd_device; +/** + * Serializes in-flight MDT-modifying RPC requests to preserve idempotency. + * + * This mutex is used to implement execute-once semantics on the MDT. + * The MDT stores the last transaction ID and result for every client in + * its last_rcvd file. If the client doesn't get a reply, it can safely + * resend the request and the MDT will reconstruct the reply being aware + * that the request has already been executed. Without this lock, + * execution status of concurrent in-flight requests would be + * overwritten. + * + * This design limits the extent to which we can keep a full pipeline of + * in-flight requests from a single client. This limitation could be + * overcome by allowing multiple slots per client in the last_rcvd file. + */ struct mdc_rpc_lock { + /** Lock protecting in-flight RPC concurrency. */ struct mutex rpcl_mutex; + /** Intent associated with currently executing request. */ struct lookup_intent *rpcl_it; + /** Used for MDS/RPC load testing purposes. */ int rpcl_fakes; }; diff --git a/drivers/staging/lustre/lustre/include/lustre_net.h b/drivers/staging/lustre/lustre/include/lustre_net.h index 69586a522..a7973d5de 100644 --- a/drivers/staging/lustre/lustre/include/lustre_net.h +++ b/drivers/staging/lustre/lustre/include/lustre_net.h @@ -1327,7 +1327,9 @@ struct ptlrpc_request { /* allow the req to be sent if the import is in recovery * status */ - rq_allow_replay:1; + rq_allow_replay:1, + /* bulk request, sent to server, but uncommitted */ + rq_unstable:1; unsigned int rq_nr_resend; diff --git a/drivers/staging/lustre/lustre/include/lustre_param.h b/drivers/staging/lustre/lustre/include/lustre_param.h index 383fe6feb..a42cf90c1 100644 --- a/drivers/staging/lustre/lustre/include/lustre_param.h +++ b/drivers/staging/lustre/lustre/include/lustre_param.h @@ -89,6 +89,7 @@ int class_parse_nid_quiet(char *buf, lnet_nid_t *nid, char **endh); /* Prefixes for parameters handled by obd's proc methods (XXX_process_config) */ #define PARAM_OST "ost." +#define PARAM_OSD "osd." #define PARAM_OSC "osc." #define PARAM_MDT "mdt." #define PARAM_MDD "mdd." diff --git a/drivers/staging/lustre/lustre/include/lustre_req_layout.h b/drivers/staging/lustre/lustre/include/lustre_req_layout.h index b2e67fcf9..0aac4391e 100644 --- a/drivers/staging/lustre/lustre/include/lustre_req_layout.h +++ b/drivers/staging/lustre/lustre/include/lustre_req_layout.h @@ -137,6 +137,7 @@ extern struct req_format RQF_MGS_CONFIG_READ; /* fid/fld req_format */ extern struct req_format RQF_SEQ_QUERY; extern struct req_format RQF_FLD_QUERY; +extern struct req_format RQF_FLD_READ; /* MDS req_format */ extern struct req_format RQF_MDS_CONNECT; extern struct req_format RQF_MDS_DISCONNECT; @@ -199,7 +200,7 @@ extern struct req_format RQF_OST_BRW_READ; extern struct req_format RQF_OST_BRW_WRITE; extern struct req_format RQF_OST_STATFS; extern struct req_format RQF_OST_SET_GRANT_INFO; -extern struct req_format RQF_OST_GET_INFO_GENERIC; +extern struct req_format RQF_OST_GET_INFO; extern struct req_format RQF_OST_GET_INFO_LAST_ID; extern struct req_format RQF_OST_GET_INFO_LAST_FID; extern struct req_format RQF_OST_SET_INFO_LAST_FID; diff --git a/drivers/staging/lustre/lustre/include/obd.h b/drivers/staging/lustre/lustre/include/obd.h index 4264d9765..2d926e0ee 100644 --- a/drivers/staging/lustre/lustre/include/obd.h +++ b/drivers/staging/lustre/lustre/include/obd.h @@ -37,7 +37,7 @@ #ifndef __OBD_H #define __OBD_H -#include "linux/obd.h" +#include #define IOC_OSC_TYPE 'h' #define IOC_OSC_MIN_NR 20 @@ -54,6 +54,7 @@ #include "lustre_export.h" #include "lustre_fid.h" #include "lustre_fld.h" +#include "lustre_intent.h" #define MAX_OBD_DEVICES 8192 @@ -165,9 +166,6 @@ struct obd_info { obd_enqueue_update_f oi_cb_up; }; -void lov_stripe_lock(struct lov_stripe_md *md); -void lov_stripe_unlock(struct lov_stripe_md *md); - struct obd_type { struct list_head typ_chain; struct obd_ops *typ_dt_ops; @@ -293,14 +291,10 @@ struct client_obd { * blocking everywhere, but we don't want to slow down fast-path of * our main platform.) * - * Exact type of ->cl_loi_list_lock is defined in arch/obd.h together - * with client_obd_list_{un,}lock() and - * client_obd_list_lock_{init,done}() functions. - * * NB by Jinshan: though field names are still _loi_, but actually * osc_object{}s are in the list. */ - struct client_obd_lock cl_loi_list_lock; + spinlock_t cl_loi_list_lock; struct list_head cl_loi_ready_list; struct list_head cl_loi_hp_ready_list; struct list_head cl_loi_write_list; @@ -327,7 +321,8 @@ struct client_obd { atomic_t cl_lru_shrinkers; atomic_t cl_lru_in_list; struct list_head cl_lru_list; /* lru page list */ - struct client_obd_lock cl_lru_list_lock; /* page list protector */ + spinlock_t cl_lru_list_lock; /* page list protector */ + atomic_t cl_unstable_count; /* number of in flight destroy rpcs is limited to max_rpcs_in_flight */ atomic_t cl_destroy_in_flight; @@ -364,6 +359,7 @@ struct client_obd { /* ptlrpc work for writeback in ptlrpcd context */ void *cl_writeback_work; + void *cl_lru_work; /* hash tables for osc_quota_info */ struct cfs_hash *cl_quota_hash[MAXQUOTAS]; }; @@ -391,45 +387,9 @@ struct ost_pool { struct rw_semaphore op_rw_sem; /* to protect ost_pool use */ }; -/* Round-robin allocator data */ -struct lov_qos_rr { - __u32 lqr_start_idx; /* start index of new inode */ - __u32 lqr_offset_idx; /* aliasing for start_idx */ - int lqr_start_count; /* reseed counter */ - struct ost_pool lqr_pool; /* round-robin optimized list */ - unsigned long lqr_dirty:1; /* recalc round-robin list */ -}; - /* allow statfs data caching for 1 second */ #define OBD_STATFS_CACHE_SECONDS 1 -struct lov_statfs_data { - struct obd_info lsd_oi; - struct obd_statfs lsd_statfs; -}; - -/* Stripe placement optimization */ -struct lov_qos { - struct list_head lq_oss_list; /* list of OSSs that targets use */ - struct rw_semaphore lq_rw_sem; - __u32 lq_active_oss_count; - unsigned int lq_prio_free; /* priority for free space */ - unsigned int lq_threshold_rr;/* priority for rr */ - struct lov_qos_rr lq_rr; /* round robin qos data */ - unsigned long lq_dirty:1, /* recalc qos data */ - lq_same_space:1,/* the ost's all have approx. - * the same space avail - */ - lq_reset:1, /* zero current penalties */ - lq_statfs_in_progress:1; /* statfs op in - progress */ - /* qos statfs data */ - struct lov_statfs_data *lq_statfs_data; - wait_queue_head_t lq_statfs_waitq; /* waitqueue to notify statfs - * requests completion - */ -}; - struct lov_tgt_desc { struct list_head ltd_kill; struct obd_uuid ltd_uuid; @@ -442,25 +402,6 @@ struct lov_tgt_desc { ltd_reap:1; /* should this target be deleted */ }; -/* Pool metadata */ -#define pool_tgt_size(_p) _p->pool_obds.op_size -#define pool_tgt_count(_p) _p->pool_obds.op_count -#define pool_tgt_array(_p) _p->pool_obds.op_array -#define pool_tgt_rw_sem(_p) _p->pool_obds.op_rw_sem - -struct pool_desc { - char pool_name[LOV_MAXPOOLNAME + 1]; /* name of pool */ - struct ost_pool pool_obds; /* pool members */ - atomic_t pool_refcount; /* pool ref. counter */ - struct lov_qos_rr pool_rr; /* round robin qos */ - struct hlist_node pool_hash; /* access by poolname */ - struct list_head pool_list; /* serial access */ - struct dentry *pool_debugfs_entry; /* file in debugfs */ - struct obd_device *pool_lobd; /* obd of the lov/lod to which - * this pool belongs - */ -}; - struct lov_obd { struct lov_desc desc; struct lov_tgt_desc **lov_tgts; /* sparse array */ @@ -468,8 +409,6 @@ struct lov_obd { struct mutex lov_lock; struct obd_connect_data lov_ocd; atomic_t lov_refcount; - __u32 lov_tgt_count; /* how many OBD's */ - __u32 lov_active_tgt_count; /* how many active */ __u32 lov_death_row;/* tgts scheduled to be deleted */ __u32 lov_tgt_size; /* size of tgts array */ int lov_connects; @@ -479,7 +418,7 @@ struct lov_obd { struct dentry *lov_pool_debugfs_entry; enum lustre_sec_part lov_sp_me; - /* Cached LRU pages from upper layer */ + /* Cached LRU and unstable data from upper layer */ void *lov_cache; struct rw_semaphore lov_notify_lock; @@ -511,7 +450,7 @@ struct lmv_obd { struct obd_uuid cluuid; struct obd_export *exp; - struct mutex init_mutex; + struct mutex lmv_init_mutex; int connected; int max_easize; int max_def_easize; diff --git a/drivers/staging/lustre/lustre/include/obd_cksum.h b/drivers/staging/lustre/lustre/include/obd_cksum.h index 637fa2211..f6c18df90 100644 --- a/drivers/staging/lustre/lustre/include/obd_cksum.h +++ b/drivers/staging/lustre/lustre/include/obd_cksum.h @@ -35,6 +35,7 @@ #ifndef __OBD_CKSUM #define __OBD_CKSUM #include "../../include/linux/libcfs/libcfs.h" +#include "../../include/linux/libcfs/libcfs_crypto.h" #include "lustre/lustre_idl.h" static inline unsigned char cksum_obd2cfs(enum cksum_type cksum_type) diff --git a/drivers/staging/lustre/lustre/include/obd_class.h b/drivers/staging/lustre/lustre/include/obd_class.h index 706869f8c..32863bcb3 100644 --- a/drivers/staging/lustre/lustre/include/obd_class.h +++ b/drivers/staging/lustre/lustre/include/obd_class.h @@ -477,7 +477,7 @@ static inline int obd_setup(struct obd_device *obd, struct lustre_cfg *cfg) struct lu_context session_ctx; struct lu_env env; - lu_context_init(&session_ctx, LCT_SESSION); + lu_context_init(&session_ctx, LCT_SESSION | LCT_SERVER_SESSION); session_ctx.lc_thread = NULL; lu_context_enter(&session_ctx); @@ -490,8 +490,9 @@ static inline int obd_setup(struct obd_device *obd, struct lustre_cfg *cfg) obd->obd_lu_dev = d; d->ld_obd = obd; rc = 0; - } else + } else { rc = PTR_ERR(d); + } } lu_context_exit(&session_ctx); lu_context_fini(&session_ctx); diff --git a/drivers/staging/lustre/lustre/include/obd_support.h b/drivers/staging/lustre/lustre/include/obd_support.h index f8ee3a325..60034d39b 100644 --- a/drivers/staging/lustre/lustre/include/obd_support.h +++ b/drivers/staging/lustre/lustre/include/obd_support.h @@ -58,6 +58,7 @@ extern int at_early_margin; extern int at_extra; extern unsigned int obd_sync_filter; extern unsigned int obd_max_dirty_pages; +extern atomic_t obd_unstable_pages; extern atomic_t obd_dirty_pages; extern atomic_t obd_dirty_transit_pages; extern char obd_jobid_var[]; @@ -289,6 +290,7 @@ extern char obd_jobid_var[]; #define OBD_FAIL_OST_ENOINO 0x229 #define OBD_FAIL_OST_DQACQ_NET 0x230 #define OBD_FAIL_OST_STATFS_EINPROGRESS 0x231 +#define OBD_FAIL_OST_SET_INFO_NET 0x232 #define OBD_FAIL_LDLM 0x300 #define OBD_FAIL_LDLM_NAMESPACE_NEW 0x301 @@ -319,6 +321,7 @@ extern char obd_jobid_var[]; #define OBD_FAIL_LDLM_AGL_DELAY 0x31a #define OBD_FAIL_LDLM_AGL_NOLOCK 0x31b #define OBD_FAIL_LDLM_OST_LVB 0x31c +#define OBD_FAIL_LDLM_ENQUEUE_HANG 0x31d /* LOCKLESS IO */ #define OBD_FAIL_LDLM_SET_CONTENTION 0x385 @@ -426,6 +429,7 @@ extern char obd_jobid_var[]; #define OBD_FAIL_FLD 0x1100 #define OBD_FAIL_FLD_QUERY_NET 0x1101 +#define OBD_FAIL_FLD_READ_NET 0x1102 #define OBD_FAIL_SEC_CTX 0x1200 #define OBD_FAIL_SEC_CTX_INIT_NET 0x1201 diff --git a/drivers/staging/lustre/lustre/lclient/glimpse.c b/drivers/staging/lustre/lustre/lclient/glimpse.c deleted file mode 100644 index c4e8a0878..000000000 --- a/drivers/staging/lustre/lustre/lclient/glimpse.c +++ /dev/null @@ -1,270 +0,0 @@ -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. - * - * GPL HEADER END - */ -/* - * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * glimpse code shared between vvp and liblustre (and other Lustre clients in - * the future). - * - * Author: Nikita Danilov - * Author: Oleg Drokin - */ - -#include "../../include/linux/libcfs/libcfs.h" -#include "../include/obd_class.h" -#include "../include/obd_support.h" -#include "../include/obd.h" - -#include "../include/lustre_dlm.h" -#include "../include/lustre_lite.h" -#include "../include/lustre_mdc.h" -#include -#include - -#include "../include/cl_object.h" -#include "../include/lclient.h" -#include "../llite/llite_internal.h" - -static const struct cl_lock_descr whole_file = { - .cld_start = 0, - .cld_end = CL_PAGE_EOF, - .cld_mode = CLM_READ -}; - -/* - * Check whether file has possible unwriten pages. - * - * \retval 1 file is mmap-ed or has dirty pages - * 0 otherwise - */ -blkcnt_t dirty_cnt(struct inode *inode) -{ - blkcnt_t cnt = 0; - struct ccc_object *vob = cl_inode2ccc(inode); - void *results[1]; - - if (inode->i_mapping) - cnt += radix_tree_gang_lookup_tag(&inode->i_mapping->page_tree, - results, 0, 1, - PAGECACHE_TAG_DIRTY); - if (cnt == 0 && atomic_read(&vob->cob_mmap_cnt) > 0) - cnt = 1; - - return (cnt > 0) ? 1 : 0; -} - -int cl_glimpse_lock(const struct lu_env *env, struct cl_io *io, - struct inode *inode, struct cl_object *clob, int agl) -{ - struct cl_lock_descr *descr = &ccc_env_info(env)->cti_descr; - struct cl_inode_info *lli = cl_i2info(inode); - const struct lu_fid *fid = lu_object_fid(&clob->co_lu); - struct ccc_io *cio = ccc_env_io(env); - struct cl_lock *lock; - int result; - - result = 0; - if (!(lli->lli_flags & LLIF_MDS_SIZE_LOCK)) { - CDEBUG(D_DLMTRACE, "Glimpsing inode "DFID"\n", PFID(fid)); - if (lli->lli_has_smd) { - /* NOTE: this looks like DLM lock request, but it may - * not be one. Due to CEF_ASYNC flag (translated - * to LDLM_FL_HAS_INTENT by osc), this is - * glimpse request, that won't revoke any - * conflicting DLM locks held. Instead, - * ll_glimpse_callback() will be called on each - * client holding a DLM lock against this file, - * and resulting size will be returned for each - * stripe. DLM lock on [0, EOF] is acquired only - * if there were no conflicting locks. If there - * were conflicting locks, enqueuing or waiting - * fails with -ENAVAIL, but valid inode - * attributes are returned anyway. - */ - *descr = whole_file; - descr->cld_obj = clob; - descr->cld_mode = CLM_PHANTOM; - descr->cld_enq_flags = CEF_ASYNC | CEF_MUST; - if (agl) - descr->cld_enq_flags |= CEF_AGL; - cio->cui_glimpse = 1; - /* - * CEF_ASYNC is used because glimpse sub-locks cannot - * deadlock (because they never conflict with other - * locks) and, hence, can be enqueued out-of-order. - * - * CEF_MUST protects glimpse lock from conversion into - * a lockless mode. - */ - lock = cl_lock_request(env, io, descr, "glimpse", - current); - cio->cui_glimpse = 0; - - if (!lock) - return 0; - - if (IS_ERR(lock)) - return PTR_ERR(lock); - - LASSERT(agl == 0); - result = cl_wait(env, lock); - if (result == 0) { - cl_merge_lvb(env, inode); - if (cl_isize_read(inode) > 0 && - inode->i_blocks == 0) { - /* - * LU-417: Add dirty pages block count - * lest i_blocks reports 0, some "cp" or - * "tar" may think it's a completely - * sparse file and skip it. - */ - inode->i_blocks = dirty_cnt(inode); - } - cl_unuse(env, lock); - } - cl_lock_release(env, lock, "glimpse", current); - } else { - CDEBUG(D_DLMTRACE, "No objects for inode\n"); - cl_merge_lvb(env, inode); - } - } - - return result; -} - -static int cl_io_get(struct inode *inode, struct lu_env **envout, - struct cl_io **ioout, int *refcheck) -{ - struct lu_env *env; - struct cl_io *io; - struct cl_inode_info *lli = cl_i2info(inode); - struct cl_object *clob = lli->lli_clob; - int result; - - if (S_ISREG(cl_inode_mode(inode))) { - env = cl_env_get(refcheck); - if (!IS_ERR(env)) { - io = ccc_env_thread_io(env); - io->ci_obj = clob; - *envout = env; - *ioout = io; - result = 1; - } else - result = PTR_ERR(env); - } else - result = 0; - return result; -} - -int cl_glimpse_size0(struct inode *inode, int agl) -{ - /* - * We don't need ast_flags argument to cl_glimpse_size(), because - * osc_lock_enqueue() takes care of the possible deadlock that said - * argument was introduced to avoid. - */ - /* - * XXX but note that ll_file_seek() passes LDLM_FL_BLOCK_NOWAIT to - * cl_glimpse_size(), which doesn't make sense: glimpse locks are not - * blocking anyway. - */ - struct lu_env *env = NULL; - struct cl_io *io = NULL; - int result; - int refcheck; - - result = cl_io_get(inode, &env, &io, &refcheck); - if (result > 0) { -again: - io->ci_verify_layout = 1; - result = cl_io_init(env, io, CIT_MISC, io->ci_obj); - if (result > 0) - /* - * nothing to do for this io. This currently happens - * when stripe sub-object's are not yet created. - */ - result = io->ci_result; - else if (result == 0) - result = cl_glimpse_lock(env, io, inode, io->ci_obj, - agl); - - OBD_FAIL_TIMEOUT(OBD_FAIL_GLIMPSE_DELAY, 2); - cl_io_fini(env, io); - if (unlikely(io->ci_need_restart)) - goto again; - cl_env_put(env, &refcheck); - } - return result; -} - -int cl_local_size(struct inode *inode) -{ - struct lu_env *env = NULL; - struct cl_io *io = NULL; - struct ccc_thread_info *cti; - struct cl_object *clob; - struct cl_lock_descr *descr; - struct cl_lock *lock; - int result; - int refcheck; - - if (!cl_i2info(inode)->lli_has_smd) - return 0; - - result = cl_io_get(inode, &env, &io, &refcheck); - if (result <= 0) - return result; - - clob = io->ci_obj; - result = cl_io_init(env, io, CIT_MISC, clob); - if (result > 0) - result = io->ci_result; - else if (result == 0) { - cti = ccc_env_info(env); - descr = &cti->cti_descr; - - *descr = whole_file; - descr->cld_obj = clob; - lock = cl_lock_peek(env, io, descr, "localsize", current); - if (lock) { - cl_merge_lvb(env, inode); - cl_unuse(env, lock); - cl_lock_release(env, lock, "localsize", current); - result = 0; - } else - result = -ENODATA; - } - cl_io_fini(env, io); - cl_env_put(env, &refcheck); - return result; -} diff --git a/drivers/staging/lustre/lustre/lclient/lcommon_cl.c b/drivers/staging/lustre/lustre/lclient/lcommon_cl.c deleted file mode 100644 index 96141d17d..000000000 --- a/drivers/staging/lustre/lustre/lclient/lcommon_cl.c +++ /dev/null @@ -1,1203 +0,0 @@ -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. - * - * GPL HEADER END - */ -/* - * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * cl code shared between vvp and liblustre (and other Lustre clients in the - * future). - * - * Author: Nikita Danilov - */ - -#define DEBUG_SUBSYSTEM S_LLITE - -#include "../../include/linux/libcfs/libcfs.h" -# include -# include -# include -# include -# include -# include -# include - -#include "../include/obd.h" -#include "../include/obd_support.h" -#include "../include/lustre_fid.h" -#include "../include/lustre_lite.h" -#include "../include/lustre_dlm.h" -#include "../include/lustre_ver.h" -#include "../include/lustre_mdc.h" -#include "../include/cl_object.h" - -#include "../include/lclient.h" - -#include "../llite/llite_internal.h" - -static const struct cl_req_operations ccc_req_ops; - -/* - * ccc_ prefix stands for "Common Client Code". - */ - -static struct kmem_cache *ccc_lock_kmem; -static struct kmem_cache *ccc_object_kmem; -static struct kmem_cache *ccc_thread_kmem; -static struct kmem_cache *ccc_session_kmem; -static struct kmem_cache *ccc_req_kmem; - -static struct lu_kmem_descr ccc_caches[] = { - { - .ckd_cache = &ccc_lock_kmem, - .ckd_name = "ccc_lock_kmem", - .ckd_size = sizeof(struct ccc_lock) - }, - { - .ckd_cache = &ccc_object_kmem, - .ckd_name = "ccc_object_kmem", - .ckd_size = sizeof(struct ccc_object) - }, - { - .ckd_cache = &ccc_thread_kmem, - .ckd_name = "ccc_thread_kmem", - .ckd_size = sizeof(struct ccc_thread_info), - }, - { - .ckd_cache = &ccc_session_kmem, - .ckd_name = "ccc_session_kmem", - .ckd_size = sizeof(struct ccc_session) - }, - { - .ckd_cache = &ccc_req_kmem, - .ckd_name = "ccc_req_kmem", - .ckd_size = sizeof(struct ccc_req) - }, - { - .ckd_cache = NULL - } -}; - -/***************************************************************************** - * - * Vvp device and device type functions. - * - */ - -void *ccc_key_init(const struct lu_context *ctx, struct lu_context_key *key) -{ - struct ccc_thread_info *info; - - info = kmem_cache_zalloc(ccc_thread_kmem, GFP_NOFS); - if (!info) - info = ERR_PTR(-ENOMEM); - return info; -} - -void ccc_key_fini(const struct lu_context *ctx, - struct lu_context_key *key, void *data) -{ - struct ccc_thread_info *info = data; - - kmem_cache_free(ccc_thread_kmem, info); -} - -void *ccc_session_key_init(const struct lu_context *ctx, - struct lu_context_key *key) -{ - struct ccc_session *session; - - session = kmem_cache_zalloc(ccc_session_kmem, GFP_NOFS); - if (!session) - session = ERR_PTR(-ENOMEM); - return session; -} - -void ccc_session_key_fini(const struct lu_context *ctx, - struct lu_context_key *key, void *data) -{ - struct ccc_session *session = data; - - kmem_cache_free(ccc_session_kmem, session); -} - -struct lu_context_key ccc_key = { - .lct_tags = LCT_CL_THREAD, - .lct_init = ccc_key_init, - .lct_fini = ccc_key_fini -}; - -struct lu_context_key ccc_session_key = { - .lct_tags = LCT_SESSION, - .lct_init = ccc_session_key_init, - .lct_fini = ccc_session_key_fini -}; - -/* type constructor/destructor: ccc_type_{init,fini,start,stop}(). */ -/* LU_TYPE_INIT_FINI(ccc, &ccc_key, &ccc_session_key); */ - -int ccc_device_init(const struct lu_env *env, struct lu_device *d, - const char *name, struct lu_device *next) -{ - struct ccc_device *vdv; - int rc; - - vdv = lu2ccc_dev(d); - vdv->cdv_next = lu2cl_dev(next); - - LASSERT(d->ld_site && next->ld_type); - next->ld_site = d->ld_site; - rc = next->ld_type->ldt_ops->ldto_device_init( - env, next, next->ld_type->ldt_name, NULL); - if (rc == 0) { - lu_device_get(next); - lu_ref_add(&next->ld_reference, "lu-stack", &lu_site_init); - } - return rc; -} - -struct lu_device *ccc_device_fini(const struct lu_env *env, - struct lu_device *d) -{ - return cl2lu_dev(lu2ccc_dev(d)->cdv_next); -} - -struct lu_device *ccc_device_alloc(const struct lu_env *env, - struct lu_device_type *t, - struct lustre_cfg *cfg, - const struct lu_device_operations *luops, - const struct cl_device_operations *clops) -{ - struct ccc_device *vdv; - struct lu_device *lud; - struct cl_site *site; - int rc; - - vdv = kzalloc(sizeof(*vdv), GFP_NOFS); - if (!vdv) - return ERR_PTR(-ENOMEM); - - lud = &vdv->cdv_cl.cd_lu_dev; - cl_device_init(&vdv->cdv_cl, t); - ccc2lu_dev(vdv)->ld_ops = luops; - vdv->cdv_cl.cd_ops = clops; - - site = kzalloc(sizeof(*site), GFP_NOFS); - if (site) { - rc = cl_site_init(site, &vdv->cdv_cl); - if (rc == 0) - rc = lu_site_init_finish(&site->cs_lu); - else { - LASSERT(!lud->ld_site); - CERROR("Cannot init lu_site, rc %d.\n", rc); - kfree(site); - } - } else - rc = -ENOMEM; - if (rc != 0) { - ccc_device_free(env, lud); - lud = ERR_PTR(rc); - } - return lud; -} - -struct lu_device *ccc_device_free(const struct lu_env *env, - struct lu_device *d) -{ - struct ccc_device *vdv = lu2ccc_dev(d); - struct cl_site *site = lu2cl_site(d->ld_site); - struct lu_device *next = cl2lu_dev(vdv->cdv_next); - - if (d->ld_site) { - cl_site_fini(site); - kfree(site); - } - cl_device_fini(lu2cl_dev(d)); - kfree(vdv); - return next; -} - -int ccc_req_init(const struct lu_env *env, struct cl_device *dev, - struct cl_req *req) -{ - struct ccc_req *vrq; - int result; - - vrq = kmem_cache_zalloc(ccc_req_kmem, GFP_NOFS); - if (vrq) { - cl_req_slice_add(req, &vrq->crq_cl, dev, &ccc_req_ops); - result = 0; - } else - result = -ENOMEM; - return result; -} - -/** - * An `emergency' environment used by ccc_inode_fini() when cl_env_get() - * fails. Access to this environment is serialized by ccc_inode_fini_guard - * mutex. - */ -static struct lu_env *ccc_inode_fini_env; - -/** - * A mutex serializing calls to slp_inode_fini() under extreme memory - * pressure, when environments cannot be allocated. - */ -static DEFINE_MUTEX(ccc_inode_fini_guard); -static int dummy_refcheck; - -int ccc_global_init(struct lu_device_type *device_type) -{ - int result; - - result = lu_kmem_init(ccc_caches); - if (result) - return result; - - result = lu_device_type_init(device_type); - if (result) - goto out_kmem; - - ccc_inode_fini_env = cl_env_alloc(&dummy_refcheck, - LCT_REMEMBER|LCT_NOREF); - if (IS_ERR(ccc_inode_fini_env)) { - result = PTR_ERR(ccc_inode_fini_env); - goto out_device; - } - - ccc_inode_fini_env->le_ctx.lc_cookie = 0x4; - return 0; -out_device: - lu_device_type_fini(device_type); -out_kmem: - lu_kmem_fini(ccc_caches); - return result; -} - -void ccc_global_fini(struct lu_device_type *device_type) -{ - if (ccc_inode_fini_env) { - cl_env_put(ccc_inode_fini_env, &dummy_refcheck); - ccc_inode_fini_env = NULL; - } - lu_device_type_fini(device_type); - lu_kmem_fini(ccc_caches); -} - -/***************************************************************************** - * - * Object operations. - * - */ - -struct lu_object *ccc_object_alloc(const struct lu_env *env, - const struct lu_object_header *unused, - struct lu_device *dev, - const struct cl_object_operations *clops, - const struct lu_object_operations *luops) -{ - struct ccc_object *vob; - struct lu_object *obj; - - vob = kmem_cache_zalloc(ccc_object_kmem, GFP_NOFS); - if (vob) { - struct cl_object_header *hdr; - - obj = ccc2lu(vob); - hdr = &vob->cob_header; - cl_object_header_init(hdr); - lu_object_init(obj, &hdr->coh_lu, dev); - lu_object_add_top(&hdr->coh_lu, obj); - - vob->cob_cl.co_ops = clops; - obj->lo_ops = luops; - } else - obj = NULL; - return obj; -} - -int ccc_object_init0(const struct lu_env *env, - struct ccc_object *vob, - const struct cl_object_conf *conf) -{ - vob->cob_inode = conf->coc_inode; - vob->cob_transient_pages = 0; - cl_object_page_init(&vob->cob_cl, sizeof(struct ccc_page)); - return 0; -} - -int ccc_object_init(const struct lu_env *env, struct lu_object *obj, - const struct lu_object_conf *conf) -{ - struct ccc_device *dev = lu2ccc_dev(obj->lo_dev); - struct ccc_object *vob = lu2ccc(obj); - struct lu_object *below; - struct lu_device *under; - int result; - - under = &dev->cdv_next->cd_lu_dev; - below = under->ld_ops->ldo_object_alloc(env, obj->lo_header, under); - if (below) { - const struct cl_object_conf *cconf; - - cconf = lu2cl_conf(conf); - INIT_LIST_HEAD(&vob->cob_pending_list); - lu_object_add(obj, below); - result = ccc_object_init0(env, vob, cconf); - } else - result = -ENOMEM; - return result; -} - -void ccc_object_free(const struct lu_env *env, struct lu_object *obj) -{ - struct ccc_object *vob = lu2ccc(obj); - - lu_object_fini(obj); - lu_object_header_fini(obj->lo_header); - kmem_cache_free(ccc_object_kmem, vob); -} - -int ccc_lock_init(const struct lu_env *env, - struct cl_object *obj, struct cl_lock *lock, - const struct cl_io *unused, - const struct cl_lock_operations *lkops) -{ - struct ccc_lock *clk; - int result; - - CLOBINVRNT(env, obj, ccc_object_invariant(obj)); - - clk = kmem_cache_zalloc(ccc_lock_kmem, GFP_NOFS); - if (clk) { - cl_lock_slice_add(lock, &clk->clk_cl, obj, lkops); - result = 0; - } else - result = -ENOMEM; - return result; -} - -int ccc_object_glimpse(const struct lu_env *env, - const struct cl_object *obj, struct ost_lvb *lvb) -{ - struct inode *inode = ccc_object_inode(obj); - - lvb->lvb_mtime = cl_inode_mtime(inode); - lvb->lvb_atime = cl_inode_atime(inode); - lvb->lvb_ctime = cl_inode_ctime(inode); - /* - * LU-417: Add dirty pages block count lest i_blocks reports 0, some - * "cp" or "tar" on remote node may think it's a completely sparse file - * and skip it. - */ - if (lvb->lvb_size > 0 && lvb->lvb_blocks == 0) - lvb->lvb_blocks = dirty_cnt(inode); - return 0; -} - -static void ccc_object_size_lock(struct cl_object *obj) -{ - struct inode *inode = ccc_object_inode(obj); - - ll_inode_size_lock(inode); - cl_object_attr_lock(obj); -} - -static void ccc_object_size_unlock(struct cl_object *obj) -{ - struct inode *inode = ccc_object_inode(obj); - - cl_object_attr_unlock(obj); - ll_inode_size_unlock(inode); -} - -/***************************************************************************** - * - * Page operations. - * - */ - -struct page *ccc_page_vmpage(const struct lu_env *env, - const struct cl_page_slice *slice) -{ - return cl2vm_page(slice); -} - -int ccc_page_is_under_lock(const struct lu_env *env, - const struct cl_page_slice *slice, - struct cl_io *io) -{ - struct ccc_io *cio = ccc_env_io(env); - struct cl_lock_descr *desc = &ccc_env_info(env)->cti_descr; - struct cl_page *page = slice->cpl_page; - - int result; - - if (io->ci_type == CIT_READ || io->ci_type == CIT_WRITE || - io->ci_type == CIT_FAULT) { - if (cio->cui_fd->fd_flags & LL_FILE_GROUP_LOCKED) - result = -EBUSY; - else { - desc->cld_start = page->cp_index; - desc->cld_end = page->cp_index; - desc->cld_obj = page->cp_obj; - desc->cld_mode = CLM_READ; - result = cl_queue_match(&io->ci_lockset.cls_done, - desc) ? -EBUSY : 0; - } - } else - result = 0; - return result; -} - -int ccc_fail(const struct lu_env *env, const struct cl_page_slice *slice) -{ - /* - * Cached read? - */ - LBUG(); - return 0; -} - -int ccc_transient_page_prep(const struct lu_env *env, - const struct cl_page_slice *slice, - struct cl_io *unused) -{ - /* transient page should always be sent. */ - return 0; -} - -/***************************************************************************** - * - * Lock operations. - * - */ - -void ccc_lock_delete(const struct lu_env *env, - const struct cl_lock_slice *slice) -{ - CLOBINVRNT(env, slice->cls_obj, ccc_object_invariant(slice->cls_obj)); -} - -void ccc_lock_fini(const struct lu_env *env, struct cl_lock_slice *slice) -{ - struct ccc_lock *clk = cl2ccc_lock(slice); - - kmem_cache_free(ccc_lock_kmem, clk); -} - -int ccc_lock_enqueue(const struct lu_env *env, - const struct cl_lock_slice *slice, - struct cl_io *unused, __u32 enqflags) -{ - CLOBINVRNT(env, slice->cls_obj, ccc_object_invariant(slice->cls_obj)); - return 0; -} - -int ccc_lock_use(const struct lu_env *env, const struct cl_lock_slice *slice) -{ - CLOBINVRNT(env, slice->cls_obj, ccc_object_invariant(slice->cls_obj)); - return 0; -} - -int ccc_lock_unuse(const struct lu_env *env, const struct cl_lock_slice *slice) -{ - CLOBINVRNT(env, slice->cls_obj, ccc_object_invariant(slice->cls_obj)); - return 0; -} - -int ccc_lock_wait(const struct lu_env *env, const struct cl_lock_slice *slice) -{ - CLOBINVRNT(env, slice->cls_obj, ccc_object_invariant(slice->cls_obj)); - return 0; -} - -/** - * Implementation of cl_lock_operations::clo_fits_into() methods for ccc - * layer. This function is executed every time io finds an existing lock in - * the lock cache while creating new lock. This function has to decide whether - * cached lock "fits" into io. - * - * \param slice lock to be checked - * \param io IO that wants a lock. - * - * \see lov_lock_fits_into(). - */ -int ccc_lock_fits_into(const struct lu_env *env, - const struct cl_lock_slice *slice, - const struct cl_lock_descr *need, - const struct cl_io *io) -{ - const struct cl_lock *lock = slice->cls_lock; - const struct cl_lock_descr *descr = &lock->cll_descr; - const struct ccc_io *cio = ccc_env_io(env); - int result; - - /* - * Work around DLM peculiarity: it assumes that glimpse - * (LDLM_FL_HAS_INTENT) lock is always LCK_PR, and returns reads lock - * when asked for LCK_PW lock with LDLM_FL_HAS_INTENT flag set. Make - * sure that glimpse doesn't get CLM_WRITE top-lock, so that it - * doesn't enqueue CLM_WRITE sub-locks. - */ - if (cio->cui_glimpse) - result = descr->cld_mode != CLM_WRITE; - - /* - * Also, don't match incomplete write locks for read, otherwise read - * would enqueue missing sub-locks in the write mode. - */ - else if (need->cld_mode != descr->cld_mode) - result = lock->cll_state >= CLS_ENQUEUED; - else - result = 1; - return result; -} - -/** - * Implements cl_lock_operations::clo_state() method for ccc layer, invoked - * whenever lock state changes. Transfers object attributes, that might be - * updated as a result of lock acquiring into inode. - */ -void ccc_lock_state(const struct lu_env *env, - const struct cl_lock_slice *slice, - enum cl_lock_state state) -{ - struct cl_lock *lock = slice->cls_lock; - - /* - * Refresh inode attributes when the lock is moving into CLS_HELD - * state, and only when this is a result of real enqueue, rather than - * of finding lock in the cache. - */ - if (state == CLS_HELD && lock->cll_state < CLS_HELD) { - struct cl_object *obj; - struct inode *inode; - - obj = slice->cls_obj; - inode = ccc_object_inode(obj); - - /* vmtruncate() sets the i_size - * under both a DLM lock and the - * ll_inode_size_lock(). If we don't get the - * ll_inode_size_lock() here we can match the DLM lock and - * reset i_size. generic_file_write can then trust the - * stale i_size when doing appending writes and effectively - * cancel the result of the truncate. Getting the - * ll_inode_size_lock() after the enqueue maintains the DLM - * -> ll_inode_size_lock() acquiring order. - */ - if (lock->cll_descr.cld_start == 0 && - lock->cll_descr.cld_end == CL_PAGE_EOF) - cl_merge_lvb(env, inode); - } -} - -/***************************************************************************** - * - * io operations. - * - */ - -int ccc_io_one_lock_index(const struct lu_env *env, struct cl_io *io, - __u32 enqflags, enum cl_lock_mode mode, - pgoff_t start, pgoff_t end) -{ - struct ccc_io *cio = ccc_env_io(env); - struct cl_lock_descr *descr = &cio->cui_link.cill_descr; - struct cl_object *obj = io->ci_obj; - - CLOBINVRNT(env, obj, ccc_object_invariant(obj)); - - CDEBUG(D_VFSTRACE, "lock: %d [%lu, %lu]\n", mode, start, end); - - memset(&cio->cui_link, 0, sizeof(cio->cui_link)); - - if (cio->cui_fd && (cio->cui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) { - descr->cld_mode = CLM_GROUP; - descr->cld_gid = cio->cui_fd->fd_grouplock.cg_gid; - } else { - descr->cld_mode = mode; - } - descr->cld_obj = obj; - descr->cld_start = start; - descr->cld_end = end; - descr->cld_enq_flags = enqflags; - - cl_io_lock_add(env, io, &cio->cui_link); - return 0; -} - -void ccc_io_update_iov(const struct lu_env *env, - struct ccc_io *cio, struct cl_io *io) -{ - size_t size = io->u.ci_rw.crw_count; - - if (!cl_is_normalio(env, io) || !cio->cui_iter) - return; - - iov_iter_truncate(cio->cui_iter, size); -} - -int ccc_io_one_lock(const struct lu_env *env, struct cl_io *io, - __u32 enqflags, enum cl_lock_mode mode, - loff_t start, loff_t end) -{ - struct cl_object *obj = io->ci_obj; - - return ccc_io_one_lock_index(env, io, enqflags, mode, - cl_index(obj, start), cl_index(obj, end)); -} - -void ccc_io_end(const struct lu_env *env, const struct cl_io_slice *ios) -{ - CLOBINVRNT(env, ios->cis_io->ci_obj, - ccc_object_invariant(ios->cis_io->ci_obj)); -} - -void ccc_io_advance(const struct lu_env *env, - const struct cl_io_slice *ios, - size_t nob) -{ - struct ccc_io *cio = cl2ccc_io(env, ios); - struct cl_io *io = ios->cis_io; - struct cl_object *obj = ios->cis_io->ci_obj; - - CLOBINVRNT(env, obj, ccc_object_invariant(obj)); - - if (!cl_is_normalio(env, io)) - return; - - iov_iter_reexpand(cio->cui_iter, cio->cui_tot_count -= nob); -} - -/** - * Helper function that if necessary adjusts file size (inode->i_size), when - * position at the offset \a pos is accessed. File size can be arbitrary stale - * on a Lustre client, but client at least knows KMS. If accessed area is - * inside [0, KMS], set file size to KMS, otherwise glimpse file size. - * - * Locking: cl_isize_lock is used to serialize changes to inode size and to - * protect consistency between inode size and cl_object - * attributes. cl_object_size_lock() protects consistency between cl_attr's of - * top-object and sub-objects. - */ -int ccc_prep_size(const struct lu_env *env, struct cl_object *obj, - struct cl_io *io, loff_t start, size_t count, int *exceed) -{ - struct cl_attr *attr = ccc_env_thread_attr(env); - struct inode *inode = ccc_object_inode(obj); - loff_t pos = start + count - 1; - loff_t kms; - int result; - - /* - * Consistency guarantees: following possibilities exist for the - * relation between region being accessed and real file size at this - * moment: - * - * (A): the region is completely inside of the file; - * - * (B-x): x bytes of region are inside of the file, the rest is - * outside; - * - * (C): the region is completely outside of the file. - * - * This classification is stable under DLM lock already acquired by - * the caller, because to change the class, other client has to take - * DLM lock conflicting with our lock. Also, any updates to ->i_size - * by other threads on this client are serialized by - * ll_inode_size_lock(). This guarantees that short reads are handled - * correctly in the face of concurrent writes and truncates. - */ - ccc_object_size_lock(obj); - result = cl_object_attr_get(env, obj, attr); - if (result == 0) { - kms = attr->cat_kms; - if (pos > kms) { - /* - * A glimpse is necessary to determine whether we - * return a short read (B) or some zeroes at the end - * of the buffer (C) - */ - ccc_object_size_unlock(obj); - result = cl_glimpse_lock(env, io, inode, obj, 0); - if (result == 0 && exceed) { - /* If objective page index exceed end-of-file - * page index, return directly. Do not expect - * kernel will check such case correctly. - * linux-2.6.18-128.1.1 miss to do that. - * --bug 17336 - */ - loff_t size = cl_isize_read(inode); - loff_t cur_index = start >> PAGE_SHIFT; - loff_t size_index = (size - 1) >> - PAGE_SHIFT; - - if ((size == 0 && cur_index != 0) || - size_index < cur_index) - *exceed = 1; - } - return result; - } - /* - * region is within kms and, hence, within real file - * size (A). We need to increase i_size to cover the - * read region so that generic_file_read() will do its - * job, but that doesn't mean the kms size is - * _correct_, it is only the _minimum_ size. If - * someone does a stat they will get the correct size - * which will always be >= the kms value here. - * b=11081 - */ - if (cl_isize_read(inode) < kms) { - cl_isize_write_nolock(inode, kms); - CDEBUG(D_VFSTRACE, - DFID" updating i_size %llu\n", - PFID(lu_object_fid(&obj->co_lu)), - (__u64)cl_isize_read(inode)); - - } - } - ccc_object_size_unlock(obj); - return result; -} - -/***************************************************************************** - * - * Transfer operations. - * - */ - -void ccc_req_completion(const struct lu_env *env, - const struct cl_req_slice *slice, int ioret) -{ - struct ccc_req *vrq; - - if (ioret > 0) - cl_stats_tally(slice->crs_dev, slice->crs_req->crq_type, ioret); - - vrq = cl2ccc_req(slice); - kmem_cache_free(ccc_req_kmem, vrq); -} - -/** - * Implementation of struct cl_req_operations::cro_attr_set() for ccc - * layer. ccc is responsible for - * - * - o_[mac]time - * - * - o_mode - * - * - o_parent_seq - * - * - o_[ug]id - * - * - o_parent_oid - * - * - o_parent_ver - * - * - o_ioepoch, - * - */ -void ccc_req_attr_set(const struct lu_env *env, - const struct cl_req_slice *slice, - const struct cl_object *obj, - struct cl_req_attr *attr, u64 flags) -{ - struct inode *inode; - struct obdo *oa; - u32 valid_flags; - - oa = attr->cra_oa; - inode = ccc_object_inode(obj); - valid_flags = OBD_MD_FLTYPE; - - if (slice->crs_req->crq_type == CRT_WRITE) { - if (flags & OBD_MD_FLEPOCH) { - oa->o_valid |= OBD_MD_FLEPOCH; - oa->o_ioepoch = cl_i2info(inode)->lli_ioepoch; - valid_flags |= OBD_MD_FLMTIME | OBD_MD_FLCTIME | - OBD_MD_FLUID | OBD_MD_FLGID; - } - } - obdo_from_inode(oa, inode, valid_flags & flags); - obdo_set_parent_fid(oa, &cl_i2info(inode)->lli_fid); - memcpy(attr->cra_jobid, cl_i2info(inode)->lli_jobid, - JOBSTATS_JOBID_SIZE); -} - -static const struct cl_req_operations ccc_req_ops = { - .cro_attr_set = ccc_req_attr_set, - .cro_completion = ccc_req_completion -}; - -int cl_setattr_ost(struct inode *inode, const struct iattr *attr) -{ - struct lu_env *env; - struct cl_io *io; - int result; - int refcheck; - - env = cl_env_get(&refcheck); - if (IS_ERR(env)) - return PTR_ERR(env); - - io = ccc_env_thread_io(env); - io->ci_obj = cl_i2info(inode)->lli_clob; - - io->u.ci_setattr.sa_attr.lvb_atime = LTIME_S(attr->ia_atime); - io->u.ci_setattr.sa_attr.lvb_mtime = LTIME_S(attr->ia_mtime); - io->u.ci_setattr.sa_attr.lvb_ctime = LTIME_S(attr->ia_ctime); - io->u.ci_setattr.sa_attr.lvb_size = attr->ia_size; - io->u.ci_setattr.sa_valid = attr->ia_valid; - -again: - if (cl_io_init(env, io, CIT_SETATTR, io->ci_obj) == 0) { - struct ccc_io *cio = ccc_env_io(env); - - if (attr->ia_valid & ATTR_FILE) - /* populate the file descriptor for ftruncate to honor - * group lock - see LU-787 - */ - cio->cui_fd = cl_iattr2fd(inode, attr); - - result = cl_io_loop(env, io); - } else { - result = io->ci_result; - } - cl_io_fini(env, io); - if (unlikely(io->ci_need_restart)) - goto again; - /* HSM import case: file is released, cannot be restored - * no need to fail except if restore registration failed - * with -ENODATA - */ - if (result == -ENODATA && io->ci_restore_needed && - io->ci_result != -ENODATA) - result = 0; - cl_env_put(env, &refcheck); - return result; -} - -/***************************************************************************** - * - * Type conversions. - * - */ - -struct lu_device *ccc2lu_dev(struct ccc_device *vdv) -{ - return &vdv->cdv_cl.cd_lu_dev; -} - -struct ccc_device *lu2ccc_dev(const struct lu_device *d) -{ - return container_of0(d, struct ccc_device, cdv_cl.cd_lu_dev); -} - -struct ccc_device *cl2ccc_dev(const struct cl_device *d) -{ - return container_of0(d, struct ccc_device, cdv_cl); -} - -struct lu_object *ccc2lu(struct ccc_object *vob) -{ - return &vob->cob_cl.co_lu; -} - -struct ccc_object *lu2ccc(const struct lu_object *obj) -{ - return container_of0(obj, struct ccc_object, cob_cl.co_lu); -} - -struct ccc_object *cl2ccc(const struct cl_object *obj) -{ - return container_of0(obj, struct ccc_object, cob_cl); -} - -struct ccc_lock *cl2ccc_lock(const struct cl_lock_slice *slice) -{ - return container_of(slice, struct ccc_lock, clk_cl); -} - -struct ccc_io *cl2ccc_io(const struct lu_env *env, - const struct cl_io_slice *slice) -{ - struct ccc_io *cio; - - cio = container_of(slice, struct ccc_io, cui_cl); - LASSERT(cio == ccc_env_io(env)); - return cio; -} - -struct ccc_req *cl2ccc_req(const struct cl_req_slice *slice) -{ - return container_of0(slice, struct ccc_req, crq_cl); -} - -struct page *cl2vm_page(const struct cl_page_slice *slice) -{ - return cl2ccc_page(slice)->cpg_page; -} - -/***************************************************************************** - * - * Accessors. - * - */ -int ccc_object_invariant(const struct cl_object *obj) -{ - struct inode *inode = ccc_object_inode(obj); - struct cl_inode_info *lli = cl_i2info(inode); - - return (S_ISREG(cl_inode_mode(inode)) || - /* i_mode of unlinked inode is zeroed. */ - cl_inode_mode(inode) == 0) && lli->lli_clob == obj; -} - -struct inode *ccc_object_inode(const struct cl_object *obj) -{ - return cl2ccc(obj)->cob_inode; -} - -/** - * Initialize or update CLIO structures for regular files when new - * meta-data arrives from the server. - * - * \param inode regular file inode - * \param md new file metadata from MDS - * - allocates cl_object if necessary, - * - updated layout, if object was already here. - */ -int cl_file_inode_init(struct inode *inode, struct lustre_md *md) -{ - struct lu_env *env; - struct cl_inode_info *lli; - struct cl_object *clob; - struct lu_site *site; - struct lu_fid *fid; - struct cl_object_conf conf = { - .coc_inode = inode, - .u = { - .coc_md = md - } - }; - int result = 0; - int refcheck; - - LASSERT(md->body->valid & OBD_MD_FLID); - LASSERT(S_ISREG(cl_inode_mode(inode))); - - env = cl_env_get(&refcheck); - if (IS_ERR(env)) - return PTR_ERR(env); - - site = cl_i2sbi(inode)->ll_site; - lli = cl_i2info(inode); - fid = &lli->lli_fid; - LASSERT(fid_is_sane(fid)); - - if (!lli->lli_clob) { - /* clob is slave of inode, empty lli_clob means for new inode, - * there is no clob in cache with the given fid, so it is - * unnecessary to perform lookup-alloc-lookup-insert, just - * alloc and insert directly. - */ - LASSERT(inode->i_state & I_NEW); - conf.coc_lu.loc_flags = LOC_F_NEW; - clob = cl_object_find(env, lu2cl_dev(site->ls_top_dev), - fid, &conf); - if (!IS_ERR(clob)) { - /* - * No locking is necessary, as new inode is - * locked by I_NEW bit. - */ - lli->lli_clob = clob; - lli->lli_has_smd = lsm_has_objects(md->lsm); - lu_object_ref_add(&clob->co_lu, "inode", inode); - } else - result = PTR_ERR(clob); - } else { - result = cl_conf_set(env, lli->lli_clob, &conf); - } - - cl_env_put(env, &refcheck); - - if (result != 0) - CERROR("Failure to initialize cl object "DFID": %d\n", - PFID(fid), result); - return result; -} - -/** - * Wait for others drop their references of the object at first, then we drop - * the last one, which will lead to the object be destroyed immediately. - * Must be called after cl_object_kill() against this object. - * - * The reason we want to do this is: destroying top object will wait for sub - * objects being destroyed first, so we can't let bottom layer (e.g. from ASTs) - * to initiate top object destroying which may deadlock. See bz22520. - */ -static void cl_object_put_last(struct lu_env *env, struct cl_object *obj) -{ - struct lu_object_header *header = obj->co_lu.lo_header; - wait_queue_t waiter; - - if (unlikely(atomic_read(&header->loh_ref) != 1)) { - struct lu_site *site = obj->co_lu.lo_dev->ld_site; - struct lu_site_bkt_data *bkt; - - bkt = lu_site_bkt_from_fid(site, &header->loh_fid); - - init_waitqueue_entry(&waiter, current); - add_wait_queue(&bkt->lsb_marche_funebre, &waiter); - - while (1) { - set_current_state(TASK_UNINTERRUPTIBLE); - if (atomic_read(&header->loh_ref) == 1) - break; - schedule(); - } - - set_current_state(TASK_RUNNING); - remove_wait_queue(&bkt->lsb_marche_funebre, &waiter); - } - - cl_object_put(env, obj); -} - -void cl_inode_fini(struct inode *inode) -{ - struct lu_env *env; - struct cl_inode_info *lli = cl_i2info(inode); - struct cl_object *clob = lli->lli_clob; - int refcheck; - int emergency; - - if (clob) { - void *cookie; - - cookie = cl_env_reenter(); - env = cl_env_get(&refcheck); - emergency = IS_ERR(env); - if (emergency) { - mutex_lock(&ccc_inode_fini_guard); - LASSERT(ccc_inode_fini_env); - cl_env_implant(ccc_inode_fini_env, &refcheck); - env = ccc_inode_fini_env; - } - /* - * cl_object cache is a slave to inode cache (which, in turn - * is a slave to dentry cache), don't keep cl_object in memory - * when its master is evicted. - */ - cl_object_kill(env, clob); - lu_object_ref_del(&clob->co_lu, "inode", inode); - cl_object_put_last(env, clob); - lli->lli_clob = NULL; - if (emergency) { - cl_env_unplant(ccc_inode_fini_env, &refcheck); - mutex_unlock(&ccc_inode_fini_guard); - } else - cl_env_put(env, &refcheck); - cl_env_reexit(cookie); - } -} - -/** - * return IF_* type for given lu_dirent entry. - * IF_* flag shld be converted to particular OS file type in - * platform llite module. - */ -__u16 ll_dirent_type_get(struct lu_dirent *ent) -{ - __u16 type = 0; - struct luda_type *lt; - int len = 0; - - if (le32_to_cpu(ent->lde_attrs) & LUDA_TYPE) { - const unsigned align = sizeof(struct luda_type) - 1; - - len = le16_to_cpu(ent->lde_namelen); - len = (len + align) & ~align; - lt = (void *)ent->lde_name + len; - type = IFTODT(le16_to_cpu(lt->lt_type)); - } - return type; -} - -/** - * build inode number from passed @fid - */ -__u64 cl_fid_build_ino(const struct lu_fid *fid, int api32) -{ - if (BITS_PER_LONG == 32 || api32) - return fid_flatten32(fid); - else - return fid_flatten(fid); -} - -/** - * build inode generation from passed @fid. If our FID overflows the 32-bit - * inode number then return a non-zero generation to distinguish them. - */ -__u32 cl_fid_build_gen(const struct lu_fid *fid) -{ - __u32 gen; - - if (fid_is_igif(fid)) { - gen = lu_igif_gen(fid); - return gen; - } - - gen = fid_flatten(fid) >> 32; - return gen; -} - -/* lsm is unreliable after hsm implementation as layout can be changed at - * any time. This is only to support old, non-clio-ized interfaces. It will - * cause deadlock if clio operations are called with this extra layout refcount - * because in case the layout changed during the IO, ll_layout_refresh() will - * have to wait for the refcount to become zero to destroy the older layout. - * - * Notice that the lsm returned by this function may not be valid unless called - * inside layout lock - MDS_INODELOCK_LAYOUT. - */ -struct lov_stripe_md *ccc_inode_lsm_get(struct inode *inode) -{ - return lov_lsm_get(cl_i2info(inode)->lli_clob); -} - -inline void ccc_inode_lsm_put(struct inode *inode, struct lov_stripe_md *lsm) -{ - lov_lsm_put(cl_i2info(inode)->lli_clob, lsm); -} diff --git a/drivers/staging/lustre/lustre/lclient/lcommon_misc.c b/drivers/staging/lustre/lustre/lclient/lcommon_misc.c deleted file mode 100644 index d80bcedd7..000000000 --- a/drivers/staging/lustre/lustre/lclient/lcommon_misc.c +++ /dev/null @@ -1,200 +0,0 @@ -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. - * - * GPL HEADER END - */ -/* - * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * cl code shared between vvp and liblustre (and other Lustre clients in the - * future). - * - */ -#include "../include/obd_class.h" -#include "../include/obd_support.h" -#include "../include/obd.h" -#include "../include/cl_object.h" -#include "../include/lclient.h" - -#include "../include/lustre_lite.h" - -/* Initialize the default and maximum LOV EA and cookie sizes. This allows - * us to make MDS RPCs with large enough reply buffers to hold the - * maximum-sized (= maximum striped) EA and cookie without having to - * calculate this (via a call into the LOV + OSCs) each time we make an RPC. - */ -int cl_init_ea_size(struct obd_export *md_exp, struct obd_export *dt_exp) -{ - struct lov_stripe_md lsm = { .lsm_magic = LOV_MAGIC_V3 }; - __u32 valsize = sizeof(struct lov_desc); - int rc, easize, def_easize, cookiesize; - struct lov_desc desc; - __u16 stripes, def_stripes; - - rc = obd_get_info(NULL, dt_exp, sizeof(KEY_LOVDESC), KEY_LOVDESC, - &valsize, &desc, NULL); - if (rc) - return rc; - - stripes = min_t(__u32, desc.ld_tgt_count, LOV_MAX_STRIPE_COUNT); - lsm.lsm_stripe_count = stripes; - easize = obd_size_diskmd(dt_exp, &lsm); - - def_stripes = min_t(__u32, desc.ld_default_stripe_count, - LOV_MAX_STRIPE_COUNT); - lsm.lsm_stripe_count = def_stripes; - def_easize = obd_size_diskmd(dt_exp, &lsm); - - cookiesize = stripes * sizeof(struct llog_cookie); - - /* default cookiesize is 0 because from 2.4 server doesn't send - * llog cookies to client. - */ - CDEBUG(D_HA, - "updating def/max_easize: %d/%d def/max_cookiesize: 0/%d\n", - def_easize, easize, cookiesize); - - rc = md_init_ea_size(md_exp, easize, def_easize, cookiesize, 0); - return rc; -} - -/** - * This function is used as an upcall-callback hooked by liblustre and llite - * clients into obd_notify() listeners chain to handle notifications about - * change of import connect_flags. See llu_fsswop_mount() and - * lustre_common_fill_super(). - */ -int cl_ocd_update(struct obd_device *host, - struct obd_device *watched, - enum obd_notify_event ev, void *owner, void *data) -{ - struct lustre_client_ocd *lco; - struct client_obd *cli; - __u64 flags; - int result; - - if (!strcmp(watched->obd_type->typ_name, LUSTRE_OSC_NAME)) { - cli = &watched->u.cli; - lco = owner; - flags = cli->cl_import->imp_connect_data.ocd_connect_flags; - CDEBUG(D_SUPER, "Changing connect_flags: %#llx -> %#llx\n", - lco->lco_flags, flags); - mutex_lock(&lco->lco_lock); - lco->lco_flags &= flags; - /* for each osc event update ea size */ - if (lco->lco_dt_exp) - cl_init_ea_size(lco->lco_md_exp, lco->lco_dt_exp); - - mutex_unlock(&lco->lco_lock); - result = 0; - } else { - CERROR("unexpected notification from %s %s!\n", - watched->obd_type->typ_name, - watched->obd_name); - result = -EINVAL; - } - return result; -} - -#define GROUPLOCK_SCOPE "grouplock" - -int cl_get_grouplock(struct cl_object *obj, unsigned long gid, int nonblock, - struct ccc_grouplock *cg) -{ - struct lu_env *env; - struct cl_io *io; - struct cl_lock *lock; - struct cl_lock_descr *descr; - __u32 enqflags; - int refcheck; - int rc; - - env = cl_env_get(&refcheck); - if (IS_ERR(env)) - return PTR_ERR(env); - - io = ccc_env_thread_io(env); - io->ci_obj = obj; - io->ci_ignore_layout = 1; - - rc = cl_io_init(env, io, CIT_MISC, io->ci_obj); - if (rc) { - /* Does not make sense to take GL for released layout */ - if (rc > 0) - rc = -ENOTSUPP; - cl_env_put(env, &refcheck); - return rc; - } - - descr = &ccc_env_info(env)->cti_descr; - descr->cld_obj = obj; - descr->cld_start = 0; - descr->cld_end = CL_PAGE_EOF; - descr->cld_gid = gid; - descr->cld_mode = CLM_GROUP; - - enqflags = CEF_MUST | (nonblock ? CEF_NONBLOCK : 0); - descr->cld_enq_flags = enqflags; - - lock = cl_lock_request(env, io, descr, GROUPLOCK_SCOPE, current); - if (IS_ERR(lock)) { - cl_io_fini(env, io); - cl_env_put(env, &refcheck); - return PTR_ERR(lock); - } - - cg->cg_env = cl_env_get(&refcheck); - cg->cg_io = io; - cg->cg_lock = lock; - cg->cg_gid = gid; - LASSERT(cg->cg_env == env); - - cl_env_unplant(env, &refcheck); - return 0; -} - -void cl_put_grouplock(struct ccc_grouplock *cg) -{ - struct lu_env *env = cg->cg_env; - struct cl_io *io = cg->cg_io; - struct cl_lock *lock = cg->cg_lock; - int refcheck; - - LASSERT(cg->cg_env); - LASSERT(cg->cg_gid); - - cl_env_implant(env, &refcheck); - cl_env_put(env, &refcheck); - - cl_unuse(env, lock); - cl_lock_release(env, lock, GROUPLOCK_SCOPE, current); - cl_io_fini(env, io); - cl_env_put(env, NULL); -} diff --git a/drivers/staging/lustre/lustre/ldlm/l_lock.c b/drivers/staging/lustre/lustre/ldlm/l_lock.c index e5d1344e8..621323f6e 100644 --- a/drivers/staging/lustre/lustre/ldlm/l_lock.c +++ b/drivers/staging/lustre/lustre/ldlm/l_lock.c @@ -54,7 +54,7 @@ struct ldlm_resource *lock_res_and_lock(struct ldlm_lock *lock) lock_res(lock->l_resource); - lock->l_flags |= LDLM_FL_RES_LOCKED; + ldlm_set_res_locked(lock); return lock->l_resource; } EXPORT_SYMBOL(lock_res_and_lock); @@ -65,7 +65,7 @@ EXPORT_SYMBOL(lock_res_and_lock); void unlock_res_and_lock(struct ldlm_lock *lock) { /* on server-side resource of lock doesn't change */ - lock->l_flags &= ~LDLM_FL_RES_LOCKED; + ldlm_clear_res_locked(lock); unlock_res(lock->l_resource); spin_unlock(&lock->l_lock); diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_extent.c b/drivers/staging/lustre/lustre/ldlm/ldlm_extent.c index a803e200f..cf1f17836 100644 --- a/drivers/staging/lustre/lustre/ldlm/ldlm_extent.c +++ b/drivers/staging/lustre/lustre/ldlm/ldlm_extent.c @@ -75,12 +75,12 @@ __u64 ldlm_extent_shift_kms(struct ldlm_lock *lock, __u64 old_kms) * just after we finish and take our lock into account in its * calculation of the kms */ - lock->l_flags |= LDLM_FL_KMS_IGNORE; + ldlm_set_kms_ignore(lock); list_for_each(tmp, &res->lr_granted) { lck = list_entry(tmp, struct ldlm_lock, l_res_link); - if (lck->l_flags & LDLM_FL_KMS_IGNORE) + if (ldlm_is_kms_ignore(lck)) continue; if (lck->l_policy_data.l_extent.end >= old_kms) diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_flock.c b/drivers/staging/lustre/lustre/ldlm/ldlm_flock.c index b88b78606..349bfcc9b 100644 --- a/drivers/staging/lustre/lustre/ldlm/ldlm_flock.c +++ b/drivers/staging/lustre/lustre/ldlm/ldlm_flock.c @@ -101,8 +101,7 @@ ldlm_flock_destroy(struct ldlm_lock *lock, enum ldlm_mode mode, __u64 flags) LASSERT(hlist_unhashed(&lock->l_exp_flock_hash)); list_del_init(&lock->l_res_link); - if (flags == LDLM_FL_WAIT_NOREPROC && - !(lock->l_flags & LDLM_FL_FAILED)) { + if (flags == LDLM_FL_WAIT_NOREPROC && !ldlm_is_failed(lock)) { /* client side - set a flag to prevent sending a CANCEL */ lock->l_flags |= LDLM_FL_LOCAL_ONLY | LDLM_FL_CBPENDING; @@ -436,7 +435,7 @@ ldlm_flock_interrupted_wait(void *data) lock_res_and_lock(lock); /* client side - set flag to prevent lock from being put on LRU list */ - lock->l_flags |= LDLM_FL_CBPENDING; + ldlm_set_cbpending(lock); unlock_res_and_lock(lock); } @@ -520,30 +519,29 @@ ldlm_flock_completion_ast(struct ldlm_lock *lock, __u64 flags, void *data) granted: OBD_FAIL_TIMEOUT(OBD_FAIL_LDLM_CP_CB_WAIT, 10); - if (lock->l_flags & LDLM_FL_DESTROYED) { - LDLM_DEBUG(lock, "client-side enqueue waking up: destroyed"); - return 0; - } - - if (lock->l_flags & LDLM_FL_FAILED) { + if (ldlm_is_failed(lock)) { LDLM_DEBUG(lock, "client-side enqueue waking up: failed"); return -EIO; } - if (rc) { - LDLM_DEBUG(lock, "client-side enqueue waking up: failed (%d)", - rc); - return rc; - } - LDLM_DEBUG(lock, "client-side enqueue granted"); lock_res_and_lock(lock); + /* + * Protect against race where lock could have been just destroyed + * due to overlap in ldlm_process_flock_lock(). + */ + if (ldlm_is_destroyed(lock)) { + unlock_res_and_lock(lock); + LDLM_DEBUG(lock, "client-side enqueue waking up: destroyed"); + return 0; + } + /* ldlm_lock_enqueue() has already placed lock on the granted list. */ list_del_init(&lock->l_res_link); - if (lock->l_flags & LDLM_FL_FLOCK_DEADLOCK) { + if (ldlm_is_flock_deadlock(lock)) { LDLM_DEBUG(lock, "client-side enqueue deadlock received"); rc = -EDEADLK; } else if (flags & LDLM_FL_TEST_LOCK) { diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_internal.h b/drivers/staging/lustre/lustre/ldlm/ldlm_internal.h index e21373e73..32f227f37 100644 --- a/drivers/staging/lustre/lustre/ldlm/ldlm_internal.h +++ b/drivers/staging/lustre/lustre/ldlm/ldlm_internal.h @@ -95,9 +95,10 @@ enum { LDLM_CANCEL_PASSED = 1 << 1, /* Cancel passed number of locks. */ LDLM_CANCEL_SHRINK = 1 << 2, /* Cancel locks from shrinker. */ LDLM_CANCEL_LRUR = 1 << 3, /* Cancel locks from lru resize. */ - LDLM_CANCEL_NO_WAIT = 1 << 4 /* Cancel locks w/o blocking (neither - * sending nor waiting for any rpcs) - */ + LDLM_CANCEL_NO_WAIT = 1 << 4, /* Cancel locks w/o blocking (neither + * sending nor waiting for any rpcs) + */ + LDLM_CANCEL_LRUR_NO_WAIT = 1 << 5, /* LRUR + NO_WAIT */ }; int ldlm_cancel_lru(struct ldlm_namespace *ns, int nr, @@ -145,7 +146,8 @@ void ldlm_lock_decref_internal(struct ldlm_lock *, __u32 mode); void ldlm_lock_decref_internal_nolock(struct ldlm_lock *, __u32 mode); int ldlm_run_ast_work(struct ldlm_namespace *ns, struct list_head *rpc_list, enum ldlm_desc_ast_t ast_type); -int ldlm_lock_remove_from_lru(struct ldlm_lock *lock); +int ldlm_lock_remove_from_lru_check(struct ldlm_lock *lock, time_t last_use); +#define ldlm_lock_remove_from_lru(lock) ldlm_lock_remove_from_lru_check(lock, 0) int ldlm_lock_remove_from_lru_nolock(struct ldlm_lock *lock); void ldlm_lock_destroy_nolock(struct ldlm_lock *lock); @@ -216,8 +218,6 @@ enum ldlm_policy_res { LDLM_POLICY_SKIP_LOCK }; -typedef enum ldlm_policy_res ldlm_policy_res_t; - #define LDLM_POOL_SYSFS_PRINT_int(v) sprintf(buf, "%d\n", v) #define LDLM_POOL_SYSFS_SET_int(a, b) { a = b; } #define LDLM_POOL_SYSFS_PRINT_u64(v) sprintf(buf, "%lld\n", v) @@ -305,9 +305,10 @@ static inline int is_granted_or_cancelled(struct ldlm_lock *lock) int ret = 0; lock_res_and_lock(lock); - if (((lock->l_req_mode == lock->l_granted_mode) && - !(lock->l_flags & LDLM_FL_CP_REQD)) || - (lock->l_flags & (LDLM_FL_FAILED | LDLM_FL_CANCEL))) + if ((lock->l_req_mode == lock->l_granted_mode) && + !ldlm_is_cp_reqd(lock)) + ret = 1; + else if (ldlm_is_failed(lock) || ldlm_is_cancel(lock)) ret = 1; unlock_res_and_lock(lock); diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_lib.c b/drivers/staging/lustre/lustre/ldlm/ldlm_lib.c index 7dd7df59a..b4ffbe2fc 100644 --- a/drivers/staging/lustre/lustre/ldlm/ldlm_lib.c +++ b/drivers/staging/lustre/lustre/ldlm/ldlm_lib.c @@ -314,7 +314,7 @@ int client_obd_setup(struct obd_device *obddev, struct lustre_cfg *lcfg) INIT_LIST_HEAD(&cli->cl_loi_hp_ready_list); INIT_LIST_HEAD(&cli->cl_loi_write_list); INIT_LIST_HEAD(&cli->cl_loi_read_list); - client_obd_list_lock_init(&cli->cl_loi_list_lock); + spin_lock_init(&cli->cl_loi_list_lock); atomic_set(&cli->cl_pending_w_pages, 0); atomic_set(&cli->cl_pending_r_pages, 0); cli->cl_r_in_flight = 0; @@ -333,7 +333,8 @@ int client_obd_setup(struct obd_device *obddev, struct lustre_cfg *lcfg) atomic_set(&cli->cl_lru_busy, 0); atomic_set(&cli->cl_lru_in_list, 0); INIT_LIST_HEAD(&cli->cl_lru_list); - client_obd_list_lock_init(&cli->cl_lru_list_lock); + spin_lock_init(&cli->cl_lru_list_lock); + atomic_set(&cli->cl_unstable_count, 0); init_waitqueue_head(&cli->cl_destroy_waitq); atomic_set(&cli->cl_destroy_in_flight, 0); @@ -355,6 +356,12 @@ int client_obd_setup(struct obd_device *obddev, struct lustre_cfg *lcfg) cli->cl_max_pages_per_rpc = min_t(int, PTLRPC_MAX_BRW_PAGES, LNET_MTU >> PAGE_SHIFT); + /* + * set cl_chunkbits default value to PAGE_CACHE_SHIFT, + * it will be updated at OSC connection time. + */ + cli->cl_chunkbits = PAGE_SHIFT; + if (!strcmp(name, LUSTRE_MDC_NAME)) { cli->cl_max_rpcs_in_flight = MDC_MAX_RIF_DEFAULT; } else if (totalram_pages >> (20 - PAGE_SHIFT) <= 128 /* MB */) { @@ -429,7 +436,6 @@ err_ldlm: ldlm_put_ref(); err: return rc; - } EXPORT_SYMBOL(client_obd_setup); @@ -438,6 +444,7 @@ int client_obd_cleanup(struct obd_device *obddev) ldlm_namespace_free_post(obddev->obd_namespace); obddev->obd_namespace = NULL; + obd_cleanup_client_import(obddev); LASSERT(!obddev->u.cli.cl_import); ldlm_put_ref(); @@ -748,6 +755,7 @@ int ldlm_error2errno(enum ldlm_error error) switch (error) { case ELDLM_OK: + case ELDLM_LOCK_MATCHED: result = 0; break; case ELDLM_LOCK_CHANGED: diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_lock.c b/drivers/staging/lustre/lustre/ldlm/ldlm_lock.c index ecd65a7a3..bff94ea12 100644 --- a/drivers/staging/lustre/lustre/ldlm/ldlm_lock.c +++ b/drivers/staging/lustre/lustre/ldlm/ldlm_lock.c @@ -185,7 +185,7 @@ void ldlm_lock_put(struct ldlm_lock *lock) "final lock_put on destroyed lock, freeing it."); res = lock->l_resource; - LASSERT(lock->l_flags & LDLM_FL_DESTROYED); + LASSERT(ldlm_is_destroyed(lock)); LASSERT(list_empty(&lock->l_res_link)); LASSERT(list_empty(&lock->l_pending_chain)); @@ -229,15 +229,25 @@ int ldlm_lock_remove_from_lru_nolock(struct ldlm_lock *lock) /** * Removes LDLM lock \a lock from LRU. Obtains the LRU lock first. + * + * If \a last_use is non-zero, it will remove the lock from LRU only if + * it matches lock's l_last_used. + * + * \retval 0 if \a last_use is set, the lock is not in LRU list or \a last_use + * doesn't match lock's l_last_used; + * otherwise, the lock hasn't been in the LRU list. + * \retval 1 the lock was in LRU list and removed. */ -int ldlm_lock_remove_from_lru(struct ldlm_lock *lock) +int ldlm_lock_remove_from_lru_check(struct ldlm_lock *lock, time_t last_use) { struct ldlm_namespace *ns = ldlm_lock_to_ns(lock); - int rc; + int rc = 0; spin_lock(&ns->ns_lock); - rc = ldlm_lock_remove_from_lru_nolock(lock); + if (last_use == 0 || last_use == lock->l_last_used) + rc = ldlm_lock_remove_from_lru_nolock(lock); spin_unlock(&ns->ns_lock); + return rc; } @@ -252,8 +262,7 @@ static void ldlm_lock_add_to_lru_nolock(struct ldlm_lock *lock) LASSERT(list_empty(&lock->l_lru)); LASSERT(lock->l_resource->lr_type != LDLM_FLOCK); list_add_tail(&lock->l_lru, &ns->ns_unused_list); - if (lock->l_flags & LDLM_FL_SKIPPED) - lock->l_flags &= ~LDLM_FL_SKIPPED; + ldlm_clear_skipped(lock); LASSERT(ns->ns_nr_unused >= 0); ns->ns_nr_unused++; } @@ -318,11 +327,11 @@ static int ldlm_lock_destroy_internal(struct ldlm_lock *lock) LBUG(); } - if (lock->l_flags & LDLM_FL_DESTROYED) { + if (ldlm_is_destroyed(lock)) { LASSERT(list_empty(&lock->l_lru)); return 0; } - lock->l_flags |= LDLM_FL_DESTROYED; + ldlm_set_destroyed(lock); if (lock->l_export && lock->l_export->exp_lock_hash) { /* NB: it's safe to call cfs_hash_del() even lock isn't @@ -544,7 +553,7 @@ struct ldlm_lock *__ldlm_handle2lock(const struct lustre_handle *handle, /* It's unlikely but possible that someone marked the lock as * destroyed after we did handle2object on it */ - if (flags == 0 && ((lock->l_flags & LDLM_FL_DESTROYED) == 0)) { + if (flags == 0 && !ldlm_is_destroyed(lock)) { lu_ref_add(&lock->l_reference, "handle", current); return lock; } @@ -554,21 +563,22 @@ struct ldlm_lock *__ldlm_handle2lock(const struct lustre_handle *handle, LASSERT(lock->l_resource); lu_ref_add_atomic(&lock->l_reference, "handle", current); - if (unlikely(lock->l_flags & LDLM_FL_DESTROYED)) { + if (unlikely(ldlm_is_destroyed(lock))) { unlock_res_and_lock(lock); CDEBUG(D_INFO, "lock already destroyed: lock %p\n", lock); LDLM_LOCK_PUT(lock); return NULL; } - if (flags && (lock->l_flags & flags)) { - unlock_res_and_lock(lock); - LDLM_LOCK_PUT(lock); - return NULL; - } + if (flags) { + if (lock->l_flags & flags) { + unlock_res_and_lock(lock); + LDLM_LOCK_PUT(lock); + return NULL; + } - if (flags) lock->l_flags |= flags; + } unlock_res_and_lock(lock); return lock; @@ -599,14 +609,14 @@ EXPORT_SYMBOL(ldlm_lock2desc); static void ldlm_add_bl_work_item(struct ldlm_lock *lock, struct ldlm_lock *new, struct list_head *work_list) { - if ((lock->l_flags & LDLM_FL_AST_SENT) == 0) { + if (!ldlm_is_ast_sent(lock)) { LDLM_DEBUG(lock, "lock incompatible; sending blocking AST."); - lock->l_flags |= LDLM_FL_AST_SENT; + ldlm_set_ast_sent(lock); /* If the enqueuing client said so, tell the AST recipient to * discard dirty data, rather than writing back. */ - if (new->l_flags & LDLM_FL_AST_DISCARD_DATA) - lock->l_flags |= LDLM_FL_DISCARD_DATA; + if (ldlm_is_ast_discard_data(new)) + ldlm_set_discard_data(lock); LASSERT(list_empty(&lock->l_bl_ast)); list_add(&lock->l_bl_ast, work_list); LDLM_LOCK_GET(lock); @@ -621,8 +631,8 @@ static void ldlm_add_bl_work_item(struct ldlm_lock *lock, struct ldlm_lock *new, static void ldlm_add_cp_work_item(struct ldlm_lock *lock, struct list_head *work_list) { - if ((lock->l_flags & LDLM_FL_CP_REQD) == 0) { - lock->l_flags |= LDLM_FL_CP_REQD; + if (!ldlm_is_cp_reqd(lock)) { + ldlm_set_cp_reqd(lock); LDLM_DEBUG(lock, "lock granted; sending completion AST."); LASSERT(list_empty(&lock->l_cp_ast)); list_add(&lock->l_cp_ast, work_list); @@ -657,7 +667,7 @@ void ldlm_lock_addref(struct lustre_handle *lockh, __u32 mode) struct ldlm_lock *lock; lock = ldlm_handle2lock(lockh); - LASSERT(lock); + LASSERTF(lock, "Non-existing lock: %llx\n", lockh->cookie); ldlm_lock_addref_internal(lock, mode); LDLM_LOCK_PUT(lock); } @@ -704,7 +714,7 @@ int ldlm_lock_addref_try(struct lustre_handle *lockh, __u32 mode) if (lock) { lock_res_and_lock(lock); if (lock->l_readers != 0 || lock->l_writers != 0 || - !(lock->l_flags & LDLM_FL_CBPENDING)) { + !ldlm_is_cbpending(lock)) { ldlm_lock_addref_internal_nolock(lock, mode); result = 0; } @@ -770,17 +780,17 @@ void ldlm_lock_decref_internal(struct ldlm_lock *lock, __u32 mode) ldlm_lock_decref_internal_nolock(lock, mode); - if (lock->l_flags & LDLM_FL_LOCAL && + if (ldlm_is_local(lock) && !lock->l_readers && !lock->l_writers) { /* If this is a local lock on a server namespace and this was * the last reference, cancel the lock. */ CDEBUG(D_INFO, "forcing cancel of local lock\n"); - lock->l_flags |= LDLM_FL_CBPENDING; + ldlm_set_cbpending(lock); } if (!lock->l_readers && !lock->l_writers && - (lock->l_flags & LDLM_FL_CBPENDING)) { + ldlm_is_cbpending(lock)) { /* If we received a blocked AST and this was the last reference, * run the callback. */ @@ -791,16 +801,14 @@ void ldlm_lock_decref_internal(struct ldlm_lock *lock, __u32 mode) ldlm_lock_remove_from_lru(lock); unlock_res_and_lock(lock); - if (lock->l_flags & LDLM_FL_FAIL_LOC) + if (ldlm_is_fail_loc(lock)) OBD_RACE(OBD_FAIL_LDLM_CP_BL_RACE); - if ((lock->l_flags & LDLM_FL_ATOMIC_CB) || + if (ldlm_is_atomic_cb(lock) || ldlm_bl_to_thread_lock(ns, NULL, lock) != 0) ldlm_handle_bl_callback(ns, NULL, lock); } else if (!lock->l_readers && !lock->l_writers && - !(lock->l_flags & LDLM_FL_NO_LRU) && - !(lock->l_flags & LDLM_FL_BL_AST)) { - + !ldlm_is_no_lru(lock) && !ldlm_is_bl_ast(lock)) { LDLM_DEBUG(lock, "add lock into lru list"); /* If this is a client-side namespace and this was the last @@ -809,7 +817,7 @@ void ldlm_lock_decref_internal(struct ldlm_lock *lock, __u32 mode) ldlm_lock_add_to_lru(lock); unlock_res_and_lock(lock); - if (lock->l_flags & LDLM_FL_FAIL_LOC) + if (ldlm_is_fail_loc(lock)) OBD_RACE(OBD_FAIL_LDLM_CP_BL_RACE); /* Call ldlm_cancel_lru() only if EARLY_CANCEL and LRU RESIZE @@ -853,7 +861,7 @@ void ldlm_lock_decref_and_cancel(struct lustre_handle *lockh, __u32 mode) LDLM_DEBUG(lock, "ldlm_lock_decref(%s)", ldlm_lockname[mode]); lock_res_and_lock(lock); - lock->l_flags |= LDLM_FL_CBPENDING; + ldlm_set_cbpending(lock); unlock_res_and_lock(lock); ldlm_lock_decref_internal(lock, mode); LDLM_LOCK_PUT(lock); @@ -971,7 +979,7 @@ static void ldlm_granted_list_add_lock(struct ldlm_lock *lock, ldlm_resource_dump(D_INFO, res); LDLM_DEBUG(lock, "About to add lock:"); - if (lock->l_flags & LDLM_FL_DESTROYED) { + if (ldlm_is_destroyed(lock)) { CDEBUG(D_OTHER, "Lock destroyed, not adding to resource\n"); return; } @@ -1073,10 +1081,9 @@ static struct ldlm_lock *search_queue(struct list_head *queue, * whose parents already hold a lock so forward progress * can still happen. */ - if (lock->l_flags & LDLM_FL_CBPENDING && - !(flags & LDLM_FL_CBPENDING)) + if (ldlm_is_cbpending(lock) && !(flags & LDLM_FL_CBPENDING)) continue; - if (!unref && lock->l_flags & LDLM_FL_CBPENDING && + if (!unref && ldlm_is_cbpending(lock) && lock->l_readers == 0 && lock->l_writers == 0) continue; @@ -1092,6 +1099,7 @@ static struct ldlm_lock *search_queue(struct list_head *queue, if (unlikely(match == LCK_GROUP) && lock->l_resource->lr_type == LDLM_EXTENT && + policy->l_extent.gid != LDLM_GID_ANY && lock->l_policy_data.l_extent.gid != policy->l_extent.gid) continue; @@ -1104,11 +1112,10 @@ static struct ldlm_lock *search_queue(struct list_head *queue, policy->l_inodebits.bits)) continue; - if (!unref && (lock->l_flags & LDLM_FL_GONE_MASK)) + if (!unref && LDLM_HAVE_MASK(lock, GONE)) continue; - if ((flags & LDLM_FL_LOCAL_ONLY) && - !(lock->l_flags & LDLM_FL_LOCAL)) + if ((flags & LDLM_FL_LOCAL_ONLY) && !ldlm_is_local(lock)) continue; if (flags & LDLM_FL_TEST_LOCK) { @@ -1142,7 +1149,7 @@ EXPORT_SYMBOL(ldlm_lock_fail_match_locked); */ void ldlm_lock_allow_match_locked(struct ldlm_lock *lock) { - lock->l_flags |= LDLM_FL_LVB_READY; + ldlm_set_lvb_ready(lock); wake_up_all(&lock->l_waitq); } EXPORT_SYMBOL(ldlm_lock_allow_match_locked); @@ -1243,8 +1250,7 @@ enum ldlm_mode ldlm_lock_match(struct ldlm_namespace *ns, __u64 flags, if (lock) { ldlm_lock2handle(lock, lockh); - if ((flags & LDLM_FL_LVB_READY) && - (!(lock->l_flags & LDLM_FL_LVB_READY))) { + if ((flags & LDLM_FL_LVB_READY) && !ldlm_is_lvb_ready(lock)) { __u64 wait_flags = LDLM_FL_LVB_READY | LDLM_FL_DESTROYED | LDLM_FL_FAIL_NOTIFIED; struct l_wait_info lwi; @@ -1271,7 +1277,7 @@ enum ldlm_mode ldlm_lock_match(struct ldlm_namespace *ns, __u64 flags, l_wait_event(lock->l_waitq, lock->l_flags & wait_flags, &lwi); - if (!(lock->l_flags & LDLM_FL_LVB_READY)) { + if (!ldlm_is_lvb_ready(lock)) { if (flags & LDLM_FL_TEST_LOCK) LDLM_LOCK_RELEASE(lock); else @@ -1325,10 +1331,10 @@ enum ldlm_mode ldlm_revalidate_lock_handle(struct lustre_handle *lockh, lock = ldlm_handle2lock(lockh); if (lock) { lock_res_and_lock(lock); - if (lock->l_flags & LDLM_FL_GONE_MASK) + if (LDLM_HAVE_MASK(lock, GONE)) goto out; - if (lock->l_flags & LDLM_FL_CBPENDING && + if (ldlm_is_cbpending(lock) && lock->l_readers == 0 && lock->l_writers == 0) goto out; @@ -1542,7 +1548,8 @@ enum ldlm_error ldlm_lock_enqueue(struct ldlm_namespace *ns, /* Some flags from the enqueue want to make it into the AST, via the * lock's l_flags. */ - lock->l_flags |= *flags & LDLM_FL_AST_DISCARD_DATA; + if (*flags & LDLM_FL_AST_DISCARD_DATA) + ldlm_set_ast_discard_data(lock); /* * This distinction between local lock trees is very important; a client @@ -1581,7 +1588,7 @@ ldlm_work_bl_ast_lock(struct ptlrpc_request_set *rqset, void *opaq) lock_res_and_lock(lock); list_del_init(&lock->l_bl_ast); - LASSERT(lock->l_flags & LDLM_FL_AST_SENT); + LASSERT(ldlm_is_ast_sent(lock)); LASSERT(lock->l_bl_ast_run == 0); LASSERT(lock->l_blocking_lock); lock->l_bl_ast_run++; @@ -1628,12 +1635,12 @@ ldlm_work_cp_ast_lock(struct ptlrpc_request_set *rqset, void *opaq) /* nobody should touch l_cp_ast */ lock_res_and_lock(lock); list_del_init(&lock->l_cp_ast); - LASSERT(lock->l_flags & LDLM_FL_CP_REQD); + LASSERT(ldlm_is_cp_reqd(lock)); /* save l_completion_ast since it can be changed by * mds_intent_policy(), see bug 14225 */ completion_callback = lock->l_completion_ast; - lock->l_flags &= ~LDLM_FL_CP_REQD; + ldlm_clear_cp_reqd(lock); unlock_res_and_lock(lock); if (completion_callback) @@ -1778,8 +1785,8 @@ out: void ldlm_cancel_callback(struct ldlm_lock *lock) { check_res_locked(lock->l_resource); - if (!(lock->l_flags & LDLM_FL_CANCEL)) { - lock->l_flags |= LDLM_FL_CANCEL; + if (!ldlm_is_cancel(lock)) { + ldlm_set_cancel(lock); if (lock->l_blocking_ast) { unlock_res_and_lock(lock); lock->l_blocking_ast(lock, NULL, lock->l_ast_data, @@ -1789,7 +1796,7 @@ void ldlm_cancel_callback(struct ldlm_lock *lock) LDLM_DEBUG(lock, "no blocking ast"); } } - lock->l_flags |= LDLM_FL_BL_DONE; + ldlm_set_bl_done(lock); } /** diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_lockd.c b/drivers/staging/lustre/lustre/ldlm/ldlm_lockd.c index ebe9042ad..ab739f079 100644 --- a/drivers/staging/lustre/lustre/ldlm/ldlm_lockd.c +++ b/drivers/staging/lustre/lustre/ldlm/ldlm_lockd.c @@ -124,10 +124,10 @@ void ldlm_handle_bl_callback(struct ldlm_namespace *ns, LDLM_DEBUG(lock, "client blocking AST callback handler"); lock_res_and_lock(lock); - lock->l_flags |= LDLM_FL_CBPENDING; + ldlm_set_cbpending(lock); - if (lock->l_flags & LDLM_FL_CANCEL_ON_BLOCK) - lock->l_flags |= LDLM_FL_CANCEL; + if (ldlm_is_cancel_on_block(lock)) + ldlm_set_cancel(lock); do_ast = !lock->l_readers && !lock->l_writers; unlock_res_and_lock(lock); @@ -172,7 +172,7 @@ static void ldlm_handle_cp_callback(struct ptlrpc_request *req, set_current_state(TASK_INTERRUPTIBLE); schedule_timeout(to); if (lock->l_granted_mode == lock->l_req_mode || - lock->l_flags & LDLM_FL_DESTROYED) + ldlm_is_destroyed(lock)) break; } } @@ -215,7 +215,7 @@ static void ldlm_handle_cp_callback(struct ptlrpc_request *req, } lock_res_and_lock(lock); - if ((lock->l_flags & LDLM_FL_DESTROYED) || + if (ldlm_is_destroyed(lock) || lock->l_granted_mode == lock->l_req_mode) { /* bug 11300: the lock has already been granted */ unlock_res_and_lock(lock); @@ -291,7 +291,7 @@ static void ldlm_handle_cp_callback(struct ptlrpc_request *req, out: if (rc < 0) { lock_res_and_lock(lock); - lock->l_flags |= LDLM_FL_FAILED; + ldlm_set_failed(lock); unlock_res_and_lock(lock); wake_up(&lock->l_waitq); } @@ -360,8 +360,7 @@ static int __ldlm_bl_to_thread(struct ldlm_bl_work_item *blwi, struct ldlm_bl_pool *blp = ldlm_state->ldlm_bl_pool; spin_lock(&blp->blp_lock); - if (blwi->blwi_lock && - blwi->blwi_lock->l_flags & LDLM_FL_DISCARD_DATA) { + if (blwi->blwi_lock && ldlm_is_discard_data(blwi->blwi_lock)) { /* add LDLM_FL_DISCARD_DATA requests to the priority list */ list_add_tail(&blwi->blwi_entry, &blp->blp_prio_list); } else { @@ -626,23 +625,22 @@ static int ldlm_callback_handler(struct ptlrpc_request *req) return 0; } - if ((lock->l_flags & LDLM_FL_FAIL_LOC) && + if (ldlm_is_fail_loc(lock) && lustre_msg_get_opc(req->rq_reqmsg) == LDLM_BL_CALLBACK) OBD_RACE(OBD_FAIL_LDLM_CP_BL_RACE); /* Copy hints/flags (e.g. LDLM_FL_DISCARD_DATA) from AST. */ lock_res_and_lock(lock); lock->l_flags |= ldlm_flags_from_wire(dlm_req->lock_flags & - LDLM_AST_FLAGS); + LDLM_FL_AST_MASK); if (lustre_msg_get_opc(req->rq_reqmsg) == LDLM_BL_CALLBACK) { /* If somebody cancels lock and cache is already dropped, * or lock is failed before cp_ast received on client, * we can tell the server we have no lock. Otherwise, we * should send cancel after dropping the cache. */ - if (((lock->l_flags & LDLM_FL_CANCELING) && - (lock->l_flags & LDLM_FL_BL_DONE)) || - (lock->l_flags & LDLM_FL_FAILED)) { + if ((ldlm_is_canceling(lock) && ldlm_is_bl_done(lock)) || + ldlm_is_failed(lock)) { LDLM_DEBUG(lock, "callback on lock %#llx - lock disappeared\n", dlm_req->lock_handle[0].cookie); unlock_res_and_lock(lock); @@ -656,7 +654,7 @@ static int ldlm_callback_handler(struct ptlrpc_request *req) * Let ldlm_cancel_lru() be fast. */ ldlm_lock_remove_from_lru(lock); - lock->l_flags |= LDLM_FL_BL_AST; + ldlm_set_bl_ast(lock); } unlock_res_and_lock(lock); @@ -674,7 +672,7 @@ static int ldlm_callback_handler(struct ptlrpc_request *req) case LDLM_BL_CALLBACK: CDEBUG(D_INODE, "blocking ast\n"); req_capsule_extend(&req->rq_pill, &RQF_LDLM_BL_CALLBACK); - if (!(lock->l_flags & LDLM_FL_CANCEL_ON_BLOCK)) { + if (!ldlm_is_cancel_on_block(lock)) { rc = ldlm_callback_reply(req, 0); if (req->rq_no_reply || rc) ldlm_callback_errmsg(req, "Normal process", rc, diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_request.c b/drivers/staging/lustre/lustre/ldlm/ldlm_request.c index 74e193e52..107314e28 100644 --- a/drivers/staging/lustre/lustre/ldlm/ldlm_request.c +++ b/drivers/staging/lustre/lustre/ldlm/ldlm_request.c @@ -153,7 +153,7 @@ static int ldlm_completion_tail(struct ldlm_lock *lock) long delay; int result; - if (lock->l_flags & (LDLM_FL_DESTROYED | LDLM_FL_FAILED)) { + if (ldlm_is_destroyed(lock) || ldlm_is_failed(lock)) { LDLM_DEBUG(lock, "client-side enqueue: destroyed"); result = -EIO; } else { @@ -252,7 +252,7 @@ noreproc: lwd.lwd_lock = lock; - if (lock->l_flags & LDLM_FL_NO_TIMEOUT) { + if (ldlm_is_no_timeout(lock)) { LDLM_DEBUG(lock, "waiting indefinitely because of NO_TIMEOUT"); lwi = LWI_INTR(interrupted_completion_wait, &lwd); } else { @@ -269,7 +269,7 @@ noreproc: if (OBD_FAIL_CHECK_RESET(OBD_FAIL_LDLM_INTR_CP_AST, OBD_FAIL_LDLM_CP_BL_RACE | OBD_FAIL_ONCE)) { - lock->l_flags |= LDLM_FL_FAIL_LOC; + ldlm_set_fail_loc(lock); rc = -EINTR; } else { /* Go to sleep until the lock is granted or cancelled. */ @@ -296,7 +296,7 @@ static void failed_lock_cleanup(struct ldlm_namespace *ns, lock_res_and_lock(lock); /* Check that lock is not granted or failed, we might race. */ if ((lock->l_req_mode != lock->l_granted_mode) && - !(lock->l_flags & LDLM_FL_FAILED)) { + !ldlm_is_failed(lock)) { /* Make sure that this lock will not be found by raced * bl_ast and -EINVAL reply is sent to server anyways. * bug 17645 @@ -347,7 +347,6 @@ int ldlm_cli_enqueue_fini(struct obd_export *exp, struct ptlrpc_request *req, struct ldlm_lock *lock; struct ldlm_reply *reply; int cleanup_phase = 1; - int size = 0; lock = ldlm_handle2lock(lockh); /* ldlm_cli_enqueue is holding a reference on this lock. */ @@ -375,8 +374,8 @@ int ldlm_cli_enqueue_fini(struct obd_export *exp, struct ptlrpc_request *req, goto cleanup; } - if (lvb_len != 0) { - LASSERT(lvb); + if (lvb_len > 0) { + int size = 0; size = req_capsule_get_size(&req->rq_pill, &RMF_DLM_LVB, RCL_SERVER); @@ -390,12 +389,13 @@ int ldlm_cli_enqueue_fini(struct obd_export *exp, struct ptlrpc_request *req, rc = -EINVAL; goto cleanup; } + lvb_len = size; } if (rc == ELDLM_LOCK_ABORTED) { - if (lvb_len != 0) + if (lvb_len > 0 && lvb) rc = ldlm_fill_lvb(lock, &req->rq_pill, RCL_SERVER, - lvb, size); + lvb, lvb_len); if (rc == 0) rc = ELDLM_LOCK_ABORTED; goto cleanup; @@ -421,7 +421,7 @@ int ldlm_cli_enqueue_fini(struct obd_export *exp, struct ptlrpc_request *req, *flags = ldlm_flags_from_wire(reply->lock_flags); lock->l_flags |= ldlm_flags_from_wire(reply->lock_flags & - LDLM_INHERIT_FLAGS); + LDLM_FL_INHERIT_MASK); /* move NO_TIMEOUT flag to the lock to force ldlm_lock_match() * to wait with no timeout as well */ @@ -489,7 +489,7 @@ int ldlm_cli_enqueue_fini(struct obd_export *exp, struct ptlrpc_request *req, /* If the lock has already been granted by a completion AST, don't * clobber the LVB with an older one. */ - if (lvb_len != 0) { + if (lvb_len > 0) { /* We must lock or a racing completion might update lvb without * letting us know and we'll clobber the correct value. * Cannot unlock after the check either, as that still leaves @@ -498,7 +498,7 @@ int ldlm_cli_enqueue_fini(struct obd_export *exp, struct ptlrpc_request *req, lock_res_and_lock(lock); if (lock->l_req_mode != lock->l_granted_mode) rc = ldlm_fill_lvb(lock, &req->rq_pill, RCL_SERVER, - lock->l_lvb_data, size); + lock->l_lvb_data, lvb_len); unlock_res_and_lock(lock); if (rc < 0) { cleanup_phase = 1; @@ -518,7 +518,7 @@ int ldlm_cli_enqueue_fini(struct obd_export *exp, struct ptlrpc_request *req, } } - if (lvb_len && lvb) { + if (lvb_len > 0 && lvb) { /* Copy the LVB here, and not earlier, because the completion * AST (if any) can override what we got in the reply */ @@ -601,7 +601,7 @@ int ldlm_prep_elc_req(struct obd_export *exp, struct ptlrpc_request *req, avail = ldlm_capsule_handles_avail(pill, RCL_CLIENT, canceloff); flags = ns_connect_lru_resize(ns) ? - LDLM_CANCEL_LRUR : LDLM_CANCEL_AGED; + LDLM_CANCEL_LRUR_NO_WAIT : LDLM_CANCEL_AGED; to_free = !ns_connect_lru_resize(ns) && opc == LDLM_ENQUEUE ? 1 : 0; @@ -821,12 +821,11 @@ static __u64 ldlm_cli_cancel_local(struct ldlm_lock *lock) LDLM_DEBUG(lock, "client-side cancel"); /* Set this flag to prevent others from getting new references*/ lock_res_and_lock(lock); - lock->l_flags |= LDLM_FL_CBPENDING; + ldlm_set_cbpending(lock); local_only = !!(lock->l_flags & (LDLM_FL_LOCAL_ONLY|LDLM_FL_CANCEL_ON_BLOCK)); ldlm_cancel_callback(lock); - rc = (lock->l_flags & LDLM_FL_BL_AST) ? - LDLM_FL_BL_AST : LDLM_FL_CANCELING; + rc = ldlm_is_bl_ast(lock) ? LDLM_FL_BL_AST : LDLM_FL_CANCELING; unlock_res_and_lock(lock); if (local_only) { @@ -1131,31 +1130,30 @@ EXPORT_SYMBOL(ldlm_cli_cancel_list_local); * dirty data, to close a file, ...) or waiting for any RPCs in-flight (e.g. * readahead requests, ...) */ -static ldlm_policy_res_t ldlm_cancel_no_wait_policy(struct ldlm_namespace *ns, - struct ldlm_lock *lock, - int unused, int added, - int count) +static enum ldlm_policy_res +ldlm_cancel_no_wait_policy(struct ldlm_namespace *ns, struct ldlm_lock *lock, + int unused, int added, int count) { - ldlm_policy_res_t result = LDLM_POLICY_CANCEL_LOCK; - ldlm_cancel_for_recovery cb = ns->ns_cancel_for_recovery; - - lock_res_and_lock(lock); + enum ldlm_policy_res result = LDLM_POLICY_CANCEL_LOCK; /* don't check added & count since we want to process all locks - * from unused list + * from unused list. + * It's fine to not take lock to access lock->l_resource since + * the lock has already been granted so it won't change. */ switch (lock->l_resource->lr_type) { case LDLM_EXTENT: case LDLM_IBITS: - if (cb && cb(lock)) + if (ns->ns_cancel && ns->ns_cancel(lock) != 0) break; default: result = LDLM_POLICY_SKIP_LOCK; - lock->l_flags |= LDLM_FL_SKIPPED; + lock_res_and_lock(lock); + ldlm_set_skipped(lock); + unlock_res_and_lock(lock); break; } - unlock_res_and_lock(lock); return result; } @@ -1168,10 +1166,10 @@ static ldlm_policy_res_t ldlm_cancel_no_wait_policy(struct ldlm_namespace *ns, * * \retval LDLM_POLICY_CANCEL_LOCK cancel lock from LRU */ -static ldlm_policy_res_t ldlm_cancel_lrur_policy(struct ldlm_namespace *ns, - struct ldlm_lock *lock, - int unused, int added, - int count) +static enum ldlm_policy_res ldlm_cancel_lrur_policy(struct ldlm_namespace *ns, + struct ldlm_lock *lock, + int unused, int added, + int count) { unsigned long cur = cfs_time_current(); struct ldlm_pool *pl = &ns->ns_pool; @@ -1196,8 +1194,13 @@ static ldlm_policy_res_t ldlm_cancel_lrur_policy(struct ldlm_namespace *ns, /* Stop when SLV is not yet come from server or lv is smaller than * it is. */ - return (slv == 0 || lv < slv) ? - LDLM_POLICY_KEEP_LOCK : LDLM_POLICY_CANCEL_LOCK; + if (slv == 0 || lv < slv) + return LDLM_POLICY_KEEP_LOCK; + + if (ns->ns_cancel && ns->ns_cancel(lock) == 0) + return LDLM_POLICY_KEEP_LOCK; + + return LDLM_POLICY_CANCEL_LOCK; } /** @@ -1209,10 +1212,10 @@ static ldlm_policy_res_t ldlm_cancel_lrur_policy(struct ldlm_namespace *ns, * * \retval LDLM_POLICY_CANCEL_LOCK cancel lock from LRU */ -static ldlm_policy_res_t ldlm_cancel_passed_policy(struct ldlm_namespace *ns, - struct ldlm_lock *lock, - int unused, int added, - int count) +static enum ldlm_policy_res ldlm_cancel_passed_policy(struct ldlm_namespace *ns, + struct ldlm_lock *lock, + int unused, int added, + int count) { /* Stop LRU processing when we reach past @count or have checked all * locks in LRU. @@ -1230,16 +1233,35 @@ static ldlm_policy_res_t ldlm_cancel_passed_policy(struct ldlm_namespace *ns, * * \retval LDLM_POLICY_CANCEL_LOCK cancel lock from LRU */ -static ldlm_policy_res_t ldlm_cancel_aged_policy(struct ldlm_namespace *ns, - struct ldlm_lock *lock, - int unused, int added, - int count) +static enum ldlm_policy_res ldlm_cancel_aged_policy(struct ldlm_namespace *ns, + struct ldlm_lock *lock, + int unused, int added, + int count) { - /* Stop LRU processing if young lock is found and we reach past count */ - return ((added >= count) && - time_before(cfs_time_current(), - cfs_time_add(lock->l_last_used, ns->ns_max_age))) ? - LDLM_POLICY_KEEP_LOCK : LDLM_POLICY_CANCEL_LOCK; + if ((added >= count) && + time_before(cfs_time_current(), + cfs_time_add(lock->l_last_used, ns->ns_max_age))) + return LDLM_POLICY_KEEP_LOCK; + + if (ns->ns_cancel && ns->ns_cancel(lock) == 0) + return LDLM_POLICY_KEEP_LOCK; + + return LDLM_POLICY_CANCEL_LOCK; +} + +static enum ldlm_policy_res +ldlm_cancel_lrur_no_wait_policy(struct ldlm_namespace *ns, + struct ldlm_lock *lock, + int unused, int added, + int count) +{ + enum ldlm_policy_res result; + + result = ldlm_cancel_lrur_policy(ns, lock, unused, added, count); + if (result == LDLM_POLICY_KEEP_LOCK) + return result; + + return ldlm_cancel_no_wait_policy(ns, lock, unused, added, count); } /** @@ -1251,10 +1273,9 @@ static ldlm_policy_res_t ldlm_cancel_aged_policy(struct ldlm_namespace *ns, * * \retval LDLM_POLICY_CANCEL_LOCK cancel lock from LRU */ -static ldlm_policy_res_t ldlm_cancel_default_policy(struct ldlm_namespace *ns, - struct ldlm_lock *lock, - int unused, int added, - int count) +static enum ldlm_policy_res +ldlm_cancel_default_policy(struct ldlm_namespace *ns, struct ldlm_lock *lock, + int unused, int added, int count) { /* Stop LRU processing when we reach past count or have checked all * locks in LRU. @@ -1263,7 +1284,8 @@ static ldlm_policy_res_t ldlm_cancel_default_policy(struct ldlm_namespace *ns, LDLM_POLICY_KEEP_LOCK : LDLM_POLICY_CANCEL_LOCK; } -typedef ldlm_policy_res_t (*ldlm_cancel_lru_policy_t)(struct ldlm_namespace *, +typedef enum ldlm_policy_res (*ldlm_cancel_lru_policy_t)( + struct ldlm_namespace *, struct ldlm_lock *, int, int, int); @@ -1281,6 +1303,8 @@ ldlm_cancel_lru_policy(struct ldlm_namespace *ns, int flags) return ldlm_cancel_lrur_policy; else if (flags & LDLM_CANCEL_PASSED) return ldlm_cancel_passed_policy; + else if (flags & LDLM_CANCEL_LRUR_NO_WAIT) + return ldlm_cancel_lrur_no_wait_policy; } else { if (flags & LDLM_CANCEL_AGED) return ldlm_cancel_aged_policy; @@ -1329,6 +1353,7 @@ static int ldlm_prepare_lru_list(struct ldlm_namespace *ns, ldlm_cancel_lru_policy_t pf; struct ldlm_lock *lock, *next; int added = 0, unused, remained; + int no_wait = flags & (LDLM_CANCEL_NO_WAIT | LDLM_CANCEL_LRUR_NO_WAIT); spin_lock(&ns->ns_lock); unused = ns->ns_nr_unused; @@ -1341,7 +1366,8 @@ static int ldlm_prepare_lru_list(struct ldlm_namespace *ns, LASSERT(pf); while (!list_empty(&ns->ns_unused_list)) { - ldlm_policy_res_t result; + enum ldlm_policy_res result; + time_t last_use = 0; /* all unused locks */ if (remained-- <= 0) @@ -1354,17 +1380,20 @@ static int ldlm_prepare_lru_list(struct ldlm_namespace *ns, list_for_each_entry_safe(lock, next, &ns->ns_unused_list, l_lru) { /* No locks which got blocking requests. */ - LASSERT(!(lock->l_flags & LDLM_FL_BL_AST)); + LASSERT(!ldlm_is_bl_ast(lock)); - if (flags & LDLM_CANCEL_NO_WAIT && - lock->l_flags & LDLM_FL_SKIPPED) + if (no_wait && ldlm_is_skipped(lock)) /* already processed */ continue; + last_use = lock->l_last_used; + if (last_use == cfs_time_current()) + continue; + /* Somebody is already doing CANCEL. No need for this * lock in LRU, do not traverse it again. */ - if (!(lock->l_flags & LDLM_FL_CANCELING)) + if (!ldlm_is_canceling(lock)) break; ldlm_lock_remove_from_lru_nolock(lock); @@ -1407,12 +1436,14 @@ static int ldlm_prepare_lru_list(struct ldlm_namespace *ns, lock_res_and_lock(lock); /* Check flags again under the lock. */ - if ((lock->l_flags & LDLM_FL_CANCELING) || - (ldlm_lock_remove_from_lru(lock) == 0)) { + if (ldlm_is_canceling(lock) || + (ldlm_lock_remove_from_lru_check(lock, last_use) == 0)) { /* Another thread is removing lock from LRU, or * somebody is already doing CANCEL, or there * is a blocking request which will send cancel - * by itself, or the lock is no longer unused. + * by itself, or the lock is no longer unused or + * the lock has been used since the pf() call and + * pages could be put under it. */ unlock_res_and_lock(lock); lu_ref_del(&lock->l_reference, @@ -1429,7 +1460,7 @@ static int ldlm_prepare_lru_list(struct ldlm_namespace *ns, * where while we are doing cancel here, server is also * silently cancelling this lock. */ - lock->l_flags &= ~LDLM_FL_CANCEL_ON_BLOCK; + ldlm_clear_cancel_on_block(lock); /* Setting the CBPENDING flag is a little misleading, * but prevents an important race; namely, once @@ -1526,8 +1557,7 @@ int ldlm_cancel_resource_local(struct ldlm_resource *res, /* If somebody is already doing CANCEL, or blocking AST came, * skip this lock. */ - if (lock->l_flags & LDLM_FL_BL_AST || - lock->l_flags & LDLM_FL_CANCELING) + if (ldlm_is_bl_ast(lock) || ldlm_is_canceling(lock)) continue; if (lockmode_compat(lock->l_granted_mode, mode)) @@ -1771,7 +1801,6 @@ static void ldlm_namespace_foreach(struct ldlm_namespace *ns, cfs_hash_for_each_nolock(ns->ns_rs_hash, ldlm_res_iter_helper, &helper); - } /* non-blocking function to manipulate a lock whose cb_data is being put away. @@ -1887,7 +1916,7 @@ static int replay_one_lock(struct obd_import *imp, struct ldlm_lock *lock) int flags; /* Bug 11974: Do not replay a lock which is actively being canceled */ - if (lock->l_flags & LDLM_FL_CANCELING) { + if (ldlm_is_canceling(lock)) { LDLM_DEBUG(lock, "Not replaying canceled lock:"); return 0; } @@ -1896,7 +1925,7 @@ static int replay_one_lock(struct obd_import *imp, struct ldlm_lock *lock) * server might have long dropped it, but notification of that event was * lost by network. (and server granted conflicting lock already) */ - if (lock->l_flags & LDLM_FL_CANCEL_ON_BLOCK) { + if (ldlm_is_cancel_on_block(lock)) { LDLM_DEBUG(lock, "Not replaying reply-less lock:"); ldlm_lock_cancel(lock); return 0; diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_resource.c b/drivers/staging/lustre/lustre/ldlm/ldlm_resource.c index 9dede87ad..e99c89c34 100644 --- a/drivers/staging/lustre/lustre/ldlm/ldlm_resource.c +++ b/drivers/staging/lustre/lustre/ldlm/ldlm_resource.c @@ -124,9 +124,15 @@ int ldlm_debugfs_setup(void) } rc = ldebugfs_add_vars(ldlm_debugfs_dir, ldlm_debugfs_list, NULL); + if (rc) { + CERROR("LProcFS failed in ldlm-init\n"); + goto err_svc; + } return 0; +err_svc: + ldebugfs_remove(&ldlm_svc_debugfs_dir); err_ns: ldebugfs_remove(&ldlm_ns_debugfs_dir); err_type: @@ -758,12 +764,12 @@ static void cleanup_resource(struct ldlm_resource *res, struct list_head *q, list_for_each(tmp, q) { lock = list_entry(tmp, struct ldlm_lock, l_res_link); - if (lock->l_flags & LDLM_FL_CLEANED) { + if (ldlm_is_cleaned(lock)) { lock = NULL; continue; } LDLM_LOCK_GET(lock); - lock->l_flags |= LDLM_FL_CLEANED; + ldlm_set_cleaned(lock); break; } @@ -775,13 +781,13 @@ static void cleanup_resource(struct ldlm_resource *res, struct list_head *q, /* Set CBPENDING so nothing in the cancellation path * can match this lock. */ - lock->l_flags |= LDLM_FL_CBPENDING; - lock->l_flags |= LDLM_FL_FAILED; + ldlm_set_cbpending(lock); + ldlm_set_failed(lock); lock->l_flags |= flags; /* ... without sending a CANCEL message for local_only. */ if (local_only) - lock->l_flags |= LDLM_FL_LOCAL_ONLY; + ldlm_set_local_only(lock); if (local_only && (lock->l_readers || lock->l_writers)) { /* This is a little bit gross, but much better than the @@ -1275,7 +1281,7 @@ void ldlm_resource_add_lock(struct ldlm_resource *res, struct list_head *head, LDLM_DEBUG(lock, "About to add this lock:\n"); - if (lock->l_flags & LDLM_FL_DESTROYED) { + if (ldlm_is_destroyed(lock)) { CDEBUG(D_OTHER, "Lock destroyed, not adding to resource\n"); return; } @@ -1400,3 +1406,4 @@ void ldlm_resource_dump(int level, struct ldlm_resource *res) LDLM_DEBUG_LIMIT(level, lock, "###"); } } +EXPORT_SYMBOL(ldlm_resource_dump); diff --git a/drivers/staging/lustre/lustre/llite/Makefile b/drivers/staging/lustre/lustre/llite/Makefile index 9ac29e718..2ce10ff01 100644 --- a/drivers/staging/lustre/lustre/llite/Makefile +++ b/drivers/staging/lustre/lustre/llite/Makefile @@ -4,7 +4,8 @@ lustre-y := dcache.o dir.o file.o llite_close.o llite_lib.o llite_nfs.o \ rw.o namei.o symlink.o llite_mmap.o \ xattr.o xattr_cache.o remote_perm.o llite_rmtacl.o \ rw26.o super25.o statahead.o \ - ../lclient/glimpse.o ../lclient/lcommon_cl.o ../lclient/lcommon_misc.o \ - vvp_dev.o vvp_page.o vvp_lock.o vvp_io.o vvp_object.o lproc_llite.o + glimpse.o lcommon_cl.o lcommon_misc.o \ + vvp_dev.o vvp_page.o vvp_lock.o vvp_io.o vvp_object.o vvp_req.o \ + lproc_llite.o llite_lloop-y := lloop.o diff --git a/drivers/staging/lustre/lustre/llite/dcache.c b/drivers/staging/lustre/lustre/llite/dcache.c index dd1c82701..1b6f82a1a 100644 --- a/drivers/staging/lustre/lustre/llite/dcache.c +++ b/drivers/staging/lustre/lustre/llite/dcache.c @@ -108,11 +108,8 @@ static int ll_dcompare(const struct dentry *parent, const struct dentry *dentry, static inline int return_if_equal(struct ldlm_lock *lock, void *data) { - if ((lock->l_flags & - (LDLM_FL_CANCELING | LDLM_FL_DISCARD_DATA)) == - (LDLM_FL_CANCELING | LDLM_FL_DISCARD_DATA)) - return LDLM_ITER_CONTINUE; - return LDLM_ITER_STOP; + return (ldlm_is_canceling(lock) && ldlm_is_discard_data(lock)) ? + LDLM_ITER_CONTINUE : LDLM_ITER_STOP; } /* find any ldlm lock of the inode in mdc and lov @@ -253,8 +250,8 @@ void ll_invalidate_aliases(struct inode *inode) { struct dentry *dentry; - CDEBUG(D_INODE, "marking dentries for ino %lu/%u(%p) invalid\n", - inode->i_ino, inode->i_generation, inode); + CDEBUG(D_INODE, "marking dentries for ino "DFID"(%p) invalid\n", + PFID(ll_inode2fid(inode)), inode); ll_lock_dcache(inode); hlist_for_each_entry(dentry, &inode->i_dentry, d_u.d_alias) { @@ -289,8 +286,8 @@ void ll_lookup_finish_locks(struct lookup_intent *it, struct inode *inode) if (it->d.lustre.it_lock_mode && inode) { struct ll_sb_info *sbi = ll_i2sbi(inode); - CDEBUG(D_DLMTRACE, "setting l_data to inode %p (%lu/%u)\n", - inode, inode->i_ino, inode->i_generation); + CDEBUG(D_DLMTRACE, "setting l_data to inode "DFID"(%p)\n", + PFID(ll_inode2fid(inode)), inode); ll_set_lock_data(sbi->ll_md_exp, inode, it, NULL); } diff --git a/drivers/staging/lustre/lustre/llite/dir.c b/drivers/staging/lustre/lustre/llite/dir.c index e4c82883e..4b00d1ac8 100644 --- a/drivers/staging/lustre/lustre/llite/dir.c +++ b/drivers/staging/lustre/lustre/llite/dir.c @@ -158,11 +158,16 @@ static int ll_dir_filler(void *_hash, struct page *page0) int i; int rc; - CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p) hash %llu\n", - inode->i_ino, inode->i_generation, inode, hash); + CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p) hash %llu\n", + PFID(ll_inode2fid(inode)), inode, hash); LASSERT(max_pages > 0 && max_pages <= MD_MAX_BRW_PAGES); + op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0, + LUSTRE_OPC_ANY, NULL); + if (IS_ERR(op_data)) + return PTR_ERR(op_data); + page_pool = kcalloc(max_pages, sizeof(page), GFP_NOFS); if (page_pool) { page_pool[0] = page0; @@ -177,8 +182,6 @@ static int ll_dir_filler(void *_hash, struct page *page0) page_pool[npages] = page; } - op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0, - LUSTRE_OPC_ANY, NULL); op_data->op_npages = npages; op_data->op_offset = hash; rc = md_readpage(exp, op_data, page_pool, &request); @@ -190,7 +193,7 @@ static int ll_dir_filler(void *_hash, struct page *page0) body = req_capsule_server_get(&request->rq_pill, &RMF_MDT_BODY); /* Checked by mdc_readpage() */ if (body->valid & OBD_MD_FLSIZE) - cl_isize_write(inode, body->size); + i_size_write(inode, body->size); nrdpgs = (request->rq_bulk->bd_nob_transferred+PAGE_SIZE-1) >> PAGE_SHIFT; @@ -372,8 +375,8 @@ struct page *ll_get_dir_page(struct inode *dir, __u64 hash, return ERR_PTR(rc); } - CDEBUG(D_INODE, "setting lr_lvb_inode to inode %p (%lu/%u)\n", - dir, dir->i_ino, dir->i_generation); + CDEBUG(D_INODE, "setting lr_lvb_inode to inode "DFID"(%p)\n", + PFID(ll_inode2fid(dir)), dir); md_set_lock_data(ll_i2sbi(dir)->ll_md_exp, &it.d.lustre.it_lock_handle, dir, NULL); } else { @@ -468,6 +471,28 @@ fail: goto out_unlock; } +/** + * return IF_* type for given lu_dirent entry. + * IF_* flag shld be converted to particular OS file type in + * platform llite module. + */ +static __u16 ll_dirent_type_get(struct lu_dirent *ent) +{ + __u16 type = 0; + struct luda_type *lt; + int len = 0; + + if (le32_to_cpu(ent->lde_attrs) & LUDA_TYPE) { + const unsigned int align = sizeof(struct luda_type) - 1; + + len = le16_to_cpu(ent->lde_namelen); + len = (len + align) & ~align; + lt = (void *)ent->lde_name + len; + type = IFTODT(le16_to_cpu(lt->lt_type)); + } + return type; +} + int ll_dir_read(struct inode *inode, struct dir_context *ctx) { struct ll_inode_info *info = ll_i2info(inode); @@ -589,15 +614,16 @@ static int ll_readdir(struct file *filp, struct dir_context *ctx) struct inode *inode = file_inode(filp); struct ll_file_data *lfd = LUSTRE_FPRIVATE(filp); struct ll_sb_info *sbi = ll_i2sbi(inode); + __u64 pos = lfd ? lfd->lfd_pos : 0; int hash64 = sbi->ll_flags & LL_SBI_64BIT_HASH; int api32 = ll_need_32bit_api(sbi); int rc; - CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p) pos %lu/%llu 32bit_api %d\n", - inode->i_ino, inode->i_generation, - inode, (unsigned long)lfd->lfd_pos, i_size_read(inode), api32); + CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p) pos %lu/%llu 32bit_api %d\n", + PFID(ll_inode2fid(inode)), inode, (unsigned long)pos, + i_size_read(inode), api32); - if (lfd->lfd_pos == MDS_DIR_END_OFF) { + if (pos == MDS_DIR_END_OFF) { /* * end-of-file. */ @@ -605,9 +631,10 @@ static int ll_readdir(struct file *filp, struct dir_context *ctx) goto out; } - ctx->pos = lfd->lfd_pos; + ctx->pos = pos; rc = ll_dir_read(inode, ctx); - lfd->lfd_pos = ctx->pos; + if (lfd) + lfd->lfd_pos = ctx->pos; if (ctx->pos == MDS_DIR_END_OFF) { if (api32) ctx->pos = LL_DIR_END_OFF_32BIT; @@ -804,9 +831,8 @@ int ll_dir_getstripe(struct inode *inode, struct lov_mds_md **lmmp, rc = md_getattr(sbi->ll_md_exp, op_data, &req); ll_finish_md_op_data(op_data); if (rc < 0) { - CDEBUG(D_INFO, "md_getattr failed on inode %lu/%u: rc %d\n", - inode->i_ino, - inode->i_generation, rc); + CDEBUG(D_INFO, "md_getattr failed on inode "DFID": rc %d\n", + PFID(ll_inode2fid(inode)), rc); goto out; } @@ -916,7 +942,7 @@ static int ll_ioc_copy_start(struct super_block *sb, struct hsm_copy *copy) } /* Read current file data version */ - rc = ll_data_version(inode, &data_version, 1); + rc = ll_data_version(inode, &data_version, LL_DV_RD_FLUSH); iput(inode); if (rc != 0) { CDEBUG(D_HSM, "Could not read file data version of " @@ -936,6 +962,9 @@ static int ll_ioc_copy_start(struct super_block *sb, struct hsm_copy *copy) } progress: + /* On error, the request should be considered as completed */ + if (hpk.hpk_errval > 0) + hpk.hpk_flags |= HP_FLAG_COMPLETED; rc = obd_iocontrol(LL_IOC_HSM_PROGRESS, sbi->ll_md_exp, sizeof(hpk), &hpk, NULL); @@ -997,8 +1026,7 @@ static int ll_ioc_copy_end(struct super_block *sb, struct hsm_copy *copy) goto progress; } - rc = ll_data_version(inode, &data_version, - copy->hc_hai.hai_action == HSMA_ARCHIVE); + rc = ll_data_version(inode, &data_version, LL_DV_RD_FLUSH); iput(inode); if (rc) { CDEBUG(D_HSM, "Could not read file data version. Request could not be confirmed.\n"); @@ -1033,7 +1061,6 @@ static int ll_ioc_copy_end(struct super_block *sb, struct hsm_copy *copy) /* hpk_errval must be >= 0 */ hpk.hpk_errval = EBUSY; } - } progress: @@ -1242,8 +1269,8 @@ static long ll_dir_ioctl(struct file *file, unsigned int cmd, unsigned long arg) struct obd_ioctl_data *data; int rc = 0; - CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), cmd=%#x\n", - inode->i_ino, inode->i_generation, inode, cmd); + CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), cmd=%#x\n", + PFID(ll_inode2fid(inode)), inode, cmd); /* asm-ppc{,64} declares TCGETS, et. al. as type 't' not 'T' */ if (_IOC_TYPE(cmd) == 'T' || _IOC_TYPE(cmd) == 't') /* tty ioctls */ @@ -1362,7 +1389,6 @@ out_free: lmv_out_free: obd_ioctl_freedata(buf, len); return rc; - } case LL_IOC_LOV_SETSTRIPE: { struct lov_user_md_v3 lumv3; @@ -1474,8 +1500,9 @@ free_lmv: cmd == LL_IOC_MDC_GETINFO)) { rc = 0; goto skip_lmm; - } else + } else { goto out_req; + } } if (cmd == IOC_MDC_GETFILESTRIPE || @@ -1688,15 +1715,16 @@ out_quotactl: return ll_flush_ctx(inode); #ifdef CONFIG_FS_POSIX_ACL case LL_IOC_RMTACL: { - if (sbi->ll_flags & LL_SBI_RMT_CLIENT && is_root_inode(inode)) { - struct ll_file_data *fd = LUSTRE_FPRIVATE(file); + if (sbi->ll_flags & LL_SBI_RMT_CLIENT && is_root_inode(inode)) { + struct ll_file_data *fd = LUSTRE_FPRIVATE(file); - rc = rct_add(&sbi->ll_rct, current_pid(), arg); - if (!rc) - fd->fd_flags |= LL_FILE_RMTACL; - return rc; - } else - return 0; + rc = rct_add(&sbi->ll_rct, current_pid(), arg); + if (!rc) + fd->fd_flags |= LL_FILE_RMTACL; + return rc; + } else { + return 0; + } } #endif case LL_IOC_GETOBDCOUNT: { @@ -1817,6 +1845,9 @@ out_quotactl: return rc; } case LL_IOC_HSM_CT_START: + if (!capable(CFS_CAP_SYS_ADMIN)) + return -EPERM; + rc = copy_and_ioctl(cmd, sbi->ll_md_exp, (void __user *)arg, sizeof(struct lustre_kernelcomm)); return rc; @@ -1865,7 +1896,6 @@ static loff_t ll_dir_seek(struct file *file, loff_t offset, int origin) int api32 = ll_need_32bit_api(sbi); loff_t ret = -EINVAL; - inode_lock(inode); switch (origin) { case SEEK_SET: break; @@ -1903,7 +1933,6 @@ static loff_t ll_dir_seek(struct file *file, loff_t offset, int origin) goto out; out: - inode_unlock(inode); return ret; } @@ -1922,7 +1951,7 @@ const struct file_operations ll_dir_operations = { .open = ll_dir_open, .release = ll_dir_release, .read = generic_read_dir, - .iterate = ll_readdir, + .iterate_shared = ll_readdir, .unlocked_ioctl = ll_dir_ioctl, .fsync = ll_fsync, }; diff --git a/drivers/staging/lustre/lustre/llite/file.c b/drivers/staging/lustre/lustre/llite/file.c index cf619af3c..f47f2acaf 100644 --- a/drivers/staging/lustre/lustre/llite/file.c +++ b/drivers/staging/lustre/lustre/llite/file.c @@ -45,6 +45,7 @@ #include "../include/lustre_lite.h" #include #include +#include #include "llite_internal.h" #include "../include/lustre/ll_fiemap.h" @@ -87,8 +88,7 @@ void ll_pack_inode2opdata(struct inode *inode, struct md_op_data *op_data, op_data->op_attr.ia_ctime = inode->i_ctime; op_data->op_attr.ia_size = i_size_read(inode); op_data->op_attr_blocks = inode->i_blocks; - ((struct ll_iattr *)&op_data->op_attr)->ia_attr_flags = - ll_inode_to_ext_flags(inode->i_flags); + op_data->op_attr_flags = ll_inode_to_ext_flags(inode->i_flags); op_data->op_ioepoch = ll_i2info(inode)->lli_ioepoch; if (fh) op_data->op_handle = *fh; @@ -170,13 +170,15 @@ static int ll_close_inode_openhandle(struct obd_export *md_exp, */ rc = ll_som_update(inode, op_data); if (rc) { - CERROR("inode %lu mdc Size-on-MDS update failed: rc = %d\n", - inode->i_ino, rc); + CERROR("%s: inode "DFID" mdc Size-on-MDS update failed: rc = %d\n", + ll_i2mdexp(inode)->exp_obd->obd_name, + PFID(ll_inode2fid(inode)), rc); rc = 0; } } else if (rc) { - CERROR("inode %lu mdc close failed: rc = %d\n", - inode->i_ino, rc); + CERROR("%s: inode "DFID" mdc close failed: rc = %d\n", + ll_i2mdexp(inode)->exp_obd->obd_name, + PFID(ll_inode2fid(inode)), rc); } /* DATA_MODIFIED flag was successfully sent on close, cancel data @@ -278,7 +280,7 @@ static int ll_md_close(struct obd_export *md_exp, struct inode *inode, /* clear group lock, if present */ if (unlikely(fd->fd_flags & LL_FILE_GROUP_LOCKED)) - ll_put_grouplock(inode, file, fd->fd_grouplock.cg_gid); + ll_put_grouplock(inode, file, fd->fd_grouplock.lg_gid); if (fd->fd_lease_och) { bool lease_broken; @@ -343,8 +345,8 @@ int ll_file_release(struct inode *inode, struct file *file) struct ll_inode_info *lli = ll_i2info(inode); int rc; - CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino, - inode->i_generation, inode); + CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p)\n", + PFID(ll_inode2fid(inode)), inode); #ifdef CONFIG_FS_POSIX_ACL if (sbi->ll_flags & LL_SBI_RMT_CLIENT && is_root_inode(inode)) { @@ -543,8 +545,8 @@ int ll_file_open(struct inode *inode, struct file *file) struct ll_file_data *fd; int rc = 0, opendir_set = 0; - CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), flags %o\n", inode->i_ino, - inode->i_generation, inode, file->f_flags); + CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), flags %o\n", + PFID(ll_inode2fid(inode)), inode, file->f_flags); it = file->private_data; /* XXX: compat macro */ file->private_data = NULL; /* prevent ll_local_open assertion */ @@ -677,7 +679,9 @@ restart: if (rc) goto out_och_free; - LASSERT(it_disposition(it, DISP_ENQ_OPEN_REF)); + LASSERTF(it_disposition(it, DISP_ENQ_OPEN_REF), + "inode %p: disposition %x, status %d\n", inode, + it_disposition(it, ~0), it->d.lustre.it_status); rc = ll_local_open(file, it, fd, *och_p); if (rc) @@ -875,16 +879,19 @@ ll_lease_open(struct inode *inode, struct file *file, fmode_t fmode, return och; out_close: - rc2 = ll_close_inode_openhandle(sbi->ll_md_exp, inode, och, NULL); - if (rc2) - CERROR("Close openhandle returned %d\n", rc2); - - /* cancel open lock */ + /* Cancel open lock */ if (it.d.lustre.it_lock_mode != 0) { ldlm_lock_decref_and_cancel(&och->och_lease_handle, it.d.lustre.it_lock_mode); it.d.lustre.it_lock_mode = 0; + och->och_lease_handle.cookie = 0ULL; } + rc2 = ll_close_inode_openhandle(sbi->ll_md_exp, inode, och, NULL); + if (rc2 < 0) + CERROR("%s: error closing file "DFID": %d\n", + ll_get_fsname(inode->i_sb, NULL, 0), + PFID(&ll_i2info(inode)->lli_fid), rc2); + och = NULL; /* och has been freed in ll_close_inode_openhandle() */ out_release_it: ll_intent_release(&it); out: @@ -908,7 +915,7 @@ static int ll_lease_close(struct obd_client_handle *och, struct inode *inode, lock_res_and_lock(lock); cancelled = ldlm_is_cancel(lock); unlock_res_and_lock(lock); - ldlm_lock_put(lock); + LDLM_LOCK_PUT(lock); } CDEBUG(D_INODE, "lease for " DFID " broken? %d\n", @@ -926,7 +933,7 @@ static int ll_lease_close(struct obd_client_handle *och, struct inode *inode, /* Fills the obdo with the attributes for the lsm */ static int ll_lsm_getattr(struct lov_stripe_md *lsm, struct obd_export *exp, - struct obdo *obdo, __u64 ioepoch, int sync) + struct obdo *obdo, __u64 ioepoch, int dv_flags) { struct ptlrpc_request_set *set; struct obd_info oinfo = { }; @@ -945,9 +952,11 @@ static int ll_lsm_getattr(struct lov_stripe_md *lsm, struct obd_export *exp, OBD_MD_FLMTIME | OBD_MD_FLCTIME | OBD_MD_FLGROUP | OBD_MD_FLEPOCH | OBD_MD_FLDATAVERSION; - if (sync) { + if (dv_flags & (LL_DV_WR_FLUSH | LL_DV_RD_FLUSH)) { oinfo.oi_oa->o_valid |= OBD_MD_FLFLAGS; oinfo.oi_oa->o_flags |= OBD_FL_SRVLOCK; + if (dv_flags & LL_DV_WR_FLUSH) + oinfo.oi_oa->o_flags |= OBD_FL_FLUSH; } set = ptlrpc_prep_set(); @@ -960,11 +969,16 @@ static int ll_lsm_getattr(struct lov_stripe_md *lsm, struct obd_export *exp, rc = ptlrpc_set_wait(set); ptlrpc_set_destroy(set); } - if (rc == 0) + if (rc == 0) { oinfo.oi_oa->o_valid &= (OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ | OBD_MD_FLATIME | OBD_MD_FLMTIME | OBD_MD_FLCTIME | OBD_MD_FLSIZE | - OBD_MD_FLDATAVERSION); + OBD_MD_FLDATAVERSION | OBD_MD_FLFLAGS); + if (dv_flags & LL_DV_WR_FLUSH && + !(oinfo.oi_oa->o_valid & OBD_MD_FLFLAGS && + oinfo.oi_oa->o_flags & OBD_FL_FLUSH)) + return -ENOTSUPP; + } return rc; } @@ -980,7 +994,7 @@ int ll_inode_getattr(struct inode *inode, struct obdo *obdo, lsm = ccc_inode_lsm_get(inode); rc = ll_lsm_getattr(lsm, ll_i2dtexp(inode), - obdo, ioepoch, sync); + obdo, ioepoch, sync ? LL_DV_RD_FLUSH : 0); if (rc == 0) { struct ost_id *oi = lsm ? &lsm->lsm_oi : &obdo->o_oi; @@ -994,50 +1008,57 @@ int ll_inode_getattr(struct inode *inode, struct obdo *obdo, return rc; } -int ll_merge_lvb(const struct lu_env *env, struct inode *inode) +int ll_merge_attr(const struct lu_env *env, struct inode *inode) { struct ll_inode_info *lli = ll_i2info(inode); struct cl_object *obj = lli->lli_clob; - struct cl_attr *attr = ccc_env_thread_attr(env); - struct ost_lvb lvb; + struct cl_attr *attr = vvp_env_thread_attr(env); + s64 atime; + s64 mtime; + s64 ctime; int rc = 0; ll_inode_size_lock(inode); + /* merge timestamps the most recently obtained from mds with * timestamps obtained from osts */ - LTIME_S(inode->i_atime) = lli->lli_lvb.lvb_atime; - LTIME_S(inode->i_mtime) = lli->lli_lvb.lvb_mtime; - LTIME_S(inode->i_ctime) = lli->lli_lvb.lvb_ctime; + LTIME_S(inode->i_atime) = lli->lli_atime; + LTIME_S(inode->i_mtime) = lli->lli_mtime; + LTIME_S(inode->i_ctime) = lli->lli_ctime; - lvb.lvb_size = i_size_read(inode); - lvb.lvb_blocks = inode->i_blocks; - lvb.lvb_mtime = LTIME_S(inode->i_mtime); - lvb.lvb_atime = LTIME_S(inode->i_atime); - lvb.lvb_ctime = LTIME_S(inode->i_ctime); + mtime = LTIME_S(inode->i_mtime); + atime = LTIME_S(inode->i_atime); + ctime = LTIME_S(inode->i_ctime); cl_object_attr_lock(obj); rc = cl_object_attr_get(env, obj, attr); cl_object_attr_unlock(obj); - if (rc == 0) { - if (lvb.lvb_atime < attr->cat_atime) - lvb.lvb_atime = attr->cat_atime; - if (lvb.lvb_ctime < attr->cat_ctime) - lvb.lvb_ctime = attr->cat_ctime; - if (lvb.lvb_mtime < attr->cat_mtime) - lvb.lvb_mtime = attr->cat_mtime; + if (rc != 0) + goto out_size_unlock; - CDEBUG(D_VFSTRACE, DFID " updating i_size %llu\n", - PFID(&lli->lli_fid), attr->cat_size); - cl_isize_write_nolock(inode, attr->cat_size); + if (atime < attr->cat_atime) + atime = attr->cat_atime; - inode->i_blocks = attr->cat_blocks; + if (ctime < attr->cat_ctime) + ctime = attr->cat_ctime; - LTIME_S(inode->i_mtime) = lvb.lvb_mtime; - LTIME_S(inode->i_atime) = lvb.lvb_atime; - LTIME_S(inode->i_ctime) = lvb.lvb_ctime; - } + if (mtime < attr->cat_mtime) + mtime = attr->cat_mtime; + + CDEBUG(D_VFSTRACE, DFID " updating i_size %llu\n", + PFID(&lli->lli_fid), attr->cat_size); + + i_size_write(inode, attr->cat_size); + + inode->i_blocks = attr->cat_blocks; + + LTIME_S(inode->i_mtime) = mtime; + LTIME_S(inode->i_atime) = atime; + LTIME_S(inode->i_ctime) = ctime; + +out_size_unlock: ll_inode_size_unlock(inode); return rc; @@ -1120,47 +1141,48 @@ ll_file_io_generic(const struct lu_env *env, struct vvp_io_args *args, struct cl_io *io; ssize_t result; + CDEBUG(D_VFSTRACE, "file: %s, type: %d ppos: %llu, count: %zd\n", + file->f_path.dentry->d_name.name, iot, *ppos, count); + restart: - io = ccc_env_thread_io(env); + io = vvp_env_thread_io(env); ll_io_init(io, file, iot == CIT_WRITE); if (cl_io_rw_init(env, io, iot, *ppos, count) == 0) { struct vvp_io *vio = vvp_env_io(env); - struct ccc_io *cio = ccc_env_io(env); int write_mutex_locked = 0; - cio->cui_fd = LUSTRE_FPRIVATE(file); - vio->cui_io_subtype = args->via_io_subtype; + vio->vui_fd = LUSTRE_FPRIVATE(file); + vio->vui_io_subtype = args->via_io_subtype; - switch (vio->cui_io_subtype) { + switch (vio->vui_io_subtype) { case IO_NORMAL: - cio->cui_iter = args->u.normal.via_iter; - cio->cui_iocb = args->u.normal.via_iocb; + vio->vui_iter = args->u.normal.via_iter; + vio->vui_iocb = args->u.normal.via_iocb; if ((iot == CIT_WRITE) && - !(cio->cui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) { + !(vio->vui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) { if (mutex_lock_interruptible(&lli-> lli_write_mutex)) { result = -ERESTARTSYS; goto out; } write_mutex_locked = 1; - } else if (iot == CIT_READ) { - down_read(&lli->lli_trunc_sem); } + down_read(&lli->lli_trunc_sem); break; case IO_SPLICE: - vio->u.splice.cui_pipe = args->u.splice.via_pipe; - vio->u.splice.cui_flags = args->u.splice.via_flags; + vio->u.splice.vui_pipe = args->u.splice.via_pipe; + vio->u.splice.vui_flags = args->u.splice.via_flags; break; default: - CERROR("Unknown IO type - %u\n", vio->cui_io_subtype); + CERROR("Unknown IO type - %u\n", vio->vui_io_subtype); LBUG(); } result = cl_io_loop(env, io); + if (args->via_io_subtype == IO_NORMAL) + up_read(&lli->lli_trunc_sem); if (write_mutex_locked) mutex_unlock(&lli->lli_write_mutex); - else if (args->via_io_subtype == IO_NORMAL && iot == CIT_READ) - up_read(&lli->lli_trunc_sem); } else { /* cl_io_rw_init() handled IO */ result = io->ci_result; @@ -1197,6 +1219,7 @@ out: fd->fd_write_failed = true; } } + CDEBUG(D_VFSTRACE, "iot: %d, result: %zd\n", iot, result); return result; } @@ -1212,7 +1235,7 @@ static ssize_t ll_file_read_iter(struct kiocb *iocb, struct iov_iter *to) if (IS_ERR(env)) return PTR_ERR(env); - args = vvp_env_args(env, IO_NORMAL); + args = ll_env_args(env, IO_NORMAL); args->u.normal.via_iter = to; args->u.normal.via_iocb = iocb; @@ -1236,7 +1259,7 @@ static ssize_t ll_file_write_iter(struct kiocb *iocb, struct iov_iter *from) if (IS_ERR(env)) return PTR_ERR(env); - args = vvp_env_args(env, IO_NORMAL); + args = ll_env_args(env, IO_NORMAL); args->u.normal.via_iter = from; args->u.normal.via_iocb = iocb; @@ -1262,7 +1285,7 @@ static ssize_t ll_file_splice_read(struct file *in_file, loff_t *ppos, if (IS_ERR(env)) return PTR_ERR(env); - args = vvp_env_args(env, IO_SPLICE); + args = ll_env_args(env, IO_SPLICE); args->u.splice.via_pipe = pipe; args->u.splice.via_flags = flags; @@ -1354,7 +1377,8 @@ static int ll_lov_recreate_fid(struct inode *inode, unsigned long arg) } int ll_lov_setstripe_ea_info(struct inode *inode, struct dentry *dentry, - int flags, struct lov_user_md *lum, int lum_size) + __u64 flags, struct lov_user_md *lum, + int lum_size) { struct lov_stripe_md *lsm = NULL; struct lookup_intent oit = {.it_op = IT_OPEN, .it_flags = flags}; @@ -1363,8 +1387,8 @@ int ll_lov_setstripe_ea_info(struct inode *inode, struct dentry *dentry, lsm = ccc_inode_lsm_get(inode); if (lsm) { ccc_inode_lsm_put(inode, lsm); - CDEBUG(D_IOCTL, "stripe already exists for ino %lu\n", - inode->i_ino); + CDEBUG(D_IOCTL, "stripe already exists for inode "DFID"\n", + PFID(ll_inode2fid(inode))); rc = -EEXIST; goto out; } @@ -1478,7 +1502,7 @@ out: static int ll_lov_setea(struct inode *inode, struct file *file, unsigned long arg) { - int flags = MDS_OPEN_HAS_OBJS | FMODE_WRITE; + __u64 flags = MDS_OPEN_HAS_OBJS | FMODE_WRITE; struct lov_user_md *lump; int lum_size = sizeof(struct lov_user_md) + sizeof(struct lov_user_ost_data); @@ -1512,7 +1536,7 @@ static int ll_lov_setstripe(struct inode *inode, struct file *file, struct lov_user_md_v1 __user *lumv1p = (void __user *)arg; struct lov_user_md_v3 __user *lumv3p = (void __user *)arg; int lum_size, rc; - int flags = FMODE_WRITE; + __u64 flags = FMODE_WRITE; /* first try with v1 which is smaller than v3 */ lum_size = sizeof(struct lov_user_md_v1); @@ -1561,7 +1585,7 @@ ll_get_grouplock(struct inode *inode, struct file *file, unsigned long arg) { struct ll_inode_info *lli = ll_i2info(inode); struct ll_file_data *fd = LUSTRE_FPRIVATE(file); - struct ccc_grouplock grouplock; + struct ll_grouplock grouplock; int rc; if (arg == 0) { @@ -1575,14 +1599,14 @@ ll_get_grouplock(struct inode *inode, struct file *file, unsigned long arg) spin_lock(&lli->lli_lock); if (fd->fd_flags & LL_FILE_GROUP_LOCKED) { CWARN("group lock already existed with gid %lu\n", - fd->fd_grouplock.cg_gid); + fd->fd_grouplock.lg_gid); spin_unlock(&lli->lli_lock); return -EINVAL; } - LASSERT(!fd->fd_grouplock.cg_lock); + LASSERT(!fd->fd_grouplock.lg_lock); spin_unlock(&lli->lli_lock); - rc = cl_get_grouplock(cl_i2info(inode)->lli_clob, + rc = cl_get_grouplock(ll_i2info(inode)->lli_clob, arg, (file->f_flags & O_NONBLOCK), &grouplock); if (rc) return rc; @@ -1608,7 +1632,7 @@ static int ll_put_grouplock(struct inode *inode, struct file *file, { struct ll_inode_info *lli = ll_i2info(inode); struct ll_file_data *fd = LUSTRE_FPRIVATE(file); - struct ccc_grouplock grouplock; + struct ll_grouplock grouplock; spin_lock(&lli->lli_lock); if (!(fd->fd_flags & LL_FILE_GROUP_LOCKED)) { @@ -1616,11 +1640,11 @@ static int ll_put_grouplock(struct inode *inode, struct file *file, CWARN("no group lock held\n"); return -EINVAL; } - LASSERT(fd->fd_grouplock.cg_lock); + LASSERT(fd->fd_grouplock.lg_lock); - if (fd->fd_grouplock.cg_gid != arg) { + if (fd->fd_grouplock.lg_gid != arg) { CWARN("group lock %lu doesn't match current id %lu\n", - arg, fd->fd_grouplock.cg_gid); + arg, fd->fd_grouplock.lg_gid); spin_unlock(&lli->lli_lock); return -EINVAL; } @@ -1861,11 +1885,12 @@ error: * This value is computed using stripe object version on OST. * Version is computed using server side locking. * - * @param extent_lock Take extent lock. Not needed if a process is already - * holding the OST object group locks. + * @param sync if do sync on the OST side; + * 0: no sync + * LL_DV_RD_FLUSH: flush dirty pages, LCK_PR on OSTs + * LL_DV_WR_FLUSH: drop all caching pages, LCK_PW on OSTs */ -int ll_data_version(struct inode *inode, __u64 *data_version, - int extent_lock) +int ll_data_version(struct inode *inode, __u64 *data_version, int flags) { struct lov_stripe_md *lsm = NULL; struct ll_sb_info *sbi = ll_i2sbi(inode); @@ -1887,7 +1912,7 @@ int ll_data_version(struct inode *inode, __u64 *data_version, goto out; } - rc = ll_lsm_getattr(lsm, sbi->ll_dt_exp, obdo, 0, extent_lock); + rc = ll_lsm_getattr(lsm, sbi->ll_dt_exp, obdo, 0, flags); if (rc == 0) { if (!(obdo->o_valid & OBD_MD_FLDATAVERSION)) rc = -EOPNOTSUPP; @@ -1923,7 +1948,7 @@ int ll_hsm_release(struct inode *inode) } /* Grab latest data_version and [am]time values */ - rc = ll_data_version(inode, &data_version, 1); + rc = ll_data_version(inode, &data_version, LL_DV_WR_FLUSH); if (rc != 0) goto out; @@ -1933,7 +1958,7 @@ int ll_hsm_release(struct inode *inode) goto out; } - ll_merge_lvb(env, inode); + ll_merge_attr(env, inode); cl_env_nested_put(&nest, env); /* Release the file. @@ -2227,8 +2252,8 @@ ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg) struct ll_file_data *fd = LUSTRE_FPRIVATE(file); int flags, rc; - CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),cmd=%x\n", inode->i_ino, - inode->i_generation, inode, cmd); + CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p),cmd=%x\n", + PFID(ll_inode2fid(inode)), inode, cmd); ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_IOCTL, 1); /* asm-ppc{,64} declares TCGETS, et. al. as type 't' not 'T' */ @@ -2331,9 +2356,8 @@ ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg) if (copy_from_user(&idv, (char __user *)arg, sizeof(idv))) return -EFAULT; - rc = ll_data_version(inode, &idv.idv_version, - !(idv.idv_flags & LL_DV_NOFLUSH)); - + idv.idv_flags &= LL_DV_RD_FLUSH | LL_DV_WR_FLUSH; + rc = ll_data_version(inode, &idv.idv_version, idv.idv_flags); if (rc == 0 && copy_to_user((char __user *)arg, &idv, sizeof(idv))) return -EFAULT; @@ -2499,7 +2523,7 @@ ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg) rc = och->och_flags & (FMODE_READ | FMODE_WRITE); unlock_res_and_lock(lock); - ldlm_lock_put(lock); + LDLM_LOCK_PUT(lock); } } mutex_unlock(&lli->lli_och_mutex); @@ -2537,9 +2561,8 @@ static loff_t ll_file_seek(struct file *file, loff_t offset, int origin) retval = offset + ((origin == SEEK_END) ? i_size_read(inode) : (origin == SEEK_CUR) ? file->f_pos : 0); - CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), to=%llu=%#llx(%d)\n", - inode->i_ino, inode->i_generation, inode, retval, retval, - origin); + CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), to=%llu=%#llx(%d)\n", + PFID(ll_inode2fid(inode)), inode, retval, retval, origin); ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_LLSEEK, 1); if (origin == SEEK_END || origin == SEEK_HOLE || origin == SEEK_DATA) { @@ -2603,8 +2626,8 @@ int cl_sync_file_range(struct inode *inode, loff_t start, loff_t end, if (IS_ERR(env)) return PTR_ERR(env); - io = ccc_env_thread_io(env); - io->ci_obj = cl_i2info(inode)->lli_clob; + io = vvp_env_thread_io(env); + io->ci_obj = ll_i2info(inode)->lli_clob; io->ci_ignore_layout = ignore_layout; /* initialize parameters for sync */ @@ -2634,8 +2657,8 @@ int ll_fsync(struct file *file, loff_t start, loff_t end, int datasync) struct ptlrpc_request *req; int rc, err; - CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino, - inode->i_generation, inode); + CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p)\n", + PFID(ll_inode2fid(inode)), inode); ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FSYNC, 1); rc = filemap_write_and_wait_range(inode->i_mapping, start, end); @@ -2693,8 +2716,8 @@ ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock) int rc; int rc2 = 0; - CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu file_lock=%p\n", - inode->i_ino, file_lock); + CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID" file_lock=%p\n", + PFID(ll_inode2fid(inode)), file_lock); ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FLOCK, 1); @@ -2777,9 +2800,9 @@ ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock) if (IS_ERR(op_data)) return PTR_ERR(op_data); - CDEBUG(D_DLMTRACE, "inode=%lu, pid=%u, flags=%#llx, mode=%u, start=%llu, end=%llu\n", - inode->i_ino, flock.l_flock.pid, flags, einfo.ei_mode, - flock.l_flock.start, flock.l_flock.end); + CDEBUG(D_DLMTRACE, "inode="DFID", pid=%u, flags=%#llx, mode=%u, start=%llu, end=%llu\n", + PFID(ll_inode2fid(inode)), flock.l_flock.pid, flags, + einfo.ei_mode, flock.l_flock.start, flock.l_flock.end); rc = md_enqueue(sbi->ll_md_exp, &einfo, NULL, op_data, &lockh, &flock, 0, NULL /* req */, flags); @@ -2901,8 +2924,8 @@ static int __ll_inode_revalidate(struct dentry *dentry, __u64 ibits) struct obd_export *exp; int rc = 0; - CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),name=%pd\n", - inode->i_ino, inode->i_generation, inode, dentry); + CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p),name=%pd\n", + PFID(ll_inode2fid(inode)), inode, dentry); exp = ll_i2mdexp(inode); @@ -2998,9 +3021,9 @@ static int ll_inode_revalidate(struct dentry *dentry, __u64 ibits) /* if object isn't regular file, don't validate size */ if (!S_ISREG(inode->i_mode)) { - LTIME_S(inode->i_atime) = ll_i2info(inode)->lli_lvb.lvb_atime; - LTIME_S(inode->i_mtime) = ll_i2info(inode)->lli_lvb.lvb_mtime; - LTIME_S(inode->i_ctime) = ll_i2info(inode)->lli_lvb.lvb_ctime; + LTIME_S(inode->i_atime) = ll_i2info(inode)->lli_atime; + LTIME_S(inode->i_mtime) = ll_i2info(inode)->lli_mtime; + LTIME_S(inode->i_ctime) = ll_i2info(inode)->lli_ctime; } else { /* In case of restore, the MDT has the right size and has * already send it back without granting the layout lock, @@ -3124,8 +3147,8 @@ int ll_inode_permission(struct inode *inode, int mask) return rc; } - CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), inode mode %x mask %o\n", - inode->i_ino, inode->i_generation, inode, inode->i_mode, mask); + CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), inode mode %x mask %o\n", + PFID(ll_inode2fid(inode)), inode, inode->i_mode, mask); if (ll_i2sbi(inode)->ll_flags & LL_SBI_RMT_CLIENT) return lustre_check_remote_perm(inode, mask); @@ -3335,10 +3358,10 @@ static int ll_layout_fetch(struct inode *inode, struct ldlm_lock *lock) int rc; CDEBUG(D_INODE, DFID" LVB_READY=%d l_lvb_data=%p l_lvb_len=%d\n", - PFID(ll_inode2fid(inode)), !!(lock->l_flags & LDLM_FL_LVB_READY), + PFID(ll_inode2fid(inode)), ldlm_is_lvb_ready(lock), lock->l_lvb_data, lock->l_lvb_len); - if (lock->l_lvb_data && (lock->l_flags & LDLM_FL_LVB_READY)) + if (lock->l_lvb_data && ldlm_is_lvb_ready(lock)) return 0; /* if layout lock was granted right away, the layout is returned @@ -3415,14 +3438,14 @@ static int ll_layout_lock_set(struct lustre_handle *lockh, enum ldlm_mode mode, LASSERT(lock); LASSERT(ldlm_has_layout(lock)); - LDLM_DEBUG(lock, "File %p/"DFID" being reconfigured: %d", - inode, PFID(&lli->lli_fid), reconf); + LDLM_DEBUG(lock, "File "DFID"(%p) being reconfigured: %d", + PFID(&lli->lli_fid), inode, reconf); /* in case this is a caching lock and reinstate with new inode */ md_set_lock_data(sbi->ll_md_exp, &lockh->cookie, inode, NULL); lock_res_and_lock(lock); - lvb_ready = !!(lock->l_flags & LDLM_FL_LVB_READY); + lvb_ready = ldlm_is_lvb_ready(lock); unlock_res_and_lock(lock); /* checking lvb_ready is racy but this is okay. The worst case is * that multi processes may configure the file on the same time. @@ -3487,9 +3510,9 @@ out: /* wait for IO to complete if it's still being used. */ if (wait_layout) { - CDEBUG(D_INODE, "%s: %p/" DFID " wait for layout reconf.\n", + CDEBUG(D_INODE, "%s: "DFID"(%p) wait for layout reconf\n", ll_get_fsname(inode->i_sb, NULL, 0), - inode, PFID(&lli->lli_fid)); + PFID(&lli->lli_fid), inode); memset(&conf, 0, sizeof(conf)); conf.coc_opc = OBJECT_CONF_WAIT; @@ -3498,7 +3521,8 @@ out: if (rc == 0) rc = -EAGAIN; - CDEBUG(D_INODE, "file: " DFID " waiting layout return: %d.\n", + CDEBUG(D_INODE, "%s: file="DFID" waiting layout return: %d.\n", + ll_get_fsname(inode->i_sb, NULL, 0), PFID(&lli->lli_fid), rc); } return rc; @@ -3571,9 +3595,9 @@ again: it.it_op = IT_LAYOUT; lockh.cookie = 0ULL; - LDLM_DEBUG_NOLOCK("%s: requeue layout lock for file %p/" DFID "", - ll_get_fsname(inode->i_sb, NULL, 0), inode, - PFID(&lli->lli_fid)); + LDLM_DEBUG_NOLOCK("%s: requeue layout lock for file "DFID"(%p)", + ll_get_fsname(inode->i_sb, NULL, 0), + PFID(&lli->lli_fid), inode); rc = md_enqueue(sbi->ll_md_exp, &einfo, &it, op_data, &lockh, NULL, 0, NULL, 0); @@ -3601,7 +3625,7 @@ again: /** * This function send a restore request to the MDT */ -int ll_layout_restore(struct inode *inode) +int ll_layout_restore(struct inode *inode, loff_t offset, __u64 length) { struct hsm_user_request *hur; int len, rc; @@ -3617,9 +3641,10 @@ int ll_layout_restore(struct inode *inode) hur->hur_request.hr_flags = 0; memcpy(&hur->hur_user_item[0].hui_fid, &ll_i2info(inode)->lli_fid, sizeof(hur->hur_user_item[0].hui_fid)); - hur->hur_user_item[0].hui_extent.length = -1; + hur->hur_user_item[0].hui_extent.offset = offset; + hur->hur_user_item[0].hui_extent.length = length; hur->hur_request.hr_itemcount = 1; - rc = obd_iocontrol(LL_IOC_HSM_REQUEST, cl_i2sbi(inode)->ll_md_exp, + rc = obd_iocontrol(LL_IOC_HSM_REQUEST, ll_i2sbi(inode)->ll_md_exp, len, hur, NULL); kfree(hur); return rc; diff --git a/drivers/staging/lustre/lustre/llite/glimpse.c b/drivers/staging/lustre/lustre/llite/glimpse.c new file mode 100644 index 000000000..d8ea75424 --- /dev/null +++ b/drivers/staging/lustre/lustre/llite/glimpse.c @@ -0,0 +1,255 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2011, 2012, Intel Corporation. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + * + * glimpse code shared between vvp and liblustre (and other Lustre clients in + * the future). + * + * Author: Nikita Danilov + * Author: Oleg Drokin + */ + +#include "../../include/linux/libcfs/libcfs.h" +#include "../include/obd_class.h" +#include "../include/obd_support.h" +#include "../include/obd.h" + +#include "../include/lustre_dlm.h" +#include "../include/lustre_lite.h" +#include "../include/lustre_mdc.h" +#include +#include + +#include "../include/cl_object.h" +#include "../llite/llite_internal.h" + +static const struct cl_lock_descr whole_file = { + .cld_start = 0, + .cld_end = CL_PAGE_EOF, + .cld_mode = CLM_READ +}; + +/* + * Check whether file has possible unwriten pages. + * + * \retval 1 file is mmap-ed or has dirty pages + * 0 otherwise + */ +blkcnt_t dirty_cnt(struct inode *inode) +{ + blkcnt_t cnt = 0; + struct vvp_object *vob = cl_inode2vvp(inode); + void *results[1]; + + if (inode->i_mapping) + cnt += radix_tree_gang_lookup_tag(&inode->i_mapping->page_tree, + results, 0, 1, + PAGECACHE_TAG_DIRTY); + if (cnt == 0 && atomic_read(&vob->vob_mmap_cnt) > 0) + cnt = 1; + + return (cnt > 0) ? 1 : 0; +} + +int cl_glimpse_lock(const struct lu_env *env, struct cl_io *io, + struct inode *inode, struct cl_object *clob, int agl) +{ + struct ll_inode_info *lli = ll_i2info(inode); + const struct lu_fid *fid = lu_object_fid(&clob->co_lu); + int result; + + result = 0; + if (!(lli->lli_flags & LLIF_MDS_SIZE_LOCK)) { + CDEBUG(D_DLMTRACE, "Glimpsing inode " DFID "\n", PFID(fid)); + if (lli->lli_has_smd) { + struct cl_lock *lock = vvp_env_lock(env); + struct cl_lock_descr *descr = &lock->cll_descr; + + /* NOTE: this looks like DLM lock request, but it may + * not be one. Due to CEF_ASYNC flag (translated + * to LDLM_FL_HAS_INTENT by osc), this is + * glimpse request, that won't revoke any + * conflicting DLM locks held. Instead, + * ll_glimpse_callback() will be called on each + * client holding a DLM lock against this file, + * and resulting size will be returned for each + * stripe. DLM lock on [0, EOF] is acquired only + * if there were no conflicting locks. If there + * were conflicting locks, enqueuing or waiting + * fails with -ENAVAIL, but valid inode + * attributes are returned anyway. + */ + *descr = whole_file; + descr->cld_obj = clob; + descr->cld_mode = CLM_READ; + descr->cld_enq_flags = CEF_ASYNC | CEF_MUST; + if (agl) + descr->cld_enq_flags |= CEF_AGL; + /* + * CEF_ASYNC is used because glimpse sub-locks cannot + * deadlock (because they never conflict with other + * locks) and, hence, can be enqueued out-of-order. + * + * CEF_MUST protects glimpse lock from conversion into + * a lockless mode. + */ + result = cl_lock_request(env, io, lock); + if (result < 0) + return result; + + if (!agl) { + ll_merge_attr(env, inode); + if (i_size_read(inode) > 0 && + inode->i_blocks == 0) { + /* + * LU-417: Add dirty pages block count + * lest i_blocks reports 0, some "cp" or + * "tar" may think it's a completely + * sparse file and skip it. + */ + inode->i_blocks = dirty_cnt(inode); + } + } + cl_lock_release(env, lock); + } else { + CDEBUG(D_DLMTRACE, "No objects for inode\n"); + ll_merge_attr(env, inode); + } + } + + return result; +} + +static int cl_io_get(struct inode *inode, struct lu_env **envout, + struct cl_io **ioout, int *refcheck) +{ + struct lu_env *env; + struct cl_io *io; + struct ll_inode_info *lli = ll_i2info(inode); + struct cl_object *clob = lli->lli_clob; + int result; + + if (S_ISREG(inode->i_mode)) { + env = cl_env_get(refcheck); + if (!IS_ERR(env)) { + io = vvp_env_thread_io(env); + io->ci_obj = clob; + *envout = env; + *ioout = io; + result = 1; + } else { + result = PTR_ERR(env); + } + } else { + result = 0; + } + return result; +} + +int cl_glimpse_size0(struct inode *inode, int agl) +{ + /* + * We don't need ast_flags argument to cl_glimpse_size(), because + * osc_lock_enqueue() takes care of the possible deadlock that said + * argument was introduced to avoid. + */ + /* + * XXX but note that ll_file_seek() passes LDLM_FL_BLOCK_NOWAIT to + * cl_glimpse_size(), which doesn't make sense: glimpse locks are not + * blocking anyway. + */ + struct lu_env *env = NULL; + struct cl_io *io = NULL; + int result; + int refcheck; + + result = cl_io_get(inode, &env, &io, &refcheck); + if (result > 0) { +again: + io->ci_verify_layout = 1; + result = cl_io_init(env, io, CIT_MISC, io->ci_obj); + if (result > 0) + /* + * nothing to do for this io. This currently happens + * when stripe sub-object's are not yet created. + */ + result = io->ci_result; + else if (result == 0) + result = cl_glimpse_lock(env, io, inode, io->ci_obj, + agl); + + OBD_FAIL_TIMEOUT(OBD_FAIL_GLIMPSE_DELAY, 2); + cl_io_fini(env, io); + if (unlikely(io->ci_need_restart)) + goto again; + cl_env_put(env, &refcheck); + } + return result; +} + +int cl_local_size(struct inode *inode) +{ + struct lu_env *env = NULL; + struct cl_io *io = NULL; + struct cl_object *clob; + int result; + int refcheck; + + if (!ll_i2info(inode)->lli_has_smd) + return 0; + + result = cl_io_get(inode, &env, &io, &refcheck); + if (result <= 0) + return result; + + clob = io->ci_obj; + result = cl_io_init(env, io, CIT_MISC, clob); + if (result > 0) { + result = io->ci_result; + } else if (result == 0) { + struct cl_lock *lock = vvp_env_lock(env); + + lock->cll_descr = whole_file; + lock->cll_descr.cld_enq_flags = CEF_PEEK; + lock->cll_descr.cld_obj = clob; + result = cl_lock_request(env, io, lock); + if (result == 0) { + ll_merge_attr(env, inode); + cl_lock_release(env, lock); + } + } + cl_io_fini(env, io); + cl_env_put(env, &refcheck); + return result; +} diff --git a/drivers/staging/lustre/lustre/llite/lcommon_cl.c b/drivers/staging/lustre/lustre/llite/lcommon_cl.c new file mode 100644 index 000000000..6c00715b4 --- /dev/null +++ b/drivers/staging/lustre/lustre/llite/lcommon_cl.c @@ -0,0 +1,327 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2011, 2015, Intel Corporation. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + * + * cl code shared between vvp and liblustre (and other Lustre clients in the + * future). + * + * Author: Nikita Danilov + */ + +#define DEBUG_SUBSYSTEM S_LLITE + +#include "../../include/linux/libcfs/libcfs.h" +# include +# include +# include +# include +# include +# include +# include + +#include "../include/obd.h" +#include "../include/obd_support.h" +#include "../include/lustre_fid.h" +#include "../include/lustre_lite.h" +#include "../include/lustre_dlm.h" +#include "../include/lustre_ver.h" +#include "../include/lustre_mdc.h" +#include "../include/cl_object.h" + +#include "../llite/llite_internal.h" + +/* + * ccc_ prefix stands for "Common Client Code". + */ + +/***************************************************************************** + * + * Vvp device and device type functions. + * + */ + +/** + * An `emergency' environment used by cl_inode_fini() when cl_env_get() + * fails. Access to this environment is serialized by cl_inode_fini_guard + * mutex. + */ +struct lu_env *cl_inode_fini_env; +int cl_inode_fini_refcheck; + +/** + * A mutex serializing calls to slp_inode_fini() under extreme memory + * pressure, when environments cannot be allocated. + */ +static DEFINE_MUTEX(cl_inode_fini_guard); + +int cl_setattr_ost(struct inode *inode, const struct iattr *attr) +{ + struct lu_env *env; + struct cl_io *io; + int result; + int refcheck; + + env = cl_env_get(&refcheck); + if (IS_ERR(env)) + return PTR_ERR(env); + + io = vvp_env_thread_io(env); + io->ci_obj = ll_i2info(inode)->lli_clob; + + io->u.ci_setattr.sa_attr.lvb_atime = LTIME_S(attr->ia_atime); + io->u.ci_setattr.sa_attr.lvb_mtime = LTIME_S(attr->ia_mtime); + io->u.ci_setattr.sa_attr.lvb_ctime = LTIME_S(attr->ia_ctime); + io->u.ci_setattr.sa_attr.lvb_size = attr->ia_size; + io->u.ci_setattr.sa_valid = attr->ia_valid; + +again: + if (cl_io_init(env, io, CIT_SETATTR, io->ci_obj) == 0) { + struct vvp_io *vio = vvp_env_io(env); + + if (attr->ia_valid & ATTR_FILE) + /* populate the file descriptor for ftruncate to honor + * group lock - see LU-787 + */ + vio->vui_fd = LUSTRE_FPRIVATE(attr->ia_file); + + result = cl_io_loop(env, io); + } else { + result = io->ci_result; + } + cl_io_fini(env, io); + if (unlikely(io->ci_need_restart)) + goto again; + /* HSM import case: file is released, cannot be restored + * no need to fail except if restore registration failed + * with -ENODATA + */ + if (result == -ENODATA && io->ci_restore_needed && + io->ci_result != -ENODATA) + result = 0; + cl_env_put(env, &refcheck); + return result; +} + +/** + * Initialize or update CLIO structures for regular files when new + * meta-data arrives from the server. + * + * \param inode regular file inode + * \param md new file metadata from MDS + * - allocates cl_object if necessary, + * - updated layout, if object was already here. + */ +int cl_file_inode_init(struct inode *inode, struct lustre_md *md) +{ + struct lu_env *env; + struct ll_inode_info *lli; + struct cl_object *clob; + struct lu_site *site; + struct lu_fid *fid; + struct cl_object_conf conf = { + .coc_inode = inode, + .u = { + .coc_md = md + } + }; + int result = 0; + int refcheck; + + LASSERT(md->body->valid & OBD_MD_FLID); + LASSERT(S_ISREG(inode->i_mode)); + + env = cl_env_get(&refcheck); + if (IS_ERR(env)) + return PTR_ERR(env); + + site = ll_i2sbi(inode)->ll_site; + lli = ll_i2info(inode); + fid = &lli->lli_fid; + LASSERT(fid_is_sane(fid)); + + if (!lli->lli_clob) { + /* clob is slave of inode, empty lli_clob means for new inode, + * there is no clob in cache with the given fid, so it is + * unnecessary to perform lookup-alloc-lookup-insert, just + * alloc and insert directly. + */ + LASSERT(inode->i_state & I_NEW); + conf.coc_lu.loc_flags = LOC_F_NEW; + clob = cl_object_find(env, lu2cl_dev(site->ls_top_dev), + fid, &conf); + if (!IS_ERR(clob)) { + /* + * No locking is necessary, as new inode is + * locked by I_NEW bit. + */ + lli->lli_clob = clob; + lli->lli_has_smd = lsm_has_objects(md->lsm); + lu_object_ref_add(&clob->co_lu, "inode", inode); + } else { + result = PTR_ERR(clob); + } + } else { + result = cl_conf_set(env, lli->lli_clob, &conf); + } + + cl_env_put(env, &refcheck); + + if (result != 0) + CERROR("Failure to initialize cl object " DFID ": %d\n", + PFID(fid), result); + return result; +} + +/** + * Wait for others drop their references of the object at first, then we drop + * the last one, which will lead to the object be destroyed immediately. + * Must be called after cl_object_kill() against this object. + * + * The reason we want to do this is: destroying top object will wait for sub + * objects being destroyed first, so we can't let bottom layer (e.g. from ASTs) + * to initiate top object destroying which may deadlock. See bz22520. + */ +static void cl_object_put_last(struct lu_env *env, struct cl_object *obj) +{ + struct lu_object_header *header = obj->co_lu.lo_header; + wait_queue_t waiter; + + if (unlikely(atomic_read(&header->loh_ref) != 1)) { + struct lu_site *site = obj->co_lu.lo_dev->ld_site; + struct lu_site_bkt_data *bkt; + + bkt = lu_site_bkt_from_fid(site, &header->loh_fid); + + init_waitqueue_entry(&waiter, current); + add_wait_queue(&bkt->lsb_marche_funebre, &waiter); + + while (1) { + set_current_state(TASK_UNINTERRUPTIBLE); + if (atomic_read(&header->loh_ref) == 1) + break; + schedule(); + } + + set_current_state(TASK_RUNNING); + remove_wait_queue(&bkt->lsb_marche_funebre, &waiter); + } + + cl_object_put(env, obj); +} + +void cl_inode_fini(struct inode *inode) +{ + struct lu_env *env; + struct ll_inode_info *lli = ll_i2info(inode); + struct cl_object *clob = lli->lli_clob; + int refcheck; + int emergency; + + if (clob) { + void *cookie; + + cookie = cl_env_reenter(); + env = cl_env_get(&refcheck); + emergency = IS_ERR(env); + if (emergency) { + mutex_lock(&cl_inode_fini_guard); + LASSERT(cl_inode_fini_env); + cl_env_implant(cl_inode_fini_env, &refcheck); + env = cl_inode_fini_env; + } + /* + * cl_object cache is a slave to inode cache (which, in turn + * is a slave to dentry cache), don't keep cl_object in memory + * when its master is evicted. + */ + cl_object_kill(env, clob); + lu_object_ref_del(&clob->co_lu, "inode", inode); + cl_object_put_last(env, clob); + lli->lli_clob = NULL; + if (emergency) { + cl_env_unplant(cl_inode_fini_env, &refcheck); + mutex_unlock(&cl_inode_fini_guard); + } else { + cl_env_put(env, &refcheck); + } + cl_env_reexit(cookie); + } +} + +/** + * build inode number from passed @fid + */ +__u64 cl_fid_build_ino(const struct lu_fid *fid, int api32) +{ + if (BITS_PER_LONG == 32 || api32) + return fid_flatten32(fid); + else + return fid_flatten(fid); +} + +/** + * build inode generation from passed @fid. If our FID overflows the 32-bit + * inode number then return a non-zero generation to distinguish them. + */ +__u32 cl_fid_build_gen(const struct lu_fid *fid) +{ + __u32 gen; + + if (fid_is_igif(fid)) { + gen = lu_igif_gen(fid); + return gen; + } + + gen = fid_flatten(fid) >> 32; + return gen; +} + +/* lsm is unreliable after hsm implementation as layout can be changed at + * any time. This is only to support old, non-clio-ized interfaces. It will + * cause deadlock if clio operations are called with this extra layout refcount + * because in case the layout changed during the IO, ll_layout_refresh() will + * have to wait for the refcount to become zero to destroy the older layout. + * + * Notice that the lsm returned by this function may not be valid unless called + * inside layout lock - MDS_INODELOCK_LAYOUT. + */ +struct lov_stripe_md *ccc_inode_lsm_get(struct inode *inode) +{ + return lov_lsm_get(ll_i2info(inode)->lli_clob); +} + +inline void ccc_inode_lsm_put(struct inode *inode, struct lov_stripe_md *lsm) +{ + lov_lsm_put(ll_i2info(inode)->lli_clob, lsm); +} diff --git a/drivers/staging/lustre/lustre/llite/lcommon_misc.c b/drivers/staging/lustre/lustre/llite/lcommon_misc.c new file mode 100644 index 000000000..12f3e71f4 --- /dev/null +++ b/drivers/staging/lustre/lustre/llite/lcommon_misc.c @@ -0,0 +1,201 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2011, 2012, Intel Corporation. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + * + * cl code shared between vvp and liblustre (and other Lustre clients in the + * future). + * + */ +#include "../include/obd_class.h" +#include "../include/obd_support.h" +#include "../include/obd.h" +#include "../include/cl_object.h" + +#include "../include/lustre_lite.h" +#include "llite_internal.h" + +/* Initialize the default and maximum LOV EA and cookie sizes. This allows + * us to make MDS RPCs with large enough reply buffers to hold the + * maximum-sized (= maximum striped) EA and cookie without having to + * calculate this (via a call into the LOV + OSCs) each time we make an RPC. + */ +int cl_init_ea_size(struct obd_export *md_exp, struct obd_export *dt_exp) +{ + struct lov_stripe_md lsm = { .lsm_magic = LOV_MAGIC_V3 }; + __u32 valsize = sizeof(struct lov_desc); + int rc, easize, def_easize, cookiesize; + struct lov_desc desc; + __u16 stripes, def_stripes; + + rc = obd_get_info(NULL, dt_exp, sizeof(KEY_LOVDESC), KEY_LOVDESC, + &valsize, &desc, NULL); + if (rc) + return rc; + + stripes = min_t(__u32, desc.ld_tgt_count, LOV_MAX_STRIPE_COUNT); + lsm.lsm_stripe_count = stripes; + easize = obd_size_diskmd(dt_exp, &lsm); + + def_stripes = min_t(__u32, desc.ld_default_stripe_count, + LOV_MAX_STRIPE_COUNT); + lsm.lsm_stripe_count = def_stripes; + def_easize = obd_size_diskmd(dt_exp, &lsm); + + cookiesize = stripes * sizeof(struct llog_cookie); + + /* default cookiesize is 0 because from 2.4 server doesn't send + * llog cookies to client. + */ + CDEBUG(D_HA, + "updating def/max_easize: %d/%d def/max_cookiesize: 0/%d\n", + def_easize, easize, cookiesize); + + rc = md_init_ea_size(md_exp, easize, def_easize, cookiesize, 0); + return rc; +} + +/** + * This function is used as an upcall-callback hooked by liblustre and llite + * clients into obd_notify() listeners chain to handle notifications about + * change of import connect_flags. See llu_fsswop_mount() and + * lustre_common_fill_super(). + */ +int cl_ocd_update(struct obd_device *host, + struct obd_device *watched, + enum obd_notify_event ev, void *owner, void *data) +{ + struct lustre_client_ocd *lco; + struct client_obd *cli; + __u64 flags; + int result; + + if (!strcmp(watched->obd_type->typ_name, LUSTRE_OSC_NAME)) { + cli = &watched->u.cli; + lco = owner; + flags = cli->cl_import->imp_connect_data.ocd_connect_flags; + CDEBUG(D_SUPER, "Changing connect_flags: %#llx -> %#llx\n", + lco->lco_flags, flags); + mutex_lock(&lco->lco_lock); + lco->lco_flags &= flags; + /* for each osc event update ea size */ + if (lco->lco_dt_exp) + cl_init_ea_size(lco->lco_md_exp, lco->lco_dt_exp); + + mutex_unlock(&lco->lco_lock); + result = 0; + } else { + CERROR("unexpected notification from %s %s!\n", + watched->obd_type->typ_name, + watched->obd_name); + result = -EINVAL; + } + return result; +} + +#define GROUPLOCK_SCOPE "grouplock" + +int cl_get_grouplock(struct cl_object *obj, unsigned long gid, int nonblock, + struct ll_grouplock *cg) +{ + struct lu_env *env; + struct cl_io *io; + struct cl_lock *lock; + struct cl_lock_descr *descr; + __u32 enqflags; + int refcheck; + int rc; + + env = cl_env_get(&refcheck); + if (IS_ERR(env)) + return PTR_ERR(env); + + io = vvp_env_thread_io(env); + io->ci_obj = obj; + io->ci_ignore_layout = 1; + + rc = cl_io_init(env, io, CIT_MISC, io->ci_obj); + if (rc != 0) { + cl_io_fini(env, io); + cl_env_put(env, &refcheck); + /* Does not make sense to take GL for released layout */ + if (rc > 0) + rc = -ENOTSUPP; + return rc; + } + + lock = vvp_env_lock(env); + descr = &lock->cll_descr; + descr->cld_obj = obj; + descr->cld_start = 0; + descr->cld_end = CL_PAGE_EOF; + descr->cld_gid = gid; + descr->cld_mode = CLM_GROUP; + + enqflags = CEF_MUST | (nonblock ? CEF_NONBLOCK : 0); + descr->cld_enq_flags = enqflags; + + rc = cl_lock_request(env, io, lock); + if (rc < 0) { + cl_io_fini(env, io); + cl_env_put(env, &refcheck); + return rc; + } + + cg->lg_env = cl_env_get(&refcheck); + cg->lg_io = io; + cg->lg_lock = lock; + cg->lg_gid = gid; + LASSERT(cg->lg_env == env); + + cl_env_unplant(env, &refcheck); + return 0; +} + +void cl_put_grouplock(struct ll_grouplock *cg) +{ + struct lu_env *env = cg->lg_env; + struct cl_io *io = cg->lg_io; + struct cl_lock *lock = cg->lg_lock; + int refcheck; + + LASSERT(cg->lg_env); + LASSERT(cg->lg_gid); + + cl_env_implant(env, &refcheck); + cl_env_put(env, &refcheck); + + cl_lock_release(env, lock); + cl_io_fini(env, io); + cl_env_put(env, NULL); +} diff --git a/drivers/staging/lustre/lustre/llite/llite_close.c b/drivers/staging/lustre/lustre/llite/llite_close.c index a55ac4dcc..2df551d3a 100644 --- a/drivers/staging/lustre/lustre/llite/llite_close.c +++ b/drivers/staging/lustre/lustre/llite/llite_close.c @@ -46,31 +46,31 @@ #include "llite_internal.h" /** records that a write is in flight */ -void vvp_write_pending(struct ccc_object *club, struct ccc_page *page) +void vvp_write_pending(struct vvp_object *club, struct vvp_page *page) { - struct ll_inode_info *lli = ll_i2info(club->cob_inode); + struct ll_inode_info *lli = ll_i2info(club->vob_inode); spin_lock(&lli->lli_lock); lli->lli_flags |= LLIF_SOM_DIRTY; - if (page && list_empty(&page->cpg_pending_linkage)) - list_add(&page->cpg_pending_linkage, &club->cob_pending_list); + if (page && list_empty(&page->vpg_pending_linkage)) + list_add(&page->vpg_pending_linkage, &club->vob_pending_list); spin_unlock(&lli->lli_lock); } /** records that a write has completed */ -void vvp_write_complete(struct ccc_object *club, struct ccc_page *page) +void vvp_write_complete(struct vvp_object *club, struct vvp_page *page) { - struct ll_inode_info *lli = ll_i2info(club->cob_inode); + struct ll_inode_info *lli = ll_i2info(club->vob_inode); int rc = 0; spin_lock(&lli->lli_lock); - if (page && !list_empty(&page->cpg_pending_linkage)) { - list_del_init(&page->cpg_pending_linkage); + if (page && !list_empty(&page->vpg_pending_linkage)) { + list_del_init(&page->vpg_pending_linkage); rc = 1; } spin_unlock(&lli->lli_lock); if (rc) - ll_queue_done_writing(club->cob_inode, 0); + ll_queue_done_writing(club->vob_inode, 0); } /** Queues DONE_WRITING if @@ -80,25 +80,25 @@ void vvp_write_complete(struct ccc_object *club, struct ccc_page *page) void ll_queue_done_writing(struct inode *inode, unsigned long flags) { struct ll_inode_info *lli = ll_i2info(inode); - struct ccc_object *club = cl2ccc(ll_i2info(inode)->lli_clob); + struct vvp_object *club = cl2vvp(ll_i2info(inode)->lli_clob); spin_lock(&lli->lli_lock); lli->lli_flags |= flags; if ((lli->lli_flags & LLIF_DONE_WRITING) && - list_empty(&club->cob_pending_list)) { + list_empty(&club->vob_pending_list)) { struct ll_close_queue *lcq = ll_i2sbi(inode)->ll_lcq; if (lli->lli_flags & LLIF_MDS_SIZE_LOCK) - CWARN("ino %lu/%u(flags %u) som valid it just after recovery\n", - inode->i_ino, inode->i_generation, - lli->lli_flags); + CWARN("%s: file "DFID"(flags %u) Size-on-MDS valid, done writing allowed and no diry pages\n", + ll_get_fsname(inode->i_sb, NULL, 0), + PFID(ll_inode2fid(inode)), lli->lli_flags); /* DONE_WRITING is allowed and inode has no dirty page. */ spin_lock(&lcq->lcq_lock); LASSERT(list_empty(&lli->lli_close_list)); - CDEBUG(D_INODE, "adding inode %lu/%u to close list\n", - inode->i_ino, inode->i_generation); + CDEBUG(D_INODE, "adding inode "DFID" to close list\n", + PFID(ll_inode2fid(inode))); list_add_tail(&lli->lli_close_list, &lcq->lcq_head); /* Avoid a concurrent insertion into the close thread queue: @@ -124,9 +124,9 @@ void ll_done_writing_attr(struct inode *inode, struct md_op_data *op_data) op_data->op_flags |= MF_SOM_CHANGE; /* Check if Size-on-MDS attributes are valid. */ if (lli->lli_flags & LLIF_MDS_SIZE_LOCK) - CERROR("ino %lu/%u(flags %u) som valid it just after recovery\n", - inode->i_ino, inode->i_generation, - lli->lli_flags); + CERROR("%s: inode "DFID"(flags %u) MDS holds lock on Size-on-MDS attributes\n", + ll_get_fsname(inode->i_sb, NULL, 0), + PFID(ll_inode2fid(inode)), lli->lli_flags); if (!cl_local_size(inode)) { /* Send Size-on-MDS Attributes if valid. */ @@ -140,10 +140,10 @@ void ll_ioepoch_close(struct inode *inode, struct md_op_data *op_data, struct obd_client_handle **och, unsigned long flags) { struct ll_inode_info *lli = ll_i2info(inode); - struct ccc_object *club = cl2ccc(ll_i2info(inode)->lli_clob); + struct vvp_object *club = cl2vvp(ll_i2info(inode)->lli_clob); spin_lock(&lli->lli_lock); - if (!(list_empty(&club->cob_pending_list))) { + if (!(list_empty(&club->vob_pending_list))) { if (!(lli->lli_flags & LLIF_EPOCH_PENDING)) { LASSERT(*och); LASSERT(!lli->lli_pending_och); @@ -198,7 +198,7 @@ void ll_ioepoch_close(struct inode *inode, struct md_op_data *op_data, } } - LASSERT(list_empty(&club->cob_pending_list)); + LASSERT(list_empty(&club->vob_pending_list)); lli->lli_flags &= ~LLIF_SOM_DIRTY; spin_unlock(&lli->lli_lock); ll_done_writing_attr(inode, op_data); @@ -221,9 +221,9 @@ int ll_som_update(struct inode *inode, struct md_op_data *op_data) LASSERT(op_data); if (lli->lli_flags & LLIF_MDS_SIZE_LOCK) - CERROR("ino %lu/%u(flags %u) som valid it just after recovery\n", - inode->i_ino, inode->i_generation, - lli->lli_flags); + CERROR("%s: inode "DFID"(flags %u) MDS holds lock on Size-on-MDS attributes\n", + ll_get_fsname(inode->i_sb, NULL, 0), + PFID(ll_inode2fid(inode)), lli->lli_flags); oa = kmem_cache_zalloc(obdo_cachep, GFP_NOFS); if (!oa) { @@ -241,9 +241,9 @@ int ll_som_update(struct inode *inode, struct md_op_data *op_data) if (rc) { oa->o_valid = 0; if (rc != -ENOENT) - CERROR("inode_getattr failed (%d): unable to send a Size-on-MDS attribute update for inode %lu/%u\n", - rc, inode->i_ino, - inode->i_generation); + CERROR("%s: inode_getattr failed - unable to send a Size-on-MDS attribute update for inode "DFID": rc = %d\n", + ll_get_fsname(inode->i_sb, NULL, 0), + PFID(ll_inode2fid(inode)), rc); } else { CDEBUG(D_INODE, "Size-on-MDS update on "DFID"\n", PFID(&lli->lli_fid)); @@ -302,9 +302,11 @@ static void ll_done_writing(struct inode *inode) * OSTs and send setattr to back to MDS. */ rc = ll_som_update(inode, op_data); - else if (rc) - CERROR("inode %lu mdc done_writing failed: rc = %d\n", - inode->i_ino, rc); + else if (rc) { + CERROR("%s: inode "DFID" mdc done_writing failed: rc = %d\n", + ll_get_fsname(inode->i_sb, NULL, 0), + PFID(ll_inode2fid(inode)), rc); + } out: ll_finish_md_op_data(op_data); if (och) { @@ -323,8 +325,9 @@ static struct ll_inode_info *ll_close_next_lli(struct ll_close_queue *lcq) lli = list_entry(lcq->lcq_head.next, struct ll_inode_info, lli_close_list); list_del_init(&lli->lli_close_list); - } else if (atomic_read(&lcq->lcq_stop)) + } else if (atomic_read(&lcq->lcq_stop)) { lli = ERR_PTR(-EALREADY); + } spin_unlock(&lcq->lcq_lock); return lli; @@ -348,8 +351,8 @@ static int ll_close_thread(void *arg) break; inode = ll_info2i(lli); - CDEBUG(D_INFO, "done_writing for inode %lu/%u\n", - inode->i_ino, inode->i_generation); + CDEBUG(D_INFO, "done_writing for inode "DFID"\n", + PFID(ll_inode2fid(inode))); ll_done_writing(inode); iput(inode); } diff --git a/drivers/staging/lustre/lustre/llite/llite_internal.h b/drivers/staging/lustre/lustre/llite/llite_internal.h index e3c0f1dd4..3f2f30b65 100644 --- a/drivers/staging/lustre/lustre/llite/llite_internal.h +++ b/drivers/staging/lustre/lustre/llite/llite_internal.h @@ -43,11 +43,11 @@ /* for struct cl_lock_descr and struct cl_io */ #include "../include/cl_object.h" -#include "../include/lclient.h" #include "../include/lustre_mdc.h" #include "../include/lustre_intent.h" #include #include +#include "vvp_internal.h" #ifndef FMODE_EXEC #define FMODE_EXEC 0 @@ -99,6 +99,13 @@ struct ll_remote_perm { */ }; +struct ll_grouplock { + struct lu_env *lg_env; + struct cl_io *lg_io; + struct cl_lock *lg_lock; + unsigned long lg_gid; +}; + enum lli_flags { /* MDS has an authority for the Size-on-MDS attributes. */ LLIF_MDS_SIZE_LOCK = (1 << 0), @@ -161,7 +168,9 @@ struct ll_inode_info { struct inode lli_vfs_inode; /* the most recent timestamps obtained from mds */ - struct ost_lvb lli_lvb; + s64 lli_atime; + s64 lli_mtime; + s64 lli_ctime; spinlock_t lli_agl_lock; /* Try to make the d::member and f::member are aligned. Before using @@ -328,6 +337,7 @@ enum ra_stat { RA_STAT_EOF, RA_STAT_MAX_IN_FLIGHT, RA_STAT_WRONG_GRAB_PAGE, + RA_STAT_FAILED_REACH_END, _NR_RA_STAT, }; @@ -481,6 +491,12 @@ struct ll_sb_info { struct lprocfs_stats *ll_stats; /* lprocfs stats counter */ + /* + * Used to track "unstable" pages on a client, and maintain a + * LRU list of clean pages. An "unstable" page is defined as + * any page which is sent to a server as part of a bulk request, + * but is uncommitted to stable storage. + */ struct cl_client_cache ll_cache; struct lprocfs_stats *ll_ra_stats; @@ -525,13 +541,6 @@ struct ll_sb_info { struct completion ll_kobj_unregister; }; -struct ll_ra_read { - pgoff_t lrr_start; - pgoff_t lrr_count; - struct task_struct *lrr_reader; - struct list_head lrr_linkage; -}; - /* * per file-descriptor read-ahead data. */ @@ -589,12 +598,6 @@ struct ll_readahead_state { * will not be accurate when dealing with reads issued via mmap. */ unsigned long ras_request_index; - /* - * list of struct ll_ra_read's one per read(2) call current in - * progress against this file descriptor. Used by read-ahead code, - * protected by ->ras_lock. - */ - struct list_head ras_read_beads; /* * The following 3 items are used for detecting the stride I/O * mode. @@ -622,7 +625,7 @@ extern struct kmem_cache *ll_file_data_slab; struct lustre_handle; struct ll_file_data { struct ll_readahead_state fd_ras; - struct ccc_grouplock fd_grouplock; + struct ll_grouplock fd_grouplock; __u64 lfd_pos; __u32 fd_flags; fmode_t fd_omode; @@ -663,8 +666,16 @@ static inline int ll_need_32bit_api(struct ll_sb_info *sbi) #endif } -void ll_ra_read_in(struct file *f, struct ll_ra_read *rar); -void ll_ra_read_ex(struct file *f, struct ll_ra_read *rar); +void ll_ras_enter(struct file *f); + +/* llite/lcommon_misc.c */ +int cl_init_ea_size(struct obd_export *md_exp, struct obd_export *dt_exp); +int cl_ocd_update(struct obd_device *host, + struct obd_device *watched, + enum obd_notify_event ev, void *owner, void *data); +int cl_get_grouplock(struct cl_object *obj, unsigned long gid, int nonblock, + struct ll_grouplock *cg); +void cl_put_grouplock(struct ll_grouplock *cg); /* llite/lproc_llite.c */ int ldebugfs_register_mountpoint(struct dentry *parent, @@ -697,15 +708,15 @@ int ll_md_blocking_ast(struct ldlm_lock *, struct ldlm_lock_desc *, struct dentry *ll_splice_alias(struct inode *inode, struct dentry *de); /* llite/rw.c */ -int ll_prepare_write(struct file *, struct page *, unsigned from, unsigned to); -int ll_commit_write(struct file *, struct page *, unsigned from, unsigned to); int ll_writepage(struct page *page, struct writeback_control *wbc); int ll_writepages(struct address_space *, struct writeback_control *wbc); int ll_readpage(struct file *file, struct page *page); void ll_readahead_init(struct inode *inode, struct ll_readahead_state *ras); int ll_readahead(const struct lu_env *env, struct cl_io *io, - struct ll_readahead_state *ras, struct address_space *mapping, - struct cl_page_list *queue, int flags); + struct cl_page_list *queue, struct ll_readahead_state *ras, + bool hit); +struct ll_cl_context *ll_cl_init(struct file *file, struct page *vmpage); +void ll_cl_fini(struct ll_cl_context *lcc); extern const struct address_space_operations ll_aops; @@ -740,7 +751,7 @@ struct posix_acl *ll_get_acl(struct inode *inode, int type); int ll_inode_permission(struct inode *inode, int mask); int ll_lov_setstripe_ea_info(struct inode *inode, struct dentry *dentry, - int flags, struct lov_user_md *lum, + __u64 flags, struct lov_user_md *lum, int lum_size); int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename, struct lov_mds_md **lmm, int *lmm_size, @@ -750,9 +761,9 @@ int ll_dir_setstripe(struct inode *inode, struct lov_user_md *lump, int ll_dir_getstripe(struct inode *inode, struct lov_mds_md **lmmp, int *lmm_size, struct ptlrpc_request **request); int ll_fsync(struct file *file, loff_t start, loff_t end, int data); -int ll_merge_lvb(const struct lu_env *env, struct inode *inode); +int ll_merge_attr(const struct lu_env *env, struct inode *inode); int ll_fid2path(struct inode *inode, void __user *arg); -int ll_data_version(struct inode *inode, __u64 *data_version, int extent_lock); +int ll_data_version(struct inode *inode, __u64 *data_version, int flags); int ll_hsm_release(struct inode *inode); /* llite/dcache.c */ @@ -824,65 +835,8 @@ struct ll_close_queue { atomic_t lcq_stop; }; -struct ccc_object *cl_inode2ccc(struct inode *inode); - -void vvp_write_pending (struct ccc_object *club, struct ccc_page *page); -void vvp_write_complete(struct ccc_object *club, struct ccc_page *page); - -/* specific architecture can implement only part of this list */ -enum vvp_io_subtype { - /** normal IO */ - IO_NORMAL, - /** io started from splice_{read|write} */ - IO_SPLICE -}; - -/* IO subtypes */ -struct vvp_io { - /** io subtype */ - enum vvp_io_subtype cui_io_subtype; - - union { - struct { - struct pipe_inode_info *cui_pipe; - unsigned int cui_flags; - } splice; - struct vvp_fault_io { - /** - * Inode modification time that is checked across DLM - * lock request. - */ - time64_t ft_mtime; - struct vm_area_struct *ft_vma; - /** - * locked page returned from vvp_io - */ - struct page *ft_vmpage; - struct vm_fault_api { - /** - * kernel fault info - */ - struct vm_fault *ft_vmf; - /** - * fault API used bitflags for return code. - */ - unsigned int ft_flags; - /** - * check that flags are from filemap_fault - */ - bool ft_flags_valid; - } fault; - } fault; - } u; - /** - * Read-ahead state used by read and page-fault IO contexts. - */ - struct ll_ra_read cui_bead; - /** - * Set when cui_bead has been initialized. - */ - int cui_ra_window_set; -}; +void vvp_write_pending(struct vvp_object *club, struct vvp_page *page); +void vvp_write_complete(struct vvp_object *club, struct vvp_page *page); /** * IO arguments for various VFS I/O interfaces. @@ -911,54 +865,32 @@ struct ll_cl_context { int lcc_refcheck; }; -struct vvp_thread_info { - struct vvp_io_args vti_args; - struct ra_io_arg vti_ria; - struct ll_cl_context vti_io_ctx; +struct ll_thread_info { + struct vvp_io_args lti_args; + struct ra_io_arg lti_ria; + struct ll_cl_context lti_io_ctx; }; -static inline struct vvp_thread_info *vvp_env_info(const struct lu_env *env) -{ - extern struct lu_context_key vvp_key; - struct vvp_thread_info *info; - - info = lu_context_key_get(&env->le_ctx, &vvp_key); - LASSERT(info); - return info; -} - -static inline struct vvp_io_args *vvp_env_args(const struct lu_env *env, - enum vvp_io_subtype type) +extern struct lu_context_key ll_thread_key; +static inline struct ll_thread_info *ll_env_info(const struct lu_env *env) { - struct vvp_io_args *ret = &vvp_env_info(env)->vti_args; - - ret->via_io_subtype = type; + struct ll_thread_info *lti; - return ret; + lti = lu_context_key_get(&env->le_ctx, &ll_thread_key); + LASSERT(lti); + return lti; } -struct vvp_session { - struct vvp_io vs_ios; -}; - -static inline struct vvp_session *vvp_env_session(const struct lu_env *env) +static inline struct vvp_io_args *ll_env_args(const struct lu_env *env, + enum vvp_io_subtype type) { - extern struct lu_context_key vvp_session_key; - struct vvp_session *ses; + struct vvp_io_args *via = &ll_env_info(env)->lti_args; - ses = lu_context_key_get(env->le_ses, &vvp_session_key); - LASSERT(ses); - return ses; -} + via->via_io_subtype = type; -static inline struct vvp_io *vvp_env_io(const struct lu_env *env) -{ - return &vvp_env_session(env)->vs_ios; + return via; } -int vvp_global_init(void); -void vvp_global_fini(void); - void ll_queue_done_writing(struct inode *inode, unsigned long flags); void ll_close_thread_shutdown(struct ll_close_queue *lcq); int ll_close_thread_start(struct ll_close_queue **lcq_ret); @@ -981,6 +913,10 @@ static inline void ll_invalidate_page(struct page *vmpage) if (!mapping) return; + /* + * truncate_complete_page() calls + * a_ops->invalidatepage()->cl_page_delete()->vvp_page_delete(). + */ ll_teardown_mmaps(mapping, offset, offset + PAGE_SIZE); truncate_complete_page(mapping, vmpage); } @@ -1040,10 +976,10 @@ static inline __u64 ll_file_maxbytes(struct inode *inode) } /* llite/xattr.c */ -int ll_setxattr(struct dentry *dentry, const char *name, - const void *value, size_t size, int flags); -ssize_t ll_getxattr(struct dentry *dentry, const char *name, - void *buffer, size_t size); +int ll_setxattr(struct dentry *dentry, struct inode *inode, + const char *name, const void *value, size_t size, int flags); +ssize_t ll_getxattr(struct dentry *dentry, struct inode *inode, + const char *name, void *buffer, size_t size); ssize_t ll_listxattr(struct dentry *dentry, char *buffer, size_t size); int ll_removexattr(struct dentry *dentry, const char *name); @@ -1055,9 +991,6 @@ void free_rmtperm_hash(struct hlist_head *hash); int ll_update_remote_perm(struct inode *inode, struct mdt_remote_perm *perm); int lustre_check_remote_perm(struct inode *inode, int mask); -/* llite/llite_cl.c */ -extern struct lu_device_type vvp_device_type; - /** * Common IO arguments for various VFS I/O interfaces. */ @@ -1069,7 +1002,7 @@ void ras_update(struct ll_sb_info *sbi, struct inode *inode, struct ll_readahead_state *ras, unsigned long index, unsigned hit); void ll_ra_count_put(struct ll_sb_info *sbi, unsigned long len); -void ll_ra_stats_inc(struct address_space *mapping, enum ra_stat which); +void ll_ra_stats_inc(struct inode *inode, enum ra_stat which); /* llite/llite_rmtacl.c */ #ifdef CONFIG_FS_POSIX_ACL @@ -1163,6 +1096,22 @@ int do_statahead_enter(struct inode *dir, struct dentry **dentry, int only_unplug); void ll_stop_statahead(struct inode *dir, void *key); +blkcnt_t dirty_cnt(struct inode *inode); + +int cl_glimpse_size0(struct inode *inode, int agl); +int cl_glimpse_lock(const struct lu_env *env, struct cl_io *io, + struct inode *inode, struct cl_object *clob, int agl); + +static inline int cl_glimpse_size(struct inode *inode) +{ + return cl_glimpse_size0(inode, 0); +} + +static inline int cl_agl(struct inode *inode) +{ + return cl_glimpse_size0(inode, 1); +} + static inline int ll_glimpse_size(struct inode *inode) { struct ll_inode_info *lli = ll_i2info(inode); @@ -1285,43 +1234,6 @@ typedef enum llioc_iter (*llioc_callback_t)(struct inode *inode, void *ll_iocontrol_register(llioc_callback_t cb, int count, unsigned int *cmd); void ll_iocontrol_unregister(void *magic); -/* lclient compat stuff */ -#define cl_inode_info ll_inode_info -#define cl_i2info(info) ll_i2info(info) -#define cl_inode_mode(inode) ((inode)->i_mode) -#define cl_i2sbi ll_i2sbi - -static inline struct ll_file_data *cl_iattr2fd(struct inode *inode, - const struct iattr *attr) -{ - LASSERT(attr->ia_valid & ATTR_FILE); - return LUSTRE_FPRIVATE(attr->ia_file); -} - -static inline void cl_isize_write_nolock(struct inode *inode, loff_t kms) -{ - LASSERT(mutex_is_locked(&ll_i2info(inode)->lli_size_mutex)); - i_size_write(inode, kms); -} - -static inline void cl_isize_write(struct inode *inode, loff_t kms) -{ - ll_inode_size_lock(inode); - i_size_write(inode, kms); - ll_inode_size_unlock(inode); -} - -#define cl_isize_read(inode) i_size_read(inode) - -static inline int cl_merge_lvb(const struct lu_env *env, struct inode *inode) -{ - return ll_merge_lvb(env, inode); -} - -#define cl_inode_atime(inode) LTIME_S((inode)->i_atime) -#define cl_inode_ctime(inode) LTIME_S((inode)->i_ctime) -#define cl_inode_mtime(inode) LTIME_S((inode)->i_mtime) - int cl_sync_file_range(struct inode *inode, loff_t start, loff_t end, enum cl_fsync_mode mode, int ignore_layout); @@ -1350,7 +1262,7 @@ static inline void cl_stats_tally(struct cl_device *dev, enum cl_req_type crt, int opc = (crt == CRT_READ) ? LPROC_LL_OSC_READ : LPROC_LL_OSC_WRITE; - ll_stats_ops_tally(ll_s2sbi(cl2ccc_dev(dev)->cdv_sb), opc, rc); + ll_stats_ops_tally(ll_s2sbi(cl2vvp_dev(dev)->vdv_sb), opc, rc); } ssize_t ll_direct_rw_pages(const struct lu_env *env, struct cl_io *io, @@ -1382,18 +1294,16 @@ static inline void ll_set_lock_data(struct obd_export *exp, struct inode *inode, */ if (it->d.lustre.it_remote_lock_mode) { handle.cookie = it->d.lustre.it_remote_lock_handle; - CDEBUG(D_DLMTRACE, "setting l_data to inode %p(%lu/%u) for remote lock %#llx\n", - inode, - inode->i_ino, inode->i_generation, + CDEBUG(D_DLMTRACE, "setting l_data to inode "DFID"%p for remote lock %#llx\n", + PFID(ll_inode2fid(inode)), inode, handle.cookie); md_set_lock_data(exp, &handle.cookie, inode, NULL); } handle.cookie = it->d.lustre.it_lock_handle; - CDEBUG(D_DLMTRACE, "setting l_data to inode %p (%lu/%u) for lock %#llx\n", - inode, inode->i_ino, - inode->i_generation, handle.cookie); + CDEBUG(D_DLMTRACE, "setting l_data to inode "DFID"%p for lock %#llx\n", + PFID(ll_inode2fid(inode)), inode, handle.cookie); md_set_lock_data(exp, &handle.cookie, inode, &it->d.lustre.it_lock_bits); @@ -1471,9 +1381,25 @@ enum { int ll_layout_conf(struct inode *inode, const struct cl_object_conf *conf); int ll_layout_refresh(struct inode *inode, __u32 *gen); -int ll_layout_restore(struct inode *inode); +int ll_layout_restore(struct inode *inode, loff_t start, __u64 length); int ll_xattr_init(void); void ll_xattr_fini(void); +int ll_page_sync_io(const struct lu_env *env, struct cl_io *io, + struct cl_page *page, enum cl_req_type crt); + +/* lcommon_cl.c */ +int cl_setattr_ost(struct inode *inode, const struct iattr *attr); + +extern struct lu_env *cl_inode_fini_env; +extern int cl_inode_fini_refcheck; + +int cl_file_inode_init(struct inode *inode, struct lustre_md *md); +void cl_inode_fini(struct inode *inode); +int cl_local_size(struct inode *inode); + +__u64 cl_fid_build_ino(const struct lu_fid *fid, int api32); +__u32 cl_fid_build_gen(const struct lu_fid *fid); + #endif /* LLITE_INTERNAL_H */ diff --git a/drivers/staging/lustre/lustre/llite/llite_lib.c b/drivers/staging/lustre/lustre/llite/llite_lib.c index b57a99268..96c7e9fc6 100644 --- a/drivers/staging/lustre/lustre/llite/llite_lib.c +++ b/drivers/staging/lustre/lustre/llite/llite_lib.c @@ -85,18 +85,18 @@ static struct ll_sb_info *ll_init_sbi(struct super_block *sb) si_meminfo(&si); pages = si.totalram - si.totalhigh; - if (pages >> (20 - PAGE_SHIFT) < 512) - lru_page_max = pages / 2; - else - lru_page_max = (pages / 4) * 3; + lru_page_max = pages / 2; - /* initialize lru data */ + /* initialize ll_cache data */ atomic_set(&sbi->ll_cache.ccc_users, 0); sbi->ll_cache.ccc_lru_max = lru_page_max; atomic_set(&sbi->ll_cache.ccc_lru_left, lru_page_max); spin_lock_init(&sbi->ll_cache.ccc_lru_lock); INIT_LIST_HEAD(&sbi->ll_cache.ccc_lru); + atomic_set(&sbi->ll_cache.ccc_unstable_nr, 0); + init_waitqueue_head(&sbi->ll_cache.ccc_unstable_waitq); + sbi->ll_ra_info.ra_max_pages_per_file = min(pages / 32, SBI_DEFAULT_READAHEAD_MAX); sbi->ll_ra_info.ra_max_pages = sbi->ll_ra_info.ra_max_pages_per_file; @@ -169,12 +169,6 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt, return -ENOMEM; } - if (llite_root) { - err = ldebugfs_register_mountpoint(llite_root, sb, dt, md); - if (err < 0) - CERROR("could not register mount in /lustre/llite\n"); - } - /* indicate the features supported by this client */ data->ocd_connect_flags = OBD_CONNECT_IBITS | OBD_CONNECT_NODEVOH | OBD_CONNECT_ATTRFID | @@ -337,10 +331,8 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt, else sbi->ll_md_brw_size = PAGE_SIZE; - if (data->ocd_connect_flags & OBD_CONNECT_LAYOUTLOCK) { - LCONSOLE_INFO("Layout lock feature supported.\n"); + if (data->ocd_connect_flags & OBD_CONNECT_LAYOUTLOCK) sbi->ll_flags |= LL_SBI_LAYOUT_LOCK; - } if (data->ocd_ibits_known & MDS_INODELOCK_XATTR) { if (!(data->ocd_connect_flags & OBD_CONNECT_MAX_EASIZE)) { @@ -453,7 +445,7 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt, /* make root inode * XXX: move this to after cbd setup? */ - valid = OBD_MD_FLGETATTR | OBD_MD_FLBLOCKS; + valid = OBD_MD_FLGETATTR | OBD_MD_FLBLOCKS | OBD_MD_FLMODEASIZE; if (sbi->ll_flags & LL_SBI_RMT_CLIENT) valid |= OBD_MD_FLRMTPERM; else if (sbi->ll_flags & LL_SBI_ACL) @@ -555,6 +547,15 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt, kfree(data); kfree(osfs); + if (llite_root) { + err = ldebugfs_register_mountpoint(llite_root, sb, dt, md); + if (err < 0) { + CERROR("%s: could not register mount in debugfs: " + "rc = %d\n", ll_get_fsname(sb, NULL, 0), err); + err = 0; + } + } + return err; out_root: iput(root); @@ -573,7 +574,6 @@ out_md: out: kfree(data); kfree(osfs); - ldebugfs_unregister_mountpoint(sbi); return err; } @@ -897,10 +897,8 @@ int ll_fill_super(struct super_block *sb, struct vfsmount *mnt) cfg->cfg_callback = class_config_llog_handler; /* set up client obds */ err = lustre_process_log(sb, profilenm, cfg); - if (err < 0) { - CERROR("Unable to process log: %d\n", err); + if (err < 0) goto out_free; - } /* Profile set with LCFG_MOUNTOPT so we can find our mdc and osc obds */ lprof = class_get_profile(profilenm); @@ -947,7 +945,7 @@ void ll_put_super(struct super_block *sb) struct lustre_sb_info *lsi = s2lsi(sb); struct ll_sb_info *sbi = ll_s2sbi(sb); char *profilenm = get_profile_name(sb); - int next, force = 1; + int ccc_count, next, force = 1, rc = 0; CDEBUG(D_VFSTRACE, "VFS Op: sb %p - %s\n", sb, profilenm); @@ -963,6 +961,19 @@ void ll_put_super(struct super_block *sb) force = obd->obd_force; } + /* Wait for unstable pages to be committed to stable storage */ + if (!force) { + struct l_wait_info lwi = LWI_INTR(LWI_ON_SIGNAL_NOOP, NULL); + + rc = l_wait_event(sbi->ll_cache.ccc_unstable_waitq, + !atomic_read(&sbi->ll_cache.ccc_unstable_nr), + &lwi); + } + + ccc_count = atomic_read(&sbi->ll_cache.ccc_unstable_nr); + if (!force && rc != -EINTR) + LASSERTF(!ccc_count, "count: %i\n", ccc_count); + /* We need to set force before the lov_disconnect in * lustre_common_put_super, since l_d cleans up osc's as well. */ @@ -999,6 +1010,8 @@ void ll_put_super(struct super_block *sb) lustre_common_put_super(sb); + cl_env_cache_purge(~0); + module_put(THIS_MODULE); } /* client_put_super */ @@ -1032,8 +1045,8 @@ void ll_clear_inode(struct inode *inode) struct ll_inode_info *lli = ll_i2info(inode); struct ll_sb_info *sbi = ll_i2sbi(inode); - CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino, - inode->i_generation, inode); + CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p)\n", + PFID(ll_inode2fid(inode)), inode); if (S_ISDIR(inode->i_mode)) { /* these should have been cleared in ll_file_release */ @@ -1180,9 +1193,11 @@ static int ll_setattr_done_writing(struct inode *inode, * from OSTs and send setattr to back to MDS. */ rc = ll_som_update(inode, op_data); - else if (rc) - CERROR("inode %lu mdc truncate failed: rc = %d\n", - inode->i_ino, rc); + else if (rc) { + CERROR("%s: inode "DFID" mdc truncate failed: rc = %d\n", + ll_i2sbi(inode)->ll_md_exp->exp_obd->obd_name, + PFID(ll_inode2fid(inode)), rc); + } return rc; } @@ -1210,12 +1225,9 @@ int ll_setattr_raw(struct dentry *dentry, struct iattr *attr, bool hsm_import) bool file_is_released = false; int rc = 0, rc1 = 0; - CDEBUG(D_VFSTRACE, - "%s: setattr inode %p/fid:" DFID - " from %llu to %llu, valid %x, hsm_import %d\n", - ll_get_fsname(inode->i_sb, NULL, 0), inode, - PFID(&lli->lli_fid), i_size_read(inode), attr->ia_size, - attr->ia_valid, hsm_import); + CDEBUG(D_VFSTRACE, "%s: setattr inode "DFID"(%p) from %llu to %llu, valid %x, hsm_import %d\n", + ll_get_fsname(inode->i_sb, NULL, 0), PFID(&lli->lli_fid), inode, + i_size_read(inode), attr->ia_size, attr->ia_valid, hsm_import); if (attr->ia_valid & ATTR_SIZE) { /* Check new size against VFS/VM file size limit and rlimit */ @@ -1265,14 +1277,6 @@ int ll_setattr_raw(struct dentry *dentry, struct iattr *attr, bool hsm_import) LTIME_S(attr->ia_mtime), LTIME_S(attr->ia_ctime), (s64)ktime_get_real_seconds()); - /* If we are changing file size, file content is modified, flag it. */ - if (attr->ia_valid & ATTR_SIZE) { - attr->ia_valid |= MDS_OPEN_OWNEROVERRIDE; - spin_lock(&lli->lli_lock); - lli->lli_flags |= LLIF_DATA_MODIFIED; - spin_unlock(&lli->lli_lock); - } - /* We always do an MDS RPC, even if we're only changing the size; * only the MDS knows whether truncate() should fail with -ETXTBUSY */ @@ -1284,13 +1288,6 @@ int ll_setattr_raw(struct dentry *dentry, struct iattr *attr, bool hsm_import) if (!S_ISDIR(inode->i_mode)) inode_unlock(inode); - memcpy(&op_data->op_attr, attr, sizeof(*attr)); - - /* Open epoch for truncate. */ - if (exp_connect_som(ll_i2mdexp(inode)) && - (attr->ia_valid & (ATTR_SIZE | ATTR_MTIME | ATTR_MTIME_SET))) - op_data->op_flags = MF_EPOCH_OPEN; - /* truncate on a released file must failed with -ENODATA, * so size must not be set on MDS for released file * but other attributes must be set @@ -1304,29 +1301,40 @@ int ll_setattr_raw(struct dentry *dentry, struct iattr *attr, bool hsm_import) if (lsm && lsm->lsm_pattern & LOV_PATTERN_F_RELEASED) file_is_released = true; ccc_inode_lsm_put(inode, lsm); + + if (!hsm_import && attr->ia_valid & ATTR_SIZE) { + if (file_is_released) { + rc = ll_layout_restore(inode, 0, attr->ia_size); + if (rc < 0) + goto out; + + file_is_released = false; + ll_layout_refresh(inode, &gen); + } + + /* + * If we are changing file size, file content is + * modified, flag it. + */ + attr->ia_valid |= MDS_OPEN_OWNEROVERRIDE; + spin_lock(&lli->lli_lock); + lli->lli_flags |= LLIF_DATA_MODIFIED; + spin_unlock(&lli->lli_lock); + op_data->op_bias |= MDS_DATA_MODIFIED; + } } - /* if not in HSM import mode, clear size attr for released file - * we clear the attribute send to MDT in op_data, not the original - * received from caller in attr which is used later to - * decide return code - */ - if (file_is_released && (attr->ia_valid & ATTR_SIZE) && !hsm_import) - op_data->op_attr.ia_valid &= ~ATTR_SIZE; + memcpy(&op_data->op_attr, attr, sizeof(*attr)); + + /* Open epoch for truncate. */ + if (exp_connect_som(ll_i2mdexp(inode)) && !hsm_import && + (attr->ia_valid & (ATTR_SIZE | ATTR_MTIME | ATTR_MTIME_SET))) + op_data->op_flags = MF_EPOCH_OPEN; rc = ll_md_setattr(dentry, op_data, &mod); if (rc) goto out; - /* truncate failed (only when non HSM import), others succeed */ - if (file_is_released) { - if ((attr->ia_valid & ATTR_SIZE) && !hsm_import) - rc = -ENODATA; - else - rc = 0; - goto out; - } - /* RPC to MDT is sent, cancel data modification flag */ if (op_data->op_bias & MDS_DATA_MODIFIED) { spin_lock(&lli->lli_lock); @@ -1335,7 +1343,7 @@ int ll_setattr_raw(struct dentry *dentry, struct iattr *attr, bool hsm_import) } ll_ioepoch_open(lli, op_data->op_ioepoch); - if (!S_ISREG(inode->i_mode)) { + if (!S_ISREG(inode->i_mode) || file_is_released) { rc = 0; goto out; } @@ -1552,7 +1560,7 @@ void ll_update_inode(struct inode *inode, struct lustre_md *md) if (body->valid & OBD_MD_FLATIME) { if (body->atime > LTIME_S(inode->i_atime)) LTIME_S(inode->i_atime) = body->atime; - lli->lli_lvb.lvb_atime = body->atime; + lli->lli_atime = body->atime; } if (body->valid & OBD_MD_FLMTIME) { if (body->mtime > LTIME_S(inode->i_mtime)) { @@ -1561,12 +1569,12 @@ void ll_update_inode(struct inode *inode, struct lustre_md *md) body->mtime); LTIME_S(inode->i_mtime) = body->mtime; } - lli->lli_lvb.lvb_mtime = body->mtime; + lli->lli_mtime = body->mtime; } if (body->valid & OBD_MD_FLCTIME) { if (body->ctime > LTIME_S(inode->i_ctime)) LTIME_S(inode->i_ctime) = body->ctime; - lli->lli_lvb.lvb_ctime = body->ctime; + lli->lli_ctime = body->ctime; } if (body->valid & OBD_MD_FLMODE) inode->i_mode = (inode->i_mode & S_IFMT)|(body->mode & ~S_IFMT); @@ -1593,12 +1601,12 @@ void ll_update_inode(struct inode *inode, struct lustre_md *md) /* FID shouldn't be changed! */ if (fid_is_sane(&lli->lli_fid)) { LASSERTF(lu_fid_eq(&lli->lli_fid, &body->fid1), - "Trying to change FID "DFID - " to the "DFID", inode %lu/%u(%p)\n", + "Trying to change FID "DFID" to the "DFID", inode "DFID"(%p)\n", PFID(&lli->lli_fid), PFID(&body->fid1), - inode->i_ino, inode->i_generation, inode); - } else + PFID(ll_inode2fid(inode)), inode); + } else { lli->lli_fid = body->fid1; + } } LASSERT(fid_seq(&lli->lli_fid) != 0); @@ -1622,8 +1630,10 @@ void ll_update_inode(struct inode *inode, struct lustre_md *md) if (lli->lli_flags & (LLIF_DONE_WRITING | LLIF_EPOCH_PENDING | LLIF_SOM_DIRTY)) { - CERROR("ino %lu flags %u still has size authority! do not trust the size got from MDS\n", - inode->i_ino, lli->lli_flags); + CERROR("%s: inode "DFID" flags %u still has size authority! do not trust the size got from MDS\n", + sbi->ll_md_exp->exp_obd->obd_name, + PFID(ll_inode2fid(inode)), + lli->lli_flags); } else { /* Use old size assignment to avoid * deadlock bz14138 & bz14326 @@ -1699,7 +1709,7 @@ void ll_read_inode2(struct inode *inode, void *opaque) void ll_delete_inode(struct inode *inode) { - struct cl_inode_info *lli = cl_i2info(inode); + struct ll_inode_info *lli = ll_i2info(inode); if (S_ISREG(inode->i_mode) && lli->lli_clob) /* discard all dirty pages before truncating them, required by @@ -1715,8 +1725,8 @@ void ll_delete_inode(struct inode *inode) spin_lock_irq(&inode->i_data.tree_lock); spin_unlock_irq(&inode->i_data.tree_lock); LASSERTF(inode->i_data.nrpages == 0, - "inode=%lu/%u(%p) nrpages=%lu, see http://jira.whamcloud.com/browse/LU-118\n", - inode->i_ino, inode->i_generation, inode, + "inode="DFID"(%p) nrpages=%lu, see http://jira.whamcloud.com/browse/LU-118\n", + PFID(ll_inode2fid(inode)), inode, inode->i_data.nrpages); } /* Workaround end */ @@ -1747,7 +1757,9 @@ int ll_iocontrol(struct inode *inode, struct file *file, rc = md_getattr(sbi->ll_md_exp, op_data, &req); ll_finish_md_op_data(op_data); if (rc) { - CERROR("failure %d inode %lu\n", rc, inode->i_ino); + CERROR("%s: failure inode "DFID": rc = %d\n", + sbi->ll_md_exp->exp_obd->obd_name, + PFID(ll_inode2fid(inode)), rc); return -abs(rc); } @@ -1772,7 +1784,7 @@ int ll_iocontrol(struct inode *inode, struct file *file, if (IS_ERR(op_data)) return PTR_ERR(op_data); - ((struct ll_iattr *)&op_data->op_attr)->ia_attr_flags = flags; + op_data->op_attr_flags = flags; op_data->op_attr.ia_valid |= ATTR_ATTR_FLAG; rc = md_setattr(sbi->ll_md_exp, op_data, NULL, 0, NULL, 0, &req, NULL); @@ -2066,11 +2078,11 @@ int ll_obd_statfs(struct inode *inode, void __user *arg) } memcpy(&type, data->ioc_inlbuf1, sizeof(__u32)); - if (type & LL_STATFS_LMV) + if (type & LL_STATFS_LMV) { exp = sbi->ll_md_exp; - else if (type & LL_STATFS_LOV) + } else if (type & LL_STATFS_LOV) { exp = sbi->ll_dt_exp; - else { + } else { rc = -ENODEV; goto out_statfs; } @@ -2271,7 +2283,7 @@ void ll_dirty_page_discard_warn(struct page *page, int ioret) { char *buf, *path = NULL; struct dentry *dentry = NULL; - struct ccc_object *obj = cl_inode2ccc(page->mapping->host); + struct vvp_object *obj = cl_inode2vvp(page->mapping->host); /* this can be called inside spin lock so use GFP_ATOMIC. */ buf = (char *)__get_free_page(GFP_ATOMIC); @@ -2285,7 +2297,7 @@ void ll_dirty_page_discard_warn(struct page *page, int ioret) "%s: dirty page discard: %s/fid: " DFID "/%s may get corrupted (rc %d)\n", ll_get_fsname(page->mapping->host->i_sb, NULL, 0), s2lsi(page->mapping->host->i_sb)->lsi_lmd->lmd_dev, - PFID(&obj->cob_header.coh_lu.loh_fid), + PFID(&obj->vob_header.coh_lu.loh_fid), (path && !IS_ERR(path)) ? path : "", ioret); if (dentry) diff --git a/drivers/staging/lustre/lustre/llite/llite_mmap.c b/drivers/staging/lustre/lustre/llite/llite_mmap.c index 5b484e62f..88ef1cac9 100644 --- a/drivers/staging/lustre/lustre/llite/llite_mmap.c +++ b/drivers/staging/lustre/lustre/llite/llite_mmap.c @@ -57,10 +57,10 @@ void policy_from_vma(ldlm_policy_data_t *policy, struct vm_area_struct *vma, unsigned long addr, size_t count) { - policy->l_extent.start = ((addr - vma->vm_start) & CFS_PAGE_MASK) + + policy->l_extent.start = ((addr - vma->vm_start) & PAGE_MASK) + (vma->vm_pgoff << PAGE_SHIFT); policy->l_extent.end = (policy->l_extent.start + count - 1) | - ~CFS_PAGE_MASK; + ~PAGE_MASK; } struct vm_area_struct *our_vma(struct mm_struct *mm, unsigned long addr, @@ -123,7 +123,8 @@ ll_fault_io_init(struct vm_area_struct *vma, struct lu_env **env_ret, *env_ret = env; - io = ccc_env_thread_io(env); +restart: + io = vvp_env_thread_io(env); io->ci_obj = ll_i2info(inode)->lli_clob; LASSERT(io->ci_obj); @@ -146,17 +147,20 @@ ll_fault_io_init(struct vm_area_struct *vma, struct lu_env **env_ret, rc = cl_io_init(env, io, CIT_FAULT, io->ci_obj); if (rc == 0) { - struct ccc_io *cio = ccc_env_io(env); + struct vvp_io *vio = vvp_env_io(env); struct ll_file_data *fd = LUSTRE_FPRIVATE(file); - LASSERT(cio->cui_cl.cis_io == io); + LASSERT(vio->vui_cl.cis_io == io); /* mmap lock must be MANDATORY it has to cache pages. */ io->ci_lockreq = CILR_MANDATORY; - cio->cui_fd = fd; + vio->vui_fd = fd; } else { LASSERT(rc < 0); cl_io_fini(env, io); + if (io->ci_need_restart) + goto restart; + cl_env_nested_put(nest, env); io = ERR_PTR(rc); } @@ -200,7 +204,7 @@ static int ll_page_mkwrite0(struct vm_area_struct *vma, struct page *vmpage, * Otherwise, we could add dirty pages into osc cache * while truncate is on-going. */ - inode = ccc_object_inode(io->ci_obj); + inode = vvp_object_inode(io->ci_obj); lli = ll_i2info(inode); down_read(&lli->lli_trunc_sem); @@ -307,17 +311,17 @@ static int ll_fault0(struct vm_area_struct *vma, struct vm_fault *vmf) vio = vvp_env_io(env); vio->u.fault.ft_vma = vma; vio->u.fault.ft_vmpage = NULL; - vio->u.fault.fault.ft_vmf = vmf; - vio->u.fault.fault.ft_flags = 0; - vio->u.fault.fault.ft_flags_valid = false; + vio->u.fault.ft_vmf = vmf; + vio->u.fault.ft_flags = 0; + vio->u.fault.ft_flags_valid = false; result = cl_io_loop(env, io); /* ft_flags are only valid if we reached * the call to filemap_fault */ - if (vio->u.fault.fault.ft_flags_valid) - fault_ret = vio->u.fault.fault.ft_flags; + if (vio->u.fault.ft_flags_valid) + fault_ret = vio->u.fault.ft_flags; vmpage = vio->u.fault.ft_vmpage; if (result != 0 && vmpage) { @@ -390,9 +394,11 @@ static int ll_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) result = ll_page_mkwrite0(vma, vmf->page, &retry); if (!printed && ++count > 16) { - CWARN("app(%s): the page %lu of file %lu is under heavy contention.\n", + const struct dentry *de = vma->vm_file->f_path.dentry; + + CWARN("app(%s): the page %lu of file "DFID" is under heavy contention\n", current->comm, vmf->pgoff, - file_inode(vma->vm_file)->i_ino); + PFID(ll_inode2fid(de->d_inode))); printed = true; } } while (retry); @@ -422,16 +428,16 @@ static int ll_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) /** * To avoid cancel the locks covering mmapped region for lock cache pressure, - * we track the mapped vma count in ccc_object::cob_mmap_cnt. + * we track the mapped vma count in vvp_object::vob_mmap_cnt. */ static void ll_vm_open(struct vm_area_struct *vma) { struct inode *inode = file_inode(vma->vm_file); - struct ccc_object *vob = cl_inode2ccc(inode); + struct vvp_object *vob = cl_inode2vvp(inode); LASSERT(vma->vm_file); - LASSERT(atomic_read(&vob->cob_mmap_cnt) >= 0); - atomic_inc(&vob->cob_mmap_cnt); + LASSERT(atomic_read(&vob->vob_mmap_cnt) >= 0); + atomic_inc(&vob->vob_mmap_cnt); } /** @@ -440,11 +446,11 @@ static void ll_vm_open(struct vm_area_struct *vma) static void ll_vm_close(struct vm_area_struct *vma) { struct inode *inode = file_inode(vma->vm_file); - struct ccc_object *vob = cl_inode2ccc(inode); + struct vvp_object *vob = cl_inode2vvp(inode); LASSERT(vma->vm_file); - atomic_dec(&vob->cob_mmap_cnt); - LASSERT(atomic_read(&vob->cob_mmap_cnt) >= 0); + atomic_dec(&vob->vob_mmap_cnt); + LASSERT(atomic_read(&vob->vob_mmap_cnt) >= 0); } /* XXX put nice comment here. talk about __free_pte -> dirty pages and diff --git a/drivers/staging/lustre/lustre/llite/llite_nfs.c b/drivers/staging/lustre/lustre/llite/llite_nfs.c index 193aab879..c1eef6198 100644 --- a/drivers/staging/lustre/lustre/llite/llite_nfs.c +++ b/drivers/staging/lustre/lustre/llite/llite_nfs.c @@ -119,7 +119,7 @@ struct inode *search_inode_for_lustre(struct super_block *sb, rc = md_getattr(sbi->ll_md_exp, op_data, &req); kfree(op_data); if (rc) { - CERROR("can't get object attrs, fid "DFID", rc %d\n", + CDEBUG(D_INFO, "can't get object attrs, fid "DFID", rc %d\n", PFID(fid), rc); return ERR_PTR(rc); } @@ -191,8 +191,9 @@ static int ll_encode_fh(struct inode *inode, __u32 *fh, int *plen, int fileid_len = sizeof(struct lustre_nfs_fid) / 4; struct lustre_nfs_fid *nfs_fid = (void *)fh; - CDEBUG(D_INFO, "encoding for (%lu," DFID ") maxlen=%d minlen=%d\n", - inode->i_ino, PFID(ll_inode2fid(inode)), *plen, fileid_len); + CDEBUG(D_INFO, "%s: encoding for ("DFID") maxlen=%d minlen=%d\n", + ll_get_fsname(inode->i_sb, NULL, 0), + PFID(ll_inode2fid(inode)), *plen, fileid_len); if (*plen < fileid_len) { *plen = fileid_len; @@ -298,8 +299,9 @@ static struct dentry *ll_get_parent(struct dentry *dchild) sbi = ll_s2sbi(dir->i_sb); - CDEBUG(D_INFO, "getting parent for (%lu," DFID ")\n", - dir->i_ino, PFID(ll_inode2fid(dir))); + CDEBUG(D_INFO, "%s: getting parent for ("DFID")\n", + ll_get_fsname(dir->i_sb, NULL, 0), + PFID(ll_inode2fid(dir))); rc = ll_get_default_mdsize(sbi, &lmmsize); if (rc != 0) @@ -314,15 +316,20 @@ static struct dentry *ll_get_parent(struct dentry *dchild) rc = md_getattr_name(sbi->ll_md_exp, op_data, &req); ll_finish_md_op_data(op_data); if (rc) { - CERROR("failure %d inode %lu get parent\n", rc, dir->i_ino); + CERROR("%s: failure inode "DFID" get parent: rc = %d\n", + ll_get_fsname(dir->i_sb, NULL, 0), + PFID(ll_inode2fid(dir)), rc); return ERR_PTR(rc); } body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY); - LASSERT(body->valid & OBD_MD_FLID); - - CDEBUG(D_INFO, "parent for " DFID " is " DFID "\n", - PFID(ll_inode2fid(dir)), PFID(&body->fid1)); - + /* + * LU-3952: MDT may lost the FID of its parent, we should not crash + * the NFS server, ll_iget_for_nfs() will handle the error. + */ + if (body->valid & OBD_MD_FLID) { + CDEBUG(D_INFO, "parent for " DFID " is " DFID "\n", + PFID(ll_inode2fid(dir)), PFID(&body->fid1)); + } result = ll_iget_for_nfs(dir->i_sb, &body->fid1, NULL); ptlrpc_req_finished(req); diff --git a/drivers/staging/lustre/lustre/llite/lloop.c b/drivers/staging/lustre/lustre/llite/lloop.c index f169c0db6..813a9a354 100644 --- a/drivers/staging/lustre/lustre/llite/lloop.c +++ b/drivers/staging/lustre/lustre/llite/lloop.c @@ -274,8 +274,9 @@ static void loop_add_bio(struct lloop_device *lo, struct bio *bio) if (lo->lo_biotail) { lo->lo_biotail->bi_next = bio; lo->lo_biotail = bio; - } else + } else { lo->lo_bio = lo->lo_biotail = bio; + } spin_unlock_irqrestore(&lo->lo_lock, flags); atomic_inc(&lo->lo_pending); diff --git a/drivers/staging/lustre/lustre/llite/lproc_llite.c b/drivers/staging/lustre/lustre/llite/lproc_llite.c index 27ab12614..55d62eb11 100644 --- a/drivers/staging/lustre/lustre/llite/lproc_llite.c +++ b/drivers/staging/lustre/lustre/llite/lproc_llite.c @@ -254,7 +254,6 @@ static ssize_t max_read_ahead_mb_store(struct kobject *kobj, pages_number *= 1 << (20 - PAGE_SHIFT); /* MB -> pages */ if (pages_number > totalram_pages / 2) { - CERROR("can't set file readahead more than %lu MB\n", totalram_pages >> (20 - PAGE_SHIFT + 1)); /*1/2 of RAM*/ return -ERANGE; @@ -393,6 +392,8 @@ static ssize_t ll_max_cached_mb_seq_write(struct file *file, struct super_block *sb = ((struct seq_file *)file->private_data)->private; struct ll_sb_info *sbi = ll_s2sbi(sb); struct cl_client_cache *cache = &sbi->ll_cache; + struct lu_env *env; + int refcheck; int mult, rc, pages_number; int diff = 0; int nrpages = 0; @@ -430,6 +431,10 @@ static ssize_t ll_max_cached_mb_seq_write(struct file *file, goto out; } + env = cl_env_get(&refcheck); + if (IS_ERR(env)) + return 0; + diff = -diff; while (diff > 0) { int tmp; @@ -455,19 +460,20 @@ static ssize_t ll_max_cached_mb_seq_write(struct file *file, break; if (!sbi->ll_dt_exp) { /* being initialized */ - rc = -ENODEV; - break; + rc = 0; + goto out; } /* difficult - have to ask OSCs to drop LRU slots. */ tmp = diff << 1; - rc = obd_set_info_async(NULL, sbi->ll_dt_exp, + rc = obd_set_info_async(env, sbi->ll_dt_exp, sizeof(KEY_CACHE_LRU_SHRINK), KEY_CACHE_LRU_SHRINK, sizeof(tmp), &tmp, NULL); if (rc < 0) break; } + cl_env_put(env, &refcheck); out: if (rc >= 0) { @@ -818,6 +824,23 @@ static ssize_t xattr_cache_store(struct kobject *kobj, } LUSTRE_RW_ATTR(xattr_cache); +static ssize_t unstable_stats_show(struct kobject *kobj, + struct attribute *attr, + char *buf) +{ + struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info, + ll_kobj); + struct cl_client_cache *cache = &sbi->ll_cache; + int pages, mb; + + pages = atomic_read(&cache->ccc_unstable_nr); + mb = (pages * PAGE_SIZE) >> 20; + + return sprintf(buf, "unstable_pages: %8d\n" + "unstable_mb: %8d\n", pages, mb); +} +LUSTRE_RO_ATTR(unstable_stats); + static struct lprocfs_vars lprocfs_llite_obd_vars[] = { /* { "mntpt_path", ll_rd_path, 0, 0 }, */ { "site", &ll_site_stats_fops, NULL, 0 }, @@ -853,6 +876,7 @@ static struct attribute *llite_attrs[] = { &lustre_attr_max_easize.attr, &lustre_attr_default_easize.attr, &lustre_attr_xattr_cache.attr, + &lustre_attr_unstable_stats.attr, NULL, }; @@ -953,6 +977,7 @@ static const char *ra_stat_string[] = { [RA_STAT_EOF] = "read-ahead to EOF", [RA_STAT_MAX_IN_FLIGHT] = "hit max r-a issue", [RA_STAT_WRONG_GRAB_PAGE] = "wrong page from grab_cache_page", + [RA_STAT_FAILED_REACH_END] = "failed to reach end" }; int ldebugfs_register_mountpoint(struct dentry *parent, diff --git a/drivers/staging/lustre/lustre/llite/namei.c b/drivers/staging/lustre/lustre/llite/namei.c index f8f98e4e8..5eba0ebae 100644 --- a/drivers/staging/lustre/lustre/llite/namei.c +++ b/drivers/staging/lustre/lustre/llite/namei.c @@ -128,12 +128,14 @@ struct inode *ll_iget(struct super_block *sb, ino_t hash, if (rc != 0) { iget_failed(inode); inode = NULL; - } else + } else { unlock_new_inode(inode); - } else if (!(inode->i_state & (I_FREEING | I_CLEAR))) + } + } else if (!(inode->i_state & (I_FREEING | I_CLEAR))) { ll_update_inode(inode, md); - CDEBUG(D_VFSTRACE, "got inode: %p for "DFID"\n", - inode, PFID(&md->body->fid1)); + CDEBUG(D_VFSTRACE, "got inode: "DFID"(%p)\n", + PFID(&md->body->fid1), inode); + } } return inode; } @@ -188,7 +190,7 @@ int ll_md_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc, break; /* Invalidate all dentries associated with this inode */ - LASSERT(lock->l_flags & LDLM_FL_CANCELING); + LASSERT(ldlm_is_canceling(lock)); if (!fid_res_name_eq(ll_inode2fid(inode), &lock->l_resource->lr_name)) { @@ -255,8 +257,8 @@ int ll_md_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc, } if ((bits & MDS_INODELOCK_UPDATE) && S_ISDIR(inode->i_mode)) { - CDEBUG(D_INODE, "invalidating inode %lu\n", - inode->i_ino); + CDEBUG(D_INODE, "invalidating inode "DFID"\n", + PFID(ll_inode2fid(inode))); truncate_inode_pages(inode->i_mapping, 0); ll_invalidate_negative_children(inode); } @@ -476,9 +478,8 @@ static struct dentry *ll_lookup_it(struct inode *parent, struct dentry *dentry, if (dentry->d_name.len > ll_i2sbi(parent)->ll_namelen) return ERR_PTR(-ENAMETOOLONG); - CDEBUG(D_VFSTRACE, "VFS Op:name=%pd,dir=%lu/%u(%p),intent=%s\n", - dentry, parent->i_ino, - parent->i_generation, parent, LL_IT2STR(it)); + CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir="DFID"(%p),intent=%s\n", + dentry, PFID(ll_inode2fid(parent)), parent, LL_IT2STR(it)); if (d_mountpoint(dentry)) CERROR("Tell Peter, lookup on mtpt, it %s\n", LL_IT2STR(it)); @@ -553,9 +554,8 @@ static struct dentry *ll_lookup_nd(struct inode *parent, struct dentry *dentry, struct lookup_intent *itp, it = { .it_op = IT_GETATTR }; struct dentry *de; - CDEBUG(D_VFSTRACE, "VFS Op:name=%pd,dir=%lu/%u(%p),flags=%u\n", - dentry, parent->i_ino, - parent->i_generation, parent, flags); + CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir="DFID"(%p),flags=%u\n", + dentry, PFID(ll_inode2fid(parent)), parent, flags); /* Optimize away (CREATE && !OPEN). Let .create handle the race. */ if ((flags & LOOKUP_CREATE) && !(flags & LOOKUP_OPEN)) @@ -586,10 +586,9 @@ static int ll_atomic_open(struct inode *dir, struct dentry *dentry, long long lookup_flags = LOOKUP_OPEN; int rc = 0; - CDEBUG(D_VFSTRACE, - "VFS Op:name=%pd,dir=%lu/%u(%p),file %p,open_flags %x,mode %x opened %d\n", - dentry, dir->i_ino, - dir->i_generation, dir, file, open_flags, mode, *opened); + CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir="DFID"(%p),file %p,open_flags %x,mode %x opened %d\n", + dentry, PFID(ll_inode2fid(dir)), dir, file, open_flags, mode, + *opened); it = kzalloc(sizeof(*it), GFP_NOFS); if (!it) @@ -680,8 +679,8 @@ static struct inode *ll_create_node(struct inode *dir, struct lookup_intent *it) * lock on the inode. Since we finally have an inode pointer, * stuff it in the lock. */ - CDEBUG(D_DLMTRACE, "setting l_ast_data to inode %p (%lu/%u)\n", - inode, inode->i_ino, inode->i_generation); + CDEBUG(D_DLMTRACE, "setting l_ast_data to inode "DFID"(%p)\n", + PFID(ll_inode2fid(dir)), inode); ll_set_lock_data(sbi->ll_md_exp, inode, it, NULL); out: ptlrpc_req_finished(request); @@ -708,9 +707,8 @@ static int ll_create_it(struct inode *dir, struct dentry *dentry, int mode, struct inode *inode; int rc = 0; - CDEBUG(D_VFSTRACE, "VFS Op:name=%pd,dir=%lu/%u(%p),intent=%s\n", - dentry, dir->i_ino, - dir->i_generation, dir, LL_IT2STR(it)); + CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir="DFID"(%p), intent=%s\n", + dentry, PFID(ll_inode2fid(dir)), dir, LL_IT2STR(it)); rc = it_open_error(DISP_OPEN_CREATE, it); if (rc) @@ -733,8 +731,9 @@ static void ll_update_times(struct ptlrpc_request *request, LASSERT(body); if (body->valid & OBD_MD_FLMTIME && body->mtime > LTIME_S(inode->i_mtime)) { - CDEBUG(D_INODE, "setting ino %lu mtime from %lu to %llu\n", - inode->i_ino, LTIME_S(inode->i_mtime), body->mtime); + CDEBUG(D_INODE, "setting fid "DFID" mtime from %lu to %llu\n", + PFID(ll_inode2fid(inode)), LTIME_S(inode->i_mtime), + body->mtime); LTIME_S(inode->i_mtime) = body->mtime; } if (body->valid & OBD_MD_FLCTIME && @@ -791,9 +790,9 @@ static int ll_mknod(struct inode *dir, struct dentry *dchild, { int err; - CDEBUG(D_VFSTRACE, "VFS Op:name=%pd,dir=%lu/%u(%p) mode %o dev %x\n", - dchild, dir->i_ino, dir->i_generation, dir, - mode, old_encode_dev(rdev)); + CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir="DFID"(%p) mode %o dev %x\n", + dchild, PFID(ll_inode2fid(dir)), dir, mode, + old_encode_dev(rdev)); if (!IS_POSIXACL(dir) || !exp_connect_umask(ll_i2mdexp(dir))) mode &= ~current_umask(); @@ -831,9 +830,8 @@ static int ll_create_nd(struct inode *dir, struct dentry *dentry, { int rc; - CDEBUG(D_VFSTRACE, "VFS Op:name=%pd,dir=%lu/%u(%p),flags=%u, excl=%d\n", - dentry, dir->i_ino, - dir->i_generation, dir, mode, want_excl); + CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir="DFID"(%p), flags=%u, excl=%d\n", + dentry, PFID(ll_inode2fid(dir)), dir, mode, want_excl); rc = ll_mknod(dir, dentry, mode, 0); @@ -845,12 +843,6 @@ static int ll_create_nd(struct inode *dir, struct dentry *dentry, return rc; } -static inline void ll_get_child_fid(struct dentry *child, struct lu_fid *fid) -{ - if (d_really_is_positive(child)) - *fid = *ll_inode2fid(d_inode(child)); -} - int ll_objects_destroy(struct ptlrpc_request *request, struct inode *dir) { struct mdt_body *body; @@ -927,23 +919,25 @@ out: * is any lock existing. They will recycle dentries and inodes based upon locks * too. b=20433 */ -static int ll_unlink(struct inode *dir, struct dentry *dentry) +static int ll_unlink(struct inode *dir, struct dentry *dchild) { struct ptlrpc_request *request = NULL; struct md_op_data *op_data; int rc; CDEBUG(D_VFSTRACE, "VFS Op:name=%pd,dir=%lu/%u(%p)\n", - dentry, dir->i_ino, dir->i_generation, dir); + dchild, dir->i_ino, dir->i_generation, dir); op_data = ll_prep_md_op_data(NULL, dir, NULL, - dentry->d_name.name, - dentry->d_name.len, + dchild->d_name.name, + dchild->d_name.len, 0, LUSTRE_OPC_ANY, NULL); if (IS_ERR(op_data)) return PTR_ERR(op_data); - ll_get_child_fid(dentry, &op_data->op_fid3); + if (dchild && dchild->d_inode) + op_data->op_fid3 = *ll_inode2fid(dchild->d_inode); + op_data->op_fid2 = op_data->op_fid3; rc = md_unlink(ll_i2sbi(dir)->ll_md_exp, op_data, &request); ll_finish_md_op_data(op_data); @@ -963,8 +957,8 @@ static int ll_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) { int err; - CDEBUG(D_VFSTRACE, "VFS Op:name=%pd,dir=%lu/%u(%p)\n", - dentry, dir->i_ino, dir->i_generation, dir); + CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir"DFID"(%p)\n", + dentry, PFID(ll_inode2fid(dir)), dir); if (!IS_POSIXACL(dir) || !exp_connect_umask(ll_i2mdexp(dir))) mode &= ~current_umask(); @@ -977,23 +971,25 @@ static int ll_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) return err; } -static int ll_rmdir(struct inode *dir, struct dentry *dentry) +static int ll_rmdir(struct inode *dir, struct dentry *dchild) { struct ptlrpc_request *request = NULL; struct md_op_data *op_data; int rc; - CDEBUG(D_VFSTRACE, "VFS Op:name=%pd,dir=%lu/%u(%p)\n", - dentry, dir->i_ino, dir->i_generation, dir); + CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir="DFID"(%p)\n", + dchild, PFID(ll_inode2fid(dir)), dir); op_data = ll_prep_md_op_data(NULL, dir, NULL, - dentry->d_name.name, - dentry->d_name.len, + dchild->d_name.name, + dchild->d_name.len, S_IFDIR, LUSTRE_OPC_ANY, NULL); if (IS_ERR(op_data)) return PTR_ERR(op_data); - ll_get_child_fid(dentry, &op_data->op_fid3); + if (dchild && dchild->d_inode) + op_data->op_fid3 = *ll_inode2fid(dchild->d_inode); + op_data->op_fid2 = op_data->op_fid3; rc = md_unlink(ll_i2sbi(dir)->ll_md_exp, op_data, &request); ll_finish_md_op_data(op_data); @@ -1011,9 +1007,8 @@ static int ll_symlink(struct inode *dir, struct dentry *dentry, { int err; - CDEBUG(D_VFSTRACE, "VFS Op:name=%pd,dir=%lu/%u(%p),target=%.*s\n", - dentry, dir->i_ino, dir->i_generation, - dir, 3000, oldname); + CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir="DFID"(%p),target=%.*s\n", + dentry, PFID(ll_inode2fid(dir)), dir, 3000, oldname); err = ll_new_node(dir, dentry, oldname, S_IFLNK | S_IRWXUGO, 0, LUSTRE_OPC_SYMLINK); @@ -1033,10 +1028,9 @@ static int ll_link(struct dentry *old_dentry, struct inode *dir, struct md_op_data *op_data; int err; - CDEBUG(D_VFSTRACE, - "VFS Op: inode=%lu/%u(%p), dir=%lu/%u(%p), target=%pd\n", - src->i_ino, src->i_generation, src, dir->i_ino, - dir->i_generation, dir, new_dentry); + CDEBUG(D_VFSTRACE, "VFS Op: inode="DFID"(%p), dir="DFID"(%p), target=%pd\n", + PFID(ll_inode2fid(src)), src, PFID(ll_inode2fid(dir)), dir, + new_dentry); op_data = ll_prep_md_op_data(NULL, src, dir, new_dentry->d_name.name, new_dentry->d_name.len, @@ -1056,42 +1050,45 @@ out: return err; } -static int ll_rename(struct inode *old_dir, struct dentry *old_dentry, - struct inode *new_dir, struct dentry *new_dentry) +static int ll_rename(struct inode *src, struct dentry *src_dchild, + struct inode *tgt, struct dentry *tgt_dchild) { struct ptlrpc_request *request = NULL; - struct ll_sb_info *sbi = ll_i2sbi(old_dir); + struct ll_sb_info *sbi = ll_i2sbi(src); struct md_op_data *op_data; int err; CDEBUG(D_VFSTRACE, - "VFS Op:oldname=%pd,src_dir=%lu/%u(%p),newname=%pd,tgt_dir=%lu/%u(%p)\n", - old_dentry, old_dir->i_ino, old_dir->i_generation, old_dir, - new_dentry, new_dir->i_ino, new_dir->i_generation, new_dir); + "VFS Op:oldname=%pd, src_dir="DFID"(%p), newname=%pd, tgt_dir="DFID"(%p)\n", + src_dchild, PFID(ll_inode2fid(src)), src, + tgt_dchild, PFID(ll_inode2fid(tgt)), tgt); - op_data = ll_prep_md_op_data(NULL, old_dir, new_dir, NULL, 0, 0, + op_data = ll_prep_md_op_data(NULL, src, tgt, NULL, 0, 0, LUSTRE_OPC_ANY, NULL); if (IS_ERR(op_data)) return PTR_ERR(op_data); - ll_get_child_fid(old_dentry, &op_data->op_fid3); - ll_get_child_fid(new_dentry, &op_data->op_fid4); + if (src_dchild && src_dchild->d_inode) + op_data->op_fid3 = *ll_inode2fid(src_dchild->d_inode); + if (tgt_dchild && tgt_dchild->d_inode) + op_data->op_fid4 = *ll_inode2fid(tgt_dchild->d_inode); + err = md_rename(sbi->ll_md_exp, op_data, - old_dentry->d_name.name, - old_dentry->d_name.len, - new_dentry->d_name.name, - new_dentry->d_name.len, &request); + src_dchild->d_name.name, + src_dchild->d_name.len, + tgt_dchild->d_name.name, + tgt_dchild->d_name.len, &request); ll_finish_md_op_data(op_data); if (!err) { - ll_update_times(request, old_dir); - ll_update_times(request, new_dir); + ll_update_times(request, src); + ll_update_times(request, tgt); ll_stats_ops_tally(sbi, LPROC_LL_RENAME, 1); - err = ll_objects_destroy(request, old_dir); + err = ll_objects_destroy(request, src); } ptlrpc_req_finished(request); if (!err) - d_move(old_dentry, new_dentry); + d_move(src_dchild, tgt_dchild); return err; } diff --git a/drivers/staging/lustre/lustre/llite/rw.c b/drivers/staging/lustre/lustre/llite/rw.c index edab6c5b7..336397773 100644 --- a/drivers/staging/lustre/lustre/llite/rw.c +++ b/drivers/staging/lustre/lustre/llite/rw.c @@ -63,7 +63,7 @@ * Finalizes cl-data before exiting typical address_space operation. Dual to * ll_cl_init(). */ -static void ll_cl_fini(struct ll_cl_context *lcc) +void ll_cl_fini(struct ll_cl_context *lcc) { struct lu_env *env = lcc->lcc_env; struct cl_io *io = lcc->lcc_io; @@ -84,200 +84,59 @@ static void ll_cl_fini(struct ll_cl_context *lcc) * Initializes common cl-data at the typical address_space operation entry * point. */ -static struct ll_cl_context *ll_cl_init(struct file *file, - struct page *vmpage, int create) +struct ll_cl_context *ll_cl_init(struct file *file, struct page *vmpage) { struct ll_cl_context *lcc; struct lu_env *env; struct cl_io *io; struct cl_object *clob; - struct ccc_io *cio; + struct vvp_io *vio; int refcheck; int result = 0; - clob = ll_i2info(vmpage->mapping->host)->lli_clob; + clob = ll_i2info(file_inode(file))->lli_clob; LASSERT(clob); env = cl_env_get(&refcheck); if (IS_ERR(env)) return ERR_CAST(env); - lcc = &vvp_env_info(env)->vti_io_ctx; + lcc = &ll_env_info(env)->lti_io_ctx; memset(lcc, 0, sizeof(*lcc)); lcc->lcc_env = env; lcc->lcc_refcheck = refcheck; lcc->lcc_cookie = current; - cio = ccc_env_io(env); - io = cio->cui_cl.cis_io; - if (!io && create) { - struct inode *inode = vmpage->mapping->host; - loff_t pos; - - if (inode_trylock(inode)) { - inode_unlock((inode)); - - /* this is too bad. Someone is trying to write the - * page w/o holding inode mutex. This means we can - * add dirty pages into cache during truncate - */ - CERROR("Proc %s is dirtying page w/o inode lock, this will break truncate\n", - current->comm); - dump_stack(); - LBUG(); - return ERR_PTR(-EIO); - } - - /* - * Loop-back driver calls ->prepare_write(). - * methods directly, bypassing file system ->write() operation, - * so cl_io has to be created here. - */ - io = ccc_env_thread_io(env); - ll_io_init(io, file, 1); - - /* No lock at all for this kind of IO - we can't do it because - * we have held page lock, it would cause deadlock. - * XXX: This causes poor performance to loop device - One page - * per RPC. - * In order to get better performance, users should use - * lloop driver instead. - */ - io->ci_lockreq = CILR_NEVER; - - pos = vmpage->index << PAGE_SHIFT; - - /* Create a temp IO to serve write. */ - result = cl_io_rw_init(env, io, CIT_WRITE, pos, PAGE_SIZE); - if (result == 0) { - cio->cui_fd = LUSTRE_FPRIVATE(file); - cio->cui_iter = NULL; - result = cl_io_iter_init(env, io); - if (result == 0) { - result = cl_io_lock(env, io); - if (result == 0) - result = cl_io_start(env, io); - } - } else - result = io->ci_result; - } - + vio = vvp_env_io(env); + io = vio->vui_cl.cis_io; lcc->lcc_io = io; if (!io) result = -EIO; - if (result == 0) { + + if (result == 0 && vmpage) { struct cl_page *page; LASSERT(io->ci_state == CIS_IO_GOING); - LASSERT(cio->cui_fd == LUSTRE_FPRIVATE(file)); + LASSERT(vio->vui_fd == LUSTRE_FPRIVATE(file)); page = cl_page_find(env, clob, vmpage->index, vmpage, CPT_CACHEABLE); if (!IS_ERR(page)) { lcc->lcc_page = page; lu_ref_add(&page->cp_reference, "cl_io", io); result = 0; - } else + } else { result = PTR_ERR(page); + } } if (result) { ll_cl_fini(lcc); lcc = ERR_PTR(result); } - CDEBUG(D_VFSTRACE, "%lu@"DFID" -> %d %p %p\n", - vmpage->index, PFID(lu_object_fid(&clob->co_lu)), result, - env, io); - return lcc; -} - -static struct ll_cl_context *ll_cl_get(void) -{ - struct ll_cl_context *lcc; - struct lu_env *env; - int refcheck; - - env = cl_env_get(&refcheck); - LASSERT(!IS_ERR(env)); - lcc = &vvp_env_info(env)->vti_io_ctx; - LASSERT(env == lcc->lcc_env); - LASSERT(current == lcc->lcc_cookie); - cl_env_put(env, &refcheck); - - /* env has got in ll_cl_init, so it is still usable. */ return lcc; } -/** - * ->prepare_write() address space operation called by generic_file_write() - * for every page during write. - */ -int ll_prepare_write(struct file *file, struct page *vmpage, unsigned from, - unsigned to) -{ - struct ll_cl_context *lcc; - int result; - - lcc = ll_cl_init(file, vmpage, 1); - if (!IS_ERR(lcc)) { - struct lu_env *env = lcc->lcc_env; - struct cl_io *io = lcc->lcc_io; - struct cl_page *page = lcc->lcc_page; - - cl_page_assume(env, io, page); - - result = cl_io_prepare_write(env, io, page, from, to); - if (result == 0) { - /* - * Add a reference, so that page is not evicted from - * the cache until ->commit_write() is called. - */ - cl_page_get(page); - lu_ref_add(&page->cp_reference, "prepare_write", - current); - } else { - cl_page_unassume(env, io, page); - ll_cl_fini(lcc); - } - /* returning 0 in prepare assumes commit must be called - * afterwards - */ - } else { - result = PTR_ERR(lcc); - } - return result; -} - -int ll_commit_write(struct file *file, struct page *vmpage, unsigned from, - unsigned to) -{ - struct ll_cl_context *lcc; - struct lu_env *env; - struct cl_io *io; - struct cl_page *page; - int result = 0; - - lcc = ll_cl_get(); - env = lcc->lcc_env; - page = lcc->lcc_page; - io = lcc->lcc_io; - - LASSERT(cl_page_is_owned(page, io)); - LASSERT(from <= to); - if (from != to) /* handle short write case. */ - result = cl_io_commit_write(env, io, page, from, to); - if (cl_page_is_owned(page, io)) - cl_page_unassume(env, io, page); - - /* - * Release reference acquired by ll_prepare_write(). - */ - lu_ref_del(&page->cp_reference, "prepare_write", current); - cl_page_put(env, page); - ll_cl_fini(lcc); - return result; -} - static void ll_ra_stats_inc_sbi(struct ll_sb_info *sbi, enum ra_stat which); /** @@ -301,7 +160,7 @@ static void ll_ra_stats_inc_sbi(struct ll_sb_info *sbi, enum ra_stat which); */ static unsigned long ll_ra_count_get(struct ll_sb_info *sbi, struct ra_io_arg *ria, - unsigned long pages) + unsigned long pages, unsigned long min) { struct ll_ra_info *ra = &sbi->ll_ra_info; long ret; @@ -341,6 +200,11 @@ static unsigned long ll_ra_count_get(struct ll_sb_info *sbi, } out: + if (ret < min) { + /* override ra limit for maximum performance */ + atomic_add(min - ret, &ra->ra_cur_pages); + ret = min; + } return ret; } @@ -357,9 +221,9 @@ static void ll_ra_stats_inc_sbi(struct ll_sb_info *sbi, enum ra_stat which) lprocfs_counter_incr(sbi->ll_ra_stats, which); } -void ll_ra_stats_inc(struct address_space *mapping, enum ra_stat which) +void ll_ra_stats_inc(struct inode *inode, enum ra_stat which) { - struct ll_sb_info *sbi = ll_i2sbi(mapping->host); + struct ll_sb_info *sbi = ll_i2sbi(inode); ll_ra_stats_inc_sbi(sbi, which); } @@ -388,61 +252,42 @@ static int index_in_window(unsigned long index, unsigned long point, return start <= index && index <= end; } -static struct ll_readahead_state *ll_ras_get(struct file *f) +void ll_ras_enter(struct file *f) { - struct ll_file_data *fd; - - fd = LUSTRE_FPRIVATE(f); - return &fd->fd_ras; -} - -void ll_ra_read_in(struct file *f, struct ll_ra_read *rar) -{ - struct ll_readahead_state *ras; - - ras = ll_ras_get(f); + struct ll_file_data *fd = LUSTRE_FPRIVATE(f); + struct ll_readahead_state *ras = &fd->fd_ras; spin_lock(&ras->ras_lock); ras->ras_requests++; ras->ras_request_index = 0; ras->ras_consecutive_requests++; - rar->lrr_reader = current; - - list_add(&rar->lrr_linkage, &ras->ras_read_beads); - spin_unlock(&ras->ras_lock); -} - -void ll_ra_read_ex(struct file *f, struct ll_ra_read *rar) -{ - struct ll_readahead_state *ras; - - ras = ll_ras_get(f); - - spin_lock(&ras->ras_lock); - list_del_init(&rar->lrr_linkage); spin_unlock(&ras->ras_lock); } static int cl_read_ahead_page(const struct lu_env *env, struct cl_io *io, struct cl_page_list *queue, struct cl_page *page, - struct page *vmpage) + struct cl_object *clob, pgoff_t *max_index) { - struct ccc_page *cp; + struct page *vmpage = page->cp_vmpage; + struct vvp_page *vpg; int rc; rc = 0; cl_page_assume(env, io, page); lu_ref_add(&page->cp_reference, "ra", current); - cp = cl2ccc_page(cl_page_at(page, &vvp_device_type)); - if (!cp->cpg_defer_uptodate && !PageUptodate(vmpage)) { - rc = cl_page_is_under_lock(env, io, page); - if (rc == -EBUSY) { - cp->cpg_defer_uptodate = 1; - cp->cpg_ra_used = 0; + vpg = cl2vvp_page(cl_object_page_slice(clob, page)); + if (!vpg->vpg_defer_uptodate && !PageUptodate(vmpage)) { + CDEBUG(D_READA, "page index %lu, max_index: %lu\n", + vvp_index(vpg), *max_index); + if (*max_index == 0 || vvp_index(vpg) > *max_index) + rc = cl_page_is_under_lock(env, io, page, max_index); + if (rc == 0) { + vpg->vpg_defer_uptodate = 1; + vpg->vpg_ra_used = 0; cl_page_list_add(queue, page); rc = 1; } else { - cl_page_delete(env, page); + cl_page_discard(env, io, page); rc = -ENOLCK; } } else { @@ -466,24 +311,25 @@ static int cl_read_ahead_page(const struct lu_env *env, struct cl_io *io, */ static int ll_read_ahead_page(const struct lu_env *env, struct cl_io *io, struct cl_page_list *queue, - pgoff_t index, struct address_space *mapping) + pgoff_t index, pgoff_t *max_index) { + struct cl_object *clob = io->ci_obj; + struct inode *inode = vvp_object_inode(clob); struct page *vmpage; - struct cl_object *clob = ll_i2info(mapping->host)->lli_clob; struct cl_page *page; enum ra_stat which = _NR_RA_STAT; /* keep gcc happy */ int rc = 0; const char *msg = NULL; - vmpage = grab_cache_page_nowait(mapping, index); + vmpage = grab_cache_page_nowait(inode->i_mapping, index); if (vmpage) { /* Check if vmpage was truncated or reclaimed */ - if (vmpage->mapping == mapping) { + if (vmpage->mapping == inode->i_mapping) { page = cl_page_find(env, clob, vmpage->index, vmpage, CPT_CACHEABLE); if (!IS_ERR(page)) { rc = cl_read_ahead_page(env, io, queue, - page, vmpage); + page, clob, max_index); if (rc == -ENOLCK) { which = RA_STAT_FAILED_MATCH; msg = "lock match failed"; @@ -504,7 +350,7 @@ static int ll_read_ahead_page(const struct lu_env *env, struct cl_io *io, msg = "g_c_p_n failed"; } if (msg) { - ll_ra_stats_inc(mapping, which); + ll_ra_stats_inc(inode, which); CDEBUG(D_READA, "%s\n", msg); } return rc; @@ -616,11 +462,12 @@ static int ll_read_ahead_pages(const struct lu_env *env, struct cl_io *io, struct cl_page_list *queue, struct ra_io_arg *ria, unsigned long *reserved_pages, - struct address_space *mapping, unsigned long *ra_end) { - int rc, count = 0, stride_ria; - unsigned long page_idx; + int rc, count = 0; + bool stride_ria; + pgoff_t page_idx; + pgoff_t max_index = 0; LASSERT(ria); RIA_DEBUG(ria); @@ -631,12 +478,13 @@ static int ll_read_ahead_pages(const struct lu_env *env, if (ras_inside_ra_window(page_idx, ria)) { /* If the page is inside the read-ahead window*/ rc = ll_read_ahead_page(env, io, queue, - page_idx, mapping); + page_idx, &max_index); if (rc == 1) { (*reserved_pages)--; count++; - } else if (rc == -ENOLCK) + } else if (rc == -ENOLCK) { break; + } } else if (stride_ria) { /* If it is not in the read-ahead window, and it is * read-ahead mode, then check whether it should skip @@ -666,25 +514,22 @@ static int ll_read_ahead_pages(const struct lu_env *env, } int ll_readahead(const struct lu_env *env, struct cl_io *io, - struct ll_readahead_state *ras, struct address_space *mapping, - struct cl_page_list *queue, int flags) + struct cl_page_list *queue, struct ll_readahead_state *ras, + bool hit) { struct vvp_io *vio = vvp_env_io(env); - struct vvp_thread_info *vti = vvp_env_info(env); - struct cl_attr *attr = ccc_env_thread_attr(env); + struct ll_thread_info *lti = ll_env_info(env); + struct cl_attr *attr = vvp_env_thread_attr(env); unsigned long start = 0, end = 0, reserved; - unsigned long ra_end, len; + unsigned long ra_end, len, mlen = 0; struct inode *inode; - struct ll_ra_read *bead; - struct ra_io_arg *ria = &vti->vti_ria; - struct ll_inode_info *lli; + struct ra_io_arg *ria = <i->lti_ria; struct cl_object *clob; int ret = 0; __u64 kms; - inode = mapping->host; - lli = ll_i2info(inode); - clob = lli->lli_clob; + clob = io->ci_obj; + inode = vvp_object_inode(clob); memset(ria, 0, sizeof(*ria)); @@ -696,22 +541,20 @@ int ll_readahead(const struct lu_env *env, struct cl_io *io, return ret; kms = attr->cat_kms; if (kms == 0) { - ll_ra_stats_inc(mapping, RA_STAT_ZERO_LEN); + ll_ra_stats_inc(inode, RA_STAT_ZERO_LEN); return 0; } spin_lock(&ras->ras_lock); - if (vio->cui_ra_window_set) - bead = &vio->cui_bead; - else - bead = NULL; /* Enlarge the RA window to encompass the full read */ - if (bead && ras->ras_window_start + ras->ras_window_len < - bead->lrr_start + bead->lrr_count) { - ras->ras_window_len = bead->lrr_start + bead->lrr_count - + if (vio->vui_ra_valid && + ras->ras_window_start + ras->ras_window_len < + vio->vui_ra_start + vio->vui_ra_count) { + ras->ras_window_len = vio->vui_ra_start + vio->vui_ra_count - ras->ras_window_start; } + /* Reserve a part of the read-ahead window that we'll be issuing */ if (ras->ras_window_len) { start = ras->ras_next_readahead; @@ -755,29 +598,48 @@ int ll_readahead(const struct lu_env *env, struct cl_io *io, spin_unlock(&ras->ras_lock); if (end == 0) { - ll_ra_stats_inc(mapping, RA_STAT_ZERO_WINDOW); + ll_ra_stats_inc(inode, RA_STAT_ZERO_WINDOW); return 0; } len = ria_page_count(ria); - if (len == 0) + if (len == 0) { + ll_ra_stats_inc(inode, RA_STAT_ZERO_WINDOW); return 0; + } + + CDEBUG(D_READA, DFID ": ria: %lu/%lu, bead: %lu/%lu, hit: %d\n", + PFID(lu_object_fid(&clob->co_lu)), + ria->ria_start, ria->ria_end, + vio->vui_ra_valid ? vio->vui_ra_start : 0, + vio->vui_ra_valid ? vio->vui_ra_count : 0, + hit); + + /* at least to extend the readahead window to cover current read */ + if (!hit && vio->vui_ra_valid && + vio->vui_ra_start + vio->vui_ra_count > ria->ria_start) { + /* to the end of current read window. */ + mlen = vio->vui_ra_start + vio->vui_ra_count - ria->ria_start; + /* trim to RPC boundary */ + start = ria->ria_start & (PTLRPC_MAX_BRW_PAGES - 1); + mlen = min(mlen, PTLRPC_MAX_BRW_PAGES - start); + } - reserved = ll_ra_count_get(ll_i2sbi(inode), ria, len); + reserved = ll_ra_count_get(ll_i2sbi(inode), ria, len, mlen); if (reserved < len) - ll_ra_stats_inc(mapping, RA_STAT_MAX_IN_FLIGHT); + ll_ra_stats_inc(inode, RA_STAT_MAX_IN_FLIGHT); - CDEBUG(D_READA, "reserved page %lu ra_cur %d ra_max %lu\n", reserved, + CDEBUG(D_READA, "reserved pages %lu/%lu/%lu, ra_cur %d, ra_max %lu\n", + reserved, len, mlen, atomic_read(&ll_i2sbi(inode)->ll_ra_info.ra_cur_pages), ll_i2sbi(inode)->ll_ra_info.ra_max_pages); - ret = ll_read_ahead_pages(env, io, queue, - ria, &reserved, mapping, &ra_end); + ret = ll_read_ahead_pages(env, io, queue, ria, &reserved, &ra_end); if (reserved != 0) ll_ra_count_put(ll_i2sbi(inode), reserved); if (ra_end == end + 1 && ra_end == (kms >> PAGE_SHIFT)) - ll_ra_stats_inc(mapping, RA_STAT_EOF); + ll_ra_stats_inc(inode, RA_STAT_EOF); /* if we didn't get to the end of the region we reserved from * the ras we need to go back and update the ras so that the @@ -789,6 +651,7 @@ int ll_readahead(const struct lu_env *env, struct cl_io *io, ra_end, end, ria->ria_end); if (ra_end != end + 1) { + ll_ra_stats_inc(inode, RA_STAT_FAILED_REACH_END); spin_lock(&ras->ras_lock); if (ra_end < ras->ras_next_readahead && index_in_window(ra_end, ras->ras_window_start, 0, @@ -836,7 +699,6 @@ void ll_readahead_init(struct inode *inode, struct ll_readahead_state *ras) spin_lock_init(&ras->ras_lock); ras_reset(inode, ras, 0); ras->ras_requests = 0; - INIT_LIST_HEAD(&ras->ras_read_beads); } /* @@ -1059,15 +921,18 @@ void ras_update(struct ll_sb_info *sbi, struct inode *inode, ras->ras_last_readpage = index; ras_set_start(inode, ras, index); - if (stride_io_mode(ras)) + if (stride_io_mode(ras)) { /* Since stride readahead is sensitive to the offset * of read-ahead, so we use original offset here, * instead of ras_window_start, which is RPC aligned */ ras->ras_next_readahead = max(index, ras->ras_next_readahead); - else - ras->ras_next_readahead = max(ras->ras_window_start, - ras->ras_next_readahead); + } else { + if (ras->ras_next_readahead < ras->ras_window_start) + ras->ras_next_readahead = ras->ras_window_start; + if (!hit) + ras->ras_next_readahead = index + 1; + } RAS_CDEBUG(ras); /* Trigger RA in the mmap case where ras_consecutive_requests @@ -1129,7 +994,7 @@ int ll_writepage(struct page *vmpage, struct writeback_control *wbc) clob = ll_i2info(inode)->lli_clob; LASSERT(clob); - io = ccc_env_thread_io(env); + io = vvp_env_thread_io(env); io->ci_obj = clob; io->ci_ignore_layout = 1; result = cl_io_init(env, io, CIT_MISC, clob); @@ -1240,8 +1105,9 @@ int ll_writepages(struct address_space *mapping, struct writeback_control *wbc) if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) { if (end == OBD_OBJECT_EOF) - end = i_size_read(inode); - mapping->writeback_index = (end >> PAGE_SHIFT) + 1; + mapping->writeback_index = 0; + else + mapping->writeback_index = (end >> PAGE_SHIFT) + 1; } return result; } @@ -1251,7 +1117,7 @@ int ll_readpage(struct file *file, struct page *vmpage) struct ll_cl_context *lcc; int result; - lcc = ll_cl_init(file, vmpage, 0); + lcc = ll_cl_init(file, vmpage); if (!IS_ERR(lcc)) { struct lu_env *env = lcc->lcc_env; struct cl_io *io = lcc->lcc_io; @@ -1273,3 +1139,28 @@ int ll_readpage(struct file *file, struct page *vmpage) } return result; } + +int ll_page_sync_io(const struct lu_env *env, struct cl_io *io, + struct cl_page *page, enum cl_req_type crt) +{ + struct cl_2queue *queue; + int result; + + LASSERT(io->ci_type == CIT_READ || io->ci_type == CIT_WRITE); + + queue = &io->ci_queue; + cl_2queue_init_page(queue, page); + + result = cl_io_submit_sync(env, io, crt, queue, 0); + LASSERT(cl_page_is_owned(page, io)); + + if (crt == CRT_READ) + /* + * in CRT_WRITE case page is left locked even in case of + * error. + */ + cl_page_list_disown(env, io, &queue->c2_qin); + cl_2queue_fini(env, queue); + + return result; +} diff --git a/drivers/staging/lustre/lustre/llite/rw26.c b/drivers/staging/lustre/lustre/llite/rw26.c index 69aa15e8e..c12a048fc 100644 --- a/drivers/staging/lustre/lustre/llite/rw26.c +++ b/drivers/staging/lustre/lustre/llite/rw26.c @@ -95,15 +95,12 @@ static void ll_invalidatepage(struct page *vmpage, unsigned int offset, if (obj) { page = cl_vmpage_page(vmpage, obj); if (page) { - lu_ref_add(&page->cp_reference, - "delete", vmpage); cl_page_delete(env, page); - lu_ref_del(&page->cp_reference, - "delete", vmpage); cl_page_put(env, page); } - } else + } else { LASSERT(vmpage->private == 0); + } cl_env_put(env, &refcheck); } } @@ -111,12 +108,12 @@ static void ll_invalidatepage(struct page *vmpage, unsigned int offset, static int ll_releasepage(struct page *vmpage, gfp_t gfp_mask) { - struct cl_env_nest nest; struct lu_env *env; + void *cookie; struct cl_object *obj; struct cl_page *page; struct address_space *mapping; - int result; + int result = 0; LASSERT(PageLocked(vmpage)); if (PageWriteback(vmpage) || PageDirty(vmpage)) @@ -130,53 +127,42 @@ static int ll_releasepage(struct page *vmpage, gfp_t gfp_mask) if (!obj) return 1; - /* 1 for page allocator, 1 for cl_page and 1 for page cache */ + /* 1 for caller, 1 for cl_page and 1 for page cache */ if (page_count(vmpage) > 3) return 0; - /* TODO: determine what gfp should be used by @gfp_mask. */ - env = cl_env_nested_get(&nest); - if (IS_ERR(env)) - /* If we can't allocate an env we won't call cl_page_put() - * later on which further means it's impossible to drop - * page refcount by cl_page, so ask kernel to not free - * this page. - */ - return 0; - page = cl_vmpage_page(vmpage, obj); - result = !page; - if (page) { - if (!cl_page_in_use(page)) { - result = 1; - cl_page_delete(env, page); - } - cl_page_put(env, page); - } - cl_env_nested_put(&nest, env); - return result; -} + if (!page) + return 1; -static int ll_set_page_dirty(struct page *vmpage) -{ -#if 0 - struct cl_page *page = vvp_vmpage_page_transient(vmpage); - struct vvp_object *obj = cl_inode2vvp(vmpage->mapping->host); - struct vvp_page *cpg; + cookie = cl_env_reenter(); + env = cl_env_percpu_get(); + LASSERT(!IS_ERR(env)); - /* - * XXX should page method be called here? - */ - LASSERT(&obj->co_cl == page->cp_obj); - cpg = cl2vvp_page(cl_page_at(page, &vvp_device_type)); - /* - * XXX cannot do much here, because page is possibly not locked: - * sys_munmap()->... - * ->unmap_page_range()->zap_pte_range()->set_page_dirty(). + if (!cl_page_in_use(page)) { + result = 1; + cl_page_delete(env, page); + } + + /* To use percpu env array, the call path can not be rescheduled; + * otherwise percpu array will be messed if ll_releaspage() called + * again on the same CPU. + * + * If this page holds the last refc of cl_object, the following + * call path may cause reschedule: + * cl_page_put -> cl_page_free -> cl_object_put -> + * lu_object_put -> lu_object_free -> lov_delete_raid0. + * + * However, the kernel can't get rid of this inode until all pages have + * been cleaned up. Now that we hold page lock here, it's pretty safe + * that we won't get into object delete path. */ - vvp_write_pending(obj, cpg); -#endif - return __set_page_dirty_nobuffers(vmpage); + LASSERT(cl_object_refc(obj) > 1); + cl_page_put(env, page); + + cl_env_percpu_put(env); + cl_env_reexit(cookie); + return result; } #define MAX_DIRECTIO_SIZE (2*1024*1024*1024UL) @@ -266,7 +252,7 @@ ssize_t ll_direct_rw_pages(const struct lu_env *env, struct cl_io *io, * write directly */ if (clp->cp_type == CPT_CACHEABLE) { - struct page *vmpage = cl_page_vmpage(env, clp); + struct page *vmpage = cl_page_vmpage(clp); struct page *src_page; struct page *dst_page; void *src; @@ -358,14 +344,14 @@ static ssize_t ll_direct_IO_26_seg(const struct lu_env *env, struct cl_io *io, */ #define MAX_DIO_SIZE ((KMALLOC_MAX_SIZE / sizeof(struct brw_page) * \ PAGE_SIZE) & ~(DT_MAX_BRW_SIZE - 1)) -static ssize_t ll_direct_IO_26(struct kiocb *iocb, struct iov_iter *iter, - loff_t file_offset) +static ssize_t ll_direct_IO_26(struct kiocb *iocb, struct iov_iter *iter) { struct lu_env *env; struct cl_io *io; struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; - struct ccc_object *obj = cl_inode2ccc(inode); + struct vvp_object *obj = cl_inode2vvp(inode); + loff_t file_offset = iocb->ki_pos; ssize_t count = iov_iter_count(iter); ssize_t tot_bytes = 0, result = 0; struct ll_inode_info *lli = ll_i2info(inode); @@ -376,22 +362,21 @@ static ssize_t ll_direct_IO_26(struct kiocb *iocb, struct iov_iter *iter, return -EBADF; /* FIXME: io smaller than PAGE_SIZE is broken on ia64 ??? */ - if ((file_offset & ~CFS_PAGE_MASK) || (count & ~CFS_PAGE_MASK)) + if ((file_offset & ~PAGE_MASK) || (count & ~PAGE_MASK)) return -EINVAL; - CDEBUG(D_VFSTRACE, - "VFS Op:inode=%lu/%u(%p), size=%zd (max %lu), offset=%lld=%llx, pages %zd (max %lu)\n", - inode->i_ino, inode->i_generation, inode, count, MAX_DIO_SIZE, + CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), size=%zd (max %lu), offset=%lld=%llx, pages %zd (max %lu)\n", + PFID(ll_inode2fid(inode)), inode, count, MAX_DIO_SIZE, file_offset, file_offset, count >> PAGE_SHIFT, MAX_DIO_SIZE >> PAGE_SHIFT); /* Check that all user buffers are aligned as well */ - if (iov_iter_alignment(iter) & ~CFS_PAGE_MASK) + if (iov_iter_alignment(iter) & ~PAGE_MASK) return -EINVAL; env = cl_env_get(&refcheck); LASSERT(!IS_ERR(env)); - io = ccc_env_io(env)->cui_cl.cis_io; + io = vvp_env_io(env)->vui_cl.cis_io; LASSERT(io); /* 0. Need locking between buffered and direct access. and race with @@ -401,7 +386,7 @@ static ssize_t ll_direct_IO_26(struct kiocb *iocb, struct iov_iter *iter, if (iov_iter_rw(iter) == READ) inode_lock(inode); - LASSERT(obj->cob_transient_pages == 0); + LASSERT(obj->vob_transient_pages == 0); while (iov_iter_count(iter)) { struct page **pages; size_t offs; @@ -435,8 +420,8 @@ static ssize_t ll_direct_IO_26(struct kiocb *iocb, struct iov_iter *iter, size > (PAGE_SIZE / sizeof(*pages)) * PAGE_SIZE) { size = ((((size / 2) - 1) | - ~CFS_PAGE_MASK) + 1) & - CFS_PAGE_MASK; + ~PAGE_MASK) + 1) & + PAGE_MASK; CDEBUG(D_VFSTRACE, "DIO size now %lu\n", size); continue; @@ -449,62 +434,213 @@ static ssize_t ll_direct_IO_26(struct kiocb *iocb, struct iov_iter *iter, file_offset += result; } out: - LASSERT(obj->cob_transient_pages == 0); + LASSERT(obj->vob_transient_pages == 0); if (iov_iter_rw(iter) == READ) inode_unlock(inode); if (tot_bytes > 0) { - if (iov_iter_rw(iter) == WRITE) { - struct lov_stripe_md *lsm; - - lsm = ccc_inode_lsm_get(inode); - LASSERT(lsm); - lov_stripe_lock(lsm); - obd_adjust_kms(ll_i2dtexp(inode), lsm, file_offset, 0); - lov_stripe_unlock(lsm); - ccc_inode_lsm_put(inode, lsm); - } + struct vvp_io *vio = vvp_env_io(env); + + /* no commit async for direct IO */ + vio->u.write.vui_written += tot_bytes; } cl_env_put(env, &refcheck); - return tot_bytes ? : result; + return tot_bytes ? tot_bytes : result; +} + +/** + * Prepare partially written-to page for a write. + */ +static int ll_prepare_partial_page(const struct lu_env *env, struct cl_io *io, + struct cl_page *pg) +{ + struct cl_attr *attr = vvp_env_thread_attr(env); + struct cl_object *obj = io->ci_obj; + struct vvp_page *vpg = cl_object_page_slice(obj, pg); + loff_t offset = cl_offset(obj, vvp_index(vpg)); + int result; + + cl_object_attr_lock(obj); + result = cl_object_attr_get(env, obj, attr); + cl_object_attr_unlock(obj); + if (result == 0) { + /* + * If are writing to a new page, no need to read old data. + * The extent locking will have updated the KMS, and for our + * purposes here we can treat it like i_size. + */ + if (attr->cat_kms <= offset) { + char *kaddr = kmap_atomic(vpg->vpg_page); + + memset(kaddr, 0, cl_page_size(obj)); + kunmap_atomic(kaddr); + } else if (vpg->vpg_defer_uptodate) { + vpg->vpg_ra_used = 1; + } else { + result = ll_page_sync_io(env, io, pg, CRT_READ); + } + } + return result; } static int ll_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata) { + struct ll_cl_context *lcc; + struct lu_env *env; + struct cl_io *io; + struct cl_page *page; + struct cl_object *clob = ll_i2info(mapping->host)->lli_clob; pgoff_t index = pos >> PAGE_SHIFT; - struct page *page; - int rc; - unsigned from = pos & (PAGE_SIZE - 1); + struct page *vmpage = NULL; + unsigned int from = pos & (PAGE_SIZE - 1); + unsigned int to = from + len; + int result = 0; - page = grab_cache_page_write_begin(mapping, index, flags); - if (!page) - return -ENOMEM; + CDEBUG(D_VFSTRACE, "Writing %lu of %d to %d bytes\n", index, from, len); - *pagep = page; + lcc = ll_cl_init(file, NULL); + if (IS_ERR(lcc)) { + result = PTR_ERR(lcc); + goto out; + } - rc = ll_prepare_write(file, page, from, from + len); - if (rc) { - unlock_page(page); - put_page(page); + env = lcc->lcc_env; + io = lcc->lcc_io; + + /* To avoid deadlock, try to lock page first. */ + vmpage = grab_cache_page_nowait(mapping, index); + if (unlikely(!vmpage || PageDirty(vmpage) || PageWriteback(vmpage))) { + struct vvp_io *vio = vvp_env_io(env); + struct cl_page_list *plist = &vio->u.write.vui_queue; + + /* if the page is already in dirty cache, we have to commit + * the pages right now; otherwise, it may cause deadlock + * because it holds page lock of a dirty page and request for + * more grants. It's okay for the dirty page to be the first + * one in commit page list, though. + */ + if (vmpage && plist->pl_nr > 0) { + unlock_page(vmpage); + put_page(vmpage); + vmpage = NULL; + } + + /* commit pages and then wait for page lock */ + result = vvp_io_write_commit(env, io); + if (result < 0) + goto out; + + if (!vmpage) { + vmpage = grab_cache_page_write_begin(mapping, index, + flags); + if (!vmpage) { + result = -ENOMEM; + goto out; + } + } } - return rc; + + page = cl_page_find(env, clob, vmpage->index, vmpage, CPT_CACHEABLE); + if (IS_ERR(page)) { + result = PTR_ERR(page); + goto out; + } + + lcc->lcc_page = page; + lu_ref_add(&page->cp_reference, "cl_io", io); + + cl_page_assume(env, io, page); + if (!PageUptodate(vmpage)) { + /* + * We're completely overwriting an existing page, + * so _don't_ set it up to date until commit_write + */ + if (from == 0 && to == PAGE_SIZE) { + CL_PAGE_HEADER(D_PAGE, env, page, "full page write\n"); + POISON_PAGE(vmpage, 0x11); + } else { + /* TODO: can be optimized at OSC layer to check if it + * is a lockless IO. In that case, it's not necessary + * to read the data. + */ + result = ll_prepare_partial_page(env, io, page); + if (result == 0) + SetPageUptodate(vmpage); + } + } + if (result < 0) + cl_page_unassume(env, io, page); +out: + if (result < 0) { + if (vmpage) { + unlock_page(vmpage); + put_page(vmpage); + } + if (!IS_ERR(lcc)) + ll_cl_fini(lcc); + } else { + *pagep = vmpage; + *fsdata = lcc; + } + return result; } static int ll_write_end(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned copied, - struct page *page, void *fsdata) + struct page *vmpage, void *fsdata) { + struct ll_cl_context *lcc = fsdata; + struct lu_env *env; + struct cl_io *io; + struct vvp_io *vio; + struct cl_page *page; unsigned from = pos & (PAGE_SIZE - 1); - int rc; + bool unplug = false; + int result = 0; + + put_page(vmpage); + + env = lcc->lcc_env; + page = lcc->lcc_page; + io = lcc->lcc_io; + vio = vvp_env_io(env); + + LASSERT(cl_page_is_owned(page, io)); + if (copied > 0) { + struct cl_page_list *plist = &vio->u.write.vui_queue; + + lcc->lcc_page = NULL; /* page will be queued */ + + /* Add it into write queue */ + cl_page_list_add(plist, page); + if (plist->pl_nr == 1) /* first page */ + vio->u.write.vui_from = from; + else + LASSERT(from == 0); + vio->u.write.vui_to = from + copied; + + /* We may have one full RPC, commit it soon */ + if (plist->pl_nr >= PTLRPC_MAX_BRW_PAGES) + unplug = true; + + CL_PAGE_DEBUG(D_VFSTRACE, env, page, + "queued page: %d.\n", plist->pl_nr); + } else { + cl_page_disown(env, io, page); + + /* page list is not contiguous now, commit it now */ + unplug = true; + } - rc = ll_commit_write(file, page, from, from + copied); - unlock_page(page); - put_page(page); + if (unplug || + file->f_flags & O_SYNC || IS_SYNC(file_inode(file))) + result = vvp_io_write_commit(env, io); - return rc ?: copied; + ll_cl_fini(lcc); + return result >= 0 ? copied : result; } #ifdef CONFIG_MIGRATION @@ -523,7 +659,7 @@ const struct address_space_operations ll_aops = { .direct_IO = ll_direct_IO_26, .writepage = ll_writepage, .writepages = ll_writepages, - .set_page_dirty = ll_set_page_dirty, + .set_page_dirty = __set_page_dirty_nobuffers, .write_begin = ll_write_begin, .write_end = ll_write_end, .invalidatepage = ll_invalidatepage, diff --git a/drivers/staging/lustre/lustre/llite/statahead.c b/drivers/staging/lustre/lustre/llite/statahead.c index 99ffd1589..6322f8866 100644 --- a/drivers/staging/lustre/lustre/llite/statahead.c +++ b/drivers/staging/lustre/lustre/llite/statahead.c @@ -661,8 +661,9 @@ static void ll_post_statahead(struct ll_statahead_info *sai) if (rc) goto out; - CDEBUG(D_DLMTRACE, "setting l_data to inode %p (%lu/%u)\n", - child, child->i_ino, child->i_generation); + CDEBUG(D_DLMTRACE, "%s: setting l_data to inode "DFID"%p\n", + ll_get_fsname(child->i_sb, NULL, 0), + PFID(ll_inode2fid(child)), child); ll_set_lock_data(ll_i2sbi(dir)->ll_md_exp, child, it, NULL); entry->se_inode = child; @@ -1591,13 +1592,11 @@ int do_statahead_enter(struct inode *dir, struct dentry **dentryp, *dentryp = alias; } else if (d_inode(*dentryp) != inode) { /* revalidate, but inode is recreated */ - CDEBUG(D_READA, - "stale dentry %pd inode %lu/%u, statahead inode %lu/%u\n", - *dentryp, - d_inode(*dentryp)->i_ino, - d_inode(*dentryp)->i_generation, - inode->i_ino, - inode->i_generation); + CDEBUG(D_READA, "%s: stale dentry %pd inode "DFID", statahead inode "DFID"\n", + ll_get_fsname(d_inode(*dentryp)->i_sb, NULL, 0), + *dentryp, + PFID(ll_inode2fid(d_inode(*dentryp))), + PFID(ll_inode2fid(inode))); ll_sai_unplug(sai, entry); return -ESTALE; } else { diff --git a/drivers/staging/lustre/lustre/llite/super25.c b/drivers/staging/lustre/lustre/llite/super25.c index 61856d37a..415750b0b 100644 --- a/drivers/staging/lustre/lustre/llite/super25.c +++ b/drivers/staging/lustre/lustre/llite/super25.c @@ -164,9 +164,18 @@ static int __init lustre_init(void) if (rc != 0) goto out_sysfs; + cl_inode_fini_env = cl_env_alloc(&cl_inode_fini_refcheck, + LCT_REMEMBER | LCT_NOREF); + if (IS_ERR(cl_inode_fini_env)) { + rc = PTR_ERR(cl_inode_fini_env); + goto out_vvp; + } + + cl_inode_fini_env->le_ctx.lc_cookie = 0x4; + rc = ll_xattr_init(); if (rc != 0) - goto out_vvp; + goto out_inode_fini_env; lustre_register_client_fill_super(ll_fill_super); lustre_register_kill_super_cb(ll_kill_super); @@ -174,6 +183,8 @@ static int __init lustre_init(void) return 0; +out_inode_fini_env: + cl_env_put(cl_inode_fini_env, &cl_inode_fini_refcheck); out_vvp: vvp_global_fini(); out_sysfs: @@ -198,6 +209,7 @@ static void __exit lustre_exit(void) kset_unregister(llite_kset); ll_xattr_fini(); + cl_env_put(cl_inode_fini_env, &cl_inode_fini_refcheck); vvp_global_fini(); kmem_cache_destroy(ll_inode_cachep); diff --git a/drivers/staging/lustre/lustre/llite/symlink.c b/drivers/staging/lustre/lustre/llite/symlink.c index 46d03ea48..3fc736ccf 100644 --- a/drivers/staging/lustre/lustre/llite/symlink.c +++ b/drivers/staging/lustre/lustre/llite/symlink.c @@ -77,7 +77,9 @@ static int ll_readlink_internal(struct inode *inode, ll_finish_md_op_data(op_data); if (rc) { if (rc != -ENOENT) - CERROR("inode %lu: rc = %d\n", inode->i_ino, rc); + CERROR("%s: inode "DFID": rc = %d\n", + ll_get_fsname(inode->i_sb, NULL, 0), + PFID(ll_inode2fid(inode)), rc); goto failed; } @@ -90,8 +92,10 @@ static int ll_readlink_internal(struct inode *inode, LASSERT(symlen != 0); if (body->eadatasize != symlen) { - CERROR("inode %lu: symlink length %d not expected %d\n", - inode->i_ino, body->eadatasize - 1, symlen - 1); + CERROR("%s: inode "DFID": symlink length %d not expected %d\n", + ll_get_fsname(inode->i_sb, NULL, 0), + PFID(ll_inode2fid(inode)), body->eadatasize - 1, + symlen - 1); rc = -EPROTO; goto failed; } diff --git a/drivers/staging/lustre/lustre/llite/vvp_dev.c b/drivers/staging/lustre/lustre/llite/vvp_dev.c index 282b70b77..47101de1c 100644 --- a/drivers/staging/lustre/lustre/llite/vvp_dev.c +++ b/drivers/staging/lustre/lustre/llite/vvp_dev.c @@ -36,6 +36,7 @@ * cl_device and cl_device_type implementation for VVP layer. * * Author: Nikita Danilov + * Author: Jinshan Xiong */ #define DEBUG_SUBSYSTEM S_LLITE @@ -56,43 +57,74 @@ * "llite_" (var. "ll_") prefix. */ -static struct kmem_cache *vvp_thread_kmem; +static struct kmem_cache *ll_thread_kmem; +struct kmem_cache *vvp_lock_kmem; +struct kmem_cache *vvp_object_kmem; +struct kmem_cache *vvp_req_kmem; static struct kmem_cache *vvp_session_kmem; +static struct kmem_cache *vvp_thread_kmem; + static struct lu_kmem_descr vvp_caches[] = { { - .ckd_cache = &vvp_thread_kmem, - .ckd_name = "vvp_thread_kmem", - .ckd_size = sizeof(struct vvp_thread_info), + .ckd_cache = &ll_thread_kmem, + .ckd_name = "ll_thread_kmem", + .ckd_size = sizeof(struct ll_thread_info), + }, + { + .ckd_cache = &vvp_lock_kmem, + .ckd_name = "vvp_lock_kmem", + .ckd_size = sizeof(struct vvp_lock), + }, + { + .ckd_cache = &vvp_object_kmem, + .ckd_name = "vvp_object_kmem", + .ckd_size = sizeof(struct vvp_object), + }, + { + .ckd_cache = &vvp_req_kmem, + .ckd_name = "vvp_req_kmem", + .ckd_size = sizeof(struct vvp_req), }, { .ckd_cache = &vvp_session_kmem, .ckd_name = "vvp_session_kmem", .ckd_size = sizeof(struct vvp_session) }, + { + .ckd_cache = &vvp_thread_kmem, + .ckd_name = "vvp_thread_kmem", + .ckd_size = sizeof(struct vvp_thread_info), + }, { .ckd_cache = NULL } }; -static void *vvp_key_init(const struct lu_context *ctx, - struct lu_context_key *key) +static void *ll_thread_key_init(const struct lu_context *ctx, + struct lu_context_key *key) { struct vvp_thread_info *info; - info = kmem_cache_zalloc(vvp_thread_kmem, GFP_NOFS); + info = kmem_cache_zalloc(ll_thread_kmem, GFP_NOFS); if (!info) info = ERR_PTR(-ENOMEM); return info; } -static void vvp_key_fini(const struct lu_context *ctx, - struct lu_context_key *key, void *data) +static void ll_thread_key_fini(const struct lu_context *ctx, + struct lu_context_key *key, void *data) { struct vvp_thread_info *info = data; - kmem_cache_free(vvp_thread_kmem, info); + kmem_cache_free(ll_thread_kmem, info); } +struct lu_context_key ll_thread_key = { + .lct_tags = LCT_CL_THREAD, + .lct_init = ll_thread_key_init, + .lct_fini = ll_thread_key_fini +}; + static void *vvp_session_key_init(const struct lu_context *ctx, struct lu_context_key *key) { @@ -112,34 +144,127 @@ static void vvp_session_key_fini(const struct lu_context *ctx, kmem_cache_free(vvp_session_kmem, session); } -struct lu_context_key vvp_key = { - .lct_tags = LCT_CL_THREAD, - .lct_init = vvp_key_init, - .lct_fini = vvp_key_fini -}; - struct lu_context_key vvp_session_key = { .lct_tags = LCT_SESSION, .lct_init = vvp_session_key_init, .lct_fini = vvp_session_key_fini }; +void *vvp_thread_key_init(const struct lu_context *ctx, + struct lu_context_key *key) +{ + struct vvp_thread_info *vti; + + vti = kmem_cache_zalloc(vvp_thread_kmem, GFP_NOFS); + if (!vti) + vti = ERR_PTR(-ENOMEM); + return vti; +} + +void vvp_thread_key_fini(const struct lu_context *ctx, + struct lu_context_key *key, void *data) +{ + struct vvp_thread_info *vti = data; + + kmem_cache_free(vvp_thread_kmem, vti); +} + +struct lu_context_key vvp_thread_key = { + .lct_tags = LCT_CL_THREAD, + .lct_init = vvp_thread_key_init, + .lct_fini = vvp_thread_key_fini +}; + /* type constructor/destructor: vvp_type_{init,fini,start,stop}(). */ -LU_TYPE_INIT_FINI(vvp, &ccc_key, &ccc_session_key, &vvp_key, &vvp_session_key); +LU_TYPE_INIT_FINI(vvp, &vvp_thread_key, &ll_thread_key, &vvp_session_key); static const struct lu_device_operations vvp_lu_ops = { .ldo_object_alloc = vvp_object_alloc }; static const struct cl_device_operations vvp_cl_ops = { - .cdo_req_init = ccc_req_init + .cdo_req_init = vvp_req_init }; +static struct lu_device *vvp_device_free(const struct lu_env *env, + struct lu_device *d) +{ + struct vvp_device *vdv = lu2vvp_dev(d); + struct cl_site *site = lu2cl_site(d->ld_site); + struct lu_device *next = cl2lu_dev(vdv->vdv_next); + + if (d->ld_site) { + cl_site_fini(site); + kfree(site); + } + cl_device_fini(lu2cl_dev(d)); + kfree(vdv); + return next; +} + static struct lu_device *vvp_device_alloc(const struct lu_env *env, struct lu_device_type *t, struct lustre_cfg *cfg) { - return ccc_device_alloc(env, t, cfg, &vvp_lu_ops, &vvp_cl_ops); + struct vvp_device *vdv; + struct lu_device *lud; + struct cl_site *site; + int rc; + + vdv = kzalloc(sizeof(*vdv), GFP_NOFS); + if (!vdv) + return ERR_PTR(-ENOMEM); + + lud = &vdv->vdv_cl.cd_lu_dev; + cl_device_init(&vdv->vdv_cl, t); + vvp2lu_dev(vdv)->ld_ops = &vvp_lu_ops; + vdv->vdv_cl.cd_ops = &vvp_cl_ops; + + site = kzalloc(sizeof(*site), GFP_NOFS); + if (site) { + rc = cl_site_init(site, &vdv->vdv_cl); + if (rc == 0) { + rc = lu_site_init_finish(&site->cs_lu); + } else { + LASSERT(!lud->ld_site); + CERROR("Cannot init lu_site, rc %d.\n", rc); + kfree(site); + } + } else { + rc = -ENOMEM; + } + if (rc != 0) { + vvp_device_free(env, lud); + lud = ERR_PTR(rc); + } + return lud; +} + +static int vvp_device_init(const struct lu_env *env, struct lu_device *d, + const char *name, struct lu_device *next) +{ + struct vvp_device *vdv; + int rc; + + vdv = lu2vvp_dev(d); + vdv->vdv_next = lu2cl_dev(next); + + LASSERT(d->ld_site && next->ld_type); + next->ld_site = d->ld_site; + rc = next->ld_type->ldt_ops->ldto_device_init(env, next, + next->ld_type->ldt_name, + NULL); + if (rc == 0) { + lu_device_get(next); + lu_ref_add(&next->ld_reference, "lu-stack", &lu_site_init); + } + return rc; +} + +static struct lu_device *vvp_device_fini(const struct lu_env *env, + struct lu_device *d) +{ + return cl2lu_dev(lu2vvp_dev(d)->vdv_next); } static const struct lu_device_type_operations vvp_device_type_ops = { @@ -150,9 +275,9 @@ static const struct lu_device_type_operations vvp_device_type_ops = { .ldto_stop = vvp_type_stop, .ldto_device_alloc = vvp_device_alloc, - .ldto_device_free = ccc_device_free, - .ldto_device_init = ccc_device_init, - .ldto_device_fini = ccc_device_fini + .ldto_device_free = vvp_device_free, + .ldto_device_init = vvp_device_init, + .ldto_device_fini = vvp_device_fini, }; struct lu_device_type vvp_device_type = { @@ -168,20 +293,27 @@ struct lu_device_type vvp_device_type = { */ int vvp_global_init(void) { - int result; + int rc; - result = lu_kmem_init(vvp_caches); - if (result == 0) { - result = ccc_global_init(&vvp_device_type); - if (result != 0) - lu_kmem_fini(vvp_caches); - } - return result; + rc = lu_kmem_init(vvp_caches); + if (rc != 0) + return rc; + + rc = lu_device_type_init(&vvp_device_type); + if (rc != 0) + goto out_kmem; + + return 0; + +out_kmem: + lu_kmem_fini(vvp_caches); + + return rc; } void vvp_global_fini(void) { - ccc_global_fini(&vvp_device_type); + lu_device_type_fini(&vvp_device_type); lu_kmem_fini(vvp_caches); } @@ -205,13 +337,14 @@ int cl_sb_init(struct super_block *sb) cl = cl_type_setup(env, NULL, &vvp_device_type, sbi->ll_dt_exp->exp_obd->obd_lu_dev); if (!IS_ERR(cl)) { - cl2ccc_dev(cl)->cdv_sb = sb; + cl2vvp_dev(cl)->vdv_sb = sb; sbi->ll_cl = cl; sbi->ll_site = cl2lu_dev(cl)->ld_site; } cl_env_put(env, &refcheck); - } else + } else { rc = PTR_ERR(env); + } return rc; } @@ -356,23 +489,18 @@ static loff_t vvp_pgcache_find(const struct lu_env *env, return ~0ULL; clob = vvp_pgcache_obj(env, dev, &id); if (clob) { - struct cl_object_header *hdr; - int nr; - struct cl_page *pg; - - /* got an object. Find next page. */ - hdr = cl_object_header(clob); + struct inode *inode = vvp_object_inode(clob); + struct page *vmpage; + int nr; - spin_lock(&hdr->coh_page_guard); - nr = radix_tree_gang_lookup(&hdr->coh_tree, - (void **)&pg, - id.vpi_index, 1); + nr = find_get_pages_contig(inode->i_mapping, + id.vpi_index, 1, &vmpage); if (nr > 0) { - id.vpi_index = pg->cp_index; + id.vpi_index = vmpage->index; /* Cant support over 16T file */ - nr = !(pg->cp_index > 0xffffffff); + nr = !(vmpage->index > 0xffffffff); + put_page(vmpage); } - spin_unlock(&hdr->coh_page_guard); lu_object_ref_del(&clob->co_lu, "dump", current); cl_object_put(env, clob); @@ -398,21 +526,20 @@ static loff_t vvp_pgcache_find(const struct lu_env *env, static void vvp_pgcache_page_show(const struct lu_env *env, struct seq_file *seq, struct cl_page *page) { - struct ccc_page *cpg; + struct vvp_page *vpg; struct page *vmpage; int has_flags; - cpg = cl2ccc_page(cl_page_at(page, &vvp_device_type)); - vmpage = cpg->cpg_page; - seq_printf(seq, " %5i | %p %p %s %s %s %s | %p %lu/%u(%p) %lu %u [", + vpg = cl2vvp_page(cl_page_at(page, &vvp_device_type)); + vmpage = vpg->vpg_page; + seq_printf(seq, " %5i | %p %p %s %s %s %s | %p "DFID"(%p) %lu %u [", 0 /* gen */, - cpg, page, + vpg, page, "none", - cpg->cpg_write_queued ? "wq" : "- ", - cpg->cpg_defer_uptodate ? "du" : "- ", + vpg->vpg_write_queued ? "wq" : "- ", + vpg->vpg_defer_uptodate ? "du" : "- ", PageWriteback(vmpage) ? "wb" : "-", - vmpage, vmpage->mapping->host->i_ino, - vmpage->mapping->host->i_generation, + vmpage, PFID(ll_inode2fid(vmpage->mapping->host)), vmpage->mapping->host, vmpage->index, page_count(vmpage)); has_flags = 0; @@ -431,8 +558,6 @@ static int vvp_pgcache_show(struct seq_file *f, void *v) struct ll_sb_info *sbi; struct cl_object *clob; struct lu_env *env; - struct cl_page *page; - struct cl_object_header *hdr; struct vvp_pgcache_id id; int refcheck; int result; @@ -444,27 +569,38 @@ static int vvp_pgcache_show(struct seq_file *f, void *v) sbi = f->private; clob = vvp_pgcache_obj(env, &sbi->ll_cl->cd_lu_dev, &id); if (clob) { - hdr = cl_object_header(clob); - - spin_lock(&hdr->coh_page_guard); - page = cl_page_lookup(hdr, id.vpi_index); - spin_unlock(&hdr->coh_page_guard); + struct inode *inode = vvp_object_inode(clob); + struct cl_page *page = NULL; + struct page *vmpage; + + result = find_get_pages_contig(inode->i_mapping, + id.vpi_index, 1, + &vmpage); + if (result > 0) { + lock_page(vmpage); + page = cl_vmpage_page(vmpage, clob); + unlock_page(vmpage); + put_page(vmpage); + } - seq_printf(f, "%8x@"DFID": ", - id.vpi_index, PFID(&hdr->coh_lu.loh_fid)); + seq_printf(f, "%8x@" DFID ": ", id.vpi_index, + PFID(lu_object_fid(&clob->co_lu))); if (page) { vvp_pgcache_page_show(env, f, page); cl_page_put(env, page); - } else + } else { seq_puts(f, "missing\n"); + } lu_object_ref_del(&clob->co_lu, "dump", current); cl_object_put(env, clob); - } else + } else { seq_printf(f, "%llx missing\n", pos); + } cl_env_put(env, &refcheck); result = 0; - } else + } else { result = PTR_ERR(env); + } return result; } diff --git a/drivers/staging/lustre/lustre/llite/vvp_internal.h b/drivers/staging/lustre/lustre/llite/vvp_internal.h index bb393378c..27b9b0a01 100644 --- a/drivers/staging/lustre/lustre/llite/vvp_internal.h +++ b/drivers/staging/lustre/lustre/llite/vvp_internal.h @@ -41,21 +41,337 @@ #ifndef VVP_INTERNAL_H #define VVP_INTERNAL_H +#include "../include/lustre/lustre_idl.h" #include "../include/cl_object.h" -#include "llite_internal.h" -int vvp_io_init(const struct lu_env *env, - struct cl_object *obj, struct cl_io *io); -int vvp_lock_init(const struct lu_env *env, - struct cl_object *obj, struct cl_lock *lock, - const struct cl_io *io); +enum obd_notify_event; +struct inode; +struct lov_stripe_md; +struct lustre_md; +struct obd_capa; +struct obd_device; +struct obd_export; +struct page; + +/* specific architecture can implement only part of this list */ +enum vvp_io_subtype { + /** normal IO */ + IO_NORMAL, + /** io started from splice_{read|write} */ + IO_SPLICE +}; + +/** + * IO state private to IO state private to VVP layer. + */ +struct vvp_io { + /** super class */ + struct cl_io_slice vui_cl; + struct cl_io_lock_link vui_link; + /** + * I/O vector information to or from which read/write is going. + */ + struct iov_iter *vui_iter; + /** + * Total size for the left IO. + */ + size_t vui_tot_count; + + union { + struct vvp_fault_io { + /** + * Inode modification time that is checked across DLM + * lock request. + */ + time64_t ft_mtime; + struct vm_area_struct *ft_vma; + /** + * locked page returned from vvp_io + */ + struct page *ft_vmpage; + /** + * kernel fault info + */ + struct vm_fault *ft_vmf; + /** + * fault API used bitflags for return code. + */ + unsigned int ft_flags; + /** + * check that flags are from filemap_fault + */ + bool ft_flags_valid; + } fault; + struct { + struct pipe_inode_info *vui_pipe; + unsigned int vui_flags; + } splice; + struct { + struct cl_page_list vui_queue; + unsigned long vui_written; + int vui_from; + int vui_to; + } write; + } u; + + enum vvp_io_subtype vui_io_subtype; + + /** + * Layout version when this IO is initialized + */ + __u32 vui_layout_gen; + /** + * File descriptor against which IO is done. + */ + struct ll_file_data *vui_fd; + struct kiocb *vui_iocb; + + /* Readahead state. */ + pgoff_t vui_ra_start; + pgoff_t vui_ra_count; + /* Set when vui_ra_{start,count} have been initialized. */ + bool vui_ra_valid; +}; + +extern struct lu_device_type vvp_device_type; + +extern struct lu_context_key vvp_session_key; +extern struct lu_context_key vvp_thread_key; + +extern struct kmem_cache *vvp_lock_kmem; +extern struct kmem_cache *vvp_object_kmem; +extern struct kmem_cache *vvp_req_kmem; + +struct vvp_thread_info { + struct cl_lock vti_lock; + struct cl_lock_descr vti_descr; + struct cl_io vti_io; + struct cl_attr vti_attr; +}; + +static inline struct vvp_thread_info *vvp_env_info(const struct lu_env *env) +{ + struct vvp_thread_info *vti; + + vti = lu_context_key_get(&env->le_ctx, &vvp_thread_key); + LASSERT(vti); + + return vti; +} + +static inline struct cl_lock *vvp_env_lock(const struct lu_env *env) +{ + struct cl_lock *lock = &vvp_env_info(env)->vti_lock; + + memset(lock, 0, sizeof(*lock)); + return lock; +} + +static inline struct cl_attr *vvp_env_thread_attr(const struct lu_env *env) +{ + struct cl_attr *attr = &vvp_env_info(env)->vti_attr; + + memset(attr, 0, sizeof(*attr)); + + return attr; +} + +static inline struct cl_io *vvp_env_thread_io(const struct lu_env *env) +{ + struct cl_io *io = &vvp_env_info(env)->vti_io; + + memset(io, 0, sizeof(*io)); + + return io; +} + +struct vvp_session { + struct vvp_io cs_ios; +}; + +static inline struct vvp_session *vvp_env_session(const struct lu_env *env) +{ + struct vvp_session *ses; + + ses = lu_context_key_get(env->le_ses, &vvp_session_key); + LASSERT(ses); + + return ses; +} + +static inline struct vvp_io *vvp_env_io(const struct lu_env *env) +{ + return &vvp_env_session(env)->cs_ios; +} + +/** + * ccc-private object state. + */ +struct vvp_object { + struct cl_object_header vob_header; + struct cl_object vob_cl; + struct inode *vob_inode; + + /** + * A list of dirty pages pending IO in the cache. Used by + * SOM. Protected by ll_inode_info::lli_lock. + * + * \see vvp_page::vpg_pending_linkage + */ + struct list_head vob_pending_list; + + /** + * Access this counter is protected by inode->i_sem. Now that + * the lifetime of transient pages must be covered by inode sem, + * we don't need to hold any lock.. + */ + int vob_transient_pages; + /** + * Number of outstanding mmaps on this file. + * + * \see ll_vm_open(), ll_vm_close(). + */ + atomic_t vob_mmap_cnt; + + /** + * various flags + * vob_discard_page_warned + * if pages belonging to this object are discarded when a client + * is evicted, some debug info will be printed, this flag will be set + * during processing the first discarded page, then avoid flooding + * debug message for lots of discarded pages. + * + * \see ll_dirty_page_discard_warn. + */ + unsigned int vob_discard_page_warned:1; +}; + +/** + * VVP-private page state. + */ +struct vvp_page { + struct cl_page_slice vpg_cl; + int vpg_defer_uptodate; + int vpg_ra_used; + int vpg_write_queued; + /** + * Non-empty iff this page is already counted in + * vvp_object::vob_pending_list. This list is only used as a flag, + * that is, never iterated through, only checked for list_empty(), but + * having a list is useful for debugging. + */ + struct list_head vpg_pending_linkage; + /** VM page */ + struct page *vpg_page; +}; + +static inline struct vvp_page *cl2vvp_page(const struct cl_page_slice *slice) +{ + return container_of(slice, struct vvp_page, vpg_cl); +} + +static inline pgoff_t vvp_index(struct vvp_page *vvp) +{ + return vvp->vpg_cl.cpl_index; +} + +struct vvp_device { + struct cl_device vdv_cl; + struct super_block *vdv_sb; + struct cl_device *vdv_next; +}; + +struct vvp_lock { + struct cl_lock_slice vlk_cl; +}; + +struct vvp_req { + struct cl_req_slice vrq_cl; +}; + +void *ccc_key_init(const struct lu_context *ctx, + struct lu_context_key *key); +void ccc_key_fini(const struct lu_context *ctx, + struct lu_context_key *key, void *data); + +void ccc_umount(const struct lu_env *env, struct cl_device *dev); + +static inline struct lu_device *vvp2lu_dev(struct vvp_device *vdv) +{ + return &vdv->vdv_cl.cd_lu_dev; +} + +static inline struct vvp_device *lu2vvp_dev(const struct lu_device *d) +{ + return container_of0(d, struct vvp_device, vdv_cl.cd_lu_dev); +} + +static inline struct vvp_device *cl2vvp_dev(const struct cl_device *d) +{ + return container_of0(d, struct vvp_device, vdv_cl); +} + +static inline struct vvp_object *cl2vvp(const struct cl_object *obj) +{ + return container_of0(obj, struct vvp_object, vob_cl); +} + +static inline struct vvp_object *lu2vvp(const struct lu_object *obj) +{ + return container_of0(obj, struct vvp_object, vob_cl.co_lu); +} + +static inline struct inode *vvp_object_inode(const struct cl_object *obj) +{ + return cl2vvp(obj)->vob_inode; +} + +int vvp_object_invariant(const struct cl_object *obj); +struct vvp_object *cl_inode2vvp(struct inode *inode); + +static inline struct page *cl2vm_page(const struct cl_page_slice *slice) +{ + return cl2vvp_page(slice)->vpg_page; +} + +static inline struct vvp_lock *cl2vvp_lock(const struct cl_lock_slice *slice) +{ + return container_of(slice, struct vvp_lock, vlk_cl); +} + +# define CLOBINVRNT(env, clob, expr) \ + ((void)sizeof(env), (void)sizeof(clob), (void)sizeof(!!(expr))) + +/** + * New interfaces to get and put lov_stripe_md from lov layer. This violates + * layering because lov_stripe_md is supposed to be a private data in lov. + * + * NB: If you find you have to use these interfaces for your new code, please + * think about it again. These interfaces may be removed in the future for + * better layering. + */ +struct lov_stripe_md *lov_lsm_get(struct cl_object *clobj); +void lov_lsm_put(struct cl_object *clobj, struct lov_stripe_md *lsm); +int lov_read_and_clear_async_rc(struct cl_object *clob); + +struct lov_stripe_md *ccc_inode_lsm_get(struct inode *inode); +void ccc_inode_lsm_put(struct inode *inode, struct lov_stripe_md *lsm); + +int vvp_io_init(const struct lu_env *env, struct cl_object *obj, + struct cl_io *io); +int vvp_io_write_commit(const struct lu_env *env, struct cl_io *io); +int vvp_lock_init(const struct lu_env *env, struct cl_object *obj, + struct cl_lock *lock, const struct cl_io *io); int vvp_page_init(const struct lu_env *env, struct cl_object *obj, - struct cl_page *page, struct page *vmpage); + struct cl_page *page, pgoff_t index); +int vvp_req_init(const struct lu_env *env, struct cl_device *dev, + struct cl_req *req); struct lu_object *vvp_object_alloc(const struct lu_env *env, const struct lu_object_header *hdr, struct lu_device *dev); -struct ccc_object *cl_inode2ccc(struct inode *inode); +int vvp_global_init(void); +void vvp_global_fini(void); extern const struct file_operations vvp_dump_pgcache_file_ops; diff --git a/drivers/staging/lustre/lustre/llite/vvp_io.c b/drivers/staging/lustre/lustre/llite/vvp_io.c index 85a835976..5bf9592ae 100644 --- a/drivers/staging/lustre/lustre/llite/vvp_io.c +++ b/drivers/staging/lustre/lustre/llite/vvp_io.c @@ -44,21 +44,30 @@ #include "../include/obd.h" #include "../include/lustre_lite.h" +#include "llite_internal.h" #include "vvp_internal.h" -static struct vvp_io *cl2vvp_io(const struct lu_env *env, - const struct cl_io_slice *slice); +struct vvp_io *cl2vvp_io(const struct lu_env *env, + const struct cl_io_slice *slice) +{ + struct vvp_io *vio; + + vio = container_of(slice, struct vvp_io, vui_cl); + LASSERT(vio == vvp_env_io(env)); + + return vio; +} /** * True, if \a io is a normal io, False for splice_{read,write} */ -int cl_is_normalio(const struct lu_env *env, const struct cl_io *io) +static int cl_is_normalio(const struct lu_env *env, const struct cl_io *io) { struct vvp_io *vio = vvp_env_io(env); LASSERT(io->ci_type == CIT_READ || io->ci_type == CIT_WRITE); - return vio->cui_io_subtype == IO_NORMAL; + return vio->vui_io_subtype == IO_NORMAL; } /** @@ -71,7 +80,7 @@ static bool can_populate_pages(const struct lu_env *env, struct cl_io *io, struct inode *inode) { struct ll_inode_info *lli = ll_i2info(inode); - struct ccc_io *cio = ccc_env_io(env); + struct vvp_io *vio = vvp_env_io(env); bool rc = true; switch (io->ci_type) { @@ -80,7 +89,7 @@ static bool can_populate_pages(const struct lu_env *env, struct cl_io *io, /* don't need lock here to check lli_layout_gen as we have held * extent lock and GROUP lock has to hold to swap layout */ - if (ll_layout_version_get(lli) != cio->cui_layout_gen) { + if (ll_layout_version_get(lli) != vio->vui_layout_gen) { io->ci_need_restart = 1; /* this will return application a short read/write */ io->ci_continue = 0; @@ -95,20 +104,187 @@ static bool can_populate_pages(const struct lu_env *env, struct cl_io *io, return rc; } +static void vvp_object_size_lock(struct cl_object *obj) +{ + struct inode *inode = vvp_object_inode(obj); + + ll_inode_size_lock(inode); + cl_object_attr_lock(obj); +} + +static void vvp_object_size_unlock(struct cl_object *obj) +{ + struct inode *inode = vvp_object_inode(obj); + + cl_object_attr_unlock(obj); + ll_inode_size_unlock(inode); +} + +/** + * Helper function that if necessary adjusts file size (inode->i_size), when + * position at the offset \a pos is accessed. File size can be arbitrary stale + * on a Lustre client, but client at least knows KMS. If accessed area is + * inside [0, KMS], set file size to KMS, otherwise glimpse file size. + * + * Locking: cl_isize_lock is used to serialize changes to inode size and to + * protect consistency between inode size and cl_object + * attributes. cl_object_size_lock() protects consistency between cl_attr's of + * top-object and sub-objects. + */ +static int vvp_prep_size(const struct lu_env *env, struct cl_object *obj, + struct cl_io *io, loff_t start, size_t count, + int *exceed) +{ + struct cl_attr *attr = vvp_env_thread_attr(env); + struct inode *inode = vvp_object_inode(obj); + loff_t pos = start + count - 1; + loff_t kms; + int result; + + /* + * Consistency guarantees: following possibilities exist for the + * relation between region being accessed and real file size at this + * moment: + * + * (A): the region is completely inside of the file; + * + * (B-x): x bytes of region are inside of the file, the rest is + * outside; + * + * (C): the region is completely outside of the file. + * + * This classification is stable under DLM lock already acquired by + * the caller, because to change the class, other client has to take + * DLM lock conflicting with our lock. Also, any updates to ->i_size + * by other threads on this client are serialized by + * ll_inode_size_lock(). This guarantees that short reads are handled + * correctly in the face of concurrent writes and truncates. + */ + vvp_object_size_lock(obj); + result = cl_object_attr_get(env, obj, attr); + if (result == 0) { + kms = attr->cat_kms; + if (pos > kms) { + /* + * A glimpse is necessary to determine whether we + * return a short read (B) or some zeroes at the end + * of the buffer (C) + */ + vvp_object_size_unlock(obj); + result = cl_glimpse_lock(env, io, inode, obj, 0); + if (result == 0 && exceed) { + /* If objective page index exceed end-of-file + * page index, return directly. Do not expect + * kernel will check such case correctly. + * linux-2.6.18-128.1.1 miss to do that. + * --bug 17336 + */ + loff_t size = i_size_read(inode); + loff_t cur_index = start >> PAGE_SHIFT; + loff_t size_index = (size - 1) >> PAGE_SHIFT; + + if ((size == 0 && cur_index != 0) || + size_index < cur_index) + *exceed = 1; + } + return result; + } + /* + * region is within kms and, hence, within real file + * size (A). We need to increase i_size to cover the + * read region so that generic_file_read() will do its + * job, but that doesn't mean the kms size is + * _correct_, it is only the _minimum_ size. If + * someone does a stat they will get the correct size + * which will always be >= the kms value here. + * b=11081 + */ + if (i_size_read(inode) < kms) { + i_size_write(inode, kms); + CDEBUG(D_VFSTRACE, DFID " updating i_size %llu\n", + PFID(lu_object_fid(&obj->co_lu)), + (__u64)i_size_read(inode)); + } + } + + vvp_object_size_unlock(obj); + + return result; +} + /***************************************************************************** * * io operations. * */ +static int vvp_io_one_lock_index(const struct lu_env *env, struct cl_io *io, + __u32 enqflags, enum cl_lock_mode mode, + pgoff_t start, pgoff_t end) +{ + struct vvp_io *vio = vvp_env_io(env); + struct cl_lock_descr *descr = &vio->vui_link.cill_descr; + struct cl_object *obj = io->ci_obj; + + CLOBINVRNT(env, obj, vvp_object_invariant(obj)); + + CDEBUG(D_VFSTRACE, "lock: %d [%lu, %lu]\n", mode, start, end); + + memset(&vio->vui_link, 0, sizeof(vio->vui_link)); + + if (vio->vui_fd && (vio->vui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) { + descr->cld_mode = CLM_GROUP; + descr->cld_gid = vio->vui_fd->fd_grouplock.lg_gid; + } else { + descr->cld_mode = mode; + } + descr->cld_obj = obj; + descr->cld_start = start; + descr->cld_end = end; + descr->cld_enq_flags = enqflags; + + cl_io_lock_add(env, io, &vio->vui_link); + return 0; +} + +static int vvp_io_one_lock(const struct lu_env *env, struct cl_io *io, + __u32 enqflags, enum cl_lock_mode mode, + loff_t start, loff_t end) +{ + struct cl_object *obj = io->ci_obj; + + return vvp_io_one_lock_index(env, io, enqflags, mode, + cl_index(obj, start), cl_index(obj, end)); +} + +static int vvp_io_write_iter_init(const struct lu_env *env, + const struct cl_io_slice *ios) +{ + struct vvp_io *vio = cl2vvp_io(env, ios); + + cl_page_list_init(&vio->u.write.vui_queue); + vio->u.write.vui_written = 0; + vio->u.write.vui_from = 0; + vio->u.write.vui_to = PAGE_SIZE; + + return 0; +} + +static void vvp_io_write_iter_fini(const struct lu_env *env, + const struct cl_io_slice *ios) +{ + struct vvp_io *vio = cl2vvp_io(env, ios); + + LASSERT(vio->u.write.vui_queue.pl_nr == 0); +} + static int vvp_io_fault_iter_init(const struct lu_env *env, const struct cl_io_slice *ios) { struct vvp_io *vio = cl2vvp_io(env, ios); - struct inode *inode = ccc_object_inode(ios->cis_obj); + struct inode *inode = vvp_object_inode(ios->cis_obj); - LASSERT(inode == - file_inode(cl2ccc_io(env, ios)->cui_fd->fd_file)); + LASSERT(inode == file_inode(vio->vui_fd->fd_file)); vio->u.fault.ft_mtime = inode->i_mtime.tv_sec; return 0; } @@ -117,15 +293,16 @@ static void vvp_io_fini(const struct lu_env *env, const struct cl_io_slice *ios) { struct cl_io *io = ios->cis_io; struct cl_object *obj = io->ci_obj; - struct ccc_io *cio = cl2ccc_io(env, ios); + struct vvp_io *vio = cl2vvp_io(env, ios); + struct inode *inode = vvp_object_inode(obj); - CLOBINVRNT(env, obj, ccc_object_invariant(obj)); + CLOBINVRNT(env, obj, vvp_object_invariant(obj)); CDEBUG(D_VFSTRACE, DFID " ignore/verify layout %d/%d, layout version %d restore needed %d\n", PFID(lu_object_fid(&obj->co_lu)), io->ci_ignore_layout, io->ci_verify_layout, - cio->cui_layout_gen, io->ci_restore_needed); + vio->vui_layout_gen, io->ci_restore_needed); if (io->ci_restore_needed == 1) { int rc; @@ -133,7 +310,7 @@ static void vvp_io_fini(const struct lu_env *env, const struct cl_io_slice *ios) /* file was detected release, we need to restore it * before finishing the io */ - rc = ll_layout_restore(ccc_object_inode(obj)); + rc = ll_layout_restore(inode, 0, OBD_OBJECT_EOF); /* if restore registration failed, no restart, * we will return -ENODATA */ @@ -159,16 +336,16 @@ static void vvp_io_fini(const struct lu_env *env, const struct cl_io_slice *ios) __u32 gen = 0; /* check layout version */ - ll_layout_refresh(ccc_object_inode(obj), &gen); - io->ci_need_restart = cio->cui_layout_gen != gen; + ll_layout_refresh(inode, &gen); + io->ci_need_restart = vio->vui_layout_gen != gen; if (io->ci_need_restart) { CDEBUG(D_VFSTRACE, DFID" layout changed from %d to %d.\n", PFID(lu_object_fid(&obj->co_lu)), - cio->cui_layout_gen, gen); + vio->vui_layout_gen, gen); /* today successful restore is the only possible case */ /* restore was done, clear restoring state */ - ll_i2info(ccc_object_inode(obj))->lli_flags &= + ll_i2info(vvp_object_inode(obj))->lli_flags &= ~LLIF_FILE_RESTORING; } } @@ -180,7 +357,7 @@ static void vvp_io_fault_fini(const struct lu_env *env, struct cl_io *io = ios->cis_io; struct cl_page *page = io->u.ci_fault.ft_page; - CLOBINVRNT(env, io->ci_obj, ccc_object_invariant(io->ci_obj)); + CLOBINVRNT(env, io->ci_obj, vvp_object_invariant(io->ci_obj)); if (page) { lu_ref_del(&page->cp_reference, "fault", io); @@ -203,16 +380,16 @@ static enum cl_lock_mode vvp_mode_from_vma(struct vm_area_struct *vma) } static int vvp_mmap_locks(const struct lu_env *env, - struct ccc_io *vio, struct cl_io *io) + struct vvp_io *vio, struct cl_io *io) { - struct ccc_thread_info *cti = ccc_env_info(env); + struct vvp_thread_info *cti = vvp_env_info(env); struct mm_struct *mm = current->mm; struct vm_area_struct *vma; - struct cl_lock_descr *descr = &cti->cti_descr; + struct cl_lock_descr *descr = &cti->vti_descr; ldlm_policy_data_t policy; unsigned long addr; ssize_t count; - int result; + int result = 0; struct iov_iter i; struct iovec iov; @@ -221,21 +398,21 @@ static int vvp_mmap_locks(const struct lu_env *env, if (!cl_is_normalio(env, io)) return 0; - if (!vio->cui_iter) /* nfs or loop back device write */ + if (!vio->vui_iter) /* nfs or loop back device write */ return 0; /* No MM (e.g. NFS)? No vmas too. */ if (!mm) return 0; - iov_for_each(iov, i, *(vio->cui_iter)) { + iov_for_each(iov, i, *vio->vui_iter) { addr = (unsigned long)iov.iov_base; count = iov.iov_len; if (count == 0) continue; - count += addr & (~CFS_PAGE_MASK); - addr &= CFS_PAGE_MASK; + count += addr & (~PAGE_MASK); + addr &= PAGE_MASK; down_read(&mm->mmap_sem); while ((vma = our_vma(mm, addr, count)) != NULL) { @@ -244,10 +421,10 @@ static int vvp_mmap_locks(const struct lu_env *env, if (ll_file_nolock(vma->vm_file)) { /* - * For no lock case, a lockless lock will be - * generated. + * For no lock case is not allowed for mmap */ - flags = CEF_NEVER; + result = -EINVAL; + break; } /* @@ -269,10 +446,8 @@ static int vvp_mmap_locks(const struct lu_env *env, descr->cld_mode, descr->cld_start, descr->cld_end); - if (result < 0) { - up_read(&mm->mmap_sem); - return result; - } + if (result < 0) + break; if (vma->vm_end - addr >= count) break; @@ -281,26 +456,55 @@ static int vvp_mmap_locks(const struct lu_env *env, addr = vma->vm_end; } up_read(&mm->mmap_sem); + if (result < 0) + break; } - return 0; + return result; +} + +static void vvp_io_advance(const struct lu_env *env, + const struct cl_io_slice *ios, + size_t nob) +{ + struct vvp_io *vio = cl2vvp_io(env, ios); + struct cl_io *io = ios->cis_io; + struct cl_object *obj = ios->cis_io->ci_obj; + + CLOBINVRNT(env, obj, vvp_object_invariant(obj)); + + if (!cl_is_normalio(env, io)) + return; + + iov_iter_reexpand(vio->vui_iter, vio->vui_tot_count -= nob); +} + +static void vvp_io_update_iov(const struct lu_env *env, + struct vvp_io *vio, struct cl_io *io) +{ + size_t size = io->u.ci_rw.crw_count; + + if (!cl_is_normalio(env, io) || !vio->vui_iter) + return; + + iov_iter_truncate(vio->vui_iter, size); } static int vvp_io_rw_lock(const struct lu_env *env, struct cl_io *io, enum cl_lock_mode mode, loff_t start, loff_t end) { - struct ccc_io *cio = ccc_env_io(env); + struct vvp_io *vio = vvp_env_io(env); int result; int ast_flags = 0; LASSERT(io->ci_type == CIT_READ || io->ci_type == CIT_WRITE); - ccc_io_update_iov(env, cio, io); + vvp_io_update_iov(env, vio, io); if (io->u.ci_rw.crw_nonblock) ast_flags |= CEF_NONBLOCK; - result = vvp_mmap_locks(env, cio, io); + result = vvp_mmap_locks(env, vio, io); if (result == 0) - result = ccc_io_one_lock(env, io, ast_flags, mode, start, end); + result = vvp_io_one_lock(env, io, ast_flags, mode, start, end); return result; } @@ -325,9 +529,11 @@ static int vvp_io_fault_lock(const struct lu_env *env, /* * XXX LDLM_FL_CBPENDING */ - return ccc_io_one_lock_index - (env, io, 0, vvp_mode_from_vma(vio->u.fault.ft_vma), - io->u.ci_fault.ft_index, io->u.ci_fault.ft_index); + return vvp_io_one_lock_index(env, + io, 0, + vvp_mode_from_vma(vio->u.fault.ft_vma), + io->u.ci_fault.ft_index, + io->u.ci_fault.ft_index); } static int vvp_io_write_lock(const struct lu_env *env, @@ -354,14 +560,13 @@ static int vvp_io_setattr_iter_init(const struct lu_env *env, } /** - * Implementation of cl_io_operations::cio_lock() method for CIT_SETATTR io. + * Implementation of cl_io_operations::vio_lock() method for CIT_SETATTR io. * * Handles "lockless io" mode when extent locking is done by server. */ static int vvp_io_setattr_lock(const struct lu_env *env, const struct cl_io_slice *ios) { - struct ccc_io *cio = ccc_env_io(env); struct cl_io *io = ios->cis_io; __u64 new_size; __u32 enqflags = 0; @@ -378,8 +583,8 @@ static int vvp_io_setattr_lock(const struct lu_env *env, return 0; new_size = 0; } - cio->u.setattr.cui_local_lock = SETATTR_EXTENT_LOCK; - return ccc_io_one_lock(env, io, enqflags, CLM_WRITE, + + return vvp_io_one_lock(env, io, enqflags, CLM_WRITE, new_size, OBD_OBJECT_EOF); } @@ -413,7 +618,7 @@ static int vvp_io_setattr_time(const struct lu_env *env, { struct cl_io *io = ios->cis_io; struct cl_object *obj = io->ci_obj; - struct cl_attr *attr = ccc_env_thread_attr(env); + struct cl_attr *attr = vvp_env_thread_attr(env); int result; unsigned valid = CAT_CTIME; @@ -437,7 +642,7 @@ static int vvp_io_setattr_start(const struct lu_env *env, const struct cl_io_slice *ios) { struct cl_io *io = ios->cis_io; - struct inode *inode = ccc_object_inode(io->ci_obj); + struct inode *inode = vvp_object_inode(io->ci_obj); int result = 0; inode_lock(inode); @@ -453,7 +658,7 @@ static void vvp_io_setattr_end(const struct lu_env *env, const struct cl_io_slice *ios) { struct cl_io *io = ios->cis_io; - struct inode *inode = ccc_object_inode(io->ci_obj); + struct inode *inode = vvp_object_inode(io->ci_obj); if (cl_io_is_trunc(io)) /* Truncate in memory pages - they must be clean pages @@ -474,27 +679,25 @@ static int vvp_io_read_start(const struct lu_env *env, const struct cl_io_slice *ios) { struct vvp_io *vio = cl2vvp_io(env, ios); - struct ccc_io *cio = cl2ccc_io(env, ios); struct cl_io *io = ios->cis_io; struct cl_object *obj = io->ci_obj; - struct inode *inode = ccc_object_inode(obj); - struct ll_ra_read *bead = &vio->cui_bead; - struct file *file = cio->cui_fd->fd_file; + struct inode *inode = vvp_object_inode(obj); + struct file *file = vio->vui_fd->fd_file; int result; loff_t pos = io->u.ci_rd.rd.crw_pos; long cnt = io->u.ci_rd.rd.crw_count; - long tot = cio->cui_tot_count; + long tot = vio->vui_tot_count; int exceed = 0; - CLOBINVRNT(env, obj, ccc_object_invariant(obj)); + CLOBINVRNT(env, obj, vvp_object_invariant(obj)); CDEBUG(D_VFSTRACE, "read: -> [%lli, %lli)\n", pos, pos + cnt); if (!can_populate_pages(env, io, inode)) return 0; - result = ccc_prep_size(env, obj, io, pos, tot, &exceed); + result = vvp_prep_size(env, obj, io, pos, tot, &exceed); if (result != 0) return result; else if (exceed != 0) @@ -505,30 +708,27 @@ static int vvp_io_read_start(const struct lu_env *env, inode->i_ino, cnt, pos, i_size_read(inode)); /* turn off the kernel's read-ahead */ - cio->cui_fd->fd_file->f_ra.ra_pages = 0; + vio->vui_fd->fd_file->f_ra.ra_pages = 0; /* initialize read-ahead window once per syscall */ - if (!vio->cui_ra_window_set) { - vio->cui_ra_window_set = 1; - bead->lrr_start = cl_index(obj, pos); - /* - * XXX: explicit PAGE_SIZE - */ - bead->lrr_count = cl_index(obj, tot + PAGE_SIZE - 1); - ll_ra_read_in(file, bead); + if (!vio->vui_ra_valid) { + vio->vui_ra_valid = true; + vio->vui_ra_start = cl_index(obj, pos); + vio->vui_ra_count = cl_index(obj, tot + PAGE_SIZE - 1); + ll_ras_enter(file); } /* BUG: 5972 */ file_accessed(file); - switch (vio->cui_io_subtype) { + switch (vio->vui_io_subtype) { case IO_NORMAL: - LASSERT(cio->cui_iocb->ki_pos == pos); - result = generic_file_read_iter(cio->cui_iocb, cio->cui_iter); + LASSERT(vio->vui_iocb->ki_pos == pos); + result = generic_file_read_iter(vio->vui_iocb, vio->vui_iter); break; case IO_SPLICE: result = generic_file_splice_read(file, &pos, - vio->u.splice.cui_pipe, cnt, - vio->u.splice.cui_flags); + vio->u.splice.vui_pipe, cnt, + vio->u.splice.vui_flags); /* LU-1109: do splice read stripe by stripe otherwise if it * may make nfsd stuck if this read occupied all internal pipe * buffers. @@ -536,7 +736,7 @@ static int vvp_io_read_start(const struct lu_env *env, io->ci_continue = 0; break; default: - CERROR("Wrong IO type %u\n", vio->cui_io_subtype); + CERROR("Wrong IO type %u\n", vio->vui_io_subtype); LBUG(); } @@ -546,30 +746,201 @@ out: io->ci_continue = 0; io->ci_nob += result; ll_rw_stats_tally(ll_i2sbi(inode), current->pid, - cio->cui_fd, pos, result, READ); + vio->vui_fd, pos, result, READ); result = 0; } return result; } -static void vvp_io_read_fini(const struct lu_env *env, const struct cl_io_slice *ios) +static int vvp_io_commit_sync(const struct lu_env *env, struct cl_io *io, + struct cl_page_list *plist, int from, int to) { - struct vvp_io *vio = cl2vvp_io(env, ios); - struct ccc_io *cio = cl2ccc_io(env, ios); + struct cl_2queue *queue = &io->ci_queue; + struct cl_page *page; + unsigned int bytes = 0; + int rc = 0; - if (vio->cui_ra_window_set) - ll_ra_read_ex(cio->cui_fd->fd_file, &vio->cui_bead); + if (plist->pl_nr == 0) + return 0; - vvp_io_fini(env, ios); + if (from > 0 || to != PAGE_SIZE) { + page = cl_page_list_first(plist); + if (plist->pl_nr == 1) { + cl_page_clip(env, page, from, to); + } else { + if (from > 0) + cl_page_clip(env, page, from, PAGE_SIZE); + if (to != PAGE_SIZE) { + page = cl_page_list_last(plist); + cl_page_clip(env, page, 0, to); + } + } + } + + cl_2queue_init(queue); + cl_page_list_splice(plist, &queue->c2_qin); + rc = cl_io_submit_sync(env, io, CRT_WRITE, queue, 0); + + /* plist is not sorted any more */ + cl_page_list_splice(&queue->c2_qin, plist); + cl_page_list_splice(&queue->c2_qout, plist); + cl_2queue_fini(env, queue); + + if (rc == 0) { + /* calculate bytes */ + bytes = plist->pl_nr << PAGE_SHIFT; + bytes -= from + PAGE_SIZE - to; + + while (plist->pl_nr > 0) { + page = cl_page_list_first(plist); + cl_page_list_del(env, plist, page); + + cl_page_clip(env, page, 0, PAGE_SIZE); + + SetPageUptodate(cl_page_vmpage(page)); + cl_page_disown(env, io, page); + + /* held in ll_cl_init() */ + lu_ref_del(&page->cp_reference, "cl_io", io); + cl_page_put(env, page); + } + } + + return bytes > 0 ? bytes : rc; +} + +static void write_commit_callback(const struct lu_env *env, struct cl_io *io, + struct cl_page *page) +{ + struct vvp_page *vpg; + struct page *vmpage = page->cp_vmpage; + struct cl_object *clob = cl_io_top(io)->ci_obj; + + SetPageUptodate(vmpage); + set_page_dirty(vmpage); + + vpg = cl2vvp_page(cl_object_page_slice(clob, page)); + vvp_write_pending(cl2vvp(clob), vpg); + + cl_page_disown(env, io, page); + + /* held in ll_cl_init() */ + lu_ref_del(&page->cp_reference, "cl_io", io); + cl_page_put(env, page); +} + +/* make sure the page list is contiguous */ +static bool page_list_sanity_check(struct cl_object *obj, + struct cl_page_list *plist) +{ + struct cl_page *page; + pgoff_t index = CL_PAGE_EOF; + + cl_page_list_for_each(page, plist) { + struct vvp_page *vpg = cl_object_page_slice(obj, page); + + if (index == CL_PAGE_EOF) { + index = vvp_index(vpg); + continue; + } + + ++index; + if (index == vvp_index(vpg)) + continue; + + return false; + } + return true; +} + +/* Return how many bytes have queued or written */ +int vvp_io_write_commit(const struct lu_env *env, struct cl_io *io) +{ + struct cl_object *obj = io->ci_obj; + struct inode *inode = vvp_object_inode(obj); + struct vvp_io *vio = vvp_env_io(env); + struct cl_page_list *queue = &vio->u.write.vui_queue; + struct cl_page *page; + int rc = 0; + int bytes = 0; + unsigned int npages = vio->u.write.vui_queue.pl_nr; + + if (npages == 0) + return 0; + + CDEBUG(D_VFSTRACE, "commit async pages: %d, from %d, to %d\n", + npages, vio->u.write.vui_from, vio->u.write.vui_to); + + LASSERT(page_list_sanity_check(obj, queue)); + + /* submit IO with async write */ + rc = cl_io_commit_async(env, io, queue, + vio->u.write.vui_from, vio->u.write.vui_to, + write_commit_callback); + npages -= queue->pl_nr; /* already committed pages */ + if (npages > 0) { + /* calculate how many bytes were written */ + bytes = npages << PAGE_SHIFT; + + /* first page */ + bytes -= vio->u.write.vui_from; + if (queue->pl_nr == 0) /* last page */ + bytes -= PAGE_SIZE - vio->u.write.vui_to; + LASSERTF(bytes > 0, "bytes = %d, pages = %d\n", bytes, npages); + + vio->u.write.vui_written += bytes; + + CDEBUG(D_VFSTRACE, "Committed %d pages %d bytes, tot: %ld\n", + npages, bytes, vio->u.write.vui_written); + + /* the first page must have been written. */ + vio->u.write.vui_from = 0; + } + LASSERT(page_list_sanity_check(obj, queue)); + LASSERT(ergo(rc == 0, queue->pl_nr == 0)); + + /* out of quota, try sync write */ + if (rc == -EDQUOT && !cl_io_is_mkwrite(io)) { + rc = vvp_io_commit_sync(env, io, queue, + vio->u.write.vui_from, + vio->u.write.vui_to); + if (rc > 0) { + vio->u.write.vui_written += rc; + rc = 0; + } + } + + /* update inode size */ + ll_merge_attr(env, inode); + + /* Now the pages in queue were failed to commit, discard them + * unless they were dirtied before. + */ + while (queue->pl_nr > 0) { + page = cl_page_list_first(queue); + cl_page_list_del(env, queue, page); + + if (!PageDirty(cl_page_vmpage(page))) + cl_page_discard(env, io, page); + + cl_page_disown(env, io, page); + + /* held in ll_cl_init() */ + lu_ref_del(&page->cp_reference, "cl_io", io); + cl_page_put(env, page); + } + cl_page_list_fini(env, queue); + + return rc; } static int vvp_io_write_start(const struct lu_env *env, const struct cl_io_slice *ios) { - struct ccc_io *cio = cl2ccc_io(env, ios); + struct vvp_io *vio = cl2vvp_io(env, ios); struct cl_io *io = ios->cis_io; struct cl_object *obj = io->ci_obj; - struct inode *inode = ccc_object_inode(obj); + struct inode *inode = vvp_object_inode(obj); ssize_t result = 0; loff_t pos = io->u.ci_wr.wr.crw_pos; size_t cnt = io->u.ci_wr.wr.crw_count; @@ -582,25 +953,41 @@ static int vvp_io_write_start(const struct lu_env *env, * PARALLEL IO This has to be changed for parallel IO doing * out-of-order writes. */ + ll_merge_attr(env, inode); pos = io->u.ci_wr.wr.crw_pos = i_size_read(inode); - cio->cui_iocb->ki_pos = pos; + vio->vui_iocb->ki_pos = pos; } else { - LASSERT(cio->cui_iocb->ki_pos == pos); + LASSERT(vio->vui_iocb->ki_pos == pos); } CDEBUG(D_VFSTRACE, "write: [%lli, %lli)\n", pos, pos + (long long)cnt); - if (!cio->cui_iter) /* from a temp io in ll_cl_init(). */ + if (!vio->vui_iter) /* from a temp io in ll_cl_init(). */ result = 0; else - result = generic_file_write_iter(cio->cui_iocb, cio->cui_iter); + result = generic_file_write_iter(vio->vui_iocb, vio->vui_iter); + + if (result > 0) { + result = vvp_io_write_commit(env, io); + if (vio->u.write.vui_written > 0) { + result = vio->u.write.vui_written; + io->ci_nob += result; + CDEBUG(D_VFSTRACE, "write: nob %zd, result: %zd\n", + io->ci_nob, result); + } + } if (result > 0) { + struct ll_inode_info *lli = ll_i2info(inode); + + spin_lock(&lli->lli_lock); + lli->lli_flags |= LLIF_DATA_MODIFIED; + spin_unlock(&lli->lli_lock); + if (result < cnt) io->ci_continue = 0; - io->ci_nob += result; ll_rw_stats_tally(ll_i2sbi(inode), current->pid, - cio->cui_fd, pos, result, WRITE); + vio->vui_fd, pos, result, WRITE); result = 0; } return result; @@ -608,10 +995,10 @@ static int vvp_io_write_start(const struct lu_env *env, static int vvp_io_kernel_fault(struct vvp_fault_io *cfio) { - struct vm_fault *vmf = cfio->fault.ft_vmf; + struct vm_fault *vmf = cfio->ft_vmf; - cfio->fault.ft_flags = filemap_fault(cfio->ft_vma, vmf); - cfio->fault.ft_flags_valid = 1; + cfio->ft_flags = filemap_fault(cfio->ft_vma, vmf); + cfio->ft_flags_valid = 1; if (vmf->page) { CDEBUG(D_PAGE, @@ -619,39 +1006,51 @@ static int vvp_io_kernel_fault(struct vvp_fault_io *cfio) vmf->page, vmf->page->mapping, vmf->page->index, (long)vmf->page->flags, page_count(vmf->page), page_private(vmf->page), vmf->virtual_address); - if (unlikely(!(cfio->fault.ft_flags & VM_FAULT_LOCKED))) { + if (unlikely(!(cfio->ft_flags & VM_FAULT_LOCKED))) { lock_page(vmf->page); - cfio->fault.ft_flags |= VM_FAULT_LOCKED; + cfio->ft_flags |= VM_FAULT_LOCKED; } cfio->ft_vmpage = vmf->page; return 0; } - if (cfio->fault.ft_flags & (VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV)) { + if (cfio->ft_flags & (VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV)) { CDEBUG(D_PAGE, "got addr %p - SIGBUS\n", vmf->virtual_address); return -EFAULT; } - if (cfio->fault.ft_flags & VM_FAULT_OOM) { + if (cfio->ft_flags & VM_FAULT_OOM) { CDEBUG(D_PAGE, "got addr %p - OOM\n", vmf->virtual_address); return -ENOMEM; } - if (cfio->fault.ft_flags & VM_FAULT_RETRY) + if (cfio->ft_flags & VM_FAULT_RETRY) return -EAGAIN; - CERROR("Unknown error in page fault %d!\n", cfio->fault.ft_flags); + CERROR("Unknown error in page fault %d!\n", cfio->ft_flags); return -EINVAL; } +static void mkwrite_commit_callback(const struct lu_env *env, struct cl_io *io, + struct cl_page *page) +{ + struct vvp_page *vpg; + struct cl_object *clob = cl_io_top(io)->ci_obj; + + set_page_dirty(page->cp_vmpage); + + vpg = cl2vvp_page(cl_object_page_slice(clob, page)); + vvp_write_pending(cl2vvp(clob), vpg); +} + static int vvp_io_fault_start(const struct lu_env *env, const struct cl_io_slice *ios) { struct vvp_io *vio = cl2vvp_io(env, ios); struct cl_io *io = ios->cis_io; struct cl_object *obj = io->ci_obj; - struct inode *inode = ccc_object_inode(obj); + struct inode *inode = vvp_object_inode(obj); struct cl_fault_io *fio = &io->u.ci_fault; struct vvp_fault_io *cfio = &vio->u.fault; loff_t offset; @@ -659,7 +1058,7 @@ static int vvp_io_fault_start(const struct lu_env *env, struct page *vmpage = NULL; struct cl_page *page; loff_t size; - pgoff_t last; /* last page in a file data region */ + pgoff_t last_index; if (fio->ft_executable && inode->i_mtime.tv_sec != vio->u.fault.ft_mtime) @@ -670,7 +1069,7 @@ static int vvp_io_fault_start(const struct lu_env *env, /* offset of the last byte on the page */ offset = cl_offset(obj, fio->ft_index + 1) - 1; LASSERT(cl_index(obj, offset) == fio->ft_index); - result = ccc_prep_size(env, obj, io, 0, offset + 1, NULL); + result = vvp_prep_size(env, obj, io, 0, offset + 1, NULL); if (result != 0) return result; @@ -705,15 +1104,15 @@ static int vvp_io_fault_start(const struct lu_env *env, goto out; } + last_index = cl_index(obj, size - 1); + if (fio->ft_mkwrite) { - pgoff_t last_index; /* * Capture the size while holding the lli_trunc_sem from above * we want to make sure that we complete the mkwrite action * while holding this lock. We need to make sure that we are * not past the end of the file. */ - last_index = cl_index(obj, size - 1); if (last_index < fio->ft_index) { CDEBUG(D_PAGE, "llite: mkwrite and truncate race happened: %p: 0x%lx 0x%lx\n", @@ -745,25 +1144,32 @@ static int vvp_io_fault_start(const struct lu_env *env, */ if (fio->ft_mkwrite) { wait_on_page_writeback(vmpage); - if (set_page_dirty(vmpage)) { - struct ccc_page *cp; + if (!PageDirty(vmpage)) { + struct cl_page_list *plist = &io->ci_queue.c2_qin; + struct vvp_page *vpg = cl_object_page_slice(obj, page); + int to = PAGE_SIZE; /* vvp_page_assume() calls wait_on_page_writeback(). */ cl_page_assume(env, io, page); - cp = cl2ccc_page(cl_page_at(page, &vvp_device_type)); - vvp_write_pending(cl2ccc(obj), cp); + cl_page_list_init(plist); + cl_page_list_add(plist, page); + + /* size fixup */ + if (last_index == vvp_index(vpg)) + to = size & ~PAGE_MASK; /* Do not set Dirty bit here so that in case IO is * started before the page is really made dirty, we * still have chance to detect it. */ - result = cl_page_cache_add(env, io, page, CRT_WRITE); + result = cl_io_commit_async(env, io, plist, 0, to, + mkwrite_commit_callback); LASSERT(cl_page_is_owned(page, io)); + cl_page_list_fini(env, plist); vmpage = NULL; if (result < 0) { - cl_page_unmap(env, io, page); cl_page_discard(env, io, page); cl_page_disown(env, io, page); @@ -773,20 +1179,20 @@ static int vvp_io_fault_start(const struct lu_env *env, if (result == -EDQUOT) result = -ENOSPC; goto out; - } else + } else { cl_page_disown(env, io, page); + } } } - last = cl_index(obj, size - 1); /* * The ft_index is only used in the case of * a mkwrite action. We need to check * our assertions are correct, since * we should have caught this above */ - LASSERT(!fio->ft_mkwrite || fio->ft_index <= last); - if (fio->ft_index == last) + LASSERT(!fio->ft_mkwrite || fio->ft_index <= last_index); + if (fio->ft_index == last_index) /* * Last page is mapped partially. */ @@ -801,7 +1207,9 @@ out: /* return unlocked vmpage to avoid deadlocking */ if (vmpage) unlock_page(vmpage); - cfio->fault.ft_flags &= ~VM_FAULT_LOCKED; + + cfio->ft_flags &= ~VM_FAULT_LOCKED; + return result; } @@ -820,293 +1228,58 @@ static int vvp_io_read_page(const struct lu_env *env, const struct cl_page_slice *slice) { struct cl_io *io = ios->cis_io; - struct cl_object *obj = slice->cpl_obj; - struct ccc_page *cp = cl2ccc_page(slice); + struct vvp_page *vpg = cl2vvp_page(slice); struct cl_page *page = slice->cpl_page; - struct inode *inode = ccc_object_inode(obj); + struct inode *inode = vvp_object_inode(slice->cpl_obj); struct ll_sb_info *sbi = ll_i2sbi(inode); - struct ll_file_data *fd = cl2ccc_io(env, ios)->cui_fd; + struct ll_file_data *fd = cl2vvp_io(env, ios)->vui_fd; struct ll_readahead_state *ras = &fd->fd_ras; - struct page *vmpage = cp->cpg_page; struct cl_2queue *queue = &io->ci_queue; - int rc; - - CLOBINVRNT(env, obj, ccc_object_invariant(obj)); - LASSERT(slice->cpl_obj == obj); if (sbi->ll_ra_info.ra_max_pages_per_file && sbi->ll_ra_info.ra_max_pages) - ras_update(sbi, inode, ras, page->cp_index, - cp->cpg_defer_uptodate); - - /* Sanity check whether the page is protected by a lock. */ - rc = cl_page_is_under_lock(env, io, page); - if (rc != -EBUSY) { - CL_PAGE_HEADER(D_WARNING, env, page, "%s: %d\n", - rc == -ENODATA ? "without a lock" : - "match failed", rc); - if (rc != -ENODATA) - return rc; - } + ras_update(sbi, inode, ras, vvp_index(vpg), + vpg->vpg_defer_uptodate); - if (cp->cpg_defer_uptodate) { - cp->cpg_ra_used = 1; + if (vpg->vpg_defer_uptodate) { + vpg->vpg_ra_used = 1; cl_page_export(env, page, 1); } /* * Add page into the queue even when it is marked uptodate above. * this will unlock it automatically as part of cl_page_list_disown(). */ + cl_page_list_add(&queue->c2_qin, page); if (sbi->ll_ra_info.ra_max_pages_per_file && sbi->ll_ra_info.ra_max_pages) - ll_readahead(env, io, ras, - vmpage->mapping, &queue->c2_qin, fd->fd_flags); + ll_readahead(env, io, &queue->c2_qin, ras, + vpg->vpg_defer_uptodate); return 0; } -static int vvp_page_sync_io(const struct lu_env *env, struct cl_io *io, - struct cl_page *page, struct ccc_page *cp, - enum cl_req_type crt) +void vvp_io_end(const struct lu_env *env, const struct cl_io_slice *ios) { - struct cl_2queue *queue; - int result; - - LASSERT(io->ci_type == CIT_READ || io->ci_type == CIT_WRITE); - - queue = &io->ci_queue; - cl_2queue_init_page(queue, page); - - result = cl_io_submit_sync(env, io, crt, queue, 0); - LASSERT(cl_page_is_owned(page, io)); - - if (crt == CRT_READ) - /* - * in CRT_WRITE case page is left locked even in case of - * error. - */ - cl_page_list_disown(env, io, &queue->c2_qin); - cl_2queue_fini(env, queue); - - return result; -} - -/** - * Prepare partially written-to page for a write. - */ -static int vvp_io_prepare_partial(const struct lu_env *env, struct cl_io *io, - struct cl_object *obj, struct cl_page *pg, - struct ccc_page *cp, - unsigned from, unsigned to) -{ - struct cl_attr *attr = ccc_env_thread_attr(env); - loff_t offset = cl_offset(obj, pg->cp_index); - int result; - - cl_object_attr_lock(obj); - result = cl_object_attr_get(env, obj, attr); - cl_object_attr_unlock(obj); - if (result == 0) { - /* - * If are writing to a new page, no need to read old data. - * The extent locking will have updated the KMS, and for our - * purposes here we can treat it like i_size. - */ - if (attr->cat_kms <= offset) { - char *kaddr = kmap_atomic(cp->cpg_page); - - memset(kaddr, 0, cl_page_size(obj)); - kunmap_atomic(kaddr); - } else if (cp->cpg_defer_uptodate) - cp->cpg_ra_used = 1; - else - result = vvp_page_sync_io(env, io, pg, cp, CRT_READ); - /* - * In older implementations, obdo_refresh_inode is called here - * to update the inode because the write might modify the - * object info at OST. However, this has been proven useless, - * since LVB functions will be called when user space program - * tries to retrieve inode attribute. Also, see bug 15909 for - * details. -jay - */ - if (result == 0) - cl_page_export(env, pg, 1); - } - return result; -} - -static int vvp_io_prepare_write(const struct lu_env *env, - const struct cl_io_slice *ios, - const struct cl_page_slice *slice, - unsigned from, unsigned to) -{ - struct cl_object *obj = slice->cpl_obj; - struct ccc_page *cp = cl2ccc_page(slice); - struct cl_page *pg = slice->cpl_page; - struct page *vmpage = cp->cpg_page; - - int result; - - LINVRNT(cl_page_is_vmlocked(env, pg)); - LASSERT(vmpage->mapping->host == ccc_object_inode(obj)); - - result = 0; - - CL_PAGE_HEADER(D_PAGE, env, pg, "preparing: [%d, %d]\n", from, to); - if (!PageUptodate(vmpage)) { - /* - * We're completely overwriting an existing page, so _don't_ - * set it up to date until commit_write - */ - if (from == 0 && to == PAGE_SIZE) { - CL_PAGE_HEADER(D_PAGE, env, pg, "full page write\n"); - POISON_PAGE(page, 0x11); - } else - result = vvp_io_prepare_partial(env, ios->cis_io, obj, - pg, cp, from, to); - } else - CL_PAGE_HEADER(D_PAGE, env, pg, "uptodate\n"); - return result; -} - -static int vvp_io_commit_write(const struct lu_env *env, - const struct cl_io_slice *ios, - const struct cl_page_slice *slice, - unsigned from, unsigned to) -{ - struct cl_object *obj = slice->cpl_obj; - struct cl_io *io = ios->cis_io; - struct ccc_page *cp = cl2ccc_page(slice); - struct cl_page *pg = slice->cpl_page; - struct inode *inode = ccc_object_inode(obj); - struct ll_sb_info *sbi = ll_i2sbi(inode); - struct ll_inode_info *lli = ll_i2info(inode); - struct page *vmpage = cp->cpg_page; - - int result; - int tallyop; - loff_t size; - - LINVRNT(cl_page_is_vmlocked(env, pg)); - LASSERT(vmpage->mapping->host == inode); - - LU_OBJECT_HEADER(D_INODE, env, &obj->co_lu, "committing page write\n"); - CL_PAGE_HEADER(D_PAGE, env, pg, "committing: [%d, %d]\n", from, to); - - /* - * queue a write for some time in the future the first time we - * dirty the page. - * - * This is different from what other file systems do: they usually - * just mark page (and some of its buffers) dirty and rely on - * balance_dirty_pages() to start a write-back. Lustre wants write-back - * to be started earlier for the following reasons: - * - * (1) with a large number of clients we need to limit the amount - * of cached data on the clients a lot; - * - * (2) large compute jobs generally want compute-only then io-only - * and the IO should complete as quickly as possible; - * - * (3) IO is batched up to the RPC size and is async until the - * client max cache is hit - * (/sys/fs/lustre/osc/OSC.../max_dirty_mb) - * - */ - if (!PageDirty(vmpage)) { - tallyop = LPROC_LL_DIRTY_MISSES; - result = cl_page_cache_add(env, io, pg, CRT_WRITE); - if (result == 0) { - /* page was added into cache successfully. */ - set_page_dirty(vmpage); - vvp_write_pending(cl2ccc(obj), cp); - } else if (result == -EDQUOT) { - pgoff_t last_index = i_size_read(inode) >> PAGE_SHIFT; - bool need_clip = true; - - /* - * Client ran out of disk space grant. Possible - * strategies are: - * - * (a) do a sync write, renewing grant; - * - * (b) stop writing on this stripe, switch to the - * next one. - * - * (b) is a part of "parallel io" design that is the - * ultimate goal. (a) is what "old" client did, and - * what the new code continues to do for the time - * being. - */ - if (last_index > pg->cp_index) { - to = PAGE_SIZE; - need_clip = false; - } else if (last_index == pg->cp_index) { - int size_to = i_size_read(inode) & ~CFS_PAGE_MASK; - - if (to < size_to) - to = size_to; - } - if (need_clip) - cl_page_clip(env, pg, 0, to); - result = vvp_page_sync_io(env, io, pg, cp, CRT_WRITE); - if (result) - CERROR("Write page %lu of inode %p failed %d\n", - pg->cp_index, inode, result); - } - } else { - tallyop = LPROC_LL_DIRTY_HITS; - result = 0; - } - ll_stats_ops_tally(sbi, tallyop, 1); - - /* Inode should be marked DIRTY even if no new page was marked DIRTY - * because page could have been not flushed between 2 modifications. - * It is important the file is marked DIRTY as soon as the I/O is done - * Indeed, when cache is flushed, file could be already closed and it - * is too late to warn the MDT. - * It is acceptable that file is marked DIRTY even if I/O is dropped - * for some reasons before being flushed to OST. - */ - if (result == 0) { - spin_lock(&lli->lli_lock); - lli->lli_flags |= LLIF_DATA_MODIFIED; - spin_unlock(&lli->lli_lock); - } - - size = cl_offset(obj, pg->cp_index) + to; - - ll_inode_size_lock(inode); - if (result == 0) { - if (size > i_size_read(inode)) { - cl_isize_write_nolock(inode, size); - CDEBUG(D_VFSTRACE, DFID" updating i_size %lu\n", - PFID(lu_object_fid(&obj->co_lu)), - (unsigned long)size); - } - cl_page_export(env, pg, 1); - } else { - if (size > i_size_read(inode)) - cl_page_discard(env, io, pg); - } - ll_inode_size_unlock(inode); - return result; + CLOBINVRNT(env, ios->cis_io->ci_obj, + vvp_object_invariant(ios->cis_io->ci_obj)); } static const struct cl_io_operations vvp_io_ops = { .op = { [CIT_READ] = { - .cio_fini = vvp_io_read_fini, + .cio_fini = vvp_io_fini, .cio_lock = vvp_io_read_lock, .cio_start = vvp_io_read_start, - .cio_advance = ccc_io_advance + .cio_advance = vvp_io_advance, }, [CIT_WRITE] = { .cio_fini = vvp_io_fini, + .cio_iter_init = vvp_io_write_iter_init, + .cio_iter_fini = vvp_io_write_iter_fini, .cio_lock = vvp_io_write_lock, .cio_start = vvp_io_write_start, - .cio_advance = ccc_io_advance + .cio_advance = vvp_io_advance, }, [CIT_SETATTR] = { .cio_fini = vvp_io_setattr_fini, @@ -1120,7 +1293,7 @@ static const struct cl_io_operations vvp_io_ops = { .cio_iter_init = vvp_io_fault_iter_init, .cio_lock = vvp_io_fault_lock, .cio_start = vvp_io_fault_start, - .cio_end = ccc_io_end + .cio_end = vvp_io_end, }, [CIT_FSYNC] = { .cio_start = vvp_io_fsync_start, @@ -1131,29 +1304,26 @@ static const struct cl_io_operations vvp_io_ops = { } }, .cio_read_page = vvp_io_read_page, - .cio_prepare_write = vvp_io_prepare_write, - .cio_commit_write = vvp_io_commit_write }; int vvp_io_init(const struct lu_env *env, struct cl_object *obj, struct cl_io *io) { struct vvp_io *vio = vvp_env_io(env); - struct ccc_io *cio = ccc_env_io(env); - struct inode *inode = ccc_object_inode(obj); + struct inode *inode = vvp_object_inode(obj); int result; - CLOBINVRNT(env, obj, ccc_object_invariant(obj)); + CLOBINVRNT(env, obj, vvp_object_invariant(obj)); CDEBUG(D_VFSTRACE, DFID " ignore/verify layout %d/%d, layout version %d restore needed %d\n", PFID(lu_object_fid(&obj->co_lu)), io->ci_ignore_layout, io->ci_verify_layout, - cio->cui_layout_gen, io->ci_restore_needed); + vio->vui_layout_gen, io->ci_restore_needed); - CL_IO_SLICE_CLEAN(cio, cui_cl); - cl_io_slice_add(io, &cio->cui_cl, obj, &vvp_io_ops); - vio->cui_ra_window_set = 0; + CL_IO_SLICE_CLEAN(vio, vui_cl); + cl_io_slice_add(io, &vio->vui_cl, obj, &vvp_io_ops); + vio->vui_ra_valid = false; result = 0; if (io->ci_type == CIT_READ || io->ci_type == CIT_WRITE) { size_t count; @@ -1166,7 +1336,7 @@ int vvp_io_init(const struct lu_env *env, struct cl_object *obj, if (count == 0) result = 1; else - cio->cui_tot_count = count; + vio->vui_tot_count = count; /* for read/write, we store the jobid in the inode, and * it'll be fetched by osc when building RPC. @@ -1192,7 +1362,7 @@ int vvp_io_init(const struct lu_env *env, struct cl_object *obj, * because it might not grant layout lock in IT_OPEN. */ if (result == 0 && !io->ci_ignore_layout) { - result = ll_layout_refresh(inode, &cio->cui_layout_gen); + result = ll_layout_refresh(inode, &vio->vui_layout_gen); if (result == -ENOENT) /* If the inode on MDS has been removed, but the objects * on OSTs haven't been destroyed (async unlink), layout @@ -1208,11 +1378,3 @@ int vvp_io_init(const struct lu_env *env, struct cl_object *obj, return result; } - -static struct vvp_io *cl2vvp_io(const struct lu_env *env, - const struct cl_io_slice *slice) -{ - /* Calling just for assertion */ - cl2ccc_io(env, slice); - return vvp_env_io(env); -} diff --git a/drivers/staging/lustre/lustre/llite/vvp_lock.c b/drivers/staging/lustre/lustre/llite/vvp_lock.c index ff0948043..f5bd6c22e 100644 --- a/drivers/staging/lustre/lustre/llite/vvp_lock.c +++ b/drivers/staging/lustre/lustre/llite/vvp_lock.c @@ -40,7 +40,7 @@ #define DEBUG_SUBSYSTEM S_LLITE -#include "../include/obd.h" +#include "../include/obd_support.h" #include "../include/lustre_lite.h" #include "vvp_internal.h" @@ -51,36 +51,41 @@ * */ -/** - * Estimates lock value for the purpose of managing the lock cache during - * memory shortages. - * - * Locks for memory mapped files are almost infinitely precious, others are - * junk. "Mapped locks" are heavy, but not infinitely heavy, so that they are - * ordered within themselves by weights assigned from other layers. - */ -static unsigned long vvp_lock_weigh(const struct lu_env *env, - const struct cl_lock_slice *slice) +static void vvp_lock_fini(const struct lu_env *env, struct cl_lock_slice *slice) +{ + struct vvp_lock *vlk = cl2vvp_lock(slice); + + kmem_cache_free(vvp_lock_kmem, vlk); +} + +static int vvp_lock_enqueue(const struct lu_env *env, + const struct cl_lock_slice *slice, + struct cl_io *unused, struct cl_sync_io *anchor) { - struct ccc_object *cob = cl2ccc(slice->cls_obj); + CLOBINVRNT(env, slice->cls_obj, vvp_object_invariant(slice->cls_obj)); - return atomic_read(&cob->cob_mmap_cnt) > 0 ? ~0UL >> 2 : 0; + return 0; } static const struct cl_lock_operations vvp_lock_ops = { - .clo_delete = ccc_lock_delete, - .clo_fini = ccc_lock_fini, - .clo_enqueue = ccc_lock_enqueue, - .clo_wait = ccc_lock_wait, - .clo_use = ccc_lock_use, - .clo_unuse = ccc_lock_unuse, - .clo_fits_into = ccc_lock_fits_into, - .clo_state = ccc_lock_state, - .clo_weigh = vvp_lock_weigh + .clo_fini = vvp_lock_fini, + .clo_enqueue = vvp_lock_enqueue, }; int vvp_lock_init(const struct lu_env *env, struct cl_object *obj, - struct cl_lock *lock, const struct cl_io *io) + struct cl_lock *lock, const struct cl_io *unused) { - return ccc_lock_init(env, obj, lock, io, &vvp_lock_ops); + struct vvp_lock *vlk; + int result; + + CLOBINVRNT(env, obj, vvp_object_invariant(obj)); + + vlk = kmem_cache_zalloc(vvp_lock_kmem, GFP_NOFS); + if (vlk) { + cl_lock_slice_add(lock, &vlk->vlk_cl, obj, &vvp_lock_ops); + result = 0; + } else { + result = -ENOMEM; + } + return result; } diff --git a/drivers/staging/lustre/lustre/llite/vvp_object.c b/drivers/staging/lustre/lustre/llite/vvp_object.c index 03c887d8e..18c9df7eb 100644 --- a/drivers/staging/lustre/lustre/llite/vvp_object.c +++ b/drivers/staging/lustre/lustre/llite/vvp_object.c @@ -45,6 +45,7 @@ #include "../include/obd.h" #include "../include/lustre_lite.h" +#include "llite_internal.h" #include "vvp_internal.h" /***************************************************************************** @@ -53,16 +54,25 @@ * */ +int vvp_object_invariant(const struct cl_object *obj) +{ + struct inode *inode = vvp_object_inode(obj); + struct ll_inode_info *lli = ll_i2info(inode); + + return (S_ISREG(inode->i_mode) || inode->i_mode == 0) && + lli->lli_clob == obj; +} + static int vvp_object_print(const struct lu_env *env, void *cookie, lu_printer_t p, const struct lu_object *o) { - struct ccc_object *obj = lu2ccc(o); - struct inode *inode = obj->cob_inode; + struct vvp_object *obj = lu2vvp(o); + struct inode *inode = obj->vob_inode; struct ll_inode_info *lli; (*p)(env, cookie, "(%s %d %d) inode: %p ", - list_empty(&obj->cob_pending_list) ? "-" : "+", - obj->cob_transient_pages, atomic_read(&obj->cob_mmap_cnt), + list_empty(&obj->vob_pending_list) ? "-" : "+", + obj->vob_transient_pages, atomic_read(&obj->vob_mmap_cnt), inode); if (inode) { lli = ll_i2info(inode); @@ -77,7 +87,7 @@ static int vvp_object_print(const struct lu_env *env, void *cookie, static int vvp_attr_get(const struct lu_env *env, struct cl_object *obj, struct cl_attr *attr) { - struct inode *inode = ccc_object_inode(obj); + struct inode *inode = vvp_object_inode(obj); /* * lov overwrites most of these fields in @@ -99,7 +109,7 @@ static int vvp_attr_get(const struct lu_env *env, struct cl_object *obj, static int vvp_attr_set(const struct lu_env *env, struct cl_object *obj, const struct cl_attr *attr, unsigned valid) { - struct inode *inode = ccc_object_inode(obj); + struct inode *inode = vvp_object_inode(obj); if (valid & CAT_UID) inode->i_uid = make_kuid(&init_user_ns, attr->cat_uid); @@ -112,7 +122,7 @@ static int vvp_attr_set(const struct lu_env *env, struct cl_object *obj, if (valid & CAT_CTIME) inode->i_ctime.tv_sec = attr->cat_ctime; if (0 && valid & CAT_SIZE) - cl_isize_write_nolock(inode, attr->cat_size); + i_size_write(inode, attr->cat_size); /* not currently necessary */ if (0 && valid & (CAT_UID|CAT_GID|CAT_SIZE)) mark_inode_dirty(inode); @@ -165,6 +175,40 @@ static int vvp_conf_set(const struct lu_env *env, struct cl_object *obj, return 0; } +static int vvp_prune(const struct lu_env *env, struct cl_object *obj) +{ + struct inode *inode = vvp_object_inode(obj); + int rc; + + rc = cl_sync_file_range(inode, 0, OBD_OBJECT_EOF, CL_FSYNC_LOCAL, 1); + if (rc < 0) { + CDEBUG(D_VFSTRACE, DFID ": writeback failed: %d\n", + PFID(lu_object_fid(&obj->co_lu)), rc); + return rc; + } + + truncate_inode_pages(inode->i_mapping, 0); + return 0; +} + +static int vvp_object_glimpse(const struct lu_env *env, + const struct cl_object *obj, struct ost_lvb *lvb) +{ + struct inode *inode = vvp_object_inode(obj); + + lvb->lvb_mtime = LTIME_S(inode->i_mtime); + lvb->lvb_atime = LTIME_S(inode->i_atime); + lvb->lvb_ctime = LTIME_S(inode->i_ctime); + /* + * LU-417: Add dirty pages block count lest i_blocks reports 0, some + * "cp" or "tar" on remote node may think it's a completely sparse file + * and skip it. + */ + if (lvb->lvb_size > 0 && lvb->lvb_blocks == 0) + lvb->lvb_blocks = dirty_cnt(inode); + return 0; +} + static const struct cl_object_operations vvp_ops = { .coo_page_init = vvp_page_init, .coo_lock_init = vvp_lock_init, @@ -172,29 +216,94 @@ static const struct cl_object_operations vvp_ops = { .coo_attr_get = vvp_attr_get, .coo_attr_set = vvp_attr_set, .coo_conf_set = vvp_conf_set, - .coo_glimpse = ccc_object_glimpse + .coo_prune = vvp_prune, + .coo_glimpse = vvp_object_glimpse }; +static int vvp_object_init0(const struct lu_env *env, + struct vvp_object *vob, + const struct cl_object_conf *conf) +{ + vob->vob_inode = conf->coc_inode; + vob->vob_transient_pages = 0; + cl_object_page_init(&vob->vob_cl, sizeof(struct vvp_page)); + return 0; +} + +static int vvp_object_init(const struct lu_env *env, struct lu_object *obj, + const struct lu_object_conf *conf) +{ + struct vvp_device *dev = lu2vvp_dev(obj->lo_dev); + struct vvp_object *vob = lu2vvp(obj); + struct lu_object *below; + struct lu_device *under; + int result; + + under = &dev->vdv_next->cd_lu_dev; + below = under->ld_ops->ldo_object_alloc(env, obj->lo_header, under); + if (below) { + const struct cl_object_conf *cconf; + + cconf = lu2cl_conf(conf); + INIT_LIST_HEAD(&vob->vob_pending_list); + lu_object_add(obj, below); + result = vvp_object_init0(env, vob, cconf); + } else { + result = -ENOMEM; + } + + return result; +} + +static void vvp_object_free(const struct lu_env *env, struct lu_object *obj) +{ + struct vvp_object *vob = lu2vvp(obj); + + lu_object_fini(obj); + lu_object_header_fini(obj->lo_header); + kmem_cache_free(vvp_object_kmem, vob); +} + static const struct lu_object_operations vvp_lu_obj_ops = { - .loo_object_init = ccc_object_init, - .loo_object_free = ccc_object_free, - .loo_object_print = vvp_object_print + .loo_object_init = vvp_object_init, + .loo_object_free = vvp_object_free, + .loo_object_print = vvp_object_print, }; -struct ccc_object *cl_inode2ccc(struct inode *inode) +struct vvp_object *cl_inode2vvp(struct inode *inode) { - struct cl_inode_info *lli = cl_i2info(inode); + struct ll_inode_info *lli = ll_i2info(inode); struct cl_object *obj = lli->lli_clob; struct lu_object *lu; lu = lu_object_locate(obj->co_lu.lo_header, &vvp_device_type); LASSERT(lu); - return lu2ccc(lu); + return lu2vvp(lu); } struct lu_object *vvp_object_alloc(const struct lu_env *env, - const struct lu_object_header *hdr, + const struct lu_object_header *unused, struct lu_device *dev) { - return ccc_object_alloc(env, hdr, dev, &vvp_ops, &vvp_lu_obj_ops); + struct vvp_object *vob; + struct lu_object *obj; + + vob = kmem_cache_zalloc(vvp_object_kmem, GFP_NOFS); + if (vob) { + struct cl_object_header *hdr; + + obj = &vob->vob_cl.co_lu; + hdr = &vob->vob_header; + cl_object_header_init(hdr); + hdr->coh_page_bufsize = cfs_size_round(sizeof(struct cl_page)); + + lu_object_init(obj, &hdr->coh_lu, dev); + lu_object_add_top(&hdr->coh_lu, obj); + + vob->vob_cl.co_ops = &vvp_ops; + obj->lo_ops = &vvp_lu_obj_ops; + } else { + obj = NULL; + } + return obj; } diff --git a/drivers/staging/lustre/lustre/llite/vvp_page.c b/drivers/staging/lustre/lustre/llite/vvp_page.c index 33ca3eb34..6cd2af7a9 100644 --- a/drivers/staging/lustre/lustre/llite/vvp_page.c +++ b/drivers/staging/lustre/lustre/llite/vvp_page.c @@ -41,9 +41,16 @@ #define DEBUG_SUBSYSTEM S_LLITE -#include "../include/obd.h" +#include +#include +#include +#include +#include +#include + #include "../include/lustre_lite.h" +#include "llite_internal.h" #include "vvp_internal.h" /***************************************************************************** @@ -52,9 +59,9 @@ * */ -static void vvp_page_fini_common(struct ccc_page *cp) +static void vvp_page_fini_common(struct vvp_page *vpg) { - struct page *vmpage = cp->cpg_page; + struct page *vmpage = vpg->vpg_page; LASSERT(vmpage); put_page(vmpage); @@ -63,23 +70,23 @@ static void vvp_page_fini_common(struct ccc_page *cp) static void vvp_page_fini(const struct lu_env *env, struct cl_page_slice *slice) { - struct ccc_page *cp = cl2ccc_page(slice); - struct page *vmpage = cp->cpg_page; + struct vvp_page *vpg = cl2vvp_page(slice); + struct page *vmpage = vpg->vpg_page; /* * vmpage->private was already cleared when page was moved into * VPG_FREEING state. */ LASSERT((struct cl_page *)vmpage->private != slice->cpl_page); - vvp_page_fini_common(cp); + vvp_page_fini_common(vpg); } static int vvp_page_own(const struct lu_env *env, const struct cl_page_slice *slice, struct cl_io *io, int nonblock) { - struct ccc_page *vpg = cl2ccc_page(slice); - struct page *vmpage = vpg->cpg_page; + struct vvp_page *vpg = cl2vvp_page(slice); + struct page *vmpage = vpg->vpg_page; LASSERT(vmpage); if (nonblock) { @@ -96,6 +103,7 @@ static int vvp_page_own(const struct lu_env *env, lock_page(vmpage); wait_on_page_writeback(vmpage); + return 0; } @@ -136,41 +144,15 @@ static void vvp_page_discard(const struct lu_env *env, struct cl_io *unused) { struct page *vmpage = cl2vm_page(slice); - struct address_space *mapping; - struct ccc_page *cpg = cl2ccc_page(slice); + struct vvp_page *vpg = cl2vvp_page(slice); LASSERT(vmpage); LASSERT(PageLocked(vmpage)); - mapping = vmpage->mapping; - - if (cpg->cpg_defer_uptodate && !cpg->cpg_ra_used) - ll_ra_stats_inc(mapping, RA_STAT_DISCARDED); + if (vpg->vpg_defer_uptodate && !vpg->vpg_ra_used) + ll_ra_stats_inc(vmpage->mapping->host, RA_STAT_DISCARDED); - /* - * truncate_complete_page() calls - * a_ops->invalidatepage()->cl_page_delete()->vvp_page_delete(). - */ - truncate_complete_page(mapping, vmpage); -} - -static int vvp_page_unmap(const struct lu_env *env, - const struct cl_page_slice *slice, - struct cl_io *unused) -{ - struct page *vmpage = cl2vm_page(slice); - __u64 offset; - - LASSERT(vmpage); - LASSERT(PageLocked(vmpage)); - - offset = vmpage->index << PAGE_SHIFT; - - /* - * XXX is it safe to call this with the page lock held? - */ - ll_teardown_mmaps(vmpage->mapping, offset, offset + PAGE_SIZE); - return 0; + ll_invalidate_page(vmpage); } static void vvp_page_delete(const struct lu_env *env, @@ -179,12 +161,20 @@ static void vvp_page_delete(const struct lu_env *env, struct page *vmpage = cl2vm_page(slice); struct inode *inode = vmpage->mapping->host; struct cl_object *obj = slice->cpl_obj; + struct cl_page *page = slice->cpl_page; + int refc; LASSERT(PageLocked(vmpage)); - LASSERT((struct cl_page *)vmpage->private == slice->cpl_page); - LASSERT(inode == ccc_object_inode(obj)); + LASSERT((struct cl_page *)vmpage->private == page); + LASSERT(inode == vvp_object_inode(obj)); - vvp_write_complete(cl2ccc(obj), cl2ccc_page(slice)); + vvp_write_complete(cl2vvp(obj), cl2vvp_page(slice)); + + /* Drop the reference count held in vvp_page_init */ + refc = atomic_dec_return(&page->cp_ref); + LASSERTF(refc >= 1, "page = %p, refc = %d\n", page, refc); + + ClearPageUptodate(vmpage); ClearPagePrivate(vmpage); vmpage->private = 0; /* @@ -237,7 +227,7 @@ static int vvp_page_prep_write(const struct lu_env *env, if (!pg->cp_sync_io) set_page_writeback(vmpage); - vvp_write_pending(cl2ccc(slice->cpl_obj), cl2ccc_page(slice)); + vvp_write_pending(cl2vvp(slice->cpl_obj), cl2vvp_page(slice)); return 0; } @@ -250,11 +240,11 @@ static int vvp_page_prep_write(const struct lu_env *env, */ static void vvp_vmpage_error(struct inode *inode, struct page *vmpage, int ioret) { - struct ccc_object *obj = cl_inode2ccc(inode); + struct vvp_object *obj = cl_inode2vvp(inode); if (ioret == 0) { ClearPageError(vmpage); - obj->cob_discard_page_warned = 0; + obj->vob_discard_page_warned = 0; } else { SetPageError(vmpage); if (ioret == -ENOSPC) @@ -263,8 +253,8 @@ static void vvp_vmpage_error(struct inode *inode, struct page *vmpage, int ioret set_bit(AS_EIO, &inode->i_mapping->flags); if ((ioret == -ESHUTDOWN || ioret == -EINTR) && - obj->cob_discard_page_warned == 0) { - obj->cob_discard_page_warned = 1; + obj->vob_discard_page_warned == 0) { + obj->vob_discard_page_warned = 1; ll_dirty_page_discard_warn(vmpage, ioret); } } @@ -274,22 +264,23 @@ static void vvp_page_completion_read(const struct lu_env *env, const struct cl_page_slice *slice, int ioret) { - struct ccc_page *cp = cl2ccc_page(slice); - struct page *vmpage = cp->cpg_page; - struct cl_page *page = cl_page_top(slice->cpl_page); - struct inode *inode = ccc_object_inode(page->cp_obj); + struct vvp_page *vpg = cl2vvp_page(slice); + struct page *vmpage = vpg->vpg_page; + struct cl_page *page = slice->cpl_page; + struct inode *inode = vvp_object_inode(page->cp_obj); LASSERT(PageLocked(vmpage)); CL_PAGE_HEADER(D_PAGE, env, page, "completing READ with %d\n", ioret); - if (cp->cpg_defer_uptodate) + if (vpg->vpg_defer_uptodate) ll_ra_count_put(ll_i2sbi(inode), 1); if (ioret == 0) { - if (!cp->cpg_defer_uptodate) + if (!vpg->vpg_defer_uptodate) cl_page_export(env, page, 1); - } else - cp->cpg_defer_uptodate = 0; + } else { + vpg->vpg_defer_uptodate = 0; + } if (!page->cp_sync_io) unlock_page(vmpage); @@ -299,9 +290,9 @@ static void vvp_page_completion_write(const struct lu_env *env, const struct cl_page_slice *slice, int ioret) { - struct ccc_page *cp = cl2ccc_page(slice); + struct vvp_page *vpg = cl2vvp_page(slice); struct cl_page *pg = slice->cpl_page; - struct page *vmpage = cp->cpg_page; + struct page *vmpage = vpg->vpg_page; CL_PAGE_HEADER(D_PAGE, env, pg, "completing WRITE with %d\n", ioret); @@ -315,8 +306,8 @@ static void vvp_page_completion_write(const struct lu_env *env, * and then re-add the page into pending transfer queue. -jay */ - cp->cpg_write_queued = 0; - vvp_write_complete(cl2ccc(slice->cpl_obj), cp); + vpg->vpg_write_queued = 0; + vvp_write_complete(cl2vvp(slice->cpl_obj), vpg); if (pg->cp_sync_io) { LASSERT(PageLocked(vmpage)); @@ -327,7 +318,7 @@ static void vvp_page_completion_write(const struct lu_env *env, * Only mark the page error only when it's an async write * because applications won't wait for IO to finish. */ - vvp_vmpage_error(ccc_object_inode(pg->cp_obj), vmpage, ioret); + vvp_vmpage_error(vvp_object_inode(pg->cp_obj), vmpage, ioret); end_page_writeback(vmpage); } @@ -359,7 +350,7 @@ static int vvp_page_make_ready(const struct lu_env *env, LASSERT(pg->cp_state == CPS_CACHED); /* This actually clears the dirty bit in the radix tree. */ set_page_writeback(vmpage); - vvp_write_pending(cl2ccc(slice->cpl_obj), cl2ccc_page(slice)); + vvp_write_pending(cl2vvp(slice->cpl_obj), cl2vvp_page(slice)); CL_PAGE_HEADER(D_PAGE, env, pg, "readied\n"); } else if (pg->cp_state == CPS_PAGEOUT) { /* is it possible for osc_flush_async_page() to already @@ -375,24 +366,51 @@ static int vvp_page_make_ready(const struct lu_env *env, return result; } +static int vvp_page_is_under_lock(const struct lu_env *env, + const struct cl_page_slice *slice, + struct cl_io *io, pgoff_t *max_index) +{ + if (io->ci_type == CIT_READ || io->ci_type == CIT_WRITE || + io->ci_type == CIT_FAULT) { + struct vvp_io *vio = vvp_env_io(env); + + if (unlikely(vio->vui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) + *max_index = CL_PAGE_EOF; + } + return 0; +} + static int vvp_page_print(const struct lu_env *env, const struct cl_page_slice *slice, void *cookie, lu_printer_t printer) { - struct ccc_page *vp = cl2ccc_page(slice); - struct page *vmpage = vp->cpg_page; + struct vvp_page *vpg = cl2vvp_page(slice); + struct page *vmpage = vpg->vpg_page; (*printer)(env, cookie, LUSTRE_VVP_NAME "-page@%p(%d:%d:%d) vm@%p ", - vp, vp->cpg_defer_uptodate, vp->cpg_ra_used, - vp->cpg_write_queued, vmpage); + vpg, vpg->vpg_defer_uptodate, vpg->vpg_ra_used, + vpg->vpg_write_queued, vmpage); if (vmpage) { (*printer)(env, cookie, "%lx %d:%d %lx %lu %slru", (long)vmpage->flags, page_count(vmpage), page_mapcount(vmpage), vmpage->private, - page_index(vmpage), + vmpage->index, list_empty(&vmpage->lru) ? "not-" : ""); } + (*printer)(env, cookie, "\n"); + + return 0; +} + +static int vvp_page_fail(const struct lu_env *env, + const struct cl_page_slice *slice) +{ + /* + * Cached read? + */ + LBUG(); + return 0; } @@ -401,32 +419,38 @@ static const struct cl_page_operations vvp_page_ops = { .cpo_assume = vvp_page_assume, .cpo_unassume = vvp_page_unassume, .cpo_disown = vvp_page_disown, - .cpo_vmpage = ccc_page_vmpage, .cpo_discard = vvp_page_discard, .cpo_delete = vvp_page_delete, - .cpo_unmap = vvp_page_unmap, .cpo_export = vvp_page_export, .cpo_is_vmlocked = vvp_page_is_vmlocked, .cpo_fini = vvp_page_fini, .cpo_print = vvp_page_print, - .cpo_is_under_lock = ccc_page_is_under_lock, + .cpo_is_under_lock = vvp_page_is_under_lock, .io = { [CRT_READ] = { .cpo_prep = vvp_page_prep_read, .cpo_completion = vvp_page_completion_read, - .cpo_make_ready = ccc_fail, + .cpo_make_ready = vvp_page_fail, }, [CRT_WRITE] = { .cpo_prep = vvp_page_prep_write, .cpo_completion = vvp_page_completion_write, .cpo_make_ready = vvp_page_make_ready, - } - } + }, + }, }; +static int vvp_transient_page_prep(const struct lu_env *env, + const struct cl_page_slice *slice, + struct cl_io *unused) +{ + /* transient page should always be sent. */ + return 0; +} + static void vvp_transient_page_verify(const struct cl_page *page) { - struct inode *inode = ccc_object_inode(page->cp_obj); + struct inode *inode = vvp_object_inode(page->cp_obj); LASSERT(!inode_trylock(inode)); } @@ -477,7 +501,7 @@ static void vvp_transient_page_discard(const struct lu_env *env, static int vvp_transient_page_is_vmlocked(const struct lu_env *env, const struct cl_page_slice *slice) { - struct inode *inode = ccc_object_inode(slice->cpl_obj); + struct inode *inode = vvp_object_inode(slice->cpl_obj); int locked; locked = !inode_trylock(inode); @@ -497,13 +521,13 @@ vvp_transient_page_completion(const struct lu_env *env, static void vvp_transient_page_fini(const struct lu_env *env, struct cl_page_slice *slice) { - struct ccc_page *cp = cl2ccc_page(slice); + struct vvp_page *vpg = cl2vvp_page(slice); struct cl_page *clp = slice->cpl_page; - struct ccc_object *clobj = cl2ccc(clp->cp_obj); + struct vvp_object *clobj = cl2vvp(clp->cp_obj); - vvp_page_fini_common(cp); - LASSERT(!inode_trylock(clobj->cob_inode)); - clobj->cob_transient_pages--; + vvp_page_fini_common(vpg); + LASSERT(!inode_trylock(clobj->vob_inode)); + clobj->vob_transient_pages--; } static const struct cl_page_operations vvp_transient_page_ops = { @@ -512,45 +536,48 @@ static const struct cl_page_operations vvp_transient_page_ops = { .cpo_unassume = vvp_transient_page_unassume, .cpo_disown = vvp_transient_page_disown, .cpo_discard = vvp_transient_page_discard, - .cpo_vmpage = ccc_page_vmpage, .cpo_fini = vvp_transient_page_fini, .cpo_is_vmlocked = vvp_transient_page_is_vmlocked, .cpo_print = vvp_page_print, - .cpo_is_under_lock = ccc_page_is_under_lock, + .cpo_is_under_lock = vvp_page_is_under_lock, .io = { [CRT_READ] = { - .cpo_prep = ccc_transient_page_prep, + .cpo_prep = vvp_transient_page_prep, .cpo_completion = vvp_transient_page_completion, }, [CRT_WRITE] = { - .cpo_prep = ccc_transient_page_prep, + .cpo_prep = vvp_transient_page_prep, .cpo_completion = vvp_transient_page_completion, } } }; int vvp_page_init(const struct lu_env *env, struct cl_object *obj, - struct cl_page *page, struct page *vmpage) + struct cl_page *page, pgoff_t index) { - struct ccc_page *cpg = cl_object_page_slice(obj, page); + struct vvp_page *vpg = cl_object_page_slice(obj, page); + struct page *vmpage = page->cp_vmpage; - CLOBINVRNT(env, obj, ccc_object_invariant(obj)); + CLOBINVRNT(env, obj, vvp_object_invariant(obj)); - cpg->cpg_page = vmpage; + vpg->vpg_page = vmpage; get_page(vmpage); - INIT_LIST_HEAD(&cpg->cpg_pending_linkage); + INIT_LIST_HEAD(&vpg->vpg_pending_linkage); if (page->cp_type == CPT_CACHEABLE) { + /* in cache, decref in vvp_page_delete */ + atomic_inc(&page->cp_ref); SetPagePrivate(vmpage); vmpage->private = (unsigned long)page; - cl_page_slice_add(page, &cpg->cpg_cl, obj, &vvp_page_ops); + cl_page_slice_add(page, &vpg->vpg_cl, obj, index, + &vvp_page_ops); } else { - struct ccc_object *clobj = cl2ccc(obj); + struct vvp_object *clobj = cl2vvp(obj); - LASSERT(!inode_trylock(clobj->cob_inode)); - cl_page_slice_add(page, &cpg->cpg_cl, obj, + LASSERT(!inode_trylock(clobj->vob_inode)); + cl_page_slice_add(page, &vpg->vpg_cl, obj, index, &vvp_transient_page_ops); - clobj->cob_transient_pages++; + clobj->vob_transient_pages++; } return 0; } diff --git a/drivers/staging/lustre/lustre/llite/vvp_req.c b/drivers/staging/lustre/lustre/llite/vvp_req.c new file mode 100644 index 000000000..fb886291a --- /dev/null +++ b/drivers/staging/lustre/lustre/llite/vvp_req.c @@ -0,0 +1,121 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.gnu.org/licenses/gpl-2.0.html + * + * GPL HEADER END + */ +/* + * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2011, 2014, Intel Corporation. + */ + +#define DEBUG_SUBSYSTEM S_LLITE + +#include "../include/lustre/lustre_idl.h" +#include "../include/cl_object.h" +#include "../include/obd.h" +#include "../include/obd_support.h" +#include "../include/lustre_lite.h" +#include "llite_internal.h" +#include "vvp_internal.h" + +static inline struct vvp_req *cl2vvp_req(const struct cl_req_slice *slice) +{ + return container_of0(slice, struct vvp_req, vrq_cl); +} + +/** + * Implementation of struct cl_req_operations::cro_attr_set() for VVP + * layer. VVP is responsible for + * + * - o_[mac]time + * + * - o_mode + * + * - o_parent_seq + * + * - o_[ug]id + * + * - o_parent_oid + * + * - o_parent_ver + * + * - o_ioepoch, + * + */ +void vvp_req_attr_set(const struct lu_env *env, + const struct cl_req_slice *slice, + const struct cl_object *obj, + struct cl_req_attr *attr, u64 flags) +{ + struct inode *inode; + struct obdo *oa; + u32 valid_flags; + + oa = attr->cra_oa; + inode = vvp_object_inode(obj); + valid_flags = OBD_MD_FLTYPE; + + if (slice->crs_req->crq_type == CRT_WRITE) { + if (flags & OBD_MD_FLEPOCH) { + oa->o_valid |= OBD_MD_FLEPOCH; + oa->o_ioepoch = ll_i2info(inode)->lli_ioepoch; + valid_flags |= OBD_MD_FLMTIME | OBD_MD_FLCTIME | + OBD_MD_FLUID | OBD_MD_FLGID; + } + } + obdo_from_inode(oa, inode, valid_flags & flags); + obdo_set_parent_fid(oa, &ll_i2info(inode)->lli_fid); + memcpy(attr->cra_jobid, ll_i2info(inode)->lli_jobid, + JOBSTATS_JOBID_SIZE); +} + +void vvp_req_completion(const struct lu_env *env, + const struct cl_req_slice *slice, int ioret) +{ + struct vvp_req *vrq; + + if (ioret > 0) + cl_stats_tally(slice->crs_dev, slice->crs_req->crq_type, ioret); + + vrq = cl2vvp_req(slice); + kmem_cache_free(vvp_req_kmem, vrq); +} + +static const struct cl_req_operations vvp_req_ops = { + .cro_attr_set = vvp_req_attr_set, + .cro_completion = vvp_req_completion +}; + +int vvp_req_init(const struct lu_env *env, struct cl_device *dev, + struct cl_req *req) +{ + struct vvp_req *vrq; + int result; + + vrq = kmem_cache_zalloc(vvp_req_kmem, GFP_NOFS); + if (vrq) { + cl_req_slice_add(req, &vrq->vrq_cl, dev, &vvp_req_ops); + result = 0; + } else { + result = -ENOMEM; + } + return result; +} diff --git a/drivers/staging/lustre/lustre/llite/xattr.c b/drivers/staging/lustre/lustre/llite/xattr.c index b68dcc921..608014b0d 100644 --- a/drivers/staging/lustre/lustre/llite/xattr.c +++ b/drivers/staging/lustre/lustre/llite/xattr.c @@ -181,8 +181,9 @@ int ll_setxattr_common(struct inode *inode, const char *name, size = rc; pv = (const char *)new_value; - } else + } else { return -EOPNOTSUPP; + } valid |= rce_ops2valid(rce->rce_ops); } @@ -210,16 +211,14 @@ int ll_setxattr_common(struct inode *inode, const char *name, return 0; } -int ll_setxattr(struct dentry *dentry, const char *name, - const void *value, size_t size, int flags) +int ll_setxattr(struct dentry *dentry, struct inode *inode, + const char *name, const void *value, size_t size, int flags) { - struct inode *inode = d_inode(dentry); - LASSERT(inode); LASSERT(name); - CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), xattr %s\n", - inode->i_ino, inode->i_generation, inode, name); + CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), xattr %s\n", + PFID(ll_inode2fid(inode)), inode, name); ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_SETXATTR, 1); @@ -243,12 +242,12 @@ int ll_setxattr(struct dentry *dentry, const char *name, lump->lmm_stripe_offset = -1; if (lump && S_ISREG(inode->i_mode)) { - int flags = FMODE_WRITE; + __u64 it_flags = FMODE_WRITE; int lum_size = (lump->lmm_magic == LOV_USER_MAGIC_V1) ? sizeof(*lump) : sizeof(struct lov_user_md_v3); - rc = ll_lov_setstripe_ea_info(inode, dentry, flags, lump, - lum_size); + rc = ll_lov_setstripe_ea_info(inode, dentry, it_flags, + lump, lum_size); /* b10667: rc always be 0 here for now */ rc = 0; } else if (S_ISDIR(inode->i_mode)) { @@ -272,8 +271,8 @@ int ll_removexattr(struct dentry *dentry, const char *name) LASSERT(inode); LASSERT(name); - CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), xattr %s\n", - inode->i_ino, inode->i_generation, inode, name); + CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), xattr %s\n", + PFID(ll_inode2fid(inode)), inode, name); ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_REMOVEXATTR, 1); return ll_setxattr_common(inode, name, NULL, 0, 0, @@ -292,8 +291,8 @@ int ll_getxattr_common(struct inode *inode, const char *name, struct rmtacl_ctl_entry *rce = NULL; struct ll_inode_info *lli = ll_i2info(inode); - CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", - inode->i_ino, inode->i_generation, inode); + CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p)\n", + PFID(ll_inode2fid(inode)), inode); /* listxattr have slightly different behavior from of ext3: * without 'user_xattr' ext3 will list all xattr names but @@ -338,7 +337,6 @@ int ll_getxattr_common(struct inode *inode, const char *name, */ if (xattr_type == XATTR_ACL_ACCESS_T && !(sbi->ll_flags & LL_SBI_RMT_CLIENT)) { - struct posix_acl *acl; spin_lock(&lli->lli_lock); @@ -423,8 +421,7 @@ getxattr_nocache: if (rce && rce->rce_ops == RMT_LSETFACL) { ext_acl_xattr_header *acl; - acl = lustre_posix_acl_xattr_2ext( - (posix_acl_xattr_header *)buffer, rc); + acl = lustre_posix_acl_xattr_2ext(buffer, rc); if (IS_ERR(acl)) { rc = PTR_ERR(acl); goto out; @@ -451,16 +448,14 @@ out: return rc; } -ssize_t ll_getxattr(struct dentry *dentry, const char *name, - void *buffer, size_t size) +ssize_t ll_getxattr(struct dentry *dentry, struct inode *inode, + const char *name, void *buffer, size_t size) { - struct inode *inode = d_inode(dentry); - LASSERT(inode); LASSERT(name); - CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), xattr %s\n", - inode->i_ino, inode->i_generation, inode, name); + CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), xattr %s\n", + PFID(ll_inode2fid(inode)), inode, name); ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_GETXATTR, 1); @@ -554,8 +549,8 @@ ssize_t ll_listxattr(struct dentry *dentry, char *buffer, size_t size) LASSERT(inode); - CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", - inode->i_ino, inode->i_generation, inode); + CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p)\n", + PFID(ll_inode2fid(inode)), inode); ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_LISTXATTR, 1); diff --git a/drivers/staging/lustre/lustre/llite/xattr_cache.c b/drivers/staging/lustre/lustre/llite/xattr_cache.c index 3480ce2bb..d7e17abbe 100644 --- a/drivers/staging/lustre/lustre/llite/xattr_cache.c +++ b/drivers/staging/lustre/lustre/llite/xattr_cache.c @@ -229,7 +229,6 @@ static int ll_xattr_cache_valid(struct ll_inode_info *lli) */ static int ll_xattr_cache_destroy_locked(struct ll_inode_info *lli) { - if (!ll_xattr_cache_valid(lli)) return 0; diff --git a/drivers/staging/lustre/lustre/lmv/lmv_internal.h b/drivers/staging/lustre/lustre/lmv/lmv_internal.h index 8a0087190..7007e4c48 100644 --- a/drivers/staging/lustre/lustre/lmv/lmv_internal.h +++ b/drivers/staging/lustre/lustre/lmv/lmv_internal.h @@ -42,9 +42,6 @@ #define LMV_MAX_TGT_COUNT 128 -#define lmv_init_lock(lmv) mutex_lock(&lmv->init_mutex) -#define lmv_init_unlock(lmv) mutex_unlock(&lmv->init_mutex) - #define LL_IT2STR(it) \ ((it) ? ldlm_it2str((it)->it_op) : "0") diff --git a/drivers/staging/lustre/lustre/lmv/lmv_obd.c b/drivers/staging/lustre/lustre/lmv/lmv_obd.c index 9abb7c2b9..9e31f6b03 100644 --- a/drivers/staging/lustre/lustre/lmv/lmv_obd.c +++ b/drivers/staging/lustre/lustre/lmv/lmv_obd.c @@ -132,8 +132,9 @@ static int lmv_set_mdc_active(struct lmv_obd *lmv, struct obd_uuid *uuid, static struct obd_uuid *lmv_get_uuid(struct obd_export *exp) { struct lmv_obd *lmv = &exp->exp_obd->u.lmv; + struct lmv_tgt_desc *tgt = lmv->tgts[0]; - return obd_get_uuid(lmv->tgts[0]->ltd_exp); + return tgt ? obd_get_uuid(tgt->ltd_exp) : NULL; } static int lmv_notify(struct obd_device *obd, struct obd_device *watched, @@ -249,7 +250,6 @@ static int lmv_connect(const struct lu_env *env, static void lmv_set_timeouts(struct obd_device *obd) { - struct lmv_tgt_desc *tgt; struct lmv_obd *lmv; int i; @@ -261,8 +261,10 @@ static void lmv_set_timeouts(struct obd_device *obd) return; for (i = 0; i < lmv->desc.ld_tgt_count; i++) { + struct lmv_tgt_desc *tgt = lmv->tgts[i]; + tgt = lmv->tgts[i]; - if (!tgt || !tgt->ltd_exp || tgt->ltd_active == 0) + if (!tgt || !tgt->ltd_exp || !tgt->ltd_active) continue; obd_set_info_async(NULL, tgt->ltd_exp, sizeof(KEY_INTERMDS), @@ -302,13 +304,14 @@ static int lmv_init_ea_size(struct obd_export *exp, int easize, return 0; for (i = 0; i < lmv->desc.ld_tgt_count; i++) { - if (!lmv->tgts[i] || !lmv->tgts[i]->ltd_exp || - lmv->tgts[i]->ltd_active == 0) { + struct lmv_tgt_desc *tgt = lmv->tgts[i]; + + if (!tgt || !tgt->ltd_exp || !tgt->ltd_active) { CWARN("%s: NULL export for %d\n", obd->obd_name, i); continue; } - rc = md_init_ea_size(lmv->tgts[i]->ltd_exp, easize, def_easize, + rc = md_init_ea_size(tgt->ltd_exp, easize, def_easize, cookiesize, def_cookiesize); if (rc) { CERROR("%s: obd_init_ea_size() failed on MDT target %d: rc = %d\n", @@ -425,7 +428,7 @@ static int lmv_add_target(struct obd_device *obd, struct obd_uuid *uuidp, CDEBUG(D_CONFIG, "Target uuid: %s. index %d\n", uuidp->uuid, index); - lmv_init_lock(lmv); + mutex_lock(&lmv->lmv_init_mutex); if (lmv->desc.ld_tgt_count == 0) { struct obd_device *mdc_obd; @@ -433,7 +436,7 @@ static int lmv_add_target(struct obd_device *obd, struct obd_uuid *uuidp, mdc_obd = class_find_client_obd(uuidp, LUSTRE_MDC_NAME, &obd->obd_uuid); if (!mdc_obd) { - lmv_init_unlock(lmv); + mutex_unlock(&lmv->lmv_init_mutex); CERROR("%s: Target %s not attached: rc = %d\n", obd->obd_name, uuidp->uuid, -EINVAL); return -EINVAL; @@ -445,7 +448,7 @@ static int lmv_add_target(struct obd_device *obd, struct obd_uuid *uuidp, CERROR("%s: UUID %s already assigned at LOV target index %d: rc = %d\n", obd->obd_name, obd_uuid2str(&tgt->ltd_uuid), index, -EEXIST); - lmv_init_unlock(lmv); + mutex_unlock(&lmv->lmv_init_mutex); return -EEXIST; } @@ -459,7 +462,7 @@ static int lmv_add_target(struct obd_device *obd, struct obd_uuid *uuidp, newsize <<= 1; newtgts = kcalloc(newsize, sizeof(*newtgts), GFP_NOFS); if (!newtgts) { - lmv_init_unlock(lmv); + mutex_unlock(&lmv->lmv_init_mutex); return -ENOMEM; } @@ -481,7 +484,7 @@ static int lmv_add_target(struct obd_device *obd, struct obd_uuid *uuidp, tgt = kzalloc(sizeof(*tgt), GFP_NOFS); if (!tgt) { - lmv_init_unlock(lmv); + mutex_unlock(&lmv->lmv_init_mutex); return -ENOMEM; } @@ -507,7 +510,7 @@ static int lmv_add_target(struct obd_device *obd, struct obd_uuid *uuidp, } } - lmv_init_unlock(lmv); + mutex_unlock(&lmv->lmv_init_mutex); return rc; } @@ -522,18 +525,27 @@ int lmv_check_connect(struct obd_device *obd) if (lmv->connected) return 0; - lmv_init_lock(lmv); + mutex_lock(&lmv->lmv_init_mutex); if (lmv->connected) { - lmv_init_unlock(lmv); + mutex_unlock(&lmv->lmv_init_mutex); return 0; } if (lmv->desc.ld_tgt_count == 0) { - lmv_init_unlock(lmv); + mutex_unlock(&lmv->lmv_init_mutex); CERROR("%s: no targets configured.\n", obd->obd_name); return -EINVAL; } + LASSERT(lmv->tgts); + + if (!lmv->tgts[0]) { + mutex_unlock(&lmv->lmv_init_mutex); + CERROR("%s: no target configured for index 0.\n", + obd->obd_name); + return -EINVAL; + } + CDEBUG(D_CONFIG, "Time to connect %s to %s\n", lmv->cluuid.uuid, obd->obd_name); @@ -551,7 +563,7 @@ int lmv_check_connect(struct obd_device *obd) lmv->connected = 1; easize = lmv_get_easize(lmv); lmv_init_ea_size(obd->obd_self_export, easize, 0, 0, 0); - lmv_init_unlock(lmv); + mutex_unlock(&lmv->lmv_init_mutex); return 0; out_disc: @@ -572,7 +584,7 @@ int lmv_check_connect(struct obd_device *obd) } } class_disconnect(lmv->exp); - lmv_init_unlock(lmv); + mutex_unlock(&lmv->lmv_init_mutex); return rc; } @@ -796,6 +808,11 @@ static int lmv_hsm_ct_unregister(struct lmv_obd *lmv, unsigned int cmd, int len, /* unregister request (call from llapi_hsm_copytool_fini) */ for (i = 0; i < lmv->desc.ld_tgt_count; i++) { + struct lmv_tgt_desc *tgt = lmv->tgts[i]; + + if (!tgt || !tgt->ltd_exp) + continue; + /* best effort: try to clean as much as possible * (continue on error) */ @@ -825,20 +842,28 @@ static int lmv_hsm_ct_register(struct lmv_obd *lmv, unsigned int cmd, int len, * except if it because of inactive target. */ for (i = 0; i < lmv->desc.ld_tgt_count; i++) { - err = obd_iocontrol(cmd, lmv->tgts[i]->ltd_exp, len, lk, uarg); + struct lmv_tgt_desc *tgt = lmv->tgts[i]; + + if (!tgt || !tgt->ltd_exp) + continue; + + err = obd_iocontrol(cmd, tgt->ltd_exp, len, lk, uarg); if (err) { - if (lmv->tgts[i]->ltd_active) { + if (tgt->ltd_active) { /* permanent error */ CERROR("error: iocontrol MDC %s on MDTidx %d cmd %x: err = %d\n", - lmv->tgts[i]->ltd_uuid.uuid, - i, cmd, err); + tgt->ltd_uuid.uuid, i, cmd, err); rc = err; lk->lk_flags |= LK_FLG_STOP; /* unregister from previous MDS */ - for (j = 0; j < i; j++) - obd_iocontrol(cmd, - lmv->tgts[j]->ltd_exp, - len, lk, uarg); + for (j = 0; j < i; j++) { + tgt = lmv->tgts[j]; + + if (!tgt || !tgt->ltd_exp) + continue; + obd_iocontrol(cmd, tgt->ltd_exp, len, + lk, uarg); + } return rc; } /* else: transient error. @@ -877,6 +902,7 @@ static int lmv_iocontrol(unsigned int cmd, struct obd_export *exp, { struct obd_device *obddev = class_exp2obd(exp); struct lmv_obd *lmv = &obddev->u.lmv; + struct lmv_tgt_desc *tgt = NULL; int i = 0; int rc = 0; int set = 0; @@ -896,10 +922,11 @@ static int lmv_iocontrol(unsigned int cmd, struct obd_export *exp, if (index >= count) return -ENODEV; - if (!lmv->tgts[index] || lmv->tgts[index]->ltd_active == 0) + tgt = lmv->tgts[index]; + if (!tgt || !tgt->ltd_active) return -ENODATA; - mdc_obd = class_exp2obd(lmv->tgts[index]->ltd_exp); + mdc_obd = class_exp2obd(tgt->ltd_exp); if (!mdc_obd) return -EINVAL; @@ -909,7 +936,7 @@ static int lmv_iocontrol(unsigned int cmd, struct obd_export *exp, (int)sizeof(struct obd_uuid)))) return -EFAULT; - rc = obd_statfs(NULL, lmv->tgts[index]->ltd_exp, &stat_buf, + rc = obd_statfs(NULL, tgt->ltd_exp, &stat_buf, cfs_time_shift_64(-OBD_STATFS_CACHE_SECONDS), 0); if (rc) @@ -922,11 +949,10 @@ static int lmv_iocontrol(unsigned int cmd, struct obd_export *exp, } case OBD_IOC_QUOTACTL: { struct if_quotactl *qctl = karg; - struct lmv_tgt_desc *tgt = NULL; struct obd_quotactl *oqctl; if (qctl->qc_valid == QC_MDTIDX) { - if (qctl->qc_idx < 0 || count <= qctl->qc_idx) + if (count <= qctl->qc_idx) return -EINVAL; tgt = lmv->tgts[qctl->qc_idx]; @@ -975,18 +1001,18 @@ static int lmv_iocontrol(unsigned int cmd, struct obd_export *exp, if (icc->icc_mdtindex >= count) return -ENODEV; - if (!lmv->tgts[icc->icc_mdtindex] || - !lmv->tgts[icc->icc_mdtindex]->ltd_exp || - lmv->tgts[icc->icc_mdtindex]->ltd_active == 0) + tgt = lmv->tgts[icc->icc_mdtindex]; + if (!tgt || !tgt->ltd_exp || !tgt->ltd_active) return -ENODEV; - rc = obd_iocontrol(cmd, lmv->tgts[icc->icc_mdtindex]->ltd_exp, - sizeof(*icc), icc, NULL); + rc = obd_iocontrol(cmd, tgt->ltd_exp, sizeof(*icc), icc, NULL); break; } case LL_IOC_GET_CONNECT_FLAGS: { - if (!lmv->tgts[0]) + tgt = lmv->tgts[0]; + + if (!tgt || !tgt->ltd_exp) return -ENODATA; - rc = obd_iocontrol(cmd, lmv->tgts[0]->ltd_exp, len, karg, uarg); + rc = obd_iocontrol(cmd, tgt->ltd_exp, len, karg, uarg); break; } case OBD_IOC_FID2PATH: { @@ -997,7 +1023,6 @@ static int lmv_iocontrol(unsigned int cmd, struct obd_export *exp, case LL_IOC_HSM_STATE_SET: case LL_IOC_HSM_ACTION: { struct md_op_data *op_data = karg; - struct lmv_tgt_desc *tgt; tgt = lmv_find_target(lmv, &op_data->op_fid1); if (IS_ERR(tgt)) @@ -1011,7 +1036,6 @@ static int lmv_iocontrol(unsigned int cmd, struct obd_export *exp, } case LL_IOC_HSM_PROGRESS: { const struct hsm_progress_kernel *hpk = karg; - struct lmv_tgt_desc *tgt; tgt = lmv_find_target(lmv, &hpk->hpk_fid); if (IS_ERR(tgt)) @@ -1021,7 +1045,6 @@ static int lmv_iocontrol(unsigned int cmd, struct obd_export *exp, } case LL_IOC_HSM_REQUEST: { struct hsm_user_request *hur = karg; - struct lmv_tgt_desc *tgt; unsigned int reqcount = hur->hur_request.hr_itemcount; if (reqcount == 0) @@ -1044,7 +1067,11 @@ static int lmv_iocontrol(unsigned int cmd, struct obd_export *exp, int rc1; struct hsm_user_request *req; - nr = lmv_hsm_req_count(lmv, hur, lmv->tgts[i]); + tgt = lmv->tgts[i]; + if (!tgt || !tgt->ltd_exp) + continue; + + nr = lmv_hsm_req_count(lmv, hur, tgt); if (nr == 0) /* nothing for this MDS */ continue; @@ -1056,10 +1083,10 @@ static int lmv_iocontrol(unsigned int cmd, struct obd_export *exp, if (!req) return -ENOMEM; - lmv_hsm_req_build(lmv, hur, lmv->tgts[i], req); + lmv_hsm_req_build(lmv, hur, tgt, req); - rc1 = obd_iocontrol(cmd, lmv->tgts[i]->ltd_exp, - reqlen, req, uarg); + rc1 = obd_iocontrol(cmd, tgt->ltd_exp, reqlen, + req, uarg); if (rc1 != 0 && rc == 0) rc = rc1; kvfree(req); @@ -1103,27 +1130,27 @@ static int lmv_iocontrol(unsigned int cmd, struct obd_export *exp, struct obd_device *mdc_obd; int err; - if (!lmv->tgts[i] || !lmv->tgts[i]->ltd_exp) + tgt = lmv->tgts[i]; + if (!tgt || !tgt->ltd_exp) continue; /* ll_umount_begin() sets force flag but for lmv, not * mdc. Let's pass it through */ - mdc_obd = class_exp2obd(lmv->tgts[i]->ltd_exp); + mdc_obd = class_exp2obd(tgt->ltd_exp); mdc_obd->obd_force = obddev->obd_force; - err = obd_iocontrol(cmd, lmv->tgts[i]->ltd_exp, len, - karg, uarg); + err = obd_iocontrol(cmd, tgt->ltd_exp, len, karg, uarg); if (err == -ENODATA && cmd == OBD_IOC_POLL_QUOTACHECK) { return err; } else if (err) { - if (lmv->tgts[i]->ltd_active) { + if (tgt->ltd_active) { CERROR("error: iocontrol MDC %s on MDTidx %d cmd %x: err = %d\n", - lmv->tgts[i]->ltd_uuid.uuid, - i, cmd, err); + tgt->ltd_uuid.uuid, i, cmd, err); if (!rc) rc = err; } - } else + } else { set = 1; + } } if (!set && !rc) rc = -EIO; @@ -1269,7 +1296,7 @@ static int lmv_setup(struct obd_device *obd, struct lustre_cfg *lcfg) lmv->lmv_placement = PLACEMENT_CHAR_POLICY; spin_lock_init(&lmv->lmv_lock); - mutex_init(&lmv->init_mutex); + mutex_init(&lmv->lmv_init_mutex); lprocfs_lmv_init_vars(&lvars); @@ -2071,7 +2098,7 @@ static void lmv_adjust_dirpages(struct page **pages, int ncfspgs, int nlupgs) dp = (struct lu_dirpage *)((char *)dp + LU_PAGE_SIZE); /* Check if we've reached the end of the CFS_PAGE. */ - if (!((unsigned long)dp & ~CFS_PAGE_MASK)) + if (!((unsigned long)dp & ~PAGE_MASK)) break; /* Save the hash and flags of this lu_dirpage. */ @@ -2268,7 +2295,6 @@ static int lmv_get_info(const struct lu_env *env, struct obd_export *exp, lmv = &obd->u.lmv; if (keylen >= strlen("remote_flag") && !strcmp(key, "remote_flag")) { - struct lmv_tgt_desc *tgt; int i; rc = lmv_check_connect(obd); @@ -2277,7 +2303,8 @@ static int lmv_get_info(const struct lu_env *env, struct obd_export *exp, LASSERT(*vallen == sizeof(__u32)); for (i = 0; i < lmv->desc.ld_tgt_count; i++) { - tgt = lmv->tgts[i]; + struct lmv_tgt_desc *tgt = lmv->tgts[i]; + /* * All tgts should be connected when this gets called. */ @@ -2466,12 +2493,13 @@ static int lmv_cancel_unused(struct obd_export *exp, const struct lu_fid *fid, LASSERT(fid); for (i = 0; i < lmv->desc.ld_tgt_count; i++) { - if (!lmv->tgts[i] || !lmv->tgts[i]->ltd_exp || - lmv->tgts[i]->ltd_active == 0) + struct lmv_tgt_desc *tgt = lmv->tgts[i]; + + if (!tgt || !tgt->ltd_exp || !tgt->ltd_active) continue; - err = md_cancel_unused(lmv->tgts[i]->ltd_exp, fid, - policy, mode, flags, opaque); + err = md_cancel_unused(tgt->ltd_exp, fid, policy, mode, flags, + opaque); if (!rc) rc = err; } @@ -2482,9 +2510,13 @@ static int lmv_set_lock_data(struct obd_export *exp, __u64 *lockh, void *data, __u64 *bits) { struct lmv_obd *lmv = &exp->exp_obd->u.lmv; + struct lmv_tgt_desc *tgt = lmv->tgts[0]; int rc; - rc = md_set_lock_data(lmv->tgts[0]->ltd_exp, lockh, data, bits); + if (!tgt || !tgt->ltd_exp) + return -EINVAL; + + rc = md_set_lock_data(tgt->ltd_exp, lockh, data, bits); return rc; } @@ -2509,12 +2541,13 @@ static enum ldlm_mode lmv_lock_match(struct obd_export *exp, __u64 flags, * one fid was created in. */ for (i = 0; i < lmv->desc.ld_tgt_count; i++) { - if (!lmv->tgts[i] || !lmv->tgts[i]->ltd_exp || - lmv->tgts[i]->ltd_active == 0) + struct lmv_tgt_desc *tgt = lmv->tgts[i]; + + if (!tgt || !tgt->ltd_exp || !tgt->ltd_active) continue; - rc = md_lock_match(lmv->tgts[i]->ltd_exp, flags, fid, - type, policy, mode, lockh); + rc = md_lock_match(tgt->ltd_exp, flags, fid, type, policy, mode, + lockh); if (rc) return rc; } @@ -2529,18 +2562,24 @@ static int lmv_get_lustre_md(struct obd_export *exp, struct lustre_md *md) { struct lmv_obd *lmv = &exp->exp_obd->u.lmv; + struct lmv_tgt_desc *tgt = lmv->tgts[0]; - return md_get_lustre_md(lmv->tgts[0]->ltd_exp, req, dt_exp, md_exp, md); + if (!tgt || !tgt->ltd_exp) + return -EINVAL; + return md_get_lustre_md(tgt->ltd_exp, req, dt_exp, md_exp, md); } static int lmv_free_lustre_md(struct obd_export *exp, struct lustre_md *md) { struct obd_device *obd = exp->exp_obd; struct lmv_obd *lmv = &obd->u.lmv; + struct lmv_tgt_desc *tgt = lmv->tgts[0]; if (md->mea) obd_free_memmd(exp, (void *)&md->mea); - return md_free_lustre_md(lmv->tgts[0]->ltd_exp, md); + if (!tgt || !tgt->ltd_exp) + return -EINVAL; + return md_free_lustre_md(tgt->ltd_exp, md); } static int lmv_set_open_replay_data(struct obd_export *exp, @@ -2649,7 +2688,8 @@ static int lmv_quotactl(struct obd_device *unused, struct obd_export *exp, int rc = 0, i; __u64 curspace, curinodes; - if (!lmv->desc.ld_tgt_count || !tgt->ltd_active) { + if (!tgt || !tgt->ltd_exp || !tgt->ltd_active || + !lmv->desc.ld_tgt_count) { CERROR("master lmv inactive\n"); return -EIO; } @@ -2665,12 +2705,8 @@ static int lmv_quotactl(struct obd_device *unused, struct obd_export *exp, tgt = lmv->tgts[i]; - if (!tgt || !tgt->ltd_exp || tgt->ltd_active == 0) + if (!tgt || !tgt->ltd_exp || !tgt->ltd_active) continue; - if (!tgt->ltd_active) { - CDEBUG(D_HA, "mdt %d is inactive.\n", i); - continue; - } err = obd_quotactl(tgt->ltd_exp, oqctl); if (err) { diff --git a/drivers/staging/lustre/lustre/lov/lov_cl_internal.h b/drivers/staging/lustre/lustre/lov/lov_cl_internal.h index 7dd3162b5..ac9744e88 100644 --- a/drivers/staging/lustre/lustre/lov/lov_cl_internal.h +++ b/drivers/staging/lustre/lustre/lov/lov_cl_internal.h @@ -73,19 +73,6 @@ * - top-page keeps a reference to its sub-page, and destroys it when it * is destroyed. * - * - sub-lock keep a reference to its top-locks. Top-lock keeps a - * reference (and a hold, see cl_lock_hold()) on its sub-locks when it - * actively using them (that is, in cl_lock_state::CLS_QUEUING, - * cl_lock_state::CLS_ENQUEUED, cl_lock_state::CLS_HELD states). When - * moving into cl_lock_state::CLS_CACHED state, top-lock releases a - * hold. From this moment top-lock has only a 'weak' reference to its - * sub-locks. This reference is protected by top-lock - * cl_lock::cll_guard, and will be automatically cleared by the sub-lock - * when the latter is destroyed. When a sub-lock is canceled, a - * reference to it is removed from the top-lock array, and top-lock is - * moved into CLS_NEW state. It is guaranteed that all sub-locks exist - * while their top-lock is in CLS_HELD or CLS_CACHED states. - * * - IO's are not reference counted. * * To implement a connection between top and sub entities, lov layer is split @@ -280,25 +267,18 @@ struct lov_object { struct task_struct *lo_owner; }; -/** - * Flags that top-lock can set on each of its sub-locks. - */ -enum lov_sub_flags { - /** Top-lock acquired a hold (cl_lock_hold()) on a sub-lock. */ - LSF_HELD = 1 << 0 -}; - /** * State lov_lock keeps for each sub-lock. */ struct lov_lock_sub { /** sub-lock itself */ - struct lovsub_lock *sub_lock; - /** An array of per-sub-lock flags, taken from enum lov_sub_flags */ - unsigned sub_flags; + struct cl_lock sub_lock; + /** Set if the sublock has ever been enqueued, meaning it may + * hold resources of underlying layers + */ + unsigned int sub_is_enqueued:1, + sub_initialized:1; int sub_stripe; - struct cl_lock_descr sub_descr; - struct cl_lock_descr sub_got; }; /** @@ -308,59 +288,8 @@ struct lov_lock { struct cl_lock_slice lls_cl; /** Number of sub-locks in this lock */ int lls_nr; - /** - * Number of existing sub-locks. - */ - unsigned lls_nr_filled; - /** - * Set when sub-lock was canceled, while top-lock was being - * used, or unused. - */ - unsigned int lls_cancel_race:1; - /** - * An array of sub-locks - * - * There are two issues with managing sub-locks: - * - * - sub-locks are concurrently canceled, and - * - * - sub-locks are shared with other top-locks. - * - * To manage cancellation, top-lock acquires a hold on a sublock - * (lov_sublock_adopt()) when the latter is inserted into - * lov_lock::lls_sub[]. This hold is released (lov_sublock_release()) - * when top-lock is going into CLS_CACHED state or destroyed. Hold - * prevents sub-lock from cancellation. - * - * Sub-lock sharing means, among other things, that top-lock that is - * in the process of creation (i.e., not yet inserted into lock list) - * is already accessible to other threads once at least one of its - * sub-locks is created, see lov_lock_sub_init(). - * - * Sub-lock can be in one of the following states: - * - * - doesn't exist, lov_lock::lls_sub[]::sub_lock == NULL. Such - * sub-lock was either never created (top-lock is in CLS_NEW - * state), or it was created, then canceled, then destroyed - * (lov_lock_unlink() cleared sub-lock pointer in the top-lock). - * - * - sub-lock exists and is on - * hold. (lov_lock::lls_sub[]::sub_flags & LSF_HELD). This is a - * normal state of a sub-lock in CLS_HELD and CLS_CACHED states - * of a top-lock. - * - * - sub-lock exists, but is not held by the top-lock. This - * happens after top-lock released a hold on sub-locks before - * going into cache (lov_lock_unuse()). - * - * \todo To support wide-striping, array has to be replaced with a set - * of queues to avoid scanning. - */ - struct lov_lock_sub *lls_sub; - /** - * Original description with which lock was enqueued. - */ - struct cl_lock_descr lls_orig; + /** sublock array */ + struct lov_lock_sub lls_sub[0]; }; struct lov_page { @@ -444,8 +373,9 @@ struct lov_thread_info { struct cl_lock_descr lti_ldescr; struct ost_lvb lti_lvb; struct cl_2queue lti_cl2q; - struct cl_lock_closure lti_closure; + struct cl_page_list lti_plist; wait_queue_t lti_waiter; + struct cl_attr lti_attr; }; /** @@ -611,14 +541,13 @@ int lov_sublock_modify(const struct lu_env *env, struct lov_lock *lov, const struct cl_lock_descr *d, int idx); int lov_page_init(const struct lu_env *env, struct cl_object *ob, - struct cl_page *page, struct page *vmpage); + struct cl_page *page, pgoff_t index); int lovsub_page_init(const struct lu_env *env, struct cl_object *ob, - struct cl_page *page, struct page *vmpage); - + struct cl_page *page, pgoff_t index); int lov_page_init_empty(const struct lu_env *env, struct cl_object *obj, - struct cl_page *page, struct page *vmpage); + struct cl_page *page, pgoff_t index); int lov_page_init_raid0(const struct lu_env *env, struct cl_object *obj, - struct cl_page *page, struct page *vmpage); + struct cl_page *page, pgoff_t index); struct lu_object *lov_object_alloc(const struct lu_env *env, const struct lu_object_header *hdr, struct lu_device *dev); @@ -631,6 +560,7 @@ struct lov_lock_link *lov_lock_link_find(const struct lu_env *env, struct lovsub_lock *sub); struct lov_io_sub *lov_page_subio(const struct lu_env *env, struct lov_io *lio, const struct cl_page_slice *slice); +int lov_page_stripe(const struct cl_page *page); #define lov_foreach_target(lov, var) \ for (var = 0; var < lov_targets_nr(lov); ++var) @@ -789,11 +719,6 @@ static inline struct lovsub_req *cl2lovsub_req(const struct cl_req_slice *slice) return container_of0(slice, struct lovsub_req, lsrq_cl); } -static inline struct cl_page *lov_sub_page(const struct cl_page_slice *slice) -{ - return slice->cpl_page->cp_child; -} - static inline struct lov_io *cl2lov_io(const struct lu_env *env, const struct cl_io_slice *ios) { diff --git a/drivers/staging/lustre/lustre/lov/lov_dev.c b/drivers/staging/lustre/lustre/lov/lov_dev.c index 532ef87df..dae8e89bc 100644 --- a/drivers/staging/lustre/lustre/lov/lov_dev.c +++ b/drivers/staging/lustre/lustre/lov/lov_dev.c @@ -143,9 +143,7 @@ static void *lov_key_init(const struct lu_context *ctx, struct lov_thread_info *info; info = kmem_cache_zalloc(lov_thread_kmem, GFP_NOFS); - if (info) - INIT_LIST_HEAD(&info->lti_closure.clc_list); - else + if (!info) info = ERR_PTR(-ENOMEM); return info; } @@ -155,7 +153,6 @@ static void lov_key_fini(const struct lu_context *ctx, { struct lov_thread_info *info = data; - LINVRNT(list_empty(&info->lti_closure.clc_list)); kmem_cache_free(lov_thread_kmem, info); } @@ -265,8 +262,9 @@ static int lov_req_init(const struct lu_env *env, struct cl_device *dev, if (lr) { cl_req_slice_add(req, &lr->lr_cl, dev, &lov_req_ops); result = 0; - } else + } else { result = -ENOMEM; + } return result; } @@ -335,14 +333,15 @@ static struct lov_device_emerg **lov_emerg_alloc(int nr) cl_page_list_init(&em->emrg_page_list); em->emrg_env = cl_env_alloc(&em->emrg_refcheck, LCT_REMEMBER | LCT_NOREF); - if (!IS_ERR(em->emrg_env)) + if (!IS_ERR(em->emrg_env)) { em->emrg_env->le_ctx.lc_cookie = 0x2; - else { + } else { result = PTR_ERR(em->emrg_env); em->emrg_env = NULL; } - } else + } else { result = -ENOMEM; + } } if (result != 0) { lov_emerg_free(emerg, nr); diff --git a/drivers/staging/lustre/lustre/lov/lov_ea.c b/drivers/staging/lustre/lustre/lov/lov_ea.c index b6529401c..460f0fa5e 100644 --- a/drivers/staging/lustre/lustre/lov/lov_ea.c +++ b/drivers/staging/lustre/lustre/lov/lov_ea.c @@ -48,11 +48,6 @@ #include "lov_internal.h" -struct lovea_unpack_args { - struct lov_stripe_md *lsm; - int cursor; -}; - static int lsm_lmm_verify_common(struct lov_mds_md *lmm, int lmm_bytes, __u16 stripe_count) { diff --git a/drivers/staging/lustre/lustre/lov/lov_internal.h b/drivers/staging/lustre/lustre/lov/lov_internal.h index 590f9326a..eef9afac8 100644 --- a/drivers/staging/lustre/lustre/lov/lov_internal.h +++ b/drivers/staging/lustre/lustre/lov/lov_internal.h @@ -72,6 +72,21 @@ }) #endif +#define pool_tgt_size(p) ((p)->pool_obds.op_size) +#define pool_tgt_count(p) ((p)->pool_obds.op_count) +#define pool_tgt_array(p) ((p)->pool_obds.op_array) +#define pool_tgt_rw_sem(p) ((p)->pool_obds.op_rw_sem) + +struct pool_desc { + char pool_name[LOV_MAXPOOLNAME + 1]; + struct ost_pool pool_obds; + atomic_t pool_refcount; + struct hlist_node pool_hash; /* access by poolname */ + struct list_head pool_list; /* serial access */ + struct dentry *pool_debugfs_entry; /* file in debugfs */ + struct obd_device *pool_lobd; /* owner */ +}; + struct lov_request { struct obd_info rq_oi; struct lov_request_set *rq_rqset; @@ -88,7 +103,6 @@ struct lov_request { }; struct lov_request_set { - struct ldlm_enqueue_info *set_ei; struct obd_info *set_oi; atomic_t set_refcount; struct obd_export *set_exp; @@ -102,10 +116,8 @@ struct lov_request_set { atomic_t set_finish_checked; struct llog_cookie *set_cookies; int set_cookie_sent; - struct obd_trans_info *set_oti; struct list_head set_list; wait_queue_head_t set_waitq; - spinlock_t set_lock; }; extern struct kmem_cache *lov_oinfo_slab; @@ -114,12 +126,6 @@ extern struct lu_kmem_descr lov_caches[]; void lov_finish_set(struct lov_request_set *set); -static inline void lov_get_reqset(struct lov_request_set *set) -{ - LASSERT(atomic_read(&set->set_refcount) > 0); - atomic_inc(&set->set_refcount); -} - static inline void lov_put_reqset(struct lov_request_set *set) { if (atomic_dec_and_test(&set->set_refcount)) @@ -146,10 +152,8 @@ int lov_stripe_intersects(struct lov_stripe_md *lsm, int stripeno, u64 start, u64 end, u64 *obd_start, u64 *obd_end); int lov_stripe_number(struct lov_stripe_md *lsm, u64 lov_off); - -/* lov_qos.c */ -#define LOV_USES_ASSIGNED_STRIPE 0 -#define LOV_USES_DEFAULT_STRIPE 1 +pgoff_t lov_stripe_pgoff(struct lov_stripe_md *lsm, pgoff_t stripe_index, + int stripe); /* lov_request.c */ int lov_update_common_set(struct lov_request_set *set, @@ -176,6 +180,8 @@ int lov_fini_statfs_set(struct lov_request_set *set); int lov_statfs_interpret(struct ptlrpc_request_set *rqset, void *data, int rc); /* lov_obd.c */ +void lov_stripe_lock(struct lov_stripe_md *md); +void lov_stripe_unlock(struct lov_stripe_md *md); void lov_fix_desc(struct lov_desc *desc); void lov_fix_desc_stripe_size(__u64 *val); void lov_fix_desc_stripe_count(__u32 *val); @@ -231,8 +237,6 @@ int lov_pool_new(struct obd_device *obd, char *poolname); int lov_pool_del(struct obd_device *obd, char *poolname); int lov_pool_add(struct obd_device *obd, char *poolname, char *ostname); int lov_pool_remove(struct obd_device *obd, char *poolname, char *ostname); -struct pool_desc *lov_find_pool(struct lov_obd *lov, char *poolname); -int lov_check_index_in_pool(__u32 idx, struct pool_desc *pool); void lov_pool_putref(struct pool_desc *pool); static inline struct lov_stripe_md *lsm_addref(struct lov_stripe_md *lsm) diff --git a/drivers/staging/lustre/lustre/lov/lov_io.c b/drivers/staging/lustre/lustre/lov/lov_io.c index 4296aacd8..86cb3f8f9 100644 --- a/drivers/staging/lustre/lustre/lov/lov_io.c +++ b/drivers/staging/lustre/lustre/lov/lov_io.c @@ -225,8 +225,9 @@ struct lov_io_sub *lov_sub_get(const struct lu_env *env, if (!sub->sub_io_initialized) { sub->sub_stripe = stripe; rc = lov_io_sub_init(env, lio, sub); - } else + } else { rc = 0; + } if (rc == 0) lov_sub_enter(sub); else @@ -245,13 +246,15 @@ void lov_sub_put(struct lov_io_sub *sub) * */ -static int lov_page_stripe(const struct cl_page *page) +int lov_page_stripe(const struct cl_page *page) { struct lovsub_object *subobj; + const struct cl_page_slice *slice; + + slice = cl_page_at(page, &lovsub_device_type); + LASSERT(slice->cpl_obj); - subobj = lu2lovsub( - lu_object_locate(page->cp_child->cp_obj->co_lu.lo_header, - &lovsub_device_type)); + subobj = cl2lovsub(slice->cpl_obj); return subobj->lso_index; } @@ -274,10 +277,11 @@ struct lov_io_sub *lov_page_subio(const struct lu_env *env, struct lov_io *lio, static int lov_io_subio_init(const struct lu_env *env, struct lov_io *lio, struct cl_io *io) { - struct lov_stripe_md *lsm = lio->lis_object->lo_lsm; + struct lov_stripe_md *lsm; int result; LASSERT(lio->lis_object); + lsm = lio->lis_object->lo_lsm; /* * Need to be optimized, we can't afford to allocate a piece of memory @@ -292,8 +296,9 @@ static int lov_io_subio_init(const struct lu_env *env, struct lov_io *lio, lio->lis_single_subio_index = -1; lio->lis_active_subios = 0; result = 0; - } else + } else { result = -ENOMEM; + } return result; } @@ -411,8 +416,9 @@ static int lov_io_iter_init(const struct lu_env *env, lov_sub_put(sub); CDEBUG(D_VFSTRACE, "shrink: %d [%llu, %llu)\n", stripe, start, end); - } else + } else { rc = PTR_ERR(sub); + } if (!rc) list_add_tail(&sub->sub_linkage, &lio->lis_active); @@ -436,7 +442,6 @@ static int lov_io_rw_iter_init(const struct lu_env *env, /* fast path for common case. */ if (lio->lis_nr_subios != 1 && !cl_io_is_append(io)) { - lov_do_div64(start, ssize); next = (start + 1) * ssize; if (next <= start * ssize) @@ -543,13 +548,6 @@ static void lov_io_unlock(const struct lu_env *env, LASSERT(rc == 0); } -static struct cl_page_list *lov_io_submit_qin(struct lov_device *ld, - struct cl_page_list *qin, - int idx, int alloc) -{ - return alloc ? &qin[idx] : &ld->ld_emrg[idx]->emrg_page_list; -} - /** * lov implementation of cl_operations::cio_submit() method. It takes a list * of pages in \a queue, splits it into per-stripe sub-lists, invokes @@ -569,25 +567,17 @@ static int lov_io_submit(const struct lu_env *env, const struct cl_io_slice *ios, enum cl_req_type crt, struct cl_2queue *queue) { - struct lov_io *lio = cl2lov_io(env, ios); - struct lov_object *obj = lio->lis_object; - struct lov_device *ld = lu2lov_dev(lov2cl(obj)->co_lu.lo_dev); - struct cl_page_list *qin = &queue->c2_qin; - struct cl_2queue *cl2q = &lov_env_info(env)->lti_cl2q; - struct cl_page_list *stripes_qin = NULL; + struct cl_page_list *qin = &queue->c2_qin; + struct lov_io *lio = cl2lov_io(env, ios); + struct lov_io_sub *sub; + struct cl_page_list *plist = &lov_env_info(env)->lti_plist; struct cl_page *page; - struct cl_page *tmp; int stripe; -#define QIN(stripe) lov_io_submit_qin(ld, stripes_qin, stripe, alloc) - int rc = 0; - int alloc = - !(current->flags & PF_MEMALLOC); if (lio->lis_active_subios == 1) { int idx = lio->lis_single_subio_index; - struct lov_io_sub *sub; LASSERT(idx < lio->lis_nr_subios); sub = lov_sub_get(env, lio, idx); @@ -600,119 +590,120 @@ static int lov_io_submit(const struct lu_env *env, } LASSERT(lio->lis_subs); - if (alloc) { - stripes_qin = - libcfs_kvzalloc(sizeof(*stripes_qin) * - lio->lis_nr_subios, - GFP_NOFS); - if (!stripes_qin) - return -ENOMEM; - - for (stripe = 0; stripe < lio->lis_nr_subios; stripe++) - cl_page_list_init(&stripes_qin[stripe]); - } else { - /* - * If we get here, it means pageout & swap doesn't help. - * In order to not make things worse, even don't try to - * allocate the memory with __GFP_NOWARN. -jay - */ - mutex_lock(&ld->ld_mutex); - lio->lis_mem_frozen = 1; - } - cl_2queue_init(cl2q); - cl_page_list_for_each_safe(page, tmp, qin) { - stripe = lov_page_stripe(page); - cl_page_list_move(QIN(stripe), qin, page); - } + cl_page_list_init(plist); + while (qin->pl_nr > 0) { + struct cl_2queue *cl2q = &lov_env_info(env)->lti_cl2q; - for (stripe = 0; stripe < lio->lis_nr_subios; stripe++) { - struct lov_io_sub *sub; - struct cl_page_list *sub_qin = QIN(stripe); + cl_2queue_init(cl2q); - if (list_empty(&sub_qin->pl_pages)) - continue; + page = cl_page_list_first(qin); + cl_page_list_move(&cl2q->c2_qin, qin, page); + + stripe = lov_page_stripe(page); + while (qin->pl_nr > 0) { + page = cl_page_list_first(qin); + if (stripe != lov_page_stripe(page)) + break; + + cl_page_list_move(&cl2q->c2_qin, qin, page); + } - cl_page_list_splice(sub_qin, &cl2q->c2_qin); sub = lov_sub_get(env, lio, stripe); if (!IS_ERR(sub)) { rc = cl_io_submit_rw(sub->sub_env, sub->sub_io, crt, cl2q); lov_sub_put(sub); - } else + } else { rc = PTR_ERR(sub); - cl_page_list_splice(&cl2q->c2_qin, &queue->c2_qin); + } + + cl_page_list_splice(&cl2q->c2_qin, plist); cl_page_list_splice(&cl2q->c2_qout, &queue->c2_qout); + cl_2queue_fini(env, cl2q); + if (rc != 0) break; } - for (stripe = 0; stripe < lio->lis_nr_subios; stripe++) { - struct cl_page_list *sub_qin = QIN(stripe); + cl_page_list_splice(plist, qin); + cl_page_list_fini(env, plist); - if (list_empty(&sub_qin->pl_pages)) - continue; + return rc; +} + +static int lov_io_commit_async(const struct lu_env *env, + const struct cl_io_slice *ios, + struct cl_page_list *queue, int from, int to, + cl_commit_cbt cb) +{ + struct cl_page_list *plist = &lov_env_info(env)->lti_plist; + struct lov_io *lio = cl2lov_io(env, ios); + struct lov_io_sub *sub; + struct cl_page *page; + int rc = 0; + + if (lio->lis_active_subios == 1) { + int idx = lio->lis_single_subio_index; - cl_page_list_splice(sub_qin, qin); + LASSERT(idx < lio->lis_nr_subios); + sub = lov_sub_get(env, lio, idx); + LASSERT(!IS_ERR(sub)); + LASSERT(sub->sub_io == &lio->lis_single_subio); + rc = cl_io_commit_async(sub->sub_env, sub->sub_io, queue, + from, to, cb); + lov_sub_put(sub); + return rc; } - if (alloc) { - kvfree(stripes_qin); - } else { - int i; + LASSERT(lio->lis_subs); - for (i = 0; i < lio->lis_nr_subios; i++) { - struct cl_io *cio = lio->lis_subs[i].sub_io; + cl_page_list_init(plist); + while (queue->pl_nr > 0) { + int stripe_to = to; + int stripe; - if (cio && cio == &ld->ld_emrg[i]->emrg_subio) - lov_io_sub_fini(env, lio, &lio->lis_subs[i]); + LASSERT(plist->pl_nr == 0); + page = cl_page_list_first(queue); + cl_page_list_move(plist, queue, page); + + stripe = lov_page_stripe(page); + while (queue->pl_nr > 0) { + page = cl_page_list_first(queue); + if (stripe != lov_page_stripe(page)) + break; + + cl_page_list_move(plist, queue, page); } - lio->lis_mem_frozen = 0; - mutex_unlock(&ld->ld_mutex); - } - return rc; -#undef QIN -} + if (queue->pl_nr > 0) /* still has more pages */ + stripe_to = PAGE_SIZE; -static int lov_io_prepare_write(const struct lu_env *env, - const struct cl_io_slice *ios, - const struct cl_page_slice *slice, - unsigned from, unsigned to) -{ - struct lov_io *lio = cl2lov_io(env, ios); - struct cl_page *sub_page = lov_sub_page(slice); - struct lov_io_sub *sub; - int result; + sub = lov_sub_get(env, lio, stripe); + if (!IS_ERR(sub)) { + rc = cl_io_commit_async(sub->sub_env, sub->sub_io, + plist, from, stripe_to, cb); + lov_sub_put(sub); + } else { + rc = PTR_ERR(sub); + break; + } - sub = lov_page_subio(env, lio, slice); - if (!IS_ERR(sub)) { - result = cl_io_prepare_write(sub->sub_env, sub->sub_io, - sub_page, from, to); - lov_sub_put(sub); - } else - result = PTR_ERR(sub); - return result; -} + if (plist->pl_nr > 0) /* short write */ + break; -static int lov_io_commit_write(const struct lu_env *env, - const struct cl_io_slice *ios, - const struct cl_page_slice *slice, - unsigned from, unsigned to) -{ - struct lov_io *lio = cl2lov_io(env, ios); - struct cl_page *sub_page = lov_sub_page(slice); - struct lov_io_sub *sub; - int result; + from = 0; + } - sub = lov_page_subio(env, lio, slice); - if (!IS_ERR(sub)) { - result = cl_io_commit_write(sub->sub_env, sub->sub_io, - sub_page, from, to); - lov_sub_put(sub); - } else - result = PTR_ERR(sub); - return result; + /* for error case, add the page back into the qin list */ + LASSERT(ergo(rc == 0, plist->pl_nr == 0)); + while (plist->pl_nr > 0) { + /* error occurred, add the uncommitted pages back into queue */ + page = cl_page_list_last(plist); + cl_page_list_move_head(queue, plist, page); + } + + return rc; } static int lov_io_fault_start(const struct lu_env *env, @@ -803,16 +794,8 @@ static const struct cl_io_operations lov_io_ops = { .cio_fini = lov_io_fini } }, - .req_op = { - [CRT_READ] = { - .cio_submit = lov_io_submit - }, - [CRT_WRITE] = { - .cio_submit = lov_io_submit - } - }, - .cio_prepare_write = lov_io_prepare_write, - .cio_commit_write = lov_io_commit_write + .cio_submit = lov_io_submit, + .cio_commit_async = lov_io_commit_async, }; /***************************************************************************** @@ -880,15 +863,8 @@ static const struct cl_io_operations lov_empty_io_ops = { .cio_fini = lov_empty_io_fini } }, - .req_op = { - [CRT_READ] = { - .cio_submit = LOV_EMPTY_IMPOSSIBLE - }, - [CRT_WRITE] = { - .cio_submit = LOV_EMPTY_IMPOSSIBLE - } - }, - .cio_commit_write = LOV_EMPTY_IMPOSSIBLE + .cio_submit = LOV_EMPTY_IMPOSSIBLE, + .cio_commit_async = LOV_EMPTY_IMPOSSIBLE }; int lov_io_init_raid0(const struct lu_env *env, struct cl_object *obj, @@ -943,7 +919,7 @@ int lov_io_init_empty(const struct lu_env *env, struct cl_object *obj, } io->ci_result = result < 0 ? result : 0; - return result != 0; + return result; } int lov_io_init_released(const struct lu_env *env, struct cl_object *obj, @@ -986,7 +962,7 @@ int lov_io_init_released(const struct lu_env *env, struct cl_object *obj, } io->ci_result = result < 0 ? result : 0; - return result != 0; + return result; } /** @} lov */ diff --git a/drivers/staging/lustre/lustre/lov/lov_lock.c b/drivers/staging/lustre/lustre/lov/lov_lock.c index ae854bc25..1b203d18c 100644 --- a/drivers/staging/lustre/lustre/lov/lov_lock.c +++ b/drivers/staging/lustre/lustre/lov/lov_lock.c @@ -46,11 +46,6 @@ * @{ */ -static struct cl_lock_closure *lov_closure_get(const struct lu_env *env, - struct cl_lock *parent); - -static int lov_lock_unuse(const struct lu_env *env, - const struct cl_lock_slice *slice); /***************************************************************************** * * Lov lock operations. @@ -58,7 +53,7 @@ static int lov_lock_unuse(const struct lu_env *env, */ static struct lov_sublock_env *lov_sublock_env_get(const struct lu_env *env, - struct cl_lock *parent, + const struct cl_lock *parent, struct lov_lock_sub *lls) { struct lov_sublock_env *subenv; @@ -100,184 +95,25 @@ static void lov_sublock_env_put(struct lov_sublock_env *subenv) lov_sub_put(subenv->lse_sub); } -static void lov_sublock_adopt(const struct lu_env *env, struct lov_lock *lck, - struct cl_lock *sublock, int idx, - struct lov_lock_link *link) +static int lov_sublock_init(const struct lu_env *env, + const struct cl_lock *parent, + struct lov_lock_sub *lls) { - struct lovsub_lock *lsl; - struct cl_lock *parent = lck->lls_cl.cls_lock; - int rc; - - LASSERT(cl_lock_is_mutexed(parent)); - LASSERT(cl_lock_is_mutexed(sublock)); - - lsl = cl2sub_lock(sublock); - /* - * check that sub-lock doesn't have lock link to this top-lock. - */ - LASSERT(!lov_lock_link_find(env, lck, lsl)); - LASSERT(idx < lck->lls_nr); - - lck->lls_sub[idx].sub_lock = lsl; - lck->lls_nr_filled++; - LASSERT(lck->lls_nr_filled <= lck->lls_nr); - list_add_tail(&link->lll_list, &lsl->lss_parents); - link->lll_idx = idx; - link->lll_super = lck; - cl_lock_get(parent); - lu_ref_add(&parent->cll_reference, "lov-child", sublock); - lck->lls_sub[idx].sub_flags |= LSF_HELD; - cl_lock_user_add(env, sublock); - - rc = lov_sublock_modify(env, lck, lsl, &sublock->cll_descr, idx); - LASSERT(rc == 0); /* there is no way this can fail, currently */ -} - -static struct cl_lock *lov_sublock_alloc(const struct lu_env *env, - const struct cl_io *io, - struct lov_lock *lck, - int idx, struct lov_lock_link **out) -{ - struct cl_lock *sublock; - struct cl_lock *parent; - struct lov_lock_link *link; - - LASSERT(idx < lck->lls_nr); - - link = kmem_cache_zalloc(lov_lock_link_kmem, GFP_NOFS); - if (link) { - struct lov_sublock_env *subenv; - struct lov_lock_sub *lls; - struct cl_lock_descr *descr; - - parent = lck->lls_cl.cls_lock; - lls = &lck->lls_sub[idx]; - descr = &lls->sub_got; - - subenv = lov_sublock_env_get(env, parent, lls); - if (!IS_ERR(subenv)) { - /* CAVEAT: Don't try to add a field in lov_lock_sub - * to remember the subio. This is because lock is able - * to be cached, but this is not true for IO. This - * further means a sublock might be referenced in - * different io context. -jay - */ - - sublock = cl_lock_hold(subenv->lse_env, subenv->lse_io, - descr, "lov-parent", parent); - lov_sublock_env_put(subenv); - } else { - /* error occurs. */ - sublock = (void *)subenv; - } - - if (!IS_ERR(sublock)) - *out = link; - else - kmem_cache_free(lov_lock_link_kmem, link); - } else - sublock = ERR_PTR(-ENOMEM); - return sublock; -} - -static void lov_sublock_unlock(const struct lu_env *env, - struct lovsub_lock *lsl, - struct cl_lock_closure *closure, - struct lov_sublock_env *subenv) -{ - lov_sublock_env_put(subenv); - lsl->lss_active = NULL; - cl_lock_disclosure(env, closure); -} - -static int lov_sublock_lock(const struct lu_env *env, - struct lov_lock *lck, - struct lov_lock_sub *lls, - struct cl_lock_closure *closure, - struct lov_sublock_env **lsep) -{ - struct lovsub_lock *sublock; - struct cl_lock *child; - int result = 0; - - LASSERT(list_empty(&closure->clc_list)); - - sublock = lls->sub_lock; - child = sublock->lss_cl.cls_lock; - result = cl_lock_closure_build(env, child, closure); - if (result == 0) { - struct cl_lock *parent = closure->clc_origin; - - LASSERT(cl_lock_is_mutexed(child)); - sublock->lss_active = parent; - - if (unlikely((child->cll_state == CLS_FREEING) || - (child->cll_flags & CLF_CANCELLED))) { - struct lov_lock_link *link; - /* - * we could race with lock deletion which temporarily - * put the lock in freeing state, bug 19080. - */ - LASSERT(!(lls->sub_flags & LSF_HELD)); - - link = lov_lock_link_find(env, lck, sublock); - LASSERT(link); - lov_lock_unlink(env, link, sublock); - lov_sublock_unlock(env, sublock, closure, NULL); - lck->lls_cancel_race = 1; - result = CLO_REPEAT; - } else if (lsep) { - struct lov_sublock_env *subenv; + struct lov_sublock_env *subenv; + int result; - subenv = lov_sublock_env_get(env, parent, lls); - if (IS_ERR(subenv)) { - lov_sublock_unlock(env, sublock, - closure, NULL); - result = PTR_ERR(subenv); - } else { - *lsep = subenv; - } - } + subenv = lov_sublock_env_get(env, parent, lls); + if (!IS_ERR(subenv)) { + result = cl_lock_init(subenv->lse_env, &lls->sub_lock, + subenv->lse_io); + lov_sublock_env_put(subenv); + } else { + /* error occurs. */ + result = PTR_ERR(subenv); } return result; } -/** - * Updates the result of a top-lock operation from a result of sub-lock - * sub-operations. Top-operations like lov_lock_{enqueue,use,unuse}() iterate - * over sub-locks and lov_subresult() is used to calculate return value of a - * top-operation. To this end, possible return values of sub-operations are - * ordered as - * - * - 0 success - * - CLO_WAIT wait for event - * - CLO_REPEAT repeat top-operation - * - -ne fundamental error - * - * Top-level return code can only go down through this list. CLO_REPEAT - * overwrites CLO_WAIT, because lock mutex was released and sleeping condition - * has to be rechecked by the upper layer. - */ -static int lov_subresult(int result, int rc) -{ - int result_rank; - int rc_rank; - - LASSERTF(result <= 0 || result == CLO_REPEAT || result == CLO_WAIT, - "result = %d\n", result); - LASSERTF(rc <= 0 || rc == CLO_REPEAT || rc == CLO_WAIT, - "rc = %d\n", rc); - CLASSERT(CLO_WAIT < CLO_REPEAT); - - /* calculate ranks in the ordering above */ - result_rank = result < 0 ? 1 + CLO_REPEAT : result; - rc_rank = rc < 0 ? 1 + CLO_REPEAT : rc; - - if (result_rank < rc_rank) - result = rc; - return result; -} - /** * Creates sub-locks for a given lov_lock for the first time. * @@ -286,8 +122,9 @@ static int lov_subresult(int result, int rc) * fact that top-lock (that is being created) can be accessed concurrently * through already created sub-locks (possibly shared with other top-locks). */ -static int lov_lock_sub_init(const struct lu_env *env, - struct lov_lock *lck, const struct cl_io *io) +static struct lov_lock *lov_lock_sub_init(const struct lu_env *env, + const struct cl_object *obj, + struct cl_lock *lock) { int result = 0; int i; @@ -297,241 +134,86 @@ static int lov_lock_sub_init(const struct lu_env *env, u64 file_start; u64 file_end; - struct lov_object *loo = cl2lov(lck->lls_cl.cls_obj); + struct lov_object *loo = cl2lov(obj); struct lov_layout_raid0 *r0 = lov_r0(loo); - struct cl_lock *parent = lck->lls_cl.cls_lock; + struct lov_lock *lovlck; - lck->lls_orig = parent->cll_descr; - file_start = cl_offset(lov2cl(loo), parent->cll_descr.cld_start); - file_end = cl_offset(lov2cl(loo), parent->cll_descr.cld_end + 1) - 1; + file_start = cl_offset(lov2cl(loo), lock->cll_descr.cld_start); + file_end = cl_offset(lov2cl(loo), lock->cll_descr.cld_end + 1) - 1; for (i = 0, nr = 0; i < r0->lo_nr; i++) { /* * XXX for wide striping smarter algorithm is desirable, * breaking out of the loop, early. */ - if (likely(r0->lo_sub[i]) && + if (likely(r0->lo_sub[i]) && /* spare layout */ lov_stripe_intersects(loo->lo_lsm, i, file_start, file_end, &start, &end)) nr++; } LASSERT(nr > 0); - lck->lls_sub = libcfs_kvzalloc(nr * sizeof(lck->lls_sub[0]), GFP_NOFS); - if (!lck->lls_sub) - return -ENOMEM; + lovlck = libcfs_kvzalloc(offsetof(struct lov_lock, lls_sub[nr]), + GFP_NOFS); + if (!lovlck) + return ERR_PTR(-ENOMEM); - lck->lls_nr = nr; - /* - * First, fill in sub-lock descriptions in - * lck->lls_sub[].sub_descr. They are used by lov_sublock_alloc() - * (called below in this function, and by lov_lock_enqueue()) to - * create sub-locks. At this moment, no other thread can access - * top-lock. - */ + lovlck->lls_nr = nr; for (i = 0, nr = 0; i < r0->lo_nr; ++i) { if (likely(r0->lo_sub[i]) && lov_stripe_intersects(loo->lo_lsm, i, file_start, file_end, &start, &end)) { + struct lov_lock_sub *lls = &lovlck->lls_sub[nr]; struct cl_lock_descr *descr; - descr = &lck->lls_sub[nr].sub_descr; + descr = &lls->sub_lock.cll_descr; LASSERT(!descr->cld_obj); descr->cld_obj = lovsub2cl(r0->lo_sub[i]); descr->cld_start = cl_index(descr->cld_obj, start); descr->cld_end = cl_index(descr->cld_obj, end); - descr->cld_mode = parent->cll_descr.cld_mode; - descr->cld_gid = parent->cll_descr.cld_gid; - descr->cld_enq_flags = parent->cll_descr.cld_enq_flags; - /* XXX has no effect */ - lck->lls_sub[nr].sub_got = *descr; - lck->lls_sub[nr].sub_stripe = i; + descr->cld_mode = lock->cll_descr.cld_mode; + descr->cld_gid = lock->cll_descr.cld_gid; + descr->cld_enq_flags = lock->cll_descr.cld_enq_flags; + lls->sub_stripe = i; + + /* initialize sub lock */ + result = lov_sublock_init(env, lock, lls); + if (result < 0) + break; + + lls->sub_initialized = 1; nr++; } } - LASSERT(nr == lck->lls_nr); - - /* - * Some sub-locks can be missing at this point. This is not a problem, - * because enqueue will create them anyway. Main duty of this function - * is to fill in sub-lock descriptions in a race free manner. - */ - return result; -} + LASSERT(ergo(result == 0, nr == lovlck->lls_nr)); -static int lov_sublock_release(const struct lu_env *env, struct lov_lock *lck, - int i, int deluser, int rc) -{ - struct cl_lock *parent = lck->lls_cl.cls_lock; - - LASSERT(cl_lock_is_mutexed(parent)); - - if (lck->lls_sub[i].sub_flags & LSF_HELD) { - struct cl_lock *sublock; - int dying; - - sublock = lck->lls_sub[i].sub_lock->lss_cl.cls_lock; - LASSERT(cl_lock_is_mutexed(sublock)); + if (result != 0) { + for (i = 0; i < nr; ++i) { + if (!lovlck->lls_sub[i].sub_initialized) + break; - lck->lls_sub[i].sub_flags &= ~LSF_HELD; - if (deluser) - cl_lock_user_del(env, sublock); - /* - * If the last hold is released, and cancellation is pending - * for a sub-lock, release parent mutex, to avoid keeping it - * while sub-lock is being paged out. - */ - dying = (sublock->cll_descr.cld_mode == CLM_PHANTOM || - sublock->cll_descr.cld_mode == CLM_GROUP || - (sublock->cll_flags & (CLF_CANCELPEND|CLF_DOOMED))) && - sublock->cll_holds == 1; - if (dying) - cl_lock_mutex_put(env, parent); - cl_lock_unhold(env, sublock, "lov-parent", parent); - if (dying) { - cl_lock_mutex_get(env, parent); - rc = lov_subresult(rc, CLO_REPEAT); + cl_lock_fini(env, &lovlck->lls_sub[i].sub_lock); } - /* - * From now on lck->lls_sub[i].sub_lock is a "weak" pointer, - * not backed by a reference on a - * sub-lock. lovsub_lock_delete() will clear - * lck->lls_sub[i].sub_lock under semaphores, just before - * sub-lock is destroyed. - */ + kvfree(lovlck); + lovlck = ERR_PTR(result); } - return rc; -} - -static void lov_sublock_hold(const struct lu_env *env, struct lov_lock *lck, - int i) -{ - struct cl_lock *parent = lck->lls_cl.cls_lock; - - LASSERT(cl_lock_is_mutexed(parent)); - - if (!(lck->lls_sub[i].sub_flags & LSF_HELD)) { - struct cl_lock *sublock; - - sublock = lck->lls_sub[i].sub_lock->lss_cl.cls_lock; - LASSERT(cl_lock_is_mutexed(sublock)); - LASSERT(sublock->cll_state != CLS_FREEING); - lck->lls_sub[i].sub_flags |= LSF_HELD; - - cl_lock_get_trust(sublock); - cl_lock_hold_add(env, sublock, "lov-parent", parent); - cl_lock_user_add(env, sublock); - cl_lock_put(env, sublock); - } + return lovlck; } static void lov_lock_fini(const struct lu_env *env, struct cl_lock_slice *slice) { - struct lov_lock *lck; + struct lov_lock *lovlck; int i; - lck = cl2lov_lock(slice); - LASSERT(lck->lls_nr_filled == 0); - if (lck->lls_sub) { - for (i = 0; i < lck->lls_nr; ++i) - /* - * No sub-locks exists at this point, as sub-lock has - * a reference on its parent. - */ - LASSERT(!lck->lls_sub[i].sub_lock); - kvfree(lck->lls_sub); + lovlck = cl2lov_lock(slice); + for (i = 0; i < lovlck->lls_nr; ++i) { + LASSERT(!lovlck->lls_sub[i].sub_is_enqueued); + if (lovlck->lls_sub[i].sub_initialized) + cl_lock_fini(env, &lovlck->lls_sub[i].sub_lock); } - kmem_cache_free(lov_lock_kmem, lck); -} - -static int lov_lock_enqueue_wait(const struct lu_env *env, - struct lov_lock *lck, - struct cl_lock *sublock) -{ - struct cl_lock *lock = lck->lls_cl.cls_lock; - int result; - - LASSERT(cl_lock_is_mutexed(lock)); - - cl_lock_mutex_put(env, lock); - result = cl_lock_enqueue_wait(env, sublock, 0); - cl_lock_mutex_get(env, lock); - return result ?: CLO_REPEAT; -} - -/** - * Tries to advance a state machine of a given sub-lock toward enqueuing of - * the top-lock. - * - * \retval 0 if state-transition can proceed - * \retval -ve otherwise. - */ -static int lov_lock_enqueue_one(const struct lu_env *env, struct lov_lock *lck, - struct cl_lock *sublock, - struct cl_io *io, __u32 enqflags, int last) -{ - int result; - - /* first, try to enqueue a sub-lock ... */ - result = cl_enqueue_try(env, sublock, io, enqflags); - if ((sublock->cll_state == CLS_ENQUEUED) && !(enqflags & CEF_AGL)) { - /* if it is enqueued, try to `wait' on it---maybe it's already - * granted - */ - result = cl_wait_try(env, sublock); - if (result == CLO_REENQUEUED) - result = CLO_WAIT; - } - /* - * If CEF_ASYNC flag is set, then all sub-locks can be enqueued in - * parallel, otherwise---enqueue has to wait until sub-lock is granted - * before proceeding to the next one. - */ - if ((result == CLO_WAIT) && (sublock->cll_state <= CLS_HELD) && - (enqflags & CEF_ASYNC) && (!last || (enqflags & CEF_AGL))) - result = 0; - return result; -} - -/** - * Helper function for lov_lock_enqueue() that creates missing sub-lock. - */ -static int lov_sublock_fill(const struct lu_env *env, struct cl_lock *parent, - struct cl_io *io, struct lov_lock *lck, int idx) -{ - struct lov_lock_link *link = NULL; - struct cl_lock *sublock; - int result; - - LASSERT(parent->cll_depth == 1); - cl_lock_mutex_put(env, parent); - sublock = lov_sublock_alloc(env, io, lck, idx, &link); - if (!IS_ERR(sublock)) - cl_lock_mutex_get(env, sublock); - cl_lock_mutex_get(env, parent); - - if (!IS_ERR(sublock)) { - cl_lock_get_trust(sublock); - if (parent->cll_state == CLS_QUEUING && - !lck->lls_sub[idx].sub_lock) { - lov_sublock_adopt(env, lck, sublock, idx, link); - } else { - kmem_cache_free(lov_lock_link_kmem, link); - /* other thread allocated sub-lock, or enqueue is no - * longer going on - */ - cl_lock_mutex_put(env, parent); - cl_lock_unhold(env, sublock, "lov-parent", parent); - cl_lock_mutex_get(env, parent); - } - cl_lock_mutex_put(env, sublock); - cl_lock_put(env, sublock); - result = CLO_REPEAT; - } else - result = PTR_ERR(sublock); - return result; + kvfree(lovlck); } /** @@ -543,529 +225,59 @@ static int lov_sublock_fill(const struct lu_env *env, struct cl_lock *parent, */ static int lov_lock_enqueue(const struct lu_env *env, const struct cl_lock_slice *slice, - struct cl_io *io, __u32 enqflags) + struct cl_io *io, struct cl_sync_io *anchor) { - struct cl_lock *lock = slice->cls_lock; - struct lov_lock *lck = cl2lov_lock(slice); - struct cl_lock_closure *closure = lov_closure_get(env, lock); + struct cl_lock *lock = slice->cls_lock; + struct lov_lock *lovlck = cl2lov_lock(slice); int i; - int result; - enum cl_lock_state minstate; + int rc = 0; - for (result = 0, minstate = CLS_FREEING, i = 0; i < lck->lls_nr; ++i) { - int rc; - struct lovsub_lock *sub; - struct lov_lock_sub *lls; - struct cl_lock *sublock; + for (i = 0; i < lovlck->lls_nr; ++i) { + struct lov_lock_sub *lls = &lovlck->lls_sub[i]; struct lov_sublock_env *subenv; - if (lock->cll_state != CLS_QUEUING) { - /* - * Lock might have left QUEUING state if previous - * iteration released its mutex. Stop enqueing in this - * case and let the upper layer to decide what to do. - */ - LASSERT(i > 0 && result != 0); - break; - } - - lls = &lck->lls_sub[i]; - sub = lls->sub_lock; - /* - * Sub-lock might have been canceled, while top-lock was - * cached. - */ - if (!sub) { - result = lov_sublock_fill(env, lock, io, lck, i); - /* lov_sublock_fill() released @lock mutex, - * restart. - */ + subenv = lov_sublock_env_get(env, lock, lls); + if (IS_ERR(subenv)) { + rc = PTR_ERR(subenv); break; } - sublock = sub->lss_cl.cls_lock; - rc = lov_sublock_lock(env, lck, lls, closure, &subenv); - if (rc == 0) { - lov_sublock_hold(env, lck, i); - rc = lov_lock_enqueue_one(subenv->lse_env, lck, sublock, - subenv->lse_io, enqflags, - i == lck->lls_nr - 1); - minstate = min(minstate, sublock->cll_state); - if (rc == CLO_WAIT) { - switch (sublock->cll_state) { - case CLS_QUEUING: - /* take recursive mutex, the lock is - * released in lov_lock_enqueue_wait. - */ - cl_lock_mutex_get(env, sublock); - lov_sublock_unlock(env, sub, closure, - subenv); - rc = lov_lock_enqueue_wait(env, lck, - sublock); - break; - case CLS_CACHED: - cl_lock_get(sublock); - /* take recursive mutex of sublock */ - cl_lock_mutex_get(env, sublock); - /* need to release all locks in closure - * otherwise it may deadlock. LU-2683. - */ - lov_sublock_unlock(env, sub, closure, - subenv); - /* sublock and parent are held. */ - rc = lov_sublock_release(env, lck, i, - 1, rc); - cl_lock_mutex_put(env, sublock); - cl_lock_put(env, sublock); - break; - default: - lov_sublock_unlock(env, sub, closure, - subenv); - break; - } - } else { - LASSERT(!sublock->cll_conflict); - lov_sublock_unlock(env, sub, closure, subenv); - } - } - result = lov_subresult(result, rc); - if (result != 0) + rc = cl_lock_enqueue(subenv->lse_env, subenv->lse_io, + &lls->sub_lock, anchor); + lov_sublock_env_put(subenv); + if (rc != 0) break; - } - cl_lock_closure_fini(closure); - return result ?: minstate >= CLS_ENQUEUED ? 0 : CLO_WAIT; -} - -static int lov_lock_unuse(const struct lu_env *env, - const struct cl_lock_slice *slice) -{ - struct lov_lock *lck = cl2lov_lock(slice); - struct cl_lock_closure *closure = lov_closure_get(env, slice->cls_lock); - int i; - int result; - - for (result = 0, i = 0; i < lck->lls_nr; ++i) { - int rc; - struct lovsub_lock *sub; - struct cl_lock *sublock; - struct lov_lock_sub *lls; - struct lov_sublock_env *subenv; - /* top-lock state cannot change concurrently, because single - * thread (one that released the last hold) carries unlocking - * to the completion. - */ - LASSERT(slice->cls_lock->cll_state == CLS_INTRANSIT); - lls = &lck->lls_sub[i]; - sub = lls->sub_lock; - if (!sub) - continue; - - sublock = sub->lss_cl.cls_lock; - rc = lov_sublock_lock(env, lck, lls, closure, &subenv); - if (rc == 0) { - if (lls->sub_flags & LSF_HELD) { - LASSERT(sublock->cll_state == CLS_HELD || - sublock->cll_state == CLS_ENQUEUED); - rc = cl_unuse_try(subenv->lse_env, sublock); - rc = lov_sublock_release(env, lck, i, 0, rc); - } - lov_sublock_unlock(env, sub, closure, subenv); - } - result = lov_subresult(result, rc); + lls->sub_is_enqueued = 1; } - - if (result == 0 && lck->lls_cancel_race) { - lck->lls_cancel_race = 0; - result = -ESTALE; - } - cl_lock_closure_fini(closure); - return result; + return rc; } static void lov_lock_cancel(const struct lu_env *env, const struct cl_lock_slice *slice) { - struct lov_lock *lck = cl2lov_lock(slice); - struct cl_lock_closure *closure = lov_closure_get(env, slice->cls_lock); + struct cl_lock *lock = slice->cls_lock; + struct lov_lock *lovlck = cl2lov_lock(slice); int i; - int result; - for (result = 0, i = 0; i < lck->lls_nr; ++i) { - int rc; - struct lovsub_lock *sub; - struct cl_lock *sublock; - struct lov_lock_sub *lls; + for (i = 0; i < lovlck->lls_nr; ++i) { + struct lov_lock_sub *lls = &lovlck->lls_sub[i]; + struct cl_lock *sublock = &lls->sub_lock; struct lov_sublock_env *subenv; - /* top-lock state cannot change concurrently, because single - * thread (one that released the last hold) carries unlocking - * to the completion. - */ - lls = &lck->lls_sub[i]; - sub = lls->sub_lock; - if (!sub) - continue; - - sublock = sub->lss_cl.cls_lock; - rc = lov_sublock_lock(env, lck, lls, closure, &subenv); - if (rc == 0) { - if (!(lls->sub_flags & LSF_HELD)) { - lov_sublock_unlock(env, sub, closure, subenv); - continue; - } - - switch (sublock->cll_state) { - case CLS_HELD: - rc = cl_unuse_try(subenv->lse_env, sublock); - lov_sublock_release(env, lck, i, 0, 0); - break; - default: - lov_sublock_release(env, lck, i, 1, 0); - break; - } - lov_sublock_unlock(env, sub, closure, subenv); - } - - if (rc == CLO_REPEAT) { - --i; - continue; - } - - result = lov_subresult(result, rc); - } - - if (result) - CL_LOCK_DEBUG(D_ERROR, env, slice->cls_lock, - "lov_lock_cancel fails with %d.\n", result); - - cl_lock_closure_fini(closure); -} - -static int lov_lock_wait(const struct lu_env *env, - const struct cl_lock_slice *slice) -{ - struct lov_lock *lck = cl2lov_lock(slice); - struct cl_lock_closure *closure = lov_closure_get(env, slice->cls_lock); - enum cl_lock_state minstate; - int reenqueued; - int result; - int i; - -again: - for (result = 0, minstate = CLS_FREEING, i = 0, reenqueued = 0; - i < lck->lls_nr; ++i) { - int rc; - struct lovsub_lock *sub; - struct cl_lock *sublock; - struct lov_lock_sub *lls; - struct lov_sublock_env *subenv; - - lls = &lck->lls_sub[i]; - sub = lls->sub_lock; - sublock = sub->lss_cl.cls_lock; - rc = lov_sublock_lock(env, lck, lls, closure, &subenv); - if (rc == 0) { - LASSERT(sublock->cll_state >= CLS_ENQUEUED); - if (sublock->cll_state < CLS_HELD) - rc = cl_wait_try(env, sublock); - - minstate = min(minstate, sublock->cll_state); - lov_sublock_unlock(env, sub, closure, subenv); - } - if (rc == CLO_REENQUEUED) { - reenqueued++; - rc = 0; - } - result = lov_subresult(result, rc); - if (result != 0) - break; - } - /* Each sublock only can be reenqueued once, so will not loop - * forever. - */ - if (result == 0 && reenqueued != 0) - goto again; - cl_lock_closure_fini(closure); - return result ?: minstate >= CLS_HELD ? 0 : CLO_WAIT; -} - -static int lov_lock_use(const struct lu_env *env, - const struct cl_lock_slice *slice) -{ - struct lov_lock *lck = cl2lov_lock(slice); - struct cl_lock_closure *closure = lov_closure_get(env, slice->cls_lock); - int result; - int i; - - LASSERT(slice->cls_lock->cll_state == CLS_INTRANSIT); - - for (result = 0, i = 0; i < lck->lls_nr; ++i) { - int rc; - struct lovsub_lock *sub; - struct cl_lock *sublock; - struct lov_lock_sub *lls; - struct lov_sublock_env *subenv; - - LASSERT(slice->cls_lock->cll_state == CLS_INTRANSIT); - - lls = &lck->lls_sub[i]; - sub = lls->sub_lock; - if (!sub) { - /* - * Sub-lock might have been canceled, while top-lock was - * cached. - */ - result = -ESTALE; - break; - } - - sublock = sub->lss_cl.cls_lock; - rc = lov_sublock_lock(env, lck, lls, closure, &subenv); - if (rc == 0) { - LASSERT(sublock->cll_state != CLS_FREEING); - lov_sublock_hold(env, lck, i); - if (sublock->cll_state == CLS_CACHED) { - rc = cl_use_try(subenv->lse_env, sublock, 0); - if (rc != 0) - rc = lov_sublock_release(env, lck, - i, 1, rc); - } else if (sublock->cll_state == CLS_NEW) { - /* Sub-lock might have been canceled, while - * top-lock was cached. - */ - result = -ESTALE; - lov_sublock_release(env, lck, i, 1, result); - } - lov_sublock_unlock(env, sub, closure, subenv); - } - result = lov_subresult(result, rc); - if (result != 0) - break; - } - - if (lck->lls_cancel_race) { - /* - * If there is unlocking happened at the same time, then - * sublock_lock state should be FREEING, and lov_sublock_lock - * should return CLO_REPEAT. In this case, it should return - * ESTALE, and up layer should reset the lock state to be NEW. - */ - lck->lls_cancel_race = 0; - LASSERT(result != 0); - result = -ESTALE; - } - cl_lock_closure_fini(closure); - return result; -} - -/** - * Check if the extent region \a descr is covered by \a child against the - * specific \a stripe. - */ -static int lov_lock_stripe_is_matching(const struct lu_env *env, - struct lov_object *lov, int stripe, - const struct cl_lock_descr *child, - const struct cl_lock_descr *descr) -{ - struct lov_stripe_md *lsm = lov->lo_lsm; - u64 start; - u64 end; - int result; - - if (lov_r0(lov)->lo_nr == 1) - return cl_lock_ext_match(child, descr); - - /* - * For a multi-stripes object: - * - make sure the descr only covers child's stripe, and - * - check if extent is matching. - */ - start = cl_offset(&lov->lo_cl, descr->cld_start); - end = cl_offset(&lov->lo_cl, descr->cld_end + 1) - 1; - result = 0; - /* glimpse should work on the object with LOV EA hole. */ - if (end - start <= lsm->lsm_stripe_size) { - int idx; - - idx = lov_stripe_number(lsm, start); - if (idx == stripe || - unlikely(!lov_r0(lov)->lo_sub[idx])) { - idx = lov_stripe_number(lsm, end); - if (idx == stripe || - unlikely(!lov_r0(lov)->lo_sub[idx])) - result = 1; - } - } - - if (result != 0) { - struct cl_lock_descr *subd = &lov_env_info(env)->lti_ldescr; - u64 sub_start; - u64 sub_end; - - subd->cld_obj = NULL; /* don't need sub object at all */ - subd->cld_mode = descr->cld_mode; - subd->cld_gid = descr->cld_gid; - result = lov_stripe_intersects(lsm, stripe, start, end, - &sub_start, &sub_end); - LASSERT(result); - subd->cld_start = cl_index(child->cld_obj, sub_start); - subd->cld_end = cl_index(child->cld_obj, sub_end); - result = cl_lock_ext_match(child, subd); - } - return result; -} - -/** - * An implementation of cl_lock_operations::clo_fits_into() method. - * - * Checks whether a lock (given by \a slice) is suitable for \a - * io. Multi-stripe locks can be used only for "quick" io, like truncate, or - * O_APPEND write. - * - * \see ccc_lock_fits_into(). - */ -static int lov_lock_fits_into(const struct lu_env *env, - const struct cl_lock_slice *slice, - const struct cl_lock_descr *need, - const struct cl_io *io) -{ - struct lov_lock *lov = cl2lov_lock(slice); - struct lov_object *obj = cl2lov(slice->cls_obj); - int result; - - LASSERT(cl_object_same(need->cld_obj, slice->cls_obj)); - LASSERT(lov->lls_nr > 0); - - /* for top lock, it's necessary to match enq flags otherwise it will - * run into problem if a sublock is missing and reenqueue. - */ - if (need->cld_enq_flags != lov->lls_orig.cld_enq_flags) - return 0; - - if (need->cld_mode == CLM_GROUP) - /* - * always allow to match group lock. - */ - result = cl_lock_ext_match(&lov->lls_orig, need); - else if (lov->lls_nr == 1) { - struct cl_lock_descr *got = &lov->lls_sub[0].sub_got; - - result = lov_lock_stripe_is_matching(env, - cl2lov(slice->cls_obj), - lov->lls_sub[0].sub_stripe, - got, need); - } else if (io->ci_type != CIT_SETATTR && io->ci_type != CIT_MISC && - !cl_io_is_append(io) && need->cld_mode != CLM_PHANTOM) - /* - * Multi-stripe locks are only suitable for `quick' IO and for - * glimpse. - */ - result = 0; - else - /* - * Most general case: multi-stripe existing lock, and - * (potentially) multi-stripe @need lock. Check that @need is - * covered by @lov's sub-locks. - * - * For now, ignore lock expansions made by the server, and - * match against original lock extent. - */ - result = cl_lock_ext_match(&lov->lls_orig, need); - CDEBUG(D_DLMTRACE, DDESCR"/"DDESCR" %d %d/%d: %d\n", - PDESCR(&lov->lls_orig), PDESCR(&lov->lls_sub[0].sub_got), - lov->lls_sub[0].sub_stripe, lov->lls_nr, lov_r0(obj)->lo_nr, - result); - return result; -} - -void lov_lock_unlink(const struct lu_env *env, - struct lov_lock_link *link, struct lovsub_lock *sub) -{ - struct lov_lock *lck = link->lll_super; - struct cl_lock *parent = lck->lls_cl.cls_lock; - - LASSERT(cl_lock_is_mutexed(parent)); - LASSERT(cl_lock_is_mutexed(sub->lss_cl.cls_lock)); - - list_del_init(&link->lll_list); - LASSERT(lck->lls_sub[link->lll_idx].sub_lock == sub); - /* yank this sub-lock from parent's array */ - lck->lls_sub[link->lll_idx].sub_lock = NULL; - LASSERT(lck->lls_nr_filled > 0); - lck->lls_nr_filled--; - lu_ref_del(&parent->cll_reference, "lov-child", sub->lss_cl.cls_lock); - cl_lock_put(env, parent); - kmem_cache_free(lov_lock_link_kmem, link); -} - -struct lov_lock_link *lov_lock_link_find(const struct lu_env *env, - struct lov_lock *lck, - struct lovsub_lock *sub) -{ - struct lov_lock_link *scan; - - LASSERT(cl_lock_is_mutexed(sub->lss_cl.cls_lock)); - - list_for_each_entry(scan, &sub->lss_parents, lll_list) { - if (scan->lll_super == lck) - return scan; - } - return NULL; -} - -/** - * An implementation of cl_lock_operations::clo_delete() method. This is - * invoked for "top-to-bottom" delete, when lock destruction starts from the - * top-lock, e.g., as a result of inode destruction. - * - * Unlinks top-lock from all its sub-locks. Sub-locks are not deleted there: - * this is done separately elsewhere: - * - * - for inode destruction, lov_object_delete() calls cl_object_kill() for - * each sub-object, purging its locks; - * - * - in other cases (e.g., a fatal error with a top-lock) sub-locks are - * left in the cache. - */ -static void lov_lock_delete(const struct lu_env *env, - const struct cl_lock_slice *slice) -{ - struct lov_lock *lck = cl2lov_lock(slice); - struct cl_lock_closure *closure = lov_closure_get(env, slice->cls_lock); - struct lov_lock_link *link; - int rc; - int i; - - LASSERT(slice->cls_lock->cll_state == CLS_FREEING); - - for (i = 0; i < lck->lls_nr; ++i) { - struct lov_lock_sub *lls = &lck->lls_sub[i]; - struct lovsub_lock *lsl = lls->sub_lock; - - if (!lsl) /* already removed */ + if (!lls->sub_is_enqueued) continue; - rc = lov_sublock_lock(env, lck, lls, closure, NULL); - if (rc == CLO_REPEAT) { - --i; - continue; + lls->sub_is_enqueued = 0; + subenv = lov_sublock_env_get(env, lock, lls); + if (!IS_ERR(subenv)) { + cl_lock_cancel(subenv->lse_env, sublock); + lov_sublock_env_put(subenv); + } else { + CL_LOCK_DEBUG(D_ERROR, env, slice->cls_lock, + "lov_lock_cancel fails with %ld.\n", + PTR_ERR(subenv)); } - - LASSERT(rc == 0); - LASSERT(lsl->lss_cl.cls_lock->cll_state < CLS_FREEING); - - if (lls->sub_flags & LSF_HELD) - lov_sublock_release(env, lck, i, 1, 0); - - link = lov_lock_link_find(env, lck, lsl); - LASSERT(link); - lov_lock_unlink(env, link, lsl); - LASSERT(!lck->lls_sub[i].sub_lock); - - lov_sublock_unlock(env, lsl, closure, NULL); } - - cl_lock_closure_fini(closure); } static int lov_lock_print(const struct lu_env *env, void *cookie, @@ -1079,12 +291,8 @@ static int lov_lock_print(const struct lu_env *env, void *cookie, struct lov_lock_sub *sub; sub = &lck->lls_sub[i]; - (*p)(env, cookie, " %d %x: ", i, sub->sub_flags); - if (sub->sub_lock) - cl_lock_print(env, cookie, p, - sub->sub_lock->lss_cl.cls_lock); - else - (*p)(env, cookie, "---\n"); + (*p)(env, cookie, " %d %x: ", i, sub->sub_is_enqueued); + cl_lock_print(env, cookie, p, &sub->sub_lock); } return 0; } @@ -1092,12 +300,7 @@ static int lov_lock_print(const struct lu_env *env, void *cookie, static const struct cl_lock_operations lov_lock_ops = { .clo_fini = lov_lock_fini, .clo_enqueue = lov_lock_enqueue, - .clo_wait = lov_lock_wait, - .clo_use = lov_lock_use, - .clo_unuse = lov_lock_unuse, .clo_cancel = lov_lock_cancel, - .clo_fits_into = lov_lock_fits_into, - .clo_delete = lov_lock_delete, .clo_print = lov_lock_print }; @@ -1105,14 +308,13 @@ int lov_lock_init_raid0(const struct lu_env *env, struct cl_object *obj, struct cl_lock *lock, const struct cl_io *io) { struct lov_lock *lck; - int result; + int result = 0; - lck = kmem_cache_zalloc(lov_lock_kmem, GFP_NOFS); - if (lck) { + lck = lov_lock_sub_init(env, obj, lock); + if (!IS_ERR(lck)) cl_lock_slice_add(lock, &lck->lls_cl, obj, &lov_lock_ops); - result = lov_lock_sub_init(env, lck, io); - } else - result = -ENOMEM; + else + result = PTR_ERR(lck); return result; } @@ -1147,21 +349,9 @@ int lov_lock_init_empty(const struct lu_env *env, struct cl_object *obj, lck = kmem_cache_zalloc(lov_lock_kmem, GFP_NOFS); if (lck) { cl_lock_slice_add(lock, &lck->lls_cl, obj, &lov_empty_lock_ops); - lck->lls_orig = lock->cll_descr; result = 0; } return result; } -static struct cl_lock_closure *lov_closure_get(const struct lu_env *env, - struct cl_lock *parent) -{ - struct cl_lock_closure *closure; - - closure = &lov_env_info(env)->lti_closure; - LASSERT(list_empty(&closure->clc_list)); - cl_lock_closure_init(env, closure, parent, 1); - return closure; -} - /** @} lov */ diff --git a/drivers/staging/lustre/lustre/lov/lov_merge.c b/drivers/staging/lustre/lustre/lov/lov_merge.c index 029cd4d62..56ef41d17 100644 --- a/drivers/staging/lustre/lustre/lov/lov_merge.c +++ b/drivers/staging/lustre/lustre/lov/lov_merge.c @@ -154,6 +154,7 @@ void lov_merge_attrs(struct obdo *tgt, struct obdo *src, u64 valid, valid &= src->o_valid; if (*set) { + tgt->o_valid &= valid; if (valid & OBD_MD_FLSIZE) { /* this handles sparse files properly */ u64 lov_size; @@ -172,12 +173,22 @@ void lov_merge_attrs(struct obdo *tgt, struct obdo *src, u64 valid, tgt->o_mtime = src->o_mtime; if (valid & OBD_MD_FLDATAVERSION) tgt->o_data_version += src->o_data_version; + + /* handle flags */ + if (valid & OBD_MD_FLFLAGS) + tgt->o_flags &= src->o_flags; + else + tgt->o_flags = 0; } else { memcpy(tgt, src, sizeof(*tgt)); tgt->o_oi = lsm->lsm_oi; + tgt->o_valid = valid; if (valid & OBD_MD_FLSIZE) tgt->o_size = lov_stripe_size(lsm, src->o_size, stripeno); + tgt->o_flags = 0; + if (valid & OBD_MD_FLFLAGS) + tgt->o_flags = src->o_flags; } /* data_version needs to be valid on all stripes to be correct! */ diff --git a/drivers/staging/lustre/lustre/lov/lov_obd.c b/drivers/staging/lustre/lustre/lov/lov_obd.c index 5daa7faf4..e15ef2ece 100644 --- a/drivers/staging/lustre/lustre/lov/lov_obd.c +++ b/drivers/staging/lustre/lustre/lov/lov_obd.c @@ -54,7 +54,6 @@ #include "../include/lprocfs_status.h" #include "../include/lustre_param.h" #include "../include/cl_object.h" -#include "../include/lclient.h" /* for cl_client_lru */ #include "../include/lustre/ll_fiemap.h" #include "../include/lustre_fid.h" @@ -124,7 +123,6 @@ static int lov_set_osc_active(struct obd_device *obd, struct obd_uuid *uuid, static int lov_notify(struct obd_device *obd, struct obd_device *watched, enum obd_notify_event ev, void *data); -#define MAX_STRING_SIZE 128 int lov_connect_obd(struct obd_device *obd, __u32 index, int activate, struct obd_connect_data *data) { @@ -965,7 +963,6 @@ int lov_process_config_base(struct obd_device *obd, struct lustre_cfg *lcfg, CERROR("Unknown command: %d\n", lcfg->lcfg_command); rc = -EINVAL; goto out; - } } out: @@ -1734,6 +1731,27 @@ static int lov_fiemap(struct lov_obd *lov, __u32 keylen, void *key, unsigned int buffer_size = FIEMAP_BUFFER_SIZE; if (!lsm_has_objects(lsm)) { + if (lsm && lsm_is_released(lsm) && (fm_key->fiemap.fm_start < + fm_key->oa.o_size)) { + /* + * released file, return a minimal FIEMAP if + * request fits in file-size. + */ + fiemap->fm_mapped_extents = 1; + fiemap->fm_extents[0].fe_logical = + fm_key->fiemap.fm_start; + if (fm_key->fiemap.fm_start + fm_key->fiemap.fm_length < + fm_key->oa.o_size) { + fiemap->fm_extents[0].fe_length = + fm_key->fiemap.fm_length; + } else { + fiemap->fm_extents[0].fe_length = + fm_key->oa.o_size - fm_key->fiemap.fm_start; + fiemap->fm_extents[0].fe_flags |= + (FIEMAP_EXTENT_UNKNOWN | + FIEMAP_EXTENT_LAST); + } + } rc = 0; goto out; } @@ -2173,7 +2191,6 @@ void lov_stripe_lock(struct lov_stripe_md *md) LASSERT(md->lsm_lock_owner == 0); md->lsm_lock_owner = current_pid(); } -EXPORT_SYMBOL(lov_stripe_lock); void lov_stripe_unlock(struct lov_stripe_md *md) __releases(&md->lsm_lock) @@ -2182,7 +2199,6 @@ void lov_stripe_unlock(struct lov_stripe_md *md) md->lsm_lock_owner = 0; spin_unlock(&md->lsm_lock); } -EXPORT_SYMBOL(lov_stripe_unlock); static int lov_quotactl(struct obd_device *obd, struct obd_export *exp, struct obd_quotactl *oqctl) diff --git a/drivers/staging/lustre/lustre/lov/lov_object.c b/drivers/staging/lustre/lustre/lov/lov_object.c index 1f8ed95a6..561d493b2 100644 --- a/drivers/staging/lustre/lustre/lov/lov_object.c +++ b/drivers/staging/lustre/lustre/lov/lov_object.c @@ -67,7 +67,7 @@ struct lov_layout_operations { int (*llo_print)(const struct lu_env *env, void *cookie, lu_printer_t p, const struct lu_object *o); int (*llo_page_init)(const struct lu_env *env, struct cl_object *obj, - struct cl_page *page, struct page *vmpage); + struct cl_page *page, pgoff_t index); int (*llo_lock_init)(const struct lu_env *env, struct cl_object *obj, struct cl_lock *lock, const struct cl_io *io); @@ -193,6 +193,18 @@ static int lov_init_sub(const struct lu_env *env, struct lov_object *lov, return result; } +static int lov_page_slice_fixup(struct lov_object *lov, + struct cl_object *stripe) +{ + struct cl_object_header *hdr = cl_object_header(&lov->lo_cl); + struct cl_object *o; + + cl_object_for_each(o, stripe) + o->co_slice_off += hdr->coh_page_bufsize; + + return cl_object_header(stripe)->coh_page_bufsize; +} + static int lov_init_raid0(const struct lu_env *env, struct lov_device *dev, struct lov_object *lov, const struct cl_object_conf *conf, @@ -222,6 +234,8 @@ static int lov_init_raid0(const struct lu_env *env, r0->lo_sub = libcfs_kvzalloc(r0->lo_nr * sizeof(r0->lo_sub[0]), GFP_NOFS); if (r0->lo_sub) { + int psz = 0; + result = 0; subconf->coc_inode = conf->coc_inode; spin_lock_init(&r0->lo_sub_lock); @@ -254,13 +268,24 @@ static int lov_init_raid0(const struct lu_env *env, if (result == -EAGAIN) { /* try again */ --i; result = 0; + continue; } } else { result = PTR_ERR(stripe); } + + if (result == 0) { + int sz = lov_page_slice_fixup(lov, stripe); + + LASSERT(ergo(psz > 0, psz == sz)); + psz = sz; + } } - } else + if (result == 0) + cl_object_header(&lov->lo_cl)->coh_page_bufsize += psz; + } else { result = -ENOMEM; + } out: return result; } @@ -286,8 +311,6 @@ static int lov_delete_empty(const struct lu_env *env, struct lov_object *lov, LASSERT(lov->lo_type == LLT_EMPTY || lov->lo_type == LLT_RELEASED); lov_layout_wait(env, lov); - - cl_object_prune(env, &lov->lo_cl); return 0; } @@ -355,7 +378,7 @@ static int lov_delete_raid0(const struct lu_env *env, struct lov_object *lov, struct lovsub_object *los = r0->lo_sub[i]; if (los) { - cl_locks_prune(env, &los->lso_cl, 1); + cl_object_prune(env, &los->lso_cl); /* * If top-level object is to be evicted from * the cache, so are its sub-objects. @@ -364,7 +387,6 @@ static int lov_delete_raid0(const struct lu_env *env, struct lov_object *lov, } } } - cl_object_prune(env, &lov->lo_cl); return 0; } @@ -666,7 +688,6 @@ static int lov_layout_change(const struct lu_env *unused, const struct lov_layout_operations *old_ops; const struct lov_layout_operations *new_ops; - struct cl_object_header *hdr = cl_object_header(&lov->lo_cl); void *cookie; struct lu_env *env; int refcheck; @@ -691,13 +712,15 @@ static int lov_layout_change(const struct lu_env *unused, old_ops = &lov_dispatch[lov->lo_type]; new_ops = &lov_dispatch[llt]; + result = cl_object_prune(env, &lov->lo_cl); + if (result != 0) + goto out; + result = old_ops->llo_delete(env, lov, &lov->u); if (result == 0) { old_ops->llo_fini(env, lov, &lov->u); LASSERT(atomic_read(&lov->lo_active_ios) == 0); - LASSERT(!hdr->coh_tree.rnode); - LASSERT(hdr->coh_pages == 0); lov->lo_type = LLT_EMPTY; result = new_ops->llo_init(env, @@ -713,6 +736,7 @@ static int lov_layout_change(const struct lu_env *unused, } } +out: cl_env_put(env, &refcheck); cl_env_reexit(cookie); return result; @@ -793,7 +817,8 @@ static int lov_conf_set(const struct lu_env *env, struct cl_object *obj, goto out; } - lov->lo_layout_invalid = lov_layout_change(env, lov, conf); + result = lov_layout_change(env, lov, conf); + lov->lo_layout_invalid = result != 0; out: lov_conf_unlock(lov); @@ -825,10 +850,10 @@ static int lov_object_print(const struct lu_env *env, void *cookie, } int lov_page_init(const struct lu_env *env, struct cl_object *obj, - struct cl_page *page, struct page *vmpage) + struct cl_page *page, pgoff_t index) { - return LOV_2DISPATCH_NOLOCK(cl2lov(obj), - llo_page_init, env, obj, page, vmpage); + return LOV_2DISPATCH_NOLOCK(cl2lov(obj), llo_page_init, env, obj, page, + index); } /** @@ -911,8 +936,9 @@ struct lu_object *lov_object_alloc(const struct lu_env *env, * for object with different layouts. */ obj->lo_ops = &lov_lu_obj_ops; - } else + } else { obj = NULL; + } return obj; } diff --git a/drivers/staging/lustre/lustre/lov/lov_offset.c b/drivers/staging/lustre/lustre/lov/lov_offset.c index ae83eb0f6..9302f06c3 100644 --- a/drivers/staging/lustre/lustre/lov/lov_offset.c +++ b/drivers/staging/lustre/lustre/lov/lov_offset.c @@ -66,6 +66,18 @@ u64 lov_stripe_size(struct lov_stripe_md *lsm, u64 ost_size, int stripeno) return lov_size; } +/** + * Compute file level page index by stripe level page offset + */ +pgoff_t lov_stripe_pgoff(struct lov_stripe_md *lsm, pgoff_t stripe_index, + int stripe) +{ + loff_t offset; + + offset = lov_stripe_size(lsm, stripe_index << PAGE_SHIFT, stripe); + return offset >> PAGE_SHIFT; +} + /* we have an offset in file backed by an lov and want to find out where * that offset lands in our given stripe of the file. for the easy * case where the offset is within the stripe, we just have to scale the diff --git a/drivers/staging/lustre/lustre/lov/lov_pack.c b/drivers/staging/lustre/lustre/lov/lov_pack.c index 3925633a9..0215ea54d 100644 --- a/drivers/staging/lustre/lustre/lov/lov_pack.c +++ b/drivers/staging/lustre/lustre/lov/lov_pack.c @@ -136,7 +136,6 @@ int lov_packmd(struct obd_export *exp, struct lov_mds_md **lmmp, CERROR("bad mem LOV MAGIC: 0x%08X != 0x%08X nor 0x%08X\n", lmm_magic, LOV_MAGIC_V1, LOV_MAGIC_V3); return -EINVAL; - } if (lsm) { @@ -444,8 +443,7 @@ int lov_getstripe(struct obd_export *exp, struct lov_stripe_md *lsm, if (lum.lmm_magic == LOV_USER_MAGIC) { /* User request for v1, we need skip lmm_pool_name */ if (lmmk->lmm_magic == LOV_MAGIC_V3) { - memmove((char *)(&lmmk->lmm_stripe_count) + - sizeof(lmmk->lmm_stripe_count), + memmove(((struct lov_mds_md_v1 *)lmmk)->lmm_objects, ((struct lov_mds_md_v3 *)lmmk)->lmm_objects, lmmk->lmm_stripe_count * sizeof(struct lov_ost_data_v1)); @@ -457,9 +455,9 @@ int lov_getstripe(struct obd_export *exp, struct lov_stripe_md *lsm, } /* User wasn't expecting this many OST entries */ - if (lum.lmm_stripe_count == 0) + if (lum.lmm_stripe_count == 0) { lmm_size = lum_size; - else if (lum.lmm_stripe_count < lmmk->lmm_stripe_count) { + } else if (lum.lmm_stripe_count < lmmk->lmm_stripe_count) { rc = -EOVERFLOW; goto out_set; } diff --git a/drivers/staging/lustre/lustre/lov/lov_page.c b/drivers/staging/lustre/lustre/lov/lov_page.c index fdcaf8047..0306f00c3 100644 --- a/drivers/staging/lustre/lustre/lov/lov_page.c +++ b/drivers/staging/lustre/lustre/lov/lov_page.c @@ -36,6 +36,7 @@ * Implementation of cl_page for LOV layer. * * Author: Nikita Danilov + * Author: Jinshan Xiong */ #define DEBUG_SUBSYSTEM S_LOV @@ -52,116 +53,66 @@ * */ -static int lov_page_invariant(const struct cl_page_slice *slice) +/** + * Adjust the stripe index by layout of raid0. @max_index is the maximum + * page index covered by an underlying DLM lock. + * This function converts max_index from stripe level to file level, and make + * sure it's not beyond one stripe. + */ +static int lov_raid0_page_is_under_lock(const struct lu_env *env, + const struct cl_page_slice *slice, + struct cl_io *unused, + pgoff_t *max_index) { - const struct cl_page *page = slice->cpl_page; - const struct cl_page *sub = lov_sub_page(slice); - - return ergo(sub, - page->cp_child == sub && - sub->cp_parent == page && - page->cp_state == sub->cp_state); -} + struct lov_object *loo = cl2lov(slice->cpl_obj); + struct lov_layout_raid0 *r0 = lov_r0(loo); + pgoff_t index = *max_index; + unsigned int pps; /* pages per stripe */ -static void lov_page_fini(const struct lu_env *env, - struct cl_page_slice *slice) -{ - struct cl_page *sub = lov_sub_page(slice); + CDEBUG(D_READA, "*max_index = %lu, nr = %d\n", index, r0->lo_nr); + if (index == 0) /* the page is not covered by any lock */ + return 0; - LINVRNT(lov_page_invariant(slice)); + if (r0->lo_nr == 1) /* single stripe file */ + return 0; - if (sub) { - LASSERT(sub->cp_state == CPS_FREEING); - lu_ref_del(&sub->cp_reference, "lov", sub->cp_parent); - sub->cp_parent = NULL; - slice->cpl_page->cp_child = NULL; - cl_page_put(env, sub); + /* max_index is stripe level, convert it into file level */ + if (index != CL_PAGE_EOF) { + int stripeno = lov_page_stripe(slice->cpl_page); + *max_index = lov_stripe_pgoff(loo->lo_lsm, index, stripeno); } -} - -static int lov_page_own(const struct lu_env *env, - const struct cl_page_slice *slice, struct cl_io *io, - int nonblock) -{ - struct lov_io *lio = lov_env_io(env); - struct lov_io_sub *sub; - LINVRNT(lov_page_invariant(slice)); - LINVRNT(!cl2lov_page(slice)->lps_invalid); + /* calculate the end of current stripe */ + pps = loo->lo_lsm->lsm_stripe_size >> PAGE_SHIFT; + index = ((slice->cpl_index + pps) & ~(pps - 1)) - 1; - sub = lov_page_subio(env, lio, slice); - if (!IS_ERR(sub)) { - lov_sub_page(slice)->cp_owner = sub->sub_io; - lov_sub_put(sub); - } else - LBUG(); /* Arrgh */ + /* never exceed the end of the stripe */ + *max_index = min_t(pgoff_t, *max_index, index); return 0; } -static void lov_page_assume(const struct lu_env *env, - const struct cl_page_slice *slice, struct cl_io *io) -{ - lov_page_own(env, slice, io, 0); -} - -static int lov_page_cache_add(const struct lu_env *env, - const struct cl_page_slice *slice, - struct cl_io *io) -{ - struct lov_io *lio = lov_env_io(env); - struct lov_io_sub *sub; - int rc = 0; - - LINVRNT(lov_page_invariant(slice)); - LINVRNT(!cl2lov_page(slice)->lps_invalid); - - sub = lov_page_subio(env, lio, slice); - if (!IS_ERR(sub)) { - rc = cl_page_cache_add(sub->sub_env, sub->sub_io, - slice->cpl_page->cp_child, CRT_WRITE); - lov_sub_put(sub); - } else { - rc = PTR_ERR(sub); - CL_PAGE_DEBUG(D_ERROR, env, slice->cpl_page, "rc = %d\n", rc); - } - return rc; -} - -static int lov_page_print(const struct lu_env *env, - const struct cl_page_slice *slice, - void *cookie, lu_printer_t printer) +static int lov_raid0_page_print(const struct lu_env *env, + const struct cl_page_slice *slice, + void *cookie, lu_printer_t printer) { struct lov_page *lp = cl2lov_page(slice); - return (*printer)(env, cookie, LUSTRE_LOV_NAME"-page@%p\n", lp); + return (*printer)(env, cookie, LUSTRE_LOV_NAME "-page@%p, raid0\n", lp); } -static const struct cl_page_operations lov_page_ops = { - .cpo_fini = lov_page_fini, - .cpo_own = lov_page_own, - .cpo_assume = lov_page_assume, - .io = { - [CRT_WRITE] = { - .cpo_cache_add = lov_page_cache_add - } - }, - .cpo_print = lov_page_print +static const struct cl_page_operations lov_raid0_page_ops = { + .cpo_is_under_lock = lov_raid0_page_is_under_lock, + .cpo_print = lov_raid0_page_print }; -static void lov_empty_page_fini(const struct lu_env *env, - struct cl_page_slice *slice) -{ - LASSERT(!slice->cpl_page->cp_child); -} - int lov_page_init_raid0(const struct lu_env *env, struct cl_object *obj, - struct cl_page *page, struct page *vmpage) + struct cl_page *page, pgoff_t index) { struct lov_object *loo = cl2lov(obj); struct lov_layout_raid0 *r0 = lov_r0(loo); struct lov_io *lio = lov_env_io(env); - struct cl_page *subpage; struct cl_object *subobj; + struct cl_object *o; struct lov_io_sub *sub; struct lov_page *lpg = cl_object_page_slice(obj, page); loff_t offset; @@ -169,59 +120,57 @@ int lov_page_init_raid0(const struct lu_env *env, struct cl_object *obj, int stripe; int rc; - offset = cl_offset(obj, page->cp_index); + offset = cl_offset(obj, index); stripe = lov_stripe_number(loo->lo_lsm, offset); LASSERT(stripe < r0->lo_nr); rc = lov_stripe_offset(loo->lo_lsm, offset, stripe, &suboff); LASSERT(rc == 0); - lpg->lps_invalid = 1; - cl_page_slice_add(page, &lpg->lps_cl, obj, &lov_page_ops); + cl_page_slice_add(page, &lpg->lps_cl, obj, index, &lov_raid0_page_ops); sub = lov_sub_get(env, lio, stripe); - if (IS_ERR(sub)) { - rc = PTR_ERR(sub); - goto out; - } + if (IS_ERR(sub)) + return PTR_ERR(sub); subobj = lovsub2cl(r0->lo_sub[stripe]); - subpage = cl_page_find_sub(sub->sub_env, subobj, - cl_index(subobj, suboff), vmpage, page); - lov_sub_put(sub); - if (IS_ERR(subpage)) { - rc = PTR_ERR(subpage); - goto out; - } - - if (likely(subpage->cp_parent == page)) { - lu_ref_add(&subpage->cp_reference, "lov", page); - lpg->lps_invalid = 0; - rc = 0; - } else { - CL_PAGE_DEBUG(D_ERROR, env, page, "parent page\n"); - CL_PAGE_DEBUG(D_ERROR, env, subpage, "child page\n"); - LASSERT(0); + list_for_each_entry(o, &subobj->co_lu.lo_header->loh_layers, + co_lu.lo_linkage) { + if (o->co_ops->coo_page_init) { + rc = o->co_ops->coo_page_init(sub->sub_env, o, page, + cl_index(subobj, suboff)); + if (rc != 0) + break; + } } + lov_sub_put(sub); -out: return rc; } +static int lov_empty_page_print(const struct lu_env *env, + const struct cl_page_slice *slice, + void *cookie, lu_printer_t printer) +{ + struct lov_page *lp = cl2lov_page(slice); + + return (*printer)(env, cookie, LUSTRE_LOV_NAME "-page@%p, empty.\n", + lp); +} + static const struct cl_page_operations lov_empty_page_ops = { - .cpo_fini = lov_empty_page_fini, - .cpo_print = lov_page_print + .cpo_print = lov_empty_page_print }; int lov_page_init_empty(const struct lu_env *env, struct cl_object *obj, - struct cl_page *page, struct page *vmpage) + struct cl_page *page, pgoff_t index) { struct lov_page *lpg = cl_object_page_slice(obj, page); void *addr; - cl_page_slice_add(page, &lpg->lps_cl, obj, &lov_empty_page_ops); - addr = kmap(vmpage); + cl_page_slice_add(page, &lpg->lps_cl, obj, index, &lov_empty_page_ops); + addr = kmap(page->cp_vmpage); memset(addr, 0, cl_page_size(obj)); - kunmap(vmpage); + kunmap(page->cp_vmpage); cl_page_export(env, page, 1); return 0; } diff --git a/drivers/staging/lustre/lustre/lov/lov_pool.c b/drivers/staging/lustre/lustre/lov/lov_pool.c index 9ae1d6f42..690292ece 100644 --- a/drivers/staging/lustre/lustre/lov/lov_pool.c +++ b/drivers/staging/lustre/lustre/lov/lov_pool.c @@ -65,7 +65,6 @@ void lov_pool_putref(struct pool_desc *pool) LASSERT(hlist_unhashed(&pool->pool_hash)); LASSERT(list_empty(&pool->pool_list)); LASSERT(!pool->pool_debugfs_entry); - lov_ost_pool_free(&(pool->pool_rr.lqr_pool)); lov_ost_pool_free(&(pool->pool_obds)); kfree(pool); } @@ -424,11 +423,6 @@ int lov_pool_new(struct obd_device *obd, char *poolname) if (rc) goto out_err; - memset(&(new_pool->pool_rr), 0, sizeof(struct lov_qos_rr)); - rc = lov_ost_pool_init(&new_pool->pool_rr.lqr_pool, 0); - if (rc) - goto out_free_pool_obds; - INIT_HLIST_NODE(&new_pool->pool_hash); /* get ref for debugfs file */ @@ -469,13 +463,10 @@ out_err: list_del_init(&new_pool->pool_list); lov->lov_pool_count--; spin_unlock(&obd->obd_dev_lock); - ldebugfs_remove(&new_pool->pool_debugfs_entry); - - lov_ost_pool_free(&new_pool->pool_rr.lqr_pool); -out_free_pool_obds: lov_ost_pool_free(&new_pool->pool_obds); kfree(new_pool); + return rc; } @@ -543,8 +534,6 @@ int lov_pool_add(struct obd_device *obd, char *poolname, char *ostname) if (rc) goto out; - pool->pool_rr.lqr_dirty = 1; - CDEBUG(D_CONFIG, "Added %s to "LOV_POOLNAMEF" as member %d\n", ostname, poolname, pool_tgt_count(pool)); @@ -589,8 +578,6 @@ int lov_pool_remove(struct obd_device *obd, char *poolname, char *ostname) lov_ost_pool_remove(&pool->pool_obds, lov_idx); - pool->pool_rr.lqr_dirty = 1; - CDEBUG(D_CONFIG, "%s removed from "LOV_POOLNAMEF"\n", ostname, poolname); @@ -599,50 +586,3 @@ out: lov_pool_putref(pool); return rc; } - -int lov_check_index_in_pool(__u32 idx, struct pool_desc *pool) -{ - int i, rc; - - /* caller may no have a ref on pool if it got the pool - * without calling lov_find_pool() (e.g. go through the lov pool - * list) - */ - lov_pool_getref(pool); - - down_read(&pool_tgt_rw_sem(pool)); - - for (i = 0; i < pool_tgt_count(pool); i++) { - if (pool_tgt_array(pool)[i] == idx) { - rc = 0; - goto out; - } - } - rc = -ENOENT; -out: - up_read(&pool_tgt_rw_sem(pool)); - - lov_pool_putref(pool); - return rc; -} - -struct pool_desc *lov_find_pool(struct lov_obd *lov, char *poolname) -{ - struct pool_desc *pool; - - pool = NULL; - if (poolname[0] != '\0') { - pool = cfs_hash_lookup(lov->lov_pools_hash_body, poolname); - if (!pool) - CWARN("Request for an unknown pool ("LOV_POOLNAMEF")\n", - poolname); - if (pool && (pool_tgt_count(pool) == 0)) { - CWARN("Request for an empty pool ("LOV_POOLNAMEF")\n", - poolname); - /* pool is ignored, so we remove ref on it */ - lov_pool_putref(pool); - pool = NULL; - } - } - return pool; -} diff --git a/drivers/staging/lustre/lustre/lov/lov_request.c b/drivers/staging/lustre/lustre/lov/lov_request.c index 7178a02d6..1be4b921c 100644 --- a/drivers/staging/lustre/lustre/lov/lov_request.c +++ b/drivers/staging/lustre/lustre/lov/lov_request.c @@ -52,7 +52,6 @@ static void lov_init_set(struct lov_request_set *set) INIT_LIST_HEAD(&set->set_list); atomic_set(&set->set_refcount, 1); init_waitqueue_head(&set->set_waitq); - spin_lock_init(&set->set_lock); } void lov_finish_set(struct lov_request_set *set) @@ -235,7 +234,6 @@ out: if (tmp_oa) kmem_cache_free(obdo_cachep, tmp_oa); return rc; - } int lov_fini_getattr_set(struct lov_request_set *set) @@ -363,7 +361,6 @@ int lov_prep_destroy_set(struct obd_export *exp, struct obd_info *oinfo, set->set_oi = oinfo; set->set_oi->oi_md = lsm; set->set_oi->oi_oa = src_oa; - set->set_oti = oti; if (oti && src_oa->o_valid & OBD_MD_FLCOOKIE) set->set_cookies = oti->oti_logcookies; @@ -480,7 +477,6 @@ int lov_prep_setattr_set(struct obd_export *exp, struct obd_info *oinfo, lov_init_set(set); set->set_exp = exp; - set->set_oti = oti; set->set_oi = oinfo; if (oti && oinfo->oi_oa->o_valid & OBD_MD_FLCOOKIE) set->set_cookies = oti->oti_logcookies; @@ -716,12 +712,15 @@ int lov_prep_statfs_set(struct obd_device *obd, struct obd_info *oinfo, struct lov_request *req; if (!lov->lov_tgts[i] || - (!lov_check_and_wait_active(lov, i) && - (oinfo->oi_flags & OBD_STATFS_NODELAY))) { + (oinfo->oi_flags & OBD_STATFS_NODELAY && + !lov->lov_tgts[i]->ltd_active)) { CDEBUG(D_HA, "lov idx %d inactive\n", i); continue; } + if (!lov->lov_tgts[i]->ltd_active) + lov_check_and_wait_active(lov, i); + /* skip targets that have been explicitly disabled by the * administrator */ diff --git a/drivers/staging/lustre/lustre/lov/lovsub_dev.c b/drivers/staging/lustre/lustre/lov/lovsub_dev.c index c335c020f..35f6b1d66 100644 --- a/drivers/staging/lustre/lustre/lov/lovsub_dev.c +++ b/drivers/staging/lustre/lustre/lov/lovsub_dev.c @@ -151,8 +151,9 @@ static int lovsub_req_init(const struct lu_env *env, struct cl_device *dev, if (lsr) { cl_req_slice_add(req, &lsr->lsrq_cl, dev, &lovsub_req_ops); result = 0; - } else + } else { result = -ENOMEM; + } return result; } @@ -182,10 +183,12 @@ static struct lu_device *lovsub_device_alloc(const struct lu_env *env, d = lovsub2lu_dev(lsd); d->ld_ops = &lovsub_lu_ops; lsd->acid_cl.cd_ops = &lovsub_cl_ops; - } else + } else { d = ERR_PTR(result); - } else + } + } else { d = ERR_PTR(-ENOMEM); + } return d; } diff --git a/drivers/staging/lustre/lustre/lov/lovsub_lock.c b/drivers/staging/lustre/lustre/lov/lovsub_lock.c index 3bb0c9068..e92edfb61 100644 --- a/drivers/staging/lustre/lustre/lov/lovsub_lock.c +++ b/drivers/staging/lustre/lustre/lov/lovsub_lock.c @@ -62,391 +62,8 @@ static void lovsub_lock_fini(const struct lu_env *env, kmem_cache_free(lovsub_lock_kmem, lsl); } -static void lovsub_parent_lock(const struct lu_env *env, struct lov_lock *lov) -{ - struct cl_lock *parent; - - parent = lov->lls_cl.cls_lock; - cl_lock_get(parent); - lu_ref_add(&parent->cll_reference, "lovsub-parent", current); - cl_lock_mutex_get(env, parent); -} - -static void lovsub_parent_unlock(const struct lu_env *env, struct lov_lock *lov) -{ - struct cl_lock *parent; - - parent = lov->lls_cl.cls_lock; - cl_lock_mutex_put(env, lov->lls_cl.cls_lock); - lu_ref_del(&parent->cll_reference, "lovsub-parent", current); - cl_lock_put(env, parent); -} - -/** - * Implements cl_lock_operations::clo_state() method for lovsub layer, which - * method is called whenever sub-lock state changes. Propagates state change - * to the top-locks. - */ -static void lovsub_lock_state(const struct lu_env *env, - const struct cl_lock_slice *slice, - enum cl_lock_state state) -{ - struct lovsub_lock *sub = cl2lovsub_lock(slice); - struct lov_lock_link *scan; - - LASSERT(cl_lock_is_mutexed(slice->cls_lock)); - - list_for_each_entry(scan, &sub->lss_parents, lll_list) { - struct lov_lock *lov = scan->lll_super; - struct cl_lock *parent = lov->lls_cl.cls_lock; - - if (sub->lss_active != parent) { - lovsub_parent_lock(env, lov); - cl_lock_signal(env, parent); - lovsub_parent_unlock(env, lov); - } - } -} - -/** - * Implementation of cl_lock_operation::clo_weigh() estimating lock weight by - * asking parent lock. - */ -static unsigned long lovsub_lock_weigh(const struct lu_env *env, - const struct cl_lock_slice *slice) -{ - struct lovsub_lock *lock = cl2lovsub_lock(slice); - struct lov_lock *lov; - unsigned long dumbbell; - - LASSERT(cl_lock_is_mutexed(slice->cls_lock)); - - if (!list_empty(&lock->lss_parents)) { - /* - * It is not clear whether all parents have to be asked and - * their estimations summed, or it is enough to ask one. For - * the current usages, one is always enough. - */ - lov = container_of(lock->lss_parents.next, - struct lov_lock_link, lll_list)->lll_super; - - lovsub_parent_lock(env, lov); - dumbbell = cl_lock_weigh(env, lov->lls_cl.cls_lock); - lovsub_parent_unlock(env, lov); - } else - dumbbell = 0; - - return dumbbell; -} - -/** - * Maps start/end offsets within a stripe, to offsets within a file. - */ -static void lovsub_lock_descr_map(const struct cl_lock_descr *in, - struct lov_object *lov, - int stripe, struct cl_lock_descr *out) -{ - pgoff_t size; /* stripe size in pages */ - pgoff_t skip; /* how many pages in every stripe are occupied by - * "other" stripes - */ - pgoff_t start; - pgoff_t end; - - start = in->cld_start; - end = in->cld_end; - - if (lov->lo_lsm->lsm_stripe_count > 1) { - size = cl_index(lov2cl(lov), lov->lo_lsm->lsm_stripe_size); - skip = (lov->lo_lsm->lsm_stripe_count - 1) * size; - - /* XXX overflow check here? */ - start += start/size * skip + stripe * size; - - if (end != CL_PAGE_EOF) { - end += end/size * skip + stripe * size; - /* - * And check for overflow... - */ - if (end < in->cld_end) - end = CL_PAGE_EOF; - } - } - out->cld_start = start; - out->cld_end = end; -} - -/** - * Adjusts parent lock extent when a sub-lock is attached to a parent. This is - * called in two ways: - * - * - as part of receive call-back, when server returns granted extent to - * the client, and - * - * - when top-lock finds existing sub-lock in the cache. - * - * Note, that lock mode is not propagated to the parent: i.e., if CLM_READ - * top-lock matches CLM_WRITE sub-lock, top-lock is still CLM_READ. - */ -int lov_sublock_modify(const struct lu_env *env, struct lov_lock *lov, - struct lovsub_lock *sublock, - const struct cl_lock_descr *d, int idx) -{ - struct cl_lock *parent; - struct lovsub_object *subobj; - struct cl_lock_descr *pd; - struct cl_lock_descr *parent_descr; - int result; - - parent = lov->lls_cl.cls_lock; - parent_descr = &parent->cll_descr; - LASSERT(cl_lock_mode_match(d->cld_mode, parent_descr->cld_mode)); - - subobj = cl2lovsub(sublock->lss_cl.cls_obj); - pd = &lov_env_info(env)->lti_ldescr; - - pd->cld_obj = parent_descr->cld_obj; - pd->cld_mode = parent_descr->cld_mode; - pd->cld_gid = parent_descr->cld_gid; - lovsub_lock_descr_map(d, subobj->lso_super, subobj->lso_index, pd); - lov->lls_sub[idx].sub_got = *d; - /* - * Notify top-lock about modification, if lock description changes - * materially. - */ - if (!cl_lock_ext_match(parent_descr, pd)) - result = cl_lock_modify(env, parent, pd); - else - result = 0; - return result; -} - -static int lovsub_lock_modify(const struct lu_env *env, - const struct cl_lock_slice *s, - const struct cl_lock_descr *d) -{ - struct lovsub_lock *lock = cl2lovsub_lock(s); - struct lov_lock_link *scan; - struct lov_lock *lov; - int result = 0; - - LASSERT(cl_lock_mode_match(d->cld_mode, - s->cls_lock->cll_descr.cld_mode)); - list_for_each_entry(scan, &lock->lss_parents, lll_list) { - int rc; - - lov = scan->lll_super; - lovsub_parent_lock(env, lov); - rc = lov_sublock_modify(env, lov, lock, d, scan->lll_idx); - lovsub_parent_unlock(env, lov); - result = result ?: rc; - } - return result; -} - -static int lovsub_lock_closure(const struct lu_env *env, - const struct cl_lock_slice *slice, - struct cl_lock_closure *closure) -{ - struct lovsub_lock *sub; - struct cl_lock *parent; - struct lov_lock_link *scan; - int result; - - LASSERT(cl_lock_is_mutexed(slice->cls_lock)); - - sub = cl2lovsub_lock(slice); - result = 0; - - list_for_each_entry(scan, &sub->lss_parents, lll_list) { - parent = scan->lll_super->lls_cl.cls_lock; - result = cl_lock_closure_build(env, parent, closure); - if (result != 0) - break; - } - return result; -} - -/** - * A helper function for lovsub_lock_delete() that deals with a given parent - * top-lock. - */ -static int lovsub_lock_delete_one(const struct lu_env *env, - struct cl_lock *child, struct lov_lock *lov) -{ - struct cl_lock *parent; - int result; - - parent = lov->lls_cl.cls_lock; - if (parent->cll_error) - return 0; - - result = 0; - switch (parent->cll_state) { - case CLS_ENQUEUED: - /* See LU-1355 for the case that a glimpse lock is - * interrupted by signal - */ - LASSERT(parent->cll_flags & CLF_CANCELLED); - break; - case CLS_QUEUING: - case CLS_FREEING: - cl_lock_signal(env, parent); - break; - case CLS_INTRANSIT: - /* - * Here lies a problem: a sub-lock is canceled while top-lock - * is being unlocked. Top-lock cannot be moved into CLS_NEW - * state, because unlocking has to succeed eventually by - * placing lock into CLS_CACHED (or failing it), see - * cl_unuse_try(). Nor can top-lock be left in CLS_CACHED - * state, because lov maintains an invariant that all - * sub-locks exist in CLS_CACHED (this allows cached top-lock - * to be reused immediately). Nor can we wait for top-lock - * state to change, because this can be synchronous to the - * current thread. - * - * We know for sure that lov_lock_unuse() will be called at - * least one more time to finish un-using, so leave a mark on - * the top-lock, that will be seen by the next call to - * lov_lock_unuse(). - */ - if (cl_lock_is_intransit(parent)) - lov->lls_cancel_race = 1; - break; - case CLS_CACHED: - /* - * if a sub-lock is canceled move its top-lock into CLS_NEW - * state to preserve an invariant that a top-lock in - * CLS_CACHED is immediately ready for re-use (i.e., has all - * sub-locks), and so that next attempt to re-use the top-lock - * enqueues missing sub-lock. - */ - cl_lock_state_set(env, parent, CLS_NEW); - /* fall through */ - case CLS_NEW: - /* - * if last sub-lock is canceled, destroy the top-lock (which - * is now `empty') proactively. - */ - if (lov->lls_nr_filled == 0) { - /* ... but unfortunately, this cannot be done easily, - * as cancellation of a top-lock might acquire mutices - * of its other sub-locks, violating lock ordering, - * see cl_lock_{cancel,delete}() preconditions. - * - * To work around this, the mutex of this sub-lock is - * released, top-lock is destroyed, and sub-lock mutex - * acquired again. The list of parents has to be - * re-scanned from the beginning after this. - * - * Only do this if no mutices other than on @child and - * @parent are held by the current thread. - * - * TODO: The lock modal here is too complex, because - * the lock may be canceled and deleted by voluntarily: - * cl_lock_request - * -> osc_lock_enqueue_wait - * -> osc_lock_cancel_wait - * -> cl_lock_delete - * -> lovsub_lock_delete - * -> cl_lock_cancel/delete - * -> ... - * - * The better choice is to spawn a kernel thread for - * this purpose. -jay - */ - if (cl_lock_nr_mutexed(env) == 2) { - cl_lock_mutex_put(env, child); - cl_lock_cancel(env, parent); - cl_lock_delete(env, parent); - result = 1; - } - } - break; - case CLS_HELD: - CL_LOCK_DEBUG(D_ERROR, env, parent, "Delete CLS_HELD lock\n"); - default: - CERROR("Impossible state: %d\n", parent->cll_state); - LBUG(); - break; - } - - return result; -} - -/** - * An implementation of cl_lock_operations::clo_delete() method. This is - * invoked in "bottom-to-top" delete, when lock destruction starts from the - * sub-lock (e.g, as a result of ldlm lock LRU policy). - */ -static void lovsub_lock_delete(const struct lu_env *env, - const struct cl_lock_slice *slice) -{ - struct cl_lock *child = slice->cls_lock; - struct lovsub_lock *sub = cl2lovsub_lock(slice); - int restart; - - LASSERT(cl_lock_is_mutexed(child)); - - /* - * Destruction of a sub-lock might take multiple iterations, because - * when the last sub-lock of a given top-lock is deleted, top-lock is - * canceled proactively, and this requires to release sub-lock - * mutex. Once sub-lock mutex has been released, list of its parents - * has to be re-scanned from the beginning. - */ - do { - struct lov_lock *lov; - struct lov_lock_link *scan; - struct lov_lock_link *temp; - struct lov_lock_sub *subdata; - - restart = 0; - list_for_each_entry_safe(scan, temp, - &sub->lss_parents, lll_list) { - lov = scan->lll_super; - subdata = &lov->lls_sub[scan->lll_idx]; - lovsub_parent_lock(env, lov); - subdata->sub_got = subdata->sub_descr; - lov_lock_unlink(env, scan, sub); - restart = lovsub_lock_delete_one(env, child, lov); - lovsub_parent_unlock(env, lov); - - if (restart) { - cl_lock_mutex_get(env, child); - break; - } - } - } while (restart); -} - -static int lovsub_lock_print(const struct lu_env *env, void *cookie, - lu_printer_t p, const struct cl_lock_slice *slice) -{ - struct lovsub_lock *sub = cl2lovsub_lock(slice); - struct lov_lock *lov; - struct lov_lock_link *scan; - - list_for_each_entry(scan, &sub->lss_parents, lll_list) { - lov = scan->lll_super; - (*p)(env, cookie, "[%d %p ", scan->lll_idx, lov); - if (lov) - cl_lock_descr_print(env, cookie, p, - &lov->lls_cl.cls_lock->cll_descr); - (*p)(env, cookie, "] "); - } - return 0; -} - static const struct cl_lock_operations lovsub_lock_ops = { .clo_fini = lovsub_lock_fini, - .clo_state = lovsub_lock_state, - .clo_delete = lovsub_lock_delete, - .clo_modify = lovsub_lock_modify, - .clo_closure = lovsub_lock_closure, - .clo_weigh = lovsub_lock_weigh, - .clo_print = lovsub_lock_print }; int lovsub_lock_init(const struct lu_env *env, struct cl_object *obj, @@ -460,8 +77,9 @@ int lovsub_lock_init(const struct lu_env *env, struct cl_object *obj, INIT_LIST_HEAD(&lsk->lss_parents); cl_lock_slice_add(lock, &lsk->lss_cl, obj, &lovsub_lock_ops); result = 0; - } else + } else { result = -ENOMEM; + } return result; } diff --git a/drivers/staging/lustre/lustre/lov/lovsub_object.c b/drivers/staging/lustre/lustre/lov/lovsub_object.c index 6c5430d93..bcaae1e5b 100644 --- a/drivers/staging/lustre/lustre/lov/lovsub_object.c +++ b/drivers/staging/lustre/lustre/lov/lovsub_object.c @@ -67,10 +67,10 @@ int lovsub_object_init(const struct lu_env *env, struct lu_object *obj, lu_object_add(obj, below); cl_object_page_init(lu2cl(obj), sizeof(struct lovsub_page)); result = 0; - } else + } else { result = -ENOMEM; + } return result; - } static void lovsub_object_free(const struct lu_env *env, struct lu_object *obj) @@ -154,8 +154,9 @@ struct lu_object *lovsub_object_alloc(const struct lu_env *env, lu_object_add_top(&hdr->coh_lu, obj); los->lso_cl.co_ops = &lovsub_ops; obj->lo_ops = &lovsub_lu_obj_ops; - } else + } else { obj = NULL; + } return obj; } diff --git a/drivers/staging/lustre/lustre/lov/lovsub_page.c b/drivers/staging/lustre/lustre/lov/lovsub_page.c index 2d945532b..9badedcce 100644 --- a/drivers/staging/lustre/lustre/lov/lovsub_page.c +++ b/drivers/staging/lustre/lustre/lov/lovsub_page.c @@ -60,11 +60,11 @@ static const struct cl_page_operations lovsub_page_ops = { }; int lovsub_page_init(const struct lu_env *env, struct cl_object *obj, - struct cl_page *page, struct page *unused) + struct cl_page *page, pgoff_t index) { struct lovsub_page *lsb = cl_object_page_slice(obj, page); - cl_page_slice_add(page, &lsb->lsb_cl, obj, &lovsub_page_ops); + cl_page_slice_add(page, &lsb->lsb_cl, obj, index, &lovsub_page_ops); return 0; } diff --git a/drivers/staging/lustre/lustre/mdc/lproc_mdc.c b/drivers/staging/lustre/lustre/mdc/lproc_mdc.c index 38f267a60..5c7a15dd7 100644 --- a/drivers/staging/lustre/lustre/mdc/lproc_mdc.c +++ b/drivers/staging/lustre/lustre/mdc/lproc_mdc.c @@ -49,9 +49,9 @@ static ssize_t max_rpcs_in_flight_show(struct kobject *kobj, obd_kobj); struct client_obd *cli = &dev->u.cli; - client_obd_list_lock(&cli->cl_loi_list_lock); + spin_lock(&cli->cl_loi_list_lock); len = sprintf(buf, "%u\n", cli->cl_max_rpcs_in_flight); - client_obd_list_unlock(&cli->cl_loi_list_lock); + spin_unlock(&cli->cl_loi_list_lock); return len; } @@ -74,9 +74,9 @@ static ssize_t max_rpcs_in_flight_store(struct kobject *kobj, if (val < 1 || val > MDC_MAX_RIF_MAX) return -ERANGE; - client_obd_list_lock(&cli->cl_loi_list_lock); + spin_lock(&cli->cl_loi_list_lock); cli->cl_max_rpcs_in_flight = val; - client_obd_list_unlock(&cli->cl_loi_list_lock); + spin_unlock(&cli->cl_loi_list_lock); return count; } diff --git a/drivers/staging/lustre/lustre/mdc/mdc_lib.c b/drivers/staging/lustre/lustre/mdc/mdc_lib.c index b3bfdcb73..856c54e03 100644 --- a/drivers/staging/lustre/lustre/mdc/mdc_lib.c +++ b/drivers/staging/lustre/lustre/mdc/mdc_lib.c @@ -279,8 +279,7 @@ static void mdc_setattr_pack_rec(struct mdt_rec_setattr *rec, rec->sa_atime = LTIME_S(op_data->op_attr.ia_atime); rec->sa_mtime = LTIME_S(op_data->op_attr.ia_mtime); rec->sa_ctime = LTIME_S(op_data->op_attr.ia_ctime); - rec->sa_attr_flags = - ((struct ll_iattr *)&op_data->op_attr)->ia_attr_flags; + rec->sa_attr_flags = op_data->op_attr_flags; if ((op_data->op_attr.ia_valid & ATTR_GID) && in_group_p(op_data->op_attr.ia_gid)) rec->sa_suppgid = @@ -439,7 +438,6 @@ void mdc_getattr_pack(struct ptlrpc_request *req, __u64 valid, int flags, char *tmp = req_capsule_client_get(&req->rq_pill, &RMF_NAME); LOGL0(op_data->op_name, op_data->op_namelen, tmp); - } } @@ -455,7 +453,7 @@ static void mdc_hsm_release_pack(struct ptlrpc_request *req, lock = ldlm_handle2lock(&op_data->op_lease_handle); if (lock) { data->cd_handle = lock->l_remote_handle; - ldlm_lock_put(lock); + LDLM_LOCK_PUT(lock); } ldlm_cli_cancel(&op_data->op_lease_handle, LCF_LOCAL); @@ -481,9 +479,9 @@ static int mdc_req_avail(struct client_obd *cli, struct mdc_cache_waiter *mcw) { int rc; - client_obd_list_lock(&cli->cl_loi_list_lock); + spin_lock(&cli->cl_loi_list_lock); rc = list_empty(&mcw->mcw_entry); - client_obd_list_unlock(&cli->cl_loi_list_lock); + spin_unlock(&cli->cl_loi_list_lock); return rc; }; @@ -497,23 +495,23 @@ int mdc_enter_request(struct client_obd *cli) struct mdc_cache_waiter mcw; struct l_wait_info lwi = LWI_INTR(LWI_ON_SIGNAL_NOOP, NULL); - client_obd_list_lock(&cli->cl_loi_list_lock); + spin_lock(&cli->cl_loi_list_lock); if (cli->cl_r_in_flight >= cli->cl_max_rpcs_in_flight) { list_add_tail(&mcw.mcw_entry, &cli->cl_cache_waiters); init_waitqueue_head(&mcw.mcw_waitq); - client_obd_list_unlock(&cli->cl_loi_list_lock); + spin_unlock(&cli->cl_loi_list_lock); rc = l_wait_event(mcw.mcw_waitq, mdc_req_avail(cli, &mcw), &lwi); if (rc) { - client_obd_list_lock(&cli->cl_loi_list_lock); + spin_lock(&cli->cl_loi_list_lock); if (list_empty(&mcw.mcw_entry)) cli->cl_r_in_flight--; list_del_init(&mcw.mcw_entry); - client_obd_list_unlock(&cli->cl_loi_list_lock); + spin_unlock(&cli->cl_loi_list_lock); } } else { cli->cl_r_in_flight++; - client_obd_list_unlock(&cli->cl_loi_list_lock); + spin_unlock(&cli->cl_loi_list_lock); } return rc; } @@ -523,7 +521,7 @@ void mdc_exit_request(struct client_obd *cli) struct list_head *l, *tmp; struct mdc_cache_waiter *mcw; - client_obd_list_lock(&cli->cl_loi_list_lock); + spin_lock(&cli->cl_loi_list_lock); cli->cl_r_in_flight--; list_for_each_safe(l, tmp, &cli->cl_cache_waiters) { if (cli->cl_r_in_flight >= cli->cl_max_rpcs_in_flight) { @@ -538,5 +536,5 @@ void mdc_exit_request(struct client_obd *cli) } /* Empty waiting list? Decrease reqs in-flight number */ - client_obd_list_unlock(&cli->cl_loi_list_lock); + spin_unlock(&cli->cl_loi_list_lock); } diff --git a/drivers/staging/lustre/lustre/mdc/mdc_locks.c b/drivers/staging/lustre/lustre/mdc/mdc_locks.c index 958a164f6..3b1bc9111 100644 --- a/drivers/staging/lustre/lustre/mdc/mdc_locks.c +++ b/drivers/staging/lustre/lustre/mdc/mdc_locks.c @@ -869,7 +869,9 @@ resend: * (explicits or automatically generated by Kernel to clean * current FLocks upon exit) that can't be trashed */ - if ((rc == -EINTR) || (rc == -ETIMEDOUT)) + if (((rc == -EINTR) || (rc == -ETIMEDOUT)) && + (einfo->ei_type == LDLM_FLOCK) && + (einfo->ei_mode == LCK_NL)) goto resend; return rc; } @@ -963,7 +965,6 @@ static int mdc_finish_intent_lock(struct obd_export *exp, if (fid_is_sane(&op_data->op_fid2) && it->it_create_mode & M_CHECK_STALE && it->it_op != IT_GETATTR) { - /* Also: did we find the same inode? */ /* sever can return one of two fids: * op_fid2 - new allocated fid - if file is created. diff --git a/drivers/staging/lustre/lustre/mdc/mdc_request.c b/drivers/staging/lustre/lustre/mdc/mdc_request.c index b91d3ff18..86b744536 100644 --- a/drivers/staging/lustre/lustre/mdc/mdc_request.c +++ b/drivers/staging/lustre/lustre/mdc/mdc_request.c @@ -142,9 +142,8 @@ static int mdc_getattr_common(struct obd_export *exp, CDEBUG(D_NET, "mode: %o\n", body->mode); + mdc_update_max_ea_from_body(exp, body); if (body->eadatasize != 0) { - mdc_update_max_ea_from_body(exp, body); - eadata = req_capsule_server_sized_get(pill, &RMF_MDT_MD, body->eadatasize); if (!eadata) @@ -1169,7 +1168,7 @@ static int mdc_ioc_hsm_progress(struct obd_export *exp, goto out; } - mdc_pack_body(req, NULL, OBD_MD_FLRMTPERM, 0, 0, 0); + mdc_pack_body(req, NULL, OBD_MD_FLRMTPERM, 0, -1, 0); /* Copy hsm_progress struct */ req_hpk = req_capsule_client_get(&req->rq_pill, &RMF_MDS_HSM_PROGRESS); @@ -1203,7 +1202,7 @@ static int mdc_ioc_hsm_ct_register(struct obd_import *imp, __u32 archives) goto out; } - mdc_pack_body(req, NULL, OBD_MD_FLRMTPERM, 0, 0, 0); + mdc_pack_body(req, NULL, OBD_MD_FLRMTPERM, 0, -1, 0); /* Copy hsm_progress struct */ archive_mask = req_capsule_client_get(&req->rq_pill, @@ -1278,7 +1277,7 @@ static int mdc_ioc_hsm_ct_unregister(struct obd_import *imp) goto out; } - mdc_pack_body(req, NULL, OBD_MD_FLRMTPERM, 0, 0, 0); + mdc_pack_body(req, NULL, OBD_MD_FLRMTPERM, 0, -1, 0); ptlrpc_request_set_replen(req); @@ -1395,7 +1394,7 @@ static int mdc_ioc_hsm_request(struct obd_export *exp, return rc; } - mdc_pack_body(req, NULL, OBD_MD_FLRMTPERM, 0, 0, 0); + mdc_pack_body(req, NULL, OBD_MD_FLRMTPERM, 0, -1, 0); /* Copy hsm_request struct */ req_hr = req_capsule_client_get(&req->rq_pill, &RMF_MDS_HSM_REQUEST); @@ -1952,7 +1951,7 @@ static void lustre_swab_hal(struct hsm_action_list *h) __swab32s(&h->hal_count); __swab32s(&h->hal_archive_id); __swab64s(&h->hal_flags); - hai = hai_zero(h); + hai = hai_first(h); for (i = 0; i < h->hal_count; i++, hai = hai_next(hai)) lustre_swab_hai(hai); } @@ -2249,7 +2248,7 @@ static struct obd_uuid *mdc_get_uuid(struct obd_export *exp) * recovery, non zero value will be return if the lock can be canceled, * or zero returned for not */ -static int mdc_cancel_for_recovery(struct ldlm_lock *lock) +static int mdc_cancel_weight(struct ldlm_lock *lock) { if (lock->l_resource->lr_type != LDLM_IBITS) return 0; @@ -2314,12 +2313,14 @@ static int mdc_setup(struct obd_device *obd, struct lustre_cfg *cfg) return -ENOMEM; mdc_init_rpc_lock(cli->cl_rpc_lock); - ptlrpcd_addref(); + rc = ptlrpcd_addref(); + if (rc < 0) + goto err_rpc_lock; cli->cl_close_lock = kzalloc(sizeof(*cli->cl_close_lock), GFP_NOFS); if (!cli->cl_close_lock) { rc = -ENOMEM; - goto err_rpc_lock; + goto err_ptlrpcd_decref; } mdc_init_rpc_lock(cli->cl_close_lock); @@ -2331,7 +2332,7 @@ static int mdc_setup(struct obd_device *obd, struct lustre_cfg *cfg) sptlrpc_lprocfs_cliobd_attach(obd); ptlrpc_lprocfs_register_obd(obd); - ns_register_cancel(obd->obd_namespace, mdc_cancel_for_recovery); + ns_register_cancel(obd->obd_namespace, mdc_cancel_weight); obd->obd_namespace->ns_lvbo = &inode_lvbo; @@ -2345,9 +2346,10 @@ static int mdc_setup(struct obd_device *obd, struct lustre_cfg *cfg) err_close_lock: kfree(cli->cl_close_lock); +err_ptlrpcd_decref: + ptlrpcd_decref(); err_rpc_lock: kfree(cli->cl_rpc_lock); - ptlrpcd_decref(); return rc; } diff --git a/drivers/staging/lustre/lustre/mgc/mgc_request.c b/drivers/staging/lustre/lustre/mgc/mgc_request.c index 3924b095b..2311a437c 100644 --- a/drivers/staging/lustre/lustre/mgc/mgc_request.c +++ b/drivers/staging/lustre/lustre/mgc/mgc_request.c @@ -502,8 +502,12 @@ static void do_requeue(struct config_llog_data *cld) */ down_read(&cld->cld_mgcexp->exp_obd->u.cli.cl_sem); if (cld->cld_mgcexp->exp_obd->u.cli.cl_conn_count != 0) { + int rc; + CDEBUG(D_MGC, "updating log %s\n", cld->cld_logname); - mgc_process_log(cld->cld_mgcexp->exp_obd, cld); + rc = mgc_process_log(cld->cld_mgcexp->exp_obd, cld); + if (rc && rc != -ENOENT) + CERROR("failed processing log: %d\n", rc); } else { CDEBUG(D_MGC, "disconnecting, won't update log %s\n", cld->cld_logname); @@ -734,7 +738,9 @@ static int mgc_setup(struct obd_device *obd, struct lustre_cfg *lcfg) struct task_struct *task; int rc; - ptlrpcd_addref(); + rc = ptlrpcd_addref(); + if (rc < 0) + goto err_noref; rc = client_obd_setup(obd, lcfg); if (rc) @@ -773,6 +779,7 @@ err_cleanup: client_obd_cleanup(obd); err_decref: ptlrpcd_decref(); +err_noref: return rc; } @@ -1720,7 +1727,6 @@ static int mgc_process_config(struct obd_device *obd, u32 len, void *buf) CERROR("Unknown command: %d\n", lcfg->lcfg_command); rc = -EINVAL; goto out; - } } out: diff --git a/drivers/staging/lustre/lustre/obdclass/cl_io.c b/drivers/staging/lustre/lustre/obdclass/cl_io.c index f5128b4f1..583fb5f33 100644 --- a/drivers/staging/lustre/lustre/obdclass/cl_io.c +++ b/drivers/staging/lustre/lustre/obdclass/cl_io.c @@ -36,6 +36,7 @@ * Client IO. * * Author: Nikita Danilov + * Author: Jinshan Xiong */ #define DEBUG_SUBSYSTEM S_CLASS @@ -132,6 +133,7 @@ void cl_io_fini(const struct lu_env *env, struct cl_io *io) case CIT_WRITE: break; case CIT_FAULT: + break; case CIT_FSYNC: LASSERT(!io->ci_need_restart); break; @@ -159,7 +161,6 @@ static int cl_io_init0(const struct lu_env *env, struct cl_io *io, io->ci_type = iot; INIT_LIST_HEAD(&io->ci_lockset.cls_todo); - INIT_LIST_HEAD(&io->ci_lockset.cls_curr); INIT_LIST_HEAD(&io->ci_lockset.cls_done); INIT_LIST_HEAD(&io->ci_layers); @@ -241,37 +242,7 @@ static int cl_lock_descr_sort(const struct cl_lock_descr *d0, const struct cl_lock_descr *d1) { return lu_fid_cmp(lu_object_fid(&d0->cld_obj->co_lu), - lu_object_fid(&d1->cld_obj->co_lu)) ?: - __diff_normalize(d0->cld_start, d1->cld_start); -} - -static int cl_lock_descr_cmp(const struct cl_lock_descr *d0, - const struct cl_lock_descr *d1) -{ - int ret; - - ret = lu_fid_cmp(lu_object_fid(&d0->cld_obj->co_lu), - lu_object_fid(&d1->cld_obj->co_lu)); - if (ret) - return ret; - if (d0->cld_end < d1->cld_start) - return -1; - if (d0->cld_start > d0->cld_end) - return 1; - return 0; -} - -static void cl_lock_descr_merge(struct cl_lock_descr *d0, - const struct cl_lock_descr *d1) -{ - d0->cld_start = min(d0->cld_start, d1->cld_start); - d0->cld_end = max(d0->cld_end, d1->cld_end); - - if (d1->cld_mode == CLM_WRITE && d0->cld_mode != CLM_WRITE) - d0->cld_mode = CLM_WRITE; - - if (d1->cld_mode == CLM_GROUP && d0->cld_mode != CLM_GROUP) - d0->cld_mode = CLM_GROUP; + lu_object_fid(&d1->cld_obj->co_lu)); } /* @@ -320,33 +291,35 @@ static void cl_io_locks_sort(struct cl_io *io) } while (!done); } -/** - * Check whether \a queue contains locks matching \a need. - * - * \retval +ve there is a matching lock in the \a queue - * \retval 0 there are no matching locks in the \a queue - */ -int cl_queue_match(const struct list_head *queue, - const struct cl_lock_descr *need) +static void cl_lock_descr_merge(struct cl_lock_descr *d0, + const struct cl_lock_descr *d1) { - struct cl_io_lock_link *scan; + d0->cld_start = min(d0->cld_start, d1->cld_start); + d0->cld_end = max(d0->cld_end, d1->cld_end); - list_for_each_entry(scan, queue, cill_linkage) { - if (cl_lock_descr_match(&scan->cill_descr, need)) - return 1; - } - return 0; + if (d1->cld_mode == CLM_WRITE && d0->cld_mode != CLM_WRITE) + d0->cld_mode = CLM_WRITE; + + if (d1->cld_mode == CLM_GROUP && d0->cld_mode != CLM_GROUP) + d0->cld_mode = CLM_GROUP; } -EXPORT_SYMBOL(cl_queue_match); -static int cl_queue_merge(const struct list_head *queue, - const struct cl_lock_descr *need) +static int cl_lockset_merge(const struct cl_lockset *set, + const struct cl_lock_descr *need) { struct cl_io_lock_link *scan; - list_for_each_entry(scan, queue, cill_linkage) { - if (cl_lock_descr_cmp(&scan->cill_descr, need)) + list_for_each_entry(scan, &set->cls_todo, cill_linkage) { + if (!cl_object_same(scan->cill_descr.cld_obj, need->cld_obj)) continue; + + /* Merge locks for the same object because ldlm lock server + * may expand the lock extent, otherwise there is a deadlock + * case if two conflicted locks are queueud for the same object + * and lock server expands one lock to overlap the another. + * The side effect is that it can generate a multi-stripe lock + * that may cause casacading problem + */ cl_lock_descr_merge(&scan->cill_descr, need); CDEBUG(D_VFSTRACE, "lock: %d: [%lu, %lu]\n", scan->cill_descr.cld_mode, scan->cill_descr.cld_start, @@ -356,87 +329,20 @@ static int cl_queue_merge(const struct list_head *queue, return 0; } -static int cl_lockset_match(const struct cl_lockset *set, - const struct cl_lock_descr *need) -{ - return cl_queue_match(&set->cls_curr, need) || - cl_queue_match(&set->cls_done, need); -} - -static int cl_lockset_merge(const struct cl_lockset *set, - const struct cl_lock_descr *need) -{ - return cl_queue_merge(&set->cls_todo, need) || - cl_lockset_match(set, need); -} - -static int cl_lockset_lock_one(const struct lu_env *env, - struct cl_io *io, struct cl_lockset *set, - struct cl_io_lock_link *link) -{ - struct cl_lock *lock; - int result; - - lock = cl_lock_request(env, io, &link->cill_descr, "io", io); - - if (!IS_ERR(lock)) { - link->cill_lock = lock; - list_move(&link->cill_linkage, &set->cls_curr); - if (!(link->cill_descr.cld_enq_flags & CEF_ASYNC)) { - result = cl_wait(env, lock); - if (result == 0) - list_move(&link->cill_linkage, &set->cls_done); - } else - result = 0; - } else - result = PTR_ERR(lock); - return result; -} - -static void cl_lock_link_fini(const struct lu_env *env, struct cl_io *io, - struct cl_io_lock_link *link) -{ - struct cl_lock *lock = link->cill_lock; - - list_del_init(&link->cill_linkage); - if (lock) { - cl_lock_release(env, lock, "io", io); - link->cill_lock = NULL; - } - if (link->cill_fini) - link->cill_fini(env, link); -} - static int cl_lockset_lock(const struct lu_env *env, struct cl_io *io, struct cl_lockset *set) { struct cl_io_lock_link *link; struct cl_io_lock_link *temp; - struct cl_lock *lock; int result; result = 0; list_for_each_entry_safe(link, temp, &set->cls_todo, cill_linkage) { - if (!cl_lockset_match(set, &link->cill_descr)) { - /* XXX some locking to guarantee that locks aren't - * expanded in between. - */ - result = cl_lockset_lock_one(env, io, set, link); - if (result != 0) - break; - } else - cl_lock_link_fini(env, io, link); - } - if (result == 0) { - list_for_each_entry_safe(link, temp, - &set->cls_curr, cill_linkage) { - lock = link->cill_lock; - result = cl_wait(env, lock); - if (result == 0) - list_move(&link->cill_linkage, &set->cls_done); - else - break; - } + result = cl_lock_request(env, io, &link->cill_lock); + if (result < 0) + break; + + list_move(&link->cill_linkage, &set->cls_done); } return result; } @@ -492,16 +398,19 @@ void cl_io_unlock(const struct lu_env *env, struct cl_io *io) set = &io->ci_lockset; - list_for_each_entry_safe(link, temp, &set->cls_todo, cill_linkage) - cl_lock_link_fini(env, io, link); - - list_for_each_entry_safe(link, temp, &set->cls_curr, cill_linkage) - cl_lock_link_fini(env, io, link); + list_for_each_entry_safe(link, temp, &set->cls_todo, cill_linkage) { + list_del_init(&link->cill_linkage); + if (link->cill_fini) + link->cill_fini(env, link); + } list_for_each_entry_safe(link, temp, &set->cls_done, cill_linkage) { - cl_unuse(env, link->cill_lock); - cl_lock_link_fini(env, io, link); + list_del_init(&link->cill_linkage); + cl_lock_release(env, &link->cill_lock); + if (link->cill_fini) + link->cill_fini(env, link); } + cl_io_for_each_reverse(scan, io) { if (scan->cis_iop->op[io->ci_type].cio_unlock) scan->cis_iop->op[io->ci_type].cio_unlock(env, scan); @@ -595,9 +504,9 @@ int cl_io_lock_add(const struct lu_env *env, struct cl_io *io, { int result; - if (cl_lockset_merge(&io->ci_lockset, &link->cill_descr)) + if (cl_lockset_merge(&io->ci_lockset, &link->cill_descr)) { result = 1; - else { + } else { list_add(&link->cill_linkage, &io->ci_lockset.cls_todo); result = 0; } @@ -627,8 +536,9 @@ int cl_io_lock_alloc_add(const struct lu_env *env, struct cl_io *io, result = cl_io_lock_add(env, io, link); if (result) /* lock match */ link->cill_fini(env, link); - } else + } else { result = -ENOMEM; + } return result; } @@ -691,42 +601,6 @@ cl_io_slice_page(const struct cl_io_slice *ios, struct cl_page *page) return slice; } -/** - * True iff \a page is within \a io range. - */ -static int cl_page_in_io(const struct cl_page *page, const struct cl_io *io) -{ - int result = 1; - loff_t start; - loff_t end; - pgoff_t idx; - - idx = page->cp_index; - switch (io->ci_type) { - case CIT_READ: - case CIT_WRITE: - /* - * check that [start, end) and [pos, pos + count) extents - * overlap. - */ - if (!cl_io_is_append(io)) { - const struct cl_io_rw_common *crw = &(io->u.ci_rw); - - start = cl_offset(page->cp_obj, idx); - end = cl_offset(page->cp_obj, idx + 1); - result = crw->crw_pos < end && - start < crw->crw_pos + crw->crw_count; - } - break; - case CIT_FAULT: - result = io->u.ci_fault.ft_index == idx; - break; - default: - LBUG(); - } - return result; -} - /** * Called by read io, when page has to be read from the server. * @@ -742,7 +616,6 @@ int cl_io_read_page(const struct lu_env *env, struct cl_io *io, LINVRNT(io->ci_type == CIT_READ || io->ci_type == CIT_FAULT); LINVRNT(cl_page_is_owned(page, io)); LINVRNT(io->ci_state == CIS_IO_GOING || io->ci_state == CIS_LOCKED); - LINVRNT(cl_page_in_io(page, io)); LINVRNT(cl_io_invariant(io)); queue = &io->ci_queue; @@ -769,7 +642,7 @@ int cl_io_read_page(const struct lu_env *env, struct cl_io *io, break; } } - if (result == 0) + if (result == 0 && queue->c2_qin.pl_nr > 0) result = cl_io_submit_rw(env, io, CRT_READ, queue); /* * Unlock unsent pages in case of error. @@ -781,77 +654,29 @@ int cl_io_read_page(const struct lu_env *env, struct cl_io *io, EXPORT_SYMBOL(cl_io_read_page); /** - * Called by write io to prepare page to receive data from user buffer. + * Commit a list of contiguous pages into writeback cache. * - * \see cl_io_operations::cio_prepare_write() + * \returns 0 if all pages committed, or errcode if error occurred. + * \see cl_io_operations::cio_commit_async() */ -int cl_io_prepare_write(const struct lu_env *env, struct cl_io *io, - struct cl_page *page, unsigned from, unsigned to) +int cl_io_commit_async(const struct lu_env *env, struct cl_io *io, + struct cl_page_list *queue, int from, int to, + cl_commit_cbt cb) { const struct cl_io_slice *scan; int result = 0; - LINVRNT(io->ci_type == CIT_WRITE); - LINVRNT(cl_page_is_owned(page, io)); - LINVRNT(io->ci_state == CIS_IO_GOING || io->ci_state == CIS_LOCKED); - LINVRNT(cl_io_invariant(io)); - LASSERT(cl_page_in_io(page, io)); - - cl_io_for_each_reverse(scan, io) { - if (scan->cis_iop->cio_prepare_write) { - const struct cl_page_slice *slice; - - slice = cl_io_slice_page(scan, page); - result = scan->cis_iop->cio_prepare_write(env, scan, - slice, - from, to); - if (result != 0) - break; - } - } - return result; -} -EXPORT_SYMBOL(cl_io_prepare_write); - -/** - * Called by write io after user data were copied into a page. - * - * \see cl_io_operations::cio_commit_write() - */ -int cl_io_commit_write(const struct lu_env *env, struct cl_io *io, - struct cl_page *page, unsigned from, unsigned to) -{ - const struct cl_io_slice *scan; - int result = 0; - - LINVRNT(io->ci_type == CIT_WRITE); - LINVRNT(io->ci_state == CIS_IO_GOING || io->ci_state == CIS_LOCKED); - LINVRNT(cl_io_invariant(io)); - /* - * XXX Uh... not nice. Top level cl_io_commit_write() call (vvp->lov) - * already called cl_page_cache_add(), moving page into CPS_CACHED - * state. Better (and more general) way of dealing with such situation - * is needed. - */ - LASSERT(cl_page_is_owned(page, io) || page->cp_parent); - LASSERT(cl_page_in_io(page, io)); - cl_io_for_each(scan, io) { - if (scan->cis_iop->cio_commit_write) { - const struct cl_page_slice *slice; - - slice = cl_io_slice_page(scan, page); - result = scan->cis_iop->cio_commit_write(env, scan, - slice, - from, to); - if (result != 0) - break; - } + if (!scan->cis_iop->cio_commit_async) + continue; + result = scan->cis_iop->cio_commit_async(env, scan, queue, + from, to, cb); + if (result != 0) + break; } - LINVRNT(result <= 0); return result; } -EXPORT_SYMBOL(cl_io_commit_write); +EXPORT_SYMBOL(cl_io_commit_async); /** * Submits a list of pages for immediate io. @@ -869,13 +694,10 @@ int cl_io_submit_rw(const struct lu_env *env, struct cl_io *io, const struct cl_io_slice *scan; int result = 0; - LINVRNT(crt < ARRAY_SIZE(scan->cis_iop->req_op)); - cl_io_for_each(scan, io) { - if (!scan->cis_iop->req_op[crt].cio_submit) + if (!scan->cis_iop->cio_submit) continue; - result = scan->cis_iop->req_op[crt].cio_submit(env, scan, crt, - queue); + result = scan->cis_iop->cio_submit(env, scan, crt, queue); if (result != 0) break; } @@ -887,6 +709,9 @@ int cl_io_submit_rw(const struct lu_env *env, struct cl_io *io, } EXPORT_SYMBOL(cl_io_submit_rw); +static void cl_page_list_assume(const struct lu_env *env, + struct cl_io *io, struct cl_page_list *plist); + /** * Submit a sync_io and wait for the IO to be finished, or error happens. * If \a timeout is zero, it means to wait for the IO unconditionally. @@ -904,7 +729,7 @@ int cl_io_submit_sync(const struct lu_env *env, struct cl_io *io, pg->cp_sync_io = anchor; } - cl_sync_io_init(anchor, queue->c2_qin.pl_nr); + cl_sync_io_init(anchor, queue->c2_qin.pl_nr, &cl_sync_io_end); rc = cl_io_submit_rw(env, io, iot, queue); if (rc == 0) { /* @@ -915,12 +740,12 @@ int cl_io_submit_sync(const struct lu_env *env, struct cl_io *io, */ cl_page_list_for_each(pg, &queue->c2_qin) { pg->cp_sync_io = NULL; - cl_sync_io_note(anchor, 1); + cl_sync_io_note(env, anchor, 1); } /* wait for the IO to be finished. */ - rc = cl_sync_io_wait(env, io, &queue->c2_qout, - anchor, timeout); + rc = cl_sync_io_wait(env, anchor, timeout); + cl_page_list_assume(env, io, &queue->c2_qout); } else { LASSERT(list_empty(&queue->c2_qout.pl_pages)); cl_page_list_for_each(pg, &queue->c2_qin) @@ -930,26 +755,6 @@ int cl_io_submit_sync(const struct lu_env *env, struct cl_io *io, } EXPORT_SYMBOL(cl_io_submit_sync); -/** - * Cancel an IO which has been submitted by cl_io_submit_rw. - */ -static int cl_io_cancel(const struct lu_env *env, struct cl_io *io, - struct cl_page_list *queue) -{ - struct cl_page *page; - int result = 0; - - CERROR("Canceling ongoing page transmission\n"); - cl_page_list_for_each(page, queue) { - int rc; - - LINVRNT(cl_page_in_io(page, io)); - rc = cl_page_cancel(env, page); - result = result ?: rc; - } - return result; -} - /** * Main io loop. * @@ -1072,8 +877,8 @@ EXPORT_SYMBOL(cl_page_list_add); /** * Removes a page from a page list. */ -static void cl_page_list_del(const struct lu_env *env, - struct cl_page_list *plist, struct cl_page *page) +void cl_page_list_del(const struct lu_env *env, struct cl_page_list *plist, + struct cl_page *page) { LASSERT(plist->pl_nr > 0); LINVRNT(plist->pl_owner == current); @@ -1086,6 +891,7 @@ static void cl_page_list_del(const struct lu_env *env, lu_ref_del_at(&page->cp_reference, &page->cp_queue_ref, "queue", plist); cl_page_put(env, page); } +EXPORT_SYMBOL(cl_page_list_del); /** * Moves a page from one page list to another. @@ -1105,6 +911,24 @@ void cl_page_list_move(struct cl_page_list *dst, struct cl_page_list *src, } EXPORT_SYMBOL(cl_page_list_move); +/** + * Moves a page from one page list to the head of another list. + */ +void cl_page_list_move_head(struct cl_page_list *dst, struct cl_page_list *src, + struct cl_page *page) +{ + LASSERT(src->pl_nr > 0); + LINVRNT(dst->pl_owner == current); + LINVRNT(src->pl_owner == current); + + list_move(&page->cp_batch, &dst->pl_pages); + --src->pl_nr; + ++dst->pl_nr; + lu_ref_set_at(&page->cp_reference, &page->cp_queue_ref, "queue", + src, dst); +} +EXPORT_SYMBOL(cl_page_list_move_head); + /** * splice the cl_page_list, just as list head does */ @@ -1162,8 +986,7 @@ EXPORT_SYMBOL(cl_page_list_disown); /** * Releases pages from queue. */ -static void cl_page_list_fini(const struct lu_env *env, - struct cl_page_list *plist) +void cl_page_list_fini(const struct lu_env *env, struct cl_page_list *plist) { struct cl_page *page; struct cl_page *temp; @@ -1174,6 +997,7 @@ static void cl_page_list_fini(const struct lu_env *env, cl_page_list_del(env, plist, page); LASSERT(plist->pl_nr == 0); } +EXPORT_SYMBOL(cl_page_list_fini); /** * Assumes all pages in a queue. @@ -1260,7 +1084,7 @@ EXPORT_SYMBOL(cl_2queue_init_page); /** * Returns top-level io. * - * \see cl_object_top(), cl_page_top(). + * \see cl_object_top() */ struct cl_io *cl_io_top(struct cl_io *io) { @@ -1323,19 +1147,14 @@ static int cl_req_init(const struct lu_env *env, struct cl_req *req, int result; result = 0; - page = cl_page_top(page); - do { - list_for_each_entry(slice, &page->cp_layers, cpl_linkage) { - dev = lu2cl_dev(slice->cpl_obj->co_lu.lo_dev); - if (dev->cd_ops->cdo_req_init) { - result = dev->cd_ops->cdo_req_init(env, - dev, req); - if (result != 0) - break; - } + list_for_each_entry(slice, &page->cp_layers, cpl_linkage) { + dev = lu2cl_dev(slice->cpl_obj->co_lu.lo_dev); + if (dev->cd_ops->cdo_req_init) { + result = dev->cd_ops->cdo_req_init(env, dev, req); + if (result != 0) + break; } - page = page->cp_child; - } while (page && result == 0); + } return result; } @@ -1384,14 +1203,16 @@ struct cl_req *cl_req_alloc(const struct lu_env *env, struct cl_page *page, if (req->crq_o) { req->crq_nrobjs = nr_objects; result = cl_req_init(env, req, page); - } else + } else { result = -ENOMEM; + } if (result != 0) { cl_req_completion(env, req, result); req = ERR_PTR(result); } - } else + } else { req = ERR_PTR(-ENOMEM); + } return req; } EXPORT_SYMBOL(cl_req_alloc); @@ -1406,8 +1227,6 @@ void cl_req_page_add(const struct lu_env *env, struct cl_req_obj *rqo; int i; - page = cl_page_top(page); - LASSERT(list_empty(&page->cp_flight)); LASSERT(!page->cp_req); @@ -1438,8 +1257,6 @@ void cl_req_page_done(const struct lu_env *env, struct cl_page *page) { struct cl_req *req = page->cp_req; - page = cl_page_top(page); - LASSERT(!list_empty(&page->cp_flight)); LASSERT(req->crq_nrpages > 0); @@ -1511,25 +1328,39 @@ void cl_req_attr_set(const struct lu_env *env, struct cl_req *req, } EXPORT_SYMBOL(cl_req_attr_set); +/* cl_sync_io_callback assumes the caller must call cl_sync_io_wait() to + * wait for the IO to finish. + */ +void cl_sync_io_end(const struct lu_env *env, struct cl_sync_io *anchor) +{ + wake_up_all(&anchor->csi_waitq); + + /* it's safe to nuke or reuse anchor now */ + atomic_set(&anchor->csi_barrier, 0); +} +EXPORT_SYMBOL(cl_sync_io_end); /** - * Initialize synchronous io wait anchor, for transfer of \a nrpages pages. + * Initialize synchronous io wait anchor */ -void cl_sync_io_init(struct cl_sync_io *anchor, int nrpages) +void cl_sync_io_init(struct cl_sync_io *anchor, int nr, + void (*end)(const struct lu_env *, struct cl_sync_io *)) { + memset(anchor, 0, sizeof(*anchor)); init_waitqueue_head(&anchor->csi_waitq); - atomic_set(&anchor->csi_sync_nr, nrpages); - atomic_set(&anchor->csi_barrier, nrpages > 0); + atomic_set(&anchor->csi_sync_nr, nr); + atomic_set(&anchor->csi_barrier, nr > 0); anchor->csi_sync_rc = 0; + anchor->csi_end_io = end; + LASSERT(end); } EXPORT_SYMBOL(cl_sync_io_init); /** - * Wait until all transfer completes. Transfer completion routine has to call - * cl_sync_io_note() for every page. + * Wait until all IO completes. Transfer completion routine has to call + * cl_sync_io_note() for every entity. */ -int cl_sync_io_wait(const struct lu_env *env, struct cl_io *io, - struct cl_page_list *queue, struct cl_sync_io *anchor, +int cl_sync_io_wait(const struct lu_env *env, struct cl_sync_io *anchor, long timeout) { struct l_wait_info lwi = LWI_TIMEOUT_INTR(cfs_time_seconds(timeout), @@ -1542,11 +1373,9 @@ int cl_sync_io_wait(const struct lu_env *env, struct cl_io *io, atomic_read(&anchor->csi_sync_nr) == 0, &lwi); if (rc < 0) { - CERROR("SYNC IO failed with error: %d, try to cancel %d remaining pages\n", + CERROR("IO failed: %d, still wait for %d remaining entries\n", rc, atomic_read(&anchor->csi_sync_nr)); - (void)cl_io_cancel(env, io, queue); - lwi = (struct l_wait_info) { 0 }; (void)l_wait_event(anchor->csi_waitq, atomic_read(&anchor->csi_sync_nr) == 0, @@ -1555,14 +1384,12 @@ int cl_sync_io_wait(const struct lu_env *env, struct cl_io *io, rc = anchor->csi_sync_rc; } LASSERT(atomic_read(&anchor->csi_sync_nr) == 0); - cl_page_list_assume(env, io, queue); /* wait until cl_sync_io_note() has done wakeup */ while (unlikely(atomic_read(&anchor->csi_barrier) != 0)) { cpu_relax(); } - POISON(anchor, 0x5a, sizeof(*anchor)); return rc; } EXPORT_SYMBOL(cl_sync_io_wait); @@ -1570,7 +1397,8 @@ EXPORT_SYMBOL(cl_sync_io_wait); /** * Indicate that transfer of a single page completed. */ -void cl_sync_io_note(struct cl_sync_io *anchor, int ioret) +void cl_sync_io_note(const struct lu_env *env, struct cl_sync_io *anchor, + int ioret) { if (anchor->csi_sync_rc == 0 && ioret < 0) anchor->csi_sync_rc = ioret; @@ -1581,9 +1409,9 @@ void cl_sync_io_note(struct cl_sync_io *anchor, int ioret) */ LASSERT(atomic_read(&anchor->csi_sync_nr) > 0); if (atomic_dec_and_test(&anchor->csi_sync_nr)) { - wake_up_all(&anchor->csi_waitq); - /* it's safe to nuke or reuse anchor now */ - atomic_set(&anchor->csi_barrier, 0); + LASSERT(anchor->csi_end_io); + anchor->csi_end_io(env, anchor); + /* Can't access anchor any more */ } } EXPORT_SYMBOL(cl_sync_io_note); diff --git a/drivers/staging/lustre/lustre/obdclass/cl_lock.c b/drivers/staging/lustre/lustre/obdclass/cl_lock.c index aec644eb4..26a576b63 100644 --- a/drivers/staging/lustre/lustre/obdclass/cl_lock.c +++ b/drivers/staging/lustre/lustre/obdclass/cl_lock.c @@ -36,6 +36,7 @@ * Client Extent Lock. * * Author: Nikita Danilov + * Author: Jinshan Xiong */ #define DEBUG_SUBSYSTEM S_CLASS @@ -47,2116 +48,187 @@ #include "../include/cl_object.h" #include "cl_internal.h" -/** Lock class of cl_lock::cll_guard */ -static struct lock_class_key cl_lock_guard_class; -static struct kmem_cache *cl_lock_kmem; - -static struct lu_kmem_descr cl_lock_caches[] = { - { - .ckd_cache = &cl_lock_kmem, - .ckd_name = "cl_lock_kmem", - .ckd_size = sizeof (struct cl_lock) - }, - { - .ckd_cache = NULL - } -}; - -#define CS_LOCK_INC(o, item) -#define CS_LOCK_DEC(o, item) -#define CS_LOCKSTATE_INC(o, state) -#define CS_LOCKSTATE_DEC(o, state) - -/** - * Basic lock invariant that is maintained at all times. Caller either has a - * reference to \a lock, or somehow assures that \a lock cannot be freed. - * - * \see cl_lock_invariant() - */ -static int cl_lock_invariant_trusted(const struct lu_env *env, - const struct cl_lock *lock) -{ - return ergo(lock->cll_state == CLS_FREEING, lock->cll_holds == 0) && - atomic_read(&lock->cll_ref) >= lock->cll_holds && - lock->cll_holds >= lock->cll_users && - lock->cll_holds >= 0 && - lock->cll_users >= 0 && - lock->cll_depth >= 0; -} - -/** - * Stronger lock invariant, checking that caller has a reference on a lock. - * - * \see cl_lock_invariant_trusted() - */ -static int cl_lock_invariant(const struct lu_env *env, - const struct cl_lock *lock) -{ - int result; - - result = atomic_read(&lock->cll_ref) > 0 && - cl_lock_invariant_trusted(env, lock); - if (!result && env) - CL_LOCK_DEBUG(D_ERROR, env, lock, "invariant broken\n"); - return result; -} - -/** - * Returns lock "nesting": 0 for a top-lock and 1 for a sub-lock. - */ -static enum clt_nesting_level cl_lock_nesting(const struct cl_lock *lock) -{ - return cl_object_header(lock->cll_descr.cld_obj)->coh_nesting; -} - -/** - * Returns a set of counters for this lock, depending on a lock nesting. - */ -static struct cl_thread_counters *cl_lock_counters(const struct lu_env *env, - const struct cl_lock *lock) -{ - struct cl_thread_info *info; - enum clt_nesting_level nesting; - - info = cl_env_info(env); - nesting = cl_lock_nesting(lock); - LASSERT(nesting < ARRAY_SIZE(info->clt_counters)); - return &info->clt_counters[nesting]; -} - static void cl_lock_trace0(int level, const struct lu_env *env, - const char *prefix, const struct cl_lock *lock, - const char *func, const int line) -{ - struct cl_object_header *h = cl_object_header(lock->cll_descr.cld_obj); - - CDEBUG(level, "%s: %p@(%d %p %d %d %d %d %d %lx)(%p/%d/%d) at %s():%d\n", - prefix, lock, atomic_read(&lock->cll_ref), - lock->cll_guarder, lock->cll_depth, - lock->cll_state, lock->cll_error, lock->cll_holds, - lock->cll_users, lock->cll_flags, - env, h->coh_nesting, cl_lock_nr_mutexed(env), - func, line); -} - -#define cl_lock_trace(level, env, prefix, lock) \ - cl_lock_trace0(level, env, prefix, lock, __func__, __LINE__) - -#define RETIP ((unsigned long)__builtin_return_address(0)) - -#ifdef CONFIG_LOCKDEP -static struct lock_class_key cl_lock_key; - -static void cl_lock_lockdep_init(struct cl_lock *lock) -{ - lockdep_set_class_and_name(lock, &cl_lock_key, "EXT"); -} - -static void cl_lock_lockdep_acquire(const struct lu_env *env, - struct cl_lock *lock, __u32 enqflags) -{ - cl_lock_counters(env, lock)->ctc_nr_locks_acquired++; - lock_map_acquire(&lock->dep_map); -} - -static void cl_lock_lockdep_release(const struct lu_env *env, - struct cl_lock *lock) -{ - cl_lock_counters(env, lock)->ctc_nr_locks_acquired--; - lock_release(&lock->dep_map, 0, RETIP); -} - -#else /* !CONFIG_LOCKDEP */ - -static void cl_lock_lockdep_init(struct cl_lock *lock) -{} -static void cl_lock_lockdep_acquire(const struct lu_env *env, - struct cl_lock *lock, __u32 enqflags) -{} -static void cl_lock_lockdep_release(const struct lu_env *env, - struct cl_lock *lock) -{} - -#endif /* !CONFIG_LOCKDEP */ - -/** - * Adds lock slice to the compound lock. - * - * This is called by cl_object_operations::coo_lock_init() methods to add a - * per-layer state to the lock. New state is added at the end of - * cl_lock::cll_layers list, that is, it is at the bottom of the stack. - * - * \see cl_req_slice_add(), cl_page_slice_add(), cl_io_slice_add() - */ -void cl_lock_slice_add(struct cl_lock *lock, struct cl_lock_slice *slice, - struct cl_object *obj, - const struct cl_lock_operations *ops) -{ - slice->cls_lock = lock; - list_add_tail(&slice->cls_linkage, &lock->cll_layers); - slice->cls_obj = obj; - slice->cls_ops = ops; -} -EXPORT_SYMBOL(cl_lock_slice_add); - -/** - * Returns true iff a lock with the mode \a has provides at least the same - * guarantees as a lock with the mode \a need. - */ -int cl_lock_mode_match(enum cl_lock_mode has, enum cl_lock_mode need) -{ - LINVRNT(need == CLM_READ || need == CLM_WRITE || - need == CLM_PHANTOM || need == CLM_GROUP); - LINVRNT(has == CLM_READ || has == CLM_WRITE || - has == CLM_PHANTOM || has == CLM_GROUP); - CLASSERT(CLM_PHANTOM < CLM_READ); - CLASSERT(CLM_READ < CLM_WRITE); - CLASSERT(CLM_WRITE < CLM_GROUP); - - if (has != CLM_GROUP) - return need <= has; - else - return need == has; -} -EXPORT_SYMBOL(cl_lock_mode_match); - -/** - * Returns true iff extent portions of lock descriptions match. - */ -int cl_lock_ext_match(const struct cl_lock_descr *has, - const struct cl_lock_descr *need) -{ - return - has->cld_start <= need->cld_start && - has->cld_end >= need->cld_end && - cl_lock_mode_match(has->cld_mode, need->cld_mode) && - (has->cld_mode != CLM_GROUP || has->cld_gid == need->cld_gid); -} -EXPORT_SYMBOL(cl_lock_ext_match); - -/** - * Returns true iff a lock with the description \a has provides at least the - * same guarantees as a lock with the description \a need. - */ -int cl_lock_descr_match(const struct cl_lock_descr *has, - const struct cl_lock_descr *need) -{ - return - cl_object_same(has->cld_obj, need->cld_obj) && - cl_lock_ext_match(has, need); -} -EXPORT_SYMBOL(cl_lock_descr_match); - -static void cl_lock_free(const struct lu_env *env, struct cl_lock *lock) -{ - struct cl_object *obj = lock->cll_descr.cld_obj; - - LINVRNT(!cl_lock_is_mutexed(lock)); - - cl_lock_trace(D_DLMTRACE, env, "free lock", lock); - might_sleep(); - while (!list_empty(&lock->cll_layers)) { - struct cl_lock_slice *slice; - - slice = list_entry(lock->cll_layers.next, - struct cl_lock_slice, cls_linkage); - list_del_init(lock->cll_layers.next); - slice->cls_ops->clo_fini(env, slice); - } - CS_LOCK_DEC(obj, total); - CS_LOCKSTATE_DEC(obj, lock->cll_state); - lu_object_ref_del_at(&obj->co_lu, &lock->cll_obj_ref, "cl_lock", lock); - cl_object_put(env, obj); - lu_ref_fini(&lock->cll_reference); - lu_ref_fini(&lock->cll_holders); - mutex_destroy(&lock->cll_guard); - kmem_cache_free(cl_lock_kmem, lock); -} - -/** - * Releases a reference on a lock. - * - * When last reference is released, lock is returned to the cache, unless it - * is in cl_lock_state::CLS_FREEING state, in which case it is destroyed - * immediately. - * - * \see cl_object_put(), cl_page_put() - */ -void cl_lock_put(const struct lu_env *env, struct cl_lock *lock) -{ - struct cl_object *obj; - - LINVRNT(cl_lock_invariant(env, lock)); - obj = lock->cll_descr.cld_obj; - LINVRNT(obj); - - CDEBUG(D_TRACE, "releasing reference: %d %p %lu\n", - atomic_read(&lock->cll_ref), lock, RETIP); - - if (atomic_dec_and_test(&lock->cll_ref)) { - if (lock->cll_state == CLS_FREEING) { - LASSERT(list_empty(&lock->cll_linkage)); - cl_lock_free(env, lock); - } - CS_LOCK_DEC(obj, busy); - } -} -EXPORT_SYMBOL(cl_lock_put); - -/** - * Acquires an additional reference to a lock. - * - * This can be called only by caller already possessing a reference to \a - * lock. - * - * \see cl_object_get(), cl_page_get() - */ -void cl_lock_get(struct cl_lock *lock) -{ - LINVRNT(cl_lock_invariant(NULL, lock)); - CDEBUG(D_TRACE, "acquiring reference: %d %p %lu\n", - atomic_read(&lock->cll_ref), lock, RETIP); - atomic_inc(&lock->cll_ref); -} -EXPORT_SYMBOL(cl_lock_get); - -/** - * Acquires a reference to a lock. - * - * This is much like cl_lock_get(), except that this function can be used to - * acquire initial reference to the cached lock. Caller has to deal with all - * possible races. Use with care! - * - * \see cl_page_get_trust() - */ -void cl_lock_get_trust(struct cl_lock *lock) -{ - CDEBUG(D_TRACE, "acquiring trusted reference: %d %p %lu\n", - atomic_read(&lock->cll_ref), lock, RETIP); - if (atomic_inc_return(&lock->cll_ref) == 1) - CS_LOCK_INC(lock->cll_descr.cld_obj, busy); -} -EXPORT_SYMBOL(cl_lock_get_trust); - -/** - * Helper function destroying the lock that wasn't completely initialized. - * - * Other threads can acquire references to the top-lock through its - * sub-locks. Hence, it cannot be cl_lock_free()-ed immediately. - */ -static void cl_lock_finish(const struct lu_env *env, struct cl_lock *lock) -{ - cl_lock_mutex_get(env, lock); - cl_lock_cancel(env, lock); - cl_lock_delete(env, lock); - cl_lock_mutex_put(env, lock); - cl_lock_put(env, lock); -} - -static struct cl_lock *cl_lock_alloc(const struct lu_env *env, - struct cl_object *obj, - const struct cl_io *io, - const struct cl_lock_descr *descr) -{ - struct cl_lock *lock; - struct lu_object_header *head; - - lock = kmem_cache_zalloc(cl_lock_kmem, GFP_NOFS); - if (lock) { - atomic_set(&lock->cll_ref, 1); - lock->cll_descr = *descr; - lock->cll_state = CLS_NEW; - cl_object_get(obj); - lu_object_ref_add_at(&obj->co_lu, &lock->cll_obj_ref, "cl_lock", - lock); - INIT_LIST_HEAD(&lock->cll_layers); - INIT_LIST_HEAD(&lock->cll_linkage); - INIT_LIST_HEAD(&lock->cll_inclosure); - lu_ref_init(&lock->cll_reference); - lu_ref_init(&lock->cll_holders); - mutex_init(&lock->cll_guard); - lockdep_set_class(&lock->cll_guard, &cl_lock_guard_class); - init_waitqueue_head(&lock->cll_wq); - head = obj->co_lu.lo_header; - CS_LOCKSTATE_INC(obj, CLS_NEW); - CS_LOCK_INC(obj, total); - CS_LOCK_INC(obj, create); - cl_lock_lockdep_init(lock); - list_for_each_entry(obj, &head->loh_layers, co_lu.lo_linkage) { - int err; - - err = obj->co_ops->coo_lock_init(env, obj, lock, io); - if (err != 0) { - cl_lock_finish(env, lock); - lock = ERR_PTR(err); - break; - } - } - } else - lock = ERR_PTR(-ENOMEM); - return lock; -} - -/** - * Transfer the lock into INTRANSIT state and return the original state. - * - * \pre state: CLS_CACHED, CLS_HELD or CLS_ENQUEUED - * \post state: CLS_INTRANSIT - * \see CLS_INTRANSIT - */ -static enum cl_lock_state cl_lock_intransit(const struct lu_env *env, - struct cl_lock *lock) -{ - enum cl_lock_state state = lock->cll_state; - - LASSERT(cl_lock_is_mutexed(lock)); - LASSERT(state != CLS_INTRANSIT); - LASSERTF(state >= CLS_ENQUEUED && state <= CLS_CACHED, - "Malformed lock state %d.\n", state); - - cl_lock_state_set(env, lock, CLS_INTRANSIT); - lock->cll_intransit_owner = current; - cl_lock_hold_add(env, lock, "intransit", current); - return state; -} - -/** - * Exit the intransit state and restore the lock state to the original state - */ -static void cl_lock_extransit(const struct lu_env *env, struct cl_lock *lock, - enum cl_lock_state state) -{ - LASSERT(cl_lock_is_mutexed(lock)); - LASSERT(lock->cll_state == CLS_INTRANSIT); - LASSERT(state != CLS_INTRANSIT); - LASSERT(lock->cll_intransit_owner == current); - - lock->cll_intransit_owner = NULL; - cl_lock_state_set(env, lock, state); - cl_lock_unhold(env, lock, "intransit", current); -} - -/** - * Checking whether the lock is intransit state - */ -int cl_lock_is_intransit(struct cl_lock *lock) -{ - LASSERT(cl_lock_is_mutexed(lock)); - return lock->cll_state == CLS_INTRANSIT && - lock->cll_intransit_owner != current; -} -EXPORT_SYMBOL(cl_lock_is_intransit); -/** - * Returns true iff lock is "suitable" for given io. E.g., locks acquired by - * truncate and O_APPEND cannot be reused for read/non-append-write, as they - * cover multiple stripes and can trigger cascading timeouts. - */ -static int cl_lock_fits_into(const struct lu_env *env, - const struct cl_lock *lock, - const struct cl_lock_descr *need, - const struct cl_io *io) -{ - const struct cl_lock_slice *slice; - - LINVRNT(cl_lock_invariant_trusted(env, lock)); - list_for_each_entry(slice, &lock->cll_layers, cls_linkage) { - if (slice->cls_ops->clo_fits_into && - !slice->cls_ops->clo_fits_into(env, slice, need, io)) - return 0; - } - return 1; -} - -static struct cl_lock *cl_lock_lookup(const struct lu_env *env, - struct cl_object *obj, - const struct cl_io *io, - const struct cl_lock_descr *need) -{ - struct cl_lock *lock; - struct cl_object_header *head; - - head = cl_object_header(obj); - assert_spin_locked(&head->coh_lock_guard); - CS_LOCK_INC(obj, lookup); - list_for_each_entry(lock, &head->coh_locks, cll_linkage) { - int matched; - - matched = cl_lock_ext_match(&lock->cll_descr, need) && - lock->cll_state < CLS_FREEING && - lock->cll_error == 0 && - !(lock->cll_flags & CLF_CANCELLED) && - cl_lock_fits_into(env, lock, need, io); - CDEBUG(D_DLMTRACE, "has: "DDESCR"(%d) need: "DDESCR": %d\n", - PDESCR(&lock->cll_descr), lock->cll_state, PDESCR(need), - matched); - if (matched) { - cl_lock_get_trust(lock); - CS_LOCK_INC(obj, hit); - return lock; - } - } - return NULL; -} - -/** - * Returns a lock matching description \a need. - * - * This is the main entry point into the cl_lock caching interface. First, a - * cache (implemented as a per-object linked list) is consulted. If lock is - * found there, it is returned immediately. Otherwise new lock is allocated - * and returned. In any case, additional reference to lock is acquired. - * - * \see cl_object_find(), cl_page_find() - */ -static struct cl_lock *cl_lock_find(const struct lu_env *env, - const struct cl_io *io, - const struct cl_lock_descr *need) -{ - struct cl_object_header *head; - struct cl_object *obj; - struct cl_lock *lock; - - obj = need->cld_obj; - head = cl_object_header(obj); - - spin_lock(&head->coh_lock_guard); - lock = cl_lock_lookup(env, obj, io, need); - spin_unlock(&head->coh_lock_guard); - - if (!lock) { - lock = cl_lock_alloc(env, obj, io, need); - if (!IS_ERR(lock)) { - struct cl_lock *ghost; - - spin_lock(&head->coh_lock_guard); - ghost = cl_lock_lookup(env, obj, io, need); - if (!ghost) { - cl_lock_get_trust(lock); - list_add_tail(&lock->cll_linkage, - &head->coh_locks); - spin_unlock(&head->coh_lock_guard); - CS_LOCK_INC(obj, busy); - } else { - spin_unlock(&head->coh_lock_guard); - /* - * Other threads can acquire references to the - * top-lock through its sub-locks. Hence, it - * cannot be cl_lock_free()-ed immediately. - */ - cl_lock_finish(env, lock); - lock = ghost; - } - } - } - return lock; -} - -/** - * Returns existing lock matching given description. This is similar to - * cl_lock_find() except that no new lock is created, and returned lock is - * guaranteed to be in enum cl_lock_state::CLS_HELD state. - */ -struct cl_lock *cl_lock_peek(const struct lu_env *env, const struct cl_io *io, - const struct cl_lock_descr *need, - const char *scope, const void *source) -{ - struct cl_object_header *head; - struct cl_object *obj; - struct cl_lock *lock; - - obj = need->cld_obj; - head = cl_object_header(obj); - - do { - spin_lock(&head->coh_lock_guard); - lock = cl_lock_lookup(env, obj, io, need); - spin_unlock(&head->coh_lock_guard); - if (!lock) - return NULL; - - cl_lock_mutex_get(env, lock); - if (lock->cll_state == CLS_INTRANSIT) - /* Don't care return value. */ - cl_lock_state_wait(env, lock); - if (lock->cll_state == CLS_FREEING) { - cl_lock_mutex_put(env, lock); - cl_lock_put(env, lock); - lock = NULL; - } - } while (!lock); - - cl_lock_hold_add(env, lock, scope, source); - cl_lock_user_add(env, lock); - if (lock->cll_state == CLS_CACHED) - cl_use_try(env, lock, 1); - if (lock->cll_state == CLS_HELD) { - cl_lock_mutex_put(env, lock); - cl_lock_lockdep_acquire(env, lock, 0); - cl_lock_put(env, lock); - } else { - cl_unuse_try(env, lock); - cl_lock_unhold(env, lock, scope, source); - cl_lock_mutex_put(env, lock); - cl_lock_put(env, lock); - lock = NULL; - } - - return lock; -} -EXPORT_SYMBOL(cl_lock_peek); - -/** - * Returns a slice within a lock, corresponding to the given layer in the - * device stack. - * - * \see cl_page_at() - */ -const struct cl_lock_slice *cl_lock_at(const struct cl_lock *lock, - const struct lu_device_type *dtype) -{ - const struct cl_lock_slice *slice; - - LINVRNT(cl_lock_invariant_trusted(NULL, lock)); - - list_for_each_entry(slice, &lock->cll_layers, cls_linkage) { - if (slice->cls_obj->co_lu.lo_dev->ld_type == dtype) - return slice; - } - return NULL; -} -EXPORT_SYMBOL(cl_lock_at); - -static void cl_lock_mutex_tail(const struct lu_env *env, struct cl_lock *lock) -{ - struct cl_thread_counters *counters; - - counters = cl_lock_counters(env, lock); - lock->cll_depth++; - counters->ctc_nr_locks_locked++; - lu_ref_add(&counters->ctc_locks_locked, "cll_guard", lock); - cl_lock_trace(D_TRACE, env, "got mutex", lock); -} - -/** - * Locks cl_lock object. - * - * This is used to manipulate cl_lock fields, and to serialize state - * transitions in the lock state machine. - * - * \post cl_lock_is_mutexed(lock) - * - * \see cl_lock_mutex_put() - */ -void cl_lock_mutex_get(const struct lu_env *env, struct cl_lock *lock) -{ - LINVRNT(cl_lock_invariant(env, lock)); - - if (lock->cll_guarder == current) { - LINVRNT(cl_lock_is_mutexed(lock)); - LINVRNT(lock->cll_depth > 0); - } else { - struct cl_object_header *hdr; - struct cl_thread_info *info; - int i; - - LINVRNT(lock->cll_guarder != current); - hdr = cl_object_header(lock->cll_descr.cld_obj); - /* - * Check that mutices are taken in the bottom-to-top order. - */ - info = cl_env_info(env); - for (i = 0; i < hdr->coh_nesting; ++i) - LASSERT(info->clt_counters[i].ctc_nr_locks_locked == 0); - mutex_lock_nested(&lock->cll_guard, hdr->coh_nesting); - lock->cll_guarder = current; - LINVRNT(lock->cll_depth == 0); - } - cl_lock_mutex_tail(env, lock); -} -EXPORT_SYMBOL(cl_lock_mutex_get); - -/** - * Try-locks cl_lock object. - * - * \retval 0 \a lock was successfully locked - * - * \retval -EBUSY \a lock cannot be locked right now - * - * \post ergo(result == 0, cl_lock_is_mutexed(lock)) - * - * \see cl_lock_mutex_get() - */ -static int cl_lock_mutex_try(const struct lu_env *env, struct cl_lock *lock) -{ - int result; - - LINVRNT(cl_lock_invariant_trusted(env, lock)); - - result = 0; - if (lock->cll_guarder == current) { - LINVRNT(lock->cll_depth > 0); - cl_lock_mutex_tail(env, lock); - } else if (mutex_trylock(&lock->cll_guard)) { - LINVRNT(lock->cll_depth == 0); - lock->cll_guarder = current; - cl_lock_mutex_tail(env, lock); - } else - result = -EBUSY; - return result; -} - -/** - {* Unlocks cl_lock object. - * - * \pre cl_lock_is_mutexed(lock) - * - * \see cl_lock_mutex_get() - */ -void cl_lock_mutex_put(const struct lu_env *env, struct cl_lock *lock) -{ - struct cl_thread_counters *counters; - - LINVRNT(cl_lock_invariant(env, lock)); - LINVRNT(cl_lock_is_mutexed(lock)); - LINVRNT(lock->cll_guarder == current); - LINVRNT(lock->cll_depth > 0); - - counters = cl_lock_counters(env, lock); - LINVRNT(counters->ctc_nr_locks_locked > 0); - - cl_lock_trace(D_TRACE, env, "put mutex", lock); - lu_ref_del(&counters->ctc_locks_locked, "cll_guard", lock); - counters->ctc_nr_locks_locked--; - if (--lock->cll_depth == 0) { - lock->cll_guarder = NULL; - mutex_unlock(&lock->cll_guard); - } -} -EXPORT_SYMBOL(cl_lock_mutex_put); - -/** - * Returns true iff lock's mutex is owned by the current thread. - */ -int cl_lock_is_mutexed(struct cl_lock *lock) -{ - return lock->cll_guarder == current; -} -EXPORT_SYMBOL(cl_lock_is_mutexed); - -/** - * Returns number of cl_lock mutices held by the current thread (environment). - */ -int cl_lock_nr_mutexed(const struct lu_env *env) -{ - struct cl_thread_info *info; - int i; - int locked; - - /* - * NOTE: if summation across all nesting levels (currently 2) proves - * too expensive, a summary counter can be added to - * struct cl_thread_info. - */ - info = cl_env_info(env); - for (i = 0, locked = 0; i < ARRAY_SIZE(info->clt_counters); ++i) - locked += info->clt_counters[i].ctc_nr_locks_locked; - return locked; -} -EXPORT_SYMBOL(cl_lock_nr_mutexed); - -static void cl_lock_cancel0(const struct lu_env *env, struct cl_lock *lock) -{ - LINVRNT(cl_lock_is_mutexed(lock)); - LINVRNT(cl_lock_invariant(env, lock)); - if (!(lock->cll_flags & CLF_CANCELLED)) { - const struct cl_lock_slice *slice; - - lock->cll_flags |= CLF_CANCELLED; - list_for_each_entry_reverse(slice, &lock->cll_layers, - cls_linkage) { - if (slice->cls_ops->clo_cancel) - slice->cls_ops->clo_cancel(env, slice); - } - } -} - -static void cl_lock_delete0(const struct lu_env *env, struct cl_lock *lock) -{ - struct cl_object_header *head; - const struct cl_lock_slice *slice; - - LINVRNT(cl_lock_is_mutexed(lock)); - LINVRNT(cl_lock_invariant(env, lock)); - - if (lock->cll_state < CLS_FREEING) { - bool in_cache; - - LASSERT(lock->cll_state != CLS_INTRANSIT); - cl_lock_state_set(env, lock, CLS_FREEING); - - head = cl_object_header(lock->cll_descr.cld_obj); - - spin_lock(&head->coh_lock_guard); - in_cache = !list_empty(&lock->cll_linkage); - if (in_cache) - list_del_init(&lock->cll_linkage); - spin_unlock(&head->coh_lock_guard); - - if (in_cache) /* coh_locks cache holds a refcount. */ - cl_lock_put(env, lock); - - /* - * From now on, no new references to this lock can be acquired - * by cl_lock_lookup(). - */ - list_for_each_entry_reverse(slice, &lock->cll_layers, - cls_linkage) { - if (slice->cls_ops->clo_delete) - slice->cls_ops->clo_delete(env, slice); - } - /* - * From now on, no new references to this lock can be acquired - * by layer-specific means (like a pointer from struct - * ldlm_lock in osc, or a pointer from top-lock to sub-lock in - * lov). - * - * Lock will be finally freed in cl_lock_put() when last of - * existing references goes away. - */ - } -} - -/** - * Mod(ifie)s cl_lock::cll_holds counter for a given lock. Also, for a - * top-lock (nesting == 0) accounts for this modification in the per-thread - * debugging counters. Sub-lock holds can be released by a thread different - * from one that acquired it. - */ -static void cl_lock_hold_mod(const struct lu_env *env, struct cl_lock *lock, - int delta) -{ - struct cl_thread_counters *counters; - enum clt_nesting_level nesting; - - lock->cll_holds += delta; - nesting = cl_lock_nesting(lock); - if (nesting == CNL_TOP) { - counters = &cl_env_info(env)->clt_counters[CNL_TOP]; - counters->ctc_nr_held += delta; - LASSERT(counters->ctc_nr_held >= 0); - } -} - -/** - * Mod(ifie)s cl_lock::cll_users counter for a given lock. See - * cl_lock_hold_mod() for the explanation of the debugging code. - */ -static void cl_lock_used_mod(const struct lu_env *env, struct cl_lock *lock, - int delta) -{ - struct cl_thread_counters *counters; - enum clt_nesting_level nesting; - - lock->cll_users += delta; - nesting = cl_lock_nesting(lock); - if (nesting == CNL_TOP) { - counters = &cl_env_info(env)->clt_counters[CNL_TOP]; - counters->ctc_nr_used += delta; - LASSERT(counters->ctc_nr_used >= 0); - } -} - -void cl_lock_hold_release(const struct lu_env *env, struct cl_lock *lock, - const char *scope, const void *source) -{ - LINVRNT(cl_lock_is_mutexed(lock)); - LINVRNT(cl_lock_invariant(env, lock)); - LASSERT(lock->cll_holds > 0); - - cl_lock_trace(D_DLMTRACE, env, "hold release lock", lock); - lu_ref_del(&lock->cll_holders, scope, source); - cl_lock_hold_mod(env, lock, -1); - if (lock->cll_holds == 0) { - CL_LOCK_ASSERT(lock->cll_state != CLS_HELD, env, lock); - if (lock->cll_descr.cld_mode == CLM_PHANTOM || - lock->cll_descr.cld_mode == CLM_GROUP || - lock->cll_state != CLS_CACHED) - /* - * If lock is still phantom or grouplock when user is - * done with it---destroy the lock. - */ - lock->cll_flags |= CLF_CANCELPEND|CLF_DOOMED; - if (lock->cll_flags & CLF_CANCELPEND) { - lock->cll_flags &= ~CLF_CANCELPEND; - cl_lock_cancel0(env, lock); - } - if (lock->cll_flags & CLF_DOOMED) { - /* no longer doomed: it's dead... Jim. */ - lock->cll_flags &= ~CLF_DOOMED; - cl_lock_delete0(env, lock); - } - } -} -EXPORT_SYMBOL(cl_lock_hold_release); - -/** - * Waits until lock state is changed. - * - * This function is called with cl_lock mutex locked, atomically releases - * mutex and goes to sleep, waiting for a lock state change (signaled by - * cl_lock_signal()), and re-acquires the mutex before return. - * - * This function is used to wait until lock state machine makes some progress - * and to emulate synchronous operations on top of asynchronous lock - * interface. - * - * \retval -EINTR wait was interrupted - * - * \retval 0 wait wasn't interrupted - * - * \pre cl_lock_is_mutexed(lock) - * - * \see cl_lock_signal() - */ -int cl_lock_state_wait(const struct lu_env *env, struct cl_lock *lock) -{ - wait_queue_t waiter; - sigset_t blocked; - int result; - - LINVRNT(cl_lock_is_mutexed(lock)); - LINVRNT(cl_lock_invariant(env, lock)); - LASSERT(lock->cll_depth == 1); - LASSERT(lock->cll_state != CLS_FREEING); /* too late to wait */ - - cl_lock_trace(D_DLMTRACE, env, "state wait lock", lock); - result = lock->cll_error; - if (result == 0) { - /* To avoid being interrupted by the 'non-fatal' signals - * (SIGCHLD, for instance), we'd block them temporarily. - * LU-305 - */ - blocked = cfs_block_sigsinv(LUSTRE_FATAL_SIGS); - - init_waitqueue_entry(&waiter, current); - add_wait_queue(&lock->cll_wq, &waiter); - set_current_state(TASK_INTERRUPTIBLE); - cl_lock_mutex_put(env, lock); - - LASSERT(cl_lock_nr_mutexed(env) == 0); - - /* Returning ERESTARTSYS instead of EINTR so syscalls - * can be restarted if signals are pending here - */ - result = -ERESTARTSYS; - if (likely(!OBD_FAIL_CHECK(OBD_FAIL_LOCK_STATE_WAIT_INTR))) { - schedule(); - if (!cfs_signal_pending()) - result = 0; - } - - cl_lock_mutex_get(env, lock); - set_current_state(TASK_RUNNING); - remove_wait_queue(&lock->cll_wq, &waiter); - - /* Restore old blocked signals */ - cfs_restore_sigs(blocked); - } - return result; -} -EXPORT_SYMBOL(cl_lock_state_wait); - -static void cl_lock_state_signal(const struct lu_env *env, struct cl_lock *lock, - enum cl_lock_state state) -{ - const struct cl_lock_slice *slice; - - LINVRNT(cl_lock_is_mutexed(lock)); - LINVRNT(cl_lock_invariant(env, lock)); - - list_for_each_entry(slice, &lock->cll_layers, cls_linkage) - if (slice->cls_ops->clo_state) - slice->cls_ops->clo_state(env, slice, state); - wake_up_all(&lock->cll_wq); -} - -/** - * Notifies waiters that lock state changed. - * - * Wakes up all waiters sleeping in cl_lock_state_wait(), also notifies all - * layers about state change by calling cl_lock_operations::clo_state() - * top-to-bottom. - */ -void cl_lock_signal(const struct lu_env *env, struct cl_lock *lock) -{ - cl_lock_trace(D_DLMTRACE, env, "state signal lock", lock); - cl_lock_state_signal(env, lock, lock->cll_state); -} -EXPORT_SYMBOL(cl_lock_signal); - -/** - * Changes lock state. - * - * This function is invoked to notify layers that lock state changed, possible - * as a result of an asynchronous event such as call-back reception. - * - * \post lock->cll_state == state - * - * \see cl_lock_operations::clo_state() - */ -void cl_lock_state_set(const struct lu_env *env, struct cl_lock *lock, - enum cl_lock_state state) -{ - LASSERT(lock->cll_state <= state || - (lock->cll_state == CLS_CACHED && - (state == CLS_HELD || /* lock found in cache */ - state == CLS_NEW || /* sub-lock canceled */ - state == CLS_INTRANSIT)) || - /* lock is in transit state */ - lock->cll_state == CLS_INTRANSIT); - - if (lock->cll_state != state) { - CS_LOCKSTATE_DEC(lock->cll_descr.cld_obj, lock->cll_state); - CS_LOCKSTATE_INC(lock->cll_descr.cld_obj, state); - - cl_lock_state_signal(env, lock, state); - lock->cll_state = state; - } -} -EXPORT_SYMBOL(cl_lock_state_set); - -static int cl_unuse_try_internal(const struct lu_env *env, struct cl_lock *lock) -{ - const struct cl_lock_slice *slice; - int result; - - do { - result = 0; - - LINVRNT(cl_lock_is_mutexed(lock)); - LINVRNT(cl_lock_invariant(env, lock)); - LASSERT(lock->cll_state == CLS_INTRANSIT); - - result = -ENOSYS; - list_for_each_entry_reverse(slice, &lock->cll_layers, - cls_linkage) { - if (slice->cls_ops->clo_unuse) { - result = slice->cls_ops->clo_unuse(env, slice); - if (result != 0) - break; - } - } - LASSERT(result != -ENOSYS); - } while (result == CLO_REPEAT); - - return result; -} - -/** - * Yanks lock from the cache (cl_lock_state::CLS_CACHED state) by calling - * cl_lock_operations::clo_use() top-to-bottom to notify layers. - * @atomic = 1, it must unuse the lock to recovery the lock to keep the - * use process atomic - */ -int cl_use_try(const struct lu_env *env, struct cl_lock *lock, int atomic) -{ - const struct cl_lock_slice *slice; - int result; - enum cl_lock_state state; - - cl_lock_trace(D_DLMTRACE, env, "use lock", lock); - - LASSERT(lock->cll_state == CLS_CACHED); - if (lock->cll_error) - return lock->cll_error; - - result = -ENOSYS; - state = cl_lock_intransit(env, lock); - list_for_each_entry(slice, &lock->cll_layers, cls_linkage) { - if (slice->cls_ops->clo_use) { - result = slice->cls_ops->clo_use(env, slice); - if (result != 0) - break; - } - } - LASSERT(result != -ENOSYS); - - LASSERTF(lock->cll_state == CLS_INTRANSIT, "Wrong state %d.\n", - lock->cll_state); - - if (result == 0) { - state = CLS_HELD; - } else { - if (result == -ESTALE) { - /* - * ESTALE means sublock being cancelled - * at this time, and set lock state to - * be NEW here and ask the caller to repeat. - */ - state = CLS_NEW; - result = CLO_REPEAT; - } - - /* @atomic means back-off-on-failure. */ - if (atomic) { - int rc; - - rc = cl_unuse_try_internal(env, lock); - /* Vet the results. */ - if (rc < 0 && result > 0) - result = rc; - } - - } - cl_lock_extransit(env, lock, state); - return result; -} -EXPORT_SYMBOL(cl_use_try); - -/** - * Helper for cl_enqueue_try() that calls ->clo_enqueue() across all layers - * top-to-bottom. - */ -static int cl_enqueue_kick(const struct lu_env *env, - struct cl_lock *lock, - struct cl_io *io, __u32 flags) -{ - int result; - const struct cl_lock_slice *slice; - - result = -ENOSYS; - list_for_each_entry(slice, &lock->cll_layers, cls_linkage) { - if (slice->cls_ops->clo_enqueue) { - result = slice->cls_ops->clo_enqueue(env, - slice, io, flags); - if (result != 0) - break; - } - } - LASSERT(result != -ENOSYS); - return result; -} - -/** - * Tries to enqueue a lock. - * - * This function is called repeatedly by cl_enqueue() until either lock is - * enqueued, or error occurs. This function does not block waiting for - * networking communication to complete. - * - * \post ergo(result == 0, lock->cll_state == CLS_ENQUEUED || - * lock->cll_state == CLS_HELD) - * - * \see cl_enqueue() cl_lock_operations::clo_enqueue() - * \see cl_lock_state::CLS_ENQUEUED - */ -int cl_enqueue_try(const struct lu_env *env, struct cl_lock *lock, - struct cl_io *io, __u32 flags) -{ - int result; - - cl_lock_trace(D_DLMTRACE, env, "enqueue lock", lock); - do { - LINVRNT(cl_lock_is_mutexed(lock)); - - result = lock->cll_error; - if (result != 0) - break; - - switch (lock->cll_state) { - case CLS_NEW: - cl_lock_state_set(env, lock, CLS_QUEUING); - /* fall-through */ - case CLS_QUEUING: - /* kick layers. */ - result = cl_enqueue_kick(env, lock, io, flags); - /* For AGL case, the cl_lock::cll_state may - * become CLS_HELD already. - */ - if (result == 0 && lock->cll_state == CLS_QUEUING) - cl_lock_state_set(env, lock, CLS_ENQUEUED); - break; - case CLS_INTRANSIT: - LASSERT(cl_lock_is_intransit(lock)); - result = CLO_WAIT; - break; - case CLS_CACHED: - /* yank lock from the cache. */ - result = cl_use_try(env, lock, 0); - break; - case CLS_ENQUEUED: - case CLS_HELD: - result = 0; - break; - default: - case CLS_FREEING: - /* - * impossible, only held locks with increased - * ->cll_holds can be enqueued, and they cannot be - * freed. - */ - LBUG(); - } - } while (result == CLO_REPEAT); - return result; -} -EXPORT_SYMBOL(cl_enqueue_try); - -/** - * Cancel the conflicting lock found during previous enqueue. - * - * \retval 0 conflicting lock has been canceled. - * \retval -ve error code. - */ -int cl_lock_enqueue_wait(const struct lu_env *env, - struct cl_lock *lock, - int keep_mutex) -{ - struct cl_lock *conflict; - int rc = 0; - - LASSERT(cl_lock_is_mutexed(lock)); - LASSERT(lock->cll_state == CLS_QUEUING); - LASSERT(lock->cll_conflict); - - conflict = lock->cll_conflict; - lock->cll_conflict = NULL; - - cl_lock_mutex_put(env, lock); - LASSERT(cl_lock_nr_mutexed(env) == 0); - - cl_lock_mutex_get(env, conflict); - cl_lock_trace(D_DLMTRACE, env, "enqueue wait", conflict); - cl_lock_cancel(env, conflict); - cl_lock_delete(env, conflict); - - while (conflict->cll_state != CLS_FREEING) { - rc = cl_lock_state_wait(env, conflict); - if (rc != 0) - break; - } - cl_lock_mutex_put(env, conflict); - lu_ref_del(&conflict->cll_reference, "cancel-wait", lock); - cl_lock_put(env, conflict); - - if (keep_mutex) - cl_lock_mutex_get(env, lock); - - LASSERT(rc <= 0); - return rc; -} -EXPORT_SYMBOL(cl_lock_enqueue_wait); - -static int cl_enqueue_locked(const struct lu_env *env, struct cl_lock *lock, - struct cl_io *io, __u32 enqflags) + const char *prefix, const struct cl_lock *lock, + const char *func, const int line) { - int result; - - LINVRNT(cl_lock_is_mutexed(lock)); - LINVRNT(cl_lock_invariant(env, lock)); - LASSERT(lock->cll_holds > 0); + struct cl_object_header *h = cl_object_header(lock->cll_descr.cld_obj); - cl_lock_user_add(env, lock); - do { - result = cl_enqueue_try(env, lock, io, enqflags); - if (result == CLO_WAIT) { - if (lock->cll_conflict) - result = cl_lock_enqueue_wait(env, lock, 1); - else - result = cl_lock_state_wait(env, lock); - if (result == 0) - continue; - } - break; - } while (1); - if (result != 0) - cl_unuse_try(env, lock); - LASSERT(ergo(result == 0 && !(enqflags & CEF_AGL), - lock->cll_state == CLS_ENQUEUED || - lock->cll_state == CLS_HELD)); - return result; + CDEBUG(level, "%s: %p (%p/%d) at %s():%d\n", + prefix, lock, env, h->coh_nesting, func, line); } +#define cl_lock_trace(level, env, prefix, lock) \ + cl_lock_trace0(level, env, prefix, lock, __func__, __LINE__) /** - * Tries to unlock a lock. - * - * This function is called to release underlying resource: - * 1. for top lock, the resource is sublocks it held; - * 2. for sublock, the resource is the reference to dlmlock. + * Adds lock slice to the compound lock. * - * cl_unuse_try is a one-shot operation, so it must NOT return CLO_WAIT. + * This is called by cl_object_operations::coo_lock_init() methods to add a + * per-layer state to the lock. New state is added at the end of + * cl_lock::cll_layers list, that is, it is at the bottom of the stack. * - * \see cl_unuse() cl_lock_operations::clo_unuse() - * \see cl_lock_state::CLS_CACHED + * \see cl_req_slice_add(), cl_page_slice_add(), cl_io_slice_add() */ -int cl_unuse_try(const struct lu_env *env, struct cl_lock *lock) +void cl_lock_slice_add(struct cl_lock *lock, struct cl_lock_slice *slice, + struct cl_object *obj, + const struct cl_lock_operations *ops) { - int result; - enum cl_lock_state state = CLS_NEW; - - cl_lock_trace(D_DLMTRACE, env, "unuse lock", lock); - - if (lock->cll_users > 1) { - cl_lock_user_del(env, lock); - return 0; - } - - /* Only if the lock is in CLS_HELD or CLS_ENQUEUED state, it can hold - * underlying resources. - */ - if (!(lock->cll_state == CLS_HELD || lock->cll_state == CLS_ENQUEUED)) { - cl_lock_user_del(env, lock); - return 0; - } - - /* - * New lock users (->cll_users) are not protecting unlocking - * from proceeding. From this point, lock eventually reaches - * CLS_CACHED, is reinitialized to CLS_NEW or fails into - * CLS_FREEING. - */ - state = cl_lock_intransit(env, lock); - - result = cl_unuse_try_internal(env, lock); - LASSERT(lock->cll_state == CLS_INTRANSIT); - LASSERT(result != CLO_WAIT); - cl_lock_user_del(env, lock); - if (result == 0 || result == -ESTALE) { - /* - * Return lock back to the cache. This is the only - * place where lock is moved into CLS_CACHED state. - * - * If one of ->clo_unuse() methods returned -ESTALE, lock - * cannot be placed into cache and has to be - * re-initialized. This happens e.g., when a sub-lock was - * canceled while unlocking was in progress. - */ - if (state == CLS_HELD && result == 0) - state = CLS_CACHED; - else - state = CLS_NEW; - cl_lock_extransit(env, lock, state); - - /* - * Hide -ESTALE error. - * If the lock is a glimpse lock, and it has multiple - * stripes. Assuming that one of its sublock returned -ENAVAIL, - * and other sublocks are matched write locks. In this case, - * we can't set this lock to error because otherwise some of - * its sublocks may not be canceled. This causes some dirty - * pages won't be written to OSTs. -jay - */ - result = 0; - } else { - CERROR("result = %d, this is unlikely!\n", result); - state = CLS_NEW; - cl_lock_extransit(env, lock, state); - } - return result ?: lock->cll_error; + slice->cls_lock = lock; + list_add_tail(&slice->cls_linkage, &lock->cll_layers); + slice->cls_obj = obj; + slice->cls_ops = ops; } -EXPORT_SYMBOL(cl_unuse_try); +EXPORT_SYMBOL(cl_lock_slice_add); -static void cl_unuse_locked(const struct lu_env *env, struct cl_lock *lock) +void cl_lock_fini(const struct lu_env *env, struct cl_lock *lock) { - int result; + cl_lock_trace(D_DLMTRACE, env, "destroy lock", lock); - result = cl_unuse_try(env, lock); - if (result) - CL_LOCK_DEBUG(D_ERROR, env, lock, "unuse return %d\n", result); -} + while (!list_empty(&lock->cll_layers)) { + struct cl_lock_slice *slice; -/** - * Unlocks a lock. - */ -void cl_unuse(const struct lu_env *env, struct cl_lock *lock) -{ - cl_lock_mutex_get(env, lock); - cl_unuse_locked(env, lock); - cl_lock_mutex_put(env, lock); - cl_lock_lockdep_release(env, lock); + slice = list_entry(lock->cll_layers.next, + struct cl_lock_slice, cls_linkage); + list_del_init(lock->cll_layers.next); + slice->cls_ops->clo_fini(env, slice); + } + POISON(lock, 0x5a, sizeof(*lock)); } -EXPORT_SYMBOL(cl_unuse); +EXPORT_SYMBOL(cl_lock_fini); -/** - * Tries to wait for a lock. - * - * This function is called repeatedly by cl_wait() until either lock is - * granted, or error occurs. This function does not block waiting for network - * communication to complete. - * - * \see cl_wait() cl_lock_operations::clo_wait() - * \see cl_lock_state::CLS_HELD - */ -int cl_wait_try(const struct lu_env *env, struct cl_lock *lock) +int cl_lock_init(const struct lu_env *env, struct cl_lock *lock, + const struct cl_io *io) { - const struct cl_lock_slice *slice; - int result; - - cl_lock_trace(D_DLMTRACE, env, "wait lock try", lock); - do { - LINVRNT(cl_lock_is_mutexed(lock)); - LINVRNT(cl_lock_invariant(env, lock)); - LASSERTF(lock->cll_state == CLS_QUEUING || - lock->cll_state == CLS_ENQUEUED || - lock->cll_state == CLS_HELD || - lock->cll_state == CLS_INTRANSIT, - "lock state: %d\n", lock->cll_state); - LASSERT(lock->cll_users > 0); - LASSERT(lock->cll_holds > 0); + struct cl_object *obj = lock->cll_descr.cld_obj; + struct cl_object *scan; + int result = 0; - result = lock->cll_error; - if (result != 0) - break; + /* Make sure cl_lock::cll_descr is initialized. */ + LASSERT(obj); - if (cl_lock_is_intransit(lock)) { - result = CLO_WAIT; + INIT_LIST_HEAD(&lock->cll_layers); + list_for_each_entry(scan, &obj->co_lu.lo_header->loh_layers, + co_lu.lo_linkage) { + result = scan->co_ops->coo_lock_init(env, scan, lock, io); + if (result != 0) { + cl_lock_fini(env, lock); break; } + } - if (lock->cll_state == CLS_HELD) - /* nothing to do */ - break; - - result = -ENOSYS; - list_for_each_entry(slice, &lock->cll_layers, cls_linkage) { - if (slice->cls_ops->clo_wait) { - result = slice->cls_ops->clo_wait(env, slice); - if (result != 0) - break; - } - } - LASSERT(result != -ENOSYS); - if (result == 0) { - LASSERT(lock->cll_state != CLS_INTRANSIT); - cl_lock_state_set(env, lock, CLS_HELD); - } - } while (result == CLO_REPEAT); return result; } -EXPORT_SYMBOL(cl_wait_try); +EXPORT_SYMBOL(cl_lock_init); /** - * Waits until enqueued lock is granted. - * - * \pre current thread or io owns a hold on the lock - * \pre ergo(result == 0, lock->cll_state == CLS_ENQUEUED || - * lock->cll_state == CLS_HELD) + * Returns a slice with a lock, corresponding to the given layer in the + * device stack. * - * \post ergo(result == 0, lock->cll_state == CLS_HELD) - */ -int cl_wait(const struct lu_env *env, struct cl_lock *lock) -{ - int result; - - cl_lock_mutex_get(env, lock); - - LINVRNT(cl_lock_invariant(env, lock)); - LASSERTF(lock->cll_state == CLS_ENQUEUED || lock->cll_state == CLS_HELD, - "Wrong state %d\n", lock->cll_state); - LASSERT(lock->cll_holds > 0); - - do { - result = cl_wait_try(env, lock); - if (result == CLO_WAIT) { - result = cl_lock_state_wait(env, lock); - if (result == 0) - continue; - } - break; - } while (1); - if (result < 0) { - cl_unuse_try(env, lock); - cl_lock_lockdep_release(env, lock); - } - cl_lock_trace(D_DLMTRACE, env, "wait lock", lock); - cl_lock_mutex_put(env, lock); - LASSERT(ergo(result == 0, lock->cll_state == CLS_HELD)); - return result; -} -EXPORT_SYMBOL(cl_wait); - -/** - * Executes cl_lock_operations::clo_weigh(), and sums results to estimate lock - * value. + * \see cl_page_at() */ -unsigned long cl_lock_weigh(const struct lu_env *env, struct cl_lock *lock) +const struct cl_lock_slice *cl_lock_at(const struct cl_lock *lock, + const struct lu_device_type *dtype) { const struct cl_lock_slice *slice; - unsigned long pound; - unsigned long ounce; - - LINVRNT(cl_lock_is_mutexed(lock)); - LINVRNT(cl_lock_invariant(env, lock)); - pound = 0; - list_for_each_entry_reverse(slice, &lock->cll_layers, cls_linkage) { - if (slice->cls_ops->clo_weigh) { - ounce = slice->cls_ops->clo_weigh(env, slice); - pound += ounce; - if (pound < ounce) /* over-weight^Wflow */ - pound = ~0UL; - } + list_for_each_entry(slice, &lock->cll_layers, cls_linkage) { + if (slice->cls_obj->co_lu.lo_dev->ld_type == dtype) + return slice; } - return pound; + return NULL; } -EXPORT_SYMBOL(cl_lock_weigh); +EXPORT_SYMBOL(cl_lock_at); -/** - * Notifies layers that lock description changed. - * - * The server can grant client a lock different from one that was requested - * (e.g., larger in extent). This method is called when actually granted lock - * description becomes known to let layers to accommodate for changed lock - * description. - * - * \see cl_lock_operations::clo_modify() - */ -int cl_lock_modify(const struct lu_env *env, struct cl_lock *lock, - const struct cl_lock_descr *desc) +void cl_lock_cancel(const struct lu_env *env, struct cl_lock *lock) { const struct cl_lock_slice *slice; - struct cl_object *obj = lock->cll_descr.cld_obj; - struct cl_object_header *hdr = cl_object_header(obj); - int result; - - cl_lock_trace(D_DLMTRACE, env, "modify lock", lock); - /* don't allow object to change */ - LASSERT(obj == desc->cld_obj); - LINVRNT(cl_lock_is_mutexed(lock)); - LINVRNT(cl_lock_invariant(env, lock)); + cl_lock_trace(D_DLMTRACE, env, "cancel lock", lock); list_for_each_entry_reverse(slice, &lock->cll_layers, cls_linkage) { - if (slice->cls_ops->clo_modify) { - result = slice->cls_ops->clo_modify(env, slice, desc); - if (result != 0) - return result; - } - } - CL_LOCK_DEBUG(D_DLMTRACE, env, lock, " -> "DDESCR"@"DFID"\n", - PDESCR(desc), PFID(lu_object_fid(&desc->cld_obj->co_lu))); - /* - * Just replace description in place. Nothing more is needed for - * now. If locks were indexed according to their extent and/or mode, - * that index would have to be updated here. - */ - spin_lock(&hdr->coh_lock_guard); - lock->cll_descr = *desc; - spin_unlock(&hdr->coh_lock_guard); - return 0; -} -EXPORT_SYMBOL(cl_lock_modify); - -/** - * Initializes lock closure with a given origin. - * - * \see cl_lock_closure - */ -void cl_lock_closure_init(const struct lu_env *env, - struct cl_lock_closure *closure, - struct cl_lock *origin, int wait) -{ - LINVRNT(cl_lock_is_mutexed(origin)); - LINVRNT(cl_lock_invariant(env, origin)); - - INIT_LIST_HEAD(&closure->clc_list); - closure->clc_origin = origin; - closure->clc_wait = wait; - closure->clc_nr = 0; -} -EXPORT_SYMBOL(cl_lock_closure_init); - -/** - * Builds a closure of \a lock. - * - * Building of a closure consists of adding initial lock (\a lock) into it, - * and calling cl_lock_operations::clo_closure() methods of \a lock. These - * methods might call cl_lock_closure_build() recursively again, adding more - * locks to the closure, etc. - * - * \see cl_lock_closure - */ -int cl_lock_closure_build(const struct lu_env *env, struct cl_lock *lock, - struct cl_lock_closure *closure) -{ - const struct cl_lock_slice *slice; - int result; - - LINVRNT(cl_lock_is_mutexed(closure->clc_origin)); - LINVRNT(cl_lock_invariant(env, closure->clc_origin)); - - result = cl_lock_enclosure(env, lock, closure); - if (result == 0) { - list_for_each_entry(slice, &lock->cll_layers, cls_linkage) { - if (slice->cls_ops->clo_closure) { - result = slice->cls_ops->clo_closure(env, slice, - closure); - if (result != 0) - break; - } - } - } - if (result != 0) - cl_lock_disclosure(env, closure); - return result; -} -EXPORT_SYMBOL(cl_lock_closure_build); - -/** - * Adds new lock to a closure. - * - * Try-locks \a lock and if succeeded, adds it to the closure (never more than - * once). If try-lock failed, returns CLO_REPEAT, after optionally waiting - * until next try-lock is likely to succeed. - */ -int cl_lock_enclosure(const struct lu_env *env, struct cl_lock *lock, - struct cl_lock_closure *closure) -{ - int result = 0; - - cl_lock_trace(D_DLMTRACE, env, "enclosure lock", lock); - if (!cl_lock_mutex_try(env, lock)) { - /* - * If lock->cll_inclosure is not empty, lock is already in - * this closure. - */ - if (list_empty(&lock->cll_inclosure)) { - cl_lock_get_trust(lock); - lu_ref_add(&lock->cll_reference, "closure", closure); - list_add(&lock->cll_inclosure, &closure->clc_list); - closure->clc_nr++; - } else - cl_lock_mutex_put(env, lock); - result = 0; - } else { - cl_lock_disclosure(env, closure); - if (closure->clc_wait) { - cl_lock_get_trust(lock); - lu_ref_add(&lock->cll_reference, "closure-w", closure); - cl_lock_mutex_put(env, closure->clc_origin); - - LASSERT(cl_lock_nr_mutexed(env) == 0); - cl_lock_mutex_get(env, lock); - cl_lock_mutex_put(env, lock); - - cl_lock_mutex_get(env, closure->clc_origin); - lu_ref_del(&lock->cll_reference, "closure-w", closure); - cl_lock_put(env, lock); - } - result = CLO_REPEAT; - } - return result; -} -EXPORT_SYMBOL(cl_lock_enclosure); - -/** Releases mutices of enclosed locks. */ -void cl_lock_disclosure(const struct lu_env *env, - struct cl_lock_closure *closure) -{ - struct cl_lock *scan; - struct cl_lock *temp; - - cl_lock_trace(D_DLMTRACE, env, "disclosure lock", closure->clc_origin); - list_for_each_entry_safe(scan, temp, &closure->clc_list, - cll_inclosure) { - list_del_init(&scan->cll_inclosure); - cl_lock_mutex_put(env, scan); - lu_ref_del(&scan->cll_reference, "closure", closure); - cl_lock_put(env, scan); - closure->clc_nr--; - } - LASSERT(closure->clc_nr == 0); -} -EXPORT_SYMBOL(cl_lock_disclosure); - -/** Finalizes a closure. */ -void cl_lock_closure_fini(struct cl_lock_closure *closure) -{ - LASSERT(closure->clc_nr == 0); - LASSERT(list_empty(&closure->clc_list)); -} -EXPORT_SYMBOL(cl_lock_closure_fini); - -/** - * Destroys this lock. Notifies layers (bottom-to-top) that lock is being - * destroyed, then destroy the lock. If there are holds on the lock, postpone - * destruction until all holds are released. This is called when a decision is - * made to destroy the lock in the future. E.g., when a blocking AST is - * received on it, or fatal communication error happens. - * - * Caller must have a reference on this lock to prevent a situation, when - * deleted lock lingers in memory for indefinite time, because nobody calls - * cl_lock_put() to finish it. - * - * \pre atomic_read(&lock->cll_ref) > 0 - * \pre ergo(cl_lock_nesting(lock) == CNL_TOP, - * cl_lock_nr_mutexed(env) == 1) - * [i.e., if a top-lock is deleted, mutices of no other locks can be - * held, as deletion of sub-locks might require releasing a top-lock - * mutex] - * - * \see cl_lock_operations::clo_delete() - * \see cl_lock::cll_holds - */ -void cl_lock_delete(const struct lu_env *env, struct cl_lock *lock) -{ - LINVRNT(cl_lock_is_mutexed(lock)); - LINVRNT(cl_lock_invariant(env, lock)); - LASSERT(ergo(cl_lock_nesting(lock) == CNL_TOP, - cl_lock_nr_mutexed(env) == 1)); - - cl_lock_trace(D_DLMTRACE, env, "delete lock", lock); - if (lock->cll_holds == 0) - cl_lock_delete0(env, lock); - else - lock->cll_flags |= CLF_DOOMED; -} -EXPORT_SYMBOL(cl_lock_delete); - -/** - * Mark lock as irrecoverably failed, and mark it for destruction. This - * happens when, e.g., server fails to grant a lock to us, or networking - * time-out happens. - * - * \pre atomic_read(&lock->cll_ref) > 0 - * - * \see clo_lock_delete() - * \see cl_lock::cll_holds - */ -void cl_lock_error(const struct lu_env *env, struct cl_lock *lock, int error) -{ - LINVRNT(cl_lock_is_mutexed(lock)); - LINVRNT(cl_lock_invariant(env, lock)); - - if (lock->cll_error == 0 && error != 0) { - cl_lock_trace(D_DLMTRACE, env, "set lock error", lock); - lock->cll_error = error; - cl_lock_signal(env, lock); - cl_lock_cancel(env, lock); - cl_lock_delete(env, lock); + if (slice->cls_ops->clo_cancel) + slice->cls_ops->clo_cancel(env, slice); } } -EXPORT_SYMBOL(cl_lock_error); - -/** - * Cancels this lock. Notifies layers - * (bottom-to-top) that lock is being cancelled, then destroy the lock. If - * there are holds on the lock, postpone cancellation until - * all holds are released. - * - * Cancellation notification is delivered to layers at most once. - * - * \see cl_lock_operations::clo_cancel() - * \see cl_lock::cll_holds - */ -void cl_lock_cancel(const struct lu_env *env, struct cl_lock *lock) -{ - LINVRNT(cl_lock_is_mutexed(lock)); - LINVRNT(cl_lock_invariant(env, lock)); - - cl_lock_trace(D_DLMTRACE, env, "cancel lock", lock); - if (lock->cll_holds == 0) - cl_lock_cancel0(env, lock); - else - lock->cll_flags |= CLF_CANCELPEND; -} EXPORT_SYMBOL(cl_lock_cancel); /** - * Finds an existing lock covering given index and optionally different from a - * given \a except lock. + * Enqueue a lock. + * \param anchor: if we need to wait for resources before getting the lock, + * use @anchor for the purpose. + * \retval 0 enqueue successfully + * \retval <0 error code */ -struct cl_lock *cl_lock_at_pgoff(const struct lu_env *env, - struct cl_object *obj, pgoff_t index, - struct cl_lock *except, - int pending, int canceld) +int cl_lock_enqueue(const struct lu_env *env, struct cl_io *io, + struct cl_lock *lock, struct cl_sync_io *anchor) { - struct cl_object_header *head; - struct cl_lock *scan; - struct cl_lock *lock; - struct cl_lock_descr *need; - - head = cl_object_header(obj); - need = &cl_env_info(env)->clt_descr; - lock = NULL; + const struct cl_lock_slice *slice; + int rc = -ENOSYS; - need->cld_mode = CLM_READ; /* CLM_READ matches both READ & WRITE, but - * not PHANTOM - */ - need->cld_start = need->cld_end = index; - need->cld_enq_flags = 0; + list_for_each_entry(slice, &lock->cll_layers, cls_linkage) { + if (!slice->cls_ops->clo_enqueue) + continue; - spin_lock(&head->coh_lock_guard); - /* It is fine to match any group lock since there could be only one - * with a uniq gid and it conflicts with all other lock modes too - */ - list_for_each_entry(scan, &head->coh_locks, cll_linkage) { - if (scan != except && - (scan->cll_descr.cld_mode == CLM_GROUP || - cl_lock_ext_match(&scan->cll_descr, need)) && - scan->cll_state >= CLS_HELD && - scan->cll_state < CLS_FREEING && - /* - * This check is racy as the lock can be canceled right - * after it is done, but this is fine, because page exists - * already. - */ - (canceld || !(scan->cll_flags & CLF_CANCELLED)) && - (pending || !(scan->cll_flags & CLF_CANCELPEND))) { - /* Don't increase cs_hit here since this - * is just a helper function. - */ - cl_lock_get_trust(scan); - lock = scan; + rc = slice->cls_ops->clo_enqueue(env, slice, io, anchor); + if (rc != 0) break; } - } - spin_unlock(&head->coh_lock_guard); - return lock; -} -EXPORT_SYMBOL(cl_lock_at_pgoff); - -/** - * Calculate the page offset at the layer of @lock. - * At the time of this writing, @page is top page and @lock is sub lock. - */ -static pgoff_t pgoff_at_lock(struct cl_page *page, struct cl_lock *lock) -{ - struct lu_device_type *dtype; - const struct cl_page_slice *slice; - - dtype = lock->cll_descr.cld_obj->co_lu.lo_dev->ld_type; - slice = cl_page_at(page, dtype); - return slice->cpl_page->cp_index; + return rc; } +EXPORT_SYMBOL(cl_lock_enqueue); /** - * Check if page @page is covered by an extra lock or discard it. + * Main high-level entry point of cl_lock interface that finds existing or + * enqueues new lock matching given description. */ -static int check_and_discard_cb(const struct lu_env *env, struct cl_io *io, - struct cl_page *page, void *cbdata) -{ - struct cl_thread_info *info = cl_env_info(env); - struct cl_lock *lock = cbdata; - pgoff_t index = pgoff_at_lock(page, lock); - - if (index >= info->clt_fn_index) { - struct cl_lock *tmp; - - /* refresh non-overlapped index */ - tmp = cl_lock_at_pgoff(env, lock->cll_descr.cld_obj, index, - lock, 1, 0); - if (tmp) { - /* Cache the first-non-overlapped index so as to skip - * all pages within [index, clt_fn_index). This - * is safe because if tmp lock is canceled, it will - * discard these pages. - */ - info->clt_fn_index = tmp->cll_descr.cld_end + 1; - if (tmp->cll_descr.cld_end == CL_PAGE_EOF) - info->clt_fn_index = CL_PAGE_EOF; - cl_lock_put(env, tmp); - } else if (cl_page_own(env, io, page) == 0) { - /* discard the page */ - cl_page_unmap(env, io, page); - cl_page_discard(env, io, page); - cl_page_disown(env, io, page); - } else { - LASSERT(page->cp_state == CPS_FREEING); - } - } - - info->clt_next_index = index + 1; - return CLP_GANG_OKAY; -} - -static int discard_cb(const struct lu_env *env, struct cl_io *io, - struct cl_page *page, void *cbdata) +int cl_lock_request(const struct lu_env *env, struct cl_io *io, + struct cl_lock *lock) { - struct cl_thread_info *info = cl_env_info(env); - struct cl_lock *lock = cbdata; + struct cl_sync_io *anchor = NULL; + __u32 enq_flags = lock->cll_descr.cld_enq_flags; + int rc; - LASSERT(lock->cll_descr.cld_mode >= CLM_WRITE); - KLASSERT(ergo(page->cp_type == CPT_CACHEABLE, - !PageWriteback(cl_page_vmpage(env, page)))); - KLASSERT(ergo(page->cp_type == CPT_CACHEABLE, - !PageDirty(cl_page_vmpage(env, page)))); + rc = cl_lock_init(env, lock, io); + if (rc < 0) + return rc; - info->clt_next_index = pgoff_at_lock(page, lock) + 1; - if (cl_page_own(env, io, page) == 0) { - /* discard the page */ - cl_page_unmap(env, io, page); - cl_page_discard(env, io, page); - cl_page_disown(env, io, page); - } else { - LASSERT(page->cp_state == CPS_FREEING); + if ((enq_flags & CEF_ASYNC) && !(enq_flags & CEF_AGL)) { + anchor = &cl_env_info(env)->clt_anchor; + cl_sync_io_init(anchor, 1, cl_sync_io_end); } - return CLP_GANG_OKAY; -} - -/** - * Discard pages protected by the given lock. This function traverses radix - * tree to find all covering pages and discard them. If a page is being covered - * by other locks, it should remain in cache. - * - * If error happens on any step, the process continues anyway (the reasoning - * behind this being that lock cancellation cannot be delayed indefinitely). - */ -int cl_lock_discard_pages(const struct lu_env *env, struct cl_lock *lock) -{ - struct cl_thread_info *info = cl_env_info(env); - struct cl_io *io = &info->clt_io; - struct cl_lock_descr *descr = &lock->cll_descr; - cl_page_gang_cb_t cb; - int res; - int result; - - LINVRNT(cl_lock_invariant(env, lock)); - - io->ci_obj = cl_object_top(descr->cld_obj); - io->ci_ignore_layout = 1; - result = cl_io_init(env, io, CIT_MISC, io->ci_obj); - if (result != 0) - goto out; - - cb = descr->cld_mode == CLM_READ ? check_and_discard_cb : discard_cb; - info->clt_fn_index = info->clt_next_index = descr->cld_start; - do { - res = cl_page_gang_lookup(env, descr->cld_obj, io, - info->clt_next_index, descr->cld_end, - cb, (void *)lock); - if (info->clt_next_index > descr->cld_end) - break; - - if (res == CLP_GANG_RESCHED) - cond_resched(); - } while (res != CLP_GANG_OKAY); -out: - cl_io_fini(env, io); - return result; -} -EXPORT_SYMBOL(cl_lock_discard_pages); - -/** - * Eliminate all locks for a given object. - * - * Caller has to guarantee that no lock is in active use. - * - * \param cancel when this is set, cl_locks_prune() cancels locks before - * destroying. - */ -void cl_locks_prune(const struct lu_env *env, struct cl_object *obj, int cancel) -{ - struct cl_object_header *head; - struct cl_lock *lock; - - head = cl_object_header(obj); - /* - * If locks are destroyed without cancellation, all pages must be - * already destroyed (as otherwise they will be left unprotected). - */ - LASSERT(ergo(!cancel, - !head->coh_tree.rnode && head->coh_pages == 0)); + rc = cl_lock_enqueue(env, io, lock, anchor); - spin_lock(&head->coh_lock_guard); - while (!list_empty(&head->coh_locks)) { - lock = container_of(head->coh_locks.next, - struct cl_lock, cll_linkage); - cl_lock_get_trust(lock); - spin_unlock(&head->coh_lock_guard); - lu_ref_add(&lock->cll_reference, "prune", current); - -again: - cl_lock_mutex_get(env, lock); - if (lock->cll_state < CLS_FREEING) { - LASSERT(lock->cll_users <= 1); - if (unlikely(lock->cll_users == 1)) { - struct l_wait_info lwi = { 0 }; - - cl_lock_mutex_put(env, lock); - l_wait_event(lock->cll_wq, - lock->cll_users == 0, - &lwi); - goto again; - } - - if (cancel) - cl_lock_cancel(env, lock); - cl_lock_delete(env, lock); - } - cl_lock_mutex_put(env, lock); - lu_ref_del(&lock->cll_reference, "prune", current); - cl_lock_put(env, lock); - spin_lock(&head->coh_lock_guard); - } - spin_unlock(&head->coh_lock_guard); -} -EXPORT_SYMBOL(cl_locks_prune); - -static struct cl_lock *cl_lock_hold_mutex(const struct lu_env *env, - const struct cl_io *io, - const struct cl_lock_descr *need, - const char *scope, const void *source) -{ - struct cl_lock *lock; + if (anchor) { + int rc2; - while (1) { - lock = cl_lock_find(env, io, need); - if (IS_ERR(lock)) - break; - cl_lock_mutex_get(env, lock); - if (lock->cll_state < CLS_FREEING && - !(lock->cll_flags & CLF_CANCELLED)) { - cl_lock_hold_mod(env, lock, 1); - lu_ref_add(&lock->cll_holders, scope, source); - lu_ref_add(&lock->cll_reference, scope, source); - break; - } - cl_lock_mutex_put(env, lock); - cl_lock_put(env, lock); + /* drop the reference count held at initialization time */ + cl_sync_io_note(env, anchor, 0); + rc2 = cl_sync_io_wait(env, anchor, 0); + if (rc2 < 0 && rc == 0) + rc = rc2; } - return lock; -} - -/** - * Returns a lock matching \a need description with a reference and a hold on - * it. - * - * This is much like cl_lock_find(), except that cl_lock_hold() additionally - * guarantees that lock is not in the CLS_FREEING state on return. - */ -struct cl_lock *cl_lock_hold(const struct lu_env *env, const struct cl_io *io, - const struct cl_lock_descr *need, - const char *scope, const void *source) -{ - struct cl_lock *lock; - - lock = cl_lock_hold_mutex(env, io, need, scope, source); - if (!IS_ERR(lock)) - cl_lock_mutex_put(env, lock); - return lock; -} -EXPORT_SYMBOL(cl_lock_hold); - -/** - * Main high-level entry point of cl_lock interface that finds existing or - * enqueues new lock matching given description. - */ -struct cl_lock *cl_lock_request(const struct lu_env *env, struct cl_io *io, - const struct cl_lock_descr *need, - const char *scope, const void *source) -{ - struct cl_lock *lock; - int rc; - __u32 enqflags = need->cld_enq_flags; - do { - lock = cl_lock_hold_mutex(env, io, need, scope, source); - if (IS_ERR(lock)) - break; + if (rc < 0) + cl_lock_release(env, lock); - rc = cl_enqueue_locked(env, lock, io, enqflags); - if (rc == 0) { - if (cl_lock_fits_into(env, lock, need, io)) { - if (!(enqflags & CEF_AGL)) { - cl_lock_mutex_put(env, lock); - cl_lock_lockdep_acquire(env, lock, - enqflags); - break; - } - rc = 1; - } - cl_unuse_locked(env, lock); - } - cl_lock_trace(D_DLMTRACE, env, - rc <= 0 ? "enqueue failed" : "agl succeed", lock); - cl_lock_hold_release(env, lock, scope, source); - cl_lock_mutex_put(env, lock); - lu_ref_del(&lock->cll_reference, scope, source); - cl_lock_put(env, lock); - if (rc > 0) { - LASSERT(enqflags & CEF_AGL); - lock = NULL; - } else if (rc != 0) { - lock = ERR_PTR(rc); - } - } while (rc == 0); - return lock; + return rc; } EXPORT_SYMBOL(cl_lock_request); -/** - * Adds a hold to a known lock. - */ -void cl_lock_hold_add(const struct lu_env *env, struct cl_lock *lock, - const char *scope, const void *source) -{ - LINVRNT(cl_lock_is_mutexed(lock)); - LINVRNT(cl_lock_invariant(env, lock)); - LASSERT(lock->cll_state != CLS_FREEING); - - cl_lock_hold_mod(env, lock, 1); - cl_lock_get(lock); - lu_ref_add(&lock->cll_holders, scope, source); - lu_ref_add(&lock->cll_reference, scope, source); -} -EXPORT_SYMBOL(cl_lock_hold_add); - -/** - * Releases a hold and a reference on a lock, on which caller acquired a - * mutex. - */ -void cl_lock_unhold(const struct lu_env *env, struct cl_lock *lock, - const char *scope, const void *source) -{ - LINVRNT(cl_lock_invariant(env, lock)); - cl_lock_hold_release(env, lock, scope, source); - lu_ref_del(&lock->cll_reference, scope, source); - cl_lock_put(env, lock); -} -EXPORT_SYMBOL(cl_lock_unhold); - /** * Releases a hold and a reference on a lock, obtained by cl_lock_hold(). */ -void cl_lock_release(const struct lu_env *env, struct cl_lock *lock, - const char *scope, const void *source) +void cl_lock_release(const struct lu_env *env, struct cl_lock *lock) { - LINVRNT(cl_lock_invariant(env, lock)); cl_lock_trace(D_DLMTRACE, env, "release lock", lock); - cl_lock_mutex_get(env, lock); - cl_lock_hold_release(env, lock, scope, source); - cl_lock_mutex_put(env, lock); - lu_ref_del(&lock->cll_reference, scope, source); - cl_lock_put(env, lock); + cl_lock_cancel(env, lock); + cl_lock_fini(env, lock); } EXPORT_SYMBOL(cl_lock_release); -void cl_lock_user_add(const struct lu_env *env, struct cl_lock *lock) -{ - LINVRNT(cl_lock_is_mutexed(lock)); - LINVRNT(cl_lock_invariant(env, lock)); - - cl_lock_used_mod(env, lock, 1); -} -EXPORT_SYMBOL(cl_lock_user_add); - -void cl_lock_user_del(const struct lu_env *env, struct cl_lock *lock) -{ - LINVRNT(cl_lock_is_mutexed(lock)); - LINVRNT(cl_lock_invariant(env, lock)); - LASSERT(lock->cll_users > 0); - - cl_lock_used_mod(env, lock, -1); - if (lock->cll_users == 0) - wake_up_all(&lock->cll_wq); -} -EXPORT_SYMBOL(cl_lock_user_del); - const char *cl_lock_mode_name(const enum cl_lock_mode mode) { static const char *names[] = { - [CLM_PHANTOM] = "P", [CLM_READ] = "R", [CLM_WRITE] = "W", [CLM_GROUP] = "G" @@ -2189,10 +261,8 @@ void cl_lock_print(const struct lu_env *env, void *cookie, lu_printer_t printer, const struct cl_lock *lock) { const struct cl_lock_slice *slice; - (*printer)(env, cookie, "lock@%p[%d %d %d %d %d %08lx] ", - lock, atomic_read(&lock->cll_ref), - lock->cll_state, lock->cll_error, lock->cll_holds, - lock->cll_users, lock->cll_flags); + + (*printer)(env, cookie, "lock@%p", lock); cl_lock_descr_print(env, cookie, printer, &lock->cll_descr); (*printer)(env, cookie, " {\n"); @@ -2207,13 +277,3 @@ void cl_lock_print(const struct lu_env *env, void *cookie, (*printer)(env, cookie, "} lock@%p\n", lock); } EXPORT_SYMBOL(cl_lock_print); - -int cl_lock_init(void) -{ - return lu_kmem_init(cl_lock_caches); -} - -void cl_lock_fini(void) -{ - lu_kmem_fini(cl_lock_caches); -} diff --git a/drivers/staging/lustre/lustre/obdclass/cl_object.c b/drivers/staging/lustre/lustre/obdclass/cl_object.c index 43e299d4d..5940f3031 100644 --- a/drivers/staging/lustre/lustre/obdclass/cl_object.c +++ b/drivers/staging/lustre/lustre/obdclass/cl_object.c @@ -36,6 +36,7 @@ * Client Lustre Object. * * Author: Nikita Danilov + * Author: Jinshan Xiong */ /* @@ -43,8 +44,6 @@ * * i_mutex * PG_locked - * ->coh_page_guard - * ->coh_lock_guard * ->coh_attr_guard * ->ls_guard */ @@ -63,10 +62,6 @@ static struct kmem_cache *cl_env_kmem; -/** Lock class of cl_object_header::coh_page_guard */ -static struct lock_class_key cl_page_guard_class; -/** Lock class of cl_object_header::coh_lock_guard */ -static struct lock_class_key cl_lock_guard_class; /** Lock class of cl_object_header::coh_attr_guard */ static struct lock_class_key cl_attr_guard_class; @@ -81,17 +76,9 @@ int cl_object_header_init(struct cl_object_header *h) result = lu_object_header_init(&h->coh_lu); if (result == 0) { - spin_lock_init(&h->coh_page_guard); - spin_lock_init(&h->coh_lock_guard); spin_lock_init(&h->coh_attr_guard); - lockdep_set_class(&h->coh_page_guard, &cl_page_guard_class); - lockdep_set_class(&h->coh_lock_guard, &cl_lock_guard_class); lockdep_set_class(&h->coh_attr_guard, &cl_attr_guard_class); - h->coh_pages = 0; - /* XXX hard coded GFP_* mask. */ - INIT_RADIX_TREE(&h->coh_tree, GFP_ATOMIC); - INIT_LIST_HEAD(&h->coh_locks); - h->coh_page_bufsize = ALIGN(sizeof(struct cl_page), 8); + h->coh_page_bufsize = 0; } return result; } @@ -145,7 +132,7 @@ EXPORT_SYMBOL(cl_object_get); /** * Returns the top-object for a given \a o. * - * \see cl_page_top(), cl_io_top() + * \see cl_io_top() */ struct cl_object *cl_object_top(struct cl_object *o) { @@ -314,6 +301,29 @@ int cl_conf_set(const struct lu_env *env, struct cl_object *obj, } EXPORT_SYMBOL(cl_conf_set); +/** + * Prunes caches of pages and locks for this object. + */ +int cl_object_prune(const struct lu_env *env, struct cl_object *obj) +{ + struct lu_object_header *top; + struct cl_object *o; + int result; + + top = obj->co_lu.lo_header; + result = 0; + list_for_each_entry(o, &top->loh_layers, co_lu.lo_linkage) { + if (o->co_ops->coo_prune) { + result = o->co_ops->coo_prune(env, o); + if (result != 0) + break; + } + } + + return result; +} +EXPORT_SYMBOL(cl_object_prune); + /** * Helper function removing all object locks, and marking object for * deletion. All object pages must have been deleted at this point. @@ -323,34 +333,12 @@ EXPORT_SYMBOL(cl_conf_set); */ void cl_object_kill(const struct lu_env *env, struct cl_object *obj) { - struct cl_object_header *hdr; - - hdr = cl_object_header(obj); - LASSERT(!hdr->coh_tree.rnode); - LASSERT(hdr->coh_pages == 0); + struct cl_object_header *hdr = cl_object_header(obj); set_bit(LU_OBJECT_HEARD_BANSHEE, &hdr->coh_lu.loh_flags); - /* - * Destroy all locks. Object destruction (including cl_inode_fini()) - * cannot cancel the locks, because in the case of a local client, - * where client and server share the same thread running - * prune_icache(), this can dead-lock with ldlm_cancel_handler() - * waiting on __wait_on_freeing_inode(). - */ - cl_locks_prune(env, obj, 0); } EXPORT_SYMBOL(cl_object_kill); -/** - * Prunes caches of pages and locks for this object. - */ -void cl_object_prune(const struct lu_env *env, struct cl_object *obj) -{ - cl_pages_prune(env, obj); - cl_locks_prune(env, obj, 1); -} -EXPORT_SYMBOL(cl_object_prune); - void cache_stats_init(struct cache_stats *cs, const char *name) { int i; @@ -383,6 +371,8 @@ static int cache_stats_print(const struct cache_stats *cs, return 0; } +static void cl_env_percpu_refill(void); + /** * Initialize client site. * @@ -397,11 +387,9 @@ int cl_site_init(struct cl_site *s, struct cl_device *d) result = lu_site_init(&s->cs_lu, &d->cd_lu_dev); if (result == 0) { cache_stats_init(&s->cs_pages, "pages"); - cache_stats_init(&s->cs_locks, "locks"); for (i = 0; i < ARRAY_SIZE(s->cs_pages_state); ++i) atomic_set(&s->cs_pages_state[0], 0); - for (i = 0; i < ARRAY_SIZE(s->cs_locks_state); ++i) - atomic_set(&s->cs_locks_state[i], 0); + cl_env_percpu_refill(); } return result; } @@ -435,15 +423,6 @@ int cl_site_stats_print(const struct cl_site *site, struct seq_file *m) [CPS_PAGEIN] = "r", [CPS_FREEING] = "f" }; - static const char *lstate[] = { - [CLS_NEW] = "n", - [CLS_QUEUING] = "q", - [CLS_ENQUEUED] = "e", - [CLS_HELD] = "h", - [CLS_INTRANSIT] = "t", - [CLS_CACHED] = "c", - [CLS_FREEING] = "f" - }; /* lookup hit total busy create pages: ...... ...... ...... ...... ...... [...... ...... ...... ......] @@ -457,12 +436,6 @@ locks: ...... ...... ...... ...... ...... [...... ...... ...... ...... ......] seq_printf(m, "%s: %u ", pstate[i], atomic_read(&site->cs_pages_state[i])); seq_printf(m, "]\n"); - cache_stats_print(&site->cs_locks, m, 0); - seq_printf(m, " ["); - for (i = 0; i < ARRAY_SIZE(site->cs_locks_state); ++i) - seq_printf(m, "%s: %u ", lstate[i], - atomic_read(&site->cs_locks_state[i])); - seq_printf(m, "]\n"); cache_stats_print(&cl_env_stats, m, 0); seq_printf(m, "\n"); return 0; @@ -492,6 +465,13 @@ EXPORT_SYMBOL(cl_site_stats_print); * bz20044, bz22683. */ +static LIST_HEAD(cl_envs); +static unsigned int cl_envs_cached_nr; +static unsigned int cl_envs_cached_max = 128; /* XXX: prototype: arbitrary limit + * for now. + */ +static DEFINE_SPINLOCK(cl_envs_guard); + struct cl_env { void *ce_magic; struct lu_env ce_lu; @@ -674,8 +654,9 @@ static struct lu_env *cl_env_new(__u32 ctx_tags, __u32 ses_tags, void *debug) lu_context_enter(&cle->ce_ses); env->le_ses = &cle->ce_ses; cl_env_init0(cle, debug); - } else + } else { lu_env_fini(env); + } } if (rc != 0) { kmem_cache_free(cl_env_kmem, cle); @@ -684,8 +665,9 @@ static struct lu_env *cl_env_new(__u32 ctx_tags, __u32 ses_tags, void *debug) CL_ENV_INC(create); CL_ENV_INC(total); } - } else + } else { env = ERR_PTR(-ENOMEM); + } return env; } @@ -697,6 +679,39 @@ static void cl_env_fini(struct cl_env *cle) kmem_cache_free(cl_env_kmem, cle); } +static struct lu_env *cl_env_obtain(void *debug) +{ + struct cl_env *cle; + struct lu_env *env; + + spin_lock(&cl_envs_guard); + LASSERT(equi(cl_envs_cached_nr == 0, list_empty(&cl_envs))); + if (cl_envs_cached_nr > 0) { + int rc; + + cle = container_of(cl_envs.next, struct cl_env, ce_linkage); + list_del_init(&cle->ce_linkage); + cl_envs_cached_nr--; + spin_unlock(&cl_envs_guard); + + env = &cle->ce_lu; + rc = lu_env_refill(env); + if (rc == 0) { + cl_env_init0(cle, debug); + lu_context_enter(&env->le_ctx); + lu_context_enter(&cle->ce_ses); + } else { + cl_env_fini(cle); + env = ERR_PTR(rc); + } + } else { + spin_unlock(&cl_envs_guard); + env = cl_env_new(lu_context_tags_default, + lu_session_tags_default, debug); + } + return env; +} + static inline struct cl_env *cl_env_container(struct lu_env *env) { return container_of(env, struct cl_env, ce_lu); @@ -727,6 +742,8 @@ static struct lu_env *cl_env_peek(int *refcheck) * Returns lu_env: if there already is an environment associated with the * current thread, it is returned, otherwise, new environment is allocated. * + * Allocations are amortized through the global cache of environments. + * * \param refcheck pointer to a counter used to detect environment leaks. In * the usual case cl_env_get() and cl_env_put() are called in the same lexical * scope and pointer to the same integer is passed as \a refcheck. This is @@ -740,10 +757,7 @@ struct lu_env *cl_env_get(int *refcheck) env = cl_env_peek(refcheck); if (!env) { - env = cl_env_new(lu_context_tags_default, - lu_session_tags_default, - __builtin_return_address(0)); - + env = cl_env_obtain(__builtin_return_address(0)); if (!IS_ERR(env)) { struct cl_env *cle; @@ -786,6 +800,32 @@ static void cl_env_exit(struct cl_env *cle) lu_context_exit(&cle->ce_ses); } +/** + * Finalizes and frees a given number of cached environments. This is done to + * (1) free some memory (not currently hooked into VM), or (2) release + * references to modules. + */ +unsigned int cl_env_cache_purge(unsigned int nr) +{ + struct cl_env *cle; + + spin_lock(&cl_envs_guard); + for (; !list_empty(&cl_envs) && nr > 0; --nr) { + cle = container_of(cl_envs.next, struct cl_env, ce_linkage); + list_del_init(&cle->ce_linkage); + LASSERT(cl_envs_cached_nr > 0); + cl_envs_cached_nr--; + spin_unlock(&cl_envs_guard); + + cl_env_fini(cle); + spin_lock(&cl_envs_guard); + } + LASSERT(equi(cl_envs_cached_nr == 0, list_empty(&cl_envs))); + spin_unlock(&cl_envs_guard); + return nr; +} +EXPORT_SYMBOL(cl_env_cache_purge); + /** * Release an environment. * @@ -808,7 +848,22 @@ void cl_env_put(struct lu_env *env, int *refcheck) cl_env_detach(cle); cle->ce_debug = NULL; cl_env_exit(cle); - cl_env_fini(cle); + /* + * Don't bother to take a lock here. + * + * Return environment to the cache only when it was allocated + * with the standard tags. + */ + if (cl_envs_cached_nr < cl_envs_cached_max && + (env->le_ctx.lc_tags & ~LCT_HAS_EXIT) == LCT_CL_THREAD && + (env->le_ses->lc_tags & ~LCT_HAS_EXIT) == LCT_SESSION) { + spin_lock(&cl_envs_guard); + list_add(&cle->ce_linkage, &cl_envs); + cl_envs_cached_nr++; + spin_unlock(&cl_envs_guard); + } else { + cl_env_fini(cle); + } } } EXPORT_SYMBOL(cl_env_put); @@ -914,6 +969,104 @@ void cl_lvb2attr(struct cl_attr *attr, const struct ost_lvb *lvb) } EXPORT_SYMBOL(cl_lvb2attr); +static struct cl_env cl_env_percpu[NR_CPUS]; + +static int cl_env_percpu_init(void) +{ + struct cl_env *cle; + int tags = LCT_REMEMBER | LCT_NOREF; + int i, j; + int rc = 0; + + for_each_possible_cpu(i) { + struct lu_env *env; + + cle = &cl_env_percpu[i]; + env = &cle->ce_lu; + + INIT_LIST_HEAD(&cle->ce_linkage); + cle->ce_magic = &cl_env_init0; + rc = lu_env_init(env, LCT_CL_THREAD | tags); + if (rc == 0) { + rc = lu_context_init(&cle->ce_ses, LCT_SESSION | tags); + if (rc == 0) { + lu_context_enter(&cle->ce_ses); + env->le_ses = &cle->ce_ses; + } else { + lu_env_fini(env); + } + } + if (rc != 0) + break; + } + if (rc != 0) { + /* Indices 0 to i (excluding i) were correctly initialized, + * thus we must uninitialize up to i, the rest are undefined. + */ + for (j = 0; j < i; j++) { + cle = &cl_env_percpu[i]; + lu_context_exit(&cle->ce_ses); + lu_context_fini(&cle->ce_ses); + lu_env_fini(&cle->ce_lu); + } + } + + return rc; +} + +static void cl_env_percpu_fini(void) +{ + int i; + + for_each_possible_cpu(i) { + struct cl_env *cle = &cl_env_percpu[i]; + + lu_context_exit(&cle->ce_ses); + lu_context_fini(&cle->ce_ses); + lu_env_fini(&cle->ce_lu); + } +} + +static void cl_env_percpu_refill(void) +{ + int i; + + for_each_possible_cpu(i) + lu_env_refill(&cl_env_percpu[i].ce_lu); +} + +void cl_env_percpu_put(struct lu_env *env) +{ + struct cl_env *cle; + int cpu; + + cpu = smp_processor_id(); + cle = cl_env_container(env); + LASSERT(cle == &cl_env_percpu[cpu]); + + cle->ce_ref--; + LASSERT(cle->ce_ref == 0); + + CL_ENV_DEC(busy); + cl_env_detach(cle); + cle->ce_debug = NULL; + + put_cpu(); +} +EXPORT_SYMBOL(cl_env_percpu_put); + +struct lu_env *cl_env_percpu_get(void) +{ + struct cl_env *cle; + + cle = &cl_env_percpu[get_cpu()]; + cl_env_init0(cle, __builtin_return_address(0)); + + cl_env_attach(cle); + return &cle->ce_lu; +} +EXPORT_SYMBOL(cl_env_percpu_get); + /***************************************************************************** * * Temporary prototype thing: mirror obd-devices into cl devices. @@ -944,8 +1097,9 @@ struct cl_device *cl_type_setup(const struct lu_env *env, struct lu_site *site, CERROR("can't init device '%s', %d\n", typename, rc); d = ERR_PTR(rc); } - } else + } else { CERROR("Cannot allocate device: '%s'\n", typename); + } return lu2cl_dev(d); } EXPORT_SYMBOL(cl_type_setup); @@ -959,12 +1113,6 @@ void cl_stack_fini(const struct lu_env *env, struct cl_device *cl) } EXPORT_SYMBOL(cl_stack_fini); -int cl_lock_init(void); -void cl_lock_fini(void); - -int cl_page_init(void); -void cl_page_fini(void); - static struct lu_context_key cl_key; struct cl_thread_info *cl_env_info(const struct lu_env *env) @@ -1059,17 +1207,13 @@ int cl_global_init(void) if (result) goto out_kmem; - result = cl_lock_init(); + result = cl_env_percpu_init(); if (result) + /* no cl_env_percpu_fini on error */ goto out_context; - result = cl_page_init(); - if (result) - goto out_lock; - return 0; -out_lock: - cl_lock_fini(); + out_context: lu_context_key_degister(&cl_key); out_kmem: @@ -1084,8 +1228,7 @@ out_store: */ void cl_global_fini(void) { - cl_lock_fini(); - cl_page_fini(); + cl_env_percpu_fini(); lu_context_key_degister(&cl_key); lu_kmem_fini(cl_object_caches); cl_env_store_fini(); diff --git a/drivers/staging/lustre/lustre/obdclass/cl_page.c b/drivers/staging/lustre/lustre/obdclass/cl_page.c index 394580016..b754f516e 100644 --- a/drivers/staging/lustre/lustre/obdclass/cl_page.c +++ b/drivers/staging/lustre/lustre/obdclass/cl_page.c @@ -36,6 +36,7 @@ * Client Lustre Page. * * Author: Nikita Danilov + * Author: Jinshan Xiong */ #define DEBUG_SUBSYSTEM S_CLASS @@ -48,8 +49,7 @@ #include "../include/cl_object.h" #include "cl_internal.h" -static void cl_page_delete0(const struct lu_env *env, struct cl_page *pg, - int radix); +static void cl_page_delete0(const struct lu_env *env, struct cl_page *pg); # define PASSERT(env, page, expr) \ do { \ @@ -62,25 +62,12 @@ static void cl_page_delete0(const struct lu_env *env, struct cl_page *pg, # define PINVRNT(env, page, exp) \ ((void)sizeof(env), (void)sizeof(page), (void)sizeof !!(exp)) -/** - * Internal version of cl_page_top, it should be called if the page is - * known to be not freed, says with page referenced, or radix tree lock held, - * or page owned. - */ -static struct cl_page *cl_page_top_trusted(struct cl_page *page) -{ - while (page->cp_parent) - page = page->cp_parent; - return page; -} - /** * Internal version of cl_page_get(). * * This function can be used to obtain initial reference to previously * unreferenced cached object. It can be called only if concurrent page - * reclamation is somehow prevented, e.g., by locking page radix-tree - * (cl_object_header::hdr->coh_page_guard), or by keeping a lock on a VM page, + * reclamation is somehow prevented, e.g., by keeping a lock on a VM page, * associated with \a page. * * Use with care! Not exported. @@ -103,142 +90,12 @@ cl_page_at_trusted(const struct cl_page *page, { const struct cl_page_slice *slice; - page = cl_page_top_trusted((struct cl_page *)page); - do { - list_for_each_entry(slice, &page->cp_layers, cpl_linkage) { - if (slice->cpl_obj->co_lu.lo_dev->ld_type == dtype) - return slice; - } - page = page->cp_child; - } while (page); - return NULL; -} - -/** - * Returns a page with given index in the given object, or NULL if no page is - * found. Acquires a reference on \a page. - * - * Locking: called under cl_object_header::coh_page_guard spin-lock. - */ -struct cl_page *cl_page_lookup(struct cl_object_header *hdr, pgoff_t index) -{ - struct cl_page *page; - - assert_spin_locked(&hdr->coh_page_guard); - - page = radix_tree_lookup(&hdr->coh_tree, index); - if (page) - cl_page_get_trust(page); - return page; -} -EXPORT_SYMBOL(cl_page_lookup); - -/** - * Returns a list of pages by a given [start, end] of \a obj. - * - * \param resched If not NULL, then we give up before hogging CPU for too - * long and set *resched = 1, in that case caller should implement a retry - * logic. - * - * Gang tree lookup (radix_tree_gang_lookup()) optimization is absolutely - * crucial in the face of [offset, EOF] locks. - * - * Return at least one page in @queue unless there is no covered page. - */ -int cl_page_gang_lookup(const struct lu_env *env, struct cl_object *obj, - struct cl_io *io, pgoff_t start, pgoff_t end, - cl_page_gang_cb_t cb, void *cbdata) -{ - struct cl_object_header *hdr; - struct cl_page *page; - struct cl_page **pvec; - const struct cl_page_slice *slice; - const struct lu_device_type *dtype; - pgoff_t idx; - unsigned int nr; - unsigned int i; - unsigned int j; - int res = CLP_GANG_OKAY; - int tree_lock = 1; - - idx = start; - hdr = cl_object_header(obj); - pvec = cl_env_info(env)->clt_pvec; - dtype = cl_object_top(obj)->co_lu.lo_dev->ld_type; - spin_lock(&hdr->coh_page_guard); - while ((nr = radix_tree_gang_lookup(&hdr->coh_tree, (void **)pvec, - idx, CLT_PVEC_SIZE)) > 0) { - int end_of_region = 0; - - idx = pvec[nr - 1]->cp_index + 1; - for (i = 0, j = 0; i < nr; ++i) { - page = pvec[i]; - pvec[i] = NULL; - - LASSERT(page->cp_type == CPT_CACHEABLE); - if (page->cp_index > end) { - end_of_region = 1; - break; - } - if (page->cp_state == CPS_FREEING) - continue; - - slice = cl_page_at_trusted(page, dtype); - /* - * Pages for lsm-less file has no underneath sub-page - * for osc, in case of ... - */ - PASSERT(env, page, slice); - - page = slice->cpl_page; - /* - * Can safely call cl_page_get_trust() under - * radix-tree spin-lock. - * - * XXX not true, because @page is from object another - * than @hdr and protected by different tree lock. - */ - cl_page_get_trust(page); - lu_ref_add_atomic(&page->cp_reference, - "gang_lookup", current); - pvec[j++] = page; - } - - /* - * Here a delicate locking dance is performed. Current thread - * holds a reference to a page, but has to own it before it - * can be placed into queue. Owning implies waiting, so - * radix-tree lock is to be released. After a wait one has to - * check that pages weren't truncated (cl_page_own() returns - * error in the latter case). - */ - spin_unlock(&hdr->coh_page_guard); - tree_lock = 0; - - for (i = 0; i < j; ++i) { - page = pvec[i]; - if (res == CLP_GANG_OKAY) - res = (*cb)(env, io, page, cbdata); - lu_ref_del(&page->cp_reference, - "gang_lookup", current); - cl_page_put(env, page); - } - if (nr < CLT_PVEC_SIZE || end_of_region) - break; - - if (res == CLP_GANG_OKAY && need_resched()) - res = CLP_GANG_RESCHED; - if (res != CLP_GANG_OKAY) - break; - - spin_lock(&hdr->coh_page_guard); - tree_lock = 1; + list_for_each_entry(slice, &page->cp_layers, cpl_linkage) { + if (slice->cpl_obj->co_lu.lo_dev->ld_type == dtype) + return slice; } - if (tree_lock) - spin_unlock(&hdr->coh_page_guard); - return res; + return NULL; } -EXPORT_SYMBOL(cl_page_gang_lookup); static void cl_page_free(const struct lu_env *env, struct cl_page *page) { @@ -247,17 +104,16 @@ static void cl_page_free(const struct lu_env *env, struct cl_page *page) PASSERT(env, page, list_empty(&page->cp_batch)); PASSERT(env, page, !page->cp_owner); PASSERT(env, page, !page->cp_req); - PASSERT(env, page, !page->cp_parent); PASSERT(env, page, page->cp_state == CPS_FREEING); - might_sleep(); while (!list_empty(&page->cp_layers)) { struct cl_page_slice *slice; slice = list_entry(page->cp_layers.next, struct cl_page_slice, cpl_linkage); list_del_init(page->cp_layers.next); - slice->cpl_ops->cpo_fini(env, slice); + if (unlikely(slice->cpl_ops->cpo_fini)) + slice->cpl_ops->cpo_fini(env, slice); } lu_object_ref_del_at(&obj->co_lu, &page->cp_obj_ref, "cl_page", page); cl_object_put(env, obj); @@ -276,10 +132,10 @@ static inline void cl_page_state_set_trust(struct cl_page *page, *(enum cl_page_state *)&page->cp_state = state; } -static struct cl_page *cl_page_alloc(const struct lu_env *env, - struct cl_object *o, pgoff_t ind, - struct page *vmpage, - enum cl_page_type type) +struct cl_page *cl_page_alloc(const struct lu_env *env, + struct cl_object *o, pgoff_t ind, + struct page *vmpage, + enum cl_page_type type) { struct cl_page *page; struct lu_object_header *head; @@ -289,13 +145,11 @@ static struct cl_page *cl_page_alloc(const struct lu_env *env, int result = 0; atomic_set(&page->cp_ref, 1); - if (type == CPT_CACHEABLE) /* for radix tree */ - atomic_inc(&page->cp_ref); page->cp_obj = o; cl_object_get(o); lu_object_ref_add_at(&o->co_lu, &page->cp_obj_ref, "cl_page", page); - page->cp_index = ind; + page->cp_vmpage = vmpage; cl_page_state_set_trust(page, CPS_CACHED); page->cp_type = type; INIT_LIST_HEAD(&page->cp_layers); @@ -306,10 +160,10 @@ static struct cl_page *cl_page_alloc(const struct lu_env *env, head = o->co_lu.lo_header; list_for_each_entry(o, &head->loh_layers, co_lu.lo_linkage) { if (o->co_ops->coo_page_init) { - result = o->co_ops->coo_page_init(env, o, - page, vmpage); + result = o->co_ops->coo_page_init(env, o, page, + ind); if (result != 0) { - cl_page_delete0(env, page, 0); + cl_page_delete0(env, page); cl_page_free(env, page); page = ERR_PTR(result); break; @@ -321,6 +175,7 @@ static struct cl_page *cl_page_alloc(const struct lu_env *env, } return page; } +EXPORT_SYMBOL(cl_page_alloc); /** * Returns a cl_page with index \a idx at the object \a o, and associated with @@ -333,16 +188,13 @@ static struct cl_page *cl_page_alloc(const struct lu_env *env, * * \see cl_object_find(), cl_lock_find() */ -static struct cl_page *cl_page_find0(const struct lu_env *env, - struct cl_object *o, - pgoff_t idx, struct page *vmpage, - enum cl_page_type type, - struct cl_page *parent) +struct cl_page *cl_page_find(const struct lu_env *env, + struct cl_object *o, + pgoff_t idx, struct page *vmpage, + enum cl_page_type type) { struct cl_page *page = NULL; - struct cl_page *ghost = NULL; struct cl_object_header *hdr; - int err; LASSERT(type == CPT_CACHEABLE || type == CPT_TRANSIENT); might_sleep(); @@ -368,120 +220,25 @@ static struct cl_page *cl_page_find0(const struct lu_env *env, * reference on it. */ page = cl_vmpage_page(vmpage, o); - PINVRNT(env, page, - ergo(page, - cl_page_vmpage(env, page) == vmpage && - (void *)radix_tree_lookup(&hdr->coh_tree, - idx) == page)); - } - if (page) - return page; + if (page) + return page; + } /* allocate and initialize cl_page */ page = cl_page_alloc(env, o, idx, vmpage, type); - if (IS_ERR(page)) - return page; - - if (type == CPT_TRANSIENT) { - if (parent) { - LASSERT(!page->cp_parent); - page->cp_parent = parent; - parent->cp_child = page; - } - return page; - } - - /* - * XXX optimization: use radix_tree_preload() here, and change tree - * gfp mask to GFP_KERNEL in cl_object_header_init(). - */ - spin_lock(&hdr->coh_page_guard); - err = radix_tree_insert(&hdr->coh_tree, idx, page); - if (err != 0) { - ghost = page; - /* - * Noted by Jay: a lock on \a vmpage protects cl_page_find() - * from this race, but - * - * 0. it's better to have cl_page interface "locally - * consistent" so that its correctness can be reasoned - * about without appealing to the (obscure world of) VM - * locking. - * - * 1. handling this race allows ->coh_tree to remain - * consistent even when VM locking is somehow busted, - * which is very useful during diagnosing and debugging. - */ - page = ERR_PTR(err); - CL_PAGE_DEBUG(D_ERROR, env, ghost, - "fail to insert into radix tree: %d\n", err); - } else { - if (parent) { - LASSERT(!page->cp_parent); - page->cp_parent = parent; - parent->cp_child = page; - } - hdr->coh_pages++; - } - spin_unlock(&hdr->coh_page_guard); - - if (unlikely(ghost)) { - cl_page_delete0(env, ghost, 0); - cl_page_free(env, ghost); - } return page; } - -struct cl_page *cl_page_find(const struct lu_env *env, struct cl_object *o, - pgoff_t idx, struct page *vmpage, - enum cl_page_type type) -{ - return cl_page_find0(env, o, idx, vmpage, type, NULL); -} EXPORT_SYMBOL(cl_page_find); -struct cl_page *cl_page_find_sub(const struct lu_env *env, struct cl_object *o, - pgoff_t idx, struct page *vmpage, - struct cl_page *parent) -{ - return cl_page_find0(env, o, idx, vmpage, parent->cp_type, parent); -} -EXPORT_SYMBOL(cl_page_find_sub); - static inline int cl_page_invariant(const struct cl_page *pg) { - struct cl_object_header *header; - struct cl_page *parent; - struct cl_page *child; - struct cl_io *owner; - /* * Page invariant is protected by a VM lock. */ LINVRNT(cl_page_is_vmlocked(NULL, pg)); - header = cl_object_header(pg->cp_obj); - parent = pg->cp_parent; - child = pg->cp_child; - owner = pg->cp_owner; - - return cl_page_in_use(pg) && - ergo(parent, parent->cp_child == pg) && - ergo(child, child->cp_parent == pg) && - ergo(child, pg->cp_obj != child->cp_obj) && - ergo(parent, pg->cp_obj != parent->cp_obj) && - ergo(owner && parent, - parent->cp_owner == pg->cp_owner->ci_parent) && - ergo(owner && child, child->cp_owner->ci_parent == owner) && - /* - * Either page is early in initialization (has neither child - * nor parent yet), or it is in the object radix tree. - */ - ergo(pg->cp_state < CPS_FREEING && pg->cp_type == CPT_CACHEABLE, - (void *)radix_tree_lookup(&header->coh_tree, - pg->cp_index) == pg || - (!child && !parent)); + return cl_page_in_use_noref(pg); } static void cl_page_state_set0(const struct lu_env *env, @@ -534,13 +291,9 @@ static void cl_page_state_set0(const struct lu_env *env, old = page->cp_state; PASSERT(env, page, allowed_transitions[old][state]); CL_PAGE_HEADER(D_TRACE, env, page, "%d -> %d\n", old, state); - for (; page; page = page->cp_child) { - PASSERT(env, page, page->cp_state == old); - PASSERT(env, page, - equi(state == CPS_OWNED, page->cp_owner)); - - cl_page_state_set_trust(page, state); - } + PASSERT(env, page, page->cp_state == old); + PASSERT(env, page, equi(state == CPS_OWNED, page->cp_owner)); + cl_page_state_set_trust(page, state); } static void cl_page_state_set(const struct lu_env *env, @@ -574,8 +327,6 @@ EXPORT_SYMBOL(cl_page_get); */ void cl_page_put(const struct lu_env *env, struct cl_page *page) { - PASSERT(env, page, atomic_read(&page->cp_ref) > !!page->cp_parent); - CL_PAGE_HEADER(D_TRACE, env, page, "%d\n", atomic_read(&page->cp_ref)); @@ -594,35 +345,11 @@ void cl_page_put(const struct lu_env *env, struct cl_page *page) } EXPORT_SYMBOL(cl_page_put); -/** - * Returns a VM page associated with a given cl_page. - */ -struct page *cl_page_vmpage(const struct lu_env *env, struct cl_page *page) -{ - const struct cl_page_slice *slice; - - /* - * Find uppermost layer with ->cpo_vmpage() method, and return its - * result. - */ - page = cl_page_top(page); - do { - list_for_each_entry(slice, &page->cp_layers, cpl_linkage) { - if (slice->cpl_ops->cpo_vmpage) - return slice->cpl_ops->cpo_vmpage(env, slice); - } - page = page->cp_child; - } while (page); - LBUG(); /* ->cpo_vmpage() has to be defined somewhere in the stack */ -} -EXPORT_SYMBOL(cl_page_vmpage); - /** * Returns a cl_page associated with a VM page, and given cl_object. */ struct cl_page *cl_vmpage_page(struct page *vmpage, struct cl_object *obj) { - struct cl_page *top; struct cl_page *page; KLASSERT(PageLocked(vmpage)); @@ -633,36 +360,15 @@ struct cl_page *cl_vmpage_page(struct page *vmpage, struct cl_object *obj) * bottom-to-top pass. */ - /* - * This loop assumes that ->private points to the top-most page. This - * can be rectified easily. - */ - top = (struct cl_page *)vmpage->private; - if (!top) - return NULL; - - for (page = top; page; page = page->cp_child) { - if (cl_object_same(page->cp_obj, obj)) { - cl_page_get_trust(page); - break; - } + page = (struct cl_page *)vmpage->private; + if (page) { + cl_page_get_trust(page); + LASSERT(page->cp_type == CPT_CACHEABLE); } - LASSERT(ergo(page, page->cp_type == CPT_CACHEABLE)); return page; } EXPORT_SYMBOL(cl_vmpage_page); -/** - * Returns the top-page for a given page. - * - * \see cl_object_top(), cl_io_top() - */ -struct cl_page *cl_page_top(struct cl_page *page) -{ - return cl_page_top_trusted(page); -} -EXPORT_SYMBOL(cl_page_top); - const struct cl_page_slice *cl_page_at(const struct cl_page *page, const struct lu_device_type *dtype) { @@ -682,26 +388,43 @@ EXPORT_SYMBOL(cl_page_at); int (*__method)_proto; \ \ __result = 0; \ - __page = cl_page_top(__page); \ - do { \ - list_for_each_entry(__scan, &__page->cp_layers, \ - cpl_linkage) { \ - __method = *(void **)((char *)__scan->cpl_ops + \ - __op); \ - if (__method) { \ - __result = (*__method)(__env, __scan, \ - ## __VA_ARGS__); \ - if (__result != 0) \ - break; \ - } \ - } \ - __page = __page->cp_child; \ - } while (__page && __result == 0); \ + list_for_each_entry(__scan, &__page->cp_layers, cpl_linkage) { \ + __method = *(void **)((char *)__scan->cpl_ops + __op); \ + if (__method) { \ + __result = (*__method)(__env, __scan, ## __VA_ARGS__); \ + if (__result != 0) \ + break; \ + } \ + } \ if (__result > 0) \ __result = 0; \ __result; \ }) +#define CL_PAGE_INVOKE_REVERSE(_env, _page, _op, _proto, ...) \ +({ \ + const struct lu_env *__env = (_env); \ + struct cl_page *__page = (_page); \ + const struct cl_page_slice *__scan; \ + int __result; \ + ptrdiff_t __op = (_op); \ + int (*__method)_proto; \ + \ + __result = 0; \ + list_for_each_entry_reverse(__scan, &__page->cp_layers, \ + cpl_linkage) { \ + __method = *(void **)((char *)__scan->cpl_ops + __op); \ + if (__method) { \ + __result = (*__method)(__env, __scan, ## __VA_ARGS__); \ + if (__result != 0) \ + break; \ + } \ + } \ + if (__result > 0) \ + __result = 0; \ + __result; \ +}) + #define CL_PAGE_INVOID(_env, _page, _op, _proto, ...) \ do { \ const struct lu_env *__env = (_env); \ @@ -710,18 +433,11 @@ do { \ ptrdiff_t __op = (_op); \ void (*__method)_proto; \ \ - __page = cl_page_top(__page); \ - do { \ - list_for_each_entry(__scan, &__page->cp_layers, \ - cpl_linkage) { \ - __method = *(void **)((char *)__scan->cpl_ops + \ - __op); \ - if (__method) \ - (*__method)(__env, __scan, \ - ## __VA_ARGS__); \ - } \ - __page = __page->cp_child; \ - } while (__page); \ + list_for_each_entry(__scan, &__page->cp_layers, cpl_linkage) { \ + __method = *(void **)((char *)__scan->cpl_ops + __op); \ + if (__method) \ + (*__method)(__env, __scan, ## __VA_ARGS__); \ + } \ } while (0) #define CL_PAGE_INVOID_REVERSE(_env, _page, _op, _proto, ...) \ @@ -732,20 +448,11 @@ do { \ ptrdiff_t __op = (_op); \ void (*__method)_proto; \ \ - /* get to the bottom page. */ \ - while (__page->cp_child) \ - __page = __page->cp_child; \ - do { \ - list_for_each_entry_reverse(__scan, &__page->cp_layers, \ - cpl_linkage) { \ - __method = *(void **)((char *)__scan->cpl_ops + \ - __op); \ - if (__method) \ - (*__method)(__env, __scan, \ - ## __VA_ARGS__); \ - } \ - __page = __page->cp_parent; \ - } while (__page); \ + list_for_each_entry_reverse(__scan, &__page->cp_layers, cpl_linkage) { \ + __method = *(void **)((char *)__scan->cpl_ops + __op); \ + if (__method) \ + (*__method)(__env, __scan, ## __VA_ARGS__); \ + } \ } while (0) static int cl_page_invoke(const struct lu_env *env, @@ -771,20 +478,17 @@ static void cl_page_invoid(const struct lu_env *env, static void cl_page_owner_clear(struct cl_page *page) { - for (page = cl_page_top(page); page; page = page->cp_child) { - if (page->cp_owner) { - LASSERT(page->cp_owner->ci_owned_nr > 0); - page->cp_owner->ci_owned_nr--; - page->cp_owner = NULL; - page->cp_task = NULL; - } + if (page->cp_owner) { + LASSERT(page->cp_owner->ci_owned_nr > 0); + page->cp_owner->ci_owned_nr--; + page->cp_owner = NULL; + page->cp_task = NULL; } } static void cl_page_owner_set(struct cl_page *page) { - for (page = cl_page_top(page); page; page = page->cp_child) - page->cp_owner->ci_owned_nr++; + page->cp_owner->ci_owned_nr++; } void cl_page_disown0(const struct lu_env *env, @@ -794,7 +498,7 @@ void cl_page_disown0(const struct lu_env *env, state = pg->cp_state; PINVRNT(env, pg, state == CPS_OWNED || state == CPS_FREEING); - PINVRNT(env, pg, cl_page_invariant(pg)); + PINVRNT(env, pg, cl_page_invariant(pg) || state == CPS_FREEING); cl_page_owner_clear(pg); if (state == CPS_OWNED) @@ -815,8 +519,9 @@ void cl_page_disown0(const struct lu_env *env, */ int cl_page_is_owned(const struct cl_page *pg, const struct cl_io *io) { + struct cl_io *top = cl_io_top((struct cl_io *)io); LINVRNT(cl_object_same(pg->cp_obj, io->ci_obj)); - return pg->cp_state == CPS_OWNED && pg->cp_owner == io; + return pg->cp_state == CPS_OWNED && pg->cp_owner == top; } EXPORT_SYMBOL(cl_page_is_owned); @@ -847,7 +552,6 @@ static int cl_page_own0(const struct lu_env *env, struct cl_io *io, PINVRNT(env, pg, !cl_page_is_owned(pg, io)); - pg = cl_page_top(pg); io = cl_io_top(io); if (pg->cp_state == CPS_FREEING) { @@ -861,7 +565,7 @@ static int cl_page_own0(const struct lu_env *env, struct cl_io *io, if (result == 0) { PASSERT(env, pg, !pg->cp_owner); PASSERT(env, pg, !pg->cp_req); - pg->cp_owner = io; + pg->cp_owner = cl_io_top(io); pg->cp_task = current; cl_page_owner_set(pg); if (pg->cp_state != CPS_FREEING) { @@ -914,12 +618,11 @@ void cl_page_assume(const struct lu_env *env, { PINVRNT(env, pg, cl_object_same(pg->cp_obj, io->ci_obj)); - pg = cl_page_top(pg); io = cl_io_top(io); cl_page_invoid(env, io, pg, CL_PAGE_OP(cpo_assume)); PASSERT(env, pg, !pg->cp_owner); - pg->cp_owner = io; + pg->cp_owner = cl_io_top(io); pg->cp_task = current; cl_page_owner_set(pg); cl_page_state_set(env, pg, CPS_OWNED); @@ -943,7 +646,6 @@ void cl_page_unassume(const struct lu_env *env, PINVRNT(env, pg, cl_page_is_owned(pg, io)); PINVRNT(env, pg, cl_page_invariant(pg)); - pg = cl_page_top(pg); io = cl_io_top(io); cl_page_owner_clear(pg); cl_page_state_set(env, pg, CPS_CACHED); @@ -968,9 +670,9 @@ EXPORT_SYMBOL(cl_page_unassume); void cl_page_disown(const struct lu_env *env, struct cl_io *io, struct cl_page *pg) { - PINVRNT(env, pg, cl_page_is_owned(pg, io)); + PINVRNT(env, pg, cl_page_is_owned(pg, io) || + pg->cp_state == CPS_FREEING); - pg = cl_page_top(pg); io = cl_io_top(io); cl_page_disown0(env, io, pg); } @@ -1001,12 +703,8 @@ EXPORT_SYMBOL(cl_page_discard); * pages, e.g,. in a error handling cl_page_find()->cl_page_delete0() * path. Doesn't check page invariant. */ -static void cl_page_delete0(const struct lu_env *env, struct cl_page *pg, - int radix) +static void cl_page_delete0(const struct lu_env *env, struct cl_page *pg) { - struct cl_page *tmp = pg; - - PASSERT(env, pg, pg == cl_page_top(pg)); PASSERT(env, pg, pg->cp_state != CPS_FREEING); /* @@ -1014,41 +712,11 @@ static void cl_page_delete0(const struct lu_env *env, struct cl_page *pg, */ cl_page_owner_clear(pg); - /* - * unexport the page firstly before freeing it so that - * the page content is considered to be invalid. - * We have to do this because a CPS_FREEING cl_page may - * be NOT under the protection of a cl_lock. - * Afterwards, if this page is found by other threads, then this - * page will be forced to reread. - */ - cl_page_export(env, pg, 0); cl_page_state_set0(env, pg, CPS_FREEING); - CL_PAGE_INVOID(env, pg, CL_PAGE_OP(cpo_delete), - (const struct lu_env *, const struct cl_page_slice *)); - - if (tmp->cp_type == CPT_CACHEABLE) { - if (!radix) - /* !radix means that @pg is not yet in the radix tree, - * skip removing it. - */ - tmp = pg->cp_child; - for (; tmp; tmp = tmp->cp_child) { - void *value; - struct cl_object_header *hdr; - - hdr = cl_object_header(tmp->cp_obj); - spin_lock(&hdr->coh_page_guard); - value = radix_tree_delete(&hdr->coh_tree, - tmp->cp_index); - PASSERT(env, tmp, value == tmp); - PASSERT(env, tmp, hdr->coh_pages > 0); - hdr->coh_pages--; - spin_unlock(&hdr->coh_page_guard); - cl_page_put(env, tmp); - } - } + CL_PAGE_INVOID_REVERSE(env, pg, CL_PAGE_OP(cpo_delete), + (const struct lu_env *, + const struct cl_page_slice *)); } /** @@ -1070,7 +738,6 @@ static void cl_page_delete0(const struct lu_env *env, struct cl_page *pg, * Once page reaches cl_page_state::CPS_FREEING, all remaining references will * drain after some time, at which point page will be recycled. * - * \pre pg == cl_page_top(pg) * \pre VM page is locked * \post pg->cp_state == CPS_FREEING * @@ -1079,29 +746,10 @@ static void cl_page_delete0(const struct lu_env *env, struct cl_page *pg, void cl_page_delete(const struct lu_env *env, struct cl_page *pg) { PINVRNT(env, pg, cl_page_invariant(pg)); - cl_page_delete0(env, pg, 1); + cl_page_delete0(env, pg); } EXPORT_SYMBOL(cl_page_delete); -/** - * Unmaps page from user virtual memory. - * - * Calls cl_page_operations::cpo_unmap() through all layers top-to-bottom. The - * layer responsible for VM interaction has to unmap page from user space - * virtual memory. - * - * \see cl_page_operations::cpo_unmap() - */ -int cl_page_unmap(const struct lu_env *env, - struct cl_io *io, struct cl_page *pg) -{ - PINVRNT(env, pg, cl_page_is_owned(pg, io)); - PINVRNT(env, pg, cl_page_invariant(pg)); - - return cl_page_invoke(env, io, pg, CL_PAGE_OP(cpo_unmap)); -} -EXPORT_SYMBOL(cl_page_unmap); - /** * Marks page up-to-date. * @@ -1129,7 +777,6 @@ int cl_page_is_vmlocked(const struct lu_env *env, const struct cl_page *pg) int result; const struct cl_page_slice *slice; - pg = cl_page_top_trusted((struct cl_page *)pg); slice = container_of(pg->cp_layers.next, const struct cl_page_slice, cpl_linkage); PASSERT(env, pg, slice->cpl_ops->cpo_is_vmlocked); @@ -1241,7 +888,7 @@ void cl_page_completion(const struct lu_env *env, cl_page_put(env, pg); if (anchor) - cl_sync_io_note(anchor, ioret); + cl_sync_io_note(env, anchor, ioret); } EXPORT_SYMBOL(cl_page_completion); @@ -1275,44 +922,6 @@ int cl_page_make_ready(const struct lu_env *env, struct cl_page *pg, } EXPORT_SYMBOL(cl_page_make_ready); -/** - * Notify layers that high level io decided to place this page into a cache - * for future transfer. - * - * The layer implementing transfer engine (osc) has to register this page in - * its queues. - * - * \pre cl_page_is_owned(pg, io) - * \post cl_page_is_owned(pg, io) - * - * \see cl_page_operations::cpo_cache_add() - */ -int cl_page_cache_add(const struct lu_env *env, struct cl_io *io, - struct cl_page *pg, enum cl_req_type crt) -{ - const struct cl_page_slice *scan; - int result = 0; - - PINVRNT(env, pg, crt < CRT_NR); - PINVRNT(env, pg, cl_page_is_owned(pg, io)); - PINVRNT(env, pg, cl_page_invariant(pg)); - - if (crt >= CRT_NR) - return -EINVAL; - - list_for_each_entry(scan, &pg->cp_layers, cpl_linkage) { - if (!scan->cpl_ops->io[crt].cpo_cache_add) - continue; - - result = scan->cpl_ops->io[crt].cpo_cache_add(env, scan, io); - if (result != 0) - break; - } - CL_PAGE_HEADER(D_TRACE, env, pg, "%d %d\n", crt, result); - return result; -} -EXPORT_SYMBOL(cl_page_cache_add); - /** * Called if a pge is being written back by kernel's intention. * @@ -1344,68 +953,21 @@ EXPORT_SYMBOL(cl_page_flush); * \see cl_page_operations::cpo_is_under_lock() */ int cl_page_is_under_lock(const struct lu_env *env, struct cl_io *io, - struct cl_page *page) + struct cl_page *page, pgoff_t *max_index) { int rc; PINVRNT(env, page, cl_page_invariant(page)); - rc = CL_PAGE_INVOKE(env, page, CL_PAGE_OP(cpo_is_under_lock), - (const struct lu_env *, - const struct cl_page_slice *, struct cl_io *), - io); - PASSERT(env, page, rc != 0); + rc = CL_PAGE_INVOKE_REVERSE(env, page, CL_PAGE_OP(cpo_is_under_lock), + (const struct lu_env *, + const struct cl_page_slice *, + struct cl_io *, pgoff_t *), + io, max_index); return rc; } EXPORT_SYMBOL(cl_page_is_under_lock); -static int page_prune_cb(const struct lu_env *env, struct cl_io *io, - struct cl_page *page, void *cbdata) -{ - cl_page_own(env, io, page); - cl_page_unmap(env, io, page); - cl_page_discard(env, io, page); - cl_page_disown(env, io, page); - return CLP_GANG_OKAY; -} - -/** - * Purges all cached pages belonging to the object \a obj. - */ -int cl_pages_prune(const struct lu_env *env, struct cl_object *clobj) -{ - struct cl_thread_info *info; - struct cl_object *obj = cl_object_top(clobj); - struct cl_io *io; - int result; - - info = cl_env_info(env); - io = &info->clt_io; - - /* - * initialize the io. This is ugly since we never do IO in this - * function, we just make cl_page_list functions happy. -jay - */ - io->ci_obj = obj; - io->ci_ignore_layout = 1; - result = cl_io_init(env, io, CIT_MISC, obj); - if (result != 0) { - cl_io_fini(env, io); - return io->ci_result; - } - - do { - result = cl_page_gang_lookup(env, obj, io, 0, CL_PAGE_EOF, - page_prune_cb, NULL); - if (result == CLP_GANG_RESCHED) - cond_resched(); - } while (result != CLP_GANG_OKAY); - - cl_io_fini(env, io); - return result; -} -EXPORT_SYMBOL(cl_pages_prune); - /** * Tells transfer engine that only part of a page is to be transmitted. * @@ -1431,9 +993,8 @@ void cl_page_header_print(const struct lu_env *env, void *cookie, lu_printer_t printer, const struct cl_page *pg) { (*printer)(env, cookie, - "page@%p[%d %p:%lu ^%p_%p %d %d %d %p %p %#x]\n", + "page@%p[%d %p %d %d %d %p %p %#x]\n", pg, atomic_read(&pg->cp_ref), pg->cp_obj, - pg->cp_index, pg->cp_parent, pg->cp_child, pg->cp_state, pg->cp_error, pg->cp_type, pg->cp_owner, pg->cp_req, pg->cp_flags); } @@ -1445,11 +1006,7 @@ EXPORT_SYMBOL(cl_page_header_print); void cl_page_print(const struct lu_env *env, void *cookie, lu_printer_t printer, const struct cl_page *pg) { - struct cl_page *scan; - - for (scan = cl_page_top((struct cl_page *)pg); scan; - scan = scan->cp_child) - cl_page_header_print(env, cookie, printer, scan); + cl_page_header_print(env, cookie, printer, pg); CL_PAGE_INVOKE(env, (struct cl_page *)pg, CL_PAGE_OP(cpo_print), (const struct lu_env *env, const struct cl_page_slice *slice, @@ -1509,21 +1066,13 @@ EXPORT_SYMBOL(cl_page_size); * \see cl_lock_slice_add(), cl_req_slice_add(), cl_io_slice_add() */ void cl_page_slice_add(struct cl_page *page, struct cl_page_slice *slice, - struct cl_object *obj, + struct cl_object *obj, pgoff_t index, const struct cl_page_operations *ops) { list_add_tail(&slice->cpl_linkage, &page->cp_layers); slice->cpl_obj = obj; + slice->cpl_index = index; slice->cpl_ops = ops; slice->cpl_page = page; } EXPORT_SYMBOL(cl_page_slice_add); - -int cl_page_init(void) -{ - return 0; -} - -void cl_page_fini(void) -{ -} diff --git a/drivers/staging/lustre/lustre/obdclass/class_obd.c b/drivers/staging/lustre/lustre/obdclass/class_obd.c index c2cf01596..f48816af8 100644 --- a/drivers/staging/lustre/lustre/obdclass/class_obd.c +++ b/drivers/staging/lustre/lustre/obdclass/class_obd.c @@ -60,6 +60,8 @@ unsigned int obd_dump_on_eviction; EXPORT_SYMBOL(obd_dump_on_eviction); unsigned int obd_max_dirty_pages = 256; EXPORT_SYMBOL(obd_max_dirty_pages); +atomic_t obd_unstable_pages; +EXPORT_SYMBOL(obd_unstable_pages); atomic_t obd_dirty_pages; EXPORT_SYMBOL(obd_dirty_pages); unsigned int obd_timeout = OBD_TIMEOUT_DEFAULT; /* seconds */ @@ -335,7 +337,6 @@ int class_handle_ioctl(unsigned int cmd, unsigned long arg) err = 0; goto out; } - } if (data->ioc_dev == OBD_DEV_BY_DEVNAME) { @@ -461,7 +462,7 @@ static int obd_init_checks(void) CWARN("LPD64 wrong length! strlen(%s)=%d != 2\n", buf, len); ret = -EINVAL; } - if ((u64val & ~CFS_PAGE_MASK) >= PAGE_SIZE) { + if ((u64val & ~PAGE_MASK) >= PAGE_SIZE) { CWARN("mask failed: u64val %llu >= %llu\n", u64val, (__u64)PAGE_SIZE); ret = -EINVAL; diff --git a/drivers/staging/lustre/lustre/obdclass/debug.c b/drivers/staging/lustre/lustre/obdclass/debug.c index 43a7f7a79..e4edfb2c0 100644 --- a/drivers/staging/lustre/lustre/obdclass/debug.c +++ b/drivers/staging/lustre/lustre/obdclass/debug.c @@ -68,8 +68,8 @@ int block_debug_check(char *who, void *addr, int end, __u64 off, __u64 id) LASSERT(addr); - ne_off = le64_to_cpu (off); - id = le64_to_cpu (id); + ne_off = le64_to_cpu(off); + id = le64_to_cpu(id); if (memcmp(addr, (char *)&ne_off, LPDS)) { CDEBUG(D_ERROR, "%s: id %#llx offset %llu off: %#llx != %#llx\n", who, id, off, *(__u64 *)addr, ne_off); diff --git a/drivers/staging/lustre/lustre/obdclass/genops.c b/drivers/staging/lustre/lustre/obdclass/genops.c index cf97b8f06..d95f11d62 100644 --- a/drivers/staging/lustre/lustre/obdclass/genops.c +++ b/drivers/staging/lustre/lustre/obdclass/genops.c @@ -604,7 +604,6 @@ int obd_init_caches(void) out: obd_cleanup_caches(); return -ENOMEM; - } /* map connection to client */ diff --git a/drivers/staging/lustre/lustre/obdclass/linux/linux-module.c b/drivers/staging/lustre/lustre/obdclass/linux/linux-module.c index 8eddf206f..2cd452246 100644 --- a/drivers/staging/lustre/lustre/obdclass/linux/linux-module.c +++ b/drivers/staging/lustre/lustre/obdclass/linux/linux-module.c @@ -158,9 +158,7 @@ int obd_ioctl_popdata(void __user *arg, void *data, int len) { int err; - err = copy_to_user(arg, data, len); - if (err) - err = -EFAULT; + err = copy_to_user(arg, data, len) ? -EFAULT : 0; return err; } EXPORT_SYMBOL(obd_ioctl_popdata); diff --git a/drivers/staging/lustre/lustre/obdclass/llog.c b/drivers/staging/lustre/lustre/obdclass/llog.c index 992573eae..79194d8cb 100644 --- a/drivers/staging/lustre/lustre/obdclass/llog.c +++ b/drivers/staging/lustre/lustre/obdclass/llog.c @@ -265,7 +265,6 @@ repeat: for (rec = (struct llog_rec_hdr *)buf; (char *)rec < buf + LLOG_CHUNK_SIZE; rec = (struct llog_rec_hdr *)((char *)rec + rec->lrh_len)) { - CDEBUG(D_OTHER, "processing rec 0x%p type %#x\n", rec, rec->lrh_type); diff --git a/drivers/staging/lustre/lustre/obdclass/lprocfs_status.c b/drivers/staging/lustre/lustre/obdclass/lprocfs_status.c index d93f42fee..5a1eae1de 100644 --- a/drivers/staging/lustre/lustre/obdclass/lprocfs_status.c +++ b/drivers/staging/lustre/lustre/obdclass/lprocfs_status.c @@ -49,7 +49,7 @@ static const char * const obd_connect_names[] = { "read_only", "lov_index", - "unused", + "connect_from_mds", "write_grant", "server_lock", "version", @@ -122,6 +122,56 @@ int obd_connect_flags2str(char *page, int count, __u64 flags, char *sep) } EXPORT_SYMBOL(obd_connect_flags2str); +static void obd_connect_data_seqprint(struct seq_file *m, + struct obd_connect_data *ocd) +{ + int flags; + + LASSERT(ocd); + flags = ocd->ocd_connect_flags; + + seq_printf(m, " connect_data:\n" + " flags: %llx\n" + " instance: %u\n", + ocd->ocd_connect_flags, + ocd->ocd_instance); + if (flags & OBD_CONNECT_VERSION) + seq_printf(m, " target_version: %u.%u.%u.%u\n", + OBD_OCD_VERSION_MAJOR(ocd->ocd_version), + OBD_OCD_VERSION_MINOR(ocd->ocd_version), + OBD_OCD_VERSION_PATCH(ocd->ocd_version), + OBD_OCD_VERSION_FIX(ocd->ocd_version)); + if (flags & OBD_CONNECT_MDS) + seq_printf(m, " mdt_index: %d\n", ocd->ocd_group); + if (flags & OBD_CONNECT_GRANT) + seq_printf(m, " initial_grant: %d\n", ocd->ocd_grant); + if (flags & OBD_CONNECT_INDEX) + seq_printf(m, " target_index: %u\n", ocd->ocd_index); + if (flags & OBD_CONNECT_BRW_SIZE) + seq_printf(m, " max_brw_size: %d\n", ocd->ocd_brw_size); + if (flags & OBD_CONNECT_IBITS) + seq_printf(m, " ibits_known: %llx\n", + ocd->ocd_ibits_known); + if (flags & OBD_CONNECT_GRANT_PARAM) + seq_printf(m, " grant_block_size: %d\n" + " grant_inode_size: %d\n" + " grant_extent_overhead: %d\n", + ocd->ocd_blocksize, + ocd->ocd_inodespace, + ocd->ocd_grant_extent); + if (flags & OBD_CONNECT_TRANSNO) + seq_printf(m, " first_transno: %llx\n", + ocd->ocd_transno); + if (flags & OBD_CONNECT_CKSUM) + seq_printf(m, " cksum_types: %#x\n", + ocd->ocd_cksum_types); + if (flags & OBD_CONNECT_MAX_EASIZE) + seq_printf(m, " max_easize: %d\n", ocd->ocd_max_easize); + if (flags & OBD_CONNECT_MAXBYTES) + seq_printf(m, " max_object_bytes: %llx\n", + ocd->ocd_maxbytes); +} + int lprocfs_read_frac_helper(char *buffer, unsigned long count, long val, int mult) { @@ -624,6 +674,7 @@ int lprocfs_rd_import(struct seq_file *m, void *data) struct obd_device *obd = data; struct obd_import *imp; struct obd_import_conn *conn; + struct obd_connect_data *ocd; int j; int k; int rw = 0; @@ -635,9 +686,9 @@ int lprocfs_rd_import(struct seq_file *m, void *data) return rc; imp = obd->u.cli.cl_import; + ocd = &imp->imp_connect_data; - seq_printf(m, - "import:\n" + seq_printf(m, "import:\n" " name: %s\n" " target: %s\n" " state: %s\n" @@ -649,9 +700,9 @@ int lprocfs_rd_import(struct seq_file *m, void *data) imp->imp_connect_data.ocd_instance); obd_connect_seq_flags2str(m, imp->imp_connect_data.ocd_connect_flags, ", "); - seq_printf(m, - " ]\n" - " import_flags: [ "); + seq_printf(m, " ]\n"); + obd_connect_data_seqprint(m, ocd); + seq_printf(m, " import_flags: [ "); obd_import_flags2str(imp, m); seq_printf(m, @@ -694,8 +745,9 @@ int lprocfs_rd_import(struct seq_file *m, void *data) do_div(sum, ret.lc_count); ret.lc_sum = sum; - } else + } else { ret.lc_sum = 0; + } seq_printf(m, " rpcs:\n" " inflight: %u\n" @@ -1471,10 +1523,10 @@ EXPORT_SYMBOL(lprocfs_oh_tally); void lprocfs_oh_tally_log2(struct obd_histogram *oh, unsigned int value) { - unsigned int val; + unsigned int val = 0; - for (val = 0; ((1 << val) < value) && (val <= OBD_HIST_MAX); val++) - ; + if (likely(value != 0)) + val = min(fls(value - 1), OBD_HIST_MAX); lprocfs_oh_tally(oh, val); } diff --git a/drivers/staging/lustre/lustre/obdclass/lu_object.c b/drivers/staging/lustre/lustre/obdclass/lu_object.c index 978568ada..e04385760 100644 --- a/drivers/staging/lustre/lustre/obdclass/lu_object.c +++ b/drivers/staging/lustre/lustre/obdclass/lu_object.c @@ -55,6 +55,7 @@ #include "../include/lustre_disk.h" #include "../include/lustre_fid.h" #include "../include/lu_object.h" +#include "../include/cl_object.h" #include "../include/lu_ref.h" #include @@ -103,7 +104,6 @@ void lu_object_put(const struct lu_env *env, struct lu_object *o) if (!cfs_hash_bd_dec_and_lock(site->ls_obj_hash, &bd, &top->loh_ref)) { if (lu_object_is_dying(top)) { - /* * somebody may be waiting for this, currently only * used for cl_object, see cl_object_put_last(). @@ -357,7 +357,6 @@ int lu_site_purge(const struct lu_env *env, struct lu_site *s, int nr) if (count > 0 && --count == 0) break; - } cfs_hash_bd_unlock(s->ls_obj_hash, &bd, 1); cond_resched(); @@ -715,8 +714,9 @@ struct lu_object *lu_object_find_slice(const struct lu_env *env, obj = lu_object_locate(top->lo_header, dev->ld_type); if (!obj) lu_object_put(env, top); - } else + } else { obj = top; + } return obj; } EXPORT_SYMBOL(lu_object_find_slice); @@ -935,7 +935,7 @@ static void lu_dev_add_linkage(struct lu_site *s, struct lu_device *d) * Initialize site \a s, with \a d as the top level device. */ #define LU_SITE_BITS_MIN 12 -#define LU_SITE_BITS_MAX 24 +#define LU_SITE_BITS_MAX 19 /** * total 256 buckets, we don't want too many buckets because: * - consume too much memory @@ -1468,6 +1468,7 @@ void lu_context_key_quiesce(struct lu_context_key *key) /* * XXX layering violation. */ + cl_env_cache_purge(~0); key->lct_tags |= LCT_QUIESCENT; /* * XXX memory barrier has to go here. diff --git a/drivers/staging/lustre/lustre/obdclass/lustre_peer.c b/drivers/staging/lustre/lustre/obdclass/lustre_peer.c index 5f812460b..b1abe023b 100644 --- a/drivers/staging/lustre/lustre/obdclass/lustre_peer.c +++ b/drivers/staging/lustre/lustre/obdclass/lustre_peer.c @@ -163,8 +163,9 @@ int class_del_uuid(const char *uuid) break; } } - } else + } else { list_splice_init(&g_uuid_list, &deathrow); + } spin_unlock(&g_uuid_lock); if (uuid && list_empty(&deathrow)) { diff --git a/drivers/staging/lustre/lustre/obdclass/obd_config.c b/drivers/staging/lustre/lustre/obdclass/obd_config.c index 5395e994d..cb1d65c3d 100644 --- a/drivers/staging/lustre/lustre/obdclass/obd_config.c +++ b/drivers/staging/lustre/lustre/obdclass/obd_config.c @@ -606,7 +606,7 @@ static int class_del_conn(struct obd_device *obd, struct lustre_cfg *lcfg) return rc; } -LIST_HEAD(lustre_profile_list); +static LIST_HEAD(lustre_profile_list); struct lustre_profile *class_get_profile(const char *prof) { @@ -961,7 +961,6 @@ int class_process_config(struct lustre_cfg *lcfg) default: { err = obd_process_config(obd, sizeof(*lcfg), lcfg); goto out; - } } out: @@ -1001,7 +1000,13 @@ int class_process_proc_param(char *prefix, struct lprocfs_vars *lvars, for (i = 1; i < lcfg->lcfg_bufcount; i++) { key = lustre_cfg_buf(lcfg, i); /* Strip off prefix */ - class_match_param(key, prefix, &key); + if (class_match_param(key, prefix, &key)) { + /* + * If the prefix doesn't match, return error so we + * can pass it down the stack + */ + return -ENOSYS; + } sval = strchr(key, '='); if (!sval || (*(sval + 1) == 0)) { CERROR("Can't parse param %s (missing '=')\n", key); @@ -1034,18 +1039,14 @@ int class_process_proc_param(char *prefix, struct lprocfs_vars *lvars, j++; } if (!matched) { - /* If the prefix doesn't match, return error so we - * can pass it down the stack - */ - if (strnchr(key, keylen, '.')) - return -ENOSYS; - CERROR("%s: unknown param %s\n", + CERROR("%.*s: %s unknown param %s\n", + (int)strlen(prefix) - 1, prefix, (char *)lustre_cfg_string(lcfg, 0), key); /* rc = -EINVAL; continue parsing other params */ skip++; } else if (rc < 0) { - CERROR("writing proc entry %s err %d\n", - var->name, rc); + CERROR("%s: error writing proc entry '%s': rc = %d\n", + prefix, var->name, rc); rc = 0; } else { CDEBUG(D_CONFIG, "%s.%.*s: Set parameter %.*s=%s\n", @@ -1350,6 +1351,7 @@ static int class_config_parse_rec(struct llog_rec_hdr *rec, char *buf, lustre_cfg_string(lcfg, i)); } } + ptr += snprintf(ptr, end - ptr, "\n"); /* return consumed bytes */ rc = ptr - buf; return rc; @@ -1368,7 +1370,7 @@ int class_config_dump_handler(const struct lu_env *env, if (rec->lrh_type == OBD_CFG_REC) { class_config_parse_rec(rec, outstr, 256); - LCONSOLE(D_WARNING, " %s\n", outstr); + LCONSOLE(D_WARNING, " %s", outstr); } else { LCONSOLE(D_WARNING, "unhandled lrh_type: %#x\n", rec->lrh_type); rc = -EINVAL; diff --git a/drivers/staging/lustre/lustre/obdclass/obd_mount.c b/drivers/staging/lustre/lustre/obdclass/obd_mount.c index d3e28a389..e0c90adc7 100644 --- a/drivers/staging/lustre/lustre/obdclass/obd_mount.c +++ b/drivers/staging/lustre/lustre/obdclass/obd_mount.c @@ -102,7 +102,7 @@ int lustre_process_log(struct super_block *sb, char *logname, LCONSOLE_ERROR_MSG(0x15b, "%s: The configuration from log '%s' failed from the MGS (%d). Make sure this client and the MGS are running compatible versions of Lustre.\n", mgc->obd_name, logname, rc); - if (rc) + else if (rc) LCONSOLE_ERROR_MSG(0x15c, "%s: The configuration from log '%s' failed (%d). This may be the result of communication errors between this node and the MGS, a bad configuration, or other errors. See the syslog for more information.\n", mgc->obd_name, logname, rc); @@ -307,7 +307,8 @@ int lustre_start_mgc(struct super_block *sb) while (class_parse_nid(ptr, &nid, &ptr) == 0) { rc = do_lcfg(mgcname, nid, LCFG_ADD_UUID, niduuid, NULL, NULL, NULL); - i++; + if (!rc) + i++; /* Stop at the first failover nid */ if (*ptr == ':') break; @@ -345,16 +346,18 @@ int lustre_start_mgc(struct super_block *sb) sprintf(niduuid, "%s_%x", mgcname, i); j = 0; while (class_parse_nid_quiet(ptr, &nid, &ptr) == 0) { - j++; - rc = do_lcfg(mgcname, nid, - LCFG_ADD_UUID, niduuid, NULL, NULL, NULL); + rc = do_lcfg(mgcname, nid, LCFG_ADD_UUID, niduuid, + NULL, NULL, NULL); + if (!rc) + ++j; if (*ptr == ':') break; } if (j > 0) { rc = do_lcfg(mgcname, 0, LCFG_ADD_CONN, niduuid, NULL, NULL, NULL); - i++; + if (!rc) + i++; } else { /* at ":/fsname" */ break; diff --git a/drivers/staging/lustre/lustre/obdclass/obdo.c b/drivers/staging/lustre/lustre/obdclass/obdo.c index e6436cb4a..748e33f01 100644 --- a/drivers/staging/lustre/lustre/obdclass/obdo.c +++ b/drivers/staging/lustre/lustre/obdclass/obdo.c @@ -185,8 +185,7 @@ void md_from_obdo(struct md_op_data *op_data, struct obdo *oa, u32 valid) op_data->op_attr.ia_valid |= ATTR_BLOCKS; } if (valid & OBD_MD_FLFLAGS) { - ((struct ll_iattr *)&op_data->op_attr)->ia_attr_flags = - oa->o_flags; + op_data->op_attr_flags = oa->o_flags; op_data->op_attr.ia_valid |= ATTR_ATTR_FLAG; } } diff --git a/drivers/staging/lustre/lustre/obdecho/echo_client.c b/drivers/staging/lustre/lustre/obdecho/echo_client.c index 1e83669c2..91ef06f17 100644 --- a/drivers/staging/lustre/lustre/obdecho/echo_client.c +++ b/drivers/staging/lustre/lustre/obdecho/echo_client.c @@ -81,7 +81,6 @@ struct echo_object_conf { struct echo_page { struct cl_page_slice ep_cl; struct mutex ep_lock; - struct page *ep_vmpage; }; struct echo_lock { @@ -164,15 +163,13 @@ static int cl_echo_object_put(struct echo_object *eco); static int cl_echo_object_brw(struct echo_object *eco, int rw, u64 offset, struct page **pages, int npages, int async); -static struct echo_thread_info *echo_env_info(const struct lu_env *env); - struct echo_thread_info { struct echo_object_conf eti_conf; struct lustre_md eti_md; struct cl_2queue eti_queue; struct cl_io eti_io; - struct cl_lock_descr eti_descr; + struct cl_lock eti_lock; struct lu_fid eti_fid; struct lu_fid eti_fid2; }; @@ -219,12 +216,6 @@ static struct lu_kmem_descr echo_caches[] = { * * @{ */ -static struct page *echo_page_vmpage(const struct lu_env *env, - const struct cl_page_slice *slice) -{ - return cl2echo_page(slice)->ep_vmpage; -} - static int echo_page_own(const struct lu_env *env, const struct cl_page_slice *slice, struct cl_io *io, int nonblock) @@ -273,12 +264,10 @@ static void echo_page_completion(const struct lu_env *env, static void echo_page_fini(const struct lu_env *env, struct cl_page_slice *slice) { - struct echo_page *ep = cl2echo_page(slice); struct echo_object *eco = cl2echo_obj(slice->cpl_obj); - struct page *vmpage = ep->ep_vmpage; atomic_dec(&eco->eo_npages); - put_page(vmpage); + put_page(slice->cpl_page->cp_vmpage); } static int echo_page_prep(const struct lu_env *env, @@ -295,7 +284,8 @@ static int echo_page_print(const struct lu_env *env, struct echo_page *ep = cl2echo_page(slice); (*printer)(env, cookie, LUSTRE_ECHO_CLIENT_NAME"-page@%p %d vm@%p\n", - ep, mutex_is_locked(&ep->ep_lock), ep->ep_vmpage); + ep, mutex_is_locked(&ep->ep_lock), + slice->cpl_page->cp_vmpage); return 0; } @@ -303,7 +293,6 @@ static const struct cl_page_operations echo_page_ops = { .cpo_own = echo_page_own, .cpo_disown = echo_page_disown, .cpo_discard = echo_page_discard, - .cpo_vmpage = echo_page_vmpage, .cpo_fini = echo_page_fini, .cpo_print = echo_page_print, .cpo_is_vmlocked = echo_page_is_vmlocked, @@ -336,26 +325,8 @@ static void echo_lock_fini(const struct lu_env *env, kmem_cache_free(echo_lock_kmem, ecl); } -static void echo_lock_delete(const struct lu_env *env, - const struct cl_lock_slice *slice) -{ - struct echo_lock *ecl = cl2echo_lock(slice); - - LASSERT(list_empty(&ecl->el_chain)); -} - -static int echo_lock_fits_into(const struct lu_env *env, - const struct cl_lock_slice *slice, - const struct cl_lock_descr *need, - const struct cl_io *unused) -{ - return 1; -} - static struct cl_lock_operations echo_lock_ops = { .clo_fini = echo_lock_fini, - .clo_delete = echo_lock_delete, - .clo_fits_into = echo_lock_fits_into }; /** @} echo_lock */ @@ -367,15 +338,14 @@ static struct cl_lock_operations echo_lock_ops = { * @{ */ static int echo_page_init(const struct lu_env *env, struct cl_object *obj, - struct cl_page *page, struct page *vmpage) + struct cl_page *page, pgoff_t index) { struct echo_page *ep = cl_object_page_slice(obj, page); struct echo_object *eco = cl2echo_obj(obj); - ep->ep_vmpage = vmpage; - get_page(vmpage); + get_page(page->cp_vmpage); mutex_init(&ep->ep_lock); - cl_page_slice_add(page, &ep->ep_cl, obj, &echo_page_ops); + cl_page_slice_add(page, &ep->ep_cl, obj, index, &echo_page_ops); atomic_inc(&eco->eo_npages); return 0; } @@ -568,6 +538,8 @@ static struct lu_object *echo_object_alloc(const struct lu_env *env, obj = &echo_obj2cl(eco)->co_lu; cl_object_header_init(hdr); + hdr->coh_page_bufsize = cfs_size_round(sizeof(struct cl_page)); + lu_object_init(obj, &hdr->coh_lu, dev); lu_object_add_top(&hdr->coh_lu, obj); @@ -694,8 +666,7 @@ static struct lu_device *echo_device_alloc(const struct lu_env *env, struct obd_device *obd = NULL; /* to keep compiler happy */ struct obd_device *tgt; const char *tgt_type_name; - int rc; - int cleanup = 0; + int rc, err; ed = kzalloc(sizeof(*ed), GFP_NOFS); if (!ed) { @@ -703,16 +674,14 @@ static struct lu_device *echo_device_alloc(const struct lu_env *env, goto out; } - cleanup = 1; cd = &ed->ed_cl; rc = cl_device_init(cd, t); if (rc) - goto out; + goto out_free; cd->cd_lu_dev.ld_ops = &echo_device_lu_ops; cd->cd_ops = &echo_device_cl_ops; - cleanup = 2; obd = class_name2obd(lustre_cfg_string(cfg, 0)); LASSERT(obd); LASSERT(env); @@ -722,28 +691,25 @@ static struct lu_device *echo_device_alloc(const struct lu_env *env, CERROR("Can not find tgt device %s\n", lustre_cfg_string(cfg, 1)); rc = -ENODEV; - goto out; + goto out_device_fini; } next = tgt->obd_lu_dev; if (!strcmp(tgt->obd_type->typ_name, LUSTRE_MDT_NAME)) { CERROR("echo MDT client must be run on server\n"); rc = -EOPNOTSUPP; - goto out; + goto out_device_fini; } rc = echo_site_init(env, ed); if (rc) - goto out; - - cleanup = 3; + goto out_device_fini; rc = echo_client_setup(env, obd, cfg); if (rc) - goto out; + goto out_site_fini; ed->ed_ec = &obd->u.echo_client; - cleanup = 4; /* if echo client is to be stacked upon ost device, the next is * NULL since ost is not a clio device so far @@ -755,7 +721,7 @@ static struct lu_device *echo_device_alloc(const struct lu_env *env, if (next) { if (next->ld_site) { rc = -EBUSY; - goto out; + goto out_cleanup; } next->ld_site = &ed->ed_site->cs_lu; @@ -763,7 +729,7 @@ static struct lu_device *echo_device_alloc(const struct lu_env *env, next->ld_type->ldt_name, NULL); if (rc) - goto out; + goto out_cleanup; } else { LASSERT(strcmp(tgt_type_name, LUSTRE_OST_NAME) == 0); @@ -771,27 +737,19 @@ static struct lu_device *echo_device_alloc(const struct lu_env *env, ed->ed_next = next; return &cd->cd_lu_dev; -out: - switch (cleanup) { - case 4: { - int rc2; - - rc2 = echo_client_cleanup(obd); - if (rc2) - CERROR("Cleanup obd device %s error(%d)\n", - obd->obd_name, rc2); - } - case 3: - echo_site_fini(env, ed); - case 2: - cl_device_fini(&ed->ed_cl); - case 1: - kfree(ed); - case 0: - default: - break; - } +out_cleanup: + err = echo_client_cleanup(obd); + if (err) + CERROR("Cleanup obd device %s error(%d)\n", + obd->obd_name, err); +out_site_fini: + echo_site_fini(env, ed); +out_device_fini: + cl_device_fini(&ed->ed_cl); +out_free: + kfree(ed); +out: return ERR_PTR(rc); } @@ -819,16 +777,7 @@ static void echo_lock_release(const struct lu_env *env, { struct cl_lock *clk = echo_lock2cl(ecl); - cl_lock_get(clk); - cl_unuse(env, clk); - cl_lock_release(env, clk, "ec enqueue", ecl->el_object); - if (!still_used) { - cl_lock_mutex_get(env, clk); - cl_lock_cancel(env, clk); - cl_lock_delete(env, clk); - cl_lock_mutex_put(env, clk); - } - cl_lock_put(env, clk); + cl_lock_release(env, clk); } static struct lu_device *echo_device_free(const struct lu_env *env, @@ -1022,9 +971,11 @@ static int cl_echo_enqueue0(struct lu_env *env, struct echo_object *eco, info = echo_env_info(env); io = &info->eti_io; - descr = &info->eti_descr; + lck = &info->eti_lock; obj = echo_obj2cl(eco); + memset(lck, 0, sizeof(*lck)); + descr = &lck->cll_descr; descr->cld_obj = obj; descr->cld_start = cl_index(obj, start); descr->cld_end = cl_index(obj, end); @@ -1032,25 +983,20 @@ static int cl_echo_enqueue0(struct lu_env *env, struct echo_object *eco, descr->cld_enq_flags = enqflags; io->ci_obj = obj; - lck = cl_lock_request(env, io, descr, "ec enqueue", eco); - if (lck) { + rc = cl_lock_request(env, io, lck); + if (rc == 0) { struct echo_client_obd *ec = eco->eo_dev->ed_ec; struct echo_lock *el; - rc = cl_wait(env, lck); - if (rc == 0) { - el = cl2echo_lock(cl_lock_at(lck, &echo_device_type)); - spin_lock(&ec->ec_lock); - if (list_empty(&el->el_chain)) { - list_add(&el->el_chain, &ec->ec_locks); - el->el_cookie = ++ec->ec_unique; - } - atomic_inc(&el->el_refcount); - *cookie = el->el_cookie; - spin_unlock(&ec->ec_lock); - } else { - cl_lock_release(env, lck, "ec enqueue", current); + el = cl2echo_lock(cl_lock_at(lck, &echo_device_type)); + spin_lock(&ec->ec_lock); + if (list_empty(&el->el_chain)) { + list_add(&el->el_chain, &ec->ec_locks); + el->el_cookie = ++ec->ec_unique; } + atomic_inc(&el->el_refcount); + *cookie = el->el_cookie; + spin_unlock(&ec->ec_lock); } return rc; } @@ -1085,22 +1031,17 @@ static int cl_echo_cancel0(struct lu_env *env, struct echo_device *ed, return 0; } -static int cl_echo_async_brw(const struct lu_env *env, struct cl_io *io, - enum cl_req_type unused, struct cl_2queue *queue) +static void echo_commit_callback(const struct lu_env *env, struct cl_io *io, + struct cl_page *page) { - struct cl_page *clp; - struct cl_page *temp; - int result = 0; + struct echo_thread_info *info; + struct cl_2queue *queue; - cl_page_list_for_each_safe(clp, temp, &queue->c2_qin) { - int rc; + info = echo_env_info(env); + LASSERT(io == &info->eti_io); - rc = cl_page_cache_add(env, io, clp, CRT_WRITE); - if (rc == 0) - continue; - result = result ?: rc; - } - return result; + queue = &info->eti_queue; + cl_page_list_add(&queue->c2_qout, page); } static int cl_echo_object_brw(struct echo_object *eco, int rw, u64 offset, @@ -1119,7 +1060,7 @@ static int cl_echo_object_brw(struct echo_object *eco, int rw, u64 offset, int rc; int i; - LASSERT((offset & ~CFS_PAGE_MASK) == 0); + LASSERT((offset & ~PAGE_MASK) == 0); LASSERT(ed->ed_next); env = cl_env_get(&refcheck); if (IS_ERR(env)) @@ -1179,7 +1120,9 @@ static int cl_echo_object_brw(struct echo_object *eco, int rw, u64 offset, async = async && (typ == CRT_WRITE); if (async) - rc = cl_echo_async_brw(env, io, typ, queue); + rc = cl_io_commit_async(env, io, &queue->c2_qin, + 0, PAGE_SIZE, + echo_commit_callback); else rc = cl_io_submit_sync(env, io, typ, queue, 0); CDEBUG(D_INFO, "echo_client %s write returns %d\n", @@ -1387,7 +1330,7 @@ static int echo_client_kbrw(struct echo_device *ed, int rw, struct obdo *oa, LASSERT(rw == OBD_BRW_WRITE || rw == OBD_BRW_READ); if (count <= 0 || - (count & (~CFS_PAGE_MASK)) != 0) + (count & (~PAGE_MASK)) != 0) return -EINVAL; /* XXX think again with misaligned I/O */ @@ -1409,7 +1352,6 @@ static int echo_client_kbrw(struct echo_device *ed, int rw, struct obdo *oa, for (i = 0, pgp = pga, off = offset; i < npages; i++, pgp++, off += PAGE_SIZE) { - LASSERT(!pgp->pg); /* for cleanup */ rc = -ENOMEM; @@ -1470,7 +1412,7 @@ static int echo_client_prep_commit(const struct lu_env *env, u64 npages, tot_pages; int i, ret = 0, brw_flags = 0; - if (count <= 0 || (count & (~CFS_PAGE_MASK)) != 0) + if (count <= 0 || (count & (~PAGE_MASK)) != 0) return -EINVAL; npages = batch >> PAGE_SHIFT; @@ -1886,7 +1828,6 @@ static int __init obdecho_init(void) static void /*__exit*/ obdecho_exit(void) { echo_client_exit(); - } MODULE_AUTHOR("OpenSFS, Inc. "); diff --git a/drivers/staging/lustre/lustre/osc/lproc_osc.c b/drivers/staging/lustre/lustre/osc/lproc_osc.c index a3358c39b..33a113213 100644 --- a/drivers/staging/lustre/lustre/osc/lproc_osc.c +++ b/drivers/staging/lustre/lustre/osc/lproc_osc.c @@ -121,9 +121,9 @@ static ssize_t max_rpcs_in_flight_store(struct kobject *kobj, atomic_add(added, &osc_pool_req_count); } - client_obd_list_lock(&cli->cl_loi_list_lock); + spin_lock(&cli->cl_loi_list_lock); cli->cl_max_rpcs_in_flight = val; - client_obd_list_unlock(&cli->cl_loi_list_lock); + spin_unlock(&cli->cl_loi_list_lock); return count; } @@ -139,9 +139,9 @@ static ssize_t max_dirty_mb_show(struct kobject *kobj, long val; int mult; - client_obd_list_lock(&cli->cl_loi_list_lock); + spin_lock(&cli->cl_loi_list_lock); val = cli->cl_dirty_max; - client_obd_list_unlock(&cli->cl_loi_list_lock); + spin_unlock(&cli->cl_loi_list_lock); mult = 1 << 20; return lprocfs_read_frac_helper(buf, PAGE_SIZE, val, mult); @@ -169,10 +169,10 @@ static ssize_t max_dirty_mb_store(struct kobject *kobj, pages_number > totalram_pages / 4) /* 1/4 of RAM */ return -ERANGE; - client_obd_list_lock(&cli->cl_loi_list_lock); + spin_lock(&cli->cl_loi_list_lock); cli->cl_dirty_max = (u32)(pages_number << PAGE_SHIFT); osc_wake_cache_waiters(cli); - client_obd_list_unlock(&cli->cl_loi_list_lock); + spin_unlock(&cli->cl_loi_list_lock); return count; } @@ -222,8 +222,16 @@ static ssize_t osc_cached_mb_seq_write(struct file *file, return -ERANGE; rc = atomic_read(&cli->cl_lru_in_list) - pages_number; - if (rc > 0) - (void)osc_lru_shrink(cli, rc); + if (rc > 0) { + struct lu_env *env; + int refcheck; + + env = cl_env_get(&refcheck); + if (!IS_ERR(env)) { + (void)osc_lru_shrink(env, cli, rc, true); + cl_env_put(env, &refcheck); + } + } return count; } @@ -239,9 +247,9 @@ static ssize_t cur_dirty_bytes_show(struct kobject *kobj, struct client_obd *cli = &dev->u.cli; int len; - client_obd_list_lock(&cli->cl_loi_list_lock); + spin_lock(&cli->cl_loi_list_lock); len = sprintf(buf, "%lu\n", cli->cl_dirty); - client_obd_list_unlock(&cli->cl_loi_list_lock); + spin_unlock(&cli->cl_loi_list_lock); return len; } @@ -256,9 +264,9 @@ static ssize_t cur_grant_bytes_show(struct kobject *kobj, struct client_obd *cli = &dev->u.cli; int len; - client_obd_list_lock(&cli->cl_loi_list_lock); + spin_lock(&cli->cl_loi_list_lock); len = sprintf(buf, "%lu\n", cli->cl_avail_grant); - client_obd_list_unlock(&cli->cl_loi_list_lock); + spin_unlock(&cli->cl_loi_list_lock); return len; } @@ -279,12 +287,12 @@ static ssize_t cur_grant_bytes_store(struct kobject *kobj, return rc; /* this is only for shrinking grant */ - client_obd_list_lock(&cli->cl_loi_list_lock); + spin_lock(&cli->cl_loi_list_lock); if (val >= cli->cl_avail_grant) { - client_obd_list_unlock(&cli->cl_loi_list_lock); + spin_unlock(&cli->cl_loi_list_lock); return -EINVAL; } - client_obd_list_unlock(&cli->cl_loi_list_lock); + spin_unlock(&cli->cl_loi_list_lock); if (cli->cl_import->imp_state == LUSTRE_IMP_FULL) rc = osc_shrink_grant_to_target(cli, val); @@ -303,9 +311,9 @@ static ssize_t cur_lost_grant_bytes_show(struct kobject *kobj, struct client_obd *cli = &dev->u.cli; int len; - client_obd_list_lock(&cli->cl_loi_list_lock); + spin_lock(&cli->cl_loi_list_lock); len = sprintf(buf, "%lu\n", cli->cl_lost_grant); - client_obd_list_unlock(&cli->cl_loi_list_lock); + spin_unlock(&cli->cl_loi_list_lock); return len; } @@ -577,14 +585,31 @@ static ssize_t max_pages_per_rpc_store(struct kobject *kobj, if (val == 0 || val > ocd->ocd_brw_size >> PAGE_SHIFT) { return -ERANGE; } - client_obd_list_lock(&cli->cl_loi_list_lock); + spin_lock(&cli->cl_loi_list_lock); cli->cl_max_pages_per_rpc = val; - client_obd_list_unlock(&cli->cl_loi_list_lock); + spin_unlock(&cli->cl_loi_list_lock); return count; } LUSTRE_RW_ATTR(max_pages_per_rpc); +static ssize_t unstable_stats_show(struct kobject *kobj, + struct attribute *attr, + char *buf) +{ + struct obd_device *dev = container_of(kobj, struct obd_device, + obd_kobj); + struct client_obd *cli = &dev->u.cli; + int pages, mb; + + pages = atomic_read(&cli->cl_unstable_count); + mb = (pages * PAGE_SIZE) >> 20; + + return sprintf(buf, "unstable_pages: %8d\n" + "unstable_mb: %8d\n", pages, mb); +} +LUSTRE_RO_ATTR(unstable_stats); + LPROC_SEQ_FOPS_RO_TYPE(osc, connect_flags); LPROC_SEQ_FOPS_RO_TYPE(osc, server_uuid); LPROC_SEQ_FOPS_RO_TYPE(osc, conn_uuid); @@ -623,7 +648,7 @@ static int osc_rpc_stats_seq_show(struct seq_file *seq, void *v) ktime_get_real_ts64(&now); - client_obd_list_lock(&cli->cl_loi_list_lock); + spin_lock(&cli->cl_loi_list_lock); seq_printf(seq, "snapshot_time: %llu.%9lu (secs.usecs)\n", (s64)now.tv_sec, (unsigned long)now.tv_nsec); @@ -707,7 +732,7 @@ static int osc_rpc_stats_seq_show(struct seq_file *seq, void *v) break; } - client_obd_list_unlock(&cli->cl_loi_list_lock); + spin_unlock(&cli->cl_loi_list_lock); return 0; } @@ -794,6 +819,7 @@ static struct attribute *osc_attrs[] = { &lustre_attr_max_pages_per_rpc.attr, &lustre_attr_max_rpcs_in_flight.attr, &lustre_attr_resend_count.attr, + &lustre_attr_unstable_stats.attr, NULL, }; diff --git a/drivers/staging/lustre/lustre/osc/osc_cache.c b/drivers/staging/lustre/lustre/osc/osc_cache.c index 5f25bf83d..5a14bea96 100644 --- a/drivers/staging/lustre/lustre/osc/osc_cache.c +++ b/drivers/staging/lustre/lustre/osc/osc_cache.c @@ -76,6 +76,8 @@ static inline char *ext_flags(struct osc_extent *ext, char *flags) *buf++ = ext->oe_rw ? 'r' : 'w'; if (ext->oe_intree) *buf++ = 'i'; + if (ext->oe_sync) + *buf++ = 'S'; if (ext->oe_srvlock) *buf++ = 's'; if (ext->oe_hp) @@ -121,9 +123,13 @@ static const char *oes_strings[] = { __ext->oe_grants, __ext->oe_nr_pages, \ list_empty_marker(&__ext->oe_pages), \ waitqueue_active(&__ext->oe_waitq) ? '+' : '-', \ - __ext->oe_osclock, __ext->oe_mppr, __ext->oe_owner, \ + __ext->oe_dlmlock, __ext->oe_mppr, __ext->oe_owner, \ /* ----- part 4 ----- */ \ ## __VA_ARGS__); \ + if (lvl == D_ERROR && __ext->oe_dlmlock) \ + LDLM_ERROR(__ext->oe_dlmlock, "extent: %p\n", __ext); \ + else \ + LDLM_DEBUG(__ext->oe_dlmlock, "extent: %p\n", __ext); \ } while (0) #undef EASSERTF @@ -240,20 +246,25 @@ static int osc_extent_sanity_check0(struct osc_extent *ext, goto out; } - if (!ext->oe_osclock && ext->oe_grants > 0) { + if (ext->oe_sync && ext->oe_grants > 0) { rc = 90; goto out; } - if (ext->oe_osclock) { - struct cl_lock_descr *descr; + if (ext->oe_dlmlock) { + struct ldlm_extent *extent; - descr = &ext->oe_osclock->cll_descr; - if (!(descr->cld_start <= ext->oe_start && - descr->cld_end >= ext->oe_max_end)) { + extent = &ext->oe_dlmlock->l_policy_data.l_extent; + if (!(extent->start <= cl_offset(osc2cl(obj), ext->oe_start) && + extent->end >= cl_offset(osc2cl(obj), ext->oe_max_end))) { rc = 100; goto out; } + + if (!(ext->oe_dlmlock->l_granted_mode & (LCK_PW | LCK_GROUP))) { + rc = 102; + goto out; + } } if (ext->oe_nr_pages > ext->oe_mppr) { @@ -276,7 +287,7 @@ static int osc_extent_sanity_check0(struct osc_extent *ext, page_count = 0; list_for_each_entry(oap, &ext->oe_pages, oap_pending_item) { - pgoff_t index = oap2cl_page(oap)->cp_index; + pgoff_t index = osc_index(oap2osc(oap)); ++page_count; if (index > ext->oe_end || index < ext->oe_start) { rc = 110; @@ -359,7 +370,7 @@ static struct osc_extent *osc_extent_alloc(struct osc_object *obj) ext->oe_state = OES_INV; INIT_LIST_HEAD(&ext->oe_pages); init_waitqueue_head(&ext->oe_waitq); - ext->oe_osclock = NULL; + ext->oe_dlmlock = NULL; return ext; } @@ -385,9 +396,11 @@ static void osc_extent_put(const struct lu_env *env, struct osc_extent *ext) LASSERT(ext->oe_state == OES_INV); LASSERT(!ext->oe_intree); - if (ext->oe_osclock) { - cl_lock_put(env, ext->oe_osclock); - ext->oe_osclock = NULL; + if (ext->oe_dlmlock) { + lu_ref_add(&ext->oe_dlmlock->l_reference, + "osc_extent", ext); + LDLM_LOCK_PUT(ext->oe_dlmlock); + ext->oe_dlmlock = NULL; } osc_extent_free(ext); } @@ -543,7 +556,7 @@ static int osc_extent_merge(const struct lu_env *env, struct osc_extent *cur, if (cur->oe_max_end != victim->oe_max_end) return -ERANGE; - LASSERT(cur->oe_osclock == victim->oe_osclock); + LASSERT(cur->oe_dlmlock == victim->oe_dlmlock); ppc_bits = osc_cli(obj)->cl_chunkbits - PAGE_SHIFT; chunk_start = cur->oe_start >> ppc_bits; chunk_end = cur->oe_end >> ppc_bits; @@ -624,10 +637,10 @@ static inline int overlapped(struct osc_extent *ex1, struct osc_extent *ex2) static struct osc_extent *osc_extent_find(const struct lu_env *env, struct osc_object *obj, pgoff_t index, int *grants) - { struct client_obd *cli = osc_cli(obj); - struct cl_lock *lock; + struct osc_lock *olck; + struct cl_lock_descr *descr; struct osc_extent *cur; struct osc_extent *ext; struct osc_extent *conflict = NULL; @@ -644,8 +657,12 @@ static struct osc_extent *osc_extent_find(const struct lu_env *env, if (!cur) return ERR_PTR(-ENOMEM); - lock = cl_lock_at_pgoff(env, osc2cl(obj), index, NULL, 1, 0); - LASSERT(lock->cll_descr.cld_mode >= CLM_WRITE); + olck = osc_env_io(env)->oi_write_osclock; + LASSERTF(olck, "page %lu is not covered by lock\n", index); + LASSERT(olck->ols_state == OLS_GRANTED); + + descr = &olck->ols_cl.cls_lock->cll_descr; + LASSERT(descr->cld_mode >= CLM_WRITE); LASSERT(cli->cl_chunkbits >= PAGE_SHIFT); ppc_bits = cli->cl_chunkbits - PAGE_SHIFT; @@ -657,19 +674,23 @@ static struct osc_extent *osc_extent_find(const struct lu_env *env, max_pages = cli->cl_max_pages_per_rpc; LASSERT((max_pages & ~chunk_mask) == 0); max_end = index - (index % max_pages) + max_pages - 1; - max_end = min_t(pgoff_t, max_end, lock->cll_descr.cld_end); + max_end = min_t(pgoff_t, max_end, descr->cld_end); /* initialize new extent by parameters so far */ cur->oe_max_end = max_end; cur->oe_start = index & chunk_mask; cur->oe_end = ((index + ~chunk_mask + 1) & chunk_mask) - 1; - if (cur->oe_start < lock->cll_descr.cld_start) - cur->oe_start = lock->cll_descr.cld_start; + if (cur->oe_start < descr->cld_start) + cur->oe_start = descr->cld_start; if (cur->oe_end > max_end) cur->oe_end = max_end; - cur->oe_osclock = lock; cur->oe_grants = 0; cur->oe_mppr = max_pages; + if (olck->ols_dlmlock) { + LASSERT(olck->ols_hold); + cur->oe_dlmlock = LDLM_LOCK_GET(olck->ols_dlmlock); + lu_ref_add(&olck->ols_dlmlock->l_reference, "osc_extent", cur); + } /* grants has been allocated by caller */ LASSERTF(*grants >= chunksize + cli->cl_extent_tax, @@ -691,7 +712,7 @@ restart: break; /* if covering by different locks, no chance to match */ - if (lock != ext->oe_osclock) { + if (olck->ols_dlmlock != ext->oe_dlmlock) { EASSERTF(!overlapped(ext, cur), ext, EXTSTR"\n", EXTPARA(cur)); @@ -795,7 +816,7 @@ restart: if (found) { LASSERT(!conflict); if (!IS_ERR(found)) { - LASSERT(found->oe_osclock == cur->oe_osclock); + LASSERT(found->oe_dlmlock == cur->oe_dlmlock); OSC_EXTENT_DUMP(D_CACHE, found, "found caching ext for %lu.\n", index); } @@ -810,7 +831,7 @@ restart: found = osc_extent_hold(cur); osc_extent_insert(obj, cur); OSC_EXTENT_DUMP(D_CACHE, cur, "add into tree %lu/%lu.\n", - index, lock->cll_descr.cld_end); + index, descr->cld_end); } osc_object_unlock(obj); @@ -856,6 +877,8 @@ int osc_extent_finish(const struct lu_env *env, struct osc_extent *ext, ext->oe_rc = rc ?: ext->oe_nr_pages; EASSERT(ergo(rc == 0, ext->oe_state == OES_RPC), ext); + + osc_lru_add_batch(cli, &ext->oe_pages); list_for_each_entry_safe(oap, tmp, &ext->oe_pages, oap_pending_item) { list_del_init(&oap->oap_rpc_item); list_del_init(&oap->oap_pending_item); @@ -877,10 +900,9 @@ int osc_extent_finish(const struct lu_env *env, struct osc_extent *ext, * span a whole chunk on the OST side, or our accounting goes * wrong. Should match the code in filter_grant_check. */ - int offset = oap->oap_page_off & ~CFS_PAGE_MASK; - int count = oap->oap_count + (offset & (blocksize - 1)); - int end = (offset + oap->oap_count) & (blocksize - 1); - + int offset = last_off & ~PAGE_MASK; + int count = last_count + (offset & (blocksize - 1)); + int end = (offset + last_count) & (blocksize - 1); if (end) count += blocksize - end; @@ -943,7 +965,7 @@ static int osc_extent_wait(const struct lu_env *env, struct osc_extent *ext, "%s: wait ext to %d timedout, recovery in progress?\n", osc_export(obj)->exp_obd->obd_name, state); - lwi = LWI_INTR(LWI_ON_SIGNAL_NOOP, NULL); + lwi = LWI_INTR(NULL, NULL); rc = l_wait_event(ext->oe_waitq, extent_wait_cb(ext, state), &lwi); } @@ -990,19 +1012,19 @@ static int osc_extent_truncate(struct osc_extent *ext, pgoff_t trunc_index, /* discard all pages with index greater then trunc_index */ list_for_each_entry_safe(oap, tmp, &ext->oe_pages, oap_pending_item) { - struct cl_page *sub = oap2cl_page(oap); - struct cl_page *page = cl_page_top(sub); + pgoff_t index = osc_index(oap2osc(oap)); + struct cl_page *page = oap2cl_page(oap); LASSERT(list_empty(&oap->oap_rpc_item)); /* only discard the pages with their index greater than * trunc_index, and ... */ - if (sub->cp_index < trunc_index || - (sub->cp_index == trunc_index && partial)) { + if (index < trunc_index || + (index == trunc_index && partial)) { /* accounting how many pages remaining in the chunk * so that we can calculate grants correctly. */ - if (sub->cp_index >> ppc_bits == trunc_chunk) + if (index >> ppc_bits == trunc_chunk) ++pages_in_chunk; continue; } @@ -1013,7 +1035,6 @@ static int osc_extent_truncate(struct osc_extent *ext, pgoff_t trunc_index, lu_ref_add(&page->cp_reference, "truncate", current); if (cl_page_own(env, io, page) == 0) { - cl_page_unmap(env, io, page); cl_page_discard(env, io, page); cl_page_disown(env, io, page); } else { @@ -1126,7 +1147,9 @@ static int osc_extent_make_ready(const struct lu_env *env, last->oap_count = osc_refresh_count(env, last, OBD_BRW_WRITE); LASSERT(last->oap_count > 0); LASSERT(last->oap_page_off + last->oap_count <= PAGE_SIZE); + spin_lock(&last->oap_lock); last->oap_async_flags |= ASYNC_COUNT_STABLE; + spin_unlock(&last->oap_lock); } /* for the rest of pages, we don't need to call osf_refresh_count() @@ -1135,7 +1158,9 @@ static int osc_extent_make_ready(const struct lu_env *env, list_for_each_entry(oap, &ext->oe_pages, oap_pending_item) { if (!(oap->oap_async_flags & ASYNC_COUNT_STABLE)) { oap->oap_count = PAGE_SIZE - oap->oap_page_off; + spin_lock(&last->oap_lock); oap->oap_async_flags |= ASYNC_COUNT_STABLE; + spin_unlock(&last->oap_lock); } } @@ -1256,7 +1281,7 @@ static int osc_make_ready(const struct lu_env *env, struct osc_async_page *oap, int cmd) { struct osc_page *opg = oap2osc_page(oap); - struct cl_page *page = cl_page_top(oap2cl_page(oap)); + struct cl_page *page = oap2cl_page(oap); int result; LASSERT(cmd == OBD_BRW_WRITE); /* no cached reads */ @@ -1271,7 +1296,7 @@ static int osc_refresh_count(const struct lu_env *env, struct osc_async_page *oap, int cmd) { struct osc_page *opg = oap2osc_page(oap); - struct cl_page *page = oap2cl_page(oap); + pgoff_t index = osc_index(oap2osc(oap)); struct cl_object *obj; struct cl_attr *attr = &osc_env_info(env)->oti_attr; @@ -1288,10 +1313,10 @@ static int osc_refresh_count(const struct lu_env *env, if (result < 0) return result; kms = attr->cat_kms; - if (cl_offset(obj, page->cp_index) >= kms) + if (cl_offset(obj, index) >= kms) /* catch race with truncate */ return 0; - else if (cl_offset(obj, page->cp_index + 1) > kms) + else if (cl_offset(obj, index + 1) > kms) /* catch sub-page write at end of file */ return kms % PAGE_SIZE; else @@ -1302,14 +1327,16 @@ static int osc_completion(const struct lu_env *env, struct osc_async_page *oap, int cmd, int rc) { struct osc_page *opg = oap2osc_page(oap); - struct cl_page *page = cl_page_top(oap2cl_page(oap)); + struct cl_page *page = oap2cl_page(oap); struct osc_object *obj = cl2osc(opg->ops_cl.cpl_obj); enum cl_req_type crt; int srvlock; cmd &= ~OBD_BRW_NOQUOTA; - LASSERT(equi(page->cp_state == CPS_PAGEIN, cmd == OBD_BRW_READ)); - LASSERT(equi(page->cp_state == CPS_PAGEOUT, cmd == OBD_BRW_WRITE)); + LASSERTF(equi(page->cp_state == CPS_PAGEIN, cmd == OBD_BRW_READ), + "cp_state:%u, cmd:%d\n", page->cp_state, cmd); + LASSERTF(equi(page->cp_state == CPS_PAGEOUT, cmd == OBD_BRW_WRITE), + "cp_state:%u, cmd:%d\n", page->cp_state, cmd); LASSERT(opg->ops_transfer_pinned); /* @@ -1358,22 +1385,28 @@ static int osc_completion(const struct lu_env *env, struct osc_async_page *oap, return 0; } -#define OSC_DUMP_GRANT(cli, fmt, args...) do { \ +#define OSC_DUMP_GRANT(lvl, cli, fmt, args...) do { \ struct client_obd *__tmp = (cli); \ - CDEBUG(D_CACHE, "%s: { dirty: %ld/%ld dirty_pages: %d/%d " \ - "dropped: %ld avail: %ld, reserved: %ld, flight: %d } " fmt, \ + CDEBUG(lvl, "%s: grant { dirty: %ld/%ld dirty_pages: %d/%d " \ + "unstable_pages: %d/%d dropped: %ld avail: %ld, " \ + "reserved: %ld, flight: %d } lru {in list: %d, " \ + "left: %d, waiters: %d }" fmt, \ __tmp->cl_import->imp_obd->obd_name, \ __tmp->cl_dirty, __tmp->cl_dirty_max, \ atomic_read(&obd_dirty_pages), obd_max_dirty_pages, \ + atomic_read(&obd_unstable_pages), obd_max_dirty_pages, \ __tmp->cl_lost_grant, __tmp->cl_avail_grant, \ - __tmp->cl_reserved_grant, __tmp->cl_w_in_flight, ##args); \ + __tmp->cl_reserved_grant, __tmp->cl_w_in_flight, \ + atomic_read(&__tmp->cl_lru_in_list), \ + atomic_read(&__tmp->cl_lru_busy), \ + atomic_read(&__tmp->cl_lru_shrinkers), ##args); \ } while (0) /* caller must hold loi_list_lock */ static void osc_consume_write_grant(struct client_obd *cli, struct brw_page *pga) { - assert_spin_locked(&cli->cl_loi_list_lock.lock); + assert_spin_locked(&cli->cl_loi_list_lock); LASSERT(!(pga->flag & OBD_BRW_FROM_GRANT)); atomic_inc(&obd_dirty_pages); cli->cl_dirty += PAGE_SIZE; @@ -1389,7 +1422,7 @@ static void osc_consume_write_grant(struct client_obd *cli, static void osc_release_write_grant(struct client_obd *cli, struct brw_page *pga) { - assert_spin_locked(&cli->cl_loi_list_lock.lock); + assert_spin_locked(&cli->cl_loi_list_lock); if (!(pga->flag & OBD_BRW_FROM_GRANT)) { return; } @@ -1408,7 +1441,7 @@ static void osc_release_write_grant(struct client_obd *cli, * To avoid sleeping with object lock held, it's good for us allocate enough * grants before entering into critical section. * - * client_obd_list_lock held by caller + * spin_lock held by caller */ static int osc_reserve_grant(struct client_obd *cli, unsigned int bytes) { @@ -1442,11 +1475,11 @@ static void __osc_unreserve_grant(struct client_obd *cli, static void osc_unreserve_grant(struct client_obd *cli, unsigned int reserved, unsigned int unused) { - client_obd_list_lock(&cli->cl_loi_list_lock); + spin_lock(&cli->cl_loi_list_lock); __osc_unreserve_grant(cli, reserved, unused); if (unused > 0) osc_wake_cache_waiters(cli); - client_obd_list_unlock(&cli->cl_loi_list_lock); + spin_unlock(&cli->cl_loi_list_lock); } /** @@ -1467,7 +1500,7 @@ static void osc_free_grant(struct client_obd *cli, unsigned int nr_pages, { int grant = (1 << cli->cl_chunkbits) + cli->cl_extent_tax; - client_obd_list_lock(&cli->cl_loi_list_lock); + spin_lock(&cli->cl_loi_list_lock); atomic_sub(nr_pages, &obd_dirty_pages); cli->cl_dirty -= nr_pages << PAGE_SHIFT; cli->cl_lost_grant += lost_grant; @@ -1479,7 +1512,7 @@ static void osc_free_grant(struct client_obd *cli, unsigned int nr_pages, cli->cl_avail_grant += grant; } osc_wake_cache_waiters(cli); - client_obd_list_unlock(&cli->cl_loi_list_lock); + spin_unlock(&cli->cl_loi_list_lock); CDEBUG(D_CACHE, "lost %u grant: %lu avail: %lu dirty: %lu\n", lost_grant, cli->cl_lost_grant, cli->cl_avail_grant, cli->cl_dirty); @@ -1491,9 +1524,9 @@ static void osc_free_grant(struct client_obd *cli, unsigned int nr_pages, */ static void osc_exit_cache(struct client_obd *cli, struct osc_async_page *oap) { - client_obd_list_lock(&cli->cl_loi_list_lock); + spin_lock(&cli->cl_loi_list_lock); osc_release_write_grant(cli, &oap->oap_brw_page); - client_obd_list_unlock(&cli->cl_loi_list_lock); + spin_unlock(&cli->cl_loi_list_lock); } /** @@ -1506,14 +1539,15 @@ static int osc_enter_cache_try(struct client_obd *cli, { int rc; - OSC_DUMP_GRANT(cli, "need:%d.\n", bytes); + OSC_DUMP_GRANT(D_CACHE, cli, "need:%d.\n", bytes); rc = osc_reserve_grant(cli, bytes); if (rc < 0) return 0; if (cli->cl_dirty + PAGE_SIZE <= cli->cl_dirty_max && - atomic_read(&obd_dirty_pages) + 1 <= obd_max_dirty_pages) { + atomic_read(&obd_unstable_pages) + 1 + + atomic_read(&obd_dirty_pages) <= obd_max_dirty_pages) { osc_consume_write_grant(cli, &oap->oap_brw_page); if (transient) { cli->cl_dirty_transit += PAGE_SIZE; @@ -1532,9 +1566,9 @@ static int ocw_granted(struct client_obd *cli, struct osc_cache_waiter *ocw) { int rc; - client_obd_list_lock(&cli->cl_loi_list_lock); + spin_lock(&cli->cl_loi_list_lock); rc = list_empty(&ocw->ocw_entry); - client_obd_list_unlock(&cli->cl_loi_list_lock); + spin_unlock(&cli->cl_loi_list_lock); return rc; } @@ -1551,12 +1585,13 @@ static int osc_enter_cache(const struct lu_env *env, struct client_obd *cli, struct osc_object *osc = oap->oap_obj; struct lov_oinfo *loi = osc->oo_oinfo; struct osc_cache_waiter ocw; - struct l_wait_info lwi = LWI_INTR(LWI_ON_SIGNAL_NOOP, NULL); + struct l_wait_info lwi = LWI_TIMEOUT_INTR(cfs_time_seconds(600), NULL, + LWI_ON_SIGNAL_NOOP, NULL); int rc = -EDQUOT; - OSC_DUMP_GRANT(cli, "need:%d.\n", bytes); + OSC_DUMP_GRANT(D_CACHE, cli, "need:%d.\n", bytes); - client_obd_list_lock(&cli->cl_loi_list_lock); + spin_lock(&cli->cl_loi_list_lock); /* force the caller to try sync io. this can jump the list * of queued writes and create a discontiguous rpc stream @@ -1587,7 +1622,7 @@ static int osc_enter_cache(const struct lu_env *env, struct client_obd *cli, while (cli->cl_dirty > 0 || cli->cl_w_in_flight > 0) { list_add_tail(&ocw.ocw_entry, &cli->cl_cache_waiters); ocw.ocw_rc = 0; - client_obd_list_unlock(&cli->cl_loi_list_lock); + spin_unlock(&cli->cl_loi_list_lock); osc_io_unplug_async(env, cli, NULL); @@ -1596,10 +1631,17 @@ static int osc_enter_cache(const struct lu_env *env, struct client_obd *cli, rc = l_wait_event(ocw.ocw_waitq, ocw_granted(cli, &ocw), &lwi); - client_obd_list_lock(&cli->cl_loi_list_lock); + spin_lock(&cli->cl_loi_list_lock); - /* l_wait_event is interrupted by signal */ + /* l_wait_event is interrupted by signal, or timed out */ if (rc < 0) { + if (rc == -ETIMEDOUT) { + OSC_DUMP_GRANT(D_ERROR, cli, + "try to reserve %d.\n", bytes); + osc_extent_tree_dump(D_ERROR, osc); + rc = -EDQUOT; + } + list_del_init(&ocw.ocw_entry); goto out; } @@ -1615,8 +1657,8 @@ static int osc_enter_cache(const struct lu_env *env, struct client_obd *cli, } } out: - client_obd_list_unlock(&cli->cl_loi_list_lock); - OSC_DUMP_GRANT(cli, "returned %d.\n", rc); + spin_unlock(&cli->cl_loi_list_lock); + OSC_DUMP_GRANT(D_CACHE, cli, "returned %d.\n", rc); return rc; } @@ -1633,8 +1675,8 @@ void osc_wake_cache_waiters(struct client_obd *cli) ocw->ocw_rc = -EDQUOT; /* we can't dirty more */ if ((cli->cl_dirty + PAGE_SIZE > cli->cl_dirty_max) || - (atomic_read(&obd_dirty_pages) + 1 > - obd_max_dirty_pages)) { + (atomic_read(&obd_unstable_pages) + 1 + + atomic_read(&obd_dirty_pages) > obd_max_dirty_pages)) { CDEBUG(D_CACHE, "no dirty room: dirty: %ld osc max %ld, sys max %d\n", cli->cl_dirty, cli->cl_dirty_max, obd_max_dirty_pages); @@ -1776,9 +1818,9 @@ static int osc_list_maint(struct client_obd *cli, struct osc_object *osc) { int is_ready; - client_obd_list_lock(&cli->cl_loi_list_lock); + spin_lock(&cli->cl_loi_list_lock); is_ready = __osc_list_maint(cli, osc); - client_obd_list_unlock(&cli->cl_loi_list_lock); + spin_unlock(&cli->cl_loi_list_lock); return is_ready; } @@ -1799,13 +1841,101 @@ static void osc_process_ar(struct osc_async_rc *ar, __u64 xid, ar->ar_force_sync = 1; ar->ar_min_xid = ptlrpc_sample_next_xid(); return; - } if (ar->ar_force_sync && (xid >= ar->ar_min_xid)) ar->ar_force_sync = 0; } +/** + * Performs "unstable" page accounting. This function balances the + * increment operations performed in osc_inc_unstable_pages. It is + * registered as the RPC request callback, and is executed when the + * bulk RPC is committed on the server. Thus at this point, the pages + * involved in the bulk transfer are no longer considered unstable. + */ +void osc_dec_unstable_pages(struct ptlrpc_request *req) +{ + struct client_obd *cli = &req->rq_import->imp_obd->u.cli; + struct ptlrpc_bulk_desc *desc = req->rq_bulk; + int page_count = desc->bd_iov_count; + int i; + + /* No unstable page tracking */ + if (!cli->cl_cache) + return; + + LASSERT(page_count >= 0); + + for (i = 0; i < page_count; i++) + dec_zone_page_state(desc->bd_iov[i].kiov_page, NR_UNSTABLE_NFS); + + atomic_sub(page_count, &cli->cl_cache->ccc_unstable_nr); + LASSERT(atomic_read(&cli->cl_cache->ccc_unstable_nr) >= 0); + + atomic_sub(page_count, &cli->cl_unstable_count); + LASSERT(atomic_read(&cli->cl_unstable_count) >= 0); + + atomic_sub(page_count, &obd_unstable_pages); + LASSERT(atomic_read(&obd_unstable_pages) >= 0); + + spin_lock(&req->rq_lock); + req->rq_committed = 1; + req->rq_unstable = 0; + spin_unlock(&req->rq_lock); + + wake_up_all(&cli->cl_cache->ccc_unstable_waitq); +} + +/* "unstable" page accounting. See: osc_dec_unstable_pages. */ +void osc_inc_unstable_pages(struct ptlrpc_request *req) +{ + struct client_obd *cli = &req->rq_import->imp_obd->u.cli; + struct ptlrpc_bulk_desc *desc = req->rq_bulk; + long page_count = desc->bd_iov_count; + int i; + + /* No unstable page tracking */ + if (!cli->cl_cache) + return; + + LASSERT(page_count >= 0); + + for (i = 0; i < page_count; i++) + inc_zone_page_state(desc->bd_iov[i].kiov_page, NR_UNSTABLE_NFS); + + LASSERT(atomic_read(&cli->cl_cache->ccc_unstable_nr) >= 0); + atomic_add(page_count, &cli->cl_cache->ccc_unstable_nr); + + LASSERT(atomic_read(&cli->cl_unstable_count) >= 0); + atomic_add(page_count, &cli->cl_unstable_count); + + LASSERT(atomic_read(&obd_unstable_pages) >= 0); + atomic_add(page_count, &obd_unstable_pages); + + spin_lock(&req->rq_lock); + + /* + * If the request has already been committed (i.e. brw_commit + * called via rq_commit_cb), we need to undo the unstable page + * increments we just performed because rq_commit_cb wont be + * called again. Otherwise, just set the commit callback so the + * unstable page accounting is properly updated when the request + * is committed + */ + if (req->rq_committed) { + /* Drop lock before calling osc_dec_unstable_pages */ + spin_unlock(&req->rq_lock); + osc_dec_unstable_pages(req); + spin_lock(&req->rq_lock); + } else { + req->rq_unstable = 1; + req->rq_commit_cb = osc_dec_unstable_pages; + } + + spin_unlock(&req->rq_lock); +} + /* this must be called holding the loi list lock to give coverage to exit_cache, * async_flag maintenance, and oap_request */ @@ -1817,6 +1947,9 @@ static void osc_ap_completion(const struct lu_env *env, struct client_obd *cli, __u64 xid = 0; if (oap->oap_request) { + if (!rc) + osc_inc_unstable_pages(oap->oap_request); + xid = ptlrpc_req_xid(oap->oap_request); ptlrpc_req_finished(oap->oap_request); oap->oap_request = NULL; @@ -1829,10 +1962,10 @@ static void osc_ap_completion(const struct lu_env *env, struct client_obd *cli, oap->oap_interrupted = 0; if (oap->oap_cmd & OBD_BRW_WRITE && xid > 0) { - client_obd_list_lock(&cli->cl_loi_list_lock); + spin_lock(&cli->cl_loi_list_lock); osc_process_ar(&cli->cl_ar, xid, rc); osc_process_ar(&loi->loi_ar, xid, rc); - client_obd_list_unlock(&cli->cl_loi_list_lock); + spin_unlock(&cli->cl_loi_list_lock); } rc = osc_completion(env, oap, oap->oap_cmd, rc); @@ -2133,9 +2266,8 @@ static void osc_check_rpcs(const struct lu_env *env, struct client_obd *cli) } cl_object_get(obj); - client_obd_list_unlock(&cli->cl_loi_list_lock); - lu_object_ref_add_at(&obj->co_lu, &link, "check", - current); + spin_unlock(&cli->cl_loi_list_lock); + lu_object_ref_add_at(&obj->co_lu, &link, "check", current); /* attempt some read/write balancing by alternating between * reads and writes in an object. The makes_rpc checks here @@ -2178,11 +2310,10 @@ static void osc_check_rpcs(const struct lu_env *env, struct client_obd *cli) osc_object_unlock(osc); osc_list_maint(cli, osc); - lu_object_ref_del_at(&obj->co_lu, &link, "check", - current); + lu_object_ref_del_at(&obj->co_lu, &link, "check", current); cl_object_put(env, obj); - client_obd_list_lock(&cli->cl_loi_list_lock); + spin_lock(&cli->cl_loi_list_lock); } } @@ -2199,9 +2330,9 @@ static int osc_io_unplug0(const struct lu_env *env, struct client_obd *cli, * potential stack overrun problem. LU-2859 */ atomic_inc(&cli->cl_lru_shrinkers); - client_obd_list_lock(&cli->cl_loi_list_lock); + spin_lock(&cli->cl_loi_list_lock); osc_check_rpcs(env, cli); - client_obd_list_unlock(&cli->cl_loi_list_lock); + spin_unlock(&cli->cl_loi_list_lock); atomic_dec(&cli->cl_lru_shrinkers); } else { CDEBUG(D_CACHE, "Queue writeback work for client %p.\n", cli); @@ -2238,7 +2369,7 @@ int osc_prep_async_page(struct osc_object *osc, struct osc_page *ops, oap->oap_page = page; oap->oap_obj_off = offset; - LASSERT(!(offset & ~CFS_PAGE_MASK)); + LASSERT(!(offset & ~PAGE_MASK)); if (!client_is_remote(exp) && capable(CFS_CAP_SYS_RESOURCE)) oap->oap_brw_flags = OBD_BRW_NOQUOTA; @@ -2306,16 +2437,23 @@ int osc_queue_async_io(const struct lu_env *env, struct cl_io *io, return rc; } + if (osc_over_unstable_soft_limit(cli)) + brw_flags |= OBD_BRW_SOFT_SYNC; + oap->oap_cmd = cmd; oap->oap_page_off = ops->ops_from; oap->oap_count = ops->ops_to - ops->ops_from; + /* + * No need to hold a lock here, + * since this page is not in any list yet. + */ oap->oap_async_flags = 0; oap->oap_brw_flags = brw_flags; OSC_IO_DEBUG(osc, "oap %p page %p added for cmd %d\n", oap, oap->oap_page, oap->oap_cmd & OBD_BRW_RWMASK); - index = oap2cl_page(oap)->cp_index; + index = osc_index(oap2osc(oap)); /* Add this page into extent by the following steps: * 1. if there exists an active extent for this IO, mostly this page @@ -2334,9 +2472,9 @@ int osc_queue_async_io(const struct lu_env *env, struct cl_io *io, grants = 0; /* it doesn't need any grant to dirty this page */ - client_obd_list_lock(&cli->cl_loi_list_lock); + spin_lock(&cli->cl_loi_list_lock); rc = osc_enter_cache_try(cli, oap, grants, 0); - client_obd_list_unlock(&cli->cl_loi_list_lock); + spin_unlock(&cli->cl_loi_list_lock); if (rc == 0) { /* try failed */ grants = 0; need_release = 1; @@ -2427,21 +2565,21 @@ int osc_teardown_async_page(const struct lu_env *env, LASSERT(oap->oap_magic == OAP_MAGIC); CDEBUG(D_INFO, "teardown oap %p page %p at index %lu.\n", - oap, ops, oap2cl_page(oap)->cp_index); + oap, ops, osc_index(oap2osc(oap))); osc_object_lock(obj); if (!list_empty(&oap->oap_rpc_item)) { CDEBUG(D_CACHE, "oap %p is not in cache.\n", oap); rc = -EBUSY; } else if (!list_empty(&oap->oap_pending_item)) { - ext = osc_extent_lookup(obj, oap2cl_page(oap)->cp_index); + ext = osc_extent_lookup(obj, osc_index(oap2osc(oap))); /* only truncated pages are allowed to be taken out. * See osc_extent_truncate() and osc_cache_truncate_start() * for details. */ if (ext && ext->oe_state != OES_TRUNC) { OSC_EXTENT_DUMP(D_ERROR, ext, "trunc at %lu.\n", - oap2cl_page(oap)->cp_index); + osc_index(oap2osc(oap))); rc = -EBUSY; } } @@ -2464,7 +2602,7 @@ int osc_flush_async_page(const struct lu_env *env, struct cl_io *io, struct osc_extent *ext = NULL; struct osc_object *obj = cl2osc(ops->ops_cl.cpl_obj); struct cl_page *cp = ops->ops_cl.cpl_page; - pgoff_t index = cp->cp_index; + pgoff_t index = osc_index(ops); struct osc_async_page *oap = &ops->ops_oap; bool unplug = false; int rc = 0; @@ -2479,8 +2617,7 @@ int osc_flush_async_page(const struct lu_env *env, struct cl_io *io, switch (ext->oe_state) { case OES_RPC: case OES_LOCK_DONE: - CL_PAGE_DEBUG(D_ERROR, env, cl_page_top(cp), - "flush an in-rpc page?\n"); + CL_PAGE_DEBUG(D_ERROR, env, cp, "flush an in-rpc page?\n"); LASSERT(0); break; case OES_LOCKING: @@ -2506,7 +2643,7 @@ int osc_flush_async_page(const struct lu_env *env, struct cl_io *io, break; } - rc = cl_page_prep(env, io, cl_page_top(cp), CRT_WRITE); + rc = cl_page_prep(env, io, cp, CRT_WRITE); if (rc) goto out; @@ -2550,7 +2687,7 @@ int osc_cancel_async_page(const struct lu_env *env, struct osc_page *ops) struct osc_extent *ext; struct osc_extent *found = NULL; struct list_head *plist; - pgoff_t index = oap2cl_page(oap)->cp_index; + pgoff_t index = osc_index(ops); int rc = -EBUSY; int cmd; @@ -2613,12 +2750,12 @@ int osc_queue_sync_pages(const struct lu_env *env, struct osc_object *obj, pgoff_t end = 0; list_for_each_entry(oap, list, oap_pending_item) { - struct cl_page *cp = oap2cl_page(oap); + pgoff_t index = osc_index(oap2osc(oap)); - if (cp->cp_index > end) - end = cp->cp_index; - if (cp->cp_index < start) - start = cp->cp_index; + if (index > end) + end = index; + if (index < start) + start = index; ++page_count; mppr <<= (page_count > mppr); } @@ -2633,6 +2770,7 @@ int osc_queue_sync_pages(const struct lu_env *env, struct osc_object *obj, } ext->oe_rw = !!(cmd & OBD_BRW_READ); + ext->oe_sync = 1; ext->oe_urgent = 1; ext->oe_start = start; ext->oe_end = ext->oe_max_end = end; @@ -2988,7 +3126,200 @@ int osc_cache_writeback_range(const struct lu_env *env, struct osc_object *obj, result = rc; } - OSC_IO_DEBUG(obj, "cache page out.\n"); + OSC_IO_DEBUG(obj, "pageout [%lu, %lu], %d.\n", start, end, result); + return result; +} + +/** + * Returns a list of pages by a given [start, end] of \a obj. + * + * \param resched If not NULL, then we give up before hogging CPU for too + * long and set *resched = 1, in that case caller should implement a retry + * logic. + * + * Gang tree lookup (radix_tree_gang_lookup()) optimization is absolutely + * crucial in the face of [offset, EOF] locks. + * + * Return at least one page in @queue unless there is no covered page. + */ +int osc_page_gang_lookup(const struct lu_env *env, struct cl_io *io, + struct osc_object *osc, pgoff_t start, pgoff_t end, + osc_page_gang_cbt cb, void *cbdata) +{ + struct osc_page *ops; + void **pvec; + pgoff_t idx; + unsigned int nr; + unsigned int i; + unsigned int j; + int res = CLP_GANG_OKAY; + bool tree_lock = true; + + idx = start; + pvec = osc_env_info(env)->oti_pvec; + spin_lock(&osc->oo_tree_lock); + while ((nr = radix_tree_gang_lookup(&osc->oo_tree, pvec, + idx, OTI_PVEC_SIZE)) > 0) { + struct cl_page *page; + bool end_of_region = false; + + for (i = 0, j = 0; i < nr; ++i) { + ops = pvec[i]; + pvec[i] = NULL; + + idx = osc_index(ops); + if (idx > end) { + end_of_region = true; + break; + } + + page = ops->ops_cl.cpl_page; + LASSERT(page->cp_type == CPT_CACHEABLE); + if (page->cp_state == CPS_FREEING) + continue; + + cl_page_get(page); + lu_ref_add_atomic(&page->cp_reference, + "gang_lookup", current); + pvec[j++] = ops; + } + ++idx; + + /* + * Here a delicate locking dance is performed. Current thread + * holds a reference to a page, but has to own it before it + * can be placed into queue. Owning implies waiting, so + * radix-tree lock is to be released. After a wait one has to + * check that pages weren't truncated (cl_page_own() returns + * error in the latter case). + */ + spin_unlock(&osc->oo_tree_lock); + tree_lock = false; + + for (i = 0; i < j; ++i) { + ops = pvec[i]; + if (res == CLP_GANG_OKAY) + res = (*cb)(env, io, ops, cbdata); + + page = ops->ops_cl.cpl_page; + lu_ref_del(&page->cp_reference, "gang_lookup", current); + cl_page_put(env, page); + } + if (nr < OTI_PVEC_SIZE || end_of_region) + break; + + if (res == CLP_GANG_OKAY && need_resched()) + res = CLP_GANG_RESCHED; + if (res != CLP_GANG_OKAY) + break; + + spin_lock(&osc->oo_tree_lock); + tree_lock = true; + } + if (tree_lock) + spin_unlock(&osc->oo_tree_lock); + return res; +} + +/** + * Check if page @page is covered by an extra lock or discard it. + */ +static int check_and_discard_cb(const struct lu_env *env, struct cl_io *io, + struct osc_page *ops, void *cbdata) +{ + struct osc_thread_info *info = osc_env_info(env); + struct osc_object *osc = cbdata; + pgoff_t index; + + index = osc_index(ops); + if (index >= info->oti_fn_index) { + struct ldlm_lock *tmp; + struct cl_page *page = ops->ops_cl.cpl_page; + + /* refresh non-overlapped index */ + tmp = osc_dlmlock_at_pgoff(env, osc, index, 0, 0); + if (tmp) { + __u64 end = tmp->l_policy_data.l_extent.end; + /* Cache the first-non-overlapped index so as to skip + * all pages within [index, oti_fn_index). This is safe + * because if tmp lock is canceled, it will discard + * these pages. + */ + info->oti_fn_index = cl_index(osc2cl(osc), end + 1); + if (end == OBD_OBJECT_EOF) + info->oti_fn_index = CL_PAGE_EOF; + LDLM_LOCK_PUT(tmp); + } else if (cl_page_own(env, io, page) == 0) { + /* discard the page */ + cl_page_discard(env, io, page); + cl_page_disown(env, io, page); + } else { + LASSERT(page->cp_state == CPS_FREEING); + } + } + + info->oti_next_index = index + 1; + return CLP_GANG_OKAY; +} + +static int discard_cb(const struct lu_env *env, struct cl_io *io, + struct osc_page *ops, void *cbdata) +{ + struct osc_thread_info *info = osc_env_info(env); + struct cl_page *page = ops->ops_cl.cpl_page; + + /* page is top page. */ + info->oti_next_index = osc_index(ops) + 1; + if (cl_page_own(env, io, page) == 0) { + KLASSERT(ergo(page->cp_type == CPT_CACHEABLE, + !PageDirty(cl_page_vmpage(page)))); + + /* discard the page */ + cl_page_discard(env, io, page); + cl_page_disown(env, io, page); + } else { + LASSERT(page->cp_state == CPS_FREEING); + } + + return CLP_GANG_OKAY; +} + +/** + * Discard pages protected by the given lock. This function traverses radix + * tree to find all covering pages and discard them. If a page is being covered + * by other locks, it should remain in cache. + * + * If error happens on any step, the process continues anyway (the reasoning + * behind this being that lock cancellation cannot be delayed indefinitely). + */ +int osc_lock_discard_pages(const struct lu_env *env, struct osc_object *osc, + pgoff_t start, pgoff_t end, enum cl_lock_mode mode) +{ + struct osc_thread_info *info = osc_env_info(env); + struct cl_io *io = &info->oti_io; + osc_page_gang_cbt cb; + int res; + int result; + + io->ci_obj = cl_object_top(osc2cl(osc)); + io->ci_ignore_layout = 1; + result = cl_io_init(env, io, CIT_MISC, io->ci_obj); + if (result != 0) + goto out; + + cb = mode == CLM_READ ? check_and_discard_cb : discard_cb; + info->oti_fn_index = info->oti_next_index = start; + do { + res = osc_page_gang_lookup(env, io, osc, + info->oti_next_index, end, cb, osc); + if (info->oti_next_index > end) + break; + + if (res == CLP_GANG_RESCHED) + cond_resched(); + } while (res != CLP_GANG_OKAY); +out: + cl_io_fini(env, io); return result; } diff --git a/drivers/staging/lustre/lustre/osc/osc_cl_internal.h b/drivers/staging/lustre/lustre/osc/osc_cl_internal.h index d55d04d04..ae19d396b 100644 --- a/drivers/staging/lustre/lustre/osc/osc_cl_internal.h +++ b/drivers/staging/lustre/lustre/osc/osc_cl_internal.h @@ -51,7 +51,6 @@ #include "../include/obd.h" /* osc_build_res_name() */ #include "../include/cl_object.h" -#include "../include/lclient.h" #include "osc_internal.h" /** \defgroup osc osc @@ -68,6 +67,9 @@ struct osc_io { struct cl_io_slice oi_cl; /** true if this io is lockless. */ int oi_lockless; + /** how many LRU pages are reserved for this IO */ + int oi_lru_reserved; + /** active extents, we know how many bytes is going to be written, * so having an active extent will prevent it from being fragmented */ @@ -77,6 +79,8 @@ struct osc_io { */ struct osc_extent *oi_trunc; + /** write osc_lock for this IO, used by osc_extent_find(). */ + struct osc_lock *oi_write_osclock; struct obd_info oi_info; struct obdo oi_oa; struct osc_async_cbargs { @@ -100,7 +104,7 @@ struct osc_session { struct osc_io os_io; }; -#define OTI_PVEC_SIZE 64 +#define OTI_PVEC_SIZE 256 struct osc_thread_info { struct ldlm_res_id oti_resname; ldlm_policy_data_t oti_policy; @@ -109,7 +113,13 @@ struct osc_thread_info { struct lustre_handle oti_handle; struct cl_page_list oti_plist; struct cl_io oti_io; - struct cl_page *oti_pvec[OTI_PVEC_SIZE]; + void *oti_pvec[OTI_PVEC_SIZE]; + /** + * Fields used by cl_lock_discard_pages(). + */ + pgoff_t oti_next_index; + pgoff_t oti_fn_index; /* first non-overlapped index */ + struct cl_sync_io oti_anchor; }; struct osc_object { @@ -125,7 +135,7 @@ struct osc_object { */ struct list_head oo_inflight[CRT_NR]; /** - * Lock, protecting ccc_object::cob_inflight, because a seat-belt is + * Lock, protecting osc_page::ops_inflight, because a seat-belt is * locked during take-off and landing. */ spinlock_t oo_seatbelt; @@ -159,6 +169,17 @@ struct osc_object { * oo_{read|write}_pages soon. */ spinlock_t oo_lock; + + /** + * Radix tree for caching pages + */ + struct radix_tree_root oo_tree; + spinlock_t oo_tree_lock; + unsigned long oo_npages; + + /* Protect osc_lock this osc_object has */ + spinlock_t oo_ol_spin; + struct list_head oo_ol_list; }; static inline void osc_object_lock(struct osc_object *obj) @@ -198,8 +219,6 @@ enum osc_lock_state { OLS_ENQUEUED, OLS_UPCALL_RECEIVED, OLS_GRANTED, - OLS_RELEASED, - OLS_BLOCKED, OLS_CANCELLED }; @@ -208,10 +227,8 @@ enum osc_lock_state { * * Interaction with DLM. * - * CLIO enqueues all DLM locks through ptlrpcd (that is, in "async" mode). - * * Once receive upcall is invoked, osc_lock remembers a handle of DLM lock in - * osc_lock::ols_handle and a pointer to that lock in osc_lock::ols_lock. + * osc_lock::ols_handle and a pointer to that lock in osc_lock::ols_dlmlock. * * This pointer is protected through a reference, acquired by * osc_lock_upcall0(). Also, an additional reference is acquired by @@ -249,26 +266,27 @@ enum osc_lock_state { */ struct osc_lock { struct cl_lock_slice ols_cl; + /** Internal lock to protect states, etc. */ + spinlock_t ols_lock; + /** Owner sleeps on this channel for state change */ + struct cl_sync_io *ols_owner; + /** waiting list for this lock to be cancelled */ + struct list_head ols_waiting_list; + /** wait entry of ols_waiting_list */ + struct list_head ols_wait_entry; + /** list entry for osc_object::oo_ol_list */ + struct list_head ols_nextlock_oscobj; + /** underlying DLM lock */ - struct ldlm_lock *ols_lock; - /** lock value block */ - struct ost_lvb ols_lvb; + struct ldlm_lock *ols_dlmlock; /** DLM flags with which osc_lock::ols_lock was enqueued */ __u64 ols_flags; /** osc_lock::ols_lock handle */ struct lustre_handle ols_handle; struct ldlm_enqueue_info ols_einfo; enum osc_lock_state ols_state; - - /** - * How many pages are using this lock for io, currently only used by - * read-ahead. If non-zero, the underlying dlm lock won't be cancelled - * during recovery to avoid deadlock. see bz16774. - * - * \see osc_page::ops_lock - * \see osc_page_addref_lock(), osc_page_putref_lock() - */ - atomic_t ols_pageref; + /** lock value block */ + struct ost_lvb ols_lvb; /** * true, if ldlm_lock_addref() was called against @@ -298,16 +316,6 @@ struct osc_lock { * If true, osc_lock_enqueue is able to tolerate the -EUSERS error. */ ols_locklessable:1, - /** - * set by osc_lock_use() to wait until blocking AST enters into - * osc_ldlm_blocking_ast0(), so that cl_lock mutex can be used for - * further synchronization. - */ - ols_ast_wait:1, - /** - * If the data of this lock has been flushed to server side. - */ - ols_flush:1, /** * if set, the osc_lock is a glimpse lock. For glimpse locks, we treat * the EVAVAIL error as tolerable, this will make upper logic happy @@ -321,15 +329,6 @@ struct osc_lock { * For async glimpse lock. */ ols_agl:1; - /** - * IO that owns this lock. This field is used for a dead-lock - * avoidance by osc_lock_enqueue_wait(). - * - * XXX: unfortunately, the owner of a osc_lock is not unique, - * the lock may have multiple users, if the lock is granted and - * then matched. - */ - struct osc_io *ols_owner; }; /** @@ -369,18 +368,15 @@ struct osc_page { * Set if the page must be transferred with OBD_BRW_SRVLOCK. */ ops_srvlock:1; - union { - /** - * lru page list. ops_inflight and ops_lru are exclusive so - * that they can share the same data. - */ - struct list_head ops_lru; - /** - * Linkage into a per-osc_object list of pages in flight. For - * debugging. - */ - struct list_head ops_inflight; - }; + /** + * lru page list. See osc_lru_{del|use}() in osc_page.c for usage. + */ + struct list_head ops_lru; + /** + * Linkage into a per-osc_object list of pages in flight. For + * debugging. + */ + struct list_head ops_inflight; /** * Thread that submitted this page for transfer. For debugging. */ @@ -389,16 +385,6 @@ struct osc_page { * Submit time - the time when the page is starting RPC. For debugging. */ unsigned long ops_submit_time; - - /** - * A lock of which we hold a reference covers this page. Only used by - * read-ahead: for a readahead page, we hold it's covering lock to - * prevent it from being canceled during recovery. - * - * \see osc_lock::ols_pageref - * \see osc_page_addref_lock(), osc_page_putref_lock(). - */ - struct cl_lock *ops_lock; }; extern struct kmem_cache *osc_lock_kmem; @@ -417,21 +403,22 @@ extern struct lu_context_key osc_session_key; int osc_lock_init(const struct lu_env *env, struct cl_object *obj, struct cl_lock *lock, const struct cl_io *io); -int osc_io_init (const struct lu_env *env, - struct cl_object *obj, struct cl_io *io); -int osc_req_init (const struct lu_env *env, struct cl_device *dev, - struct cl_req *req); +int osc_io_init(const struct lu_env *env, + struct cl_object *obj, struct cl_io *io); +int osc_req_init(const struct lu_env *env, struct cl_device *dev, + struct cl_req *req); struct lu_object *osc_object_alloc(const struct lu_env *env, const struct lu_object_header *hdr, struct lu_device *dev); int osc_page_init(const struct lu_env *env, struct cl_object *obj, - struct cl_page *page, struct page *vmpage); + struct cl_page *page, pgoff_t ind); -void osc_index2policy (ldlm_policy_data_t *policy, const struct cl_object *obj, - pgoff_t start, pgoff_t end); -int osc_lvb_print (const struct lu_env *env, void *cookie, - lu_printer_t p, const struct ost_lvb *lvb); +void osc_index2policy(ldlm_policy_data_t *policy, const struct cl_object *obj, + pgoff_t start, pgoff_t end); +int osc_lvb_print(const struct lu_env *env, void *cookie, + lu_printer_t p, const struct ost_lvb *lvb); +void osc_lru_add_batch(struct client_obd *cli, struct list_head *list); void osc_page_submit(const struct lu_env *env, struct osc_page *opg, enum cl_req_type crt, int brw_flags); int osc_cancel_async_page(const struct lu_env *env, struct osc_page *ops); @@ -441,6 +428,8 @@ int osc_prep_async_page(struct osc_object *osc, struct osc_page *ops, struct page *page, loff_t offset); int osc_queue_async_io(const struct lu_env *env, struct cl_io *io, struct osc_page *ops); +int osc_page_cache_add(const struct lu_env *env, + const struct cl_page_slice *slice, struct cl_io *io); int osc_teardown_async_page(const struct lu_env *env, struct osc_object *obj, struct osc_page *ops); int osc_flush_async_page(const struct lu_env *env, struct cl_io *io, @@ -457,12 +446,13 @@ int osc_cache_wait_range(const struct lu_env *env, struct osc_object *obj, pgoff_t start, pgoff_t end); void osc_io_unplug(const struct lu_env *env, struct client_obd *cli, struct osc_object *osc); +int lru_queue_work(const struct lu_env *env, void *data); -void osc_object_set_contended (struct osc_object *obj); +void osc_object_set_contended(struct osc_object *obj); void osc_object_clear_contended(struct osc_object *obj); -int osc_object_is_contended (struct osc_object *obj); +int osc_object_is_contended(struct osc_object *obj); -int osc_lock_is_lockless (const struct osc_lock *olck); +int osc_lock_is_lockless(const struct osc_lock *olck); /***************************************************************************** * @@ -558,6 +548,11 @@ static inline struct osc_page *oap2osc(struct osc_async_page *oap) return container_of0(oap, struct osc_page, ops_oap); } +static inline pgoff_t osc_index(struct osc_page *opg) +{ + return opg->ops_cl.cpl_index; +} + static inline struct cl_page *oap2cl_page(struct osc_async_page *oap) { return oap2osc(oap)->ops_cl.cpl_page; @@ -608,7 +603,7 @@ enum osc_extent_state { * * LOCKING ORDER * ============= - * page lock -> client_obd_list_lock -> object lock(osc_object::oo_lock) + * page lock -> cl_loi_list_lock -> object lock(osc_object::oo_lock) */ struct osc_extent { /** red-black tree node */ @@ -627,6 +622,8 @@ struct osc_extent { unsigned int oe_intree:1, /** 0 is write, 1 is read */ oe_rw:1, + /** sync extent, queued by osc_queue_sync_pages() */ + oe_sync:1, oe_srvlock:1, oe_memalloc:1, /** an ACTIVE extent is going to be truncated, so when this extent @@ -675,7 +672,7 @@ struct osc_extent { */ wait_queue_head_t oe_waitq; /** lock covering this extent */ - struct cl_lock *oe_osclock; + struct ldlm_lock *oe_dlmlock; /** terminator of this extent. Must be true if this extent is in IO. */ struct task_struct *oe_owner; /** return value of writeback. If somebody is waiting for this extent, @@ -690,6 +687,14 @@ int osc_extent_finish(const struct lu_env *env, struct osc_extent *ext, int sent, int rc); void osc_extent_release(const struct lu_env *env, struct osc_extent *ext); +int osc_lock_discard_pages(const struct lu_env *env, struct osc_object *osc, + pgoff_t start, pgoff_t end, enum cl_lock_mode mode); + +typedef int (*osc_page_gang_cbt)(const struct lu_env *, struct cl_io *, + struct osc_page *, void *); +int osc_page_gang_lookup(const struct lu_env *env, struct cl_io *io, + struct osc_object *osc, pgoff_t start, pgoff_t end, + osc_page_gang_cbt cb, void *cbdata); /** @} osc */ #endif /* OSC_CL_INTERNAL_H */ diff --git a/drivers/staging/lustre/lustre/osc/osc_internal.h b/drivers/staging/lustre/lustre/osc/osc_internal.h index ea695c209..7fad82781 100644 --- a/drivers/staging/lustre/lustre/osc/osc_internal.h +++ b/drivers/staging/lustre/lustre/osc/osc_internal.h @@ -83,6 +83,12 @@ struct osc_async_page { #define oap_count oap_brw_page.count #define oap_brw_flags oap_brw_page.flag +static inline struct osc_async_page *brw_page2oap(struct brw_page *pga) +{ + return (struct osc_async_page *)container_of(pga, struct osc_async_page, + oap_brw_page); +} + struct osc_cache_waiter { struct list_head ocw_entry; wait_queue_head_t ocw_waitq; @@ -102,12 +108,14 @@ void osc_update_next_shrink(struct client_obd *cli); extern struct ptlrpc_request_set *PTLRPCD_SET; +typedef int (*osc_enqueue_upcall_f)(void *cookie, struct lustre_handle *lockh, + int rc); + int osc_enqueue_base(struct obd_export *exp, struct ldlm_res_id *res_id, __u64 *flags, ldlm_policy_data_t *policy, struct ost_lvb *lvb, int kms_valid, - obd_enqueue_update_f upcall, + osc_enqueue_upcall_f upcall, void *cookie, struct ldlm_enqueue_info *einfo, - struct lustre_handle *lockh, struct ptlrpc_request_set *rqset, int async, int agl); int osc_cancel_base(struct lustre_handle *lockh, __u32 mode); @@ -130,9 +138,11 @@ int osc_sync_base(struct obd_export *exp, struct obd_info *oinfo, int osc_process_config_base(struct obd_device *obd, struct lustre_cfg *cfg); int osc_build_rpc(const struct lu_env *env, struct client_obd *cli, struct list_head *ext_list, int cmd); -int osc_lru_shrink(struct client_obd *cli, int target); +int osc_lru_shrink(const struct lu_env *env, struct client_obd *cli, + int target, bool force); +int osc_lru_reclaim(struct client_obd *cli); -extern spinlock_t osc_ast_guard; +unsigned long osc_ldlm_weigh_ast(struct ldlm_lock *dlmlock); int osc_setup(struct obd_device *obd, struct lustre_cfg *lcfg); @@ -173,8 +183,6 @@ static inline struct osc_device *obd2osc_dev(const struct obd_device *d) return container_of0(d->obd_lu_dev, struct osc_device, od_cl.cd_lu_dev); } -int osc_dlm_lock_pageref(struct ldlm_lock *dlm); - extern struct kmem_cache *osc_quota_kmem; struct osc_quota_info { /** linkage for quota hash table */ @@ -192,5 +200,12 @@ int osc_quotactl(struct obd_device *unused, struct obd_export *exp, int osc_quotacheck(struct obd_device *unused, struct obd_export *exp, struct obd_quotactl *oqctl); int osc_quota_poll_check(struct obd_export *exp, struct if_quotacheck *qchk); +void osc_inc_unstable_pages(struct ptlrpc_request *req); +void osc_dec_unstable_pages(struct ptlrpc_request *req); +int osc_over_unstable_soft_limit(struct client_obd *cli); + +struct ldlm_lock *osc_dlmlock_at_pgoff(const struct lu_env *env, + struct osc_object *obj, pgoff_t index, + int pending, int canceling); #endif /* OSC_INTERNAL_H */ diff --git a/drivers/staging/lustre/lustre/osc/osc_io.c b/drivers/staging/lustre/lustre/osc/osc_io.c index 6bd0a45d8..d534b0e0e 100644 --- a/drivers/staging/lustre/lustre/osc/osc_io.c +++ b/drivers/staging/lustre/lustre/osc/osc_io.c @@ -68,11 +68,15 @@ static struct osc_io *cl2osc_io(const struct lu_env *env, return oio; } -static struct osc_page *osc_cl_page_osc(struct cl_page *page) +static struct osc_page *osc_cl_page_osc(struct cl_page *page, + struct osc_object *osc) { const struct cl_page_slice *slice; - slice = cl_page_at(page, &osc_device_type); + if (osc) + slice = cl_object_page_slice(&osc->oo_cl, page); + else + slice = cl_page_at(page, &osc_device_type); LASSERT(slice); return cl2osc_page(slice); @@ -137,7 +141,7 @@ static int osc_io_submit(const struct lu_env *env, io = page->cp_owner; LASSERT(io); - opg = osc_cl_page_osc(page); + opg = osc_cl_page_osc(page, osc); oap = &opg->ops_oap; LASSERT(osc == oap->oap_obj); @@ -164,8 +168,10 @@ static int osc_io_submit(const struct lu_env *env, } cl_page_list_move(qout, qin, page); + spin_lock(&oap->oap_lock); oap->oap_async_flags = ASYNC_URGENT|ASYNC_READY; oap->oap_async_flags |= ASYNC_COUNT_STABLE; + spin_unlock(&oap->oap_lock); osc_page_submit(env, opg, crt, brw_flags); list_add_tail(&oap->oap_pending_item, &list); @@ -185,6 +191,13 @@ static int osc_io_submit(const struct lu_env *env, return qout->pl_nr > 0 ? 0 : result; } +/** + * This is called when a page is accessed within file in a way that creates + * new page, if one were missing (i.e., if there were a hole at that place in + * the file, or accessed page is beyond the current file size). + * + * Expand stripe KMS if necessary. + */ static void osc_page_touch_at(const struct lu_env *env, struct cl_object *obj, pgoff_t idx, unsigned to) { @@ -208,7 +221,8 @@ static void osc_page_touch_at(const struct lu_env *env, kms > loi->loi_kms ? "" : "not ", loi->loi_kms, kms, loi->loi_lvb.lvb_size); - valid = 0; + attr->cat_mtime = attr->cat_ctime = LTIME_S(CURRENT_TIME); + valid = CAT_MTIME | CAT_CTIME; if (kms > loi->loi_kms) { attr->cat_kms = kms; valid |= CAT_KMS; @@ -221,91 +235,128 @@ static void osc_page_touch_at(const struct lu_env *env, cl_object_attr_unlock(obj); } -/** - * This is called when a page is accessed within file in a way that creates - * new page, if one were missing (i.e., if there were a hole at that place in - * the file, or accessed page is beyond the current file size). Examples: - * ->commit_write() and ->nopage() methods. - * - * Expand stripe KMS if necessary. - */ -static void osc_page_touch(const struct lu_env *env, - struct osc_page *opage, unsigned to) -{ - struct cl_page *page = opage->ops_cl.cpl_page; - struct cl_object *obj = opage->ops_cl.cpl_obj; - - osc_page_touch_at(env, obj, page->cp_index, to); -} - -/** - * Implements cl_io_operations::cio_prepare_write() method for osc layer. - * - * \retval -EIO transfer initiated against this osc will most likely fail - * \retval 0 transfer initiated against this osc will most likely succeed. - * - * The reason for this check is to immediately return an error to the caller - * in the case of a deactivated import. Note, that import can be deactivated - * later, while pages, dirtied by this IO, are still in the cache, but this is - * irrelevant, because that would still return an error to the application (if - * it does fsync), but many applications don't do fsync because of performance - * issues, and we wanted to return an -EIO at write time to notify the - * application. - */ -static int osc_io_prepare_write(const struct lu_env *env, - const struct cl_io_slice *ios, - const struct cl_page_slice *slice, - unsigned from, unsigned to) +static int osc_io_commit_async(const struct lu_env *env, + const struct cl_io_slice *ios, + struct cl_page_list *qin, int from, int to, + cl_commit_cbt cb) { - struct osc_device *dev = lu2osc_dev(slice->cpl_obj->co_lu.lo_dev); - struct obd_import *imp = class_exp2cliimp(dev->od_exp); + struct cl_io *io = ios->cis_io; struct osc_io *oio = cl2osc_io(env, ios); + struct osc_object *osc = cl2osc(ios->cis_obj); + struct cl_page *page; + struct cl_page *last_page; + struct osc_page *opg; int result = 0; - /* - * This implements OBD_BRW_CHECK logic from old client. - */ + LASSERT(qin->pl_nr > 0); + + /* Handle partial page cases */ + last_page = cl_page_list_last(qin); + if (oio->oi_lockless) { + page = cl_page_list_first(qin); + if (page == last_page) { + cl_page_clip(env, page, from, to); + } else { + if (from != 0) + cl_page_clip(env, page, from, PAGE_SIZE); + if (to != PAGE_SIZE) + cl_page_clip(env, last_page, 0, to); + } + } + + while (qin->pl_nr > 0) { + struct osc_async_page *oap; + + page = cl_page_list_first(qin); + opg = osc_cl_page_osc(page, osc); + oap = &opg->ops_oap; + + if (!list_empty(&oap->oap_rpc_item)) { + CDEBUG(D_CACHE, "Busy oap %p page %p for submit.\n", + oap, opg); + result = -EBUSY; + break; + } + + /* The page may be already in dirty cache. */ + if (list_empty(&oap->oap_pending_item)) { + result = osc_page_cache_add(env, &opg->ops_cl, io); + if (result != 0) + break; + } + + osc_page_touch_at(env, osc2cl(osc), osc_index(opg), + page == last_page ? to : PAGE_SIZE); + + cl_page_list_del(env, qin, page); - if (!imp || imp->imp_invalid) - result = -EIO; - if (result == 0 && oio->oi_lockless) - /* this page contains `invalid' data, but who cares? - * nobody can access the invalid data. - * in osc_io_commit_write(), we're going to write exact - * [from, to) bytes of this page to OST. -jay + (*cb)(env, io, page); + /* Can't access page any more. Page can be in transfer and + * complete at any time. */ - cl_page_export(env, slice->cpl_page, 1); + } + /* for sync write, kernel will wait for this page to be flushed before + * osc_io_end() is called, so release it earlier. + * for mkwrite(), it's known there is no further pages. + */ + if (cl_io_is_sync_write(io) && oio->oi_active) { + osc_extent_release(env, oio->oi_active); + oio->oi_active = NULL; + } + + CDEBUG(D_INFO, "%d %d\n", qin->pl_nr, result); return result; } -static int osc_io_commit_write(const struct lu_env *env, - const struct cl_io_slice *ios, - const struct cl_page_slice *slice, - unsigned from, unsigned to) +static int osc_io_rw_iter_init(const struct lu_env *env, + const struct cl_io_slice *ios) { - struct osc_io *oio = cl2osc_io(env, ios); - struct osc_page *opg = cl2osc_page(slice); - struct osc_object *obj = cl2osc(opg->ops_cl.cpl_obj); - struct osc_async_page *oap = &opg->ops_oap; + struct cl_io *io = ios->cis_io; + struct osc_io *oio = osc_env_io(env); + struct osc_object *osc = cl2osc(ios->cis_obj); + struct client_obd *cli = osc_cli(osc); + unsigned long c; + unsigned int npages; + unsigned int max_pages; + + if (cl_io_is_append(io)) + return 0; + + npages = io->u.ci_rw.crw_count >> PAGE_SHIFT; + if (io->u.ci_rw.crw_pos & ~PAGE_MASK) + ++npages; + + max_pages = cli->cl_max_pages_per_rpc * cli->cl_max_rpcs_in_flight; + if (npages > max_pages) + npages = max_pages; + + c = atomic_read(cli->cl_lru_left); + if (c < npages && osc_lru_reclaim(cli) > 0) + c = atomic_read(cli->cl_lru_left); + while (c >= npages) { + if (c == atomic_cmpxchg(cli->cl_lru_left, c, c - npages)) { + oio->oi_lru_reserved = npages; + break; + } + c = atomic_read(cli->cl_lru_left); + } - LASSERT(to > 0); - /* - * XXX instead of calling osc_page_touch() here and in - * osc_io_fault_start() it might be more logical to introduce - * cl_page_touch() method, that generic cl_io_commit_write() and page - * fault code calls. - */ - osc_page_touch(env, cl2osc_page(slice), to); - if (!client_is_remote(osc_export(obj)) && - capable(CFS_CAP_SYS_RESOURCE)) - oap->oap_brw_flags |= OBD_BRW_NOQUOTA; + return 0; +} - if (oio->oi_lockless) - /* see osc_io_prepare_write() for lockless io handling. */ - cl_page_clip(env, slice->cpl_page, from, to); +static void osc_io_rw_iter_fini(const struct lu_env *env, + const struct cl_io_slice *ios) +{ + struct osc_io *oio = osc_env_io(env); + struct osc_object *osc = cl2osc(ios->cis_obj); + struct client_obd *cli = osc_cli(osc); - return 0; + if (oio->oi_lru_reserved > 0) { + atomic_add(oio->oi_lru_reserved, cli->cl_lru_left); + oio->oi_lru_reserved = 0; + } + oio->oi_write_osclock = NULL; } static int osc_io_fault_start(const struct lu_env *env, @@ -342,31 +393,21 @@ static int osc_async_upcall(void *a, int rc) * Checks that there are no pages being written in the extent being truncated. */ static int trunc_check_cb(const struct lu_env *env, struct cl_io *io, - struct cl_page *page, void *cbdata) + struct osc_page *ops, void *cbdata) { - const struct cl_page_slice *slice; - struct osc_page *ops; + struct cl_page *page = ops->ops_cl.cpl_page; struct osc_async_page *oap; __u64 start = *(__u64 *)cbdata; - slice = cl_page_at(page, &osc_device_type); - LASSERT(slice); - ops = cl2osc_page(slice); oap = &ops->ops_oap; - if (oap->oap_cmd & OBD_BRW_WRITE && !list_empty(&oap->oap_pending_item)) CL_PAGE_DEBUG(D_ERROR, env, page, "exists %llu/%s.\n", start, current->comm); - { - struct page *vmpage = cl_page_vmpage(env, page); - - if (PageLocked(vmpage)) - CDEBUG(D_CACHE, "page %p index %lu locked for %d.\n", - ops, page->cp_index, - (oap->oap_cmd & OBD_BRW_RWMASK)); - } + if (PageLocked(page->cp_vmpage)) + CDEBUG(D_CACHE, "page %p index %lu locked for %d.\n", + ops, osc_index(ops), oap->oap_cmd & OBD_BRW_RWMASK); return CLP_GANG_OKAY; } @@ -385,8 +426,9 @@ static void osc_trunc_check(const struct lu_env *env, struct cl_io *io, /* * Complain if there are pages in the truncated region. */ - cl_page_gang_lookup(env, clob, io, start + partial, CL_PAGE_EOF, - trunc_check_cb, (void *)&size); + osc_page_gang_lookup(env, io, cl2osc(clob), + start + partial, CL_PAGE_EOF, + trunc_check_cb, (void *)&size); } static int osc_io_setattr_start(const struct lu_env *env, @@ -650,6 +692,8 @@ static const struct cl_io_operations osc_io_ops = { .cio_fini = osc_io_fini }, [CIT_WRITE] = { + .cio_iter_init = osc_io_rw_iter_init, + .cio_iter_fini = osc_io_rw_iter_fini, .cio_start = osc_io_write_start, .cio_end = osc_io_end, .cio_fini = osc_io_fini @@ -672,16 +716,8 @@ static const struct cl_io_operations osc_io_ops = { .cio_fini = osc_io_fini } }, - .req_op = { - [CRT_READ] = { - .cio_submit = osc_io_submit - }, - [CRT_WRITE] = { - .cio_submit = osc_io_submit - } - }, - .cio_prepare_write = osc_io_prepare_write, - .cio_commit_write = osc_io_commit_write + .cio_submit = osc_io_submit, + .cio_commit_async = osc_io_commit_async }; /***************************************************************************** @@ -718,8 +754,7 @@ static void osc_req_attr_set(const struct lu_env *env, struct lov_oinfo *oinfo; struct cl_req *clerq; struct cl_page *apage; /* _some_ page in @clerq */ - struct cl_lock *lock; /* _some_ lock protecting @apage */ - struct osc_lock *olck; + struct ldlm_lock *lock; /* _some_ lock protecting @apage */ struct osc_page *opg; struct obdo *oa; struct ost_lvb *lvb; @@ -753,31 +788,32 @@ static void osc_req_attr_set(const struct lu_env *env, LASSERT(!list_empty(&clerq->crq_pages)); apage = container_of(clerq->crq_pages.next, struct cl_page, cp_flight); - opg = osc_cl_page_osc(apage); - apage = opg->ops_cl.cpl_page; /* now apage is a sub-page */ - lock = cl_lock_at_page(env, apage->cp_obj, apage, NULL, 1, 1); - if (!lock) { - struct cl_object_header *head; - struct cl_lock *scan; - - head = cl_object_header(apage->cp_obj); - list_for_each_entry(scan, &head->coh_locks, cll_linkage) - CL_LOCK_DEBUG(D_ERROR, env, scan, - "no cover page!\n"); - CL_PAGE_DEBUG(D_ERROR, env, apage, - "dump uncover page!\n"); + opg = osc_cl_page_osc(apage, NULL); + lock = osc_dlmlock_at_pgoff(env, cl2osc(obj), osc_index(opg), + 1, 1); + if (!lock && !opg->ops_srvlock) { + struct ldlm_resource *res; + struct ldlm_res_id *resname; + + CL_PAGE_DEBUG(D_ERROR, env, apage, "uncovered page!\n"); + + resname = &osc_env_info(env)->oti_resname; + ostid_build_res_name(&oinfo->loi_oi, resname); + res = ldlm_resource_get( + osc_export(cl2osc(obj))->exp_obd->obd_namespace, + NULL, resname, LDLM_EXTENT, 0); + ldlm_resource_dump(D_ERROR, res); + dump_stack(); LBUG(); } - olck = osc_lock_at(lock); - LASSERT(ergo(opg->ops_srvlock, !olck->ols_lock)); /* check for lockless io. */ - if (olck->ols_lock) { - oa->o_handle = olck->ols_lock->l_remote_handle; + if (lock) { + oa->o_handle = lock->l_remote_handle; oa->o_valid |= OBD_MD_FLHANDLE; + LDLM_LOCK_PUT(lock); } - cl_lock_put(env, lock); } } @@ -807,8 +843,9 @@ int osc_req_init(const struct lu_env *env, struct cl_device *dev, if (or) { cl_req_slice_add(req, &or->or_cl, dev, &osc_req_ops); result = 0; - } else + } else { result = -ENOMEM; + } return result; } diff --git a/drivers/staging/lustre/lustre/osc/osc_lock.c b/drivers/staging/lustre/lustre/osc/osc_lock.c index 013df9787..16f9cd9d3 100644 --- a/drivers/staging/lustre/lustre/osc/osc_lock.c +++ b/drivers/staging/lustre/lustre/osc/osc_lock.c @@ -36,6 +36,7 @@ * Implementation of cl_lock for OSC layer. * * Author: Nikita Danilov + * Author: Jinshan Xiong */ #define DEBUG_SUBSYSTEM S_OSC @@ -50,8 +51,6 @@ * @{ */ -#define _PAGEREF_MAGIC (-10000000) - /***************************************************************************** * * Type conversions. @@ -62,7 +61,6 @@ static const struct cl_lock_operations osc_lock_ops; static const struct cl_lock_operations osc_lock_lockless_ops; static void osc_lock_to_lockless(const struct lu_env *env, struct osc_lock *ols, int force); -static int osc_lock_has_pages(struct osc_lock *olck); int osc_lock_is_lockless(const struct osc_lock *olck) { @@ -90,11 +88,11 @@ static struct ldlm_lock *osc_handle_ptr(struct lustre_handle *handle) static int osc_lock_invariant(struct osc_lock *ols) { struct ldlm_lock *lock = osc_handle_ptr(&ols->ols_handle); - struct ldlm_lock *olock = ols->ols_lock; + struct ldlm_lock *olock = ols->ols_dlmlock; int handle_used = lustre_handle_is_used(&ols->ols_handle); if (ergo(osc_lock_is_lockless(ols), - ols->ols_locklessable && !ols->ols_lock)) + ols->ols_locklessable && !ols->ols_dlmlock)) return 1; /* @@ -111,7 +109,7 @@ static int osc_lock_invariant(struct osc_lock *ols) ergo(!lock, !olock))) return 0; /* - * Check that ->ols_handle and ->ols_lock are consistent, but + * Check that ->ols_handle and ->ols_dlmlock are consistent, but * take into account that they are set at the different time. */ if (!ergo(ols->ols_state == OLS_CANCELLED, @@ -122,7 +120,7 @@ static int osc_lock_invariant(struct osc_lock *ols) * ast. */ if (!ergo(olock && ols->ols_state < OLS_CANCELLED, - ((olock->l_flags & LDLM_FL_DESTROYED) == 0))) + !ldlm_is_destroyed(olock))) return 0; if (!ergo(ols->ols_state == OLS_GRANTED, @@ -138,117 +136,13 @@ static int osc_lock_invariant(struct osc_lock *ols) * */ -/** - * Breaks a link between osc_lock and dlm_lock. - */ -static void osc_lock_detach(const struct lu_env *env, struct osc_lock *olck) -{ - struct ldlm_lock *dlmlock; - - spin_lock(&osc_ast_guard); - dlmlock = olck->ols_lock; - if (!dlmlock) { - spin_unlock(&osc_ast_guard); - return; - } - - olck->ols_lock = NULL; - /* wb(); --- for all who checks (ols->ols_lock != NULL) before - * call to osc_lock_detach() - */ - dlmlock->l_ast_data = NULL; - olck->ols_handle.cookie = 0ULL; - spin_unlock(&osc_ast_guard); - - lock_res_and_lock(dlmlock); - if (dlmlock->l_granted_mode == dlmlock->l_req_mode) { - struct cl_object *obj = olck->ols_cl.cls_obj; - struct cl_attr *attr = &osc_env_info(env)->oti_attr; - __u64 old_kms; - - cl_object_attr_lock(obj); - /* Must get the value under the lock to avoid possible races. */ - old_kms = cl2osc(obj)->oo_oinfo->loi_kms; - /* Update the kms. Need to loop all granted locks. - * Not a problem for the client - */ - attr->cat_kms = ldlm_extent_shift_kms(dlmlock, old_kms); - - cl_object_attr_set(env, obj, attr, CAT_KMS); - cl_object_attr_unlock(obj); - } - unlock_res_and_lock(dlmlock); - - /* release a reference taken in osc_lock_upcall0(). */ - LASSERT(olck->ols_has_ref); - lu_ref_del(&dlmlock->l_reference, "osc_lock", olck); - LDLM_LOCK_RELEASE(dlmlock); - olck->ols_has_ref = 0; -} - -static int osc_lock_unhold(struct osc_lock *ols) -{ - int result = 0; - - if (ols->ols_hold) { - ols->ols_hold = 0; - result = osc_cancel_base(&ols->ols_handle, - ols->ols_einfo.ei_mode); - } - return result; -} - -static int osc_lock_unuse(const struct lu_env *env, - const struct cl_lock_slice *slice) -{ - struct osc_lock *ols = cl2osc_lock(slice); - - LINVRNT(osc_lock_invariant(ols)); - - switch (ols->ols_state) { - case OLS_NEW: - LASSERT(!ols->ols_hold); - LASSERT(ols->ols_agl); - return 0; - case OLS_UPCALL_RECEIVED: - osc_lock_unhold(ols); - case OLS_ENQUEUED: - LASSERT(!ols->ols_hold); - osc_lock_detach(env, ols); - ols->ols_state = OLS_NEW; - return 0; - case OLS_GRANTED: - LASSERT(!ols->ols_glimpse); - LASSERT(ols->ols_hold); - /* - * Move lock into OLS_RELEASED state before calling - * osc_cancel_base() so that possible synchronous cancellation - * sees that lock is released. - */ - ols->ols_state = OLS_RELEASED; - return osc_lock_unhold(ols); - default: - CERROR("Impossible state: %d\n", ols->ols_state); - LBUG(); - } -} - static void osc_lock_fini(const struct lu_env *env, struct cl_lock_slice *slice) { struct osc_lock *ols = cl2osc_lock(slice); LINVRNT(osc_lock_invariant(ols)); - /* - * ->ols_hold can still be true at this point if, for example, a - * thread that requested a lock was killed (and released a reference - * to the lock), before reply from a server was received. In this case - * lock is destroyed immediately after upcall. - */ - osc_lock_unhold(ols); - LASSERT(!ols->ols_lock); - LASSERT(atomic_read(&ols->ols_pageref) == 0 || - atomic_read(&ols->ols_pageref) == _PAGEREF_MAGIC); + LASSERT(!ols->ols_dlmlock); kmem_cache_free(osc_lock_kmem, ols); } @@ -275,54 +169,11 @@ static __u64 osc_enq2ldlm_flags(__u32 enqflags) result |= LDLM_FL_HAS_INTENT; if (enqflags & CEF_DISCARD_DATA) result |= LDLM_FL_AST_DISCARD_DATA; + if (enqflags & CEF_PEEK) + result |= LDLM_FL_TEST_LOCK; return result; } -/** - * Global spin-lock protecting consistency of ldlm_lock::l_ast_data - * pointers. Initialized in osc_init(). - */ -spinlock_t osc_ast_guard; - -static struct osc_lock *osc_ast_data_get(struct ldlm_lock *dlm_lock) -{ - struct osc_lock *olck; - - lock_res_and_lock(dlm_lock); - spin_lock(&osc_ast_guard); - olck = dlm_lock->l_ast_data; - if (olck) { - struct cl_lock *lock = olck->ols_cl.cls_lock; - /* - * If osc_lock holds a reference on ldlm lock, return it even - * when cl_lock is in CLS_FREEING state. This way - * - * osc_ast_data_get(dlmlock) == NULL - * - * guarantees that all osc references on dlmlock were - * released. osc_dlm_blocking_ast0() relies on that. - */ - if (lock->cll_state < CLS_FREEING || olck->ols_has_ref) { - cl_lock_get_trust(lock); - lu_ref_add_atomic(&lock->cll_reference, - "ast", current); - } else - olck = NULL; - } - spin_unlock(&osc_ast_guard); - unlock_res_and_lock(dlm_lock); - return olck; -} - -static void osc_ast_data_put(const struct lu_env *env, struct osc_lock *olck) -{ - struct cl_lock *lock; - - lock = olck->ols_cl.cls_lock; - lu_ref_del(&lock->cll_reference, "ast", current); - cl_lock_put(env, lock); -} - /** * Updates object attributes from a lock value block (lvb) received together * with the DLM lock reply from the server. Copy of osc_update_enqueue() @@ -333,35 +184,30 @@ static void osc_ast_data_put(const struct lu_env *env, struct osc_lock *olck) * * Called under lock and resource spin-locks. */ -static void osc_lock_lvb_update(const struct lu_env *env, struct osc_lock *olck, - int rc) +static void osc_lock_lvb_update(const struct lu_env *env, + struct osc_object *osc, + struct ldlm_lock *dlmlock, + struct ost_lvb *lvb) { - struct ost_lvb *lvb; - struct cl_object *obj; - struct lov_oinfo *oinfo; - struct cl_attr *attr; + struct cl_object *obj = osc2cl(osc); + struct lov_oinfo *oinfo = osc->oo_oinfo; + struct cl_attr *attr = &osc_env_info(env)->oti_attr; unsigned valid; - if (!(olck->ols_flags & LDLM_FL_LVB_READY)) - return; - - lvb = &olck->ols_lvb; - obj = olck->ols_cl.cls_obj; - oinfo = cl2osc(obj)->oo_oinfo; - attr = &osc_env_info(env)->oti_attr; valid = CAT_BLOCKS | CAT_ATIME | CAT_CTIME | CAT_MTIME | CAT_SIZE; + if (!lvb) + lvb = dlmlock->l_lvb_data; + cl_lvb2attr(attr, lvb); cl_object_attr_lock(obj); - if (rc == 0) { - struct ldlm_lock *dlmlock; + if (dlmlock) { __u64 size; - dlmlock = olck->ols_lock; - - /* re-grab LVB from a dlm lock under DLM spin-locks. */ - *lvb = *(struct ost_lvb *)dlmlock->l_lvb_data; + check_res_locked(dlmlock->l_resource); + LASSERT(lvb == dlmlock->l_lvb_data); size = lvb->lvb_size; + /* Extend KMS up to the end of this lock and no further * A lock on [x,y] means a KMS of up to y + 1 bytes! */ @@ -378,102 +224,67 @@ static void osc_lock_lvb_update(const struct lu_env *env, struct osc_lock *olck, dlmlock->l_policy_data.l_extent.end); } ldlm_lock_allow_match_locked(dlmlock); - } else if (rc == -ENAVAIL && olck->ols_glimpse) { - CDEBUG(D_INODE, "glimpsed, setting rss=%llu; leaving kms=%llu\n", - lvb->lvb_size, oinfo->loi_kms); - } else - valid = 0; - - if (valid != 0) - cl_object_attr_set(env, obj, attr, valid); + } + cl_object_attr_set(env, obj, attr, valid); cl_object_attr_unlock(obj); } -/** - * Called when a lock is granted, from an upcall (when server returned a - * granted lock), or from completion AST, when server returned a blocked lock. - * - * Called under lock and resource spin-locks, that are released temporarily - * here. - */ -static void osc_lock_granted(const struct lu_env *env, struct osc_lock *olck, - struct ldlm_lock *dlmlock, int rc) +static void osc_lock_granted(const struct lu_env *env, struct osc_lock *oscl, + struct lustre_handle *lockh, bool lvb_update) { - struct ldlm_extent *ext; - struct cl_lock *lock; - struct cl_lock_descr *descr; + struct ldlm_lock *dlmlock; - LASSERT(dlmlock->l_granted_mode == dlmlock->l_req_mode); + dlmlock = ldlm_handle2lock_long(lockh, 0); + LASSERT(dlmlock); - if (olck->ols_state < OLS_GRANTED) { - lock = olck->ols_cl.cls_lock; - ext = &dlmlock->l_policy_data.l_extent; - descr = &osc_env_info(env)->oti_descr; - descr->cld_obj = lock->cll_descr.cld_obj; + /* lock reference taken by ldlm_handle2lock_long() is + * owned by osc_lock and released in osc_lock_detach() + */ + lu_ref_add(&dlmlock->l_reference, "osc_lock", oscl); + oscl->ols_has_ref = 1; - /* XXX check that ->l_granted_mode is valid. */ - descr->cld_mode = osc_ldlm2cl_lock(dlmlock->l_granted_mode); - descr->cld_start = cl_index(descr->cld_obj, ext->start); - descr->cld_end = cl_index(descr->cld_obj, ext->end); - descr->cld_gid = ext->gid; - /* - * tell upper layers the extent of the lock that was actually - * granted - */ - olck->ols_state = OLS_GRANTED; - osc_lock_lvb_update(env, olck, rc); - - /* release DLM spin-locks to allow cl_lock_{modify,signal}() - * to take a semaphore on a parent lock. This is safe, because - * spin-locks are needed to protect consistency of - * dlmlock->l_*_mode and LVB, and we have finished processing - * them. + LASSERT(!oscl->ols_dlmlock); + oscl->ols_dlmlock = dlmlock; + + /* This may be a matched lock for glimpse request, do not hold + * lock reference in that case. + */ + if (!oscl->ols_glimpse) { + /* hold a refc for non glimpse lock which will + * be released in osc_lock_cancel() */ - unlock_res_and_lock(dlmlock); - cl_lock_modify(env, lock, descr); - cl_lock_signal(env, lock); - LINVRNT(osc_lock_invariant(olck)); - lock_res_and_lock(dlmlock); + lustre_handle_copy(&oscl->ols_handle, lockh); + ldlm_lock_addref(lockh, oscl->ols_einfo.ei_mode); + oscl->ols_hold = 1; } -} - -static void osc_lock_upcall0(const struct lu_env *env, struct osc_lock *olck) - -{ - struct ldlm_lock *dlmlock; - - dlmlock = ldlm_handle2lock_long(&olck->ols_handle, 0); - LASSERT(dlmlock); + /* Lock must have been granted. */ lock_res_and_lock(dlmlock); - spin_lock(&osc_ast_guard); - LASSERT(dlmlock->l_ast_data == olck); - LASSERT(!olck->ols_lock); - olck->ols_lock = dlmlock; - spin_unlock(&osc_ast_guard); + if (dlmlock->l_granted_mode == dlmlock->l_req_mode) { + struct ldlm_extent *ext = &dlmlock->l_policy_data.l_extent; + struct cl_lock_descr *descr = &oscl->ols_cl.cls_lock->cll_descr; - /* - * Lock might be not yet granted. In this case, completion ast - * (osc_ldlm_completion_ast()) comes later and finishes lock - * granting. - */ - if (dlmlock->l_granted_mode == dlmlock->l_req_mode) - osc_lock_granted(env, olck, dlmlock, 0); - unlock_res_and_lock(dlmlock); + /* extend the lock extent, otherwise it will have problem when + * we decide whether to grant a lockless lock. + */ + descr->cld_mode = osc_ldlm2cl_lock(dlmlock->l_granted_mode); + descr->cld_start = cl_index(descr->cld_obj, ext->start); + descr->cld_end = cl_index(descr->cld_obj, ext->end); + descr->cld_gid = ext->gid; - /* - * osc_enqueue_interpret() decrefs asynchronous locks, counter - * this. - */ - ldlm_lock_addref(&olck->ols_handle, olck->ols_einfo.ei_mode); - olck->ols_hold = 1; + /* no lvb update for matched lock */ + if (lvb_update) { + LASSERT(oscl->ols_flags & LDLM_FL_LVB_READY); + osc_lock_lvb_update(env, cl2osc(oscl->ols_cl.cls_obj), + dlmlock, NULL); + } + LINVRNT(osc_lock_invariant(oscl)); + } + unlock_res_and_lock(dlmlock); - /* lock reference taken by ldlm_handle2lock_long() is owned by - * osc_lock and released in osc_lock_detach() - */ - lu_ref_add(&dlmlock->l_reference, "osc_lock", olck); - olck->ols_has_ref = 1; + LASSERT(oscl->ols_state != OLS_GRANTED); + oscl->ols_state = OLS_GRANTED; } /** @@ -481,143 +292,124 @@ static void osc_lock_upcall0(const struct lu_env *env, struct osc_lock *olck) * received from a server, or after osc_enqueue_base() matched a local DLM * lock. */ -static int osc_lock_upcall(void *cookie, int errcode) +static int osc_lock_upcall(void *cookie, struct lustre_handle *lockh, + int errcode) { - struct osc_lock *olck = cookie; - struct cl_lock_slice *slice = &olck->ols_cl; - struct cl_lock *lock = slice->cls_lock; + struct osc_lock *oscl = cookie; + struct cl_lock_slice *slice = &oscl->ols_cl; struct lu_env *env; struct cl_env_nest nest; + int rc; env = cl_env_nested_get(&nest); - if (!IS_ERR(env)) { - int rc; + /* should never happen, similar to osc_ldlm_blocking_ast(). */ + LASSERT(!IS_ERR(env)); + + rc = ldlm_error2errno(errcode); + if (oscl->ols_state == OLS_ENQUEUED) { + oscl->ols_state = OLS_UPCALL_RECEIVED; + } else if (oscl->ols_state == OLS_CANCELLED) { + rc = -EIO; + } else { + CERROR("Impossible state: %d\n", oscl->ols_state); + LBUG(); + } - cl_lock_mutex_get(env, lock); + if (rc == 0) + osc_lock_granted(env, oscl, lockh, errcode == ELDLM_OK); - LASSERT(lock->cll_state >= CLS_QUEUING); - if (olck->ols_state == OLS_ENQUEUED) { - olck->ols_state = OLS_UPCALL_RECEIVED; - rc = ldlm_error2errno(errcode); - } else if (olck->ols_state == OLS_CANCELLED) { - rc = -EIO; - } else { - CERROR("Impossible state: %d\n", olck->ols_state); - LBUG(); - } - if (rc) { - struct ldlm_lock *dlmlock; - - dlmlock = ldlm_handle2lock(&olck->ols_handle); - if (dlmlock) { - lock_res_and_lock(dlmlock); - spin_lock(&osc_ast_guard); - LASSERT(!olck->ols_lock); - dlmlock->l_ast_data = NULL; - olck->ols_handle.cookie = 0ULL; - spin_unlock(&osc_ast_guard); - ldlm_lock_fail_match_locked(dlmlock); - unlock_res_and_lock(dlmlock); - LDLM_LOCK_PUT(dlmlock); - } - } else { - if (olck->ols_glimpse) - olck->ols_glimpse = 0; - osc_lock_upcall0(env, olck); - } + /* Error handling, some errors are tolerable. */ + if (oscl->ols_locklessable && rc == -EUSERS) { + /* This is a tolerable error, turn this lock into + * lockless lock. + */ + osc_object_set_contended(cl2osc(slice->cls_obj)); + LASSERT(slice->cls_ops == &osc_lock_ops); + + /* Change this lock to ldlmlock-less lock. */ + osc_lock_to_lockless(env, oscl, 1); + oscl->ols_state = OLS_GRANTED; + rc = 0; + } else if (oscl->ols_glimpse && rc == -ENAVAIL) { + LASSERT(oscl->ols_flags & LDLM_FL_LVB_READY); + osc_lock_lvb_update(env, cl2osc(slice->cls_obj), + NULL, &oscl->ols_lvb); + /* Hide the error. */ + rc = 0; + } - /* Error handling, some errors are tolerable. */ - if (olck->ols_locklessable && rc == -EUSERS) { - /* This is a tolerable error, turn this lock into - * lockless lock. - */ - osc_object_set_contended(cl2osc(slice->cls_obj)); - LASSERT(slice->cls_ops == &osc_lock_ops); + if (oscl->ols_owner) + cl_sync_io_note(env, oscl->ols_owner, rc); + cl_env_nested_put(&nest, env); - /* Change this lock to ldlmlock-less lock. */ - osc_lock_to_lockless(env, olck, 1); - olck->ols_state = OLS_GRANTED; - rc = 0; - } else if (olck->ols_glimpse && rc == -ENAVAIL) { - osc_lock_lvb_update(env, olck, rc); - cl_lock_delete(env, lock); - /* Hide the error. */ - rc = 0; - } - - if (rc == 0) { - /* For AGL case, the RPC sponsor may exits the cl_lock - * processing without wait() called before related OSC - * lock upcall(). So update the lock status according - * to the enqueue result inside AGL upcall(). - */ - if (olck->ols_agl) { - lock->cll_flags |= CLF_FROM_UPCALL; - cl_wait_try(env, lock); - lock->cll_flags &= ~CLF_FROM_UPCALL; - if (!olck->ols_glimpse) - olck->ols_agl = 0; - } - cl_lock_signal(env, lock); - /* del user for lock upcall cookie */ - cl_unuse_try(env, lock); - } else { - /* del user for lock upcall cookie */ - cl_lock_user_del(env, lock); - cl_lock_error(env, lock, rc); - } + return rc; +} - /* release cookie reference, acquired by osc_lock_enqueue() */ - cl_lock_hold_release(env, lock, "upcall", lock); - cl_lock_mutex_put(env, lock); +static int osc_lock_upcall_agl(void *cookie, struct lustre_handle *lockh, + int errcode) +{ + struct osc_object *osc = cookie; + struct ldlm_lock *dlmlock; + struct lu_env *env; + struct cl_env_nest nest; - lu_ref_del(&lock->cll_reference, "upcall", lock); - /* This maybe the last reference, so must be called after - * cl_lock_mutex_put(). - */ - cl_lock_put(env, lock); + env = cl_env_nested_get(&nest); + LASSERT(!IS_ERR(env)); - cl_env_nested_put(&nest, env); - } else { - /* should never happen, similar to osc_ldlm_blocking_ast(). */ - LBUG(); + if (errcode == ELDLM_LOCK_MATCHED) { + errcode = ELDLM_OK; + goto out; } - return errcode; + + if (errcode != ELDLM_OK) + goto out; + + dlmlock = ldlm_handle2lock(lockh); + LASSERT(dlmlock); + + lock_res_and_lock(dlmlock); + LASSERT(dlmlock->l_granted_mode == dlmlock->l_req_mode); + + /* there is no osc_lock associated with AGL lock */ + osc_lock_lvb_update(env, osc, dlmlock, NULL); + + unlock_res_and_lock(dlmlock); + LDLM_LOCK_PUT(dlmlock); + +out: + cl_object_put(env, osc2cl(osc)); + cl_env_nested_put(&nest, env); + return ldlm_error2errno(errcode); } -/** - * Core of osc_dlm_blocking_ast() logic. - */ -static void osc_lock_blocking(const struct lu_env *env, - struct ldlm_lock *dlmlock, - struct osc_lock *olck, int blocking) +static int osc_lock_flush(struct osc_object *obj, pgoff_t start, pgoff_t end, + enum cl_lock_mode mode, int discard) { - struct cl_lock *lock = olck->ols_cl.cls_lock; + struct lu_env *env; + struct cl_env_nest nest; + int rc = 0; + int rc2 = 0; - LASSERT(olck->ols_lock == dlmlock); - CLASSERT(OLS_BLOCKED < OLS_CANCELLED); - LASSERT(!osc_lock_is_lockless(olck)); + env = cl_env_nested_get(&nest); + if (IS_ERR(env)) + return PTR_ERR(env); + + if (mode == CLM_WRITE) { + rc = osc_cache_writeback_range(env, obj, start, end, 1, + discard); + CDEBUG(D_CACHE, "object %p: [%lu -> %lu] %d pages were %s.\n", + obj, start, end, rc, + discard ? "discarded" : "written back"); + if (rc > 0) + rc = 0; + } - /* - * Lock might be still addref-ed here, if e.g., blocking ast - * is sent for a failed lock. - */ - osc_lock_unhold(olck); + rc2 = osc_lock_discard_pages(env, obj, start, end, mode); + if (rc == 0 && rc2 < 0) + rc = rc2; - if (blocking && olck->ols_state < OLS_BLOCKED) - /* - * Move osc_lock into OLS_BLOCKED before canceling the lock, - * because it recursively re-enters osc_lock_blocking(), with - * the state set to OLS_CANCELLED. - */ - olck->ols_state = OLS_BLOCKED; - /* - * cancel and destroy lock at least once no matter how blocking ast is - * entered (see comment above osc_ldlm_blocking_ast() for use - * cases). cl_lock_cancel() and cl_lock_delete() are idempotent. - */ - cl_lock_cancel(env, lock); - cl_lock_delete(env, lock); + cl_env_nested_put(&nest, env); + return rc; } /** @@ -628,65 +420,63 @@ static int osc_dlm_blocking_ast0(const struct lu_env *env, struct ldlm_lock *dlmlock, void *data, int flag) { - struct osc_lock *olck; - struct cl_lock *lock; - int result; - int cancel; - - LASSERT(flag == LDLM_CB_BLOCKING || flag == LDLM_CB_CANCELING); - - cancel = 0; - olck = osc_ast_data_get(dlmlock); - if (olck) { - lock = olck->ols_cl.cls_lock; - cl_lock_mutex_get(env, lock); - LINVRNT(osc_lock_invariant(olck)); - if (olck->ols_ast_wait) { - /* wake up osc_lock_use() */ - cl_lock_signal(env, lock); - olck->ols_ast_wait = 0; - } - /* - * Lock might have been canceled while this thread was - * sleeping for lock mutex, but olck is pinned in memory. - */ - if (olck == dlmlock->l_ast_data) { - /* - * NOTE: DLM sends blocking AST's for failed locks - * (that are still in pre-OLS_GRANTED state) - * too, and they have to be canceled otherwise - * DLM lock is never destroyed and stuck in - * the memory. - * - * Alternatively, ldlm_cli_cancel() can be - * called here directly for osc_locks with - * ols_state < OLS_GRANTED to maintain an - * invariant that ->clo_cancel() is only called - * for locks that were granted. - */ - LASSERT(data == olck); - osc_lock_blocking(env, dlmlock, - olck, flag == LDLM_CB_BLOCKING); - } else - cancel = 1; - cl_lock_mutex_put(env, lock); - osc_ast_data_put(env, olck); - } else - /* - * DLM lock exists, but there is no cl_lock attached to it. - * This is a `normal' race. cl_object and its cl_lock's can be - * removed by memory pressure, together with all pages. + struct cl_object *obj = NULL; + int result = 0; + int discard; + enum cl_lock_mode mode = CLM_READ; + + LASSERT(flag == LDLM_CB_CANCELING); + + lock_res_and_lock(dlmlock); + if (dlmlock->l_granted_mode != dlmlock->l_req_mode) { + dlmlock->l_ast_data = NULL; + unlock_res_and_lock(dlmlock); + return 0; + } + + discard = ldlm_is_discard_data(dlmlock); + if (dlmlock->l_granted_mode & (LCK_PW | LCK_GROUP)) + mode = CLM_WRITE; + + if (dlmlock->l_ast_data) { + obj = osc2cl(dlmlock->l_ast_data); + dlmlock->l_ast_data = NULL; + + cl_object_get(obj); + } + + unlock_res_and_lock(dlmlock); + + /* if l_ast_data is NULL, the dlmlock was enqueued by AGL or + * the object has been destroyed. + */ + if (obj) { + struct ldlm_extent *extent = &dlmlock->l_policy_data.l_extent; + struct cl_attr *attr = &osc_env_info(env)->oti_attr; + __u64 old_kms; + + /* Destroy pages covered by the extent of the DLM lock */ + result = osc_lock_flush(cl2osc(obj), + cl_index(obj, extent->start), + cl_index(obj, extent->end), + mode, discard); + + /* losing a lock, update kms */ + lock_res_and_lock(dlmlock); + cl_object_attr_lock(obj); + /* Must get the value under the lock to avoid race. */ + old_kms = cl2osc(obj)->oo_oinfo->loi_kms; + /* Update the kms. Need to loop all granted locks. + * Not a problem for the client */ - cancel = (flag == LDLM_CB_BLOCKING); + attr->cat_kms = ldlm_extent_shift_kms(dlmlock, old_kms); - if (cancel) { - struct lustre_handle *lockh; + cl_object_attr_set(env, obj, attr, CAT_KMS); + cl_object_attr_unlock(obj); + unlock_res_and_lock(dlmlock); - lockh = &osc_env_info(env)->oti_handle; - ldlm_lock2handle(dlmlock, lockh); - result = ldlm_cli_cancel(lockh, LCF_ASYNC); - } else - result = 0; + cl_object_put(env, obj); + } return result; } @@ -736,107 +526,52 @@ static int osc_ldlm_blocking_ast(struct ldlm_lock *dlmlock, struct ldlm_lock_desc *new, void *data, int flag) { - struct lu_env *env; - struct cl_env_nest nest; - int result; + int result = 0; - /* - * This can be called in the context of outer IO, e.g., - * - * cl_enqueue()->... - * ->osc_enqueue_base()->... - * ->ldlm_prep_elc_req()->... - * ->ldlm_cancel_callback()->... - * ->osc_ldlm_blocking_ast() - * - * new environment has to be created to not corrupt outer context. - */ - env = cl_env_nested_get(&nest); - if (!IS_ERR(env)) { - result = osc_dlm_blocking_ast0(env, dlmlock, data, flag); - cl_env_nested_put(&nest, env); - } else { - result = PTR_ERR(env); - /* - * XXX This should never happen, as cl_lock is - * stuck. Pre-allocated environment a la vvp_inode_fini_env - * should be used. - */ - LBUG(); - } - if (result != 0) { + switch (flag) { + case LDLM_CB_BLOCKING: { + struct lustre_handle lockh; + + ldlm_lock2handle(dlmlock, &lockh); + result = ldlm_cli_cancel(&lockh, LCF_ASYNC); if (result == -ENODATA) result = 0; - else - CERROR("BAST failed: %d\n", result); + break; } - return result; -} + case LDLM_CB_CANCELING: { + struct lu_env *env; + struct cl_env_nest nest; -static int osc_ldlm_completion_ast(struct ldlm_lock *dlmlock, - __u64 flags, void *data) -{ - struct cl_env_nest nest; - struct lu_env *env; - struct osc_lock *olck; - struct cl_lock *lock; - int result; - int dlmrc; - - /* first, do dlm part of the work */ - dlmrc = ldlm_completion_ast_async(dlmlock, flags, data); - /* then, notify cl_lock */ - env = cl_env_nested_get(&nest); - if (!IS_ERR(env)) { - olck = osc_ast_data_get(dlmlock); - if (olck) { - lock = olck->ols_cl.cls_lock; - cl_lock_mutex_get(env, lock); - /* - * ldlm_handle_cp_callback() copied LVB from request - * to lock->l_lvb_data, store it in osc_lock. - */ - LASSERT(dlmlock->l_lvb_data); - lock_res_and_lock(dlmlock); - olck->ols_lvb = *(struct ost_lvb *)dlmlock->l_lvb_data; - if (!olck->ols_lock) { - /* - * upcall (osc_lock_upcall()) hasn't yet been - * called. Do nothing now, upcall will bind - * olck to dlmlock and signal the waiters. - * - * This maintains an invariant that osc_lock - * and ldlm_lock are always bound when - * osc_lock is in OLS_GRANTED state. - */ - } else if (dlmlock->l_granted_mode == - dlmlock->l_req_mode) { - osc_lock_granted(env, olck, dlmlock, dlmrc); - } - unlock_res_and_lock(dlmlock); + /* + * This can be called in the context of outer IO, e.g., + * + * osc_enqueue_base()->... + * ->ldlm_prep_elc_req()->... + * ->ldlm_cancel_callback()->... + * ->osc_ldlm_blocking_ast() + * + * new environment has to be created to not corrupt outer + * context. + */ + env = cl_env_nested_get(&nest); + if (IS_ERR(env)) { + result = PTR_ERR(env); + break; + } - if (dlmrc != 0) { - CL_LOCK_DEBUG(D_ERROR, env, lock, - "dlmlock returned %d\n", dlmrc); - cl_lock_error(env, lock, dlmrc); - } - cl_lock_mutex_put(env, lock); - osc_ast_data_put(env, olck); - result = 0; - } else - result = -ELDLM_NO_LOCK_DATA; + result = osc_dlm_blocking_ast0(env, dlmlock, data, flag); cl_env_nested_put(&nest, env); - } else - result = PTR_ERR(env); - return dlmrc ?: result; + break; + } + default: + LBUG(); + } + return result; } static int osc_ldlm_glimpse_ast(struct ldlm_lock *dlmlock, void *data) { struct ptlrpc_request *req = data; - struct osc_lock *olck; - struct cl_lock *lock; - struct cl_object *obj; struct cl_env_nest nest; struct lu_env *env; struct ost_lvb *lvb; @@ -847,14 +582,16 @@ static int osc_ldlm_glimpse_ast(struct ldlm_lock *dlmlock, void *data) env = cl_env_nested_get(&nest); if (!IS_ERR(env)) { - /* osc_ast_data_get() has to go after environment is - * allocated, because osc_ast_data() acquires a - * reference to a lock, and it can only be released in - * environment. - */ - olck = osc_ast_data_get(dlmlock); - if (olck) { - lock = olck->ols_cl.cls_lock; + struct cl_object *obj = NULL; + + lock_res_and_lock(dlmlock); + if (dlmlock->l_ast_data) { + obj = osc2cl(dlmlock->l_ast_data); + cl_object_get(obj); + } + unlock_res_and_lock(dlmlock); + + if (obj) { /* Do not grab the mutex of cl_lock for glimpse. * See LU-1274 for details. * BTW, it's okay for cl_lock to be cancelled during @@ -869,7 +606,6 @@ static int osc_ldlm_glimpse_ast(struct ldlm_lock *dlmlock, void *data) result = req_capsule_server_pack(cap); if (result == 0) { lvb = req_capsule_server_get(cap, &RMF_DLM_LVB); - obj = lock->cll_descr.cld_obj; result = cl_object_glimpse(env, obj, lvb); } if (!exp_connect_lvb_type(req->rq_export)) @@ -877,7 +613,7 @@ static int osc_ldlm_glimpse_ast(struct ldlm_lock *dlmlock, void *data) &RMF_DLM_LVB, sizeof(struct ost_lvb_v1), RCL_SERVER); - osc_ast_data_put(env, olck); + cl_object_put(env, obj); } else { /* * These errors are normal races, so we don't want to @@ -888,44 +624,123 @@ static int osc_ldlm_glimpse_ast(struct ldlm_lock *dlmlock, void *data) result = -ELDLM_NO_LOCK_DATA; } cl_env_nested_put(&nest, env); - } else + } else { result = PTR_ERR(env); + } req->rq_status = result; return result; } -static unsigned long osc_lock_weigh(const struct lu_env *env, - const struct cl_lock_slice *slice) +static int weigh_cb(const struct lu_env *env, struct cl_io *io, + struct osc_page *ops, void *cbdata) { - /* - * don't need to grab coh_page_guard since we don't care the exact # - * of pages.. - */ - return cl_object_header(slice->cls_obj)->coh_pages; + struct cl_page *page = ops->ops_cl.cpl_page; + + if (cl_page_is_vmlocked(env, page) || + PageDirty(page->cp_vmpage) || PageWriteback(page->cp_vmpage) + ) { + (*(unsigned long *)cbdata)++; + return CLP_GANG_ABORT; + } + + return CLP_GANG_OKAY; } -static void osc_lock_build_einfo(const struct lu_env *env, - const struct cl_lock *clock, - struct osc_lock *lock, - struct ldlm_enqueue_info *einfo) +static unsigned long osc_lock_weight(const struct lu_env *env, + struct osc_object *oscobj, + struct ldlm_extent *extent) +{ + struct cl_io *io = &osc_env_info(env)->oti_io; + struct cl_object *obj = cl_object_top(&oscobj->oo_cl); + unsigned long npages = 0; + int result; + + io->ci_obj = obj; + io->ci_ignore_layout = 1; + result = cl_io_init(env, io, CIT_MISC, io->ci_obj); + if (result != 0) + return result; + + do { + result = osc_page_gang_lookup(env, io, oscobj, + cl_index(obj, extent->start), + cl_index(obj, extent->end), + weigh_cb, (void *)&npages); + if (result == CLP_GANG_ABORT) + break; + if (result == CLP_GANG_RESCHED) + cond_resched(); + } while (result != CLP_GANG_OKAY); + cl_io_fini(env, io); + + return npages; +} + +/** + * Get the weight of dlm lock for early cancellation. + */ +unsigned long osc_ldlm_weigh_ast(struct ldlm_lock *dlmlock) { - enum cl_lock_mode mode; + struct cl_env_nest nest; + struct lu_env *env; + struct osc_object *obj; + struct osc_lock *oscl; + unsigned long weight; + bool found = false; + + might_sleep(); + /* + * osc_ldlm_weigh_ast has a complex context since it might be called + * because of lock canceling, or from user's input. We have to make + * a new environment for it. Probably it is implementation safe to use + * the upper context because cl_lock_put don't modify environment + * variables. But just in case .. + */ + env = cl_env_nested_get(&nest); + if (IS_ERR(env)) + /* Mostly because lack of memory, do not eliminate this lock */ + return 1; - mode = clock->cll_descr.cld_mode; - if (mode == CLM_PHANTOM) + LASSERT(dlmlock->l_resource->lr_type == LDLM_EXTENT); + obj = dlmlock->l_ast_data; + if (obj) { + weight = 1; + goto out; + } + + spin_lock(&obj->oo_ol_spin); + list_for_each_entry(oscl, &obj->oo_ol_list, ols_nextlock_oscobj) { + if (oscl->ols_dlmlock && oscl->ols_dlmlock != dlmlock) + continue; + found = true; + } + spin_unlock(&obj->oo_ol_spin); + if (found) { /* - * For now, enqueue all glimpse locks in read mode. In the - * future, client might choose to enqueue LCK_PW lock for - * glimpse on a file opened for write. + * If the lock is being used by an IO, definitely not cancel it. */ - mode = CLM_READ; + weight = 1; + goto out; + } + + weight = osc_lock_weight(env, obj, &dlmlock->l_policy_data.l_extent); + +out: + cl_env_nested_put(&nest, env); + return weight; +} +static void osc_lock_build_einfo(const struct lu_env *env, + const struct cl_lock *lock, + struct osc_object *osc, + struct ldlm_enqueue_info *einfo) +{ einfo->ei_type = LDLM_EXTENT; - einfo->ei_mode = osc_cl_lock2ldlm(mode); + einfo->ei_mode = osc_cl_lock2ldlm(lock->cll_descr.cld_mode); einfo->ei_cb_bl = osc_ldlm_blocking_ast; - einfo->ei_cb_cp = osc_ldlm_completion_ast; + einfo->ei_cb_cp = ldlm_completion_ast; einfo->ei_cb_gl = osc_ldlm_glimpse_ast; - einfo->ei_cbdata = lock; /* value to be put into ->l_ast_data */ + einfo->ei_cbdata = osc; /* value to be put into ->l_ast_data */ } /** @@ -981,113 +796,100 @@ static void osc_lock_to_lockless(const struct lu_env *env, LASSERT(ergo(ols->ols_glimpse, !osc_lock_is_lockless(ols))); } -static int osc_lock_compatible(const struct osc_lock *qing, - const struct osc_lock *qed) +static bool osc_lock_compatible(const struct osc_lock *qing, + const struct osc_lock *qed) { - enum cl_lock_mode qing_mode; - enum cl_lock_mode qed_mode; + struct cl_lock_descr *qed_descr = &qed->ols_cl.cls_lock->cll_descr; + struct cl_lock_descr *qing_descr = &qing->ols_cl.cls_lock->cll_descr; - qing_mode = qing->ols_cl.cls_lock->cll_descr.cld_mode; - if (qed->ols_glimpse && - (qed->ols_state >= OLS_UPCALL_RECEIVED || qing_mode == CLM_READ)) - return 1; + if (qed->ols_glimpse) + return true; + + if (qing_descr->cld_mode == CLM_READ && qed_descr->cld_mode == CLM_READ) + return true; + + if (qed->ols_state < OLS_GRANTED) + return true; + + if (qed_descr->cld_mode >= qing_descr->cld_mode && + qed_descr->cld_start <= qing_descr->cld_start && + qed_descr->cld_end >= qing_descr->cld_end) + return true; - qed_mode = qed->ols_cl.cls_lock->cll_descr.cld_mode; - return ((qing_mode == CLM_READ) && (qed_mode == CLM_READ)); + return false; } -/** - * Cancel all conflicting locks and wait for them to be destroyed. - * - * This function is used for two purposes: - * - * - early cancel all conflicting locks before starting IO, and - * - * - guarantee that pages added to the page cache by lockless IO are never - * covered by locks other than lockless IO lock, and, hence, are not - * visible to other threads. - */ -static int osc_lock_enqueue_wait(const struct lu_env *env, - const struct osc_lock *olck) +static void osc_lock_wake_waiters(const struct lu_env *env, + struct osc_object *osc, + struct osc_lock *oscl) { - struct cl_lock *lock = olck->ols_cl.cls_lock; - struct cl_lock_descr *descr = &lock->cll_descr; - struct cl_object_header *hdr = cl_object_header(descr->cld_obj); - struct cl_lock *scan; - struct cl_lock *conflict = NULL; - int lockless = osc_lock_is_lockless(olck); - int rc = 0; + spin_lock(&osc->oo_ol_spin); + list_del_init(&oscl->ols_nextlock_oscobj); + spin_unlock(&osc->oo_ol_spin); - LASSERT(cl_lock_is_mutexed(lock)); + spin_lock(&oscl->ols_lock); + while (!list_empty(&oscl->ols_waiting_list)) { + struct osc_lock *scan; - /* make it enqueue anyway for glimpse lock, because we actually - * don't need to cancel any conflicting locks. - */ - if (olck->ols_glimpse) - return 0; + scan = list_entry(oscl->ols_waiting_list.next, struct osc_lock, + ols_wait_entry); + list_del_init(&scan->ols_wait_entry); - spin_lock(&hdr->coh_lock_guard); - list_for_each_entry(scan, &hdr->coh_locks, cll_linkage) { - struct cl_lock_descr *cld = &scan->cll_descr; - const struct osc_lock *scan_ols; + cl_sync_io_note(env, scan->ols_owner, 0); + } + spin_unlock(&oscl->ols_lock); +} + +static void osc_lock_enqueue_wait(const struct lu_env *env, + struct osc_object *obj, + struct osc_lock *oscl) +{ + struct osc_lock *tmp_oscl; + struct cl_lock_descr *need = &oscl->ols_cl.cls_lock->cll_descr; + struct cl_sync_io *waiter = &osc_env_info(env)->oti_anchor; - if (scan == lock) + spin_lock(&obj->oo_ol_spin); + list_add_tail(&oscl->ols_nextlock_oscobj, &obj->oo_ol_list); + +restart: + list_for_each_entry(tmp_oscl, &obj->oo_ol_list, + ols_nextlock_oscobj) { + struct cl_lock_descr *descr; + + if (tmp_oscl == oscl) break; - if (scan->cll_state < CLS_QUEUING || - scan->cll_state == CLS_FREEING || - cld->cld_start > descr->cld_end || - cld->cld_end < descr->cld_start) + descr = &tmp_oscl->ols_cl.cls_lock->cll_descr; + if (descr->cld_start > need->cld_end || + descr->cld_end < need->cld_start) continue; - /* overlapped and living locks. */ + /* We're not supposed to give up group lock */ + if (descr->cld_mode == CLM_GROUP) + break; - /* We're not supposed to give up group lock. */ - if (scan->cll_descr.cld_mode == CLM_GROUP) { - LASSERT(descr->cld_mode != CLM_GROUP || - descr->cld_gid != scan->cll_descr.cld_gid); + if (!osc_lock_is_lockless(oscl) && + osc_lock_compatible(oscl, tmp_oscl)) continue; - } - scan_ols = osc_lock_at(scan); + /* wait for conflicting lock to be canceled */ + cl_sync_io_init(waiter, 1, cl_sync_io_end); + oscl->ols_owner = waiter; - /* We need to cancel the compatible locks if we're enqueuing - * a lockless lock, for example: - * imagine that client has PR lock on [0, 1000], and thread T0 - * is doing lockless IO in [500, 1500] region. Concurrent - * thread T1 can see lockless data in [500, 1000], which is - * wrong, because these data are possibly stale. - */ - if (!lockless && osc_lock_compatible(olck, scan_ols)) - continue; + spin_lock(&tmp_oscl->ols_lock); + /* add oscl into tmp's ols_waiting list */ + list_add_tail(&oscl->ols_wait_entry, + &tmp_oscl->ols_waiting_list); + spin_unlock(&tmp_oscl->ols_lock); - cl_lock_get_trust(scan); - conflict = scan; - break; - } - spin_unlock(&hdr->coh_lock_guard); + spin_unlock(&obj->oo_ol_spin); + (void)cl_sync_io_wait(env, waiter, 0); - if (conflict) { - if (lock->cll_descr.cld_mode == CLM_GROUP) { - /* we want a group lock but a previous lock request - * conflicts, we do not wait but return 0 so the - * request is send to the server - */ - CDEBUG(D_DLMTRACE, "group lock %p is conflicted with %p, no wait, send to server\n", - lock, conflict); - cl_lock_put(env, conflict); - rc = 0; - } else { - CDEBUG(D_DLMTRACE, "lock %p is conflicted with %p, will wait\n", - lock, conflict); - LASSERT(!lock->cll_conflict); - lu_ref_add(&conflict->cll_reference, "cancel-wait", - lock); - lock->cll_conflict = conflict; - rc = CLO_WAIT; - } + spin_lock(&obj->oo_ol_spin); + oscl->ols_owner = NULL; + goto restart; } - return rc; + spin_unlock(&obj->oo_ol_spin); } /** @@ -1106,188 +908,122 @@ static int osc_lock_enqueue_wait(const struct lu_env *env, */ static int osc_lock_enqueue(const struct lu_env *env, const struct cl_lock_slice *slice, - struct cl_io *unused, __u32 enqflags) + struct cl_io *unused, struct cl_sync_io *anchor) { - struct osc_lock *ols = cl2osc_lock(slice); - struct cl_lock *lock = ols->ols_cl.cls_lock; + struct osc_thread_info *info = osc_env_info(env); + struct osc_io *oio = osc_env_io(env); + struct osc_object *osc = cl2osc(slice->cls_obj); + struct osc_lock *oscl = cl2osc_lock(slice); + struct cl_lock *lock = slice->cls_lock; + struct ldlm_res_id *resname = &info->oti_resname; + ldlm_policy_data_t *policy = &info->oti_policy; + osc_enqueue_upcall_f upcall = osc_lock_upcall; + void *cookie = oscl; + bool async = false; int result; - LASSERT(cl_lock_is_mutexed(lock)); - LASSERTF(ols->ols_state == OLS_NEW, - "Impossible state: %d\n", ols->ols_state); - - LASSERTF(ergo(ols->ols_glimpse, lock->cll_descr.cld_mode <= CLM_READ), - "lock = %p, ols = %p\n", lock, ols); + LASSERTF(ergo(oscl->ols_glimpse, lock->cll_descr.cld_mode <= CLM_READ), + "lock = %p, ols = %p\n", lock, oscl); - result = osc_lock_enqueue_wait(env, ols); - if (result == 0) { - if (!osc_lock_is_lockless(ols)) { - struct osc_object *obj = cl2osc(slice->cls_obj); - struct osc_thread_info *info = osc_env_info(env); - struct ldlm_res_id *resname = &info->oti_resname; - ldlm_policy_data_t *policy = &info->oti_policy; - struct ldlm_enqueue_info *einfo = &ols->ols_einfo; + if (oscl->ols_state == OLS_GRANTED) + return 0; - /* lock will be passed as upcall cookie, - * hold ref to prevent to be released. - */ - cl_lock_hold_add(env, lock, "upcall", lock); - /* a user for lock also */ - cl_lock_user_add(env, lock); - ols->ols_state = OLS_ENQUEUED; + if (oscl->ols_flags & LDLM_FL_TEST_LOCK) + goto enqueue_base; - /* - * XXX: this is possible blocking point as - * ldlm_lock_match(LDLM_FL_LVB_READY) waits for - * LDLM_CP_CALLBACK. - */ - ostid_build_res_name(&obj->oo_oinfo->loi_oi, resname); - osc_lock_build_policy(env, lock, policy); - result = osc_enqueue_base(osc_export(obj), resname, - &ols->ols_flags, policy, - &ols->ols_lvb, - obj->oo_oinfo->loi_kms_valid, - osc_lock_upcall, - ols, einfo, &ols->ols_handle, - PTLRPCD_SET, 1, ols->ols_agl); - if (result != 0) { - cl_lock_user_del(env, lock); - cl_lock_unhold(env, lock, "upcall", lock); - if (unlikely(result == -ECANCELED)) { - ols->ols_state = OLS_NEW; - result = 0; - } - } - } else { - ols->ols_state = OLS_GRANTED; - ols->ols_owner = osc_env_io(env); - } + if (oscl->ols_glimpse) { + LASSERT(equi(oscl->ols_agl, !anchor)); + async = true; + goto enqueue_base; } - LASSERT(ergo(ols->ols_glimpse, !osc_lock_is_lockless(ols))); - return result; -} -static int osc_lock_wait(const struct lu_env *env, - const struct cl_lock_slice *slice) -{ - struct osc_lock *olck = cl2osc_lock(slice); - struct cl_lock *lock = olck->ols_cl.cls_lock; - - LINVRNT(osc_lock_invariant(olck)); - - if (olck->ols_glimpse && olck->ols_state >= OLS_UPCALL_RECEIVED) { - if (olck->ols_flags & LDLM_FL_LVB_READY) { - return 0; - } else if (olck->ols_agl) { - if (lock->cll_flags & CLF_FROM_UPCALL) - /* It is from enqueue RPC reply upcall for - * updating state. Do not re-enqueue. - */ - return -ENAVAIL; - olck->ols_state = OLS_NEW; - } else { - LASSERT(lock->cll_error); - return lock->cll_error; - } + osc_lock_enqueue_wait(env, osc, oscl); + + /* we can grant lockless lock right after all conflicting locks + * are canceled. + */ + if (osc_lock_is_lockless(oscl)) { + oscl->ols_state = OLS_GRANTED; + oio->oi_lockless = 1; + return 0; } - if (olck->ols_state == OLS_NEW) { - int rc; - - LASSERT(olck->ols_agl); - olck->ols_agl = 0; - olck->ols_flags &= ~LDLM_FL_BLOCK_NOWAIT; - rc = osc_lock_enqueue(env, slice, NULL, CEF_ASYNC | CEF_MUST); - if (rc != 0) - return rc; - else - return CLO_REENQUEUED; +enqueue_base: + oscl->ols_state = OLS_ENQUEUED; + if (anchor) { + atomic_inc(&anchor->csi_sync_nr); + oscl->ols_owner = anchor; } - LASSERT(equi(olck->ols_state >= OLS_UPCALL_RECEIVED && - lock->cll_error == 0, olck->ols_lock)); + /** + * DLM lock's ast data must be osc_object; + * if glimpse or AGL lock, async of osc_enqueue_base() must be true, + * DLM's enqueue callback set to osc_lock_upcall() with cookie as + * osc_lock. + */ + ostid_build_res_name(&osc->oo_oinfo->loi_oi, resname); + osc_lock_build_einfo(env, lock, osc, &oscl->ols_einfo); + osc_lock_build_policy(env, lock, policy); + if (oscl->ols_agl) { + oscl->ols_einfo.ei_cbdata = NULL; + /* hold a reference for callback */ + cl_object_get(osc2cl(osc)); + upcall = osc_lock_upcall_agl; + cookie = osc; + } + result = osc_enqueue_base(osc_export(osc), resname, &oscl->ols_flags, + policy, &oscl->ols_lvb, + osc->oo_oinfo->loi_kms_valid, + upcall, cookie, + &oscl->ols_einfo, PTLRPCD_SET, async, + oscl->ols_agl); + if (result != 0) { + oscl->ols_state = OLS_CANCELLED; + osc_lock_wake_waiters(env, osc, oscl); - return lock->cll_error ?: olck->ols_state >= OLS_GRANTED ? 0 : CLO_WAIT; + /* hide error for AGL lock. */ + if (oscl->ols_agl) { + cl_object_put(env, osc2cl(osc)); + result = 0; + } + if (anchor) + cl_sync_io_note(env, anchor, result); + } else { + if (osc_lock_is_lockless(oscl)) { + oio->oi_lockless = 1; + } else if (!async) { + LASSERT(oscl->ols_state == OLS_GRANTED); + LASSERT(oscl->ols_hold); + LASSERT(oscl->ols_dlmlock); + } + } + return result; } /** - * An implementation of cl_lock_operations::clo_use() method that pins cached - * lock. + * Breaks a link between osc_lock and dlm_lock. */ -static int osc_lock_use(const struct lu_env *env, - const struct cl_lock_slice *slice) +static void osc_lock_detach(const struct lu_env *env, struct osc_lock *olck) { - struct osc_lock *olck = cl2osc_lock(slice); - int rc; - - LASSERT(!olck->ols_hold); + struct ldlm_lock *dlmlock; - /* - * Atomically check for LDLM_FL_CBPENDING and addref a lock if this - * flag is not set. This protects us from a concurrent blocking ast. - */ - rc = ldlm_lock_addref_try(&olck->ols_handle, olck->ols_einfo.ei_mode); - if (rc == 0) { - olck->ols_hold = 1; - olck->ols_state = OLS_GRANTED; - } else { - struct cl_lock *lock; + dlmlock = olck->ols_dlmlock; + if (!dlmlock) + return; - /* - * Lock is being cancelled somewhere within - * ldlm_handle_bl_callback(): LDLM_FL_CBPENDING is already - * set, but osc_ldlm_blocking_ast() hasn't yet acquired - * cl_lock mutex. - */ - lock = slice->cls_lock; - LASSERT(lock->cll_state == CLS_INTRANSIT); - LASSERT(lock->cll_users > 0); - /* set a flag for osc_dlm_blocking_ast0() to signal the - * lock. - */ - olck->ols_ast_wait = 1; - rc = CLO_WAIT; + if (olck->ols_hold) { + olck->ols_hold = 0; + osc_cancel_base(&olck->ols_handle, olck->ols_einfo.ei_mode); + olck->ols_handle.cookie = 0ULL; } - return rc; -} -static int osc_lock_flush(struct osc_lock *ols, int discard) -{ - struct cl_lock *lock = ols->ols_cl.cls_lock; - struct cl_env_nest nest; - struct lu_env *env; - int result = 0; - - env = cl_env_nested_get(&nest); - if (!IS_ERR(env)) { - struct osc_object *obj = cl2osc(ols->ols_cl.cls_obj); - struct cl_lock_descr *descr = &lock->cll_descr; - int rc = 0; - - if (descr->cld_mode >= CLM_WRITE) { - result = osc_cache_writeback_range(env, obj, - descr->cld_start, - descr->cld_end, - 1, discard); - LDLM_DEBUG(ols->ols_lock, - "lock %p: %d pages were %s.\n", lock, result, - discard ? "discarded" : "written"); - if (result > 0) - result = 0; - } + olck->ols_dlmlock = NULL; - rc = cl_lock_discard_pages(env, lock); - if (result == 0 && rc < 0) - result = rc; - - cl_env_nested_put(&nest, env); - } else - result = PTR_ERR(env); - if (result == 0) { - ols->ols_flush = 1; - LINVRNT(!osc_lock_has_pages(ols)); - } - return result; + /* release a reference taken in osc_lock_upcall(). */ + LASSERT(olck->ols_has_ref); + lu_ref_del(&dlmlock->l_reference, "osc_lock", olck); + LDLM_LOCK_RELEASE(dlmlock); + olck->ols_has_ref = 0; } /** @@ -1307,96 +1043,16 @@ static int osc_lock_flush(struct osc_lock *ols, int discard) static void osc_lock_cancel(const struct lu_env *env, const struct cl_lock_slice *slice) { - struct cl_lock *lock = slice->cls_lock; - struct osc_lock *olck = cl2osc_lock(slice); - struct ldlm_lock *dlmlock = olck->ols_lock; - int result = 0; - int discard; - - LASSERT(cl_lock_is_mutexed(lock)); - LINVRNT(osc_lock_invariant(olck)); - - if (dlmlock) { - int do_cancel; - - discard = !!(dlmlock->l_flags & LDLM_FL_DISCARD_DATA); - if (olck->ols_state >= OLS_GRANTED) - result = osc_lock_flush(olck, discard); - osc_lock_unhold(olck); - - lock_res_and_lock(dlmlock); - /* Now that we're the only user of dlm read/write reference, - * mostly the ->l_readers + ->l_writers should be zero. - * However, there is a corner case. - * See bug 18829 for details. - */ - do_cancel = (dlmlock->l_readers == 0 && - dlmlock->l_writers == 0); - dlmlock->l_flags |= LDLM_FL_CBPENDING; - unlock_res_and_lock(dlmlock); - if (do_cancel) - result = ldlm_cli_cancel(&olck->ols_handle, LCF_ASYNC); - if (result < 0) - CL_LOCK_DEBUG(D_ERROR, env, lock, - "lock %p cancel failure with error(%d)\n", - lock, result); - } - olck->ols_state = OLS_CANCELLED; - olck->ols_flags &= ~LDLM_FL_LVB_READY; - osc_lock_detach(env, olck); -} - -static int osc_lock_has_pages(struct osc_lock *olck) -{ - return 0; -} - -static void osc_lock_delete(const struct lu_env *env, - const struct cl_lock_slice *slice) -{ - struct osc_lock *olck; + struct osc_object *obj = cl2osc(slice->cls_obj); + struct osc_lock *oscl = cl2osc_lock(slice); - olck = cl2osc_lock(slice); - if (olck->ols_glimpse) { - LASSERT(!olck->ols_hold); - LASSERT(!olck->ols_lock); - return; - } + LINVRNT(osc_lock_invariant(oscl)); - LINVRNT(osc_lock_invariant(olck)); - LINVRNT(!osc_lock_has_pages(olck)); + osc_lock_detach(env, oscl); + oscl->ols_state = OLS_CANCELLED; + oscl->ols_flags &= ~LDLM_FL_LVB_READY; - osc_lock_unhold(olck); - osc_lock_detach(env, olck); -} - -/** - * Implements cl_lock_operations::clo_state() method for osc layer. - * - * Maintains osc_lock::ols_owner field. - * - * This assumes that lock always enters CLS_HELD (from some other state) in - * the same IO context as one that requested the lock. This should not be a - * problem, because context is by definition shared by all activity pertaining - * to the same high-level IO. - */ -static void osc_lock_state(const struct lu_env *env, - const struct cl_lock_slice *slice, - enum cl_lock_state state) -{ - struct osc_lock *lock = cl2osc_lock(slice); - - /* - * XXX multiple io contexts can use the lock at the same time. - */ - LINVRNT(osc_lock_invariant(lock)); - if (state == CLS_HELD && slice->cls_lock->cll_state != CLS_HELD) { - struct osc_io *oio = osc_env_io(env); - - LASSERT(!lock->ols_owner); - lock->ols_owner = oio; - } else if (state != CLS_HELD) - lock->ols_owner = NULL; + osc_lock_wake_waiters(env, obj, oscl); } static int osc_lock_print(const struct lu_env *env, void *cookie, @@ -1404,221 +1060,161 @@ static int osc_lock_print(const struct lu_env *env, void *cookie, { struct osc_lock *lock = cl2osc_lock(slice); - /* - * XXX print ldlm lock and einfo properly. - */ (*p)(env, cookie, "%p %#16llx %#llx %d %p ", - lock->ols_lock, lock->ols_flags, lock->ols_handle.cookie, + lock->ols_dlmlock, lock->ols_flags, lock->ols_handle.cookie, lock->ols_state, lock->ols_owner); osc_lvb_print(env, cookie, p, &lock->ols_lvb); return 0; } -static int osc_lock_fits_into(const struct lu_env *env, - const struct cl_lock_slice *slice, - const struct cl_lock_descr *need, - const struct cl_io *io) -{ - struct osc_lock *ols = cl2osc_lock(slice); - - if (need->cld_enq_flags & CEF_NEVER) - return 0; - - if (ols->ols_state >= OLS_CANCELLED) - return 0; - - if (need->cld_mode == CLM_PHANTOM) { - if (ols->ols_agl) - return !(ols->ols_state > OLS_RELEASED); - - /* - * Note: the QUEUED lock can't be matched here, otherwise - * it might cause the deadlocks. - * In read_process, - * P1: enqueued read lock, create sublock1 - * P2: enqueued write lock, create sublock2(conflicted - * with sublock1). - * P1: Grant read lock. - * P1: enqueued glimpse lock(with holding sublock1_read), - * matched with sublock2, waiting sublock2 to be granted. - * But sublock2 can not be granted, because P1 - * will not release sublock1. Bang! - */ - if (ols->ols_state < OLS_GRANTED || - ols->ols_state > OLS_RELEASED) - return 0; - } else if (need->cld_enq_flags & CEF_MUST) { - /* - * If the lock hasn't ever enqueued, it can't be matched - * because enqueue process brings in many information - * which can be used to determine things such as lockless, - * CEF_MUST, etc. - */ - if (ols->ols_state < OLS_UPCALL_RECEIVED && - ols->ols_locklessable) - return 0; - } - return 1; -} - static const struct cl_lock_operations osc_lock_ops = { .clo_fini = osc_lock_fini, .clo_enqueue = osc_lock_enqueue, - .clo_wait = osc_lock_wait, - .clo_unuse = osc_lock_unuse, - .clo_use = osc_lock_use, - .clo_delete = osc_lock_delete, - .clo_state = osc_lock_state, .clo_cancel = osc_lock_cancel, - .clo_weigh = osc_lock_weigh, .clo_print = osc_lock_print, - .clo_fits_into = osc_lock_fits_into, }; -static int osc_lock_lockless_unuse(const struct lu_env *env, - const struct cl_lock_slice *slice) -{ - struct osc_lock *ols = cl2osc_lock(slice); - struct cl_lock *lock = slice->cls_lock; - - LASSERT(ols->ols_state == OLS_GRANTED); - LINVRNT(osc_lock_invariant(ols)); - - cl_lock_cancel(env, lock); - cl_lock_delete(env, lock); - return 0; -} - static void osc_lock_lockless_cancel(const struct lu_env *env, const struct cl_lock_slice *slice) { struct osc_lock *ols = cl2osc_lock(slice); + struct osc_object *osc = cl2osc(slice->cls_obj); + struct cl_lock_descr *descr = &slice->cls_lock->cll_descr; int result; - result = osc_lock_flush(ols, 0); + LASSERT(!ols->ols_dlmlock); + result = osc_lock_flush(osc, descr->cld_start, descr->cld_end, + descr->cld_mode, 0); if (result) CERROR("Pages for lockless lock %p were not purged(%d)\n", ols, result); - ols->ols_state = OLS_CANCELLED; -} - -static int osc_lock_lockless_wait(const struct lu_env *env, - const struct cl_lock_slice *slice) -{ - struct osc_lock *olck = cl2osc_lock(slice); - struct cl_lock *lock = olck->ols_cl.cls_lock; - LINVRNT(osc_lock_invariant(olck)); - LASSERT(olck->ols_state >= OLS_UPCALL_RECEIVED); - - return lock->cll_error; + osc_lock_wake_waiters(env, osc, ols); } -static void osc_lock_lockless_state(const struct lu_env *env, - const struct cl_lock_slice *slice, - enum cl_lock_state state) -{ - struct osc_lock *lock = cl2osc_lock(slice); +static const struct cl_lock_operations osc_lock_lockless_ops = { + .clo_fini = osc_lock_fini, + .clo_enqueue = osc_lock_enqueue, + .clo_cancel = osc_lock_lockless_cancel, + .clo_print = osc_lock_print +}; - LINVRNT(osc_lock_invariant(lock)); - if (state == CLS_HELD) { - struct osc_io *oio = osc_env_io(env); +static void osc_lock_set_writer(const struct lu_env *env, + const struct cl_io *io, + struct cl_object *obj, struct osc_lock *oscl) +{ + struct cl_lock_descr *descr = &oscl->ols_cl.cls_lock->cll_descr; + pgoff_t io_start; + pgoff_t io_end; - LASSERT(ergo(lock->ols_owner, lock->ols_owner == oio)); - lock->ols_owner = oio; + if (!cl_object_same(io->ci_obj, obj)) + return; - /* set the io to be lockless if this lock is for io's - * host object - */ - if (cl_object_same(oio->oi_cl.cis_obj, slice->cls_obj)) - oio->oi_lockless = 1; + if (likely(io->ci_type == CIT_WRITE)) { + io_start = cl_index(obj, io->u.ci_rw.crw_pos); + io_end = cl_index(obj, io->u.ci_rw.crw_pos + + io->u.ci_rw.crw_count - 1); + if (cl_io_is_append(io)) { + io_start = 0; + io_end = CL_PAGE_EOF; + } + } else { + LASSERT(cl_io_is_mkwrite(io)); + io_start = io_end = io->u.ci_fault.ft_index; } -} -static int osc_lock_lockless_fits_into(const struct lu_env *env, - const struct cl_lock_slice *slice, - const struct cl_lock_descr *need, - const struct cl_io *io) -{ - struct osc_lock *lock = cl2osc_lock(slice); - - if (!(need->cld_enq_flags & CEF_NEVER)) - return 0; + if (descr->cld_mode >= CLM_WRITE && + descr->cld_start <= io_start && descr->cld_end >= io_end) { + struct osc_io *oio = osc_env_io(env); - /* lockless lock should only be used by its owning io. b22147 */ - return (lock->ols_owner == osc_env_io(env)); + /* There must be only one lock to match the write region */ + LASSERT(!oio->oi_write_osclock); + oio->oi_write_osclock = oscl; + } } -static const struct cl_lock_operations osc_lock_lockless_ops = { - .clo_fini = osc_lock_fini, - .clo_enqueue = osc_lock_enqueue, - .clo_wait = osc_lock_lockless_wait, - .clo_unuse = osc_lock_lockless_unuse, - .clo_state = osc_lock_lockless_state, - .clo_fits_into = osc_lock_lockless_fits_into, - .clo_cancel = osc_lock_lockless_cancel, - .clo_print = osc_lock_print -}; - int osc_lock_init(const struct lu_env *env, struct cl_object *obj, struct cl_lock *lock, - const struct cl_io *unused) + const struct cl_io *io) { - struct osc_lock *clk; - int result; - - clk = kmem_cache_zalloc(osc_lock_kmem, GFP_NOFS); - if (clk) { - __u32 enqflags = lock->cll_descr.cld_enq_flags; + struct osc_lock *oscl; + __u32 enqflags = lock->cll_descr.cld_enq_flags; + + oscl = kmem_cache_zalloc(osc_lock_kmem, GFP_NOFS); + if (!oscl) + return -ENOMEM; + + oscl->ols_state = OLS_NEW; + spin_lock_init(&oscl->ols_lock); + INIT_LIST_HEAD(&oscl->ols_waiting_list); + INIT_LIST_HEAD(&oscl->ols_wait_entry); + INIT_LIST_HEAD(&oscl->ols_nextlock_oscobj); + + oscl->ols_flags = osc_enq2ldlm_flags(enqflags); + oscl->ols_agl = !!(enqflags & CEF_AGL); + if (oscl->ols_agl) + oscl->ols_flags |= LDLM_FL_BLOCK_NOWAIT; + if (oscl->ols_flags & LDLM_FL_HAS_INTENT) { + oscl->ols_flags |= LDLM_FL_BLOCK_GRANTED; + oscl->ols_glimpse = 1; + } - osc_lock_build_einfo(env, lock, clk, &clk->ols_einfo); - atomic_set(&clk->ols_pageref, 0); - clk->ols_state = OLS_NEW; + cl_lock_slice_add(lock, &oscl->ols_cl, obj, &osc_lock_ops); - clk->ols_flags = osc_enq2ldlm_flags(enqflags); - clk->ols_agl = !!(enqflags & CEF_AGL); - if (clk->ols_agl) - clk->ols_flags |= LDLM_FL_BLOCK_NOWAIT; - if (clk->ols_flags & LDLM_FL_HAS_INTENT) - clk->ols_glimpse = 1; + if (!(enqflags & CEF_MUST)) + /* try to convert this lock to a lockless lock */ + osc_lock_to_lockless(env, oscl, (enqflags & CEF_NEVER)); + if (oscl->ols_locklessable && !(enqflags & CEF_DISCARD_DATA)) + oscl->ols_flags |= LDLM_FL_DENY_ON_CONTENTION; - cl_lock_slice_add(lock, &clk->ols_cl, obj, &osc_lock_ops); + if (io->ci_type == CIT_WRITE || cl_io_is_mkwrite(io)) + osc_lock_set_writer(env, io, obj, oscl); - if (!(enqflags & CEF_MUST)) - /* try to convert this lock to a lockless lock */ - osc_lock_to_lockless(env, clk, (enqflags & CEF_NEVER)); - if (clk->ols_locklessable && !(enqflags & CEF_DISCARD_DATA)) - clk->ols_flags |= LDLM_FL_DENY_ON_CONTENTION; - LDLM_DEBUG_NOLOCK("lock %p, osc lock %p, flags %llx", - lock, clk, clk->ols_flags); + LDLM_DEBUG_NOLOCK("lock %p, osc lock %p, flags %llx\n", + lock, oscl, oscl->ols_flags); - result = 0; - } else - result = -ENOMEM; - return result; + return 0; } -int osc_dlm_lock_pageref(struct ldlm_lock *dlm) +/** + * Finds an existing lock covering given index and optionally different from a + * given \a except lock. + */ +struct ldlm_lock *osc_dlmlock_at_pgoff(const struct lu_env *env, + struct osc_object *obj, pgoff_t index, + int pending, int canceling) { - struct osc_lock *olock; - int rc = 0; - - spin_lock(&osc_ast_guard); - olock = dlm->l_ast_data; + struct osc_thread_info *info = osc_env_info(env); + struct ldlm_res_id *resname = &info->oti_resname; + ldlm_policy_data_t *policy = &info->oti_policy; + struct lustre_handle lockh; + struct ldlm_lock *lock = NULL; + enum ldlm_mode mode; + __u64 flags; + + ostid_build_res_name(&obj->oo_oinfo->loi_oi, resname); + osc_index2policy(policy, osc2cl(obj), index, index); + policy->l_extent.gid = LDLM_GID_ANY; + + flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_TEST_LOCK; + if (pending) + flags |= LDLM_FL_CBPENDING; /* - * there's a very rare race with osc_page_addref_lock(), but that - * doesn't matter because in the worst case we don't cancel a lock - * which we actually can, that's no harm. + * It is fine to match any group lock since there could be only one + * with a uniq gid and it conflicts with all other lock modes too */ - if (olock && - atomic_add_return(_PAGEREF_MAGIC, - &olock->ols_pageref) != _PAGEREF_MAGIC) { - atomic_sub(_PAGEREF_MAGIC, &olock->ols_pageref); - rc = 1; +again: + mode = ldlm_lock_match(osc_export(obj)->exp_obd->obd_namespace, + flags, resname, LDLM_EXTENT, policy, + LCK_PR | LCK_PW | LCK_GROUP, &lockh, canceling); + if (mode != 0) { + lock = ldlm_handle2lock(&lockh); + /* RACE: the lock is cancelled so let's try again */ + if (unlikely(!lock)) + goto again; } - spin_unlock(&osc_ast_guard); - return rc; + return lock; } /** @} osc */ diff --git a/drivers/staging/lustre/lustre/osc/osc_object.c b/drivers/staging/lustre/lustre/osc/osc_object.c index 9d474fcdd..738ab10ab 100644 --- a/drivers/staging/lustre/lustre/osc/osc_object.c +++ b/drivers/staging/lustre/lustre/osc/osc_object.c @@ -36,6 +36,7 @@ * Implementation of cl_object for OSC layer. * * Author: Nikita Danilov + * Author: Jinshan Xiong */ #define DEBUG_SUBSYSTEM S_OSC @@ -94,6 +95,9 @@ static int osc_object_init(const struct lu_env *env, struct lu_object *obj, atomic_set(&osc->oo_nr_reads, 0); atomic_set(&osc->oo_nr_writes, 0); spin_lock_init(&osc->oo_lock); + spin_lock_init(&osc->oo_tree_lock); + spin_lock_init(&osc->oo_ol_spin); + INIT_LIST_HEAD(&osc->oo_ol_list); cl_object_page_init(lu2cl(obj), sizeof(struct osc_page)); @@ -120,6 +124,7 @@ static void osc_object_free(const struct lu_env *env, struct lu_object *obj) LASSERT(list_empty(&osc->oo_reading_exts)); LASSERT(atomic_read(&osc->oo_nr_reads) == 0); LASSERT(atomic_read(&osc->oo_nr_writes) == 0); + LASSERT(list_empty(&osc->oo_ol_list)); lu_object_fini(obj); kmem_cache_free(osc_object_kmem, osc); @@ -192,6 +197,32 @@ static int osc_object_glimpse(const struct lu_env *env, return 0; } +static int osc_object_ast_clear(struct ldlm_lock *lock, void *data) +{ + LASSERT(lock->l_granted_mode == lock->l_req_mode); + if (lock->l_ast_data == data) + lock->l_ast_data = NULL; + return LDLM_ITER_CONTINUE; +} + +static int osc_object_prune(const struct lu_env *env, struct cl_object *obj) +{ + struct osc_object *osc = cl2osc(obj); + struct ldlm_res_id *resname = &osc_env_info(env)->oti_resname; + + LASSERTF(osc->oo_npages == 0, + DFID "still have %lu pages, obj: %p, osc: %p\n", + PFID(lu_object_fid(&obj->co_lu)), osc->oo_npages, obj, osc); + + /* DLM locks don't hold a reference of osc_object so we have to + * clear it before the object is being destroyed. + */ + ostid_build_res_name(&osc->oo_oinfo->loi_oi, resname); + ldlm_resource_iterate(osc_export(osc)->exp_obd->obd_namespace, resname, + osc_object_ast_clear, osc); + return 0; +} + void osc_object_set_contended(struct osc_object *obj) { obj->oo_contention_time = cfs_time_current(); @@ -236,12 +267,12 @@ static const struct cl_object_operations osc_ops = { .coo_io_init = osc_io_init, .coo_attr_get = osc_attr_get, .coo_attr_set = osc_attr_set, - .coo_glimpse = osc_object_glimpse + .coo_glimpse = osc_object_glimpse, + .coo_prune = osc_object_prune }; static const struct lu_object_operations osc_lu_obj_ops = { .loo_object_init = osc_object_init, - .loo_object_delete = NULL, .loo_object_release = NULL, .loo_object_free = osc_object_free, .loo_object_print = osc_object_print, @@ -261,8 +292,9 @@ struct lu_object *osc_object_alloc(const struct lu_env *env, lu_object_init(obj, NULL, dev); osc->oo_cl.co_ops = &osc_ops; obj->lo_ops = &osc_lu_obj_ops; - } else + } else { obj = NULL; + } return obj; } diff --git a/drivers/staging/lustre/lustre/osc/osc_page.c b/drivers/staging/lustre/lustre/osc/osc_page.c index ce9ddd515..c29c2eabe 100644 --- a/drivers/staging/lustre/lustre/osc/osc_page.c +++ b/drivers/staging/lustre/lustre/osc/osc_page.c @@ -36,14 +36,15 @@ * Implementation of cl_page for OSC layer. * * Author: Nikita Danilov + * Author: Jinshan Xiong */ #define DEBUG_SUBSYSTEM S_OSC #include "osc_cl_internal.h" -static void osc_lru_del(struct client_obd *cli, struct osc_page *opg, bool del); -static void osc_lru_add(struct client_obd *cli, struct osc_page *opg); +static void osc_lru_del(struct client_obd *cli, struct osc_page *opg); +static void osc_lru_use(struct client_obd *cli, struct osc_page *opg); static int osc_lru_reserve(const struct lu_env *env, struct osc_object *obj, struct osc_page *opg); @@ -63,18 +64,9 @@ static int osc_page_protected(const struct lu_env *env, * Page operations. * */ -static void osc_page_fini(const struct lu_env *env, - struct cl_page_slice *slice) -{ - struct osc_page *opg = cl2osc_page(slice); - - CDEBUG(D_TRACE, "%p\n", opg); - LASSERT(!opg->ops_lock); -} - static void osc_page_transfer_get(struct osc_page *opg, const char *label) { - struct cl_page *page = cl_page_top(opg->ops_cl.cpl_page); + struct cl_page *page = opg->ops_cl.cpl_page; LASSERT(!opg->ops_transfer_pinned); cl_page_get(page); @@ -85,11 +77,11 @@ static void osc_page_transfer_get(struct osc_page *opg, const char *label) static void osc_page_transfer_put(const struct lu_env *env, struct osc_page *opg) { - struct cl_page *page = cl_page_top(opg->ops_cl.cpl_page); + struct cl_page *page = opg->ops_cl.cpl_page; if (opg->ops_transfer_pinned) { - lu_ref_del(&page->cp_reference, "transfer", page); opg->ops_transfer_pinned = 0; + lu_ref_del(&page->cp_reference, "transfer", page); cl_page_put(env, page); } } @@ -104,10 +96,7 @@ static void osc_page_transfer_add(const struct lu_env *env, { struct osc_object *obj = cl2osc(opg->ops_cl.cpl_obj); - /* ops_lru and ops_inflight share the same field, so take it from LRU - * first and then use it as inflight. - */ - osc_lru_del(osc_cli(obj), opg, false); + osc_lru_use(osc_cli(obj), opg); spin_lock(&obj->oo_seatbelt); list_add(&opg->ops_inflight, &obj->oo_inflight[crt]); @@ -115,11 +104,9 @@ static void osc_page_transfer_add(const struct lu_env *env, spin_unlock(&obj->oo_seatbelt); } -static int osc_page_cache_add(const struct lu_env *env, - const struct cl_page_slice *slice, - struct cl_io *io) +int osc_page_cache_add(const struct lu_env *env, + const struct cl_page_slice *slice, struct cl_io *io) { - struct osc_io *oio = osc_env_io(env); struct osc_page *opg = cl2osc_page(slice); int result; @@ -132,17 +119,6 @@ static int osc_page_cache_add(const struct lu_env *env, else osc_page_transfer_add(env, opg, CRT_WRITE); - /* for sync write, kernel will wait for this page to be flushed before - * osc_io_end() is called, so release it earlier. - * for mkwrite(), it's known there is no further pages. - */ - if (cl_io_is_sync_write(io) || cl_io_is_mkwrite(io)) { - if (oio->oi_active) { - osc_extent_release(env, oio->oi_active); - oio->oi_active = NULL; - } - } - return result; } @@ -154,102 +130,25 @@ void osc_index2policy(ldlm_policy_data_t *policy, const struct cl_object *obj, policy->l_extent.end = cl_offset(obj, end + 1) - 1; } -static int osc_page_addref_lock(const struct lu_env *env, - struct osc_page *opg, - struct cl_lock *lock) -{ - struct osc_lock *olock; - int rc; - - LASSERT(!opg->ops_lock); - - olock = osc_lock_at(lock); - if (atomic_inc_return(&olock->ols_pageref) <= 0) { - atomic_dec(&olock->ols_pageref); - rc = -ENODATA; - } else { - cl_lock_get(lock); - opg->ops_lock = lock; - rc = 0; - } - return rc; -} - -static void osc_page_putref_lock(const struct lu_env *env, - struct osc_page *opg) -{ - struct cl_lock *lock = opg->ops_lock; - struct osc_lock *olock; - - LASSERT(lock); - olock = osc_lock_at(lock); - - atomic_dec(&olock->ols_pageref); - opg->ops_lock = NULL; - - cl_lock_put(env, lock); -} - static int osc_page_is_under_lock(const struct lu_env *env, const struct cl_page_slice *slice, - struct cl_io *unused) + struct cl_io *unused, pgoff_t *max_index) { - struct cl_lock *lock; + struct osc_page *opg = cl2osc_page(slice); + struct ldlm_lock *dlmlock; int result = -ENODATA; - lock = cl_lock_at_page(env, slice->cpl_obj, slice->cpl_page, - NULL, 1, 0); - if (lock) { - if (osc_page_addref_lock(env, cl2osc_page(slice), lock) == 0) - result = -EBUSY; - cl_lock_put(env, lock); + dlmlock = osc_dlmlock_at_pgoff(env, cl2osc(slice->cpl_obj), + osc_index(opg), 1, 0); + if (dlmlock) { + *max_index = cl_index(slice->cpl_obj, + dlmlock->l_policy_data.l_extent.end); + LDLM_LOCK_PUT(dlmlock); + result = 0; } return result; } -static void osc_page_disown(const struct lu_env *env, - const struct cl_page_slice *slice, - struct cl_io *io) -{ - struct osc_page *opg = cl2osc_page(slice); - - if (unlikely(opg->ops_lock)) - osc_page_putref_lock(env, opg); -} - -static void osc_page_completion_read(const struct lu_env *env, - const struct cl_page_slice *slice, - int ioret) -{ - struct osc_page *opg = cl2osc_page(slice); - struct osc_object *obj = cl2osc(opg->ops_cl.cpl_obj); - - if (likely(opg->ops_lock)) - osc_page_putref_lock(env, opg); - osc_lru_add(osc_cli(obj), opg); -} - -static void osc_page_completion_write(const struct lu_env *env, - const struct cl_page_slice *slice, - int ioret) -{ - struct osc_page *opg = cl2osc_page(slice); - struct osc_object *obj = cl2osc(slice->cpl_obj); - - osc_lru_add(osc_cli(obj), opg); -} - -static int osc_page_fail(const struct lu_env *env, - const struct cl_page_slice *slice, - struct cl_io *unused) -{ - /* - * Cached read? - */ - LBUG(); - return 0; -} - static const char *osc_list(struct list_head *head) { return list_empty(head) ? "-" : "+"; @@ -272,8 +171,8 @@ static int osc_page_print(const struct lu_env *env, struct osc_object *obj = cl2osc(slice->cpl_obj); struct client_obd *cli = &osc_export(obj)->exp_obd->u.cli; - return (*printer)(env, cookie, LUSTRE_OSC_NAME "-page@%p: 1< %#x %d %u %s %s > 2< %llu %u %u %#x %#x | %p %p %p > 3< %s %p %d %lu %d > 4< %d %d %d %lu %s | %s %s %s %s > 5< %s %s %s %s | %d %s | %d %s %s>\n", - opg, + return (*printer)(env, cookie, LUSTRE_OSC_NAME "-page@%p %lu: 1< %#x %d %u %s %s > 2< %llu %u %u %#x %#x | %p %p %p > 3< %s %p %d %lu %d > 4< %d %d %d %lu %s | %s %s %s %s > 5< %s %s %s %s | %d %s | %d %s %s>\n", + opg, osc_index(opg), /* 1 */ oap->oap_magic, oap->oap_cmd, oap->oap_interrupted, @@ -321,7 +220,7 @@ static void osc_page_delete(const struct lu_env *env, osc_page_transfer_put(env, opg); rc = osc_teardown_async_page(env, obj, opg); if (rc) { - CL_PAGE_DEBUG(D_ERROR, env, cl_page_top(slice->cpl_page), + CL_PAGE_DEBUG(D_ERROR, env, slice->cpl_page, "Trying to teardown failed: %d\n", rc); LASSERT(0); } @@ -334,7 +233,19 @@ static void osc_page_delete(const struct lu_env *env, } spin_unlock(&obj->oo_seatbelt); - osc_lru_del(osc_cli(obj), opg, true); + osc_lru_del(osc_cli(obj), opg); + + if (slice->cpl_page->cp_type == CPT_CACHEABLE) { + void *value; + + spin_lock(&obj->oo_tree_lock); + value = radix_tree_delete(&obj->oo_tree, osc_index(opg)); + if (value) + --obj->oo_npages; + spin_unlock(&obj->oo_tree_lock); + + LASSERT(ergo(value, value == opg)); + } } static void osc_page_clip(const struct lu_env *env, @@ -382,28 +293,16 @@ static int osc_page_flush(const struct lu_env *env, } static const struct cl_page_operations osc_page_ops = { - .cpo_fini = osc_page_fini, .cpo_print = osc_page_print, .cpo_delete = osc_page_delete, .cpo_is_under_lock = osc_page_is_under_lock, - .cpo_disown = osc_page_disown, - .io = { - [CRT_READ] = { - .cpo_cache_add = osc_page_fail, - .cpo_completion = osc_page_completion_read - }, - [CRT_WRITE] = { - .cpo_cache_add = osc_page_cache_add, - .cpo_completion = osc_page_completion_write - } - }, .cpo_clip = osc_page_clip, .cpo_cancel = osc_page_cancel, .cpo_flush = osc_page_flush }; int osc_page_init(const struct lu_env *env, struct cl_object *obj, - struct cl_page *page, struct page *vmpage) + struct cl_page *page, pgoff_t index) { struct osc_object *osc = cl2osc(obj); struct osc_page *opg = cl_object_page_slice(obj, page); @@ -412,13 +311,14 @@ int osc_page_init(const struct lu_env *env, struct cl_object *obj, opg->ops_from = 0; opg->ops_to = PAGE_SIZE; - result = osc_prep_async_page(osc, opg, vmpage, - cl_offset(obj, page->cp_index)); + result = osc_prep_async_page(osc, opg, page->cp_vmpage, + cl_offset(obj, index)); if (result == 0) { struct osc_io *oio = osc_env_io(env); opg->ops_srvlock = osc_io_srvlock(oio); - cl_page_slice_add(page, &opg->ops_cl, obj, &osc_page_ops); + cl_page_slice_add(page, &opg->ops_cl, obj, index, + &osc_page_ops); } /* * Cannot assert osc_page_protected() here as read-ahead @@ -431,12 +331,47 @@ int osc_page_init(const struct lu_env *env, struct cl_object *obj, INIT_LIST_HEAD(&opg->ops_lru); /* reserve an LRU space for this page */ - if (page->cp_type == CPT_CACHEABLE && result == 0) + if (page->cp_type == CPT_CACHEABLE && result == 0) { result = osc_lru_reserve(env, osc, opg); + if (result == 0) { + spin_lock(&osc->oo_tree_lock); + result = radix_tree_insert(&osc->oo_tree, index, opg); + if (result == 0) + ++osc->oo_npages; + spin_unlock(&osc->oo_tree_lock); + LASSERT(result == 0); + } + } return result; } +int osc_over_unstable_soft_limit(struct client_obd *cli) +{ + long obd_upages, obd_dpages, osc_upages; + + /* Can't check cli->cl_unstable_count, therefore, no soft limit */ + if (!cli) + return 0; + + obd_upages = atomic_read(&obd_unstable_pages); + obd_dpages = atomic_read(&obd_dirty_pages); + + osc_upages = atomic_read(&cli->cl_unstable_count); + + /* + * obd_max_dirty_pages is the max number of (dirty + unstable) + * pages allowed at any given time. To simulate an unstable page + * only limit, we subtract the current number of dirty pages + * from this max. This difference is roughly the amount of pages + * currently available for unstable pages. Thus, the soft limit + * is half of that difference. Check osc_upages to ensure we don't + * set SOFT_SYNC for OSCs without any outstanding unstable pages. + */ + return osc_upages && + obd_upages >= (obd_max_dirty_pages - obd_dpages) / 2; +} + /** * Helper function called by osc_io_submit() for every page in an immediate * transfer (i.e., transferred synchronously). @@ -460,6 +395,9 @@ void osc_page_submit(const struct lu_env *env, struct osc_page *opg, oap->oap_count = opg->ops_to - opg->ops_from; oap->oap_brw_flags = brw_flags | OBD_BRW_SYNC; + if (osc_over_unstable_soft_limit(oap->oap_cli)) + oap->oap_brw_flags |= OBD_BRW_SOFT_SYNC; + if (!client_is_remote(osc_export(obj)) && capable(CFS_CAP_SYS_RESOURCE)) { oap->oap_brw_flags |= OBD_BRW_NOQUOTA; @@ -483,13 +421,12 @@ void osc_page_submit(const struct lu_env *env, struct osc_page *opg, */ static DECLARE_WAIT_QUEUE_HEAD(osc_lru_waitq); -static atomic_t osc_lru_waiters = ATOMIC_INIT(0); /* LRU pages are freed in batch mode. OSC should at least free this * number of pages to avoid running out of LRU budget, and.. */ static const int lru_shrink_min = 2 << (20 - PAGE_SHIFT); /* 2M */ /* free this number at most otherwise it will take too long time to finish. */ -static const int lru_shrink_max = 32 << (20 - PAGE_SHIFT); /* 32M */ +static const int lru_shrink_max = 8 << (20 - PAGE_SHIFT); /* 8M */ /* Check if we can free LRU slots from this OSC. If there exists LRU waiters, * we should free slots aggressively. In this way, slots are freed in a steady @@ -500,65 +437,142 @@ static const int lru_shrink_max = 32 << (20 - PAGE_SHIFT); /* 32M */ static int osc_cache_too_much(struct client_obd *cli) { struct cl_client_cache *cache = cli->cl_cache; - int pages = atomic_read(&cli->cl_lru_in_list) >> 1; + int pages = atomic_read(&cli->cl_lru_in_list); + unsigned long budget; - if (atomic_read(&osc_lru_waiters) > 0 && - atomic_read(cli->cl_lru_left) < lru_shrink_max) - /* drop lru pages aggressively */ - return min(pages, lru_shrink_max); + budget = cache->ccc_lru_max / atomic_read(&cache->ccc_users); /* if it's going to run out LRU slots, we should free some, but not * too much to maintain fairness among OSCs. */ if (atomic_read(cli->cl_lru_left) < cache->ccc_lru_max >> 4) { - unsigned long tmp; + if (pages >= budget) + return lru_shrink_max; + else if (pages >= budget / 2) + return lru_shrink_min; + } else if (pages >= budget * 2) { + return lru_shrink_min; + } + return 0; +} - tmp = cache->ccc_lru_max / atomic_read(&cache->ccc_users); - if (pages > tmp) - return min(pages, lru_shrink_max); +int lru_queue_work(const struct lu_env *env, void *data) +{ + struct client_obd *cli = data; - return pages > lru_shrink_min ? lru_shrink_min : 0; - } + CDEBUG(D_CACHE, "Run LRU work for client obd %p.\n", cli); + + if (osc_cache_too_much(cli)) + osc_lru_shrink(env, cli, lru_shrink_max, true); return 0; } -/* Return how many pages are not discarded in @pvec. */ -static int discard_pagevec(const struct lu_env *env, struct cl_io *io, - struct cl_page **pvec, int max_index) +void osc_lru_add_batch(struct client_obd *cli, struct list_head *plist) +{ + LIST_HEAD(lru); + struct osc_async_page *oap; + int npages = 0; + + list_for_each_entry(oap, plist, oap_pending_item) { + struct osc_page *opg = oap2osc_page(oap); + + if (!opg->ops_in_lru) + continue; + + ++npages; + LASSERT(list_empty(&opg->ops_lru)); + list_add(&opg->ops_lru, &lru); + } + + if (npages > 0) { + spin_lock(&cli->cl_lru_list_lock); + list_splice_tail(&lru, &cli->cl_lru_list); + atomic_sub(npages, &cli->cl_lru_busy); + atomic_add(npages, &cli->cl_lru_in_list); + spin_unlock(&cli->cl_lru_list_lock); + + /* XXX: May set force to be true for better performance */ + if (osc_cache_too_much(cli)) + (void)ptlrpcd_queue_work(cli->cl_lru_work); + } +} + +static void __osc_lru_del(struct client_obd *cli, struct osc_page *opg) +{ + LASSERT(atomic_read(&cli->cl_lru_in_list) > 0); + list_del_init(&opg->ops_lru); + atomic_dec(&cli->cl_lru_in_list); +} + +/** + * Page is being destroyed. The page may be not in LRU list, if the transfer + * has never finished(error occurred). + */ +static void osc_lru_del(struct client_obd *cli, struct osc_page *opg) +{ + if (opg->ops_in_lru) { + spin_lock(&cli->cl_lru_list_lock); + if (!list_empty(&opg->ops_lru)) { + __osc_lru_del(cli, opg); + } else { + LASSERT(atomic_read(&cli->cl_lru_busy) > 0); + atomic_dec(&cli->cl_lru_busy); + } + spin_unlock(&cli->cl_lru_list_lock); + + atomic_inc(cli->cl_lru_left); + /* this is a great place to release more LRU pages if + * this osc occupies too many LRU pages and kernel is + * stealing one of them. + */ + if (!memory_pressure_get()) + (void)ptlrpcd_queue_work(cli->cl_lru_work); + wake_up(&osc_lru_waitq); + } else { + LASSERT(list_empty(&opg->ops_lru)); + } +} + +/** + * Delete page from LRUlist for redirty. + */ +static void osc_lru_use(struct client_obd *cli, struct osc_page *opg) +{ + /* If page is being transferred for the first time, + * ops_lru should be empty + */ + if (opg->ops_in_lru && !list_empty(&opg->ops_lru)) { + spin_lock(&cli->cl_lru_list_lock); + __osc_lru_del(cli, opg); + spin_unlock(&cli->cl_lru_list_lock); + atomic_inc(&cli->cl_lru_busy); + } +} + +static void discard_pagevec(const struct lu_env *env, struct cl_io *io, + struct cl_page **pvec, int max_index) { - int count; int i; - for (count = 0, i = 0; i < max_index; i++) { + for (i = 0; i < max_index; i++) { struct cl_page *page = pvec[i]; - if (cl_page_own_try(env, io, page) == 0) { - /* free LRU page only if nobody is using it. - * This check is necessary to avoid freeing the pages - * having already been removed from LRU and pinned - * for IO. - */ - if (!cl_page_in_use(page)) { - cl_page_unmap(env, io, page); - cl_page_discard(env, io, page); - ++count; - } - cl_page_disown(env, io, page); - } + LASSERT(cl_page_is_owned(page, io)); + cl_page_discard(env, io, page); + cl_page_disown(env, io, page); cl_page_put(env, page); + pvec[i] = NULL; } - return max_index - count; } /** * Drop @target of pages from LRU at most. */ -int osc_lru_shrink(struct client_obd *cli, int target) +int osc_lru_shrink(const struct lu_env *env, struct client_obd *cli, + int target, bool force) { - struct cl_env_nest nest; - struct lu_env *env; struct cl_io *io; struct cl_object *clobj = NULL; struct cl_page **pvec; @@ -573,23 +587,31 @@ int osc_lru_shrink(struct client_obd *cli, int target) if (atomic_read(&cli->cl_lru_in_list) == 0 || target <= 0) return 0; - env = cl_env_nested_get(&nest); - if (IS_ERR(env)) - return PTR_ERR(env); + if (!force) { + if (atomic_read(&cli->cl_lru_shrinkers) > 0) + return -EBUSY; - pvec = osc_env_info(env)->oti_pvec; + if (atomic_inc_return(&cli->cl_lru_shrinkers) > 1) { + atomic_dec(&cli->cl_lru_shrinkers); + return -EBUSY; + } + } else { + atomic_inc(&cli->cl_lru_shrinkers); + } + + pvec = (struct cl_page **)osc_env_info(env)->oti_pvec; io = &osc_env_info(env)->oti_io; - client_obd_list_lock(&cli->cl_lru_list_lock); - atomic_inc(&cli->cl_lru_shrinkers); + spin_lock(&cli->cl_lru_list_lock); maxscan = min(target << 1, atomic_read(&cli->cl_lru_in_list)); list_for_each_entry_safe(opg, temp, &cli->cl_lru_list, ops_lru) { struct cl_page *page; + bool will_free = false; if (--maxscan < 0) break; - page = cl_page_top(opg->ops_cl.cpl_page); + page = opg->ops_cl.cpl_page; if (cl_page_in_use_noref(page)) { list_move_tail(&opg->ops_lru, &cli->cl_lru_list); continue; @@ -600,10 +622,10 @@ int osc_lru_shrink(struct client_obd *cli, int target) struct cl_object *tmp = page->cp_obj; cl_object_get(tmp); - client_obd_list_unlock(&cli->cl_lru_list_lock); + spin_unlock(&cli->cl_lru_list_lock); if (clobj) { - count -= discard_pagevec(env, io, pvec, index); + discard_pagevec(env, io, pvec, index); index = 0; cl_io_fini(env, io); @@ -616,7 +638,7 @@ int osc_lru_shrink(struct client_obd *cli, int target) io->ci_ignore_layout = 1; rc = cl_io_init(env, io, CIT_MISC, clobj); - client_obd_list_lock(&cli->cl_lru_list_lock); + spin_lock(&cli->cl_lru_list_lock); if (rc != 0) break; @@ -625,98 +647,54 @@ int osc_lru_shrink(struct client_obd *cli, int target) continue; } - /* move this page to the end of list as it will be discarded - * soon. The page will be finally removed from LRU list in - * osc_page_delete(). - */ - list_move_tail(&opg->ops_lru, &cli->cl_lru_list); + if (cl_page_own_try(env, io, page) == 0) { + if (!cl_page_in_use_noref(page)) { + /* remove it from lru list earlier to avoid + * lock contention + */ + __osc_lru_del(cli, opg); + opg->ops_in_lru = 0; /* will be discarded */ + + cl_page_get(page); + will_free = true; + } else { + cl_page_disown(env, io, page); + } + } - /* it's okay to grab a refcount here w/o holding lock because - * it has to grab cl_lru_list_lock to delete the page. - */ - cl_page_get(page); - pvec[index++] = page; - if (++count >= target) - break; + if (!will_free) { + list_move_tail(&opg->ops_lru, &cli->cl_lru_list); + continue; + } + /* Don't discard and free the page with cl_lru_list held */ + pvec[index++] = page; if (unlikely(index == OTI_PVEC_SIZE)) { - client_obd_list_unlock(&cli->cl_lru_list_lock); - count -= discard_pagevec(env, io, pvec, index); + spin_unlock(&cli->cl_lru_list_lock); + discard_pagevec(env, io, pvec, index); index = 0; - client_obd_list_lock(&cli->cl_lru_list_lock); + spin_lock(&cli->cl_lru_list_lock); } + + if (++count >= target) + break; } - client_obd_list_unlock(&cli->cl_lru_list_lock); + spin_unlock(&cli->cl_lru_list_lock); if (clobj) { - count -= discard_pagevec(env, io, pvec, index); + discard_pagevec(env, io, pvec, index); cl_io_fini(env, io); cl_object_put(env, clobj); } - cl_env_nested_put(&nest, env); atomic_dec(&cli->cl_lru_shrinkers); - return count > 0 ? count : rc; -} - -static void osc_lru_add(struct client_obd *cli, struct osc_page *opg) -{ - bool wakeup = false; - - if (!opg->ops_in_lru) - return; - - atomic_dec(&cli->cl_lru_busy); - client_obd_list_lock(&cli->cl_lru_list_lock); - if (list_empty(&opg->ops_lru)) { - list_move_tail(&opg->ops_lru, &cli->cl_lru_list); - atomic_inc_return(&cli->cl_lru_in_list); - wakeup = atomic_read(&osc_lru_waiters) > 0; - } - client_obd_list_unlock(&cli->cl_lru_list_lock); - - if (wakeup) { - osc_lru_shrink(cli, osc_cache_too_much(cli)); + if (count > 0) { + atomic_add(count, cli->cl_lru_left); wake_up_all(&osc_lru_waitq); } -} - -/* delete page from LRUlist. The page can be deleted from LRUlist for two - * reasons: redirtied or deleted from page cache. - */ -static void osc_lru_del(struct client_obd *cli, struct osc_page *opg, bool del) -{ - if (opg->ops_in_lru) { - client_obd_list_lock(&cli->cl_lru_list_lock); - if (!list_empty(&opg->ops_lru)) { - LASSERT(atomic_read(&cli->cl_lru_in_list) > 0); - list_del_init(&opg->ops_lru); - atomic_dec(&cli->cl_lru_in_list); - if (!del) - atomic_inc(&cli->cl_lru_busy); - } else if (del) { - LASSERT(atomic_read(&cli->cl_lru_busy) > 0); - atomic_dec(&cli->cl_lru_busy); - } - client_obd_list_unlock(&cli->cl_lru_list_lock); - if (del) { - atomic_inc(cli->cl_lru_left); - /* this is a great place to release more LRU pages if - * this osc occupies too many LRU pages and kernel is - * stealing one of them. - * cl_lru_shrinkers is to avoid recursive call in case - * we're already in the context of osc_lru_shrink(). - */ - if (atomic_read(&cli->cl_lru_shrinkers) == 0 && - !memory_pressure_get()) - osc_lru_shrink(cli, osc_cache_too_much(cli)); - wake_up(&osc_lru_waitq); - } - } else { - LASSERT(list_empty(&opg->ops_lru)); - } + return count > 0 ? count : rc; } static inline int max_to_shrink(struct client_obd *cli) @@ -724,19 +702,28 @@ static inline int max_to_shrink(struct client_obd *cli) return min(atomic_read(&cli->cl_lru_in_list) >> 1, lru_shrink_max); } -static int osc_lru_reclaim(struct client_obd *cli) +int osc_lru_reclaim(struct client_obd *cli) { + struct cl_env_nest nest; + struct lu_env *env; struct cl_client_cache *cache = cli->cl_cache; int max_scans; - int rc; + int rc = 0; LASSERT(cache); - rc = osc_lru_shrink(cli, lru_shrink_min); + env = cl_env_nested_get(&nest); + if (IS_ERR(env)) + return 0; + + rc = osc_lru_shrink(env, cli, osc_cache_too_much(cli), false); if (rc != 0) { + if (rc == -EBUSY) + rc = 0; + CDEBUG(D_CACHE, "%s: Free %d pages from own LRU: %p.\n", cli->cl_import->imp_obd->obd_name, rc, cli); - return rc; + goto out; } CDEBUG(D_CACHE, "%s: cli %p no free slots, pages: %d, busy: %d.\n", @@ -764,10 +751,11 @@ static int osc_lru_reclaim(struct client_obd *cli) atomic_read(&cli->cl_lru_busy)); list_move_tail(&cli->cl_lru_osc, &cache->ccc_lru); - if (atomic_read(&cli->cl_lru_in_list) > 0) { + if (osc_cache_too_much(cli) > 0) { spin_unlock(&cache->ccc_lru_lock); - rc = osc_lru_shrink(cli, max_to_shrink(cli)); + rc = osc_lru_shrink(env, cli, osc_cache_too_much(cli), + true); spin_lock(&cache->ccc_lru_lock); if (rc != 0) break; @@ -775,6 +763,8 @@ static int osc_lru_reclaim(struct client_obd *cli) } spin_unlock(&cache->ccc_lru_lock); +out: + cl_env_nested_put(&nest, env); CDEBUG(D_CACHE, "%s: cli %p freed %d pages.\n", cli->cl_import->imp_obd->obd_name, cli, rc); return rc; @@ -784,16 +774,20 @@ static int osc_lru_reserve(const struct lu_env *env, struct osc_object *obj, struct osc_page *opg) { struct l_wait_info lwi = LWI_INTR(LWI_ON_SIGNAL_NOOP, NULL); + struct osc_io *oio = osc_env_io(env); struct client_obd *cli = osc_cli(obj); int rc = 0; if (!cli->cl_cache) /* shall not be in LRU */ return 0; + if (oio->oi_lru_reserved > 0) { + --oio->oi_lru_reserved; + goto out; + } + LASSERT(atomic_read(cli->cl_lru_left) >= 0); while (!atomic_add_unless(cli->cl_lru_left, -1, 0)) { - int gen; - /* run out of LRU spaces, try to drop some by itself */ rc = osc_lru_reclaim(cli); if (rc < 0) @@ -803,23 +797,15 @@ static int osc_lru_reserve(const struct lu_env *env, struct osc_object *obj, cond_resched(); - /* slowest case, all of caching pages are busy, notifying - * other OSCs that we're lack of LRU slots. - */ - atomic_inc(&osc_lru_waiters); - - gen = atomic_read(&cli->cl_lru_in_list); rc = l_wait_event(osc_lru_waitq, - atomic_read(cli->cl_lru_left) > 0 || - (atomic_read(&cli->cl_lru_in_list) > 0 && - gen != atomic_read(&cli->cl_lru_in_list)), + atomic_read(cli->cl_lru_left) > 0, &lwi); - atomic_dec(&osc_lru_waiters); if (rc < 0) break; } +out: if (rc >= 0) { atomic_inc(&cli->cl_lru_busy); opg->ops_in_lru = 1; diff --git a/drivers/staging/lustre/lustre/osc/osc_request.c b/drivers/staging/lustre/lustre/osc/osc_request.c index 30526ebca..47417f88f 100644 --- a/drivers/staging/lustre/lustre/osc/osc_request.c +++ b/drivers/staging/lustre/lustre/osc/osc_request.c @@ -92,12 +92,13 @@ struct osc_fsync_args { struct osc_enqueue_args { struct obd_export *oa_exp; + enum ldlm_type oa_type; + enum ldlm_mode oa_mode; __u64 *oa_flags; - obd_enqueue_update_f oa_upcall; + osc_enqueue_upcall_f oa_upcall; void *oa_cookie; struct ost_lvb *oa_lvb; - struct lustre_handle *oa_lockh; - struct ldlm_enqueue_info *oa_ei; + struct lustre_handle oa_lockh; unsigned int oa_agl:1; }; @@ -801,21 +802,24 @@ static void osc_announce_cached(struct client_obd *cli, struct obdo *oa, LASSERT(!(oa->o_valid & bits)); oa->o_valid |= bits; - client_obd_list_lock(&cli->cl_loi_list_lock); + spin_lock(&cli->cl_loi_list_lock); oa->o_dirty = cli->cl_dirty; if (unlikely(cli->cl_dirty - cli->cl_dirty_transit > cli->cl_dirty_max)) { CERROR("dirty %lu - %lu > dirty_max %lu\n", cli->cl_dirty, cli->cl_dirty_transit, cli->cl_dirty_max); oa->o_undirty = 0; - } else if (unlikely(atomic_read(&obd_dirty_pages) - + } else if (unlikely(atomic_read(&obd_unstable_pages) + + atomic_read(&obd_dirty_pages) - atomic_read(&obd_dirty_transit_pages) > (long)(obd_max_dirty_pages + 1))) { /* The atomic_read() allowing the atomic_inc() are * not covered by a lock thus they may safely race and trip * this CERROR() unless we add in a small fudge factor (+1). */ - CERROR("dirty %d - %d > system dirty_max %d\n", + CERROR("%s: dirty %d + %d - %d > system dirty_max %d\n", + cli->cl_import->imp_obd->obd_name, + atomic_read(&obd_unstable_pages), atomic_read(&obd_dirty_pages), atomic_read(&obd_dirty_transit_pages), obd_max_dirty_pages); @@ -833,10 +837,9 @@ static void osc_announce_cached(struct client_obd *cli, struct obdo *oa, oa->o_grant = cli->cl_avail_grant + cli->cl_reserved_grant; oa->o_dropped = cli->cl_lost_grant; cli->cl_lost_grant = 0; - client_obd_list_unlock(&cli->cl_loi_list_lock); + spin_unlock(&cli->cl_loi_list_lock); CDEBUG(D_CACHE, "dirty: %llu undirty: %u dropped %u grant: %llu\n", oa->o_dirty, oa->o_undirty, oa->o_dropped, oa->o_grant); - } void osc_update_next_shrink(struct client_obd *cli) @@ -849,9 +852,9 @@ void osc_update_next_shrink(struct client_obd *cli) static void __osc_update_grant(struct client_obd *cli, u64 grant) { - client_obd_list_lock(&cli->cl_loi_list_lock); + spin_lock(&cli->cl_loi_list_lock); cli->cl_avail_grant += grant; - client_obd_list_unlock(&cli->cl_loi_list_lock); + spin_unlock(&cli->cl_loi_list_lock); } static void osc_update_grant(struct client_obd *cli, struct ost_body *body) @@ -889,10 +892,10 @@ out: static void osc_shrink_grant_local(struct client_obd *cli, struct obdo *oa) { - client_obd_list_lock(&cli->cl_loi_list_lock); + spin_lock(&cli->cl_loi_list_lock); oa->o_grant = cli->cl_avail_grant / 4; cli->cl_avail_grant -= oa->o_grant; - client_obd_list_unlock(&cli->cl_loi_list_lock); + spin_unlock(&cli->cl_loi_list_lock); if (!(oa->o_valid & OBD_MD_FLFLAGS)) { oa->o_valid |= OBD_MD_FLFLAGS; oa->o_flags = 0; @@ -911,10 +914,10 @@ static int osc_shrink_grant(struct client_obd *cli) __u64 target_bytes = (cli->cl_max_rpcs_in_flight + 1) * (cli->cl_max_pages_per_rpc << PAGE_SHIFT); - client_obd_list_lock(&cli->cl_loi_list_lock); + spin_lock(&cli->cl_loi_list_lock); if (cli->cl_avail_grant <= target_bytes) target_bytes = cli->cl_max_pages_per_rpc << PAGE_SHIFT; - client_obd_list_unlock(&cli->cl_loi_list_lock); + spin_unlock(&cli->cl_loi_list_lock); return osc_shrink_grant_to_target(cli, target_bytes); } @@ -924,7 +927,7 @@ int osc_shrink_grant_to_target(struct client_obd *cli, __u64 target_bytes) int rc = 0; struct ost_body *body; - client_obd_list_lock(&cli->cl_loi_list_lock); + spin_lock(&cli->cl_loi_list_lock); /* Don't shrink if we are already above or below the desired limit * We don't want to shrink below a single RPC, as that will negatively * impact block allocation and long-term performance. @@ -933,10 +936,10 @@ int osc_shrink_grant_to_target(struct client_obd *cli, __u64 target_bytes) target_bytes = cli->cl_max_pages_per_rpc << PAGE_SHIFT; if (target_bytes >= cli->cl_avail_grant) { - client_obd_list_unlock(&cli->cl_loi_list_lock); + spin_unlock(&cli->cl_loi_list_lock); return 0; } - client_obd_list_unlock(&cli->cl_loi_list_lock); + spin_unlock(&cli->cl_loi_list_lock); body = kzalloc(sizeof(*body), GFP_NOFS); if (!body) @@ -944,10 +947,10 @@ int osc_shrink_grant_to_target(struct client_obd *cli, __u64 target_bytes) osc_announce_cached(cli, &body->oa, 0); - client_obd_list_lock(&cli->cl_loi_list_lock); + spin_lock(&cli->cl_loi_list_lock); body->oa.o_grant = cli->cl_avail_grant - target_bytes; cli->cl_avail_grant = target_bytes; - client_obd_list_unlock(&cli->cl_loi_list_lock); + spin_unlock(&cli->cl_loi_list_lock); if (!(body->oa.o_valid & OBD_MD_FLFLAGS)) { body->oa.o_valid |= OBD_MD_FLFLAGS; body->oa.o_flags = 0; @@ -1035,7 +1038,7 @@ static void osc_init_grant(struct client_obd *cli, struct obd_connect_data *ocd) * race is tolerable here: if we're evicted, but imp_state already * left EVICTED state, then cl_dirty must be 0 already. */ - client_obd_list_lock(&cli->cl_loi_list_lock); + spin_lock(&cli->cl_loi_list_lock); if (cli->cl_import->imp_state == LUSTRE_IMP_EVICTED) cli->cl_avail_grant = ocd->ocd_grant; else @@ -1053,7 +1056,7 @@ static void osc_init_grant(struct client_obd *cli, struct obd_connect_data *ocd) /* determine the appropriate chunk size used by osc_extent. */ cli->cl_chunkbits = max_t(int, PAGE_SHIFT, ocd->ocd_blocksize); - client_obd_list_unlock(&cli->cl_loi_list_lock); + spin_unlock(&cli->cl_loi_list_lock); CDEBUG(D_CACHE, "%s, setting cl_avail_grant: %ld cl_lost_grant: %ld chunk bits: %d\n", cli->cl_import->imp_obd->obd_name, @@ -1082,7 +1085,7 @@ static void handle_short_read(int nob_read, u32 page_count, if (pga[i]->count > nob_read) { /* EOF inside this page */ ptr = kmap(pga[i]->pg) + - (pga[i]->off & ~CFS_PAGE_MASK); + (pga[i]->off & ~PAGE_MASK); memset(ptr + nob_read, 0, pga[i]->count - nob_read); kunmap(pga[i]->pg); page_count--; @@ -1097,7 +1100,7 @@ static void handle_short_read(int nob_read, u32 page_count, /* zero remaining pages */ while (page_count-- > 0) { - ptr = kmap(pga[i]->pg) + (pga[i]->off & ~CFS_PAGE_MASK); + ptr = kmap(pga[i]->pg) + (pga[i]->off & ~PAGE_MASK); memset(ptr, 0, pga[i]->count); kunmap(pga[i]->pg); i++; @@ -1144,7 +1147,8 @@ static inline int can_merge_pages(struct brw_page *p1, struct brw_page *p2) { if (p1->flag != p2->flag) { unsigned mask = ~(OBD_BRW_FROM_GRANT | OBD_BRW_NOCACHE | - OBD_BRW_SYNC | OBD_BRW_ASYNC|OBD_BRW_NOQUOTA); + OBD_BRW_SYNC | OBD_BRW_ASYNC | + OBD_BRW_NOQUOTA | OBD_BRW_SOFT_SYNC); /* warn if we try to combine flags that we don't know to be * safe to combine @@ -1188,32 +1192,29 @@ static u32 osc_checksum_bulk(int nob, u32 pg_count, if (i == 0 && opc == OST_READ && OBD_FAIL_CHECK(OBD_FAIL_OSC_CHECKSUM_RECEIVE)) { unsigned char *ptr = kmap(pga[i]->pg); - int off = pga[i]->off & ~CFS_PAGE_MASK; + int off = pga[i]->off & ~PAGE_MASK; memcpy(ptr + off, "bad1", min(4, nob)); kunmap(pga[i]->pg); } cfs_crypto_hash_update_page(hdesc, pga[i]->pg, - pga[i]->off & ~CFS_PAGE_MASK, + pga[i]->off & ~PAGE_MASK, count); CDEBUG(D_PAGE, "page %p map %p index %lu flags %lx count %u priv %0lx: off %d\n", pga[i]->pg, pga[i]->pg->mapping, pga[i]->pg->index, (long)pga[i]->pg->flags, page_count(pga[i]->pg), page_private(pga[i]->pg), - (int)(pga[i]->off & ~CFS_PAGE_MASK)); + (int)(pga[i]->off & ~PAGE_MASK)); nob -= pga[i]->count; pg_count--; i++; } - bufsize = 4; + bufsize = sizeof(cksum); err = cfs_crypto_hash_final(hdesc, (unsigned char *)&cksum, &bufsize); - if (err) - cfs_crypto_hash_final(hdesc, NULL, NULL); - /* For sending we only compute the wrong checksum instead * of corrupting the data so it is still correct on a redo */ @@ -1312,7 +1313,7 @@ static int osc_brw_prep_request(int cmd, struct client_obd *cli, pg_prev = pga[0]; for (requested_nob = i = 0; i < page_count; i++, niobuf++) { struct brw_page *pg = pga[i]; - int poff = pg->off & ~CFS_PAGE_MASK; + int poff = pg->off & ~PAGE_MASK; LASSERT(pg->count > 0); /* make sure there is no gap in the middle of page array */ @@ -1658,6 +1659,7 @@ static int osc_brw_redo_request(struct ptlrpc_request *request, aa->aa_resends++; new_req->rq_interpret_reply = request->rq_interpret_reply; new_req->rq_async_args = request->rq_async_args; + new_req->rq_commit_cb = request->rq_commit_cb; /* cap resend delay to the current request timeout, this is similar to * what ptlrpc does (see after_reply()) */ @@ -1737,7 +1739,6 @@ static int brw_interpret(const struct lu_env *env, struct osc_brw_async_args *aa = data; struct osc_extent *ext; struct osc_extent *tmp; - struct cl_object *obj = NULL; struct client_obd *cli = aa->aa_cli; rc = osc_brw_fini_request(req, rc); @@ -1766,24 +1767,17 @@ static int brw_interpret(const struct lu_env *env, rc = -EIO; } - list_for_each_entry_safe(ext, tmp, &aa->aa_exts, oe_link) { - if (!obj && rc == 0) { - obj = osc2cl(ext->oe_obj); - cl_object_get(obj); - } - - list_del_init(&ext->oe_link); - osc_extent_finish(env, ext, 1, rc); - } - LASSERT(list_empty(&aa->aa_exts)); - LASSERT(list_empty(&aa->aa_oaps)); - - if (obj) { + if (rc == 0) { struct obdo *oa = aa->aa_oa; struct cl_attr *attr = &osc_env_info(env)->oti_attr; unsigned long valid = 0; + struct cl_object *obj; + struct osc_async_page *last; + + last = brw_page2oap(aa->aa_ppga[aa->aa_page_count - 1]); + obj = osc2cl(last->oap_obj); - LASSERT(rc == 0); + cl_object_attr_lock(obj); if (oa->o_valid & OBD_MD_FLBLOCKS) { attr->cat_blocks = oa->o_blocks; valid |= CAT_BLOCKS; @@ -1800,21 +1794,45 @@ static int brw_interpret(const struct lu_env *env, attr->cat_ctime = oa->o_ctime; valid |= CAT_CTIME; } - if (valid != 0) { - cl_object_attr_lock(obj); - cl_object_attr_set(env, obj, attr, valid); - cl_object_attr_unlock(obj); + + if (lustre_msg_get_opc(req->rq_reqmsg) == OST_WRITE) { + struct lov_oinfo *loi = cl2osc(obj)->oo_oinfo; + loff_t last_off = last->oap_count + last->oap_obj_off; + + /* Change file size if this is an out of quota or + * direct IO write and it extends the file size + */ + if (loi->loi_lvb.lvb_size < last_off) { + attr->cat_size = last_off; + valid |= CAT_SIZE; + } + /* Extend KMS if it's not a lockless write */ + if (loi->loi_kms < last_off && + oap2osc_page(last)->ops_srvlock == 0) { + attr->cat_kms = last_off; + valid |= CAT_KMS; + } } - cl_object_put(env, obj); + + if (valid != 0) + cl_object_attr_set(env, obj, attr, valid); + cl_object_attr_unlock(obj); } kmem_cache_free(obdo_cachep, aa->aa_oa); + list_for_each_entry_safe(ext, tmp, &aa->aa_exts, oe_link) { + list_del_init(&ext->oe_link); + osc_extent_finish(env, ext, 1, rc); + } + LASSERT(list_empty(&aa->aa_exts)); + LASSERT(list_empty(&aa->aa_oaps)); + cl_req_completion(env, aa->aa_clerq, rc < 0 ? rc : req->rq_bulk->bd_nob_transferred); osc_release_ppga(aa->aa_ppga, aa->aa_page_count); ptlrpc_lprocfs_brw(req, req->rq_bulk->bd_nob_transferred); - client_obd_list_lock(&cli->cl_loi_list_lock); + spin_lock(&cli->cl_loi_list_lock); /* We need to decrement before osc_ap_completion->osc_wake_cache_waiters * is called so we know whether to go to sync BRWs or wait for more * RPCs to complete @@ -1824,12 +1842,31 @@ static int brw_interpret(const struct lu_env *env, else cli->cl_r_in_flight--; osc_wake_cache_waiters(cli); - client_obd_list_unlock(&cli->cl_loi_list_lock); + spin_unlock(&cli->cl_loi_list_lock); osc_io_unplug(env, cli, NULL); return rc; } +static void brw_commit(struct ptlrpc_request *req) +{ + spin_lock(&req->rq_lock); + /* + * If osc_inc_unstable_pages (via osc_extent_finish) races with + * this called via the rq_commit_cb, I need to ensure + * osc_dec_unstable_pages is still called. Otherwise unstable + * pages may be leaked. + */ + if (req->rq_unstable) { + spin_unlock(&req->rq_lock); + osc_dec_unstable_pages(req); + spin_lock(&req->rq_lock); + } else { + req->rq_committed = 1; + } + spin_unlock(&req->rq_lock); +} + /** * Build an RPC by the list of extent @ext_list. The caller must ensure * that the total pages in this list are NOT over max pages per RPC. @@ -1920,7 +1957,7 @@ int osc_build_rpc(const struct lu_env *env, struct client_obd *cli, pga[i] = &oap->oap_brw_page; pga[i]->off = oap->oap_obj_off + oap->oap_page_off; CDEBUG(0, "put page %p index %lu oap %p flg %x to pga\n", - pga[i]->pg, page_index(oap->oap_page), oap, + pga[i]->pg, oap->oap_page->index, oap, pga[i]->flag); i++; cl_req_page_add(env, clerq, page); @@ -1949,6 +1986,7 @@ int osc_build_rpc(const struct lu_env *env, struct client_obd *cli, goto out; } + req->rq_commit_cb = brw_commit; req->rq_interpret_reply = brw_interpret; if (mem_tight != 0) @@ -1992,7 +2030,7 @@ int osc_build_rpc(const struct lu_env *env, struct client_obd *cli, if (tmp) tmp->oap_request = ptlrpc_request_addref(req); - client_obd_list_lock(&cli->cl_loi_list_lock); + spin_lock(&cli->cl_loi_list_lock); starting_offset >>= PAGE_SHIFT; if (cmd == OBD_BRW_READ) { cli->cl_r_in_flight++; @@ -2007,7 +2045,7 @@ int osc_build_rpc(const struct lu_env *env, struct client_obd *cli, lprocfs_oh_tally_log2(&cli->cl_write_offset_hist, starting_offset + 1); } - client_obd_list_unlock(&cli->cl_loi_list_lock); + spin_unlock(&cli->cl_loi_list_lock); DEBUG_REQ(D_INODE, req, "%d pages, aa %p. now %dr/%dw in flight", page_count, aa, cli->cl_r_in_flight, @@ -2055,14 +2093,12 @@ static int osc_set_lock_data_with_check(struct ldlm_lock *lock, LASSERT(lock->l_glimpse_ast == einfo->ei_cb_gl); lock_res_and_lock(lock); - spin_lock(&osc_ast_guard); if (!lock->l_ast_data) lock->l_ast_data = data; if (lock->l_ast_data == data) set = 1; - spin_unlock(&osc_ast_guard); unlock_res_and_lock(lock); return set; @@ -2104,36 +2140,38 @@ static int osc_find_cbdata(struct obd_export *exp, struct lov_stripe_md *lsm, return rc; } -static int osc_enqueue_fini(struct ptlrpc_request *req, struct ost_lvb *lvb, - obd_enqueue_update_f upcall, void *cookie, - __u64 *flags, int agl, int rc) +static int osc_enqueue_fini(struct ptlrpc_request *req, + osc_enqueue_upcall_f upcall, void *cookie, + struct lustre_handle *lockh, enum ldlm_mode mode, + __u64 *flags, int agl, int errcode) { - int intent = *flags & LDLM_FL_HAS_INTENT; - - if (intent) { - /* The request was created before ldlm_cli_enqueue call. */ - if (rc == ELDLM_LOCK_ABORTED) { - struct ldlm_reply *rep; + bool intent = *flags & LDLM_FL_HAS_INTENT; + int rc; - rep = req_capsule_server_get(&req->rq_pill, - &RMF_DLM_REP); + /* The request was created before ldlm_cli_enqueue call. */ + if (intent && errcode == ELDLM_LOCK_ABORTED) { + struct ldlm_reply *rep; - rep->lock_policy_res1 = - ptlrpc_status_ntoh(rep->lock_policy_res1); - if (rep->lock_policy_res1) - rc = rep->lock_policy_res1; - } - } + rep = req_capsule_server_get(&req->rq_pill, &RMF_DLM_REP); - if ((intent != 0 && rc == ELDLM_LOCK_ABORTED && agl == 0) || - (rc == 0)) { + rep->lock_policy_res1 = + ptlrpc_status_ntoh(rep->lock_policy_res1); + if (rep->lock_policy_res1) + errcode = rep->lock_policy_res1; + if (!agl) + *flags |= LDLM_FL_LVB_READY; + } else if (errcode == ELDLM_OK) { *flags |= LDLM_FL_LVB_READY; - CDEBUG(D_INODE, "got kms %llu blocks %llu mtime %llu\n", - lvb->lvb_size, lvb->lvb_blocks, lvb->lvb_mtime); } /* Call the update callback. */ - rc = (*upcall)(cookie, rc); + rc = (*upcall)(cookie, lockh, errcode); + /* release the reference taken in ldlm_cli_enqueue() */ + if (errcode == ELDLM_LOCK_MATCHED) + errcode = ELDLM_OK; + if (errcode == ELDLM_OK && lustre_handle_is_used(lockh)) + ldlm_lock_decref(lockh, mode); + return rc; } @@ -2142,62 +2180,50 @@ static int osc_enqueue_interpret(const struct lu_env *env, struct osc_enqueue_args *aa, int rc) { struct ldlm_lock *lock; - struct lustre_handle handle; - __u32 mode; - struct ost_lvb *lvb; - __u32 lvb_len; - __u64 *flags = aa->oa_flags; - - /* Make a local copy of a lock handle and a mode, because aa->oa_* - * might be freed anytime after lock upcall has been called. - */ - lustre_handle_copy(&handle, aa->oa_lockh); - mode = aa->oa_ei->ei_mode; + struct lustre_handle *lockh = &aa->oa_lockh; + enum ldlm_mode mode = aa->oa_mode; + struct ost_lvb *lvb = aa->oa_lvb; + __u32 lvb_len = sizeof(*lvb); + __u64 flags = 0; + /* ldlm_cli_enqueue is holding a reference on the lock, so it must * be valid. */ - lock = ldlm_handle2lock(&handle); + lock = ldlm_handle2lock(lockh); + LASSERTF(lock, "lockh %llx, req %p, aa %p - client evicted?\n", + lockh->cookie, req, aa); /* Take an additional reference so that a blocking AST that * ldlm_cli_enqueue_fini() might post for a failed lock, is guaranteed * to arrive after an upcall has been executed by * osc_enqueue_fini(). */ - ldlm_lock_addref(&handle, mode); + ldlm_lock_addref(lockh, mode); + + /* Let cl_lock_state_wait fail with -ERESTARTSYS to unuse sublocks. */ + OBD_FAIL_TIMEOUT(OBD_FAIL_LDLM_ENQUEUE_HANG, 2); /* Let CP AST to grant the lock first. */ OBD_FAIL_TIMEOUT(OBD_FAIL_OSC_CP_ENQ_RACE, 1); - if (aa->oa_agl && rc == ELDLM_LOCK_ABORTED) { - lvb = NULL; - lvb_len = 0; - } else { - lvb = aa->oa_lvb; - lvb_len = sizeof(*aa->oa_lvb); + if (aa->oa_agl) { + LASSERT(!aa->oa_lvb); + LASSERT(!aa->oa_flags); + aa->oa_flags = &flags; } /* Complete obtaining the lock procedure. */ - rc = ldlm_cli_enqueue_fini(aa->oa_exp, req, aa->oa_ei->ei_type, 1, - mode, flags, lvb, lvb_len, &handle, rc); + rc = ldlm_cli_enqueue_fini(aa->oa_exp, req, aa->oa_type, 1, + aa->oa_mode, aa->oa_flags, lvb, lvb_len, + lockh, rc); /* Complete osc stuff. */ - rc = osc_enqueue_fini(req, aa->oa_lvb, aa->oa_upcall, aa->oa_cookie, - flags, aa->oa_agl, rc); + rc = osc_enqueue_fini(req, aa->oa_upcall, aa->oa_cookie, lockh, mode, + aa->oa_flags, aa->oa_agl, rc); OBD_FAIL_TIMEOUT(OBD_FAIL_OSC_CP_CANCEL_RACE, 10); - /* Release the lock for async request. */ - if (lustre_handle_is_used(&handle) && rc == ELDLM_OK) - /* - * Releases a reference taken by ldlm_cli_enqueue(), if it is - * not already released by - * ldlm_cli_enqueue_fini()->failed_lock_cleanup() - */ - ldlm_lock_decref(&handle, mode); - - LASSERTF(lock, "lockh %p, req %p, aa %p - client evicted?\n", - aa->oa_lockh, req, aa); - ldlm_lock_decref(&handle, mode); + ldlm_lock_decref(lockh, mode); LDLM_LOCK_PUT(lock); return rc; } @@ -2209,29 +2235,29 @@ struct ptlrpc_request_set *PTLRPCD_SET = (void *)1; * other synchronous requests, however keeping some locks and trying to obtain * others may take a considerable amount of time in a case of ost failure; and * when other sync requests do not get released lock from a client, the client - * is excluded from the cluster -- such scenarious make the life difficult, so + * is evicted from the cluster -- such scenaries make the life difficult, so * release locks just after they are obtained. */ int osc_enqueue_base(struct obd_export *exp, struct ldlm_res_id *res_id, __u64 *flags, ldlm_policy_data_t *policy, struct ost_lvb *lvb, int kms_valid, - obd_enqueue_update_f upcall, void *cookie, + osc_enqueue_upcall_f upcall, void *cookie, struct ldlm_enqueue_info *einfo, - struct lustre_handle *lockh, struct ptlrpc_request_set *rqset, int async, int agl) { struct obd_device *obd = exp->exp_obd; + struct lustre_handle lockh = { 0 }; struct ptlrpc_request *req = NULL; int intent = *flags & LDLM_FL_HAS_INTENT; - __u64 match_lvb = (agl != 0 ? 0 : LDLM_FL_LVB_READY); + __u64 match_lvb = agl ? 0 : LDLM_FL_LVB_READY; enum ldlm_mode mode; int rc; /* Filesystem lock extents are extended to page boundaries so that * dealing with the page cache is a little smoother. */ - policy->l_extent.start -= policy->l_extent.start & ~CFS_PAGE_MASK; - policy->l_extent.end |= ~CFS_PAGE_MASK; + policy->l_extent.start -= policy->l_extent.start & ~PAGE_MASK; + policy->l_extent.end |= ~PAGE_MASK; /* * kms is not valid when either object is completely fresh (so that no @@ -2259,64 +2285,46 @@ int osc_enqueue_base(struct obd_export *exp, struct ldlm_res_id *res_id, if (einfo->ei_mode == LCK_PR) mode |= LCK_PW; mode = ldlm_lock_match(obd->obd_namespace, *flags | match_lvb, res_id, - einfo->ei_type, policy, mode, lockh, 0); + einfo->ei_type, policy, mode, &lockh, 0); if (mode) { - struct ldlm_lock *matched = ldlm_handle2lock(lockh); + struct ldlm_lock *matched; - if ((agl != 0) && !(matched->l_flags & LDLM_FL_LVB_READY)) { - /* For AGL, if enqueue RPC is sent but the lock is not - * granted, then skip to process this strpe. - * Return -ECANCELED to tell the caller. + if (*flags & LDLM_FL_TEST_LOCK) + return ELDLM_OK; + + matched = ldlm_handle2lock(&lockh); + if (agl) { + /* AGL enqueues DLM locks speculatively. Therefore if + * it already exists a DLM lock, it wll just inform the + * caller to cancel the AGL process for this stripe. */ - ldlm_lock_decref(lockh, mode); + ldlm_lock_decref(&lockh, mode); LDLM_LOCK_PUT(matched); return -ECANCELED; - } - - if (osc_set_lock_data_with_check(matched, einfo)) { + } else if (osc_set_lock_data_with_check(matched, einfo)) { *flags |= LDLM_FL_LVB_READY; - /* addref the lock only if not async requests and PW - * lock is matched whereas we asked for PR. - */ - if (!rqset && einfo->ei_mode != mode) - ldlm_lock_addref(lockh, LCK_PR); - if (intent) { - /* I would like to be able to ASSERT here that - * rss <= kms, but I can't, for reasons which - * are explained in lov_enqueue() - */ - } - - /* We already have a lock, and it's referenced. - * - * At this point, the cl_lock::cll_state is CLS_QUEUING, - * AGL upcall may change it to CLS_HELD directly. - */ - (*upcall)(cookie, ELDLM_OK); + /* We already have a lock, and it's referenced. */ + (*upcall)(cookie, &lockh, ELDLM_LOCK_MATCHED); - if (einfo->ei_mode != mode) - ldlm_lock_decref(lockh, LCK_PW); - else if (rqset) - /* For async requests, decref the lock. */ - ldlm_lock_decref(lockh, einfo->ei_mode); + ldlm_lock_decref(&lockh, mode); LDLM_LOCK_PUT(matched); return ELDLM_OK; + } else { + ldlm_lock_decref(&lockh, mode); + LDLM_LOCK_PUT(matched); } - - ldlm_lock_decref(lockh, mode); - LDLM_LOCK_PUT(matched); } - no_match: +no_match: + if (*flags & LDLM_FL_TEST_LOCK) + return -ENOLCK; if (intent) { - LIST_HEAD(cancels); - req = ptlrpc_request_alloc(class_exp2cliimp(exp), &RQF_LDLM_ENQUEUE_LVB); if (!req) return -ENOMEM; - rc = ldlm_prep_enqueue_req(exp, req, &cancels, 0); + rc = ldlm_prep_enqueue_req(exp, req, NULL, 0); if (rc) { ptlrpc_request_free(req); return rc; @@ -2331,21 +2339,31 @@ int osc_enqueue_base(struct obd_export *exp, struct ldlm_res_id *res_id, *flags &= ~LDLM_FL_BLOCK_GRANTED; rc = ldlm_cli_enqueue(exp, &req, einfo, res_id, policy, flags, lvb, - sizeof(*lvb), LVB_T_OST, lockh, async); - if (rqset) { + sizeof(*lvb), LVB_T_OST, &lockh, async); + if (async) { if (!rc) { struct osc_enqueue_args *aa; - CLASSERT (sizeof(*aa) <= sizeof(req->rq_async_args)); + CLASSERT(sizeof(*aa) <= sizeof(req->rq_async_args)); aa = ptlrpc_req_async_args(req); - aa->oa_ei = einfo; aa->oa_exp = exp; - aa->oa_flags = flags; + aa->oa_mode = einfo->ei_mode; + aa->oa_type = einfo->ei_type; + lustre_handle_copy(&aa->oa_lockh, &lockh); aa->oa_upcall = upcall; aa->oa_cookie = cookie; - aa->oa_lvb = lvb; - aa->oa_lockh = lockh; aa->oa_agl = !!agl; + if (!agl) { + aa->oa_flags = flags; + aa->oa_lvb = lvb; + } else { + /* AGL is essentially to enqueue an DLM lock + * in advance, so we don't care about the + * result of AGL enqueue. + */ + aa->oa_lvb = NULL; + aa->oa_flags = NULL; + } req->rq_interpret_reply = (ptlrpc_interpterer_t)osc_enqueue_interpret; @@ -2359,7 +2377,8 @@ int osc_enqueue_base(struct obd_export *exp, struct ldlm_res_id *res_id, return rc; } - rc = osc_enqueue_fini(req, lvb, upcall, cookie, flags, agl, rc); + rc = osc_enqueue_fini(req, upcall, cookie, &lockh, einfo->ei_mode, + flags, agl, rc); if (intent) ptlrpc_req_finished(req); @@ -2381,8 +2400,8 @@ int osc_match_base(struct obd_export *exp, struct ldlm_res_id *res_id, /* Filesystem lock extents are extended to page boundaries so that * dealing with the page cache is a little smoother */ - policy->l_extent.start -= policy->l_extent.start & ~CFS_PAGE_MASK; - policy->l_extent.end |= ~CFS_PAGE_MASK; + policy->l_extent.start -= policy->l_extent.start & ~PAGE_MASK; + policy->l_extent.end |= ~PAGE_MASK; /* Next, search for already existing extent locks that will cover us */ /* If we're trying to read, we also search for an existing PW lock. The @@ -2493,7 +2512,7 @@ static int osc_statfs_async(struct obd_export *exp, } req->rq_interpret_reply = (ptlrpc_interpterer_t)osc_statfs_interpret; - CLASSERT (sizeof(*aa) <= sizeof(req->rq_async_args)); + CLASSERT(sizeof(*aa) <= sizeof(req->rq_async_args)); aa = ptlrpc_req_async_args(req); aa->aa_oi = oinfo; @@ -2787,7 +2806,7 @@ out: goto skip_locking; policy.l_extent.start = fm_key->fiemap.fm_start & - CFS_PAGE_MASK; + PAGE_MASK; if (OBD_OBJECT_EOF - fm_key->fiemap.fm_length <= fm_key->fiemap.fm_start + PAGE_SIZE - 1) @@ -2795,7 +2814,7 @@ out: else policy.l_extent.end = (fm_key->fiemap.fm_start + fm_key->fiemap.fm_length + - PAGE_SIZE - 1) & CFS_PAGE_MASK; + PAGE_SIZE - 1) & PAGE_MASK; ostid_build_res_name(&fm_key->oa.o_oi, &res_id); mode = ldlm_lock_match(exp->exp_obd->obd_namespace, @@ -2913,7 +2932,7 @@ static int osc_set_info_async(const struct lu_env *env, struct obd_export *exp, int nr = atomic_read(&cli->cl_lru_in_list) >> 1; int target = *(int *)val; - nr = osc_lru_shrink(cli, min(nr, target)); + nr = osc_lru_shrink(env, cli, min(nr, target), true); *(int *)val -= nr; return 0; } @@ -2992,12 +3011,12 @@ static int osc_reconnect(const struct lu_env *env, if (data && (data->ocd_connect_flags & OBD_CONNECT_GRANT)) { long lost_grant; - client_obd_list_lock(&cli->cl_loi_list_lock); + spin_lock(&cli->cl_loi_list_lock); data->ocd_grant = (cli->cl_avail_grant + cli->cl_dirty) ?: 2 * cli_brw_size(obd); lost_grant = cli->cl_lost_grant; cli->cl_lost_grant = 0; - client_obd_list_unlock(&cli->cl_loi_list_lock); + spin_unlock(&cli->cl_loi_list_lock); CDEBUG(D_RPCTRACE, "ocd_connect_flags: %#llx ocd_version: %d ocd_grant: %d, lost: %ld.\n", data->ocd_connect_flags, @@ -3047,10 +3066,10 @@ static int osc_import_event(struct obd_device *obd, switch (event) { case IMP_EVENT_DISCON: { cli = &obd->u.cli; - client_obd_list_lock(&cli->cl_loi_list_lock); + spin_lock(&cli->cl_loi_list_lock); cli->cl_avail_grant = 0; cli->cl_lost_grant = 0; - client_obd_list_unlock(&cli->cl_loi_list_lock); + spin_unlock(&cli->cl_loi_list_lock); break; } case IMP_EVENT_INACTIVE: { @@ -3073,8 +3092,9 @@ static int osc_import_event(struct obd_device *obd, ldlm_namespace_cleanup(ns, LDLM_FL_LOCAL_ONLY); cl_env_put(env, &refcheck); - } else + } else { rc = PTR_ERR(env); + } break; } case IMP_EVENT_ACTIVE: { @@ -3116,20 +3136,14 @@ static int osc_import_event(struct obd_device *obd, * \retval zero the lock can't be canceled * \retval other ok to cancel */ -static int osc_cancel_for_recovery(struct ldlm_lock *lock) +static int osc_cancel_weight(struct ldlm_lock *lock) { - check_res_locked(lock->l_resource); - /* - * Cancel all unused extent lock in granted mode LCK_PR or LCK_CR. - * - * XXX as a future improvement, we can also cancel unused write lock - * if it doesn't have dirty data and active mmaps. + * Cancel all unused and granted extent lock. */ if (lock->l_resource->lr_type == LDLM_EXTENT && - (lock->l_granted_mode == LCK_PR || - lock->l_granted_mode == LCK_CR) && - (osc_dlm_lock_pageref(lock) == 0)) + lock->l_granted_mode == lock->l_req_mode && + osc_ldlm_weigh_ast(lock) == 0) return 1; return 0; @@ -3170,6 +3184,14 @@ int osc_setup(struct obd_device *obd, struct lustre_cfg *lcfg) } cli->cl_writeback_work = handler; + handler = ptlrpcd_alloc_work(cli->cl_import, lru_queue_work, cli); + if (IS_ERR(handler)) { + rc = PTR_ERR(handler); + goto out_ptlrpcd_work; + } + + cli->cl_lru_work = handler; + rc = osc_quota_setup(obd); if (rc) goto out_ptlrpcd_work; @@ -3198,11 +3220,18 @@ int osc_setup(struct obd_device *obd, struct lustre_cfg *lcfg) } INIT_LIST_HEAD(&cli->cl_grant_shrink_list); - ns_register_cancel(obd->obd_namespace, osc_cancel_for_recovery); + ns_register_cancel(obd->obd_namespace, osc_cancel_weight); return rc; out_ptlrpcd_work: - ptlrpcd_destroy_work(handler); + if (cli->cl_writeback_work) { + ptlrpcd_destroy_work(cli->cl_writeback_work); + cli->cl_writeback_work = NULL; + } + if (cli->cl_lru_work) { + ptlrpcd_destroy_work(cli->cl_lru_work); + cli->cl_lru_work = NULL; + } out_client_setup: client_obd_cleanup(obd); out_ptlrpcd: @@ -3241,6 +3270,10 @@ static int osc_precleanup(struct obd_device *obd, enum obd_cleanup_stage stage) ptlrpcd_destroy_work(cli->cl_writeback_work); cli->cl_writeback_work = NULL; } + if (cli->cl_lru_work) { + ptlrpcd_destroy_work(cli->cl_lru_work); + cli->cl_lru_work = NULL; + } obd_cleanup_client_import(obd); ptlrpc_lprocfs_unregister_obd(obd); lprocfs_obd_cleanup(obd); @@ -3330,7 +3363,6 @@ static struct obd_ops osc_obd_ops = { }; extern struct lu_kmem_descr osc_caches[]; -extern spinlock_t osc_ast_guard; extern struct lock_class_key osc_ast_guard_class; static int __init osc_init(void) @@ -3357,9 +3389,6 @@ static int __init osc_init(void) if (rc) goto out_kmem; - spin_lock_init(&osc_ast_guard); - lockdep_set_class(&osc_ast_guard, &osc_ast_guard_class); - /* This is obviously too much memory, only prevent overflow here */ if (osc_reqpool_mem_max >= 1 << 12 || osc_reqpool_mem_max == 0) { rc = -EINVAL; diff --git a/drivers/staging/lustre/lustre/ptlrpc/client.c b/drivers/staging/lustre/lustre/ptlrpc/client.c index cf3ac8eee..4b7912a2c 100644 --- a/drivers/staging/lustre/lustre/ptlrpc/client.c +++ b/drivers/staging/lustre/lustre/ptlrpc/client.c @@ -595,9 +595,9 @@ static int __ptlrpc_request_bufs_pack(struct ptlrpc_request *request, struct obd_import *imp = request->rq_import; int rc; - if (unlikely(ctx)) + if (unlikely(ctx)) { request->rq_cli_ctx = sptlrpc_cli_ctx_get(ctx); - else { + } else { rc = sptlrpc_req_get_ctx(request); if (rc) goto out_free; @@ -1082,7 +1082,6 @@ static int ptlrpc_console_allow(struct ptlrpc_request *req) */ if ((lustre_handle_is_used(&req->rq_import->imp_remote_handle)) && (opc == OST_CONNECT || opc == MDS_CONNECT || opc == MGS_CONNECT)) { - /* Suppress timed out reconnect requests */ if (req->rq_timedout) return 0; @@ -2087,7 +2086,7 @@ int ptlrpc_set_wait(struct ptlrpc_request_set *set) CDEBUG(D_RPCTRACE, "set %p going to sleep for %d seconds\n", set, timeout); - if (timeout == 0 && !cfs_signal_pending()) + if (timeout == 0 && !signal_pending(current)) /* * No requests are in-flight (ether timed out * or delayed), so we can allow interrupts. @@ -2114,7 +2113,7 @@ int ptlrpc_set_wait(struct ptlrpc_request_set *set) * it being ignored forever */ if (rc == -ETIMEDOUT && !lwi.lwi_allow_intr && - cfs_signal_pending()) { + signal_pending(current)) { sigset_t blocked_sigs = cfs_block_sigsinv(LUSTRE_FATAL_SIGS); @@ -2124,7 +2123,7 @@ int ptlrpc_set_wait(struct ptlrpc_request_set *set) * important signals since ptlrpc set is not easily * reentrant from userspace again */ - if (cfs_signal_pending()) + if (signal_pending(current)) ptlrpc_interrupted_set(set); cfs_restore_sigs(blocked_sigs); } diff --git a/drivers/staging/lustre/lustre/ptlrpc/events.c b/drivers/staging/lustre/lustre/ptlrpc/events.c index 47be21ac9..fdcde9bbd 100644 --- a/drivers/staging/lustre/lustre/ptlrpc/events.c +++ b/drivers/staging/lustre/lustre/ptlrpc/events.c @@ -69,7 +69,6 @@ void request_out_callback(lnet_event_t *ev) req->rq_req_unlink = 0; if (ev->type == LNET_EVENT_UNLINK || ev->status != 0) { - /* Failed send: make it seem like the reply timed out, just * like failing sends in client.c does currently... */ diff --git a/drivers/staging/lustre/lustre/ptlrpc/import.c b/drivers/staging/lustre/lustre/ptlrpc/import.c index cd94fed0f..a4f7544f4 100644 --- a/drivers/staging/lustre/lustre/ptlrpc/import.c +++ b/drivers/staging/lustre/lustre/ptlrpc/import.c @@ -1001,6 +1001,7 @@ finish: return 0; } } else { + static bool warned; spin_lock(&imp->imp_lock); list_del(&imp->imp_conn_current->oic_item); @@ -1021,7 +1022,7 @@ finish: goto out; } - if ((ocd->ocd_connect_flags & OBD_CONNECT_VERSION) && + if (!warned && (ocd->ocd_connect_flags & OBD_CONNECT_VERSION) && (ocd->ocd_version > LUSTRE_VERSION_CODE + LUSTRE_VERSION_OFFSET_WARN || ocd->ocd_version < LUSTRE_VERSION_CODE - @@ -1029,10 +1030,8 @@ finish: /* Sigh, some compilers do not like #ifdef in the middle * of macro arguments */ - const char *older = "older. Consider upgrading server or downgrading client" - ; - const char *newer = "newer than client version. Consider upgrading client" - ; + const char *older = "older than client. Consider upgrading server"; + const char *newer = "newer than client. Consider recompiling application"; LCONSOLE_WARN("Server %s version (%d.%d.%d.%d) is much %s (%s)\n", obd2cli_tgt(imp->imp_obd), @@ -1042,6 +1041,7 @@ finish: OBD_OCD_VERSION_FIX(ocd->ocd_version), ocd->ocd_version > LUSTRE_VERSION_CODE ? newer : older, LUSTRE_VERSION_STRING); + warned = true; } #if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(3, 2, 50, 0) @@ -1370,7 +1370,6 @@ int ptlrpc_import_recovery_state_machine(struct obd_import *imp) if (rc) goto out; } - } if (imp->imp_state == LUSTRE_IMP_REPLAY_WAIT) { @@ -1453,7 +1452,6 @@ int ptlrpc_disconnect_import(struct obd_import *imp, int noclose) back_to_sleep, LWI_ON_SIGNAL_NOOP, NULL); rc = l_wait_event(imp->imp_recovery_waitq, !ptlrpc_import_in_recovery(imp), &lwi); - } spin_lock(&imp->imp_lock); diff --git a/drivers/staging/lustre/lustre/ptlrpc/layout.c b/drivers/staging/lustre/lustre/ptlrpc/layout.c index 5b06901e5..c0ecd1625 100644 --- a/drivers/staging/lustre/lustre/ptlrpc/layout.c +++ b/drivers/staging/lustre/lustre/ptlrpc/layout.c @@ -160,6 +160,16 @@ static const struct req_msg_field *fld_query_server[] = { &RMF_FLD_MDFLD }; +static const struct req_msg_field *fld_read_client[] = { + &RMF_PTLRPC_BODY, + &RMF_FLD_MDFLD +}; + +static const struct req_msg_field *fld_read_server[] = { + &RMF_PTLRPC_BODY, + &RMF_GENERIC_DATA +}; + static const struct req_msg_field *mds_getattr_name_client[] = { &RMF_PTLRPC_BODY, &RMF_MDT_BODY, @@ -566,7 +576,7 @@ static const struct req_msg_field *ost_get_info_generic_server[] = { static const struct req_msg_field *ost_get_info_generic_client[] = { &RMF_PTLRPC_BODY, - &RMF_SETINFO_KEY + &RMF_GETINFO_KEY }; static const struct req_msg_field *ost_get_last_id_server[] = { @@ -574,6 +584,12 @@ static const struct req_msg_field *ost_get_last_id_server[] = { &RMF_OBD_ID }; +static const struct req_msg_field *ost_get_last_fid_client[] = { + &RMF_PTLRPC_BODY, + &RMF_GETINFO_KEY, + &RMF_FID, +}; + static const struct req_msg_field *ost_get_last_fid_server[] = { &RMF_PTLRPC_BODY, &RMF_FID, @@ -643,6 +659,7 @@ static struct req_format *req_formats[] = { &RQF_MGS_CONFIG_READ, &RQF_SEQ_QUERY, &RQF_FLD_QUERY, + &RQF_FLD_READ, &RQF_MDS_CONNECT, &RQF_MDS_DISCONNECT, &RQF_MDS_GET_INFO, @@ -696,7 +713,7 @@ static struct req_format *req_formats[] = { &RQF_OST_BRW_WRITE, &RQF_OST_STATFS, &RQF_OST_SET_GRANT_INFO, - &RQF_OST_GET_INFO_GENERIC, + &RQF_OST_GET_INFO, &RQF_OST_GET_INFO_LAST_ID, &RQF_OST_GET_INFO_LAST_FID, &RQF_OST_SET_INFO_LAST_FID, @@ -1162,6 +1179,10 @@ struct req_format RQF_FLD_QUERY = DEFINE_REQ_FMT0("FLD_QUERY", fld_query_client, fld_query_server); EXPORT_SYMBOL(RQF_FLD_QUERY); +struct req_format RQF_FLD_READ = + DEFINE_REQ_FMT0("FLD_READ", fld_read_client, fld_read_server); +EXPORT_SYMBOL(RQF_FLD_READ); + struct req_format RQF_LOG_CANCEL = DEFINE_REQ_FMT0("OBD_LOG_CANCEL", log_cancel_client, empty); EXPORT_SYMBOL(RQF_LOG_CANCEL); @@ -1519,10 +1540,10 @@ struct req_format RQF_OST_SET_GRANT_INFO = ost_body_only); EXPORT_SYMBOL(RQF_OST_SET_GRANT_INFO); -struct req_format RQF_OST_GET_INFO_GENERIC = +struct req_format RQF_OST_GET_INFO = DEFINE_REQ_FMT0("OST_GET_INFO", ost_get_info_generic_client, ost_get_info_generic_server); -EXPORT_SYMBOL(RQF_OST_GET_INFO_GENERIC); +EXPORT_SYMBOL(RQF_OST_GET_INFO); struct req_format RQF_OST_GET_INFO_LAST_ID = DEFINE_REQ_FMT0("OST_GET_INFO_LAST_ID", ost_get_info_generic_client, @@ -1530,7 +1551,7 @@ struct req_format RQF_OST_GET_INFO_LAST_ID = EXPORT_SYMBOL(RQF_OST_GET_INFO_LAST_ID); struct req_format RQF_OST_GET_INFO_LAST_FID = - DEFINE_REQ_FMT0("OST_GET_INFO_LAST_FID", obd_set_info_client, + DEFINE_REQ_FMT0("OST_GET_INFO_LAST_FID", ost_get_last_fid_client, ost_get_last_fid_server); EXPORT_SYMBOL(RQF_OST_GET_INFO_LAST_FID); diff --git a/drivers/staging/lustre/lustre/ptlrpc/lproc_ptlrpc.c b/drivers/staging/lustre/lustre/ptlrpc/lproc_ptlrpc.c index c95a91ce2..64c0f1e17 100644 --- a/drivers/staging/lustre/lustre/ptlrpc/lproc_ptlrpc.c +++ b/drivers/staging/lustre/lustre/ptlrpc/lproc_ptlrpc.c @@ -131,6 +131,7 @@ static struct ll_rpc_opcode { { SEC_CTX_INIT_CONT, "sec_ctx_init_cont" }, { SEC_CTX_FINI, "sec_ctx_fini" }, { FLD_QUERY, "fld_query" }, + { FLD_READ, "fld_read" }, }; static struct ll_eopcode { @@ -679,11 +680,11 @@ static ssize_t ptlrpc_lprocfs_nrs_seq_write(struct file *file, /** * The second token is either NULL, or an optional [reg|hp] string */ - if (strcmp(cmd, "reg") == 0) + if (strcmp(cmd, "reg") == 0) { queue = PTLRPC_NRS_QUEUE_REG; - else if (strcmp(cmd, "hp") == 0) + } else if (strcmp(cmd, "hp") == 0) { queue = PTLRPC_NRS_QUEUE_HP; - else { + } else { rc = -EINVAL; goto out; } @@ -693,8 +694,9 @@ default_queue: if (queue == PTLRPC_NRS_QUEUE_HP && !nrs_svc_has_hp(svc)) { rc = -ENODEV; goto out; - } else if (queue == PTLRPC_NRS_QUEUE_BOTH && !nrs_svc_has_hp(svc)) + } else if (queue == PTLRPC_NRS_QUEUE_BOTH && !nrs_svc_has_hp(svc)) { queue = PTLRPC_NRS_QUEUE_REG; + } /** * Serialize NRS core lprocfs operations with policy registration/ @@ -1320,6 +1322,5 @@ int lprocfs_wr_pinger_recov(struct file *file, const char __user *buffer, up_read(&obd->u.cli.cl_sem); return count; - } EXPORT_SYMBOL(lprocfs_wr_pinger_recov); diff --git a/drivers/staging/lustre/lustre/ptlrpc/nrs.c b/drivers/staging/lustre/lustre/ptlrpc/nrs.c index 710fb806f..c444f5168 100644 --- a/drivers/staging/lustre/lustre/ptlrpc/nrs.c +++ b/drivers/staging/lustre/lustre/ptlrpc/nrs.c @@ -975,7 +975,11 @@ static void nrs_svcpt_cleanup_locked(struct ptlrpc_service_part *svcpt) LASSERT(mutex_is_locked(&nrs_core.nrs_mutex)); again: - nrs = nrs_svcpt2nrs(svcpt, hp); + /* scp_nrs_hp could be NULL due to short of memory. */ + nrs = hp ? svcpt->scp_nrs_hp : &svcpt->scp_nrs_reg; + /* check the nrs_svcpt to see if nrs is initialized. */ + if (!nrs || !nrs->nrs_svcpt) + return; nrs->nrs_stopping = 1; list_for_each_entry_safe(policy, tmp, &nrs->nrs_policy_list, pol_list) { @@ -1038,7 +1042,6 @@ static int nrs_policy_unregister_locked(struct ptlrpc_nrs_pol_desc *desc) LASSERT(mutex_is_locked(&ptlrpc_all_services_mutex)); list_for_each_entry(svc, &ptlrpc_all_services, srv_list) { - if (!nrs_policy_compatible(svc, desc) || unlikely(svc->srv_is_stopping)) continue; diff --git a/drivers/staging/lustre/lustre/ptlrpc/pack_generic.c b/drivers/staging/lustre/lustre/ptlrpc/pack_generic.c index 492d63fad..811acf6fc 100644 --- a/drivers/staging/lustre/lustre/ptlrpc/pack_generic.c +++ b/drivers/staging/lustre/lustre/ptlrpc/pack_generic.c @@ -1160,7 +1160,6 @@ __u32 lustre_msg_get_timeout(struct lustre_msg *msg) if (!pb) { CERROR("invalid msg %p: no ptlrpc body!\n", msg); return 0; - } return pb->pb_timeout; } @@ -1179,7 +1178,6 @@ __u32 lustre_msg_get_service_time(struct lustre_msg *msg) if (!pb) { CERROR("invalid msg %p: no ptlrpc body!\n", msg); return 0; - } return pb->pb_service_time; } @@ -1572,7 +1570,6 @@ static void lustre_swab_obdo(struct obdo *o) CLASSERT(offsetof(typeof(*o), o_padding_4) != 0); CLASSERT(offsetof(typeof(*o), o_padding_5) != 0); CLASSERT(offsetof(typeof(*o), o_padding_6) != 0); - } void lustre_swab_obd_statfs(struct obd_statfs *os) diff --git a/drivers/staging/lustre/lustre/ptlrpc/ptlrpcd.c b/drivers/staging/lustre/lustre/ptlrpc/ptlrpcd.c index db003f5da..76a355a9d 100644 --- a/drivers/staging/lustre/lustre/ptlrpc/ptlrpcd.c +++ b/drivers/staging/lustre/lustre/ptlrpc/ptlrpcd.c @@ -387,7 +387,8 @@ static int ptlrpcd(void *arg) { struct ptlrpcd_ctl *pc = arg; struct ptlrpc_request_set *set; - struct lu_env env = { .le_ses = NULL }; + struct lu_context ses = { 0 }; + struct lu_env env = { .le_ses = &ses }; int rc = 0; int exit = 0; @@ -416,6 +417,13 @@ static int ptlrpcd(void *arg) */ rc = lu_context_init(&env.le_ctx, LCT_CL_THREAD|LCT_REMEMBER|LCT_NOREF); + if (rc == 0) { + rc = lu_context_init(env.le_ses, + LCT_SESSION | LCT_REMEMBER | LCT_NOREF); + if (rc != 0) + lu_context_fini(&env.le_ctx); + } + if (rc != 0) goto failed; @@ -436,9 +444,10 @@ static int ptlrpcd(void *arg) ptlrpc_expired_set, set); lu_context_enter(&env.le_ctx); - l_wait_event(set->set_waitq, - ptlrpcd_check(&env, pc), &lwi); + lu_context_enter(env.le_ses); + l_wait_event(set->set_waitq, ptlrpcd_check(&env, pc), &lwi); lu_context_exit(&env.le_ctx); + lu_context_exit(env.le_ses); /* * Abort inflight rpcs for forced stop case. @@ -461,6 +470,7 @@ static int ptlrpcd(void *arg) if (!list_empty(&set->set_requests)) ptlrpc_set_wait(set); lu_context_fini(&env.le_ctx); + lu_context_fini(env.le_ses); complete(&pc->pc_finishing); @@ -899,8 +909,11 @@ int ptlrpcd_addref(void) int rc = 0; mutex_lock(&ptlrpcd_mutex); - if (++ptlrpcd_users == 1) + if (++ptlrpcd_users == 1) { rc = ptlrpcd_init(); + if (rc < 0) + ptlrpcd_users--; + } mutex_unlock(&ptlrpcd_mutex); return rc; } diff --git a/drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c b/drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c index d3872b8c9..02e6cda4c 100644 --- a/drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c +++ b/drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c @@ -41,7 +41,6 @@ #define DEBUG_SUBSYSTEM S_SEC #include "../../include/linux/libcfs/libcfs.h" -#include #include "../include/obd.h" #include "../include/obd_cksum.h" @@ -511,7 +510,6 @@ int sptlrpc_get_bulk_checksum(struct ptlrpc_bulk_desc *desc, __u8 alg, { struct cfs_crypto_hash_desc *hdesc; int hashsize; - char hashbuf[64]; unsigned int bufsize; int i, err; @@ -529,21 +527,23 @@ int sptlrpc_get_bulk_checksum(struct ptlrpc_bulk_desc *desc, __u8 alg, for (i = 0; i < desc->bd_iov_count; i++) { cfs_crypto_hash_update_page(hdesc, desc->bd_iov[i].kiov_page, - desc->bd_iov[i].kiov_offset & ~CFS_PAGE_MASK, + desc->bd_iov[i].kiov_offset & ~PAGE_MASK, desc->bd_iov[i].kiov_len); } + if (hashsize > buflen) { + unsigned char hashbuf[CFS_CRYPTO_HASH_DIGESTSIZE_MAX]; + bufsize = sizeof(hashbuf); - err = cfs_crypto_hash_final(hdesc, (unsigned char *)hashbuf, - &bufsize); + LASSERTF(bufsize >= hashsize, "bufsize = %u < hashsize %u\n", + bufsize, hashsize); + err = cfs_crypto_hash_final(hdesc, hashbuf, &bufsize); memcpy(buf, hashbuf, buflen); } else { bufsize = buflen; err = cfs_crypto_hash_final(hdesc, buf, &bufsize); } - if (err) - cfs_crypto_hash_final(hdesc, NULL, NULL); return err; } EXPORT_SYMBOL(sptlrpc_get_bulk_checksum); diff --git a/drivers/staging/lustre/lustre/ptlrpc/sec_plain.c b/drivers/staging/lustre/lustre/ptlrpc/sec_plain.c index 6276bf59c..37c9f4c45 100644 --- a/drivers/staging/lustre/lustre/ptlrpc/sec_plain.c +++ b/drivers/staging/lustre/lustre/ptlrpc/sec_plain.c @@ -162,7 +162,7 @@ static void corrupt_bulk_data(struct ptlrpc_bulk_desc *desc) continue; ptr = kmap(desc->bd_iov[i].kiov_page); - off = desc->bd_iov[i].kiov_offset & ~CFS_PAGE_MASK; + off = desc->bd_iov[i].kiov_offset & ~PAGE_MASK; ptr[off] ^= 0x1; kunmap(desc->bd_iov[i].kiov_page); return; diff --git a/drivers/staging/lustre/lustre/ptlrpc/service.c b/drivers/staging/lustre/lustre/ptlrpc/service.c index 1bbd1d39c..17c7b9749 100644 --- a/drivers/staging/lustre/lustre/ptlrpc/service.c +++ b/drivers/staging/lustre/lustre/ptlrpc/service.c @@ -838,6 +838,11 @@ static void ptlrpc_server_finish_request(struct ptlrpc_service_part *svcpt, { ptlrpc_server_hpreq_fini(req); + if (req->rq_session.lc_thread) { + lu_context_exit(&req->rq_session); + lu_context_fini(&req->rq_session); + } + ptlrpc_server_drop_request(req); } @@ -1579,6 +1584,21 @@ ptlrpc_server_handle_req_in(struct ptlrpc_service_part *svcpt, } req->rq_svc_thread = thread; + if (thread) { + /* initialize request session, it is needed for request + * processing by target + */ + rc = lu_context_init(&req->rq_session, + LCT_SERVER_SESSION | LCT_NOREF); + if (rc) { + CERROR("%s: failure to initialize session: rc = %d\n", + thread->t_name, rc); + goto err_req; + } + req->rq_session.lc_thread = thread; + lu_context_enter(&req->rq_session); + req->rq_svc_thread->t_env->le_ses = &req->rq_session; + } ptlrpc_at_add_timed(req); @@ -1612,7 +1632,6 @@ ptlrpc_server_handle_request(struct ptlrpc_service_part *svcpt, struct timespec64 arrived; unsigned long timediff_usecs; unsigned long arrived_usecs; - int rc; int fail_opc = 0; request = ptlrpc_server_request_get(svcpt, false); @@ -1649,21 +1668,6 @@ ptlrpc_server_handle_request(struct ptlrpc_service_part *svcpt, at_get(&svcpt->scp_at_estimate)); } - rc = lu_context_init(&request->rq_session, LCT_SESSION | LCT_NOREF); - if (rc) { - CERROR("Failure to initialize session: %d\n", rc); - goto out_req; - } - request->rq_session.lc_thread = thread; - request->rq_session.lc_cookie = 0x5; - lu_context_enter(&request->rq_session); - - CDEBUG(D_NET, "got req %llu\n", request->rq_xid); - - request->rq_svc_thread = thread; - if (thread) - request->rq_svc_thread->t_env->le_ses = &request->rq_session; - if (likely(request->rq_export)) { if (unlikely(ptlrpc_check_req(request))) goto put_conn; @@ -1695,14 +1699,21 @@ ptlrpc_server_handle_request(struct ptlrpc_service_part *svcpt, if (lustre_msg_get_opc(request->rq_reqmsg) != OBD_PING) CFS_FAIL_TIMEOUT_MS(OBD_FAIL_PTLRPC_PAUSE_REQ, cfs_fail_val); - rc = svc->srv_ops.so_req_handler(request); + CDEBUG(D_NET, "got req %llu\n", request->rq_xid); + + /* re-assign request and sesson thread to the current one */ + request->rq_svc_thread = thread; + if (thread) { + LASSERT(request->rq_session.lc_thread); + request->rq_session.lc_thread = thread; + request->rq_session.lc_cookie = 0x55; + thread->t_env->le_ses = &request->rq_session; + } + svc->srv_ops.so_req_handler(request); ptlrpc_rqphase_move(request, RQ_PHASE_COMPLETE); put_conn: - lu_context_exit(&request->rq_session); - lu_context_fini(&request->rq_session); - if (unlikely(ktime_get_real_seconds() > request->rq_deadline)) { DEBUG_REQ(D_WARNING, request, "Request took longer than estimated (%lld:%llds); " @@ -1756,7 +1767,6 @@ put_conn: request->rq_arrival_time.tv_sec); } -out_req: ptlrpc_server_finish_active_request(svcpt, request); return 1; diff --git a/drivers/staging/lustre/lustre/ptlrpc/wiretest.c b/drivers/staging/lustre/lustre/ptlrpc/wiretest.c index 3ffd2d91f..aacc81083 100644 --- a/drivers/staging/lustre/lustre/ptlrpc/wiretest.c +++ b/drivers/staging/lustre/lustre/ptlrpc/wiretest.c @@ -276,7 +276,9 @@ void lustre_assert_wire_constants(void) (long long)FLD_QUERY); LASSERTF(FLD_FIRST_OPC == 900, "found %lld\n", (long long)FLD_FIRST_OPC); - LASSERTF(FLD_LAST_OPC == 901, "found %lld\n", + LASSERTF(FLD_READ == 901, "found %lld\n", + (long long)FLD_READ); + LASSERTF(FLD_LAST_OPC == 902, "found %lld\n", (long long)FLD_LAST_OPC); LASSERTF(SEQ_QUERY == 700, "found %lld\n", (long long)SEQ_QUERY); @@ -1069,6 +1071,8 @@ void lustre_assert_wire_constants(void) OBD_CONNECT_PINGLESS); LASSERTF(OBD_CONNECT_FLOCK_DEAD == 0x8000000000000ULL, "found 0x%.16llxULL\n", OBD_CONNECT_FLOCK_DEAD); + LASSERTF(OBD_CONNECT_OPEN_BY_FID == 0x20000000000000ULL, + "found 0x%.16llxULL\n", OBD_CONNECT_OPEN_BY_FID); LASSERTF(OBD_CKSUM_CRC32 == 0x00000001UL, "found 0x%.8xUL\n", (unsigned)OBD_CKSUM_CRC32); LASSERTF(OBD_CKSUM_ADLER == 0x00000002UL, "found 0x%.8xUL\n", @@ -1639,6 +1643,12 @@ void lustre_assert_wire_constants(void) OBD_BRW_ASYNC); LASSERTF(OBD_BRW_MEMALLOC == 0x800, "found 0x%.8x\n", OBD_BRW_MEMALLOC); + LASSERTF(OBD_BRW_OVER_USRQUOTA == 0x1000, "found 0x%.8x\n", + OBD_BRW_OVER_USRQUOTA); + LASSERTF(OBD_BRW_OVER_GRPQUOTA == 0x2000, "found 0x%.8x\n", + OBD_BRW_OVER_GRPQUOTA); + LASSERTF(OBD_BRW_SOFT_SYNC == 0x4000, "found 0x%.8x\n", + OBD_BRW_SOFT_SYNC); /* Checks for struct ost_body */ LASSERTF((int)sizeof(struct ost_body) == 208, "found %lld\n", -- cgit v1.2.3-54-g00ecf