From 863981e96738983919de841ec669e157e6bdaeb0 Mon Sep 17 00:00:00 2001
From: André Fabian Silva Delgado <emulatorman@parabola.nu>
Date: Sun, 11 Sep 2016 04:34:46 -0300
Subject: Linux-libre 4.7.1-gnu

---
 .../staging/lustre/include/linux/libcfs/libcfs.h   |   51 +-
 .../lustre/include/linux/libcfs/libcfs_cpu.h       |   79 +
 .../lustre/include/linux/libcfs/libcfs_crypto.h    |  136 +-
 .../lustre/include/linux/libcfs/libcfs_debug.h     |   18 +-
 .../lustre/include/linux/libcfs/libcfs_fail.h      |   15 +-
 .../lustre/include/linux/libcfs/libcfs_hash.h      |    4 +-
 .../lustre/include/linux/libcfs/libcfs_ioctl.h     |  161 +-
 .../lustre/include/linux/libcfs/libcfs_prim.h      |   31 +-
 .../lustre/include/linux/libcfs/libcfs_private.h   |   75 -
 .../lustre/include/linux/libcfs/libcfs_workitem.h  |   12 +-
 .../lustre/include/linux/libcfs/linux/libcfs.h     |    2 +-
 .../lustre/include/linux/libcfs/linux/linux-cpu.h  |    2 +-
 .../lustre/include/linux/libcfs/linux/linux-mem.h  |   80 -
 .../lustre/include/linux/libcfs/linux/linux-time.h |    4 +-
 .../staging/lustre/include/linux/lnet/lib-dlc.h    |   29 +-
 .../staging/lustre/include/linux/lnet/lib-lnet.h   |    9 +-
 .../staging/lustre/include/linux/lnet/lib-types.h  |    2 +
 .../staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c    |  405 +++-
 .../staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h    |  134 +-
 .../staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c |  101 +-
 .../lustre/lnet/klnds/o2iblnd/o2iblnd_modparams.c  |  139 +-
 .../staging/lustre/lnet/klnds/socklnd/socklnd.c    |    1 -
 .../lustre/lnet/klnds/socklnd/socklnd_lib.c        |    3 -
 drivers/staging/lustre/lnet/libcfs/debug.c         |  126 +-
 drivers/staging/lustre/lnet/libcfs/fail.c          |    3 +
 drivers/staging/lustre/lnet/libcfs/hash.c          |    6 +-
 drivers/staging/lustre/lnet/libcfs/libcfs_lock.c   |   54 +-
 drivers/staging/lustre/lnet/libcfs/libcfs_mem.c    |   28 -
 .../staging/lustre/lnet/libcfs/linux/linux-cpu.c   |    9 +-
 .../lustre/lnet/libcfs/linux/linux-crypto.c        |  283 ++-
 .../lustre/lnet/libcfs/linux/linux-module.c        |  154 +-
 .../staging/lustre/lnet/libcfs/linux/linux-prim.c  |   31 -
 drivers/staging/lustre/lnet/libcfs/module.c        |  132 +-
 drivers/staging/lustre/lnet/libcfs/tracefile.c     |   17 +-
 drivers/staging/lustre/lnet/libcfs/workitem.c      |   12 +-
 drivers/staging/lustre/lnet/lnet/api-ni.c          |  143 +-
 drivers/staging/lustre/lnet/lnet/config.c          |    3 +
 drivers/staging/lustre/lnet/lnet/lib-move.c        |   10 +-
 drivers/staging/lustre/lnet/lnet/module.c          |    7 +-
 drivers/staging/lustre/lnet/selftest/brw_test.c    |   82 +-
 drivers/staging/lustre/lnet/selftest/conctl.c      |   52 +-
 drivers/staging/lustre/lnet/selftest/conrpc.c      |  215 +-
 drivers/staging/lustre/lnet/selftest/conrpc.h      |   40 +-
 drivers/staging/lustre/lnet/selftest/console.c     |  282 +--
 drivers/staging/lustre/lnet/selftest/console.h     |   47 +-
 drivers/staging/lustre/lnet/selftest/framework.c   |  270 +--
 drivers/staging/lustre/lnet/selftest/ping_test.c   |   44 +-
 drivers/staging/lustre/lnet/selftest/rpc.c         |  133 +-
 drivers/staging/lustre/lnet/selftest/rpc.h         |  156 +-
 drivers/staging/lustre/lnet/selftest/selftest.h    |  204 +-
 drivers/staging/lustre/lnet/selftest/timer.c       |   12 +-
 drivers/staging/lustre/lustre/fid/fid_request.c    |   12 +-
 drivers/staging/lustre/lustre/fld/fld_cache.c      |    3 +-
 drivers/staging/lustre/lustre/fld/fld_internal.h   |    9 +-
 drivers/staging/lustre/lustre/fld/fld_request.c    |   94 +-
 drivers/staging/lustre/lustre/include/cl_object.h  |  978 ++-------
 drivers/staging/lustre/lustre/include/lclient.h    |  408 ----
 drivers/staging/lustre/lustre/include/linux/obd.h  |  125 --
 drivers/staging/lustre/lustre/include/lu_object.h  |   75 +-
 .../lustre/lustre/include/lustre/lustre_idl.h      |  112 +-
 .../lustre/lustre/include/lustre/lustre_user.h     |   54 +-
 drivers/staging/lustre/lustre/include/lustre_cfg.h |    2 +-
 .../staging/lustre/lustre/include/lustre_disk.h    |    2 -
 drivers/staging/lustre/lustre/include/lustre_dlm.h |   14 +-
 .../lustre/lustre/include/lustre_dlm_flags.h       |  120 +-
 drivers/staging/lustre/lustre/include/lustre_fid.h |   22 +-
 .../staging/lustre/lustre/include/lustre_import.h  |    2 +-
 drivers/staging/lustre/lustre/include/lustre_lib.h |   60 +-
 drivers/staging/lustre/lustre/include/lustre_mdc.h |   18 +
 drivers/staging/lustre/lustre/include/lustre_net.h |    4 +-
 .../staging/lustre/lustre/include/lustre_param.h   |    1 +
 .../lustre/lustre/include/lustre_req_layout.h      |    3 +-
 drivers/staging/lustre/lustre/include/obd.h        |   77 +-
 drivers/staging/lustre/lustre/include/obd_cksum.h  |    1 +
 drivers/staging/lustre/lustre/include/obd_class.h  |    5 +-
 .../staging/lustre/lustre/include/obd_support.h    |    4 +
 drivers/staging/lustre/lustre/lclient/glimpse.c    |  270 ---
 drivers/staging/lustre/lustre/lclient/lcommon_cl.c | 1203 -----------
 .../staging/lustre/lustre/lclient/lcommon_misc.c   |  200 --
 drivers/staging/lustre/lustre/ldlm/l_lock.c        |    4 +-
 drivers/staging/lustre/lustre/ldlm/ldlm_extent.c   |    4 +-
 drivers/staging/lustre/lustre/ldlm/ldlm_flock.c    |   30 +-
 drivers/staging/lustre/lustre/ldlm/ldlm_internal.h |   19 +-
 drivers/staging/lustre/lustre/ldlm/ldlm_lib.c      |   14 +-
 drivers/staging/lustre/lustre/ldlm/ldlm_lock.c     |  115 +-
 drivers/staging/lustre/lustre/ldlm/ldlm_lockd.c    |   28 +-
 drivers/staging/lustre/lustre/ldlm/ldlm_request.c  |  163 +-
 drivers/staging/lustre/lustre/ldlm/ldlm_resource.c |   19 +-
 drivers/staging/lustre/lustre/llite/Makefile       |    5 +-
 drivers/staging/lustre/lustre/llite/dcache.c       |   15 +-
 drivers/staging/lustre/lustre/llite/dir.c          |   99 +-
 drivers/staging/lustre/lustre/llite/file.c         |  277 +--
 drivers/staging/lustre/lustre/llite/glimpse.c      |  255 +++
 drivers/staging/lustre/lustre/llite/lcommon_cl.c   |  327 +++
 drivers/staging/lustre/lustre/llite/lcommon_misc.c |  201 ++
 drivers/staging/lustre/lustre/llite/llite_close.c  |   71 +-
 .../staging/lustre/lustre/llite/llite_internal.h   |  274 +--
 drivers/staging/lustre/lustre/llite/llite_lib.c    |  176 +-
 drivers/staging/lustre/lustre/llite/llite_mmap.c   |   48 +-
 drivers/staging/lustre/lustre/llite/llite_nfs.c    |   29 +-
 drivers/staging/lustre/lustre/llite/lloop.c        |    3 +-
 drivers/staging/lustre/lustre/llite/lproc_llite.c  |   33 +-
 drivers/staging/lustre/lustre/llite/namei.c        |  143 +-
 drivers/staging/lustre/lustre/llite/rw.c           |  367 ++--
 drivers/staging/lustre/lustre/llite/rw26.c         |  318 ++-
 drivers/staging/lustre/lustre/llite/statahead.c    |   17 +-
 drivers/staging/lustre/lustre/llite/super25.c      |   14 +-
 drivers/staging/lustre/lustre/llite/symlink.c      |   10 +-
 drivers/staging/lustre/lustre/llite/vvp_dev.c      |  270 ++-
 drivers/staging/lustre/lustre/llite/vvp_internal.h |  332 ++-
 drivers/staging/lustre/lustre/llite/vvp_io.c       |  928 +++++----
 drivers/staging/lustre/lustre/llite/vvp_lock.c     |   53 +-
 drivers/staging/lustre/lustre/llite/vvp_object.c   |  141 +-
 drivers/staging/lustre/lustre/llite/vvp_page.c     |  211 +-
 drivers/staging/lustre/lustre/llite/vvp_req.c      |  121 ++
 drivers/staging/lustre/lustre/llite/xattr.c        |   45 +-
 drivers/staging/lustre/lustre/llite/xattr_cache.c  |    1 -
 drivers/staging/lustre/lustre/lmv/lmv_internal.h   |    3 -
 drivers/staging/lustre/lustre/lmv/lmv_obd.c        |  182 +-
 .../staging/lustre/lustre/lov/lov_cl_internal.h    |  105 +-
 drivers/staging/lustre/lustre/lov/lov_dev.c        |   15 +-
 drivers/staging/lustre/lustre/lov/lov_ea.c         |    5 -
 drivers/staging/lustre/lustre/lov/lov_internal.h   |   34 +-
 drivers/staging/lustre/lustre/lov/lov_io.c         |  246 +--
 drivers/staging/lustre/lustre/lov/lov_lock.c       |  996 +--------
 drivers/staging/lustre/lustre/lov/lov_merge.c      |   11 +
 drivers/staging/lustre/lustre/lov/lov_obd.c        |   26 +-
 drivers/staging/lustre/lustre/lov/lov_object.c     |   54 +-
 drivers/staging/lustre/lustre/lov/lov_offset.c     |   12 +
 drivers/staging/lustre/lustre/lov/lov_pack.c       |    8 +-
 drivers/staging/lustre/lustre/lov/lov_page.c       |  183 +-
 drivers/staging/lustre/lustre/lov/lov_pool.c       |   62 +-
 drivers/staging/lustre/lustre/lov/lov_request.c    |   11 +-
 drivers/staging/lustre/lustre/lov/lovsub_dev.c     |    9 +-
 drivers/staging/lustre/lustre/lov/lovsub_lock.c    |  386 +---
 drivers/staging/lustre/lustre/lov/lovsub_object.c  |    7 +-
 drivers/staging/lustre/lustre/lov/lovsub_page.c    |    4 +-
 drivers/staging/lustre/lustre/mdc/lproc_mdc.c      |    8 +-
 drivers/staging/lustre/lustre/mdc/mdc_lib.c        |   24 +-
 drivers/staging/lustre/lustre/mdc/mdc_locks.c      |    5 +-
 drivers/staging/lustre/lustre/mdc/mdc_request.c    |   26 +-
 drivers/staging/lustre/lustre/mgc/mgc_request.c    |   12 +-
 drivers/staging/lustre/lustre/obdclass/cl_io.c     |  430 ++--
 drivers/staging/lustre/lustre/obdclass/cl_lock.c   | 2152 +-------------------
 drivers/staging/lustre/lustre/obdclass/cl_object.c |  303 ++-
 drivers/staging/lustre/lustre/obdclass/cl_page.c   |  659 +-----
 drivers/staging/lustre/lustre/obdclass/class_obd.c |    5 +-
 drivers/staging/lustre/lustre/obdclass/debug.c     |    4 +-
 drivers/staging/lustre/lustre/obdclass/genops.c    |    1 -
 .../lustre/lustre/obdclass/linux/linux-module.c    |    4 +-
 drivers/staging/lustre/lustre/obdclass/llog.c      |    1 -
 .../lustre/lustre/obdclass/lprocfs_status.c        |   72 +-
 drivers/staging/lustre/lustre/obdclass/lu_object.c |    9 +-
 .../staging/lustre/lustre/obdclass/lustre_peer.c   |    3 +-
 .../staging/lustre/lustre/obdclass/obd_config.c    |   26 +-
 drivers/staging/lustre/lustre/obdclass/obd_mount.c |   15 +-
 drivers/staging/lustre/lustre/obdclass/obdo.c      |    3 +-
 .../staging/lustre/lustre/obdecho/echo_client.c    |  173 +-
 drivers/staging/lustre/lustre/osc/lproc_osc.c      |   68 +-
 drivers/staging/lustre/lustre/osc/osc_cache.c      |  531 ++++-
 .../staging/lustre/lustre/osc/osc_cl_internal.h    |  159 +-
 drivers/staging/lustre/lustre/osc/osc_internal.h   |   27 +-
 drivers/staging/lustre/lustre/osc/osc_io.c         |  283 +--
 drivers/staging/lustre/lustre/osc/osc_lock.c       | 1698 ++++++---------
 drivers/staging/lustre/lustre/osc/osc_object.c     |   38 +-
 drivers/staging/lustre/lustre/osc/osc_page.c       |  544 +++--
 drivers/staging/lustre/lustre/osc/osc_request.c    |  423 ++--
 drivers/staging/lustre/lustre/ptlrpc/client.c      |   11 +-
 drivers/staging/lustre/lustre/ptlrpc/events.c      |    1 -
 drivers/staging/lustre/lustre/ptlrpc/import.c      |   12 +-
 drivers/staging/lustre/lustre/ptlrpc/layout.c      |   31 +-
 .../staging/lustre/lustre/ptlrpc/lproc_ptlrpc.c    |   11 +-
 drivers/staging/lustre/lustre/ptlrpc/nrs.c         |    7 +-
 .../staging/lustre/lustre/ptlrpc/pack_generic.c    |    3 -
 drivers/staging/lustre/lustre/ptlrpc/ptlrpcd.c     |   21 +-
 drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c    |   14 +-
 drivers/staging/lustre/lustre/ptlrpc/sec_plain.c   |    2 +-
 drivers/staging/lustre/lustre/ptlrpc/service.c     |   52 +-
 drivers/staging/lustre/lustre/ptlrpc/wiretest.c    |   12 +-
 179 files changed, 9401 insertions(+), 13947 deletions(-)
 delete mode 100644 drivers/staging/lustre/include/linux/libcfs/linux/linux-mem.h
 delete mode 100644 drivers/staging/lustre/lustre/include/lclient.h
 delete mode 100644 drivers/staging/lustre/lustre/include/linux/obd.h
 delete mode 100644 drivers/staging/lustre/lustre/lclient/glimpse.c
 delete mode 100644 drivers/staging/lustre/lustre/lclient/lcommon_cl.c
 delete mode 100644 drivers/staging/lustre/lustre/lclient/lcommon_misc.c
 create mode 100644 drivers/staging/lustre/lustre/llite/glimpse.c
 create mode 100644 drivers/staging/lustre/lustre/llite/lcommon_cl.c
 create mode 100644 drivers/staging/lustre/lustre/llite/lcommon_misc.c
 create mode 100644 drivers/staging/lustre/lustre/llite/vvp_req.c

(limited to 'drivers/staging/lustre')

diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs.h b/drivers/staging/lustre/include/linux/libcfs/libcfs.h
index 40af75c42..4141afb10 100644
--- a/drivers/staging/lustre/include/linux/libcfs/libcfs.h
+++ b/drivers/staging/lustre/include/linux/libcfs/libcfs.h
@@ -59,42 +59,13 @@
 #define LNET_ACCEPTOR_MIN_RESERVED_PORT    512
 #define LNET_ACCEPTOR_MAX_RESERVED_PORT    1023
 
-/*
- * libcfs pseudo device operations
- *
- * It's just draft now.
- */
-
-struct cfs_psdev_file {
-	unsigned long   off;
-	void	    *private_data;
-	unsigned long   reserved1;
-	unsigned long   reserved2;
-};
-
-struct cfs_psdev_ops {
-	int (*p_open)(unsigned long, void *);
-	int (*p_close)(unsigned long, void *);
-	int (*p_read)(struct cfs_psdev_file *, char *, unsigned long);
-	int (*p_write)(struct cfs_psdev_file *, char *, unsigned long);
-	int (*p_ioctl)(struct cfs_psdev_file *, unsigned long, void __user *);
-};
-
-/*
- * Drop into debugger, if possible. Implementation is provided by platform.
- */
-
-void cfs_enter_debugger(void);
-
 /*
  * Defined by platform
  */
-int unshare_fs_struct(void);
 sigset_t cfs_block_allsigs(void);
 sigset_t cfs_block_sigs(unsigned long sigs);
 sigset_t cfs_block_sigsinv(unsigned long sigs);
 void cfs_restore_sigs(sigset_t);
-int cfs_signal_pending(void);
 void cfs_clear_sigpending(void);
 
 /*
@@ -117,7 +88,25 @@ void cfs_get_random_bytes(void *buf, int size);
 #include "libcfs_workitem.h"
 #include "libcfs_hash.h"
 #include "libcfs_fail.h"
-#include "libcfs_crypto.h"
+
+struct libcfs_ioctl_handler {
+	struct list_head item;
+	int (*handle_ioctl)(unsigned int cmd, struct libcfs_ioctl_hdr *hdr);
+};
+
+#define DECLARE_IOCTL_HANDLER(ident, func)			\
+	struct libcfs_ioctl_handler ident = {			\
+		.item		= LIST_HEAD_INIT(ident.item),	\
+		.handle_ioctl	= func				\
+	}
+
+int libcfs_register_ioctl(struct libcfs_ioctl_handler *hand);
+int libcfs_deregister_ioctl(struct libcfs_ioctl_handler *hand);
+
+int libcfs_ioctl_getdata(struct libcfs_ioctl_hdr **hdr_pp,
+			 const struct libcfs_ioctl_hdr __user *uparam);
+int libcfs_ioctl_data_adjust(struct libcfs_ioctl_data *data);
+int libcfs_ioctl(unsigned long cmd, void __user *arg);
 
 /* container_of depends on "likely" which is defined in libcfs_private.h */
 static inline void *__container_of(void *ptr, unsigned long shift)
@@ -143,8 +132,6 @@ extern struct miscdevice libcfs_dev;
 extern char lnet_upcall[1024];
 extern char lnet_debug_log_upcall[1024];
 
-extern struct cfs_psdev_ops libcfs_psdev_ops;
-
 extern struct cfs_wi_sched *cfs_sched_rehash;
 
 struct lnet_debugfs_symlink_def {
diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs_cpu.h b/drivers/staging/lustre/include/linux/libcfs/libcfs_cpu.h
index 9e62c5971..81d8079e3 100644
--- a/drivers/staging/lustre/include/linux/libcfs/libcfs_cpu.h
+++ b/drivers/staging/lustre/include/linux/libcfs/libcfs_cpu.h
@@ -203,6 +203,85 @@ int cfs_cpt_spread_node(struct cfs_cpt_table *cptab, int cpt);
  */
 int cfs_cpu_ht_nsiblings(int cpu);
 
+/*
+ * allocate per-cpu-partition data, returned value is an array of pointers,
+ * variable can be indexed by CPU ID.
+ *	cptab != NULL: size of array is number of CPU partitions
+ *	cptab == NULL: size of array is number of HW cores
+ */
+void *cfs_percpt_alloc(struct cfs_cpt_table *cptab, unsigned int size);
+/*
+ * destory per-cpu-partition variable
+ */
+void cfs_percpt_free(void *vars);
+int cfs_percpt_number(void *vars);
+
+#define cfs_percpt_for_each(var, i, vars)		\
+	for (i = 0; i < cfs_percpt_number(vars) &&	\
+		((var) = (vars)[i]) != NULL; i++)
+
+/*
+ * percpu partition lock
+ *
+ * There are some use-cases like this in Lustre:
+ * . each CPU partition has it's own private data which is frequently changed,
+ *   and mostly by the local CPU partition.
+ * . all CPU partitions share some global data, these data are rarely changed.
+ *
+ * LNet is typical example.
+ * CPU partition lock is designed for this kind of use-cases:
+ * . each CPU partition has it's own private lock
+ * . change on private data just needs to take the private lock
+ * . read on shared data just needs to take _any_ of private locks
+ * . change on shared data needs to take _all_ private locks,
+ *   which is slow and should be really rare.
+ */
+enum {
+	CFS_PERCPT_LOCK_EX	= -1,	/* negative */
+};
+
+struct cfs_percpt_lock {
+	/* cpu-partition-table for this lock */
+	struct cfs_cpt_table     *pcl_cptab;
+	/* exclusively locked */
+	unsigned int		  pcl_locked;
+	/* private lock table */
+	spinlock_t		**pcl_locks;
+};
+
+/* return number of private locks */
+#define cfs_percpt_lock_num(pcl)	cfs_cpt_number(pcl->pcl_cptab)
+
+/*
+ * create a cpu-partition lock based on CPU partition table \a cptab,
+ * each private lock has extra \a psize bytes padding data
+ */
+struct cfs_percpt_lock *cfs_percpt_lock_create(struct cfs_cpt_table *cptab,
+					       struct lock_class_key *keys);
+/* destroy a cpu-partition lock */
+void cfs_percpt_lock_free(struct cfs_percpt_lock *pcl);
+
+/* lock private lock \a index of \a pcl */
+void cfs_percpt_lock(struct cfs_percpt_lock *pcl, int index);
+
+/* unlock private lock \a index of \a pcl */
+void cfs_percpt_unlock(struct cfs_percpt_lock *pcl, int index);
+
+#define CFS_PERCPT_LOCK_KEYS	256
+
+/* NB: don't allocate keys dynamically, lockdep needs them to be in ".data" */
+#define cfs_percpt_lock_alloc(cptab)					\
+({									\
+	static struct lock_class_key ___keys[CFS_PERCPT_LOCK_KEYS];	\
+	struct cfs_percpt_lock *___lk;					\
+									\
+	if (cfs_cpt_number(cptab) > CFS_PERCPT_LOCK_KEYS)		\
+		___lk = cfs_percpt_lock_create(cptab, NULL);		\
+	else								\
+		___lk = cfs_percpt_lock_create(cptab, ___keys);		\
+	___lk;								\
+})
+
 /**
  * iterate over all CPU partitions in \a cptab
  */
diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs_crypto.h b/drivers/staging/lustre/include/linux/libcfs/libcfs_crypto.h
index e8663697e..02be7d760 100644
--- a/drivers/staging/lustre/include/linux/libcfs/libcfs_crypto.h
+++ b/drivers/staging/lustre/include/linux/libcfs/libcfs_crypto.h
@@ -46,7 +46,8 @@ enum cfs_crypto_hash_alg {
 	CFS_HASH_ALG_SHA384,
 	CFS_HASH_ALG_SHA512,
 	CFS_HASH_ALG_CRC32C,
-	CFS_HASH_ALG_MAX
+	CFS_HASH_ALG_MAX,
+	CFS_HASH_ALG_UNKNOWN	= 0xff
 };
 
 static struct cfs_crypto_hash_type hash_types[] = {
@@ -59,11 +60,22 @@ static struct cfs_crypto_hash_type hash_types[] = {
 	[CFS_HASH_ALG_SHA256]  = { "sha256",   0,     32 },
 	[CFS_HASH_ALG_SHA384]  = { "sha384",   0,     48 },
 	[CFS_HASH_ALG_SHA512]  = { "sha512",   0,     64 },
+	[CFS_HASH_ALG_MAX]	= { NULL,	0,	64 },
 };
 
-/**    Return pointer to type of hash for valid hash algorithm identifier */
+/* Maximum size of hash_types[].cht_size */
+#define CFS_CRYPTO_HASH_DIGESTSIZE_MAX	64
+
+/**
+ * Return hash algorithm information for the specified algorithm identifier
+ *
+ * Hash information includes algorithm name, initial seed, hash size.
+ *
+ * \retval	cfs_crypto_hash_type for valid ID (CFS_HASH_ALG_*)
+ * \retval	NULL for unknown algorithm identifier
+ */
 static inline const struct cfs_crypto_hash_type *
-		    cfs_crypto_hash_type(unsigned char hash_alg)
+cfs_crypto_hash_type(enum cfs_crypto_hash_alg hash_alg)
 {
 	struct cfs_crypto_hash_type *ht;
 
@@ -75,8 +87,16 @@ static inline const struct cfs_crypto_hash_type *
 	return NULL;
 }
 
-/**     Return hash name for valid hash algorithm identifier or "unknown" */
-static inline const char *cfs_crypto_hash_name(unsigned char hash_alg)
+/**
+ * Return hash name for hash algorithm identifier
+ *
+ * \param[in]	hash_alg hash alrgorithm id (CFS_HASH_ALG_*)
+ *
+ * \retval	string name of known hash algorithm
+ * \retval	"unknown" if hash algorithm is unknown
+ */
+static inline const char *
+cfs_crypto_hash_name(enum cfs_crypto_hash_alg hash_alg)
 {
 	const struct cfs_crypto_hash_type *ht;
 
@@ -86,8 +106,15 @@ static inline const char *cfs_crypto_hash_name(unsigned char hash_alg)
 	return "unknown";
 }
 
-/**     Return digest size for valid algorithm identifier or 0 */
-static inline int cfs_crypto_hash_digestsize(unsigned char hash_alg)
+/**
+ * Return digest size for hash algorithm type
+ *
+ * \param[in]	hash_alg hash alrgorithm id (CFS_HASH_ALG_*)
+ *
+ * \retval	hash algorithm digest size in bytes
+ * \retval	0 if hash algorithm type is unknown
+ */
+static inline int cfs_crypto_hash_digestsize(enum cfs_crypto_hash_alg hash_alg)
 {
 	const struct cfs_crypto_hash_type *ht;
 
@@ -97,36 +124,24 @@ static inline int cfs_crypto_hash_digestsize(unsigned char hash_alg)
 	return 0;
 }
 
-/**     Return hash identifier for valid hash algorithm name or 0xFF */
+/**
+ * Find hash algorithm ID for the specified algorithm name
+ *
+ * \retval	hash algorithm ID for valid ID (CFS_HASH_ALG_*)
+ * \retval	CFS_HASH_ALG_UNKNOWN for unknown algorithm name
+ */
 static inline unsigned char cfs_crypto_hash_alg(const char *algname)
 {
-	unsigned char   i;
+	enum cfs_crypto_hash_alg hash_alg;
 
-	for (i = 0; i < CFS_HASH_ALG_MAX; i++)
-		if (!strcmp(hash_types[i].cht_name, algname))
-			break;
-	return (i == CFS_HASH_ALG_MAX ? 0xFF : i);
+	for (hash_alg = 0; hash_alg < CFS_HASH_ALG_MAX; hash_alg++)
+		if (strcmp(hash_types[hash_alg].cht_name, algname) == 0)
+			return hash_alg;
+
+	return CFS_HASH_ALG_UNKNOWN;
 }
 
-/**     Calculate hash digest for buffer.
- *      @param alg	    id of hash algorithm
- *      @param buf	    buffer of data
- *      @param buf_len	buffer len
- *      @param key	    initial value for algorithm, if it is NULL,
- *			    default initial value should be used.
- *      @param key_len	len of initial value
- *      @param hash	   [out] pointer to hash, if it is NULL, hash_len is
- *			    set to valid digest size in bytes, retval -ENOSPC.
- *      @param hash_len       [in,out] size of hash buffer
- *      @returns	      status of operation
- *      @retval -EINVAL       if buf, buf_len, hash_len or alg_id is invalid
- *      @retval -ENODEV       if this algorithm is unsupported
- *      @retval -ENOSPC       if pointer to hash is NULL, or hash_len less than
- *			    digest size
- *      @retval 0	     for success
- *      @retval < 0	   other errors from lower layers.
- */
-int cfs_crypto_hash_digest(unsigned char alg,
+int cfs_crypto_hash_digest(enum cfs_crypto_hash_alg hash_alg,
 			   const void *buf, unsigned int buf_len,
 			   unsigned char *key, unsigned int key_len,
 			   unsigned char *hash, unsigned int *hash_len);
@@ -134,66 +149,17 @@ int cfs_crypto_hash_digest(unsigned char alg,
 /* cfs crypto hash descriptor */
 struct cfs_crypto_hash_desc;
 
-/**     Allocate and initialize descriptor for hash algorithm.
- *      @param alg	    algorithm id
- *      @param key	    initial value for algorithm, if it is NULL,
- *			    default initial value should be used.
- *      @param key_len	len of initial value
- *      @returns	      pointer to descriptor of hash instance
- *      @retval ERR_PTR(error) when errors occurred.
- */
-struct cfs_crypto_hash_desc*
-	cfs_crypto_hash_init(unsigned char alg,
-			     unsigned char *key, unsigned int key_len);
-
-/**    Update digest by part of data.
- *     @param desc	      hash descriptor
- *     @param page	      data page
- *     @param offset	    data offset
- *     @param len	       data len
- *     @returns		 status of operation
- *     @retval 0		for success.
- */
+struct cfs_crypto_hash_desc *
+cfs_crypto_hash_init(enum cfs_crypto_hash_alg hash_alg,
+		     unsigned char *key, unsigned int key_len);
 int cfs_crypto_hash_update_page(struct cfs_crypto_hash_desc *desc,
 				struct page *page, unsigned int offset,
 				unsigned int len);
-
-/**    Update digest by part of data.
- *     @param desc	      hash descriptor
- *     @param buf	       pointer to data buffer
- *     @param buf_len	   size of data at buffer
- *     @returns		 status of operation
- *     @retval 0		for success.
- */
 int cfs_crypto_hash_update(struct cfs_crypto_hash_desc *desc, const void *buf,
 			   unsigned int buf_len);
-
-/**    Finalize hash calculation, copy hash digest to buffer, destroy hash
- *     descriptor.
- *     @param desc	      hash descriptor
- *     @param hash	      buffer pointer to store hash digest
- *     @param hash_len	  pointer to hash buffer size, if NULL
- *			      destroy hash descriptor
- *     @returns		 status of operation
- *     @retval -ENOSPC	  if hash is NULL, or *hash_len less than
- *			      digest size
- *     @retval 0		for success
- *     @retval < 0	      other errors from lower layers.
- */
 int cfs_crypto_hash_final(struct cfs_crypto_hash_desc *desc,
 			  unsigned char *hash, unsigned int *hash_len);
-/**
- *      Register crypto hash algorithms
- */
 int cfs_crypto_register(void);
-
-/**
- *      Unregister
- */
 void cfs_crypto_unregister(void);
-
-/**     Return hash speed in Mbytes per second for valid hash algorithm
- *      identifier. If test was unsuccessful -1 would be returned.
- */
-int cfs_crypto_hash_speed(unsigned char hash_alg);
+int cfs_crypto_hash_speed(enum cfs_crypto_hash_alg hash_alg);
 #endif
diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs_debug.h b/drivers/staging/lustre/include/linux/libcfs/libcfs_debug.h
index 98430e710..455c54d0d 100644
--- a/drivers/staging/lustre/include/linux/libcfs/libcfs_debug.h
+++ b/drivers/staging/lustre/include/linux/libcfs/libcfs_debug.h
@@ -85,7 +85,6 @@ struct ptldebug_header {
 #define PH_FLAG_FIRST_RECORD 1
 
 /* Debugging subsystems (32 bits, non-overlapping) */
-/* keep these in sync with lnet/utils/debug.c and lnet/libcfs/debug.c */
 #define S_UNDEFINED	0x00000001
 #define S_MDC		0x00000002
 #define S_MDS		0x00000004
@@ -118,10 +117,14 @@ struct ptldebug_header {
 #define S_MGS		0x20000000
 #define S_FID		0x40000000 /* b_new_cmd */
 #define S_FLD		0x80000000 /* b_new_cmd */
-/* keep these in sync with lnet/utils/debug.c and lnet/libcfs/debug.c */
+
+#define LIBCFS_DEBUG_SUBSYS_NAMES {					\
+	"undefined", "mdc", "mds", "osc", "ost", "class", "log",	\
+	"llite", "rpc", "mgmt", "lnet", "lnd", "pinger", "filter", "",	\
+	"echo", "ldlm", "lov", "lquota", "osd", "lfsck", "", "", "lmv",	\
+	"", "sec", "gss", "", "mgc", "mgs", "fid", "fld", NULL }
 
 /* Debugging masks (32 bits, non-overlapping) */
-/* keep these in sync with lnet/utils/debug.c and lnet/libcfs/debug.c */
 #define D_TRACE		0x00000001 /* ENTRY/EXIT markers */
 #define D_INODE		0x00000002
 #define D_SUPER		0x00000004
@@ -151,9 +154,14 @@ struct ptldebug_header {
 #define D_QUOTA		0x04000000
 #define D_SEC		0x08000000
 #define D_LFSCK		0x10000000 /* For both OI scrub and LFSCK */
-/* keep these in sync with lnet/{utils,libcfs}/debug.c */
+#define D_HSM		0x20000000
 
-#define D_HSM	 D_TRACE
+#define LIBCFS_DEBUG_MASKS_NAMES {					\
+	"trace", "inode", "super", "ext2", "malloc", "cache", "info",	\
+	"ioctl", "neterror", "net", "warning", "buffs", "other",	\
+	"dentry", "nettrace", "page", "dlmtrace", "error", "emerg",	\
+	"ha", "rpctrace", "vfstrace", "reada", "mmap", "config",	\
+	"console", "quota", "sec", "lfsck", "hsm", NULL }
 
 #define D_CANTMASK   (D_ERROR | D_EMERG | D_WARNING | D_CONSOLE)
 
diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs_fail.h b/drivers/staging/lustre/include/linux/libcfs/libcfs_fail.h
index aa69c6a33..2e008bffc 100644
--- a/drivers/staging/lustre/include/linux/libcfs/libcfs_fail.h
+++ b/drivers/staging/lustre/include/linux/libcfs/libcfs_fail.h
@@ -38,6 +38,7 @@
 
 extern unsigned long cfs_fail_loc;
 extern unsigned int cfs_fail_val;
+extern int cfs_fail_err;
 
 extern wait_queue_head_t cfs_race_waitq;
 extern int cfs_race_state;
@@ -70,9 +71,14 @@ enum {
 #define CFS_FAIL_RAND	0x08000000 /* fail 1/N of the times */
 #define CFS_FAIL_USR1	0x04000000 /* user flag */
 
-#define CFS_FAIL_PRECHECK(id) (cfs_fail_loc &&				\
-			      (cfs_fail_loc & CFS_FAIL_MASK_LOC) ==	   \
-			      ((id) & CFS_FAIL_MASK_LOC))
+#define CFS_FAULT	0x02000000 /* match any CFS_FAULT_CHECK */
+
+static inline bool CFS_FAIL_PRECHECK(__u32 id)
+{
+	return cfs_fail_loc != 0 &&
+	       ((cfs_fail_loc & CFS_FAIL_MASK_LOC) == (id & CFS_FAIL_MASK_LOC) ||
+	        (cfs_fail_loc & id & CFS_FAULT));
+}
 
 static inline int cfs_fail_check_set(__u32 id, __u32 value,
 				     int set, int quiet)
@@ -144,6 +150,9 @@ static inline int cfs_fail_timeout_set(__u32 id, __u32 value, int ms, int set)
 #define CFS_FAIL_TIMEOUT_MS_ORSET(id, value, ms) \
 	cfs_fail_timeout_set(id, value, ms, CFS_FAIL_LOC_ORSET)
 
+#define CFS_FAULT_CHECK(id)			\
+	CFS_FAIL_CHECK(CFS_FAULT | (id))
+
 /* The idea here is to synchronise two threads to force a race. The
  * first thread that calls this with a matching fail_loc is put to
  * sleep. The next thread that calls with the same fail_loc wakes up
diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs_hash.h b/drivers/staging/lustre/include/linux/libcfs/libcfs_hash.h
index c3f2332fa..119986bc7 100644
--- a/drivers/staging/lustre/include/linux/libcfs/libcfs_hash.h
+++ b/drivers/staging/lustre/include/linux/libcfs/libcfs_hash.h
@@ -245,7 +245,7 @@ struct cfs_hash {
 	/** # of iterators (caller of cfs_hash_for_each_*) */
 	__u32				hs_iterators;
 	/** rehash workitem */
-	cfs_workitem_t			hs_rehash_wi;
+	struct cfs_workitem		hs_rehash_wi;
 	/** refcount on this hash table */
 	atomic_t			hs_refcount;
 	/** rehash buckets-table */
@@ -262,7 +262,7 @@ struct cfs_hash {
 	/** bits when we found the max depth */
 	unsigned int			hs_dep_bits;
 	/** workitem to output max depth */
-	cfs_workitem_t			hs_dep_wi;
+	struct cfs_workitem		hs_dep_wi;
 #endif
 	/** name of htable */
 	char				hs_name[0];
diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs_ioctl.h b/drivers/staging/lustre/include/linux/libcfs/libcfs_ioctl.h
index 5ca99bd6f..4b9102bd9 100644
--- a/drivers/staging/lustre/include/linux/libcfs/libcfs_ioctl.h
+++ b/drivers/staging/lustre/include/linux/libcfs/libcfs_ioctl.h
@@ -34,13 +34,16 @@
  * libcfs/include/libcfs/libcfs_ioctl.h
  *
  * Low-level ioctl data structures. Kernel ioctl functions declared here,
- * and user space functions are in libcfsutil_ioctl.h.
+ * and user space functions are in libcfs/util/ioctl.h.
  *
  */
 
 #ifndef __LIBCFS_IOCTL_H__
 #define __LIBCFS_IOCTL_H__
 
+#include <linux/types.h>
+#include <linux/ioctl.h>
+
 #define LIBCFS_IOCTL_VERSION	0x0001000a
 #define LIBCFS_IOCTL_VERSION2	0x0001000b
 
@@ -49,6 +52,9 @@ struct libcfs_ioctl_hdr {
 	__u32 ioc_version;
 };
 
+/** max size to copy from userspace */
+#define LIBCFS_IOC_DATA_MAX	(128 * 1024)
+
 struct libcfs_ioctl_data {
 	struct libcfs_ioctl_hdr ioc_hdr;
 
@@ -73,67 +79,48 @@ struct libcfs_ioctl_data {
 	char ioc_bulk[0];
 };
 
-#define ioc_priority ioc_u32[0]
-
 struct libcfs_debug_ioctl_data {
 	struct libcfs_ioctl_hdr hdr;
 	unsigned int subs;
 	unsigned int debug;
 };
 
-#define LIBCFS_IOC_INIT(data)			   \
-do {						    \
-	memset(&data, 0, sizeof(data));		 \
-	data.ioc_version = LIBCFS_IOCTL_VERSION;	\
-	data.ioc_len = sizeof(data);		    \
-} while (0)
-
-struct libcfs_ioctl_handler {
-	struct list_head item;
-	int (*handle_ioctl)(unsigned int cmd, struct libcfs_ioctl_hdr *hdr);
-};
-
-#define DECLARE_IOCTL_HANDLER(ident, func)		      \
-	struct libcfs_ioctl_handler ident = {		   \
-		/* .item = */ LIST_HEAD_INIT(ident.item),   \
-		/* .handle_ioctl = */ func		      \
-	}
+/* 'f' ioctls are defined in lustre_ioctl.h and lustre_user.h except for: */
+#define LIBCFS_IOC_DEBUG_MASK		   _IOWR('f', 250, long)
+#define IOCTL_LIBCFS_TYPE		   long
 
-/* FIXME check conflict with lustre_lib.h */
-#define LIBCFS_IOC_DEBUG_MASK	     _IOWR('f', 250, long)
-
-#define IOC_LIBCFS_TYPE		   'e'
-#define IOC_LIBCFS_MIN_NR		 30
+#define IOC_LIBCFS_TYPE			   ('e')
+#define IOC_LIBCFS_MIN_NR		   30
 /* libcfs ioctls */
-#define IOC_LIBCFS_PANIC		   _IOWR('e', 30, long)
-#define IOC_LIBCFS_CLEAR_DEBUG	     _IOWR('e', 31, long)
-#define IOC_LIBCFS_MARK_DEBUG	      _IOWR('e', 32, long)
-#define IOC_LIBCFS_MEMHOG		  _IOWR('e', 36, long)
+/* IOC_LIBCFS_PANIC obsolete in 2.8.0, was _IOWR('e', 30, IOCTL_LIBCFS_TYPE) */
+#define IOC_LIBCFS_CLEAR_DEBUG		   _IOWR('e', 31, IOCTL_LIBCFS_TYPE)
+#define IOC_LIBCFS_MARK_DEBUG		   _IOWR('e', 32, IOCTL_LIBCFS_TYPE)
+/* IOC_LIBCFS_MEMHOG obsolete in 2.8.0, was _IOWR('e', 36, IOCTL_LIBCFS_TYPE) */
 /* lnet ioctls */
-#define IOC_LIBCFS_GET_NI		  _IOWR('e', 50, long)
-#define IOC_LIBCFS_FAIL_NID		_IOWR('e', 51, long)
-#define IOC_LIBCFS_NOTIFY_ROUTER	   _IOWR('e', 55, long)
-#define IOC_LIBCFS_UNCONFIGURE	     _IOWR('e', 56, long)
-/*	#define IOC_LIBCFS_PORTALS_COMPATIBILITY   _IOWR('e', 57, long) */
-#define IOC_LIBCFS_LNET_DIST	       _IOWR('e', 58, long)
-#define IOC_LIBCFS_CONFIGURE	       _IOWR('e', 59, long)
-#define IOC_LIBCFS_TESTPROTOCOMPAT	 _IOWR('e', 60, long)
-#define IOC_LIBCFS_PING		    _IOWR('e', 61, long)
-/*	#define IOC_LIBCFS_DEBUG_PEER	      _IOWR('e', 62, long) */
-#define IOC_LIBCFS_LNETST		  _IOWR('e', 63, long)
-#define	IOC_LIBCFS_LNET_FAULT		_IOWR('e', 64, long)
+#define IOC_LIBCFS_GET_NI		   _IOWR('e', 50, IOCTL_LIBCFS_TYPE)
+#define IOC_LIBCFS_FAIL_NID		   _IOWR('e', 51, IOCTL_LIBCFS_TYPE)
+#define IOC_LIBCFS_NOTIFY_ROUTER	   _IOWR('e', 55, IOCTL_LIBCFS_TYPE)
+#define IOC_LIBCFS_UNCONFIGURE		   _IOWR('e', 56, IOCTL_LIBCFS_TYPE)
+/*	 IOC_LIBCFS_PORTALS_COMPATIBILITY  _IOWR('e', 57, IOCTL_LIBCFS_TYPE) */
+#define IOC_LIBCFS_LNET_DIST		   _IOWR('e', 58, IOCTL_LIBCFS_TYPE)
+#define IOC_LIBCFS_CONFIGURE		   _IOWR('e', 59, IOCTL_LIBCFS_TYPE)
+#define IOC_LIBCFS_TESTPROTOCOMPAT	   _IOWR('e', 60, IOCTL_LIBCFS_TYPE)
+#define IOC_LIBCFS_PING			   _IOWR('e', 61, IOCTL_LIBCFS_TYPE)
+/*	IOC_LIBCFS_DEBUG_PEER		   _IOWR('e', 62, IOCTL_LIBCFS_TYPE) */
+#define IOC_LIBCFS_LNETST		   _IOWR('e', 63, IOCTL_LIBCFS_TYPE)
+#define IOC_LIBCFS_LNET_FAULT		   _IOWR('e', 64, IOCTL_LIBCFS_TYPE)
 /* lnd ioctls */
-#define IOC_LIBCFS_REGISTER_MYNID	  _IOWR('e', 70, long)
-#define IOC_LIBCFS_CLOSE_CONNECTION	_IOWR('e', 71, long)
-#define IOC_LIBCFS_PUSH_CONNECTION	 _IOWR('e', 72, long)
-#define IOC_LIBCFS_GET_CONN		_IOWR('e', 73, long)
-#define IOC_LIBCFS_DEL_PEER		_IOWR('e', 74, long)
-#define IOC_LIBCFS_ADD_PEER		_IOWR('e', 75, long)
-#define IOC_LIBCFS_GET_PEER		_IOWR('e', 76, long)
+#define IOC_LIBCFS_REGISTER_MYNID	   _IOWR('e', 70, IOCTL_LIBCFS_TYPE)
+#define IOC_LIBCFS_CLOSE_CONNECTION	   _IOWR('e', 71, IOCTL_LIBCFS_TYPE)
+#define IOC_LIBCFS_PUSH_CONNECTION	   _IOWR('e', 72, IOCTL_LIBCFS_TYPE)
+#define IOC_LIBCFS_GET_CONN		   _IOWR('e', 73, IOCTL_LIBCFS_TYPE)
+#define IOC_LIBCFS_DEL_PEER		   _IOWR('e', 74, IOCTL_LIBCFS_TYPE)
+#define IOC_LIBCFS_ADD_PEER		   _IOWR('e', 75, IOCTL_LIBCFS_TYPE)
+#define IOC_LIBCFS_GET_PEER		   _IOWR('e', 76, IOCTL_LIBCFS_TYPE)
 /* ioctl 77 is free for use */
-#define IOC_LIBCFS_ADD_INTERFACE	   _IOWR('e', 78, long)
-#define IOC_LIBCFS_DEL_INTERFACE	   _IOWR('e', 79, long)
-#define IOC_LIBCFS_GET_INTERFACE	   _IOWR('e', 80, long)
+#define IOC_LIBCFS_ADD_INTERFACE	   _IOWR('e', 78, IOCTL_LIBCFS_TYPE)
+#define IOC_LIBCFS_DEL_INTERFACE	   _IOWR('e', 79, IOCTL_LIBCFS_TYPE)
+#define IOC_LIBCFS_GET_INTERFACE	   _IOWR('e', 80, IOCTL_LIBCFS_TYPE)
 
 /*
  * DLC Specific IOCTL numbers.
@@ -155,76 +142,4 @@ struct libcfs_ioctl_handler {
 #define IOC_LIBCFS_GET_LNET_STATS	_IOWR(IOC_LIBCFS_TYPE, 91, IOCTL_CONFIG_SIZE)
 #define IOC_LIBCFS_MAX_NR		91
 
-static inline int libcfs_ioctl_packlen(struct libcfs_ioctl_data *data)
-{
-	int len = sizeof(*data);
-
-	len += cfs_size_round(data->ioc_inllen1);
-	len += cfs_size_round(data->ioc_inllen2);
-	return len;
-}
-
-static inline bool libcfs_ioctl_is_invalid(struct libcfs_ioctl_data *data)
-{
-	if (data->ioc_hdr.ioc_len > (1 << 30)) {
-		CERROR("LIBCFS ioctl: ioc_len larger than 1<<30\n");
-		return 1;
-	}
-	if (data->ioc_inllen1 > (1<<30)) {
-		CERROR("LIBCFS ioctl: ioc_inllen1 larger than 1<<30\n");
-		return 1;
-	}
-	if (data->ioc_inllen2 > (1<<30)) {
-		CERROR("LIBCFS ioctl: ioc_inllen2 larger than 1<<30\n");
-		return 1;
-	}
-	if (data->ioc_inlbuf1 && !data->ioc_inllen1) {
-		CERROR("LIBCFS ioctl: inlbuf1 pointer but 0 length\n");
-		return 1;
-	}
-	if (data->ioc_inlbuf2 && !data->ioc_inllen2) {
-		CERROR("LIBCFS ioctl: inlbuf2 pointer but 0 length\n");
-		return 1;
-	}
-	if (data->ioc_pbuf1 && !data->ioc_plen1) {
-		CERROR("LIBCFS ioctl: pbuf1 pointer but 0 length\n");
-		return 1;
-	}
-	if (data->ioc_pbuf2 && !data->ioc_plen2) {
-		CERROR("LIBCFS ioctl: pbuf2 pointer but 0 length\n");
-		return 1;
-	}
-	if (data->ioc_plen1 && !data->ioc_pbuf1) {
-		CERROR("LIBCFS ioctl: plen1 nonzero but no pbuf1 pointer\n");
-		return 1;
-	}
-	if (data->ioc_plen2 && !data->ioc_pbuf2) {
-		CERROR("LIBCFS ioctl: plen2 nonzero but no pbuf2 pointer\n");
-		return 1;
-	}
-	if ((__u32)libcfs_ioctl_packlen(data) != data->ioc_hdr.ioc_len) {
-		CERROR("LIBCFS ioctl: packlen != ioc_len\n");
-		return 1;
-	}
-	if (data->ioc_inllen1 &&
-	    data->ioc_bulk[data->ioc_inllen1 - 1] != '\0') {
-		CERROR("LIBCFS ioctl: inlbuf1 not 0 terminated\n");
-		return 1;
-	}
-	if (data->ioc_inllen2 &&
-	    data->ioc_bulk[cfs_size_round(data->ioc_inllen1) +
-			   data->ioc_inllen2 - 1] != '\0') {
-		CERROR("LIBCFS ioctl: inlbuf2 not 0 terminated\n");
-		return 1;
-	}
-	return 0;
-}
-
-int libcfs_register_ioctl(struct libcfs_ioctl_handler *hand);
-int libcfs_deregister_ioctl(struct libcfs_ioctl_handler *hand);
-int libcfs_ioctl_getdata_len(const struct libcfs_ioctl_hdr __user *arg,
-			     __u32 *buf_len);
-int libcfs_ioctl_popdata(void __user *arg, void *buf, int size);
-int libcfs_ioctl_data_adjust(struct libcfs_ioctl_data *data);
-
 #endif /* __LIBCFS_IOCTL_H__ */
diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs_prim.h b/drivers/staging/lustre/include/linux/libcfs/libcfs_prim.h
index 082fe6de9..ac4e8cfe6 100644
--- a/drivers/staging/lustre/include/linux/libcfs/libcfs_prim.h
+++ b/drivers/staging/lustre/include/linux/libcfs/libcfs_prim.h
@@ -40,21 +40,32 @@
 #ifndef __LIBCFS_PRIM_H__
 #define __LIBCFS_PRIM_H__
 
-void add_wait_queue_exclusive_head(wait_queue_head_t *, wait_queue_t *);
-
 /*
  * Memory
  */
-#ifndef memory_pressure_get
-#define memory_pressure_get() (0)
-#endif
-#ifndef memory_pressure_set
-#define memory_pressure_set() do {} while (0)
-#endif
-#ifndef memory_pressure_clr
-#define memory_pressure_clr() do {} while (0)
+#if BITS_PER_LONG == 32
+/* limit to lowmem on 32-bit systems */
+#define NUM_CACHEPAGES \
+	min(totalram_pages, 1UL << (30 - PAGE_SHIFT) * 3 / 4)
+#else
+#define NUM_CACHEPAGES totalram_pages
 #endif
 
+static inline unsigned int memory_pressure_get(void)
+{
+	return current->flags & PF_MEMALLOC;
+}
+
+static inline void memory_pressure_set(void)
+{
+	current->flags |= PF_MEMALLOC;
+}
+
+static inline void memory_pressure_clr(void)
+{
+	current->flags &= ~PF_MEMALLOC;
+}
+
 static inline int cfs_memory_pressure_get_and_set(void)
 {
 	int old = memory_pressure_get();
diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs_private.h b/drivers/staging/lustre/include/linux/libcfs/libcfs_private.h
index 13335437c..2fd2a9690 100644
--- a/drivers/staging/lustre/include/linux/libcfs/libcfs_private.h
+++ b/drivers/staging/lustre/include/linux/libcfs/libcfs_private.h
@@ -181,25 +181,6 @@ int libcfs_debug_cleanup(void);
 int libcfs_debug_clear_buffer(void);
 int libcfs_debug_mark_buffer(const char *text);
 
-/*
- * allocate per-cpu-partition data, returned value is an array of pointers,
- * variable can be indexed by CPU ID.
- *	cptable != NULL: size of array is number of CPU partitions
- *	cptable == NULL: size of array is number of HW cores
- */
-void *cfs_percpt_alloc(struct cfs_cpt_table *cptab, unsigned int size);
-/*
- * destroy per-cpu-partition variable
- */
-void  cfs_percpt_free(void *vars);
-int   cfs_percpt_number(void *vars);
-void *cfs_percpt_current(void *vars);
-void *cfs_percpt_index(void *vars, int idx);
-
-#define cfs_percpt_for_each(var, i, vars)		\
-	for (i = 0; i < cfs_percpt_number(vars) &&	\
-		    ((var) = (vars)[i]) != NULL; i++)
-
 /*
  * allocate a variable array, returned value is an array of pointers.
  * Caller can specify length of array by count.
@@ -302,62 +283,6 @@ do {							    \
 #define CFS_ALLOC_PTR(ptr)      LIBCFS_ALLOC(ptr, sizeof(*(ptr)))
 #define CFS_FREE_PTR(ptr)       LIBCFS_FREE(ptr, sizeof(*(ptr)))
 
-/*
- * percpu partition lock
- *
- * There are some use-cases like this in Lustre:
- * . each CPU partition has it's own private data which is frequently changed,
- *   and mostly by the local CPU partition.
- * . all CPU partitions share some global data, these data are rarely changed.
- *
- * LNet is typical example.
- * CPU partition lock is designed for this kind of use-cases:
- * . each CPU partition has it's own private lock
- * . change on private data just needs to take the private lock
- * . read on shared data just needs to take _any_ of private locks
- * . change on shared data needs to take _all_ private locks,
- *   which is slow and should be really rare.
- */
-
-enum {
-	CFS_PERCPT_LOCK_EX	= -1, /* negative */
-};
-
-struct cfs_percpt_lock {
-	/* cpu-partition-table for this lock */
-	struct cfs_cpt_table	*pcl_cptab;
-	/* exclusively locked */
-	unsigned int		pcl_locked;
-	/* private lock table */
-	spinlock_t		**pcl_locks;
-};
-
-/* return number of private locks */
-static inline int
-cfs_percpt_lock_num(struct cfs_percpt_lock *pcl)
-{
-	return cfs_cpt_number(pcl->pcl_cptab);
-}
-
-/*
- * create a cpu-partition lock based on CPU partition table \a cptab,
- * each private lock has extra \a psize bytes padding data
- */
-struct cfs_percpt_lock *cfs_percpt_lock_alloc(struct cfs_cpt_table *cptab);
-/* destroy a cpu-partition lock */
-void cfs_percpt_lock_free(struct cfs_percpt_lock *pcl);
-
-/* lock private lock \a index of \a pcl */
-void cfs_percpt_lock(struct cfs_percpt_lock *pcl, int index);
-/* unlock private lock \a index of \a pcl */
-void cfs_percpt_unlock(struct cfs_percpt_lock *pcl, int index);
-/* create percpt (atomic) refcount based on @cptab */
-atomic_t **cfs_percpt_atomic_alloc(struct cfs_cpt_table *cptab, int val);
-/* destroy percpt refcount */
-void cfs_percpt_atomic_free(atomic_t **refs);
-/* return sum of all percpu refs */
-int cfs_percpt_atomic_summary(atomic_t **refs);
-
 /** Compile-time assertion.
 
  * Check an invariant described by a constant expression at compile time by
diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs_workitem.h b/drivers/staging/lustre/include/linux/libcfs/libcfs_workitem.h
index 5cc64f327..f9b20c5ac 100644
--- a/drivers/staging/lustre/include/linux/libcfs/libcfs_workitem.h
+++ b/drivers/staging/lustre/include/linux/libcfs/libcfs_workitem.h
@@ -73,7 +73,7 @@ int cfs_wi_sched_create(char *name, struct cfs_cpt_table *cptab, int cpt,
 struct cfs_workitem;
 
 typedef int (*cfs_wi_action_t) (struct cfs_workitem *);
-typedef struct cfs_workitem {
+struct cfs_workitem {
 	/** chain on runq or rerunq */
 	struct list_head       wi_list;
 	/** working function */
@@ -84,10 +84,10 @@ typedef struct cfs_workitem {
 	unsigned short   wi_running:1;
 	/** scheduled */
 	unsigned short   wi_scheduled:1;
-} cfs_workitem_t;
+};
 
 static inline void
-cfs_wi_init(cfs_workitem_t *wi, void *data, cfs_wi_action_t action)
+cfs_wi_init(struct cfs_workitem *wi, void *data, cfs_wi_action_t action)
 {
 	INIT_LIST_HEAD(&wi->wi_list);
 
@@ -97,9 +97,9 @@ cfs_wi_init(cfs_workitem_t *wi, void *data, cfs_wi_action_t action)
 	wi->wi_action    = action;
 }
 
-void cfs_wi_schedule(struct cfs_wi_sched *sched, cfs_workitem_t *wi);
-int  cfs_wi_deschedule(struct cfs_wi_sched *sched, cfs_workitem_t *wi);
-void cfs_wi_exit(struct cfs_wi_sched *sched, cfs_workitem_t *wi);
+void cfs_wi_schedule(struct cfs_wi_sched *sched, struct cfs_workitem *wi);
+int  cfs_wi_deschedule(struct cfs_wi_sched *sched, struct cfs_workitem *wi);
+void cfs_wi_exit(struct cfs_wi_sched *sched, struct cfs_workitem *wi);
 
 int  cfs_wi_startup(void);
 void cfs_wi_shutdown(void);
diff --git a/drivers/staging/lustre/include/linux/libcfs/linux/libcfs.h b/drivers/staging/lustre/include/linux/libcfs/linux/libcfs.h
index d94b26616..a268ef7aa 100644
--- a/drivers/staging/lustre/include/linux/libcfs/linux/libcfs.h
+++ b/drivers/staging/lustre/include/linux/libcfs/linux/libcfs.h
@@ -60,6 +60,7 @@
 #include <linux/moduleparam.h>
 #include <linux/mutex.h>
 #include <linux/notifier.h>
+#include <linux/pagemap.h>
 #include <linux/random.h>
 #include <linux/rbtree.h>
 #include <linux/rwsem.h>
@@ -83,7 +84,6 @@
 #include <stdarg.h>
 #include "linux-cpu.h"
 #include "linux-time.h"
-#include "linux-mem.h"
 
 #define LUSTRE_TRACE_SIZE (THREAD_SIZE >> 5)
 
diff --git a/drivers/staging/lustre/include/linux/libcfs/linux/linux-cpu.h b/drivers/staging/lustre/include/linux/libcfs/linux/linux-cpu.h
index c04979ae0..f63cb47bc 100644
--- a/drivers/staging/lustre/include/linux/libcfs/linux/linux-cpu.h
+++ b/drivers/staging/lustre/include/linux/libcfs/linux/linux-cpu.h
@@ -23,7 +23,7 @@
  * This file is part of Lustre, http://www.lustre.org/
  * Lustre is a trademark of Sun Microsystems, Inc.
  *
- * libcfs/include/libcfs/linux/linux-mem.h
+ * libcfs/include/libcfs/linux/linux-cpu.h
  *
  * Basic library routines.
  *
diff --git a/drivers/staging/lustre/include/linux/libcfs/linux/linux-mem.h b/drivers/staging/lustre/include/linux/libcfs/linux/linux-mem.h
deleted file mode 100644
index 837eb2274..000000000
--- a/drivers/staging/lustre/include/linux/libcfs/linux/linux-mem.h
+++ /dev/null
@@ -1,80 +0,0 @@
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * libcfs/include/libcfs/linux/linux-mem.h
- *
- * Basic library routines.
- */
-
-#ifndef __LIBCFS_LINUX_CFS_MEM_H__
-#define __LIBCFS_LINUX_CFS_MEM_H__
-
-#ifndef __LIBCFS_LIBCFS_H__
-#error Do not #include this file directly. #include <linux/libcfs/libcfs.h> instead
-#endif
-
-#include <linux/mm.h>
-#include <linux/vmalloc.h>
-#include <linux/pagemap.h>
-#include <linux/slab.h>
-#include <linux/memcontrol.h>
-#include <linux/mm_inline.h>
-
-#ifndef HAVE_LIBCFS_CPT
-/* Need this for cfs_cpt_table */
-#include "../libcfs_cpu.h"
-#endif
-
-#define CFS_PAGE_MASK		   (~((__u64)PAGE_SIZE-1))
-#define page_index(p)       ((p)->index)
-
-#define memory_pressure_get() (current->flags & PF_MEMALLOC)
-#define memory_pressure_set() do { current->flags |= PF_MEMALLOC; } while (0)
-#define memory_pressure_clr() do { current->flags &= ~PF_MEMALLOC; } while (0)
-
-#if BITS_PER_LONG == 32
-/* limit to lowmem on 32-bit systems */
-#define NUM_CACHEPAGES \
-	min(totalram_pages, 1UL << (30 - PAGE_SHIFT) * 3 / 4)
-#else
-#define NUM_CACHEPAGES totalram_pages
-#endif
-
-#define DECL_MMSPACE		mm_segment_t __oldfs
-#define MMSPACE_OPEN \
-	do { __oldfs = get_fs(); set_fs(get_ds()); } while (0)
-#define MMSPACE_CLOSE	       set_fs(__oldfs)
-
-#endif /* __LINUX_CFS_MEM_H__ */
diff --git a/drivers/staging/lustre/include/linux/libcfs/linux/linux-time.h b/drivers/staging/lustre/include/linux/libcfs/linux/linux-time.h
index ed8764b11..7656b09b8 100644
--- a/drivers/staging/lustre/include/linux/libcfs/linux/linux-time.h
+++ b/drivers/staging/lustre/include/linux/libcfs/linux/linux-time.h
@@ -70,12 +70,12 @@ static inline unsigned long cfs_time_current(void)
 
 static inline long cfs_time_seconds(int seconds)
 {
-	return ((long)seconds) * HZ;
+	return ((long)seconds) * msecs_to_jiffies(MSEC_PER_SEC);
 }
 
 static inline long cfs_duration_sec(long d)
 {
-	return d / HZ;
+	return d / msecs_to_jiffies(MSEC_PER_SEC);
 }
 
 #define cfs_time_current_64 get_jiffies_64
diff --git a/drivers/staging/lustre/include/linux/lnet/lib-dlc.h b/drivers/staging/lustre/include/linux/lnet/lib-dlc.h
index 84a19e96e..6ce9accb9 100644
--- a/drivers/staging/lustre/include/linux/lnet/lib-dlc.h
+++ b/drivers/staging/lustre/include/linux/lnet/lib-dlc.h
@@ -37,10 +37,37 @@
 #define LNET_MAX_SHOW_NUM_CPT	128
 #define LNET_UNDEFINED_HOPS	((__u32) -1)
 
+struct lnet_ioctl_config_lnd_cmn_tunables {
+	__u32 lct_version;
+	__u32 lct_peer_timeout;
+	__u32 lct_peer_tx_credits;
+	__u32 lct_peer_rtr_credits;
+	__u32 lct_max_tx_credits;
+};
+
+struct lnet_ioctl_config_o2iblnd_tunables {
+	__u32 lnd_version;
+	__u32 lnd_peercredits_hiw;
+	__u32 lnd_map_on_demand;
+	__u32 lnd_concurrent_sends;
+	__u32 lnd_fmr_pool_size;
+	__u32 lnd_fmr_flush_trigger;
+	__u32 lnd_fmr_cache;
+	__u32 pad;
+};
+
+struct lnet_ioctl_config_lnd_tunables {
+	struct lnet_ioctl_config_lnd_cmn_tunables lt_cmn;
+	union {
+		struct lnet_ioctl_config_o2iblnd_tunables lt_o2ib;
+	} lt_tun_u;
+};
+
 struct lnet_ioctl_net_config {
 	char ni_interfaces[LNET_MAX_INTERFACES][LNET_MAX_STR_LEN];
 	__u32 ni_status;
 	__u32 ni_cpts[LNET_MAX_SHOW_NUM_CPT];
+	char cfg_bulk[0];
 };
 
 #define LNET_TINY_BUF_IDX	0
@@ -81,7 +108,7 @@ struct lnet_ioctl_config_data {
 			__s32 net_peer_rtr_credits;
 			__s32 net_max_tx_credits;
 			__u32 net_cksum_algo;
-			__u32 net_pad;
+			__u32 net_interface_count;
 		} cfg_net;
 		struct {
 			__u32 buf_enable;
diff --git a/drivers/staging/lustre/include/linux/lnet/lib-lnet.h b/drivers/staging/lustre/include/linux/lnet/lib-lnet.h
index dfc0208dc..513a8225f 100644
--- a/drivers/staging/lustre/include/linux/lnet/lib-lnet.h
+++ b/drivers/staging/lustre/include/linux/lnet/lib-lnet.h
@@ -463,10 +463,6 @@ int lnet_del_route(__u32 net, lnet_nid_t gw_nid);
 void lnet_destroy_routes(void);
 int lnet_get_route(int idx, __u32 *net, __u32 *hops,
 		   lnet_nid_t *gateway, __u32 *alive, __u32 *priority);
-int lnet_get_net_config(int idx, __u32 *cpt_count, __u64 *nid,
-			int *peer_timeout, int *peer_tx_credits,
-			int *peer_rtr_cr, int *max_tx_credits,
-			struct lnet_ioctl_net_config *net_config);
 int lnet_get_rtr_pool_cfg(int idx, struct lnet_ioctl_pool_cfg *pool_cfg);
 
 void lnet_router_debugfs_init(void);
@@ -478,9 +474,8 @@ int lnet_rtrpools_enable(void);
 void lnet_rtrpools_disable(void);
 void lnet_rtrpools_free(int keep_pools);
 lnet_remotenet_t *lnet_find_net_locked(__u32 net);
-int lnet_dyn_add_ni(lnet_pid_t requested_pid, char *nets,
-		    __s32 peer_timeout, __s32 peer_cr, __s32 peer_buf_cr,
-		    __s32 credits);
+int lnet_dyn_add_ni(lnet_pid_t requested_pid,
+		    struct lnet_ioctl_config_data *conf);
 int lnet_dyn_del_ni(__u32 net);
 int lnet_clear_lazy_portal(struct lnet_ni *ni, int portal, char *reason);
 
diff --git a/drivers/staging/lustre/include/linux/lnet/lib-types.h b/drivers/staging/lustre/include/linux/lnet/lib-types.h
index 29c72f8c2..24c4a08e6 100644
--- a/drivers/staging/lustre/include/linux/lnet/lib-types.h
+++ b/drivers/staging/lustre/include/linux/lnet/lib-types.h
@@ -273,6 +273,8 @@ typedef struct lnet_ni {
 	int			**ni_refs;	/* percpt reference count */
 	time64_t		  ni_last_alive;/* when I was last alive */
 	lnet_ni_status_t	 *ni_status;	/* my health status */
+	/* per NI LND tunables */
+	struct lnet_ioctl_config_lnd_tunables *ni_lnd_tunables;
 	/* equivalent interfaces to use */
 	char			 *ni_interfaces[LNET_MAX_INTERFACES];
 } lnet_ni_t;
diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c
index 0d32e6541..6c59f2ff2 100644
--- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c
+++ b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c
@@ -335,8 +335,8 @@ int kiblnd_create_peer(lnet_ni_t *ni, kib_peer_t **peerp, lnet_nid_t nid)
 	peer->ibp_nid = nid;
 	peer->ibp_error = 0;
 	peer->ibp_last_alive = 0;
-	peer->ibp_max_frags = IBLND_CFG_RDMA_FRAGS;
-	peer->ibp_queue_depth = *kiblnd_tunables.kib_peertxcredits;
+	peer->ibp_max_frags = kiblnd_cfg_rdma_frags(peer->ibp_ni);
+	peer->ibp_queue_depth = ni->ni_peertxcredits;
 	atomic_set(&peer->ibp_refcount, 1);  /* 1 ref for caller */
 
 	INIT_LIST_HEAD(&peer->ibp_list);     /* not in the peer table yet */
@@ -1283,65 +1283,86 @@ static void kiblnd_map_tx_pool(kib_tx_pool_t *tpo)
 	}
 }
 
-struct ib_mr *kiblnd_find_rd_dma_mr(kib_hca_dev_t *hdev, kib_rdma_desc_t *rd,
+struct ib_mr *kiblnd_find_rd_dma_mr(struct lnet_ni *ni, kib_rdma_desc_t *rd,
 				    int negotiated_nfrags)
 {
-	__u16 nfrags = (negotiated_nfrags != -1) ?
-			negotiated_nfrags : *kiblnd_tunables.kib_map_on_demand;
+	kib_net_t *net = ni->ni_data;
+	kib_hca_dev_t *hdev = net->ibn_dev->ibd_hdev;
+	struct lnet_ioctl_config_o2iblnd_tunables *tunables;
+	__u16 nfrags;
+	int mod;
+
+	tunables = &ni->ni_lnd_tunables->lt_tun_u.lt_o2ib;
+	mod = tunables->lnd_map_on_demand;
+	nfrags = (negotiated_nfrags != -1) ? negotiated_nfrags : mod;
 
 	LASSERT(hdev->ibh_mrs);
 
-	if (*kiblnd_tunables.kib_map_on_demand > 0 &&
-	    nfrags <= rd->rd_nfrags)
+	if (mod > 0 && nfrags <= rd->rd_nfrags)
 		return NULL;
 
 	return hdev->ibh_mrs;
 }
 
-static void kiblnd_destroy_fmr_pool(kib_fmr_pool_t *pool)
+static void kiblnd_destroy_fmr_pool(kib_fmr_pool_t *fpo)
 {
-	LASSERT(!pool->fpo_map_count);
+	LASSERT(!fpo->fpo_map_count);
 
-	if (pool->fpo_fmr_pool)
-		ib_destroy_fmr_pool(pool->fpo_fmr_pool);
+	if (fpo->fpo_is_fmr) {
+		if (fpo->fmr.fpo_fmr_pool)
+			ib_destroy_fmr_pool(fpo->fmr.fpo_fmr_pool);
+	} else {
+		struct kib_fast_reg_descriptor *frd, *tmp;
+		int i = 0;
+
+		list_for_each_entry_safe(frd, tmp, &fpo->fast_reg.fpo_pool_list,
+					 frd_list) {
+			list_del(&frd->frd_list);
+			ib_dereg_mr(frd->frd_mr);
+			LIBCFS_FREE(frd, sizeof(*frd));
+			i++;
+		}
+		if (i < fpo->fast_reg.fpo_pool_size)
+			CERROR("FastReg pool still has %d regions registered\n",
+			       fpo->fast_reg.fpo_pool_size - i);
+	}
 
-	if (pool->fpo_hdev)
-		kiblnd_hdev_decref(pool->fpo_hdev);
+	if (fpo->fpo_hdev)
+		kiblnd_hdev_decref(fpo->fpo_hdev);
 
-	LIBCFS_FREE(pool, sizeof(*pool));
+	LIBCFS_FREE(fpo, sizeof(*fpo));
 }
 
 static void kiblnd_destroy_fmr_pool_list(struct list_head *head)
 {
-	kib_fmr_pool_t *pool;
+	kib_fmr_pool_t *fpo, *tmp;
 
-	while (!list_empty(head)) {
-		pool = list_entry(head->next, kib_fmr_pool_t, fpo_list);
-		list_del(&pool->fpo_list);
-		kiblnd_destroy_fmr_pool(pool);
+	list_for_each_entry_safe(fpo, tmp, head, fpo_list) {
+		list_del(&fpo->fpo_list);
+		kiblnd_destroy_fmr_pool(fpo);
 	}
 }
 
-static int kiblnd_fmr_pool_size(int ncpts)
+static int
+kiblnd_fmr_pool_size(struct lnet_ioctl_config_o2iblnd_tunables *tunables,
+		     int ncpts)
 {
-	int size = *kiblnd_tunables.kib_fmr_pool_size / ncpts;
+	int size = tunables->lnd_fmr_pool_size / ncpts;
 
 	return max(IBLND_FMR_POOL, size);
 }
 
-static int kiblnd_fmr_flush_trigger(int ncpts)
+static int
+kiblnd_fmr_flush_trigger(struct lnet_ioctl_config_o2iblnd_tunables *tunables,
+			 int ncpts)
 {
-	int size = *kiblnd_tunables.kib_fmr_flush_trigger / ncpts;
+	int size = tunables->lnd_fmr_flush_trigger / ncpts;
 
 	return max(IBLND_FMR_POOL_FLUSH, size);
 }
 
-static int kiblnd_create_fmr_pool(kib_fmr_poolset_t *fps,
-				  kib_fmr_pool_t **pp_fpo)
+static int kiblnd_alloc_fmr_pool(kib_fmr_poolset_t *fps, kib_fmr_pool_t *fpo)
 {
-	/* FMR pool for RDMA */
-	kib_dev_t *dev = fps->fps_net->ibn_dev;
-	kib_fmr_pool_t *fpo;
 	struct ib_fmr_pool_param param = {
 		.max_pages_per_fmr = LNET_MAX_PAYLOAD / PAGE_SIZE,
 		.page_shift        = PAGE_SHIFT,
@@ -1351,7 +1372,78 @@ static int kiblnd_create_fmr_pool(kib_fmr_poolset_t *fps,
 		.dirty_watermark   = fps->fps_flush_trigger,
 		.flush_function    = NULL,
 		.flush_arg         = NULL,
-		.cache             = !!*kiblnd_tunables.kib_fmr_cache};
+		.cache             = !!fps->fps_cache };
+	int rc = 0;
+
+	fpo->fmr.fpo_fmr_pool = ib_create_fmr_pool(fpo->fpo_hdev->ibh_pd,
+						   &param);
+	if (IS_ERR(fpo->fmr.fpo_fmr_pool)) {
+		rc = PTR_ERR(fpo->fmr.fpo_fmr_pool);
+		if (rc != -ENOSYS)
+			CERROR("Failed to create FMR pool: %d\n", rc);
+		else
+			CERROR("FMRs are not supported\n");
+	}
+
+	return rc;
+}
+
+static int kiblnd_alloc_freg_pool(kib_fmr_poolset_t *fps, kib_fmr_pool_t *fpo)
+{
+	struct kib_fast_reg_descriptor *frd, *tmp;
+	int i, rc;
+
+	INIT_LIST_HEAD(&fpo->fast_reg.fpo_pool_list);
+	fpo->fast_reg.fpo_pool_size = 0;
+	for (i = 0; i < fps->fps_pool_size; i++) {
+		LIBCFS_CPT_ALLOC(frd, lnet_cpt_table(), fps->fps_cpt,
+				 sizeof(*frd));
+		if (!frd) {
+			CERROR("Failed to allocate a new fast_reg descriptor\n");
+			rc = -ENOMEM;
+			goto out;
+		}
+
+		frd->frd_mr = ib_alloc_mr(fpo->fpo_hdev->ibh_pd,
+					  IB_MR_TYPE_MEM_REG,
+					  LNET_MAX_PAYLOAD / PAGE_SIZE);
+		if (IS_ERR(frd->frd_mr)) {
+			rc = PTR_ERR(frd->frd_mr);
+			CERROR("Failed to allocate ib_alloc_mr: %d\n", rc);
+			frd->frd_mr = NULL;
+			goto out_middle;
+		}
+
+		frd->frd_valid = true;
+
+		list_add_tail(&frd->frd_list, &fpo->fast_reg.fpo_pool_list);
+		fpo->fast_reg.fpo_pool_size++;
+	}
+
+	return 0;
+
+out_middle:
+	if (frd->frd_mr)
+		ib_dereg_mr(frd->frd_mr);
+	LIBCFS_FREE(frd, sizeof(*frd));
+
+out:
+	list_for_each_entry_safe(frd, tmp, &fpo->fast_reg.fpo_pool_list,
+				 frd_list) {
+		list_del(&frd->frd_list);
+		ib_dereg_mr(frd->frd_mr);
+		LIBCFS_FREE(frd, sizeof(*frd));
+	}
+
+	return rc;
+}
+
+static int kiblnd_create_fmr_pool(kib_fmr_poolset_t *fps,
+				  kib_fmr_pool_t **pp_fpo)
+{
+	kib_dev_t *dev = fps->fps_net->ibn_dev;
+	struct ib_device_attr *dev_attr;
+	kib_fmr_pool_t *fpo;
 	int rc;
 
 	LIBCFS_CPT_ALLOC(fpo, lnet_cpt_table(), fps->fps_cpt, sizeof(*fpo));
@@ -1359,22 +1451,41 @@ static int kiblnd_create_fmr_pool(kib_fmr_poolset_t *fps,
 		return -ENOMEM;
 
 	fpo->fpo_hdev = kiblnd_current_hdev(dev);
-
-	fpo->fpo_fmr_pool = ib_create_fmr_pool(fpo->fpo_hdev->ibh_pd, &param);
-	if (IS_ERR(fpo->fpo_fmr_pool)) {
-		rc = PTR_ERR(fpo->fpo_fmr_pool);
-		CERROR("Failed to create FMR pool: %d\n", rc);
-
-		kiblnd_hdev_decref(fpo->fpo_hdev);
-		LIBCFS_FREE(fpo, sizeof(*fpo));
-		return rc;
+	dev_attr = &fpo->fpo_hdev->ibh_ibdev->attrs;
+
+	/* Check for FMR or FastReg support */
+	fpo->fpo_is_fmr = 0;
+	if (fpo->fpo_hdev->ibh_ibdev->alloc_fmr &&
+	    fpo->fpo_hdev->ibh_ibdev->dealloc_fmr &&
+	    fpo->fpo_hdev->ibh_ibdev->map_phys_fmr &&
+	    fpo->fpo_hdev->ibh_ibdev->unmap_fmr) {
+		LCONSOLE_INFO("Using FMR for registration\n");
+		fpo->fpo_is_fmr = 1;
+	} else if (dev_attr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) {
+		LCONSOLE_INFO("Using FastReg for registration\n");
+	} else {
+		rc = -ENOSYS;
+		LCONSOLE_ERROR_MSG(rc, "IB device does not support FMRs nor FastRegs, can't register memory\n");
+		goto out_fpo;
 	}
 
+	if (fpo->fpo_is_fmr)
+		rc = kiblnd_alloc_fmr_pool(fps, fpo);
+	else
+		rc = kiblnd_alloc_freg_pool(fps, fpo);
+	if (rc)
+		goto out_fpo;
+
 	fpo->fpo_deadline = cfs_time_shift(IBLND_POOL_DEADLINE);
-	fpo->fpo_owner    = fps;
+	fpo->fpo_owner = fps;
 	*pp_fpo = fpo;
 
 	return 0;
+
+out_fpo:
+	kiblnd_hdev_decref(fpo->fpo_hdev);
+	LIBCFS_FREE(fpo, sizeof(*fpo));
+	return rc;
 }
 
 static void kiblnd_fail_fmr_poolset(kib_fmr_poolset_t *fps,
@@ -1407,9 +1518,10 @@ static void kiblnd_fini_fmr_poolset(kib_fmr_poolset_t *fps)
 	}
 }
 
-static int kiblnd_init_fmr_poolset(kib_fmr_poolset_t *fps, int cpt,
-				   kib_net_t *net, int pool_size,
-				   int flush_trigger)
+static int
+kiblnd_init_fmr_poolset(kib_fmr_poolset_t *fps, int cpt, int ncpts,
+			kib_net_t *net,
+			struct lnet_ioctl_config_o2iblnd_tunables *tunables)
 {
 	kib_fmr_pool_t *fpo;
 	int rc;
@@ -1418,8 +1530,11 @@ static int kiblnd_init_fmr_poolset(kib_fmr_poolset_t *fps, int cpt,
 
 	fps->fps_net = net;
 	fps->fps_cpt = cpt;
-	fps->fps_pool_size = pool_size;
-	fps->fps_flush_trigger = flush_trigger;
+
+	fps->fps_pool_size = kiblnd_fmr_pool_size(tunables, ncpts);
+	fps->fps_flush_trigger = kiblnd_fmr_flush_trigger(tunables, ncpts);
+	fps->fps_cache = tunables->lnd_fmr_cache;
+
 	spin_lock_init(&fps->fps_lock);
 	INIT_LIST_HEAD(&fps->fps_pool_list);
 	INIT_LIST_HEAD(&fps->fps_failed_pool_list);
@@ -1440,25 +1555,64 @@ static int kiblnd_fmr_pool_is_idle(kib_fmr_pool_t *fpo, unsigned long now)
 	return cfs_time_aftereq(now, fpo->fpo_deadline);
 }
 
+static int
+kiblnd_map_tx_pages(kib_tx_t *tx, kib_rdma_desc_t *rd)
+{
+	__u64 *pages = tx->tx_pages;
+	kib_hca_dev_t *hdev;
+	int npages;
+	int size;
+	int i;
+
+	hdev = tx->tx_pool->tpo_hdev;
+
+	for (i = 0, npages = 0; i < rd->rd_nfrags; i++) {
+		for (size = 0; size <  rd->rd_frags[i].rf_nob;
+		     size += hdev->ibh_page_size) {
+			pages[npages++] = (rd->rd_frags[i].rf_addr &
+					   hdev->ibh_page_mask) + size;
+		}
+	}
+
+	return npages;
+}
+
 void kiblnd_fmr_pool_unmap(kib_fmr_t *fmr, int status)
 {
 	LIST_HEAD(zombies);
 	kib_fmr_pool_t *fpo = fmr->fmr_pool;
-	kib_fmr_poolset_t *fps = fpo->fpo_owner;
+	kib_fmr_poolset_t *fps;
 	unsigned long now = cfs_time_current();
 	kib_fmr_pool_t *tmp;
 	int rc;
 
-	rc = ib_fmr_pool_unmap(fmr->fmr_pfmr);
-	LASSERT(!rc);
+	if (!fpo)
+		return;
 
-	if (status) {
-		rc = ib_flush_fmr_pool(fpo->fpo_fmr_pool);
-		LASSERT(!rc);
-	}
+	fps = fpo->fpo_owner;
+	if (fpo->fpo_is_fmr) {
+		if (fmr->fmr_pfmr) {
+			rc = ib_fmr_pool_unmap(fmr->fmr_pfmr);
+			LASSERT(!rc);
+			fmr->fmr_pfmr = NULL;
+		}
+
+		if (status) {
+			rc = ib_flush_fmr_pool(fpo->fmr.fpo_fmr_pool);
+			LASSERT(!rc);
+		}
+	} else {
+		struct kib_fast_reg_descriptor *frd = fmr->fmr_frd;
 
+		if (frd) {
+			frd->frd_valid = false;
+			spin_lock(&fps->fps_lock);
+			list_add_tail(&frd->frd_list, &fpo->fast_reg.fpo_pool_list);
+			spin_unlock(&fps->fps_lock);
+			fmr->fmr_frd = NULL;
+		}
+	}
 	fmr->fmr_pool = NULL;
-	fmr->fmr_pfmr = NULL;
 
 	spin_lock(&fps->fps_lock);
 	fpo->fpo_map_count--;  /* decref the pool */
@@ -1479,11 +1633,15 @@ void kiblnd_fmr_pool_unmap(kib_fmr_t *fmr, int status)
 		kiblnd_destroy_fmr_pool_list(&zombies);
 }
 
-int kiblnd_fmr_pool_map(kib_fmr_poolset_t *fps, __u64 *pages, int npages,
-			__u64 iov, kib_fmr_t *fmr)
+int kiblnd_fmr_pool_map(kib_fmr_poolset_t *fps, kib_tx_t *tx,
+			kib_rdma_desc_t *rd, __u32 nob, __u64 iov,
+			kib_fmr_t *fmr)
 {
-	struct ib_pool_fmr *pfmr;
+	__u64 *pages = tx->tx_pages;
+	bool is_rx = (rd != tx->tx_rd);
+        bool tx_pages_mapped = 0;
 	kib_fmr_pool_t *fpo;
+	int npages = 0;
 	__u64 version;
 	int rc;
 
@@ -1493,21 +1651,95 @@ int kiblnd_fmr_pool_map(kib_fmr_poolset_t *fps, __u64 *pages, int npages,
 	list_for_each_entry(fpo, &fps->fps_pool_list, fpo_list) {
 		fpo->fpo_deadline = cfs_time_shift(IBLND_POOL_DEADLINE);
 		fpo->fpo_map_count++;
-		spin_unlock(&fps->fps_lock);
 
-		pfmr = ib_fmr_pool_map_phys(fpo->fpo_fmr_pool,
-					    pages, npages, iov);
-		if (likely(!IS_ERR(pfmr))) {
-			fmr->fmr_pool = fpo;
-			fmr->fmr_pfmr = pfmr;
-			return 0;
+		if (fpo->fpo_is_fmr) {
+			struct ib_pool_fmr *pfmr;
+
+			spin_unlock(&fps->fps_lock);
+
+			if (!tx_pages_mapped) {
+				npages = kiblnd_map_tx_pages(tx, rd);
+				tx_pages_mapped = 1;
+			}
+
+			pfmr = ib_fmr_pool_map_phys(fpo->fmr.fpo_fmr_pool,
+						    pages, npages, iov);
+			if (likely(!IS_ERR(pfmr))) {
+				fmr->fmr_key = is_rx ? pfmr->fmr->rkey :
+						       pfmr->fmr->lkey;
+				fmr->fmr_frd = NULL;
+				fmr->fmr_pfmr = pfmr;
+				fmr->fmr_pool = fpo;
+				return 0;
+			}
+			rc = PTR_ERR(pfmr);
+		} else {
+			if (!list_empty(&fpo->fast_reg.fpo_pool_list)) {
+				struct kib_fast_reg_descriptor *frd;
+				struct ib_reg_wr *wr;
+				struct ib_mr *mr;
+				int n;
+
+				frd = list_first_entry(&fpo->fast_reg.fpo_pool_list,
+						       struct kib_fast_reg_descriptor,
+						       frd_list);
+				list_del(&frd->frd_list);
+				spin_unlock(&fps->fps_lock);
+
+				mr = frd->frd_mr;
+
+				if (!frd->frd_valid) {
+					__u32 key = is_rx ? mr->rkey : mr->lkey;
+					struct ib_send_wr *inv_wr;
+
+					inv_wr = &frd->frd_inv_wr;
+					memset(inv_wr, 0, sizeof(*inv_wr));
+					inv_wr->opcode = IB_WR_LOCAL_INV;
+					inv_wr->wr_id = IBLND_WID_MR;
+					inv_wr->ex.invalidate_rkey = key;
+
+					/* Bump the key */
+					key = ib_inc_rkey(key);
+					ib_update_fast_reg_key(mr, key);
+				}
+
+				n = ib_map_mr_sg(mr, tx->tx_frags,
+						 tx->tx_nfrags, NULL, PAGE_SIZE);
+				if (unlikely(n != tx->tx_nfrags)) {
+					CERROR("Failed to map mr %d/%d elements\n",
+					       n, tx->tx_nfrags);
+					return n < 0 ? n : -EINVAL;
+				}
+
+				mr->iova = iov;
+
+				/* Prepare FastReg WR */
+				wr = &frd->frd_fastreg_wr;
+				memset(wr, 0, sizeof(*wr));
+				wr->wr.opcode = IB_WR_REG_MR;
+				wr->wr.wr_id = IBLND_WID_MR;
+				wr->wr.num_sge = 0;
+				wr->wr.send_flags = 0;
+				wr->mr = mr;
+				wr->key = is_rx ? mr->rkey : mr->lkey;
+				wr->access = (IB_ACCESS_LOCAL_WRITE |
+					      IB_ACCESS_REMOTE_WRITE);
+
+				fmr->fmr_key = is_rx ? mr->rkey : mr->lkey;
+				fmr->fmr_frd = frd;
+				fmr->fmr_pfmr = NULL;
+				fmr->fmr_pool = fpo;
+				return 0;
+			}
+			spin_unlock(&fps->fps_lock);
+			rc = -EBUSY;
 		}
 
 		spin_lock(&fps->fps_lock);
 		fpo->fpo_map_count--;
-		if (PTR_ERR(pfmr) != -EAGAIN) {
+		if (rc != -EAGAIN) {
 			spin_unlock(&fps->fps_lock);
-			return PTR_ERR(pfmr);
+			return rc;
 		}
 
 		/* EAGAIN and ... */
@@ -1932,25 +2164,28 @@ static void kiblnd_net_fini_pools(kib_net_t *net)
 	}
 }
 
-static int kiblnd_net_init_pools(kib_net_t *net, __u32 *cpts, int ncpts)
+static int kiblnd_net_init_pools(kib_net_t *net, lnet_ni_t *ni, __u32 *cpts,
+				 int ncpts)
 {
+	struct lnet_ioctl_config_o2iblnd_tunables *tunables;
 	unsigned long flags;
 	int cpt;
-	int		rc = 0;
+	int rc;
 	int i;
 
+	tunables = &ni->ni_lnd_tunables->lt_tun_u.lt_o2ib;
+
 	read_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
-	if (!*kiblnd_tunables.kib_map_on_demand) {
+	if (!tunables->lnd_map_on_demand) {
 		read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
 		goto create_tx_pool;
 	}
 
 	read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
 
-	if (*kiblnd_tunables.kib_fmr_pool_size <
-	    *kiblnd_tunables.kib_ntx / 4) {
+	if (tunables->lnd_fmr_pool_size < *kiblnd_tunables.kib_ntx / 4) {
 		CERROR("Can't set fmr pool size (%d) < ntx / 4(%d)\n",
-		       *kiblnd_tunables.kib_fmr_pool_size,
+		       tunables->lnd_fmr_pool_size,
 		       *kiblnd_tunables.kib_ntx / 4);
 		rc = -EINVAL;
 		goto failed;
@@ -1965,8 +2200,11 @@ static int kiblnd_net_init_pools(kib_net_t *net, __u32 *cpts, int ncpts)
 	/*
 	 * premapping can fail if ibd_nmr > 1, so we always create
 	 * FMR pool and map-on-demand if premapping failed
+	 *
+	 * cfs_precpt_alloc is creating an array of struct kib_fmr_poolset
+	 * The number of struct kib_fmr_poolsets create is equal to the
+	 * number of CPTs that exist, i.e net->ibn_fmr_ps[cpt].
 	 */
-
 	net->ibn_fmr_ps = cfs_percpt_alloc(lnet_cpt_table(),
 					   sizeof(kib_fmr_poolset_t));
 	if (!net->ibn_fmr_ps) {
@@ -1977,9 +2215,8 @@ static int kiblnd_net_init_pools(kib_net_t *net, __u32 *cpts, int ncpts)
 
 	for (i = 0; i < ncpts; i++) {
 		cpt = !cpts ? i : cpts[i];
-		rc = kiblnd_init_fmr_poolset(net->ibn_fmr_ps[cpt], cpt, net,
-					     kiblnd_fmr_pool_size(ncpts),
-					     kiblnd_fmr_flush_trigger(ncpts));
+		rc = kiblnd_init_fmr_poolset(net->ibn_fmr_ps[cpt], cpt, ncpts,
+					     net, tunables);
 		if (rc) {
 			CERROR("Can't initialize FMR pool for CPT %d: %d\n",
 			       cpt, rc);
@@ -1991,6 +2228,11 @@ static int kiblnd_net_init_pools(kib_net_t *net, __u32 *cpts, int ncpts)
 		LASSERT(i == ncpts);
 
  create_tx_pool:
+	/*
+	 * cfs_precpt_alloc is creating an array of struct kib_tx_poolset
+	 * The number of struct kib_tx_poolsets create is equal to the
+	 * number of CPTs that exist, i.e net->ibn_tx_ps[cpt].
+	 */
 	net->ibn_tx_ps = cfs_percpt_alloc(lnet_cpt_table(),
 					  sizeof(kib_tx_poolset_t));
 	if (!net->ibn_tx_ps) {
@@ -2694,10 +2936,9 @@ static int kiblnd_startup(lnet_ni_t *ni)
 	net->ibn_incarnation = tv.tv_sec * USEC_PER_SEC +
 			       tv.tv_nsec / NSEC_PER_USEC;
 
-	ni->ni_peertimeout    = *kiblnd_tunables.kib_peertimeout;
-	ni->ni_maxtxcredits   = *kiblnd_tunables.kib_credits;
-	ni->ni_peertxcredits  = *kiblnd_tunables.kib_peertxcredits;
-	ni->ni_peerrtrcredits = *kiblnd_tunables.kib_peerrtrcredits;
+	rc = kiblnd_tunables_setup(ni);
+	if (rc)
+		goto net_failed;
 
 	if (ni->ni_interfaces[0]) {
 		/* Use the IPoIB interface specified in 'networks=' */
@@ -2736,7 +2977,7 @@ static int kiblnd_startup(lnet_ni_t *ni)
 	if (rc)
 		goto failed;
 
-	rc = kiblnd_net_init_pools(net, ni->ni_cpts, ni->ni_ncpts);
+	rc = kiblnd_net_init_pools(net, ni, ni->ni_cpts, ni->ni_ncpts);
 	if (rc) {
 		CERROR("Failed to initialize NI pools: %d\n", rc);
 		goto failed;
@@ -2779,8 +3020,6 @@ static void __exit ko2iblnd_exit(void)
 
 static int __init ko2iblnd_init(void)
 {
-	int rc;
-
 	CLASSERT(sizeof(kib_msg_t) <= IBLND_MSG_SIZE);
 	CLASSERT(offsetof(kib_msg_t,
 			  ibm_u.get.ibgm_rd.rd_frags[IBLND_MAX_RDMA_FRAGS])
@@ -2789,9 +3028,7 @@ static int __init ko2iblnd_init(void)
 			  ibm_u.putack.ibpam_rd.rd_frags[IBLND_MAX_RDMA_FRAGS])
 			  <= IBLND_MSG_SIZE);
 
-	rc = kiblnd_tunables_init();
-	if (rc)
-		return rc;
+	kiblnd_tunables_init();
 
 	lnet_register_lnd(&the_o2iblnd);
 
diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h
index bfcbdd167..b22984fd9 100644
--- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h
+++ b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h
@@ -87,22 +87,10 @@ typedef struct {
 	int *kib_timeout;                /* comms timeout (seconds) */
 	int *kib_keepalive;              /* keepalive timeout (seconds) */
 	int *kib_ntx;                    /* # tx descs */
-	int *kib_credits;                /* # concurrent sends */
-	int *kib_peertxcredits;          /* # concurrent sends to 1 peer */
-	int *kib_peerrtrcredits;         /* # per-peer router buffer credits */
-	int *kib_peercredits_hiw;        /* # when eagerly to return credits */
-	int *kib_peertimeout;            /* seconds to consider peer dead */
 	char **kib_default_ipif;         /* default IPoIB interface */
 	int *kib_retry_count;
 	int *kib_rnr_retry_count;
-	int *kib_concurrent_sends;       /* send work queue sizing */
 	int *kib_ib_mtu;                 /* IB MTU */
-	int *kib_map_on_demand;          /* map-on-demand if RD has more */
-					 /* fragments than this value, 0 */
-					 /* disable map-on-demand */
-	int *kib_fmr_pool_size;          /* # FMRs in pool */
-	int *kib_fmr_flush_trigger;      /* When to trigger FMR flush */
-	int *kib_fmr_cache;              /* enable FMR pool cache? */
 	int *kib_require_priv_port;      /* accept only privileged ports */
 	int *kib_use_priv_port; /* use privileged port for active connect */
 	int *kib_nscheds;                /* # threads on each CPT */
@@ -116,43 +104,21 @@ extern kib_tunables_t  kiblnd_tunables;
 #define IBLND_CREDITS_DEFAULT     8 /* default # of peer credits */
 #define IBLND_CREDITS_MAX	  ((typeof(((kib_msg_t *) 0)->ibm_credits)) - 1)  /* Max # of peer credits */
 
-#define IBLND_MSG_QUEUE_SIZE(v)    ((v) == IBLND_MSG_VERSION_1 ? \
-				     IBLND_MSG_QUEUE_SIZE_V1 :   \
-				     *kiblnd_tunables.kib_peertxcredits) /* # messages/RDMAs in-flight */
-#define IBLND_CREDITS_HIGHWATER(v) ((v) == IBLND_MSG_VERSION_1 ? \
-				     IBLND_CREDIT_HIGHWATER_V1 : \
-				     *kiblnd_tunables.kib_peercredits_hiw) /* when eagerly to return credits */
+/* when eagerly to return credits */
+#define IBLND_CREDITS_HIGHWATER(t, v)	((v) == IBLND_MSG_VERSION_1 ? \
+					IBLND_CREDIT_HIGHWATER_V1 : \
+					t->lnd_peercredits_hiw)
 
 #define kiblnd_rdma_create_id(cb, dev, ps, qpt) rdma_create_id(&init_net, \
 							       cb, dev, \
 							       ps, qpt)
 
-static inline int
-kiblnd_concurrent_sends_v1(void)
-{
-	if (*kiblnd_tunables.kib_concurrent_sends > IBLND_MSG_QUEUE_SIZE_V1 * 2)
-		return IBLND_MSG_QUEUE_SIZE_V1 * 2;
-
-	if (*kiblnd_tunables.kib_concurrent_sends < IBLND_MSG_QUEUE_SIZE_V1 / 2)
-		return IBLND_MSG_QUEUE_SIZE_V1 / 2;
-
-	return *kiblnd_tunables.kib_concurrent_sends;
-}
-
-#define IBLND_CONCURRENT_SENDS(v)  ((v) == IBLND_MSG_VERSION_1 ? \
-				     kiblnd_concurrent_sends_v1() : \
-				     *kiblnd_tunables.kib_concurrent_sends)
 /* 2 OOB shall suffice for 1 keepalive and 1 returning credits */
 #define IBLND_OOB_CAPABLE(v)       ((v) != IBLND_MSG_VERSION_1)
 #define IBLND_OOB_MSGS(v)	   (IBLND_OOB_CAPABLE(v) ? 2 : 0)
 
 #define IBLND_MSG_SIZE		(4 << 10)	 /* max size of queued messages (inc hdr) */
 #define IBLND_MAX_RDMA_FRAGS	 LNET_MAX_IOV	   /* max # of fragments supported */
-#define IBLND_CFG_RDMA_FRAGS       (*kiblnd_tunables.kib_map_on_demand ? \
-				    *kiblnd_tunables.kib_map_on_demand :      \
-				     IBLND_MAX_RDMA_FRAGS)  /* max # of fragments configured by user */
-#define IBLND_RDMA_FRAGS(v)	((v) == IBLND_MSG_VERSION_1 ? \
-				     IBLND_MAX_RDMA_FRAGS : IBLND_CFG_RDMA_FRAGS)
 
 /************************/
 /* derived constants... */
@@ -171,7 +137,8 @@ kiblnd_concurrent_sends_v1(void)
 /* WRs and CQEs (per connection) */
 #define IBLND_RECV_WRS(c)	IBLND_RX_MSGS(c)
 #define IBLND_SEND_WRS(c)	\
-	((c->ibc_max_frags + 1) * IBLND_CONCURRENT_SENDS(c->ibc_version))
+	((c->ibc_max_frags + 1) * kiblnd_concurrent_sends(c->ibc_version, \
+							  c->ibc_peer->ibp_ni))
 #define IBLND_CQ_ENTRIES(c)	(IBLND_RECV_WRS(c) + IBLND_SEND_WRS(c))
 
 struct kib_hca_dev;
@@ -286,24 +253,44 @@ typedef struct {
 	int                   fps_cpt;             /* CPT id */
 	int                   fps_pool_size;
 	int                   fps_flush_trigger;
+	int		      fps_cache;
 	int                   fps_increasing;      /* is allocating new pool */
 	unsigned long         fps_next_retry;      /* time stamp for retry if*/
 						   /* failed to allocate */
 } kib_fmr_poolset_t;
 
+struct kib_fast_reg_descriptor { /* For fast registration */
+	struct list_head		 frd_list;
+	struct ib_send_wr		 frd_inv_wr;
+	struct ib_reg_wr		 frd_fastreg_wr;
+	struct ib_mr			*frd_mr;
+	bool				 frd_valid;
+};
+
 typedef struct {
 	struct list_head      fpo_list;            /* chain on pool list */
 	struct kib_hca_dev    *fpo_hdev;           /* device for this pool */
 	kib_fmr_poolset_t     *fpo_owner;          /* owner of this pool */
-	struct ib_fmr_pool    *fpo_fmr_pool;       /* IB FMR pool */
+	union {
+		struct {
+			struct ib_fmr_pool *fpo_fmr_pool; /* IB FMR pool */
+		} fmr;
+		struct { /* For fast registration */
+			struct list_head    fpo_pool_list;
+			int		    fpo_pool_size;
+		} fast_reg;
+	};
 	unsigned long         fpo_deadline;        /* deadline of this pool */
 	int                   fpo_failed;          /* fmr pool is failed */
 	int                   fpo_map_count;       /* # of mapped FMR */
+	int		      fpo_is_fmr;
 } kib_fmr_pool_t;
 
 typedef struct {
-	struct ib_pool_fmr    *fmr_pfmr;           /* IB pool fmr */
-	kib_fmr_pool_t        *fmr_pool;           /* pool of FMR */
+	kib_fmr_pool_t			*fmr_pool;	/* pool of FMR */
+	struct ib_pool_fmr		*fmr_pfmr;	/* IB pool fmr */
+	struct kib_fast_reg_descriptor	*fmr_frd;
+	u32				 fmr_key;
 } kib_fmr_t;
 
 typedef struct kib_net {
@@ -615,6 +602,48 @@ extern kib_data_t kiblnd_data;
 
 void kiblnd_hdev_destroy(kib_hca_dev_t *hdev);
 
+int kiblnd_msg_queue_size(int version, struct lnet_ni *ni);
+
+/* max # of fragments configured by user */
+static inline int
+kiblnd_cfg_rdma_frags(struct lnet_ni *ni)
+{
+	struct lnet_ioctl_config_o2iblnd_tunables *tunables;
+	int mod;
+
+	tunables = &ni->ni_lnd_tunables->lt_tun_u.lt_o2ib;
+	mod = tunables->lnd_map_on_demand;
+	return mod ? mod : IBLND_MAX_RDMA_FRAGS;
+}
+
+static inline int
+kiblnd_rdma_frags(int version, struct lnet_ni *ni)
+{
+	return version == IBLND_MSG_VERSION_1 ?
+			  IBLND_MAX_RDMA_FRAGS :
+			  kiblnd_cfg_rdma_frags(ni);
+}
+
+static inline int
+kiblnd_concurrent_sends(int version, struct lnet_ni *ni)
+{
+	struct lnet_ioctl_config_o2iblnd_tunables *tunables;
+	int concurrent_sends;
+
+	tunables = &ni->ni_lnd_tunables->lt_tun_u.lt_o2ib;
+	concurrent_sends = tunables->lnd_concurrent_sends;
+
+	if (version == IBLND_MSG_VERSION_1) {
+		if (concurrent_sends > IBLND_MSG_QUEUE_SIZE_V1 * 2)
+			return IBLND_MSG_QUEUE_SIZE_V1 * 2;
+
+		if (concurrent_sends < IBLND_MSG_QUEUE_SIZE_V1 / 2)
+			return IBLND_MSG_QUEUE_SIZE_V1 / 2;
+	}
+
+	return concurrent_sends;
+}
+
 static inline void
 kiblnd_hdev_addref_locked(kib_hca_dev_t *hdev)
 {
@@ -737,10 +766,14 @@ kiblnd_send_keepalive(kib_conn_t *conn)
 static inline int
 kiblnd_need_noop(kib_conn_t *conn)
 {
+	struct lnet_ioctl_config_o2iblnd_tunables *tunables;
+	lnet_ni_t *ni = conn->ibc_peer->ibp_ni;
+
 	LASSERT(conn->ibc_state >= IBLND_CONN_ESTABLISHED);
+	tunables = &ni->ni_lnd_tunables->lt_tun_u.lt_o2ib;
 
 	if (conn->ibc_outstanding_credits <
-	    IBLND_CREDITS_HIGHWATER(conn->ibc_version) &&
+	    IBLND_CREDITS_HIGHWATER(tunables, conn->ibc_version) &&
 	    !kiblnd_send_keepalive(conn))
 		return 0; /* No need to send NOOP */
 
@@ -799,7 +832,8 @@ kiblnd_queue2str(kib_conn_t *conn, struct list_head *q)
 #define IBLND_WID_TX	1
 #define IBLND_WID_RX	2
 #define IBLND_WID_RDMA	3
-#define IBLND_WID_MASK	3UL
+#define IBLND_WID_MR	4
+#define IBLND_WID_MASK	7UL
 
 static inline __u64
 kiblnd_ptr2wreqid(void *ptr, int type)
@@ -947,20 +981,20 @@ static inline unsigned int kiblnd_sg_dma_len(struct ib_device *dev,
 #define KIBLND_CONN_PARAM(e)     ((e)->param.conn.private_data)
 #define KIBLND_CONN_PARAM_LEN(e) ((e)->param.conn.private_data_len)
 
-struct ib_mr *kiblnd_find_rd_dma_mr(kib_hca_dev_t *hdev,
-				    kib_rdma_desc_t *rd,
+struct ib_mr *kiblnd_find_rd_dma_mr(struct lnet_ni *ni, kib_rdma_desc_t *rd,
 				    int negotiated_nfrags);
 void kiblnd_map_rx_descs(kib_conn_t *conn);
 void kiblnd_unmap_rx_descs(kib_conn_t *conn);
 void kiblnd_pool_free_node(kib_pool_t *pool, struct list_head *node);
 struct list_head *kiblnd_pool_alloc_node(kib_poolset_t *ps);
 
-int  kiblnd_fmr_pool_map(kib_fmr_poolset_t *fps, __u64 *pages,
-			 int npages, __u64 iov, kib_fmr_t *fmr);
+int  kiblnd_fmr_pool_map(kib_fmr_poolset_t *fps, kib_tx_t *tx,
+			 kib_rdma_desc_t *rd, __u32 nob, __u64 iov,
+			 kib_fmr_t *fmr);
 void kiblnd_fmr_pool_unmap(kib_fmr_t *fmr, int status);
 
-int  kiblnd_tunables_init(void);
-void kiblnd_tunables_fini(void);
+int kiblnd_tunables_setup(struct lnet_ni *ni);
+void kiblnd_tunables_init(void);
 
 int  kiblnd_connd(void *arg);
 int  kiblnd_scheduler(void *arg);
diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c
index 2323e8d3a..845e49a52 100644
--- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c
+++ b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c
@@ -561,36 +561,23 @@ kiblnd_kvaddr_to_page(unsigned long vaddr)
 }
 
 static int
-kiblnd_fmr_map_tx(kib_net_t *net, kib_tx_t *tx, kib_rdma_desc_t *rd, int nob)
+kiblnd_fmr_map_tx(kib_net_t *net, kib_tx_t *tx, kib_rdma_desc_t *rd, __u32 nob)
 {
 	kib_hca_dev_t *hdev;
-	__u64 *pages = tx->tx_pages;
 	kib_fmr_poolset_t *fps;
-	int npages;
-	int size;
 	int cpt;
 	int rc;
-	int i;
 
 	LASSERT(tx->tx_pool);
 	LASSERT(tx->tx_pool->tpo_pool.po_owner);
 
 	hdev = tx->tx_pool->tpo_hdev;
-
-	for (i = 0, npages = 0; i < rd->rd_nfrags; i++) {
-		for (size = 0; size <  rd->rd_frags[i].rf_nob;
-			       size += hdev->ibh_page_size) {
-			pages[npages++] = (rd->rd_frags[i].rf_addr &
-					    hdev->ibh_page_mask) + size;
-		}
-	}
-
 	cpt = tx->tx_pool->tpo_pool.po_owner->ps_cpt;
 
 	fps = net->ibn_fmr_ps[cpt];
-	rc = kiblnd_fmr_pool_map(fps, pages, npages, 0, &tx->fmr);
+	rc = kiblnd_fmr_pool_map(fps, tx, rd, nob, 0, &tx->fmr);
 	if (rc) {
-		CERROR("Can't map %d pages: %d\n", npages, rc);
+		CERROR("Can't map %u bytes: %d\n", nob, rc);
 		return rc;
 	}
 
@@ -598,8 +585,7 @@ kiblnd_fmr_map_tx(kib_net_t *net, kib_tx_t *tx, kib_rdma_desc_t *rd, int nob)
 	 * If rd is not tx_rd, it's going to get sent to a peer, who will need
 	 * the rkey
 	 */
-	rd->rd_key = (rd != tx->tx_rd) ? tx->fmr.fmr_pfmr->fmr->rkey :
-					 tx->fmr.fmr_pfmr->fmr->lkey;
+	rd->rd_key = tx->fmr.fmr_key;
 	rd->rd_frags[0].rf_addr &= ~hdev->ibh_page_mask;
 	rd->rd_frags[0].rf_nob = nob;
 	rd->rd_nfrags = 1;
@@ -613,10 +599,8 @@ static void kiblnd_unmap_tx(lnet_ni_t *ni, kib_tx_t *tx)
 
 	LASSERT(net);
 
-	if (net->ibn_fmr_ps && tx->fmr.fmr_pfmr) {
+	if (net->ibn_fmr_ps)
 		kiblnd_fmr_pool_unmap(&tx->fmr, tx->tx_status);
-		tx->fmr.fmr_pfmr = NULL;
-	}
 
 	if (tx->tx_nfrags) {
 		kiblnd_dma_unmap_sg(tx->tx_pool->tpo_hdev->ibh_ibdev,
@@ -628,8 +612,8 @@ static void kiblnd_unmap_tx(lnet_ni_t *ni, kib_tx_t *tx)
 static int kiblnd_map_tx(lnet_ni_t *ni, kib_tx_t *tx, kib_rdma_desc_t *rd,
 			 int nfrags)
 {
-	kib_hca_dev_t *hdev = tx->tx_pool->tpo_hdev;
 	kib_net_t *net = ni->ni_data;
+	kib_hca_dev_t *hdev = net->ibn_dev->ibd_hdev;
 	struct ib_mr *mr    = NULL;
 	__u32 nob;
 	int i;
@@ -652,7 +636,7 @@ static int kiblnd_map_tx(lnet_ni_t *ni, kib_tx_t *tx, kib_rdma_desc_t *rd,
 		nob += rd->rd_frags[i].rf_nob;
 	}
 
-	mr = kiblnd_find_rd_dma_mr(hdev, rd, tx->tx_conn ?
+	mr = kiblnd_find_rd_dma_mr(ni, rd, tx->tx_conn ?
 				   tx->tx_conn->ibc_max_frags : -1);
 	if (mr) {
 		/* found pre-mapping MR */
@@ -704,7 +688,7 @@ kiblnd_setup_rd_iov(lnet_ni_t *ni, kib_tx_t *tx, kib_rdma_desc_t *rd,
 		fragnob = min(fragnob, (int)PAGE_SIZE - page_offset);
 
 		sg_set_page(sg, page, fragnob, page_offset);
-		sg++;
+		sg = sg_next(sg);
 
 		if (offset + fragnob < iov->iov_len) {
 			offset += fragnob;
@@ -748,7 +732,7 @@ kiblnd_setup_rd_kiov(lnet_ni_t *ni, kib_tx_t *tx, kib_rdma_desc_t *rd,
 
 		sg_set_page(sg, kiov->kiov_page, fragnob,
 			    kiov->kiov_offset + offset);
-		sg++;
+		sg = sg_next(sg);
 
 		offset = 0;
 		kiov++;
@@ -765,6 +749,7 @@ kiblnd_post_tx_locked(kib_conn_t *conn, kib_tx_t *tx, int credit)
 {
 	kib_msg_t *msg = tx->tx_msg;
 	kib_peer_t *peer = conn->ibc_peer;
+	struct lnet_ni *ni = peer->ibp_ni;
 	int ver = conn->ibc_version;
 	int rc;
 	int done;
@@ -780,7 +765,7 @@ kiblnd_post_tx_locked(kib_conn_t *conn, kib_tx_t *tx, int credit)
 	LASSERT(conn->ibc_credits >= 0);
 	LASSERT(conn->ibc_credits <= conn->ibc_queue_depth);
 
-	if (conn->ibc_nsends_posted == IBLND_CONCURRENT_SENDS(ver)) {
+	if (conn->ibc_nsends_posted == kiblnd_concurrent_sends(ver, ni)) {
 		/* tx completions outstanding... */
 		CDEBUG(D_NET, "%s: posted enough\n",
 		       libcfs_nid2str(peer->ibp_nid));
@@ -851,14 +836,26 @@ kiblnd_post_tx_locked(kib_conn_t *conn, kib_tx_t *tx, int credit)
 		/* close_conn will launch failover */
 		rc = -ENETDOWN;
 	} else {
-		struct ib_send_wr *wrq = &tx->tx_wrq[tx->tx_nwrq - 1].wr;
+		struct kib_fast_reg_descriptor *frd = tx->fmr.fmr_frd;
+		struct ib_send_wr *bad = &tx->tx_wrq[tx->tx_nwrq - 1].wr;
+		struct ib_send_wr *wrq = &tx->tx_wrq[0].wr;
+
+		if (frd) {
+			if (!frd->frd_valid) {
+				wrq = &frd->frd_inv_wr;
+				wrq->next = &frd->frd_fastreg_wr.wr;
+			} else {
+				wrq = &frd->frd_fastreg_wr.wr;
+			}
+			frd->frd_fastreg_wr.wr.next = &tx->tx_wrq[0].wr;
+		}
 
-		LASSERTF(wrq->wr_id == kiblnd_ptr2wreqid(tx, IBLND_WID_TX),
+		LASSERTF(bad->wr_id == kiblnd_ptr2wreqid(tx, IBLND_WID_TX),
 			 "bad wr_id %llx, opc %d, flags %d, peer: %s\n",
-			 wrq->wr_id, wrq->opcode, wrq->send_flags,
-		libcfs_nid2str(conn->ibc_peer->ibp_nid));
-		wrq = NULL;
-		rc = ib_post_send(conn->ibc_cmid->qp, &tx->tx_wrq->wr, &wrq);
+			 bad->wr_id, bad->opcode, bad->send_flags,
+			 libcfs_nid2str(conn->ibc_peer->ibp_nid));
+		bad = NULL;
+		rc = ib_post_send(conn->ibc_cmid->qp, wrq, &bad);
 	}
 
 	conn->ibc_last_send = jiffies;
@@ -919,7 +916,7 @@ kiblnd_check_sends(kib_conn_t *conn)
 
 	spin_lock(&conn->ibc_lock);
 
-	LASSERT(conn->ibc_nsends_posted <= IBLND_CONCURRENT_SENDS(ver));
+	LASSERT(conn->ibc_nsends_posted <= kiblnd_concurrent_sends(ver, ni));
 	LASSERT(!IBLND_OOB_CAPABLE(ver) ||
 		conn->ibc_noops_posted <= IBLND_OOB_MSGS(ver));
 	LASSERT(conn->ibc_reserved_credits >= 0);
@@ -1066,7 +1063,7 @@ kiblnd_init_rdma(kib_conn_t *conn, kib_tx_t *tx, int type,
 	kib_msg_t *ibmsg = tx->tx_msg;
 	kib_rdma_desc_t *srcrd = tx->tx_rd;
 	struct ib_sge *sge = &tx->tx_sge[0];
-	struct ib_rdma_wr *wrq = &tx->tx_wrq[0], *next;
+	struct ib_rdma_wr *wrq, *next;
 	int rc  = resid;
 	int srcidx = 0;
 	int dstidx = 0;
@@ -2333,11 +2330,11 @@ kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob)
 	}
 
 	if (reqmsg->ibm_u.connparams.ibcp_queue_depth >
-	    IBLND_MSG_QUEUE_SIZE(version)) {
+	    kiblnd_msg_queue_size(version, ni)) {
 		CERROR("Can't accept conn from %s, queue depth too large: %d (<=%d wanted)\n",
 		       libcfs_nid2str(nid),
 		       reqmsg->ibm_u.connparams.ibcp_queue_depth,
-		       IBLND_MSG_QUEUE_SIZE(version));
+		       kiblnd_msg_queue_size(version, ni));
 
 		if (version == IBLND_MSG_VERSION)
 			rej.ibr_why = IBLND_REJECT_MSG_QUEUE_SIZE;
@@ -2346,24 +2343,24 @@ kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob)
 	}
 
 	if (reqmsg->ibm_u.connparams.ibcp_max_frags >
-	    IBLND_RDMA_FRAGS(version)) {
+	    kiblnd_rdma_frags(version, ni)) {
 		CWARN("Can't accept conn from %s (version %x): max_frags %d too large (%d wanted)\n",
 		      libcfs_nid2str(nid), version,
 		      reqmsg->ibm_u.connparams.ibcp_max_frags,
-		      IBLND_RDMA_FRAGS(version));
+		      kiblnd_rdma_frags(version, ni));
 
 		if (version >= IBLND_MSG_VERSION)
 			rej.ibr_why = IBLND_REJECT_RDMA_FRAGS;
 
 		goto failed;
 	} else if (reqmsg->ibm_u.connparams.ibcp_max_frags <
-		   IBLND_RDMA_FRAGS(version) && !net->ibn_fmr_ps) {
+		   kiblnd_rdma_frags(version, ni) && !net->ibn_fmr_ps) {
 		CWARN("Can't accept conn from %s (version %x): max_frags %d incompatible without FMR pool (%d wanted)\n",
 		      libcfs_nid2str(nid), version,
 		      reqmsg->ibm_u.connparams.ibcp_max_frags,
-		      IBLND_RDMA_FRAGS(version));
+		      kiblnd_rdma_frags(version, ni));
 
-		if (version >= IBLND_MSG_VERSION)
+		if (version == IBLND_MSG_VERSION)
 			rej.ibr_why = IBLND_REJECT_RDMA_FRAGS;
 
 		goto failed;
@@ -2524,12 +2521,13 @@ kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob)
 	return 0;
 
  failed:
-	if (ni)
+	if (ni) {
 		lnet_ni_decref(ni);
+		rej.ibr_cp.ibcp_queue_depth = kiblnd_msg_queue_size(version, ni);
+		rej.ibr_cp.ibcp_max_frags = kiblnd_rdma_frags(version, ni);
+	}
 
 	rej.ibr_version             = version;
-	rej.ibr_cp.ibcp_queue_depth = IBLND_MSG_QUEUE_SIZE(version);
-	rej.ibr_cp.ibcp_max_frags   = IBLND_RDMA_FRAGS(version);
 	kiblnd_reject(cmid, &rej);
 
 	return -ECONNREFUSED;
@@ -2580,12 +2578,15 @@ kiblnd_check_reconnect(kib_conn_t *conn, int version,
 		reason = "Unknown";
 		break;
 
-	case IBLND_REJECT_RDMA_FRAGS:
+	case IBLND_REJECT_RDMA_FRAGS: {
+		struct lnet_ioctl_config_lnd_tunables *tunables;
+
 		if (!cp) {
 			reason = "can't negotiate max frags";
 			goto out;
 		}
-		if (!*kiblnd_tunables.kib_map_on_demand) {
+		tunables = peer->ibp_ni->ni_lnd_tunables;
+		if (!tunables->lt_tun_u.lt_o2ib.lnd_map_on_demand) {
 			reason = "map_on_demand must be enabled";
 			goto out;
 		}
@@ -2597,7 +2598,7 @@ kiblnd_check_reconnect(kib_conn_t *conn, int version,
 		peer->ibp_max_frags = frag_num;
 		reason = "rdma fragments";
 		break;
-
+	}
 	case IBLND_REJECT_MSG_QUEUE_SIZE:
 		if (!cp) {
 			reason = "can't negotiate queue depth";
@@ -3430,6 +3431,12 @@ kiblnd_complete(struct ib_wc *wc)
 	default:
 		LBUG();
 
+	case IBLND_WID_MR:
+		if (wc->status != IB_WC_SUCCESS &&
+		    wc->status != IB_WC_WR_FLUSH_ERR)
+			CNETERR("FastReg failed: %d\n", wc->status);
+		break;
+
 	case IBLND_WID_RDMA:
 		/*
 		 * We only get RDMA completion notification if it fails.  All
diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_modparams.c b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_modparams.c
index b4607dad3..f8fdd4ae3 100644
--- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_modparams.c
+++ b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_modparams.c
@@ -152,74 +152,135 @@ kib_tunables_t kiblnd_tunables = {
 	.kib_timeout           = &timeout,
 	.kib_keepalive         = &keepalive,
 	.kib_ntx               = &ntx,
-	.kib_credits           = &credits,
-	.kib_peertxcredits     = &peer_credits,
-	.kib_peercredits_hiw   = &peer_credits_hiw,
-	.kib_peerrtrcredits    = &peer_buffer_credits,
-	.kib_peertimeout       = &peer_timeout,
 	.kib_default_ipif      = &ipif_name,
 	.kib_retry_count       = &retry_count,
 	.kib_rnr_retry_count   = &rnr_retry_count,
-	.kib_concurrent_sends  = &concurrent_sends,
 	.kib_ib_mtu            = &ib_mtu,
-	.kib_map_on_demand     = &map_on_demand,
-	.kib_fmr_pool_size     = &fmr_pool_size,
-	.kib_fmr_flush_trigger = &fmr_flush_trigger,
-	.kib_fmr_cache         = &fmr_cache,
 	.kib_require_priv_port = &require_privileged_port,
 	.kib_use_priv_port     = &use_privileged_port,
 	.kib_nscheds           = &nscheds
 };
 
-int
-kiblnd_tunables_init(void)
+static struct lnet_ioctl_config_o2iblnd_tunables default_tunables;
+
+/* # messages/RDMAs in-flight */
+int kiblnd_msg_queue_size(int version, lnet_ni_t *ni)
 {
+	if (version == IBLND_MSG_VERSION_1)
+		return IBLND_MSG_QUEUE_SIZE_V1;
+	else if (ni)
+		return ni->ni_peertxcredits;
+	else
+		return peer_credits;
+}
+
+int kiblnd_tunables_setup(struct lnet_ni *ni)
+{
+	struct lnet_ioctl_config_o2iblnd_tunables *tunables;
+
+	/*
+	 * if there was no tunables specified, setup the tunables to be
+	 * defaulted
+	 */
+	if (!ni->ni_lnd_tunables) {
+		LIBCFS_ALLOC(ni->ni_lnd_tunables,
+			     sizeof(*ni->ni_lnd_tunables));
+		if (!ni->ni_lnd_tunables)
+			return -ENOMEM;
+
+		memcpy(&ni->ni_lnd_tunables->lt_tun_u.lt_o2ib,
+		       &default_tunables, sizeof(*tunables));
+	}
+	tunables = &ni->ni_lnd_tunables->lt_tun_u.lt_o2ib;
+
+	/* Current API version */
+	tunables->lnd_version = 0;
+
 	if (kiblnd_translate_mtu(*kiblnd_tunables.kib_ib_mtu) < 0) {
 		CERROR("Invalid ib_mtu %d, expected 256/512/1024/2048/4096\n",
 		       *kiblnd_tunables.kib_ib_mtu);
 		return -EINVAL;
 	}
 
-	if (*kiblnd_tunables.kib_peertxcredits < IBLND_CREDITS_DEFAULT)
-		*kiblnd_tunables.kib_peertxcredits = IBLND_CREDITS_DEFAULT;
+	if (!ni->ni_peertimeout)
+		ni->ni_peertimeout = peer_timeout;
+
+	if (!ni->ni_maxtxcredits)
+		ni->ni_maxtxcredits = credits;
+
+	if (!ni->ni_peertxcredits)
+		ni->ni_peertxcredits = peer_credits;
 
-	if (*kiblnd_tunables.kib_peertxcredits > IBLND_CREDITS_MAX)
-		*kiblnd_tunables.kib_peertxcredits = IBLND_CREDITS_MAX;
+	if (!ni->ni_peerrtrcredits)
+		ni->ni_peerrtrcredits = peer_buffer_credits;
 
-	if (*kiblnd_tunables.kib_peertxcredits > *kiblnd_tunables.kib_credits)
-		*kiblnd_tunables.kib_peertxcredits = *kiblnd_tunables.kib_credits;
+	if (ni->ni_peertxcredits < IBLND_CREDITS_DEFAULT)
+		ni->ni_peertxcredits = IBLND_CREDITS_DEFAULT;
 
-	if (*kiblnd_tunables.kib_peercredits_hiw < *kiblnd_tunables.kib_peertxcredits / 2)
-		*kiblnd_tunables.kib_peercredits_hiw = *kiblnd_tunables.kib_peertxcredits / 2;
+	if (ni->ni_peertxcredits > IBLND_CREDITS_MAX)
+		ni->ni_peertxcredits = IBLND_CREDITS_MAX;
 
-	if (*kiblnd_tunables.kib_peercredits_hiw >= *kiblnd_tunables.kib_peertxcredits)
-		*kiblnd_tunables.kib_peercredits_hiw = *kiblnd_tunables.kib_peertxcredits - 1;
+	if (ni->ni_peertxcredits > credits)
+		ni->ni_peertxcredits = credits;
 
-	if (*kiblnd_tunables.kib_map_on_demand < 0 ||
-	    *kiblnd_tunables.kib_map_on_demand > IBLND_MAX_RDMA_FRAGS)
-		*kiblnd_tunables.kib_map_on_demand = 0; /* disable map-on-demand */
+	if (!tunables->lnd_peercredits_hiw)
+		tunables->lnd_peercredits_hiw = peer_credits_hiw;
 
-	if (*kiblnd_tunables.kib_map_on_demand == 1)
-		*kiblnd_tunables.kib_map_on_demand = 2; /* don't make sense to create map if only one fragment */
+	if (tunables->lnd_peercredits_hiw < ni->ni_peertxcredits / 2)
+		tunables->lnd_peercredits_hiw = ni->ni_peertxcredits / 2;
 
-	if (!*kiblnd_tunables.kib_concurrent_sends) {
-		if (*kiblnd_tunables.kib_map_on_demand > 0 &&
-		    *kiblnd_tunables.kib_map_on_demand <= IBLND_MAX_RDMA_FRAGS / 8)
-			*kiblnd_tunables.kib_concurrent_sends = (*kiblnd_tunables.kib_peertxcredits) * 2;
-		else
-			*kiblnd_tunables.kib_concurrent_sends = (*kiblnd_tunables.kib_peertxcredits);
+	if (tunables->lnd_peercredits_hiw >= ni->ni_peertxcredits)
+		tunables->lnd_peercredits_hiw = ni->ni_peertxcredits - 1;
+
+	if (tunables->lnd_map_on_demand < 0 ||
+	    tunables->lnd_map_on_demand > IBLND_MAX_RDMA_FRAGS) {
+		/* disable map-on-demand */
+		tunables->lnd_map_on_demand = 0;
+	}
+
+	if (tunables->lnd_map_on_demand == 1) {
+		/* don't make sense to create map if only one fragment */
+		tunables->lnd_map_on_demand = 2;
+	}
+
+	if (!tunables->lnd_concurrent_sends) {
+		if (tunables->lnd_map_on_demand > 0 &&
+		    tunables->lnd_map_on_demand <= IBLND_MAX_RDMA_FRAGS / 8) {
+			tunables->lnd_concurrent_sends =
+						ni->ni_peertxcredits * 2;
+		} else {
+			tunables->lnd_concurrent_sends = ni->ni_peertxcredits;
+		}
 	}
 
-	if (*kiblnd_tunables.kib_concurrent_sends > *kiblnd_tunables.kib_peertxcredits * 2)
-		*kiblnd_tunables.kib_concurrent_sends = *kiblnd_tunables.kib_peertxcredits * 2;
+	if (tunables->lnd_concurrent_sends > ni->ni_peertxcredits * 2)
+		tunables->lnd_concurrent_sends = ni->ni_peertxcredits * 2;
 
-	if (*kiblnd_tunables.kib_concurrent_sends < *kiblnd_tunables.kib_peertxcredits / 2)
-		*kiblnd_tunables.kib_concurrent_sends = *kiblnd_tunables.kib_peertxcredits / 2;
+	if (tunables->lnd_concurrent_sends < ni->ni_peertxcredits / 2)
+		tunables->lnd_concurrent_sends = ni->ni_peertxcredits / 2;
 
-	if (*kiblnd_tunables.kib_concurrent_sends < *kiblnd_tunables.kib_peertxcredits) {
+	if (tunables->lnd_concurrent_sends < ni->ni_peertxcredits) {
 		CWARN("Concurrent sends %d is lower than message queue size: %d, performance may drop slightly.\n",
-		      *kiblnd_tunables.kib_concurrent_sends, *kiblnd_tunables.kib_peertxcredits);
+		      tunables->lnd_concurrent_sends, ni->ni_peertxcredits);
 	}
 
+	if (!tunables->lnd_fmr_pool_size)
+		tunables->lnd_fmr_pool_size = fmr_pool_size;
+	if (!tunables->lnd_fmr_flush_trigger)
+		tunables->lnd_fmr_flush_trigger = fmr_flush_trigger;
+	if (!tunables->lnd_fmr_cache)
+		tunables->lnd_fmr_cache = fmr_cache;
+
 	return 0;
 }
+
+void kiblnd_tunables_init(void)
+{
+	default_tunables.lnd_version = 0;
+	default_tunables.lnd_peercredits_hiw = peer_credits_hiw,
+	default_tunables.lnd_map_on_demand = map_on_demand;
+	default_tunables.lnd_concurrent_sends = concurrent_sends;
+	default_tunables.lnd_fmr_pool_size = fmr_pool_size;
+	default_tunables.lnd_fmr_flush_trigger = fmr_flush_trigger;
+	default_tunables.lnd_fmr_cache = fmr_cache;
+}
diff --git a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c
index cca7b2f7f..406c0e7a5 100644
--- a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c
+++ b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c
@@ -2582,7 +2582,6 @@ ksocknal_debug_peerhash(lnet_ni_t *ni)
 	}
 
 	read_unlock(&ksocknal_data.ksnd_global_lock);
-	return;
 }
 
 void
diff --git a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib.c b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib.c
index d4ce06d0a..964b4e338 100644
--- a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib.c
+++ b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib.c
@@ -675,7 +675,6 @@ ksocknal_lib_set_callback(struct socket *sock,  ksock_conn_t *conn)
 	sock->sk->sk_user_data = conn;
 	sock->sk->sk_data_ready = ksocknal_data_ready;
 	sock->sk->sk_write_space = ksocknal_write_space;
-	return;
 }
 
 void
@@ -695,8 +694,6 @@ ksocknal_lib_reset_callback(struct socket *sock, ksock_conn_t *conn)
 	 * sk_user_data is NULL.
 	 */
 	sock->sk->sk_user_data = NULL;
-
-	return ;
 }
 
 int
diff --git a/drivers/staging/lustre/lnet/libcfs/debug.c b/drivers/staging/lustre/lnet/libcfs/debug.c
index c3d628bac..8c260c3d5 100644
--- a/drivers/staging/lustre/lnet/libcfs/debug.c
+++ b/drivers/staging/lustre/lnet/libcfs/debug.c
@@ -232,130 +232,24 @@ int libcfs_panic_in_progress;
 static const char *
 libcfs_debug_subsys2str(int subsys)
 {
-	switch (1 << subsys) {
-	default:
+	static const char *libcfs_debug_subsystems[] = LIBCFS_DEBUG_SUBSYS_NAMES;
+
+	if (subsys >= ARRAY_SIZE(libcfs_debug_subsystems))
 		return NULL;
-	case S_UNDEFINED:
-		return "undefined";
-	case S_MDC:
-		return "mdc";
-	case S_MDS:
-		return "mds";
-	case S_OSC:
-		return "osc";
-	case S_OST:
-		return "ost";
-	case S_CLASS:
-		return "class";
-	case S_LOG:
-		return "log";
-	case S_LLITE:
-		return "llite";
-	case S_RPC:
-		return "rpc";
-	case S_LNET:
-		return "lnet";
-	case S_LND:
-		return "lnd";
-	case S_PINGER:
-		return "pinger";
-	case S_FILTER:
-		return "filter";
-	case S_ECHO:
-		return "echo";
-	case S_LDLM:
-		return "ldlm";
-	case S_LOV:
-		return "lov";
-	case S_LQUOTA:
-		return "lquota";
-	case S_OSD:
-		return "osd";
-	case S_LFSCK:
-		return "lfsck";
-	case S_LMV:
-		return "lmv";
-	case S_SEC:
-		return "sec";
-	case S_GSS:
-		return "gss";
-	case S_MGC:
-		return "mgc";
-	case S_MGS:
-		return "mgs";
-	case S_FID:
-		return "fid";
-	case S_FLD:
-		return "fld";
-	}
+
+	return libcfs_debug_subsystems[subsys];
 }
 
 /* libcfs_debug_token2mask() expects the returned string in lower-case */
 static const char *
 libcfs_debug_dbg2str(int debug)
 {
-	switch (1 << debug) {
-	default:
+	static const char *libcfs_debug_masks[] = LIBCFS_DEBUG_MASKS_NAMES;
+
+	if (debug >= ARRAY_SIZE(libcfs_debug_masks))
 		return NULL;
-	case D_TRACE:
-		return "trace";
-	case D_INODE:
-		return "inode";
-	case D_SUPER:
-		return "super";
-	case D_EXT2:
-		return "ext2";
-	case D_MALLOC:
-		return "malloc";
-	case D_CACHE:
-		return "cache";
-	case D_INFO:
-		return "info";
-	case D_IOCTL:
-		return "ioctl";
-	case D_NETERROR:
-		return "neterror";
-	case D_NET:
-		return "net";
-	case D_WARNING:
-		return "warning";
-	case D_BUFFS:
-		return "buffs";
-	case D_OTHER:
-		return "other";
-	case D_DENTRY:
-		return "dentry";
-	case D_NETTRACE:
-		return "nettrace";
-	case D_PAGE:
-		return "page";
-	case D_DLMTRACE:
-		return "dlmtrace";
-	case D_ERROR:
-		return "error";
-	case D_EMERG:
-		return "emerg";
-	case D_HA:
-		return "ha";
-	case D_RPCTRACE:
-		return "rpctrace";
-	case D_VFSTRACE:
-		return "vfstrace";
-	case D_READA:
-		return "reada";
-	case D_MMAP:
-		return "mmap";
-	case D_CONFIG:
-		return "config";
-	case D_CONSOLE:
-		return "console";
-	case D_QUOTA:
-		return "quota";
-	case D_SEC:
-		return "sec";
-	case D_LFSCK:
-		return "lfsck";
-	}
+
+	return libcfs_debug_masks[debug];
 }
 
 int
diff --git a/drivers/staging/lustre/lnet/libcfs/fail.c b/drivers/staging/lustre/lnet/libcfs/fail.c
index dadaf7685..086e690bd 100644
--- a/drivers/staging/lustre/lnet/libcfs/fail.c
+++ b/drivers/staging/lustre/lnet/libcfs/fail.c
@@ -41,6 +41,9 @@ EXPORT_SYMBOL(cfs_fail_loc);
 unsigned int cfs_fail_val;
 EXPORT_SYMBOL(cfs_fail_val);
 
+int cfs_fail_err;
+EXPORT_SYMBOL(cfs_fail_err);
+
 DECLARE_WAIT_QUEUE_HEAD(cfs_race_waitq);
 EXPORT_SYMBOL(cfs_race_waitq);
 
diff --git a/drivers/staging/lustre/lnet/libcfs/hash.c b/drivers/staging/lustre/lnet/libcfs/hash.c
index f60feb3a3..cc45ed82b 100644
--- a/drivers/staging/lustre/lnet/libcfs/hash.c
+++ b/drivers/staging/lustre/lnet/libcfs/hash.c
@@ -942,10 +942,10 @@ cfs_hash_buckets_realloc(struct cfs_hash *hs, struct cfs_hash_bucket **old_bkts,
  * @flags    - CFS_HASH_REHASH enable synamic hash resizing
  *	     - CFS_HASH_SORT enable chained hash sort
  */
-static int cfs_hash_rehash_worker(cfs_workitem_t *wi);
+static int cfs_hash_rehash_worker(struct cfs_workitem *wi);
 
 #if CFS_HASH_DEBUG_LEVEL >= CFS_HASH_DEBUG_1
-static int cfs_hash_dep_print(cfs_workitem_t *wi)
+static int cfs_hash_dep_print(struct cfs_workitem *wi)
 {
 	struct cfs_hash *hs = container_of(wi, struct cfs_hash, hs_dep_wi);
 	int dep;
@@ -1847,7 +1847,7 @@ cfs_hash_rehash_bd(struct cfs_hash *hs, struct cfs_hash_bd *old)
 }
 
 static int
-cfs_hash_rehash_worker(cfs_workitem_t *wi)
+cfs_hash_rehash_worker(struct cfs_workitem *wi)
 {
 	struct cfs_hash *hs = container_of(wi, struct cfs_hash, hs_rehash_wi);
 	struct cfs_hash_bucket **bkts;
diff --git a/drivers/staging/lustre/lnet/libcfs/libcfs_lock.c b/drivers/staging/lustre/lnet/libcfs/libcfs_lock.c
index 2de9eeae0..83543f928 100644
--- a/drivers/staging/lustre/lnet/libcfs/libcfs_lock.c
+++ b/drivers/staging/lustre/lnet/libcfs/libcfs_lock.c
@@ -49,7 +49,8 @@ EXPORT_SYMBOL(cfs_percpt_lock_free);
  * reason we always allocate cacheline-aligned memory block.
  */
 struct cfs_percpt_lock *
-cfs_percpt_lock_alloc(struct cfs_cpt_table *cptab)
+cfs_percpt_lock_create(struct cfs_cpt_table *cptab,
+		       struct lock_class_key *keys)
 {
 	struct cfs_percpt_lock	*pcl;
 	spinlock_t		*lock;
@@ -67,12 +68,18 @@ cfs_percpt_lock_alloc(struct cfs_cpt_table *cptab)
 		return NULL;
 	}
 
-	cfs_percpt_for_each(lock, i, pcl->pcl_locks)
+	if (!keys)
+		CWARN("Cannot setup class key for percpt lock, you may see recursive locking warnings which are actually fake.\n");
+
+	cfs_percpt_for_each(lock, i, pcl->pcl_locks) {
 		spin_lock_init(lock);
+		if (keys != NULL)
+			lockdep_set_class(lock, &keys[i]);
+	}
 
 	return pcl;
 }
-EXPORT_SYMBOL(cfs_percpt_lock_alloc);
+EXPORT_SYMBOL(cfs_percpt_lock_create);
 
 /**
  * lock a CPU partition
@@ -142,44 +149,3 @@ cfs_percpt_unlock(struct cfs_percpt_lock *pcl, int index)
 	}
 }
 EXPORT_SYMBOL(cfs_percpt_unlock);
-
-/** free cpu-partition refcount */
-void
-cfs_percpt_atomic_free(atomic_t **refs)
-{
-	cfs_percpt_free(refs);
-}
-EXPORT_SYMBOL(cfs_percpt_atomic_free);
-
-/** allocate cpu-partition refcount with initial value @init_val */
-atomic_t **
-cfs_percpt_atomic_alloc(struct cfs_cpt_table *cptab, int init_val)
-{
-	atomic_t	**refs;
-	atomic_t	*ref;
-	int		i;
-
-	refs = cfs_percpt_alloc(cptab, sizeof(*ref));
-	if (!refs)
-		return NULL;
-
-	cfs_percpt_for_each(ref, i, refs)
-		atomic_set(ref, init_val);
-	return refs;
-}
-EXPORT_SYMBOL(cfs_percpt_atomic_alloc);
-
-/** return sum of cpu-partition refs */
-int
-cfs_percpt_atomic_summary(atomic_t **refs)
-{
-	atomic_t	*ref;
-	int		i;
-	int		val = 0;
-
-	cfs_percpt_for_each(ref, i, refs)
-		val += atomic_read(ref);
-
-	return val;
-}
-EXPORT_SYMBOL(cfs_percpt_atomic_summary);
diff --git a/drivers/staging/lustre/lnet/libcfs/libcfs_mem.c b/drivers/staging/lustre/lnet/libcfs/libcfs_mem.c
index c5a695151..d0e81bb41 100644
--- a/drivers/staging/lustre/lnet/libcfs/libcfs_mem.c
+++ b/drivers/staging/lustre/lnet/libcfs/libcfs_mem.c
@@ -114,34 +114,6 @@ cfs_percpt_number(void *vars)
 }
 EXPORT_SYMBOL(cfs_percpt_number);
 
-/*
- * return memory block shadowed from current CPU
- */
-void *
-cfs_percpt_current(void *vars)
-{
-	struct cfs_var_array *arr;
-	int    cpt;
-
-	arr = container_of(vars, struct cfs_var_array, va_ptrs[0]);
-	cpt = cfs_cpt_current(arr->va_cptab, 0);
-	if (cpt < 0)
-		return NULL;
-
-	return arr->va_ptrs[cpt];
-}
-
-void *
-cfs_percpt_index(void *vars, int idx)
-{
-	struct cfs_var_array *arr;
-
-	arr = container_of(vars, struct cfs_var_array, va_ptrs[0]);
-
-	LASSERT(idx >= 0 && idx < arr->va_count);
-	return arr->va_ptrs[idx];
-}
-
 /*
  * free variable array, see more detail in cfs_array_alloc
  */
diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c b/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c
index 389fb9eee..b52518c54 100644
--- a/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c
+++ b/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c
@@ -755,8 +755,13 @@ cfs_cpt_table_create(int ncpt)
 			struct cfs_cpu_partition *part;
 			int    n;
 
-			if (cpt >= ncpt)
-				goto failed;
+			/*
+			 * Each emulated NUMA node has all allowed CPUs in
+			 * the mask.
+			 * End loop when all partitions have assigned CPUs.
+			 */
+			if (cpt == ncpt)
+				break;
 
 			part = &cptab->ctb_parts[cpt];
 
diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-crypto.c b/drivers/staging/lustre/lnet/libcfs/linux/linux-crypto.c
index 8c9377ed8..84f9b7b47 100644
--- a/drivers/staging/lustre/lnet/libcfs/linux/linux-crypto.c
+++ b/drivers/staging/lustre/lnet/libcfs/linux/linux-crypto.c
@@ -30,13 +30,34 @@
 #include <crypto/hash.h>
 #include <linux/scatterlist.h>
 #include "../../../include/linux/libcfs/libcfs.h"
+#include "../../../include/linux/libcfs/libcfs_crypto.h"
 #include "linux-crypto.h"
+
 /**
- *  Array of  hash algorithm speed in MByte per second
+ *  Array of hash algorithm speed in MByte per second
  */
 static int cfs_crypto_hash_speeds[CFS_HASH_ALG_MAX];
 
-static int cfs_crypto_hash_alloc(unsigned char alg_id,
+/**
+ * Initialize the state descriptor for the specified hash algorithm.
+ *
+ * An internal routine to allocate the hash-specific state in \a hdesc for
+ * use with cfs_crypto_hash_digest() to compute the hash of a single message,
+ * though possibly in multiple chunks.  The descriptor internal state should
+ * be freed with cfs_crypto_hash_final().
+ *
+ * \param[in]	  hash_alg	hash algorithm id (CFS_HASH_ALG_*)
+ * \param[out]	  type		pointer to the hash description in hash_types[]
+ *				array
+ * \param[in,out] hdesc		hash state descriptor to be initialized
+ * \param[in]	  key		initial hash value/state, NULL to use default
+ *				value
+ * \param[in]	  key_len	length of \a key
+ *
+ * \retval			0 on success
+ * \retval			negative errno on failure
+ */
+static int cfs_crypto_hash_alloc(enum cfs_crypto_hash_alg hash_alg,
 				 const struct cfs_crypto_hash_type **type,
 				 struct ahash_request **req,
 				 unsigned char *key,
@@ -45,11 +66,11 @@ static int cfs_crypto_hash_alloc(unsigned char alg_id,
 	struct crypto_ahash *tfm;
 	int     err = 0;
 
-	*type = cfs_crypto_hash_type(alg_id);
+	*type = cfs_crypto_hash_type(hash_alg);
 
 	if (!*type) {
 		CWARN("Unsupported hash algorithm id = %d, max id is %d\n",
-		      alg_id, CFS_HASH_ALG_MAX);
+		      hash_alg, CFS_HASH_ALG_MAX);
 		return -EINVAL;
 	}
 	tfm = crypto_alloc_ahash((*type)->cht_name, 0, CRYPTO_ALG_ASYNC);
@@ -70,12 +91,6 @@ static int cfs_crypto_hash_alloc(unsigned char alg_id,
 
 	ahash_request_set_callback(*req, 0, NULL, NULL);
 
-	/** Shash have different logic for initialization then digest
-	 * shash: crypto_hash_setkey, crypto_hash_init
-	 * digest: crypto_digest_init, crypto_digest_setkey
-	 * Skip this function for digest, because we use shash logic at
-	 * cfs_crypto_hash_alloc.
-	 */
 	if (key)
 		err = crypto_ahash_setkey(tfm, key, key_len);
 	else if ((*type)->cht_key != 0)
@@ -90,7 +105,7 @@ static int cfs_crypto_hash_alloc(unsigned char alg_id,
 
 	CDEBUG(D_INFO, "Using crypto hash: %s (%s) speed %d MB/s\n",
 	       crypto_ahash_alg_name(tfm), crypto_ahash_driver_name(tfm),
-	       cfs_crypto_hash_speeds[alg_id]);
+	       cfs_crypto_hash_speeds[hash_alg]);
 
 	err = crypto_ahash_init(*req);
 	if (err) {
@@ -100,7 +115,33 @@ static int cfs_crypto_hash_alloc(unsigned char alg_id,
 	return err;
 }
 
-int cfs_crypto_hash_digest(unsigned char alg_id,
+/**
+ * Calculate hash digest for the passed buffer.
+ *
+ * This should be used when computing the hash on a single contiguous buffer.
+ * It combines the hash initialization, computation, and cleanup.
+ *
+ * \param[in]	  hash_alg	id of hash algorithm (CFS_HASH_ALG_*)
+ * \param[in]	  buf		data buffer on which to compute hash
+ * \param[in]	  buf_len	length of \a buf in bytes
+ * \param[in]	  key		initial value/state for algorithm,
+ *				if \a key = NULL use default initial value
+ * \param[in]	  key_len	length of \a key in bytes
+ * \param[out]	  hash		pointer to computed hash value,
+ *				if \a hash = NULL then \a hash_len is to digest
+ *				size in bytes, retval -ENOSPC
+ * \param[in,out] hash_len	size of \a hash buffer
+ *
+ * \retval -EINVAL		\a buf, \a buf_len, \a hash_len,
+ *				\a hash_alg invalid
+ * \retval -ENOENT		\a hash_alg is unsupported
+ * \retval -ENOSPC		\a hash is NULL, or \a hash_len less than
+ *				digest size
+ * \retval			0 for success
+ * \retval			negative errno for other errors from lower
+ *				layers.
+ */
+int cfs_crypto_hash_digest(enum cfs_crypto_hash_alg hash_alg,
 			   const void *buf, unsigned int buf_len,
 			   unsigned char *key, unsigned int key_len,
 			   unsigned char *hash, unsigned int *hash_len)
@@ -113,7 +154,7 @@ int cfs_crypto_hash_digest(unsigned char alg_id,
 	if (!buf || buf_len == 0 || !hash_len)
 		return -EINVAL;
 
-	err = cfs_crypto_hash_alloc(alg_id, &type, &req, key, key_len);
+	err = cfs_crypto_hash_alloc(hash_alg, &type, &req, key, key_len);
 	if (err != 0)
 		return err;
 
@@ -134,15 +175,32 @@ int cfs_crypto_hash_digest(unsigned char alg_id,
 }
 EXPORT_SYMBOL(cfs_crypto_hash_digest);
 
+/**
+ * Allocate and initialize desriptor for hash algorithm.
+ *
+ * This should be used to initialize a hash descriptor for multiple calls
+ * to a single hash function when computing the hash across multiple
+ * separate buffers or pages using cfs_crypto_hash_update{,_page}().
+ *
+ * The hash descriptor should be freed with cfs_crypto_hash_final().
+ *
+ * \param[in] hash_alg	algorithm id (CFS_HASH_ALG_*)
+ * \param[in] key	initial value/state for algorithm, if \a key = NULL
+ *			use default initial value
+ * \param[in] key_len	length of \a key in bytes
+ *
+ * \retval		pointer to descriptor of hash instance
+ * \retval		ERR_PTR(errno) in case of error
+ */
 struct cfs_crypto_hash_desc *
-	cfs_crypto_hash_init(unsigned char alg_id,
-			     unsigned char *key, unsigned int key_len)
+cfs_crypto_hash_init(enum cfs_crypto_hash_alg hash_alg,
+		     unsigned char *key, unsigned int key_len)
 {
 	struct ahash_request *req;
 	int		     err;
 	const struct cfs_crypto_hash_type       *type;
 
-	err = cfs_crypto_hash_alloc(alg_id, &type, &req, key, key_len);
+	err = cfs_crypto_hash_alloc(hash_alg, &type, &req, key, key_len);
 
 	if (err)
 		return ERR_PTR(err);
@@ -150,6 +208,17 @@ struct cfs_crypto_hash_desc *
 }
 EXPORT_SYMBOL(cfs_crypto_hash_init);
 
+/**
+ * Update hash digest computed on data within the given \a page
+ *
+ * \param[in] hdesc	hash state descriptor
+ * \param[in] page	data page on which to compute the hash
+ * \param[in] offset	offset within \a page at which to start hash
+ * \param[in] len	length of data on which to compute hash
+ *
+ * \retval		0 for success
+ * \retval		negative errno on failure
+ */
 int cfs_crypto_hash_update_page(struct cfs_crypto_hash_desc *hdesc,
 				struct page *page, unsigned int offset,
 				unsigned int len)
@@ -158,13 +227,23 @@ int cfs_crypto_hash_update_page(struct cfs_crypto_hash_desc *hdesc,
 	struct scatterlist sl;
 
 	sg_init_table(&sl, 1);
-	sg_set_page(&sl, page, len, offset & ~CFS_PAGE_MASK);
+	sg_set_page(&sl, page, len, offset & ~PAGE_MASK);
 
 	ahash_request_set_crypt(req, &sl, NULL, sl.length);
 	return crypto_ahash_update(req);
 }
 EXPORT_SYMBOL(cfs_crypto_hash_update_page);
 
+/**
+ * Update hash digest computed on the specified data
+ *
+ * \param[in] hdesc	hash state descriptor
+ * \param[in] buf	data buffer on which to compute the hash
+ * \param[in] buf_len	length of \buf on which to compute hash
+ *
+ * \retval		0 for success
+ * \retval		negative errno on failure
+ */
 int cfs_crypto_hash_update(struct cfs_crypto_hash_desc *hdesc,
 			   const void *buf, unsigned int buf_len)
 {
@@ -178,7 +257,18 @@ int cfs_crypto_hash_update(struct cfs_crypto_hash_desc *hdesc,
 }
 EXPORT_SYMBOL(cfs_crypto_hash_update);
 
-/*      If hash_len pointer is NULL - destroy descriptor. */
+/**
+ * Finish hash calculation, copy hash digest to buffer, clean up hash descriptor
+ *
+ * \param[in]	  hdesc		hash descriptor
+ * \param[out]	  hash		pointer to hash buffer to store hash digest
+ * \param[in,out] hash_len	pointer to hash buffer size, if \a hdesc = NULL
+ *				only free \a hdesc instead of computing the hash
+ *
+ * \retval	0 for success
+ * \retval	-EOVERFLOW if hash_len is too small for the hash digest
+ * \retval	negative errno for other errors from lower layers
+ */
 int cfs_crypto_hash_final(struct cfs_crypto_hash_desc *hdesc,
 			  unsigned char *hash, unsigned int *hash_len)
 {
@@ -186,99 +276,153 @@ int cfs_crypto_hash_final(struct cfs_crypto_hash_desc *hdesc,
 	struct ahash_request *req = (void *)hdesc;
 	int size = crypto_ahash_digestsize(crypto_ahash_reqtfm(req));
 
-	if (!hash_len) {
-		crypto_free_ahash(crypto_ahash_reqtfm(req));
-		ahash_request_free(req);
-		return 0;
+	if (!hash || !hash_len) {
+		err = 0;
+		goto free_ahash;
 	}
-	if (!hash || *hash_len < size) {
-		*hash_len = size;
-		return -ENOSPC;
+	if (*hash_len < size) {
+		err = -EOVERFLOW;
+		goto free_ahash;
 	}
+
 	ahash_request_set_crypt(req, NULL, hash, 0);
 	err = crypto_ahash_final(req);
-
-	if (err < 0) {
-		/* May be caller can fix error */
-		return err;
-	}
+	if (!err)
+		*hash_len = size;
+free_ahash:
 	crypto_free_ahash(crypto_ahash_reqtfm(req));
 	ahash_request_free(req);
 	return err;
 }
 EXPORT_SYMBOL(cfs_crypto_hash_final);
 
-static void cfs_crypto_performance_test(unsigned char alg_id,
-					const unsigned char *buf,
-					unsigned int buf_len)
+/**
+ * Compute the speed of specified hash function
+ *
+ * Run a speed test on the given hash algorithm on buffer of the given size.
+ * The speed is stored internally in the cfs_crypto_hash_speeds[] array, and
+ * is available through the cfs_crypto_hash_speed() function.
+ *
+ * \param[in] hash_alg	hash algorithm id (CFS_HASH_ALG_*)
+ * \param[in] buf	data buffer on which to compute the hash
+ * \param[in] buf_len	length of \buf on which to compute hash
+ */
+static void cfs_crypto_performance_test(enum cfs_crypto_hash_alg hash_alg)
 {
+	int buf_len = max(PAGE_SIZE, 1048576UL);
+	void *buf;
 	unsigned long		   start, end;
 	int			     bcount, err = 0;
-	int			     sec = 1; /* do test only 1 sec */
-	unsigned char		   hash[64];
-	unsigned int		    hash_len = 64;
-
-	for (start = jiffies, end = start + sec * HZ, bcount = 0;
-	     time_before(jiffies, end); bcount++) {
-		err = cfs_crypto_hash_digest(alg_id, buf, buf_len, NULL, 0,
-					     hash, &hash_len);
+	struct page *page;
+	unsigned char hash[CFS_CRYPTO_HASH_DIGESTSIZE_MAX];
+	unsigned int hash_len = sizeof(hash);
+
+	page = alloc_page(GFP_KERNEL);
+	if (!page) {
+		err = -ENOMEM;
+		goto out_err;
+	}
+
+	buf = kmap(page);
+	memset(buf, 0xAD, PAGE_SIZE);
+	kunmap(page);
+
+	for (start = jiffies, end = start + msecs_to_jiffies(MSEC_PER_SEC),
+	     bcount = 0; time_before(jiffies, end); bcount++) {
+		struct cfs_crypto_hash_desc *hdesc;
+		int i;
+
+		hdesc = cfs_crypto_hash_init(hash_alg, NULL, 0);
+		if (IS_ERR(hdesc)) {
+			err = PTR_ERR(hdesc);
+			break;
+		}
+
+		for (i = 0; i < buf_len / PAGE_SIZE; i++) {
+			err = cfs_crypto_hash_update_page(hdesc, page, 0,
+							  PAGE_SIZE);
+			if (err)
+				break;
+		}
+
+		err = cfs_crypto_hash_final(hdesc, hash, &hash_len);
 		if (err)
 			break;
 	}
 	end = jiffies;
-
+	__free_page(page);
+out_err:
 	if (err) {
-		cfs_crypto_hash_speeds[alg_id] =  -1;
-		CDEBUG(D_INFO, "Crypto hash algorithm %s, err = %d\n",
-		       cfs_crypto_hash_name(alg_id), err);
+		cfs_crypto_hash_speeds[hash_alg] = err;
+		CDEBUG(D_INFO, "Crypto hash algorithm %s test error: rc = %d\n",
+		       cfs_crypto_hash_name(hash_alg), err);
 	} else {
 		unsigned long   tmp;
 
 		tmp = ((bcount * buf_len / jiffies_to_msecs(end - start)) *
 		       1000) / (1024 * 1024);
-		cfs_crypto_hash_speeds[alg_id] = (int)tmp;
+		cfs_crypto_hash_speeds[hash_alg] = (int)tmp;
+		CDEBUG(D_CONFIG, "Crypto hash algorithm %s speed = %d MB/s\n",
+		       cfs_crypto_hash_name(hash_alg),
+		       cfs_crypto_hash_speeds[hash_alg]);
 	}
-	CDEBUG(D_INFO, "Crypto hash algorithm %s speed = %d MB/s\n",
-	       cfs_crypto_hash_name(alg_id), cfs_crypto_hash_speeds[alg_id]);
 }
 
-int cfs_crypto_hash_speed(unsigned char hash_alg)
+/**
+ * hash speed in Mbytes per second for valid hash algorithm
+ *
+ * Return the performance of the specified \a hash_alg that was previously
+ * computed using cfs_crypto_performance_test().
+ *
+ * \param[in] hash_alg	hash algorithm id (CFS_HASH_ALG_*)
+ *
+ * \retval		positive speed of the hash function in MB/s
+ * \retval		-ENOENT if \a hash_alg is unsupported
+ * \retval		negative errno if \a hash_alg speed is unavailable
+ */
+int cfs_crypto_hash_speed(enum cfs_crypto_hash_alg hash_alg)
 {
 	if (hash_alg < CFS_HASH_ALG_MAX)
 		return cfs_crypto_hash_speeds[hash_alg];
-	return -1;
+	return -ENOENT;
 }
 EXPORT_SYMBOL(cfs_crypto_hash_speed);
 
 /**
- * Do performance test for all hash algorithms.
+ * Run the performance test for all hash algorithms.
+ *
+ * Run the cfs_crypto_performance_test() benchmark for all of the available
+ * hash functions using a 1MB buffer size.  This is a reasonable buffer size
+ * for Lustre RPCs, even if the actual RPC size is larger or smaller.
+ *
+ * Since the setup cost and computation speed of various hash algorithms is
+ * a function of the buffer size (and possibly internal contention of offload
+ * engines), this speed only represents an estimate of the actual speed under
+ * actual usage, but is reasonable for comparing available algorithms.
+ *
+ * The actual speeds are available via cfs_crypto_hash_speed() for later
+ * comparison.
+ *
+ * \retval	0 on success
+ * \retval	-ENOMEM if no memory is available for test buffer
  */
 static int cfs_crypto_test_hashes(void)
 {
-	unsigned char	   i;
-	unsigned char	   *data;
-	unsigned int	    j;
-	/* Data block size for testing hash. Maximum
-	 * kmalloc size for 2.6.18 kernel is 128K
-	 */
-	unsigned int	    data_len = 1 * 128 * 1024;
-
-	data = kmalloc(data_len, 0);
-	if (!data)
-		return -ENOMEM;
+	enum cfs_crypto_hash_alg hash_alg;
 
-	for (j = 0; j < data_len; j++)
-		data[j] = j & 0xff;
+	for (hash_alg = 0; hash_alg < CFS_HASH_ALG_MAX; hash_alg++)
+		cfs_crypto_performance_test(hash_alg);
 
-	for (i = 0; i < CFS_HASH_ALG_MAX; i++)
-		cfs_crypto_performance_test(i, data, data_len);
-
-	kfree(data);
 	return 0;
 }
 
 static int adler32;
 
+/**
+ * Register available hash functions
+ *
+ * \retval	0
+ */
 int cfs_crypto_register(void)
 {
 	request_module("crc32c");
@@ -290,6 +434,9 @@ int cfs_crypto_register(void)
 	return 0;
 }
 
+/**
+ * Unregister previously registered hash functions
+ */
 void cfs_crypto_unregister(void)
 {
 	if (adler32 == 0)
diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-module.c b/drivers/staging/lustre/lnet/libcfs/linux/linux-module.c
index ebc60ac9b..d89f71ee4 100644
--- a/drivers/staging/lustre/lnet/libcfs/linux/linux-module.c
+++ b/drivers/staging/lustre/lnet/libcfs/linux/linux-module.c
@@ -40,10 +40,75 @@
 
 #define LNET_MINOR 240
 
+static inline size_t libcfs_ioctl_packlen(struct libcfs_ioctl_data *data)
+{
+	size_t len = sizeof(*data);
+
+	len += cfs_size_round(data->ioc_inllen1);
+	len += cfs_size_round(data->ioc_inllen2);
+	return len;
+}
+
+static inline bool libcfs_ioctl_is_invalid(struct libcfs_ioctl_data *data)
+{
+	if (data->ioc_hdr.ioc_len > BIT(30)) {
+		CERROR("LIBCFS ioctl: ioc_len larger than 1<<30\n");
+		return true;
+	}
+	if (data->ioc_inllen1 > BIT(30)) {
+		CERROR("LIBCFS ioctl: ioc_inllen1 larger than 1<<30\n");
+		return true;
+	}
+	if (data->ioc_inllen2 > BIT(30)) {
+		CERROR("LIBCFS ioctl: ioc_inllen2 larger than 1<<30\n");
+		return true;
+	}
+	if (data->ioc_inlbuf1 && !data->ioc_inllen1) {
+		CERROR("LIBCFS ioctl: inlbuf1 pointer but 0 length\n");
+		return true;
+	}
+	if (data->ioc_inlbuf2 && !data->ioc_inllen2) {
+		CERROR("LIBCFS ioctl: inlbuf2 pointer but 0 length\n");
+		return true;
+	}
+	if (data->ioc_pbuf1 && !data->ioc_plen1) {
+		CERROR("LIBCFS ioctl: pbuf1 pointer but 0 length\n");
+		return true;
+	}
+	if (data->ioc_pbuf2 && !data->ioc_plen2) {
+		CERROR("LIBCFS ioctl: pbuf2 pointer but 0 length\n");
+		return true;
+	}
+	if (data->ioc_plen1 && !data->ioc_pbuf1) {
+		CERROR("LIBCFS ioctl: plen1 nonzero but no pbuf1 pointer\n");
+		return true;
+	}
+	if (data->ioc_plen2 && !data->ioc_pbuf2) {
+		CERROR("LIBCFS ioctl: plen2 nonzero but no pbuf2 pointer\n");
+		return true;
+	}
+	if ((__u32)libcfs_ioctl_packlen(data) != data->ioc_hdr.ioc_len) {
+		CERROR("LIBCFS ioctl: packlen != ioc_len\n");
+		return true;
+	}
+	if (data->ioc_inllen1 &&
+	    data->ioc_bulk[data->ioc_inllen1 - 1] != '\0') {
+		CERROR("LIBCFS ioctl: inlbuf1 not 0 terminated\n");
+		return true;
+	}
+	if (data->ioc_inllen2 &&
+	    data->ioc_bulk[cfs_size_round(data->ioc_inllen1) +
+			   data->ioc_inllen2 - 1] != '\0') {
+		CERROR("LIBCFS ioctl: inlbuf2 not 0 terminated\n");
+		return true;
+	}
+	return false;
+}
+
 int libcfs_ioctl_data_adjust(struct libcfs_ioctl_data *data)
 {
 	if (libcfs_ioctl_is_invalid(data)) {
-		CERROR("LNET: ioctl not correctly formatted\n");
+		CERROR("libcfs ioctl: parameter not correctly formatted\n");
 		return -EINVAL;
 	}
 
@@ -57,68 +122,47 @@ int libcfs_ioctl_data_adjust(struct libcfs_ioctl_data *data)
 	return 0;
 }
 
-int libcfs_ioctl_getdata_len(const struct libcfs_ioctl_hdr __user *arg,
-			     __u32 *len)
+int libcfs_ioctl_getdata(struct libcfs_ioctl_hdr **hdr_pp,
+			 const struct libcfs_ioctl_hdr __user *uhdr)
 {
 	struct libcfs_ioctl_hdr hdr;
+	int err = 0;
 
-	if (copy_from_user(&hdr, arg, sizeof(hdr)))
+	if (copy_from_user(&hdr, uhdr, sizeof(hdr)))
 		return -EFAULT;
 
 	if (hdr.ioc_version != LIBCFS_IOCTL_VERSION &&
 	    hdr.ioc_version != LIBCFS_IOCTL_VERSION2) {
-		CERROR("LNET: version mismatch expected %#x, got %#x\n",
+		CERROR("libcfs ioctl: version mismatch expected %#x, got %#x\n",
 		       LIBCFS_IOCTL_VERSION, hdr.ioc_version);
 		return -EINVAL;
 	}
 
-	*len = hdr.ioc_len;
-
-	return 0;
-}
-
-int libcfs_ioctl_popdata(void __user *arg, void *data, int size)
-{
-	if (copy_to_user(arg, data, size))
-		return -EFAULT;
-	return 0;
-}
-
-static int
-libcfs_psdev_open(struct inode *inode, struct file *file)
-{
-	int    rc = 0;
+	if (hdr.ioc_len < sizeof(struct libcfs_ioctl_data)) {
+		CERROR("libcfs ioctl: user buffer too small for ioctl\n");
+		return -EINVAL;
+	}
 
-	if (!inode)
+	if (hdr.ioc_len > LIBCFS_IOC_DATA_MAX) {
+		CERROR("libcfs ioctl: user buffer is too large %d/%d\n",
+		       hdr.ioc_len, LIBCFS_IOC_DATA_MAX);
 		return -EINVAL;
-	if (libcfs_psdev_ops.p_open)
-		rc = libcfs_psdev_ops.p_open(0, NULL);
-	else
-		return -EPERM;
-	return rc;
-}
+	}
 
-/* called when closing /dev/device */
-static int
-libcfs_psdev_release(struct inode *inode, struct file *file)
-{
-	int    rc = 0;
+	LIBCFS_ALLOC(*hdr_pp, hdr.ioc_len);
+	if (!*hdr_pp)
+		return -ENOMEM;
 
-	if (!inode)
-		return -EINVAL;
-	if (libcfs_psdev_ops.p_close)
-		rc = libcfs_psdev_ops.p_close(0, NULL);
-	else
-		rc = -EPERM;
-	return rc;
+	if (copy_from_user(*hdr_pp, uhdr, hdr.ioc_len)) {
+		LIBCFS_FREE(*hdr_pp, hdr.ioc_len);
+		err = -EFAULT;
+	}
+	return err;
 }
 
-static long libcfs_ioctl(struct file *file,
-			 unsigned int cmd, unsigned long arg)
+static long
+libcfs_psdev_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 {
-	struct cfs_psdev_file	 pfile;
-	int    rc = 0;
-
 	if (!capable(CAP_SYS_ADMIN))
 		return -EACCES;
 
@@ -130,26 +174,12 @@ static long libcfs_ioctl(struct file *file,
 		return -EINVAL;
 	}
 
-	/* Handle platform-dependent IOC requests */
-	switch (cmd) {
-	case IOC_LIBCFS_PANIC:
-		if (!capable(CFS_CAP_SYS_BOOT))
-			return -EPERM;
-		panic("debugctl-invoked panic");
-		return 0;
-	}
-
-	if (libcfs_psdev_ops.p_ioctl)
-		rc = libcfs_psdev_ops.p_ioctl(&pfile, cmd, (void __user *)arg);
-	else
-		rc = -EPERM;
-	return rc;
+	return libcfs_ioctl(cmd, (void __user *)arg);
 }
 
 static const struct file_operations libcfs_fops = {
-	.unlocked_ioctl	= libcfs_ioctl,
-	.open		= libcfs_psdev_open,
-	.release	= libcfs_psdev_release,
+	.owner		= THIS_MODULE,
+	.unlocked_ioctl	= libcfs_psdev_ioctl,
 };
 
 struct miscdevice libcfs_dev = {
diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-prim.c b/drivers/staging/lustre/lnet/libcfs/linux/linux-prim.c
index 890844602..bbe19a684 100644
--- a/drivers/staging/lustre/lnet/libcfs/linux/linux-prim.c
+++ b/drivers/staging/lustre/lnet/libcfs/linux/linux-prim.c
@@ -46,30 +46,6 @@
 #include <linux/kgdb.h>
 #endif
 
-/**
- * wait_queue_t of Linux (version < 2.6.34) is a FIFO list for exclusively
- * waiting threads, which is not always desirable because all threads will
- * be waken up again and again, even user only needs a few of them to be
- * active most time. This is not good for performance because cache can
- * be polluted by different threads.
- *
- * LIFO list can resolve this problem because we always wakeup the most
- * recent active thread by default.
- *
- * NB: please don't call non-exclusive & exclusive wait on the same
- * waitq if add_wait_queue_exclusive_head is used.
- */
-void
-add_wait_queue_exclusive_head(wait_queue_head_t *waitq, wait_queue_t *link)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&waitq->lock, flags);
-	__add_wait_queue_exclusive(waitq, link);
-	spin_unlock_irqrestore(&waitq->lock, flags);
-}
-EXPORT_SYMBOL(add_wait_queue_exclusive_head);
-
 sigset_t
 cfs_block_allsigs(void)
 {
@@ -128,13 +104,6 @@ cfs_restore_sigs(sigset_t old)
 }
 EXPORT_SYMBOL(cfs_restore_sigs);
 
-int
-cfs_signal_pending(void)
-{
-	return signal_pending(current);
-}
-EXPORT_SYMBOL(cfs_signal_pending);
-
 void
 cfs_clear_sigpending(void)
 {
diff --git a/drivers/staging/lustre/lnet/libcfs/module.c b/drivers/staging/lustre/lnet/libcfs/module.c
index cdc640bfd..f2d041118 100644
--- a/drivers/staging/lustre/lnet/libcfs/module.c
+++ b/drivers/staging/lustre/lnet/libcfs/module.c
@@ -54,9 +54,6 @@
 
 # define DEBUG_SUBSYSTEM S_LNET
 
-#define LNET_MAX_IOCTL_BUF_LEN (sizeof(struct lnet_ioctl_net_config) + \
-				sizeof(struct lnet_ioctl_config_data))
-
 #include "../../include/linux/libcfs/libcfs.h"
 #include <asm/div64.h>
 
@@ -68,20 +65,6 @@
 
 static struct dentry *lnet_debugfs_root;
 
-/* called when opening /dev/device */
-static int libcfs_psdev_open(unsigned long flags, void *args)
-{
-	try_module_get(THIS_MODULE);
-	return 0;
-}
-
-/* called when closing /dev/device */
-static int libcfs_psdev_release(unsigned long flags, void *args)
-{
-	module_put(THIS_MODULE);
-	return 0;
-}
-
 static DECLARE_RWSEM(ioctl_list_sem);
 static LIST_HEAD(ioctl_list);
 
@@ -115,39 +98,47 @@ int libcfs_deregister_ioctl(struct libcfs_ioctl_handler *hand)
 }
 EXPORT_SYMBOL(libcfs_deregister_ioctl);
 
-static int libcfs_ioctl_handle(struct cfs_psdev_file *pfile, unsigned long cmd,
-			       void __user *arg, struct libcfs_ioctl_hdr *hdr)
+int libcfs_ioctl(unsigned long cmd, void __user *uparam)
 {
 	struct libcfs_ioctl_data *data = NULL;
-	int err = -EINVAL;
+	struct libcfs_ioctl_hdr *hdr;
+	int err;
+
+	/* 'cmd' and permissions get checked in our arch-specific caller */
+	err = libcfs_ioctl_getdata(&hdr, uparam);
+	if (err) {
+		CDEBUG_LIMIT(D_ERROR,
+			     "libcfs ioctl: data header error %d\n", err);
+		return err;
+	}
 
-	/*
-	 * The libcfs_ioctl_data_adjust() function performs adjustment
-	 * operations on the libcfs_ioctl_data structure to make
-	 * it usable by the code.  This doesn't need to be called
-	 * for new data structures added.
-	 */
 	if (hdr->ioc_version == LIBCFS_IOCTL_VERSION) {
+		/*
+		 * The libcfs_ioctl_data_adjust() function performs adjustment
+		 * operations on the libcfs_ioctl_data structure to make
+		 * it usable by the code.  This doesn't need to be called
+		 * for new data structures added.
+		 */
 		data = container_of(hdr, struct libcfs_ioctl_data, ioc_hdr);
 		err = libcfs_ioctl_data_adjust(data);
 		if (err)
-			return err;
+			goto out;
 	}
 
+	CDEBUG(D_IOCTL, "libcfs ioctl cmd %lu\n", cmd);
 	switch (cmd) {
 	case IOC_LIBCFS_CLEAR_DEBUG:
 		libcfs_debug_clear_buffer();
-		return 0;
-	/*
-	 * case IOC_LIBCFS_PANIC:
-	 * Handled in arch/cfs_module.c
-	 */
+		break;
+
 	case IOC_LIBCFS_MARK_DEBUG:
-		if (!data->ioc_inlbuf1 ||
-		    data->ioc_inlbuf1[data->ioc_inllen1 - 1] != '\0')
-			return -EINVAL;
+		if (!data || !data->ioc_inlbuf1 ||
+		    data->ioc_inlbuf1[data->ioc_inllen1 - 1] != '\0') {
+			err = -EINVAL;
+			goto out;
+		}
 		libcfs_debug_mark_buffer(data->ioc_inlbuf1);
-		return 0;
+		break;
 
 	default: {
 		struct libcfs_ioctl_handler *hand;
@@ -156,67 +147,23 @@ static int libcfs_ioctl_handle(struct cfs_psdev_file *pfile, unsigned long cmd,
 		down_read(&ioctl_list_sem);
 		list_for_each_entry(hand, &ioctl_list, item) {
 			err = hand->handle_ioctl(cmd, hdr);
-			if (err != -EINVAL) {
-				if (err == 0)
-					err = libcfs_ioctl_popdata(arg,
-							hdr, hdr->ioc_len);
-				break;
+			if (err == -EINVAL)
+				continue;
+
+			if (!err) {
+				if (copy_to_user(uparam, hdr, hdr->ioc_len))
+					err = -EFAULT;
 			}
+			break;
 		}
 		up_read(&ioctl_list_sem);
-		break;
-	}
-	}
-
-	return err;
-}
-
-static int libcfs_ioctl(struct cfs_psdev_file *pfile, unsigned long cmd,
-			void __user *arg)
-{
-	struct libcfs_ioctl_hdr *hdr;
-	int err = 0;
-	__u32 buf_len;
-
-	err = libcfs_ioctl_getdata_len(arg, &buf_len);
-	if (err)
-		return err;
-
-	/*
-	 * do a check here to restrict the size of the memory
-	 * to allocate to guard against DoS attacks.
-	 */
-	if (buf_len > LNET_MAX_IOCTL_BUF_LEN) {
-		CERROR("LNET: user buffer exceeds kernel buffer\n");
-		return -EINVAL;
-	}
-
-	LIBCFS_ALLOC_GFP(hdr, buf_len, GFP_KERNEL);
-	if (!hdr)
-		return -ENOMEM;
-
-	/* 'cmd' and permissions get checked in our arch-specific caller */
-	if (copy_from_user(hdr, arg, buf_len)) {
-		CERROR("LNET ioctl: data error\n");
-		err = -EFAULT;
-		goto out;
+		break; }
 	}
-
-	err = libcfs_ioctl_handle(pfile, cmd, arg, hdr);
-
 out:
-	LIBCFS_FREE(hdr, buf_len);
+	LIBCFS_FREE(hdr, hdr->ioc_len);
 	return err;
 }
 
-struct cfs_psdev_ops libcfs_psdev_ops = {
-	libcfs_psdev_open,
-	libcfs_psdev_release,
-	NULL,
-	NULL,
-	libcfs_ioctl
-};
-
 int lprocfs_call_handler(void *data, int write, loff_t *ppos,
 			 void __user *buffer, size_t *lenp,
 			 int (*handler)(void *data, int write, loff_t pos,
@@ -477,6 +424,13 @@ static struct ctl_table lnet_table[] = {
 		.mode     = 0644,
 		.proc_handler = &proc_dointvec
 	},
+	{
+		.procname	= "fail_err",
+		.data		= &cfs_fail_err,
+		.maxlen		= sizeof(cfs_fail_err),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
 	{
 	}
 };
diff --git a/drivers/staging/lustre/lnet/libcfs/tracefile.c b/drivers/staging/lustre/lnet/libcfs/tracefile.c
index 244eb89ee..7739b9469 100644
--- a/drivers/staging/lustre/lnet/libcfs/tracefile.c
+++ b/drivers/staging/lustre/lnet/libcfs/tracefile.c
@@ -707,10 +707,9 @@ int cfs_tracefile_dump_all_pages(char *filename)
 	struct cfs_trace_page	*tage;
 	struct cfs_trace_page	*tmp;
 	char			*buf;
+	mm_segment_t __oldfs;
 	int rc;
 
-	DECL_MMSPACE;
-
 	cfs_tracefile_write_lock();
 
 	filp = filp_open(filename, O_CREAT | O_EXCL | O_WRONLY | O_LARGEFILE,
@@ -729,11 +728,12 @@ int cfs_tracefile_dump_all_pages(char *filename)
 		rc = 0;
 		goto close;
 	}
+	__oldfs = get_fs();
+	set_fs(get_ds());
 
 	/* ok, for now, just write the pages.  in the future we'll be building
 	 * iobufs with the pages and calling generic_direct_IO
 	 */
-	MMSPACE_OPEN;
 	list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) {
 		__LASSERT_TAGE_INVARIANT(tage);
 
@@ -752,7 +752,7 @@ int cfs_tracefile_dump_all_pages(char *filename)
 		list_del(&tage->linkage);
 		cfs_tage_free(tage);
 	}
-	MMSPACE_CLOSE;
+	set_fs(__oldfs);
 	rc = vfs_fsync(filp, 1);
 	if (rc)
 		pr_err("sync returns %d\n", rc);
@@ -986,13 +986,12 @@ static int tracefiled(void *arg)
 	struct tracefiled_ctl *tctl = arg;
 	struct cfs_trace_page *tage;
 	struct cfs_trace_page *tmp;
+	mm_segment_t __oldfs;
 	struct file *filp;
 	char *buf;
 	int last_loop = 0;
 	int rc;
 
-	DECL_MMSPACE;
-
 	/* we're started late enough that we pick up init's fs context */
 	/* this is so broken in uml?  what on earth is going on? */
 
@@ -1025,8 +1024,8 @@ static int tracefiled(void *arg)
 			__LASSERT(list_empty(&pc.pc_pages));
 			goto end_loop;
 		}
-
-		MMSPACE_OPEN;
+		__oldfs = get_fs();
+		set_fs(get_ds());
 
 		list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) {
 			static loff_t f_pos;
@@ -1051,7 +1050,7 @@ static int tracefiled(void *arg)
 				break;
 			}
 		}
-		MMSPACE_CLOSE;
+		set_fs(__oldfs);
 
 		filp_close(filp, NULL);
 		put_pages_on_daemon_list(&pc);
diff --git a/drivers/staging/lustre/lnet/libcfs/workitem.c b/drivers/staging/lustre/lnet/libcfs/workitem.c
index c72fe00dc..92236ae59 100644
--- a/drivers/staging/lustre/lnet/libcfs/workitem.c
+++ b/drivers/staging/lustre/lnet/libcfs/workitem.c
@@ -111,7 +111,7 @@ cfs_wi_sched_cansleep(struct cfs_wi_sched *sched)
  * 1. when it returns no one shall try to schedule the workitem.
  */
 void
-cfs_wi_exit(struct cfs_wi_sched *sched, cfs_workitem_t *wi)
+cfs_wi_exit(struct cfs_wi_sched *sched, struct cfs_workitem *wi)
 {
 	LASSERT(!in_interrupt()); /* because we use plain spinlock */
 	LASSERT(!sched->ws_stopping);
@@ -138,7 +138,7 @@ EXPORT_SYMBOL(cfs_wi_exit);
  * cancel schedule request of workitem \a wi
  */
 int
-cfs_wi_deschedule(struct cfs_wi_sched *sched, cfs_workitem_t *wi)
+cfs_wi_deschedule(struct cfs_wi_sched *sched, struct cfs_workitem *wi)
 {
 	int	rc;
 
@@ -179,7 +179,7 @@ EXPORT_SYMBOL(cfs_wi_deschedule);
  * be added, and even dynamic creation of serialised queues might be supported.
  */
 void
-cfs_wi_schedule(struct cfs_wi_sched *sched, cfs_workitem_t *wi)
+cfs_wi_schedule(struct cfs_wi_sched *sched, struct cfs_workitem *wi)
 {
 	LASSERT(!in_interrupt()); /* because we use plain spinlock */
 	LASSERT(!sched->ws_stopping);
@@ -229,12 +229,12 @@ static int cfs_wi_scheduler(void *arg)
 	while (!sched->ws_stopping) {
 		int	     nloops = 0;
 		int	     rc;
-		cfs_workitem_t *wi;
+		struct cfs_workitem *wi;
 
 		while (!list_empty(&sched->ws_runq) &&
 		       nloops < CFS_WI_RESCHED) {
-			wi = list_entry(sched->ws_runq.next, cfs_workitem_t,
-					wi_list);
+			wi = list_entry(sched->ws_runq.next,
+					struct cfs_workitem, wi_list);
 			LASSERT(wi->wi_scheduled && !wi->wi_running);
 
 			list_del_init(&wi->wi_list);
diff --git a/drivers/staging/lustre/lnet/lnet/api-ni.c b/drivers/staging/lustre/lnet/lnet/api-ni.c
index 876475554..fe0dbe746 100644
--- a/drivers/staging/lustre/lnet/lnet/api-ni.c
+++ b/drivers/staging/lustre/lnet/lnet/api-ni.c
@@ -1215,9 +1215,9 @@ lnet_shutdown_lndni(struct lnet_ni *ni)
 }
 
 static int
-lnet_startup_lndni(struct lnet_ni *ni, __s32 peer_timeout,
-		   __s32 peer_cr, __s32 peer_buf_cr, __s32 credits)
+lnet_startup_lndni(struct lnet_ni *ni, struct lnet_ioctl_config_data *conf)
 {
+	struct lnet_ioctl_config_lnd_tunables *lnd_tunables = NULL;
 	int rc = -EINVAL;
 	int lnd_type;
 	lnd_t *lnd;
@@ -1275,6 +1275,21 @@ lnet_startup_lndni(struct lnet_ni *ni, __s32 peer_timeout,
 
 	ni->ni_lnd = lnd;
 
+	if (conf && conf->cfg_hdr.ioc_len > sizeof(*conf))
+		lnd_tunables = (struct lnet_ioctl_config_lnd_tunables *)conf->cfg_bulk;
+
+	if (lnd_tunables) {
+		LIBCFS_ALLOC(ni->ni_lnd_tunables,
+			     sizeof(*ni->ni_lnd_tunables));
+		if (!ni->ni_lnd_tunables) {
+			mutex_unlock(&the_lnet.ln_lnd_mutex);
+			rc = -ENOMEM;
+			goto failed0;
+		}
+		memcpy(ni->ni_lnd_tunables, lnd_tunables,
+		       sizeof(*ni->ni_lnd_tunables));
+	}
+
 	rc = lnd->lnd_startup(ni);
 
 	mutex_unlock(&the_lnet.ln_lnd_mutex);
@@ -1292,20 +1307,28 @@ lnet_startup_lndni(struct lnet_ni *ni, __s32 peer_timeout,
 	 * If given some LND tunable parameters, parse those now to
 	 * override the values in the NI structure.
 	 */
-	if (peer_buf_cr >= 0)
-		ni->ni_peerrtrcredits = peer_buf_cr;
-	if (peer_timeout >= 0)
-		ni->ni_peertimeout = peer_timeout;
+	if (conf && conf->cfg_config_u.cfg_net.net_peer_rtr_credits >= 0) {
+		ni->ni_peerrtrcredits =
+			conf->cfg_config_u.cfg_net.net_peer_rtr_credits;
+	}
+	if (conf && conf->cfg_config_u.cfg_net.net_peer_timeout >= 0) {
+		ni->ni_peertimeout =
+			conf->cfg_config_u.cfg_net.net_peer_timeout;
+	}
 	/*
 	 * TODO
 	 * Note: For now, don't allow the user to change
 	 * peertxcredits as this number is used in the
 	 * IB LND to control queue depth.
-	 * if (peer_cr != -1)
-	 *	ni->ni_peertxcredits = peer_cr;
+	 *
+	 * if (conf && conf->cfg_config_u.cfg_net.net_peer_tx_credits != -1)
+	 *	ni->ni_peertxcredits =
+	 *		conf->cfg_config_u.cfg_net.net_peer_tx_credits;
 	 */
-	if (credits >= 0)
-		ni->ni_maxtxcredits = credits;
+	if (conf && conf->cfg_config_u.cfg_net.net_max_tx_credits >= 0) {
+		ni->ni_maxtxcredits =
+			conf->cfg_config_u.cfg_net.net_max_tx_credits;
+	}
 
 	LASSERT(ni->ni_peertimeout <= 0 || lnd->lnd_query);
 
@@ -1367,7 +1390,7 @@ lnet_startup_lndnis(struct list_head *nilist)
 	while (!list_empty(nilist)) {
 		ni = list_entry(nilist->next, lnet_ni_t, ni_list);
 		list_del(&ni->ni_list);
-		rc = lnet_startup_lndni(ni, -1, -1, -1, -1);
+		rc = lnet_startup_lndni(ni, NULL);
 
 		if (rc < 0)
 			goto failed;
@@ -1641,25 +1664,20 @@ EXPORT_SYMBOL(LNetNIFini);
  * parameters
  *
  * \param[in] ni network       interface structure
- * \param[out] cpt_count       the number of cpts the ni is on
- * \param[out] nid             Network Interface ID
- * \param[out] peer_timeout    NI peer timeout
- * \param[out] peer_tx_crdits  NI peer transmit credits
- * \param[out] peer_rtr_credits NI peer router credits
- * \param[out] max_tx_credits  NI max transmit credit
- * \param[out] net_config      Network configuration
+ * \param[out] config	       NI configuration
  */
 static void
-lnet_fill_ni_info(struct lnet_ni *ni, __u32 *cpt_count, __u64 *nid,
-		  int *peer_timeout, int *peer_tx_credits,
-		  int *peer_rtr_credits, int *max_tx_credits,
-		  struct lnet_ioctl_net_config *net_config)
+lnet_fill_ni_info(struct lnet_ni *ni, struct lnet_ioctl_config_data *config)
 {
+	struct lnet_ioctl_config_lnd_tunables *lnd_cfg = NULL;
+	struct lnet_ioctl_net_config *net_config;
+	size_t min_size, tunable_size = 0;
 	int i;
 
-	if (!ni)
+	if (!ni || !config)
 		return;
 
+	net_config = (struct lnet_ioctl_net_config *) config->cfg_bulk;
 	if (!net_config)
 		return;
 
@@ -1675,11 +1693,11 @@ lnet_fill_ni_info(struct lnet_ni *ni, __u32 *cpt_count, __u64 *nid,
 			sizeof(net_config->ni_interfaces[i]));
 	}
 
-	*nid = ni->ni_nid;
-	*peer_timeout = ni->ni_peertimeout;
-	*peer_tx_credits = ni->ni_peertxcredits;
-	*peer_rtr_credits = ni->ni_peerrtrcredits;
-	*max_tx_credits = ni->ni_maxtxcredits;
+	config->cfg_nid = ni->ni_nid;
+	config->cfg_config_u.cfg_net.net_peer_timeout = ni->ni_peertimeout;
+	config->cfg_config_u.cfg_net.net_max_tx_credits = ni->ni_maxtxcredits;
+	config->cfg_config_u.cfg_net.net_peer_tx_credits = ni->ni_peertxcredits;
+	config->cfg_config_u.cfg_net.net_peer_rtr_credits = ni->ni_peerrtrcredits;
 
 	net_config->ni_status = ni->ni_status->ns_status;
 
@@ -1689,18 +1707,40 @@ lnet_fill_ni_info(struct lnet_ni *ni, __u32 *cpt_count, __u64 *nid,
 		for (i = 0; i < num_cpts; i++)
 			net_config->ni_cpts[i] = ni->ni_cpts[i];
 
-		*cpt_count = num_cpts;
+		config->cfg_ncpts = num_cpts;
+	}
+
+	/*
+	 * See if user land tools sent in a newer and larger version
+	 * of struct lnet_tunables than what the kernel uses.
+	 */
+	min_size = sizeof(*config) + sizeof(*net_config);
+
+	if (config->cfg_hdr.ioc_len > min_size)
+		tunable_size = config->cfg_hdr.ioc_len - min_size;
+
+	/* Don't copy to much data to user space */
+	min_size = min(tunable_size, sizeof(*ni->ni_lnd_tunables));
+	lnd_cfg = (struct lnet_ioctl_config_lnd_tunables *)net_config->cfg_bulk;
+
+	if (ni->ni_lnd_tunables && lnd_cfg && min_size) {
+		memcpy(lnd_cfg, ni->ni_lnd_tunables, min_size);
+		config->cfg_config_u.cfg_net.net_interface_count = 1;
+
+		/* Tell user land that kernel side has less data */
+		if (tunable_size > sizeof(*ni->ni_lnd_tunables)) {
+			min_size = tunable_size - sizeof(ni->ni_lnd_tunables);
+			config->cfg_hdr.ioc_len -= min_size;
+		}
 	}
 }
 
-int
-lnet_get_net_config(int idx, __u32 *cpt_count, __u64 *nid, int *peer_timeout,
-		    int *peer_tx_credits, int *peer_rtr_credits,
-		    int *max_tx_credits,
-		    struct lnet_ioctl_net_config *net_config)
+static int
+lnet_get_net_config(struct lnet_ioctl_config_data *config)
 {
 	struct lnet_ni *ni;
 	struct list_head *tmp;
+	int idx = config->cfg_count;
 	int cpt, i = 0;
 	int rc = -ENOENT;
 
@@ -1712,9 +1752,7 @@ lnet_get_net_config(int idx, __u32 *cpt_count, __u64 *nid, int *peer_timeout,
 
 		ni = list_entry(tmp, lnet_ni_t, ni_list);
 		lnet_ni_lock(ni);
-		lnet_fill_ni_info(ni, cpt_count, nid, peer_timeout,
-				  peer_tx_credits, peer_rtr_credits,
-				  max_tx_credits, net_config);
+		lnet_fill_ni_info(ni, config);
 		lnet_ni_unlock(ni);
 		rc = 0;
 		break;
@@ -1725,10 +1763,9 @@ lnet_get_net_config(int idx, __u32 *cpt_count, __u64 *nid, int *peer_timeout,
 }
 
 int
-lnet_dyn_add_ni(lnet_pid_t requested_pid, char *nets,
-		__s32 peer_timeout, __s32 peer_cr, __s32 peer_buf_cr,
-		__s32 credits)
+lnet_dyn_add_ni(lnet_pid_t requested_pid, struct lnet_ioctl_config_data *conf)
 {
+	char *nets = conf->cfg_config_u.cfg_net.net_intf;
 	lnet_ping_info_t *pinfo;
 	lnet_handle_md_t md_handle;
 	struct lnet_ni *ni;
@@ -1773,8 +1810,7 @@ lnet_dyn_add_ni(lnet_pid_t requested_pid, char *nets,
 
 	list_del_init(&ni->ni_list);
 
-	rc = lnet_startup_lndni(ni, peer_timeout, peer_cr,
-				peer_buf_cr, credits);
+	rc = lnet_startup_lndni(ni, conf);
 	if (rc)
 		goto failed1;
 
@@ -1864,6 +1900,10 @@ LNetCtl(unsigned int cmd, void *arg)
 	int rc;
 	unsigned long secs_passed;
 
+	BUILD_BUG_ON(LIBCFS_IOC_DATA_MAX <
+		     sizeof(struct lnet_ioctl_net_config) +
+		     sizeof(struct lnet_ioctl_config_data));
+
 	switch (cmd) {
 	case IOC_LIBCFS_GET_NI:
 		rc = LNetGetId(data->ioc_count, &id);
@@ -1918,27 +1958,14 @@ LNetCtl(unsigned int cmd, void *arg)
 				      &config->cfg_config_u.cfg_route.rtr_priority);
 
 	case IOC_LIBCFS_GET_NET: {
-		struct lnet_ioctl_net_config *net_config;
-		size_t total = sizeof(*config) + sizeof(*net_config);
-
+		size_t total = sizeof(*config) +
+			       sizeof(struct lnet_ioctl_net_config);
 		config = arg;
 
 		if (config->cfg_hdr.ioc_len < total)
 			return -EINVAL;
 
-		net_config = (struct lnet_ioctl_net_config *)
-				config->cfg_bulk;
-		if (!net_config)
-			return -EINVAL;
-
-		return lnet_get_net_config(config->cfg_count,
-					   &config->cfg_ncpts,
-					   &config->cfg_nid,
-					   &config->cfg_config_u.cfg_net.net_peer_timeout,
-					   &config->cfg_config_u.cfg_net.net_peer_tx_credits,
-					   &config->cfg_config_u.cfg_net.net_peer_rtr_credits,
-					   &config->cfg_config_u.cfg_net.net_max_tx_credits,
-					   net_config);
+		return lnet_get_net_config(config);
 	}
 
 	case IOC_LIBCFS_GET_LNET_STATS: {
diff --git a/drivers/staging/lustre/lnet/lnet/config.c b/drivers/staging/lustre/lnet/lnet/config.c
index 449069c9e..480cc9c6c 100644
--- a/drivers/staging/lustre/lnet/lnet/config.c
+++ b/drivers/staging/lustre/lnet/lnet/config.c
@@ -107,6 +107,9 @@ lnet_ni_free(struct lnet_ni *ni)
 	if (ni->ni_cpts)
 		cfs_expr_list_values_free(ni->ni_cpts, ni->ni_ncpts);
 
+	if (ni->ni_lnd_tunables)
+		LIBCFS_FREE(ni->ni_lnd_tunables, sizeof(*ni->ni_lnd_tunables));
+
 	for (i = 0; i < LNET_MAX_INTERFACES && ni->ni_interfaces[i]; i++) {
 		LIBCFS_FREE(ni->ni_interfaces[i],
 			    strlen(ni->ni_interfaces[i]) + 1);
diff --git a/drivers/staging/lustre/lnet/lnet/lib-move.c b/drivers/staging/lustre/lnet/lnet/lib-move.c
index f19aa9320..c5d5bedb3 100644
--- a/drivers/staging/lustre/lnet/lnet/lib-move.c
+++ b/drivers/staging/lustre/lnet/lnet/lib-move.c
@@ -407,7 +407,7 @@ lnet_copy_kiov2iov(unsigned int niov, struct kvec *iov, unsigned int iovoffset,
 		LASSERT(niov > 0);
 		LASSERT(nkiov > 0);
 		this_nob = min(iov->iov_len - iovoffset,
-			       (__kernel_size_t) kiov->kiov_len - kiovoffset);
+			       (__kernel_size_t)kiov->kiov_len - kiovoffset);
 		this_nob = min(this_nob, nob);
 
 		if (!addr)
@@ -477,7 +477,7 @@ lnet_copy_iov2kiov(unsigned int nkiov, lnet_kiov_t *kiov,
 	do {
 		LASSERT(nkiov > 0);
 		LASSERT(niov > 0);
-		this_nob = min((__kernel_size_t) kiov->kiov_len - kiovoffset,
+		this_nob = min((__kernel_size_t)kiov->kiov_len - kiovoffset,
 			       iov->iov_len - iovoffset);
 		this_nob = min(this_nob, nob);
 
@@ -996,7 +996,7 @@ lnet_return_tx_credits_locked(lnet_msg_t *msg)
 			LASSERT(msg2->msg_txpeer->lp_ni == ni);
 			LASSERT(msg2->msg_tx_delayed);
 
-			(void) lnet_post_send_locked(msg2, 1);
+			(void)lnet_post_send_locked(msg2, 1);
 		}
 	}
 
@@ -1019,7 +1019,7 @@ lnet_return_tx_credits_locked(lnet_msg_t *msg)
 			LASSERT(msg2->msg_txpeer == txpeer);
 			LASSERT(msg2->msg_tx_delayed);
 
-			(void) lnet_post_send_locked(msg2, 1);
+			(void)lnet_post_send_locked(msg2, 1);
 		}
 	}
 
@@ -1142,7 +1142,7 @@ routing_off:
 					  lnet_msg_t, msg_list);
 			list_del(&msg2->msg_list);
 
-			(void) lnet_post_routed_recv_locked(msg2, 1);
+			(void)lnet_post_routed_recv_locked(msg2, 1);
 		}
 	}
 	if (rxpeer) {
diff --git a/drivers/staging/lustre/lnet/lnet/module.c b/drivers/staging/lustre/lnet/lnet/module.c
index 93037c116..246b5c141 100644
--- a/drivers/staging/lustre/lnet/lnet/module.c
+++ b/drivers/staging/lustre/lnet/lnet/module.c
@@ -108,12 +108,7 @@ lnet_dyn_configure(struct libcfs_ioctl_hdr *hdr)
 		rc = -EINVAL;
 		goto out_unlock;
 	}
-	rc = lnet_dyn_add_ni(LNET_PID_LUSTRE,
-			     conf->cfg_config_u.cfg_net.net_intf,
-			     conf->cfg_config_u.cfg_net.net_peer_timeout,
-			     conf->cfg_config_u.cfg_net.net_peer_tx_credits,
-			     conf->cfg_config_u.cfg_net.net_peer_rtr_credits,
-			     conf->cfg_config_u.cfg_net.net_max_tx_credits);
+	rc = lnet_dyn_add_ni(LNET_PID_LUSTRE, conf);
 out_unlock:
 	mutex_unlock(&lnet_config_mutex);
 
diff --git a/drivers/staging/lustre/lnet/selftest/brw_test.c b/drivers/staging/lustre/lnet/selftest/brw_test.c
index dcb6e506f..a63d86c4c 100644
--- a/drivers/staging/lustre/lnet/selftest/brw_test.c
+++ b/drivers/staging/lustre/lnet/selftest/brw_test.c
@@ -49,10 +49,10 @@ module_param(brw_inject_errors, int, 0644);
 MODULE_PARM_DESC(brw_inject_errors, "# data errors to inject randomly, zero by default");
 
 static void
-brw_client_fini(sfw_test_instance_t *tsi)
+brw_client_fini(struct sfw_test_instance *tsi)
 {
-	srpc_bulk_t *bulk;
-	sfw_test_unit_t	*tsu;
+	struct srpc_bulk *bulk;
+	struct sfw_test_unit	*tsu;
 
 	LASSERT(tsi->tsi_is_client);
 
@@ -67,21 +67,21 @@ brw_client_fini(sfw_test_instance_t *tsi)
 }
 
 static int
-brw_client_init(sfw_test_instance_t *tsi)
+brw_client_init(struct sfw_test_instance *tsi)
 {
-	sfw_session_t *sn = tsi->tsi_batch->bat_session;
+	struct sfw_session *sn = tsi->tsi_batch->bat_session;
 	int flags;
 	int npg;
 	int len;
 	int opc;
-	srpc_bulk_t *bulk;
-	sfw_test_unit_t *tsu;
+	struct srpc_bulk *bulk;
+	struct sfw_test_unit *tsu;
 
 	LASSERT(sn);
 	LASSERT(tsi->tsi_is_client);
 
 	if (!(sn->sn_features & LST_FEAT_BULK_LEN)) {
-		test_bulk_req_t *breq = &tsi->tsi_u.bulk_v0;
+		struct test_bulk_req *breq = &tsi->tsi_u.bulk_v0;
 
 		opc = breq->blk_opc;
 		flags = breq->blk_flags;
@@ -91,9 +91,8 @@ brw_client_init(sfw_test_instance_t *tsi)
 		 * but we have to keep it for compatibility
 		 */
 		len = npg * PAGE_SIZE;
-
 	} else {
-		test_bulk_req_v1_t *breq = &tsi->tsi_u.bulk_v1;
+		struct test_bulk_req_v1 *breq = &tsi->tsi_u.bulk_v1;
 
 		/*
 		 * I should never get this step if it's unknown feature
@@ -225,7 +224,7 @@ bad_data:
 }
 
 static void
-brw_fill_bulk(srpc_bulk_t *bk, int pattern, __u64 magic)
+brw_fill_bulk(struct srpc_bulk *bk, int pattern, __u64 magic)
 {
 	int i;
 	struct page *pg;
@@ -237,7 +236,7 @@ brw_fill_bulk(srpc_bulk_t *bk, int pattern, __u64 magic)
 }
 
 static int
-brw_check_bulk(srpc_bulk_t *bk, int pattern, __u64 magic)
+brw_check_bulk(struct srpc_bulk *bk, int pattern, __u64 magic)
 {
 	int i;
 	struct page *pg;
@@ -255,14 +254,14 @@ brw_check_bulk(srpc_bulk_t *bk, int pattern, __u64 magic)
 }
 
 static int
-brw_client_prep_rpc(sfw_test_unit_t *tsu,
-		    lnet_process_id_t dest, srpc_client_rpc_t **rpcpp)
+brw_client_prep_rpc(struct sfw_test_unit *tsu,
+		    lnet_process_id_t dest, struct srpc_client_rpc **rpcpp)
 {
-	srpc_bulk_t *bulk = tsu->tsu_private;
-	sfw_test_instance_t *tsi = tsu->tsu_instance;
-	sfw_session_t *sn = tsi->tsi_batch->bat_session;
-	srpc_client_rpc_t *rpc;
-	srpc_brw_reqst_t *req;
+	struct srpc_bulk *bulk = tsu->tsu_private;
+	struct sfw_test_instance *tsi = tsu->tsu_instance;
+	struct sfw_session *sn = tsi->tsi_batch->bat_session;
+	struct srpc_client_rpc *rpc;
+	struct srpc_brw_reqst *req;
 	int flags;
 	int npg;
 	int len;
@@ -273,15 +272,14 @@ brw_client_prep_rpc(sfw_test_unit_t *tsu,
 	LASSERT(bulk);
 
 	if (!(sn->sn_features & LST_FEAT_BULK_LEN)) {
-		test_bulk_req_t *breq = &tsi->tsi_u.bulk_v0;
+		struct test_bulk_req *breq = &tsi->tsi_u.bulk_v0;
 
 		opc = breq->blk_opc;
 		flags = breq->blk_flags;
 		npg = breq->blk_npg;
 		len = npg * PAGE_SIZE;
-
 	} else {
-		test_bulk_req_v1_t *breq = &tsi->tsi_u.bulk_v1;
+		struct test_bulk_req_v1 *breq = &tsi->tsi_u.bulk_v1;
 
 		/*
 		 * I should never get this step if it's unknown feature
@@ -299,7 +297,7 @@ brw_client_prep_rpc(sfw_test_unit_t *tsu,
 	if (rc)
 		return rc;
 
-	memcpy(&rpc->crpc_bulk, bulk, offsetof(srpc_bulk_t, bk_iovs[npg]));
+	memcpy(&rpc->crpc_bulk, bulk, offsetof(struct srpc_bulk, bk_iovs[npg]));
 	if (opc == LST_BRW_WRITE)
 		brw_fill_bulk(&rpc->crpc_bulk, flags, BRW_MAGIC);
 	else
@@ -315,21 +313,21 @@ brw_client_prep_rpc(sfw_test_unit_t *tsu,
 }
 
 static void
-brw_client_done_rpc(sfw_test_unit_t *tsu, srpc_client_rpc_t *rpc)
+brw_client_done_rpc(struct sfw_test_unit *tsu, struct srpc_client_rpc *rpc)
 {
 	__u64 magic = BRW_MAGIC;
-	sfw_test_instance_t *tsi = tsu->tsu_instance;
-	sfw_session_t *sn = tsi->tsi_batch->bat_session;
-	srpc_msg_t *msg = &rpc->crpc_replymsg;
-	srpc_brw_reply_t *reply = &msg->msg_body.brw_reply;
-	srpc_brw_reqst_t *reqst = &rpc->crpc_reqstmsg.msg_body.brw_reqst;
+	struct sfw_test_instance *tsi = tsu->tsu_instance;
+	struct sfw_session *sn = tsi->tsi_batch->bat_session;
+	struct srpc_msg *msg = &rpc->crpc_replymsg;
+	struct srpc_brw_reply *reply = &msg->msg_body.brw_reply;
+	struct srpc_brw_reqst *reqst = &rpc->crpc_reqstmsg.msg_body.brw_reqst;
 
 	LASSERT(sn);
 
 	if (rpc->crpc_status) {
 		CERROR("BRW RPC to %s failed with %d\n",
 		       libcfs_id2str(rpc->crpc_dest), rpc->crpc_status);
-		if (!tsi->tsi_stopping) /* rpc could have been aborted */
+		if (!tsi->tsi_stopping)	/* rpc could have been aborted */
 			atomic_inc(&sn->sn_brw_errors);
 		return;
 	}
@@ -363,7 +361,7 @@ brw_client_done_rpc(sfw_test_unit_t *tsu, srpc_client_rpc_t *rpc)
 static void
 brw_server_rpc_done(struct srpc_server_rpc *rpc)
 {
-	srpc_bulk_t *blk = rpc->srpc_bulk;
+	struct srpc_bulk *blk = rpc->srpc_bulk;
 
 	if (!blk)
 		return;
@@ -384,9 +382,9 @@ static int
 brw_bulk_ready(struct srpc_server_rpc *rpc, int status)
 {
 	__u64 magic = BRW_MAGIC;
-	srpc_brw_reply_t *reply = &rpc->srpc_replymsg.msg_body.brw_reply;
-	srpc_brw_reqst_t *reqst;
-	srpc_msg_t *reqstmsg;
+	struct srpc_brw_reply *reply = &rpc->srpc_replymsg.msg_body.brw_reply;
+	struct srpc_brw_reqst *reqst;
+	struct srpc_msg *reqstmsg;
 
 	LASSERT(rpc->srpc_bulk);
 	LASSERT(rpc->srpc_reqstbuf);
@@ -420,10 +418,10 @@ static int
 brw_server_handle(struct srpc_server_rpc *rpc)
 {
 	struct srpc_service *sv = rpc->srpc_scd->scd_svc;
-	srpc_msg_t *replymsg = &rpc->srpc_replymsg;
-	srpc_msg_t *reqstmsg = &rpc->srpc_reqstbuf->buf_msg;
-	srpc_brw_reply_t *reply = &replymsg->msg_body.brw_reply;
-	srpc_brw_reqst_t *reqst = &reqstmsg->msg_body.brw_reqst;
+	struct srpc_msg *replymsg = &rpc->srpc_replymsg;
+	struct srpc_msg *reqstmsg = &rpc->srpc_reqstbuf->buf_msg;
+	struct srpc_brw_reply *reply = &replymsg->msg_body.brw_reply;
+	struct srpc_brw_reqst *reqst = &reqstmsg->msg_body.brw_reqst;
 	int npg;
 	int rc;
 
@@ -459,7 +457,7 @@ brw_server_handle(struct srpc_server_rpc *rpc)
 
 	if (!(reqstmsg->msg_ses_feats & LST_FEAT_BULK_LEN)) {
 		/* compat with old version */
-		if (reqst->brw_len & ~CFS_PAGE_MASK) {
+		if (reqst->brw_len & ~PAGE_MASK) {
 			reply->brw_status = EINVAL;
 			return 0;
 		}
@@ -490,7 +488,8 @@ brw_server_handle(struct srpc_server_rpc *rpc)
 	return 0;
 }
 
-sfw_test_client_ops_t brw_test_client;
+struct sfw_test_client_ops brw_test_client;
+
 void brw_init_test_client(void)
 {
 	brw_test_client.tso_init = brw_client_init;
@@ -499,7 +498,8 @@ void brw_init_test_client(void)
 	brw_test_client.tso_done_rpc = brw_client_done_rpc;
 };
 
-srpc_service_t brw_test_service;
+struct srpc_service brw_test_service;
+
 void brw_init_test_service(void)
 {
 	brw_test_service.sv_id = SRPC_SERVICE_BRW;
diff --git a/drivers/staging/lustre/lnet/selftest/conctl.c b/drivers/staging/lustre/lnet/selftest/conctl.c
index 79ee6c0bf..408c614b6 100644
--- a/drivers/staging/lustre/lnet/selftest/conctl.c
+++ b/drivers/staging/lustre/lnet/selftest/conctl.c
@@ -51,9 +51,9 @@ lst_session_new_ioctl(lstio_session_new_args_t *args)
 	char *name;
 	int rc;
 
-	if (!args->lstio_ses_idp || /* address for output sid */
-	    !args->lstio_ses_key ||    /* no key is specified */
-	    !args->lstio_ses_namep || /* session name */
+	if (!args->lstio_ses_idp ||	/* address for output sid */
+	    !args->lstio_ses_key ||	/* no key is specified */
+	    !args->lstio_ses_namep ||	/* session name */
 	    args->lstio_ses_nmlen <= 0 ||
 	    args->lstio_ses_nmlen > LST_NAME_SIZE)
 		return -EINVAL;
@@ -95,11 +95,11 @@ lst_session_info_ioctl(lstio_session_info_args_t *args)
 {
 	/* no checking of key */
 
-	if (!args->lstio_ses_idp || /* address for output sid */
-	    !args->lstio_ses_keyp || /* address for output key */
-	    !args->lstio_ses_featp || /* address for output features */
-	    !args->lstio_ses_ndinfo || /* address for output ndinfo */
-	    !args->lstio_ses_namep || /* address for output name */
+	if (!args->lstio_ses_idp ||	/* address for output sid */
+	    !args->lstio_ses_keyp ||	/* address for output key */
+	    !args->lstio_ses_featp ||	/* address for output features */
+	    !args->lstio_ses_ndinfo ||	/* address for output ndinfo */
+	    !args->lstio_ses_namep ||	/* address for output name */
 	    args->lstio_ses_nmlen <= 0 ||
 	    args->lstio_ses_nmlen > LST_NAME_SIZE)
 		return -EINVAL;
@@ -125,7 +125,7 @@ lst_debug_ioctl(lstio_debug_args_t *args)
 	if (!args->lstio_dbg_resultp)
 		return -EINVAL;
 
-	if (args->lstio_dbg_namep && /* name of batch/group */
+	if (args->lstio_dbg_namep &&	/* name of batch/group */
 	    (args->lstio_dbg_nmlen <= 0 ||
 	     args->lstio_dbg_nmlen > LST_NAME_SIZE))
 		return -EINVAL;
@@ -326,7 +326,7 @@ lst_nodes_add_ioctl(lstio_group_nodes_args_t *args)
 	if (args->lstio_grp_key != console_session.ses_key)
 		return -EACCES;
 
-	if (!args->lstio_grp_idsp || /* array of ids */
+	if (!args->lstio_grp_idsp ||	/* array of ids */
 	    args->lstio_grp_count <= 0 ||
 	    !args->lstio_grp_resultp ||
 	    !args->lstio_grp_featp ||
@@ -394,13 +394,13 @@ lst_group_info_ioctl(lstio_group_info_args_t *args)
 	    args->lstio_grp_nmlen > LST_NAME_SIZE)
 		return -EINVAL;
 
-	if (!args->lstio_grp_entp &&  /* output: group entry */
-	    !args->lstio_grp_dentsp)  /* output: node entry */
+	if (!args->lstio_grp_entp &&	/* output: group entry */
+	    !args->lstio_grp_dentsp)	/* output: node entry */
 		return -EINVAL;
 
-	if (args->lstio_grp_dentsp) { /* have node entry */
-		if (!args->lstio_grp_idxp || /* node index */
-		    !args->lstio_grp_ndentp) /* # of node entry */
+	if (args->lstio_grp_dentsp) {		/* have node entry */
+		if (!args->lstio_grp_idxp ||	/* node index */
+		    !args->lstio_grp_ndentp)	/* # of node entry */
 			return -EINVAL;
 
 		if (copy_from_user(&ndent, args->lstio_grp_ndentp,
@@ -612,18 +612,18 @@ lst_batch_info_ioctl(lstio_batch_info_args_t *args)
 	if (args->lstio_bat_key != console_session.ses_key)
 		return -EACCES;
 
-	if (!args->lstio_bat_namep || /* batch name */
+	if (!args->lstio_bat_namep ||	/* batch name */
 	    args->lstio_bat_nmlen <= 0 ||
 	    args->lstio_bat_nmlen > LST_NAME_SIZE)
 		return -EINVAL;
 
-	if (!args->lstio_bat_entp && /* output: batch entry */
-	    !args->lstio_bat_dentsp) /* output: node entry */
+	if (!args->lstio_bat_entp &&	/* output: batch entry */
+	    !args->lstio_bat_dentsp)	/* output: node entry */
 		return -EINVAL;
 
-	if (args->lstio_bat_dentsp) { /* have node entry */
-		if (!args->lstio_bat_idxp || /* node index */
-		    !args->lstio_bat_ndentp) /* # of node entry */
+	if (args->lstio_bat_dentsp) {		/* have node entry */
+		if (!args->lstio_bat_idxp ||	/* node index */
+		    !args->lstio_bat_ndentp)	/* # of node entry */
 			return -EINVAL;
 
 		if (copy_from_user(&index, args->lstio_bat_idxp,
@@ -722,18 +722,18 @@ static int lst_test_add_ioctl(lstio_test_args_t *args)
 
 	if (!args->lstio_tes_resultp ||
 	    !args->lstio_tes_retp ||
-	    !args->lstio_tes_bat_name || /* no specified batch */
+	    !args->lstio_tes_bat_name ||	/* no specified batch */
 	    args->lstio_tes_bat_nmlen <= 0 ||
 	    args->lstio_tes_bat_nmlen > LST_NAME_SIZE ||
-	    !args->lstio_tes_sgrp_name || /* no source group */
+	    !args->lstio_tes_sgrp_name ||	/* no source group */
 	    args->lstio_tes_sgrp_nmlen <= 0 ||
 	    args->lstio_tes_sgrp_nmlen > LST_NAME_SIZE ||
-	    !args->lstio_tes_dgrp_name || /* no target group */
+	    !args->lstio_tes_dgrp_name ||	/* no target group */
 	    args->lstio_tes_dgrp_nmlen <= 0 ||
 	    args->lstio_tes_dgrp_nmlen > LST_NAME_SIZE)
 		return -EINVAL;
 
-	if (!args->lstio_tes_loop || /* negative is infinite */
+	if (!args->lstio_tes_loop ||		/* negative is infinite */
 	    args->lstio_tes_concur <= 0 ||
 	    args->lstio_tes_dist <= 0 ||
 	    args->lstio_tes_span <= 0)
@@ -743,7 +743,7 @@ static int lst_test_add_ioctl(lstio_test_args_t *args)
 	if (args->lstio_tes_param &&
 	    (args->lstio_tes_param_len <= 0 ||
 	     args->lstio_tes_param_len >
-	     PAGE_SIZE - sizeof(lstcon_test_t)))
+	     PAGE_SIZE - sizeof(struct lstcon_test)))
 		return -EINVAL;
 
 	LIBCFS_ALLOC(batch_name, args->lstio_tes_bat_nmlen + 1);
diff --git a/drivers/staging/lustre/lnet/selftest/conrpc.c b/drivers/staging/lustre/lnet/selftest/conrpc.c
index 35a227d0c..6f6875811 100644
--- a/drivers/staging/lustre/lnet/selftest/conrpc.c
+++ b/drivers/staging/lustre/lnet/selftest/conrpc.c
@@ -46,13 +46,13 @@
 #include "conrpc.h"
 #include "console.h"
 
-void lstcon_rpc_stat_reply(lstcon_rpc_trans_t *, srpc_msg_t *,
-			   lstcon_node_t *, lstcon_trans_stat_t *);
+void lstcon_rpc_stat_reply(struct lstcon_rpc_trans *, struct srpc_msg *,
+			   struct lstcon_node *, lstcon_trans_stat_t *);
 
 static void
-lstcon_rpc_done(srpc_client_rpc_t *rpc)
+lstcon_rpc_done(struct srpc_client_rpc *rpc)
 {
-	lstcon_rpc_t *crpc = (lstcon_rpc_t *)rpc->crpc_priv;
+	struct lstcon_rpc *crpc = (struct lstcon_rpc *)rpc->crpc_priv;
 
 	LASSERT(crpc && rpc == crpc->crp_rpc);
 	LASSERT(crpc->crp_posted && !crpc->crp_finished);
@@ -90,8 +90,8 @@ lstcon_rpc_done(srpc_client_rpc_t *rpc)
 }
 
 static int
-lstcon_rpc_init(lstcon_node_t *nd, int service, unsigned feats,
-		int bulk_npg, int bulk_len, int embedded, lstcon_rpc_t *crpc)
+lstcon_rpc_init(struct lstcon_node *nd, int service, unsigned feats,
+		int bulk_npg, int bulk_len, int embedded, struct lstcon_rpc *crpc)
 {
 	crpc->crp_rpc = sfw_create_rpc(nd->nd_id, service,
 				       feats, bulk_npg, bulk_len,
@@ -115,16 +115,16 @@ lstcon_rpc_init(lstcon_node_t *nd, int service, unsigned feats,
 }
 
 static int
-lstcon_rpc_prep(lstcon_node_t *nd, int service, unsigned feats,
-		int bulk_npg, int bulk_len, lstcon_rpc_t **crpcpp)
+lstcon_rpc_prep(struct lstcon_node *nd, int service, unsigned feats,
+		int bulk_npg, int bulk_len, struct lstcon_rpc **crpcpp)
 {
-	lstcon_rpc_t *crpc = NULL;
+	struct lstcon_rpc *crpc = NULL;
 	int rc;
 
 	spin_lock(&console_session.ses_rpc_lock);
 
 	crpc = list_first_entry_or_null(&console_session.ses_rpc_freelist,
-					lstcon_rpc_t, crp_link);
+					struct lstcon_rpc, crp_link);
 	if (crpc)
 		list_del_init(&crpc->crp_link);
 
@@ -148,9 +148,9 @@ lstcon_rpc_prep(lstcon_node_t *nd, int service, unsigned feats,
 }
 
 void
-lstcon_rpc_put(lstcon_rpc_t *crpc)
+lstcon_rpc_put(struct lstcon_rpc *crpc)
 {
-	srpc_bulk_t *bulk = &crpc->crp_rpc->crpc_bulk;
+	struct srpc_bulk *bulk = &crpc->crp_rpc->crpc_bulk;
 	int i;
 
 	LASSERT(list_empty(&crpc->crp_link));
@@ -183,9 +183,9 @@ lstcon_rpc_put(lstcon_rpc_t *crpc)
 }
 
 static void
-lstcon_rpc_post(lstcon_rpc_t *crpc)
+lstcon_rpc_post(struct lstcon_rpc *crpc)
 {
-	lstcon_rpc_trans_t *trans = crpc->crp_trans;
+	struct lstcon_rpc_trans *trans = crpc->crp_trans;
 
 	LASSERT(trans);
 
@@ -236,9 +236,9 @@ lstcon_rpc_trans_name(int transop)
 
 int
 lstcon_rpc_trans_prep(struct list_head *translist, int transop,
-		      lstcon_rpc_trans_t **transpp)
+		      struct lstcon_rpc_trans **transpp)
 {
-	lstcon_rpc_trans_t *trans;
+	struct lstcon_rpc_trans *trans;
 
 	if (translist) {
 		list_for_each_entry(trans, translist, tas_link) {
@@ -278,26 +278,26 @@ lstcon_rpc_trans_prep(struct list_head *translist, int transop,
 }
 
 void
-lstcon_rpc_trans_addreq(lstcon_rpc_trans_t *trans, lstcon_rpc_t *crpc)
+lstcon_rpc_trans_addreq(struct lstcon_rpc_trans *trans, struct lstcon_rpc *crpc)
 {
 	list_add_tail(&crpc->crp_link, &trans->tas_rpcs_list);
 	crpc->crp_trans = trans;
 }
 
 void
-lstcon_rpc_trans_abort(lstcon_rpc_trans_t *trans, int error)
+lstcon_rpc_trans_abort(struct lstcon_rpc_trans *trans, int error)
 {
-	srpc_client_rpc_t *rpc;
-	lstcon_rpc_t *crpc;
-	lstcon_node_t *nd;
+	struct srpc_client_rpc *rpc;
+	struct lstcon_rpc *crpc;
+	struct lstcon_node *nd;
 
 	list_for_each_entry(crpc, &trans->tas_rpcs_list, crp_link) {
 		rpc = crpc->crp_rpc;
 
 		spin_lock(&rpc->crpc_lock);
 
-		if (!crpc->crp_posted || /* not posted */
-		    crpc->crp_stamp) { /* rpc done or aborted already */
+		if (!crpc->crp_posted ||	/* not posted */
+		    crpc->crp_stamp) {		/* rpc done or aborted already */
 			if (!crpc->crp_stamp) {
 				crpc->crp_stamp = cfs_time_current();
 				crpc->crp_status = -EINTR;
@@ -326,7 +326,7 @@ lstcon_rpc_trans_abort(lstcon_rpc_trans_t *trans, int error)
 }
 
 static int
-lstcon_rpc_trans_check(lstcon_rpc_trans_t *trans)
+lstcon_rpc_trans_check(struct lstcon_rpc_trans *trans)
 {
 	if (console_session.ses_shutdown &&
 	    !list_empty(&trans->tas_olink)) /* Not an end session RPC */
@@ -336,9 +336,9 @@ lstcon_rpc_trans_check(lstcon_rpc_trans_t *trans)
 }
 
 int
-lstcon_rpc_trans_postwait(lstcon_rpc_trans_t *trans, int timeout)
+lstcon_rpc_trans_postwait(struct lstcon_rpc_trans *trans, int timeout)
 {
-	lstcon_rpc_t *crpc;
+	struct lstcon_rpc *crpc;
 	int rc;
 
 	if (list_empty(&trans->tas_rpcs_list))
@@ -386,11 +386,11 @@ lstcon_rpc_trans_postwait(lstcon_rpc_trans_t *trans, int timeout)
 }
 
 static int
-lstcon_rpc_get_reply(lstcon_rpc_t *crpc, srpc_msg_t **msgpp)
+lstcon_rpc_get_reply(struct lstcon_rpc *crpc, struct srpc_msg **msgpp)
 {
-	lstcon_node_t *nd = crpc->crp_node;
-	srpc_client_rpc_t *rpc = crpc->crp_rpc;
-	srpc_generic_reply_t *rep;
+	struct lstcon_node *nd = crpc->crp_node;
+	struct srpc_client_rpc *rpc = crpc->crp_rpc;
+	struct srpc_generic_reply *rep;
 
 	LASSERT(nd && rpc);
 	LASSERT(crpc->crp_stamp);
@@ -423,10 +423,10 @@ lstcon_rpc_get_reply(lstcon_rpc_t *crpc, srpc_msg_t **msgpp)
 }
 
 void
-lstcon_rpc_trans_stat(lstcon_rpc_trans_t *trans, lstcon_trans_stat_t *stat)
+lstcon_rpc_trans_stat(struct lstcon_rpc_trans *trans, lstcon_trans_stat_t *stat)
 {
-	lstcon_rpc_t *crpc;
-	srpc_msg_t *rep;
+	struct lstcon_rpc *crpc;
+	struct srpc_msg *rep;
 	int error;
 
 	LASSERT(stat);
@@ -466,17 +466,17 @@ lstcon_rpc_trans_stat(lstcon_rpc_trans_t *trans, lstcon_trans_stat_t *stat)
 }
 
 int
-lstcon_rpc_trans_interpreter(lstcon_rpc_trans_t *trans,
+lstcon_rpc_trans_interpreter(struct lstcon_rpc_trans *trans,
 			     struct list_head __user *head_up,
 			     lstcon_rpc_readent_func_t readent)
 {
 	struct list_head tmp;
 	struct list_head __user *next;
 	lstcon_rpc_ent_t *ent;
-	srpc_generic_reply_t *rep;
-	lstcon_rpc_t *crpc;
-	srpc_msg_t *msg;
-	lstcon_node_t *nd;
+	struct srpc_generic_reply *rep;
+	struct lstcon_rpc *crpc;
+	struct srpc_msg *msg;
+	struct lstcon_node *nd;
 	long dur;
 	struct timeval tv;
 	int error;
@@ -520,7 +520,7 @@ lstcon_rpc_trans_interpreter(lstcon_rpc_trans_t *trans,
 			continue;
 
 		/* RPC is done */
-		rep = (srpc_generic_reply_t *)&msg->msg_body.reply;
+		rep = (struct srpc_generic_reply *)&msg->msg_body.reply;
 
 		if (copy_to_user(&ent->rpe_sid, &rep->sid, sizeof(lst_sid_t)) ||
 		    copy_to_user(&ent->rpe_fwk_errno, &rep->status,
@@ -531,7 +531,6 @@ lstcon_rpc_trans_interpreter(lstcon_rpc_trans_t *trans,
 			continue;
 
 		error = readent(trans->tas_opc, msg, ent);
-
 		if (error)
 			return error;
 	}
@@ -540,11 +539,11 @@ lstcon_rpc_trans_interpreter(lstcon_rpc_trans_t *trans,
 }
 
 void
-lstcon_rpc_trans_destroy(lstcon_rpc_trans_t *trans)
+lstcon_rpc_trans_destroy(struct lstcon_rpc_trans *trans)
 {
-	srpc_client_rpc_t *rpc;
-	lstcon_rpc_t *crpc;
-	lstcon_rpc_t *tmp;
+	struct srpc_client_rpc *rpc;
+	struct lstcon_rpc *crpc;
+	struct lstcon_rpc *tmp;
 	int count = 0;
 
 	list_for_each_entry_safe(crpc, tmp, &trans->tas_rpcs_list, crp_link) {
@@ -563,10 +562,10 @@ lstcon_rpc_trans_destroy(lstcon_rpc_trans_t *trans)
 		}
 
 		/*
-		 * rpcs can be still not callbacked (even LNetMDUnlink is called)
-		 * because huge timeout for inaccessible network, don't make
-		 * user wait for them, just abandon them, they will be recycled
-		 * in callback
+		 * rpcs can be still not callbacked (even LNetMDUnlink is
+		 * called) because huge timeout for inaccessible network,
+		 * don't make user wait for them, just abandon them, they
+		 * will be recycled in callback
 		 */
 		LASSERT(crpc->crp_status);
 
@@ -593,11 +592,11 @@ lstcon_rpc_trans_destroy(lstcon_rpc_trans_t *trans)
 }
 
 int
-lstcon_sesrpc_prep(lstcon_node_t *nd, int transop,
-		   unsigned feats, lstcon_rpc_t **crpc)
+lstcon_sesrpc_prep(struct lstcon_node *nd, int transop,
+		   unsigned feats, struct lstcon_rpc **crpc)
 {
-	srpc_mksn_reqst_t *msrq;
-	srpc_rmsn_reqst_t *rsrq;
+	struct srpc_mksn_reqst *msrq;
+	struct srpc_rmsn_reqst *rsrq;
 	int rc;
 
 	switch (transop) {
@@ -632,9 +631,9 @@ lstcon_sesrpc_prep(lstcon_node_t *nd, int transop,
 }
 
 int
-lstcon_dbgrpc_prep(lstcon_node_t *nd, unsigned feats, lstcon_rpc_t **crpc)
+lstcon_dbgrpc_prep(struct lstcon_node *nd, unsigned feats, struct lstcon_rpc **crpc)
 {
-	srpc_debug_reqst_t *drq;
+	struct srpc_debug_reqst *drq;
 	int rc;
 
 	rc = lstcon_rpc_prep(nd, SRPC_SERVICE_DEBUG, feats, 0, 0, crpc);
@@ -650,11 +649,11 @@ lstcon_dbgrpc_prep(lstcon_node_t *nd, unsigned feats, lstcon_rpc_t **crpc)
 }
 
 int
-lstcon_batrpc_prep(lstcon_node_t *nd, int transop, unsigned feats,
-		   lstcon_tsb_hdr_t *tsb, lstcon_rpc_t **crpc)
+lstcon_batrpc_prep(struct lstcon_node *nd, int transop, unsigned feats,
+		   struct lstcon_tsb_hdr *tsb, struct lstcon_rpc **crpc)
 {
-	lstcon_batch_t *batch;
-	srpc_batch_reqst_t *brq;
+	struct lstcon_batch *batch;
+	struct srpc_batch_reqst *brq;
 	int rc;
 
 	rc = lstcon_rpc_prep(nd, SRPC_SERVICE_BATCH, feats, 0, 0, crpc);
@@ -676,16 +675,16 @@ lstcon_batrpc_prep(lstcon_node_t *nd, int transop, unsigned feats,
 
 	LASSERT(!tsb->tsb_index);
 
-	batch = (lstcon_batch_t *)tsb;
+	batch = (struct lstcon_batch *)tsb;
 	brq->bar_arg = batch->bat_arg;
 
 	return 0;
 }
 
 int
-lstcon_statrpc_prep(lstcon_node_t *nd, unsigned feats, lstcon_rpc_t **crpc)
+lstcon_statrpc_prep(struct lstcon_node *nd, unsigned feats, struct lstcon_rpc **crpc)
 {
-	srpc_stat_reqst_t *srq;
+	struct srpc_stat_reqst *srq;
 	int rc;
 
 	rc = lstcon_rpc_prep(nd, SRPC_SERVICE_QUERY_STAT, feats, 0, 0, crpc);
@@ -716,12 +715,12 @@ lstcon_next_id(int idx, int nkiov, lnet_kiov_t *kiov)
 }
 
 static int
-lstcon_dstnodes_prep(lstcon_group_t *grp, int idx,
+lstcon_dstnodes_prep(struct lstcon_group *grp, int idx,
 		     int dist, int span, int nkiov, lnet_kiov_t *kiov)
 {
 	lnet_process_id_packed_t *pid;
-	lstcon_ndlink_t *ndl;
-	lstcon_node_t *nd;
+	struct lstcon_ndlink *ndl;
+	struct lstcon_node *nd;
 	int start;
 	int end;
 	int i = 0;
@@ -770,9 +769,9 @@ lstcon_dstnodes_prep(lstcon_group_t *grp, int idx,
 }
 
 static int
-lstcon_pingrpc_prep(lst_test_ping_param_t *param, srpc_test_reqst_t *req)
+lstcon_pingrpc_prep(lst_test_ping_param_t *param, struct srpc_test_reqst *req)
 {
-	test_ping_req_t *prq = &req->tsr_u.ping;
+	struct test_ping_req *prq = &req->tsr_u.ping;
 
 	prq->png_size = param->png_size;
 	prq->png_flags = param->png_flags;
@@ -781,9 +780,9 @@ lstcon_pingrpc_prep(lst_test_ping_param_t *param, srpc_test_reqst_t *req)
 }
 
 static int
-lstcon_bulkrpc_v0_prep(lst_test_bulk_param_t *param, srpc_test_reqst_t *req)
+lstcon_bulkrpc_v0_prep(lst_test_bulk_param_t *param, struct srpc_test_reqst *req)
 {
-	test_bulk_req_t *brq = &req->tsr_u.bulk_v0;
+	struct test_bulk_req *brq = &req->tsr_u.bulk_v0;
 
 	brq->blk_opc = param->blk_opc;
 	brq->blk_npg = (param->blk_size + PAGE_SIZE - 1) /
@@ -794,9 +793,9 @@ lstcon_bulkrpc_v0_prep(lst_test_bulk_param_t *param, srpc_test_reqst_t *req)
 }
 
 static int
-lstcon_bulkrpc_v1_prep(lst_test_bulk_param_t *param, srpc_test_reqst_t *req)
+lstcon_bulkrpc_v1_prep(lst_test_bulk_param_t *param, struct srpc_test_reqst *req)
 {
-	test_bulk_req_v1_t *brq = &req->tsr_u.bulk_v1;
+	struct test_bulk_req_v1 *brq = &req->tsr_u.bulk_v1;
 
 	brq->blk_opc = param->blk_opc;
 	brq->blk_flags = param->blk_flags;
@@ -807,13 +806,13 @@ lstcon_bulkrpc_v1_prep(lst_test_bulk_param_t *param, srpc_test_reqst_t *req)
 }
 
 int
-lstcon_testrpc_prep(lstcon_node_t *nd, int transop, unsigned feats,
-		    lstcon_test_t *test, lstcon_rpc_t **crpc)
+lstcon_testrpc_prep(struct lstcon_node *nd, int transop, unsigned feats,
+		    struct lstcon_test *test, struct lstcon_rpc **crpc)
 {
-	lstcon_group_t *sgrp = test->tes_src_grp;
-	lstcon_group_t *dgrp = test->tes_dst_grp;
-	srpc_test_reqst_t *trq;
-	srpc_bulk_t *bulk;
+	struct lstcon_group *sgrp = test->tes_src_grp;
+	struct lstcon_group *dgrp = test->tes_dst_grp;
+	struct srpc_test_reqst *trq;
+	struct srpc_bulk *bulk;
 	int i;
 	int npg = 0;
 	int nob = 0;
@@ -841,7 +840,6 @@ lstcon_testrpc_prep(lstcon_node_t *nd, int transop, unsigned feats,
 
 		trq->tsr_ndest = 0;
 		trq->tsr_loop = nmax * test->tes_dist * test->tes_concur;
-
 	} else {
 		bulk = &(*crpc)->crp_rpc->crpc_bulk;
 
@@ -917,10 +915,10 @@ lstcon_testrpc_prep(lstcon_node_t *nd, int transop, unsigned feats,
 }
 
 static int
-lstcon_sesnew_stat_reply(lstcon_rpc_trans_t *trans,
-			 lstcon_node_t *nd, srpc_msg_t *reply)
+lstcon_sesnew_stat_reply(struct lstcon_rpc_trans *trans,
+			 struct lstcon_node *nd, struct srpc_msg *reply)
 {
-	srpc_mksn_reply_t *mksn_rep = &reply->msg_body.mksn_reply;
+	struct srpc_mksn_reply *mksn_rep = &reply->msg_body.mksn_reply;
 	int status = mksn_rep->mksn_status;
 
 	if (!status &&
@@ -940,7 +938,7 @@ lstcon_sesnew_stat_reply(lstcon_rpc_trans_t *trans,
 
 	if (!trans->tas_feats_updated) {
 		spin_lock(&console_session.ses_rpc_lock);
-		if (!trans->tas_feats_updated) { /* recheck with lock */
+		if (!trans->tas_feats_updated) {	/* recheck with lock */
 			trans->tas_feats_updated = 1;
 			trans->tas_features = reply->msg_ses_feats;
 		}
@@ -964,14 +962,14 @@ lstcon_sesnew_stat_reply(lstcon_rpc_trans_t *trans,
 }
 
 void
-lstcon_rpc_stat_reply(lstcon_rpc_trans_t *trans, srpc_msg_t *msg,
-		      lstcon_node_t *nd, lstcon_trans_stat_t *stat)
+lstcon_rpc_stat_reply(struct lstcon_rpc_trans *trans, struct srpc_msg *msg,
+		      struct lstcon_node *nd, lstcon_trans_stat_t *stat)
 {
-	srpc_rmsn_reply_t *rmsn_rep;
-	srpc_debug_reply_t *dbg_rep;
-	srpc_batch_reply_t *bat_rep;
-	srpc_test_reply_t *test_rep;
-	srpc_stat_reply_t *stat_rep;
+	struct srpc_rmsn_reply *rmsn_rep;
+	struct srpc_debug_reply *dbg_rep;
+	struct srpc_batch_reply *bat_rep;
+	struct srpc_test_reply *test_rep;
+	struct srpc_stat_reply *stat_rep;
 	int rc = 0;
 
 	switch (trans->tas_opc) {
@@ -1085,12 +1083,12 @@ int
 lstcon_rpc_trans_ndlist(struct list_head *ndlist,
 			struct list_head *translist, int transop,
 			void *arg, lstcon_rpc_cond_func_t condition,
-			lstcon_rpc_trans_t **transpp)
+			struct lstcon_rpc_trans **transpp)
 {
-	lstcon_rpc_trans_t *trans;
-	lstcon_ndlink_t *ndl;
-	lstcon_node_t *nd;
-	lstcon_rpc_t *rpc;
+	struct lstcon_rpc_trans *trans;
+	struct lstcon_ndlink *ndl;
+	struct lstcon_node *nd;
+	struct lstcon_rpc *rpc;
 	unsigned feats;
 	int rc;
 
@@ -1130,14 +1128,16 @@ lstcon_rpc_trans_ndlist(struct list_head *ndlist,
 		case LST_TRANS_TSBCLIADD:
 		case LST_TRANS_TSBSRVADD:
 			rc = lstcon_testrpc_prep(nd, transop, feats,
-						 (lstcon_test_t *)arg, &rpc);
+						 (struct lstcon_test *)arg,
+						 &rpc);
 			break;
 		case LST_TRANS_TSBRUN:
 		case LST_TRANS_TSBSTOP:
 		case LST_TRANS_TSBCLIQRY:
 		case LST_TRANS_TSBSRVQRY:
 			rc = lstcon_batrpc_prep(nd, transop, feats,
-						(lstcon_tsb_hdr_t *)arg, &rpc);
+						(struct lstcon_tsb_hdr *)arg,
+						&rpc);
 			break;
 		case LST_TRANS_STATQRY:
 			rc = lstcon_statrpc_prep(nd, feats, &rpc);
@@ -1170,17 +1170,18 @@ static void
 lstcon_rpc_pinger(void *arg)
 {
 	struct stt_timer *ptimer = (struct stt_timer *)arg;
-	lstcon_rpc_trans_t *trans;
-	lstcon_rpc_t *crpc;
-	srpc_msg_t *rep;
-	srpc_debug_reqst_t *drq;
-	lstcon_ndlink_t *ndl;
-	lstcon_node_t *nd;
+	struct lstcon_rpc_trans *trans;
+	struct lstcon_rpc *crpc;
+	struct srpc_msg *rep;
+	struct srpc_debug_reqst *drq;
+	struct lstcon_ndlink *ndl;
+	struct lstcon_node *nd;
 	int intv;
 	int count = 0;
 	int rc;
 
-	/* RPC pinger is a special case of transaction,
+	/*
+	 * RPC pinger is a special case of transaction,
 	 * it's called by timer at 8 seconds interval.
 	 */
 	mutex_lock(&console_session.ses_mutex);
@@ -1326,9 +1327,9 @@ lstcon_rpc_pinger_stop(void)
 void
 lstcon_rpc_cleanup_wait(void)
 {
-	lstcon_rpc_trans_t *trans;
-	lstcon_rpc_t *crpc;
-	lstcon_rpc_t *temp;
+	struct lstcon_rpc_trans *trans;
+	struct lstcon_rpc *crpc;
+	struct lstcon_rpc *temp;
 	struct list_head *pacer;
 	struct list_head zlist;
 
@@ -1338,7 +1339,7 @@ lstcon_rpc_cleanup_wait(void)
 
 	while (!list_empty(&console_session.ses_trans_list)) {
 		list_for_each(pacer, &console_session.ses_trans_list) {
-			trans = list_entry(pacer, lstcon_rpc_trans_t,
+			trans = list_entry(pacer, struct lstcon_rpc_trans,
 					   tas_link);
 
 			CDEBUG(D_NET, "Session closed, wakeup transaction %s\n",
@@ -1370,7 +1371,7 @@ lstcon_rpc_cleanup_wait(void)
 
 	list_for_each_entry_safe(crpc, temp, &zlist, crp_link) {
 		list_del(&crpc->crp_link);
-		LIBCFS_FREE(crpc, sizeof(lstcon_rpc_t));
+		LIBCFS_FREE(crpc, sizeof(struct lstcon_rpc));
 	}
 }
 
diff --git a/drivers/staging/lustre/lnet/selftest/conrpc.h b/drivers/staging/lustre/lnet/selftest/conrpc.h
index 3e7839dad..90c3385a3 100644
--- a/drivers/staging/lustre/lnet/selftest/conrpc.h
+++ b/drivers/staging/lustre/lnet/selftest/conrpc.h
@@ -63,9 +63,9 @@ struct lstcon_tsb_hdr;
 struct lstcon_test;
 struct lstcon_node;
 
-typedef struct lstcon_rpc {
+struct lstcon_rpc {
 	struct list_head	 crp_link;	/* chain on rpc transaction */
-	srpc_client_rpc_t	*crp_rpc;	/* client rpc */
+	struct srpc_client_rpc	*crp_rpc;	/* client rpc */
 	struct lstcon_node	*crp_node;	/* destination node */
 	struct lstcon_rpc_trans *crp_trans;	/* conrpc transaction */
 
@@ -76,9 +76,9 @@ typedef struct lstcon_rpc {
 	unsigned int		 crp_embedded:1;
 	int			 crp_status;	/* console rpc errors */
 	unsigned long		 crp_stamp;	/* replied time stamp */
-} lstcon_rpc_t;
+};
 
-typedef struct lstcon_rpc_trans {
+struct lstcon_rpc_trans {
 	struct list_head  tas_olink;	     /* link chain on owner list */
 	struct list_head  tas_link;	     /* link chain on global list */
 	int		  tas_opc;	     /* operation code of transaction */
@@ -87,7 +87,7 @@ typedef struct lstcon_rpc_trans {
 	wait_queue_head_t tas_waitq;	     /* wait queue head */
 	atomic_t	  tas_remaining;     /* # of un-scheduled rpcs */
 	struct list_head  tas_rpcs_list;     /* queued requests */
-} lstcon_rpc_trans_t;
+};
 
 #define LST_TRANS_PRIVATE	0x1000
 
@@ -106,35 +106,35 @@ typedef struct lstcon_rpc_trans {
 #define LST_TRANS_STATQRY	0x21
 
 typedef int (*lstcon_rpc_cond_func_t)(int, struct lstcon_node *, void *);
-typedef int (*lstcon_rpc_readent_func_t)(int, srpc_msg_t *,
+typedef int (*lstcon_rpc_readent_func_t)(int, struct srpc_msg *,
 					 lstcon_rpc_ent_t __user *);
 
 int  lstcon_sesrpc_prep(struct lstcon_node *nd, int transop,
-			unsigned version, lstcon_rpc_t **crpc);
+			unsigned version, struct lstcon_rpc **crpc);
 int  lstcon_dbgrpc_prep(struct lstcon_node *nd,
-			unsigned version, lstcon_rpc_t **crpc);
+			unsigned version, struct lstcon_rpc **crpc);
 int  lstcon_batrpc_prep(struct lstcon_node *nd, int transop, unsigned version,
-			struct lstcon_tsb_hdr *tsb, lstcon_rpc_t **crpc);
+			struct lstcon_tsb_hdr *tsb, struct lstcon_rpc **crpc);
 int  lstcon_testrpc_prep(struct lstcon_node *nd, int transop, unsigned version,
-			 struct lstcon_test *test, lstcon_rpc_t **crpc);
+			 struct lstcon_test *test, struct lstcon_rpc **crpc);
 int  lstcon_statrpc_prep(struct lstcon_node *nd, unsigned version,
-			 lstcon_rpc_t **crpc);
-void lstcon_rpc_put(lstcon_rpc_t *crpc);
+			 struct lstcon_rpc **crpc);
+void lstcon_rpc_put(struct lstcon_rpc *crpc);
 int  lstcon_rpc_trans_prep(struct list_head *translist,
-			   int transop, lstcon_rpc_trans_t **transpp);
+			   int transop, struct lstcon_rpc_trans **transpp);
 int  lstcon_rpc_trans_ndlist(struct list_head *ndlist,
 			     struct list_head *translist, int transop,
 			     void *arg, lstcon_rpc_cond_func_t condition,
-			     lstcon_rpc_trans_t **transpp);
-void lstcon_rpc_trans_stat(lstcon_rpc_trans_t *trans,
+			     struct lstcon_rpc_trans **transpp);
+void lstcon_rpc_trans_stat(struct lstcon_rpc_trans *trans,
 			   lstcon_trans_stat_t *stat);
-int  lstcon_rpc_trans_interpreter(lstcon_rpc_trans_t *trans,
+int  lstcon_rpc_trans_interpreter(struct lstcon_rpc_trans *trans,
 				  struct list_head __user *head_up,
 				  lstcon_rpc_readent_func_t readent);
-void lstcon_rpc_trans_abort(lstcon_rpc_trans_t *trans, int error);
-void lstcon_rpc_trans_destroy(lstcon_rpc_trans_t *trans);
-void lstcon_rpc_trans_addreq(lstcon_rpc_trans_t *trans, lstcon_rpc_t *req);
-int  lstcon_rpc_trans_postwait(lstcon_rpc_trans_t *trans, int timeout);
+void lstcon_rpc_trans_abort(struct lstcon_rpc_trans *trans, int error);
+void lstcon_rpc_trans_destroy(struct lstcon_rpc_trans *trans);
+void lstcon_rpc_trans_addreq(struct lstcon_rpc_trans *trans, struct lstcon_rpc *req);
+int  lstcon_rpc_trans_postwait(struct lstcon_rpc_trans *trans, int timeout);
 int  lstcon_rpc_pinger_start(void);
 void lstcon_rpc_pinger_stop(void);
 void lstcon_rpc_cleanup_wait(void);
diff --git a/drivers/staging/lustre/lnet/selftest/console.c b/drivers/staging/lustre/lnet/selftest/console.c
index 1a923ea3a..a03e52d29 100644
--- a/drivers/staging/lustre/lnet/selftest/console.c
+++ b/drivers/staging/lustre/lnet/selftest/console.c
@@ -61,7 +61,7 @@ do {							\
 struct lstcon_session console_session;
 
 static void
-lstcon_node_get(lstcon_node_t *nd)
+lstcon_node_get(struct lstcon_node *nd)
 {
 	LASSERT(nd->nd_ref >= 1);
 
@@ -69,9 +69,9 @@ lstcon_node_get(lstcon_node_t *nd)
 }
 
 static int
-lstcon_node_find(lnet_process_id_t id, lstcon_node_t **ndpp, int create)
+lstcon_node_find(lnet_process_id_t id, struct lstcon_node **ndpp, int create)
 {
-	lstcon_ndlink_t	*ndl;
+	struct lstcon_ndlink	*ndl;
 	unsigned int idx = LNET_NIDADDR(id.nid) % LST_GLOBAL_HASHSIZE;
 
 	LASSERT(id.nid != LNET_NID_ANY);
@@ -90,11 +90,11 @@ lstcon_node_find(lnet_process_id_t id, lstcon_node_t **ndpp, int create)
 	if (!create)
 		return -ENOENT;
 
-	LIBCFS_ALLOC(*ndpp, sizeof(lstcon_node_t) + sizeof(lstcon_ndlink_t));
+	LIBCFS_ALLOC(*ndpp, sizeof(struct lstcon_node) + sizeof(struct lstcon_ndlink));
 	if (!*ndpp)
 		return -ENOMEM;
 
-	ndl = (lstcon_ndlink_t *)(*ndpp + 1);
+	ndl = (struct lstcon_ndlink *)(*ndpp + 1);
 
 	ndl->ndl_node = *ndpp;
 
@@ -103,7 +103,7 @@ lstcon_node_find(lnet_process_id_t id, lstcon_node_t **ndpp, int create)
 	ndl->ndl_node->nd_stamp = cfs_time_current();
 	ndl->ndl_node->nd_state = LST_NODE_UNKNOWN;
 	ndl->ndl_node->nd_timeout = 0;
-	memset(&ndl->ndl_node->nd_ping, 0, sizeof(lstcon_rpc_t));
+	memset(&ndl->ndl_node->nd_ping, 0, sizeof(struct lstcon_rpc));
 
 	/*
 	 * queued in global hash & list, no refcount is taken by
@@ -117,16 +117,16 @@ lstcon_node_find(lnet_process_id_t id, lstcon_node_t **ndpp, int create)
 }
 
 static void
-lstcon_node_put(lstcon_node_t *nd)
+lstcon_node_put(struct lstcon_node *nd)
 {
-	lstcon_ndlink_t *ndl;
+	struct lstcon_ndlink *ndl;
 
 	LASSERT(nd->nd_ref > 0);
 
 	if (--nd->nd_ref > 0)
 		return;
 
-	ndl = (lstcon_ndlink_t *)(nd + 1);
+	ndl = (struct lstcon_ndlink *)(nd + 1);
 
 	LASSERT(!list_empty(&ndl->ndl_link));
 	LASSERT(!list_empty(&ndl->ndl_hlink));
@@ -135,16 +135,16 @@ lstcon_node_put(lstcon_node_t *nd)
 	list_del(&ndl->ndl_link);
 	list_del(&ndl->ndl_hlink);
 
-	LIBCFS_FREE(nd, sizeof(lstcon_node_t) + sizeof(lstcon_ndlink_t));
+	LIBCFS_FREE(nd, sizeof(struct lstcon_node) + sizeof(struct lstcon_ndlink));
 }
 
 static int
 lstcon_ndlink_find(struct list_head *hash,
-		   lnet_process_id_t id, lstcon_ndlink_t **ndlpp, int create)
+		   lnet_process_id_t id, struct lstcon_ndlink **ndlpp, int create)
 {
 	unsigned int idx = LNET_NIDADDR(id.nid) % LST_NODE_HASHSIZE;
-	lstcon_ndlink_t *ndl;
-	lstcon_node_t *nd;
+	struct lstcon_ndlink *ndl;
+	struct lstcon_node *nd;
 	int rc;
 
 	if (id.nid == LNET_NID_ANY)
@@ -168,7 +168,7 @@ lstcon_ndlink_find(struct list_head *hash,
 	if (rc)
 		return rc;
 
-	LIBCFS_ALLOC(ndl, sizeof(lstcon_ndlink_t));
+	LIBCFS_ALLOC(ndl, sizeof(struct lstcon_ndlink));
 	if (!ndl) {
 		lstcon_node_put(nd);
 		return -ENOMEM;
@@ -184,7 +184,7 @@ lstcon_ndlink_find(struct list_head *hash,
 }
 
 static void
-lstcon_ndlink_release(lstcon_ndlink_t *ndl)
+lstcon_ndlink_release(struct lstcon_ndlink *ndl)
 {
 	LASSERT(list_empty(&ndl->ndl_link));
 	LASSERT(!list_empty(&ndl->ndl_hlink));
@@ -196,12 +196,12 @@ lstcon_ndlink_release(lstcon_ndlink_t *ndl)
 }
 
 static int
-lstcon_group_alloc(char *name, lstcon_group_t **grpp)
+lstcon_group_alloc(char *name, struct lstcon_group **grpp)
 {
-	lstcon_group_t *grp;
+	struct lstcon_group *grp;
 	int i;
 
-	LIBCFS_ALLOC(grp, offsetof(lstcon_group_t,
+	LIBCFS_ALLOC(grp, offsetof(struct lstcon_group,
 				   grp_ndl_hash[LST_NODE_HASHSIZE]));
 	if (!grp)
 		return -ENOMEM;
@@ -209,7 +209,7 @@ lstcon_group_alloc(char *name, lstcon_group_t **grpp)
 	grp->grp_ref = 1;
 	if (name) {
 		if (strlen(name) > sizeof(grp->grp_name) - 1) {
-			LIBCFS_FREE(grp, offsetof(lstcon_group_t,
+			LIBCFS_FREE(grp, offsetof(struct lstcon_group,
 				    grp_ndl_hash[LST_NODE_HASHSIZE]));
 			return -E2BIG;
 		}
@@ -229,18 +229,18 @@ lstcon_group_alloc(char *name, lstcon_group_t **grpp)
 }
 
 static void
-lstcon_group_addref(lstcon_group_t *grp)
+lstcon_group_addref(struct lstcon_group *grp)
 {
 	grp->grp_ref++;
 }
 
-static void lstcon_group_ndlink_release(lstcon_group_t *, lstcon_ndlink_t *);
+static void lstcon_group_ndlink_release(struct lstcon_group *, struct lstcon_ndlink *);
 
 static void
-lstcon_group_drain(lstcon_group_t *grp, int keep)
+lstcon_group_drain(struct lstcon_group *grp, int keep)
 {
-	lstcon_ndlink_t *ndl;
-	lstcon_ndlink_t *tmp;
+	struct lstcon_ndlink *ndl;
+	struct lstcon_ndlink *tmp;
 
 	list_for_each_entry_safe(ndl, tmp, &grp->grp_ndl_list, ndl_link) {
 		if (!(ndl->ndl_node->nd_state & keep))
@@ -249,7 +249,7 @@ lstcon_group_drain(lstcon_group_t *grp, int keep)
 }
 
 static void
-lstcon_group_decref(lstcon_group_t *grp)
+lstcon_group_decref(struct lstcon_group *grp)
 {
 	int i;
 
@@ -264,20 +264,20 @@ lstcon_group_decref(lstcon_group_t *grp)
 	for (i = 0; i < LST_NODE_HASHSIZE; i++)
 		LASSERT(list_empty(&grp->grp_ndl_hash[i]));
 
-	LIBCFS_FREE(grp, offsetof(lstcon_group_t,
+	LIBCFS_FREE(grp, offsetof(struct lstcon_group,
 				  grp_ndl_hash[LST_NODE_HASHSIZE]));
 }
 
 static int
-lstcon_group_find(const char *name, lstcon_group_t **grpp)
+lstcon_group_find(const char *name, struct lstcon_group **grpp)
 {
-	lstcon_group_t *grp;
+	struct lstcon_group *grp;
 
 	list_for_each_entry(grp, &console_session.ses_grp_list, grp_link) {
 		if (strncmp(grp->grp_name, name, LST_NAME_SIZE))
 			continue;
 
-		lstcon_group_addref(grp);  /* +1 ref for caller */
+		lstcon_group_addref(grp); /* +1 ref for caller */
 		*grpp = grp;
 		return 0;
 	}
@@ -286,8 +286,8 @@ lstcon_group_find(const char *name, lstcon_group_t **grpp)
 }
 
 static int
-lstcon_group_ndlink_find(lstcon_group_t *grp, lnet_process_id_t id,
-			 lstcon_ndlink_t **ndlpp, int create)
+lstcon_group_ndlink_find(struct lstcon_group *grp, lnet_process_id_t id,
+			 struct lstcon_ndlink **ndlpp, int create)
 {
 	int rc;
 
@@ -305,7 +305,7 @@ lstcon_group_ndlink_find(lstcon_group_t *grp, lnet_process_id_t id,
 }
 
 static void
-lstcon_group_ndlink_release(lstcon_group_t *grp, lstcon_ndlink_t *ndl)
+lstcon_group_ndlink_release(struct lstcon_group *grp, struct lstcon_ndlink *ndl)
 {
 	list_del_init(&ndl->ndl_link);
 	lstcon_ndlink_release(ndl);
@@ -313,8 +313,8 @@ lstcon_group_ndlink_release(lstcon_group_t *grp, lstcon_ndlink_t *ndl)
 }
 
 static void
-lstcon_group_ndlink_move(lstcon_group_t *old,
-			 lstcon_group_t *new, lstcon_ndlink_t *ndl)
+lstcon_group_ndlink_move(struct lstcon_group *old,
+			 struct lstcon_group *new, struct lstcon_ndlink *ndl)
 {
 	unsigned int idx = LNET_NIDADDR(ndl->ndl_node->nd_id.nid) %
 					LST_NODE_HASHSIZE;
@@ -329,21 +329,21 @@ lstcon_group_ndlink_move(lstcon_group_t *old,
 }
 
 static void
-lstcon_group_move(lstcon_group_t *old, lstcon_group_t *new)
+lstcon_group_move(struct lstcon_group *old, struct lstcon_group *new)
 {
-	lstcon_ndlink_t *ndl;
+	struct lstcon_ndlink *ndl;
 
 	while (!list_empty(&old->grp_ndl_list)) {
 		ndl = list_entry(old->grp_ndl_list.next,
-				 lstcon_ndlink_t, ndl_link);
+				 struct lstcon_ndlink, ndl_link);
 		lstcon_group_ndlink_move(old, new, ndl);
 	}
 }
 
 static int
-lstcon_sesrpc_condition(int transop, lstcon_node_t *nd, void *arg)
+lstcon_sesrpc_condition(int transop, struct lstcon_node *nd, void *arg)
 {
-	lstcon_group_t *grp = (lstcon_group_t *)arg;
+	struct lstcon_group *grp = (struct lstcon_group *)arg;
 
 	switch (transop) {
 	case LST_TRANS_SESNEW:
@@ -370,10 +370,10 @@ lstcon_sesrpc_condition(int transop, lstcon_node_t *nd, void *arg)
 }
 
 static int
-lstcon_sesrpc_readent(int transop, srpc_msg_t *msg,
+lstcon_sesrpc_readent(int transop, struct srpc_msg *msg,
 		      lstcon_rpc_ent_t __user *ent_up)
 {
-	srpc_debug_reply_t *rep;
+	struct srpc_debug_reply *rep;
 
 	switch (transop) {
 	case LST_TRANS_SESNEW:
@@ -399,13 +399,13 @@ lstcon_sesrpc_readent(int transop, srpc_msg_t *msg,
 }
 
 static int
-lstcon_group_nodes_add(lstcon_group_t *grp,
+lstcon_group_nodes_add(struct lstcon_group *grp,
 		       int count, lnet_process_id_t __user *ids_up,
 		       unsigned *featp, struct list_head __user *result_up)
 {
-	lstcon_rpc_trans_t *trans;
-	lstcon_ndlink_t	*ndl;
-	lstcon_group_t *tmp;
+	struct lstcon_rpc_trans *trans;
+	struct lstcon_ndlink	*ndl;
+	struct lstcon_group *tmp;
 	lnet_process_id_t id;
 	int i;
 	int rc;
@@ -466,13 +466,13 @@ lstcon_group_nodes_add(lstcon_group_t *grp,
 }
 
 static int
-lstcon_group_nodes_remove(lstcon_group_t *grp,
+lstcon_group_nodes_remove(struct lstcon_group *grp,
 			  int count, lnet_process_id_t __user *ids_up,
 			  struct list_head __user *result_up)
 {
-	lstcon_rpc_trans_t *trans;
-	lstcon_ndlink_t *ndl;
-	lstcon_group_t *tmp;
+	struct lstcon_rpc_trans *trans;
+	struct lstcon_ndlink *ndl;
+	struct lstcon_group *tmp;
 	lnet_process_id_t id;
 	int rc;
 	int i;
@@ -523,7 +523,7 @@ error:
 int
 lstcon_group_add(char *name)
 {
-	lstcon_group_t *grp;
+	struct lstcon_group *grp;
 	int rc;
 
 	rc = lstcon_group_find(name, &grp) ? 0 : -EEXIST;
@@ -548,7 +548,7 @@ int
 lstcon_nodes_add(char *name, int count, lnet_process_id_t __user *ids_up,
 		 unsigned *featp, struct list_head __user *result_up)
 {
-	lstcon_group_t *grp;
+	struct lstcon_group *grp;
 	int rc;
 
 	LASSERT(count > 0);
@@ -578,8 +578,8 @@ lstcon_nodes_add(char *name, int count, lnet_process_id_t __user *ids_up,
 int
 lstcon_group_del(char *name)
 {
-	lstcon_rpc_trans_t *trans;
-	lstcon_group_t *grp;
+	struct lstcon_rpc_trans *trans;
+	struct lstcon_group *grp;
 	int rc;
 
 	rc = lstcon_group_find(name, &grp);
@@ -621,7 +621,7 @@ lstcon_group_del(char *name)
 int
 lstcon_group_clean(char *name, int args)
 {
-	lstcon_group_t *grp = NULL;
+	struct lstcon_group *grp = NULL;
 	int rc;
 
 	rc = lstcon_group_find(name, &grp);
@@ -654,7 +654,7 @@ int
 lstcon_nodes_remove(char *name, int count, lnet_process_id_t __user *ids_up,
 		    struct list_head __user *result_up)
 {
-	lstcon_group_t *grp = NULL;
+	struct lstcon_group *grp = NULL;
 	int rc;
 
 	rc = lstcon_group_find(name, &grp);
@@ -683,8 +683,8 @@ lstcon_nodes_remove(char *name, int count, lnet_process_id_t __user *ids_up,
 int
 lstcon_group_refresh(char *name, struct list_head __user *result_up)
 {
-	lstcon_rpc_trans_t *trans;
-	lstcon_group_t *grp;
+	struct lstcon_rpc_trans *trans;
+	struct lstcon_group *grp;
 	int rc;
 
 	rc = lstcon_group_find(name, &grp);
@@ -725,7 +725,7 @@ lstcon_group_refresh(char *name, struct list_head __user *result_up)
 int
 lstcon_group_list(int index, int len, char __user *name_up)
 {
-	lstcon_group_t *grp;
+	struct lstcon_group *grp;
 
 	LASSERT(index >= 0);
 	LASSERT(name_up);
@@ -733,7 +733,7 @@ lstcon_group_list(int index, int len, char __user *name_up)
 	list_for_each_entry(grp, &console_session.ses_grp_list, grp_link) {
 		if (!index--) {
 			return copy_to_user(name_up, grp->grp_name, len) ?
-			       -EFAULT : 0;
+					    -EFAULT : 0;
 		}
 	}
 
@@ -744,8 +744,8 @@ static int
 lstcon_nodes_getent(struct list_head *head, int *index_p,
 		    int *count_p, lstcon_node_ent_t __user *dents_up)
 {
-	lstcon_ndlink_t *ndl;
-	lstcon_node_t *nd;
+	struct lstcon_ndlink *ndl;
+	struct lstcon_node *nd;
 	int count = 0;
 	int index = 0;
 
@@ -786,8 +786,8 @@ lstcon_group_info(char *name, lstcon_ndlist_ent_t __user *gents_p,
 		  lstcon_node_ent_t __user *dents_up)
 {
 	lstcon_ndlist_ent_t *gentp;
-	lstcon_group_t *grp;
-	lstcon_ndlink_t *ndl;
+	struct lstcon_group *grp;
+	struct lstcon_ndlink *ndl;
 	int rc;
 
 	rc = lstcon_group_find(name, &grp);
@@ -828,9 +828,9 @@ lstcon_group_info(char *name, lstcon_ndlist_ent_t __user *gents_p,
 }
 
 static int
-lstcon_batch_find(const char *name, lstcon_batch_t **batpp)
+lstcon_batch_find(const char *name, struct lstcon_batch **batpp)
 {
-	lstcon_batch_t *bat;
+	struct lstcon_batch *bat;
 
 	list_for_each_entry(bat, &console_session.ses_bat_list, bat_link) {
 		if (!strncmp(bat->bat_name, name, LST_NAME_SIZE)) {
@@ -845,7 +845,7 @@ lstcon_batch_find(const char *name, lstcon_batch_t **batpp)
 int
 lstcon_batch_add(char *name)
 {
-	lstcon_batch_t *bat;
+	struct lstcon_batch *bat;
 	int i;
 	int rc;
 
@@ -855,7 +855,7 @@ lstcon_batch_add(char *name)
 		return rc;
 	}
 
-	LIBCFS_ALLOC(bat, sizeof(lstcon_batch_t));
+	LIBCFS_ALLOC(bat, sizeof(struct lstcon_batch));
 	if (!bat) {
 		CERROR("Can't allocate descriptor for batch %s\n", name);
 		return -ENOMEM;
@@ -865,7 +865,7 @@ lstcon_batch_add(char *name)
 		     sizeof(struct list_head) * LST_NODE_HASHSIZE);
 	if (!bat->bat_cli_hash) {
 		CERROR("Can't allocate hash for batch %s\n", name);
-		LIBCFS_FREE(bat, sizeof(lstcon_batch_t));
+		LIBCFS_FREE(bat, sizeof(struct lstcon_batch));
 
 		return -ENOMEM;
 	}
@@ -875,7 +875,7 @@ lstcon_batch_add(char *name)
 	if (!bat->bat_srv_hash) {
 		CERROR("Can't allocate hash for batch %s\n", name);
 		LIBCFS_FREE(bat->bat_cli_hash, LST_NODE_HASHSIZE);
-		LIBCFS_FREE(bat, sizeof(lstcon_batch_t));
+		LIBCFS_FREE(bat, sizeof(struct lstcon_batch));
 
 		return -ENOMEM;
 	}
@@ -883,7 +883,7 @@ lstcon_batch_add(char *name)
 	if (strlen(name) > sizeof(bat->bat_name) - 1) {
 		LIBCFS_FREE(bat->bat_srv_hash, LST_NODE_HASHSIZE);
 		LIBCFS_FREE(bat->bat_cli_hash, LST_NODE_HASHSIZE);
-		LIBCFS_FREE(bat, sizeof(lstcon_batch_t));
+		LIBCFS_FREE(bat, sizeof(struct lstcon_batch));
 		return -E2BIG;
 	}
 	strncpy(bat->bat_name, name, sizeof(bat->bat_name));
@@ -911,7 +911,7 @@ lstcon_batch_add(char *name)
 int
 lstcon_batch_list(int index, int len, char __user *name_up)
 {
-	lstcon_batch_t *bat;
+	struct lstcon_batch *bat;
 
 	LASSERT(name_up);
 	LASSERT(index >= 0);
@@ -934,9 +934,9 @@ lstcon_batch_info(char *name, lstcon_test_batch_ent_t __user *ent_up,
 	lstcon_test_batch_ent_t *entp;
 	struct list_head *clilst;
 	struct list_head *srvlst;
-	lstcon_test_t *test = NULL;
-	lstcon_batch_t *bat;
-	lstcon_ndlink_t	*ndl;
+	struct lstcon_test *test = NULL;
+	struct lstcon_batch *bat;
+	struct lstcon_ndlink	*ndl;
 	int rc;
 
 	rc = lstcon_batch_find(name, &bat);
@@ -977,7 +977,6 @@ lstcon_batch_info(char *name, lstcon_test_batch_ent_t __user *ent_up,
 	if (!test) {
 		entp->u.tbe_batch.bae_ntest = bat->bat_ntest;
 		entp->u.tbe_batch.bae_state = bat->bat_state;
-
 	} else {
 		entp->u.tbe_test.tse_type = test->tes_type;
 		entp->u.tbe_test.tse_loop = test->tes_loop;
@@ -999,7 +998,7 @@ lstcon_batch_info(char *name, lstcon_test_batch_ent_t __user *ent_up,
 }
 
 static int
-lstcon_batrpc_condition(int transop, lstcon_node_t *nd, void *arg)
+lstcon_batrpc_condition(int transop, struct lstcon_node *nd, void *arg)
 {
 	switch (transop) {
 	case LST_TRANS_TSBRUN:
@@ -1021,10 +1020,10 @@ lstcon_batrpc_condition(int transop, lstcon_node_t *nd, void *arg)
 }
 
 static int
-lstcon_batch_op(lstcon_batch_t *bat, int transop,
+lstcon_batch_op(struct lstcon_batch *bat, int transop,
 		struct list_head __user *result_up)
 {
-	lstcon_rpc_trans_t *trans;
+	struct lstcon_rpc_trans *trans;
 	int rc;
 
 	rc = lstcon_rpc_trans_ndlist(&bat->bat_cli_list,
@@ -1047,7 +1046,7 @@ lstcon_batch_op(lstcon_batch_t *bat, int transop,
 int
 lstcon_batch_run(char *name, int timeout, struct list_head __user *result_up)
 {
-	lstcon_batch_t *bat;
+	struct lstcon_batch *bat;
 	int rc;
 
 	if (lstcon_batch_find(name, &bat)) {
@@ -1069,7 +1068,7 @@ lstcon_batch_run(char *name, int timeout, struct list_head __user *result_up)
 int
 lstcon_batch_stop(char *name, int force, struct list_head __user *result_up)
 {
-	lstcon_batch_t *bat;
+	struct lstcon_batch *bat;
 	int rc;
 
 	if (lstcon_batch_find(name, &bat)) {
@@ -1089,17 +1088,17 @@ lstcon_batch_stop(char *name, int force, struct list_head __user *result_up)
 }
 
 static void
-lstcon_batch_destroy(lstcon_batch_t *bat)
+lstcon_batch_destroy(struct lstcon_batch *bat)
 {
-	lstcon_ndlink_t *ndl;
-	lstcon_test_t *test;
+	struct lstcon_ndlink *ndl;
+	struct lstcon_test *test;
 	int i;
 
 	list_del(&bat->bat_link);
 
 	while (!list_empty(&bat->bat_test_list)) {
 		test = list_entry(bat->bat_test_list.next,
-				  lstcon_test_t, tes_link);
+				  struct lstcon_test, tes_link);
 		LASSERT(list_empty(&test->tes_trans_list));
 
 		list_del(&test->tes_link);
@@ -1107,7 +1106,7 @@ lstcon_batch_destroy(lstcon_batch_t *bat)
 		lstcon_group_decref(test->tes_src_grp);
 		lstcon_group_decref(test->tes_dst_grp);
 
-		LIBCFS_FREE(test, offsetof(lstcon_test_t,
+		LIBCFS_FREE(test, offsetof(struct lstcon_test,
 					   tes_param[test->tes_paramlen]));
 	}
 
@@ -1115,7 +1114,7 @@ lstcon_batch_destroy(lstcon_batch_t *bat)
 
 	while (!list_empty(&bat->bat_cli_list)) {
 		ndl = list_entry(bat->bat_cli_list.next,
-				 lstcon_ndlink_t, ndl_link);
+				 struct lstcon_ndlink, ndl_link);
 		list_del_init(&ndl->ndl_link);
 
 		lstcon_ndlink_release(ndl);
@@ -1123,7 +1122,7 @@ lstcon_batch_destroy(lstcon_batch_t *bat)
 
 	while (!list_empty(&bat->bat_srv_list)) {
 		ndl = list_entry(bat->bat_srv_list.next,
-				 lstcon_ndlink_t, ndl_link);
+				 struct lstcon_ndlink, ndl_link);
 		list_del_init(&ndl->ndl_link);
 
 		lstcon_ndlink_release(ndl);
@@ -1138,19 +1137,19 @@ lstcon_batch_destroy(lstcon_batch_t *bat)
 		    sizeof(struct list_head) * LST_NODE_HASHSIZE);
 	LIBCFS_FREE(bat->bat_srv_hash,
 		    sizeof(struct list_head) * LST_NODE_HASHSIZE);
-	LIBCFS_FREE(bat, sizeof(lstcon_batch_t));
+	LIBCFS_FREE(bat, sizeof(struct lstcon_batch));
 }
 
 static int
-lstcon_testrpc_condition(int transop, lstcon_node_t *nd, void *arg)
+lstcon_testrpc_condition(int transop, struct lstcon_node *nd, void *arg)
 {
-	lstcon_test_t *test;
-	lstcon_batch_t *batch;
-	lstcon_ndlink_t *ndl;
+	struct lstcon_test *test;
+	struct lstcon_batch *batch;
+	struct lstcon_ndlink *ndl;
 	struct list_head *hash;
 	struct list_head *head;
 
-	test = (lstcon_test_t *)arg;
+	test = (struct lstcon_test *)arg;
 	LASSERT(test);
 
 	batch = test->tes_batch;
@@ -1186,10 +1185,10 @@ lstcon_testrpc_condition(int transop, lstcon_node_t *nd, void *arg)
 }
 
 static int
-lstcon_test_nodes_add(lstcon_test_t *test, struct list_head __user *result_up)
+lstcon_test_nodes_add(struct lstcon_test *test, struct list_head __user *result_up)
 {
-	lstcon_rpc_trans_t *trans;
-	lstcon_group_t *grp;
+	struct lstcon_rpc_trans *trans;
+	struct lstcon_group *grp;
 	int transop;
 	int rc;
 
@@ -1237,7 +1236,7 @@ again:
 }
 
 static int
-lstcon_verify_batch(const char *name, lstcon_batch_t **batch)
+lstcon_verify_batch(const char *name, struct lstcon_batch **batch)
 {
 	int rc;
 
@@ -1256,10 +1255,10 @@ lstcon_verify_batch(const char *name, lstcon_batch_t **batch)
 }
 
 static int
-lstcon_verify_group(const char *name, lstcon_group_t **grp)
+lstcon_verify_group(const char *name, struct lstcon_group **grp)
 {
 	int rc;
-	lstcon_ndlink_t	*ndl;
+	struct lstcon_ndlink	*ndl;
 
 	rc = lstcon_group_find(name, grp);
 	if (rc) {
@@ -1284,11 +1283,11 @@ lstcon_test_add(char *batch_name, int type, int loop,
 		void *param, int paramlen, int *retp,
 		struct list_head __user *result_up)
 {
-	lstcon_test_t *test = NULL;
+	struct lstcon_test *test = NULL;
 	int rc;
-	lstcon_group_t *src_grp = NULL;
-	lstcon_group_t *dst_grp = NULL;
-	lstcon_batch_t *batch = NULL;
+	struct lstcon_group *src_grp = NULL;
+	struct lstcon_group *dst_grp = NULL;
+	struct lstcon_batch *batch = NULL;
 
 	/*
 	 * verify that a batch of the given name exists, and the groups
@@ -1310,7 +1309,7 @@ lstcon_test_add(char *batch_name, int type, int loop,
 	if (dst_grp->grp_userland)
 		*retp = 1;
 
-	LIBCFS_ALLOC(test, offsetof(lstcon_test_t, tes_param[paramlen]));
+	LIBCFS_ALLOC(test, offsetof(struct lstcon_test, tes_param[paramlen]));
 	if (!test) {
 		CERROR("Can't allocate test descriptor\n");
 		rc = -ENOMEM;
@@ -1357,7 +1356,7 @@ lstcon_test_add(char *batch_name, int type, int loop,
 	return rc;
 out:
 	if (test)
-		LIBCFS_FREE(test, offsetof(lstcon_test_t, tes_param[paramlen]));
+		LIBCFS_FREE(test, offsetof(struct lstcon_test, tes_param[paramlen]));
 
 	if (dst_grp)
 		lstcon_group_decref(dst_grp);
@@ -1369,9 +1368,9 @@ out:
 }
 
 static int
-lstcon_test_find(lstcon_batch_t *batch, int idx, lstcon_test_t **testpp)
+lstcon_test_find(struct lstcon_batch *batch, int idx, struct lstcon_test **testpp)
 {
-	lstcon_test_t *test;
+	struct lstcon_test *test;
 
 	list_for_each_entry(test, &batch->bat_test_list, tes_link) {
 		if (idx == test->tes_hdr.tsb_index) {
@@ -1384,10 +1383,10 @@ lstcon_test_find(lstcon_batch_t *batch, int idx, lstcon_test_t **testpp)
 }
 
 static int
-lstcon_tsbrpc_readent(int transop, srpc_msg_t *msg,
+lstcon_tsbrpc_readent(int transop, struct srpc_msg *msg,
 		      lstcon_rpc_ent_t __user *ent_up)
 {
-	srpc_batch_reply_t *rep = &msg->msg_body.bat_reply;
+	struct srpc_batch_reply *rep = &msg->msg_body.bat_reply;
 
 	LASSERT(transop == LST_TRANS_TSBCLIQRY ||
 		transop == LST_TRANS_TSBSRVQRY);
@@ -1404,12 +1403,12 @@ int
 lstcon_test_batch_query(char *name, int testidx, int client,
 			int timeout, struct list_head __user *result_up)
 {
-	lstcon_rpc_trans_t *trans;
+	struct lstcon_rpc_trans *trans;
 	struct list_head *translist;
 	struct list_head *ndlist;
-	lstcon_tsb_hdr_t *hdr;
-	lstcon_batch_t *batch;
-	lstcon_test_t *test = NULL;
+	struct lstcon_tsb_hdr *hdr;
+	struct lstcon_batch *batch;
+	struct lstcon_test *test = NULL;
 	int transop;
 	int rc;
 
@@ -1423,7 +1422,6 @@ lstcon_test_batch_query(char *name, int testidx, int client,
 		translist = &batch->bat_trans_list;
 		ndlist = &batch->bat_cli_list;
 		hdr = &batch->bat_hdr;
-
 	} else {
 		/* query specified test only */
 		rc = lstcon_test_find(batch, testidx, &test);
@@ -1448,7 +1446,8 @@ lstcon_test_batch_query(char *name, int testidx, int client,
 
 	lstcon_rpc_trans_postwait(trans, timeout);
 
-	if (!testidx && /* query a batch, not a test */
+	/* query a batch, not a test */
+	if (!testidx &&
 	    !lstcon_rpc_stat_failure(lstcon_trans_stat(), 0) &&
 	    !lstcon_tsbqry_stat_run(lstcon_trans_stat(), 0)) {
 		/* all RPCs finished, and no active test */
@@ -1463,10 +1462,10 @@ lstcon_test_batch_query(char *name, int testidx, int client,
 }
 
 static int
-lstcon_statrpc_readent(int transop, srpc_msg_t *msg,
+lstcon_statrpc_readent(int transop, struct srpc_msg *msg,
 		       lstcon_rpc_ent_t __user *ent_up)
 {
-	srpc_stat_reply_t *rep = &msg->msg_body.stat_reply;
+	struct srpc_stat_reply *rep = &msg->msg_body.stat_reply;
 	sfw_counters_t __user *sfwk_stat;
 	srpc_counters_t __user *srpc_stat;
 	lnet_counters_t __user *lnet_stat;
@@ -1491,7 +1490,7 @@ lstcon_ndlist_stat(struct list_head *ndlist,
 		   int timeout, struct list_head __user *result_up)
 {
 	struct list_head head;
-	lstcon_rpc_trans_t *trans;
+	struct lstcon_rpc_trans *trans;
 	int rc;
 
 	INIT_LIST_HEAD(&head);
@@ -1516,7 +1515,7 @@ int
 lstcon_group_stat(char *grp_name, int timeout,
 		  struct list_head __user *result_up)
 {
-	lstcon_group_t *grp;
+	struct lstcon_group *grp;
 	int rc;
 
 	rc = lstcon_group_find(grp_name, &grp);
@@ -1536,8 +1535,8 @@ int
 lstcon_nodes_stat(int count, lnet_process_id_t __user *ids_up,
 		  int timeout, struct list_head __user *result_up)
 {
-	lstcon_ndlink_t	*ndl;
-	lstcon_group_t *tmp;
+	struct lstcon_ndlink	*ndl;
+	struct lstcon_group *tmp;
 	lnet_process_id_t id;
 	int i;
 	int rc;
@@ -1581,7 +1580,7 @@ lstcon_debug_ndlist(struct list_head *ndlist,
 		    struct list_head *translist,
 		    int timeout, struct list_head __user *result_up)
 {
-	lstcon_rpc_trans_t *trans;
+	struct lstcon_rpc_trans *trans;
 	int rc;
 
 	rc = lstcon_rpc_trans_ndlist(ndlist, translist, LST_TRANS_SESQRY,
@@ -1611,7 +1610,7 @@ int
 lstcon_batch_debug(int timeout, char *name,
 		   int client, struct list_head __user *result_up)
 {
-	lstcon_batch_t *bat;
+	struct lstcon_batch *bat;
 	int rc;
 
 	rc = lstcon_batch_find(name, &bat);
@@ -1629,7 +1628,7 @@ int
 lstcon_group_debug(int timeout, char *name,
 		   struct list_head __user *result_up)
 {
-	lstcon_group_t *grp;
+	struct lstcon_group *grp;
 	int rc;
 
 	rc = lstcon_group_find(name, &grp);
@@ -1649,8 +1648,8 @@ lstcon_nodes_debug(int timeout,
 		   struct list_head __user *result_up)
 {
 	lnet_process_id_t id;
-	lstcon_ndlink_t *ndl;
-	lstcon_group_t *grp;
+	struct lstcon_ndlink *ndl;
+	struct lstcon_group *grp;
 	int i;
 	int rc;
 
@@ -1749,7 +1748,7 @@ lstcon_session_new(char *name, int key, unsigned feats,
 
 	if (strlen(name) > sizeof(console_session.ses_name) - 1)
 		return -E2BIG;
-	strncpy(console_session.ses_name, name,
+	strlcpy(console_session.ses_name, name,
 		sizeof(console_session.ses_name));
 
 	rc = lstcon_batch_add(LST_DEFAULT_BATCH);
@@ -1758,7 +1757,7 @@ lstcon_session_new(char *name, int key, unsigned feats,
 
 	rc = lstcon_rpc_pinger_start();
 	if (rc) {
-		lstcon_batch_t *bat = NULL;
+		struct lstcon_batch *bat = NULL;
 
 		lstcon_batch_find(LST_DEFAULT_BATCH, &bat);
 		lstcon_batch_destroy(bat);
@@ -1782,7 +1781,7 @@ lstcon_session_info(lst_sid_t __user *sid_up, int __user *key_up,
 		    char __user *name_up, int len)
 {
 	lstcon_ndlist_ent_t *entp;
-	lstcon_ndlink_t *ndl;
+	struct lstcon_ndlink *ndl;
 	int rc = 0;
 
 	if (console_session.ses_state != LST_SESSION_ACTIVE)
@@ -1813,9 +1812,9 @@ lstcon_session_info(lst_sid_t __user *sid_up, int __user *key_up,
 int
 lstcon_session_end(void)
 {
-	lstcon_rpc_trans_t *trans;
-	lstcon_group_t *grp;
-	lstcon_batch_t *bat;
+	struct lstcon_rpc_trans *trans;
+	struct lstcon_group *grp;
+	struct lstcon_batch *bat;
 	int rc = 0;
 
 	LASSERT(console_session.ses_state == LST_SESSION_ACTIVE);
@@ -1849,7 +1848,7 @@ lstcon_session_end(void)
 	/* destroy all batches */
 	while (!list_empty(&console_session.ses_bat_list)) {
 		bat = list_entry(console_session.ses_bat_list.next,
-				 lstcon_batch_t, bat_link);
+				 struct lstcon_batch, bat_link);
 
 		lstcon_batch_destroy(bat);
 	}
@@ -1857,7 +1856,7 @@ lstcon_session_end(void)
 	/* destroy all groups */
 	while (!list_empty(&console_session.ses_grp_list)) {
 		grp = list_entry(console_session.ses_grp_list.next,
-				 lstcon_group_t, grp_link);
+				 struct lstcon_group, grp_link);
 		LASSERT(grp->grp_ref == 1);
 
 		lstcon_group_decref(grp);
@@ -1906,12 +1905,12 @@ lstcon_session_feats_check(unsigned feats)
 static int
 lstcon_acceptor_handle(struct srpc_server_rpc *rpc)
 {
-	srpc_msg_t *rep	= &rpc->srpc_replymsg;
-	srpc_msg_t *req	= &rpc->srpc_reqstbuf->buf_msg;
-	srpc_join_reqst_t *jreq = &req->msg_body.join_reqst;
-	srpc_join_reply_t *jrep = &rep->msg_body.join_reply;
-	lstcon_group_t *grp = NULL;
-	lstcon_ndlink_t *ndl;
+	struct srpc_msg *rep	= &rpc->srpc_replymsg;
+	struct srpc_msg *req	= &rpc->srpc_reqstbuf->buf_msg;
+	struct srpc_join_reqst *jreq = &req->msg_body.join_reqst;
+	struct srpc_join_reply *jrep = &rep->msg_body.join_reply;
+	struct lstcon_group *grp = NULL;
+	struct lstcon_ndlink *ndl;
 	int rc = 0;
 
 	sfw_unpack_message(req);
@@ -1987,7 +1986,8 @@ out:
 	return rc;
 }
 
-static srpc_service_t lstcon_acceptor_service;
+static struct srpc_service lstcon_acceptor_service;
+
 static void lstcon_init_acceptor_service(void)
 {
 	/* initialize selftest console acceptor service table */
diff --git a/drivers/staging/lustre/lnet/selftest/console.h b/drivers/staging/lustre/lnet/selftest/console.h
index 554f58244..becd22e41 100644
--- a/drivers/staging/lustre/lnet/selftest/console.h
+++ b/drivers/staging/lustre/lnet/selftest/console.h
@@ -50,22 +50,25 @@
 #include "selftest.h"
 #include "conrpc.h"
 
-typedef struct lstcon_node {
+/* node descriptor */
+struct lstcon_node {
 	lnet_process_id_t nd_id;      /* id of the node */
 	int		  nd_ref;     /* reference count */
 	int		  nd_state;   /* state of the node */
 	int		  nd_timeout; /* session timeout */
 	unsigned long	  nd_stamp;   /* timestamp of last replied RPC */
 	struct lstcon_rpc nd_ping;    /* ping rpc */
-} lstcon_node_t; /* node descriptor */
+};
 
-typedef struct {
+/* node link descriptor */
+struct lstcon_ndlink {
 	struct list_head ndl_link;    /* chain on list */
 	struct list_head ndl_hlink;   /* chain on hash */
-	lstcon_node_t	 *ndl_node;   /* pointer to node */
-} lstcon_ndlink_t; /* node link descriptor */
+	struct lstcon_node	*ndl_node;	/* pointer to node */
+};
 
-typedef struct {
+/* (alias of nodes) group descriptor */
+struct lstcon_group {
 	struct list_head grp_link;		  /* chain on global group list
 						   */
 	int		 grp_ref;		  /* reference count */
@@ -76,18 +79,19 @@ typedef struct {
 	struct list_head grp_trans_list;	  /* transaction list */
 	struct list_head grp_ndl_list;		  /* nodes list */
 	struct list_head grp_ndl_hash[0];	  /* hash table for nodes */
-} lstcon_group_t; /* (alias of nodes) group descriptor */
+};
 
 #define LST_BATCH_IDLE	  0xB0	    /* idle batch */
 #define LST_BATCH_RUNNING 0xB1	    /* running batch */
 
-typedef struct lstcon_tsb_hdr {
+struct lstcon_tsb_hdr {
 	lst_bid_t	 tsb_id;	 /* batch ID */
 	int		 tsb_index;	 /* test index */
-} lstcon_tsb_hdr_t;
+};
 
-typedef struct {
-	lstcon_tsb_hdr_t bat_hdr;	  /* test_batch header */
+/* (tests ) batch descriptor */
+struct lstcon_batch {
+	struct lstcon_tsb_hdr	bat_hdr;	/* test_batch header */
 	struct list_head bat_link;	  /* chain on session's batches list */
 	int		 bat_ntest;	  /* # of test */
 	int		 bat_state;	  /* state of the batch */
@@ -95,20 +99,21 @@ typedef struct {
 					   * for run, force for stop */
 	char		 bat_name[LST_NAME_SIZE];/* name of batch */
 
-	struct list_head bat_test_list;   /* list head of tests (lstcon_test_t)
+	struct list_head bat_test_list;   /* list head of tests (struct lstcon_test)
 					   */
 	struct list_head bat_trans_list;  /* list head of transaction */
 	struct list_head bat_cli_list;	  /* list head of client nodes
-					   * (lstcon_node_t) */
+					   * (struct lstcon_node) */
 	struct list_head *bat_cli_hash;   /* hash table of client nodes */
 	struct list_head bat_srv_list;	  /* list head of server nodes */
 	struct list_head *bat_srv_hash;   /* hash table of server nodes */
-} lstcon_batch_t; /* (tests ) batch descriptor */
+};
 
-typedef struct lstcon_test {
-	lstcon_tsb_hdr_t tes_hdr;	 /* test batch header */
+/* a single test descriptor */
+struct lstcon_test {
+	struct lstcon_tsb_hdr	tes_hdr;	/* test batch header */
 	struct list_head tes_link;	 /* chain on batch's tests list */
-	lstcon_batch_t	 *tes_batch;	 /* pointer to batch */
+	struct lstcon_batch	*tes_batch;	 /* pointer to batch */
 
 	int		 tes_type;	 /* type of the test, i.e: bulk, ping */
 	int		 tes_stop_onerr; /* stop on error */
@@ -120,12 +125,12 @@ typedef struct lstcon_test {
 	int		 tes_cliidx;	 /* client index, used for RPC creating */
 
 	struct list_head tes_trans_list; /* transaction list */
-	lstcon_group_t	 *tes_src_grp;	 /* group run the test */
-	lstcon_group_t	 *tes_dst_grp;	 /* target group */
+	struct lstcon_group	*tes_src_grp;	/* group run the test */
+	struct lstcon_group	*tes_dst_grp;	/* target group */
 
 	int		 tes_paramlen;	 /* test parameter length */
 	char		 tes_param[0];	 /* test parameter */
-} lstcon_test_t; /* a single test descriptor */
+};
 
 #define LST_GLOBAL_HASHSIZE 503	     /* global nodes hash table size */
 #define LST_NODE_HASHSIZE   239	     /* node hash table (for batch or group) */
@@ -152,7 +157,7 @@ struct lstcon_session {
 	unsigned	    ses_expired:1;    /* console is timedout */
 	__u64		    ses_id_cookie;    /* batch id cookie */
 	char		    ses_name[LST_NAME_SIZE];/* session name */
-	lstcon_rpc_trans_t  *ses_ping;	      /* session pinger */
+	struct lstcon_rpc_trans	*ses_ping;		/* session pinger */
 	struct stt_timer	 ses_ping_timer;   /* timer for pinger */
 	lstcon_trans_stat_t ses_trans_stat;   /* transaction stats */
 
diff --git a/drivers/staging/lustre/lnet/selftest/framework.c b/drivers/staging/lustre/lnet/selftest/framework.c
index e2c532399..30e4f71f1 100644
--- a/drivers/staging/lustre/lnet/selftest/framework.c
+++ b/drivers/staging/lustre/lnet/selftest/framework.c
@@ -109,19 +109,19 @@ static struct smoketest_framework {
 	struct list_head  fw_tests;	      /* registered test cases */
 	atomic_t	  fw_nzombies;	      /* # zombie sessions */
 	spinlock_t	  fw_lock;	      /* serialise */
-	sfw_session_t	  *fw_session;	      /* _the_ session */
+	struct sfw_session	  *fw_session;	      /* _the_ session */
 	int		  fw_shuttingdown;    /* shutdown in progress */
 	struct srpc_server_rpc *fw_active_srpc;/* running RPC */
 } sfw_data;
 
 /* forward ref's */
-int sfw_stop_batch(sfw_batch_t *tsb, int force);
-void sfw_destroy_session(sfw_session_t *sn);
+int sfw_stop_batch(struct sfw_batch *tsb, int force);
+void sfw_destroy_session(struct sfw_session *sn);
 
-static inline sfw_test_case_t *
+static inline struct sfw_test_case *
 sfw_find_test_case(int id)
 {
-	sfw_test_case_t *tsc;
+	struct sfw_test_case *tsc;
 
 	LASSERT(id <= SRPC_SERVICE_MAX_ID);
 	LASSERT(id > SRPC_FRAMEWORK_SERVICE_MAX_ID);
@@ -135,9 +135,9 @@ sfw_find_test_case(int id)
 }
 
 static int
-sfw_register_test(srpc_service_t *service, sfw_test_client_ops_t *cliops)
+sfw_register_test(struct srpc_service *service, struct sfw_test_client_ops *cliops)
 {
-	sfw_test_case_t *tsc;
+	struct sfw_test_case *tsc;
 
 	if (sfw_find_test_case(service->sv_id)) {
 		CERROR("Failed to register test %s (%d)\n",
@@ -145,7 +145,7 @@ sfw_register_test(srpc_service_t *service, sfw_test_client_ops_t *cliops)
 		return -EEXIST;
 	}
 
-	LIBCFS_ALLOC(tsc, sizeof(sfw_test_case_t));
+	LIBCFS_ALLOC(tsc, sizeof(struct sfw_test_case));
 	if (!tsc)
 		return -ENOMEM;
 
@@ -159,7 +159,7 @@ sfw_register_test(srpc_service_t *service, sfw_test_client_ops_t *cliops)
 static void
 sfw_add_session_timer(void)
 {
-	sfw_session_t *sn = sfw_data.fw_session;
+	struct sfw_session *sn = sfw_data.fw_session;
 	struct stt_timer *timer = &sn->sn_timer;
 
 	LASSERT(!sfw_data.fw_shuttingdown);
@@ -177,7 +177,7 @@ sfw_add_session_timer(void)
 static int
 sfw_del_session_timer(void)
 {
-	sfw_session_t *sn = sfw_data.fw_session;
+	struct sfw_session *sn = sfw_data.fw_session;
 
 	if (!sn || !sn->sn_timer_active)
 		return 0;
@@ -196,10 +196,10 @@ static void
 sfw_deactivate_session(void)
 __must_hold(&sfw_data.fw_lock)
 {
-	sfw_session_t *sn = sfw_data.fw_session;
+	struct sfw_session *sn = sfw_data.fw_session;
 	int nactive = 0;
-	sfw_batch_t *tsb;
-	sfw_test_case_t *tsc;
+	struct sfw_batch *tsb;
+	struct sfw_test_case *tsc;
 
 	if (!sn)
 		return;
@@ -226,7 +226,7 @@ __must_hold(&sfw_data.fw_lock)
 	}
 
 	if (nactive)
-		return;   /* wait for active batches to stop */
+		return;	/* wait for active batches to stop */
 
 	list_del_init(&sn->sn_list);
 	spin_unlock(&sfw_data.fw_lock);
@@ -239,7 +239,7 @@ __must_hold(&sfw_data.fw_lock)
 static void
 sfw_session_expired(void *data)
 {
-	sfw_session_t *sn = data;
+	struct sfw_session *sn = data;
 
 	spin_lock(&sfw_data.fw_lock);
 
@@ -257,12 +257,12 @@ sfw_session_expired(void *data)
 }
 
 static inline void
-sfw_init_session(sfw_session_t *sn, lst_sid_t sid,
+sfw_init_session(struct sfw_session *sn, lst_sid_t sid,
 		 unsigned features, const char *name)
 {
 	struct stt_timer *timer = &sn->sn_timer;
 
-	memset(sn, 0, sizeof(sfw_session_t));
+	memset(sn, 0, sizeof(struct sfw_session));
 	INIT_LIST_HEAD(&sn->sn_list);
 	INIT_LIST_HEAD(&sn->sn_batches);
 	atomic_set(&sn->sn_refcount, 1);	/* +1 for caller */
@@ -298,7 +298,7 @@ sfw_server_rpc_done(struct srpc_server_rpc *rpc)
 }
 
 static void
-sfw_client_rpc_fini(srpc_client_rpc_t *rpc)
+sfw_client_rpc_fini(struct srpc_client_rpc *rpc)
 {
 	LASSERT(!rpc->crpc_bulk.bk_niov);
 	LASSERT(list_empty(&rpc->crpc_list));
@@ -318,11 +318,11 @@ sfw_client_rpc_fini(srpc_client_rpc_t *rpc)
 	spin_unlock(&sfw_data.fw_lock);
 }
 
-static sfw_batch_t *
+static struct sfw_batch *
 sfw_find_batch(lst_bid_t bid)
 {
-	sfw_session_t *sn = sfw_data.fw_session;
-	sfw_batch_t *bat;
+	struct sfw_session *sn = sfw_data.fw_session;
+	struct sfw_batch *bat;
 
 	LASSERT(sn);
 
@@ -334,11 +334,11 @@ sfw_find_batch(lst_bid_t bid)
 	return NULL;
 }
 
-static sfw_batch_t *
+static struct sfw_batch *
 sfw_bid2batch(lst_bid_t bid)
 {
-	sfw_session_t *sn = sfw_data.fw_session;
-	sfw_batch_t *bat;
+	struct sfw_session *sn = sfw_data.fw_session;
+	struct sfw_batch *bat;
 
 	LASSERT(sn);
 
@@ -346,7 +346,7 @@ sfw_bid2batch(lst_bid_t bid)
 	if (bat)
 		return bat;
 
-	LIBCFS_ALLOC(bat, sizeof(sfw_batch_t));
+	LIBCFS_ALLOC(bat, sizeof(struct sfw_batch));
 	if (!bat)
 		return NULL;
 
@@ -361,11 +361,11 @@ sfw_bid2batch(lst_bid_t bid)
 }
 
 static int
-sfw_get_stats(srpc_stat_reqst_t *request, srpc_stat_reply_t *reply)
+sfw_get_stats(struct srpc_stat_reqst *request, struct srpc_stat_reply *reply)
 {
-	sfw_session_t *sn = sfw_data.fw_session;
+	struct sfw_session *sn = sfw_data.fw_session;
 	sfw_counters_t *cnt = &reply->str_fw;
-	sfw_batch_t *bat;
+	struct sfw_batch *bat;
 
 	reply->str_sid = !sn ? LST_INVALID_SID : sn->sn_id;
 
@@ -402,10 +402,10 @@ sfw_get_stats(srpc_stat_reqst_t *request, srpc_stat_reply_t *reply)
 }
 
 int
-sfw_make_session(srpc_mksn_reqst_t *request, srpc_mksn_reply_t *reply)
+sfw_make_session(struct srpc_mksn_reqst *request, struct srpc_mksn_reply *reply)
 {
-	sfw_session_t *sn = sfw_data.fw_session;
-	srpc_msg_t *msg = container_of(request, srpc_msg_t,
+	struct sfw_session *sn = sfw_data.fw_session;
+	struct srpc_msg *msg = container_of(request, struct srpc_msg,
 				       msg_body.mksn_reqst);
 	int cplen = 0;
 
@@ -438,7 +438,7 @@ sfw_make_session(srpc_mksn_reqst_t *request, srpc_mksn_reply_t *reply)
 	/*
 	 * reject the request if it requires unknown features
 	 * NB: old version will always accept all features because it's not
-	 * aware of srpc_msg_t::msg_ses_feats, it's a defect but it's also
+	 * aware of srpc_msg::msg_ses_feats, it's a defect but it's also
 	 * harmless because it will return zero feature to console, and it's
 	 * console's responsibility to make sure all nodes in a session have
 	 * same feature mask.
@@ -449,7 +449,7 @@ sfw_make_session(srpc_mksn_reqst_t *request, srpc_mksn_reply_t *reply)
 	}
 
 	/* brand new or create by force */
-	LIBCFS_ALLOC(sn, sizeof(sfw_session_t));
+	LIBCFS_ALLOC(sn, sizeof(struct sfw_session));
 	if (!sn) {
 		CERROR("dropping RPC mksn under memory pressure\n");
 		return -ENOMEM;
@@ -473,9 +473,9 @@ sfw_make_session(srpc_mksn_reqst_t *request, srpc_mksn_reply_t *reply)
 }
 
 static int
-sfw_remove_session(srpc_rmsn_reqst_t *request, srpc_rmsn_reply_t *reply)
+sfw_remove_session(struct srpc_rmsn_reqst *request, struct srpc_rmsn_reply *reply)
 {
-	sfw_session_t *sn = sfw_data.fw_session;
+	struct sfw_session *sn = sfw_data.fw_session;
 
 	reply->rmsn_sid = !sn ? LST_INVALID_SID : sn->sn_id;
 
@@ -505,9 +505,9 @@ sfw_remove_session(srpc_rmsn_reqst_t *request, srpc_rmsn_reply_t *reply)
 }
 
 static int
-sfw_debug_session(srpc_debug_reqst_t *request, srpc_debug_reply_t *reply)
+sfw_debug_session(struct srpc_debug_reqst *request, struct srpc_debug_reply *reply)
 {
-	sfw_session_t *sn = sfw_data.fw_session;
+	struct sfw_session *sn = sfw_data.fw_session;
 
 	if (!sn) {
 		reply->dbg_status = ESRCH;
@@ -526,10 +526,10 @@ sfw_debug_session(srpc_debug_reqst_t *request, srpc_debug_reply_t *reply)
 }
 
 static void
-sfw_test_rpc_fini(srpc_client_rpc_t *rpc)
+sfw_test_rpc_fini(struct srpc_client_rpc *rpc)
 {
-	sfw_test_unit_t *tsu = rpc->crpc_priv;
-	sfw_test_instance_t *tsi = tsu->tsu_instance;
+	struct sfw_test_unit *tsu = rpc->crpc_priv;
+	struct sfw_test_instance *tsi = tsu->tsu_instance;
 
 	/* Called with hold of tsi->tsi_lock */
 	LASSERT(list_empty(&rpc->crpc_list));
@@ -537,7 +537,7 @@ sfw_test_rpc_fini(srpc_client_rpc_t *rpc)
 }
 
 static inline int
-sfw_test_buffers(sfw_test_instance_t *tsi)
+sfw_test_buffers(struct sfw_test_instance *tsi)
 {
 	struct sfw_test_case *tsc;
 	struct srpc_service *svc;
@@ -614,10 +614,10 @@ sfw_unload_test(struct sfw_test_instance *tsi)
 }
 
 static void
-sfw_destroy_test_instance(sfw_test_instance_t *tsi)
+sfw_destroy_test_instance(struct sfw_test_instance *tsi)
 {
-	srpc_client_rpc_t *rpc;
-	sfw_test_unit_t *tsu;
+	struct srpc_client_rpc *rpc;
+	struct sfw_test_unit *tsu;
 
 	if (!tsi->tsi_is_client)
 		goto clean;
@@ -630,14 +630,14 @@ sfw_destroy_test_instance(sfw_test_instance_t *tsi)
 
 	while (!list_empty(&tsi->tsi_units)) {
 		tsu = list_entry(tsi->tsi_units.next,
-				 sfw_test_unit_t, tsu_list);
+				 struct sfw_test_unit, tsu_list);
 		list_del(&tsu->tsu_list);
 		LIBCFS_FREE(tsu, sizeof(*tsu));
 	}
 
 	while (!list_empty(&tsi->tsi_free_rpcs)) {
 		rpc = list_entry(tsi->tsi_free_rpcs.next,
-				 srpc_client_rpc_t, crpc_list);
+				 struct srpc_client_rpc, crpc_list);
 		list_del(&rpc->crpc_list);
 		LIBCFS_FREE(rpc, srpc_client_rpc_size(rpc));
 	}
@@ -648,34 +648,34 @@ clean:
 }
 
 static void
-sfw_destroy_batch(sfw_batch_t *tsb)
+sfw_destroy_batch(struct sfw_batch *tsb)
 {
-	sfw_test_instance_t *tsi;
+	struct sfw_test_instance *tsi;
 
 	LASSERT(!sfw_batch_active(tsb));
 	LASSERT(list_empty(&tsb->bat_list));
 
 	while (!list_empty(&tsb->bat_tests)) {
 		tsi = list_entry(tsb->bat_tests.next,
-				 sfw_test_instance_t, tsi_list);
+				 struct sfw_test_instance, tsi_list);
 		list_del_init(&tsi->tsi_list);
 		sfw_destroy_test_instance(tsi);
 	}
 
-	LIBCFS_FREE(tsb, sizeof(sfw_batch_t));
+	LIBCFS_FREE(tsb, sizeof(struct sfw_batch));
 }
 
 void
-sfw_destroy_session(sfw_session_t *sn)
+sfw_destroy_session(struct sfw_session *sn)
 {
-	sfw_batch_t *batch;
+	struct sfw_batch *batch;
 
 	LASSERT(list_empty(&sn->sn_list));
 	LASSERT(sn != sfw_data.fw_session);
 
 	while (!list_empty(&sn->sn_batches)) {
 		batch = list_entry(sn->sn_batches.next,
-				   sfw_batch_t, bat_list);
+				   struct sfw_batch, bat_list);
 		list_del_init(&batch->bat_list);
 		sfw_destroy_batch(batch);
 	}
@@ -685,28 +685,28 @@ sfw_destroy_session(sfw_session_t *sn)
 }
 
 static void
-sfw_unpack_addtest_req(srpc_msg_t *msg)
+sfw_unpack_addtest_req(struct srpc_msg *msg)
 {
-	srpc_test_reqst_t *req = &msg->msg_body.tes_reqst;
+	struct srpc_test_reqst *req = &msg->msg_body.tes_reqst;
 
 	LASSERT(msg->msg_type == SRPC_MSG_TEST_REQST);
 	LASSERT(req->tsr_is_client);
 
 	if (msg->msg_magic == SRPC_MSG_MAGIC)
-		return; /* no flipping needed */
+		return;	/* no flipping needed */
 
 	LASSERT(msg->msg_magic == __swab32(SRPC_MSG_MAGIC));
 
 	if (req->tsr_service == SRPC_SERVICE_BRW) {
 		if (!(msg->msg_ses_feats & LST_FEAT_BULK_LEN)) {
-			test_bulk_req_t *bulk = &req->tsr_u.bulk_v0;
+			struct test_bulk_req *bulk = &req->tsr_u.bulk_v0;
 
 			__swab32s(&bulk->blk_opc);
 			__swab32s(&bulk->blk_npg);
 			__swab32s(&bulk->blk_flags);
 
 		} else {
-			test_bulk_req_v1_t *bulk = &req->tsr_u.bulk_v1;
+			struct test_bulk_req_v1 *bulk = &req->tsr_u.bulk_v1;
 
 			__swab16s(&bulk->blk_opc);
 			__swab16s(&bulk->blk_flags);
@@ -718,7 +718,7 @@ sfw_unpack_addtest_req(srpc_msg_t *msg)
 	}
 
 	if (req->tsr_service == SRPC_SERVICE_PING) {
-		test_ping_req_t *ping = &req->tsr_u.ping;
+		struct test_ping_req *ping = &req->tsr_u.ping;
 
 		__swab32s(&ping->png_size);
 		__swab32s(&ping->png_flags);
@@ -729,14 +729,14 @@ sfw_unpack_addtest_req(srpc_msg_t *msg)
 }
 
 static int
-sfw_add_test_instance(sfw_batch_t *tsb, struct srpc_server_rpc *rpc)
+sfw_add_test_instance(struct sfw_batch *tsb, struct srpc_server_rpc *rpc)
 {
-	srpc_msg_t *msg = &rpc->srpc_reqstbuf->buf_msg;
-	srpc_test_reqst_t *req = &msg->msg_body.tes_reqst;
-	srpc_bulk_t *bk = rpc->srpc_bulk;
+	struct srpc_msg *msg = &rpc->srpc_reqstbuf->buf_msg;
+	struct srpc_test_reqst *req = &msg->msg_body.tes_reqst;
+	struct srpc_bulk *bk = rpc->srpc_bulk;
 	int ndest = req->tsr_ndest;
-	sfw_test_unit_t *tsu;
-	sfw_test_instance_t *tsi;
+	struct sfw_test_unit *tsu;
+	struct sfw_test_instance *tsi;
 	int i;
 	int rc;
 
@@ -789,13 +789,13 @@ sfw_add_test_instance(sfw_batch_t *tsb, struct srpc_server_rpc *rpc)
 		int j;
 
 		dests = page_address(bk->bk_iovs[i / SFW_ID_PER_PAGE].kiov_page);
-		LASSERT(dests);  /* my pages are within KVM always */
+		LASSERT(dests);		/* my pages are within KVM always */
 		id = dests[i % SFW_ID_PER_PAGE];
 		if (msg->msg_magic != SRPC_MSG_MAGIC)
 			sfw_unpack_id(id);
 
 		for (j = 0; j < tsi->tsi_concur; j++) {
-			LIBCFS_ALLOC(tsu, sizeof(sfw_test_unit_t));
+			LIBCFS_ALLOC(tsu, sizeof(struct sfw_test_unit));
 			if (!tsu) {
 				rc = -ENOMEM;
 				CERROR("Can't allocate tsu for %d\n",
@@ -824,11 +824,11 @@ error:
 }
 
 static void
-sfw_test_unit_done(sfw_test_unit_t *tsu)
+sfw_test_unit_done(struct sfw_test_unit *tsu)
 {
-	sfw_test_instance_t *tsi = tsu->tsu_instance;
-	sfw_batch_t *tsb = tsi->tsi_batch;
-	sfw_session_t *sn = tsb->bat_session;
+	struct sfw_test_instance *tsi = tsu->tsu_instance;
+	struct sfw_batch *tsb = tsi->tsi_batch;
+	struct sfw_session *sn = tsb->bat_session;
 
 	LASSERT(sfw_test_active(tsi));
 
@@ -844,8 +844,8 @@ sfw_test_unit_done(sfw_test_unit_t *tsu)
 
 	spin_lock(&sfw_data.fw_lock);
 
-	if (!atomic_dec_and_test(&tsb->bat_nactive) ||/* tsb still active */
-	    sn == sfw_data.fw_session) {		  /* sn also active */
+	if (!atomic_dec_and_test(&tsb->bat_nactive) ||	/* tsb still active */
+	    sn == sfw_data.fw_session) {		/* sn also active */
 		spin_unlock(&sfw_data.fw_lock);
 		return;
 	}
@@ -866,10 +866,10 @@ sfw_test_unit_done(sfw_test_unit_t *tsu)
 }
 
 static void
-sfw_test_rpc_done(srpc_client_rpc_t *rpc)
+sfw_test_rpc_done(struct srpc_client_rpc *rpc)
 {
-	sfw_test_unit_t *tsu = rpc->crpc_priv;
-	sfw_test_instance_t *tsi = tsu->tsu_instance;
+	struct sfw_test_unit *tsu = rpc->crpc_priv;
+	struct sfw_test_instance *tsi = tsu->tsu_instance;
 	int done = 0;
 
 	tsi->tsi_ops->tso_done_rpc(tsu, rpc);
@@ -900,19 +900,19 @@ sfw_test_rpc_done(srpc_client_rpc_t *rpc)
 }
 
 int
-sfw_create_test_rpc(sfw_test_unit_t *tsu, lnet_process_id_t peer,
+sfw_create_test_rpc(struct sfw_test_unit *tsu, lnet_process_id_t peer,
 		    unsigned features, int nblk, int blklen,
-		    srpc_client_rpc_t **rpcpp)
+		    struct srpc_client_rpc **rpcpp)
 {
-	srpc_client_rpc_t *rpc = NULL;
-	sfw_test_instance_t *tsi = tsu->tsu_instance;
+	struct srpc_client_rpc *rpc = NULL;
+	struct sfw_test_instance *tsi = tsu->tsu_instance;
 
 	spin_lock(&tsi->tsi_lock);
 
 	LASSERT(sfw_test_active(tsi));
 		/* pick request from buffer */
 	rpc = list_first_entry_or_null(&tsi->tsi_free_rpcs,
-				       srpc_client_rpc_t, crpc_list);
+				       struct srpc_client_rpc, crpc_list);
 	if (rpc) {
 		LASSERT(nblk == rpc->crpc_bulk.bk_niov);
 		list_del_init(&rpc->crpc_list);
@@ -942,11 +942,11 @@ sfw_create_test_rpc(sfw_test_unit_t *tsu, lnet_process_id_t peer,
 }
 
 static int
-sfw_run_test(swi_workitem_t *wi)
+sfw_run_test(struct swi_workitem *wi)
 {
-	sfw_test_unit_t *tsu = wi->swi_workitem.wi_data;
-	sfw_test_instance_t *tsi = tsu->tsu_instance;
-	srpc_client_rpc_t *rpc = NULL;
+	struct sfw_test_unit *tsu = wi->swi_workitem.wi_data;
+	struct sfw_test_instance *tsi = tsu->tsu_instance;
+	struct srpc_client_rpc *rpc = NULL;
 
 	LASSERT(wi == &tsu->tsu_worker);
 
@@ -991,11 +991,11 @@ test_done:
 }
 
 static int
-sfw_run_batch(sfw_batch_t *tsb)
+sfw_run_batch(struct sfw_batch *tsb)
 {
-	swi_workitem_t *wi;
-	sfw_test_unit_t *tsu;
-	sfw_test_instance_t *tsi;
+	struct swi_workitem *wi;
+	struct sfw_test_unit *tsu;
+	struct sfw_test_instance *tsi;
 
 	if (sfw_batch_active(tsb)) {
 		CDEBUG(D_NET, "Batch already active: %llu (%d)\n",
@@ -1026,10 +1026,10 @@ sfw_run_batch(sfw_batch_t *tsb)
 }
 
 int
-sfw_stop_batch(sfw_batch_t *tsb, int force)
+sfw_stop_batch(struct sfw_batch *tsb, int force)
 {
-	sfw_test_instance_t *tsi;
-	srpc_client_rpc_t *rpc;
+	struct sfw_test_instance *tsi;
+	struct srpc_client_rpc *rpc;
 
 	if (!sfw_batch_active(tsb)) {
 		CDEBUG(D_NET, "Batch %llu inactive\n", tsb->bat_id.bat_id);
@@ -1068,9 +1068,9 @@ sfw_stop_batch(sfw_batch_t *tsb, int force)
 }
 
 static int
-sfw_query_batch(sfw_batch_t *tsb, int testidx, srpc_batch_reply_t *reply)
+sfw_query_batch(struct sfw_batch *tsb, int testidx, struct srpc_batch_reply *reply)
 {
-	sfw_test_instance_t *tsi;
+	struct sfw_test_instance *tsi;
 
 	if (testidx < 0)
 		return -EINVAL;
@@ -1115,11 +1115,11 @@ sfw_alloc_pages(struct srpc_server_rpc *rpc, int cpt, int npages, int len,
 static int
 sfw_add_test(struct srpc_server_rpc *rpc)
 {
-	sfw_session_t *sn = sfw_data.fw_session;
-	srpc_test_reply_t *reply = &rpc->srpc_replymsg.msg_body.tes_reply;
-	srpc_test_reqst_t *request;
+	struct sfw_session *sn = sfw_data.fw_session;
+	struct srpc_test_reply *reply = &rpc->srpc_replymsg.msg_body.tes_reply;
+	struct srpc_test_reqst *request;
 	int rc;
-	sfw_batch_t *bat;
+	struct sfw_batch *bat;
 
 	request = &rpc->srpc_reqstbuf->buf_msg.msg_body.tes_reqst;
 	reply->tsr_sid = !sn ? LST_INVALID_SID : sn->sn_id;
@@ -1183,11 +1183,11 @@ sfw_add_test(struct srpc_server_rpc *rpc)
 }
 
 static int
-sfw_control_batch(srpc_batch_reqst_t *request, srpc_batch_reply_t *reply)
+sfw_control_batch(struct srpc_batch_reqst *request, struct srpc_batch_reply *reply)
 {
-	sfw_session_t *sn = sfw_data.fw_session;
+	struct sfw_session *sn = sfw_data.fw_session;
 	int rc = 0;
-	sfw_batch_t *bat;
+	struct sfw_batch *bat;
 
 	reply->bar_sid = !sn ? LST_INVALID_SID : sn->sn_id;
 
@@ -1227,8 +1227,8 @@ static int
 sfw_handle_server_rpc(struct srpc_server_rpc *rpc)
 {
 	struct srpc_service *sv = rpc->srpc_scd->scd_svc;
-	srpc_msg_t *reply = &rpc->srpc_replymsg;
-	srpc_msg_t *request = &rpc->srpc_reqstbuf->buf_msg;
+	struct srpc_msg *reply = &rpc->srpc_replymsg;
+	struct srpc_msg *request = &rpc->srpc_reqstbuf->buf_msg;
 	unsigned features = LST_FEATS_MASK;
 	int rc = 0;
 
@@ -1244,7 +1244,7 @@ sfw_handle_server_rpc(struct srpc_server_rpc *rpc)
 
 	/* Remove timer to avoid racing with it or expiring active session */
 	if (sfw_del_session_timer()) {
-		CERROR("Dropping RPC (%s) from %s: racing with expiry timer.",
+		CERROR("dropping RPC %s from %s: racing with expiry timer\n",
 		       sv->sv_name, libcfs_id2str(rpc->srpc_peer));
 		spin_unlock(&sfw_data.fw_lock);
 		return -EAGAIN;
@@ -1261,7 +1261,7 @@ sfw_handle_server_rpc(struct srpc_server_rpc *rpc)
 
 	if (sv->sv_id != SRPC_SERVICE_MAKE_SESSION &&
 	    sv->sv_id != SRPC_SERVICE_DEBUG) {
-		sfw_session_t *sn = sfw_data.fw_session;
+		struct sfw_session *sn = sfw_data.fw_session;
 
 		if (sn &&
 		    sn->sn_features != request->msg_ses_feats) {
@@ -1273,7 +1273,7 @@ sfw_handle_server_rpc(struct srpc_server_rpc *rpc)
 		}
 
 	} else if (request->msg_ses_feats & ~LST_FEATS_MASK) {
-		/**
+		/*
 		 * NB: at this point, old version will ignore features and
 		 * create new session anyway, so console should be able
 		 * to handle this
@@ -1377,12 +1377,12 @@ sfw_bulk_ready(struct srpc_server_rpc *rpc, int status)
 	return rc;
 }
 
-srpc_client_rpc_t *
+struct srpc_client_rpc *
 sfw_create_rpc(lnet_process_id_t peer, int service,
 	       unsigned features, int nbulkiov, int bulklen,
-	       void (*done)(srpc_client_rpc_t *), void *priv)
+	       void (*done)(struct srpc_client_rpc *), void *priv)
 {
-	srpc_client_rpc_t *rpc = NULL;
+	struct srpc_client_rpc *rpc = NULL;
 
 	spin_lock(&sfw_data.fw_lock);
 
@@ -1391,7 +1391,7 @@ sfw_create_rpc(lnet_process_id_t peer, int service,
 
 	if (!nbulkiov && !list_empty(&sfw_data.fw_zombie_rpcs)) {
 		rpc = list_entry(sfw_data.fw_zombie_rpcs.next,
-				 srpc_client_rpc_t, crpc_list);
+				 struct srpc_client_rpc, crpc_list);
 		list_del(&rpc->crpc_list);
 
 		srpc_init_client_rpc(rpc, peer, service, 0, 0,
@@ -1415,7 +1415,7 @@ sfw_create_rpc(lnet_process_id_t peer, int service,
 }
 
 void
-sfw_unpack_message(srpc_msg_t *msg)
+sfw_unpack_message(struct srpc_msg *msg)
 {
 	if (msg->msg_magic == SRPC_MSG_MAGIC)
 		return; /* no flipping needed */
@@ -1424,7 +1424,7 @@ sfw_unpack_message(srpc_msg_t *msg)
 	LASSERT(msg->msg_magic == __swab32(SRPC_MSG_MAGIC));
 
 	if (msg->msg_type == SRPC_MSG_STAT_REQST) {
-		srpc_stat_reqst_t *req = &msg->msg_body.stat_reqst;
+		struct srpc_stat_reqst *req = &msg->msg_body.stat_reqst;
 
 		__swab32s(&req->str_type);
 		__swab64s(&req->str_rpyid);
@@ -1433,7 +1433,7 @@ sfw_unpack_message(srpc_msg_t *msg)
 	}
 
 	if (msg->msg_type == SRPC_MSG_STAT_REPLY) {
-		srpc_stat_reply_t *rep = &msg->msg_body.stat_reply;
+		struct srpc_stat_reply *rep = &msg->msg_body.stat_reply;
 
 		__swab32s(&rep->str_status);
 		sfw_unpack_sid(rep->str_sid);
@@ -1444,7 +1444,7 @@ sfw_unpack_message(srpc_msg_t *msg)
 	}
 
 	if (msg->msg_type == SRPC_MSG_MKSN_REQST) {
-		srpc_mksn_reqst_t *req = &msg->msg_body.mksn_reqst;
+		struct srpc_mksn_reqst *req = &msg->msg_body.mksn_reqst;
 
 		__swab64s(&req->mksn_rpyid);
 		__swab32s(&req->mksn_force);
@@ -1453,7 +1453,7 @@ sfw_unpack_message(srpc_msg_t *msg)
 	}
 
 	if (msg->msg_type == SRPC_MSG_MKSN_REPLY) {
-		srpc_mksn_reply_t *rep = &msg->msg_body.mksn_reply;
+		struct srpc_mksn_reply *rep = &msg->msg_body.mksn_reply;
 
 		__swab32s(&rep->mksn_status);
 		__swab32s(&rep->mksn_timeout);
@@ -1462,7 +1462,7 @@ sfw_unpack_message(srpc_msg_t *msg)
 	}
 
 	if (msg->msg_type == SRPC_MSG_RMSN_REQST) {
-		srpc_rmsn_reqst_t *req = &msg->msg_body.rmsn_reqst;
+		struct srpc_rmsn_reqst *req = &msg->msg_body.rmsn_reqst;
 
 		__swab64s(&req->rmsn_rpyid);
 		sfw_unpack_sid(req->rmsn_sid);
@@ -1470,7 +1470,7 @@ sfw_unpack_message(srpc_msg_t *msg)
 	}
 
 	if (msg->msg_type == SRPC_MSG_RMSN_REPLY) {
-		srpc_rmsn_reply_t *rep = &msg->msg_body.rmsn_reply;
+		struct srpc_rmsn_reply *rep = &msg->msg_body.rmsn_reply;
 
 		__swab32s(&rep->rmsn_status);
 		sfw_unpack_sid(rep->rmsn_sid);
@@ -1478,7 +1478,7 @@ sfw_unpack_message(srpc_msg_t *msg)
 	}
 
 	if (msg->msg_type == SRPC_MSG_DEBUG_REQST) {
-		srpc_debug_reqst_t *req = &msg->msg_body.dbg_reqst;
+		struct srpc_debug_reqst *req = &msg->msg_body.dbg_reqst;
 
 		__swab64s(&req->dbg_rpyid);
 		__swab32s(&req->dbg_flags);
@@ -1487,7 +1487,7 @@ sfw_unpack_message(srpc_msg_t *msg)
 	}
 
 	if (msg->msg_type == SRPC_MSG_DEBUG_REPLY) {
-		srpc_debug_reply_t *rep = &msg->msg_body.dbg_reply;
+		struct srpc_debug_reply *rep = &msg->msg_body.dbg_reply;
 
 		__swab32s(&rep->dbg_nbatch);
 		__swab32s(&rep->dbg_timeout);
@@ -1496,7 +1496,7 @@ sfw_unpack_message(srpc_msg_t *msg)
 	}
 
 	if (msg->msg_type == SRPC_MSG_BATCH_REQST) {
-		srpc_batch_reqst_t *req = &msg->msg_body.bat_reqst;
+		struct srpc_batch_reqst *req = &msg->msg_body.bat_reqst;
 
 		__swab32s(&req->bar_opc);
 		__swab64s(&req->bar_rpyid);
@@ -1508,7 +1508,7 @@ sfw_unpack_message(srpc_msg_t *msg)
 	}
 
 	if (msg->msg_type == SRPC_MSG_BATCH_REPLY) {
-		srpc_batch_reply_t *rep = &msg->msg_body.bat_reply;
+		struct srpc_batch_reply *rep = &msg->msg_body.bat_reply;
 
 		__swab32s(&rep->bar_status);
 		sfw_unpack_sid(rep->bar_sid);
@@ -1516,7 +1516,7 @@ sfw_unpack_message(srpc_msg_t *msg)
 	}
 
 	if (msg->msg_type == SRPC_MSG_TEST_REQST) {
-		srpc_test_reqst_t *req = &msg->msg_body.tes_reqst;
+		struct srpc_test_reqst *req = &msg->msg_body.tes_reqst;
 
 		__swab64s(&req->tsr_rpyid);
 		__swab64s(&req->tsr_bulkid);
@@ -1530,7 +1530,7 @@ sfw_unpack_message(srpc_msg_t *msg)
 	}
 
 	if (msg->msg_type == SRPC_MSG_TEST_REPLY) {
-		srpc_test_reply_t *rep = &msg->msg_body.tes_reply;
+		struct srpc_test_reply *rep = &msg->msg_body.tes_reply;
 
 		__swab32s(&rep->tsr_status);
 		sfw_unpack_sid(rep->tsr_sid);
@@ -1538,7 +1538,7 @@ sfw_unpack_message(srpc_msg_t *msg)
 	}
 
 	if (msg->msg_type == SRPC_MSG_JOIN_REQST) {
-		srpc_join_reqst_t *req = &msg->msg_body.join_reqst;
+		struct srpc_join_reqst *req = &msg->msg_body.join_reqst;
 
 		__swab64s(&req->join_rpyid);
 		sfw_unpack_sid(req->join_sid);
@@ -1546,7 +1546,7 @@ sfw_unpack_message(srpc_msg_t *msg)
 	}
 
 	if (msg->msg_type == SRPC_MSG_JOIN_REPLY) {
-		srpc_join_reply_t *rep = &msg->msg_body.join_reply;
+		struct srpc_join_reply *rep = &msg->msg_body.join_reply;
 
 		__swab32s(&rep->join_status);
 		__swab32s(&rep->join_timeout);
@@ -1558,7 +1558,7 @@ sfw_unpack_message(srpc_msg_t *msg)
 }
 
 void
-sfw_abort_rpc(srpc_client_rpc_t *rpc)
+sfw_abort_rpc(struct srpc_client_rpc *rpc)
 {
 	LASSERT(atomic_read(&rpc->crpc_refcount) > 0);
 	LASSERT(rpc->crpc_service <= SRPC_FRAMEWORK_SERVICE_MAX_ID);
@@ -1569,7 +1569,7 @@ sfw_abort_rpc(srpc_client_rpc_t *rpc)
 }
 
 void
-sfw_post_rpc(srpc_client_rpc_t *rpc)
+sfw_post_rpc(struct srpc_client_rpc *rpc)
 {
 	spin_lock(&rpc->crpc_lock);
 
@@ -1584,7 +1584,7 @@ sfw_post_rpc(srpc_client_rpc_t *rpc)
 	spin_unlock(&rpc->crpc_lock);
 }
 
-static srpc_service_t sfw_services[] = {
+static struct srpc_service sfw_services[] = {
 	{
 		/* sv_id */    SRPC_SERVICE_DEBUG,
 		/* sv_name */  "debug",
@@ -1628,8 +1628,8 @@ sfw_startup(void)
 	int i;
 	int rc;
 	int error;
-	srpc_service_t *sv;
-	sfw_test_case_t *tsc;
+	struct srpc_service *sv;
+	struct sfw_test_case *tsc;
 
 	if (session_timeout < 0) {
 		CERROR("Session timeout must be non-negative: %d\n",
@@ -1721,8 +1721,8 @@ sfw_startup(void)
 void
 sfw_shutdown(void)
 {
-	srpc_service_t *sv;
-	sfw_test_case_t	*tsc;
+	struct srpc_service *sv;
+	struct sfw_test_case	*tsc;
 	int i;
 
 	spin_lock(&sfw_data.fw_lock);
@@ -1759,10 +1759,10 @@ sfw_shutdown(void)
 	}
 
 	while (!list_empty(&sfw_data.fw_zombie_rpcs)) {
-		srpc_client_rpc_t *rpc;
+		struct srpc_client_rpc *rpc;
 
 		rpc = list_entry(sfw_data.fw_zombie_rpcs.next,
-				 srpc_client_rpc_t, crpc_list);
+				 struct srpc_client_rpc, crpc_list);
 		list_del(&rpc->crpc_list);
 
 		LIBCFS_FREE(rpc, srpc_client_rpc_size(rpc));
@@ -1778,7 +1778,7 @@ sfw_shutdown(void)
 
 	while (!list_empty(&sfw_data.fw_tests)) {
 		tsc = list_entry(sfw_data.fw_tests.next,
-				 sfw_test_case_t, tsc_list);
+				 struct sfw_test_case, tsc_list);
 
 		srpc_wait_service_shutdown(tsc->tsc_srv_service);
 
diff --git a/drivers/staging/lustre/lnet/selftest/ping_test.c b/drivers/staging/lustre/lnet/selftest/ping_test.c
index 81a45045e..ad26fe9dd 100644
--- a/drivers/staging/lustre/lnet/selftest/ping_test.c
+++ b/drivers/staging/lustre/lnet/selftest/ping_test.c
@@ -56,9 +56,9 @@ struct lst_ping_data {
 static struct lst_ping_data  lst_ping_data;
 
 static int
-ping_client_init(sfw_test_instance_t *tsi)
+ping_client_init(struct sfw_test_instance *tsi)
 {
-	sfw_session_t *sn = tsi->tsi_batch->bat_session;
+	struct sfw_session *sn = tsi->tsi_batch->bat_session;
 
 	LASSERT(tsi->tsi_is_client);
 	LASSERT(sn && !(sn->sn_features & ~LST_FEATS_MASK));
@@ -70,9 +70,9 @@ ping_client_init(sfw_test_instance_t *tsi)
 }
 
 static void
-ping_client_fini(sfw_test_instance_t *tsi)
+ping_client_fini(struct sfw_test_instance *tsi)
 {
-	sfw_session_t *sn = tsi->tsi_batch->bat_session;
+	struct sfw_session *sn = tsi->tsi_batch->bat_session;
 	int errors;
 
 	LASSERT(sn);
@@ -86,12 +86,12 @@ ping_client_fini(sfw_test_instance_t *tsi)
 }
 
 static int
-ping_client_prep_rpc(sfw_test_unit_t *tsu,
-		     lnet_process_id_t dest, srpc_client_rpc_t **rpc)
+ping_client_prep_rpc(struct sfw_test_unit *tsu, lnet_process_id_t dest,
+		     struct srpc_client_rpc **rpc)
 {
-	srpc_ping_reqst_t *req;
-	sfw_test_instance_t *tsi = tsu->tsu_instance;
-	sfw_session_t *sn = tsi->tsi_batch->bat_session;
+	struct srpc_ping_reqst *req;
+	struct sfw_test_instance *tsi = tsu->tsu_instance;
+	struct sfw_session *sn = tsi->tsi_batch->bat_session;
 	struct timespec64 ts;
 	int rc;
 
@@ -118,18 +118,18 @@ ping_client_prep_rpc(sfw_test_unit_t *tsu,
 }
 
 static void
-ping_client_done_rpc(sfw_test_unit_t *tsu, srpc_client_rpc_t *rpc)
+ping_client_done_rpc(struct sfw_test_unit *tsu, struct srpc_client_rpc *rpc)
 {
-	sfw_test_instance_t *tsi = tsu->tsu_instance;
-	sfw_session_t *sn = tsi->tsi_batch->bat_session;
-	srpc_ping_reqst_t *reqst = &rpc->crpc_reqstmsg.msg_body.ping_reqst;
-	srpc_ping_reply_t *reply = &rpc->crpc_replymsg.msg_body.ping_reply;
+	struct sfw_test_instance *tsi = tsu->tsu_instance;
+	struct sfw_session *sn = tsi->tsi_batch->bat_session;
+	struct srpc_ping_reqst *reqst = &rpc->crpc_reqstmsg.msg_body.ping_reqst;
+	struct srpc_ping_reply *reply = &rpc->crpc_replymsg.msg_body.ping_reply;
 	struct timespec64 ts;
 
 	LASSERT(sn);
 
 	if (rpc->crpc_status) {
-		if (!tsi->tsi_stopping) /* rpc could have been aborted */
+		if (!tsi->tsi_stopping)	/* rpc could have been aborted */
 			atomic_inc(&sn->sn_ping_errors);
 		CERROR("Unable to ping %s (%d): %d\n",
 		       libcfs_id2str(rpc->crpc_dest),
@@ -171,10 +171,10 @@ static int
 ping_server_handle(struct srpc_server_rpc *rpc)
 {
 	struct srpc_service *sv = rpc->srpc_scd->scd_svc;
-	srpc_msg_t *reqstmsg = &rpc->srpc_reqstbuf->buf_msg;
-	srpc_msg_t *replymsg = &rpc->srpc_replymsg;
-	srpc_ping_reqst_t *req = &reqstmsg->msg_body.ping_reqst;
-	srpc_ping_reply_t *rep = &rpc->srpc_replymsg.msg_body.ping_reply;
+	struct srpc_msg *reqstmsg = &rpc->srpc_reqstbuf->buf_msg;
+	struct srpc_msg *replymsg = &rpc->srpc_replymsg;
+	struct srpc_ping_reqst *req = &reqstmsg->msg_body.ping_reqst;
+	struct srpc_ping_reply *rep = &rpc->srpc_replymsg.msg_body.ping_reply;
 
 	LASSERT(sv->sv_id == SRPC_SERVICE_PING);
 
@@ -210,7 +210,8 @@ ping_server_handle(struct srpc_server_rpc *rpc)
 	return 0;
 }
 
-sfw_test_client_ops_t ping_test_client;
+struct sfw_test_client_ops ping_test_client;
+
 void ping_init_test_client(void)
 {
 	ping_test_client.tso_init = ping_client_init;
@@ -219,7 +220,8 @@ void ping_init_test_client(void)
 	ping_test_client.tso_done_rpc = ping_client_done_rpc;
 }
 
-srpc_service_t ping_test_service;
+struct srpc_service ping_test_service;
+
 void ping_init_test_service(void)
 {
 	ping_test_service.sv_id = SRPC_SERVICE_PING;
diff --git a/drivers/staging/lustre/lnet/selftest/rpc.c b/drivers/staging/lustre/lnet/selftest/rpc.c
index 7d7748d96..3c45a7cfa 100644
--- a/drivers/staging/lustre/lnet/selftest/rpc.c
+++ b/drivers/staging/lustre/lnet/selftest/rpc.c
@@ -46,19 +46,19 @@
 
 #include "selftest.h"
 
-typedef enum {
+enum srpc_state {
 	SRPC_STATE_NONE,
 	SRPC_STATE_NI_INIT,
 	SRPC_STATE_EQ_INIT,
 	SRPC_STATE_RUNNING,
 	SRPC_STATE_STOPPING,
-} srpc_state_t;
+};
 
 static struct smoketest_rpc {
 	spinlock_t	 rpc_glock;	/* global lock */
-	srpc_service_t	*rpc_services[SRPC_SERVICE_MAX_ID + 1];
+	struct srpc_service	*rpc_services[SRPC_SERVICE_MAX_ID + 1];
 	lnet_handle_eq_t rpc_lnet_eq;	/* _the_ LNet event queue */
-	srpc_state_t	 rpc_state;
+	enum srpc_state	 rpc_state;
 	srpc_counters_t	 rpc_counters;
 	__u64		 rpc_matchbits;	/* matchbits counter */
 } srpc_data;
@@ -71,7 +71,7 @@ srpc_serv_portal(int svc_id)
 }
 
 /* forward ref's */
-int srpc_handle_rpc(swi_workitem_t *wi);
+int srpc_handle_rpc(struct swi_workitem *wi);
 
 void srpc_get_counters(srpc_counters_t *cnt)
 {
@@ -88,7 +88,7 @@ void srpc_set_counters(const srpc_counters_t *cnt)
 }
 
 static int
-srpc_add_bulk_page(srpc_bulk_t *bk, struct page *pg, int i, int nob)
+srpc_add_bulk_page(struct srpc_bulk *bk, struct page *pg, int i, int nob)
 {
 	nob = min_t(int, nob, PAGE_SIZE);
 
@@ -102,7 +102,7 @@ srpc_add_bulk_page(srpc_bulk_t *bk, struct page *pg, int i, int nob)
 }
 
 void
-srpc_free_bulk(srpc_bulk_t *bk)
+srpc_free_bulk(struct srpc_bulk *bk)
 {
 	int i;
 	struct page *pg;
@@ -117,25 +117,25 @@ srpc_free_bulk(srpc_bulk_t *bk)
 		__free_page(pg);
 	}
 
-	LIBCFS_FREE(bk, offsetof(srpc_bulk_t, bk_iovs[bk->bk_niov]));
+	LIBCFS_FREE(bk, offsetof(struct srpc_bulk, bk_iovs[bk->bk_niov]));
 }
 
-srpc_bulk_t *
+struct srpc_bulk *
 srpc_alloc_bulk(int cpt, unsigned bulk_npg, unsigned bulk_len, int sink)
 {
-	srpc_bulk_t *bk;
+	struct srpc_bulk *bk;
 	int i;
 
 	LASSERT(bulk_npg > 0 && bulk_npg <= LNET_MAX_IOV);
 
 	LIBCFS_CPT_ALLOC(bk, lnet_cpt_table(), cpt,
-			 offsetof(srpc_bulk_t, bk_iovs[bulk_npg]));
+			 offsetof(struct srpc_bulk, bk_iovs[bulk_npg]));
 	if (!bk) {
 		CERROR("Can't allocate descriptor for %d pages\n", bulk_npg);
 		return NULL;
 	}
 
-	memset(bk, 0, offsetof(srpc_bulk_t, bk_iovs[bulk_npg]));
+	memset(bk, 0, offsetof(struct srpc_bulk, bk_iovs[bulk_npg]));
 	bk->bk_sink = sink;
 	bk->bk_len = bulk_len;
 	bk->bk_niov = bulk_npg;
@@ -256,7 +256,7 @@ srpc_service_init(struct srpc_service *svc)
 	svc->sv_shuttingdown = 0;
 
 	svc->sv_cpt_data = cfs_percpt_alloc(lnet_cpt_table(),
-					    sizeof(struct srpc_service_cd));
+					    sizeof(*svc->sv_cpt_data));
 	if (!svc->sv_cpt_data)
 		return -ENOMEM;
 
@@ -338,7 +338,7 @@ srpc_add_service(struct srpc_service *sv)
 }
 
 int
-srpc_remove_service(srpc_service_t *sv)
+srpc_remove_service(struct srpc_service *sv)
 {
 	int id = sv->sv_id;
 
@@ -357,7 +357,7 @@ srpc_remove_service(srpc_service_t *sv)
 static int
 srpc_post_passive_rdma(int portal, int local, __u64 matchbits, void *buf,
 		       int len, int options, lnet_process_id_t peer,
-		       lnet_handle_md_t *mdh, srpc_event_t *ev)
+		       lnet_handle_md_t *mdh, struct srpc_event *ev)
 {
 	int rc;
 	lnet_md_t md;
@@ -396,7 +396,7 @@ srpc_post_passive_rdma(int portal, int local, __u64 matchbits, void *buf,
 static int
 srpc_post_active_rdma(int portal, __u64 matchbits, void *buf, int len,
 		      int options, lnet_process_id_t peer, lnet_nid_t self,
-		      lnet_handle_md_t *mdh, srpc_event_t *ev)
+		      lnet_handle_md_t *mdh, struct srpc_event *ev)
 {
 	int rc;
 	lnet_md_t md;
@@ -449,7 +449,7 @@ srpc_post_active_rdma(int portal, __u64 matchbits, void *buf, int len,
 
 static int
 srpc_post_passive_rqtbuf(int service, int local, void *buf, int len,
-			 lnet_handle_md_t *mdh, srpc_event_t *ev)
+			 lnet_handle_md_t *mdh, struct srpc_event *ev)
 {
 	lnet_process_id_t any = { 0 };
 
@@ -697,7 +697,7 @@ srpc_finish_service(struct srpc_service *sv)
 
 /* called with sv->sv_lock held */
 static void
-srpc_service_recycle_buffer(struct srpc_service_cd *scd, srpc_buffer_t *buf)
+srpc_service_recycle_buffer(struct srpc_service_cd *scd, struct srpc_buffer *buf)
 __must_hold(&scd->scd_lock)
 {
 	if (!scd->scd_svc->sv_shuttingdown && scd->scd_buf_adjust >= 0) {
@@ -755,11 +755,11 @@ srpc_abort_service(struct srpc_service *sv)
 }
 
 void
-srpc_shutdown_service(srpc_service_t *sv)
+srpc_shutdown_service(struct srpc_service *sv)
 {
 	struct srpc_service_cd *scd;
 	struct srpc_server_rpc *rpc;
-	srpc_buffer_t *buf;
+	struct srpc_buffer *buf;
 	int i;
 
 	CDEBUG(D_NET, "Shutting down service: id %d, name %s\n",
@@ -792,9 +792,9 @@ srpc_shutdown_service(srpc_service_t *sv)
 }
 
 static int
-srpc_send_request(srpc_client_rpc_t *rpc)
+srpc_send_request(struct srpc_client_rpc *rpc)
 {
-	srpc_event_t *ev = &rpc->crpc_reqstev;
+	struct srpc_event *ev = &rpc->crpc_reqstev;
 	int rc;
 
 	ev->ev_fired = 0;
@@ -803,7 +803,7 @@ srpc_send_request(srpc_client_rpc_t *rpc)
 
 	 rc = srpc_post_active_rdma(srpc_serv_portal(rpc->crpc_service),
 				    rpc->crpc_service, &rpc->crpc_reqstmsg,
-				    sizeof(srpc_msg_t), LNET_MD_OP_PUT,
+				    sizeof(struct srpc_msg), LNET_MD_OP_PUT,
 				    rpc->crpc_dest, LNET_NID_ANY,
 				    &rpc->crpc_reqstmdh, ev);
 	if (rc) {
@@ -814,9 +814,9 @@ srpc_send_request(srpc_client_rpc_t *rpc)
 }
 
 static int
-srpc_prepare_reply(srpc_client_rpc_t *rpc)
+srpc_prepare_reply(struct srpc_client_rpc *rpc)
 {
-	srpc_event_t *ev = &rpc->crpc_replyev;
+	struct srpc_event *ev = &rpc->crpc_replyev;
 	__u64 *id = &rpc->crpc_reqstmsg.msg_body.reqst.rpyid;
 	int rc;
 
@@ -827,7 +827,8 @@ srpc_prepare_reply(srpc_client_rpc_t *rpc)
 	*id = srpc_next_id();
 
 	rc = srpc_post_passive_rdma(SRPC_RDMA_PORTAL, 0, *id,
-				    &rpc->crpc_replymsg, sizeof(srpc_msg_t),
+				    &rpc->crpc_replymsg,
+				    sizeof(struct srpc_msg),
 				    LNET_MD_OP_PUT, rpc->crpc_dest,
 				    &rpc->crpc_replymdh, ev);
 	if (rc) {
@@ -838,10 +839,10 @@ srpc_prepare_reply(srpc_client_rpc_t *rpc)
 }
 
 static int
-srpc_prepare_bulk(srpc_client_rpc_t *rpc)
+srpc_prepare_bulk(struct srpc_client_rpc *rpc)
 {
-	srpc_bulk_t *bk = &rpc->crpc_bulk;
-	srpc_event_t *ev = &rpc->crpc_bulkev;
+	struct srpc_bulk *bk = &rpc->crpc_bulk;
+	struct srpc_event *ev = &rpc->crpc_bulkev;
 	__u64 *id = &rpc->crpc_reqstmsg.msg_body.reqst.bulkid;
 	int rc;
 	int opt;
@@ -873,8 +874,8 @@ srpc_prepare_bulk(srpc_client_rpc_t *rpc)
 static int
 srpc_do_bulk(struct srpc_server_rpc *rpc)
 {
-	srpc_event_t *ev = &rpc->srpc_ev;
-	srpc_bulk_t *bk = rpc->srpc_bulk;
+	struct srpc_event *ev = &rpc->srpc_ev;
+	struct srpc_bulk *bk = rpc->srpc_bulk;
 	__u64 id = rpc->srpc_reqstbuf->buf_msg.msg_body.reqst.bulkid;
 	int rc;
 	int opt;
@@ -903,7 +904,7 @@ srpc_server_rpc_done(struct srpc_server_rpc *rpc, int status)
 {
 	struct srpc_service_cd *scd = rpc->srpc_scd;
 	struct srpc_service *sv = scd->scd_svc;
-	srpc_buffer_t *buffer;
+	struct srpc_buffer *buffer;
 
 	LASSERT(status || rpc->srpc_wi.swi_state == SWI_STATE_DONE);
 
@@ -948,7 +949,7 @@ srpc_server_rpc_done(struct srpc_server_rpc *rpc, int status)
 
 	if (!sv->sv_shuttingdown && !list_empty(&scd->scd_buf_blocked)) {
 		buffer = list_entry(scd->scd_buf_blocked.next,
-				    srpc_buffer_t, buf_list);
+				    struct srpc_buffer, buf_list);
 		list_del(&buffer->buf_list);
 
 		srpc_init_server_rpc(rpc, scd, buffer);
@@ -963,12 +964,12 @@ srpc_server_rpc_done(struct srpc_server_rpc *rpc, int status)
 
 /* handles an incoming RPC */
 int
-srpc_handle_rpc(swi_workitem_t *wi)
+srpc_handle_rpc(struct swi_workitem *wi)
 {
 	struct srpc_server_rpc *rpc = wi->swi_workitem.wi_data;
 	struct srpc_service_cd *scd = rpc->srpc_scd;
 	struct srpc_service *sv = scd->scd_svc;
-	srpc_event_t *ev = &rpc->srpc_ev;
+	struct srpc_event *ev = &rpc->srpc_ev;
 	int rc = 0;
 
 	LASSERT(wi == &rpc->srpc_wi);
@@ -995,8 +996,8 @@ srpc_handle_rpc(swi_workitem_t *wi)
 	default:
 		LBUG();
 	case SWI_STATE_NEWBORN: {
-		srpc_msg_t *msg;
-		srpc_generic_reply_t *reply;
+		struct srpc_msg *msg;
+		struct srpc_generic_reply *reply;
 
 		msg = &rpc->srpc_reqstbuf->buf_msg;
 		reply = &rpc->srpc_replymsg.msg_body.reply;
@@ -1077,7 +1078,7 @@ srpc_handle_rpc(swi_workitem_t *wi)
 static void
 srpc_client_rpc_expired(void *data)
 {
-	srpc_client_rpc_t *rpc = data;
+	struct srpc_client_rpc *rpc = data;
 
 	CWARN("Client RPC expired: service %d, peer %s, timeout %d.\n",
 	      rpc->crpc_service, libcfs_id2str(rpc->crpc_dest),
@@ -1096,7 +1097,7 @@ srpc_client_rpc_expired(void *data)
 }
 
 static void
-srpc_add_client_rpc_timer(srpc_client_rpc_t *rpc)
+srpc_add_client_rpc_timer(struct srpc_client_rpc *rpc)
 {
 	struct stt_timer *timer = &rpc->crpc_timer;
 
@@ -1117,7 +1118,7 @@ srpc_add_client_rpc_timer(srpc_client_rpc_t *rpc)
  * running on any CPU.
  */
 static void
-srpc_del_client_rpc_timer(srpc_client_rpc_t *rpc)
+srpc_del_client_rpc_timer(struct srpc_client_rpc *rpc)
 {
 	/* timer not planted or already exploded */
 	if (!rpc->crpc_timeout)
@@ -1138,9 +1139,9 @@ srpc_del_client_rpc_timer(srpc_client_rpc_t *rpc)
 }
 
 static void
-srpc_client_rpc_done(srpc_client_rpc_t *rpc, int status)
+srpc_client_rpc_done(struct srpc_client_rpc *rpc, int status)
 {
-	swi_workitem_t *wi = &rpc->crpc_wi;
+	struct swi_workitem *wi = &rpc->crpc_wi;
 
 	LASSERT(status || wi->swi_state == SWI_STATE_DONE);
 
@@ -1175,11 +1176,11 @@ srpc_client_rpc_done(srpc_client_rpc_t *rpc, int status)
 
 /* sends an outgoing RPC */
 int
-srpc_send_rpc(swi_workitem_t *wi)
+srpc_send_rpc(struct swi_workitem *wi)
 {
 	int rc = 0;
-	srpc_client_rpc_t *rpc;
-	srpc_msg_t *reply;
+	struct srpc_client_rpc *rpc;
+	struct srpc_msg *reply;
 	int do_bulk;
 
 	LASSERT(wi);
@@ -1237,7 +1238,7 @@ srpc_send_rpc(swi_workitem_t *wi)
 		wi->swi_state = SWI_STATE_REQUEST_SENT;
 		/* perhaps more events, fall thru */
 	case SWI_STATE_REQUEST_SENT: {
-		srpc_msg_type_t type = srpc_service2reply(rpc->crpc_service);
+		enum srpc_msg_type type = srpc_service2reply(rpc->crpc_service);
 
 		if (!rpc->crpc_replyev.ev_fired)
 			break;
@@ -1308,15 +1309,15 @@ abort:
 	return 0;
 }
 
-srpc_client_rpc_t *
+struct srpc_client_rpc *
 srpc_create_client_rpc(lnet_process_id_t peer, int service,
 		       int nbulkiov, int bulklen,
-		       void (*rpc_done)(srpc_client_rpc_t *),
-		       void (*rpc_fini)(srpc_client_rpc_t *), void *priv)
+		       void (*rpc_done)(struct srpc_client_rpc *),
+		       void (*rpc_fini)(struct srpc_client_rpc *), void *priv)
 {
-	srpc_client_rpc_t *rpc;
+	struct srpc_client_rpc *rpc;
 
-	LIBCFS_ALLOC(rpc, offsetof(srpc_client_rpc_t,
+	LIBCFS_ALLOC(rpc, offsetof(struct srpc_client_rpc,
 				   crpc_bulk.bk_iovs[nbulkiov]));
 	if (!rpc)
 		return NULL;
@@ -1328,12 +1329,12 @@ srpc_create_client_rpc(lnet_process_id_t peer, int service,
 
 /* called with rpc->crpc_lock held */
 void
-srpc_abort_rpc(srpc_client_rpc_t *rpc, int why)
+srpc_abort_rpc(struct srpc_client_rpc *rpc, int why)
 {
 	LASSERT(why);
 
-	if (rpc->crpc_aborted || /* already aborted */
-	    rpc->crpc_closed)	 /* callback imminent */
+	if (rpc->crpc_aborted ||	/* already aborted */
+	    rpc->crpc_closed)		/* callback imminent */
 		return;
 
 	CDEBUG(D_NET, "Aborting RPC: service %d, peer %s, state %s, why %d\n",
@@ -1347,7 +1348,7 @@ srpc_abort_rpc(srpc_client_rpc_t *rpc, int why)
 
 /* called with rpc->crpc_lock held */
 void
-srpc_post_rpc(srpc_client_rpc_t *rpc)
+srpc_post_rpc(struct srpc_client_rpc *rpc)
 {
 	LASSERT(!rpc->crpc_aborted);
 	LASSERT(srpc_data.rpc_state == SRPC_STATE_RUNNING);
@@ -1363,7 +1364,7 @@ srpc_post_rpc(srpc_client_rpc_t *rpc)
 int
 srpc_send_reply(struct srpc_server_rpc *rpc)
 {
-	srpc_event_t *ev = &rpc->srpc_ev;
+	struct srpc_event *ev = &rpc->srpc_ev;
 	struct srpc_msg *msg = &rpc->srpc_replymsg;
 	struct srpc_buffer *buffer = rpc->srpc_reqstbuf;
 	struct srpc_service_cd *scd = rpc->srpc_scd;
@@ -1401,7 +1402,7 @@ srpc_send_reply(struct srpc_server_rpc *rpc)
 				   rpc->srpc_peer, rpc->srpc_self,
 				   &rpc->srpc_replymdh, ev);
 	if (rc)
-		ev->ev_fired = 1;  /* no more event expected */
+		ev->ev_fired = 1; /* no more event expected */
 	return rc;
 }
 
@@ -1410,13 +1411,13 @@ static void
 srpc_lnet_ev_handler(lnet_event_t *ev)
 {
 	struct srpc_service_cd *scd;
-	srpc_event_t *rpcev = ev->md.user_ptr;
-	srpc_client_rpc_t *crpc;
+	struct srpc_event *rpcev = ev->md.user_ptr;
+	struct srpc_client_rpc *crpc;
 	struct srpc_server_rpc *srpc;
-	srpc_buffer_t *buffer;
-	srpc_service_t *sv;
-	srpc_msg_t *msg;
-	srpc_msg_type_t type;
+	struct srpc_buffer *buffer;
+	struct srpc_service *sv;
+	struct srpc_msg *msg;
+	enum srpc_msg_type type;
 
 	LASSERT(!in_interrupt());
 
@@ -1486,7 +1487,7 @@ srpc_lnet_ev_handler(lnet_event_t *ev)
 		LASSERT(ev->type != LNET_EVENT_UNLINK ||
 			sv->sv_shuttingdown);
 
-		buffer = container_of(ev->md.start, srpc_buffer_t, buf_msg);
+		buffer = container_of(ev->md.start, struct srpc_buffer, buf_msg);
 		buffer->buf_peer = ev->initiator;
 		buffer->buf_self = ev->target.nid;
 
@@ -1509,7 +1510,7 @@ srpc_lnet_ev_handler(lnet_event_t *ev)
 			scd->scd_buf_err = 0;
 		}
 
-		if (!scd->scd_buf_err && /* adding buffer is enabled */
+		if (!scd->scd_buf_err &&	/* adding buffer is enabled */
 		    !scd->scd_buf_adjust &&
 		    scd->scd_buf_nposted < scd->scd_buf_low) {
 			scd->scd_buf_adjust = max(scd->scd_buf_total / 2,
@@ -1663,7 +1664,7 @@ srpc_shutdown(void)
 		spin_lock(&srpc_data.rpc_glock);
 
 		for (i = 0; i <= SRPC_SERVICE_MAX_ID; i++) {
-			srpc_service_t *sv = srpc_data.rpc_services[i];
+			struct srpc_service *sv = srpc_data.rpc_services[i];
 
 			LASSERTF(!sv, "service not empty: id %d, name %s\n",
 				 i, sv->sv_name);
diff --git a/drivers/staging/lustre/lnet/selftest/rpc.h b/drivers/staging/lustre/lnet/selftest/rpc.h
index a79c315f2..c9b904cad 100644
--- a/drivers/staging/lustre/lnet/selftest/rpc.h
+++ b/drivers/staging/lustre/lnet/selftest/rpc.h
@@ -44,7 +44,7 @@
  *
  * XXX: *REPLY == *REQST + 1
  */
-typedef enum {
+enum srpc_msg_type {
 	SRPC_MSG_MKSN_REQST	= 0,
 	SRPC_MSG_MKSN_REPLY	= 1,
 	SRPC_MSG_RMSN_REQST	= 2,
@@ -63,7 +63,7 @@ typedef enum {
 	SRPC_MSG_PING_REPLY	= 15,
 	SRPC_MSG_JOIN_REQST	= 16,
 	SRPC_MSG_JOIN_REPLY	= 17,
-} srpc_msg_type_t;
+};
 
 /* CAVEAT EMPTOR:
  * All srpc_*_reqst_t's 1st field must be matchbits of reply buffer,
@@ -72,122 +72,122 @@ typedef enum {
  * All srpc_*_reply_t's 1st field must be a __u32 status, and 2nd field
  * session id if needed.
  */
-typedef struct {
+struct srpc_generic_reqst {
 	__u64			rpyid;		/* reply buffer matchbits */
 	__u64			bulkid;		/* bulk buffer matchbits */
-} WIRE_ATTR srpc_generic_reqst_t;
+} WIRE_ATTR;
 
-typedef struct {
+struct srpc_generic_reply {
 	__u32			status;
 	lst_sid_t		sid;
-} WIRE_ATTR srpc_generic_reply_t;
+} WIRE_ATTR;
 
 /* FRAMEWORK RPCs */
-typedef struct {
+struct srpc_mksn_reqst {
 	__u64			mksn_rpyid;	/* reply buffer matchbits */
 	lst_sid_t		mksn_sid;	/* session id */
 	__u32			mksn_force;	/* use brute force */
 	char			mksn_name[LST_NAME_SIZE];
-} WIRE_ATTR srpc_mksn_reqst_t; /* make session request */
+} WIRE_ATTR; /* make session request */
 
-typedef struct {
+struct srpc_mksn_reply {
 	__u32			mksn_status;	/* session status */
 	lst_sid_t		mksn_sid;	/* session id */
 	__u32			mksn_timeout;	/* session timeout */
 	char			mksn_name[LST_NAME_SIZE];
-} WIRE_ATTR srpc_mksn_reply_t; /* make session reply */
+} WIRE_ATTR; /* make session reply */
 
-typedef struct {
+struct srpc_rmsn_reqst {
 	__u64			rmsn_rpyid;	/* reply buffer matchbits */
 	lst_sid_t		rmsn_sid;	/* session id */
-} WIRE_ATTR srpc_rmsn_reqst_t; /* remove session request */
+} WIRE_ATTR; /* remove session request */
 
-typedef struct {
+struct srpc_rmsn_reply {
 	__u32			rmsn_status;
 	lst_sid_t		rmsn_sid;	/* session id */
-} WIRE_ATTR srpc_rmsn_reply_t; /* remove session reply */
+} WIRE_ATTR; /* remove session reply */
 
-typedef struct {
+struct srpc_join_reqst {
 	__u64			join_rpyid;	/* reply buffer matchbits */
 	lst_sid_t		join_sid;	/* session id to join */
 	char			join_group[LST_NAME_SIZE]; /* group name */
-} WIRE_ATTR srpc_join_reqst_t;
+} WIRE_ATTR;
 
-typedef struct {
+struct srpc_join_reply {
 	__u32			join_status;	/* returned status */
 	lst_sid_t		join_sid;	/* session id */
 	__u32			join_timeout;	/* # seconds' inactivity to
 						 * expire */
 	char			join_session[LST_NAME_SIZE]; /* session name */
-} WIRE_ATTR srpc_join_reply_t;
+} WIRE_ATTR;
 
-typedef struct {
+struct srpc_debug_reqst {
 	__u64			dbg_rpyid;	/* reply buffer matchbits */
 	lst_sid_t		dbg_sid;	/* session id */
 	__u32			dbg_flags;	/* bitmap of debug */
-} WIRE_ATTR srpc_debug_reqst_t;
+} WIRE_ATTR;
 
-typedef struct {
+struct srpc_debug_reply {
 	__u32			dbg_status;	/* returned code */
 	lst_sid_t		dbg_sid;	/* session id */
 	__u32			dbg_timeout;	/* session timeout */
 	__u32			dbg_nbatch;	/* # of batches in the node */
 	char			dbg_name[LST_NAME_SIZE]; /* session name */
-} WIRE_ATTR srpc_debug_reply_t;
+} WIRE_ATTR;
 
 #define SRPC_BATCH_OPC_RUN	1
 #define SRPC_BATCH_OPC_STOP	2
 #define SRPC_BATCH_OPC_QUERY	3
 
-typedef struct {
+struct srpc_batch_reqst {
 	__u64		   bar_rpyid;	   /* reply buffer matchbits */
 	lst_sid_t	   bar_sid;	   /* session id */
 	lst_bid_t	   bar_bid;	   /* batch id */
 	__u32		   bar_opc;	   /* create/start/stop batch */
 	__u32		   bar_testidx;    /* index of test */
 	__u32		   bar_arg;	   /* parameters */
-} WIRE_ATTR srpc_batch_reqst_t;
+} WIRE_ATTR;
 
-typedef struct {
+struct srpc_batch_reply {
 	__u32		   bar_status;	   /* status of request */
 	lst_sid_t	   bar_sid;	   /* session id */
 	__u32		   bar_active;	   /* # of active tests in batch/test */
 	__u32		   bar_time;	   /* remained time */
-} WIRE_ATTR srpc_batch_reply_t;
+} WIRE_ATTR;
 
-typedef struct {
+struct srpc_stat_reqst {
 	__u64		   str_rpyid;	   /* reply buffer matchbits */
 	lst_sid_t	   str_sid;	   /* session id */
 	__u32		   str_type;	   /* type of stat */
-} WIRE_ATTR srpc_stat_reqst_t;
+} WIRE_ATTR;
 
-typedef struct {
+struct srpc_stat_reply {
 	__u32		   str_status;
 	lst_sid_t	   str_sid;
 	sfw_counters_t	   str_fw;
 	srpc_counters_t    str_rpc;
 	lnet_counters_t    str_lnet;
-} WIRE_ATTR srpc_stat_reply_t;
+} WIRE_ATTR;
 
-typedef struct {
+struct test_bulk_req {
 	__u32		   blk_opc;	   /* bulk operation code */
 	__u32		   blk_npg;	   /* # of pages */
 	__u32		   blk_flags;	   /* reserved flags */
-} WIRE_ATTR test_bulk_req_t;
+} WIRE_ATTR;
 
-typedef struct {
+struct test_bulk_req_v1 {
 	__u16		   blk_opc;	   /* bulk operation code */
 	__u16		   blk_flags;	   /* data check flags */
 	__u32		   blk_len;	   /* data length */
 	__u32		   blk_offset;	   /* reserved: offset */
-} WIRE_ATTR test_bulk_req_v1_t;
+} WIRE_ATTR;
 
-typedef struct {
+struct test_ping_req {
 	__u32		   png_size;	   /* size of ping message */
 	__u32		   png_flags;	   /* reserved flags */
-} WIRE_ATTR test_ping_req_t;
+} WIRE_ATTR;
 
-typedef struct {
+struct srpc_test_reqst {
 	__u64			tsr_rpyid;	/* reply buffer matchbits */
 	__u64			tsr_bulkid;	/* bulk buffer matchbits */
 	lst_sid_t		tsr_sid;	/* session id */
@@ -201,82 +201,82 @@ typedef struct {
 	__u32			tsr_ndest;	/* # of dest nodes */
 
 	union {
-		test_ping_req_t		ping;
-		test_bulk_req_t		bulk_v0;
-		test_bulk_req_v1_t	bulk_v1;
-	}		tsr_u;
-} WIRE_ATTR srpc_test_reqst_t;
+		struct test_ping_req	ping;
+		struct test_bulk_req	bulk_v0;
+		struct test_bulk_req_v1	bulk_v1;
+	} tsr_u;
+} WIRE_ATTR;
 
-typedef struct {
+struct srpc_test_reply {
 	__u32			tsr_status;	/* returned code */
 	lst_sid_t		tsr_sid;
-} WIRE_ATTR srpc_test_reply_t;
+} WIRE_ATTR;
 
 /* TEST RPCs */
-typedef struct {
+struct srpc_ping_reqst {
 	__u64		   pnr_rpyid;
 	__u32		   pnr_magic;
 	__u32		   pnr_seq;
 	__u64		   pnr_time_sec;
 	__u64		   pnr_time_usec;
-} WIRE_ATTR srpc_ping_reqst_t;
+} WIRE_ATTR;
 
-typedef struct {
+struct srpc_ping_reply {
 	__u32		   pnr_status;
 	__u32		   pnr_magic;
 	__u32		   pnr_seq;
-} WIRE_ATTR srpc_ping_reply_t;
+} WIRE_ATTR;
 
-typedef struct {
+struct srpc_brw_reqst {
 	__u64		   brw_rpyid;	   /* reply buffer matchbits */
 	__u64		   brw_bulkid;	   /* bulk buffer matchbits */
 	__u32		   brw_rw;	   /* read or write */
 	__u32		   brw_len;	   /* bulk data len */
 	__u32		   brw_flags;	   /* bulk data patterns */
-} WIRE_ATTR srpc_brw_reqst_t; /* bulk r/w request */
+} WIRE_ATTR; /* bulk r/w request */
 
-typedef struct {
+struct srpc_brw_reply {
 	__u32		   brw_status;
-} WIRE_ATTR srpc_brw_reply_t; /* bulk r/w reply */
+} WIRE_ATTR; /* bulk r/w reply */
 
 #define SRPC_MSG_MAGIC		0xeeb0f00d
 #define SRPC_MSG_VERSION	1
 
-typedef struct srpc_msg {
+struct srpc_msg {
 	__u32	msg_magic;     /* magic number */
 	__u32	msg_version;   /* message version number */
-	__u32	msg_type;      /* type of message body: srpc_msg_type_t */
+	__u32	msg_type;      /* type of message body: srpc_msg_type */
 	__u32	msg_reserved0;
 	__u32	msg_reserved1;
 	__u32	msg_ses_feats; /* test session features */
 	union {
-		srpc_generic_reqst_t reqst;
-		srpc_generic_reply_t reply;
-
-		srpc_mksn_reqst_t    mksn_reqst;
-		srpc_mksn_reply_t    mksn_reply;
-		srpc_rmsn_reqst_t    rmsn_reqst;
-		srpc_rmsn_reply_t    rmsn_reply;
-		srpc_debug_reqst_t   dbg_reqst;
-		srpc_debug_reply_t   dbg_reply;
-		srpc_batch_reqst_t   bat_reqst;
-		srpc_batch_reply_t   bat_reply;
-		srpc_stat_reqst_t    stat_reqst;
-		srpc_stat_reply_t    stat_reply;
-		srpc_test_reqst_t    tes_reqst;
-		srpc_test_reply_t    tes_reply;
-		srpc_join_reqst_t    join_reqst;
-		srpc_join_reply_t    join_reply;
-
-		srpc_ping_reqst_t    ping_reqst;
-		srpc_ping_reply_t    ping_reply;
-		srpc_brw_reqst_t     brw_reqst;
-		srpc_brw_reply_t     brw_reply;
+		struct srpc_generic_reqst	reqst;
+		struct srpc_generic_reply	reply;
+
+		struct srpc_mksn_reqst		mksn_reqst;
+		struct srpc_mksn_reply		mksn_reply;
+		struct srpc_rmsn_reqst		rmsn_reqst;
+		struct srpc_rmsn_reply		rmsn_reply;
+		struct srpc_debug_reqst		dbg_reqst;
+		struct srpc_debug_reply		dbg_reply;
+		struct srpc_batch_reqst		bat_reqst;
+		struct srpc_batch_reply		bat_reply;
+		struct srpc_stat_reqst		stat_reqst;
+		struct srpc_stat_reply		stat_reply;
+		struct srpc_test_reqst		tes_reqst;
+		struct srpc_test_reply		tes_reply;
+		struct srpc_join_reqst		join_reqst;
+		struct srpc_join_reply		join_reply;
+
+		struct srpc_ping_reqst		ping_reqst;
+		struct srpc_ping_reply		ping_reply;
+		struct srpc_brw_reqst		brw_reqst;
+		struct srpc_brw_reply		brw_reply;
 	}     msg_body;
-} WIRE_ATTR srpc_msg_t;
+} WIRE_ATTR;
 
 static inline void
-srpc_unpack_msg_hdr(srpc_msg_t *msg)
+srpc_unpack_msg_hdr(struct srpc_msg *msg)
 {
 	if (msg->msg_magic == SRPC_MSG_MAGIC)
 		return; /* no flipping needed */
diff --git a/drivers/staging/lustre/lnet/selftest/selftest.h b/drivers/staging/lustre/lnet/selftest/selftest.h
index e689ca184..4eac1c9e6 100644
--- a/drivers/staging/lustre/lnet/selftest/selftest.h
+++ b/drivers/staging/lustre/lnet/selftest/selftest.h
@@ -93,7 +93,7 @@ struct sfw_test_instance;
 /* all reply/bulk RDMAs go to this portal */
 #define SRPC_RDMA_PORTAL		52
 
-static inline srpc_msg_type_t
+static inline enum srpc_msg_type
 srpc_service2request(int service)
 {
 	switch (service) {
@@ -128,13 +128,13 @@ srpc_service2request(int service)
 	}
 }
 
-static inline srpc_msg_type_t
+static inline enum srpc_msg_type
 srpc_service2reply(int service)
 {
 	return srpc_service2request(service) + 1;
 }
 
-typedef enum {
+enum srpc_event_type {
 	SRPC_BULK_REQ_RCVD   = 1, /* passive bulk request(PUT sink/GET source)
 				   * received */
 	SRPC_BULK_PUT_SENT   = 2, /* active bulk PUT sent (source) */
@@ -143,57 +143,58 @@ typedef enum {
 	SRPC_REPLY_SENT      = 5, /* outgoing reply sent */
 	SRPC_REQUEST_RCVD    = 6, /* incoming request received */
 	SRPC_REQUEST_SENT    = 7, /* outgoing request sent */
-} srpc_event_type_t;
+};
 
 /* RPC event */
-typedef struct {
-	srpc_event_type_t ev_type;   /* what's up */
+struct srpc_event {
+	enum srpc_event_type	ev_type;	/* what's up */
 	lnet_event_kind_t ev_lnet;   /* LNet event type */
 	int		  ev_fired;  /* LNet event fired? */
 	int		  ev_status; /* LNet event status */
 	void		  *ev_data;  /* owning server/client RPC */
-} srpc_event_t;
+};
 
-typedef struct {
+/* bulk descriptor */
+struct srpc_bulk {
 	int		 bk_len;     /* len of bulk data */
 	lnet_handle_md_t bk_mdh;
 	int		 bk_sink;    /* sink/source */
 	int		 bk_niov;    /* # iov in bk_iovs */
 	lnet_kiov_t	 bk_iovs[0];
-} srpc_bulk_t; /* bulk descriptor */
+};
 
 /* message buffer descriptor */
-typedef struct srpc_buffer {
+struct srpc_buffer {
 	struct list_head  buf_list; /* chain on srpc_service::*_msgq */
-	srpc_msg_t	  buf_msg;
+	struct srpc_msg	  buf_msg;
 	lnet_handle_md_t  buf_mdh;
 	lnet_nid_t	  buf_self;
 	lnet_process_id_t buf_peer;
-} srpc_buffer_t;
+};
 
 struct swi_workitem;
 typedef int (*swi_action_t) (struct swi_workitem *);
 
-typedef struct swi_workitem {
+struct swi_workitem {
 	struct cfs_wi_sched *swi_sched;
-	cfs_workitem_t	    swi_workitem;
+	struct cfs_workitem swi_workitem;
 	swi_action_t	    swi_action;
 	int		    swi_state;
-} swi_workitem_t;
+};
 
 /* server-side state of a RPC */
 struct srpc_server_rpc {
 	/* chain on srpc_service::*_rpcq */
 	struct list_head       srpc_list;
 	struct srpc_service_cd *srpc_scd;
-	swi_workitem_t	       srpc_wi;
-	srpc_event_t	       srpc_ev;      /* bulk/reply event */
+	struct swi_workitem	srpc_wi;
+	struct srpc_event	srpc_ev;	/* bulk/reply event */
 	lnet_nid_t	       srpc_self;
 	lnet_process_id_t      srpc_peer;
-	srpc_msg_t	       srpc_replymsg;
+	struct srpc_msg		srpc_replymsg;
 	lnet_handle_md_t       srpc_replymdh;
-	srpc_buffer_t	       *srpc_reqstbuf;
-	srpc_bulk_t	       *srpc_bulk;
+	struct srpc_buffer	*srpc_reqstbuf;
+	struct srpc_bulk	*srpc_bulk;
 
 	unsigned int	       srpc_aborted; /* being given up */
 	int		       srpc_status;
@@ -201,14 +202,14 @@ struct srpc_server_rpc {
 };
 
 /* client-side state of a RPC */
-typedef struct srpc_client_rpc {
+struct srpc_client_rpc {
 	struct list_head  crpc_list;	  /* chain on user's lists */
 	spinlock_t	  crpc_lock;	  /* serialize */
 	int		  crpc_service;
 	atomic_t	  crpc_refcount;
 	int		  crpc_timeout;   /* # seconds to wait for reply */
 	struct stt_timer       crpc_timer;
-	swi_workitem_t	  crpc_wi;
+	struct swi_workitem	crpc_wi;
 	lnet_process_id_t crpc_dest;
 
 	void		  (*crpc_done)(struct srpc_client_rpc *);
@@ -221,20 +222,20 @@ typedef struct srpc_client_rpc {
 	unsigned int	  crpc_closed:1;  /* completed */
 
 	/* RPC events */
-	srpc_event_t	  crpc_bulkev;	  /* bulk event */
-	srpc_event_t	  crpc_reqstev;   /* request event */
-	srpc_event_t	  crpc_replyev;   /* reply event */
+	struct srpc_event	crpc_bulkev;	/* bulk event */
+	struct srpc_event	crpc_reqstev;	/* request event */
+	struct srpc_event	crpc_replyev;	/* reply event */
 
 	/* bulk, request(reqst), and reply exchanged on wire */
-	srpc_msg_t	  crpc_reqstmsg;
-	srpc_msg_t	  crpc_replymsg;
+	struct srpc_msg		crpc_reqstmsg;
+	struct srpc_msg		crpc_replymsg;
 	lnet_handle_md_t  crpc_reqstmdh;
 	lnet_handle_md_t  crpc_replymdh;
-	srpc_bulk_t	  crpc_bulk;
-} srpc_client_rpc_t;
+	struct srpc_bulk	crpc_bulk;
+};
 
 #define srpc_client_rpc_size(rpc)					\
-offsetof(srpc_client_rpc_t, crpc_bulk.bk_iovs[(rpc)->crpc_bulk.bk_niov])
+offsetof(struct srpc_client_rpc, crpc_bulk.bk_iovs[(rpc)->crpc_bulk.bk_niov])
 
 #define srpc_client_rpc_addref(rpc)					\
 do {									\
@@ -266,13 +267,13 @@ struct srpc_service_cd {
 	/** backref to service */
 	struct srpc_service	*scd_svc;
 	/** event buffer */
-	srpc_event_t		scd_ev;
+	struct srpc_event	scd_ev;
 	/** free RPC descriptors */
 	struct list_head	scd_rpc_free;
 	/** in-flight RPCs */
 	struct list_head	scd_rpc_active;
 	/** workitem for posting buffer */
-	swi_workitem_t		scd_buf_wi;
+	struct swi_workitem	scd_buf_wi;
 	/** CPT id */
 	int			scd_cpt;
 	/** error code for scd_buf_wi */
@@ -306,7 +307,7 @@ struct srpc_service_cd {
 #define SFW_FRWK_WI_MIN		16
 #define SFW_FRWK_WI_MAX		256
 
-typedef struct srpc_service {
+struct srpc_service {
 	int			sv_id;		/* service id */
 	const char		*sv_name;	/* human readable name */
 	int			sv_wi_total;	/* total server workitems */
@@ -320,9 +321,9 @@ typedef struct srpc_service {
 	 */
 	int (*sv_handler)(struct srpc_server_rpc *);
 	int (*sv_bulk_ready)(struct srpc_server_rpc *, int);
-} srpc_service_t;
+};
 
-typedef struct {
+struct sfw_session {
 	struct list_head sn_list;    /* chain on fw_zombie_sessions */
 	lst_sid_t	 sn_id;      /* unique identifier */
 	unsigned int	 sn_timeout; /* # seconds' inactivity to expire */
@@ -335,37 +336,37 @@ typedef struct {
 	atomic_t	 sn_brw_errors;
 	atomic_t	 sn_ping_errors;
 	unsigned long	 sn_started;
-} sfw_session_t;
+};
 
 #define sfw_sid_equal(sid0, sid1)     ((sid0).ses_nid == (sid1).ses_nid && \
 				       (sid0).ses_stamp == (sid1).ses_stamp)
 
-typedef struct {
+struct sfw_batch {
 	struct list_head bat_list;	/* chain on sn_batches */
 	lst_bid_t	 bat_id;	/* batch id */
 	int		 bat_error;	/* error code of batch */
-	sfw_session_t	 *bat_session;	/* batch's session */
+	struct sfw_session	*bat_session;	/* batch's session */
 	atomic_t	 bat_nactive;	/* # of active tests */
 	struct list_head bat_tests;	/* test instances */
-} sfw_batch_t;
+};
 
-typedef struct {
+struct sfw_test_client_ops {
 	int  (*tso_init)(struct sfw_test_instance *tsi); /* initialize test
 							  * client */
 	void (*tso_fini)(struct sfw_test_instance *tsi); /* finalize test
 							  * client */
 	int  (*tso_prep_rpc)(struct sfw_test_unit *tsu,
 			     lnet_process_id_t dest,
-			     srpc_client_rpc_t **rpc);	 /* prep a tests rpc */
+			     struct srpc_client_rpc **rpc);	/* prep a tests rpc */
 	void (*tso_done_rpc)(struct sfw_test_unit *tsu,
-			     srpc_client_rpc_t *rpc);	 /* done a test rpc */
-} sfw_test_client_ops_t;
+			     struct srpc_client_rpc *rpc);	/* done a test rpc */
+};
 
-typedef struct sfw_test_instance {
+struct sfw_test_instance {
 	struct list_head	   tsi_list;		/* chain on batch */
 	int			   tsi_service;		/* test type */
-	sfw_batch_t		   *tsi_batch;		/* batch */
-	sfw_test_client_ops_t	   *tsi_ops;		/* test client operation
+	struct sfw_batch		*tsi_batch;	/* batch */
+	struct sfw_test_client_ops	*tsi_ops;	/* test client operation
 							 */
 
 	/* public parameter for all test units */
@@ -384,11 +385,11 @@ typedef struct sfw_test_instance {
 	struct list_head	   tsi_active_rpcs;	/* active rpcs */
 
 	union {
-		test_ping_req_t	   ping;    /* ping parameter */
-		test_bulk_req_t    bulk_v0; /* bulk parameter */
-		test_bulk_req_v1_t bulk_v1; /* bulk v1 parameter */
+		struct test_ping_req	ping;		/* ping parameter */
+		struct test_bulk_req	bulk_v0;	/* bulk parameter */
+		struct test_bulk_req_v1	bulk_v1;	/* bulk v1 parameter */
 	} tsi_u;
-} sfw_test_instance_t;
+};
 
 /* XXX: trailing (PAGE_SIZE % sizeof(lnet_process_id_t)) bytes at the end of
  * pages are not used */
@@ -397,57 +398,58 @@ typedef struct sfw_test_instance {
 #define SFW_MAX_NDESTS	   (LNET_MAX_IOV * SFW_ID_PER_PAGE)
 #define sfw_id_pages(n)    (((n) + SFW_ID_PER_PAGE - 1) / SFW_ID_PER_PAGE)
 
-typedef struct sfw_test_unit {
+struct sfw_test_unit {
 	struct list_head    tsu_list;	   /* chain on lst_test_instance */
 	lnet_process_id_t   tsu_dest;	   /* id of dest node */
 	int		    tsu_loop;	   /* loop count of the test */
-	sfw_test_instance_t *tsu_instance; /* pointer to test instance */
+	struct sfw_test_instance	*tsu_instance; /* pointer to test instance */
 	void		    *tsu_private;  /* private data */
-	swi_workitem_t	    tsu_worker;    /* workitem of the test unit */
-} sfw_test_unit_t;
+	struct swi_workitem	tsu_worker;	/* workitem of the test unit */
+};
 
-typedef struct sfw_test_case {
+struct sfw_test_case {
 	struct list_head      tsc_list;		/* chain on fw_tests */
-	srpc_service_t	      *tsc_srv_service; /* test service */
-	sfw_test_client_ops_t *tsc_cli_ops;	/* ops of test client */
-} sfw_test_case_t;
+	struct srpc_service		*tsc_srv_service;	/* test service */
+	struct sfw_test_client_ops	*tsc_cli_ops;	/* ops of test client */
+};
 
-srpc_client_rpc_t *
+struct srpc_client_rpc *
 sfw_create_rpc(lnet_process_id_t peer, int service,
 	       unsigned features, int nbulkiov, int bulklen,
-	       void (*done)(srpc_client_rpc_t *), void *priv);
-int sfw_create_test_rpc(sfw_test_unit_t *tsu,
+	       void (*done)(struct srpc_client_rpc *), void *priv);
+int sfw_create_test_rpc(struct sfw_test_unit *tsu,
 			lnet_process_id_t peer, unsigned features,
-			int nblk, int blklen, srpc_client_rpc_t **rpc);
-void sfw_abort_rpc(srpc_client_rpc_t *rpc);
-void sfw_post_rpc(srpc_client_rpc_t *rpc);
-void sfw_client_rpc_done(srpc_client_rpc_t *rpc);
-void sfw_unpack_message(srpc_msg_t *msg);
+			int nblk, int blklen, struct srpc_client_rpc **rpc);
+void sfw_abort_rpc(struct srpc_client_rpc *rpc);
+void sfw_post_rpc(struct srpc_client_rpc *rpc);
+void sfw_client_rpc_done(struct srpc_client_rpc *rpc);
+void sfw_unpack_message(struct srpc_msg *msg);
 void sfw_free_pages(struct srpc_server_rpc *rpc);
-void sfw_add_bulk_page(srpc_bulk_t *bk, struct page *pg, int i);
+void sfw_add_bulk_page(struct srpc_bulk *bk, struct page *pg, int i);
 int sfw_alloc_pages(struct srpc_server_rpc *rpc, int cpt, int npages, int len,
 		    int sink);
-int sfw_make_session(srpc_mksn_reqst_t *request, srpc_mksn_reply_t *reply);
+int sfw_make_session(struct srpc_mksn_reqst *request,
+		     struct srpc_mksn_reply *reply);
 
-srpc_client_rpc_t *
+struct srpc_client_rpc *
 srpc_create_client_rpc(lnet_process_id_t peer, int service,
 		       int nbulkiov, int bulklen,
-		       void (*rpc_done)(srpc_client_rpc_t *),
-		       void (*rpc_fini)(srpc_client_rpc_t *), void *priv);
-void srpc_post_rpc(srpc_client_rpc_t *rpc);
-void srpc_abort_rpc(srpc_client_rpc_t *rpc, int why);
-void srpc_free_bulk(srpc_bulk_t *bk);
-srpc_bulk_t *srpc_alloc_bulk(int cpt, unsigned bulk_npg, unsigned bulk_len,
-			     int sink);
-int srpc_send_rpc(swi_workitem_t *wi);
+		       void (*rpc_done)(struct srpc_client_rpc *),
+		       void (*rpc_fini)(struct srpc_client_rpc *), void *priv);
+void srpc_post_rpc(struct srpc_client_rpc *rpc);
+void srpc_abort_rpc(struct srpc_client_rpc *rpc, int why);
+void srpc_free_bulk(struct srpc_bulk *bk);
+struct srpc_bulk *srpc_alloc_bulk(int cpt, unsigned bulk_npg,
+				  unsigned bulk_len, int sink);
+int srpc_send_rpc(struct swi_workitem *wi);
 int srpc_send_reply(struct srpc_server_rpc *rpc);
-int srpc_add_service(srpc_service_t *sv);
-int srpc_remove_service(srpc_service_t *sv);
-void srpc_shutdown_service(srpc_service_t *sv);
-void srpc_abort_service(srpc_service_t *sv);
-int srpc_finish_service(srpc_service_t *sv);
-int srpc_service_add_buffers(srpc_service_t *sv, int nbuffer);
-void srpc_service_remove_buffers(srpc_service_t *sv, int nbuffer);
+int srpc_add_service(struct srpc_service *sv);
+int srpc_remove_service(struct srpc_service *sv);
+void srpc_shutdown_service(struct srpc_service *sv);
+void srpc_abort_service(struct srpc_service *sv);
+int srpc_finish_service(struct srpc_service *sv);
+int srpc_service_add_buffers(struct srpc_service *sv, int nbuffer);
+void srpc_service_remove_buffers(struct srpc_service *sv, int nbuffer);
 void srpc_get_counters(srpc_counters_t *cnt);
 void srpc_set_counters(const srpc_counters_t *cnt);
 
@@ -461,15 +463,17 @@ srpc_serv_is_framework(struct srpc_service *svc)
 }
 
 static inline int
-swi_wi_action(cfs_workitem_t *wi)
+swi_wi_action(struct cfs_workitem *wi)
 {
-	swi_workitem_t *swi = container_of(wi, swi_workitem_t, swi_workitem);
+	struct swi_workitem *swi;
+
+	swi = container_of(wi, struct swi_workitem, swi_workitem);
 
 	return swi->swi_action(swi);
 }
 
 static inline void
-swi_init_workitem(swi_workitem_t *swi, void *data,
+swi_init_workitem(struct swi_workitem *swi, void *data,
 		  swi_action_t action, struct cfs_wi_sched *sched)
 {
 	swi->swi_sched = sched;
@@ -479,19 +483,19 @@ swi_init_workitem(swi_workitem_t *swi, void *data,
 }
 
 static inline void
-swi_schedule_workitem(swi_workitem_t *wi)
+swi_schedule_workitem(struct swi_workitem *wi)
 {
 	cfs_wi_schedule(wi->swi_sched, &wi->swi_workitem);
 }
 
 static inline void
-swi_exit_workitem(swi_workitem_t *swi)
+swi_exit_workitem(struct swi_workitem *swi)
 {
 	cfs_wi_exit(swi->swi_sched, &swi->swi_workitem);
 }
 
 static inline int
-swi_deschedule_workitem(swi_workitem_t *swi)
+swi_deschedule_workitem(struct swi_workitem *swi)
 {
 	return cfs_wi_deschedule(swi->swi_sched, &swi->swi_workitem);
 }
@@ -502,7 +506,7 @@ void sfw_shutdown(void);
 void srpc_shutdown(void);
 
 static inline void
-srpc_destroy_client_rpc(srpc_client_rpc_t *rpc)
+srpc_destroy_client_rpc(struct srpc_client_rpc *rpc)
 {
 	LASSERT(rpc);
 	LASSERT(!srpc_event_pending(rpc));
@@ -515,14 +519,14 @@ srpc_destroy_client_rpc(srpc_client_rpc_t *rpc)
 }
 
 static inline void
-srpc_init_client_rpc(srpc_client_rpc_t *rpc, lnet_process_id_t peer,
+srpc_init_client_rpc(struct srpc_client_rpc *rpc, lnet_process_id_t peer,
 		     int service, int nbulkiov, int bulklen,
-		     void (*rpc_done)(srpc_client_rpc_t *),
-		     void (*rpc_fini)(srpc_client_rpc_t *), void *priv)
+		     void (*rpc_done)(struct srpc_client_rpc *),
+		     void (*rpc_fini)(struct srpc_client_rpc *), void *priv)
 {
 	LASSERT(nbulkiov <= LNET_MAX_IOV);
 
-	memset(rpc, 0, offsetof(srpc_client_rpc_t,
+	memset(rpc, 0, offsetof(struct srpc_client_rpc,
 				crpc_bulk.bk_iovs[nbulkiov]));
 
 	INIT_LIST_HEAD(&rpc->crpc_list);
@@ -592,7 +596,7 @@ do {									\
 } while (0)
 
 static inline void
-srpc_wait_service_shutdown(srpc_service_t *sv)
+srpc_wait_service_shutdown(struct srpc_service *sv)
 {
 	int i = 2;
 
@@ -607,16 +611,16 @@ srpc_wait_service_shutdown(srpc_service_t *sv)
 	}
 }
 
-extern sfw_test_client_ops_t brw_test_client;
+extern struct sfw_test_client_ops brw_test_client;
 void brw_init_test_client(void);
 
-extern srpc_service_t brw_test_service;
+extern struct srpc_service brw_test_service;
 void brw_init_test_service(void);
 
-extern sfw_test_client_ops_t ping_test_client;
+extern struct sfw_test_client_ops ping_test_client;
 void ping_init_test_client(void);
 
-extern srpc_service_t ping_test_service;
+extern struct srpc_service ping_test_service;
 void ping_init_test_service(void);
 
 #endif /* __SELFTEST_SELFTEST_H__ */
diff --git a/drivers/staging/lustre/lnet/selftest/timer.c b/drivers/staging/lustre/lnet/selftest/timer.c
index 8be52526a..b6c4aae00 100644
--- a/drivers/staging/lustre/lnet/selftest/timer.c
+++ b/drivers/staging/lustre/lnet/selftest/timer.c
@@ -49,7 +49,7 @@
  * sorted by increasing expiry time. The number of slots is 2**7 (128),
  * to cover a time period of 1024 seconds into the future before wrapping.
  */
-#define STTIMER_MINPOLL        3   /* log2 min poll interval (8 s) */
+#define STTIMER_MINPOLL        3	/* log2 min poll interval (8 s) */
 #define STTIMER_SLOTTIME       (1 << STTIMER_MINPOLL)
 #define STTIMER_SLOTTIMEMASK   (~(STTIMER_SLOTTIME - 1))
 #define STTIMER_NSLOTS	       (1 << 7)
@@ -170,20 +170,22 @@ stt_check_timers(unsigned long *last)
 static int
 stt_timer_main(void *arg)
 {
+	int rc = 0;
+
 	cfs_block_allsigs();
 
 	while (!stt_data.stt_shuttingdown) {
 		stt_check_timers(&stt_data.stt_prev_slot);
 
-		wait_event_timeout(stt_data.stt_waitq,
-				   stt_data.stt_shuttingdown,
-				   cfs_time_seconds(STTIMER_SLOTTIME));
+		rc = wait_event_timeout(stt_data.stt_waitq,
+					stt_data.stt_shuttingdown,
+					cfs_time_seconds(STTIMER_SLOTTIME));
 	}
 
 	spin_lock(&stt_data.stt_lock);
 	stt_data.stt_nthreads--;
 	spin_unlock(&stt_data.stt_lock);
-	return 0;
+	return rc;
 }
 
 static int
diff --git a/drivers/staging/lustre/lustre/fid/fid_request.c b/drivers/staging/lustre/lustre/fid/fid_request.c
index 39269c3c5..3a4df6264 100644
--- a/drivers/staging/lustre/lustre/fid/fid_request.c
+++ b/drivers/staging/lustre/lustre/fid/fid_request.c
@@ -66,6 +66,7 @@ static int seq_client_rpc(struct lu_client_seq *seq,
 	unsigned int           debug_mask;
 	int                    rc;
 
+	LASSERT(exp && !IS_ERR(exp));
 	req = ptlrpc_request_alloc_pack(class_exp2cliimp(exp), &RQF_SEQ_QUERY,
 					LUSTRE_MDS_VERSION, SEQ_QUERY);
 	if (!req)
@@ -101,19 +102,22 @@ static int seq_client_rpc(struct lu_client_seq *seq,
 			req->rq_no_delay = req->rq_no_resend = 1;
 		debug_mask = D_CONSOLE;
 	} else {
-		if (seq->lcs_type == LUSTRE_SEQ_METADATA)
+		if (seq->lcs_type == LUSTRE_SEQ_METADATA) {
+			req->rq_reply_portal = MDC_REPLY_PORTAL;
 			req->rq_request_portal = SEQ_METADATA_PORTAL;
-		else
+		} else {
+			req->rq_reply_portal = OSC_REPLY_PORTAL;
 			req->rq_request_portal = SEQ_DATA_PORTAL;
+		}
 		debug_mask = D_INFO;
 	}
 
 	ptlrpc_at_set_req_timeout(req);
 
-	if (seq->lcs_type == LUSTRE_SEQ_METADATA)
+	if (opc != SEQ_ALLOC_SUPER && seq->lcs_type == LUSTRE_SEQ_METADATA)
 		mdc_get_rpc_lock(exp->exp_obd->u.cli.cl_rpc_lock, NULL);
 	rc = ptlrpc_queue_wait(req);
-	if (seq->lcs_type == LUSTRE_SEQ_METADATA)
+	if (opc != SEQ_ALLOC_SUPER && seq->lcs_type == LUSTRE_SEQ_METADATA)
 		mdc_put_rpc_lock(exp->exp_obd->u.cli.cl_rpc_lock, NULL);
 	if (rc)
 		goto out_req;
diff --git a/drivers/staging/lustre/lustre/fld/fld_cache.c b/drivers/staging/lustre/lustre/fld/fld_cache.c
index 062f388cf..5a04e99d9 100644
--- a/drivers/staging/lustre/lustre/fld/fld_cache.c
+++ b/drivers/staging/lustre/lustre/fld/fld_cache.c
@@ -178,8 +178,9 @@ restart_fixup:
 				if (n_range->lsr_end <= c_range->lsr_end) {
 					*n_range = *c_range;
 					fld_cache_entry_delete(cache, f_curr);
-				} else
+				} else {
 					n_range->lsr_start = c_range->lsr_end;
+				}
 			}
 
 			/* we could have overlap over next
diff --git a/drivers/staging/lustre/lustre/fld/fld_internal.h b/drivers/staging/lustre/lustre/fld/fld_internal.h
index e8a3caf20..75d6a4863 100644
--- a/drivers/staging/lustre/lustre/fld/fld_internal.h
+++ b/drivers/staging/lustre/lustre/fld/fld_internal.h
@@ -101,12 +101,6 @@ struct fld_cache {
 	unsigned int		 fci_no_shrink:1;
 };
 
-enum fld_op {
-	FLD_CREATE = 0,
-	FLD_DELETE = 1,
-	FLD_LOOKUP = 2
-};
-
 enum {
 	/* 4M of FLD cache will not hurt client a lot. */
 	FLD_SERVER_CACHE_SIZE      = (4 * 0x100000),
@@ -126,7 +120,8 @@ enum {
 extern struct lu_fld_hash fld_hash[];
 
 int fld_client_rpc(struct obd_export *exp,
-		   struct lu_seq_range *range, __u32 fld_op);
+		   struct lu_seq_range *range, __u32 fld_op,
+		   struct ptlrpc_request **reqp);
 
 extern struct lprocfs_vars fld_client_debugfs_list[];
 
diff --git a/drivers/staging/lustre/lustre/fld/fld_request.c b/drivers/staging/lustre/lustre/fld/fld_request.c
index a3d122d85..304c0ec26 100644
--- a/drivers/staging/lustre/lustre/fld/fld_request.c
+++ b/drivers/staging/lustre/lustre/fld/fld_request.c
@@ -64,9 +64,9 @@ static int fld_req_avail(struct client_obd *cli, struct mdc_cache_waiter *mcw)
 {
 	int rc;
 
-	client_obd_list_lock(&cli->cl_loi_list_lock);
+	spin_lock(&cli->cl_loi_list_lock);
 	rc = list_empty(&mcw->mcw_entry);
-	client_obd_list_unlock(&cli->cl_loi_list_lock);
+	spin_unlock(&cli->cl_loi_list_lock);
 	return rc;
 };
 
@@ -75,15 +75,15 @@ static void fld_enter_request(struct client_obd *cli)
 	struct mdc_cache_waiter mcw;
 	struct l_wait_info lwi = { 0 };
 
-	client_obd_list_lock(&cli->cl_loi_list_lock);
+	spin_lock(&cli->cl_loi_list_lock);
 	if (cli->cl_r_in_flight >= cli->cl_max_rpcs_in_flight) {
 		list_add_tail(&mcw.mcw_entry, &cli->cl_cache_waiters);
 		init_waitqueue_head(&mcw.mcw_waitq);
-		client_obd_list_unlock(&cli->cl_loi_list_lock);
+		spin_unlock(&cli->cl_loi_list_lock);
 		l_wait_event(mcw.mcw_waitq, fld_req_avail(cli, &mcw), &lwi);
 	} else {
 		cli->cl_r_in_flight++;
-		client_obd_list_unlock(&cli->cl_loi_list_lock);
+		spin_unlock(&cli->cl_loi_list_lock);
 	}
 }
 
@@ -92,10 +92,9 @@ static void fld_exit_request(struct client_obd *cli)
 	struct list_head *l, *tmp;
 	struct mdc_cache_waiter *mcw;
 
-	client_obd_list_lock(&cli->cl_loi_list_lock);
+	spin_lock(&cli->cl_loi_list_lock);
 	cli->cl_r_in_flight--;
 	list_for_each_safe(l, tmp, &cli->cl_cache_waiters) {
-
 		if (cli->cl_r_in_flight >= cli->cl_max_rpcs_in_flight) {
 			/* No free request slots anymore */
 			break;
@@ -106,7 +105,7 @@ static void fld_exit_request(struct client_obd *cli)
 		cli->cl_r_in_flight++;
 		wake_up(&mcw->mcw_waitq);
 	}
-	client_obd_list_unlock(&cli->cl_loi_list_lock);
+	spin_unlock(&cli->cl_loi_list_lock);
 }
 
 static int fld_rrb_hash(struct lu_client_fld *fld, u64 seq)
@@ -392,55 +391,82 @@ void fld_client_fini(struct lu_client_fld *fld)
 EXPORT_SYMBOL(fld_client_fini);
 
 int fld_client_rpc(struct obd_export *exp,
-		   struct lu_seq_range *range, __u32 fld_op)
+		   struct lu_seq_range *range, __u32 fld_op,
+		   struct ptlrpc_request **reqp)
 {
-	struct ptlrpc_request *req;
+	struct ptlrpc_request *req = NULL;
 	struct lu_seq_range   *prange;
 	__u32		 *op;
-	int		    rc;
+	int		    rc = 0;
 	struct obd_import     *imp;
 
 	LASSERT(exp);
 
 	imp = class_exp2cliimp(exp);
-	req = ptlrpc_request_alloc_pack(imp, &RQF_FLD_QUERY, LUSTRE_MDS_VERSION,
-					FLD_QUERY);
-	if (!req)
-		return -ENOMEM;
-
-	op = req_capsule_client_get(&req->rq_pill, &RMF_FLD_OPC);
-	*op = fld_op;
+	switch (fld_op) {
+	case FLD_QUERY:
+		req = ptlrpc_request_alloc_pack(imp, &RQF_FLD_QUERY,
+						LUSTRE_MDS_VERSION, FLD_QUERY);
+		if (!req)
+			return -ENOMEM;
+
+		/*
+		 * XXX: only needed when talking to old server(< 2.6), it should
+		 * be removed when < 2.6 server is not supported
+		 */
+		op = req_capsule_client_get(&req->rq_pill, &RMF_FLD_OPC);
+		*op = FLD_LOOKUP;
+
+		if (imp->imp_connect_flags_orig & OBD_CONNECT_MDS_MDS)
+			req->rq_allow_replay = 1;
+		break;
+	case FLD_READ:
+		req = ptlrpc_request_alloc_pack(imp, &RQF_FLD_READ,
+						LUSTRE_MDS_VERSION, FLD_READ);
+		if (!req)
+			return -ENOMEM;
+
+		req_capsule_set_size(&req->rq_pill, &RMF_GENERIC_DATA,
+				     RCL_SERVER, PAGE_SIZE);
+		break;
+	default:
+		rc = -EINVAL;
+		break;
+	}
+	if (rc)
+		return rc;
 
 	prange = req_capsule_client_get(&req->rq_pill, &RMF_FLD_MDFLD);
 	*prange = *range;
-
 	ptlrpc_request_set_replen(req);
 	req->rq_request_portal = FLD_REQUEST_PORTAL;
 	req->rq_reply_portal = MDC_REPLY_PORTAL;
 	ptlrpc_at_set_req_timeout(req);
 
-	if (fld_op == FLD_LOOKUP &&
-	    imp->imp_connect_flags_orig & OBD_CONNECT_MDS_MDS)
-		req->rq_allow_replay = 1;
-
-	if (fld_op != FLD_LOOKUP)
-		mdc_get_rpc_lock(exp->exp_obd->u.cli.cl_rpc_lock, NULL);
 	fld_enter_request(&exp->exp_obd->u.cli);
 	rc = ptlrpc_queue_wait(req);
 	fld_exit_request(&exp->exp_obd->u.cli);
-	if (fld_op != FLD_LOOKUP)
-		mdc_put_rpc_lock(exp->exp_obd->u.cli.cl_rpc_lock, NULL);
 	if (rc)
 		goto out_req;
 
-	prange = req_capsule_server_get(&req->rq_pill, &RMF_FLD_MDFLD);
-	if (!prange) {
-		rc = -EFAULT;
-		goto out_req;
+	if (fld_op == FLD_QUERY) {
+		prange = req_capsule_server_get(&req->rq_pill, &RMF_FLD_MDFLD);
+		if (!prange) {
+			rc = -EFAULT;
+			goto out_req;
+		}
+		*range = *prange;
 	}
-	*range = *prange;
+
 out_req:
-	ptlrpc_req_finished(req);
+	if (rc || !reqp) {
+		ptlrpc_req_finished(req);
+		req = NULL;
+	}
+
+	if (reqp)
+		*reqp = req;
+
 	return rc;
 }
 
@@ -468,7 +494,7 @@ int fld_client_lookup(struct lu_client_fld *fld, u64 seq, u32 *mds,
 
 	res.lsr_start = seq;
 	fld_range_set_type(&res, flags);
-	rc = fld_client_rpc(target->ft_exp, &res, FLD_LOOKUP);
+	rc = fld_client_rpc(target->ft_exp, &res, FLD_QUERY, NULL);
 
 	if (rc == 0) {
 		*mds = res.lsr_index;
diff --git a/drivers/staging/lustre/lustre/include/cl_object.h b/drivers/staging/lustre/lustre/include/cl_object.h
index fb971ded5..d4c33dd11 100644
--- a/drivers/staging/lustre/lustre/include/cl_object.h
+++ b/drivers/staging/lustre/lustre/include/cl_object.h
@@ -82,7 +82,6 @@
  *  - i_mutex
  *      - PG_locked
  *	  - cl_object_header::coh_page_guard
- *	  - cl_object_header::coh_lock_guard
  *	  - lu_site::ls_guard
  *
  * See the top comment in cl_object.c for the description of overall locking and
@@ -98,9 +97,12 @@
  * super-class definitions.
  */
 #include "lu_object.h"
+#include <linux/atomic.h>
 #include "linux/lustre_compat25.h"
 #include <linux/mutex.h>
 #include <linux/radix-tree.h>
+#include <linux/spinlock.h>
+#include <linux/wait.h>
 
 struct inode;
 
@@ -138,7 +140,7 @@ struct cl_device_operations {
 	 * cl_req_slice_add().
 	 *
 	 * \see osc_req_init(), lov_req_init(), lovsub_req_init()
-	 * \see ccc_req_init()
+	 * \see vvp_req_init()
 	 */
 	int (*cdo_req_init)(const struct lu_env *env, struct cl_device *dev,
 			    struct cl_req *req);
@@ -147,7 +149,7 @@ struct cl_device_operations {
 /**
  * Device in the client stack.
  *
- * \see ccc_device, lov_device, lovsub_device, osc_device
+ * \see vvp_device, lov_device, lovsub_device, osc_device
  */
 struct cl_device {
 	/** Super-class. */
@@ -243,7 +245,7 @@ enum cl_attr_valid {
  *    be discarded from the memory, all its sub-objects are torn-down and
  *    destroyed too.
  *
- * \see ccc_object, lov_object, lovsub_object, osc_object
+ * \see vvp_object, lov_object, lovsub_object, osc_object
  */
 struct cl_object {
 	/** super class */
@@ -322,7 +324,7 @@ struct cl_object_operations {
 	 *	 to be used instead of newly created.
 	 */
 	int  (*coo_page_init)(const struct lu_env *env, struct cl_object *obj,
-			      struct cl_page *page, struct page *vmpage);
+				struct cl_page *page, pgoff_t index);
 	/**
 	 * Initialize lock slice for this layer. Called top-to-bottom through
 	 * every object layer when a new cl_lock is instantiated. Layer
@@ -383,11 +385,17 @@ struct cl_object_operations {
 	 * object. Layers are supposed to fill parts of \a lvb that will be
 	 * shipped to the glimpse originator as a glimpse result.
 	 *
-	 * \see ccc_object_glimpse(), lovsub_object_glimpse(),
+	 * \see vvp_object_glimpse(), lovsub_object_glimpse(),
 	 * \see osc_object_glimpse()
 	 */
 	int (*coo_glimpse)(const struct lu_env *env,
 			   const struct cl_object *obj, struct ost_lvb *lvb);
+	/**
+	 * Object prune method. Called when the layout is going to change on
+	 * this object, therefore each layer has to clean up their cache,
+	 * mainly pages and locks.
+	 */
+	int (*coo_prune)(const struct lu_env *env, struct cl_object *obj);
 };
 
 /**
@@ -398,22 +406,6 @@ struct cl_object_header {
 	 * here.
 	 */
 	struct lu_object_header  coh_lu;
-	/** \name locks
-	 * \todo XXX move locks below to the separate cache-lines, they are
-	 * mostly useless otherwise.
-	 */
-	/** @{ */
-	/** Lock protecting page tree. */
-	spinlock_t		 coh_page_guard;
-	/** Lock protecting lock list. */
-	spinlock_t		 coh_lock_guard;
-	/** @} locks */
-	/** Radix tree of cl_page's, cached for this object. */
-	struct radix_tree_root   coh_tree;
-	/** # of pages in radix tree. */
-	unsigned long	    coh_pages;
-	/** List of cl_lock's granted for this object. */
-	struct list_head	       coh_locks;
 
 	/**
 	 * Parent object. It is assumed that an object has a well-defined
@@ -460,10 +452,6 @@ struct cl_object_header {
 					co_lu.lo_linkage)
 /** @} cl_object */
 
-#ifndef pgoff_t
-#define pgoff_t unsigned long
-#endif
-
 #define CL_PAGE_EOF ((pgoff_t)~0ull)
 
 /** \addtogroup cl_page cl_page
@@ -727,16 +715,10 @@ struct cl_page {
 	atomic_t	     cp_ref;
 	/** An object this page is a part of. Immutable after creation. */
 	struct cl_object	*cp_obj;
-	/** Logical page index within the object. Immutable after creation. */
-	pgoff_t		  cp_index;
 	/** List of slices. Immutable after creation. */
 	struct list_head	       cp_layers;
-	/** Parent page, NULL for top-level page. Immutable after creation. */
-	struct cl_page	  *cp_parent;
-	/** Lower-layer page. NULL for bottommost page. Immutable after
-	 * creation.
-	 */
-	struct cl_page	  *cp_child;
+	/** vmpage */
+	struct page		*cp_vmpage;
 	/**
 	 * Page state. This field is const to avoid accidental update, it is
 	 * modified only internally within cl_page.c. Protected by a VM lock.
@@ -787,10 +769,11 @@ struct cl_page {
 /**
  * Per-layer part of cl_page.
  *
- * \see ccc_page, lov_page, osc_page
+ * \see vvp_page, lov_page, osc_page
  */
 struct cl_page_slice {
 	struct cl_page		  *cpl_page;
+	pgoff_t				 cpl_index;
 	/**
 	 * Object slice corresponding to this page slice. Immutable after
 	 * creation.
@@ -804,16 +787,9 @@ struct cl_page_slice {
 /**
  * Lock mode. For the client extent locks.
  *
- * \warning: cl_lock_mode_match() assumes particular ordering here.
  * \ingroup cl_lock
  */
 enum cl_lock_mode {
-	/**
-	 * Mode of a lock that protects no data, and exists only as a
-	 * placeholder. This is used for `glimpse' requests. A phantom lock
-	 * might get promoted to real lock at some point.
-	 */
-	CLM_PHANTOM,
 	CLM_READ,
 	CLM_WRITE,
 	CLM_GROUP
@@ -845,11 +821,6 @@ struct cl_page_operations {
 	 * provided by the topmost layer, see cl_page_disown0() as an example.
 	 */
 
-	/**
-	 * \return the underlying VM page. Optional.
-	 */
-	struct page *(*cpo_vmpage)(const struct lu_env *env,
-				   const struct cl_page_slice *slice);
 	/**
 	 * Called when \a io acquires this page into the exclusive
 	 * ownership. When this method returns, it is guaranteed that the is
@@ -896,14 +867,6 @@ struct cl_page_operations {
 	 */
 	void  (*cpo_export)(const struct lu_env *env,
 			    const struct cl_page_slice *slice, int uptodate);
-	/**
-	 * Unmaps page from the user space (if it is mapped).
-	 *
-	 * \see cl_page_unmap()
-	 * \see vvp_page_unmap()
-	 */
-	int (*cpo_unmap)(const struct lu_env *env,
-			 const struct cl_page_slice *slice, struct cl_io *io);
 	/**
 	 * Checks whether underlying VM page is locked (in the suitable
 	 * sense). Used for assertions.
@@ -957,7 +920,7 @@ struct cl_page_operations {
 	 */
 	int (*cpo_is_under_lock)(const struct lu_env *env,
 				 const struct cl_page_slice *slice,
-				 struct cl_io *io);
+				 struct cl_io *io, pgoff_t *max);
 
 	/**
 	 * Optional debugging helper. Prints given page slice.
@@ -1027,26 +990,6 @@ struct cl_page_operations {
 		 */
 		int  (*cpo_make_ready)(const struct lu_env *env,
 				       const struct cl_page_slice *slice);
-		/**
-		 * Announce that this page is to be written out
-		 * opportunistically, that is, page is dirty, it is not
-		 * necessary to start write-out transfer right now, but
-		 * eventually page has to be written out.
-		 *
-		 * Main caller of this is the write path (see
-		 * vvp_io_commit_write()), using this method to build a
-		 * "transfer cache" from which large transfers are then
-		 * constructed by the req-formation engine.
-		 *
-		 * \todo XXX it would make sense to add page-age tracking
-		 * semantics here, and to oblige the req-formation engine to
-		 * send the page out not later than it is too old.
-		 *
-		 * \see cl_page_cache_add()
-		 */
-		int  (*cpo_cache_add)(const struct lu_env *env,
-				      const struct cl_page_slice *slice,
-				      struct cl_io *io);
 	} io[CRT_NR];
 	/**
 	 * Tell transfer engine that only [to, from] part of a page should be
@@ -1098,9 +1041,8 @@ struct cl_page_operations {
  */
 #define CL_PAGE_DEBUG(mask, env, page, format, ...)		     \
 do {								    \
-	LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, mask, NULL);		\
-									\
 	if (cfs_cdebug_show(mask, DEBUG_SUBSYSTEM)) {		   \
+		LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, mask, NULL);	\
 		cl_page_print(env, &msgdata, lu_cdebug_printer, page);  \
 		CDEBUG(mask, format, ## __VA_ARGS__);		  \
 	}							       \
@@ -1111,9 +1053,8 @@ do {								    \
  */
 #define CL_PAGE_HEADER(mask, env, page, format, ...)			  \
 do {									  \
-	LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, mask, NULL);		      \
-									      \
 	if (cfs_cdebug_show(mask, DEBUG_SUBSYSTEM)) {			 \
+		LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, mask, NULL);		\
 		cl_page_header_print(env, &msgdata, lu_cdebug_printer, page); \
 		CDEBUG(mask, format, ## __VA_ARGS__);			\
 	}								     \
@@ -1130,6 +1071,12 @@ static inline int __page_in_use(const struct cl_page *page, int refc)
 #define cl_page_in_use(pg)       __page_in_use(pg, 1)
 #define cl_page_in_use_noref(pg) __page_in_use(pg, 0)
 
+static inline struct page *cl_page_vmpage(struct cl_page *page)
+{
+	LASSERT(page->cp_vmpage);
+	return page->cp_vmpage;
+}
+
 /** @} cl_page */
 
 /** \addtogroup cl_lock cl_lock
@@ -1150,12 +1097,6 @@ static inline int __page_in_use(const struct cl_page *page, int refc)
  * (struct cl_lock) and a list of layers (struct cl_lock_slice), linked to
  * cl_lock::cll_layers list through cl_lock_slice::cls_linkage.
  *
- * All locks for a given object are linked into cl_object_header::coh_locks
- * list (protected by cl_object_header::coh_lock_guard spin-lock) through
- * cl_lock::cll_linkage. Currently this list is not sorted in any way. We can
- * sort it in starting lock offset, or use altogether different data structure
- * like a tree.
- *
  * Typical cl_lock consists of the two layers:
  *
  *     - vvp_lock (vvp specific data), and
@@ -1177,111 +1118,29 @@ static inline int __page_in_use(const struct cl_page *page, int refc)
  *
  * LIFE CYCLE
  *
- * cl_lock is reference counted. When reference counter drops to 0, lock is
- * placed in the cache, except when lock is in CLS_FREEING state. CLS_FREEING
- * lock is destroyed when last reference is released. Referencing between
- * top-lock and its sub-locks is described in the lov documentation module.
- *
- * STATE MACHINE
- *
- * Also, cl_lock is a state machine. This requires some clarification. One of
- * the goals of client IO re-write was to make IO path non-blocking, or at
- * least to make it easier to make it non-blocking in the future. Here
- * `non-blocking' means that when a system call (read, write, truncate)
- * reaches a situation where it has to wait for a communication with the
- * server, it should --instead of waiting-- remember its current state and
- * switch to some other work.  E.g,. instead of waiting for a lock enqueue,
- * client should proceed doing IO on the next stripe, etc. Obviously this is
- * rather radical redesign, and it is not planned to be fully implemented at
- * this time, instead we are putting some infrastructure in place, that would
- * make it easier to do asynchronous non-blocking IO easier in the
- * future. Specifically, where old locking code goes to sleep (waiting for
- * enqueue, for example), new code returns cl_lock_transition::CLO_WAIT. When
- * enqueue reply comes, its completion handler signals that lock state-machine
- * is ready to transit to the next state. There is some generic code in
- * cl_lock.c that sleeps, waiting for these signals. As a result, for users of
- * this cl_lock.c code, it looks like locking is done in normal blocking
- * fashion, and it the same time it is possible to switch to the non-blocking
- * locking (simply by returning cl_lock_transition::CLO_WAIT from cl_lock.c
- * functions).
- *
- * For a description of state machine states and transitions see enum
- * cl_lock_state.
- *
- * There are two ways to restrict a set of states which lock might move to:
- *
- *     - placing a "hold" on a lock guarantees that lock will not be moved
- *       into cl_lock_state::CLS_FREEING state until hold is released. Hold
- *       can be only acquired on a lock that is not in
- *       cl_lock_state::CLS_FREEING. All holds on a lock are counted in
- *       cl_lock::cll_holds. Hold protects lock from cancellation and
- *       destruction. Requests to cancel and destroy a lock on hold will be
- *       recorded, but only honored when last hold on a lock is released;
- *
- *     - placing a "user" on a lock guarantees that lock will not leave
- *       cl_lock_state::CLS_NEW, cl_lock_state::CLS_QUEUING,
- *       cl_lock_state::CLS_ENQUEUED and cl_lock_state::CLS_HELD set of
- *       states, once it enters this set. That is, if a user is added onto a
- *       lock in a state not from this set, it doesn't immediately enforce
- *       lock to move to this set, but once lock enters this set it will
- *       remain there until all users are removed. Lock users are counted in
- *       cl_lock::cll_users.
- *
- *       User is used to assure that lock is not canceled or destroyed while
- *       it is being enqueued, or actively used by some IO.
- *
- *       Currently, a user always comes with a hold (cl_lock_invariant()
- *       checks that a number of holds is not less than a number of users).
- *
- * CONCURRENCY
- *
- * This is how lock state-machine operates. struct cl_lock contains a mutex
- * cl_lock::cll_guard that protects struct fields.
- *
- *     - mutex is taken, and cl_lock::cll_state is examined.
- *
- *     - for every state there are possible target states where lock can move
- *       into. They are tried in order. Attempts to move into next state are
- *       done by _try() functions in cl_lock.c:cl_{enqueue,unlock,wait}_try().
- *
- *     - if the transition can be performed immediately, state is changed,
- *       and mutex is released.
- *
- *     - if the transition requires blocking, _try() function returns
- *       cl_lock_transition::CLO_WAIT. Caller unlocks mutex and goes to
- *       sleep, waiting for possibility of lock state change. It is woken
- *       up when some event occurs, that makes lock state change possible
- *       (e.g., the reception of the reply from the server), and repeats
- *       the loop.
- *
- * Top-lock and sub-lock has separate mutexes and the latter has to be taken
- * first to avoid dead-lock.
- *
- * To see an example of interaction of all these issues, take a look at the
- * lov_cl.c:lov_lock_enqueue() function. It is called as a part of
- * cl_enqueue_try(), and tries to advance top-lock to ENQUEUED state, by
- * advancing state-machines of its sub-locks (lov_lock_enqueue_one()). Note
- * also, that it uses trylock to grab sub-lock mutex to avoid dead-lock. It
- * also has to handle CEF_ASYNC enqueue, when sub-locks enqueues have to be
- * done in parallel, rather than one after another (this is used for glimpse
- * locks, that cannot dead-lock).
+ * cl_lock is a cacheless data container for the requirements of locks to
+ * complete the IO. cl_lock is created before I/O starts and destroyed when the
+ * I/O is complete.
+ *
+ * cl_lock depends on LDLM lock to fulfill lock semantics. LDLM lock is attached
+ * to cl_lock at OSC layer. LDLM lock is still cacheable.
  *
  * INTERFACE AND USAGE
  *
- * struct cl_lock_operations provide a number of call-backs that are invoked
- * when events of interest occurs. Layers can intercept and handle glimpse,
- * blocking, cancel ASTs and a reception of the reply from the server.
+ * Two major methods are supported for cl_lock: clo_enqueue and clo_cancel.  A
+ * cl_lock is enqueued by cl_lock_request(), which will call clo_enqueue()
+ * methods for each layer to enqueue the lock. At the LOV layer, if a cl_lock
+ * consists of multiple sub cl_locks, each sub locks will be enqueued
+ * correspondingly. At OSC layer, the lock enqueue request will tend to reuse
+ * cached LDLM lock; otherwise a new LDLM lock will have to be requested from
+ * OST side.
  *
- * One important difference with the old client locking model is that new
- * client has a representation for the top-lock, whereas in the old code only
- * sub-locks existed as real data structures and file-level locks are
- * represented by "request sets" that are created and destroyed on each and
- * every lock creation.
+ * cl_lock_cancel() must be called to release a cl_lock after use. clo_cancel()
+ * method will be called for each layer to release the resource held by this
+ * lock. At OSC layer, the reference count of LDLM lock, which is held at
+ * clo_enqueue time, is released.
  *
- * Top-locks are cached, and can be found in the cache by the system calls. It
- * is possible that top-lock is in cache, but some of its sub-locks were
- * canceled and destroyed. In that case top-lock has to be enqueued again
- * before it can be used.
+ * LDLM lock can only be canceled if there is no cl_lock using it.
  *
  * Overall process of the locking during IO operation is as following:
  *
@@ -1294,7 +1153,7 @@ static inline int __page_in_use(const struct cl_page *page, int refc)
  *
  *     - when all locks are acquired, IO is performed;
  *
- *     - locks are released into cache.
+ *     - locks are released after IO is complete.
  *
  * Striping introduces major additional complexity into locking. The
  * fundamental problem is that it is generally unsafe to actively use (hold)
@@ -1316,16 +1175,6 @@ static inline int __page_in_use(const struct cl_page *page, int refc)
  * buf is a part of memory mapped Lustre file, a lock or locks protecting buf
  * has to be held together with the usual lock on [offset, offset + count].
  *
- * As multi-stripe locks have to be allowed, it makes sense to cache them, so
- * that, for example, a sequence of O_APPEND writes can proceed quickly
- * without going down to the individual stripes to do lock matching. On the
- * other hand, multi-stripe locks shouldn't be used by normal read/write
- * calls. To achieve this, every layer can implement ->clo_fits_into() method,
- * that is called by lock matching code (cl_lock_lookup()), and that can be
- * used to selectively disable matching of certain locks for certain IOs. For
- * example, lov layer implements lov_lock_fits_into() that allow multi-stripe
- * locks to be matched only for truncates and O_APPEND writes.
- *
  * Interaction with DLM
  *
  * In the expected setup, cl_lock is ultimately backed up by a collection of
@@ -1356,295 +1205,27 @@ struct cl_lock_descr {
 	__u32	     cld_enq_flags;
 };
 
-#define DDESCR "%s(%d):[%lu, %lu]"
+#define DDESCR "%s(%d):[%lu, %lu]:%x"
 #define PDESCR(descr)						   \
 	cl_lock_mode_name((descr)->cld_mode), (descr)->cld_mode,	\
-	(descr)->cld_start, (descr)->cld_end
+	(descr)->cld_start, (descr)->cld_end, (descr)->cld_enq_flags
 
 const char *cl_lock_mode_name(const enum cl_lock_mode mode);
 
-/**
- * Lock state-machine states.
- *
- * \htmlonly
- * <pre>
- *
- * Possible state transitions:
- *
- *	      +------------------>NEW
- *	      |		    |
- *	      |		    | cl_enqueue_try()
- *	      |		    |
- *	      |    cl_unuse_try()  V
- *	      |  +--------------QUEUING (*)
- *	      |  |		 |
- *	      |  |		 | cl_enqueue_try()
- *	      |  |		 |
- *	      |  | cl_unuse_try()  V
- *    sub-lock  |  +-------------ENQUEUED (*)
- *    canceled  |  |		 |
- *	      |  |		 | cl_wait_try()
- *	      |  |		 |
- *	      |  |		(R)
- *	      |  |		 |
- *	      |  |		 V
- *	      |  |		HELD<---------+
- *	      |  |		 |	    |
- *	      |  |		 |	    | cl_use_try()
- *	      |  |  cl_unuse_try() |	    |
- *	      |  |		 |	    |
- *	      |  |		 V	 ---+
- *	      |  +------------>INTRANSIT (D) <--+
- *	      |		    |	    |
- *	      |     cl_unuse_try() |	    | cached lock found
- *	      |		    |	    | cl_use_try()
- *	      |		    |	    |
- *	      |		    V	    |
- *	      +------------------CACHED---------+
- *				   |
- *				  (C)
- *				   |
- *				   V
- *				FREEING
- *
- * Legend:
- *
- *	 In states marked with (*) transition to the same state (i.e., a loop
- *	 in the diagram) is possible.
- *
- *	 (R) is the point where Receive call-back is invoked: it allows layers
- *	 to handle arrival of lock reply.
- *
- *	 (C) is the point where Cancellation call-back is invoked.
- *
- *	 (D) is the transit state which means the lock is changing.
- *
- *	 Transition to FREEING state is possible from any other state in the
- *	 diagram in case of unrecoverable error.
- * </pre>
- * \endhtmlonly
- *
- * These states are for individual cl_lock object. Top-lock and its sub-locks
- * can be in the different states. Another way to say this is that we have
- * nested state-machines.
- *
- * Separate QUEUING and ENQUEUED states are needed to support non-blocking
- * operation for locks with multiple sub-locks. Imagine lock on a file F, that
- * intersects 3 stripes S0, S1, and S2. To enqueue F client has to send
- * enqueue to S0, wait for its completion, then send enqueue for S1, wait for
- * its completion and at last enqueue lock for S2, and wait for its
- * completion. In that case, top-lock is in QUEUING state while S0, S1 are
- * handled, and is in ENQUEUED state after enqueue to S2 has been sent (note
- * that in this case, sub-locks move from state to state, and top-lock remains
- * in the same state).
- */
-enum cl_lock_state {
-	/**
-	 * Lock that wasn't yet enqueued
-	 */
-	CLS_NEW,
-	/**
-	 * Enqueue is in progress, blocking for some intermediate interaction
-	 * with the other side.
-	 */
-	CLS_QUEUING,
-	/**
-	 * Lock is fully enqueued, waiting for server to reply when it is
-	 * granted.
-	 */
-	CLS_ENQUEUED,
-	/**
-	 * Lock granted, actively used by some IO.
-	 */
-	CLS_HELD,
-	/**
-	 * This state is used to mark the lock is being used, or unused.
-	 * We need this state because the lock may have several sublocks,
-	 * so it's impossible to have an atomic way to bring all sublocks
-	 * into CLS_HELD state at use case, or all sublocks to CLS_CACHED
-	 * at unuse case.
-	 * If a thread is referring to a lock, and it sees the lock is in this
-	 * state, it must wait for the lock.
-	 * See state diagram for details.
-	 */
-	CLS_INTRANSIT,
-	/**
-	 * Lock granted, not used.
-	 */
-	CLS_CACHED,
-	/**
-	 * Lock is being destroyed.
-	 */
-	CLS_FREEING,
-	CLS_NR
-};
-
-enum cl_lock_flags {
-	/**
-	 * lock has been cancelled. This flag is never cleared once set (by
-	 * cl_lock_cancel0()).
-	 */
-	CLF_CANCELLED	= 1 << 0,
-	/** cancellation is pending for this lock. */
-	CLF_CANCELPEND	= 1 << 1,
-	/** destruction is pending for this lock. */
-	CLF_DOOMED	= 1 << 2,
-	/** from enqueue RPC reply upcall. */
-	CLF_FROM_UPCALL	= 1 << 3,
-};
-
-/**
- * Lock closure.
- *
- * Lock closure is a collection of locks (both top-locks and sub-locks) that
- * might be updated in a result of an operation on a certain lock (which lock
- * this is a closure of).
- *
- * Closures are needed to guarantee dead-lock freedom in the presence of
- *
- *     - nested state-machines (top-lock state-machine composed of sub-lock
- *       state-machines), and
- *
- *     - shared sub-locks.
- *
- * Specifically, many operations, such as lock enqueue, wait, unlock,
- * etc. start from a top-lock, and then operate on a sub-locks of this
- * top-lock, holding a top-lock mutex. When sub-lock state changes as a result
- * of such operation, this change has to be propagated to all top-locks that
- * share this sub-lock. Obviously, no natural lock ordering (e.g.,
- * top-to-bottom or bottom-to-top) captures this scenario, so try-locking has
- * to be used. Lock closure systematizes this try-and-repeat logic.
- */
-struct cl_lock_closure {
-	/**
-	 * Lock that is mutexed when closure construction is started. When
-	 * closure in is `wait' mode (cl_lock_closure::clc_wait), mutex on
-	 * origin is released before waiting.
-	 */
-	struct cl_lock   *clc_origin;
-	/**
-	 * List of enclosed locks, so far. Locks are linked here through
-	 * cl_lock::cll_inclosure.
-	 */
-	struct list_head	clc_list;
-	/**
-	 * True iff closure is in a `wait' mode. This determines what
-	 * cl_lock_enclosure() does when a lock L to be added to the closure
-	 * is currently mutexed by some other thread.
-	 *
-	 * If cl_lock_closure::clc_wait is not set, then closure construction
-	 * fails with CLO_REPEAT immediately.
-	 *
-	 * In wait mode, cl_lock_enclosure() waits until next attempt to build
-	 * a closure might succeed. To this end it releases an origin mutex
-	 * (cl_lock_closure::clc_origin), that has to be the only lock mutex
-	 * owned by the current thread, and then waits on L mutex (by grabbing
-	 * it and immediately releasing), before returning CLO_REPEAT to the
-	 * caller.
-	 */
-	int	       clc_wait;
-	/** Number of locks in the closure. */
-	int	       clc_nr;
-};
-
 /**
  * Layered client lock.
  */
 struct cl_lock {
-	/** Reference counter. */
-	atomic_t	  cll_ref;
 	/** List of slices. Immutable after creation. */
 	struct list_head	    cll_layers;
-	/**
-	 * Linkage into cl_lock::cll_descr::cld_obj::coh_locks list. Protected
-	 * by cl_lock::cll_descr::cld_obj::coh_lock_guard.
-	 */
-	struct list_head	    cll_linkage;
-	/**
-	 * Parameters of this lock. Protected by
-	 * cl_lock::cll_descr::cld_obj::coh_lock_guard nested within
-	 * cl_lock::cll_guard. Modified only on lock creation and in
-	 * cl_lock_modify().
-	 */
+	/** lock attribute, extent, cl_object, etc. */
 	struct cl_lock_descr  cll_descr;
-	/** Protected by cl_lock::cll_guard. */
-	enum cl_lock_state    cll_state;
-	/** signals state changes. */
-	wait_queue_head_t	   cll_wq;
-	/**
-	 * Recursive lock, most fields in cl_lock{} are protected by this.
-	 *
-	 * Locking rules: this mutex is never held across network
-	 * communication, except when lock is being canceled.
-	 *
-	 * Lock ordering: a mutex of a sub-lock is taken first, then a mutex
-	 * on a top-lock. Other direction is implemented through a
-	 * try-lock-repeat loop. Mutices of unrelated locks can be taken only
-	 * by try-locking.
-	 *
-	 * \see osc_lock_enqueue_wait(), lov_lock_cancel(), lov_sublock_wait().
-	 */
-	struct mutex		cll_guard;
-	struct task_struct	*cll_guarder;
-	int		   cll_depth;
-
-	/**
-	 * the owner for INTRANSIT state
-	 */
-	struct task_struct	*cll_intransit_owner;
-	int		   cll_error;
-	/**
-	 * Number of holds on a lock. A hold prevents a lock from being
-	 * canceled and destroyed. Protected by cl_lock::cll_guard.
-	 *
-	 * \see cl_lock_hold(), cl_lock_unhold(), cl_lock_release()
-	 */
-	int		   cll_holds;
-	 /**
-	  * Number of lock users. Valid in cl_lock_state::CLS_HELD state
-	  * only. Lock user pins lock in CLS_HELD state. Protected by
-	  * cl_lock::cll_guard.
-	  *
-	  * \see cl_wait(), cl_unuse().
-	  */
-	int		   cll_users;
-	/**
-	 * Flag bit-mask. Values from enum cl_lock_flags. Updates are
-	 * protected by cl_lock::cll_guard.
-	 */
-	unsigned long	 cll_flags;
-	/**
-	 * A linkage into a list of locks in a closure.
-	 *
-	 * \see cl_lock_closure
-	 */
-	struct list_head	    cll_inclosure;
-	/**
-	 * Confict lock at queuing time.
-	 */
-	struct cl_lock       *cll_conflict;
-	/**
-	 * A list of references to this lock, for debugging.
-	 */
-	struct lu_ref	 cll_reference;
-	/**
-	 * A list of holds on this lock, for debugging.
-	 */
-	struct lu_ref	 cll_holders;
-	/**
-	 * A reference for cl_lock::cll_descr::cld_obj. For debugging.
-	 */
-	struct lu_ref_link    cll_obj_ref;
-#ifdef CONFIG_LOCKDEP
-	/* "dep_map" name is assumed by lockdep.h macros. */
-	struct lockdep_map    dep_map;
-#endif
 };
 
 /**
  * Per-layer part of cl_lock
  *
- * \see ccc_lock, lov_lock, lovsub_lock, osc_lock
+ * \see vvp_lock, lov_lock, lovsub_lock, osc_lock
  */
 struct cl_lock_slice {
 	struct cl_lock		  *cls_lock;
@@ -1657,175 +1238,37 @@ struct cl_lock_slice {
 	struct list_head		       cls_linkage;
 };
 
-/**
- * Possible (non-error) return values of ->clo_{enqueue,wait,unlock}().
- *
- * NOTE: lov_subresult() depends on ordering here.
- */
-enum cl_lock_transition {
-	/** operation cannot be completed immediately. Wait for state change. */
-	CLO_WAIT	= 1,
-	/** operation had to release lock mutex, restart. */
-	CLO_REPEAT      = 2,
-	/** lower layer re-enqueued. */
-	CLO_REENQUEUED  = 3,
-};
-
 /**
  *
  * \see vvp_lock_ops, lov_lock_ops, lovsub_lock_ops, osc_lock_ops
  */
 struct cl_lock_operations {
-	/**
-	 * \name statemachine
-	 *
-	 * State machine transitions. These 3 methods are called to transfer
-	 * lock from one state to another, as described in the commentary
-	 * above enum #cl_lock_state.
-	 *
-	 * \retval 0	  this layer has nothing more to do to before
-	 *		       transition to the target state happens;
-	 *
-	 * \retval CLO_REPEAT method had to release and re-acquire cl_lock
-	 *		    mutex, repeat invocation of transition method
-	 *		    across all layers;
-	 *
-	 * \retval CLO_WAIT   this layer cannot move to the target state
-	 *		    immediately, as it has to wait for certain event
-	 *		    (e.g., the communication with the server). It
-	 *		    is guaranteed, that when the state transfer
-	 *		    becomes possible, cl_lock::cll_wq wait-queue
-	 *		    is signaled. Caller can wait for this event by
-	 *		    calling cl_lock_state_wait();
-	 *
-	 * \retval -ve	failure, abort state transition, move the lock
-	 *		    into cl_lock_state::CLS_FREEING state, and set
-	 *		    cl_lock::cll_error.
-	 *
-	 * Once all layers voted to agree to transition (by returning 0), lock
-	 * is moved into corresponding target state. All state transition
-	 * methods are optional.
-	 */
 	/** @{ */
 	/**
 	 * Attempts to enqueue the lock. Called top-to-bottom.
 	 *
-	 * \see ccc_lock_enqueue(), lov_lock_enqueue(), lovsub_lock_enqueue(),
+	 * \retval 0	this layer has enqueued the lock successfully
+	 * \retval >0	this layer has enqueued the lock, but need to wait on
+	 *		@anchor for resources
+	 * \retval -ve	failure
+	 *
+	 * \see vvp_lock_enqueue(), lov_lock_enqueue(), lovsub_lock_enqueue(),
 	 * \see osc_lock_enqueue()
 	 */
 	int  (*clo_enqueue)(const struct lu_env *env,
 			    const struct cl_lock_slice *slice,
-			    struct cl_io *io, __u32 enqflags);
+			    struct cl_io *io, struct cl_sync_io *anchor);
 	/**
-	 * Attempts to wait for enqueue result. Called top-to-bottom.
-	 *
-	 * \see ccc_lock_wait(), lov_lock_wait(), osc_lock_wait()
-	 */
-	int  (*clo_wait)(const struct lu_env *env,
-			 const struct cl_lock_slice *slice);
-	/**
-	 * Attempts to unlock the lock. Called bottom-to-top. In addition to
-	 * usual return values of lock state-machine methods, this can return
-	 * -ESTALE to indicate that lock cannot be returned to the cache, and
-	 * has to be re-initialized.
-	 * unuse is a one-shot operation, so it must NOT return CLO_WAIT.
-	 *
-	 * \see ccc_lock_unuse(), lov_lock_unuse(), osc_lock_unuse()
-	 */
-	int  (*clo_unuse)(const struct lu_env *env,
-			  const struct cl_lock_slice *slice);
-	/**
-	 * Notifies layer that cached lock is started being used.
-	 *
-	 * \pre lock->cll_state == CLS_CACHED
-	 *
-	 * \see lov_lock_use(), osc_lock_use()
-	 */
-	int  (*clo_use)(const struct lu_env *env,
-			const struct cl_lock_slice *slice);
-	/** @} statemachine */
-	/**
-	 * A method invoked when lock state is changed (as a result of state
-	 * transition). This is used, for example, to track when the state of
-	 * a sub-lock changes, to propagate this change to the corresponding
-	 * top-lock. Optional
-	 *
-	 * \see lovsub_lock_state()
-	 */
-	void (*clo_state)(const struct lu_env *env,
-			  const struct cl_lock_slice *slice,
-			  enum cl_lock_state st);
-	/**
-	 * Returns true, iff given lock is suitable for the given io, idea
-	 * being, that there are certain "unsafe" locks, e.g., ones acquired
-	 * for O_APPEND writes, that we don't want to re-use for a normal
-	 * write, to avoid the danger of cascading evictions. Optional. Runs
-	 * under cl_object_header::coh_lock_guard.
-	 *
-	 * XXX this should take more information about lock needed by
-	 * io. Probably lock description or something similar.
-	 *
-	 * \see lov_fits_into()
-	 */
-	int (*clo_fits_into)(const struct lu_env *env,
-			     const struct cl_lock_slice *slice,
-			     const struct cl_lock_descr *need,
-			     const struct cl_io *io);
-	/**
-	 * \name ast
-	 * Asynchronous System Traps. All of then are optional, all are
-	 * executed bottom-to-top.
-	 */
-	/** @{ */
-
-	/**
-	 * Cancellation callback. Cancel a lock voluntarily, or under
-	 * the request of server.
+	 * Cancel a lock, release its DLM lock ref, while does not cancel the
+	 * DLM lock
 	 */
 	void (*clo_cancel)(const struct lu_env *env,
 			   const struct cl_lock_slice *slice);
-	/**
-	 * Lock weighting ast. Executed to estimate how precious this lock
-	 * is. The sum of results across all layers is used to determine
-	 * whether lock worth keeping in cache given present memory usage.
-	 *
-	 * \see osc_lock_weigh(), vvp_lock_weigh(), lovsub_lock_weigh().
-	 */
-	unsigned long (*clo_weigh)(const struct lu_env *env,
-				   const struct cl_lock_slice *slice);
-	/** @} ast */
-
-	/**
-	 * \see lovsub_lock_closure()
-	 */
-	int (*clo_closure)(const struct lu_env *env,
-			   const struct cl_lock_slice *slice,
-			   struct cl_lock_closure *closure);
-	/**
-	 * Executed bottom-to-top when lock description changes (e.g., as a
-	 * result of server granting more generous lock than was requested).
-	 *
-	 * \see lovsub_lock_modify()
-	 */
-	int (*clo_modify)(const struct lu_env *env,
-			  const struct cl_lock_slice *slice,
-			  const struct cl_lock_descr *updated);
-	/**
-	 * Notifies layers (bottom-to-top) that lock is going to be
-	 * destroyed. Responsibility of layers is to prevent new references on
-	 * this lock from being acquired once this method returns.
-	 *
-	 * This can be called multiple times due to the races.
-	 *
-	 * \see cl_lock_delete()
-	 * \see osc_lock_delete(), lovsub_lock_delete()
-	 */
-	void (*clo_delete)(const struct lu_env *env,
-			   const struct cl_lock_slice *slice);
+	/** @} */
 	/**
 	 * Destructor. Frees resources and the slice.
 	 *
-	 * \see ccc_lock_fini(), lov_lock_fini(), lovsub_lock_fini(),
+	 * \see vvp_lock_fini(), lov_lock_fini(), lovsub_lock_fini(),
 	 * \see osc_lock_fini()
 	 */
 	void (*clo_fini)(const struct lu_env *env, struct cl_lock_slice *slice);
@@ -2016,7 +1459,7 @@ enum cl_io_state {
  * This is usually embedded into layer session data, rather than allocated
  * dynamically.
  *
- * \see vvp_io, lov_io, osc_io, ccc_io
+ * \see vvp_io, lov_io, osc_io
  */
 struct cl_io_slice {
 	struct cl_io		  *cis_io;
@@ -2031,6 +1474,8 @@ struct cl_io_slice {
 	struct list_head		     cis_linkage;
 };
 
+typedef void (*cl_commit_cbt)(const struct lu_env *, struct cl_io *,
+			      struct cl_page *);
 /**
  * Per-layer io operations.
  * \see vvp_io_ops, lov_io_ops, lovsub_io_ops, osc_io_ops
@@ -2114,7 +1559,7 @@ struct cl_io_operations {
 		void (*cio_fini)(const struct lu_env *env,
 				 const struct cl_io_slice *slice);
 	} op[CIT_OP_NR];
-	struct {
+
 		/**
 		 * Submit pages from \a queue->c2_qin for IO, and move
 		 * successfully submitted pages into \a queue->c2_qout. Return
@@ -2127,7 +1572,15 @@ struct cl_io_operations {
 				   const struct cl_io_slice *slice,
 				   enum cl_req_type crt,
 				   struct cl_2queue *queue);
-	} req_op[CRT_NR];
+	/**
+	 * Queue async page for write.
+	 * The difference between cio_submit and cio_queue is that
+	 * cio_submit is for urgent request.
+	 */
+	int  (*cio_commit_async)(const struct lu_env *env,
+				 const struct cl_io_slice *slice,
+				 struct cl_page_list *queue, int from, int to,
+				 cl_commit_cbt cb);
 	/**
 	 * Read missing page.
 	 *
@@ -2139,31 +1592,6 @@ struct cl_io_operations {
 	int (*cio_read_page)(const struct lu_env *env,
 			     const struct cl_io_slice *slice,
 			     const struct cl_page_slice *page);
-	/**
-	 * Prepare write of a \a page. Called bottom-to-top by a top-level
-	 * cl_io_operations::op[CIT_WRITE]::cio_start() to prepare page for
-	 * get data from user-level buffer.
-	 *
-	 * \pre io->ci_type == CIT_WRITE
-	 *
-	 * \see vvp_io_prepare_write(), lov_io_prepare_write(),
-	 * osc_io_prepare_write().
-	 */
-	int (*cio_prepare_write)(const struct lu_env *env,
-				 const struct cl_io_slice *slice,
-				 const struct cl_page_slice *page,
-				 unsigned from, unsigned to);
-	/**
-	 *
-	 * \pre io->ci_type == CIT_WRITE
-	 *
-	 * \see vvp_io_commit_write(), lov_io_commit_write(),
-	 * osc_io_commit_write().
-	 */
-	int (*cio_commit_write)(const struct lu_env *env,
-				const struct cl_io_slice *slice,
-				const struct cl_page_slice *page,
-				unsigned from, unsigned to);
 	/**
 	 * Optional debugging helper. Print given io slice.
 	 */
@@ -2215,10 +1643,14 @@ enum cl_enq_flags {
 	 * for async glimpse lock.
 	 */
 	CEF_AGL	  = 0x00000020,
+	/**
+	 * enqueue a lock to test DLM lock existence.
+	 */
+	CEF_PEEK	= 0x00000040,
 	/**
 	 * mask of enq_flags.
 	 */
-	CEF_MASK	 = 0x0000003f,
+	CEF_MASK         = 0x0000007f,
 };
 
 /**
@@ -2228,12 +1660,12 @@ enum cl_enq_flags {
 struct cl_io_lock_link {
 	/** linkage into one of cl_lockset lists. */
 	struct list_head	   cill_linkage;
-	struct cl_lock_descr cill_descr;
-	struct cl_lock      *cill_lock;
+	struct cl_lock          cill_lock;
 	/** optional destructor */
 	void	       (*cill_fini)(const struct lu_env *env,
 				    struct cl_io_lock_link *link);
 };
+#define cill_descr	cill_lock.cll_descr
 
 /**
  * Lock-set represents a collection of locks, that io needs at a
@@ -2267,8 +1699,6 @@ struct cl_io_lock_link {
 struct cl_lockset {
 	/** locks to be acquired. */
 	struct list_head  cls_todo;
-	/** locks currently being processed. */
-	struct list_head  cls_curr;
 	/** locks acquired. */
 	struct list_head  cls_done;
 };
@@ -2632,9 +2062,7 @@ struct cl_site {
 	 * and top-locks (and top-pages) are accounted here.
 	 */
 	struct cache_stats    cs_pages;
-	struct cache_stats    cs_locks;
 	atomic_t	  cs_pages_state[CPS_NR];
-	atomic_t	  cs_locks_state[CLS_NR];
 };
 
 int  cl_site_init(struct cl_site *s, struct cl_device *top);
@@ -2725,7 +2153,7 @@ static inline void cl_device_fini(struct cl_device *d)
 }
 
 void cl_page_slice_add(struct cl_page *page, struct cl_page_slice *slice,
-		       struct cl_object *obj,
+		       struct cl_object *obj, pgoff_t index,
 		       const struct cl_page_operations *ops);
 void cl_lock_slice_add(struct cl_lock *lock, struct cl_lock_slice *slice,
 		       struct cl_object *obj,
@@ -2758,7 +2186,7 @@ int  cl_object_glimpse(const struct lu_env *env, struct cl_object *obj,
 		       struct ost_lvb *lvb);
 int  cl_conf_set(const struct lu_env *env, struct cl_object *obj,
 		 const struct cl_object_conf *conf);
-void cl_object_prune(const struct lu_env *env, struct cl_object *obj);
+int cl_object_prune(const struct lu_env *env, struct cl_object *obj);
 void cl_object_kill(const struct lu_env *env, struct cl_object *obj);
 
 /**
@@ -2772,7 +2200,7 @@ static inline int cl_object_same(struct cl_object *o0, struct cl_object *o1)
 static inline void cl_object_page_init(struct cl_object *clob, int size)
 {
 	clob->co_slice_off = cl_object_header(clob)->coh_page_bufsize;
-	cl_object_header(clob)->coh_page_bufsize += ALIGN(size, 8);
+	cl_object_header(clob)->coh_page_bufsize += cfs_size_round(size);
 }
 
 static inline void *cl_object_page_slice(struct cl_object *clob,
@@ -2781,6 +2209,16 @@ static inline void *cl_object_page_slice(struct cl_object *clob,
 	return (void *)((char *)page + clob->co_slice_off);
 }
 
+/**
+ * Return refcount of cl_object.
+ */
+static inline int cl_object_refc(struct cl_object *clob)
+{
+	struct lu_object_header *header = clob->co_lu.lo_header;
+
+	return atomic_read(&header->loh_ref);
+}
+
 /** @} cl_object */
 
 /** \defgroup cl_page cl_page
@@ -2794,28 +2232,20 @@ enum {
 };
 
 /* callback of cl_page_gang_lookup() */
-typedef int   (*cl_page_gang_cb_t)  (const struct lu_env *, struct cl_io *,
-				     struct cl_page *, void *);
-int cl_page_gang_lookup(const struct lu_env *env, struct cl_object *obj,
-			struct cl_io *io, pgoff_t start, pgoff_t end,
-			cl_page_gang_cb_t cb, void *cbdata);
-struct cl_page *cl_page_lookup(struct cl_object_header *hdr, pgoff_t index);
 struct cl_page *cl_page_find(const struct lu_env *env, struct cl_object *obj,
 			     pgoff_t idx, struct page *vmpage,
 			     enum cl_page_type type);
-struct cl_page *cl_page_find_sub(const struct lu_env *env,
-				 struct cl_object *obj,
-				 pgoff_t idx, struct page *vmpage,
-				     struct cl_page *parent);
+struct cl_page *cl_page_alloc(const struct lu_env *env,
+			      struct cl_object *o, pgoff_t ind,
+			      struct page *vmpage,
+			      enum cl_page_type type);
 void cl_page_get(struct cl_page *page);
 void cl_page_put(const struct lu_env *env, struct cl_page *page);
 void cl_page_print(const struct lu_env *env, void *cookie, lu_printer_t printer,
 		   const struct cl_page *pg);
 void cl_page_header_print(const struct lu_env *env, void *cookie,
 			  lu_printer_t printer, const struct cl_page *pg);
-struct page *cl_page_vmpage(const struct lu_env *env, struct cl_page *page);
 struct cl_page *cl_vmpage_page(struct page *vmpage, struct cl_object *obj);
-struct cl_page *cl_page_top(struct cl_page *page);
 
 const struct cl_page_slice *cl_page_at(const struct cl_page *page,
 				       const struct lu_device_type *dtype);
@@ -2872,12 +2302,10 @@ int cl_page_flush(const struct lu_env *env, struct cl_io *io,
 void cl_page_discard(const struct lu_env *env, struct cl_io *io,
 		     struct cl_page *pg);
 void cl_page_delete(const struct lu_env *env, struct cl_page *pg);
-int cl_page_unmap(const struct lu_env *env, struct cl_io *io,
-		  struct cl_page *pg);
 int cl_page_is_vmlocked(const struct lu_env *env, const struct cl_page *pg);
 void cl_page_export(const struct lu_env *env, struct cl_page *pg, int uptodate);
 int cl_page_is_under_lock(const struct lu_env *env, struct cl_io *io,
-			  struct cl_page *page);
+			  struct cl_page *page, pgoff_t *max_index);
 loff_t cl_offset(const struct cl_object *obj, pgoff_t idx);
 pgoff_t cl_index(const struct cl_object *obj, loff_t offset);
 int cl_page_size(const struct cl_object *obj);
@@ -2890,138 +2318,66 @@ void cl_lock_descr_print(const struct lu_env *env, void *cookie,
 			 const struct cl_lock_descr *descr);
 /* @} helper */
 
+/**
+ * Data structure managing a client's cached pages. A count of
+ * "unstable" pages is maintained, and an LRU of clean pages is
+ * maintained. "unstable" pages are pages pinned by the ptlrpc
+ * layer for recovery purposes.
+ */
+struct cl_client_cache {
+	/**
+	 * # of users (OSCs)
+	 */
+	atomic_t		ccc_users;
+	/**
+	 * # of threads are doing shrinking
+	 */
+	unsigned int		ccc_lru_shrinkers;
+	/**
+	 * # of LRU entries available
+	 */
+	atomic_t		ccc_lru_left;
+	/**
+	 * List of entities(OSCs) for this LRU cache
+	 */
+	struct list_head	ccc_lru;
+	/**
+	 * Max # of LRU entries
+	 */
+	unsigned long		ccc_lru_max;
+	/**
+	 * Lock to protect ccc_lru list
+	 */
+	spinlock_t		ccc_lru_lock;
+	/**
+	 * # of unstable pages for this mount point
+	 */
+	atomic_t		ccc_unstable_nr;
+	/**
+	 * Waitq for awaiting unstable pages to reach zero.
+	 * Used at umounting time and signaled on BRW commit
+	 */
+        wait_queue_head_t	ccc_unstable_waitq;
+
+};
+
 /** @} cl_page */
 
 /** \defgroup cl_lock cl_lock
  * @{
  */
 
-struct cl_lock *cl_lock_hold(const struct lu_env *env, const struct cl_io *io,
-			     const struct cl_lock_descr *need,
-			     const char *scope, const void *source);
-struct cl_lock *cl_lock_peek(const struct lu_env *env, const struct cl_io *io,
-			     const struct cl_lock_descr *need,
-			     const char *scope, const void *source);
-struct cl_lock *cl_lock_request(const struct lu_env *env, struct cl_io *io,
-				const struct cl_lock_descr *need,
-				const char *scope, const void *source);
-struct cl_lock *cl_lock_at_pgoff(const struct lu_env *env,
-				 struct cl_object *obj, pgoff_t index,
-				 struct cl_lock *except, int pending,
-				 int canceld);
-static inline struct cl_lock *cl_lock_at_page(const struct lu_env *env,
-					      struct cl_object *obj,
-					      struct cl_page *page,
-					      struct cl_lock *except,
-					      int pending, int canceld)
-{
-	LASSERT(cl_object_header(obj) == cl_object_header(page->cp_obj));
-	return cl_lock_at_pgoff(env, obj, page->cp_index, except,
-				pending, canceld);
-}
-
+int cl_lock_request(const struct lu_env *env, struct cl_io *io,
+		    struct cl_lock *lock);
+int cl_lock_init(const struct lu_env *env, struct cl_lock *lock,
+		 const struct cl_io *io);
+void cl_lock_fini(const struct lu_env *env, struct cl_lock *lock);
 const struct cl_lock_slice *cl_lock_at(const struct cl_lock *lock,
 				       const struct lu_device_type *dtype);
-
-void cl_lock_get(struct cl_lock *lock);
-void cl_lock_get_trust(struct cl_lock *lock);
-void cl_lock_put(const struct lu_env *env, struct cl_lock *lock);
-void cl_lock_hold_add(const struct lu_env *env, struct cl_lock *lock,
-		      const char *scope, const void *source);
-void cl_lock_hold_release(const struct lu_env *env, struct cl_lock *lock,
-			  const char *scope, const void *source);
-void cl_lock_unhold(const struct lu_env *env, struct cl_lock *lock,
-		    const char *scope, const void *source);
-void cl_lock_release(const struct lu_env *env, struct cl_lock *lock,
-		     const char *scope, const void *source);
-void cl_lock_user_add(const struct lu_env *env, struct cl_lock *lock);
-void cl_lock_user_del(const struct lu_env *env, struct cl_lock *lock);
-
-int cl_lock_is_intransit(struct cl_lock *lock);
-
-int cl_lock_enqueue_wait(const struct lu_env *env, struct cl_lock *lock,
-			 int keep_mutex);
-
-/** \name statemachine statemachine
- * Interface to lock state machine consists of 3 parts:
- *
- *     - "try" functions that attempt to effect a state transition. If state
- *     transition is not possible right now (e.g., if it has to wait for some
- *     asynchronous event to occur), these functions return
- *     cl_lock_transition::CLO_WAIT.
- *
- *     - "non-try" functions that implement synchronous blocking interface on
- *     top of non-blocking "try" functions. These functions repeatedly call
- *     corresponding "try" versions, and if state transition is not possible
- *     immediately, wait for lock state change.
- *
- *     - methods from cl_lock_operations, called by "try" functions. Lock can
- *     be advanced to the target state only when all layers voted that they
- *     are ready for this transition. "Try" functions call methods under lock
- *     mutex. If a layer had to release a mutex, it re-acquires it and returns
- *     cl_lock_transition::CLO_REPEAT, causing "try" function to call all
- *     layers again.
- *
- * TRY	      NON-TRY      METHOD			    FINAL STATE
- *
- * cl_enqueue_try() cl_enqueue() cl_lock_operations::clo_enqueue() CLS_ENQUEUED
- *
- * cl_wait_try()    cl_wait()    cl_lock_operations::clo_wait()    CLS_HELD
- *
- * cl_unuse_try()   cl_unuse()   cl_lock_operations::clo_unuse()   CLS_CACHED
- *
- * cl_use_try()     NONE	 cl_lock_operations::clo_use()     CLS_HELD
- *
- * @{
- */
-
-int cl_wait(const struct lu_env *env, struct cl_lock *lock);
-void cl_unuse(const struct lu_env *env, struct cl_lock *lock);
-int cl_enqueue_try(const struct lu_env *env, struct cl_lock *lock,
-		   struct cl_io *io, __u32 flags);
-int cl_unuse_try(const struct lu_env *env, struct cl_lock *lock);
-int cl_wait_try(const struct lu_env *env, struct cl_lock *lock);
-int cl_use_try(const struct lu_env *env, struct cl_lock *lock, int atomic);
-
-/** @} statemachine */
-
-void cl_lock_signal(const struct lu_env *env, struct cl_lock *lock);
-int cl_lock_state_wait(const struct lu_env *env, struct cl_lock *lock);
-void cl_lock_state_set(const struct lu_env *env, struct cl_lock *lock,
-		       enum cl_lock_state state);
-int cl_queue_match(const struct list_head *queue,
-		   const struct cl_lock_descr *need);
-
-void cl_lock_mutex_get(const struct lu_env *env, struct cl_lock *lock);
-void cl_lock_mutex_put(const struct lu_env *env, struct cl_lock *lock);
-int cl_lock_is_mutexed(struct cl_lock *lock);
-int cl_lock_nr_mutexed(const struct lu_env *env);
-int cl_lock_discard_pages(const struct lu_env *env, struct cl_lock *lock);
-int cl_lock_ext_match(const struct cl_lock_descr *has,
-		      const struct cl_lock_descr *need);
-int cl_lock_descr_match(const struct cl_lock_descr *has,
-			const struct cl_lock_descr *need);
-int cl_lock_mode_match(enum cl_lock_mode has, enum cl_lock_mode need);
-int cl_lock_modify(const struct lu_env *env, struct cl_lock *lock,
-		   const struct cl_lock_descr *desc);
-
-void cl_lock_closure_init(const struct lu_env *env,
-			  struct cl_lock_closure *closure,
-			  struct cl_lock *origin, int wait);
-void cl_lock_closure_fini(struct cl_lock_closure *closure);
-int cl_lock_closure_build(const struct lu_env *env, struct cl_lock *lock,
-			  struct cl_lock_closure *closure);
-void cl_lock_disclosure(const struct lu_env *env,
-			struct cl_lock_closure *closure);
-int cl_lock_enclosure(const struct lu_env *env, struct cl_lock *lock,
-		      struct cl_lock_closure *closure);
-
+void cl_lock_release(const struct lu_env *env, struct cl_lock *lock);
+int cl_lock_enqueue(const struct lu_env *env, struct cl_io *io,
+		    struct cl_lock *lock, struct cl_sync_io *anchor);
 void cl_lock_cancel(const struct lu_env *env, struct cl_lock *lock);
-void cl_lock_delete(const struct lu_env *env, struct cl_lock *lock);
-void cl_lock_error(const struct lu_env *env, struct cl_lock *lock, int error);
-void cl_locks_prune(const struct lu_env *env, struct cl_object *obj, int wait);
-
-unsigned long cl_lock_weigh(const struct lu_env *env, struct cl_lock *lock);
 
 /** @} cl_lock */
 
@@ -3050,15 +2406,14 @@ int cl_io_lock_alloc_add(const struct lu_env *env, struct cl_io *io,
 			 struct cl_lock_descr *descr);
 int cl_io_read_page(const struct lu_env *env, struct cl_io *io,
 		    struct cl_page *page);
-int cl_io_prepare_write(const struct lu_env *env, struct cl_io *io,
-			struct cl_page *page, unsigned from, unsigned to);
-int cl_io_commit_write(const struct lu_env *env, struct cl_io *io,
-		       struct cl_page *page, unsigned from, unsigned to);
 int cl_io_submit_rw(const struct lu_env *env, struct cl_io *io,
 		    enum cl_req_type iot, struct cl_2queue *queue);
 int cl_io_submit_sync(const struct lu_env *env, struct cl_io *io,
 		      enum cl_req_type iot, struct cl_2queue *queue,
 		      long timeout);
+int cl_io_commit_async(const struct lu_env *env, struct cl_io *io,
+		       struct cl_page_list *queue, int from, int to,
+		       cl_commit_cbt cb);
 int cl_io_is_going(const struct lu_env *env);
 
 /**
@@ -3114,6 +2469,12 @@ static inline struct cl_page *cl_page_list_last(struct cl_page_list *plist)
 	return list_entry(plist->pl_pages.prev, struct cl_page, cp_batch);
 }
 
+static inline struct cl_page *cl_page_list_first(struct cl_page_list *plist)
+{
+	LASSERT(plist->pl_nr > 0);
+	return list_entry(plist->pl_pages.next, struct cl_page, cp_batch);
+}
+
 /**
  * Iterate over pages in a page list.
  */
@@ -3130,9 +2491,14 @@ void cl_page_list_init(struct cl_page_list *plist);
 void cl_page_list_add(struct cl_page_list *plist, struct cl_page *page);
 void cl_page_list_move(struct cl_page_list *dst, struct cl_page_list *src,
 		       struct cl_page *page);
+void cl_page_list_move_head(struct cl_page_list *dst, struct cl_page_list *src,
+			    struct cl_page *page);
 void cl_page_list_splice(struct cl_page_list *list, struct cl_page_list *head);
+void cl_page_list_del(const struct lu_env *env, struct cl_page_list *plist,
+		      struct cl_page *page);
 void cl_page_list_disown(const struct lu_env *env,
 			 struct cl_io *io, struct cl_page_list *plist);
+void cl_page_list_fini(const struct lu_env *env, struct cl_page_list *plist);
 
 void cl_2queue_init(struct cl_2queue *queue);
 void cl_2queue_disown(const struct lu_env *env,
@@ -3177,13 +2543,18 @@ struct cl_sync_io {
 	atomic_t		csi_barrier;
 	/** completion to be signaled when transfer is complete. */
 	wait_queue_head_t		csi_waitq;
+	/** callback to invoke when this IO is finished */
+	void			(*csi_end_io)(const struct lu_env *,
+					      struct cl_sync_io *);
 };
 
-void cl_sync_io_init(struct cl_sync_io *anchor, int nrpages);
-int  cl_sync_io_wait(const struct lu_env *env, struct cl_io *io,
-		     struct cl_page_list *queue, struct cl_sync_io *anchor,
+void cl_sync_io_init(struct cl_sync_io *anchor, int nr,
+		     void (*end)(const struct lu_env *, struct cl_sync_io *));
+int  cl_sync_io_wait(const struct lu_env *env, struct cl_sync_io *anchor,
 		     long timeout);
-void cl_sync_io_note(struct cl_sync_io *anchor, int ioret);
+void cl_sync_io_note(const struct lu_env *env, struct cl_sync_io *anchor,
+		     int ioret);
+void cl_sync_io_end(const struct lu_env *env, struct cl_sync_io *anchor);
 
 /** @} cl_sync_io */
 
@@ -3241,6 +2612,9 @@ void *cl_env_reenter(void);
 void cl_env_reexit(void *cookie);
 void cl_env_implant(struct lu_env *env, int *refcheck);
 void cl_env_unplant(struct lu_env *env, int *refcheck);
+unsigned int cl_env_cache_purge(unsigned int nr);
+struct lu_env *cl_env_percpu_get(void);
+void cl_env_percpu_put(struct lu_env *env);
 
 /** @} cl_env */
 
diff --git a/drivers/staging/lustre/lustre/include/lclient.h b/drivers/staging/lustre/lustre/include/lclient.h
deleted file mode 100644
index 5d839a9f7..000000000
--- a/drivers/staging/lustre/lustre/include/lclient.h
+++ /dev/null
@@ -1,408 +0,0 @@
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * Definitions shared between vvp and liblustre, and other clients in the
- * future.
- *
- *   Author: Oleg Drokin <oleg.drokin@sun.com>
- *   Author: Nikita Danilov <nikita.danilov@sun.com>
- */
-
-#ifndef LCLIENT_H
-#define LCLIENT_H
-
-blkcnt_t dirty_cnt(struct inode *inode);
-
-int cl_glimpse_size0(struct inode *inode, int agl);
-int cl_glimpse_lock(const struct lu_env *env, struct cl_io *io,
-		    struct inode *inode, struct cl_object *clob, int agl);
-
-static inline int cl_glimpse_size(struct inode *inode)
-{
-	return cl_glimpse_size0(inode, 0);
-}
-
-static inline int cl_agl(struct inode *inode)
-{
-	return cl_glimpse_size0(inode, 1);
-}
-
-/**
- * Locking policy for setattr.
- */
-enum ccc_setattr_lock_type {
-	/** Locking is done by server */
-	SETATTR_NOLOCK,
-	/** Extent lock is enqueued */
-	SETATTR_EXTENT_LOCK,
-	/** Existing local extent lock is used */
-	SETATTR_MATCH_LOCK
-};
-
-/**
- * IO state private to vvp or slp layers.
- */
-struct ccc_io {
-	/** super class */
-	struct cl_io_slice     cui_cl;
-	struct cl_io_lock_link cui_link;
-	/**
-	 * I/O vector information to or from which read/write is going.
-	 */
-	struct iov_iter *cui_iter;
-	/**
-	 * Total size for the left IO.
-	 */
-	size_t cui_tot_count;
-
-	union {
-		struct {
-			enum ccc_setattr_lock_type cui_local_lock;
-		} setattr;
-	} u;
-	/**
-	 * True iff io is processing glimpse right now.
-	 */
-	int		  cui_glimpse;
-	/**
-	 * Layout version when this IO is initialized
-	 */
-	__u32		cui_layout_gen;
-	/**
-	 * File descriptor against which IO is done.
-	 */
-	struct ll_file_data *cui_fd;
-	struct kiocb *cui_iocb;
-};
-
-/**
- * True, if \a io is a normal io, False for splice_{read,write}.
- * must be implemented in arch specific code.
- */
-int cl_is_normalio(const struct lu_env *env, const struct cl_io *io);
-
-extern struct lu_context_key ccc_key;
-extern struct lu_context_key ccc_session_key;
-
-struct ccc_thread_info {
-	struct cl_lock_descr cti_descr;
-	struct cl_io	 cti_io;
-	struct cl_attr       cti_attr;
-};
-
-static inline struct ccc_thread_info *ccc_env_info(const struct lu_env *env)
-{
-	struct ccc_thread_info      *info;
-
-	info = lu_context_key_get(&env->le_ctx, &ccc_key);
-	LASSERT(info);
-	return info;
-}
-
-static inline struct cl_attr *ccc_env_thread_attr(const struct lu_env *env)
-{
-	struct cl_attr *attr = &ccc_env_info(env)->cti_attr;
-
-	memset(attr, 0, sizeof(*attr));
-	return attr;
-}
-
-static inline struct cl_io *ccc_env_thread_io(const struct lu_env *env)
-{
-	struct cl_io *io = &ccc_env_info(env)->cti_io;
-
-	memset(io, 0, sizeof(*io));
-	return io;
-}
-
-struct ccc_session {
-	struct ccc_io cs_ios;
-};
-
-static inline struct ccc_session *ccc_env_session(const struct lu_env *env)
-{
-	struct ccc_session *ses;
-
-	ses = lu_context_key_get(env->le_ses, &ccc_session_key);
-	LASSERT(ses);
-	return ses;
-}
-
-static inline struct ccc_io *ccc_env_io(const struct lu_env *env)
-{
-	return &ccc_env_session(env)->cs_ios;
-}
-
-/**
- * ccc-private object state.
- */
-struct ccc_object {
-	struct cl_object_header cob_header;
-	struct cl_object	cob_cl;
-	struct inode	   *cob_inode;
-
-	/**
-	 * A list of dirty pages pending IO in the cache. Used by
-	 * SOM. Protected by ll_inode_info::lli_lock.
-	 *
-	 * \see ccc_page::cpg_pending_linkage
-	 */
-	struct list_head	     cob_pending_list;
-
-	/**
-	 * Access this counter is protected by inode->i_sem. Now that
-	 * the lifetime of transient pages must be covered by inode sem,
-	 * we don't need to hold any lock..
-	 */
-	int		     cob_transient_pages;
-	/**
-	 * Number of outstanding mmaps on this file.
-	 *
-	 * \see ll_vm_open(), ll_vm_close().
-	 */
-	atomic_t	    cob_mmap_cnt;
-
-	/**
-	 * various flags
-	 * cob_discard_page_warned
-	 *     if pages belonging to this object are discarded when a client
-	 * is evicted, some debug info will be printed, this flag will be set
-	 * during processing the first discarded page, then avoid flooding
-	 * debug message for lots of discarded pages.
-	 *
-	 * \see ll_dirty_page_discard_warn.
-	 */
-	unsigned int		cob_discard_page_warned:1;
-};
-
-/**
- * ccc-private page state.
- */
-struct ccc_page {
-	struct cl_page_slice cpg_cl;
-	int		  cpg_defer_uptodate;
-	int		  cpg_ra_used;
-	int		  cpg_write_queued;
-	/**
-	 * Non-empty iff this page is already counted in
-	 * ccc_object::cob_pending_list. Protected by
-	 * ccc_object::cob_pending_guard. This list is only used as a flag,
-	 * that is, never iterated through, only checked for list_empty(), but
-	 * having a list is useful for debugging.
-	 */
-	struct list_head	   cpg_pending_linkage;
-	/** VM page */
-	struct page	  *cpg_page;
-};
-
-static inline struct ccc_page *cl2ccc_page(const struct cl_page_slice *slice)
-{
-	return container_of(slice, struct ccc_page, cpg_cl);
-}
-
-struct ccc_device {
-	struct cl_device    cdv_cl;
-	struct super_block *cdv_sb;
-	struct cl_device   *cdv_next;
-};
-
-struct ccc_lock {
-	struct cl_lock_slice clk_cl;
-};
-
-struct ccc_req {
-	struct cl_req_slice  crq_cl;
-};
-
-void *ccc_key_init	(const struct lu_context *ctx,
-			   struct lu_context_key *key);
-void  ccc_key_fini	(const struct lu_context *ctx,
-			   struct lu_context_key *key, void *data);
-void *ccc_session_key_init(const struct lu_context *ctx,
-			   struct lu_context_key *key);
-void  ccc_session_key_fini(const struct lu_context *ctx,
-			   struct lu_context_key *key, void *data);
-
-int	      ccc_device_init  (const struct lu_env *env,
-				   struct lu_device *d,
-				   const char *name, struct lu_device *next);
-struct lu_device *ccc_device_fini (const struct lu_env *env,
-				   struct lu_device *d);
-struct lu_device *ccc_device_alloc(const struct lu_env *env,
-				   struct lu_device_type *t,
-				   struct lustre_cfg *cfg,
-				   const struct lu_device_operations *luops,
-				   const struct cl_device_operations *clops);
-struct lu_device *ccc_device_free (const struct lu_env *env,
-				   struct lu_device *d);
-struct lu_object *ccc_object_alloc(const struct lu_env *env,
-				   const struct lu_object_header *hdr,
-				   struct lu_device *dev,
-				   const struct cl_object_operations *clops,
-				   const struct lu_object_operations *luops);
-
-int ccc_req_init(const struct lu_env *env, struct cl_device *dev,
-		 struct cl_req *req);
-void ccc_umount(const struct lu_env *env, struct cl_device *dev);
-int ccc_global_init(struct lu_device_type *device_type);
-void ccc_global_fini(struct lu_device_type *device_type);
-int ccc_object_init0(const struct lu_env *env, struct ccc_object *vob,
-		     const struct cl_object_conf *conf);
-int ccc_object_init(const struct lu_env *env, struct lu_object *obj,
-		    const struct lu_object_conf *conf);
-void ccc_object_free(const struct lu_env *env, struct lu_object *obj);
-int ccc_lock_init(const struct lu_env *env, struct cl_object *obj,
-		  struct cl_lock *lock, const struct cl_io *io,
-		  const struct cl_lock_operations *lkops);
-int ccc_object_glimpse(const struct lu_env *env,
-		       const struct cl_object *obj, struct ost_lvb *lvb);
-struct page *ccc_page_vmpage(const struct lu_env *env,
-			    const struct cl_page_slice *slice);
-int ccc_page_is_under_lock(const struct lu_env *env,
-			   const struct cl_page_slice *slice, struct cl_io *io);
-int ccc_fail(const struct lu_env *env, const struct cl_page_slice *slice);
-int ccc_transient_page_prep(const struct lu_env *env,
-			    const struct cl_page_slice *slice,
-			    struct cl_io *io);
-void ccc_lock_delete(const struct lu_env *env,
-		     const struct cl_lock_slice *slice);
-void ccc_lock_fini(const struct lu_env *env, struct cl_lock_slice *slice);
-int ccc_lock_enqueue(const struct lu_env *env,
-		     const struct cl_lock_slice *slice,
-		     struct cl_io *io, __u32 enqflags);
-int ccc_lock_use(const struct lu_env *env, const struct cl_lock_slice *slice);
-int ccc_lock_unuse(const struct lu_env *env, const struct cl_lock_slice *slice);
-int ccc_lock_wait(const struct lu_env *env, const struct cl_lock_slice *slice);
-int ccc_lock_fits_into(const struct lu_env *env,
-		       const struct cl_lock_slice *slice,
-		       const struct cl_lock_descr *need,
-		       const struct cl_io *io);
-void ccc_lock_state(const struct lu_env *env,
-		    const struct cl_lock_slice *slice,
-		    enum cl_lock_state state);
-
-int ccc_io_one_lock_index(const struct lu_env *env, struct cl_io *io,
-			  __u32 enqflags, enum cl_lock_mode mode,
-			  pgoff_t start, pgoff_t end);
-int ccc_io_one_lock(const struct lu_env *env, struct cl_io *io,
-		    __u32 enqflags, enum cl_lock_mode mode,
-		    loff_t start, loff_t end);
-void ccc_io_end(const struct lu_env *env, const struct cl_io_slice *ios);
-void ccc_io_advance(const struct lu_env *env, const struct cl_io_slice *ios,
-		    size_t nob);
-void ccc_io_update_iov(const struct lu_env *env, struct ccc_io *cio,
-		       struct cl_io *io);
-int ccc_prep_size(const struct lu_env *env, struct cl_object *obj,
-		  struct cl_io *io, loff_t start, size_t count, int *exceed);
-void ccc_req_completion(const struct lu_env *env,
-			const struct cl_req_slice *slice, int ioret);
-void ccc_req_attr_set(const struct lu_env *env,
-		      const struct cl_req_slice *slice,
-		      const struct cl_object *obj,
-		      struct cl_req_attr *oa, u64 flags);
-
-struct lu_device   *ccc2lu_dev      (struct ccc_device *vdv);
-struct lu_object   *ccc2lu	  (struct ccc_object *vob);
-struct ccc_device  *lu2ccc_dev      (const struct lu_device *d);
-struct ccc_device  *cl2ccc_dev      (const struct cl_device *d);
-struct ccc_object  *lu2ccc	  (const struct lu_object *obj);
-struct ccc_object  *cl2ccc	  (const struct cl_object *obj);
-struct ccc_lock    *cl2ccc_lock     (const struct cl_lock_slice *slice);
-struct ccc_io      *cl2ccc_io       (const struct lu_env *env,
-				     const struct cl_io_slice *slice);
-struct ccc_req     *cl2ccc_req      (const struct cl_req_slice *slice);
-struct page	 *cl2vm_page      (const struct cl_page_slice *slice);
-struct inode       *ccc_object_inode(const struct cl_object *obj);
-struct ccc_object  *cl_inode2ccc    (struct inode *inode);
-
-int cl_setattr_ost(struct inode *inode, const struct iattr *attr);
-
-int ccc_object_invariant(const struct cl_object *obj);
-int cl_file_inode_init(struct inode *inode, struct lustre_md *md);
-void cl_inode_fini(struct inode *inode);
-int cl_local_size(struct inode *inode);
-
-__u16 ll_dirent_type_get(struct lu_dirent *ent);
-__u64 cl_fid_build_ino(const struct lu_fid *fid, int api32);
-__u32 cl_fid_build_gen(const struct lu_fid *fid);
-
-# define CLOBINVRNT(env, clob, expr)					\
-	((void)sizeof(env), (void)sizeof(clob), (void)sizeof(!!(expr)))
-
-int cl_init_ea_size(struct obd_export *md_exp, struct obd_export *dt_exp);
-int cl_ocd_update(struct obd_device *host,
-		  struct obd_device *watched,
-		  enum obd_notify_event ev, void *owner, void *data);
-
-struct ccc_grouplock {
-	struct lu_env   *cg_env;
-	struct cl_io    *cg_io;
-	struct cl_lock  *cg_lock;
-	unsigned long    cg_gid;
-};
-
-int  cl_get_grouplock(struct cl_object *obj, unsigned long gid, int nonblock,
-		      struct ccc_grouplock *cg);
-void cl_put_grouplock(struct ccc_grouplock *cg);
-
-/**
- * New interfaces to get and put lov_stripe_md from lov layer. This violates
- * layering because lov_stripe_md is supposed to be a private data in lov.
- *
- * NB: If you find you have to use these interfaces for your new code, please
- * think about it again. These interfaces may be removed in the future for
- * better layering.
- */
-struct lov_stripe_md *lov_lsm_get(struct cl_object *clobj);
-void lov_lsm_put(struct cl_object *clobj, struct lov_stripe_md *lsm);
-int lov_read_and_clear_async_rc(struct cl_object *clob);
-
-struct lov_stripe_md *ccc_inode_lsm_get(struct inode *inode);
-void ccc_inode_lsm_put(struct inode *inode, struct lov_stripe_md *lsm);
-
-/**
- * Data structure managing a client's cached clean pages. An LRU of
- * pages is maintained, along with other statistics.
- */
-struct cl_client_cache {
-	atomic_t	ccc_users;    /* # of users (OSCs) of this data */
-	struct list_head	ccc_lru;      /* LRU list of cached clean pages */
-	spinlock_t	ccc_lru_lock; /* lock for list */
-	atomic_t	ccc_lru_left; /* # of LRU entries available */
-	unsigned long	ccc_lru_max;  /* Max # of LRU entries possible */
-	unsigned int	ccc_lru_shrinkers; /* # of threads reclaiming */
-};
-
-#endif /*LCLIENT_H */
diff --git a/drivers/staging/lustre/lustre/include/linux/obd.h b/drivers/staging/lustre/lustre/include/linux/obd.h
deleted file mode 100644
index 3907bf4ce..000000000
--- a/drivers/staging/lustre/lustre/include/linux/obd.h
+++ /dev/null
@@ -1,125 +0,0 @@
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#ifndef __LINUX_OBD_H
-#define __LINUX_OBD_H
-
-#ifndef __OBD_H
-#error Do not #include this file directly. #include <obd.h> instead
-#endif
-
-#include "../obd_support.h"
-
-#include <linux/fs.h>
-#include <linux/list.h>
-#include <linux/sched.h>  /* for struct task_struct, for current.h */
-#include <linux/mount.h>
-
-#include "../lustre_intent.h"
-
-struct ll_iattr {
-	struct iattr	iattr;
-	unsigned int	ia_attr_flags;
-};
-
-#define CLIENT_OBD_LIST_LOCK_DEBUG 1
-
-struct client_obd_lock {
-	spinlock_t		lock;
-
-	unsigned long       time;
-	struct task_struct *task;
-	const char	 *func;
-	int		 line;
-};
-
-static inline void __client_obd_list_lock(struct client_obd_lock *lock,
-					  const char *func, int line)
-{
-	unsigned long cur = jiffies;
-
-	while (1) {
-		if (spin_trylock(&lock->lock)) {
-			LASSERT(!lock->task);
-			lock->task = current;
-			lock->func = func;
-			lock->line = line;
-			lock->time = jiffies;
-			break;
-		}
-
-		if (time_before(cur + 5 * HZ, jiffies) &&
-		    time_before(lock->time + 5 * HZ, jiffies)) {
-			struct task_struct *task = lock->task;
-
-			if (!task)
-				continue;
-
-			LCONSOLE_WARN("%s:%d: lock %p was acquired by <%s:%d:%s:%d> for %lu seconds.\n",
-				      current->comm, current->pid,
-				      lock, task->comm, task->pid,
-				      lock->func, lock->line,
-				      (jiffies - lock->time) / HZ);
-			LCONSOLE_WARN("====== for current process =====\n");
-			dump_stack();
-			LCONSOLE_WARN("====== end =======\n");
-			set_current_state(TASK_UNINTERRUPTIBLE);
-			schedule_timeout(1000 * HZ);
-		}
-		cpu_relax();
-	}
-}
-
-#define client_obd_list_lock(lock) \
-	__client_obd_list_lock(lock, __func__, __LINE__)
-
-static inline void client_obd_list_unlock(struct client_obd_lock *lock)
-{
-	LASSERT(lock->task);
-	lock->task = NULL;
-	lock->time = jiffies;
-	spin_unlock(&lock->lock);
-}
-
-static inline void client_obd_list_lock_init(struct client_obd_lock *lock)
-{
-	spin_lock_init(&lock->lock);
-}
-
-static inline void client_obd_list_lock_done(struct client_obd_lock *lock)
-{}
-
-#endif /* __LINUX_OBD_H */
diff --git a/drivers/staging/lustre/lustre/include/lu_object.h b/drivers/staging/lustre/lustre/include/lu_object.h
index 242bb1ef6..281651218 100644
--- a/drivers/staging/lustre/lustre/include/lu_object.h
+++ b/drivers/staging/lustre/lustre/include/lu_object.h
@@ -198,7 +198,6 @@ typedef int (*lu_printer_t)(const struct lu_env *env,
  * Operations specific for particular lu_object.
  */
 struct lu_object_operations {
-
 	/**
 	 * Allocate lower-layer parts of the object by calling
 	 * lu_device_operations::ldo_object_alloc() of the corresponding
@@ -656,21 +655,21 @@ static inline struct seq_server_site *lu_site2seq(const struct lu_site *s)
  * @{
  */
 
-int  lu_site_init	 (struct lu_site *s, struct lu_device *d);
-void lu_site_fini	 (struct lu_site *s);
-int  lu_site_init_finish  (struct lu_site *s);
-void lu_stack_fini	(const struct lu_env *env, struct lu_device *top);
-void lu_device_get	(struct lu_device *d);
-void lu_device_put	(struct lu_device *d);
-int  lu_device_init       (struct lu_device *d, struct lu_device_type *t);
-void lu_device_fini       (struct lu_device *d);
-int  lu_object_header_init(struct lu_object_header *h);
+int lu_site_init(struct lu_site *s, struct lu_device *d);
+void lu_site_fini(struct lu_site *s);
+int lu_site_init_finish(struct lu_site *s);
+void lu_stack_fini(const struct lu_env *env, struct lu_device *top);
+void lu_device_get(struct lu_device *d);
+void lu_device_put(struct lu_device *d);
+int lu_device_init(struct lu_device *d, struct lu_device_type *t);
+void lu_device_fini(struct lu_device *d);
+int lu_object_header_init(struct lu_object_header *h);
 void lu_object_header_fini(struct lu_object_header *h);
-int  lu_object_init       (struct lu_object *o,
-			   struct lu_object_header *h, struct lu_device *d);
-void lu_object_fini       (struct lu_object *o);
-void lu_object_add_top    (struct lu_object_header *h, struct lu_object *o);
-void lu_object_add	(struct lu_object *before, struct lu_object *o);
+int lu_object_init(struct lu_object *o,
+		   struct lu_object_header *h, struct lu_device *d);
+void lu_object_fini(struct lu_object *o);
+void lu_object_add_top(struct lu_object_header *h, struct lu_object *o);
+void lu_object_add(struct lu_object *before, struct lu_object *o);
 
 /**
  * Helpers to initialize and finalize device types.
@@ -781,9 +780,8 @@ int lu_cdebug_printer(const struct lu_env *env,
  */
 #define LU_OBJECT_DEBUG(mask, env, object, format, ...)		   \
 do {								      \
-	LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, mask, NULL);		  \
-									  \
 	if (cfs_cdebug_show(mask, DEBUG_SUBSYSTEM)) {		     \
+		LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, mask, NULL);		\
 		lu_object_print(env, &msgdata, lu_cdebug_printer, object);\
 		CDEBUG(mask, format, ## __VA_ARGS__);		    \
 	}								 \
@@ -794,9 +792,8 @@ do {								      \
  */
 #define LU_OBJECT_HEADER(mask, env, object, format, ...)		\
 do {								    \
-	LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, mask, NULL);		\
-									\
 	if (cfs_cdebug_show(mask, DEBUG_SUBSYSTEM)) {		   \
+		LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, mask, NULL);		\
 		lu_object_header_print(env, &msgdata, lu_cdebug_printer,\
 				       (object)->lo_header);	    \
 		lu_cdebug_printer(env, &msgdata, "\n");		 \
@@ -1006,6 +1003,10 @@ enum lu_context_tag {
 	 * Context for local operations
 	 */
 	LCT_LOCAL = 1 << 7,
+	/**
+	 * session for server thread
+	 **/
+	LCT_SERVER_SESSION = BIT(8),
 	/**
 	 * Set when at least one of keys, having values in this context has
 	 * non-NULL lu_context_key::lct_exit() method. This is used to
@@ -1118,7 +1119,7 @@ struct lu_context_key {
 	{							 \
 		type *value;				      \
 								  \
-		CLASSERT(PAGE_SIZE >= sizeof (*value));       \
+		CLASSERT(PAGE_SIZE >= sizeof(*value));        \
 								  \
 		value = kzalloc(sizeof(*value), GFP_NOFS);	\
 		if (!value)				\
@@ -1154,12 +1155,12 @@ do {						    \
 	(key)->lct_owner = THIS_MODULE;		 \
 } while (0)
 
-int   lu_context_key_register(struct lu_context_key *key);
-void  lu_context_key_degister(struct lu_context_key *key);
-void *lu_context_key_get     (const struct lu_context *ctx,
-			       const struct lu_context_key *key);
-void  lu_context_key_quiesce (struct lu_context_key *key);
-void  lu_context_key_revive  (struct lu_context_key *key);
+int lu_context_key_register(struct lu_context_key *key);
+void lu_context_key_degister(struct lu_context_key *key);
+void *lu_context_key_get(const struct lu_context *ctx,
+			 const struct lu_context_key *key);
+void lu_context_key_quiesce(struct lu_context_key *key);
+void lu_context_key_revive(struct lu_context_key *key);
 
 /*
  * LU_KEY_INIT_GENERIC() has to be a macro to correctly determine an
@@ -1216,21 +1217,21 @@ void  lu_context_key_revive  (struct lu_context_key *key);
 	LU_TYPE_START(mod, __VA_ARGS__);	\
 	LU_TYPE_STOP(mod, __VA_ARGS__)
 
-int   lu_context_init  (struct lu_context *ctx, __u32 tags);
-void  lu_context_fini  (struct lu_context *ctx);
-void  lu_context_enter (struct lu_context *ctx);
-void  lu_context_exit  (struct lu_context *ctx);
-int   lu_context_refill(struct lu_context *ctx);
+int lu_context_init(struct lu_context *ctx, __u32 tags);
+void lu_context_fini(struct lu_context *ctx);
+void lu_context_enter(struct lu_context *ctx);
+void lu_context_exit(struct lu_context *ctx);
+int lu_context_refill(struct lu_context *ctx);
 
 /*
  * Helper functions to operate on multiple keys. These are used by the default
  * device type operations, defined by LU_TYPE_INIT_FINI().
  */
 
-int  lu_context_key_register_many(struct lu_context_key *k, ...);
+int lu_context_key_register_many(struct lu_context_key *k, ...);
 void lu_context_key_degister_many(struct lu_context_key *k, ...);
-void lu_context_key_revive_many  (struct lu_context_key *k, ...);
-void lu_context_key_quiesce_many (struct lu_context_key *k, ...);
+void lu_context_key_revive_many(struct lu_context_key *k, ...);
+void lu_context_key_quiesce_many(struct lu_context_key *k, ...);
 
 /**
  * Environment.
@@ -1246,9 +1247,9 @@ struct lu_env {
 	struct lu_context *le_ses;
 };
 
-int  lu_env_init  (struct lu_env *env, __u32 tags);
-void lu_env_fini  (struct lu_env *env);
-int  lu_env_refill(struct lu_env *env);
+int lu_env_init(struct lu_env *env, __u32 tags);
+void lu_env_fini(struct lu_env *env);
+int lu_env_refill(struct lu_env *env);
 
 /** @} lu_context */
 
diff --git a/drivers/staging/lustre/lustre/include/lustre/lustre_idl.h b/drivers/staging/lustre/lustre/include/lustre/lustre_idl.h
index 5aae1d06a..9c53c1792 100644
--- a/drivers/staging/lustre/lustre/include/lustre/lustre_idl.h
+++ b/drivers/staging/lustre/lustre/include/lustre/lustre_idl.h
@@ -183,6 +183,12 @@ struct lu_seq_range {
 	__u32 lsr_flags;
 };
 
+struct lu_seq_range_array {
+	__u32 lsra_count;
+	__u32 lsra_padding;
+	struct lu_seq_range lsra_lsr[0];
+};
+
 #define LU_SEQ_RANGE_MDT	0x0
 #define LU_SEQ_RANGE_OST	0x1
 #define LU_SEQ_RANGE_ANY	0x3
@@ -578,7 +584,7 @@ static inline __u64 ostid_seq(const struct ost_id *ostid)
 	if (fid_seq_is_mdt0(ostid->oi.oi_seq))
 		return FID_SEQ_OST_MDT0;
 
-	if (fid_seq_is_default(ostid->oi.oi_seq))
+	if (unlikely(fid_seq_is_default(ostid->oi.oi_seq)))
 		return FID_SEQ_LOV_DEFAULT;
 
 	if (fid_is_idif(&ostid->oi_fid))
@@ -590,9 +596,12 @@ static inline __u64 ostid_seq(const struct ost_id *ostid)
 /* extract OST objid from a wire ost_id (id/seq) pair */
 static inline __u64 ostid_id(const struct ost_id *ostid)
 {
-	if (fid_seq_is_mdt0(ostid_seq(ostid)))
+	if (fid_seq_is_mdt0(ostid->oi.oi_seq))
 		return ostid->oi.oi_id & IDIF_OID_MASK;
 
+	if (unlikely(fid_seq_is_default(ostid->oi.oi_seq)))
+		return ostid->oi.oi_id;
+
 	if (fid_is_idif(&ostid->oi_fid))
 		return fid_idif_id(fid_seq(&ostid->oi_fid),
 				   fid_oid(&ostid->oi_fid), 0);
@@ -636,12 +645,22 @@ static inline void ostid_set_seq_llog(struct ost_id *oi)
  */
 static inline void ostid_set_id(struct ost_id *oi, __u64 oid)
 {
-	if (fid_seq_is_mdt0(ostid_seq(oi))) {
+	if (fid_seq_is_mdt0(oi->oi.oi_seq)) {
 		if (oid >= IDIF_MAX_OID) {
 			CERROR("Bad %llu to set " DOSTID "\n", oid, POSTID(oi));
 			return;
 		}
 		oi->oi.oi_id = oid;
+	} else if (fid_is_idif(&oi->oi_fid)) {
+		if (oid >= IDIF_MAX_OID) {
+			CERROR("Bad %llu to set "DOSTID"\n",
+			       oid, POSTID(oi));
+			return;
+		}
+		oi->oi_fid.f_seq = fid_idif_seq(oid,
+						fid_idif_ost_idx(&oi->oi_fid));
+		oi->oi_fid.f_oid = oid;
+		oi->oi_fid.f_ver = oid >> 48;
 	} else {
 		if (oid > OBIF_MAX_OID) {
 			CERROR("Bad %llu to set " DOSTID "\n", oid, POSTID(oi));
@@ -651,25 +670,31 @@ static inline void ostid_set_id(struct ost_id *oi, __u64 oid)
 	}
 }
 
-static inline void ostid_inc_id(struct ost_id *oi)
+static inline int fid_set_id(struct lu_fid *fid, __u64 oid)
 {
-	if (fid_seq_is_mdt0(ostid_seq(oi))) {
-		if (unlikely(ostid_id(oi) + 1 > IDIF_MAX_OID)) {
-			CERROR("Bad inc "DOSTID"\n", POSTID(oi));
-			return;
+	if (unlikely(fid_seq_is_igif(fid->f_seq))) {
+		CERROR("bad IGIF, "DFID"\n", PFID(fid));
+		return -EBADF;
+	}
+
+	if (fid_is_idif(fid)) {
+		if (oid >= IDIF_MAX_OID) {
+			CERROR("Too large OID %#llx to set IDIF "DFID"\n",
+			       (unsigned long long)oid, PFID(fid));
+			return -EBADF;
 		}
-		oi->oi.oi_id++;
+		fid->f_seq = fid_idif_seq(oid, fid_idif_ost_idx(fid));
+		fid->f_oid = oid;
+		fid->f_ver = oid >> 48;
 	} else {
-		oi->oi_fid.f_oid++;
+		if (oid > OBIF_MAX_OID) {
+			CERROR("Too large OID %#llx to set REG "DFID"\n",
+			       (unsigned long long)oid, PFID(fid));
+			return -EBADF;
+		}
+		fid->f_oid = oid;
 	}
-}
-
-static inline void ostid_dec_id(struct ost_id *oi)
-{
-	if (fid_seq_is_mdt0(ostid_seq(oi)))
-		oi->oi.oi_id--;
-	else
-		oi->oi_fid.f_oid--;
+	return 0;
 }
 
 /**
@@ -684,30 +709,34 @@ static inline void ostid_dec_id(struct ost_id *oi)
 static inline int ostid_to_fid(struct lu_fid *fid, struct ost_id *ostid,
 			       __u32 ost_idx)
 {
+	__u64 seq = ostid_seq(ostid);
+
 	if (ost_idx > 0xffff) {
 		CERROR("bad ost_idx, "DOSTID" ost_idx:%u\n", POSTID(ostid),
 		       ost_idx);
 		return -EBADF;
 	}
 
-	if (fid_seq_is_mdt0(ostid_seq(ostid))) {
+	if (fid_seq_is_mdt0(seq)) {
+		__u64 oid = ostid_id(ostid);
+
 		/* This is a "legacy" (old 1.x/2.early) OST object in "group 0"
 		 * that we map into the IDIF namespace.  It allows up to 2^48
 		 * objects per OST, as this is the object namespace that has
 		 * been in production for years.  This can handle create rates
 		 * of 1M objects/s/OST for 9 years, or combinations thereof.
 		 */
-		if (ostid_id(ostid) >= IDIF_MAX_OID) {
+		if (oid >= IDIF_MAX_OID) {
 			CERROR("bad MDT0 id, " DOSTID " ost_idx:%u\n",
 			       POSTID(ostid), ost_idx);
 			return -EBADF;
 		}
-		fid->f_seq = fid_idif_seq(ostid_id(ostid), ost_idx);
+		fid->f_seq = fid_idif_seq(oid, ost_idx);
 		/* truncate to 32 bits by assignment */
-		fid->f_oid = ostid_id(ostid);
+		fid->f_oid = oid;
 		/* in theory, not currently used */
-		fid->f_ver = ostid_id(ostid) >> 48;
-	} else /* if (fid_seq_is_idif(seq) || fid_seq_is_norm(seq)) */ {
+		fid->f_ver = oid >> 48;
+	} else if (likely(!fid_seq_is_default(seq))) {
 	       /* This is either an IDIF object, which identifies objects across
 		* all OSTs, or a regular FID.  The IDIF namespace maps legacy
 		* OST objects into the FID namespace.  In both cases, we just
@@ -1001,8 +1030,9 @@ static inline int lu_dirent_calc_size(int namelen, __u16 attr)
 
 		size = (sizeof(struct lu_dirent) + namelen + align) & ~align;
 		size += sizeof(struct luda_type);
-	} else
+	} else {
 		size = sizeof(struct lu_dirent) + namelen;
+	}
 
 	return (size + 7) & ~7;
 }
@@ -1256,6 +1286,9 @@ void lustre_swab_ptlrpc_body(struct ptlrpc_body *pb);
 #define OBD_CONNECT_PINGLESS	0x4000000000000ULL/* pings not required */
 #define OBD_CONNECT_FLOCK_DEAD	0x8000000000000ULL/* flock deadlock detection */
 #define OBD_CONNECT_DISP_STRIPE 0x10000000000000ULL/*create stripe disposition*/
+#define OBD_CONNECT_OPEN_BY_FID	0x20000000000000ULL	/* open by fid won't pack
+							 * name in request
+							 */
 
 /* XXX README XXX:
  * Please DO NOT add flag values here before first ensuring that this same
@@ -1428,6 +1461,8 @@ enum obdo_flags {
 					   */
 	OBD_FL_RECOV_RESEND = 0x00080000, /* recoverable resent */
 	OBD_FL_NOSPC_BLK    = 0x00100000, /* no more block space on OST */
+	OBD_FL_FLUSH	    = 0x00200000, /* flush pages on the OST */
+	OBD_FL_SHORT_IO	    = 0x00400000, /* short io request */
 
 	/* Note that while these checksum values are currently separate bits,
 	 * in 2.x we can actually allow all values from 1-31 if we wanted.
@@ -1525,6 +1560,11 @@ static inline void lmm_oi_set_seq(struct ost_id *oi, __u64 seq)
 	oi->oi.oi_seq = seq;
 }
 
+static inline void lmm_oi_set_id(struct ost_id *oi, __u64 oid)
+{
+	oi->oi.oi_id = oid;
+}
+
 static inline __u64 lmm_oi_id(struct ost_id *oi)
 {
 	return oi->oi.oi_id;
@@ -1732,6 +1772,11 @@ void lustre_swab_obd_statfs(struct obd_statfs *os);
 #define OBD_BRW_MEMALLOC       0x800 /* Client runs in the "kswapd" context */
 #define OBD_BRW_OVER_USRQUOTA 0x1000 /* Running out of user quota */
 #define OBD_BRW_OVER_GRPQUOTA 0x2000 /* Running out of group quota */
+#define OBD_BRW_SOFT_SYNC     0x4000 /* This flag notifies the server
+				      * that the client is running low on
+				      * space for unstable pages; asking
+				      * it to sync quickly
+				      */
 
 #define OBD_OBJECT_EOF 0xffffffffffffffffULL
 
@@ -2436,6 +2481,7 @@ struct mdt_rec_reint {
 
 void lustre_swab_mdt_rec_reint(struct mdt_rec_reint *rr);
 
+/* lmv structures */
 struct lmv_desc {
 	__u32 ld_tgt_count;		/* how many MDS's */
 	__u32 ld_active_tgt_count;	 /* how many active */
@@ -2460,7 +2506,6 @@ struct lmv_stripe_md {
 	struct lu_fid mea_ids[0];
 };
 
-/* lmv structures */
 #define MEA_MAGIC_LAST_CHAR      0xb2221ca1
 #define MEA_MAGIC_ALL_CHARS      0xb222a11c
 #define MEA_MAGIC_HASH_SEGMENT   0xb222a11b
@@ -2470,9 +2515,10 @@ struct lmv_stripe_md {
 #define MAX_HASH_HIGHEST_BIT     0x1000000000000000ULL
 
 enum fld_rpc_opc {
-	FLD_QUERY		       = 900,
+	FLD_QUERY	= 900,
+	FLD_READ	= 901,
 	FLD_LAST_OPC,
-	FLD_FIRST_OPC		   = FLD_QUERY
+	FLD_FIRST_OPC	= FLD_QUERY
 };
 
 enum seq_rpc_opc {
@@ -2486,6 +2532,12 @@ enum seq_op {
 	SEQ_ALLOC_META = 1
 };
 
+enum fld_op {
+	FLD_CREATE = 0,
+	FLD_DELETE = 1,
+	FLD_LOOKUP = 2,
+};
+
 /*
  *  LOV data structures
  */
@@ -2582,6 +2634,8 @@ struct ldlm_extent {
 	__u64 gid;
 };
 
+#define LDLM_GID_ANY ((__u64)-1)
+
 static inline int ldlm_extent_overlap(struct ldlm_extent *ex1,
 				      struct ldlm_extent *ex2)
 {
@@ -3304,7 +3358,7 @@ struct getinfo_fid2path {
 	char	    gf_path[0];
 } __packed;
 
-void lustre_swab_fid2path (struct getinfo_fid2path *gf);
+void lustre_swab_fid2path(struct getinfo_fid2path *gf);
 
 enum {
 	LAYOUT_INTENT_ACCESS    = 0,
diff --git a/drivers/staging/lustre/lustre/include/lustre/lustre_user.h b/drivers/staging/lustre/lustre/include/lustre/lustre_user.h
index 276906e64..59ba48ac3 100644
--- a/drivers/staging/lustre/lustre/include/lustre/lustre_user.h
+++ b/drivers/staging/lustre/lustre/include/lustre/lustre_user.h
@@ -193,37 +193,37 @@ struct ost_id {
  * *INFO    - set/get lov_user_mds_data
  */
 /* see <lustre_lib.h> for ioctl numberss 101-150 */
-#define LL_IOC_GETFLAGS		 _IOR ('f', 151, long)
-#define LL_IOC_SETFLAGS		 _IOW ('f', 152, long)
-#define LL_IOC_CLRFLAGS		 _IOW ('f', 153, long)
+#define LL_IOC_GETFLAGS		 _IOR('f', 151, long)
+#define LL_IOC_SETFLAGS		 _IOW('f', 152, long)
+#define LL_IOC_CLRFLAGS		 _IOW('f', 153, long)
 /* LL_IOC_LOV_SETSTRIPE: See also OBD_IOC_LOV_SETSTRIPE */
-#define LL_IOC_LOV_SETSTRIPE	    _IOW ('f', 154, long)
+#define LL_IOC_LOV_SETSTRIPE	    _IOW('f', 154, long)
 /* LL_IOC_LOV_GETSTRIPE: See also OBD_IOC_LOV_GETSTRIPE */
-#define LL_IOC_LOV_GETSTRIPE	    _IOW ('f', 155, long)
+#define LL_IOC_LOV_GETSTRIPE	    _IOW('f', 155, long)
 /* LL_IOC_LOV_SETEA: See also OBD_IOC_LOV_SETEA */
-#define LL_IOC_LOV_SETEA		_IOW ('f', 156, long)
-#define LL_IOC_RECREATE_OBJ	     _IOW ('f', 157, long)
-#define LL_IOC_RECREATE_FID	     _IOW ('f', 157, struct lu_fid)
-#define LL_IOC_GROUP_LOCK	       _IOW ('f', 158, long)
-#define LL_IOC_GROUP_UNLOCK	     _IOW ('f', 159, long)
+#define LL_IOC_LOV_SETEA		_IOW('f', 156, long)
+#define LL_IOC_RECREATE_OBJ	     _IOW('f', 157, long)
+#define LL_IOC_RECREATE_FID	     _IOW('f', 157, struct lu_fid)
+#define LL_IOC_GROUP_LOCK	       _IOW('f', 158, long)
+#define LL_IOC_GROUP_UNLOCK	     _IOW('f', 159, long)
 /* LL_IOC_QUOTACHECK: See also OBD_IOC_QUOTACHECK */
-#define LL_IOC_QUOTACHECK	       _IOW ('f', 160, int)
+#define LL_IOC_QUOTACHECK	       _IOW('f', 160, int)
 /* LL_IOC_POLL_QUOTACHECK: See also OBD_IOC_POLL_QUOTACHECK */
-#define LL_IOC_POLL_QUOTACHECK	  _IOR ('f', 161, struct if_quotacheck *)
+#define LL_IOC_POLL_QUOTACHECK	  _IOR('f', 161, struct if_quotacheck *)
 /* LL_IOC_QUOTACTL: See also OBD_IOC_QUOTACTL */
 #define LL_IOC_QUOTACTL		 _IOWR('f', 162, struct if_quotactl)
 #define IOC_OBD_STATFS		  _IOWR('f', 164, struct obd_statfs *)
 #define IOC_LOV_GETINFO		 _IOWR('f', 165, struct lov_user_mds_data *)
-#define LL_IOC_FLUSHCTX		 _IOW ('f', 166, long)
-#define LL_IOC_RMTACL		   _IOW ('f', 167, long)
-#define LL_IOC_GETOBDCOUNT	      _IOR ('f', 168, long)
+#define LL_IOC_FLUSHCTX		 _IOW('f', 166, long)
+#define LL_IOC_RMTACL		   _IOW('f', 167, long)
+#define LL_IOC_GETOBDCOUNT	      _IOR('f', 168, long)
 #define LL_IOC_LLOOP_ATTACH	     _IOWR('f', 169, long)
 #define LL_IOC_LLOOP_DETACH	     _IOWR('f', 170, long)
 #define LL_IOC_LLOOP_INFO	       _IOWR('f', 171, struct lu_fid)
 #define LL_IOC_LLOOP_DETACH_BYDEV       _IOWR('f', 172, long)
-#define LL_IOC_PATH2FID		 _IOR ('f', 173, long)
+#define LL_IOC_PATH2FID		 _IOR('f', 173, long)
 #define LL_IOC_GET_CONNECT_FLAGS	_IOWR('f', 174, __u64 *)
-#define LL_IOC_GET_MDTIDX	       _IOR ('f', 175, int)
+#define LL_IOC_GET_MDTIDX	       _IOR('f', 175, int)
 
 /* see <lustre_lib.h> for ioctl numbers 177-210 */
 
@@ -676,7 +676,12 @@ static inline const char *changelog_type2str(int type)
 #define CLF_UNLINK_HSM_EXISTS 0x0002 /* File has something in HSM */
 				     /* HSM cleaning needed */
 /* Flags for rename */
-#define CLF_RENAME_LAST       0x0001 /* rename unlink last hardlink of target */
+#define CLF_RENAME_LAST		0x0001	/* rename unlink last hardlink of
+					 * target
+					 */
+#define CLF_RENAME_LAST_EXISTS	0x0002	/* rename unlink last hardlink of target
+					 * has an archive in backend
+					 */
 
 /* Flags for HSM */
 /* 12b used (from high weight to low weight):
@@ -833,9 +838,8 @@ struct ioc_data_version {
 	__u64 idv_flags;     /* See LL_DV_xxx */
 };
 
-#define LL_DV_NOFLUSH 0x01   /* Do not take READ EXTENT LOCK before sampling
-			      * version. Dirty caches are left unchanged.
-			      */
+#define LL_DV_RD_FLUSH	BIT(0)	/* Flush dirty pages from clients */
+#define LL_DV_WR_FLUSH	BIT(1)	/* Flush all caching pages from clients */
 
 #ifndef offsetof
 # define offsetof(typ, memb)     ((unsigned long)((char *)&(((typ *)0)->memb)))
@@ -1095,12 +1099,12 @@ struct hsm_action_list {
 	__u32 padding1;
 	char  hal_fsname[0];   /* null-terminated */
 	/* struct hsm_action_item[hal_count] follows, aligned on 8-byte
-	 * boundaries. See hai_zero
+	 * boundaries. See hai_first
 	 */
 } __packed;
 
 #ifndef HAVE_CFS_SIZE_ROUND
-static inline int cfs_size_round (int val)
+static inline int cfs_size_round(int val)
 {
 	return (val + 7) & (~0x7);
 }
@@ -1109,7 +1113,7 @@ static inline int cfs_size_round (int val)
 #endif
 
 /* Return pointer to first hai in action list */
-static inline struct hsm_action_item *hai_zero(struct hsm_action_list *hal)
+static inline struct hsm_action_item *hai_first(struct hsm_action_list *hal)
 {
 	return (struct hsm_action_item *)(hal->hal_fsname +
 					  cfs_size_round(strlen(hal-> \
@@ -1131,7 +1135,7 @@ static inline int hal_size(struct hsm_action_list *hal)
 	struct hsm_action_item *hai;
 
 	sz = sizeof(*hal) + cfs_size_round(strlen(hal->hal_fsname) + 1);
-	hai = hai_zero(hal);
+	hai = hai_first(hal);
 	for (i = 0; i < hal->hal_count; i++, hai = hai_next(hai))
 		sz += cfs_size_round(hai->hai_len);
 
diff --git a/drivers/staging/lustre/lustre/include/lustre_cfg.h b/drivers/staging/lustre/lustre/include/lustre_cfg.h
index bb16ae980..e229e91f7 100644
--- a/drivers/staging/lustre/lustre/include/lustre_cfg.h
+++ b/drivers/staging/lustre/lustre/include/lustre_cfg.h
@@ -161,7 +161,7 @@ static inline void *lustre_cfg_buf(struct lustre_cfg *lcfg, int index)
 	int offset;
 	int bufcount;
 
-	LASSERT (index >= 0);
+	LASSERT(index >= 0);
 
 	bufcount = lcfg->lcfg_bufcount;
 	if (index >= bufcount)
diff --git a/drivers/staging/lustre/lustre/include/lustre_disk.h b/drivers/staging/lustre/lustre/include/lustre_disk.h
index 95fd36063..b36821ffb 100644
--- a/drivers/staging/lustre/lustre/include/lustre_disk.h
+++ b/drivers/staging/lustre/lustre/include/lustre_disk.h
@@ -130,7 +130,6 @@ struct lustre_sb_info {
 	struct lustre_mount_data *lsi_lmd;     /* mount command info */
 	struct ll_sb_info	*lsi_llsbi;   /* add'l client sbi info */
 	struct dt_device	 *lsi_dt_dev;  /* dt device to access disk fs*/
-	struct vfsmount	  *lsi_srv_mnt; /* the one server mount */
 	atomic_t	      lsi_mounts;  /* references to the srv_mnt */
 	char			  lsi_svname[MTI_NAME_MAXLEN];
 	char			  lsi_osd_obdname[64];
@@ -158,7 +157,6 @@ struct lustre_sb_info {
 struct lustre_mount_info {
 	char		 *lmi_name;
 	struct super_block   *lmi_sb;
-	struct vfsmount      *lmi_mnt;
 	struct list_head	    lmi_list_chain;
 };
 
diff --git a/drivers/staging/lustre/lustre/include/lustre_dlm.h b/drivers/staging/lustre/lustre/include/lustre_dlm.h
index 8b0364f71..9cade144f 100644
--- a/drivers/staging/lustre/lustre/include/lustre_dlm.h
+++ b/drivers/staging/lustre/lustre/include/lustre_dlm.h
@@ -71,6 +71,7 @@ struct obd_device;
  */
 enum ldlm_error {
 	ELDLM_OK = 0,
+	ELDLM_LOCK_MATCHED = 1,
 
 	ELDLM_LOCK_CHANGED = 300,
 	ELDLM_LOCK_ABORTED = 301,
@@ -269,7 +270,7 @@ struct ldlm_pool {
 	struct completion	 pl_kobj_unregister;
 };
 
-typedef int (*ldlm_cancel_for_recovery)(struct ldlm_lock *lock);
+typedef int (*ldlm_cancel_cbt)(struct ldlm_lock *lock);
 
 /**
  * LVB operations.
@@ -446,8 +447,11 @@ struct ldlm_namespace {
 	/** Limit of parallel AST RPC count. */
 	unsigned		ns_max_parallel_ast;
 
-	/** Callback to cancel locks before replaying it during recovery. */
-	ldlm_cancel_for_recovery ns_cancel_for_recovery;
+	/**
+	 * Callback to check if a lock is good to be canceled by ELC or
+	 * during recovery.
+	 */
+	ldlm_cancel_cbt		ns_cancel;
 
 	/** LDLM lock stats */
 	struct lprocfs_stats	*ns_stats;
@@ -479,9 +483,9 @@ static inline int ns_connect_lru_resize(struct ldlm_namespace *ns)
 }
 
 static inline void ns_register_cancel(struct ldlm_namespace *ns,
-				      ldlm_cancel_for_recovery arg)
+				      ldlm_cancel_cbt arg)
 {
-	ns->ns_cancel_for_recovery = arg;
+	ns->ns_cancel = arg;
 }
 
 struct ldlm_lock;
diff --git a/drivers/staging/lustre/lustre/include/lustre_dlm_flags.h b/drivers/staging/lustre/lustre/include/lustre_dlm_flags.h
index 7f2ba2ffe..e7e0c21a9 100644
--- a/drivers/staging/lustre/lustre/include/lustre_dlm_flags.h
+++ b/drivers/staging/lustre/lustre/include/lustre_dlm_flags.h
@@ -37,17 +37,11 @@
 /** l_flags bits marked as "gone" bits */
 #define LDLM_FL_GONE_MASK               0x0006004000000000ULL
 
-/** l_flags bits marked as "hide_lock" bits */
-#define LDLM_FL_HIDE_LOCK_MASK          0x0000206400000000ULL
-
 /** l_flags bits marked as "inherit" bits */
 #define LDLM_FL_INHERIT_MASK            0x0000000000800000ULL
 
-/** l_flags bits marked as "local_only" bits */
-#define LDLM_FL_LOCAL_ONLY_MASK         0x00FFFFFF00000000ULL
-
-/** l_flags bits marked as "on_wire" bits */
-#define LDLM_FL_ON_WIRE_MASK            0x00000000C08F932FULL
+/** l_flags bits marked as "off_wire" bits */
+#define LDLM_FL_OFF_WIRE_MASK           0x00FFFFFF00000000ULL
 
 /** extent, mode, or resource changed */
 #define LDLM_FL_LOCK_CHANGED            0x0000000000000001ULL /* bit 0 */
@@ -204,7 +198,7 @@
 #define ldlm_set_cancel(_l)             LDLM_SET_FLAG((_l), 1ULL << 36)
 #define ldlm_clear_cancel(_l)           LDLM_CLEAR_FLAG((_l), 1ULL << 36)
 
-/** whatever it might mean */
+/** whatever it might mean -- never transmitted? */
 #define LDLM_FL_LOCAL_ONLY              0x0000002000000000ULL /* bit 37 */
 #define ldlm_is_local_only(_l)          LDLM_TEST_FLAG((_l), 1ULL << 37)
 #define ldlm_set_local_only(_l)         LDLM_SET_FLAG((_l), 1ULL << 37)
@@ -287,18 +281,18 @@
  * has canceled this lock and is waiting for rpc_lock which is taken by
  * the first operation. LDLM_FL_BL_AST is set by ldlm_callback_handler() in
  * the lock to prevent the Early Lock Cancel (ELC) code from cancelling it.
- *
- * LDLM_FL_BL_DONE is to be set by ldlm_cancel_callback() when lock cache is
- * dropped to let ldlm_callback_handler() return EINVAL to the server. It
- * is used when ELC RPC is already prepared and is waiting for rpc_lock,
- * too late to send a separate CANCEL RPC.
  */
 #define LDLM_FL_BL_AST                  0x0000400000000000ULL /* bit 46 */
 #define ldlm_is_bl_ast(_l)              LDLM_TEST_FLAG((_l), 1ULL << 46)
 #define ldlm_set_bl_ast(_l)             LDLM_SET_FLAG((_l), 1ULL << 46)
 #define ldlm_clear_bl_ast(_l)           LDLM_CLEAR_FLAG((_l), 1ULL << 46)
 
-/** whatever it might mean */
+/**
+ * Set by ldlm_cancel_callback() when lock cache is dropped to let
+ * ldlm_callback_handler() return EINVAL to the server. It is used when
+ * ELC RPC is already prepared and is waiting for rpc_lock, too late to
+ * send a separate CANCEL RPC.
+ */
 #define LDLM_FL_BL_DONE                 0x0000800000000000ULL /* bit 47 */
 #define ldlm_is_bl_done(_l)             LDLM_TEST_FLAG((_l), 1ULL << 47)
 #define ldlm_set_bl_done(_l)            LDLM_SET_FLAG((_l), 1ULL << 47)
@@ -381,104 +375,16 @@
 /** test for ldlm_lock flag bit set */
 #define LDLM_TEST_FLAG(_l, _b)        (((_l)->l_flags & (_b)) != 0)
 
+/** multi-bit test: are any of mask bits set? */
+#define LDLM_HAVE_MASK(_l, _m)		((_l)->l_flags & LDLM_FL_##_m##_MASK)
+
 /** set a ldlm_lock flag bit */
 #define LDLM_SET_FLAG(_l, _b)         ((_l)->l_flags |= (_b))
 
 /** clear a ldlm_lock flag bit */
 #define LDLM_CLEAR_FLAG(_l, _b)       ((_l)->l_flags &= ~(_b))
 
-/** Mask of flags inherited from parent lock when doing intents. */
-#define LDLM_INHERIT_FLAGS            LDLM_FL_INHERIT_MASK
-
-/** Mask of Flags sent in AST lock_flags to map into the receiving lock. */
-#define LDLM_AST_FLAGS                LDLM_FL_AST_MASK
-
 /** @} subgroup */
 /** @} group */
-#ifdef WIRESHARK_COMPILE
-static int hf_lustre_ldlm_fl_lock_changed        = -1;
-static int hf_lustre_ldlm_fl_block_granted       = -1;
-static int hf_lustre_ldlm_fl_block_conv          = -1;
-static int hf_lustre_ldlm_fl_block_wait          = -1;
-static int hf_lustre_ldlm_fl_ast_sent            = -1;
-static int hf_lustre_ldlm_fl_replay              = -1;
-static int hf_lustre_ldlm_fl_intent_only         = -1;
-static int hf_lustre_ldlm_fl_has_intent          = -1;
-static int hf_lustre_ldlm_fl_flock_deadlock      = -1;
-static int hf_lustre_ldlm_fl_discard_data        = -1;
-static int hf_lustre_ldlm_fl_no_timeout          = -1;
-static int hf_lustre_ldlm_fl_block_nowait        = -1;
-static int hf_lustre_ldlm_fl_test_lock           = -1;
-static int hf_lustre_ldlm_fl_cancel_on_block     = -1;
-static int hf_lustre_ldlm_fl_deny_on_contention  = -1;
-static int hf_lustre_ldlm_fl_ast_discard_data    = -1;
-static int hf_lustre_ldlm_fl_fail_loc            = -1;
-static int hf_lustre_ldlm_fl_skipped             = -1;
-static int hf_lustre_ldlm_fl_cbpending           = -1;
-static int hf_lustre_ldlm_fl_wait_noreproc       = -1;
-static int hf_lustre_ldlm_fl_cancel              = -1;
-static int hf_lustre_ldlm_fl_local_only          = -1;
-static int hf_lustre_ldlm_fl_failed              = -1;
-static int hf_lustre_ldlm_fl_canceling           = -1;
-static int hf_lustre_ldlm_fl_local               = -1;
-static int hf_lustre_ldlm_fl_lvb_ready           = -1;
-static int hf_lustre_ldlm_fl_kms_ignore          = -1;
-static int hf_lustre_ldlm_fl_cp_reqd             = -1;
-static int hf_lustre_ldlm_fl_cleaned             = -1;
-static int hf_lustre_ldlm_fl_atomic_cb           = -1;
-static int hf_lustre_ldlm_fl_bl_ast              = -1;
-static int hf_lustre_ldlm_fl_bl_done             = -1;
-static int hf_lustre_ldlm_fl_no_lru              = -1;
-static int hf_lustre_ldlm_fl_fail_notified       = -1;
-static int hf_lustre_ldlm_fl_destroyed           = -1;
-static int hf_lustre_ldlm_fl_server_lock         = -1;
-static int hf_lustre_ldlm_fl_res_locked          = -1;
-static int hf_lustre_ldlm_fl_waited              = -1;
-static int hf_lustre_ldlm_fl_ns_srv              = -1;
-static int hf_lustre_ldlm_fl_excl                = -1;
-
-const value_string lustre_ldlm_flags_vals[] = {
-	{LDLM_FL_LOCK_CHANGED,        "LDLM_FL_LOCK_CHANGED"},
-	{LDLM_FL_BLOCK_GRANTED,       "LDLM_FL_BLOCK_GRANTED"},
-	{LDLM_FL_BLOCK_CONV,          "LDLM_FL_BLOCK_CONV"},
-	{LDLM_FL_BLOCK_WAIT,          "LDLM_FL_BLOCK_WAIT"},
-	{LDLM_FL_AST_SENT,            "LDLM_FL_AST_SENT"},
-	{LDLM_FL_REPLAY,              "LDLM_FL_REPLAY"},
-	{LDLM_FL_INTENT_ONLY,         "LDLM_FL_INTENT_ONLY"},
-	{LDLM_FL_HAS_INTENT,          "LDLM_FL_HAS_INTENT"},
-	{LDLM_FL_FLOCK_DEADLOCK,      "LDLM_FL_FLOCK_DEADLOCK"},
-	{LDLM_FL_DISCARD_DATA,        "LDLM_FL_DISCARD_DATA"},
-	{LDLM_FL_NO_TIMEOUT,          "LDLM_FL_NO_TIMEOUT"},
-	{LDLM_FL_BLOCK_NOWAIT,        "LDLM_FL_BLOCK_NOWAIT"},
-	{LDLM_FL_TEST_LOCK,           "LDLM_FL_TEST_LOCK"},
-	{LDLM_FL_CANCEL_ON_BLOCK,     "LDLM_FL_CANCEL_ON_BLOCK"},
-	{LDLM_FL_DENY_ON_CONTENTION,  "LDLM_FL_DENY_ON_CONTENTION"},
-	{LDLM_FL_AST_DISCARD_DATA,    "LDLM_FL_AST_DISCARD_DATA"},
-	{LDLM_FL_FAIL_LOC,            "LDLM_FL_FAIL_LOC"},
-	{LDLM_FL_SKIPPED,             "LDLM_FL_SKIPPED"},
-	{LDLM_FL_CBPENDING,           "LDLM_FL_CBPENDING"},
-	{LDLM_FL_WAIT_NOREPROC,       "LDLM_FL_WAIT_NOREPROC"},
-	{LDLM_FL_CANCEL,              "LDLM_FL_CANCEL"},
-	{LDLM_FL_LOCAL_ONLY,          "LDLM_FL_LOCAL_ONLY"},
-	{LDLM_FL_FAILED,              "LDLM_FL_FAILED"},
-	{LDLM_FL_CANCELING,           "LDLM_FL_CANCELING"},
-	{LDLM_FL_LOCAL,               "LDLM_FL_LOCAL"},
-	{LDLM_FL_LVB_READY,           "LDLM_FL_LVB_READY"},
-	{LDLM_FL_KMS_IGNORE,          "LDLM_FL_KMS_IGNORE"},
-	{LDLM_FL_CP_REQD,             "LDLM_FL_CP_REQD"},
-	{LDLM_FL_CLEANED,             "LDLM_FL_CLEANED"},
-	{LDLM_FL_ATOMIC_CB,           "LDLM_FL_ATOMIC_CB"},
-	{LDLM_FL_BL_AST,              "LDLM_FL_BL_AST"},
-	{LDLM_FL_BL_DONE,             "LDLM_FL_BL_DONE"},
-	{LDLM_FL_NO_LRU,              "LDLM_FL_NO_LRU"},
-	{LDLM_FL_FAIL_NOTIFIED,       "LDLM_FL_FAIL_NOTIFIED"},
-	{LDLM_FL_DESTROYED,           "LDLM_FL_DESTROYED"},
-	{LDLM_FL_SERVER_LOCK,         "LDLM_FL_SERVER_LOCK"},
-	{LDLM_FL_RES_LOCKED,          "LDLM_FL_RES_LOCKED"},
-	{LDLM_FL_WAITED,              "LDLM_FL_WAITED"},
-	{LDLM_FL_NS_SRV,              "LDLM_FL_NS_SRV"},
-	{LDLM_FL_EXCL,                "LDLM_FL_EXCL"},
-	{ 0, NULL }
-};
-#endif /*  WIRESHARK_COMPILE */
+
 #endif /* LDLM_ALL_FLAGS_MASK */
diff --git a/drivers/staging/lustre/lustre/include/lustre_fid.h b/drivers/staging/lustre/lustre/include/lustre_fid.h
index ab4a92390..12e8b585c 100644
--- a/drivers/staging/lustre/lustre/include/lustre_fid.h
+++ b/drivers/staging/lustre/lustre/include/lustre_fid.h
@@ -308,10 +308,10 @@ static inline int fid_seq_in_fldb(__u64 seq)
 	       fid_seq_is_root(seq) || fid_seq_is_dot(seq);
 }
 
-static inline void lu_last_id_fid(struct lu_fid *fid, __u64 seq)
+static inline void lu_last_id_fid(struct lu_fid *fid, __u64 seq, __u32 ost_idx)
 {
 	if (fid_seq_is_mdt0(seq)) {
-		fid->f_seq = fid_idif_seq(0, 0);
+		fid->f_seq = fid_idif_seq(0, ost_idx);
 	} else {
 		LASSERTF(fid_seq_is_norm(seq) || fid_seq_is_echo(seq) ||
 			 fid_seq_is_idif(seq), "%#llx\n", seq);
@@ -498,19 +498,6 @@ static inline void ostid_build_res_name(struct ost_id *oi,
 	}
 }
 
-static inline void ostid_res_name_to_id(struct ost_id *oi,
-					struct ldlm_res_id *name)
-{
-	if (fid_seq_is_mdt0(name->name[LUSTRE_RES_ID_SEQ_OFF])) {
-		/* old resid */
-		ostid_set_seq(oi, name->name[LUSTRE_RES_ID_VER_OID_OFF]);
-		ostid_set_id(oi, name->name[LUSTRE_RES_ID_SEQ_OFF]);
-	} else {
-		/* new resid */
-		fid_extract_from_res_name(&oi->oi_fid, name);
-	}
-}
-
 /**
  * Return true if the resource is for the object identified by this id & group.
  */
@@ -546,7 +533,8 @@ static inline void ost_fid_build_resid(const struct lu_fid *fid,
 }
 
 static inline void ost_fid_from_resid(struct lu_fid *fid,
-				      const struct ldlm_res_id *name)
+				      const struct ldlm_res_id *name,
+				      int ost_idx)
 {
 	if (fid_seq_is_mdt0(name->name[LUSTRE_RES_ID_VER_OID_OFF])) {
 		/* old resid */
@@ -554,7 +542,7 @@ static inline void ost_fid_from_resid(struct lu_fid *fid,
 
 		ostid_set_seq(&oi, name->name[LUSTRE_RES_ID_VER_OID_OFF]);
 		ostid_set_id(&oi, name->name[LUSTRE_RES_ID_SEQ_OFF]);
-		ostid_to_fid(fid, &oi, 0);
+		ostid_to_fid(fid, &oi, ost_idx);
 	} else {
 		/* new resid */
 		fid_extract_from_res_name(fid, name);
diff --git a/drivers/staging/lustre/lustre/include/lustre_import.h b/drivers/staging/lustre/lustre/include/lustre_import.h
index dac2d84d8..8325c82b3 100644
--- a/drivers/staging/lustre/lustre/include/lustre_import.h
+++ b/drivers/staging/lustre/lustre/include/lustre_import.h
@@ -109,7 +109,7 @@ static inline char *ptlrpc_import_state_name(enum lustre_imp_state state)
 		"RECOVER", "FULL", "EVICTED",
 	};
 
-	LASSERT (state <= LUSTRE_IMP_EVICTED);
+	LASSERT(state <= LUSTRE_IMP_EVICTED);
 	return import_state_names[state];
 }
 
diff --git a/drivers/staging/lustre/lustre/include/lustre_lib.h b/drivers/staging/lustre/lustre/include/lustre_lib.h
index f2223d558..00b976766 100644
--- a/drivers/staging/lustre/lustre/include/lustre_lib.h
+++ b/drivers/staging/lustre/lustre/include/lustre_lib.h
@@ -280,16 +280,16 @@ static inline void obd_ioctl_freedata(char *buf, int len)
 #define OBD_IOC_DATA_TYPE long
 
 #define OBD_IOC_CREATE		 _IOWR('f', 101, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_DESTROY		_IOW ('f', 104, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_DESTROY		_IOW('f', 104, OBD_IOC_DATA_TYPE)
 #define OBD_IOC_PREALLOCATE	    _IOWR('f', 105, OBD_IOC_DATA_TYPE)
 
-#define OBD_IOC_SETATTR		_IOW ('f', 107, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_SETATTR		_IOW('f', 107, OBD_IOC_DATA_TYPE)
 #define OBD_IOC_GETATTR		_IOWR ('f', 108, OBD_IOC_DATA_TYPE)
 #define OBD_IOC_READ		   _IOWR('f', 109, OBD_IOC_DATA_TYPE)
 #define OBD_IOC_WRITE		  _IOWR('f', 110, OBD_IOC_DATA_TYPE)
 
 #define OBD_IOC_STATFS		 _IOWR('f', 113, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_SYNC		   _IOW ('f', 114, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_SYNC		   _IOW('f', 114, OBD_IOC_DATA_TYPE)
 #define OBD_IOC_READ2		  _IOWR('f', 115, OBD_IOC_DATA_TYPE)
 #define OBD_IOC_FORMAT		 _IOWR('f', 116, OBD_IOC_DATA_TYPE)
 #define OBD_IOC_PARTITION	      _IOWR('f', 117, OBD_IOC_DATA_TYPE)
@@ -308,13 +308,13 @@ static inline void obd_ioctl_freedata(char *buf, int len)
 #define OBD_IOC_GETDTNAME	       OBD_IOC_GETNAME
 
 #define OBD_IOC_LOV_GET_CONFIG	 _IOWR('f', 132, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_CLIENT_RECOVER	 _IOW ('f', 133, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_PING_TARGET	    _IOW ('f', 136, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_CLIENT_RECOVER	 _IOW('f', 133, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_PING_TARGET	    _IOW('f', 136, OBD_IOC_DATA_TYPE)
 
 #define OBD_IOC_DEC_FS_USE_COUNT       _IO  ('f', 139)
-#define OBD_IOC_NO_TRANSNO	     _IOW ('f', 140, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_SET_READONLY	   _IOW ('f', 141, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_ABORT_RECOVERY	 _IOR ('f', 142, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_NO_TRANSNO	     _IOW('f', 140, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_SET_READONLY	   _IOW('f', 141, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_ABORT_RECOVERY	 _IOR('f', 142, OBD_IOC_DATA_TYPE)
 
 #define OBD_IOC_ROOT_SQUASH	    _IOWR('f', 143, OBD_IOC_DATA_TYPE)
 
@@ -324,27 +324,27 @@ static inline void obd_ioctl_freedata(char *buf, int len)
 
 #define OBD_IOC_CLOSE_UUID	     _IOWR ('f', 147, OBD_IOC_DATA_TYPE)
 
-#define OBD_IOC_CHANGELOG_SEND	 _IOW ('f', 148, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_CHANGELOG_SEND	 _IOW('f', 148, OBD_IOC_DATA_TYPE)
 #define OBD_IOC_GETDEVICE	      _IOWR ('f', 149, OBD_IOC_DATA_TYPE)
 #define OBD_IOC_FID2PATH	       _IOWR ('f', 150, OBD_IOC_DATA_TYPE)
 /* see also <lustre/lustre_user.h> for ioctls 151-153 */
 /* OBD_IOC_LOV_SETSTRIPE: See also LL_IOC_LOV_SETSTRIPE */
-#define OBD_IOC_LOV_SETSTRIPE	  _IOW ('f', 154, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_LOV_SETSTRIPE	  _IOW('f', 154, OBD_IOC_DATA_TYPE)
 /* OBD_IOC_LOV_GETSTRIPE: See also LL_IOC_LOV_GETSTRIPE */
-#define OBD_IOC_LOV_GETSTRIPE	  _IOW ('f', 155, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_LOV_GETSTRIPE	  _IOW('f', 155, OBD_IOC_DATA_TYPE)
 /* OBD_IOC_LOV_SETEA: See also LL_IOC_LOV_SETEA */
-#define OBD_IOC_LOV_SETEA	      _IOW ('f', 156, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_LOV_SETEA	      _IOW('f', 156, OBD_IOC_DATA_TYPE)
 /* see <lustre/lustre_user.h> for ioctls 157-159 */
 /* OBD_IOC_QUOTACHECK: See also LL_IOC_QUOTACHECK */
-#define OBD_IOC_QUOTACHECK	     _IOW ('f', 160, int)
+#define OBD_IOC_QUOTACHECK	     _IOW('f', 160, int)
 /* OBD_IOC_POLL_QUOTACHECK: See also LL_IOC_POLL_QUOTACHECK */
-#define OBD_IOC_POLL_QUOTACHECK	_IOR ('f', 161, struct if_quotacheck *)
+#define OBD_IOC_POLL_QUOTACHECK	_IOR('f', 161, struct if_quotacheck *)
 /* OBD_IOC_QUOTACTL: See also LL_IOC_QUOTACTL */
 #define OBD_IOC_QUOTACTL	       _IOWR('f', 162, struct if_quotactl)
 /* see  also <lustre/lustre_user.h> for ioctls 163-176 */
-#define OBD_IOC_CHANGELOG_REG	  _IOW ('f', 177, struct obd_ioctl_data)
-#define OBD_IOC_CHANGELOG_DEREG	_IOW ('f', 178, struct obd_ioctl_data)
-#define OBD_IOC_CHANGELOG_CLEAR	_IOW ('f', 179, struct obd_ioctl_data)
+#define OBD_IOC_CHANGELOG_REG	  _IOW('f', 177, struct obd_ioctl_data)
+#define OBD_IOC_CHANGELOG_DEREG	_IOW('f', 178, struct obd_ioctl_data)
+#define OBD_IOC_CHANGELOG_CLEAR	_IOW('f', 179, struct obd_ioctl_data)
 #define OBD_IOC_RECORD		 _IOWR('f', 180, OBD_IOC_DATA_TYPE)
 #define OBD_IOC_ENDRECORD	      _IOWR('f', 181, OBD_IOC_DATA_TYPE)
 #define OBD_IOC_PARSE		  _IOWR('f', 182, OBD_IOC_DATA_TYPE)
@@ -352,7 +352,7 @@ static inline void obd_ioctl_freedata(char *buf, int len)
 #define OBD_IOC_PROCESS_CFG	    _IOWR('f', 184, OBD_IOC_DATA_TYPE)
 #define OBD_IOC_DUMP_LOG	       _IOWR('f', 185, OBD_IOC_DATA_TYPE)
 #define OBD_IOC_CLEAR_LOG	      _IOWR('f', 186, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_PARAM		  _IOW ('f', 187, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_PARAM		  _IOW('f', 187, OBD_IOC_DATA_TYPE)
 #define OBD_IOC_POOL		   _IOWR('f', 188, OBD_IOC_DATA_TYPE)
 #define OBD_IOC_REPLACE_NIDS	   _IOWR('f', 189, OBD_IOC_DATA_TYPE)
 
@@ -522,6 +522,28 @@ struct l_wait_info {
 			   sigmask(SIGTERM) | sigmask(SIGQUIT) |	\
 			   sigmask(SIGALRM))
 
+/**
+ * wait_queue_t of Linux (version < 2.6.34) is a FIFO list for exclusively
+ * waiting threads, which is not always desirable because all threads will
+ * be waken up again and again, even user only needs a few of them to be
+ * active most time. This is not good for performance because cache can
+ * be polluted by different threads.
+ *
+ * LIFO list can resolve this problem because we always wakeup the most
+ * recent active thread by default.
+ *
+ * NB: please don't call non-exclusive & exclusive wait on the same
+ * waitq if add_wait_queue_exclusive_head is used.
+ */
+#define add_wait_queue_exclusive_head(waitq, link)		\
+{								\
+	unsigned long flags;					\
+								\
+	spin_lock_irqsave(&((waitq)->lock), flags);		\
+	__add_wait_queue_exclusive(waitq, link);		\
+	spin_unlock_irqrestore(&((waitq)->lock), flags);	\
+}
+
 /*
  * wait for @condition to become true, but no longer than timeout, specified
  * by @info.
@@ -578,7 +600,7 @@ do {									   \
 									       \
 		if (condition)						 \
 			break;						 \
-		if (cfs_signal_pending()) {				    \
+		if (signal_pending(current)) {				    \
 			if (info->lwi_on_signal &&		     \
 			    (__timeout == 0 || __allow_intr)) {		\
 				if (info->lwi_on_signal != LWI_ON_SIGNAL_NOOP) \
diff --git a/drivers/staging/lustre/lustre/include/lustre_mdc.h b/drivers/staging/lustre/lustre/include/lustre_mdc.h
index af77eb359..f267ff8a6 100644
--- a/drivers/staging/lustre/lustre/include/lustre_mdc.h
+++ b/drivers/staging/lustre/lustre/include/lustre_mdc.h
@@ -64,9 +64,27 @@ struct obd_export;
 struct ptlrpc_request;
 struct obd_device;
 
+/**
+ * Serializes in-flight MDT-modifying RPC requests to preserve idempotency.
+ *
+ * This mutex is used to implement execute-once semantics on the MDT.
+ * The MDT stores the last transaction ID and result for every client in
+ * its last_rcvd file. If the client doesn't get a reply, it can safely
+ * resend the request and the MDT will reconstruct the reply being aware
+ * that the request has already been executed. Without this lock,
+ * execution status of concurrent in-flight requests would be
+ * overwritten.
+ *
+ * This design limits the extent to which we can keep a full pipeline of
+ * in-flight requests from a single client.  This limitation could be
+ * overcome by allowing multiple slots per client in the last_rcvd file.
+ */
 struct mdc_rpc_lock {
+	/** Lock protecting in-flight RPC concurrency. */
 	struct mutex		rpcl_mutex;
+	/** Intent associated with currently executing request. */
 	struct lookup_intent	*rpcl_it;
+	/** Used for MDS/RPC load testing purposes. */
 	int			rpcl_fakes;
 };
 
diff --git a/drivers/staging/lustre/lustre/include/lustre_net.h b/drivers/staging/lustre/lustre/include/lustre_net.h
index 69586a522..a7973d5de 100644
--- a/drivers/staging/lustre/lustre/include/lustre_net.h
+++ b/drivers/staging/lustre/lustre/include/lustre_net.h
@@ -1327,7 +1327,9 @@ struct ptlrpc_request {
 		/* allow the req to be sent if the import is in recovery
 		 * status
 		 */
-		rq_allow_replay:1;
+		rq_allow_replay:1,
+		/* bulk request, sent to server, but uncommitted */
+		rq_unstable:1;
 
 	unsigned int rq_nr_resend;
 
diff --git a/drivers/staging/lustre/lustre/include/lustre_param.h b/drivers/staging/lustre/lustre/include/lustre_param.h
index 383fe6feb..a42cf90c1 100644
--- a/drivers/staging/lustre/lustre/include/lustre_param.h
+++ b/drivers/staging/lustre/lustre/include/lustre_param.h
@@ -89,6 +89,7 @@ int class_parse_nid_quiet(char *buf, lnet_nid_t *nid, char **endh);
 
 /* Prefixes for parameters handled by obd's proc methods (XXX_process_config) */
 #define PARAM_OST		  "ost."
+#define PARAM_OSD		"osd."
 #define PARAM_OSC		  "osc."
 #define PARAM_MDT		  "mdt."
 #define PARAM_MDD		  "mdd."
diff --git a/drivers/staging/lustre/lustre/include/lustre_req_layout.h b/drivers/staging/lustre/lustre/include/lustre_req_layout.h
index b2e67fcf9..0aac4391e 100644
--- a/drivers/staging/lustre/lustre/include/lustre_req_layout.h
+++ b/drivers/staging/lustre/lustre/include/lustre_req_layout.h
@@ -137,6 +137,7 @@ extern struct req_format RQF_MGS_CONFIG_READ;
 /* fid/fld req_format */
 extern struct req_format RQF_SEQ_QUERY;
 extern struct req_format RQF_FLD_QUERY;
+extern struct req_format RQF_FLD_READ;
 /* MDS req_format */
 extern struct req_format RQF_MDS_CONNECT;
 extern struct req_format RQF_MDS_DISCONNECT;
@@ -199,7 +200,7 @@ extern struct req_format RQF_OST_BRW_READ;
 extern struct req_format RQF_OST_BRW_WRITE;
 extern struct req_format RQF_OST_STATFS;
 extern struct req_format RQF_OST_SET_GRANT_INFO;
-extern struct req_format RQF_OST_GET_INFO_GENERIC;
+extern struct req_format RQF_OST_GET_INFO;
 extern struct req_format RQF_OST_GET_INFO_LAST_ID;
 extern struct req_format RQF_OST_GET_INFO_LAST_FID;
 extern struct req_format RQF_OST_SET_INFO_LAST_FID;
diff --git a/drivers/staging/lustre/lustre/include/obd.h b/drivers/staging/lustre/lustre/include/obd.h
index 4264d9765..2d926e0ee 100644
--- a/drivers/staging/lustre/lustre/include/obd.h
+++ b/drivers/staging/lustre/lustre/include/obd.h
@@ -37,7 +37,7 @@
 #ifndef __OBD_H
 #define __OBD_H
 
-#include "linux/obd.h"
+#include <linux/spinlock.h>
 
 #define IOC_OSC_TYPE	 'h'
 #define IOC_OSC_MIN_NR       20
@@ -54,6 +54,7 @@
 #include "lustre_export.h"
 #include "lustre_fid.h"
 #include "lustre_fld.h"
+#include "lustre_intent.h"
 
 #define MAX_OBD_DEVICES 8192
 
@@ -165,9 +166,6 @@ struct obd_info {
 	obd_enqueue_update_f    oi_cb_up;
 };
 
-void lov_stripe_lock(struct lov_stripe_md *md);
-void lov_stripe_unlock(struct lov_stripe_md *md);
-
 struct obd_type {
 	struct list_head typ_chain;
 	struct obd_ops *typ_dt_ops;
@@ -293,14 +291,10 @@ struct client_obd {
 	 * blocking everywhere, but we don't want to slow down fast-path of
 	 * our main platform.)
 	 *
-	 * Exact type of ->cl_loi_list_lock is defined in arch/obd.h together
-	 * with client_obd_list_{un,}lock() and
-	 * client_obd_list_lock_{init,done}() functions.
-	 *
 	 * NB by Jinshan: though field names are still _loi_, but actually
 	 * osc_object{}s are in the list.
 	 */
-	struct client_obd_lock	       cl_loi_list_lock;
+	spinlock_t		       cl_loi_list_lock;
 	struct list_head	       cl_loi_ready_list;
 	struct list_head	       cl_loi_hp_ready_list;
 	struct list_head	       cl_loi_write_list;
@@ -327,7 +321,8 @@ struct client_obd {
 	atomic_t		 cl_lru_shrinkers;
 	atomic_t		 cl_lru_in_list;
 	struct list_head	 cl_lru_list; /* lru page list */
-	struct client_obd_lock   cl_lru_list_lock; /* page list protector */
+	spinlock_t		 cl_lru_list_lock; /* page list protector */
+	atomic_t		 cl_unstable_count;
 
 	/* number of in flight destroy rpcs is limited to max_rpcs_in_flight */
 	atomic_t	     cl_destroy_in_flight;
@@ -364,6 +359,7 @@ struct client_obd {
 
 	/* ptlrpc work for writeback in ptlrpcd context */
 	void		    *cl_writeback_work;
+	void			*cl_lru_work;
 	/* hash tables for osc_quota_info */
 	struct cfs_hash	      *cl_quota_hash[MAXQUOTAS];
 };
@@ -391,45 +387,9 @@ struct ost_pool {
 	struct rw_semaphore op_rw_sem;     /* to protect ost_pool use */
 };
 
-/* Round-robin allocator data */
-struct lov_qos_rr {
-	__u32	       lqr_start_idx;   /* start index of new inode */
-	__u32	       lqr_offset_idx;  /* aliasing for start_idx  */
-	int		 lqr_start_count; /* reseed counter */
-	struct ost_pool     lqr_pool;	/* round-robin optimized list */
-	unsigned long       lqr_dirty:1;     /* recalc round-robin list */
-};
-
 /* allow statfs data caching for 1 second */
 #define OBD_STATFS_CACHE_SECONDS 1
 
-struct lov_statfs_data {
-	struct obd_info   lsd_oi;
-	struct obd_statfs lsd_statfs;
-};
-
-/* Stripe placement optimization */
-struct lov_qos {
-	struct list_head    lq_oss_list; /* list of OSSs that targets use */
-	struct rw_semaphore lq_rw_sem;
-	__u32		lq_active_oss_count;
-	unsigned int	lq_prio_free;   /* priority for free space */
-	unsigned int	lq_threshold_rr;/* priority for rr */
-	struct lov_qos_rr   lq_rr;	  /* round robin qos data */
-	unsigned long       lq_dirty:1,     /* recalc qos data */
-			    lq_same_space:1,/* the ost's all have approx.
-					     * the same space avail
-					     */
-			    lq_reset:1,     /* zero current penalties */
-			    lq_statfs_in_progress:1; /* statfs op in
-							progress */
-	/* qos statfs data */
-	struct lov_statfs_data *lq_statfs_data;
-	wait_queue_head_t lq_statfs_waitq; /* waitqueue to notify statfs
-					    * requests completion
-					    */
-};
-
 struct lov_tgt_desc {
 	struct list_head	  ltd_kill;
 	struct obd_uuid     ltd_uuid;
@@ -442,25 +402,6 @@ struct lov_tgt_desc {
 			    ltd_reap:1;  /* should this target be deleted */
 };
 
-/* Pool metadata */
-#define pool_tgt_size(_p)   _p->pool_obds.op_size
-#define pool_tgt_count(_p)  _p->pool_obds.op_count
-#define pool_tgt_array(_p)  _p->pool_obds.op_array
-#define pool_tgt_rw_sem(_p) _p->pool_obds.op_rw_sem
-
-struct pool_desc {
-	char		  pool_name[LOV_MAXPOOLNAME + 1]; /* name of pool */
-	struct ost_pool       pool_obds;	      /* pool members */
-	atomic_t	  pool_refcount;	  /* pool ref. counter */
-	struct lov_qos_rr     pool_rr;		/* round robin qos */
-	struct hlist_node      pool_hash;	      /* access by poolname */
-	struct list_head	    pool_list;	      /* serial access */
-	struct dentry		*pool_debugfs_entry;	/* file in debugfs */
-	struct obd_device    *pool_lobd;	/* obd of the lov/lod to which
-						 * this pool belongs
-						 */
-};
-
 struct lov_obd {
 	struct lov_desc	 desc;
 	struct lov_tgt_desc   **lov_tgts;	      /* sparse array */
@@ -468,8 +409,6 @@ struct lov_obd {
 	struct mutex		lov_lock;
 	struct obd_connect_data lov_ocd;
 	atomic_t	    lov_refcount;
-	__u32		   lov_tgt_count;	 /* how many OBD's */
-	__u32		   lov_active_tgt_count;  /* how many active */
 	__u32		   lov_death_row;/* tgts scheduled to be deleted */
 	__u32		   lov_tgt_size;   /* size of tgts array */
 	int		     lov_connects;
@@ -479,7 +418,7 @@ struct lov_obd {
 	struct dentry		*lov_pool_debugfs_entry;
 	enum lustre_sec_part    lov_sp_me;
 
-	/* Cached LRU pages from upper layer */
+	/* Cached LRU and unstable data from upper layer */
 	void		       *lov_cache;
 
 	struct rw_semaphore     lov_notify_lock;
@@ -511,7 +450,7 @@ struct lmv_obd {
 	struct obd_uuid		cluuid;
 	struct obd_export	*exp;
 
-	struct mutex		init_mutex;
+	struct mutex		lmv_init_mutex;
 	int			connected;
 	int			max_easize;
 	int			max_def_easize;
diff --git a/drivers/staging/lustre/lustre/include/obd_cksum.h b/drivers/staging/lustre/lustre/include/obd_cksum.h
index 637fa2211..f6c18df90 100644
--- a/drivers/staging/lustre/lustre/include/obd_cksum.h
+++ b/drivers/staging/lustre/lustre/include/obd_cksum.h
@@ -35,6 +35,7 @@
 #ifndef __OBD_CKSUM
 #define __OBD_CKSUM
 #include "../../include/linux/libcfs/libcfs.h"
+#include "../../include/linux/libcfs/libcfs_crypto.h"
 #include "lustre/lustre_idl.h"
 
 static inline unsigned char cksum_obd2cfs(enum cksum_type cksum_type)
diff --git a/drivers/staging/lustre/lustre/include/obd_class.h b/drivers/staging/lustre/lustre/include/obd_class.h
index 706869f8c..32863bcb3 100644
--- a/drivers/staging/lustre/lustre/include/obd_class.h
+++ b/drivers/staging/lustre/lustre/include/obd_class.h
@@ -477,7 +477,7 @@ static inline int obd_setup(struct obd_device *obd, struct lustre_cfg *cfg)
 		struct lu_context  session_ctx;
 		struct lu_env env;
 
-		lu_context_init(&session_ctx, LCT_SESSION);
+		lu_context_init(&session_ctx, LCT_SESSION | LCT_SERVER_SESSION);
 		session_ctx.lc_thread = NULL;
 		lu_context_enter(&session_ctx);
 
@@ -490,8 +490,9 @@ static inline int obd_setup(struct obd_device *obd, struct lustre_cfg *cfg)
 				obd->obd_lu_dev = d;
 				d->ld_obd = obd;
 				rc = 0;
-			} else
+			} else {
 				rc = PTR_ERR(d);
+			}
 		}
 		lu_context_exit(&session_ctx);
 		lu_context_fini(&session_ctx);
diff --git a/drivers/staging/lustre/lustre/include/obd_support.h b/drivers/staging/lustre/lustre/include/obd_support.h
index f8ee3a325..60034d39b 100644
--- a/drivers/staging/lustre/lustre/include/obd_support.h
+++ b/drivers/staging/lustre/lustre/include/obd_support.h
@@ -58,6 +58,7 @@ extern int at_early_margin;
 extern int at_extra;
 extern unsigned int obd_sync_filter;
 extern unsigned int obd_max_dirty_pages;
+extern atomic_t obd_unstable_pages;
 extern atomic_t obd_dirty_pages;
 extern atomic_t obd_dirty_transit_pages;
 extern char obd_jobid_var[];
@@ -289,6 +290,7 @@ extern char obd_jobid_var[];
 #define OBD_FAIL_OST_ENOINO	      0x229
 #define OBD_FAIL_OST_DQACQ_NET	   0x230
 #define OBD_FAIL_OST_STATFS_EINPROGRESS  0x231
+#define OBD_FAIL_OST_SET_INFO_NET		0x232
 
 #define OBD_FAIL_LDLM		    0x300
 #define OBD_FAIL_LDLM_NAMESPACE_NEW      0x301
@@ -319,6 +321,7 @@ extern char obd_jobid_var[];
 #define OBD_FAIL_LDLM_AGL_DELAY	  0x31a
 #define OBD_FAIL_LDLM_AGL_NOLOCK	 0x31b
 #define OBD_FAIL_LDLM_OST_LVB		 0x31c
+#define OBD_FAIL_LDLM_ENQUEUE_HANG	 0x31d
 
 /* LOCKLESS IO */
 #define OBD_FAIL_LDLM_SET_CONTENTION     0x385
@@ -426,6 +429,7 @@ extern char obd_jobid_var[];
 
 #define OBD_FAIL_FLD		     0x1100
 #define OBD_FAIL_FLD_QUERY_NET	   0x1101
+#define OBD_FAIL_FLD_READ_NET		0x1102
 
 #define OBD_FAIL_SEC_CTX		 0x1200
 #define OBD_FAIL_SEC_CTX_INIT_NET	0x1201
diff --git a/drivers/staging/lustre/lustre/lclient/glimpse.c b/drivers/staging/lustre/lustre/lclient/glimpse.c
deleted file mode 100644
index c4e8a0878..000000000
--- a/drivers/staging/lustre/lustre/lclient/glimpse.c
+++ /dev/null
@@ -1,270 +0,0 @@
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * glimpse code shared between vvp and liblustre (and other Lustre clients in
- * the future).
- *
- *   Author: Nikita Danilov <nikita.danilov@sun.com>
- *   Author: Oleg Drokin <oleg.drokin@sun.com>
- */
-
-#include "../../include/linux/libcfs/libcfs.h"
-#include "../include/obd_class.h"
-#include "../include/obd_support.h"
-#include "../include/obd.h"
-
-#include "../include/lustre_dlm.h"
-#include "../include/lustre_lite.h"
-#include "../include/lustre_mdc.h"
-#include <linux/pagemap.h>
-#include <linux/file.h>
-
-#include "../include/cl_object.h"
-#include "../include/lclient.h"
-#include "../llite/llite_internal.h"
-
-static const struct cl_lock_descr whole_file = {
-	.cld_start = 0,
-	.cld_end   = CL_PAGE_EOF,
-	.cld_mode  = CLM_READ
-};
-
-/*
- * Check whether file has possible unwriten pages.
- *
- * \retval 1    file is mmap-ed or has dirty pages
- *	 0    otherwise
- */
-blkcnt_t dirty_cnt(struct inode *inode)
-{
-	blkcnt_t cnt = 0;
-	struct ccc_object *vob = cl_inode2ccc(inode);
-	void	      *results[1];
-
-	if (inode->i_mapping)
-		cnt += radix_tree_gang_lookup_tag(&inode->i_mapping->page_tree,
-						  results, 0, 1,
-						  PAGECACHE_TAG_DIRTY);
-	if (cnt == 0 && atomic_read(&vob->cob_mmap_cnt) > 0)
-		cnt = 1;
-
-	return (cnt > 0) ? 1 : 0;
-}
-
-int cl_glimpse_lock(const struct lu_env *env, struct cl_io *io,
-		    struct inode *inode, struct cl_object *clob, int agl)
-{
-	struct cl_lock_descr *descr = &ccc_env_info(env)->cti_descr;
-	struct cl_inode_info *lli   = cl_i2info(inode);
-	const struct lu_fid  *fid   = lu_object_fid(&clob->co_lu);
-	struct ccc_io	*cio   = ccc_env_io(env);
-	struct cl_lock       *lock;
-	int result;
-
-	result = 0;
-	if (!(lli->lli_flags & LLIF_MDS_SIZE_LOCK)) {
-		CDEBUG(D_DLMTRACE, "Glimpsing inode "DFID"\n", PFID(fid));
-		if (lli->lli_has_smd) {
-			/* NOTE: this looks like DLM lock request, but it may
-			 *       not be one. Due to CEF_ASYNC flag (translated
-			 *       to LDLM_FL_HAS_INTENT by osc), this is
-			 *       glimpse request, that won't revoke any
-			 *       conflicting DLM locks held. Instead,
-			 *       ll_glimpse_callback() will be called on each
-			 *       client holding a DLM lock against this file,
-			 *       and resulting size will be returned for each
-			 *       stripe. DLM lock on [0, EOF] is acquired only
-			 *       if there were no conflicting locks. If there
-			 *       were conflicting locks, enqueuing or waiting
-			 *       fails with -ENAVAIL, but valid inode
-			 *       attributes are returned anyway.
-			 */
-			*descr = whole_file;
-			descr->cld_obj   = clob;
-			descr->cld_mode  = CLM_PHANTOM;
-			descr->cld_enq_flags = CEF_ASYNC | CEF_MUST;
-			if (agl)
-				descr->cld_enq_flags |= CEF_AGL;
-			cio->cui_glimpse = 1;
-			/*
-			 * CEF_ASYNC is used because glimpse sub-locks cannot
-			 * deadlock (because they never conflict with other
-			 * locks) and, hence, can be enqueued out-of-order.
-			 *
-			 * CEF_MUST protects glimpse lock from conversion into
-			 * a lockless mode.
-			 */
-			lock = cl_lock_request(env, io, descr, "glimpse",
-					       current);
-			cio->cui_glimpse = 0;
-
-			if (!lock)
-				return 0;
-
-			if (IS_ERR(lock))
-				return PTR_ERR(lock);
-
-			LASSERT(agl == 0);
-			result = cl_wait(env, lock);
-			if (result == 0) {
-				cl_merge_lvb(env, inode);
-				if (cl_isize_read(inode) > 0 &&
-				    inode->i_blocks == 0) {
-					/*
-					 * LU-417: Add dirty pages block count
-					 * lest i_blocks reports 0, some "cp" or
-					 * "tar" may think it's a completely
-					 * sparse file and skip it.
-					 */
-					inode->i_blocks = dirty_cnt(inode);
-				}
-				cl_unuse(env, lock);
-			}
-			cl_lock_release(env, lock, "glimpse", current);
-		} else {
-			CDEBUG(D_DLMTRACE, "No objects for inode\n");
-			cl_merge_lvb(env, inode);
-		}
-	}
-
-	return result;
-}
-
-static int cl_io_get(struct inode *inode, struct lu_env **envout,
-		     struct cl_io **ioout, int *refcheck)
-{
-	struct lu_env	  *env;
-	struct cl_io	   *io;
-	struct cl_inode_info   *lli = cl_i2info(inode);
-	struct cl_object       *clob = lli->lli_clob;
-	int result;
-
-	if (S_ISREG(cl_inode_mode(inode))) {
-		env = cl_env_get(refcheck);
-		if (!IS_ERR(env)) {
-			io = ccc_env_thread_io(env);
-			io->ci_obj = clob;
-			*envout = env;
-			*ioout  = io;
-			result = 1;
-		} else
-			result = PTR_ERR(env);
-	} else
-		result = 0;
-	return result;
-}
-
-int cl_glimpse_size0(struct inode *inode, int agl)
-{
-	/*
-	 * We don't need ast_flags argument to cl_glimpse_size(), because
-	 * osc_lock_enqueue() takes care of the possible deadlock that said
-	 * argument was introduced to avoid.
-	 */
-	/*
-	 * XXX but note that ll_file_seek() passes LDLM_FL_BLOCK_NOWAIT to
-	 * cl_glimpse_size(), which doesn't make sense: glimpse locks are not
-	 * blocking anyway.
-	 */
-	struct lu_env	  *env = NULL;
-	struct cl_io	   *io  = NULL;
-	int		     result;
-	int		     refcheck;
-
-	result = cl_io_get(inode, &env, &io, &refcheck);
-	if (result > 0) {
-again:
-		io->ci_verify_layout = 1;
-		result = cl_io_init(env, io, CIT_MISC, io->ci_obj);
-		if (result > 0)
-			/*
-			 * nothing to do for this io. This currently happens
-			 * when stripe sub-object's are not yet created.
-			 */
-			result = io->ci_result;
-		else if (result == 0)
-			result = cl_glimpse_lock(env, io, inode, io->ci_obj,
-						 agl);
-
-		OBD_FAIL_TIMEOUT(OBD_FAIL_GLIMPSE_DELAY, 2);
-		cl_io_fini(env, io);
-		if (unlikely(io->ci_need_restart))
-			goto again;
-		cl_env_put(env, &refcheck);
-	}
-	return result;
-}
-
-int cl_local_size(struct inode *inode)
-{
-	struct lu_env	   *env = NULL;
-	struct cl_io	    *io  = NULL;
-	struct ccc_thread_info  *cti;
-	struct cl_object	*clob;
-	struct cl_lock_descr    *descr;
-	struct cl_lock	  *lock;
-	int		      result;
-	int		      refcheck;
-
-	if (!cl_i2info(inode)->lli_has_smd)
-		return 0;
-
-	result = cl_io_get(inode, &env, &io, &refcheck);
-	if (result <= 0)
-		return result;
-
-	clob = io->ci_obj;
-	result = cl_io_init(env, io, CIT_MISC, clob);
-	if (result > 0)
-		result = io->ci_result;
-	else if (result == 0) {
-		cti = ccc_env_info(env);
-		descr = &cti->cti_descr;
-
-		*descr = whole_file;
-		descr->cld_obj = clob;
-		lock = cl_lock_peek(env, io, descr, "localsize", current);
-		if (lock) {
-			cl_merge_lvb(env, inode);
-			cl_unuse(env, lock);
-			cl_lock_release(env, lock, "localsize", current);
-			result = 0;
-		} else
-			result = -ENODATA;
-	}
-	cl_io_fini(env, io);
-	cl_env_put(env, &refcheck);
-	return result;
-}
diff --git a/drivers/staging/lustre/lustre/lclient/lcommon_cl.c b/drivers/staging/lustre/lustre/lclient/lcommon_cl.c
deleted file mode 100644
index 96141d17d..000000000
--- a/drivers/staging/lustre/lustre/lclient/lcommon_cl.c
+++ /dev/null
@@ -1,1203 +0,0 @@
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * cl code shared between vvp and liblustre (and other Lustre clients in the
- * future).
- *
- *   Author: Nikita Danilov <nikita.danilov@sun.com>
- */
-
-#define DEBUG_SUBSYSTEM S_LLITE
-
-#include "../../include/linux/libcfs/libcfs.h"
-# include <linux/fs.h>
-# include <linux/sched.h>
-# include <linux/mm.h>
-# include <linux/quotaops.h>
-# include <linux/highmem.h>
-# include <linux/pagemap.h>
-# include <linux/rbtree.h>
-
-#include "../include/obd.h"
-#include "../include/obd_support.h"
-#include "../include/lustre_fid.h"
-#include "../include/lustre_lite.h"
-#include "../include/lustre_dlm.h"
-#include "../include/lustre_ver.h"
-#include "../include/lustre_mdc.h"
-#include "../include/cl_object.h"
-
-#include "../include/lclient.h"
-
-#include "../llite/llite_internal.h"
-
-static const struct cl_req_operations ccc_req_ops;
-
-/*
- * ccc_ prefix stands for "Common Client Code".
- */
-
-static struct kmem_cache *ccc_lock_kmem;
-static struct kmem_cache *ccc_object_kmem;
-static struct kmem_cache *ccc_thread_kmem;
-static struct kmem_cache *ccc_session_kmem;
-static struct kmem_cache *ccc_req_kmem;
-
-static struct lu_kmem_descr ccc_caches[] = {
-	{
-		.ckd_cache = &ccc_lock_kmem,
-		.ckd_name  = "ccc_lock_kmem",
-		.ckd_size  = sizeof(struct ccc_lock)
-	},
-	{
-		.ckd_cache = &ccc_object_kmem,
-		.ckd_name  = "ccc_object_kmem",
-		.ckd_size  = sizeof(struct ccc_object)
-	},
-	{
-		.ckd_cache = &ccc_thread_kmem,
-		.ckd_name  = "ccc_thread_kmem",
-		.ckd_size  = sizeof(struct ccc_thread_info),
-	},
-	{
-		.ckd_cache = &ccc_session_kmem,
-		.ckd_name  = "ccc_session_kmem",
-		.ckd_size  = sizeof(struct ccc_session)
-	},
-	{
-		.ckd_cache = &ccc_req_kmem,
-		.ckd_name  = "ccc_req_kmem",
-		.ckd_size  = sizeof(struct ccc_req)
-	},
-	{
-		.ckd_cache = NULL
-	}
-};
-
-/*****************************************************************************
- *
- * Vvp device and device type functions.
- *
- */
-
-void *ccc_key_init(const struct lu_context *ctx, struct lu_context_key *key)
-{
-	struct ccc_thread_info *info;
-
-	info = kmem_cache_zalloc(ccc_thread_kmem, GFP_NOFS);
-	if (!info)
-		info = ERR_PTR(-ENOMEM);
-	return info;
-}
-
-void ccc_key_fini(const struct lu_context *ctx,
-			 struct lu_context_key *key, void *data)
-{
-	struct ccc_thread_info *info = data;
-
-	kmem_cache_free(ccc_thread_kmem, info);
-}
-
-void *ccc_session_key_init(const struct lu_context *ctx,
-				  struct lu_context_key *key)
-{
-	struct ccc_session *session;
-
-	session = kmem_cache_zalloc(ccc_session_kmem, GFP_NOFS);
-	if (!session)
-		session = ERR_PTR(-ENOMEM);
-	return session;
-}
-
-void ccc_session_key_fini(const struct lu_context *ctx,
-				 struct lu_context_key *key, void *data)
-{
-	struct ccc_session *session = data;
-
-	kmem_cache_free(ccc_session_kmem, session);
-}
-
-struct lu_context_key ccc_key = {
-	.lct_tags = LCT_CL_THREAD,
-	.lct_init = ccc_key_init,
-	.lct_fini = ccc_key_fini
-};
-
-struct lu_context_key ccc_session_key = {
-	.lct_tags = LCT_SESSION,
-	.lct_init = ccc_session_key_init,
-	.lct_fini = ccc_session_key_fini
-};
-
-/* type constructor/destructor: ccc_type_{init,fini,start,stop}(). */
-/* LU_TYPE_INIT_FINI(ccc, &ccc_key, &ccc_session_key); */
-
-int ccc_device_init(const struct lu_env *env, struct lu_device *d,
-			   const char *name, struct lu_device *next)
-{
-	struct ccc_device  *vdv;
-	int rc;
-
-	vdv = lu2ccc_dev(d);
-	vdv->cdv_next = lu2cl_dev(next);
-
-	LASSERT(d->ld_site && next->ld_type);
-	next->ld_site = d->ld_site;
-	rc = next->ld_type->ldt_ops->ldto_device_init(
-			env, next, next->ld_type->ldt_name, NULL);
-	if (rc == 0) {
-		lu_device_get(next);
-		lu_ref_add(&next->ld_reference, "lu-stack", &lu_site_init);
-	}
-	return rc;
-}
-
-struct lu_device *ccc_device_fini(const struct lu_env *env,
-					 struct lu_device *d)
-{
-	return cl2lu_dev(lu2ccc_dev(d)->cdv_next);
-}
-
-struct lu_device *ccc_device_alloc(const struct lu_env *env,
-				   struct lu_device_type *t,
-				   struct lustre_cfg *cfg,
-				   const struct lu_device_operations *luops,
-				   const struct cl_device_operations *clops)
-{
-	struct ccc_device *vdv;
-	struct lu_device  *lud;
-	struct cl_site    *site;
-	int rc;
-
-	vdv = kzalloc(sizeof(*vdv), GFP_NOFS);
-	if (!vdv)
-		return ERR_PTR(-ENOMEM);
-
-	lud = &vdv->cdv_cl.cd_lu_dev;
-	cl_device_init(&vdv->cdv_cl, t);
-	ccc2lu_dev(vdv)->ld_ops = luops;
-	vdv->cdv_cl.cd_ops = clops;
-
-	site = kzalloc(sizeof(*site), GFP_NOFS);
-	if (site) {
-		rc = cl_site_init(site, &vdv->cdv_cl);
-		if (rc == 0)
-			rc = lu_site_init_finish(&site->cs_lu);
-		else {
-			LASSERT(!lud->ld_site);
-			CERROR("Cannot init lu_site, rc %d.\n", rc);
-			kfree(site);
-		}
-	} else
-		rc = -ENOMEM;
-	if (rc != 0) {
-		ccc_device_free(env, lud);
-		lud = ERR_PTR(rc);
-	}
-	return lud;
-}
-
-struct lu_device *ccc_device_free(const struct lu_env *env,
-					 struct lu_device *d)
-{
-	struct ccc_device *vdv  = lu2ccc_dev(d);
-	struct cl_site    *site = lu2cl_site(d->ld_site);
-	struct lu_device  *next = cl2lu_dev(vdv->cdv_next);
-
-	if (d->ld_site) {
-		cl_site_fini(site);
-		kfree(site);
-	}
-	cl_device_fini(lu2cl_dev(d));
-	kfree(vdv);
-	return next;
-}
-
-int ccc_req_init(const struct lu_env *env, struct cl_device *dev,
-			struct cl_req *req)
-{
-	struct ccc_req *vrq;
-	int result;
-
-	vrq = kmem_cache_zalloc(ccc_req_kmem, GFP_NOFS);
-	if (vrq) {
-		cl_req_slice_add(req, &vrq->crq_cl, dev, &ccc_req_ops);
-		result = 0;
-	} else
-		result = -ENOMEM;
-	return result;
-}
-
-/**
- * An `emergency' environment used by ccc_inode_fini() when cl_env_get()
- * fails. Access to this environment is serialized by ccc_inode_fini_guard
- * mutex.
- */
-static struct lu_env *ccc_inode_fini_env;
-
-/**
- * A mutex serializing calls to slp_inode_fini() under extreme memory
- * pressure, when environments cannot be allocated.
- */
-static DEFINE_MUTEX(ccc_inode_fini_guard);
-static int dummy_refcheck;
-
-int ccc_global_init(struct lu_device_type *device_type)
-{
-	int result;
-
-	result = lu_kmem_init(ccc_caches);
-	if (result)
-		return result;
-
-	result = lu_device_type_init(device_type);
-	if (result)
-		goto out_kmem;
-
-	ccc_inode_fini_env = cl_env_alloc(&dummy_refcheck,
-					  LCT_REMEMBER|LCT_NOREF);
-	if (IS_ERR(ccc_inode_fini_env)) {
-		result = PTR_ERR(ccc_inode_fini_env);
-		goto out_device;
-	}
-
-	ccc_inode_fini_env->le_ctx.lc_cookie = 0x4;
-	return 0;
-out_device:
-	lu_device_type_fini(device_type);
-out_kmem:
-	lu_kmem_fini(ccc_caches);
-	return result;
-}
-
-void ccc_global_fini(struct lu_device_type *device_type)
-{
-	if (ccc_inode_fini_env) {
-		cl_env_put(ccc_inode_fini_env, &dummy_refcheck);
-		ccc_inode_fini_env = NULL;
-	}
-	lu_device_type_fini(device_type);
-	lu_kmem_fini(ccc_caches);
-}
-
-/*****************************************************************************
- *
- * Object operations.
- *
- */
-
-struct lu_object *ccc_object_alloc(const struct lu_env *env,
-				   const struct lu_object_header *unused,
-				   struct lu_device *dev,
-				   const struct cl_object_operations *clops,
-				   const struct lu_object_operations *luops)
-{
-	struct ccc_object *vob;
-	struct lu_object  *obj;
-
-	vob = kmem_cache_zalloc(ccc_object_kmem, GFP_NOFS);
-	if (vob) {
-		struct cl_object_header *hdr;
-
-		obj = ccc2lu(vob);
-		hdr = &vob->cob_header;
-		cl_object_header_init(hdr);
-		lu_object_init(obj, &hdr->coh_lu, dev);
-		lu_object_add_top(&hdr->coh_lu, obj);
-
-		vob->cob_cl.co_ops = clops;
-		obj->lo_ops = luops;
-	} else
-		obj = NULL;
-	return obj;
-}
-
-int ccc_object_init0(const struct lu_env *env,
-			    struct ccc_object *vob,
-			    const struct cl_object_conf *conf)
-{
-	vob->cob_inode = conf->coc_inode;
-	vob->cob_transient_pages = 0;
-	cl_object_page_init(&vob->cob_cl, sizeof(struct ccc_page));
-	return 0;
-}
-
-int ccc_object_init(const struct lu_env *env, struct lu_object *obj,
-			   const struct lu_object_conf *conf)
-{
-	struct ccc_device *dev = lu2ccc_dev(obj->lo_dev);
-	struct ccc_object *vob = lu2ccc(obj);
-	struct lu_object  *below;
-	struct lu_device  *under;
-	int result;
-
-	under = &dev->cdv_next->cd_lu_dev;
-	below = under->ld_ops->ldo_object_alloc(env, obj->lo_header, under);
-	if (below) {
-		const struct cl_object_conf *cconf;
-
-		cconf = lu2cl_conf(conf);
-		INIT_LIST_HEAD(&vob->cob_pending_list);
-		lu_object_add(obj, below);
-		result = ccc_object_init0(env, vob, cconf);
-	} else
-		result = -ENOMEM;
-	return result;
-}
-
-void ccc_object_free(const struct lu_env *env, struct lu_object *obj)
-{
-	struct ccc_object *vob = lu2ccc(obj);
-
-	lu_object_fini(obj);
-	lu_object_header_fini(obj->lo_header);
-	kmem_cache_free(ccc_object_kmem, vob);
-}
-
-int ccc_lock_init(const struct lu_env *env,
-		  struct cl_object *obj, struct cl_lock *lock,
-		  const struct cl_io *unused,
-		  const struct cl_lock_operations *lkops)
-{
-	struct ccc_lock *clk;
-	int result;
-
-	CLOBINVRNT(env, obj, ccc_object_invariant(obj));
-
-	clk = kmem_cache_zalloc(ccc_lock_kmem, GFP_NOFS);
-	if (clk) {
-		cl_lock_slice_add(lock, &clk->clk_cl, obj, lkops);
-		result = 0;
-	} else
-		result = -ENOMEM;
-	return result;
-}
-
-int ccc_object_glimpse(const struct lu_env *env,
-		       const struct cl_object *obj, struct ost_lvb *lvb)
-{
-	struct inode *inode = ccc_object_inode(obj);
-
-	lvb->lvb_mtime = cl_inode_mtime(inode);
-	lvb->lvb_atime = cl_inode_atime(inode);
-	lvb->lvb_ctime = cl_inode_ctime(inode);
-	/*
-	 * LU-417: Add dirty pages block count lest i_blocks reports 0, some
-	 * "cp" or "tar" on remote node may think it's a completely sparse file
-	 * and skip it.
-	 */
-	if (lvb->lvb_size > 0 && lvb->lvb_blocks == 0)
-		lvb->lvb_blocks = dirty_cnt(inode);
-	return 0;
-}
-
-static void ccc_object_size_lock(struct cl_object *obj)
-{
-	struct inode *inode = ccc_object_inode(obj);
-
-	ll_inode_size_lock(inode);
-	cl_object_attr_lock(obj);
-}
-
-static void ccc_object_size_unlock(struct cl_object *obj)
-{
-	struct inode *inode = ccc_object_inode(obj);
-
-	cl_object_attr_unlock(obj);
-	ll_inode_size_unlock(inode);
-}
-
-/*****************************************************************************
- *
- * Page operations.
- *
- */
-
-struct page *ccc_page_vmpage(const struct lu_env *env,
-			    const struct cl_page_slice *slice)
-{
-	return cl2vm_page(slice);
-}
-
-int ccc_page_is_under_lock(const struct lu_env *env,
-			   const struct cl_page_slice *slice,
-			   struct cl_io *io)
-{
-	struct ccc_io	*cio  = ccc_env_io(env);
-	struct cl_lock_descr *desc = &ccc_env_info(env)->cti_descr;
-	struct cl_page       *page = slice->cpl_page;
-
-	int result;
-
-	if (io->ci_type == CIT_READ || io->ci_type == CIT_WRITE ||
-	    io->ci_type == CIT_FAULT) {
-		if (cio->cui_fd->fd_flags & LL_FILE_GROUP_LOCKED)
-			result = -EBUSY;
-		else {
-			desc->cld_start = page->cp_index;
-			desc->cld_end   = page->cp_index;
-			desc->cld_obj   = page->cp_obj;
-			desc->cld_mode  = CLM_READ;
-			result = cl_queue_match(&io->ci_lockset.cls_done,
-						desc) ? -EBUSY : 0;
-		}
-	} else
-		result = 0;
-	return result;
-}
-
-int ccc_fail(const struct lu_env *env, const struct cl_page_slice *slice)
-{
-	/*
-	 * Cached read?
-	 */
-	LBUG();
-	return 0;
-}
-
-int ccc_transient_page_prep(const struct lu_env *env,
-				   const struct cl_page_slice *slice,
-				   struct cl_io *unused)
-{
-	/* transient page should always be sent. */
-	return 0;
-}
-
-/*****************************************************************************
- *
- * Lock operations.
- *
- */
-
-void ccc_lock_delete(const struct lu_env *env,
-		     const struct cl_lock_slice *slice)
-{
-	CLOBINVRNT(env, slice->cls_obj, ccc_object_invariant(slice->cls_obj));
-}
-
-void ccc_lock_fini(const struct lu_env *env, struct cl_lock_slice *slice)
-{
-	struct ccc_lock *clk = cl2ccc_lock(slice);
-
-	kmem_cache_free(ccc_lock_kmem, clk);
-}
-
-int ccc_lock_enqueue(const struct lu_env *env,
-		     const struct cl_lock_slice *slice,
-		     struct cl_io *unused, __u32 enqflags)
-{
-	CLOBINVRNT(env, slice->cls_obj, ccc_object_invariant(slice->cls_obj));
-	return 0;
-}
-
-int ccc_lock_use(const struct lu_env *env, const struct cl_lock_slice *slice)
-{
-	CLOBINVRNT(env, slice->cls_obj, ccc_object_invariant(slice->cls_obj));
-	return 0;
-}
-
-int ccc_lock_unuse(const struct lu_env *env, const struct cl_lock_slice *slice)
-{
-	CLOBINVRNT(env, slice->cls_obj, ccc_object_invariant(slice->cls_obj));
-	return 0;
-}
-
-int ccc_lock_wait(const struct lu_env *env, const struct cl_lock_slice *slice)
-{
-	CLOBINVRNT(env, slice->cls_obj, ccc_object_invariant(slice->cls_obj));
-	return 0;
-}
-
-/**
- * Implementation of cl_lock_operations::clo_fits_into() methods for ccc
- * layer. This function is executed every time io finds an existing lock in
- * the lock cache while creating new lock. This function has to decide whether
- * cached lock "fits" into io.
- *
- * \param slice lock to be checked
- * \param io    IO that wants a lock.
- *
- * \see lov_lock_fits_into().
- */
-int ccc_lock_fits_into(const struct lu_env *env,
-		       const struct cl_lock_slice *slice,
-		       const struct cl_lock_descr *need,
-		       const struct cl_io *io)
-{
-	const struct cl_lock       *lock  = slice->cls_lock;
-	const struct cl_lock_descr *descr = &lock->cll_descr;
-	const struct ccc_io	*cio   = ccc_env_io(env);
-	int			 result;
-
-	/*
-	 * Work around DLM peculiarity: it assumes that glimpse
-	 * (LDLM_FL_HAS_INTENT) lock is always LCK_PR, and returns reads lock
-	 * when asked for LCK_PW lock with LDLM_FL_HAS_INTENT flag set. Make
-	 * sure that glimpse doesn't get CLM_WRITE top-lock, so that it
-	 * doesn't enqueue CLM_WRITE sub-locks.
-	 */
-	if (cio->cui_glimpse)
-		result = descr->cld_mode != CLM_WRITE;
-
-	/*
-	 * Also, don't match incomplete write locks for read, otherwise read
-	 * would enqueue missing sub-locks in the write mode.
-	 */
-	else if (need->cld_mode != descr->cld_mode)
-		result = lock->cll_state >= CLS_ENQUEUED;
-	else
-		result = 1;
-	return result;
-}
-
-/**
- * Implements cl_lock_operations::clo_state() method for ccc layer, invoked
- * whenever lock state changes. Transfers object attributes, that might be
- * updated as a result of lock acquiring into inode.
- */
-void ccc_lock_state(const struct lu_env *env,
-		    const struct cl_lock_slice *slice,
-		    enum cl_lock_state state)
-{
-	struct cl_lock *lock = slice->cls_lock;
-
-	/*
-	 * Refresh inode attributes when the lock is moving into CLS_HELD
-	 * state, and only when this is a result of real enqueue, rather than
-	 * of finding lock in the cache.
-	 */
-	if (state == CLS_HELD && lock->cll_state < CLS_HELD) {
-		struct cl_object *obj;
-		struct inode     *inode;
-
-		obj   = slice->cls_obj;
-		inode = ccc_object_inode(obj);
-
-		/* vmtruncate() sets the i_size
-		 * under both a DLM lock and the
-		 * ll_inode_size_lock().  If we don't get the
-		 * ll_inode_size_lock() here we can match the DLM lock and
-		 * reset i_size.  generic_file_write can then trust the
-		 * stale i_size when doing appending writes and effectively
-		 * cancel the result of the truncate.  Getting the
-		 * ll_inode_size_lock() after the enqueue maintains the DLM
-		 * -> ll_inode_size_lock() acquiring order.
-		 */
-		if (lock->cll_descr.cld_start == 0 &&
-		    lock->cll_descr.cld_end == CL_PAGE_EOF)
-			cl_merge_lvb(env, inode);
-	}
-}
-
-/*****************************************************************************
- *
- * io operations.
- *
- */
-
-int ccc_io_one_lock_index(const struct lu_env *env, struct cl_io *io,
-			  __u32 enqflags, enum cl_lock_mode mode,
-			  pgoff_t start, pgoff_t end)
-{
-	struct ccc_io	  *cio   = ccc_env_io(env);
-	struct cl_lock_descr   *descr = &cio->cui_link.cill_descr;
-	struct cl_object       *obj   = io->ci_obj;
-
-	CLOBINVRNT(env, obj, ccc_object_invariant(obj));
-
-	CDEBUG(D_VFSTRACE, "lock: %d [%lu, %lu]\n", mode, start, end);
-
-	memset(&cio->cui_link, 0, sizeof(cio->cui_link));
-
-	if (cio->cui_fd && (cio->cui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
-		descr->cld_mode = CLM_GROUP;
-		descr->cld_gid  = cio->cui_fd->fd_grouplock.cg_gid;
-	} else {
-		descr->cld_mode  = mode;
-	}
-	descr->cld_obj   = obj;
-	descr->cld_start = start;
-	descr->cld_end   = end;
-	descr->cld_enq_flags = enqflags;
-
-	cl_io_lock_add(env, io, &cio->cui_link);
-	return 0;
-}
-
-void ccc_io_update_iov(const struct lu_env *env,
-		       struct ccc_io *cio, struct cl_io *io)
-{
-	size_t size = io->u.ci_rw.crw_count;
-
-	if (!cl_is_normalio(env, io) || !cio->cui_iter)
-		return;
-
-	iov_iter_truncate(cio->cui_iter, size);
-}
-
-int ccc_io_one_lock(const struct lu_env *env, struct cl_io *io,
-		    __u32 enqflags, enum cl_lock_mode mode,
-		    loff_t start, loff_t end)
-{
-	struct cl_object *obj = io->ci_obj;
-
-	return ccc_io_one_lock_index(env, io, enqflags, mode,
-				     cl_index(obj, start), cl_index(obj, end));
-}
-
-void ccc_io_end(const struct lu_env *env, const struct cl_io_slice *ios)
-{
-	CLOBINVRNT(env, ios->cis_io->ci_obj,
-		   ccc_object_invariant(ios->cis_io->ci_obj));
-}
-
-void ccc_io_advance(const struct lu_env *env,
-		    const struct cl_io_slice *ios,
-		    size_t nob)
-{
-	struct ccc_io    *cio = cl2ccc_io(env, ios);
-	struct cl_io     *io  = ios->cis_io;
-	struct cl_object *obj = ios->cis_io->ci_obj;
-
-	CLOBINVRNT(env, obj, ccc_object_invariant(obj));
-
-	if (!cl_is_normalio(env, io))
-		return;
-
-	iov_iter_reexpand(cio->cui_iter, cio->cui_tot_count  -= nob);
-}
-
-/**
- * Helper function that if necessary adjusts file size (inode->i_size), when
- * position at the offset \a pos is accessed. File size can be arbitrary stale
- * on a Lustre client, but client at least knows KMS. If accessed area is
- * inside [0, KMS], set file size to KMS, otherwise glimpse file size.
- *
- * Locking: cl_isize_lock is used to serialize changes to inode size and to
- * protect consistency between inode size and cl_object
- * attributes. cl_object_size_lock() protects consistency between cl_attr's of
- * top-object and sub-objects.
- */
-int ccc_prep_size(const struct lu_env *env, struct cl_object *obj,
-		  struct cl_io *io, loff_t start, size_t count, int *exceed)
-{
-	struct cl_attr *attr  = ccc_env_thread_attr(env);
-	struct inode   *inode = ccc_object_inode(obj);
-	loff_t	  pos   = start + count - 1;
-	loff_t kms;
-	int result;
-
-	/*
-	 * Consistency guarantees: following possibilities exist for the
-	 * relation between region being accessed and real file size at this
-	 * moment:
-	 *
-	 *  (A): the region is completely inside of the file;
-	 *
-	 *  (B-x): x bytes of region are inside of the file, the rest is
-	 *  outside;
-	 *
-	 *  (C): the region is completely outside of the file.
-	 *
-	 * This classification is stable under DLM lock already acquired by
-	 * the caller, because to change the class, other client has to take
-	 * DLM lock conflicting with our lock. Also, any updates to ->i_size
-	 * by other threads on this client are serialized by
-	 * ll_inode_size_lock(). This guarantees that short reads are handled
-	 * correctly in the face of concurrent writes and truncates.
-	 */
-	ccc_object_size_lock(obj);
-	result = cl_object_attr_get(env, obj, attr);
-	if (result == 0) {
-		kms = attr->cat_kms;
-		if (pos > kms) {
-			/*
-			 * A glimpse is necessary to determine whether we
-			 * return a short read (B) or some zeroes at the end
-			 * of the buffer (C)
-			 */
-			ccc_object_size_unlock(obj);
-			result = cl_glimpse_lock(env, io, inode, obj, 0);
-			if (result == 0 && exceed) {
-				/* If objective page index exceed end-of-file
-				 * page index, return directly. Do not expect
-				 * kernel will check such case correctly.
-				 * linux-2.6.18-128.1.1 miss to do that.
-				 * --bug 17336
-				 */
-				loff_t size = cl_isize_read(inode);
-				loff_t cur_index = start >> PAGE_SHIFT;
-				loff_t size_index = (size - 1) >>
-						    PAGE_SHIFT;
-
-				if ((size == 0 && cur_index != 0) ||
-				    size_index < cur_index)
-					*exceed = 1;
-			}
-			return result;
-		}
-		/*
-		 * region is within kms and, hence, within real file
-		 * size (A). We need to increase i_size to cover the
-		 * read region so that generic_file_read() will do its
-		 * job, but that doesn't mean the kms size is
-		 * _correct_, it is only the _minimum_ size. If
-		 * someone does a stat they will get the correct size
-		 * which will always be >= the kms value here.
-		 * b=11081
-		 */
-		if (cl_isize_read(inode) < kms) {
-			cl_isize_write_nolock(inode, kms);
-			CDEBUG(D_VFSTRACE,
-					DFID" updating i_size %llu\n",
-					PFID(lu_object_fid(&obj->co_lu)),
-					(__u64)cl_isize_read(inode));
-
-		}
-	}
-	ccc_object_size_unlock(obj);
-	return result;
-}
-
-/*****************************************************************************
- *
- * Transfer operations.
- *
- */
-
-void ccc_req_completion(const struct lu_env *env,
-			const struct cl_req_slice *slice, int ioret)
-{
-	struct ccc_req *vrq;
-
-	if (ioret > 0)
-		cl_stats_tally(slice->crs_dev, slice->crs_req->crq_type, ioret);
-
-	vrq = cl2ccc_req(slice);
-	kmem_cache_free(ccc_req_kmem, vrq);
-}
-
-/**
- * Implementation of struct cl_req_operations::cro_attr_set() for ccc
- * layer. ccc is responsible for
- *
- *    - o_[mac]time
- *
- *    - o_mode
- *
- *    - o_parent_seq
- *
- *    - o_[ug]id
- *
- *    - o_parent_oid
- *
- *    - o_parent_ver
- *
- *    - o_ioepoch,
- *
- */
-void ccc_req_attr_set(const struct lu_env *env,
-		      const struct cl_req_slice *slice,
-		      const struct cl_object *obj,
-		      struct cl_req_attr *attr, u64 flags)
-{
-	struct inode *inode;
-	struct obdo  *oa;
-	u32	      valid_flags;
-
-	oa = attr->cra_oa;
-	inode = ccc_object_inode(obj);
-	valid_flags = OBD_MD_FLTYPE;
-
-	if (slice->crs_req->crq_type == CRT_WRITE) {
-		if (flags & OBD_MD_FLEPOCH) {
-			oa->o_valid |= OBD_MD_FLEPOCH;
-			oa->o_ioepoch = cl_i2info(inode)->lli_ioepoch;
-			valid_flags |= OBD_MD_FLMTIME | OBD_MD_FLCTIME |
-				       OBD_MD_FLUID | OBD_MD_FLGID;
-		}
-	}
-	obdo_from_inode(oa, inode, valid_flags & flags);
-	obdo_set_parent_fid(oa, &cl_i2info(inode)->lli_fid);
-	memcpy(attr->cra_jobid, cl_i2info(inode)->lli_jobid,
-	       JOBSTATS_JOBID_SIZE);
-}
-
-static const struct cl_req_operations ccc_req_ops = {
-	.cro_attr_set   = ccc_req_attr_set,
-	.cro_completion = ccc_req_completion
-};
-
-int cl_setattr_ost(struct inode *inode, const struct iattr *attr)
-{
-	struct lu_env *env;
-	struct cl_io  *io;
-	int	    result;
-	int	    refcheck;
-
-	env = cl_env_get(&refcheck);
-	if (IS_ERR(env))
-		return PTR_ERR(env);
-
-	io = ccc_env_thread_io(env);
-	io->ci_obj = cl_i2info(inode)->lli_clob;
-
-	io->u.ci_setattr.sa_attr.lvb_atime = LTIME_S(attr->ia_atime);
-	io->u.ci_setattr.sa_attr.lvb_mtime = LTIME_S(attr->ia_mtime);
-	io->u.ci_setattr.sa_attr.lvb_ctime = LTIME_S(attr->ia_ctime);
-	io->u.ci_setattr.sa_attr.lvb_size = attr->ia_size;
-	io->u.ci_setattr.sa_valid = attr->ia_valid;
-
-again:
-	if (cl_io_init(env, io, CIT_SETATTR, io->ci_obj) == 0) {
-		struct ccc_io *cio = ccc_env_io(env);
-
-		if (attr->ia_valid & ATTR_FILE)
-			/* populate the file descriptor for ftruncate to honor
-			 * group lock - see LU-787
-			 */
-			cio->cui_fd = cl_iattr2fd(inode, attr);
-
-		result = cl_io_loop(env, io);
-	} else {
-		result = io->ci_result;
-	}
-	cl_io_fini(env, io);
-	if (unlikely(io->ci_need_restart))
-		goto again;
-	/* HSM import case: file is released, cannot be restored
-	 * no need to fail except if restore registration failed
-	 * with -ENODATA
-	 */
-	if (result == -ENODATA && io->ci_restore_needed &&
-	    io->ci_result != -ENODATA)
-		result = 0;
-	cl_env_put(env, &refcheck);
-	return result;
-}
-
-/*****************************************************************************
- *
- * Type conversions.
- *
- */
-
-struct lu_device *ccc2lu_dev(struct ccc_device *vdv)
-{
-	return &vdv->cdv_cl.cd_lu_dev;
-}
-
-struct ccc_device *lu2ccc_dev(const struct lu_device *d)
-{
-	return container_of0(d, struct ccc_device, cdv_cl.cd_lu_dev);
-}
-
-struct ccc_device *cl2ccc_dev(const struct cl_device *d)
-{
-	return container_of0(d, struct ccc_device, cdv_cl);
-}
-
-struct lu_object *ccc2lu(struct ccc_object *vob)
-{
-	return &vob->cob_cl.co_lu;
-}
-
-struct ccc_object *lu2ccc(const struct lu_object *obj)
-{
-	return container_of0(obj, struct ccc_object, cob_cl.co_lu);
-}
-
-struct ccc_object *cl2ccc(const struct cl_object *obj)
-{
-	return container_of0(obj, struct ccc_object, cob_cl);
-}
-
-struct ccc_lock *cl2ccc_lock(const struct cl_lock_slice *slice)
-{
-	return container_of(slice, struct ccc_lock, clk_cl);
-}
-
-struct ccc_io *cl2ccc_io(const struct lu_env *env,
-			 const struct cl_io_slice *slice)
-{
-	struct ccc_io *cio;
-
-	cio = container_of(slice, struct ccc_io, cui_cl);
-	LASSERT(cio == ccc_env_io(env));
-	return cio;
-}
-
-struct ccc_req *cl2ccc_req(const struct cl_req_slice *slice)
-{
-	return container_of0(slice, struct ccc_req, crq_cl);
-}
-
-struct page *cl2vm_page(const struct cl_page_slice *slice)
-{
-	return cl2ccc_page(slice)->cpg_page;
-}
-
-/*****************************************************************************
- *
- * Accessors.
- *
- */
-int ccc_object_invariant(const struct cl_object *obj)
-{
-	struct inode	 *inode = ccc_object_inode(obj);
-	struct cl_inode_info *lli   = cl_i2info(inode);
-
-	return (S_ISREG(cl_inode_mode(inode)) ||
-		/* i_mode of unlinked inode is zeroed. */
-		cl_inode_mode(inode) == 0) && lli->lli_clob == obj;
-}
-
-struct inode *ccc_object_inode(const struct cl_object *obj)
-{
-	return cl2ccc(obj)->cob_inode;
-}
-
-/**
- * Initialize or update CLIO structures for regular files when new
- * meta-data arrives from the server.
- *
- * \param inode regular file inode
- * \param md    new file metadata from MDS
- * - allocates cl_object if necessary,
- * - updated layout, if object was already here.
- */
-int cl_file_inode_init(struct inode *inode, struct lustre_md *md)
-{
-	struct lu_env	*env;
-	struct cl_inode_info *lli;
-	struct cl_object     *clob;
-	struct lu_site       *site;
-	struct lu_fid	*fid;
-	struct cl_object_conf conf = {
-		.coc_inode = inode,
-		.u = {
-			.coc_md    = md
-		}
-	};
-	int result = 0;
-	int refcheck;
-
-	LASSERT(md->body->valid & OBD_MD_FLID);
-	LASSERT(S_ISREG(cl_inode_mode(inode)));
-
-	env = cl_env_get(&refcheck);
-	if (IS_ERR(env))
-		return PTR_ERR(env);
-
-	site = cl_i2sbi(inode)->ll_site;
-	lli  = cl_i2info(inode);
-	fid  = &lli->lli_fid;
-	LASSERT(fid_is_sane(fid));
-
-	if (!lli->lli_clob) {
-		/* clob is slave of inode, empty lli_clob means for new inode,
-		 * there is no clob in cache with the given fid, so it is
-		 * unnecessary to perform lookup-alloc-lookup-insert, just
-		 * alloc and insert directly.
-		 */
-		LASSERT(inode->i_state & I_NEW);
-		conf.coc_lu.loc_flags = LOC_F_NEW;
-		clob = cl_object_find(env, lu2cl_dev(site->ls_top_dev),
-				      fid, &conf);
-		if (!IS_ERR(clob)) {
-			/*
-			 * No locking is necessary, as new inode is
-			 * locked by I_NEW bit.
-			 */
-			lli->lli_clob = clob;
-			lli->lli_has_smd = lsm_has_objects(md->lsm);
-			lu_object_ref_add(&clob->co_lu, "inode", inode);
-		} else
-			result = PTR_ERR(clob);
-	} else {
-		result = cl_conf_set(env, lli->lli_clob, &conf);
-	}
-
-	cl_env_put(env, &refcheck);
-
-	if (result != 0)
-		CERROR("Failure to initialize cl object "DFID": %d\n",
-		       PFID(fid), result);
-	return result;
-}
-
-/**
- * Wait for others drop their references of the object at first, then we drop
- * the last one, which will lead to the object be destroyed immediately.
- * Must be called after cl_object_kill() against this object.
- *
- * The reason we want to do this is: destroying top object will wait for sub
- * objects being destroyed first, so we can't let bottom layer (e.g. from ASTs)
- * to initiate top object destroying which may deadlock. See bz22520.
- */
-static void cl_object_put_last(struct lu_env *env, struct cl_object *obj)
-{
-	struct lu_object_header *header = obj->co_lu.lo_header;
-	wait_queue_t	   waiter;
-
-	if (unlikely(atomic_read(&header->loh_ref) != 1)) {
-		struct lu_site *site = obj->co_lu.lo_dev->ld_site;
-		struct lu_site_bkt_data *bkt;
-
-		bkt = lu_site_bkt_from_fid(site, &header->loh_fid);
-
-		init_waitqueue_entry(&waiter, current);
-		add_wait_queue(&bkt->lsb_marche_funebre, &waiter);
-
-		while (1) {
-			set_current_state(TASK_UNINTERRUPTIBLE);
-			if (atomic_read(&header->loh_ref) == 1)
-				break;
-			schedule();
-		}
-
-		set_current_state(TASK_RUNNING);
-		remove_wait_queue(&bkt->lsb_marche_funebre, &waiter);
-	}
-
-	cl_object_put(env, obj);
-}
-
-void cl_inode_fini(struct inode *inode)
-{
-	struct lu_env	   *env;
-	struct cl_inode_info    *lli  = cl_i2info(inode);
-	struct cl_object	*clob = lli->lli_clob;
-	int refcheck;
-	int emergency;
-
-	if (clob) {
-		void		    *cookie;
-
-		cookie = cl_env_reenter();
-		env = cl_env_get(&refcheck);
-		emergency = IS_ERR(env);
-		if (emergency) {
-			mutex_lock(&ccc_inode_fini_guard);
-			LASSERT(ccc_inode_fini_env);
-			cl_env_implant(ccc_inode_fini_env, &refcheck);
-			env = ccc_inode_fini_env;
-		}
-		/*
-		 * cl_object cache is a slave to inode cache (which, in turn
-		 * is a slave to dentry cache), don't keep cl_object in memory
-		 * when its master is evicted.
-		 */
-		cl_object_kill(env, clob);
-		lu_object_ref_del(&clob->co_lu, "inode", inode);
-		cl_object_put_last(env, clob);
-		lli->lli_clob = NULL;
-		if (emergency) {
-			cl_env_unplant(ccc_inode_fini_env, &refcheck);
-			mutex_unlock(&ccc_inode_fini_guard);
-		} else
-			cl_env_put(env, &refcheck);
-		cl_env_reexit(cookie);
-	}
-}
-
-/**
- * return IF_* type for given lu_dirent entry.
- * IF_* flag shld be converted to particular OS file type in
- * platform llite module.
- */
-__u16 ll_dirent_type_get(struct lu_dirent *ent)
-{
-	__u16 type = 0;
-	struct luda_type *lt;
-	int len = 0;
-
-	if (le32_to_cpu(ent->lde_attrs) & LUDA_TYPE) {
-		const unsigned align = sizeof(struct luda_type) - 1;
-
-		len = le16_to_cpu(ent->lde_namelen);
-		len = (len + align) & ~align;
-		lt = (void *)ent->lde_name + len;
-		type = IFTODT(le16_to_cpu(lt->lt_type));
-	}
-	return type;
-}
-
-/**
- * build inode number from passed @fid
- */
-__u64 cl_fid_build_ino(const struct lu_fid *fid, int api32)
-{
-	if (BITS_PER_LONG == 32 || api32)
-		return fid_flatten32(fid);
-	else
-		return fid_flatten(fid);
-}
-
-/**
- * build inode generation from passed @fid.  If our FID overflows the 32-bit
- * inode number then return a non-zero generation to distinguish them.
- */
-__u32 cl_fid_build_gen(const struct lu_fid *fid)
-{
-	__u32 gen;
-
-	if (fid_is_igif(fid)) {
-		gen = lu_igif_gen(fid);
-		return gen;
-	}
-
-	gen = fid_flatten(fid) >> 32;
-	return gen;
-}
-
-/* lsm is unreliable after hsm implementation as layout can be changed at
- * any time. This is only to support old, non-clio-ized interfaces. It will
- * cause deadlock if clio operations are called with this extra layout refcount
- * because in case the layout changed during the IO, ll_layout_refresh() will
- * have to wait for the refcount to become zero to destroy the older layout.
- *
- * Notice that the lsm returned by this function may not be valid unless called
- * inside layout lock - MDS_INODELOCK_LAYOUT.
- */
-struct lov_stripe_md *ccc_inode_lsm_get(struct inode *inode)
-{
-	return lov_lsm_get(cl_i2info(inode)->lli_clob);
-}
-
-inline void ccc_inode_lsm_put(struct inode *inode, struct lov_stripe_md *lsm)
-{
-	lov_lsm_put(cl_i2info(inode)->lli_clob, lsm);
-}
diff --git a/drivers/staging/lustre/lustre/lclient/lcommon_misc.c b/drivers/staging/lustre/lustre/lclient/lcommon_misc.c
deleted file mode 100644
index d80bcedd7..000000000
--- a/drivers/staging/lustre/lustre/lclient/lcommon_misc.c
+++ /dev/null
@@ -1,200 +0,0 @@
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * cl code shared between vvp and liblustre (and other Lustre clients in the
- * future).
- *
- */
-#include "../include/obd_class.h"
-#include "../include/obd_support.h"
-#include "../include/obd.h"
-#include "../include/cl_object.h"
-#include "../include/lclient.h"
-
-#include "../include/lustre_lite.h"
-
-/* Initialize the default and maximum LOV EA and cookie sizes.  This allows
- * us to make MDS RPCs with large enough reply buffers to hold the
- * maximum-sized (= maximum striped) EA and cookie without having to
- * calculate this (via a call into the LOV + OSCs) each time we make an RPC.
- */
-int cl_init_ea_size(struct obd_export *md_exp, struct obd_export *dt_exp)
-{
-	struct lov_stripe_md lsm = { .lsm_magic = LOV_MAGIC_V3 };
-	__u32 valsize = sizeof(struct lov_desc);
-	int rc, easize, def_easize, cookiesize;
-	struct lov_desc desc;
-	__u16 stripes, def_stripes;
-
-	rc = obd_get_info(NULL, dt_exp, sizeof(KEY_LOVDESC), KEY_LOVDESC,
-			  &valsize, &desc, NULL);
-	if (rc)
-		return rc;
-
-	stripes = min_t(__u32, desc.ld_tgt_count, LOV_MAX_STRIPE_COUNT);
-	lsm.lsm_stripe_count = stripes;
-	easize = obd_size_diskmd(dt_exp, &lsm);
-
-	def_stripes = min_t(__u32, desc.ld_default_stripe_count,
-			    LOV_MAX_STRIPE_COUNT);
-	lsm.lsm_stripe_count = def_stripes;
-	def_easize = obd_size_diskmd(dt_exp, &lsm);
-
-	cookiesize = stripes * sizeof(struct llog_cookie);
-
-	/* default cookiesize is 0 because from 2.4 server doesn't send
-	 * llog cookies to client.
-	 */
-	CDEBUG(D_HA,
-	       "updating def/max_easize: %d/%d def/max_cookiesize: 0/%d\n",
-	       def_easize, easize, cookiesize);
-
-	rc = md_init_ea_size(md_exp, easize, def_easize, cookiesize, 0);
-	return rc;
-}
-
-/**
- * This function is used as an upcall-callback hooked by liblustre and llite
- * clients into obd_notify() listeners chain to handle notifications about
- * change of import connect_flags. See llu_fsswop_mount() and
- * lustre_common_fill_super().
- */
-int cl_ocd_update(struct obd_device *host,
-		  struct obd_device *watched,
-		  enum obd_notify_event ev, void *owner, void *data)
-{
-	struct lustre_client_ocd *lco;
-	struct client_obd	*cli;
-	__u64 flags;
-	int   result;
-
-	if (!strcmp(watched->obd_type->typ_name, LUSTRE_OSC_NAME)) {
-		cli = &watched->u.cli;
-		lco = owner;
-		flags = cli->cl_import->imp_connect_data.ocd_connect_flags;
-		CDEBUG(D_SUPER, "Changing connect_flags: %#llx -> %#llx\n",
-		       lco->lco_flags, flags);
-		mutex_lock(&lco->lco_lock);
-		lco->lco_flags &= flags;
-		/* for each osc event update ea size */
-		if (lco->lco_dt_exp)
-			cl_init_ea_size(lco->lco_md_exp, lco->lco_dt_exp);
-
-		mutex_unlock(&lco->lco_lock);
-		result = 0;
-	} else {
-		CERROR("unexpected notification from %s %s!\n",
-		       watched->obd_type->typ_name,
-		       watched->obd_name);
-		result = -EINVAL;
-	}
-	return result;
-}
-
-#define GROUPLOCK_SCOPE "grouplock"
-
-int cl_get_grouplock(struct cl_object *obj, unsigned long gid, int nonblock,
-		     struct ccc_grouplock *cg)
-{
-	struct lu_env	  *env;
-	struct cl_io	   *io;
-	struct cl_lock	 *lock;
-	struct cl_lock_descr   *descr;
-	__u32		   enqflags;
-	int		     refcheck;
-	int		     rc;
-
-	env = cl_env_get(&refcheck);
-	if (IS_ERR(env))
-		return PTR_ERR(env);
-
-	io = ccc_env_thread_io(env);
-	io->ci_obj = obj;
-	io->ci_ignore_layout = 1;
-
-	rc = cl_io_init(env, io, CIT_MISC, io->ci_obj);
-	if (rc) {
-		/* Does not make sense to take GL for released layout */
-		if (rc > 0)
-			rc = -ENOTSUPP;
-		cl_env_put(env, &refcheck);
-		return rc;
-	}
-
-	descr = &ccc_env_info(env)->cti_descr;
-	descr->cld_obj = obj;
-	descr->cld_start = 0;
-	descr->cld_end = CL_PAGE_EOF;
-	descr->cld_gid = gid;
-	descr->cld_mode = CLM_GROUP;
-
-	enqflags = CEF_MUST | (nonblock ? CEF_NONBLOCK : 0);
-	descr->cld_enq_flags = enqflags;
-
-	lock = cl_lock_request(env, io, descr, GROUPLOCK_SCOPE, current);
-	if (IS_ERR(lock)) {
-		cl_io_fini(env, io);
-		cl_env_put(env, &refcheck);
-		return PTR_ERR(lock);
-	}
-
-	cg->cg_env  = cl_env_get(&refcheck);
-	cg->cg_io   = io;
-	cg->cg_lock = lock;
-	cg->cg_gid  = gid;
-	LASSERT(cg->cg_env == env);
-
-	cl_env_unplant(env, &refcheck);
-	return 0;
-}
-
-void cl_put_grouplock(struct ccc_grouplock *cg)
-{
-	struct lu_env  *env  = cg->cg_env;
-	struct cl_io   *io   = cg->cg_io;
-	struct cl_lock *lock = cg->cg_lock;
-	int	     refcheck;
-
-	LASSERT(cg->cg_env);
-	LASSERT(cg->cg_gid);
-
-	cl_env_implant(env, &refcheck);
-	cl_env_put(env, &refcheck);
-
-	cl_unuse(env, lock);
-	cl_lock_release(env, lock, GROUPLOCK_SCOPE, current);
-	cl_io_fini(env, io);
-	cl_env_put(env, NULL);
-}
diff --git a/drivers/staging/lustre/lustre/ldlm/l_lock.c b/drivers/staging/lustre/lustre/ldlm/l_lock.c
index e5d1344e8..621323f6e 100644
--- a/drivers/staging/lustre/lustre/ldlm/l_lock.c
+++ b/drivers/staging/lustre/lustre/ldlm/l_lock.c
@@ -54,7 +54,7 @@ struct ldlm_resource *lock_res_and_lock(struct ldlm_lock *lock)
 
 	lock_res(lock->l_resource);
 
-	lock->l_flags |= LDLM_FL_RES_LOCKED;
+	ldlm_set_res_locked(lock);
 	return lock->l_resource;
 }
 EXPORT_SYMBOL(lock_res_and_lock);
@@ -65,7 +65,7 @@ EXPORT_SYMBOL(lock_res_and_lock);
 void unlock_res_and_lock(struct ldlm_lock *lock)
 {
 	/* on server-side resource of lock doesn't change */
-	lock->l_flags &= ~LDLM_FL_RES_LOCKED;
+	ldlm_clear_res_locked(lock);
 
 	unlock_res(lock->l_resource);
 	spin_unlock(&lock->l_lock);
diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_extent.c b/drivers/staging/lustre/lustre/ldlm/ldlm_extent.c
index a803e200f..cf1f17836 100644
--- a/drivers/staging/lustre/lustre/ldlm/ldlm_extent.c
+++ b/drivers/staging/lustre/lustre/ldlm/ldlm_extent.c
@@ -75,12 +75,12 @@ __u64 ldlm_extent_shift_kms(struct ldlm_lock *lock, __u64 old_kms)
 	 * just after we finish and take our lock into account in its
 	 * calculation of the kms
 	 */
-	lock->l_flags |= LDLM_FL_KMS_IGNORE;
+	ldlm_set_kms_ignore(lock);
 
 	list_for_each(tmp, &res->lr_granted) {
 		lck = list_entry(tmp, struct ldlm_lock, l_res_link);
 
-		if (lck->l_flags & LDLM_FL_KMS_IGNORE)
+		if (ldlm_is_kms_ignore(lck))
 			continue;
 
 		if (lck->l_policy_data.l_extent.end >= old_kms)
diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_flock.c b/drivers/staging/lustre/lustre/ldlm/ldlm_flock.c
index b88b78606..349bfcc9b 100644
--- a/drivers/staging/lustre/lustre/ldlm/ldlm_flock.c
+++ b/drivers/staging/lustre/lustre/ldlm/ldlm_flock.c
@@ -101,8 +101,7 @@ ldlm_flock_destroy(struct ldlm_lock *lock, enum ldlm_mode mode, __u64 flags)
 	LASSERT(hlist_unhashed(&lock->l_exp_flock_hash));
 
 	list_del_init(&lock->l_res_link);
-	if (flags == LDLM_FL_WAIT_NOREPROC &&
-	    !(lock->l_flags & LDLM_FL_FAILED)) {
+	if (flags == LDLM_FL_WAIT_NOREPROC && !ldlm_is_failed(lock)) {
 		/* client side - set a flag to prevent sending a CANCEL */
 		lock->l_flags |= LDLM_FL_LOCAL_ONLY | LDLM_FL_CBPENDING;
 
@@ -436,7 +435,7 @@ ldlm_flock_interrupted_wait(void *data)
 	lock_res_and_lock(lock);
 
 	/* client side - set flag to prevent lock from being put on LRU list */
-	lock->l_flags |= LDLM_FL_CBPENDING;
+	ldlm_set_cbpending(lock);
 	unlock_res_and_lock(lock);
 }
 
@@ -520,30 +519,29 @@ ldlm_flock_completion_ast(struct ldlm_lock *lock, __u64 flags, void *data)
 granted:
 	OBD_FAIL_TIMEOUT(OBD_FAIL_LDLM_CP_CB_WAIT, 10);
 
-	if (lock->l_flags & LDLM_FL_DESTROYED) {
-		LDLM_DEBUG(lock, "client-side enqueue waking up: destroyed");
-		return 0;
-	}
-
-	if (lock->l_flags & LDLM_FL_FAILED) {
+	if (ldlm_is_failed(lock)) {
 		LDLM_DEBUG(lock, "client-side enqueue waking up: failed");
 		return -EIO;
 	}
 
-	if (rc) {
-		LDLM_DEBUG(lock, "client-side enqueue waking up: failed (%d)",
-			   rc);
-		return rc;
-	}
-
 	LDLM_DEBUG(lock, "client-side enqueue granted");
 
 	lock_res_and_lock(lock);
 
+	/*
+	 * Protect against race where lock could have been just destroyed
+	 * due to overlap in ldlm_process_flock_lock().
+	 */
+	if (ldlm_is_destroyed(lock)) {
+		unlock_res_and_lock(lock);
+		LDLM_DEBUG(lock, "client-side enqueue waking up: destroyed");
+		return 0;
+	}
+
 	/* ldlm_lock_enqueue() has already placed lock on the granted list. */
 	list_del_init(&lock->l_res_link);
 
-	if (lock->l_flags & LDLM_FL_FLOCK_DEADLOCK) {
+	if (ldlm_is_flock_deadlock(lock)) {
 		LDLM_DEBUG(lock, "client-side enqueue deadlock received");
 		rc = -EDEADLK;
 	} else if (flags & LDLM_FL_TEST_LOCK) {
diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_internal.h b/drivers/staging/lustre/lustre/ldlm/ldlm_internal.h
index e21373e73..32f227f37 100644
--- a/drivers/staging/lustre/lustre/ldlm/ldlm_internal.h
+++ b/drivers/staging/lustre/lustre/ldlm/ldlm_internal.h
@@ -95,9 +95,10 @@ enum {
 	LDLM_CANCEL_PASSED = 1 << 1, /* Cancel passed number of locks. */
 	LDLM_CANCEL_SHRINK = 1 << 2, /* Cancel locks from shrinker. */
 	LDLM_CANCEL_LRUR   = 1 << 3, /* Cancel locks from lru resize. */
-	LDLM_CANCEL_NO_WAIT = 1 << 4 /* Cancel locks w/o blocking (neither
-				      * sending nor waiting for any rpcs)
-				      */
+	LDLM_CANCEL_NO_WAIT = 1 << 4, /* Cancel locks w/o blocking (neither
+				       * sending nor waiting for any rpcs)
+				       */
+	LDLM_CANCEL_LRUR_NO_WAIT = 1 << 5, /* LRUR + NO_WAIT */
 };
 
 int ldlm_cancel_lru(struct ldlm_namespace *ns, int nr,
@@ -145,7 +146,8 @@ void ldlm_lock_decref_internal(struct ldlm_lock *, __u32 mode);
 void ldlm_lock_decref_internal_nolock(struct ldlm_lock *, __u32 mode);
 int ldlm_run_ast_work(struct ldlm_namespace *ns, struct list_head *rpc_list,
 		      enum ldlm_desc_ast_t ast_type);
-int ldlm_lock_remove_from_lru(struct ldlm_lock *lock);
+int ldlm_lock_remove_from_lru_check(struct ldlm_lock *lock, time_t last_use);
+#define ldlm_lock_remove_from_lru(lock) ldlm_lock_remove_from_lru_check(lock, 0)
 int ldlm_lock_remove_from_lru_nolock(struct ldlm_lock *lock);
 void ldlm_lock_destroy_nolock(struct ldlm_lock *lock);
 
@@ -216,8 +218,6 @@ enum ldlm_policy_res {
 	LDLM_POLICY_SKIP_LOCK
 };
 
-typedef enum ldlm_policy_res ldlm_policy_res_t;
-
 #define LDLM_POOL_SYSFS_PRINT_int(v) sprintf(buf, "%d\n", v)
 #define LDLM_POOL_SYSFS_SET_int(a, b) { a = b; }
 #define LDLM_POOL_SYSFS_PRINT_u64(v) sprintf(buf, "%lld\n", v)
@@ -305,9 +305,10 @@ static inline int is_granted_or_cancelled(struct ldlm_lock *lock)
 	int ret = 0;
 
 	lock_res_and_lock(lock);
-	if (((lock->l_req_mode == lock->l_granted_mode) &&
-	     !(lock->l_flags & LDLM_FL_CP_REQD)) ||
-	    (lock->l_flags & (LDLM_FL_FAILED | LDLM_FL_CANCEL)))
+	if ((lock->l_req_mode == lock->l_granted_mode) &&
+	     !ldlm_is_cp_reqd(lock))
+		ret = 1;
+	else if (ldlm_is_failed(lock) || ldlm_is_cancel(lock))
 		ret = 1;
 	unlock_res_and_lock(lock);
 
diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_lib.c b/drivers/staging/lustre/lustre/ldlm/ldlm_lib.c
index 7dd7df59a..b4ffbe2fc 100644
--- a/drivers/staging/lustre/lustre/ldlm/ldlm_lib.c
+++ b/drivers/staging/lustre/lustre/ldlm/ldlm_lib.c
@@ -314,7 +314,7 @@ int client_obd_setup(struct obd_device *obddev, struct lustre_cfg *lcfg)
 	INIT_LIST_HEAD(&cli->cl_loi_hp_ready_list);
 	INIT_LIST_HEAD(&cli->cl_loi_write_list);
 	INIT_LIST_HEAD(&cli->cl_loi_read_list);
-	client_obd_list_lock_init(&cli->cl_loi_list_lock);
+	spin_lock_init(&cli->cl_loi_list_lock);
 	atomic_set(&cli->cl_pending_w_pages, 0);
 	atomic_set(&cli->cl_pending_r_pages, 0);
 	cli->cl_r_in_flight = 0;
@@ -333,7 +333,8 @@ int client_obd_setup(struct obd_device *obddev, struct lustre_cfg *lcfg)
 	atomic_set(&cli->cl_lru_busy, 0);
 	atomic_set(&cli->cl_lru_in_list, 0);
 	INIT_LIST_HEAD(&cli->cl_lru_list);
-	client_obd_list_lock_init(&cli->cl_lru_list_lock);
+	spin_lock_init(&cli->cl_lru_list_lock);
+	atomic_set(&cli->cl_unstable_count, 0);
 
 	init_waitqueue_head(&cli->cl_destroy_waitq);
 	atomic_set(&cli->cl_destroy_in_flight, 0);
@@ -355,6 +356,12 @@ int client_obd_setup(struct obd_device *obddev, struct lustre_cfg *lcfg)
 	cli->cl_max_pages_per_rpc = min_t(int, PTLRPC_MAX_BRW_PAGES,
 					  LNET_MTU >> PAGE_SHIFT);
 
+	/*
+	 * set cl_chunkbits default value to PAGE_CACHE_SHIFT,
+	 * it will be updated at OSC connection time.
+	 */
+	cli->cl_chunkbits = PAGE_SHIFT;
+
 	if (!strcmp(name, LUSTRE_MDC_NAME)) {
 		cli->cl_max_rpcs_in_flight = MDC_MAX_RIF_DEFAULT;
 	} else if (totalram_pages >> (20 - PAGE_SHIFT) <= 128 /* MB */) {
@@ -429,7 +436,6 @@ err_ldlm:
 	ldlm_put_ref();
 err:
 	return rc;
-
 }
 EXPORT_SYMBOL(client_obd_setup);
 
@@ -438,6 +444,7 @@ int client_obd_cleanup(struct obd_device *obddev)
 	ldlm_namespace_free_post(obddev->obd_namespace);
 	obddev->obd_namespace = NULL;
 
+	obd_cleanup_client_import(obddev);
 	LASSERT(!obddev->u.cli.cl_import);
 
 	ldlm_put_ref();
@@ -748,6 +755,7 @@ int ldlm_error2errno(enum ldlm_error error)
 
 	switch (error) {
 	case ELDLM_OK:
+	case ELDLM_LOCK_MATCHED:
 		result = 0;
 		break;
 	case ELDLM_LOCK_CHANGED:
diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_lock.c b/drivers/staging/lustre/lustre/ldlm/ldlm_lock.c
index ecd65a7a3..bff94ea12 100644
--- a/drivers/staging/lustre/lustre/ldlm/ldlm_lock.c
+++ b/drivers/staging/lustre/lustre/ldlm/ldlm_lock.c
@@ -185,7 +185,7 @@ void ldlm_lock_put(struct ldlm_lock *lock)
 			   "final lock_put on destroyed lock, freeing it.");
 
 		res = lock->l_resource;
-		LASSERT(lock->l_flags & LDLM_FL_DESTROYED);
+		LASSERT(ldlm_is_destroyed(lock));
 		LASSERT(list_empty(&lock->l_res_link));
 		LASSERT(list_empty(&lock->l_pending_chain));
 
@@ -229,15 +229,25 @@ int ldlm_lock_remove_from_lru_nolock(struct ldlm_lock *lock)
 
 /**
  * Removes LDLM lock \a lock from LRU. Obtains the LRU lock first.
+ *
+ * If \a last_use is non-zero, it will remove the lock from LRU only if
+ * it matches lock's l_last_used.
+ *
+ * \retval 0 if \a last_use is set, the lock is not in LRU list or \a last_use
+ *           doesn't match lock's l_last_used;
+ *           otherwise, the lock hasn't been in the LRU list.
+ * \retval 1 the lock was in LRU list and removed.
  */
-int ldlm_lock_remove_from_lru(struct ldlm_lock *lock)
+int ldlm_lock_remove_from_lru_check(struct ldlm_lock *lock, time_t last_use)
 {
 	struct ldlm_namespace *ns = ldlm_lock_to_ns(lock);
-	int rc;
+	int rc = 0;
 
 	spin_lock(&ns->ns_lock);
-	rc = ldlm_lock_remove_from_lru_nolock(lock);
+	if (last_use == 0 || last_use == lock->l_last_used)
+		rc = ldlm_lock_remove_from_lru_nolock(lock);
 	spin_unlock(&ns->ns_lock);
+
 	return rc;
 }
 
@@ -252,8 +262,7 @@ static void ldlm_lock_add_to_lru_nolock(struct ldlm_lock *lock)
 	LASSERT(list_empty(&lock->l_lru));
 	LASSERT(lock->l_resource->lr_type != LDLM_FLOCK);
 	list_add_tail(&lock->l_lru, &ns->ns_unused_list);
-	if (lock->l_flags & LDLM_FL_SKIPPED)
-		lock->l_flags &= ~LDLM_FL_SKIPPED;
+	ldlm_clear_skipped(lock);
 	LASSERT(ns->ns_nr_unused >= 0);
 	ns->ns_nr_unused++;
 }
@@ -318,11 +327,11 @@ static int ldlm_lock_destroy_internal(struct ldlm_lock *lock)
 		LBUG();
 	}
 
-	if (lock->l_flags & LDLM_FL_DESTROYED) {
+	if (ldlm_is_destroyed(lock)) {
 		LASSERT(list_empty(&lock->l_lru));
 		return 0;
 	}
-	lock->l_flags |= LDLM_FL_DESTROYED;
+	ldlm_set_destroyed(lock);
 
 	if (lock->l_export && lock->l_export->exp_lock_hash) {
 		/* NB: it's safe to call cfs_hash_del() even lock isn't
@@ -544,7 +553,7 @@ struct ldlm_lock *__ldlm_handle2lock(const struct lustre_handle *handle,
 	/* It's unlikely but possible that someone marked the lock as
 	 * destroyed after we did handle2object on it
 	 */
-	if (flags == 0 && ((lock->l_flags & LDLM_FL_DESTROYED) == 0)) {
+	if (flags == 0 && !ldlm_is_destroyed(lock)) {
 		lu_ref_add(&lock->l_reference, "handle", current);
 		return lock;
 	}
@@ -554,21 +563,22 @@ struct ldlm_lock *__ldlm_handle2lock(const struct lustre_handle *handle,
 	LASSERT(lock->l_resource);
 
 	lu_ref_add_atomic(&lock->l_reference, "handle", current);
-	if (unlikely(lock->l_flags & LDLM_FL_DESTROYED)) {
+	if (unlikely(ldlm_is_destroyed(lock))) {
 		unlock_res_and_lock(lock);
 		CDEBUG(D_INFO, "lock already destroyed: lock %p\n", lock);
 		LDLM_LOCK_PUT(lock);
 		return NULL;
 	}
 
-	if (flags && (lock->l_flags & flags)) {
-		unlock_res_and_lock(lock);
-		LDLM_LOCK_PUT(lock);
-		return NULL;
-	}
+	if (flags) {
+		if (lock->l_flags & flags) {
+			unlock_res_and_lock(lock);
+			LDLM_LOCK_PUT(lock);
+			return NULL;
+		}
 
-	if (flags)
 		lock->l_flags |= flags;
+	}
 
 	unlock_res_and_lock(lock);
 	return lock;
@@ -599,14 +609,14 @@ EXPORT_SYMBOL(ldlm_lock2desc);
 static void ldlm_add_bl_work_item(struct ldlm_lock *lock, struct ldlm_lock *new,
 				  struct list_head *work_list)
 {
-	if ((lock->l_flags & LDLM_FL_AST_SENT) == 0) {
+	if (!ldlm_is_ast_sent(lock)) {
 		LDLM_DEBUG(lock, "lock incompatible; sending blocking AST.");
-		lock->l_flags |= LDLM_FL_AST_SENT;
+		ldlm_set_ast_sent(lock);
 		/* If the enqueuing client said so, tell the AST recipient to
 		 * discard dirty data, rather than writing back.
 		 */
-		if (new->l_flags & LDLM_FL_AST_DISCARD_DATA)
-			lock->l_flags |= LDLM_FL_DISCARD_DATA;
+		if (ldlm_is_ast_discard_data(new))
+			ldlm_set_discard_data(lock);
 		LASSERT(list_empty(&lock->l_bl_ast));
 		list_add(&lock->l_bl_ast, work_list);
 		LDLM_LOCK_GET(lock);
@@ -621,8 +631,8 @@ static void ldlm_add_bl_work_item(struct ldlm_lock *lock, struct ldlm_lock *new,
 static void ldlm_add_cp_work_item(struct ldlm_lock *lock,
 				  struct list_head *work_list)
 {
-	if ((lock->l_flags & LDLM_FL_CP_REQD) == 0) {
-		lock->l_flags |= LDLM_FL_CP_REQD;
+	if (!ldlm_is_cp_reqd(lock)) {
+		ldlm_set_cp_reqd(lock);
 		LDLM_DEBUG(lock, "lock granted; sending completion AST.");
 		LASSERT(list_empty(&lock->l_cp_ast));
 		list_add(&lock->l_cp_ast, work_list);
@@ -657,7 +667,7 @@ void ldlm_lock_addref(struct lustre_handle *lockh, __u32 mode)
 	struct ldlm_lock *lock;
 
 	lock = ldlm_handle2lock(lockh);
-	LASSERT(lock);
+	LASSERTF(lock, "Non-existing lock: %llx\n", lockh->cookie);
 	ldlm_lock_addref_internal(lock, mode);
 	LDLM_LOCK_PUT(lock);
 }
@@ -704,7 +714,7 @@ int ldlm_lock_addref_try(struct lustre_handle *lockh, __u32 mode)
 	if (lock) {
 		lock_res_and_lock(lock);
 		if (lock->l_readers != 0 || lock->l_writers != 0 ||
-		    !(lock->l_flags & LDLM_FL_CBPENDING)) {
+		    !ldlm_is_cbpending(lock)) {
 			ldlm_lock_addref_internal_nolock(lock, mode);
 			result = 0;
 		}
@@ -770,17 +780,17 @@ void ldlm_lock_decref_internal(struct ldlm_lock *lock, __u32 mode)
 
 	ldlm_lock_decref_internal_nolock(lock, mode);
 
-	if (lock->l_flags & LDLM_FL_LOCAL &&
+	if (ldlm_is_local(lock) &&
 	    !lock->l_readers && !lock->l_writers) {
 		/* If this is a local lock on a server namespace and this was
 		 * the last reference, cancel the lock.
 		 */
 		CDEBUG(D_INFO, "forcing cancel of local lock\n");
-		lock->l_flags |= LDLM_FL_CBPENDING;
+		ldlm_set_cbpending(lock);
 	}
 
 	if (!lock->l_readers && !lock->l_writers &&
-	    (lock->l_flags & LDLM_FL_CBPENDING)) {
+	    ldlm_is_cbpending(lock)) {
 		/* If we received a blocked AST and this was the last reference,
 		 * run the callback.
 		 */
@@ -791,16 +801,14 @@ void ldlm_lock_decref_internal(struct ldlm_lock *lock, __u32 mode)
 		ldlm_lock_remove_from_lru(lock);
 		unlock_res_and_lock(lock);
 
-		if (lock->l_flags & LDLM_FL_FAIL_LOC)
+		if (ldlm_is_fail_loc(lock))
 			OBD_RACE(OBD_FAIL_LDLM_CP_BL_RACE);
 
-		if ((lock->l_flags & LDLM_FL_ATOMIC_CB) ||
+		if (ldlm_is_atomic_cb(lock) ||
 		    ldlm_bl_to_thread_lock(ns, NULL, lock) != 0)
 			ldlm_handle_bl_callback(ns, NULL, lock);
 	} else if (!lock->l_readers && !lock->l_writers &&
-		   !(lock->l_flags & LDLM_FL_NO_LRU) &&
-		   !(lock->l_flags & LDLM_FL_BL_AST)) {
-
+		   !ldlm_is_no_lru(lock) && !ldlm_is_bl_ast(lock)) {
 		LDLM_DEBUG(lock, "add lock into lru list");
 
 		/* If this is a client-side namespace and this was the last
@@ -809,7 +817,7 @@ void ldlm_lock_decref_internal(struct ldlm_lock *lock, __u32 mode)
 		ldlm_lock_add_to_lru(lock);
 		unlock_res_and_lock(lock);
 
-		if (lock->l_flags & LDLM_FL_FAIL_LOC)
+		if (ldlm_is_fail_loc(lock))
 			OBD_RACE(OBD_FAIL_LDLM_CP_BL_RACE);
 
 		/* Call ldlm_cancel_lru() only if EARLY_CANCEL and LRU RESIZE
@@ -853,7 +861,7 @@ void ldlm_lock_decref_and_cancel(struct lustre_handle *lockh, __u32 mode)
 
 	LDLM_DEBUG(lock, "ldlm_lock_decref(%s)", ldlm_lockname[mode]);
 	lock_res_and_lock(lock);
-	lock->l_flags |= LDLM_FL_CBPENDING;
+	ldlm_set_cbpending(lock);
 	unlock_res_and_lock(lock);
 	ldlm_lock_decref_internal(lock, mode);
 	LDLM_LOCK_PUT(lock);
@@ -971,7 +979,7 @@ static void ldlm_granted_list_add_lock(struct ldlm_lock *lock,
 	ldlm_resource_dump(D_INFO, res);
 	LDLM_DEBUG(lock, "About to add lock:");
 
-	if (lock->l_flags & LDLM_FL_DESTROYED) {
+	if (ldlm_is_destroyed(lock)) {
 		CDEBUG(D_OTHER, "Lock destroyed, not adding to resource\n");
 		return;
 	}
@@ -1073,10 +1081,9 @@ static struct ldlm_lock *search_queue(struct list_head *queue,
 		 * whose parents already hold a lock so forward progress
 		 * can still happen.
 		 */
-		if (lock->l_flags & LDLM_FL_CBPENDING &&
-		    !(flags & LDLM_FL_CBPENDING))
+		if (ldlm_is_cbpending(lock) && !(flags & LDLM_FL_CBPENDING))
 			continue;
-		if (!unref && lock->l_flags & LDLM_FL_CBPENDING &&
+		if (!unref && ldlm_is_cbpending(lock) &&
 		    lock->l_readers == 0 && lock->l_writers == 0)
 			continue;
 
@@ -1092,6 +1099,7 @@ static struct ldlm_lock *search_queue(struct list_head *queue,
 
 		if (unlikely(match == LCK_GROUP) &&
 		    lock->l_resource->lr_type == LDLM_EXTENT &&
+		    policy->l_extent.gid != LDLM_GID_ANY &&
 		    lock->l_policy_data.l_extent.gid != policy->l_extent.gid)
 			continue;
 
@@ -1104,11 +1112,10 @@ static struct ldlm_lock *search_queue(struct list_head *queue,
 		      policy->l_inodebits.bits))
 			continue;
 
-		if (!unref && (lock->l_flags & LDLM_FL_GONE_MASK))
+		if (!unref && LDLM_HAVE_MASK(lock, GONE))
 			continue;
 
-		if ((flags & LDLM_FL_LOCAL_ONLY) &&
-		    !(lock->l_flags & LDLM_FL_LOCAL))
+		if ((flags & LDLM_FL_LOCAL_ONLY) && !ldlm_is_local(lock))
 			continue;
 
 		if (flags & LDLM_FL_TEST_LOCK) {
@@ -1142,7 +1149,7 @@ EXPORT_SYMBOL(ldlm_lock_fail_match_locked);
  */
 void ldlm_lock_allow_match_locked(struct ldlm_lock *lock)
 {
-	lock->l_flags |= LDLM_FL_LVB_READY;
+	ldlm_set_lvb_ready(lock);
 	wake_up_all(&lock->l_waitq);
 }
 EXPORT_SYMBOL(ldlm_lock_allow_match_locked);
@@ -1243,8 +1250,7 @@ enum ldlm_mode ldlm_lock_match(struct ldlm_namespace *ns, __u64 flags,
 
 	if (lock) {
 		ldlm_lock2handle(lock, lockh);
-		if ((flags & LDLM_FL_LVB_READY) &&
-		    (!(lock->l_flags & LDLM_FL_LVB_READY))) {
+		if ((flags & LDLM_FL_LVB_READY) && !ldlm_is_lvb_ready(lock)) {
 			__u64 wait_flags = LDLM_FL_LVB_READY |
 				LDLM_FL_DESTROYED | LDLM_FL_FAIL_NOTIFIED;
 			struct l_wait_info lwi;
@@ -1271,7 +1277,7 @@ enum ldlm_mode ldlm_lock_match(struct ldlm_namespace *ns, __u64 flags,
 			l_wait_event(lock->l_waitq,
 				     lock->l_flags & wait_flags,
 				     &lwi);
-			if (!(lock->l_flags & LDLM_FL_LVB_READY)) {
+			if (!ldlm_is_lvb_ready(lock)) {
 				if (flags & LDLM_FL_TEST_LOCK)
 					LDLM_LOCK_RELEASE(lock);
 				else
@@ -1325,10 +1331,10 @@ enum ldlm_mode ldlm_revalidate_lock_handle(struct lustre_handle *lockh,
 	lock = ldlm_handle2lock(lockh);
 	if (lock) {
 		lock_res_and_lock(lock);
-		if (lock->l_flags & LDLM_FL_GONE_MASK)
+		if (LDLM_HAVE_MASK(lock, GONE))
 			goto out;
 
-		if (lock->l_flags & LDLM_FL_CBPENDING &&
+		if (ldlm_is_cbpending(lock) &&
 		    lock->l_readers == 0 && lock->l_writers == 0)
 			goto out;
 
@@ -1542,7 +1548,8 @@ enum ldlm_error ldlm_lock_enqueue(struct ldlm_namespace *ns,
 	/* Some flags from the enqueue want to make it into the AST, via the
 	 * lock's l_flags.
 	 */
-	lock->l_flags |= *flags & LDLM_FL_AST_DISCARD_DATA;
+	if (*flags & LDLM_FL_AST_DISCARD_DATA)
+		ldlm_set_ast_discard_data(lock);
 
 	/*
 	 * This distinction between local lock trees is very important; a client
@@ -1581,7 +1588,7 @@ ldlm_work_bl_ast_lock(struct ptlrpc_request_set *rqset, void *opaq)
 	lock_res_and_lock(lock);
 	list_del_init(&lock->l_bl_ast);
 
-	LASSERT(lock->l_flags & LDLM_FL_AST_SENT);
+	LASSERT(ldlm_is_ast_sent(lock));
 	LASSERT(lock->l_bl_ast_run == 0);
 	LASSERT(lock->l_blocking_lock);
 	lock->l_bl_ast_run++;
@@ -1628,12 +1635,12 @@ ldlm_work_cp_ast_lock(struct ptlrpc_request_set *rqset, void *opaq)
 	/* nobody should touch l_cp_ast */
 	lock_res_and_lock(lock);
 	list_del_init(&lock->l_cp_ast);
-	LASSERT(lock->l_flags & LDLM_FL_CP_REQD);
+	LASSERT(ldlm_is_cp_reqd(lock));
 	/* save l_completion_ast since it can be changed by
 	 * mds_intent_policy(), see bug 14225
 	 */
 	completion_callback = lock->l_completion_ast;
-	lock->l_flags &= ~LDLM_FL_CP_REQD;
+	ldlm_clear_cp_reqd(lock);
 	unlock_res_and_lock(lock);
 
 	if (completion_callback)
@@ -1778,8 +1785,8 @@ out:
 void ldlm_cancel_callback(struct ldlm_lock *lock)
 {
 	check_res_locked(lock->l_resource);
-	if (!(lock->l_flags & LDLM_FL_CANCEL)) {
-		lock->l_flags |= LDLM_FL_CANCEL;
+	if (!ldlm_is_cancel(lock)) {
+		ldlm_set_cancel(lock);
 		if (lock->l_blocking_ast) {
 			unlock_res_and_lock(lock);
 			lock->l_blocking_ast(lock, NULL, lock->l_ast_data,
@@ -1789,7 +1796,7 @@ void ldlm_cancel_callback(struct ldlm_lock *lock)
 			LDLM_DEBUG(lock, "no blocking ast");
 		}
 	}
-	lock->l_flags |= LDLM_FL_BL_DONE;
+	ldlm_set_bl_done(lock);
 }
 
 /**
diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_lockd.c b/drivers/staging/lustre/lustre/ldlm/ldlm_lockd.c
index ebe9042ad..ab739f079 100644
--- a/drivers/staging/lustre/lustre/ldlm/ldlm_lockd.c
+++ b/drivers/staging/lustre/lustre/ldlm/ldlm_lockd.c
@@ -124,10 +124,10 @@ void ldlm_handle_bl_callback(struct ldlm_namespace *ns,
 	LDLM_DEBUG(lock, "client blocking AST callback handler");
 
 	lock_res_and_lock(lock);
-	lock->l_flags |= LDLM_FL_CBPENDING;
+	ldlm_set_cbpending(lock);
 
-	if (lock->l_flags & LDLM_FL_CANCEL_ON_BLOCK)
-		lock->l_flags |= LDLM_FL_CANCEL;
+	if (ldlm_is_cancel_on_block(lock))
+		ldlm_set_cancel(lock);
 
 	do_ast = !lock->l_readers && !lock->l_writers;
 	unlock_res_and_lock(lock);
@@ -172,7 +172,7 @@ static void ldlm_handle_cp_callback(struct ptlrpc_request *req,
 			set_current_state(TASK_INTERRUPTIBLE);
 			schedule_timeout(to);
 			if (lock->l_granted_mode == lock->l_req_mode ||
-			    lock->l_flags & LDLM_FL_DESTROYED)
+			    ldlm_is_destroyed(lock))
 				break;
 		}
 	}
@@ -215,7 +215,7 @@ static void ldlm_handle_cp_callback(struct ptlrpc_request *req,
 	}
 
 	lock_res_and_lock(lock);
-	if ((lock->l_flags & LDLM_FL_DESTROYED) ||
+	if (ldlm_is_destroyed(lock) ||
 	    lock->l_granted_mode == lock->l_req_mode) {
 		/* bug 11300: the lock has already been granted */
 		unlock_res_and_lock(lock);
@@ -291,7 +291,7 @@ static void ldlm_handle_cp_callback(struct ptlrpc_request *req,
 out:
 	if (rc < 0) {
 		lock_res_and_lock(lock);
-		lock->l_flags |= LDLM_FL_FAILED;
+		ldlm_set_failed(lock);
 		unlock_res_and_lock(lock);
 		wake_up(&lock->l_waitq);
 	}
@@ -360,8 +360,7 @@ static int __ldlm_bl_to_thread(struct ldlm_bl_work_item *blwi,
 	struct ldlm_bl_pool *blp = ldlm_state->ldlm_bl_pool;
 
 	spin_lock(&blp->blp_lock);
-	if (blwi->blwi_lock &&
-	    blwi->blwi_lock->l_flags & LDLM_FL_DISCARD_DATA) {
+	if (blwi->blwi_lock && ldlm_is_discard_data(blwi->blwi_lock)) {
 		/* add LDLM_FL_DISCARD_DATA requests to the priority list */
 		list_add_tail(&blwi->blwi_entry, &blp->blp_prio_list);
 	} else {
@@ -626,23 +625,22 @@ static int ldlm_callback_handler(struct ptlrpc_request *req)
 		return 0;
 	}
 
-	if ((lock->l_flags & LDLM_FL_FAIL_LOC) &&
+	if (ldlm_is_fail_loc(lock) &&
 	    lustre_msg_get_opc(req->rq_reqmsg) == LDLM_BL_CALLBACK)
 		OBD_RACE(OBD_FAIL_LDLM_CP_BL_RACE);
 
 	/* Copy hints/flags (e.g. LDLM_FL_DISCARD_DATA) from AST. */
 	lock_res_and_lock(lock);
 	lock->l_flags |= ldlm_flags_from_wire(dlm_req->lock_flags &
-					      LDLM_AST_FLAGS);
+					      LDLM_FL_AST_MASK);
 	if (lustre_msg_get_opc(req->rq_reqmsg) == LDLM_BL_CALLBACK) {
 		/* If somebody cancels lock and cache is already dropped,
 		 * or lock is failed before cp_ast received on client,
 		 * we can tell the server we have no lock. Otherwise, we
 		 * should send cancel after dropping the cache.
 		 */
-		if (((lock->l_flags & LDLM_FL_CANCELING) &&
-		    (lock->l_flags & LDLM_FL_BL_DONE)) ||
-		    (lock->l_flags & LDLM_FL_FAILED)) {
+		if ((ldlm_is_canceling(lock) && ldlm_is_bl_done(lock)) ||
+		    ldlm_is_failed(lock)) {
 			LDLM_DEBUG(lock, "callback on lock %#llx - lock disappeared\n",
 				   dlm_req->lock_handle[0].cookie);
 			unlock_res_and_lock(lock);
@@ -656,7 +654,7 @@ static int ldlm_callback_handler(struct ptlrpc_request *req)
 		 * Let ldlm_cancel_lru() be fast.
 		 */
 		ldlm_lock_remove_from_lru(lock);
-		lock->l_flags |= LDLM_FL_BL_AST;
+		ldlm_set_bl_ast(lock);
 	}
 	unlock_res_and_lock(lock);
 
@@ -674,7 +672,7 @@ static int ldlm_callback_handler(struct ptlrpc_request *req)
 	case LDLM_BL_CALLBACK:
 		CDEBUG(D_INODE, "blocking ast\n");
 		req_capsule_extend(&req->rq_pill, &RQF_LDLM_BL_CALLBACK);
-		if (!(lock->l_flags & LDLM_FL_CANCEL_ON_BLOCK)) {
+		if (!ldlm_is_cancel_on_block(lock)) {
 			rc = ldlm_callback_reply(req, 0);
 			if (req->rq_no_reply || rc)
 				ldlm_callback_errmsg(req, "Normal process", rc,
diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_request.c b/drivers/staging/lustre/lustre/ldlm/ldlm_request.c
index 74e193e52..107314e28 100644
--- a/drivers/staging/lustre/lustre/ldlm/ldlm_request.c
+++ b/drivers/staging/lustre/lustre/ldlm/ldlm_request.c
@@ -153,7 +153,7 @@ static int ldlm_completion_tail(struct ldlm_lock *lock)
 	long delay;
 	int  result;
 
-	if (lock->l_flags & (LDLM_FL_DESTROYED | LDLM_FL_FAILED)) {
+	if (ldlm_is_destroyed(lock) || ldlm_is_failed(lock)) {
 		LDLM_DEBUG(lock, "client-side enqueue: destroyed");
 		result = -EIO;
 	} else {
@@ -252,7 +252,7 @@ noreproc:
 
 	lwd.lwd_lock = lock;
 
-	if (lock->l_flags & LDLM_FL_NO_TIMEOUT) {
+	if (ldlm_is_no_timeout(lock)) {
 		LDLM_DEBUG(lock, "waiting indefinitely because of NO_TIMEOUT");
 		lwi = LWI_INTR(interrupted_completion_wait, &lwd);
 	} else {
@@ -269,7 +269,7 @@ noreproc:
 
 	if (OBD_FAIL_CHECK_RESET(OBD_FAIL_LDLM_INTR_CP_AST,
 				 OBD_FAIL_LDLM_CP_BL_RACE | OBD_FAIL_ONCE)) {
-		lock->l_flags |= LDLM_FL_FAIL_LOC;
+		ldlm_set_fail_loc(lock);
 		rc = -EINTR;
 	} else {
 		/* Go to sleep until the lock is granted or cancelled. */
@@ -296,7 +296,7 @@ static void failed_lock_cleanup(struct ldlm_namespace *ns,
 	lock_res_and_lock(lock);
 	/* Check that lock is not granted or failed, we might race. */
 	if ((lock->l_req_mode != lock->l_granted_mode) &&
-	    !(lock->l_flags & LDLM_FL_FAILED)) {
+	    !ldlm_is_failed(lock)) {
 		/* Make sure that this lock will not be found by raced
 		 * bl_ast and -EINVAL reply is sent to server anyways.
 		 * bug 17645
@@ -347,7 +347,6 @@ int ldlm_cli_enqueue_fini(struct obd_export *exp, struct ptlrpc_request *req,
 	struct ldlm_lock *lock;
 	struct ldlm_reply *reply;
 	int cleanup_phase = 1;
-	int size = 0;
 
 	lock = ldlm_handle2lock(lockh);
 	/* ldlm_cli_enqueue is holding a reference on this lock. */
@@ -375,8 +374,8 @@ int ldlm_cli_enqueue_fini(struct obd_export *exp, struct ptlrpc_request *req,
 		goto cleanup;
 	}
 
-	if (lvb_len != 0) {
-		LASSERT(lvb);
+	if (lvb_len > 0) {
+		int size = 0;
 
 		size = req_capsule_get_size(&req->rq_pill, &RMF_DLM_LVB,
 					    RCL_SERVER);
@@ -390,12 +389,13 @@ int ldlm_cli_enqueue_fini(struct obd_export *exp, struct ptlrpc_request *req,
 			rc = -EINVAL;
 			goto cleanup;
 		}
+		lvb_len = size;
 	}
 
 	if (rc == ELDLM_LOCK_ABORTED) {
-		if (lvb_len != 0)
+		if (lvb_len > 0 && lvb)
 			rc = ldlm_fill_lvb(lock, &req->rq_pill, RCL_SERVER,
-					   lvb, size);
+					   lvb, lvb_len);
 		if (rc == 0)
 			rc = ELDLM_LOCK_ABORTED;
 		goto cleanup;
@@ -421,7 +421,7 @@ int ldlm_cli_enqueue_fini(struct obd_export *exp, struct ptlrpc_request *req,
 
 	*flags = ldlm_flags_from_wire(reply->lock_flags);
 	lock->l_flags |= ldlm_flags_from_wire(reply->lock_flags &
-					      LDLM_INHERIT_FLAGS);
+					      LDLM_FL_INHERIT_MASK);
 	/* move NO_TIMEOUT flag to the lock to force ldlm_lock_match()
 	 * to wait with no timeout as well
 	 */
@@ -489,7 +489,7 @@ int ldlm_cli_enqueue_fini(struct obd_export *exp, struct ptlrpc_request *req,
 	/* If the lock has already been granted by a completion AST, don't
 	 * clobber the LVB with an older one.
 	 */
-	if (lvb_len != 0) {
+	if (lvb_len > 0) {
 		/* We must lock or a racing completion might update lvb without
 		 * letting us know and we'll clobber the correct value.
 		 * Cannot unlock after the check either, as that still leaves
@@ -498,7 +498,7 @@ int ldlm_cli_enqueue_fini(struct obd_export *exp, struct ptlrpc_request *req,
 		lock_res_and_lock(lock);
 		if (lock->l_req_mode != lock->l_granted_mode)
 			rc = ldlm_fill_lvb(lock, &req->rq_pill, RCL_SERVER,
-					   lock->l_lvb_data, size);
+					   lock->l_lvb_data, lvb_len);
 		unlock_res_and_lock(lock);
 		if (rc < 0) {
 			cleanup_phase = 1;
@@ -518,7 +518,7 @@ int ldlm_cli_enqueue_fini(struct obd_export *exp, struct ptlrpc_request *req,
 		}
 	}
 
-	if (lvb_len && lvb) {
+	if (lvb_len > 0 && lvb) {
 		/* Copy the LVB here, and not earlier, because the completion
 		 * AST (if any) can override what we got in the reply
 		 */
@@ -601,7 +601,7 @@ int ldlm_prep_elc_req(struct obd_export *exp, struct ptlrpc_request *req,
 		avail = ldlm_capsule_handles_avail(pill, RCL_CLIENT, canceloff);
 
 		flags = ns_connect_lru_resize(ns) ?
-			LDLM_CANCEL_LRUR : LDLM_CANCEL_AGED;
+			LDLM_CANCEL_LRUR_NO_WAIT : LDLM_CANCEL_AGED;
 		to_free = !ns_connect_lru_resize(ns) &&
 			  opc == LDLM_ENQUEUE ? 1 : 0;
 
@@ -821,12 +821,11 @@ static __u64 ldlm_cli_cancel_local(struct ldlm_lock *lock)
 		LDLM_DEBUG(lock, "client-side cancel");
 		/* Set this flag to prevent others from getting new references*/
 		lock_res_and_lock(lock);
-		lock->l_flags |= LDLM_FL_CBPENDING;
+		ldlm_set_cbpending(lock);
 		local_only = !!(lock->l_flags &
 				(LDLM_FL_LOCAL_ONLY|LDLM_FL_CANCEL_ON_BLOCK));
 		ldlm_cancel_callback(lock);
-		rc = (lock->l_flags & LDLM_FL_BL_AST) ?
-			LDLM_FL_BL_AST : LDLM_FL_CANCELING;
+		rc = ldlm_is_bl_ast(lock) ? LDLM_FL_BL_AST : LDLM_FL_CANCELING;
 		unlock_res_and_lock(lock);
 
 		if (local_only) {
@@ -1131,31 +1130,30 @@ EXPORT_SYMBOL(ldlm_cli_cancel_list_local);
  * dirty data, to close a file, ...) or waiting for any RPCs in-flight (e.g.
  * readahead requests, ...)
  */
-static ldlm_policy_res_t ldlm_cancel_no_wait_policy(struct ldlm_namespace *ns,
-						    struct ldlm_lock *lock,
-						    int unused, int added,
-						    int count)
+static enum ldlm_policy_res
+ldlm_cancel_no_wait_policy(struct ldlm_namespace *ns, struct ldlm_lock *lock,
+			   int unused, int added, int count)
 {
-	ldlm_policy_res_t result = LDLM_POLICY_CANCEL_LOCK;
-	ldlm_cancel_for_recovery cb = ns->ns_cancel_for_recovery;
-
-	lock_res_and_lock(lock);
+	enum ldlm_policy_res result = LDLM_POLICY_CANCEL_LOCK;
 
 	/* don't check added & count since we want to process all locks
-	 * from unused list
+	 * from unused list.
+	 * It's fine to not take lock to access lock->l_resource since
+	 * the lock has already been granted so it won't change.
 	 */
 	switch (lock->l_resource->lr_type) {
 	case LDLM_EXTENT:
 	case LDLM_IBITS:
-		if (cb && cb(lock))
+		if (ns->ns_cancel && ns->ns_cancel(lock) != 0)
 			break;
 	default:
 		result = LDLM_POLICY_SKIP_LOCK;
-		lock->l_flags |= LDLM_FL_SKIPPED;
+		lock_res_and_lock(lock);
+		ldlm_set_skipped(lock);
+		unlock_res_and_lock(lock);
 		break;
 	}
 
-	unlock_res_and_lock(lock);
 	return result;
 }
 
@@ -1168,10 +1166,10 @@ static ldlm_policy_res_t ldlm_cancel_no_wait_policy(struct ldlm_namespace *ns,
  *
  * \retval LDLM_POLICY_CANCEL_LOCK cancel lock from LRU
  */
-static ldlm_policy_res_t ldlm_cancel_lrur_policy(struct ldlm_namespace *ns,
-						 struct ldlm_lock *lock,
-						 int unused, int added,
-						 int count)
+static enum ldlm_policy_res ldlm_cancel_lrur_policy(struct ldlm_namespace *ns,
+						    struct ldlm_lock *lock,
+						    int unused, int added,
+						    int count)
 {
 	unsigned long cur = cfs_time_current();
 	struct ldlm_pool *pl = &ns->ns_pool;
@@ -1196,8 +1194,13 @@ static ldlm_policy_res_t ldlm_cancel_lrur_policy(struct ldlm_namespace *ns,
 	/* Stop when SLV is not yet come from server or lv is smaller than
 	 * it is.
 	 */
-	return (slv == 0 || lv < slv) ?
-		LDLM_POLICY_KEEP_LOCK : LDLM_POLICY_CANCEL_LOCK;
+	if (slv == 0 || lv < slv)
+		return LDLM_POLICY_KEEP_LOCK;
+
+	if (ns->ns_cancel && ns->ns_cancel(lock) == 0)
+		return LDLM_POLICY_KEEP_LOCK;
+
+	return LDLM_POLICY_CANCEL_LOCK;
 }
 
 /**
@@ -1209,10 +1212,10 @@ static ldlm_policy_res_t ldlm_cancel_lrur_policy(struct ldlm_namespace *ns,
  *
  * \retval LDLM_POLICY_CANCEL_LOCK cancel lock from LRU
  */
-static ldlm_policy_res_t ldlm_cancel_passed_policy(struct ldlm_namespace *ns,
-						   struct ldlm_lock *lock,
-						   int unused, int added,
-						   int count)
+static enum ldlm_policy_res ldlm_cancel_passed_policy(struct ldlm_namespace *ns,
+						      struct ldlm_lock *lock,
+						      int unused, int added,
+						      int count)
 {
 	/* Stop LRU processing when we reach past @count or have checked all
 	 * locks in LRU.
@@ -1230,16 +1233,35 @@ static ldlm_policy_res_t ldlm_cancel_passed_policy(struct ldlm_namespace *ns,
  *
  * \retval LDLM_POLICY_CANCEL_LOCK cancel lock from LRU
  */
-static ldlm_policy_res_t ldlm_cancel_aged_policy(struct ldlm_namespace *ns,
-						 struct ldlm_lock *lock,
-						 int unused, int added,
-						 int count)
+static enum ldlm_policy_res ldlm_cancel_aged_policy(struct ldlm_namespace *ns,
+						    struct ldlm_lock *lock,
+						    int unused, int added,
+						    int count)
 {
-	/* Stop LRU processing if young lock is found and we reach past count */
-	return ((added >= count) &&
-		time_before(cfs_time_current(),
-			    cfs_time_add(lock->l_last_used, ns->ns_max_age))) ?
-		LDLM_POLICY_KEEP_LOCK : LDLM_POLICY_CANCEL_LOCK;
+	if ((added >= count) &&
+	    time_before(cfs_time_current(),
+			cfs_time_add(lock->l_last_used, ns->ns_max_age)))
+		return LDLM_POLICY_KEEP_LOCK;
+
+	if (ns->ns_cancel && ns->ns_cancel(lock) == 0)
+		return LDLM_POLICY_KEEP_LOCK;
+
+	return LDLM_POLICY_CANCEL_LOCK;
+}
+
+static enum ldlm_policy_res
+ldlm_cancel_lrur_no_wait_policy(struct ldlm_namespace *ns,
+				struct ldlm_lock *lock,
+				int unused, int added,
+				int count)
+{
+	enum ldlm_policy_res result;
+
+	result = ldlm_cancel_lrur_policy(ns, lock, unused, added, count);
+	if (result == LDLM_POLICY_KEEP_LOCK)
+		return result;
+
+	return ldlm_cancel_no_wait_policy(ns, lock, unused, added, count);
 }
 
 /**
@@ -1251,10 +1273,9 @@ static ldlm_policy_res_t ldlm_cancel_aged_policy(struct ldlm_namespace *ns,
  *
  * \retval LDLM_POLICY_CANCEL_LOCK cancel lock from LRU
  */
-static ldlm_policy_res_t ldlm_cancel_default_policy(struct ldlm_namespace *ns,
-						    struct ldlm_lock *lock,
-						    int unused, int added,
-						    int count)
+static enum ldlm_policy_res
+ldlm_cancel_default_policy(struct ldlm_namespace *ns, struct ldlm_lock *lock,
+			   int unused, int added, int count)
 {
 	/* Stop LRU processing when we reach past count or have checked all
 	 * locks in LRU.
@@ -1263,7 +1284,8 @@ static ldlm_policy_res_t ldlm_cancel_default_policy(struct ldlm_namespace *ns,
 		LDLM_POLICY_KEEP_LOCK : LDLM_POLICY_CANCEL_LOCK;
 }
 
-typedef ldlm_policy_res_t (*ldlm_cancel_lru_policy_t)(struct ldlm_namespace *,
+typedef enum ldlm_policy_res (*ldlm_cancel_lru_policy_t)(
+						      struct ldlm_namespace *,
 						      struct ldlm_lock *, int,
 						      int, int);
 
@@ -1281,6 +1303,8 @@ ldlm_cancel_lru_policy(struct ldlm_namespace *ns, int flags)
 			return ldlm_cancel_lrur_policy;
 		else if (flags & LDLM_CANCEL_PASSED)
 			return ldlm_cancel_passed_policy;
+		else if (flags & LDLM_CANCEL_LRUR_NO_WAIT)
+			return ldlm_cancel_lrur_no_wait_policy;
 	} else {
 		if (flags & LDLM_CANCEL_AGED)
 			return ldlm_cancel_aged_policy;
@@ -1329,6 +1353,7 @@ static int ldlm_prepare_lru_list(struct ldlm_namespace *ns,
 	ldlm_cancel_lru_policy_t pf;
 	struct ldlm_lock *lock, *next;
 	int added = 0, unused, remained;
+	int no_wait = flags & (LDLM_CANCEL_NO_WAIT | LDLM_CANCEL_LRUR_NO_WAIT);
 
 	spin_lock(&ns->ns_lock);
 	unused = ns->ns_nr_unused;
@@ -1341,7 +1366,8 @@ static int ldlm_prepare_lru_list(struct ldlm_namespace *ns,
 	LASSERT(pf);
 
 	while (!list_empty(&ns->ns_unused_list)) {
-		ldlm_policy_res_t result;
+		enum ldlm_policy_res result;
+		time_t last_use = 0;
 
 		/* all unused locks */
 		if (remained-- <= 0)
@@ -1354,17 +1380,20 @@ static int ldlm_prepare_lru_list(struct ldlm_namespace *ns,
 		list_for_each_entry_safe(lock, next, &ns->ns_unused_list,
 					     l_lru) {
 			/* No locks which got blocking requests. */
-			LASSERT(!(lock->l_flags & LDLM_FL_BL_AST));
+			LASSERT(!ldlm_is_bl_ast(lock));
 
-			if (flags & LDLM_CANCEL_NO_WAIT &&
-			    lock->l_flags & LDLM_FL_SKIPPED)
+			if (no_wait && ldlm_is_skipped(lock))
 				/* already processed */
 				continue;
 
+			last_use = lock->l_last_used;
+			if (last_use == cfs_time_current())
+				continue;
+
 			/* Somebody is already doing CANCEL. No need for this
 			 * lock in LRU, do not traverse it again.
 			 */
-			if (!(lock->l_flags & LDLM_FL_CANCELING))
+			if (!ldlm_is_canceling(lock))
 				break;
 
 			ldlm_lock_remove_from_lru_nolock(lock);
@@ -1407,12 +1436,14 @@ static int ldlm_prepare_lru_list(struct ldlm_namespace *ns,
 
 		lock_res_and_lock(lock);
 		/* Check flags again under the lock. */
-		if ((lock->l_flags & LDLM_FL_CANCELING) ||
-		    (ldlm_lock_remove_from_lru(lock) == 0)) {
+		if (ldlm_is_canceling(lock) ||
+		    (ldlm_lock_remove_from_lru_check(lock, last_use) == 0)) {
 			/* Another thread is removing lock from LRU, or
 			 * somebody is already doing CANCEL, or there
 			 * is a blocking request which will send cancel
-			 * by itself, or the lock is no longer unused.
+			 * by itself, or the lock is no longer unused or
+			 * the lock has been used since the pf() call and
+			 * pages could be put under it.
 			 */
 			unlock_res_and_lock(lock);
 			lu_ref_del(&lock->l_reference,
@@ -1429,7 +1460,7 @@ static int ldlm_prepare_lru_list(struct ldlm_namespace *ns,
 		 * where while we are doing cancel here, server is also
 		 * silently cancelling this lock.
 		 */
-		lock->l_flags &= ~LDLM_FL_CANCEL_ON_BLOCK;
+		ldlm_clear_cancel_on_block(lock);
 
 		/* Setting the CBPENDING flag is a little misleading,
 		 * but prevents an important race; namely, once
@@ -1526,8 +1557,7 @@ int ldlm_cancel_resource_local(struct ldlm_resource *res,
 		/* If somebody is already doing CANCEL, or blocking AST came,
 		 * skip this lock.
 		 */
-		if (lock->l_flags & LDLM_FL_BL_AST ||
-		    lock->l_flags & LDLM_FL_CANCELING)
+		if (ldlm_is_bl_ast(lock) || ldlm_is_canceling(lock))
 			continue;
 
 		if (lockmode_compat(lock->l_granted_mode, mode))
@@ -1771,7 +1801,6 @@ static void ldlm_namespace_foreach(struct ldlm_namespace *ns,
 
 	cfs_hash_for_each_nolock(ns->ns_rs_hash,
 				 ldlm_res_iter_helper, &helper);
-
 }
 
 /* non-blocking function to manipulate a lock whose cb_data is being put away.
@@ -1887,7 +1916,7 @@ static int replay_one_lock(struct obd_import *imp, struct ldlm_lock *lock)
 	int flags;
 
 	/* Bug 11974: Do not replay a lock which is actively being canceled */
-	if (lock->l_flags & LDLM_FL_CANCELING) {
+	if (ldlm_is_canceling(lock)) {
 		LDLM_DEBUG(lock, "Not replaying canceled lock:");
 		return 0;
 	}
@@ -1896,7 +1925,7 @@ static int replay_one_lock(struct obd_import *imp, struct ldlm_lock *lock)
 	 * server might have long dropped it, but notification of that event was
 	 * lost by network. (and server granted conflicting lock already)
 	 */
-	if (lock->l_flags & LDLM_FL_CANCEL_ON_BLOCK) {
+	if (ldlm_is_cancel_on_block(lock)) {
 		LDLM_DEBUG(lock, "Not replaying reply-less lock:");
 		ldlm_lock_cancel(lock);
 		return 0;
diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_resource.c b/drivers/staging/lustre/lustre/ldlm/ldlm_resource.c
index 9dede87ad..e99c89c34 100644
--- a/drivers/staging/lustre/lustre/ldlm/ldlm_resource.c
+++ b/drivers/staging/lustre/lustre/ldlm/ldlm_resource.c
@@ -124,9 +124,15 @@ int ldlm_debugfs_setup(void)
 	}
 
 	rc = ldebugfs_add_vars(ldlm_debugfs_dir, ldlm_debugfs_list, NULL);
+	if (rc) {
+		CERROR("LProcFS failed in ldlm-init\n");
+		goto err_svc;
+	}
 
 	return 0;
 
+err_svc:
+	ldebugfs_remove(&ldlm_svc_debugfs_dir);
 err_ns:
 	ldebugfs_remove(&ldlm_ns_debugfs_dir);
 err_type:
@@ -758,12 +764,12 @@ static void cleanup_resource(struct ldlm_resource *res, struct list_head *q,
 		list_for_each(tmp, q) {
 			lock = list_entry(tmp, struct ldlm_lock,
 					      l_res_link);
-			if (lock->l_flags & LDLM_FL_CLEANED) {
+			if (ldlm_is_cleaned(lock)) {
 				lock = NULL;
 				continue;
 			}
 			LDLM_LOCK_GET(lock);
-			lock->l_flags |= LDLM_FL_CLEANED;
+			ldlm_set_cleaned(lock);
 			break;
 		}
 
@@ -775,13 +781,13 @@ static void cleanup_resource(struct ldlm_resource *res, struct list_head *q,
 		/* Set CBPENDING so nothing in the cancellation path
 		 * can match this lock.
 		 */
-		lock->l_flags |= LDLM_FL_CBPENDING;
-		lock->l_flags |= LDLM_FL_FAILED;
+		ldlm_set_cbpending(lock);
+		ldlm_set_failed(lock);
 		lock->l_flags |= flags;
 
 		/* ... without sending a CANCEL message for local_only. */
 		if (local_only)
-			lock->l_flags |= LDLM_FL_LOCAL_ONLY;
+			ldlm_set_local_only(lock);
 
 		if (local_only && (lock->l_readers || lock->l_writers)) {
 			/* This is a little bit gross, but much better than the
@@ -1275,7 +1281,7 @@ void ldlm_resource_add_lock(struct ldlm_resource *res, struct list_head *head,
 
 	LDLM_DEBUG(lock, "About to add this lock:\n");
 
-	if (lock->l_flags & LDLM_FL_DESTROYED) {
+	if (ldlm_is_destroyed(lock)) {
 		CDEBUG(D_OTHER, "Lock destroyed, not adding to resource\n");
 		return;
 	}
@@ -1400,3 +1406,4 @@ void ldlm_resource_dump(int level, struct ldlm_resource *res)
 			LDLM_DEBUG_LIMIT(level, lock, "###");
 	}
 }
+EXPORT_SYMBOL(ldlm_resource_dump);
diff --git a/drivers/staging/lustre/lustre/llite/Makefile b/drivers/staging/lustre/lustre/llite/Makefile
index 9ac29e718..2ce10ff01 100644
--- a/drivers/staging/lustre/lustre/llite/Makefile
+++ b/drivers/staging/lustre/lustre/llite/Makefile
@@ -4,7 +4,8 @@ lustre-y := dcache.o dir.o file.o llite_close.o llite_lib.o llite_nfs.o \
 	    rw.o namei.o symlink.o llite_mmap.o \
 	    xattr.o xattr_cache.o remote_perm.o llite_rmtacl.o \
 	    rw26.o super25.o statahead.o \
-	    ../lclient/glimpse.o ../lclient/lcommon_cl.o ../lclient/lcommon_misc.o \
-	    vvp_dev.o vvp_page.o vvp_lock.o vvp_io.o vvp_object.o lproc_llite.o
+	    glimpse.o lcommon_cl.o lcommon_misc.o \
+	    vvp_dev.o vvp_page.o vvp_lock.o vvp_io.o vvp_object.o vvp_req.o \
+	    lproc_llite.o
 
 llite_lloop-y := lloop.o
diff --git a/drivers/staging/lustre/lustre/llite/dcache.c b/drivers/staging/lustre/lustre/llite/dcache.c
index dd1c82701..1b6f82a1a 100644
--- a/drivers/staging/lustre/lustre/llite/dcache.c
+++ b/drivers/staging/lustre/lustre/llite/dcache.c
@@ -108,11 +108,8 @@ static int ll_dcompare(const struct dentry *parent, const struct dentry *dentry,
 
 static inline int return_if_equal(struct ldlm_lock *lock, void *data)
 {
-	if ((lock->l_flags &
-	     (LDLM_FL_CANCELING | LDLM_FL_DISCARD_DATA)) ==
-	    (LDLM_FL_CANCELING | LDLM_FL_DISCARD_DATA))
-		return LDLM_ITER_CONTINUE;
-	return LDLM_ITER_STOP;
+	return (ldlm_is_canceling(lock) && ldlm_is_discard_data(lock)) ?
+		LDLM_ITER_CONTINUE : LDLM_ITER_STOP;
 }
 
 /* find any ldlm lock of the inode in mdc and lov
@@ -253,8 +250,8 @@ void ll_invalidate_aliases(struct inode *inode)
 {
 	struct dentry *dentry;
 
-	CDEBUG(D_INODE, "marking dentries for ino %lu/%u(%p) invalid\n",
-	       inode->i_ino, inode->i_generation, inode);
+	CDEBUG(D_INODE, "marking dentries for ino "DFID"(%p) invalid\n",
+	       PFID(ll_inode2fid(inode)), inode);
 
 	ll_lock_dcache(inode);
 	hlist_for_each_entry(dentry, &inode->i_dentry, d_u.d_alias) {
@@ -289,8 +286,8 @@ void ll_lookup_finish_locks(struct lookup_intent *it, struct inode *inode)
 	if (it->d.lustre.it_lock_mode && inode) {
 		struct ll_sb_info *sbi = ll_i2sbi(inode);
 
-		CDEBUG(D_DLMTRACE, "setting l_data to inode %p (%lu/%u)\n",
-		       inode, inode->i_ino, inode->i_generation);
+		CDEBUG(D_DLMTRACE, "setting l_data to inode "DFID"(%p)\n",
+		       PFID(ll_inode2fid(inode)), inode);
 		ll_set_lock_data(sbi->ll_md_exp, inode, it, NULL);
 	}
 
diff --git a/drivers/staging/lustre/lustre/llite/dir.c b/drivers/staging/lustre/lustre/llite/dir.c
index e4c82883e..4b00d1ac8 100644
--- a/drivers/staging/lustre/lustre/llite/dir.c
+++ b/drivers/staging/lustre/lustre/llite/dir.c
@@ -158,11 +158,16 @@ static int ll_dir_filler(void *_hash, struct page *page0)
 	int i;
 	int rc;
 
-	CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p) hash %llu\n",
-	       inode->i_ino, inode->i_generation, inode, hash);
+	CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p) hash %llu\n",
+	       PFID(ll_inode2fid(inode)), inode, hash);
 
 	LASSERT(max_pages > 0 && max_pages <= MD_MAX_BRW_PAGES);
 
+	op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
+				     LUSTRE_OPC_ANY, NULL);
+	if (IS_ERR(op_data))
+		return PTR_ERR(op_data);
+
 	page_pool = kcalloc(max_pages, sizeof(page), GFP_NOFS);
 	if (page_pool) {
 		page_pool[0] = page0;
@@ -177,8 +182,6 @@ static int ll_dir_filler(void *_hash, struct page *page0)
 		page_pool[npages] = page;
 	}
 
-	op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
-				     LUSTRE_OPC_ANY, NULL);
 	op_data->op_npages = npages;
 	op_data->op_offset = hash;
 	rc = md_readpage(exp, op_data, page_pool, &request);
@@ -190,7 +193,7 @@ static int ll_dir_filler(void *_hash, struct page *page0)
 		body = req_capsule_server_get(&request->rq_pill, &RMF_MDT_BODY);
 		/* Checked by mdc_readpage() */
 		if (body->valid & OBD_MD_FLSIZE)
-			cl_isize_write(inode, body->size);
+			i_size_write(inode, body->size);
 
 		nrdpgs = (request->rq_bulk->bd_nob_transferred+PAGE_SIZE-1)
 			 >> PAGE_SHIFT;
@@ -372,8 +375,8 @@ struct page *ll_get_dir_page(struct inode *dir, __u64 hash,
 			return ERR_PTR(rc);
 		}
 
-		CDEBUG(D_INODE, "setting lr_lvb_inode to inode %p (%lu/%u)\n",
-		       dir, dir->i_ino, dir->i_generation);
+		CDEBUG(D_INODE, "setting lr_lvb_inode to inode "DFID"(%p)\n",
+		       PFID(ll_inode2fid(dir)), dir);
 		md_set_lock_data(ll_i2sbi(dir)->ll_md_exp,
 				 &it.d.lustre.it_lock_handle, dir, NULL);
 	} else {
@@ -468,6 +471,28 @@ fail:
 	goto out_unlock;
 }
 
+/**
+ * return IF_* type for given lu_dirent entry.
+ * IF_* flag shld be converted to particular OS file type in
+ * platform llite module.
+ */
+static __u16 ll_dirent_type_get(struct lu_dirent *ent)
+{
+	__u16 type = 0;
+	struct luda_type *lt;
+	int len = 0;
+
+	if (le32_to_cpu(ent->lde_attrs) & LUDA_TYPE) {
+		const unsigned int align = sizeof(struct luda_type) - 1;
+
+		len = le16_to_cpu(ent->lde_namelen);
+		len = (len + align) & ~align;
+		lt = (void *)ent->lde_name + len;
+		type = IFTODT(le16_to_cpu(lt->lt_type));
+	}
+	return type;
+}
+
 int ll_dir_read(struct inode *inode, struct dir_context *ctx)
 {
 	struct ll_inode_info *info       = ll_i2info(inode);
@@ -589,15 +614,16 @@ static int ll_readdir(struct file *filp, struct dir_context *ctx)
 	struct inode		*inode	= file_inode(filp);
 	struct ll_file_data	*lfd	= LUSTRE_FPRIVATE(filp);
 	struct ll_sb_info	*sbi	= ll_i2sbi(inode);
+	__u64 pos = lfd ? lfd->lfd_pos : 0;
 	int			hash64	= sbi->ll_flags & LL_SBI_64BIT_HASH;
 	int			api32	= ll_need_32bit_api(sbi);
 	int			rc;
 
-	CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p) pos %lu/%llu 32bit_api %d\n",
-	       inode->i_ino, inode->i_generation,
-	       inode, (unsigned long)lfd->lfd_pos, i_size_read(inode), api32);
+	CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p) pos %lu/%llu 32bit_api %d\n",
+	       PFID(ll_inode2fid(inode)), inode, (unsigned long)pos,
+	       i_size_read(inode), api32);
 
-	if (lfd->lfd_pos == MDS_DIR_END_OFF) {
+	if (pos == MDS_DIR_END_OFF) {
 		/*
 		 * end-of-file.
 		 */
@@ -605,9 +631,10 @@ static int ll_readdir(struct file *filp, struct dir_context *ctx)
 		goto out;
 	}
 
-	ctx->pos = lfd->lfd_pos;
+	ctx->pos = pos;
 	rc = ll_dir_read(inode, ctx);
-	lfd->lfd_pos = ctx->pos;
+	if (lfd)
+		lfd->lfd_pos = ctx->pos;
 	if (ctx->pos == MDS_DIR_END_OFF) {
 		if (api32)
 			ctx->pos = LL_DIR_END_OFF_32BIT;
@@ -804,9 +831,8 @@ int ll_dir_getstripe(struct inode *inode, struct lov_mds_md **lmmp,
 	rc = md_getattr(sbi->ll_md_exp, op_data, &req);
 	ll_finish_md_op_data(op_data);
 	if (rc < 0) {
-		CDEBUG(D_INFO, "md_getattr failed on inode %lu/%u: rc %d\n",
-		       inode->i_ino,
-		       inode->i_generation, rc);
+		CDEBUG(D_INFO, "md_getattr failed on inode "DFID": rc %d\n",
+		       PFID(ll_inode2fid(inode)), rc);
 		goto out;
 	}
 
@@ -916,7 +942,7 @@ static int ll_ioc_copy_start(struct super_block *sb, struct hsm_copy *copy)
 		}
 
 		/* Read current file data version */
-		rc = ll_data_version(inode, &data_version, 1);
+		rc = ll_data_version(inode, &data_version, LL_DV_RD_FLUSH);
 		iput(inode);
 		if (rc != 0) {
 			CDEBUG(D_HSM, "Could not read file data version of "
@@ -936,6 +962,9 @@ static int ll_ioc_copy_start(struct super_block *sb, struct hsm_copy *copy)
 	}
 
 progress:
+	/* On error, the request should be considered as completed */
+	if (hpk.hpk_errval > 0)
+		hpk.hpk_flags |= HP_FLAG_COMPLETED;
 	rc = obd_iocontrol(LL_IOC_HSM_PROGRESS, sbi->ll_md_exp, sizeof(hpk),
 			   &hpk, NULL);
 
@@ -997,8 +1026,7 @@ static int ll_ioc_copy_end(struct super_block *sb, struct hsm_copy *copy)
 			goto progress;
 		}
 
-		rc = ll_data_version(inode, &data_version,
-				     copy->hc_hai.hai_action == HSMA_ARCHIVE);
+		rc = ll_data_version(inode, &data_version, LL_DV_RD_FLUSH);
 		iput(inode);
 		if (rc) {
 			CDEBUG(D_HSM, "Could not read file data version. Request could not be confirmed.\n");
@@ -1033,7 +1061,6 @@ static int ll_ioc_copy_end(struct super_block *sb, struct hsm_copy *copy)
 			/* hpk_errval must be >= 0 */
 			hpk.hpk_errval = EBUSY;
 		}
-
 	}
 
 progress:
@@ -1242,8 +1269,8 @@ static long ll_dir_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 	struct obd_ioctl_data *data;
 	int rc = 0;
 
-	CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), cmd=%#x\n",
-	       inode->i_ino, inode->i_generation, inode, cmd);
+	CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), cmd=%#x\n",
+	       PFID(ll_inode2fid(inode)), inode, cmd);
 
 	/* asm-ppc{,64} declares TCGETS, et. al. as type 't' not 'T' */
 	if (_IOC_TYPE(cmd) == 'T' || _IOC_TYPE(cmd) == 't') /* tty ioctls */
@@ -1362,7 +1389,6 @@ out_free:
 lmv_out_free:
 		obd_ioctl_freedata(buf, len);
 		return rc;
-
 	}
 	case LL_IOC_LOV_SETSTRIPE: {
 		struct lov_user_md_v3 lumv3;
@@ -1474,8 +1500,9 @@ free_lmv:
 					       cmd == LL_IOC_MDC_GETINFO)) {
 				rc = 0;
 				goto skip_lmm;
-			} else
+			} else {
 				goto out_req;
+			}
 		}
 
 		if (cmd == IOC_MDC_GETFILESTRIPE ||
@@ -1688,15 +1715,16 @@ out_quotactl:
 		return ll_flush_ctx(inode);
 #ifdef CONFIG_FS_POSIX_ACL
 	case LL_IOC_RMTACL: {
-	    if (sbi->ll_flags & LL_SBI_RMT_CLIENT && is_root_inode(inode)) {
-		struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
+		if (sbi->ll_flags & LL_SBI_RMT_CLIENT && is_root_inode(inode)) {
+			struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
 
-		rc = rct_add(&sbi->ll_rct, current_pid(), arg);
-		if (!rc)
-			fd->fd_flags |= LL_FILE_RMTACL;
-		return rc;
-	    } else
-		return 0;
+			rc = rct_add(&sbi->ll_rct, current_pid(), arg);
+			if (!rc)
+				fd->fd_flags |= LL_FILE_RMTACL;
+			return rc;
+		} else {
+			return 0;
+		}
 	}
 #endif
 	case LL_IOC_GETOBDCOUNT: {
@@ -1817,6 +1845,9 @@ out_quotactl:
 		return rc;
 	}
 	case LL_IOC_HSM_CT_START:
+		if (!capable(CFS_CAP_SYS_ADMIN))
+			return -EPERM;
+
 		rc = copy_and_ioctl(cmd, sbi->ll_md_exp, (void __user *)arg,
 				    sizeof(struct lustre_kernelcomm));
 		return rc;
@@ -1865,7 +1896,6 @@ static loff_t ll_dir_seek(struct file *file, loff_t offset, int origin)
 	int api32 = ll_need_32bit_api(sbi);
 	loff_t ret = -EINVAL;
 
-	inode_lock(inode);
 	switch (origin) {
 	case SEEK_SET:
 		break;
@@ -1903,7 +1933,6 @@ static loff_t ll_dir_seek(struct file *file, loff_t offset, int origin)
 	goto out;
 
 out:
-	inode_unlock(inode);
 	return ret;
 }
 
@@ -1922,7 +1951,7 @@ const struct file_operations ll_dir_operations = {
 	.open     = ll_dir_open,
 	.release  = ll_dir_release,
 	.read     = generic_read_dir,
-	.iterate  = ll_readdir,
+	.iterate_shared  = ll_readdir,
 	.unlocked_ioctl   = ll_dir_ioctl,
 	.fsync    = ll_fsync,
 };
diff --git a/drivers/staging/lustre/lustre/llite/file.c b/drivers/staging/lustre/lustre/llite/file.c
index cf619af3c..f47f2acaf 100644
--- a/drivers/staging/lustre/lustre/llite/file.c
+++ b/drivers/staging/lustre/lustre/llite/file.c
@@ -45,6 +45,7 @@
 #include "../include/lustre_lite.h"
 #include <linux/pagemap.h>
 #include <linux/file.h>
+#include <linux/mount.h>
 #include "llite_internal.h"
 #include "../include/lustre/ll_fiemap.h"
 
@@ -87,8 +88,7 @@ void ll_pack_inode2opdata(struct inode *inode, struct md_op_data *op_data,
 	op_data->op_attr.ia_ctime = inode->i_ctime;
 	op_data->op_attr.ia_size = i_size_read(inode);
 	op_data->op_attr_blocks = inode->i_blocks;
-	((struct ll_iattr *)&op_data->op_attr)->ia_attr_flags =
-					ll_inode_to_ext_flags(inode->i_flags);
+	op_data->op_attr_flags = ll_inode_to_ext_flags(inode->i_flags);
 	op_data->op_ioepoch = ll_i2info(inode)->lli_ioepoch;
 	if (fh)
 		op_data->op_handle = *fh;
@@ -170,13 +170,15 @@ static int ll_close_inode_openhandle(struct obd_export *md_exp,
 		 */
 		rc = ll_som_update(inode, op_data);
 		if (rc) {
-			CERROR("inode %lu mdc Size-on-MDS update failed: rc = %d\n",
-			       inode->i_ino, rc);
+			CERROR("%s: inode "DFID" mdc Size-on-MDS update failed: rc = %d\n",
+			       ll_i2mdexp(inode)->exp_obd->obd_name,
+			       PFID(ll_inode2fid(inode)), rc);
 			rc = 0;
 		}
 	} else if (rc) {
-		CERROR("inode %lu mdc close failed: rc = %d\n",
-		       inode->i_ino, rc);
+		CERROR("%s: inode "DFID" mdc close failed: rc = %d\n",
+		       ll_i2mdexp(inode)->exp_obd->obd_name,
+		       PFID(ll_inode2fid(inode)), rc);
 	}
 
 	/* DATA_MODIFIED flag was successfully sent on close, cancel data
@@ -278,7 +280,7 @@ static int ll_md_close(struct obd_export *md_exp, struct inode *inode,
 
 	/* clear group lock, if present */
 	if (unlikely(fd->fd_flags & LL_FILE_GROUP_LOCKED))
-		ll_put_grouplock(inode, file, fd->fd_grouplock.cg_gid);
+		ll_put_grouplock(inode, file, fd->fd_grouplock.lg_gid);
 
 	if (fd->fd_lease_och) {
 		bool lease_broken;
@@ -343,8 +345,8 @@ int ll_file_release(struct inode *inode, struct file *file)
 	struct ll_inode_info *lli = ll_i2info(inode);
 	int rc;
 
-	CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
-	       inode->i_generation, inode);
+	CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p)\n",
+	       PFID(ll_inode2fid(inode)), inode);
 
 #ifdef CONFIG_FS_POSIX_ACL
 	if (sbi->ll_flags & LL_SBI_RMT_CLIENT && is_root_inode(inode)) {
@@ -543,8 +545,8 @@ int ll_file_open(struct inode *inode, struct file *file)
 	struct ll_file_data *fd;
 	int rc = 0, opendir_set = 0;
 
-	CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), flags %o\n", inode->i_ino,
-	       inode->i_generation, inode, file->f_flags);
+	CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), flags %o\n",
+	       PFID(ll_inode2fid(inode)), inode, file->f_flags);
 
 	it = file->private_data; /* XXX: compat macro */
 	file->private_data = NULL; /* prevent ll_local_open assertion */
@@ -677,7 +679,9 @@ restart:
 		if (rc)
 			goto out_och_free;
 
-		LASSERT(it_disposition(it, DISP_ENQ_OPEN_REF));
+		LASSERTF(it_disposition(it, DISP_ENQ_OPEN_REF),
+			 "inode %p: disposition %x, status %d\n", inode,
+			 it_disposition(it, ~0), it->d.lustre.it_status);
 
 		rc = ll_local_open(file, it, fd, *och_p);
 		if (rc)
@@ -875,16 +879,19 @@ ll_lease_open(struct inode *inode, struct file *file, fmode_t fmode,
 	return och;
 
 out_close:
-	rc2 = ll_close_inode_openhandle(sbi->ll_md_exp, inode, och, NULL);
-	if (rc2)
-		CERROR("Close openhandle returned %d\n", rc2);
-
-	/* cancel open lock */
+	/* Cancel open lock */
 	if (it.d.lustre.it_lock_mode != 0) {
 		ldlm_lock_decref_and_cancel(&och->och_lease_handle,
 					    it.d.lustre.it_lock_mode);
 		it.d.lustre.it_lock_mode = 0;
+		och->och_lease_handle.cookie = 0ULL;
 	}
+	rc2 = ll_close_inode_openhandle(sbi->ll_md_exp, inode, och, NULL);
+	if (rc2 < 0)
+		CERROR("%s: error closing file "DFID": %d\n",
+		       ll_get_fsname(inode->i_sb, NULL, 0),
+		       PFID(&ll_i2info(inode)->lli_fid), rc2);
+	och = NULL; /* och has been freed in ll_close_inode_openhandle() */
 out_release_it:
 	ll_intent_release(&it);
 out:
@@ -908,7 +915,7 @@ static int ll_lease_close(struct obd_client_handle *och, struct inode *inode,
 		lock_res_and_lock(lock);
 		cancelled = ldlm_is_cancel(lock);
 		unlock_res_and_lock(lock);
-		ldlm_lock_put(lock);
+		LDLM_LOCK_PUT(lock);
 	}
 
 	CDEBUG(D_INODE, "lease for " DFID " broken? %d\n",
@@ -926,7 +933,7 @@ static int ll_lease_close(struct obd_client_handle *och, struct inode *inode,
 
 /* Fills the obdo with the attributes for the lsm */
 static int ll_lsm_getattr(struct lov_stripe_md *lsm, struct obd_export *exp,
-			  struct obdo *obdo, __u64 ioepoch, int sync)
+			  struct obdo *obdo, __u64 ioepoch, int dv_flags)
 {
 	struct ptlrpc_request_set *set;
 	struct obd_info	    oinfo = { };
@@ -945,9 +952,11 @@ static int ll_lsm_getattr(struct lov_stripe_md *lsm, struct obd_export *exp,
 			       OBD_MD_FLMTIME | OBD_MD_FLCTIME |
 			       OBD_MD_FLGROUP | OBD_MD_FLEPOCH |
 			       OBD_MD_FLDATAVERSION;
-	if (sync) {
+	if (dv_flags & (LL_DV_WR_FLUSH | LL_DV_RD_FLUSH)) {
 		oinfo.oi_oa->o_valid |= OBD_MD_FLFLAGS;
 		oinfo.oi_oa->o_flags |= OBD_FL_SRVLOCK;
+		if (dv_flags & LL_DV_WR_FLUSH)
+			oinfo.oi_oa->o_flags |= OBD_FL_FLUSH;
 	}
 
 	set = ptlrpc_prep_set();
@@ -960,11 +969,16 @@ static int ll_lsm_getattr(struct lov_stripe_md *lsm, struct obd_export *exp,
 			rc = ptlrpc_set_wait(set);
 		ptlrpc_set_destroy(set);
 	}
-	if (rc == 0)
+	if (rc == 0) {
 		oinfo.oi_oa->o_valid &= (OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ |
 					 OBD_MD_FLATIME | OBD_MD_FLMTIME |
 					 OBD_MD_FLCTIME | OBD_MD_FLSIZE |
-					 OBD_MD_FLDATAVERSION);
+					 OBD_MD_FLDATAVERSION | OBD_MD_FLFLAGS);
+		if (dv_flags & LL_DV_WR_FLUSH &&
+		    !(oinfo.oi_oa->o_valid & OBD_MD_FLFLAGS &&
+		      oinfo.oi_oa->o_flags & OBD_FL_FLUSH))
+			return -ENOTSUPP;
+	}
 	return rc;
 }
 
@@ -980,7 +994,7 @@ int ll_inode_getattr(struct inode *inode, struct obdo *obdo,
 
 	lsm = ccc_inode_lsm_get(inode);
 	rc = ll_lsm_getattr(lsm, ll_i2dtexp(inode),
-			    obdo, ioepoch, sync);
+			    obdo, ioepoch, sync ? LL_DV_RD_FLUSH : 0);
 	if (rc == 0) {
 		struct ost_id *oi = lsm ? &lsm->lsm_oi : &obdo->o_oi;
 
@@ -994,50 +1008,57 @@ int ll_inode_getattr(struct inode *inode, struct obdo *obdo,
 	return rc;
 }
 
-int ll_merge_lvb(const struct lu_env *env, struct inode *inode)
+int ll_merge_attr(const struct lu_env *env, struct inode *inode)
 {
 	struct ll_inode_info *lli = ll_i2info(inode);
 	struct cl_object *obj = lli->lli_clob;
-	struct cl_attr *attr = ccc_env_thread_attr(env);
-	struct ost_lvb lvb;
+	struct cl_attr *attr = vvp_env_thread_attr(env);
+	s64 atime;
+	s64 mtime;
+	s64 ctime;
 	int rc = 0;
 
 	ll_inode_size_lock(inode);
+
 	/* merge timestamps the most recently obtained from mds with
 	 * timestamps obtained from osts
 	 */
-	LTIME_S(inode->i_atime) = lli->lli_lvb.lvb_atime;
-	LTIME_S(inode->i_mtime) = lli->lli_lvb.lvb_mtime;
-	LTIME_S(inode->i_ctime) = lli->lli_lvb.lvb_ctime;
+	LTIME_S(inode->i_atime) = lli->lli_atime;
+	LTIME_S(inode->i_mtime) = lli->lli_mtime;
+	LTIME_S(inode->i_ctime) = lli->lli_ctime;
 
-	lvb.lvb_size = i_size_read(inode);
-	lvb.lvb_blocks = inode->i_blocks;
-	lvb.lvb_mtime = LTIME_S(inode->i_mtime);
-	lvb.lvb_atime = LTIME_S(inode->i_atime);
-	lvb.lvb_ctime = LTIME_S(inode->i_ctime);
+	mtime = LTIME_S(inode->i_mtime);
+	atime = LTIME_S(inode->i_atime);
+	ctime = LTIME_S(inode->i_ctime);
 
 	cl_object_attr_lock(obj);
 	rc = cl_object_attr_get(env, obj, attr);
 	cl_object_attr_unlock(obj);
 
-	if (rc == 0) {
-		if (lvb.lvb_atime < attr->cat_atime)
-			lvb.lvb_atime = attr->cat_atime;
-		if (lvb.lvb_ctime < attr->cat_ctime)
-			lvb.lvb_ctime = attr->cat_ctime;
-		if (lvb.lvb_mtime < attr->cat_mtime)
-			lvb.lvb_mtime = attr->cat_mtime;
+	if (rc != 0)
+		goto out_size_unlock;
 
-		CDEBUG(D_VFSTRACE, DFID " updating i_size %llu\n",
-		       PFID(&lli->lli_fid), attr->cat_size);
-		cl_isize_write_nolock(inode, attr->cat_size);
+	if (atime < attr->cat_atime)
+		atime = attr->cat_atime;
 
-		inode->i_blocks = attr->cat_blocks;
+	if (ctime < attr->cat_ctime)
+		ctime = attr->cat_ctime;
 
-		LTIME_S(inode->i_mtime) = lvb.lvb_mtime;
-		LTIME_S(inode->i_atime) = lvb.lvb_atime;
-		LTIME_S(inode->i_ctime) = lvb.lvb_ctime;
-	}
+	if (mtime < attr->cat_mtime)
+		mtime = attr->cat_mtime;
+
+	CDEBUG(D_VFSTRACE, DFID " updating i_size %llu\n",
+	       PFID(&lli->lli_fid), attr->cat_size);
+
+	i_size_write(inode, attr->cat_size);
+
+	inode->i_blocks = attr->cat_blocks;
+
+	LTIME_S(inode->i_mtime) = mtime;
+	LTIME_S(inode->i_atime) = atime;
+	LTIME_S(inode->i_ctime) = ctime;
+
+out_size_unlock:
 	ll_inode_size_unlock(inode);
 
 	return rc;
@@ -1120,47 +1141,48 @@ ll_file_io_generic(const struct lu_env *env, struct vvp_io_args *args,
 	struct cl_io	 *io;
 	ssize_t	       result;
 
+	CDEBUG(D_VFSTRACE, "file: %s, type: %d ppos: %llu, count: %zd\n",
+	       file->f_path.dentry->d_name.name, iot, *ppos, count);
+
 restart:
-	io = ccc_env_thread_io(env);
+	io = vvp_env_thread_io(env);
 	ll_io_init(io, file, iot == CIT_WRITE);
 
 	if (cl_io_rw_init(env, io, iot, *ppos, count) == 0) {
 		struct vvp_io *vio = vvp_env_io(env);
-		struct ccc_io *cio = ccc_env_io(env);
 		int write_mutex_locked = 0;
 
-		cio->cui_fd  = LUSTRE_FPRIVATE(file);
-		vio->cui_io_subtype = args->via_io_subtype;
+		vio->vui_fd  = LUSTRE_FPRIVATE(file);
+		vio->vui_io_subtype = args->via_io_subtype;
 
-		switch (vio->cui_io_subtype) {
+		switch (vio->vui_io_subtype) {
 		case IO_NORMAL:
-			cio->cui_iter = args->u.normal.via_iter;
-			cio->cui_iocb = args->u.normal.via_iocb;
+			vio->vui_iter = args->u.normal.via_iter;
+			vio->vui_iocb = args->u.normal.via_iocb;
 			if ((iot == CIT_WRITE) &&
-			    !(cio->cui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
+			    !(vio->vui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
 				if (mutex_lock_interruptible(&lli->
 							       lli_write_mutex)) {
 					result = -ERESTARTSYS;
 					goto out;
 				}
 				write_mutex_locked = 1;
-			} else if (iot == CIT_READ) {
-				down_read(&lli->lli_trunc_sem);
 			}
+			down_read(&lli->lli_trunc_sem);
 			break;
 		case IO_SPLICE:
-			vio->u.splice.cui_pipe = args->u.splice.via_pipe;
-			vio->u.splice.cui_flags = args->u.splice.via_flags;
+			vio->u.splice.vui_pipe = args->u.splice.via_pipe;
+			vio->u.splice.vui_flags = args->u.splice.via_flags;
 			break;
 		default:
-			CERROR("Unknown IO type - %u\n", vio->cui_io_subtype);
+			CERROR("Unknown IO type - %u\n", vio->vui_io_subtype);
 			LBUG();
 		}
 		result = cl_io_loop(env, io);
+		if (args->via_io_subtype == IO_NORMAL)
+			up_read(&lli->lli_trunc_sem);
 		if (write_mutex_locked)
 			mutex_unlock(&lli->lli_write_mutex);
-		else if (args->via_io_subtype == IO_NORMAL && iot == CIT_READ)
-			up_read(&lli->lli_trunc_sem);
 	} else {
 		/* cl_io_rw_init() handled IO */
 		result = io->ci_result;
@@ -1197,6 +1219,7 @@ out:
 			fd->fd_write_failed = true;
 		}
 	}
+	CDEBUG(D_VFSTRACE, "iot: %d, result: %zd\n", iot, result);
 
 	return result;
 }
@@ -1212,7 +1235,7 @@ static ssize_t ll_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
 	if (IS_ERR(env))
 		return PTR_ERR(env);
 
-	args = vvp_env_args(env, IO_NORMAL);
+	args = ll_env_args(env, IO_NORMAL);
 	args->u.normal.via_iter = to;
 	args->u.normal.via_iocb = iocb;
 
@@ -1236,7 +1259,7 @@ static ssize_t ll_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 	if (IS_ERR(env))
 		return PTR_ERR(env);
 
-	args = vvp_env_args(env, IO_NORMAL);
+	args = ll_env_args(env, IO_NORMAL);
 	args->u.normal.via_iter = from;
 	args->u.normal.via_iocb = iocb;
 
@@ -1262,7 +1285,7 @@ static ssize_t ll_file_splice_read(struct file *in_file, loff_t *ppos,
 	if (IS_ERR(env))
 		return PTR_ERR(env);
 
-	args = vvp_env_args(env, IO_SPLICE);
+	args = ll_env_args(env, IO_SPLICE);
 	args->u.splice.via_pipe = pipe;
 	args->u.splice.via_flags = flags;
 
@@ -1354,7 +1377,8 @@ static int ll_lov_recreate_fid(struct inode *inode, unsigned long arg)
 }
 
 int ll_lov_setstripe_ea_info(struct inode *inode, struct dentry *dentry,
-			     int flags, struct lov_user_md *lum, int lum_size)
+			     __u64 flags, struct lov_user_md *lum,
+			     int lum_size)
 {
 	struct lov_stripe_md *lsm = NULL;
 	struct lookup_intent oit = {.it_op = IT_OPEN, .it_flags = flags};
@@ -1363,8 +1387,8 @@ int ll_lov_setstripe_ea_info(struct inode *inode, struct dentry *dentry,
 	lsm = ccc_inode_lsm_get(inode);
 	if (lsm) {
 		ccc_inode_lsm_put(inode, lsm);
-		CDEBUG(D_IOCTL, "stripe already exists for ino %lu\n",
-		       inode->i_ino);
+		CDEBUG(D_IOCTL, "stripe already exists for inode "DFID"\n",
+		       PFID(ll_inode2fid(inode)));
 		rc = -EEXIST;
 		goto out;
 	}
@@ -1478,7 +1502,7 @@ out:
 static int ll_lov_setea(struct inode *inode, struct file *file,
 			unsigned long arg)
 {
-	int			 flags = MDS_OPEN_HAS_OBJS | FMODE_WRITE;
+	__u64			 flags = MDS_OPEN_HAS_OBJS | FMODE_WRITE;
 	struct lov_user_md	*lump;
 	int			 lum_size = sizeof(struct lov_user_md) +
 					    sizeof(struct lov_user_ost_data);
@@ -1512,7 +1536,7 @@ static int ll_lov_setstripe(struct inode *inode, struct file *file,
 	struct lov_user_md_v1 __user *lumv1p = (void __user *)arg;
 	struct lov_user_md_v3 __user *lumv3p = (void __user *)arg;
 	int lum_size, rc;
-	int flags = FMODE_WRITE;
+	__u64 flags = FMODE_WRITE;
 
 	/* first try with v1 which is smaller than v3 */
 	lum_size = sizeof(struct lov_user_md_v1);
@@ -1561,7 +1585,7 @@ ll_get_grouplock(struct inode *inode, struct file *file, unsigned long arg)
 {
 	struct ll_inode_info   *lli = ll_i2info(inode);
 	struct ll_file_data    *fd = LUSTRE_FPRIVATE(file);
-	struct ccc_grouplock    grouplock;
+	struct ll_grouplock    grouplock;
 	int		     rc;
 
 	if (arg == 0) {
@@ -1575,14 +1599,14 @@ ll_get_grouplock(struct inode *inode, struct file *file, unsigned long arg)
 	spin_lock(&lli->lli_lock);
 	if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
 		CWARN("group lock already existed with gid %lu\n",
-		      fd->fd_grouplock.cg_gid);
+		      fd->fd_grouplock.lg_gid);
 		spin_unlock(&lli->lli_lock);
 		return -EINVAL;
 	}
-	LASSERT(!fd->fd_grouplock.cg_lock);
+	LASSERT(!fd->fd_grouplock.lg_lock);
 	spin_unlock(&lli->lli_lock);
 
-	rc = cl_get_grouplock(cl_i2info(inode)->lli_clob,
+	rc = cl_get_grouplock(ll_i2info(inode)->lli_clob,
 			      arg, (file->f_flags & O_NONBLOCK), &grouplock);
 	if (rc)
 		return rc;
@@ -1608,7 +1632,7 @@ static int ll_put_grouplock(struct inode *inode, struct file *file,
 {
 	struct ll_inode_info   *lli = ll_i2info(inode);
 	struct ll_file_data    *fd = LUSTRE_FPRIVATE(file);
-	struct ccc_grouplock    grouplock;
+	struct ll_grouplock    grouplock;
 
 	spin_lock(&lli->lli_lock);
 	if (!(fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
@@ -1616,11 +1640,11 @@ static int ll_put_grouplock(struct inode *inode, struct file *file,
 		CWARN("no group lock held\n");
 		return -EINVAL;
 	}
-	LASSERT(fd->fd_grouplock.cg_lock);
+	LASSERT(fd->fd_grouplock.lg_lock);
 
-	if (fd->fd_grouplock.cg_gid != arg) {
+	if (fd->fd_grouplock.lg_gid != arg) {
 		CWARN("group lock %lu doesn't match current id %lu\n",
-		      arg, fd->fd_grouplock.cg_gid);
+		      arg, fd->fd_grouplock.lg_gid);
 		spin_unlock(&lli->lli_lock);
 		return -EINVAL;
 	}
@@ -1861,11 +1885,12 @@ error:
  * This value is computed using stripe object version on OST.
  * Version is computed using server side locking.
  *
- * @param extent_lock  Take extent lock. Not needed if a process is already
- *		       holding the OST object group locks.
+ * @param sync  if do sync on the OST side;
+ *		0: no sync
+ *		LL_DV_RD_FLUSH: flush dirty pages, LCK_PR on OSTs
+ *		LL_DV_WR_FLUSH: drop all caching pages, LCK_PW on OSTs
  */
-int ll_data_version(struct inode *inode, __u64 *data_version,
-		    int extent_lock)
+int ll_data_version(struct inode *inode, __u64 *data_version, int flags)
 {
 	struct lov_stripe_md	*lsm = NULL;
 	struct ll_sb_info	*sbi = ll_i2sbi(inode);
@@ -1887,7 +1912,7 @@ int ll_data_version(struct inode *inode, __u64 *data_version,
 		goto out;
 	}
 
-	rc = ll_lsm_getattr(lsm, sbi->ll_dt_exp, obdo, 0, extent_lock);
+	rc = ll_lsm_getattr(lsm, sbi->ll_dt_exp, obdo, 0, flags);
 	if (rc == 0) {
 		if (!(obdo->o_valid & OBD_MD_FLDATAVERSION))
 			rc = -EOPNOTSUPP;
@@ -1923,7 +1948,7 @@ int ll_hsm_release(struct inode *inode)
 	}
 
 	/* Grab latest data_version and [am]time values */
-	rc = ll_data_version(inode, &data_version, 1);
+	rc = ll_data_version(inode, &data_version, LL_DV_WR_FLUSH);
 	if (rc != 0)
 		goto out;
 
@@ -1933,7 +1958,7 @@ int ll_hsm_release(struct inode *inode)
 		goto out;
 	}
 
-	ll_merge_lvb(env, inode);
+	ll_merge_attr(env, inode);
 	cl_env_nested_put(&nest, env);
 
 	/* Release the file.
@@ -2227,8 +2252,8 @@ ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 	struct ll_file_data	*fd = LUSTRE_FPRIVATE(file);
 	int			 flags, rc;
 
-	CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),cmd=%x\n", inode->i_ino,
-	       inode->i_generation, inode, cmd);
+	CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p),cmd=%x\n",
+	       PFID(ll_inode2fid(inode)), inode, cmd);
 	ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_IOCTL, 1);
 
 	/* asm-ppc{,64} declares TCGETS, et. al. as type 't' not 'T' */
@@ -2331,9 +2356,8 @@ ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 		if (copy_from_user(&idv, (char __user *)arg, sizeof(idv)))
 			return -EFAULT;
 
-		rc = ll_data_version(inode, &idv.idv_version,
-				     !(idv.idv_flags & LL_DV_NOFLUSH));
-
+		idv.idv_flags &= LL_DV_RD_FLUSH | LL_DV_WR_FLUSH;
+		rc = ll_data_version(inode, &idv.idv_version, idv.idv_flags);
 		if (rc == 0 && copy_to_user((char __user *)arg, &idv,
 					    sizeof(idv)))
 			return -EFAULT;
@@ -2499,7 +2523,7 @@ ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 					rc = och->och_flags &
 						(FMODE_READ | FMODE_WRITE);
 				unlock_res_and_lock(lock);
-				ldlm_lock_put(lock);
+				LDLM_LOCK_PUT(lock);
 			}
 		}
 		mutex_unlock(&lli->lli_och_mutex);
@@ -2537,9 +2561,8 @@ static loff_t ll_file_seek(struct file *file, loff_t offset, int origin)
 
 	retval = offset + ((origin == SEEK_END) ? i_size_read(inode) :
 			   (origin == SEEK_CUR) ? file->f_pos : 0);
-	CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), to=%llu=%#llx(%d)\n",
-	       inode->i_ino, inode->i_generation, inode, retval, retval,
-	       origin);
+	CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), to=%llu=%#llx(%d)\n",
+	       PFID(ll_inode2fid(inode)), inode, retval, retval, origin);
 	ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_LLSEEK, 1);
 
 	if (origin == SEEK_END || origin == SEEK_HOLE || origin == SEEK_DATA) {
@@ -2603,8 +2626,8 @@ int cl_sync_file_range(struct inode *inode, loff_t start, loff_t end,
 	if (IS_ERR(env))
 		return PTR_ERR(env);
 
-	io = ccc_env_thread_io(env);
-	io->ci_obj = cl_i2info(inode)->lli_clob;
+	io = vvp_env_thread_io(env);
+	io->ci_obj = ll_i2info(inode)->lli_clob;
 	io->ci_ignore_layout = ignore_layout;
 
 	/* initialize parameters for sync */
@@ -2634,8 +2657,8 @@ int ll_fsync(struct file *file, loff_t start, loff_t end, int datasync)
 	struct ptlrpc_request *req;
 	int rc, err;
 
-	CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
-	       inode->i_generation, inode);
+	CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p)\n",
+	       PFID(ll_inode2fid(inode)), inode);
 	ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FSYNC, 1);
 
 	rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
@@ -2693,8 +2716,8 @@ ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock)
 	int rc;
 	int rc2 = 0;
 
-	CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu file_lock=%p\n",
-	       inode->i_ino, file_lock);
+	CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID" file_lock=%p\n",
+	       PFID(ll_inode2fid(inode)), file_lock);
 
 	ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FLOCK, 1);
 
@@ -2777,9 +2800,9 @@ ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock)
 	if (IS_ERR(op_data))
 		return PTR_ERR(op_data);
 
-	CDEBUG(D_DLMTRACE, "inode=%lu, pid=%u, flags=%#llx, mode=%u, start=%llu, end=%llu\n",
-	       inode->i_ino, flock.l_flock.pid, flags, einfo.ei_mode,
-	       flock.l_flock.start, flock.l_flock.end);
+	CDEBUG(D_DLMTRACE, "inode="DFID", pid=%u, flags=%#llx, mode=%u, start=%llu, end=%llu\n",
+	       PFID(ll_inode2fid(inode)), flock.l_flock.pid, flags,
+	       einfo.ei_mode, flock.l_flock.start, flock.l_flock.end);
 
 	rc = md_enqueue(sbi->ll_md_exp, &einfo, NULL,
 			op_data, &lockh, &flock, 0, NULL /* req */, flags);
@@ -2901,8 +2924,8 @@ static int __ll_inode_revalidate(struct dentry *dentry, __u64 ibits)
 	struct obd_export *exp;
 	int rc = 0;
 
-	CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),name=%pd\n",
-	       inode->i_ino, inode->i_generation, inode, dentry);
+	CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p),name=%pd\n",
+	       PFID(ll_inode2fid(inode)), inode, dentry);
 
 	exp = ll_i2mdexp(inode);
 
@@ -2998,9 +3021,9 @@ static int ll_inode_revalidate(struct dentry *dentry, __u64 ibits)
 
 	/* if object isn't regular file, don't validate size */
 	if (!S_ISREG(inode->i_mode)) {
-		LTIME_S(inode->i_atime) = ll_i2info(inode)->lli_lvb.lvb_atime;
-		LTIME_S(inode->i_mtime) = ll_i2info(inode)->lli_lvb.lvb_mtime;
-		LTIME_S(inode->i_ctime) = ll_i2info(inode)->lli_lvb.lvb_ctime;
+		LTIME_S(inode->i_atime) = ll_i2info(inode)->lli_atime;
+		LTIME_S(inode->i_mtime) = ll_i2info(inode)->lli_mtime;
+		LTIME_S(inode->i_ctime) = ll_i2info(inode)->lli_ctime;
 	} else {
 		/* In case of restore, the MDT has the right size and has
 		 * already send it back without granting the layout lock,
@@ -3124,8 +3147,8 @@ int ll_inode_permission(struct inode *inode, int mask)
 			return rc;
 	}
 
-	CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), inode mode %x mask %o\n",
-	       inode->i_ino, inode->i_generation, inode, inode->i_mode, mask);
+	CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), inode mode %x mask %o\n",
+	       PFID(ll_inode2fid(inode)), inode, inode->i_mode, mask);
 
 	if (ll_i2sbi(inode)->ll_flags & LL_SBI_RMT_CLIENT)
 		return lustre_check_remote_perm(inode, mask);
@@ -3335,10 +3358,10 @@ static int ll_layout_fetch(struct inode *inode, struct ldlm_lock *lock)
 	int rc;
 
 	CDEBUG(D_INODE, DFID" LVB_READY=%d l_lvb_data=%p l_lvb_len=%d\n",
-	       PFID(ll_inode2fid(inode)), !!(lock->l_flags & LDLM_FL_LVB_READY),
+	       PFID(ll_inode2fid(inode)), ldlm_is_lvb_ready(lock),
 	       lock->l_lvb_data, lock->l_lvb_len);
 
-	if (lock->l_lvb_data && (lock->l_flags & LDLM_FL_LVB_READY))
+	if (lock->l_lvb_data && ldlm_is_lvb_ready(lock))
 		return 0;
 
 	/* if layout lock was granted right away, the layout is returned
@@ -3415,14 +3438,14 @@ static int ll_layout_lock_set(struct lustre_handle *lockh, enum ldlm_mode mode,
 	LASSERT(lock);
 	LASSERT(ldlm_has_layout(lock));
 
-	LDLM_DEBUG(lock, "File %p/"DFID" being reconfigured: %d",
-		   inode, PFID(&lli->lli_fid), reconf);
+	LDLM_DEBUG(lock, "File "DFID"(%p) being reconfigured: %d",
+		   PFID(&lli->lli_fid), inode, reconf);
 
 	/* in case this is a caching lock and reinstate with new inode */
 	md_set_lock_data(sbi->ll_md_exp, &lockh->cookie, inode, NULL);
 
 	lock_res_and_lock(lock);
-	lvb_ready = !!(lock->l_flags & LDLM_FL_LVB_READY);
+	lvb_ready = ldlm_is_lvb_ready(lock);
 	unlock_res_and_lock(lock);
 	/* checking lvb_ready is racy but this is okay. The worst case is
 	 * that multi processes may configure the file on the same time.
@@ -3487,9 +3510,9 @@ out:
 
 	/* wait for IO to complete if it's still being used. */
 	if (wait_layout) {
-		CDEBUG(D_INODE, "%s: %p/" DFID " wait for layout reconf.\n",
+		CDEBUG(D_INODE, "%s: "DFID"(%p) wait for layout reconf\n",
 		       ll_get_fsname(inode->i_sb, NULL, 0),
-		       inode, PFID(&lli->lli_fid));
+		       PFID(&lli->lli_fid), inode);
 
 		memset(&conf, 0, sizeof(conf));
 		conf.coc_opc = OBJECT_CONF_WAIT;
@@ -3498,7 +3521,8 @@ out:
 		if (rc == 0)
 			rc = -EAGAIN;
 
-		CDEBUG(D_INODE, "file: " DFID " waiting layout return: %d.\n",
+		CDEBUG(D_INODE, "%s: file="DFID" waiting layout return: %d.\n",
+		       ll_get_fsname(inode->i_sb, NULL, 0),
 		       PFID(&lli->lli_fid), rc);
 	}
 	return rc;
@@ -3571,9 +3595,9 @@ again:
 	it.it_op = IT_LAYOUT;
 	lockh.cookie = 0ULL;
 
-	LDLM_DEBUG_NOLOCK("%s: requeue layout lock for file %p/" DFID "",
-			  ll_get_fsname(inode->i_sb, NULL, 0), inode,
-			PFID(&lli->lli_fid));
+	LDLM_DEBUG_NOLOCK("%s: requeue layout lock for file "DFID"(%p)",
+			  ll_get_fsname(inode->i_sb, NULL, 0),
+			  PFID(&lli->lli_fid), inode);
 
 	rc = md_enqueue(sbi->ll_md_exp, &einfo, &it, op_data, &lockh,
 			NULL, 0, NULL, 0);
@@ -3601,7 +3625,7 @@ again:
 /**
  *  This function send a restore request to the MDT
  */
-int ll_layout_restore(struct inode *inode)
+int ll_layout_restore(struct inode *inode, loff_t offset, __u64 length)
 {
 	struct hsm_user_request	*hur;
 	int			 len, rc;
@@ -3617,9 +3641,10 @@ int ll_layout_restore(struct inode *inode)
 	hur->hur_request.hr_flags = 0;
 	memcpy(&hur->hur_user_item[0].hui_fid, &ll_i2info(inode)->lli_fid,
 	       sizeof(hur->hur_user_item[0].hui_fid));
-	hur->hur_user_item[0].hui_extent.length = -1;
+	hur->hur_user_item[0].hui_extent.offset = offset;
+	hur->hur_user_item[0].hui_extent.length = length;
 	hur->hur_request.hr_itemcount = 1;
-	rc = obd_iocontrol(LL_IOC_HSM_REQUEST, cl_i2sbi(inode)->ll_md_exp,
+	rc = obd_iocontrol(LL_IOC_HSM_REQUEST, ll_i2sbi(inode)->ll_md_exp,
 			   len, hur, NULL);
 	kfree(hur);
 	return rc;
diff --git a/drivers/staging/lustre/lustre/llite/glimpse.c b/drivers/staging/lustre/lustre/llite/glimpse.c
new file mode 100644
index 000000000..d8ea75424
--- /dev/null
+++ b/drivers/staging/lustre/lustre/llite/glimpse.c
@@ -0,0 +1,255 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * glimpse code shared between vvp and liblustre (and other Lustre clients in
+ * the future).
+ *
+ *   Author: Nikita Danilov <nikita.danilov@sun.com>
+ *   Author: Oleg Drokin <oleg.drokin@sun.com>
+ */
+
+#include "../../include/linux/libcfs/libcfs.h"
+#include "../include/obd_class.h"
+#include "../include/obd_support.h"
+#include "../include/obd.h"
+
+#include "../include/lustre_dlm.h"
+#include "../include/lustre_lite.h"
+#include "../include/lustre_mdc.h"
+#include <linux/pagemap.h>
+#include <linux/file.h>
+
+#include "../include/cl_object.h"
+#include "../llite/llite_internal.h"
+
+static const struct cl_lock_descr whole_file = {
+	.cld_start = 0,
+	.cld_end   = CL_PAGE_EOF,
+	.cld_mode  = CLM_READ
+};
+
+/*
+ * Check whether file has possible unwriten pages.
+ *
+ * \retval 1    file is mmap-ed or has dirty pages
+ *	 0    otherwise
+ */
+blkcnt_t dirty_cnt(struct inode *inode)
+{
+	blkcnt_t cnt = 0;
+	struct vvp_object *vob = cl_inode2vvp(inode);
+	void	      *results[1];
+
+	if (inode->i_mapping)
+		cnt += radix_tree_gang_lookup_tag(&inode->i_mapping->page_tree,
+						  results, 0, 1,
+						  PAGECACHE_TAG_DIRTY);
+	if (cnt == 0 && atomic_read(&vob->vob_mmap_cnt) > 0)
+		cnt = 1;
+
+	return (cnt > 0) ? 1 : 0;
+}
+
+int cl_glimpse_lock(const struct lu_env *env, struct cl_io *io,
+		    struct inode *inode, struct cl_object *clob, int agl)
+{
+	struct ll_inode_info *lli   = ll_i2info(inode);
+	const struct lu_fid  *fid   = lu_object_fid(&clob->co_lu);
+	int result;
+
+	result = 0;
+	if (!(lli->lli_flags & LLIF_MDS_SIZE_LOCK)) {
+		CDEBUG(D_DLMTRACE, "Glimpsing inode " DFID "\n", PFID(fid));
+		if (lli->lli_has_smd) {
+			struct cl_lock *lock = vvp_env_lock(env);
+			struct cl_lock_descr *descr = &lock->cll_descr;
+
+			/* NOTE: this looks like DLM lock request, but it may
+			 *       not be one. Due to CEF_ASYNC flag (translated
+			 *       to LDLM_FL_HAS_INTENT by osc), this is
+			 *       glimpse request, that won't revoke any
+			 *       conflicting DLM locks held. Instead,
+			 *       ll_glimpse_callback() will be called on each
+			 *       client holding a DLM lock against this file,
+			 *       and resulting size will be returned for each
+			 *       stripe. DLM lock on [0, EOF] is acquired only
+			 *       if there were no conflicting locks. If there
+			 *       were conflicting locks, enqueuing or waiting
+			 *       fails with -ENAVAIL, but valid inode
+			 *       attributes are returned anyway.
+			 */
+			*descr = whole_file;
+			descr->cld_obj   = clob;
+			descr->cld_mode  = CLM_READ;
+			descr->cld_enq_flags = CEF_ASYNC | CEF_MUST;
+			if (agl)
+				descr->cld_enq_flags |= CEF_AGL;
+			/*
+			 * CEF_ASYNC is used because glimpse sub-locks cannot
+			 * deadlock (because they never conflict with other
+			 * locks) and, hence, can be enqueued out-of-order.
+			 *
+			 * CEF_MUST protects glimpse lock from conversion into
+			 * a lockless mode.
+			 */
+			result = cl_lock_request(env, io, lock);
+			if (result < 0)
+				return result;
+
+			if (!agl) {
+				ll_merge_attr(env, inode);
+				if (i_size_read(inode) > 0 &&
+				    inode->i_blocks == 0) {
+					/*
+					 * LU-417: Add dirty pages block count
+					 * lest i_blocks reports 0, some "cp" or
+					 * "tar" may think it's a completely
+					 * sparse file and skip it.
+					 */
+					inode->i_blocks = dirty_cnt(inode);
+				}
+			}
+			cl_lock_release(env, lock);
+		} else {
+			CDEBUG(D_DLMTRACE, "No objects for inode\n");
+			ll_merge_attr(env, inode);
+		}
+	}
+
+	return result;
+}
+
+static int cl_io_get(struct inode *inode, struct lu_env **envout,
+		     struct cl_io **ioout, int *refcheck)
+{
+	struct lu_env	  *env;
+	struct cl_io	   *io;
+	struct ll_inode_info	*lli = ll_i2info(inode);
+	struct cl_object       *clob = lli->lli_clob;
+	int result;
+
+	if (S_ISREG(inode->i_mode)) {
+		env = cl_env_get(refcheck);
+		if (!IS_ERR(env)) {
+			io = vvp_env_thread_io(env);
+			io->ci_obj = clob;
+			*envout = env;
+			*ioout  = io;
+			result = 1;
+		} else {
+			result = PTR_ERR(env);
+		}
+	} else {
+		result = 0;
+	}
+	return result;
+}
+
+int cl_glimpse_size0(struct inode *inode, int agl)
+{
+	/*
+	 * We don't need ast_flags argument to cl_glimpse_size(), because
+	 * osc_lock_enqueue() takes care of the possible deadlock that said
+	 * argument was introduced to avoid.
+	 */
+	/*
+	 * XXX but note that ll_file_seek() passes LDLM_FL_BLOCK_NOWAIT to
+	 * cl_glimpse_size(), which doesn't make sense: glimpse locks are not
+	 * blocking anyway.
+	 */
+	struct lu_env	  *env = NULL;
+	struct cl_io	   *io  = NULL;
+	int		     result;
+	int		     refcheck;
+
+	result = cl_io_get(inode, &env, &io, &refcheck);
+	if (result > 0) {
+again:
+		io->ci_verify_layout = 1;
+		result = cl_io_init(env, io, CIT_MISC, io->ci_obj);
+		if (result > 0)
+			/*
+			 * nothing to do for this io. This currently happens
+			 * when stripe sub-object's are not yet created.
+			 */
+			result = io->ci_result;
+		else if (result == 0)
+			result = cl_glimpse_lock(env, io, inode, io->ci_obj,
+						 agl);
+
+		OBD_FAIL_TIMEOUT(OBD_FAIL_GLIMPSE_DELAY, 2);
+		cl_io_fini(env, io);
+		if (unlikely(io->ci_need_restart))
+			goto again;
+		cl_env_put(env, &refcheck);
+	}
+	return result;
+}
+
+int cl_local_size(struct inode *inode)
+{
+	struct lu_env	   *env = NULL;
+	struct cl_io	    *io  = NULL;
+	struct cl_object	*clob;
+	int		      result;
+	int		      refcheck;
+
+	if (!ll_i2info(inode)->lli_has_smd)
+		return 0;
+
+	result = cl_io_get(inode, &env, &io, &refcheck);
+	if (result <= 0)
+		return result;
+
+	clob = io->ci_obj;
+	result = cl_io_init(env, io, CIT_MISC, clob);
+	if (result > 0) {
+		result = io->ci_result;
+	} else if (result == 0) {
+		struct cl_lock *lock = vvp_env_lock(env);
+
+		lock->cll_descr = whole_file;
+		lock->cll_descr.cld_enq_flags = CEF_PEEK;
+		lock->cll_descr.cld_obj = clob;
+		result = cl_lock_request(env, io, lock);
+		if (result == 0) {
+			ll_merge_attr(env, inode);
+			cl_lock_release(env, lock);
+		}
+	}
+	cl_io_fini(env, io);
+	cl_env_put(env, &refcheck);
+	return result;
+}
diff --git a/drivers/staging/lustre/lustre/llite/lcommon_cl.c b/drivers/staging/lustre/lustre/llite/lcommon_cl.c
new file mode 100644
index 000000000..6c00715b4
--- /dev/null
+++ b/drivers/staging/lustre/lustre/llite/lcommon_cl.c
@@ -0,0 +1,327 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2015, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * cl code shared between vvp and liblustre (and other Lustre clients in the
+ * future).
+ *
+ *   Author: Nikita Danilov <nikita.danilov@sun.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_LLITE
+
+#include "../../include/linux/libcfs/libcfs.h"
+# include <linux/fs.h>
+# include <linux/sched.h>
+# include <linux/mm.h>
+# include <linux/quotaops.h>
+# include <linux/highmem.h>
+# include <linux/pagemap.h>
+# include <linux/rbtree.h>
+
+#include "../include/obd.h"
+#include "../include/obd_support.h"
+#include "../include/lustre_fid.h"
+#include "../include/lustre_lite.h"
+#include "../include/lustre_dlm.h"
+#include "../include/lustre_ver.h"
+#include "../include/lustre_mdc.h"
+#include "../include/cl_object.h"
+
+#include "../llite/llite_internal.h"
+
+/*
+ * ccc_ prefix stands for "Common Client Code".
+ */
+
+/*****************************************************************************
+ *
+ * Vvp device and device type functions.
+ *
+ */
+
+/**
+ * An `emergency' environment used by cl_inode_fini() when cl_env_get()
+ * fails. Access to this environment is serialized by cl_inode_fini_guard
+ * mutex.
+ */
+struct lu_env *cl_inode_fini_env;
+int cl_inode_fini_refcheck;
+
+/**
+ * A mutex serializing calls to slp_inode_fini() under extreme memory
+ * pressure, when environments cannot be allocated.
+ */
+static DEFINE_MUTEX(cl_inode_fini_guard);
+
+int cl_setattr_ost(struct inode *inode, const struct iattr *attr)
+{
+	struct lu_env *env;
+	struct cl_io  *io;
+	int	    result;
+	int	    refcheck;
+
+	env = cl_env_get(&refcheck);
+	if (IS_ERR(env))
+		return PTR_ERR(env);
+
+	io = vvp_env_thread_io(env);
+	io->ci_obj = ll_i2info(inode)->lli_clob;
+
+	io->u.ci_setattr.sa_attr.lvb_atime = LTIME_S(attr->ia_atime);
+	io->u.ci_setattr.sa_attr.lvb_mtime = LTIME_S(attr->ia_mtime);
+	io->u.ci_setattr.sa_attr.lvb_ctime = LTIME_S(attr->ia_ctime);
+	io->u.ci_setattr.sa_attr.lvb_size = attr->ia_size;
+	io->u.ci_setattr.sa_valid = attr->ia_valid;
+
+again:
+	if (cl_io_init(env, io, CIT_SETATTR, io->ci_obj) == 0) {
+		struct vvp_io *vio = vvp_env_io(env);
+
+		if (attr->ia_valid & ATTR_FILE)
+			/* populate the file descriptor for ftruncate to honor
+			 * group lock - see LU-787
+			 */
+			vio->vui_fd = LUSTRE_FPRIVATE(attr->ia_file);
+
+		result = cl_io_loop(env, io);
+	} else {
+		result = io->ci_result;
+	}
+	cl_io_fini(env, io);
+	if (unlikely(io->ci_need_restart))
+		goto again;
+	/* HSM import case: file is released, cannot be restored
+	 * no need to fail except if restore registration failed
+	 * with -ENODATA
+	 */
+	if (result == -ENODATA && io->ci_restore_needed &&
+	    io->ci_result != -ENODATA)
+		result = 0;
+	cl_env_put(env, &refcheck);
+	return result;
+}
+
+/**
+ * Initialize or update CLIO structures for regular files when new
+ * meta-data arrives from the server.
+ *
+ * \param inode regular file inode
+ * \param md    new file metadata from MDS
+ * - allocates cl_object if necessary,
+ * - updated layout, if object was already here.
+ */
+int cl_file_inode_init(struct inode *inode, struct lustre_md *md)
+{
+	struct lu_env	*env;
+	struct ll_inode_info *lli;
+	struct cl_object     *clob;
+	struct lu_site       *site;
+	struct lu_fid	*fid;
+	struct cl_object_conf conf = {
+		.coc_inode = inode,
+		.u = {
+			.coc_md    = md
+		}
+	};
+	int result = 0;
+	int refcheck;
+
+	LASSERT(md->body->valid & OBD_MD_FLID);
+	LASSERT(S_ISREG(inode->i_mode));
+
+	env = cl_env_get(&refcheck);
+	if (IS_ERR(env))
+		return PTR_ERR(env);
+
+	site = ll_i2sbi(inode)->ll_site;
+	lli  = ll_i2info(inode);
+	fid  = &lli->lli_fid;
+	LASSERT(fid_is_sane(fid));
+
+	if (!lli->lli_clob) {
+		/* clob is slave of inode, empty lli_clob means for new inode,
+		 * there is no clob in cache with the given fid, so it is
+		 * unnecessary to perform lookup-alloc-lookup-insert, just
+		 * alloc and insert directly.
+		 */
+		LASSERT(inode->i_state & I_NEW);
+		conf.coc_lu.loc_flags = LOC_F_NEW;
+		clob = cl_object_find(env, lu2cl_dev(site->ls_top_dev),
+				      fid, &conf);
+		if (!IS_ERR(clob)) {
+			/*
+			 * No locking is necessary, as new inode is
+			 * locked by I_NEW bit.
+			 */
+			lli->lli_clob = clob;
+			lli->lli_has_smd = lsm_has_objects(md->lsm);
+			lu_object_ref_add(&clob->co_lu, "inode", inode);
+		} else {
+			result = PTR_ERR(clob);
+		}
+	} else {
+		result = cl_conf_set(env, lli->lli_clob, &conf);
+	}
+
+	cl_env_put(env, &refcheck);
+
+	if (result != 0)
+		CERROR("Failure to initialize cl object " DFID ": %d\n",
+		       PFID(fid), result);
+	return result;
+}
+
+/**
+ * Wait for others drop their references of the object at first, then we drop
+ * the last one, which will lead to the object be destroyed immediately.
+ * Must be called after cl_object_kill() against this object.
+ *
+ * The reason we want to do this is: destroying top object will wait for sub
+ * objects being destroyed first, so we can't let bottom layer (e.g. from ASTs)
+ * to initiate top object destroying which may deadlock. See bz22520.
+ */
+static void cl_object_put_last(struct lu_env *env, struct cl_object *obj)
+{
+	struct lu_object_header *header = obj->co_lu.lo_header;
+	wait_queue_t	   waiter;
+
+	if (unlikely(atomic_read(&header->loh_ref) != 1)) {
+		struct lu_site *site = obj->co_lu.lo_dev->ld_site;
+		struct lu_site_bkt_data *bkt;
+
+		bkt = lu_site_bkt_from_fid(site, &header->loh_fid);
+
+		init_waitqueue_entry(&waiter, current);
+		add_wait_queue(&bkt->lsb_marche_funebre, &waiter);
+
+		while (1) {
+			set_current_state(TASK_UNINTERRUPTIBLE);
+			if (atomic_read(&header->loh_ref) == 1)
+				break;
+			schedule();
+		}
+
+		set_current_state(TASK_RUNNING);
+		remove_wait_queue(&bkt->lsb_marche_funebre, &waiter);
+	}
+
+	cl_object_put(env, obj);
+}
+
+void cl_inode_fini(struct inode *inode)
+{
+	struct lu_env	   *env;
+	struct ll_inode_info    *lli  = ll_i2info(inode);
+	struct cl_object	*clob = lli->lli_clob;
+	int refcheck;
+	int emergency;
+
+	if (clob) {
+		void		    *cookie;
+
+		cookie = cl_env_reenter();
+		env = cl_env_get(&refcheck);
+		emergency = IS_ERR(env);
+		if (emergency) {
+			mutex_lock(&cl_inode_fini_guard);
+			LASSERT(cl_inode_fini_env);
+			cl_env_implant(cl_inode_fini_env, &refcheck);
+			env = cl_inode_fini_env;
+		}
+		/*
+		 * cl_object cache is a slave to inode cache (which, in turn
+		 * is a slave to dentry cache), don't keep cl_object in memory
+		 * when its master is evicted.
+		 */
+		cl_object_kill(env, clob);
+		lu_object_ref_del(&clob->co_lu, "inode", inode);
+		cl_object_put_last(env, clob);
+		lli->lli_clob = NULL;
+		if (emergency) {
+			cl_env_unplant(cl_inode_fini_env, &refcheck);
+			mutex_unlock(&cl_inode_fini_guard);
+		} else {
+			cl_env_put(env, &refcheck);
+		}
+		cl_env_reexit(cookie);
+	}
+}
+
+/**
+ * build inode number from passed @fid
+ */
+__u64 cl_fid_build_ino(const struct lu_fid *fid, int api32)
+{
+	if (BITS_PER_LONG == 32 || api32)
+		return fid_flatten32(fid);
+	else
+		return fid_flatten(fid);
+}
+
+/**
+ * build inode generation from passed @fid.  If our FID overflows the 32-bit
+ * inode number then return a non-zero generation to distinguish them.
+ */
+__u32 cl_fid_build_gen(const struct lu_fid *fid)
+{
+	__u32 gen;
+
+	if (fid_is_igif(fid)) {
+		gen = lu_igif_gen(fid);
+		return gen;
+	}
+
+	gen = fid_flatten(fid) >> 32;
+	return gen;
+}
+
+/* lsm is unreliable after hsm implementation as layout can be changed at
+ * any time. This is only to support old, non-clio-ized interfaces. It will
+ * cause deadlock if clio operations are called with this extra layout refcount
+ * because in case the layout changed during the IO, ll_layout_refresh() will
+ * have to wait for the refcount to become zero to destroy the older layout.
+ *
+ * Notice that the lsm returned by this function may not be valid unless called
+ * inside layout lock - MDS_INODELOCK_LAYOUT.
+ */
+struct lov_stripe_md *ccc_inode_lsm_get(struct inode *inode)
+{
+	return lov_lsm_get(ll_i2info(inode)->lli_clob);
+}
+
+inline void ccc_inode_lsm_put(struct inode *inode, struct lov_stripe_md *lsm)
+{
+	lov_lsm_put(ll_i2info(inode)->lli_clob, lsm);
+}
diff --git a/drivers/staging/lustre/lustre/llite/lcommon_misc.c b/drivers/staging/lustre/lustre/llite/lcommon_misc.c
new file mode 100644
index 000000000..12f3e71f4
--- /dev/null
+++ b/drivers/staging/lustre/lustre/llite/lcommon_misc.c
@@ -0,0 +1,201 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * cl code shared between vvp and liblustre (and other Lustre clients in the
+ * future).
+ *
+ */
+#include "../include/obd_class.h"
+#include "../include/obd_support.h"
+#include "../include/obd.h"
+#include "../include/cl_object.h"
+
+#include "../include/lustre_lite.h"
+#include "llite_internal.h"
+
+/* Initialize the default and maximum LOV EA and cookie sizes.  This allows
+ * us to make MDS RPCs with large enough reply buffers to hold the
+ * maximum-sized (= maximum striped) EA and cookie without having to
+ * calculate this (via a call into the LOV + OSCs) each time we make an RPC.
+ */
+int cl_init_ea_size(struct obd_export *md_exp, struct obd_export *dt_exp)
+{
+	struct lov_stripe_md lsm = { .lsm_magic = LOV_MAGIC_V3 };
+	__u32 valsize = sizeof(struct lov_desc);
+	int rc, easize, def_easize, cookiesize;
+	struct lov_desc desc;
+	__u16 stripes, def_stripes;
+
+	rc = obd_get_info(NULL, dt_exp, sizeof(KEY_LOVDESC), KEY_LOVDESC,
+			  &valsize, &desc, NULL);
+	if (rc)
+		return rc;
+
+	stripes = min_t(__u32, desc.ld_tgt_count, LOV_MAX_STRIPE_COUNT);
+	lsm.lsm_stripe_count = stripes;
+	easize = obd_size_diskmd(dt_exp, &lsm);
+
+	def_stripes = min_t(__u32, desc.ld_default_stripe_count,
+			    LOV_MAX_STRIPE_COUNT);
+	lsm.lsm_stripe_count = def_stripes;
+	def_easize = obd_size_diskmd(dt_exp, &lsm);
+
+	cookiesize = stripes * sizeof(struct llog_cookie);
+
+	/* default cookiesize is 0 because from 2.4 server doesn't send
+	 * llog cookies to client.
+	 */
+	CDEBUG(D_HA,
+	       "updating def/max_easize: %d/%d def/max_cookiesize: 0/%d\n",
+	       def_easize, easize, cookiesize);
+
+	rc = md_init_ea_size(md_exp, easize, def_easize, cookiesize, 0);
+	return rc;
+}
+
+/**
+ * This function is used as an upcall-callback hooked by liblustre and llite
+ * clients into obd_notify() listeners chain to handle notifications about
+ * change of import connect_flags. See llu_fsswop_mount() and
+ * lustre_common_fill_super().
+ */
+int cl_ocd_update(struct obd_device *host,
+		  struct obd_device *watched,
+		  enum obd_notify_event ev, void *owner, void *data)
+{
+	struct lustre_client_ocd *lco;
+	struct client_obd	*cli;
+	__u64 flags;
+	int   result;
+
+	if (!strcmp(watched->obd_type->typ_name, LUSTRE_OSC_NAME)) {
+		cli = &watched->u.cli;
+		lco = owner;
+		flags = cli->cl_import->imp_connect_data.ocd_connect_flags;
+		CDEBUG(D_SUPER, "Changing connect_flags: %#llx -> %#llx\n",
+		       lco->lco_flags, flags);
+		mutex_lock(&lco->lco_lock);
+		lco->lco_flags &= flags;
+		/* for each osc event update ea size */
+		if (lco->lco_dt_exp)
+			cl_init_ea_size(lco->lco_md_exp, lco->lco_dt_exp);
+
+		mutex_unlock(&lco->lco_lock);
+		result = 0;
+	} else {
+		CERROR("unexpected notification from %s %s!\n",
+		       watched->obd_type->typ_name,
+		       watched->obd_name);
+		result = -EINVAL;
+	}
+	return result;
+}
+
+#define GROUPLOCK_SCOPE "grouplock"
+
+int cl_get_grouplock(struct cl_object *obj, unsigned long gid, int nonblock,
+		     struct ll_grouplock *cg)
+{
+	struct lu_env	  *env;
+	struct cl_io	   *io;
+	struct cl_lock	 *lock;
+	struct cl_lock_descr   *descr;
+	__u32		   enqflags;
+	int		     refcheck;
+	int		     rc;
+
+	env = cl_env_get(&refcheck);
+	if (IS_ERR(env))
+		return PTR_ERR(env);
+
+	io = vvp_env_thread_io(env);
+	io->ci_obj = obj;
+	io->ci_ignore_layout = 1;
+
+	rc = cl_io_init(env, io, CIT_MISC, io->ci_obj);
+	if (rc != 0) {
+		cl_io_fini(env, io);
+		cl_env_put(env, &refcheck);
+		/* Does not make sense to take GL for released layout */
+		if (rc > 0)
+			rc = -ENOTSUPP;
+		return rc;
+	}
+
+	lock = vvp_env_lock(env);
+	descr = &lock->cll_descr;
+	descr->cld_obj = obj;
+	descr->cld_start = 0;
+	descr->cld_end = CL_PAGE_EOF;
+	descr->cld_gid = gid;
+	descr->cld_mode = CLM_GROUP;
+
+	enqflags = CEF_MUST | (nonblock ? CEF_NONBLOCK : 0);
+	descr->cld_enq_flags = enqflags;
+
+	rc = cl_lock_request(env, io, lock);
+	if (rc < 0) {
+		cl_io_fini(env, io);
+		cl_env_put(env, &refcheck);
+		return rc;
+	}
+
+	cg->lg_env  = cl_env_get(&refcheck);
+	cg->lg_io   = io;
+	cg->lg_lock = lock;
+	cg->lg_gid  = gid;
+	LASSERT(cg->lg_env == env);
+
+	cl_env_unplant(env, &refcheck);
+	return 0;
+}
+
+void cl_put_grouplock(struct ll_grouplock *cg)
+{
+	struct lu_env  *env  = cg->lg_env;
+	struct cl_io   *io   = cg->lg_io;
+	struct cl_lock *lock = cg->lg_lock;
+	int	     refcheck;
+
+	LASSERT(cg->lg_env);
+	LASSERT(cg->lg_gid);
+
+	cl_env_implant(env, &refcheck);
+	cl_env_put(env, &refcheck);
+
+	cl_lock_release(env, lock);
+	cl_io_fini(env, io);
+	cl_env_put(env, NULL);
+}
diff --git a/drivers/staging/lustre/lustre/llite/llite_close.c b/drivers/staging/lustre/lustre/llite/llite_close.c
index a55ac4dcc..2df551d3a 100644
--- a/drivers/staging/lustre/lustre/llite/llite_close.c
+++ b/drivers/staging/lustre/lustre/llite/llite_close.c
@@ -46,31 +46,31 @@
 #include "llite_internal.h"
 
 /** records that a write is in flight */
-void vvp_write_pending(struct ccc_object *club, struct ccc_page *page)
+void vvp_write_pending(struct vvp_object *club, struct vvp_page *page)
 {
-	struct ll_inode_info *lli = ll_i2info(club->cob_inode);
+	struct ll_inode_info *lli = ll_i2info(club->vob_inode);
 
 	spin_lock(&lli->lli_lock);
 	lli->lli_flags |= LLIF_SOM_DIRTY;
-	if (page && list_empty(&page->cpg_pending_linkage))
-		list_add(&page->cpg_pending_linkage, &club->cob_pending_list);
+	if (page && list_empty(&page->vpg_pending_linkage))
+		list_add(&page->vpg_pending_linkage, &club->vob_pending_list);
 	spin_unlock(&lli->lli_lock);
 }
 
 /** records that a write has completed */
-void vvp_write_complete(struct ccc_object *club, struct ccc_page *page)
+void vvp_write_complete(struct vvp_object *club, struct vvp_page *page)
 {
-	struct ll_inode_info *lli = ll_i2info(club->cob_inode);
+	struct ll_inode_info *lli = ll_i2info(club->vob_inode);
 	int rc = 0;
 
 	spin_lock(&lli->lli_lock);
-	if (page && !list_empty(&page->cpg_pending_linkage)) {
-		list_del_init(&page->cpg_pending_linkage);
+	if (page && !list_empty(&page->vpg_pending_linkage)) {
+		list_del_init(&page->vpg_pending_linkage);
 		rc = 1;
 	}
 	spin_unlock(&lli->lli_lock);
 	if (rc)
-		ll_queue_done_writing(club->cob_inode, 0);
+		ll_queue_done_writing(club->vob_inode, 0);
 }
 
 /** Queues DONE_WRITING if
@@ -80,25 +80,25 @@ void vvp_write_complete(struct ccc_object *club, struct ccc_page *page)
 void ll_queue_done_writing(struct inode *inode, unsigned long flags)
 {
 	struct ll_inode_info *lli = ll_i2info(inode);
-	struct ccc_object *club = cl2ccc(ll_i2info(inode)->lli_clob);
+	struct vvp_object *club = cl2vvp(ll_i2info(inode)->lli_clob);
 
 	spin_lock(&lli->lli_lock);
 	lli->lli_flags |= flags;
 
 	if ((lli->lli_flags & LLIF_DONE_WRITING) &&
-	    list_empty(&club->cob_pending_list)) {
+	    list_empty(&club->vob_pending_list)) {
 		struct ll_close_queue *lcq = ll_i2sbi(inode)->ll_lcq;
 
 		if (lli->lli_flags & LLIF_MDS_SIZE_LOCK)
-			CWARN("ino %lu/%u(flags %u) som valid it just after recovery\n",
-			      inode->i_ino, inode->i_generation,
-			      lli->lli_flags);
+			CWARN("%s: file "DFID"(flags %u) Size-on-MDS valid, done writing allowed and no diry pages\n",
+			      ll_get_fsname(inode->i_sb, NULL, 0),
+			      PFID(ll_inode2fid(inode)), lli->lli_flags);
 		/* DONE_WRITING is allowed and inode has no dirty page. */
 		spin_lock(&lcq->lcq_lock);
 
 		LASSERT(list_empty(&lli->lli_close_list));
-		CDEBUG(D_INODE, "adding inode %lu/%u to close list\n",
-		       inode->i_ino, inode->i_generation);
+		CDEBUG(D_INODE, "adding inode "DFID" to close list\n",
+		       PFID(ll_inode2fid(inode)));
 		list_add_tail(&lli->lli_close_list, &lcq->lcq_head);
 
 		/* Avoid a concurrent insertion into the close thread queue:
@@ -124,9 +124,9 @@ void ll_done_writing_attr(struct inode *inode, struct md_op_data *op_data)
 	op_data->op_flags |= MF_SOM_CHANGE;
 	/* Check if Size-on-MDS attributes are valid. */
 	if (lli->lli_flags & LLIF_MDS_SIZE_LOCK)
-		CERROR("ino %lu/%u(flags %u) som valid it just after recovery\n",
-		       inode->i_ino, inode->i_generation,
-		       lli->lli_flags);
+		CERROR("%s: inode "DFID"(flags %u) MDS holds lock on Size-on-MDS attributes\n",
+		       ll_get_fsname(inode->i_sb, NULL, 0),
+		       PFID(ll_inode2fid(inode)), lli->lli_flags);
 
 	if (!cl_local_size(inode)) {
 		/* Send Size-on-MDS Attributes if valid. */
@@ -140,10 +140,10 @@ void ll_ioepoch_close(struct inode *inode, struct md_op_data *op_data,
 		      struct obd_client_handle **och, unsigned long flags)
 {
 	struct ll_inode_info *lli = ll_i2info(inode);
-	struct ccc_object *club = cl2ccc(ll_i2info(inode)->lli_clob);
+	struct vvp_object *club = cl2vvp(ll_i2info(inode)->lli_clob);
 
 	spin_lock(&lli->lli_lock);
-	if (!(list_empty(&club->cob_pending_list))) {
+	if (!(list_empty(&club->vob_pending_list))) {
 		if (!(lli->lli_flags & LLIF_EPOCH_PENDING)) {
 			LASSERT(*och);
 			LASSERT(!lli->lli_pending_och);
@@ -198,7 +198,7 @@ void ll_ioepoch_close(struct inode *inode, struct md_op_data *op_data,
 		}
 	}
 
-	LASSERT(list_empty(&club->cob_pending_list));
+	LASSERT(list_empty(&club->vob_pending_list));
 	lli->lli_flags &= ~LLIF_SOM_DIRTY;
 	spin_unlock(&lli->lli_lock);
 	ll_done_writing_attr(inode, op_data);
@@ -221,9 +221,9 @@ int ll_som_update(struct inode *inode, struct md_op_data *op_data)
 
 	LASSERT(op_data);
 	if (lli->lli_flags & LLIF_MDS_SIZE_LOCK)
-		CERROR("ino %lu/%u(flags %u) som valid it just after recovery\n",
-		       inode->i_ino, inode->i_generation,
-		       lli->lli_flags);
+		CERROR("%s: inode "DFID"(flags %u) MDS holds lock on Size-on-MDS attributes\n",
+		       ll_get_fsname(inode->i_sb, NULL, 0),
+		       PFID(ll_inode2fid(inode)), lli->lli_flags);
 
 	oa = kmem_cache_zalloc(obdo_cachep, GFP_NOFS);
 	if (!oa) {
@@ -241,9 +241,9 @@ int ll_som_update(struct inode *inode, struct md_op_data *op_data)
 		if (rc) {
 			oa->o_valid = 0;
 			if (rc != -ENOENT)
-				CERROR("inode_getattr failed (%d): unable to send a Size-on-MDS attribute update for inode %lu/%u\n",
-				       rc, inode->i_ino,
-				       inode->i_generation);
+				CERROR("%s: inode_getattr failed - unable to send a Size-on-MDS attribute update for inode "DFID": rc = %d\n",
+				       ll_get_fsname(inode->i_sb, NULL, 0),
+				       PFID(ll_inode2fid(inode)), rc);
 		} else {
 			CDEBUG(D_INODE, "Size-on-MDS update on "DFID"\n",
 			       PFID(&lli->lli_fid));
@@ -302,9 +302,11 @@ static void ll_done_writing(struct inode *inode)
 		 * OSTs and send setattr to back to MDS.
 		 */
 		rc = ll_som_update(inode, op_data);
-	else if (rc)
-		CERROR("inode %lu mdc done_writing failed: rc = %d\n",
-		       inode->i_ino, rc);
+	else if (rc) {
+		CERROR("%s: inode "DFID" mdc done_writing failed: rc = %d\n",
+		       ll_get_fsname(inode->i_sb, NULL, 0),
+		       PFID(ll_inode2fid(inode)), rc);
+	}
 out:
 	ll_finish_md_op_data(op_data);
 	if (och) {
@@ -323,8 +325,9 @@ static struct ll_inode_info *ll_close_next_lli(struct ll_close_queue *lcq)
 		lli = list_entry(lcq->lcq_head.next, struct ll_inode_info,
 				 lli_close_list);
 		list_del_init(&lli->lli_close_list);
-	} else if (atomic_read(&lcq->lcq_stop))
+	} else if (atomic_read(&lcq->lcq_stop)) {
 		lli = ERR_PTR(-EALREADY);
+	}
 
 	spin_unlock(&lcq->lcq_lock);
 	return lli;
@@ -348,8 +351,8 @@ static int ll_close_thread(void *arg)
 			break;
 
 		inode = ll_info2i(lli);
-		CDEBUG(D_INFO, "done_writing for inode %lu/%u\n",
-		       inode->i_ino, inode->i_generation);
+		CDEBUG(D_INFO, "done_writing for inode "DFID"\n",
+		       PFID(ll_inode2fid(inode)));
 		ll_done_writing(inode);
 		iput(inode);
 	}
diff --git a/drivers/staging/lustre/lustre/llite/llite_internal.h b/drivers/staging/lustre/lustre/llite/llite_internal.h
index e3c0f1dd4..3f2f30b65 100644
--- a/drivers/staging/lustre/lustre/llite/llite_internal.h
+++ b/drivers/staging/lustre/lustre/llite/llite_internal.h
@@ -43,11 +43,11 @@
 
 /* for struct cl_lock_descr and struct cl_io */
 #include "../include/cl_object.h"
-#include "../include/lclient.h"
 #include "../include/lustre_mdc.h"
 #include "../include/lustre_intent.h"
 #include <linux/compat.h>
 #include <linux/posix_acl_xattr.h>
+#include "vvp_internal.h"
 
 #ifndef FMODE_EXEC
 #define FMODE_EXEC 0
@@ -99,6 +99,13 @@ struct ll_remote_perm {
 					     */
 };
 
+struct ll_grouplock {
+	struct lu_env	*lg_env;
+	struct cl_io	*lg_io;
+	struct cl_lock	*lg_lock;
+	unsigned long	 lg_gid;
+};
+
 enum lli_flags {
 	/* MDS has an authority for the Size-on-MDS attributes. */
 	LLIF_MDS_SIZE_LOCK      = (1 << 0),
@@ -161,7 +168,9 @@ struct ll_inode_info {
 	struct inode			lli_vfs_inode;
 
 	/* the most recent timestamps obtained from mds */
-	struct ost_lvb			lli_lvb;
+	s64				lli_atime;
+	s64				lli_mtime;
+	s64				lli_ctime;
 	spinlock_t			lli_agl_lock;
 
 	/* Try to make the d::member and f::member are aligned. Before using
@@ -328,6 +337,7 @@ enum ra_stat {
 	RA_STAT_EOF,
 	RA_STAT_MAX_IN_FLIGHT,
 	RA_STAT_WRONG_GRAB_PAGE,
+	RA_STAT_FAILED_REACH_END,
 	_NR_RA_STAT,
 };
 
@@ -481,6 +491,12 @@ struct ll_sb_info {
 
 	struct lprocfs_stats     *ll_stats; /* lprocfs stats counter */
 
+	/*
+	 * Used to track "unstable" pages on a client, and maintain a
+	 * LRU list of clean pages. An "unstable" page is defined as
+	 * any page which is sent to a server as part of a bulk request,
+	 * but is uncommitted to stable storage.
+	 */
 	struct cl_client_cache    ll_cache;
 
 	struct lprocfs_stats     *ll_ra_stats;
@@ -525,13 +541,6 @@ struct ll_sb_info {
 	struct completion	 ll_kobj_unregister;
 };
 
-struct ll_ra_read {
-	pgoff_t	     lrr_start;
-	pgoff_t	     lrr_count;
-	struct task_struct *lrr_reader;
-	struct list_head	  lrr_linkage;
-};
-
 /*
  * per file-descriptor read-ahead data.
  */
@@ -589,12 +598,6 @@ struct ll_readahead_state {
 	 * will not be accurate when dealing with reads issued via mmap.
 	 */
 	unsigned long   ras_request_index;
-	/*
-	 * list of struct ll_ra_read's one per read(2) call current in
-	 * progress against this file descriptor. Used by read-ahead code,
-	 * protected by ->ras_lock.
-	 */
-	struct list_head      ras_read_beads;
 	/*
 	 * The following 3 items are used for detecting the stride I/O
 	 * mode.
@@ -622,7 +625,7 @@ extern struct kmem_cache *ll_file_data_slab;
 struct lustre_handle;
 struct ll_file_data {
 	struct ll_readahead_state fd_ras;
-	struct ccc_grouplock fd_grouplock;
+	struct ll_grouplock fd_grouplock;
 	__u64 lfd_pos;
 	__u32 fd_flags;
 	fmode_t fd_omode;
@@ -663,8 +666,16 @@ static inline int ll_need_32bit_api(struct ll_sb_info *sbi)
 #endif
 }
 
-void ll_ra_read_in(struct file *f, struct ll_ra_read *rar);
-void ll_ra_read_ex(struct file *f, struct ll_ra_read *rar);
+void ll_ras_enter(struct file *f);
+
+/* llite/lcommon_misc.c */
+int cl_init_ea_size(struct obd_export *md_exp, struct obd_export *dt_exp);
+int cl_ocd_update(struct obd_device *host,
+		  struct obd_device *watched,
+		  enum obd_notify_event ev, void *owner, void *data);
+int cl_get_grouplock(struct cl_object *obj, unsigned long gid, int nonblock,
+		     struct ll_grouplock *cg);
+void cl_put_grouplock(struct ll_grouplock *cg);
 
 /* llite/lproc_llite.c */
 int ldebugfs_register_mountpoint(struct dentry *parent,
@@ -697,15 +708,15 @@ int ll_md_blocking_ast(struct ldlm_lock *, struct ldlm_lock_desc *,
 struct dentry *ll_splice_alias(struct inode *inode, struct dentry *de);
 
 /* llite/rw.c */
-int ll_prepare_write(struct file *, struct page *, unsigned from, unsigned to);
-int ll_commit_write(struct file *, struct page *, unsigned from, unsigned to);
 int ll_writepage(struct page *page, struct writeback_control *wbc);
 int ll_writepages(struct address_space *, struct writeback_control *wbc);
 int ll_readpage(struct file *file, struct page *page);
 void ll_readahead_init(struct inode *inode, struct ll_readahead_state *ras);
 int ll_readahead(const struct lu_env *env, struct cl_io *io,
-		 struct ll_readahead_state *ras, struct address_space *mapping,
-		 struct cl_page_list *queue, int flags);
+		 struct cl_page_list *queue, struct ll_readahead_state *ras,
+		 bool hit);
+struct ll_cl_context *ll_cl_init(struct file *file, struct page *vmpage);
+void ll_cl_fini(struct ll_cl_context *lcc);
 
 extern const struct address_space_operations ll_aops;
 
@@ -740,7 +751,7 @@ struct posix_acl *ll_get_acl(struct inode *inode, int type);
 int ll_inode_permission(struct inode *inode, int mask);
 
 int ll_lov_setstripe_ea_info(struct inode *inode, struct dentry *dentry,
-			     int flags, struct lov_user_md *lum,
+			     __u64 flags, struct lov_user_md *lum,
 			     int lum_size);
 int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename,
 			     struct lov_mds_md **lmm, int *lmm_size,
@@ -750,9 +761,9 @@ int ll_dir_setstripe(struct inode *inode, struct lov_user_md *lump,
 int ll_dir_getstripe(struct inode *inode, struct lov_mds_md **lmmp,
 		     int *lmm_size, struct ptlrpc_request **request);
 int ll_fsync(struct file *file, loff_t start, loff_t end, int data);
-int ll_merge_lvb(const struct lu_env *env, struct inode *inode);
+int ll_merge_attr(const struct lu_env *env, struct inode *inode);
 int ll_fid2path(struct inode *inode, void __user *arg);
-int ll_data_version(struct inode *inode, __u64 *data_version, int extent_lock);
+int ll_data_version(struct inode *inode, __u64 *data_version, int flags);
 int ll_hsm_release(struct inode *inode);
 
 /* llite/dcache.c */
@@ -824,65 +835,8 @@ struct ll_close_queue {
 	atomic_t		lcq_stop;
 };
 
-struct ccc_object *cl_inode2ccc(struct inode *inode);
-
-void vvp_write_pending (struct ccc_object *club, struct ccc_page *page);
-void vvp_write_complete(struct ccc_object *club, struct ccc_page *page);
-
-/* specific architecture can implement only part of this list */
-enum vvp_io_subtype {
-	/** normal IO */
-	IO_NORMAL,
-	/** io started from splice_{read|write} */
-	IO_SPLICE
-};
-
-/* IO subtypes */
-struct vvp_io {
-	/** io subtype */
-	enum vvp_io_subtype    cui_io_subtype;
-
-	union {
-		struct {
-			struct pipe_inode_info *cui_pipe;
-			unsigned int	    cui_flags;
-		} splice;
-		struct vvp_fault_io {
-			/**
-			 * Inode modification time that is checked across DLM
-			 * lock request.
-			 */
-			time64_t	    ft_mtime;
-			struct vm_area_struct *ft_vma;
-			/**
-			 *  locked page returned from vvp_io
-			 */
-			struct page	    *ft_vmpage;
-			struct vm_fault_api {
-				/**
-				 * kernel fault info
-				 */
-				struct vm_fault *ft_vmf;
-				/**
-				 * fault API used bitflags for return code.
-				 */
-				unsigned int    ft_flags;
-				/**
-				 * check that flags are from filemap_fault
-				 */
-				bool		ft_flags_valid;
-			} fault;
-		} fault;
-	} u;
-	/**
-	 * Read-ahead state used by read and page-fault IO contexts.
-	 */
-	struct ll_ra_read    cui_bead;
-	/**
-	 * Set when cui_bead has been initialized.
-	 */
-	int		  cui_ra_window_set;
-};
+void vvp_write_pending(struct vvp_object *club, struct vvp_page *page);
+void vvp_write_complete(struct vvp_object *club, struct vvp_page *page);
 
 /**
  * IO arguments for various VFS I/O interfaces.
@@ -911,54 +865,32 @@ struct ll_cl_context {
 	int	     lcc_refcheck;
 };
 
-struct vvp_thread_info {
-	struct vvp_io_args   vti_args;
-	struct ra_io_arg     vti_ria;
-	struct ll_cl_context vti_io_ctx;
+struct ll_thread_info {
+	struct vvp_io_args   lti_args;
+	struct ra_io_arg     lti_ria;
+	struct ll_cl_context lti_io_ctx;
 };
 
-static inline struct vvp_thread_info *vvp_env_info(const struct lu_env *env)
-{
-	extern struct lu_context_key vvp_key;
-	struct vvp_thread_info      *info;
-
-	info = lu_context_key_get(&env->le_ctx, &vvp_key);
-	LASSERT(info);
-	return info;
-}
-
-static inline struct vvp_io_args *vvp_env_args(const struct lu_env *env,
-					       enum vvp_io_subtype type)
+extern struct lu_context_key ll_thread_key;
+static inline struct ll_thread_info *ll_env_info(const struct lu_env *env)
 {
-	struct vvp_io_args *ret = &vvp_env_info(env)->vti_args;
-
-	ret->via_io_subtype = type;
+	struct ll_thread_info *lti;
 
-	return ret;
+	lti = lu_context_key_get(&env->le_ctx, &ll_thread_key);
+	LASSERT(lti);
+	return lti;
 }
 
-struct vvp_session {
-	struct vvp_io	 vs_ios;
-};
-
-static inline struct vvp_session *vvp_env_session(const struct lu_env *env)
+static inline struct vvp_io_args *ll_env_args(const struct lu_env *env,
+					      enum vvp_io_subtype type)
 {
-	extern struct lu_context_key vvp_session_key;
-	struct vvp_session *ses;
+	struct vvp_io_args *via = &ll_env_info(env)->lti_args;
 
-	ses = lu_context_key_get(env->le_ses, &vvp_session_key);
-	LASSERT(ses);
-	return ses;
-}
+	via->via_io_subtype = type;
 
-static inline struct vvp_io *vvp_env_io(const struct lu_env *env)
-{
-	return &vvp_env_session(env)->vs_ios;
+	return via;
 }
 
-int vvp_global_init(void);
-void vvp_global_fini(void);
-
 void ll_queue_done_writing(struct inode *inode, unsigned long flags);
 void ll_close_thread_shutdown(struct ll_close_queue *lcq);
 int ll_close_thread_start(struct ll_close_queue **lcq_ret);
@@ -981,6 +913,10 @@ static inline void ll_invalidate_page(struct page *vmpage)
 	if (!mapping)
 		return;
 
+	/*
+	 * truncate_complete_page() calls
+	 * a_ops->invalidatepage()->cl_page_delete()->vvp_page_delete().
+	 */
 	ll_teardown_mmaps(mapping, offset, offset + PAGE_SIZE);
 	truncate_complete_page(mapping, vmpage);
 }
@@ -1040,10 +976,10 @@ static inline __u64 ll_file_maxbytes(struct inode *inode)
 }
 
 /* llite/xattr.c */
-int ll_setxattr(struct dentry *dentry, const char *name,
-		const void *value, size_t size, int flags);
-ssize_t ll_getxattr(struct dentry *dentry, const char *name,
-		    void *buffer, size_t size);
+int ll_setxattr(struct dentry *dentry, struct inode *inode,
+		const char *name, const void *value, size_t size, int flags);
+ssize_t ll_getxattr(struct dentry *dentry, struct inode *inode,
+		    const char *name, void *buffer, size_t size);
 ssize_t ll_listxattr(struct dentry *dentry, char *buffer, size_t size);
 int ll_removexattr(struct dentry *dentry, const char *name);
 
@@ -1055,9 +991,6 @@ void free_rmtperm_hash(struct hlist_head *hash);
 int ll_update_remote_perm(struct inode *inode, struct mdt_remote_perm *perm);
 int lustre_check_remote_perm(struct inode *inode, int mask);
 
-/* llite/llite_cl.c */
-extern struct lu_device_type vvp_device_type;
-
 /**
  * Common IO arguments for various VFS I/O interfaces.
  */
@@ -1069,7 +1002,7 @@ void ras_update(struct ll_sb_info *sbi, struct inode *inode,
 		struct ll_readahead_state *ras, unsigned long index,
 		unsigned hit);
 void ll_ra_count_put(struct ll_sb_info *sbi, unsigned long len);
-void ll_ra_stats_inc(struct address_space *mapping, enum ra_stat which);
+void ll_ra_stats_inc(struct inode *inode, enum ra_stat which);
 
 /* llite/llite_rmtacl.c */
 #ifdef CONFIG_FS_POSIX_ACL
@@ -1163,6 +1096,22 @@ int do_statahead_enter(struct inode *dir, struct dentry **dentry,
 		       int only_unplug);
 void ll_stop_statahead(struct inode *dir, void *key);
 
+blkcnt_t dirty_cnt(struct inode *inode);
+
+int cl_glimpse_size0(struct inode *inode, int agl);
+int cl_glimpse_lock(const struct lu_env *env, struct cl_io *io,
+		    struct inode *inode, struct cl_object *clob, int agl);
+
+static inline int cl_glimpse_size(struct inode *inode)
+{
+	return cl_glimpse_size0(inode, 0);
+}
+
+static inline int cl_agl(struct inode *inode)
+{
+	return cl_glimpse_size0(inode, 1);
+}
+
 static inline int ll_glimpse_size(struct inode *inode)
 {
 	struct ll_inode_info *lli = ll_i2info(inode);
@@ -1285,43 +1234,6 @@ typedef enum llioc_iter (*llioc_callback_t)(struct inode *inode,
 void *ll_iocontrol_register(llioc_callback_t cb, int count, unsigned int *cmd);
 void ll_iocontrol_unregister(void *magic);
 
-/* lclient compat stuff */
-#define cl_inode_info ll_inode_info
-#define cl_i2info(info) ll_i2info(info)
-#define cl_inode_mode(inode) ((inode)->i_mode)
-#define cl_i2sbi ll_i2sbi
-
-static inline struct ll_file_data *cl_iattr2fd(struct inode *inode,
-					       const struct iattr *attr)
-{
-	LASSERT(attr->ia_valid & ATTR_FILE);
-	return LUSTRE_FPRIVATE(attr->ia_file);
-}
-
-static inline void cl_isize_write_nolock(struct inode *inode, loff_t kms)
-{
-	LASSERT(mutex_is_locked(&ll_i2info(inode)->lli_size_mutex));
-	i_size_write(inode, kms);
-}
-
-static inline void cl_isize_write(struct inode *inode, loff_t kms)
-{
-	ll_inode_size_lock(inode);
-	i_size_write(inode, kms);
-	ll_inode_size_unlock(inode);
-}
-
-#define cl_isize_read(inode)	     i_size_read(inode)
-
-static inline int cl_merge_lvb(const struct lu_env *env, struct inode *inode)
-{
-	return ll_merge_lvb(env, inode);
-}
-
-#define cl_inode_atime(inode) LTIME_S((inode)->i_atime)
-#define cl_inode_ctime(inode) LTIME_S((inode)->i_ctime)
-#define cl_inode_mtime(inode) LTIME_S((inode)->i_mtime)
-
 int cl_sync_file_range(struct inode *inode, loff_t start, loff_t end,
 		       enum cl_fsync_mode mode, int ignore_layout);
 
@@ -1350,7 +1262,7 @@ static inline void cl_stats_tally(struct cl_device *dev, enum cl_req_type crt,
 	int opc = (crt == CRT_READ) ? LPROC_LL_OSC_READ :
 				      LPROC_LL_OSC_WRITE;
 
-	ll_stats_ops_tally(ll_s2sbi(cl2ccc_dev(dev)->cdv_sb), opc, rc);
+	ll_stats_ops_tally(ll_s2sbi(cl2vvp_dev(dev)->vdv_sb), opc, rc);
 }
 
 ssize_t ll_direct_rw_pages(const struct lu_env *env, struct cl_io *io,
@@ -1382,18 +1294,16 @@ static inline void ll_set_lock_data(struct obd_export *exp, struct inode *inode,
 		 */
 		if (it->d.lustre.it_remote_lock_mode) {
 			handle.cookie = it->d.lustre.it_remote_lock_handle;
-			CDEBUG(D_DLMTRACE, "setting l_data to inode %p(%lu/%u) for remote lock %#llx\n",
-			       inode,
-			       inode->i_ino, inode->i_generation,
+			CDEBUG(D_DLMTRACE, "setting l_data to inode "DFID"%p for remote lock %#llx\n",
+			       PFID(ll_inode2fid(inode)), inode,
 			       handle.cookie);
 			md_set_lock_data(exp, &handle.cookie, inode, NULL);
 		}
 
 		handle.cookie = it->d.lustre.it_lock_handle;
 
-		CDEBUG(D_DLMTRACE, "setting l_data to inode %p (%lu/%u) for lock %#llx\n",
-		       inode, inode->i_ino,
-		       inode->i_generation, handle.cookie);
+		CDEBUG(D_DLMTRACE, "setting l_data to inode "DFID"%p for lock %#llx\n",
+		       PFID(ll_inode2fid(inode)), inode, handle.cookie);
 
 		md_set_lock_data(exp, &handle.cookie, inode,
 				 &it->d.lustre.it_lock_bits);
@@ -1471,9 +1381,25 @@ enum {
 
 int ll_layout_conf(struct inode *inode, const struct cl_object_conf *conf);
 int ll_layout_refresh(struct inode *inode, __u32 *gen);
-int ll_layout_restore(struct inode *inode);
+int ll_layout_restore(struct inode *inode, loff_t start, __u64 length);
 
 int ll_xattr_init(void);
 void ll_xattr_fini(void);
 
+int ll_page_sync_io(const struct lu_env *env, struct cl_io *io,
+		    struct cl_page *page, enum cl_req_type crt);
+
+/* lcommon_cl.c */
+int cl_setattr_ost(struct inode *inode, const struct iattr *attr);
+
+extern struct lu_env *cl_inode_fini_env;
+extern int cl_inode_fini_refcheck;
+
+int cl_file_inode_init(struct inode *inode, struct lustre_md *md);
+void cl_inode_fini(struct inode *inode);
+int cl_local_size(struct inode *inode);
+
+__u64 cl_fid_build_ino(const struct lu_fid *fid, int api32);
+__u32 cl_fid_build_gen(const struct lu_fid *fid);
+
 #endif /* LLITE_INTERNAL_H */
diff --git a/drivers/staging/lustre/lustre/llite/llite_lib.c b/drivers/staging/lustre/lustre/llite/llite_lib.c
index b57a99268..96c7e9fc6 100644
--- a/drivers/staging/lustre/lustre/llite/llite_lib.c
+++ b/drivers/staging/lustre/lustre/llite/llite_lib.c
@@ -85,18 +85,18 @@ static struct ll_sb_info *ll_init_sbi(struct super_block *sb)
 
 	si_meminfo(&si);
 	pages = si.totalram - si.totalhigh;
-	if (pages >> (20 - PAGE_SHIFT) < 512)
-		lru_page_max = pages / 2;
-	else
-		lru_page_max = (pages / 4) * 3;
+	lru_page_max = pages / 2;
 
-	/* initialize lru data */
+	/* initialize ll_cache data */
 	atomic_set(&sbi->ll_cache.ccc_users, 0);
 	sbi->ll_cache.ccc_lru_max = lru_page_max;
 	atomic_set(&sbi->ll_cache.ccc_lru_left, lru_page_max);
 	spin_lock_init(&sbi->ll_cache.ccc_lru_lock);
 	INIT_LIST_HEAD(&sbi->ll_cache.ccc_lru);
 
+	atomic_set(&sbi->ll_cache.ccc_unstable_nr, 0);
+	init_waitqueue_head(&sbi->ll_cache.ccc_unstable_waitq);
+
 	sbi->ll_ra_info.ra_max_pages_per_file = min(pages / 32,
 					   SBI_DEFAULT_READAHEAD_MAX);
 	sbi->ll_ra_info.ra_max_pages = sbi->ll_ra_info.ra_max_pages_per_file;
@@ -169,12 +169,6 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt,
 		return -ENOMEM;
 	}
 
-	if (llite_root) {
-		err = ldebugfs_register_mountpoint(llite_root, sb, dt, md);
-		if (err < 0)
-			CERROR("could not register mount in <debugfs>/lustre/llite\n");
-	}
-
 	/* indicate the features supported by this client */
 	data->ocd_connect_flags = OBD_CONNECT_IBITS    | OBD_CONNECT_NODEVOH  |
 				  OBD_CONNECT_ATTRFID  |
@@ -337,10 +331,8 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt,
 	else
 		sbi->ll_md_brw_size = PAGE_SIZE;
 
-	if (data->ocd_connect_flags & OBD_CONNECT_LAYOUTLOCK) {
-		LCONSOLE_INFO("Layout lock feature supported.\n");
+	if (data->ocd_connect_flags & OBD_CONNECT_LAYOUTLOCK)
 		sbi->ll_flags |= LL_SBI_LAYOUT_LOCK;
-	}
 
 	if (data->ocd_ibits_known & MDS_INODELOCK_XATTR) {
 		if (!(data->ocd_connect_flags & OBD_CONNECT_MAX_EASIZE)) {
@@ -453,7 +445,7 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt,
 	/* make root inode
 	 * XXX: move this to after cbd setup?
 	 */
-	valid = OBD_MD_FLGETATTR | OBD_MD_FLBLOCKS;
+	valid = OBD_MD_FLGETATTR | OBD_MD_FLBLOCKS | OBD_MD_FLMODEASIZE;
 	if (sbi->ll_flags & LL_SBI_RMT_CLIENT)
 		valid |= OBD_MD_FLRMTPERM;
 	else if (sbi->ll_flags & LL_SBI_ACL)
@@ -555,6 +547,15 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt,
 	kfree(data);
 	kfree(osfs);
 
+	if (llite_root) {
+		err = ldebugfs_register_mountpoint(llite_root, sb, dt, md);
+		if (err < 0) {
+			CERROR("%s: could not register mount in debugfs: "
+			       "rc = %d\n", ll_get_fsname(sb, NULL, 0), err);
+			err = 0;
+		}
+	}
+
 	return err;
 out_root:
 	iput(root);
@@ -573,7 +574,6 @@ out_md:
 out:
 	kfree(data);
 	kfree(osfs);
-	ldebugfs_unregister_mountpoint(sbi);
 	return err;
 }
 
@@ -897,10 +897,8 @@ int ll_fill_super(struct super_block *sb, struct vfsmount *mnt)
 	cfg->cfg_callback = class_config_llog_handler;
 	/* set up client obds */
 	err = lustre_process_log(sb, profilenm, cfg);
-	if (err < 0) {
-		CERROR("Unable to process log: %d\n", err);
+	if (err < 0)
 		goto out_free;
-	}
 
 	/* Profile set with LCFG_MOUNTOPT so we can find our mdc and osc obds */
 	lprof = class_get_profile(profilenm);
@@ -947,7 +945,7 @@ void ll_put_super(struct super_block *sb)
 	struct lustre_sb_info *lsi = s2lsi(sb);
 	struct ll_sb_info *sbi = ll_s2sbi(sb);
 	char *profilenm = get_profile_name(sb);
-	int next, force = 1;
+	int ccc_count, next, force = 1, rc = 0;
 
 	CDEBUG(D_VFSTRACE, "VFS Op: sb %p - %s\n", sb, profilenm);
 
@@ -963,6 +961,19 @@ void ll_put_super(struct super_block *sb)
 			force = obd->obd_force;
 	}
 
+	/* Wait for unstable pages to be committed to stable storage */
+	if (!force) {
+		struct l_wait_info lwi = LWI_INTR(LWI_ON_SIGNAL_NOOP, NULL);
+
+		rc = l_wait_event(sbi->ll_cache.ccc_unstable_waitq,
+				  !atomic_read(&sbi->ll_cache.ccc_unstable_nr),
+				  &lwi);
+	}
+
+	ccc_count = atomic_read(&sbi->ll_cache.ccc_unstable_nr);
+	if (!force && rc != -EINTR)
+		LASSERTF(!ccc_count, "count: %i\n", ccc_count);
+
 	/* We need to set force before the lov_disconnect in
 	 * lustre_common_put_super, since l_d cleans up osc's as well.
 	 */
@@ -999,6 +1010,8 @@ void ll_put_super(struct super_block *sb)
 
 	lustre_common_put_super(sb);
 
+	cl_env_cache_purge(~0);
+
 	module_put(THIS_MODULE);
 } /* client_put_super */
 
@@ -1032,8 +1045,8 @@ void ll_clear_inode(struct inode *inode)
 	struct ll_inode_info *lli = ll_i2info(inode);
 	struct ll_sb_info *sbi = ll_i2sbi(inode);
 
-	CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
-	       inode->i_generation, inode);
+	CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p)\n",
+	       PFID(ll_inode2fid(inode)), inode);
 
 	if (S_ISDIR(inode->i_mode)) {
 		/* these should have been cleared in ll_file_release */
@@ -1180,9 +1193,11 @@ static int ll_setattr_done_writing(struct inode *inode,
 		 * from OSTs and send setattr to back to MDS.
 		 */
 		rc = ll_som_update(inode, op_data);
-	else if (rc)
-		CERROR("inode %lu mdc truncate failed: rc = %d\n",
-		       inode->i_ino, rc);
+	else if (rc) {
+		CERROR("%s: inode "DFID" mdc truncate failed: rc = %d\n",
+		      ll_i2sbi(inode)->ll_md_exp->exp_obd->obd_name,
+		      PFID(ll_inode2fid(inode)), rc);
+	}
 	return rc;
 }
 
@@ -1210,12 +1225,9 @@ int ll_setattr_raw(struct dentry *dentry, struct iattr *attr, bool hsm_import)
 	bool file_is_released = false;
 	int rc = 0, rc1 = 0;
 
-	CDEBUG(D_VFSTRACE,
-	       "%s: setattr inode %p/fid:" DFID
-	       " from %llu to %llu, valid %x, hsm_import %d\n",
-	       ll_get_fsname(inode->i_sb, NULL, 0), inode,
-	       PFID(&lli->lli_fid), i_size_read(inode), attr->ia_size,
-	       attr->ia_valid, hsm_import);
+	CDEBUG(D_VFSTRACE, "%s: setattr inode "DFID"(%p) from %llu to %llu, valid %x, hsm_import %d\n",
+	       ll_get_fsname(inode->i_sb, NULL, 0), PFID(&lli->lli_fid), inode,
+	       i_size_read(inode), attr->ia_size, attr->ia_valid, hsm_import);
 
 	if (attr->ia_valid & ATTR_SIZE) {
 		/* Check new size against VFS/VM file size limit and rlimit */
@@ -1265,14 +1277,6 @@ int ll_setattr_raw(struct dentry *dentry, struct iattr *attr, bool hsm_import)
 		       LTIME_S(attr->ia_mtime), LTIME_S(attr->ia_ctime),
 		       (s64)ktime_get_real_seconds());
 
-	/* If we are changing file size, file content is modified, flag it. */
-	if (attr->ia_valid & ATTR_SIZE) {
-		attr->ia_valid |= MDS_OPEN_OWNEROVERRIDE;
-		spin_lock(&lli->lli_lock);
-		lli->lli_flags |= LLIF_DATA_MODIFIED;
-		spin_unlock(&lli->lli_lock);
-	}
-
 	/* We always do an MDS RPC, even if we're only changing the size;
 	 * only the MDS knows whether truncate() should fail with -ETXTBUSY
 	 */
@@ -1284,13 +1288,6 @@ int ll_setattr_raw(struct dentry *dentry, struct iattr *attr, bool hsm_import)
 	if (!S_ISDIR(inode->i_mode))
 		inode_unlock(inode);
 
-	memcpy(&op_data->op_attr, attr, sizeof(*attr));
-
-	/* Open epoch for truncate. */
-	if (exp_connect_som(ll_i2mdexp(inode)) &&
-	    (attr->ia_valid & (ATTR_SIZE | ATTR_MTIME | ATTR_MTIME_SET)))
-		op_data->op_flags = MF_EPOCH_OPEN;
-
 	/* truncate on a released file must failed with -ENODATA,
 	 * so size must not be set on MDS for released file
 	 * but other attributes must be set
@@ -1304,29 +1301,40 @@ int ll_setattr_raw(struct dentry *dentry, struct iattr *attr, bool hsm_import)
 		if (lsm && lsm->lsm_pattern & LOV_PATTERN_F_RELEASED)
 			file_is_released = true;
 		ccc_inode_lsm_put(inode, lsm);
+
+		if (!hsm_import && attr->ia_valid & ATTR_SIZE) {
+			if (file_is_released) {
+				rc = ll_layout_restore(inode, 0, attr->ia_size);
+				if (rc < 0)
+					goto out;
+
+				file_is_released = false;
+				ll_layout_refresh(inode, &gen);
+			}
+
+			/*
+			 * If we are changing file size, file content is
+			 * modified, flag it.
+			 */
+			attr->ia_valid |= MDS_OPEN_OWNEROVERRIDE;
+			spin_lock(&lli->lli_lock);
+			lli->lli_flags |= LLIF_DATA_MODIFIED;
+			spin_unlock(&lli->lli_lock);
+			op_data->op_bias |= MDS_DATA_MODIFIED;
+		}
 	}
 
-	/* if not in HSM import mode, clear size attr for released file
-	 * we clear the attribute send to MDT in op_data, not the original
-	 * received from caller in attr which is used later to
-	 * decide return code
-	 */
-	if (file_is_released && (attr->ia_valid & ATTR_SIZE) && !hsm_import)
-		op_data->op_attr.ia_valid &= ~ATTR_SIZE;
+	memcpy(&op_data->op_attr, attr, sizeof(*attr));
+
+	/* Open epoch for truncate. */
+	if (exp_connect_som(ll_i2mdexp(inode)) && !hsm_import &&
+	    (attr->ia_valid & (ATTR_SIZE | ATTR_MTIME | ATTR_MTIME_SET)))
+		op_data->op_flags = MF_EPOCH_OPEN;
 
 	rc = ll_md_setattr(dentry, op_data, &mod);
 	if (rc)
 		goto out;
 
-	/* truncate failed (only when non HSM import), others succeed */
-	if (file_is_released) {
-		if ((attr->ia_valid & ATTR_SIZE) && !hsm_import)
-			rc = -ENODATA;
-		else
-			rc = 0;
-		goto out;
-	}
-
 	/* RPC to MDT is sent, cancel data modification flag */
 	if (op_data->op_bias & MDS_DATA_MODIFIED) {
 		spin_lock(&lli->lli_lock);
@@ -1335,7 +1343,7 @@ int ll_setattr_raw(struct dentry *dentry, struct iattr *attr, bool hsm_import)
 	}
 
 	ll_ioepoch_open(lli, op_data->op_ioepoch);
-	if (!S_ISREG(inode->i_mode)) {
+	if (!S_ISREG(inode->i_mode) || file_is_released) {
 		rc = 0;
 		goto out;
 	}
@@ -1552,7 +1560,7 @@ void ll_update_inode(struct inode *inode, struct lustre_md *md)
 	if (body->valid & OBD_MD_FLATIME) {
 		if (body->atime > LTIME_S(inode->i_atime))
 			LTIME_S(inode->i_atime) = body->atime;
-		lli->lli_lvb.lvb_atime = body->atime;
+		lli->lli_atime = body->atime;
 	}
 	if (body->valid & OBD_MD_FLMTIME) {
 		if (body->mtime > LTIME_S(inode->i_mtime)) {
@@ -1561,12 +1569,12 @@ void ll_update_inode(struct inode *inode, struct lustre_md *md)
 			       body->mtime);
 			LTIME_S(inode->i_mtime) = body->mtime;
 		}
-		lli->lli_lvb.lvb_mtime = body->mtime;
+		lli->lli_mtime = body->mtime;
 	}
 	if (body->valid & OBD_MD_FLCTIME) {
 		if (body->ctime > LTIME_S(inode->i_ctime))
 			LTIME_S(inode->i_ctime) = body->ctime;
-		lli->lli_lvb.lvb_ctime = body->ctime;
+		lli->lli_ctime = body->ctime;
 	}
 	if (body->valid & OBD_MD_FLMODE)
 		inode->i_mode = (inode->i_mode & S_IFMT)|(body->mode & ~S_IFMT);
@@ -1593,12 +1601,12 @@ void ll_update_inode(struct inode *inode, struct lustre_md *md)
 		/* FID shouldn't be changed! */
 		if (fid_is_sane(&lli->lli_fid)) {
 			LASSERTF(lu_fid_eq(&lli->lli_fid, &body->fid1),
-				 "Trying to change FID "DFID
-				 " to the "DFID", inode %lu/%u(%p)\n",
+				 "Trying to change FID "DFID" to the "DFID", inode "DFID"(%p)\n",
 				 PFID(&lli->lli_fid), PFID(&body->fid1),
-				 inode->i_ino, inode->i_generation, inode);
-		} else
+				 PFID(ll_inode2fid(inode)), inode);
+		} else {
 			lli->lli_fid = body->fid1;
+		}
 	}
 
 	LASSERT(fid_seq(&lli->lli_fid) != 0);
@@ -1622,8 +1630,10 @@ void ll_update_inode(struct inode *inode, struct lustre_md *md)
 				if (lli->lli_flags & (LLIF_DONE_WRITING |
 						      LLIF_EPOCH_PENDING |
 						      LLIF_SOM_DIRTY)) {
-					CERROR("ino %lu flags %u still has size authority! do not trust the size got from MDS\n",
-					       inode->i_ino, lli->lli_flags);
+					CERROR("%s: inode "DFID" flags %u still has size authority! do not trust the size got from MDS\n",
+					       sbi->ll_md_exp->exp_obd->obd_name,
+					       PFID(ll_inode2fid(inode)),
+					       lli->lli_flags);
 				} else {
 					/* Use old size assignment to avoid
 					 * deadlock bz14138 & bz14326
@@ -1699,7 +1709,7 @@ void ll_read_inode2(struct inode *inode, void *opaque)
 
 void ll_delete_inode(struct inode *inode)
 {
-	struct cl_inode_info *lli = cl_i2info(inode);
+	struct ll_inode_info *lli = ll_i2info(inode);
 
 	if (S_ISREG(inode->i_mode) && lli->lli_clob)
 		/* discard all dirty pages before truncating them, required by
@@ -1715,8 +1725,8 @@ void ll_delete_inode(struct inode *inode)
 		spin_lock_irq(&inode->i_data.tree_lock);
 		spin_unlock_irq(&inode->i_data.tree_lock);
 		LASSERTF(inode->i_data.nrpages == 0,
-			 "inode=%lu/%u(%p) nrpages=%lu, see http://jira.whamcloud.com/browse/LU-118\n",
-			 inode->i_ino, inode->i_generation, inode,
+			 "inode="DFID"(%p) nrpages=%lu, see http://jira.whamcloud.com/browse/LU-118\n",
+			 PFID(ll_inode2fid(inode)), inode,
 			 inode->i_data.nrpages);
 	}
 	/* Workaround end */
@@ -1747,7 +1757,9 @@ int ll_iocontrol(struct inode *inode, struct file *file,
 		rc = md_getattr(sbi->ll_md_exp, op_data, &req);
 		ll_finish_md_op_data(op_data);
 		if (rc) {
-			CERROR("failure %d inode %lu\n", rc, inode->i_ino);
+			CERROR("%s: failure inode "DFID": rc = %d\n",
+			       sbi->ll_md_exp->exp_obd->obd_name,
+			       PFID(ll_inode2fid(inode)), rc);
 			return -abs(rc);
 		}
 
@@ -1772,7 +1784,7 @@ int ll_iocontrol(struct inode *inode, struct file *file,
 		if (IS_ERR(op_data))
 			return PTR_ERR(op_data);
 
-		((struct ll_iattr *)&op_data->op_attr)->ia_attr_flags = flags;
+		op_data->op_attr_flags = flags;
 		op_data->op_attr.ia_valid |= ATTR_ATTR_FLAG;
 		rc = md_setattr(sbi->ll_md_exp, op_data,
 				NULL, 0, NULL, 0, &req, NULL);
@@ -2066,11 +2078,11 @@ int ll_obd_statfs(struct inode *inode, void __user *arg)
 	}
 
 	memcpy(&type, data->ioc_inlbuf1, sizeof(__u32));
-	if (type & LL_STATFS_LMV)
+	if (type & LL_STATFS_LMV) {
 		exp = sbi->ll_md_exp;
-	else if (type & LL_STATFS_LOV)
+	} else if (type & LL_STATFS_LOV) {
 		exp = sbi->ll_dt_exp;
-	else {
+	} else {
 		rc = -ENODEV;
 		goto out_statfs;
 	}
@@ -2271,7 +2283,7 @@ void ll_dirty_page_discard_warn(struct page *page, int ioret)
 {
 	char *buf, *path = NULL;
 	struct dentry *dentry = NULL;
-	struct ccc_object *obj = cl_inode2ccc(page->mapping->host);
+	struct vvp_object *obj = cl_inode2vvp(page->mapping->host);
 
 	/* this can be called inside spin lock so use GFP_ATOMIC. */
 	buf = (char *)__get_free_page(GFP_ATOMIC);
@@ -2285,7 +2297,7 @@ void ll_dirty_page_discard_warn(struct page *page, int ioret)
 	       "%s: dirty page discard: %s/fid: " DFID "/%s may get corrupted (rc %d)\n",
 	       ll_get_fsname(page->mapping->host->i_sb, NULL, 0),
 	       s2lsi(page->mapping->host->i_sb)->lsi_lmd->lmd_dev,
-	       PFID(&obj->cob_header.coh_lu.loh_fid),
+	       PFID(&obj->vob_header.coh_lu.loh_fid),
 	       (path && !IS_ERR(path)) ? path : "", ioret);
 
 	if (dentry)
diff --git a/drivers/staging/lustre/lustre/llite/llite_mmap.c b/drivers/staging/lustre/lustre/llite/llite_mmap.c
index 5b484e62f..88ef1cac9 100644
--- a/drivers/staging/lustre/lustre/llite/llite_mmap.c
+++ b/drivers/staging/lustre/lustre/llite/llite_mmap.c
@@ -57,10 +57,10 @@ void policy_from_vma(ldlm_policy_data_t *policy,
 		     struct vm_area_struct *vma, unsigned long addr,
 		     size_t count)
 {
-	policy->l_extent.start = ((addr - vma->vm_start) & CFS_PAGE_MASK) +
+	policy->l_extent.start = ((addr - vma->vm_start) & PAGE_MASK) +
 				 (vma->vm_pgoff << PAGE_SHIFT);
 	policy->l_extent.end = (policy->l_extent.start + count - 1) |
-			       ~CFS_PAGE_MASK;
+			       ~PAGE_MASK;
 }
 
 struct vm_area_struct *our_vma(struct mm_struct *mm, unsigned long addr,
@@ -123,7 +123,8 @@ ll_fault_io_init(struct vm_area_struct *vma, struct lu_env **env_ret,
 
 	*env_ret = env;
 
-	io = ccc_env_thread_io(env);
+restart:
+	io = vvp_env_thread_io(env);
 	io->ci_obj = ll_i2info(inode)->lli_clob;
 	LASSERT(io->ci_obj);
 
@@ -146,17 +147,20 @@ ll_fault_io_init(struct vm_area_struct *vma, struct lu_env **env_ret,
 
 	rc = cl_io_init(env, io, CIT_FAULT, io->ci_obj);
 	if (rc == 0) {
-		struct ccc_io *cio = ccc_env_io(env);
+		struct vvp_io *vio = vvp_env_io(env);
 		struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
 
-		LASSERT(cio->cui_cl.cis_io == io);
+		LASSERT(vio->vui_cl.cis_io == io);
 
 		/* mmap lock must be MANDATORY it has to cache pages. */
 		io->ci_lockreq = CILR_MANDATORY;
-		cio->cui_fd = fd;
+		vio->vui_fd = fd;
 	} else {
 		LASSERT(rc < 0);
 		cl_io_fini(env, io);
+		if (io->ci_need_restart)
+			goto restart;
+
 		cl_env_nested_put(nest, env);
 		io = ERR_PTR(rc);
 	}
@@ -200,7 +204,7 @@ static int ll_page_mkwrite0(struct vm_area_struct *vma, struct page *vmpage,
 	 * Otherwise, we could add dirty pages into osc cache
 	 * while truncate is on-going.
 	 */
-	inode = ccc_object_inode(io->ci_obj);
+	inode = vvp_object_inode(io->ci_obj);
 	lli = ll_i2info(inode);
 	down_read(&lli->lli_trunc_sem);
 
@@ -307,17 +311,17 @@ static int ll_fault0(struct vm_area_struct *vma, struct vm_fault *vmf)
 		vio = vvp_env_io(env);
 		vio->u.fault.ft_vma       = vma;
 		vio->u.fault.ft_vmpage    = NULL;
-		vio->u.fault.fault.ft_vmf = vmf;
-		vio->u.fault.fault.ft_flags = 0;
-		vio->u.fault.fault.ft_flags_valid = false;
+		vio->u.fault.ft_vmf = vmf;
+		vio->u.fault.ft_flags = 0;
+		vio->u.fault.ft_flags_valid = false;
 
 		result = cl_io_loop(env, io);
 
 		/* ft_flags are only valid if we reached
 		 * the call to filemap_fault
 		 */
-		if (vio->u.fault.fault.ft_flags_valid)
-			fault_ret = vio->u.fault.fault.ft_flags;
+		if (vio->u.fault.ft_flags_valid)
+			fault_ret = vio->u.fault.ft_flags;
 
 		vmpage = vio->u.fault.ft_vmpage;
 		if (result != 0 && vmpage) {
@@ -390,9 +394,11 @@ static int ll_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 		result = ll_page_mkwrite0(vma, vmf->page, &retry);
 
 		if (!printed && ++count > 16) {
-			CWARN("app(%s): the page %lu of file %lu is under heavy contention.\n",
+			const struct dentry *de = vma->vm_file->f_path.dentry;
+
+			CWARN("app(%s): the page %lu of file "DFID" is under heavy contention\n",
 			      current->comm, vmf->pgoff,
-			      file_inode(vma->vm_file)->i_ino);
+			      PFID(ll_inode2fid(de->d_inode)));
 			printed = true;
 		}
 	} while (retry);
@@ -422,16 +428,16 @@ static int ll_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 
 /**
  *  To avoid cancel the locks covering mmapped region for lock cache pressure,
- *  we track the mapped vma count in ccc_object::cob_mmap_cnt.
+ *  we track the mapped vma count in vvp_object::vob_mmap_cnt.
  */
 static void ll_vm_open(struct vm_area_struct *vma)
 {
 	struct inode *inode    = file_inode(vma->vm_file);
-	struct ccc_object *vob = cl_inode2ccc(inode);
+	struct vvp_object *vob = cl_inode2vvp(inode);
 
 	LASSERT(vma->vm_file);
-	LASSERT(atomic_read(&vob->cob_mmap_cnt) >= 0);
-	atomic_inc(&vob->cob_mmap_cnt);
+	LASSERT(atomic_read(&vob->vob_mmap_cnt) >= 0);
+	atomic_inc(&vob->vob_mmap_cnt);
 }
 
 /**
@@ -440,11 +446,11 @@ static void ll_vm_open(struct vm_area_struct *vma)
 static void ll_vm_close(struct vm_area_struct *vma)
 {
 	struct inode      *inode = file_inode(vma->vm_file);
-	struct ccc_object *vob   = cl_inode2ccc(inode);
+	struct vvp_object *vob   = cl_inode2vvp(inode);
 
 	LASSERT(vma->vm_file);
-	atomic_dec(&vob->cob_mmap_cnt);
-	LASSERT(atomic_read(&vob->cob_mmap_cnt) >= 0);
+	atomic_dec(&vob->vob_mmap_cnt);
+	LASSERT(atomic_read(&vob->vob_mmap_cnt) >= 0);
 }
 
 /* XXX put nice comment here.  talk about __free_pte -> dirty pages and
diff --git a/drivers/staging/lustre/lustre/llite/llite_nfs.c b/drivers/staging/lustre/lustre/llite/llite_nfs.c
index 193aab879..c1eef6198 100644
--- a/drivers/staging/lustre/lustre/llite/llite_nfs.c
+++ b/drivers/staging/lustre/lustre/llite/llite_nfs.c
@@ -119,7 +119,7 @@ struct inode *search_inode_for_lustre(struct super_block *sb,
 	rc = md_getattr(sbi->ll_md_exp, op_data, &req);
 	kfree(op_data);
 	if (rc) {
-		CERROR("can't get object attrs, fid "DFID", rc %d\n",
+		CDEBUG(D_INFO, "can't get object attrs, fid "DFID", rc %d\n",
 		       PFID(fid), rc);
 		return ERR_PTR(rc);
 	}
@@ -191,8 +191,9 @@ static int ll_encode_fh(struct inode *inode, __u32 *fh, int *plen,
 	int fileid_len = sizeof(struct lustre_nfs_fid) / 4;
 	struct lustre_nfs_fid *nfs_fid = (void *)fh;
 
-	CDEBUG(D_INFO, "encoding for (%lu," DFID ") maxlen=%d minlen=%d\n",
-	       inode->i_ino, PFID(ll_inode2fid(inode)), *plen, fileid_len);
+	CDEBUG(D_INFO, "%s: encoding for ("DFID") maxlen=%d minlen=%d\n",
+	       ll_get_fsname(inode->i_sb, NULL, 0),
+	       PFID(ll_inode2fid(inode)), *plen, fileid_len);
 
 	if (*plen < fileid_len) {
 		*plen = fileid_len;
@@ -298,8 +299,9 @@ static struct dentry *ll_get_parent(struct dentry *dchild)
 
 	sbi = ll_s2sbi(dir->i_sb);
 
-	CDEBUG(D_INFO, "getting parent for (%lu," DFID ")\n",
-	       dir->i_ino, PFID(ll_inode2fid(dir)));
+	CDEBUG(D_INFO, "%s: getting parent for ("DFID")\n",
+	       ll_get_fsname(dir->i_sb, NULL, 0),
+	       PFID(ll_inode2fid(dir)));
 
 	rc = ll_get_default_mdsize(sbi, &lmmsize);
 	if (rc != 0)
@@ -314,15 +316,20 @@ static struct dentry *ll_get_parent(struct dentry *dchild)
 	rc = md_getattr_name(sbi->ll_md_exp, op_data, &req);
 	ll_finish_md_op_data(op_data);
 	if (rc) {
-		CERROR("failure %d inode %lu get parent\n", rc, dir->i_ino);
+		CERROR("%s: failure inode "DFID" get parent: rc = %d\n",
+		       ll_get_fsname(dir->i_sb, NULL, 0),
+		       PFID(ll_inode2fid(dir)), rc);
 		return ERR_PTR(rc);
 	}
 	body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
-	LASSERT(body->valid & OBD_MD_FLID);
-
-	CDEBUG(D_INFO, "parent for " DFID " is " DFID "\n",
-	       PFID(ll_inode2fid(dir)), PFID(&body->fid1));
-
+	/*
+	 * LU-3952: MDT may lost the FID of its parent, we should not crash
+	 * the NFS server, ll_iget_for_nfs() will handle the error.
+	 */
+	if (body->valid & OBD_MD_FLID) {
+		CDEBUG(D_INFO, "parent for " DFID " is " DFID "\n",
+		       PFID(ll_inode2fid(dir)), PFID(&body->fid1));
+	}
 	result = ll_iget_for_nfs(dir->i_sb, &body->fid1, NULL);
 
 	ptlrpc_req_finished(req);
diff --git a/drivers/staging/lustre/lustre/llite/lloop.c b/drivers/staging/lustre/lustre/llite/lloop.c
index f169c0db6..813a9a354 100644
--- a/drivers/staging/lustre/lustre/llite/lloop.c
+++ b/drivers/staging/lustre/lustre/llite/lloop.c
@@ -274,8 +274,9 @@ static void loop_add_bio(struct lloop_device *lo, struct bio *bio)
 	if (lo->lo_biotail) {
 		lo->lo_biotail->bi_next = bio;
 		lo->lo_biotail = bio;
-	} else
+	} else {
 		lo->lo_bio = lo->lo_biotail = bio;
+	}
 	spin_unlock_irqrestore(&lo->lo_lock, flags);
 
 	atomic_inc(&lo->lo_pending);
diff --git a/drivers/staging/lustre/lustre/llite/lproc_llite.c b/drivers/staging/lustre/lustre/llite/lproc_llite.c
index 27ab12614..55d62eb11 100644
--- a/drivers/staging/lustre/lustre/llite/lproc_llite.c
+++ b/drivers/staging/lustre/lustre/llite/lproc_llite.c
@@ -254,7 +254,6 @@ static ssize_t max_read_ahead_mb_store(struct kobject *kobj,
 	pages_number *= 1 << (20 - PAGE_SHIFT); /* MB -> pages */
 
 	if (pages_number > totalram_pages / 2) {
-
 		CERROR("can't set file readahead more than %lu MB\n",
 		       totalram_pages >> (20 - PAGE_SHIFT + 1)); /*1/2 of RAM*/
 		return -ERANGE;
@@ -393,6 +392,8 @@ static ssize_t ll_max_cached_mb_seq_write(struct file *file,
 	struct super_block *sb = ((struct seq_file *)file->private_data)->private;
 	struct ll_sb_info *sbi = ll_s2sbi(sb);
 	struct cl_client_cache *cache = &sbi->ll_cache;
+	struct lu_env *env;
+	int refcheck;
 	int mult, rc, pages_number;
 	int diff = 0;
 	int nrpages = 0;
@@ -430,6 +431,10 @@ static ssize_t ll_max_cached_mb_seq_write(struct file *file,
 		goto out;
 	}
 
+	env = cl_env_get(&refcheck);
+	if (IS_ERR(env))
+		return 0;
+
 	diff = -diff;
 	while (diff > 0) {
 		int tmp;
@@ -455,19 +460,20 @@ static ssize_t ll_max_cached_mb_seq_write(struct file *file,
 			break;
 
 		if (!sbi->ll_dt_exp) { /* being initialized */
-			rc = -ENODEV;
-			break;
+			rc = 0;
+			goto out;
 		}
 
 		/* difficult - have to ask OSCs to drop LRU slots. */
 		tmp = diff << 1;
-		rc = obd_set_info_async(NULL, sbi->ll_dt_exp,
+		rc = obd_set_info_async(env, sbi->ll_dt_exp,
 					sizeof(KEY_CACHE_LRU_SHRINK),
 					KEY_CACHE_LRU_SHRINK,
 					sizeof(tmp), &tmp, NULL);
 		if (rc < 0)
 			break;
 	}
+	cl_env_put(env, &refcheck);
 
 out:
 	if (rc >= 0) {
@@ -818,6 +824,23 @@ static ssize_t xattr_cache_store(struct kobject *kobj,
 }
 LUSTRE_RW_ATTR(xattr_cache);
 
+static ssize_t unstable_stats_show(struct kobject *kobj,
+				   struct attribute *attr,
+				   char *buf)
+{
+	struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
+					      ll_kobj);
+	struct cl_client_cache *cache = &sbi->ll_cache;
+	int pages, mb;
+
+	pages = atomic_read(&cache->ccc_unstable_nr);
+	mb = (pages * PAGE_SIZE) >> 20;
+
+	return sprintf(buf, "unstable_pages: %8d\n"
+			    "unstable_mb:    %8d\n", pages, mb);
+}
+LUSTRE_RO_ATTR(unstable_stats);
+
 static struct lprocfs_vars lprocfs_llite_obd_vars[] = {
 	/* { "mntpt_path",   ll_rd_path,	     0, 0 }, */
 	{ "site",	  &ll_site_stats_fops,    NULL, 0 },
@@ -853,6 +876,7 @@ static struct attribute *llite_attrs[] = {
 	&lustre_attr_max_easize.attr,
 	&lustre_attr_default_easize.attr,
 	&lustre_attr_xattr_cache.attr,
+	&lustre_attr_unstable_stats.attr,
 	NULL,
 };
 
@@ -953,6 +977,7 @@ static const char *ra_stat_string[] = {
 	[RA_STAT_EOF] = "read-ahead to EOF",
 	[RA_STAT_MAX_IN_FLIGHT] = "hit max r-a issue",
 	[RA_STAT_WRONG_GRAB_PAGE] = "wrong page from grab_cache_page",
+	[RA_STAT_FAILED_REACH_END] = "failed to reach end"
 };
 
 int ldebugfs_register_mountpoint(struct dentry *parent,
diff --git a/drivers/staging/lustre/lustre/llite/namei.c b/drivers/staging/lustre/lustre/llite/namei.c
index f8f98e4e8..5eba0ebae 100644
--- a/drivers/staging/lustre/lustre/llite/namei.c
+++ b/drivers/staging/lustre/lustre/llite/namei.c
@@ -128,12 +128,14 @@ struct inode *ll_iget(struct super_block *sb, ino_t hash,
 			if (rc != 0) {
 				iget_failed(inode);
 				inode = NULL;
-			} else
+			} else {
 				unlock_new_inode(inode);
-		} else if (!(inode->i_state & (I_FREEING | I_CLEAR)))
+			}
+		} else if (!(inode->i_state & (I_FREEING | I_CLEAR))) {
 			ll_update_inode(inode, md);
-		CDEBUG(D_VFSTRACE, "got inode: %p for "DFID"\n",
-		       inode, PFID(&md->body->fid1));
+			CDEBUG(D_VFSTRACE, "got inode: "DFID"(%p)\n",
+			       PFID(&md->body->fid1), inode);
+		}
 	}
 	return inode;
 }
@@ -188,7 +190,7 @@ int ll_md_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
 			break;
 
 		/* Invalidate all dentries associated with this inode */
-		LASSERT(lock->l_flags & LDLM_FL_CANCELING);
+		LASSERT(ldlm_is_canceling(lock));
 
 		if (!fid_res_name_eq(ll_inode2fid(inode),
 				     &lock->l_resource->lr_name)) {
@@ -255,8 +257,8 @@ int ll_md_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
 		}
 
 		if ((bits & MDS_INODELOCK_UPDATE) && S_ISDIR(inode->i_mode)) {
-			CDEBUG(D_INODE, "invalidating inode %lu\n",
-			       inode->i_ino);
+			CDEBUG(D_INODE, "invalidating inode "DFID"\n",
+			       PFID(ll_inode2fid(inode)));
 			truncate_inode_pages(inode->i_mapping, 0);
 			ll_invalidate_negative_children(inode);
 		}
@@ -476,9 +478,8 @@ static struct dentry *ll_lookup_it(struct inode *parent, struct dentry *dentry,
 	if (dentry->d_name.len > ll_i2sbi(parent)->ll_namelen)
 		return ERR_PTR(-ENAMETOOLONG);
 
-	CDEBUG(D_VFSTRACE, "VFS Op:name=%pd,dir=%lu/%u(%p),intent=%s\n",
-	       dentry, parent->i_ino,
-	       parent->i_generation, parent, LL_IT2STR(it));
+	CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir="DFID"(%p),intent=%s\n",
+	       dentry, PFID(ll_inode2fid(parent)), parent, LL_IT2STR(it));
 
 	if (d_mountpoint(dentry))
 		CERROR("Tell Peter, lookup on mtpt, it %s\n", LL_IT2STR(it));
@@ -553,9 +554,8 @@ static struct dentry *ll_lookup_nd(struct inode *parent, struct dentry *dentry,
 	struct lookup_intent *itp, it = { .it_op = IT_GETATTR };
 	struct dentry *de;
 
-	CDEBUG(D_VFSTRACE, "VFS Op:name=%pd,dir=%lu/%u(%p),flags=%u\n",
-	       dentry, parent->i_ino,
-	       parent->i_generation, parent, flags);
+	CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir="DFID"(%p),flags=%u\n",
+	       dentry, PFID(ll_inode2fid(parent)), parent, flags);
 
 	/* Optimize away (CREATE && !OPEN). Let .create handle the race. */
 	if ((flags & LOOKUP_CREATE) && !(flags & LOOKUP_OPEN))
@@ -586,10 +586,9 @@ static int ll_atomic_open(struct inode *dir, struct dentry *dentry,
 	long long lookup_flags = LOOKUP_OPEN;
 	int rc = 0;
 
-	CDEBUG(D_VFSTRACE,
-	       "VFS Op:name=%pd,dir=%lu/%u(%p),file %p,open_flags %x,mode %x opened %d\n",
-	       dentry, dir->i_ino,
-	       dir->i_generation, dir, file, open_flags, mode, *opened);
+	CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir="DFID"(%p),file %p,open_flags %x,mode %x opened %d\n",
+	       dentry, PFID(ll_inode2fid(dir)), dir, file, open_flags, mode,
+	       *opened);
 
 	it = kzalloc(sizeof(*it), GFP_NOFS);
 	if (!it)
@@ -680,8 +679,8 @@ static struct inode *ll_create_node(struct inode *dir, struct lookup_intent *it)
 	 * lock on the inode.  Since we finally have an inode pointer,
 	 * stuff it in the lock.
 	 */
-	CDEBUG(D_DLMTRACE, "setting l_ast_data to inode %p (%lu/%u)\n",
-	       inode, inode->i_ino, inode->i_generation);
+	CDEBUG(D_DLMTRACE, "setting l_ast_data to inode "DFID"(%p)\n",
+	       PFID(ll_inode2fid(dir)), inode);
 	ll_set_lock_data(sbi->ll_md_exp, inode, it, NULL);
  out:
 	ptlrpc_req_finished(request);
@@ -708,9 +707,8 @@ static int ll_create_it(struct inode *dir, struct dentry *dentry, int mode,
 	struct inode *inode;
 	int rc = 0;
 
-	CDEBUG(D_VFSTRACE, "VFS Op:name=%pd,dir=%lu/%u(%p),intent=%s\n",
-	       dentry, dir->i_ino,
-	       dir->i_generation, dir, LL_IT2STR(it));
+	CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir="DFID"(%p), intent=%s\n",
+	       dentry, PFID(ll_inode2fid(dir)), dir, LL_IT2STR(it));
 
 	rc = it_open_error(DISP_OPEN_CREATE, it);
 	if (rc)
@@ -733,8 +731,9 @@ static void ll_update_times(struct ptlrpc_request *request,
 	LASSERT(body);
 	if (body->valid & OBD_MD_FLMTIME &&
 	    body->mtime > LTIME_S(inode->i_mtime)) {
-		CDEBUG(D_INODE, "setting ino %lu mtime from %lu to %llu\n",
-		       inode->i_ino, LTIME_S(inode->i_mtime), body->mtime);
+		CDEBUG(D_INODE, "setting fid "DFID" mtime from %lu to %llu\n",
+		       PFID(ll_inode2fid(inode)), LTIME_S(inode->i_mtime),
+		       body->mtime);
 		LTIME_S(inode->i_mtime) = body->mtime;
 	}
 	if (body->valid & OBD_MD_FLCTIME &&
@@ -791,9 +790,9 @@ static int ll_mknod(struct inode *dir, struct dentry *dchild,
 {
 	int err;
 
-	CDEBUG(D_VFSTRACE, "VFS Op:name=%pd,dir=%lu/%u(%p) mode %o dev %x\n",
-	       dchild, dir->i_ino, dir->i_generation, dir,
-	       mode, old_encode_dev(rdev));
+	CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir="DFID"(%p) mode %o dev %x\n",
+	       dchild, PFID(ll_inode2fid(dir)), dir, mode,
+	       old_encode_dev(rdev));
 
 	if (!IS_POSIXACL(dir) || !exp_connect_umask(ll_i2mdexp(dir)))
 		mode &= ~current_umask();
@@ -831,9 +830,8 @@ static int ll_create_nd(struct inode *dir, struct dentry *dentry,
 {
 	int rc;
 
-	CDEBUG(D_VFSTRACE, "VFS Op:name=%pd,dir=%lu/%u(%p),flags=%u, excl=%d\n",
-	       dentry, dir->i_ino,
-	       dir->i_generation, dir, mode, want_excl);
+	CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir="DFID"(%p), flags=%u, excl=%d\n",
+	       dentry, PFID(ll_inode2fid(dir)), dir, mode, want_excl);
 
 	rc = ll_mknod(dir, dentry, mode, 0);
 
@@ -845,12 +843,6 @@ static int ll_create_nd(struct inode *dir, struct dentry *dentry,
 	return rc;
 }
 
-static inline void ll_get_child_fid(struct dentry *child, struct lu_fid *fid)
-{
-	if (d_really_is_positive(child))
-		*fid = *ll_inode2fid(d_inode(child));
-}
-
 int ll_objects_destroy(struct ptlrpc_request *request, struct inode *dir)
 {
 	struct mdt_body *body;
@@ -927,23 +919,25 @@ out:
  * is any lock existing. They will recycle dentries and inodes based upon locks
  * too. b=20433
  */
-static int ll_unlink(struct inode *dir, struct dentry *dentry)
+static int ll_unlink(struct inode *dir, struct dentry *dchild)
 {
 	struct ptlrpc_request *request = NULL;
 	struct md_op_data *op_data;
 	int rc;
 
 	CDEBUG(D_VFSTRACE, "VFS Op:name=%pd,dir=%lu/%u(%p)\n",
-	       dentry, dir->i_ino, dir->i_generation, dir);
+	       dchild, dir->i_ino, dir->i_generation, dir);
 
 	op_data = ll_prep_md_op_data(NULL, dir, NULL,
-				     dentry->d_name.name,
-				     dentry->d_name.len,
+				     dchild->d_name.name,
+				     dchild->d_name.len,
 				     0, LUSTRE_OPC_ANY, NULL);
 	if (IS_ERR(op_data))
 		return PTR_ERR(op_data);
 
-	ll_get_child_fid(dentry, &op_data->op_fid3);
+	if (dchild && dchild->d_inode)
+		op_data->op_fid3 = *ll_inode2fid(dchild->d_inode);
+
 	op_data->op_fid2 = op_data->op_fid3;
 	rc = md_unlink(ll_i2sbi(dir)->ll_md_exp, op_data, &request);
 	ll_finish_md_op_data(op_data);
@@ -963,8 +957,8 @@ static int ll_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 {
 	int err;
 
-	CDEBUG(D_VFSTRACE, "VFS Op:name=%pd,dir=%lu/%u(%p)\n",
-	       dentry, dir->i_ino, dir->i_generation, dir);
+	CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir"DFID"(%p)\n",
+	       dentry, PFID(ll_inode2fid(dir)), dir);
 
 	if (!IS_POSIXACL(dir) || !exp_connect_umask(ll_i2mdexp(dir)))
 		mode &= ~current_umask();
@@ -977,23 +971,25 @@ static int ll_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 	return err;
 }
 
-static int ll_rmdir(struct inode *dir, struct dentry *dentry)
+static int ll_rmdir(struct inode *dir, struct dentry *dchild)
 {
 	struct ptlrpc_request *request = NULL;
 	struct md_op_data *op_data;
 	int rc;
 
-	CDEBUG(D_VFSTRACE, "VFS Op:name=%pd,dir=%lu/%u(%p)\n",
-	       dentry, dir->i_ino, dir->i_generation, dir);
+	CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir="DFID"(%p)\n",
+	       dchild, PFID(ll_inode2fid(dir)), dir);
 
 	op_data = ll_prep_md_op_data(NULL, dir, NULL,
-				     dentry->d_name.name,
-				     dentry->d_name.len,
+				     dchild->d_name.name,
+				     dchild->d_name.len,
 				     S_IFDIR, LUSTRE_OPC_ANY, NULL);
 	if (IS_ERR(op_data))
 		return PTR_ERR(op_data);
 
-	ll_get_child_fid(dentry, &op_data->op_fid3);
+	if (dchild && dchild->d_inode)
+		op_data->op_fid3 = *ll_inode2fid(dchild->d_inode);
+
 	op_data->op_fid2 = op_data->op_fid3;
 	rc = md_unlink(ll_i2sbi(dir)->ll_md_exp, op_data, &request);
 	ll_finish_md_op_data(op_data);
@@ -1011,9 +1007,8 @@ static int ll_symlink(struct inode *dir, struct dentry *dentry,
 {
 	int err;
 
-	CDEBUG(D_VFSTRACE, "VFS Op:name=%pd,dir=%lu/%u(%p),target=%.*s\n",
-	       dentry, dir->i_ino, dir->i_generation,
-	       dir, 3000, oldname);
+	CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir="DFID"(%p),target=%.*s\n",
+	       dentry, PFID(ll_inode2fid(dir)), dir, 3000, oldname);
 
 	err = ll_new_node(dir, dentry, oldname, S_IFLNK | S_IRWXUGO,
 			  0, LUSTRE_OPC_SYMLINK);
@@ -1033,10 +1028,9 @@ static int ll_link(struct dentry *old_dentry, struct inode *dir,
 	struct md_op_data *op_data;
 	int err;
 
-	CDEBUG(D_VFSTRACE,
-	       "VFS Op: inode=%lu/%u(%p), dir=%lu/%u(%p), target=%pd\n",
-	       src->i_ino, src->i_generation, src, dir->i_ino,
-	       dir->i_generation, dir, new_dentry);
+	CDEBUG(D_VFSTRACE, "VFS Op: inode="DFID"(%p), dir="DFID"(%p), target=%pd\n",
+	       PFID(ll_inode2fid(src)), src, PFID(ll_inode2fid(dir)), dir,
+	       new_dentry);
 
 	op_data = ll_prep_md_op_data(NULL, src, dir, new_dentry->d_name.name,
 				     new_dentry->d_name.len,
@@ -1056,42 +1050,45 @@ out:
 	return err;
 }
 
-static int ll_rename(struct inode *old_dir, struct dentry *old_dentry,
-		     struct inode *new_dir, struct dentry *new_dentry)
+static int ll_rename(struct inode *src, struct dentry *src_dchild,
+		     struct inode *tgt, struct dentry *tgt_dchild)
 {
 	struct ptlrpc_request *request = NULL;
-	struct ll_sb_info *sbi = ll_i2sbi(old_dir);
+	struct ll_sb_info *sbi = ll_i2sbi(src);
 	struct md_op_data *op_data;
 	int err;
 
 	CDEBUG(D_VFSTRACE,
-	       "VFS Op:oldname=%pd,src_dir=%lu/%u(%p),newname=%pd,tgt_dir=%lu/%u(%p)\n",
-	       old_dentry, old_dir->i_ino, old_dir->i_generation, old_dir,
-	       new_dentry, new_dir->i_ino, new_dir->i_generation, new_dir);
+	       "VFS Op:oldname=%pd, src_dir="DFID"(%p), newname=%pd, tgt_dir="DFID"(%p)\n",
+	       src_dchild, PFID(ll_inode2fid(src)), src,
+	       tgt_dchild, PFID(ll_inode2fid(tgt)), tgt);
 
-	op_data = ll_prep_md_op_data(NULL, old_dir, new_dir, NULL, 0, 0,
+	op_data = ll_prep_md_op_data(NULL, src, tgt, NULL, 0, 0,
 				     LUSTRE_OPC_ANY, NULL);
 	if (IS_ERR(op_data))
 		return PTR_ERR(op_data);
 
-	ll_get_child_fid(old_dentry, &op_data->op_fid3);
-	ll_get_child_fid(new_dentry, &op_data->op_fid4);
+	if (src_dchild && src_dchild->d_inode)
+		op_data->op_fid3 = *ll_inode2fid(src_dchild->d_inode);
+	if (tgt_dchild && tgt_dchild->d_inode)
+		op_data->op_fid4 = *ll_inode2fid(tgt_dchild->d_inode);
+
 	err = md_rename(sbi->ll_md_exp, op_data,
-			old_dentry->d_name.name,
-			old_dentry->d_name.len,
-			new_dentry->d_name.name,
-			new_dentry->d_name.len, &request);
+			src_dchild->d_name.name,
+			src_dchild->d_name.len,
+			tgt_dchild->d_name.name,
+			tgt_dchild->d_name.len, &request);
 	ll_finish_md_op_data(op_data);
 	if (!err) {
-		ll_update_times(request, old_dir);
-		ll_update_times(request, new_dir);
+		ll_update_times(request, src);
+		ll_update_times(request, tgt);
 		ll_stats_ops_tally(sbi, LPROC_LL_RENAME, 1);
-		err = ll_objects_destroy(request, old_dir);
+		err = ll_objects_destroy(request, src);
 	}
 
 	ptlrpc_req_finished(request);
 	if (!err)
-		d_move(old_dentry, new_dentry);
+		d_move(src_dchild, tgt_dchild);
 	return err;
 }
 
diff --git a/drivers/staging/lustre/lustre/llite/rw.c b/drivers/staging/lustre/lustre/llite/rw.c
index edab6c5b7..336397773 100644
--- a/drivers/staging/lustre/lustre/llite/rw.c
+++ b/drivers/staging/lustre/lustre/llite/rw.c
@@ -63,7 +63,7 @@
  * Finalizes cl-data before exiting typical address_space operation. Dual to
  * ll_cl_init().
  */
-static void ll_cl_fini(struct ll_cl_context *lcc)
+void ll_cl_fini(struct ll_cl_context *lcc)
 {
 	struct lu_env  *env  = lcc->lcc_env;
 	struct cl_io   *io   = lcc->lcc_io;
@@ -84,200 +84,59 @@ static void ll_cl_fini(struct ll_cl_context *lcc)
  * Initializes common cl-data at the typical address_space operation entry
  * point.
  */
-static struct ll_cl_context *ll_cl_init(struct file *file,
-					struct page *vmpage, int create)
+struct ll_cl_context *ll_cl_init(struct file *file, struct page *vmpage)
 {
 	struct ll_cl_context *lcc;
 	struct lu_env    *env;
 	struct cl_io     *io;
 	struct cl_object *clob;
-	struct ccc_io    *cio;
+	struct vvp_io    *vio;
 
 	int refcheck;
 	int result = 0;
 
-	clob = ll_i2info(vmpage->mapping->host)->lli_clob;
+	clob = ll_i2info(file_inode(file))->lli_clob;
 	LASSERT(clob);
 
 	env = cl_env_get(&refcheck);
 	if (IS_ERR(env))
 		return ERR_CAST(env);
 
-	lcc = &vvp_env_info(env)->vti_io_ctx;
+	lcc = &ll_env_info(env)->lti_io_ctx;
 	memset(lcc, 0, sizeof(*lcc));
 	lcc->lcc_env = env;
 	lcc->lcc_refcheck = refcheck;
 	lcc->lcc_cookie = current;
 
-	cio = ccc_env_io(env);
-	io = cio->cui_cl.cis_io;
-	if (!io && create) {
-		struct inode *inode = vmpage->mapping->host;
-		loff_t pos;
-
-		if (inode_trylock(inode)) {
-			inode_unlock((inode));
-
-			/* this is too bad. Someone is trying to write the
-			 * page w/o holding inode mutex. This means we can
-			 * add dirty pages into cache during truncate
-			 */
-			CERROR("Proc %s is dirtying page w/o inode lock, this will break truncate\n",
-			       current->comm);
-			dump_stack();
-			LBUG();
-			return ERR_PTR(-EIO);
-		}
-
-		/*
-		 * Loop-back driver calls ->prepare_write().
-		 * methods directly, bypassing file system ->write() operation,
-		 * so cl_io has to be created here.
-		 */
-		io = ccc_env_thread_io(env);
-		ll_io_init(io, file, 1);
-
-		/* No lock at all for this kind of IO - we can't do it because
-		 * we have held page lock, it would cause deadlock.
-		 * XXX: This causes poor performance to loop device - One page
-		 *      per RPC.
-		 *      In order to get better performance, users should use
-		 *      lloop driver instead.
-		 */
-		io->ci_lockreq = CILR_NEVER;
-
-		pos = vmpage->index << PAGE_SHIFT;
-
-		/* Create a temp IO to serve write. */
-		result = cl_io_rw_init(env, io, CIT_WRITE, pos, PAGE_SIZE);
-		if (result == 0) {
-			cio->cui_fd = LUSTRE_FPRIVATE(file);
-			cio->cui_iter = NULL;
-			result = cl_io_iter_init(env, io);
-			if (result == 0) {
-				result = cl_io_lock(env, io);
-				if (result == 0)
-					result = cl_io_start(env, io);
-			}
-		} else
-			result = io->ci_result;
-	}
-
+	vio = vvp_env_io(env);
+	io = vio->vui_cl.cis_io;
 	lcc->lcc_io = io;
 	if (!io)
 		result = -EIO;
-	if (result == 0) {
+
+	if (result == 0 && vmpage) {
 		struct cl_page   *page;
 
 		LASSERT(io->ci_state == CIS_IO_GOING);
-		LASSERT(cio->cui_fd == LUSTRE_FPRIVATE(file));
+		LASSERT(vio->vui_fd == LUSTRE_FPRIVATE(file));
 		page = cl_page_find(env, clob, vmpage->index, vmpage,
 				    CPT_CACHEABLE);
 		if (!IS_ERR(page)) {
 			lcc->lcc_page = page;
 			lu_ref_add(&page->cp_reference, "cl_io", io);
 			result = 0;
-		} else
+		} else {
 			result = PTR_ERR(page);
+		}
 	}
 	if (result) {
 		ll_cl_fini(lcc);
 		lcc = ERR_PTR(result);
 	}
 
-	CDEBUG(D_VFSTRACE, "%lu@"DFID" -> %d %p %p\n",
-	       vmpage->index, PFID(lu_object_fid(&clob->co_lu)), result,
-	       env, io);
-	return lcc;
-}
-
-static struct ll_cl_context *ll_cl_get(void)
-{
-	struct ll_cl_context *lcc;
-	struct lu_env *env;
-	int refcheck;
-
-	env = cl_env_get(&refcheck);
-	LASSERT(!IS_ERR(env));
-	lcc = &vvp_env_info(env)->vti_io_ctx;
-	LASSERT(env == lcc->lcc_env);
-	LASSERT(current == lcc->lcc_cookie);
-	cl_env_put(env, &refcheck);
-
-	/* env has got in ll_cl_init, so it is still usable. */
 	return lcc;
 }
 
-/**
- * ->prepare_write() address space operation called by generic_file_write()
- * for every page during write.
- */
-int ll_prepare_write(struct file *file, struct page *vmpage, unsigned from,
-		     unsigned to)
-{
-	struct ll_cl_context *lcc;
-	int result;
-
-	lcc = ll_cl_init(file, vmpage, 1);
-	if (!IS_ERR(lcc)) {
-		struct lu_env  *env = lcc->lcc_env;
-		struct cl_io   *io  = lcc->lcc_io;
-		struct cl_page *page = lcc->lcc_page;
-
-		cl_page_assume(env, io, page);
-
-		result = cl_io_prepare_write(env, io, page, from, to);
-		if (result == 0) {
-			/*
-			 * Add a reference, so that page is not evicted from
-			 * the cache until ->commit_write() is called.
-			 */
-			cl_page_get(page);
-			lu_ref_add(&page->cp_reference, "prepare_write",
-				   current);
-		} else {
-			cl_page_unassume(env, io, page);
-			ll_cl_fini(lcc);
-		}
-		/* returning 0 in prepare assumes commit must be called
-		 * afterwards
-		 */
-	} else {
-		result = PTR_ERR(lcc);
-	}
-	return result;
-}
-
-int ll_commit_write(struct file *file, struct page *vmpage, unsigned from,
-		    unsigned to)
-{
-	struct ll_cl_context *lcc;
-	struct lu_env    *env;
-	struct cl_io     *io;
-	struct cl_page   *page;
-	int result = 0;
-
-	lcc  = ll_cl_get();
-	env  = lcc->lcc_env;
-	page = lcc->lcc_page;
-	io   = lcc->lcc_io;
-
-	LASSERT(cl_page_is_owned(page, io));
-	LASSERT(from <= to);
-	if (from != to) /* handle short write case. */
-		result = cl_io_commit_write(env, io, page, from, to);
-	if (cl_page_is_owned(page, io))
-		cl_page_unassume(env, io, page);
-
-	/*
-	 * Release reference acquired by ll_prepare_write().
-	 */
-	lu_ref_del(&page->cp_reference, "prepare_write", current);
-	cl_page_put(env, page);
-	ll_cl_fini(lcc);
-	return result;
-}
-
 static void ll_ra_stats_inc_sbi(struct ll_sb_info *sbi, enum ra_stat which);
 
 /**
@@ -301,7 +160,7 @@ static void ll_ra_stats_inc_sbi(struct ll_sb_info *sbi, enum ra_stat which);
  */
 static unsigned long ll_ra_count_get(struct ll_sb_info *sbi,
 				     struct ra_io_arg *ria,
-				     unsigned long pages)
+				     unsigned long pages, unsigned long min)
 {
 	struct ll_ra_info *ra = &sbi->ll_ra_info;
 	long ret;
@@ -341,6 +200,11 @@ static unsigned long ll_ra_count_get(struct ll_sb_info *sbi,
 	}
 
 out:
+	if (ret < min) {
+		/* override ra limit for maximum performance */
+		atomic_add(min - ret, &ra->ra_cur_pages);
+		ret = min;
+	}
 	return ret;
 }
 
@@ -357,9 +221,9 @@ static void ll_ra_stats_inc_sbi(struct ll_sb_info *sbi, enum ra_stat which)
 	lprocfs_counter_incr(sbi->ll_ra_stats, which);
 }
 
-void ll_ra_stats_inc(struct address_space *mapping, enum ra_stat which)
+void ll_ra_stats_inc(struct inode *inode, enum ra_stat which)
 {
-	struct ll_sb_info *sbi = ll_i2sbi(mapping->host);
+	struct ll_sb_info *sbi = ll_i2sbi(inode);
 
 	ll_ra_stats_inc_sbi(sbi, which);
 }
@@ -388,61 +252,42 @@ static int index_in_window(unsigned long index, unsigned long point,
 	return start <= index && index <= end;
 }
 
-static struct ll_readahead_state *ll_ras_get(struct file *f)
+void ll_ras_enter(struct file *f)
 {
-	struct ll_file_data       *fd;
-
-	fd = LUSTRE_FPRIVATE(f);
-	return &fd->fd_ras;
-}
-
-void ll_ra_read_in(struct file *f, struct ll_ra_read *rar)
-{
-	struct ll_readahead_state *ras;
-
-	ras = ll_ras_get(f);
+	struct ll_file_data *fd = LUSTRE_FPRIVATE(f);
+	struct ll_readahead_state *ras = &fd->fd_ras;
 
 	spin_lock(&ras->ras_lock);
 	ras->ras_requests++;
 	ras->ras_request_index = 0;
 	ras->ras_consecutive_requests++;
-	rar->lrr_reader = current;
-
-	list_add(&rar->lrr_linkage, &ras->ras_read_beads);
-	spin_unlock(&ras->ras_lock);
-}
-
-void ll_ra_read_ex(struct file *f, struct ll_ra_read *rar)
-{
-	struct ll_readahead_state *ras;
-
-	ras = ll_ras_get(f);
-
-	spin_lock(&ras->ras_lock);
-	list_del_init(&rar->lrr_linkage);
 	spin_unlock(&ras->ras_lock);
 }
 
 static int cl_read_ahead_page(const struct lu_env *env, struct cl_io *io,
 			      struct cl_page_list *queue, struct cl_page *page,
-			      struct page *vmpage)
+			      struct cl_object *clob, pgoff_t *max_index)
 {
-	struct ccc_page *cp;
+	struct page *vmpage = page->cp_vmpage;
+	struct vvp_page *vpg;
 	int	      rc;
 
 	rc = 0;
 	cl_page_assume(env, io, page);
 	lu_ref_add(&page->cp_reference, "ra", current);
-	cp = cl2ccc_page(cl_page_at(page, &vvp_device_type));
-	if (!cp->cpg_defer_uptodate && !PageUptodate(vmpage)) {
-		rc = cl_page_is_under_lock(env, io, page);
-		if (rc == -EBUSY) {
-			cp->cpg_defer_uptodate = 1;
-			cp->cpg_ra_used = 0;
+	vpg = cl2vvp_page(cl_object_page_slice(clob, page));
+	if (!vpg->vpg_defer_uptodate && !PageUptodate(vmpage)) {
+		CDEBUG(D_READA, "page index %lu, max_index: %lu\n",
+		       vvp_index(vpg), *max_index);
+		if (*max_index == 0 || vvp_index(vpg) > *max_index)
+			rc = cl_page_is_under_lock(env, io, page, max_index);
+		if (rc == 0) {
+			vpg->vpg_defer_uptodate = 1;
+			vpg->vpg_ra_used = 0;
 			cl_page_list_add(queue, page);
 			rc = 1;
 		} else {
-			cl_page_delete(env, page);
+			cl_page_discard(env, io, page);
 			rc = -ENOLCK;
 		}
 	} else {
@@ -466,24 +311,25 @@ static int cl_read_ahead_page(const struct lu_env *env, struct cl_io *io,
  */
 static int ll_read_ahead_page(const struct lu_env *env, struct cl_io *io,
 			      struct cl_page_list *queue,
-			      pgoff_t index, struct address_space *mapping)
+			      pgoff_t index, pgoff_t *max_index)
 {
+	struct cl_object *clob  = io->ci_obj;
+	struct inode     *inode = vvp_object_inode(clob);
 	struct page      *vmpage;
-	struct cl_object *clob  = ll_i2info(mapping->host)->lli_clob;
 	struct cl_page   *page;
 	enum ra_stat      which = _NR_RA_STAT; /* keep gcc happy */
 	int	       rc    = 0;
 	const char       *msg   = NULL;
 
-	vmpage = grab_cache_page_nowait(mapping, index);
+	vmpage = grab_cache_page_nowait(inode->i_mapping, index);
 	if (vmpage) {
 		/* Check if vmpage was truncated or reclaimed */
-		if (vmpage->mapping == mapping) {
+		if (vmpage->mapping == inode->i_mapping) {
 			page = cl_page_find(env, clob, vmpage->index,
 					    vmpage, CPT_CACHEABLE);
 			if (!IS_ERR(page)) {
 				rc = cl_read_ahead_page(env, io, queue,
-							page, vmpage);
+							page, clob, max_index);
 				if (rc == -ENOLCK) {
 					which = RA_STAT_FAILED_MATCH;
 					msg   = "lock match failed";
@@ -504,7 +350,7 @@ static int ll_read_ahead_page(const struct lu_env *env, struct cl_io *io,
 		msg   = "g_c_p_n failed";
 	}
 	if (msg) {
-		ll_ra_stats_inc(mapping, which);
+		ll_ra_stats_inc(inode, which);
 		CDEBUG(D_READA, "%s\n", msg);
 	}
 	return rc;
@@ -616,11 +462,12 @@ static int ll_read_ahead_pages(const struct lu_env *env,
 			       struct cl_io *io, struct cl_page_list *queue,
 			       struct ra_io_arg *ria,
 			       unsigned long *reserved_pages,
-			       struct address_space *mapping,
 			       unsigned long *ra_end)
 {
-	int rc, count = 0, stride_ria;
-	unsigned long page_idx;
+	int rc, count = 0;
+	bool stride_ria;
+	pgoff_t page_idx;
+	pgoff_t max_index = 0;
 
 	LASSERT(ria);
 	RIA_DEBUG(ria);
@@ -631,12 +478,13 @@ static int ll_read_ahead_pages(const struct lu_env *env,
 		if (ras_inside_ra_window(page_idx, ria)) {
 			/* If the page is inside the read-ahead window*/
 			rc = ll_read_ahead_page(env, io, queue,
-						page_idx, mapping);
+						page_idx, &max_index);
 			if (rc == 1) {
 				(*reserved_pages)--;
 				count++;
-			} else if (rc == -ENOLCK)
+			} else if (rc == -ENOLCK) {
 				break;
+			}
 		} else if (stride_ria) {
 			/* If it is not in the read-ahead window, and it is
 			 * read-ahead mode, then check whether it should skip
@@ -666,25 +514,22 @@ static int ll_read_ahead_pages(const struct lu_env *env,
 }
 
 int ll_readahead(const struct lu_env *env, struct cl_io *io,
-		 struct ll_readahead_state *ras, struct address_space *mapping,
-		 struct cl_page_list *queue, int flags)
+		 struct cl_page_list *queue, struct ll_readahead_state *ras,
+		 bool hit)
 {
 	struct vvp_io *vio = vvp_env_io(env);
-	struct vvp_thread_info *vti = vvp_env_info(env);
-	struct cl_attr *attr = ccc_env_thread_attr(env);
+	struct ll_thread_info *lti = ll_env_info(env);
+	struct cl_attr *attr = vvp_env_thread_attr(env);
 	unsigned long start = 0, end = 0, reserved;
-	unsigned long ra_end, len;
+	unsigned long ra_end, len, mlen = 0;
 	struct inode *inode;
-	struct ll_ra_read *bead;
-	struct ra_io_arg *ria = &vti->vti_ria;
-	struct ll_inode_info *lli;
+	struct ra_io_arg *ria = &lti->lti_ria;
 	struct cl_object *clob;
 	int ret = 0;
 	__u64 kms;
 
-	inode = mapping->host;
-	lli = ll_i2info(inode);
-	clob = lli->lli_clob;
+	clob = io->ci_obj;
+	inode = vvp_object_inode(clob);
 
 	memset(ria, 0, sizeof(*ria));
 
@@ -696,22 +541,20 @@ int ll_readahead(const struct lu_env *env, struct cl_io *io,
 		return ret;
 	kms = attr->cat_kms;
 	if (kms == 0) {
-		ll_ra_stats_inc(mapping, RA_STAT_ZERO_LEN);
+		ll_ra_stats_inc(inode, RA_STAT_ZERO_LEN);
 		return 0;
 	}
 
 	spin_lock(&ras->ras_lock);
-	if (vio->cui_ra_window_set)
-		bead = &vio->cui_bead;
-	else
-		bead = NULL;
 
 	/* Enlarge the RA window to encompass the full read */
-	if (bead && ras->ras_window_start + ras->ras_window_len <
-	    bead->lrr_start + bead->lrr_count) {
-		ras->ras_window_len = bead->lrr_start + bead->lrr_count -
+	if (vio->vui_ra_valid &&
+	    ras->ras_window_start + ras->ras_window_len <
+	    vio->vui_ra_start + vio->vui_ra_count) {
+		ras->ras_window_len = vio->vui_ra_start + vio->vui_ra_count -
 				      ras->ras_window_start;
 	}
+
 	/* Reserve a part of the read-ahead window that we'll be issuing */
 	if (ras->ras_window_len) {
 		start = ras->ras_next_readahead;
@@ -755,29 +598,48 @@ int ll_readahead(const struct lu_env *env, struct cl_io *io,
 	spin_unlock(&ras->ras_lock);
 
 	if (end == 0) {
-		ll_ra_stats_inc(mapping, RA_STAT_ZERO_WINDOW);
+		ll_ra_stats_inc(inode, RA_STAT_ZERO_WINDOW);
 		return 0;
 	}
 	len = ria_page_count(ria);
-	if (len == 0)
+	if (len == 0) {
+		ll_ra_stats_inc(inode, RA_STAT_ZERO_WINDOW);
 		return 0;
+	}
+
+	CDEBUG(D_READA, DFID ": ria: %lu/%lu, bead: %lu/%lu, hit: %d\n",
+	       PFID(lu_object_fid(&clob->co_lu)),
+	       ria->ria_start, ria->ria_end,
+	       vio->vui_ra_valid ? vio->vui_ra_start : 0,
+	       vio->vui_ra_valid ? vio->vui_ra_count : 0,
+	       hit);
+
+	/* at least to extend the readahead window to cover current read */
+	if (!hit && vio->vui_ra_valid &&
+	    vio->vui_ra_start + vio->vui_ra_count > ria->ria_start) {
+		/* to the end of current read window. */
+		mlen = vio->vui_ra_start + vio->vui_ra_count - ria->ria_start;
+		/* trim to RPC boundary */
+		start = ria->ria_start & (PTLRPC_MAX_BRW_PAGES - 1);
+		mlen = min(mlen, PTLRPC_MAX_BRW_PAGES - start);
+	}
 
-	reserved = ll_ra_count_get(ll_i2sbi(inode), ria, len);
+	reserved = ll_ra_count_get(ll_i2sbi(inode), ria, len, mlen);
 	if (reserved < len)
-		ll_ra_stats_inc(mapping, RA_STAT_MAX_IN_FLIGHT);
+		ll_ra_stats_inc(inode, RA_STAT_MAX_IN_FLIGHT);
 
-	CDEBUG(D_READA, "reserved page %lu ra_cur %d ra_max %lu\n", reserved,
+	CDEBUG(D_READA, "reserved pages %lu/%lu/%lu, ra_cur %d, ra_max %lu\n",
+	       reserved, len, mlen,
 	       atomic_read(&ll_i2sbi(inode)->ll_ra_info.ra_cur_pages),
 	       ll_i2sbi(inode)->ll_ra_info.ra_max_pages);
 
-	ret = ll_read_ahead_pages(env, io, queue,
-				  ria, &reserved, mapping, &ra_end);
+	ret = ll_read_ahead_pages(env, io, queue, ria, &reserved, &ra_end);
 
 	if (reserved != 0)
 		ll_ra_count_put(ll_i2sbi(inode), reserved);
 
 	if (ra_end == end + 1 && ra_end == (kms >> PAGE_SHIFT))
-		ll_ra_stats_inc(mapping, RA_STAT_EOF);
+		ll_ra_stats_inc(inode, RA_STAT_EOF);
 
 	/* if we didn't get to the end of the region we reserved from
 	 * the ras we need to go back and update the ras so that the
@@ -789,6 +651,7 @@ int ll_readahead(const struct lu_env *env, struct cl_io *io,
 	       ra_end, end, ria->ria_end);
 
 	if (ra_end != end + 1) {
+		ll_ra_stats_inc(inode, RA_STAT_FAILED_REACH_END);
 		spin_lock(&ras->ras_lock);
 		if (ra_end < ras->ras_next_readahead &&
 		    index_in_window(ra_end, ras->ras_window_start, 0,
@@ -836,7 +699,6 @@ void ll_readahead_init(struct inode *inode, struct ll_readahead_state *ras)
 	spin_lock_init(&ras->ras_lock);
 	ras_reset(inode, ras, 0);
 	ras->ras_requests = 0;
-	INIT_LIST_HEAD(&ras->ras_read_beads);
 }
 
 /*
@@ -1059,15 +921,18 @@ void ras_update(struct ll_sb_info *sbi, struct inode *inode,
 	ras->ras_last_readpage = index;
 	ras_set_start(inode, ras, index);
 
-	if (stride_io_mode(ras))
+	if (stride_io_mode(ras)) {
 		/* Since stride readahead is sensitive to the offset
 		 * of read-ahead, so we use original offset here,
 		 * instead of ras_window_start, which is RPC aligned
 		 */
 		ras->ras_next_readahead = max(index, ras->ras_next_readahead);
-	else
-		ras->ras_next_readahead = max(ras->ras_window_start,
-					      ras->ras_next_readahead);
+	} else {
+		if (ras->ras_next_readahead < ras->ras_window_start)
+			ras->ras_next_readahead = ras->ras_window_start;
+		if (!hit)
+			ras->ras_next_readahead = index + 1;
+	}
 	RAS_CDEBUG(ras);
 
 	/* Trigger RA in the mmap case where ras_consecutive_requests
@@ -1129,7 +994,7 @@ int ll_writepage(struct page *vmpage, struct writeback_control *wbc)
 	clob  = ll_i2info(inode)->lli_clob;
 	LASSERT(clob);
 
-	io = ccc_env_thread_io(env);
+	io = vvp_env_thread_io(env);
 	io->ci_obj = clob;
 	io->ci_ignore_layout = 1;
 	result = cl_io_init(env, io, CIT_MISC, clob);
@@ -1240,8 +1105,9 @@ int ll_writepages(struct address_space *mapping, struct writeback_control *wbc)
 
 	if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) {
 		if (end == OBD_OBJECT_EOF)
-			end = i_size_read(inode);
-		mapping->writeback_index = (end >> PAGE_SHIFT) + 1;
+			mapping->writeback_index = 0;
+		else
+			mapping->writeback_index = (end >> PAGE_SHIFT) + 1;
 	}
 	return result;
 }
@@ -1251,7 +1117,7 @@ int ll_readpage(struct file *file, struct page *vmpage)
 	struct ll_cl_context *lcc;
 	int result;
 
-	lcc = ll_cl_init(file, vmpage, 0);
+	lcc = ll_cl_init(file, vmpage);
 	if (!IS_ERR(lcc)) {
 		struct lu_env  *env  = lcc->lcc_env;
 		struct cl_io   *io   = lcc->lcc_io;
@@ -1273,3 +1139,28 @@ int ll_readpage(struct file *file, struct page *vmpage)
 	}
 	return result;
 }
+
+int ll_page_sync_io(const struct lu_env *env, struct cl_io *io,
+		    struct cl_page *page, enum cl_req_type crt)
+{
+	struct cl_2queue  *queue;
+	int result;
+
+	LASSERT(io->ci_type == CIT_READ || io->ci_type == CIT_WRITE);
+
+	queue = &io->ci_queue;
+	cl_2queue_init_page(queue, page);
+
+	result = cl_io_submit_sync(env, io, crt, queue, 0);
+	LASSERT(cl_page_is_owned(page, io));
+
+	if (crt == CRT_READ)
+		/*
+		 * in CRT_WRITE case page is left locked even in case of
+		 * error.
+		 */
+		cl_page_list_disown(env, io, &queue->c2_qin);
+	cl_2queue_fini(env, queue);
+
+	return result;
+}
diff --git a/drivers/staging/lustre/lustre/llite/rw26.c b/drivers/staging/lustre/lustre/llite/rw26.c
index 69aa15e8e..c12a048fc 100644
--- a/drivers/staging/lustre/lustre/llite/rw26.c
+++ b/drivers/staging/lustre/lustre/llite/rw26.c
@@ -95,15 +95,12 @@ static void ll_invalidatepage(struct page *vmpage, unsigned int offset,
 			if (obj) {
 				page = cl_vmpage_page(vmpage, obj);
 				if (page) {
-					lu_ref_add(&page->cp_reference,
-						   "delete", vmpage);
 					cl_page_delete(env, page);
-					lu_ref_del(&page->cp_reference,
-						   "delete", vmpage);
 					cl_page_put(env, page);
 				}
-			} else
+			} else {
 				LASSERT(vmpage->private == 0);
+			}
 			cl_env_put(env, &refcheck);
 		}
 	}
@@ -111,12 +108,12 @@ static void ll_invalidatepage(struct page *vmpage, unsigned int offset,
 
 static int ll_releasepage(struct page *vmpage, gfp_t gfp_mask)
 {
-	struct cl_env_nest nest;
 	struct lu_env     *env;
+	void			*cookie;
 	struct cl_object  *obj;
 	struct cl_page    *page;
 	struct address_space *mapping;
-	int result;
+	int result = 0;
 
 	LASSERT(PageLocked(vmpage));
 	if (PageWriteback(vmpage) || PageDirty(vmpage))
@@ -130,53 +127,42 @@ static int ll_releasepage(struct page *vmpage, gfp_t gfp_mask)
 	if (!obj)
 		return 1;
 
-	/* 1 for page allocator, 1 for cl_page and 1 for page cache */
+	/* 1 for caller, 1 for cl_page and 1 for page cache */
 	if (page_count(vmpage) > 3)
 		return 0;
 
-	/* TODO: determine what gfp should be used by @gfp_mask. */
-	env = cl_env_nested_get(&nest);
-	if (IS_ERR(env))
-		/* If we can't allocate an env we won't call cl_page_put()
-		 * later on which further means it's impossible to drop
-		 * page refcount by cl_page, so ask kernel to not free
-		 * this page.
-		 */
-		return 0;
-
 	page = cl_vmpage_page(vmpage, obj);
-	result = !page;
-	if (page) {
-		if (!cl_page_in_use(page)) {
-			result = 1;
-			cl_page_delete(env, page);
-		}
-		cl_page_put(env, page);
-	}
-	cl_env_nested_put(&nest, env);
-	return result;
-}
+	if (!page)
+		return 1;
 
-static int ll_set_page_dirty(struct page *vmpage)
-{
-#if 0
-	struct cl_page    *page = vvp_vmpage_page_transient(vmpage);
-	struct vvp_object *obj  = cl_inode2vvp(vmpage->mapping->host);
-	struct vvp_page   *cpg;
+	cookie = cl_env_reenter();
+	env = cl_env_percpu_get();
+	LASSERT(!IS_ERR(env));
 
-	/*
-	 * XXX should page method be called here?
-	 */
-	LASSERT(&obj->co_cl == page->cp_obj);
-	cpg = cl2vvp_page(cl_page_at(page, &vvp_device_type));
-	/*
-	 * XXX cannot do much here, because page is possibly not locked:
-	 * sys_munmap()->...
-	 *     ->unmap_page_range()->zap_pte_range()->set_page_dirty().
+	if (!cl_page_in_use(page)) {
+		result = 1;
+		cl_page_delete(env, page);
+	}
+
+	/* To use percpu env array, the call path can not be rescheduled;
+	 * otherwise percpu array will be messed if ll_releaspage() called
+	 * again on the same CPU.
+	 *
+	 * If this page holds the last refc of cl_object, the following
+	 * call path may cause reschedule:
+	 *   cl_page_put -> cl_page_free -> cl_object_put ->
+	 *     lu_object_put -> lu_object_free -> lov_delete_raid0.
+	 *
+	 * However, the kernel can't get rid of this inode until all pages have
+	 * been cleaned up. Now that we hold page lock here, it's pretty safe
+	 * that we won't get into object delete path.
 	 */
-	vvp_write_pending(obj, cpg);
-#endif
-	return __set_page_dirty_nobuffers(vmpage);
+	LASSERT(cl_object_refc(obj) > 1);
+	cl_page_put(env, page);
+
+	cl_env_percpu_put(env);
+	cl_env_reexit(cookie);
+	return result;
 }
 
 #define MAX_DIRECTIO_SIZE (2*1024*1024*1024UL)
@@ -266,7 +252,7 @@ ssize_t ll_direct_rw_pages(const struct lu_env *env, struct cl_io *io,
 		 * write directly
 		 */
 		if (clp->cp_type == CPT_CACHEABLE) {
-			struct page *vmpage = cl_page_vmpage(env, clp);
+			struct page *vmpage = cl_page_vmpage(clp);
 			struct page *src_page;
 			struct page *dst_page;
 			void       *src;
@@ -358,14 +344,14 @@ static ssize_t ll_direct_IO_26_seg(const struct lu_env *env, struct cl_io *io,
  */
 #define MAX_DIO_SIZE ((KMALLOC_MAX_SIZE / sizeof(struct brw_page) *	  \
 		       PAGE_SIZE) & ~(DT_MAX_BRW_SIZE - 1))
-static ssize_t ll_direct_IO_26(struct kiocb *iocb, struct iov_iter *iter,
-			       loff_t file_offset)
+static ssize_t ll_direct_IO_26(struct kiocb *iocb, struct iov_iter *iter)
 {
 	struct lu_env *env;
 	struct cl_io *io;
 	struct file *file = iocb->ki_filp;
 	struct inode *inode = file->f_mapping->host;
-	struct ccc_object *obj = cl_inode2ccc(inode);
+	struct vvp_object *obj = cl_inode2vvp(inode);
+	loff_t file_offset = iocb->ki_pos;
 	ssize_t count = iov_iter_count(iter);
 	ssize_t tot_bytes = 0, result = 0;
 	struct ll_inode_info *lli = ll_i2info(inode);
@@ -376,22 +362,21 @@ static ssize_t ll_direct_IO_26(struct kiocb *iocb, struct iov_iter *iter,
 		return -EBADF;
 
 	/* FIXME: io smaller than PAGE_SIZE is broken on ia64 ??? */
-	if ((file_offset & ~CFS_PAGE_MASK) || (count & ~CFS_PAGE_MASK))
+	if ((file_offset & ~PAGE_MASK) || (count & ~PAGE_MASK))
 		return -EINVAL;
 
-	CDEBUG(D_VFSTRACE,
-	       "VFS Op:inode=%lu/%u(%p), size=%zd (max %lu), offset=%lld=%llx, pages %zd (max %lu)\n",
-	       inode->i_ino, inode->i_generation, inode, count, MAX_DIO_SIZE,
+	CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), size=%zd (max %lu), offset=%lld=%llx, pages %zd (max %lu)\n",
+	       PFID(ll_inode2fid(inode)), inode, count, MAX_DIO_SIZE,
 	       file_offset, file_offset, count >> PAGE_SHIFT,
 	       MAX_DIO_SIZE >> PAGE_SHIFT);
 
 	/* Check that all user buffers are aligned as well */
-	if (iov_iter_alignment(iter) & ~CFS_PAGE_MASK)
+	if (iov_iter_alignment(iter) & ~PAGE_MASK)
 		return -EINVAL;
 
 	env = cl_env_get(&refcheck);
 	LASSERT(!IS_ERR(env));
-	io = ccc_env_io(env)->cui_cl.cis_io;
+	io = vvp_env_io(env)->vui_cl.cis_io;
 	LASSERT(io);
 
 	/* 0. Need locking between buffered and direct access. and race with
@@ -401,7 +386,7 @@ static ssize_t ll_direct_IO_26(struct kiocb *iocb, struct iov_iter *iter,
 	if (iov_iter_rw(iter) == READ)
 		inode_lock(inode);
 
-	LASSERT(obj->cob_transient_pages == 0);
+	LASSERT(obj->vob_transient_pages == 0);
 	while (iov_iter_count(iter)) {
 		struct page **pages;
 		size_t offs;
@@ -435,8 +420,8 @@ static ssize_t ll_direct_IO_26(struct kiocb *iocb, struct iov_iter *iter,
 			    size > (PAGE_SIZE / sizeof(*pages)) *
 			    PAGE_SIZE) {
 				size = ((((size / 2) - 1) |
-					 ~CFS_PAGE_MASK) + 1) &
-					CFS_PAGE_MASK;
+					 ~PAGE_MASK) + 1) &
+					PAGE_MASK;
 				CDEBUG(D_VFSTRACE, "DIO size now %lu\n",
 				       size);
 				continue;
@@ -449,62 +434,213 @@ static ssize_t ll_direct_IO_26(struct kiocb *iocb, struct iov_iter *iter,
 		file_offset += result;
 	}
 out:
-	LASSERT(obj->cob_transient_pages == 0);
+	LASSERT(obj->vob_transient_pages == 0);
 	if (iov_iter_rw(iter) == READ)
 		inode_unlock(inode);
 
 	if (tot_bytes > 0) {
-		if (iov_iter_rw(iter) == WRITE) {
-			struct lov_stripe_md *lsm;
-
-			lsm = ccc_inode_lsm_get(inode);
-			LASSERT(lsm);
-			lov_stripe_lock(lsm);
-			obd_adjust_kms(ll_i2dtexp(inode), lsm, file_offset, 0);
-			lov_stripe_unlock(lsm);
-			ccc_inode_lsm_put(inode, lsm);
-		}
+		struct vvp_io *vio = vvp_env_io(env);
+
+		/* no commit async for direct IO */
+		vio->u.write.vui_written += tot_bytes;
 	}
 
 	cl_env_put(env, &refcheck);
-	return tot_bytes ? : result;
+	return tot_bytes ? tot_bytes : result;
+}
+
+/**
+ * Prepare partially written-to page for a write.
+ */
+static int ll_prepare_partial_page(const struct lu_env *env, struct cl_io *io,
+				   struct cl_page *pg)
+{
+	struct cl_attr *attr   = vvp_env_thread_attr(env);
+	struct cl_object *obj  = io->ci_obj;
+	struct vvp_page *vpg   = cl_object_page_slice(obj, pg);
+	loff_t          offset = cl_offset(obj, vvp_index(vpg));
+	int             result;
+
+	cl_object_attr_lock(obj);
+	result = cl_object_attr_get(env, obj, attr);
+	cl_object_attr_unlock(obj);
+	if (result == 0) {
+		/*
+		 * If are writing to a new page, no need to read old data.
+		 * The extent locking will have updated the KMS, and for our
+		 * purposes here we can treat it like i_size.
+		 */
+		if (attr->cat_kms <= offset) {
+			char *kaddr = kmap_atomic(vpg->vpg_page);
+
+			memset(kaddr, 0, cl_page_size(obj));
+			kunmap_atomic(kaddr);
+		} else if (vpg->vpg_defer_uptodate) {
+			vpg->vpg_ra_used = 1;
+		} else {
+			result = ll_page_sync_io(env, io, pg, CRT_READ);
+		}
+	}
+	return result;
 }
 
 static int ll_write_begin(struct file *file, struct address_space *mapping,
 			  loff_t pos, unsigned len, unsigned flags,
 			  struct page **pagep, void **fsdata)
 {
+	struct ll_cl_context *lcc;
+	struct lu_env  *env;
+	struct cl_io   *io;
+	struct cl_page *page;
+	struct cl_object *clob = ll_i2info(mapping->host)->lli_clob;
 	pgoff_t index = pos >> PAGE_SHIFT;
-	struct page *page;
-	int rc;
-	unsigned from = pos & (PAGE_SIZE - 1);
+	struct page *vmpage = NULL;
+	unsigned int from = pos & (PAGE_SIZE - 1);
+	unsigned int to = from + len;
+	int result = 0;
 
-	page = grab_cache_page_write_begin(mapping, index, flags);
-	if (!page)
-		return -ENOMEM;
+	CDEBUG(D_VFSTRACE, "Writing %lu of %d to %d bytes\n", index, from, len);
 
-	*pagep = page;
+	lcc = ll_cl_init(file, NULL);
+	if (IS_ERR(lcc)) {
+		result = PTR_ERR(lcc);
+		goto out;
+	}
 
-	rc = ll_prepare_write(file, page, from, from + len);
-	if (rc) {
-		unlock_page(page);
-		put_page(page);
+	env = lcc->lcc_env;
+	io  = lcc->lcc_io;
+
+	/* To avoid deadlock, try to lock page first. */
+	vmpage = grab_cache_page_nowait(mapping, index);
+	if (unlikely(!vmpage || PageDirty(vmpage) || PageWriteback(vmpage))) {
+		struct vvp_io *vio = vvp_env_io(env);
+		struct cl_page_list *plist = &vio->u.write.vui_queue;
+
+		/* if the page is already in dirty cache, we have to commit
+		 * the pages right now; otherwise, it may cause deadlock
+		 * because it holds page lock of a dirty page and request for
+		 * more grants. It's okay for the dirty page to be the first
+		 * one in commit page list, though.
+		 */
+		if (vmpage && plist->pl_nr > 0) {
+			unlock_page(vmpage);
+			put_page(vmpage);
+			vmpage = NULL;
+		}
+
+		/* commit pages and then wait for page lock */
+		result = vvp_io_write_commit(env, io);
+		if (result < 0)
+			goto out;
+
+		if (!vmpage) {
+			vmpage = grab_cache_page_write_begin(mapping, index,
+							     flags);
+			if (!vmpage) {
+				result = -ENOMEM;
+				goto out;
+			}
+		}
 	}
-	return rc;
+
+	page = cl_page_find(env, clob, vmpage->index, vmpage, CPT_CACHEABLE);
+	if (IS_ERR(page)) {
+		result = PTR_ERR(page);
+		goto out;
+	}
+
+	lcc->lcc_page = page;
+	lu_ref_add(&page->cp_reference, "cl_io", io);
+
+	cl_page_assume(env, io, page);
+	if (!PageUptodate(vmpage)) {
+		/*
+		 * We're completely overwriting an existing page,
+		 * so _don't_ set it up to date until commit_write
+		 */
+		if (from == 0 && to == PAGE_SIZE) {
+			CL_PAGE_HEADER(D_PAGE, env, page, "full page write\n");
+			POISON_PAGE(vmpage, 0x11);
+		} else {
+			/* TODO: can be optimized at OSC layer to check if it
+			 * is a lockless IO. In that case, it's not necessary
+			 * to read the data.
+			 */
+			result = ll_prepare_partial_page(env, io, page);
+			if (result == 0)
+				SetPageUptodate(vmpage);
+		}
+	}
+	if (result < 0)
+		cl_page_unassume(env, io, page);
+out:
+	if (result < 0) {
+		if (vmpage) {
+			unlock_page(vmpage);
+			put_page(vmpage);
+		}
+		if (!IS_ERR(lcc))
+			ll_cl_fini(lcc);
+	} else {
+		*pagep = vmpage;
+		*fsdata = lcc;
+	}
+	return result;
 }
 
 static int ll_write_end(struct file *file, struct address_space *mapping,
 			loff_t pos, unsigned len, unsigned copied,
-			struct page *page, void *fsdata)
+			struct page *vmpage, void *fsdata)
 {
+	struct ll_cl_context *lcc = fsdata;
+	struct lu_env *env;
+	struct cl_io *io;
+	struct vvp_io *vio;
+	struct cl_page *page;
 	unsigned from = pos & (PAGE_SIZE - 1);
-	int rc;
+	bool unplug = false;
+	int result = 0;
+
+	put_page(vmpage);
+
+	env  = lcc->lcc_env;
+	page = lcc->lcc_page;
+	io   = lcc->lcc_io;
+	vio  = vvp_env_io(env);
+
+	LASSERT(cl_page_is_owned(page, io));
+	if (copied > 0) {
+		struct cl_page_list *plist = &vio->u.write.vui_queue;
+
+		lcc->lcc_page = NULL; /* page will be queued */
+
+		/* Add it into write queue */
+		cl_page_list_add(plist, page);
+		if (plist->pl_nr == 1) /* first page */
+			vio->u.write.vui_from = from;
+		else
+			LASSERT(from == 0);
+		vio->u.write.vui_to = from + copied;
+
+		/* We may have one full RPC, commit it soon */
+		if (plist->pl_nr >= PTLRPC_MAX_BRW_PAGES)
+			unplug = true;
+
+		CL_PAGE_DEBUG(D_VFSTRACE, env, page,
+			      "queued page: %d.\n", plist->pl_nr);
+	} else {
+		cl_page_disown(env, io, page);
+
+		/* page list is not contiguous now, commit it now */
+		unplug = true;
+	}
 
-	rc = ll_commit_write(file, page, from, from + copied);
-	unlock_page(page);
-	put_page(page);
+	if (unplug ||
+	    file->f_flags & O_SYNC || IS_SYNC(file_inode(file)))
+		result = vvp_io_write_commit(env, io);
 
-	return rc ?: copied;
+	ll_cl_fini(lcc);
+	return result >= 0 ? copied : result;
 }
 
 #ifdef CONFIG_MIGRATION
@@ -523,7 +659,7 @@ const struct address_space_operations ll_aops = {
 	.direct_IO      = ll_direct_IO_26,
 	.writepage      = ll_writepage,
 	.writepages     = ll_writepages,
-	.set_page_dirty = ll_set_page_dirty,
+	.set_page_dirty = __set_page_dirty_nobuffers,
 	.write_begin    = ll_write_begin,
 	.write_end      = ll_write_end,
 	.invalidatepage = ll_invalidatepage,
diff --git a/drivers/staging/lustre/lustre/llite/statahead.c b/drivers/staging/lustre/lustre/llite/statahead.c
index 99ffd1589..6322f8866 100644
--- a/drivers/staging/lustre/lustre/llite/statahead.c
+++ b/drivers/staging/lustre/lustre/llite/statahead.c
@@ -661,8 +661,9 @@ static void ll_post_statahead(struct ll_statahead_info *sai)
 	if (rc)
 		goto out;
 
-	CDEBUG(D_DLMTRACE, "setting l_data to inode %p (%lu/%u)\n",
-	       child, child->i_ino, child->i_generation);
+	CDEBUG(D_DLMTRACE, "%s: setting l_data to inode "DFID"%p\n",
+	       ll_get_fsname(child->i_sb, NULL, 0),
+	       PFID(ll_inode2fid(child)), child);
 	ll_set_lock_data(ll_i2sbi(dir)->ll_md_exp, child, it, NULL);
 
 	entry->se_inode = child;
@@ -1591,13 +1592,11 @@ int do_statahead_enter(struct inode *dir, struct dentry **dentryp,
 					*dentryp = alias;
 				} else if (d_inode(*dentryp) != inode) {
 					/* revalidate, but inode is recreated */
-					CDEBUG(D_READA,
-					       "stale dentry %pd inode %lu/%u, statahead inode %lu/%u\n",
-					      *dentryp,
-					      d_inode(*dentryp)->i_ino,
-					      d_inode(*dentryp)->i_generation,
-					      inode->i_ino,
-					      inode->i_generation);
+					CDEBUG(D_READA, "%s: stale dentry %pd inode "DFID", statahead inode "DFID"\n",
+					       ll_get_fsname(d_inode(*dentryp)->i_sb, NULL, 0),
+					       *dentryp,
+					       PFID(ll_inode2fid(d_inode(*dentryp))),
+					       PFID(ll_inode2fid(inode)));
 					ll_sai_unplug(sai, entry);
 					return -ESTALE;
 				} else {
diff --git a/drivers/staging/lustre/lustre/llite/super25.c b/drivers/staging/lustre/lustre/llite/super25.c
index 61856d37a..415750b0b 100644
--- a/drivers/staging/lustre/lustre/llite/super25.c
+++ b/drivers/staging/lustre/lustre/llite/super25.c
@@ -164,9 +164,18 @@ static int __init lustre_init(void)
 	if (rc != 0)
 		goto out_sysfs;
 
+	cl_inode_fini_env = cl_env_alloc(&cl_inode_fini_refcheck,
+					 LCT_REMEMBER | LCT_NOREF);
+	if (IS_ERR(cl_inode_fini_env)) {
+		rc = PTR_ERR(cl_inode_fini_env);
+		goto out_vvp;
+	}
+
+	cl_inode_fini_env->le_ctx.lc_cookie = 0x4;
+
 	rc = ll_xattr_init();
 	if (rc != 0)
-		goto out_vvp;
+		goto out_inode_fini_env;
 
 	lustre_register_client_fill_super(ll_fill_super);
 	lustre_register_kill_super_cb(ll_kill_super);
@@ -174,6 +183,8 @@ static int __init lustre_init(void)
 
 	return 0;
 
+out_inode_fini_env:
+	cl_env_put(cl_inode_fini_env, &cl_inode_fini_refcheck);
 out_vvp:
 	vvp_global_fini();
 out_sysfs:
@@ -198,6 +209,7 @@ static void __exit lustre_exit(void)
 	kset_unregister(llite_kset);
 
 	ll_xattr_fini();
+	cl_env_put(cl_inode_fini_env, &cl_inode_fini_refcheck);
 	vvp_global_fini();
 
 	kmem_cache_destroy(ll_inode_cachep);
diff --git a/drivers/staging/lustre/lustre/llite/symlink.c b/drivers/staging/lustre/lustre/llite/symlink.c
index 46d03ea48..3fc736ccf 100644
--- a/drivers/staging/lustre/lustre/llite/symlink.c
+++ b/drivers/staging/lustre/lustre/llite/symlink.c
@@ -77,7 +77,9 @@ static int ll_readlink_internal(struct inode *inode,
 	ll_finish_md_op_data(op_data);
 	if (rc) {
 		if (rc != -ENOENT)
-			CERROR("inode %lu: rc = %d\n", inode->i_ino, rc);
+			CERROR("%s: inode "DFID": rc = %d\n",
+			       ll_get_fsname(inode->i_sb, NULL, 0),
+			       PFID(ll_inode2fid(inode)), rc);
 		goto failed;
 	}
 
@@ -90,8 +92,10 @@ static int ll_readlink_internal(struct inode *inode,
 
 	LASSERT(symlen != 0);
 	if (body->eadatasize != symlen) {
-		CERROR("inode %lu: symlink length %d not expected %d\n",
-		       inode->i_ino, body->eadatasize - 1, symlen - 1);
+		CERROR("%s: inode "DFID": symlink length %d not expected %d\n",
+		       ll_get_fsname(inode->i_sb, NULL, 0),
+		       PFID(ll_inode2fid(inode)), body->eadatasize - 1,
+		       symlen - 1);
 		rc = -EPROTO;
 		goto failed;
 	}
diff --git a/drivers/staging/lustre/lustre/llite/vvp_dev.c b/drivers/staging/lustre/lustre/llite/vvp_dev.c
index 282b70b77..47101de1c 100644
--- a/drivers/staging/lustre/lustre/llite/vvp_dev.c
+++ b/drivers/staging/lustre/lustre/llite/vvp_dev.c
@@ -36,6 +36,7 @@
  * cl_device and cl_device_type implementation for VVP layer.
  *
  *   Author: Nikita Danilov <nikita.danilov@sun.com>
+ *   Author: Jinshan Xiong <jinshan.xiong@intel.com>
  */
 
 #define DEBUG_SUBSYSTEM S_LLITE
@@ -56,43 +57,74 @@
  * "llite_" (var. "ll_") prefix.
  */
 
-static struct kmem_cache *vvp_thread_kmem;
+static struct kmem_cache *ll_thread_kmem;
+struct kmem_cache *vvp_lock_kmem;
+struct kmem_cache *vvp_object_kmem;
+struct kmem_cache *vvp_req_kmem;
 static struct kmem_cache *vvp_session_kmem;
+static struct kmem_cache *vvp_thread_kmem;
+
 static struct lu_kmem_descr vvp_caches[] = {
 	{
-		.ckd_cache = &vvp_thread_kmem,
-		.ckd_name  = "vvp_thread_kmem",
-		.ckd_size  = sizeof(struct vvp_thread_info),
+		.ckd_cache = &ll_thread_kmem,
+		.ckd_name  = "ll_thread_kmem",
+		.ckd_size  = sizeof(struct ll_thread_info),
+	},
+	{
+		.ckd_cache = &vvp_lock_kmem,
+		.ckd_name  = "vvp_lock_kmem",
+		.ckd_size  = sizeof(struct vvp_lock),
+	},
+	{
+		.ckd_cache = &vvp_object_kmem,
+		.ckd_name  = "vvp_object_kmem",
+		.ckd_size  = sizeof(struct vvp_object),
+	},
+	{
+		.ckd_cache = &vvp_req_kmem,
+		.ckd_name  = "vvp_req_kmem",
+		.ckd_size  = sizeof(struct vvp_req),
 	},
 	{
 		.ckd_cache = &vvp_session_kmem,
 		.ckd_name  = "vvp_session_kmem",
 		.ckd_size  = sizeof(struct vvp_session)
 	},
+	{
+		.ckd_cache = &vvp_thread_kmem,
+		.ckd_name  = "vvp_thread_kmem",
+		.ckd_size  = sizeof(struct vvp_thread_info),
+	},
 	{
 		.ckd_cache = NULL
 	}
 };
 
-static void *vvp_key_init(const struct lu_context *ctx,
-			  struct lu_context_key *key)
+static void *ll_thread_key_init(const struct lu_context *ctx,
+				struct lu_context_key *key)
 {
 	struct vvp_thread_info *info;
 
-	info = kmem_cache_zalloc(vvp_thread_kmem, GFP_NOFS);
+	info = kmem_cache_zalloc(ll_thread_kmem, GFP_NOFS);
 	if (!info)
 		info = ERR_PTR(-ENOMEM);
 	return info;
 }
 
-static void vvp_key_fini(const struct lu_context *ctx,
-			 struct lu_context_key *key, void *data)
+static void ll_thread_key_fini(const struct lu_context *ctx,
+			       struct lu_context_key *key, void *data)
 {
 	struct vvp_thread_info *info = data;
 
-	kmem_cache_free(vvp_thread_kmem, info);
+	kmem_cache_free(ll_thread_kmem, info);
 }
 
+struct lu_context_key ll_thread_key = {
+	.lct_tags = LCT_CL_THREAD,
+	.lct_init = ll_thread_key_init,
+	.lct_fini = ll_thread_key_fini
+};
+
 static void *vvp_session_key_init(const struct lu_context *ctx,
 				  struct lu_context_key *key)
 {
@@ -112,34 +144,127 @@ static void vvp_session_key_fini(const struct lu_context *ctx,
 	kmem_cache_free(vvp_session_kmem, session);
 }
 
-struct lu_context_key vvp_key = {
-	.lct_tags = LCT_CL_THREAD,
-	.lct_init = vvp_key_init,
-	.lct_fini = vvp_key_fini
-};
-
 struct lu_context_key vvp_session_key = {
 	.lct_tags = LCT_SESSION,
 	.lct_init = vvp_session_key_init,
 	.lct_fini = vvp_session_key_fini
 };
 
+void *vvp_thread_key_init(const struct lu_context *ctx,
+			  struct lu_context_key *key)
+{
+	struct vvp_thread_info *vti;
+
+	vti = kmem_cache_zalloc(vvp_thread_kmem, GFP_NOFS);
+	if (!vti)
+		vti = ERR_PTR(-ENOMEM);
+	return vti;
+}
+
+void vvp_thread_key_fini(const struct lu_context *ctx,
+			 struct lu_context_key *key, void *data)
+{
+	struct vvp_thread_info *vti = data;
+
+	kmem_cache_free(vvp_thread_kmem, vti);
+}
+
+struct lu_context_key vvp_thread_key = {
+	.lct_tags = LCT_CL_THREAD,
+	.lct_init = vvp_thread_key_init,
+	.lct_fini = vvp_thread_key_fini
+};
+
 /* type constructor/destructor: vvp_type_{init,fini,start,stop}(). */
-LU_TYPE_INIT_FINI(vvp, &ccc_key, &ccc_session_key, &vvp_key, &vvp_session_key);
+LU_TYPE_INIT_FINI(vvp, &vvp_thread_key, &ll_thread_key, &vvp_session_key);
 
 static const struct lu_device_operations vvp_lu_ops = {
 	.ldo_object_alloc      = vvp_object_alloc
 };
 
 static const struct cl_device_operations vvp_cl_ops = {
-	.cdo_req_init = ccc_req_init
+	.cdo_req_init = vvp_req_init
 };
 
+static struct lu_device *vvp_device_free(const struct lu_env *env,
+					 struct lu_device *d)
+{
+	struct vvp_device *vdv  = lu2vvp_dev(d);
+	struct cl_site    *site = lu2cl_site(d->ld_site);
+	struct lu_device  *next = cl2lu_dev(vdv->vdv_next);
+
+	if (d->ld_site) {
+		cl_site_fini(site);
+		kfree(site);
+	}
+	cl_device_fini(lu2cl_dev(d));
+	kfree(vdv);
+	return next;
+}
+
 static struct lu_device *vvp_device_alloc(const struct lu_env *env,
 					  struct lu_device_type *t,
 					  struct lustre_cfg *cfg)
 {
-	return ccc_device_alloc(env, t, cfg, &vvp_lu_ops, &vvp_cl_ops);
+	struct vvp_device *vdv;
+	struct lu_device  *lud;
+	struct cl_site    *site;
+	int rc;
+
+	vdv = kzalloc(sizeof(*vdv), GFP_NOFS);
+	if (!vdv)
+		return ERR_PTR(-ENOMEM);
+
+	lud = &vdv->vdv_cl.cd_lu_dev;
+	cl_device_init(&vdv->vdv_cl, t);
+	vvp2lu_dev(vdv)->ld_ops = &vvp_lu_ops;
+	vdv->vdv_cl.cd_ops = &vvp_cl_ops;
+
+	site = kzalloc(sizeof(*site), GFP_NOFS);
+	if (site) {
+		rc = cl_site_init(site, &vdv->vdv_cl);
+		if (rc == 0) {
+			rc = lu_site_init_finish(&site->cs_lu);
+		} else {
+			LASSERT(!lud->ld_site);
+			CERROR("Cannot init lu_site, rc %d.\n", rc);
+			kfree(site);
+		}
+	} else {
+		rc = -ENOMEM;
+	}
+	if (rc != 0) {
+		vvp_device_free(env, lud);
+		lud = ERR_PTR(rc);
+	}
+	return lud;
+}
+
+static int vvp_device_init(const struct lu_env *env, struct lu_device *d,
+			   const char *name, struct lu_device *next)
+{
+	struct vvp_device  *vdv;
+	int rc;
+
+	vdv = lu2vvp_dev(d);
+	vdv->vdv_next = lu2cl_dev(next);
+
+	LASSERT(d->ld_site && next->ld_type);
+	next->ld_site = d->ld_site;
+	rc = next->ld_type->ldt_ops->ldto_device_init(env, next,
+						      next->ld_type->ldt_name,
+						      NULL);
+	if (rc == 0) {
+		lu_device_get(next);
+		lu_ref_add(&next->ld_reference, "lu-stack", &lu_site_init);
+	}
+	return rc;
+}
+
+static struct lu_device *vvp_device_fini(const struct lu_env *env,
+					 struct lu_device *d)
+{
+	return cl2lu_dev(lu2vvp_dev(d)->vdv_next);
 }
 
 static const struct lu_device_type_operations vvp_device_type_ops = {
@@ -150,9 +275,9 @@ static const struct lu_device_type_operations vvp_device_type_ops = {
 	.ldto_stop  = vvp_type_stop,
 
 	.ldto_device_alloc = vvp_device_alloc,
-	.ldto_device_free  = ccc_device_free,
-	.ldto_device_init  = ccc_device_init,
-	.ldto_device_fini  = ccc_device_fini
+	.ldto_device_free	= vvp_device_free,
+	.ldto_device_init	= vvp_device_init,
+	.ldto_device_fini	= vvp_device_fini,
 };
 
 struct lu_device_type vvp_device_type = {
@@ -168,20 +293,27 @@ struct lu_device_type vvp_device_type = {
  */
 int vvp_global_init(void)
 {
-	int result;
+	int rc;
 
-	result = lu_kmem_init(vvp_caches);
-	if (result == 0) {
-		result = ccc_global_init(&vvp_device_type);
-		if (result != 0)
-			lu_kmem_fini(vvp_caches);
-	}
-	return result;
+	rc = lu_kmem_init(vvp_caches);
+	if (rc != 0)
+		return rc;
+
+	rc = lu_device_type_init(&vvp_device_type);
+	if (rc != 0)
+		goto out_kmem;
+
+	return 0;
+
+out_kmem:
+	lu_kmem_fini(vvp_caches);
+
+	return rc;
 }
 
 void vvp_global_fini(void)
 {
-	ccc_global_fini(&vvp_device_type);
+	lu_device_type_fini(&vvp_device_type);
 	lu_kmem_fini(vvp_caches);
 }
 
@@ -205,13 +337,14 @@ int cl_sb_init(struct super_block *sb)
 		cl = cl_type_setup(env, NULL, &vvp_device_type,
 				   sbi->ll_dt_exp->exp_obd->obd_lu_dev);
 		if (!IS_ERR(cl)) {
-			cl2ccc_dev(cl)->cdv_sb = sb;
+			cl2vvp_dev(cl)->vdv_sb = sb;
 			sbi->ll_cl = cl;
 			sbi->ll_site = cl2lu_dev(cl)->ld_site;
 		}
 		cl_env_put(env, &refcheck);
-	} else
+	} else {
 		rc = PTR_ERR(env);
+	}
 	return rc;
 }
 
@@ -356,23 +489,18 @@ static loff_t vvp_pgcache_find(const struct lu_env *env,
 			return ~0ULL;
 		clob = vvp_pgcache_obj(env, dev, &id);
 		if (clob) {
-			struct cl_object_header *hdr;
-			int		      nr;
-			struct cl_page	  *pg;
-
-			/* got an object. Find next page. */
-			hdr = cl_object_header(clob);
+			struct inode *inode = vvp_object_inode(clob);
+			struct page *vmpage;
+			int nr;
 
-			spin_lock(&hdr->coh_page_guard);
-			nr = radix_tree_gang_lookup(&hdr->coh_tree,
-						    (void **)&pg,
-						    id.vpi_index, 1);
+			nr = find_get_pages_contig(inode->i_mapping,
+						   id.vpi_index, 1, &vmpage);
 			if (nr > 0) {
-				id.vpi_index = pg->cp_index;
+				id.vpi_index = vmpage->index;
 				/* Cant support over 16T file */
-				nr = !(pg->cp_index > 0xffffffff);
+				nr = !(vmpage->index > 0xffffffff);
+				put_page(vmpage);
 			}
-			spin_unlock(&hdr->coh_page_guard);
 
 			lu_object_ref_del(&clob->co_lu, "dump", current);
 			cl_object_put(env, clob);
@@ -398,21 +526,20 @@ static loff_t vvp_pgcache_find(const struct lu_env *env,
 static void vvp_pgcache_page_show(const struct lu_env *env,
 				  struct seq_file *seq, struct cl_page *page)
 {
-	struct ccc_page *cpg;
+	struct vvp_page *vpg;
 	struct page      *vmpage;
 	int	      has_flags;
 
-	cpg = cl2ccc_page(cl_page_at(page, &vvp_device_type));
-	vmpage = cpg->cpg_page;
-	seq_printf(seq, " %5i | %p %p %s %s %s %s | %p %lu/%u(%p) %lu %u [",
+	vpg = cl2vvp_page(cl_page_at(page, &vvp_device_type));
+	vmpage = vpg->vpg_page;
+	seq_printf(seq, " %5i | %p %p %s %s %s %s | %p "DFID"(%p) %lu %u [",
 		   0 /* gen */,
-		   cpg, page,
+		   vpg, page,
 		   "none",
-		   cpg->cpg_write_queued ? "wq" : "- ",
-		   cpg->cpg_defer_uptodate ? "du" : "- ",
+		   vpg->vpg_write_queued ? "wq" : "- ",
+		   vpg->vpg_defer_uptodate ? "du" : "- ",
 		   PageWriteback(vmpage) ? "wb" : "-",
-		   vmpage, vmpage->mapping->host->i_ino,
-		   vmpage->mapping->host->i_generation,
+		   vmpage, PFID(ll_inode2fid(vmpage->mapping->host)),
 		   vmpage->mapping->host, vmpage->index,
 		   page_count(vmpage));
 	has_flags = 0;
@@ -431,8 +558,6 @@ static int vvp_pgcache_show(struct seq_file *f, void *v)
 	struct ll_sb_info       *sbi;
 	struct cl_object	*clob;
 	struct lu_env	   *env;
-	struct cl_page	  *page;
-	struct cl_object_header *hdr;
 	struct vvp_pgcache_id    id;
 	int		      refcheck;
 	int		      result;
@@ -444,27 +569,38 @@ static int vvp_pgcache_show(struct seq_file *f, void *v)
 		sbi = f->private;
 		clob = vvp_pgcache_obj(env, &sbi->ll_cl->cd_lu_dev, &id);
 		if (clob) {
-			hdr = cl_object_header(clob);
-
-			spin_lock(&hdr->coh_page_guard);
-			page = cl_page_lookup(hdr, id.vpi_index);
-			spin_unlock(&hdr->coh_page_guard);
+			struct inode *inode = vvp_object_inode(clob);
+			struct cl_page *page = NULL;
+			struct page *vmpage;
+
+			result = find_get_pages_contig(inode->i_mapping,
+						       id.vpi_index, 1,
+						       &vmpage);
+			if (result > 0) {
+				lock_page(vmpage);
+				page = cl_vmpage_page(vmpage, clob);
+				unlock_page(vmpage);
+				put_page(vmpage);
+			}
 
-			seq_printf(f, "%8x@"DFID": ",
-				   id.vpi_index, PFID(&hdr->coh_lu.loh_fid));
+			seq_printf(f, "%8x@" DFID ": ", id.vpi_index,
+				   PFID(lu_object_fid(&clob->co_lu)));
 			if (page) {
 				vvp_pgcache_page_show(env, f, page);
 				cl_page_put(env, page);
-			} else
+			} else {
 				seq_puts(f, "missing\n");
+			}
 			lu_object_ref_del(&clob->co_lu, "dump", current);
 			cl_object_put(env, clob);
-		} else
+		} else {
 			seq_printf(f, "%llx missing\n", pos);
+		}
 		cl_env_put(env, &refcheck);
 		result = 0;
-	} else
+	} else {
 		result = PTR_ERR(env);
+	}
 	return result;
 }
 
diff --git a/drivers/staging/lustre/lustre/llite/vvp_internal.h b/drivers/staging/lustre/lustre/llite/vvp_internal.h
index bb393378c..27b9b0a01 100644
--- a/drivers/staging/lustre/lustre/llite/vvp_internal.h
+++ b/drivers/staging/lustre/lustre/llite/vvp_internal.h
@@ -41,21 +41,337 @@
 #ifndef VVP_INTERNAL_H
 #define VVP_INTERNAL_H
 
+#include "../include/lustre/lustre_idl.h"
 #include "../include/cl_object.h"
-#include "llite_internal.h"
 
-int vvp_io_init(const struct lu_env *env,
-		struct cl_object *obj, struct cl_io *io);
-int vvp_lock_init(const struct lu_env *env,
-		  struct cl_object *obj, struct cl_lock *lock,
-				   const struct cl_io *io);
+enum obd_notify_event;
+struct inode;
+struct lov_stripe_md;
+struct lustre_md;
+struct obd_capa;
+struct obd_device;
+struct obd_export;
+struct page;
+
+/* specific architecture can implement only part of this list */
+enum vvp_io_subtype {
+	/** normal IO */
+	IO_NORMAL,
+	/** io started from splice_{read|write} */
+	IO_SPLICE
+};
+
+/**
+ * IO state private to IO state private to VVP layer.
+ */
+struct vvp_io {
+	/** super class */
+	struct cl_io_slice     vui_cl;
+	struct cl_io_lock_link vui_link;
+	/**
+	 * I/O vector information to or from which read/write is going.
+	 */
+	struct iov_iter *vui_iter;
+	/**
+	 * Total size for the left IO.
+	 */
+	size_t vui_tot_count;
+
+	union {
+		struct vvp_fault_io {
+			/**
+			 * Inode modification time that is checked across DLM
+			 * lock request.
+			 */
+			time64_t	    ft_mtime;
+			struct vm_area_struct *ft_vma;
+			/**
+			 *  locked page returned from vvp_io
+			 */
+			struct page	    *ft_vmpage;
+			/**
+			 * kernel fault info
+			 */
+			struct vm_fault *ft_vmf;
+			/**
+			 * fault API used bitflags for return code.
+			 */
+			unsigned int    ft_flags;
+			/**
+			 * check that flags are from filemap_fault
+			 */
+			bool		ft_flags_valid;
+		} fault;
+		struct {
+			struct pipe_inode_info	*vui_pipe;
+			unsigned int		 vui_flags;
+		} splice;
+		struct {
+			struct cl_page_list vui_queue;
+			unsigned long vui_written;
+			int vui_from;
+			int vui_to;
+		} write;
+	} u;
+
+	enum vvp_io_subtype	vui_io_subtype;
+
+	/**
+	 * Layout version when this IO is initialized
+	 */
+	__u32			vui_layout_gen;
+	/**
+	 * File descriptor against which IO is done.
+	 */
+	struct ll_file_data	*vui_fd;
+	struct kiocb		*vui_iocb;
+
+	/* Readahead state. */
+	pgoff_t	vui_ra_start;
+	pgoff_t	vui_ra_count;
+	/* Set when vui_ra_{start,count} have been initialized. */
+	bool		vui_ra_valid;
+};
+
+extern struct lu_device_type vvp_device_type;
+
+extern struct lu_context_key vvp_session_key;
+extern struct lu_context_key vvp_thread_key;
+
+extern struct kmem_cache *vvp_lock_kmem;
+extern struct kmem_cache *vvp_object_kmem;
+extern struct kmem_cache *vvp_req_kmem;
+
+struct vvp_thread_info {
+	struct cl_lock		vti_lock;
+	struct cl_lock_descr	vti_descr;
+	struct cl_io		vti_io;
+	struct cl_attr		vti_attr;
+};
+
+static inline struct vvp_thread_info *vvp_env_info(const struct lu_env *env)
+{
+	struct vvp_thread_info      *vti;
+
+	vti = lu_context_key_get(&env->le_ctx, &vvp_thread_key);
+	LASSERT(vti);
+
+	return vti;
+}
+
+static inline struct cl_lock *vvp_env_lock(const struct lu_env *env)
+{
+	struct cl_lock *lock = &vvp_env_info(env)->vti_lock;
+
+	memset(lock, 0, sizeof(*lock));
+	return lock;
+}
+
+static inline struct cl_attr *vvp_env_thread_attr(const struct lu_env *env)
+{
+	struct cl_attr *attr = &vvp_env_info(env)->vti_attr;
+
+	memset(attr, 0, sizeof(*attr));
+
+	return attr;
+}
+
+static inline struct cl_io *vvp_env_thread_io(const struct lu_env *env)
+{
+	struct cl_io *io = &vvp_env_info(env)->vti_io;
+
+	memset(io, 0, sizeof(*io));
+
+	return io;
+}
+
+struct vvp_session {
+	struct vvp_io cs_ios;
+};
+
+static inline struct vvp_session *vvp_env_session(const struct lu_env *env)
+{
+	struct vvp_session *ses;
+
+	ses = lu_context_key_get(env->le_ses, &vvp_session_key);
+	LASSERT(ses);
+
+	return ses;
+}
+
+static inline struct vvp_io *vvp_env_io(const struct lu_env *env)
+{
+	return &vvp_env_session(env)->cs_ios;
+}
+
+/**
+ * ccc-private object state.
+ */
+struct vvp_object {
+	struct cl_object_header vob_header;
+	struct cl_object        vob_cl;
+	struct inode           *vob_inode;
+
+	/**
+	 * A list of dirty pages pending IO in the cache. Used by
+	 * SOM. Protected by ll_inode_info::lli_lock.
+	 *
+	 * \see vvp_page::vpg_pending_linkage
+	 */
+	struct list_head	vob_pending_list;
+
+	/**
+	 * Access this counter is protected by inode->i_sem. Now that
+	 * the lifetime of transient pages must be covered by inode sem,
+	 * we don't need to hold any lock..
+	 */
+	int			vob_transient_pages;
+	/**
+	 * Number of outstanding mmaps on this file.
+	 *
+	 * \see ll_vm_open(), ll_vm_close().
+	 */
+	atomic_t                vob_mmap_cnt;
+
+	/**
+	 * various flags
+	 * vob_discard_page_warned
+	 *     if pages belonging to this object are discarded when a client
+	 * is evicted, some debug info will be printed, this flag will be set
+	 * during processing the first discarded page, then avoid flooding
+	 * debug message for lots of discarded pages.
+	 *
+	 * \see ll_dirty_page_discard_warn.
+	 */
+	unsigned int		vob_discard_page_warned:1;
+};
+
+/**
+ * VVP-private page state.
+ */
+struct vvp_page {
+	struct cl_page_slice vpg_cl;
+	int		  vpg_defer_uptodate;
+	int		  vpg_ra_used;
+	int		  vpg_write_queued;
+	/**
+	 * Non-empty iff this page is already counted in
+	 * vvp_object::vob_pending_list. This list is only used as a flag,
+	 * that is, never iterated through, only checked for list_empty(), but
+	 * having a list is useful for debugging.
+	 */
+	struct list_head	   vpg_pending_linkage;
+	/** VM page */
+	struct page	  *vpg_page;
+};
+
+static inline struct vvp_page *cl2vvp_page(const struct cl_page_slice *slice)
+{
+	return container_of(slice, struct vvp_page, vpg_cl);
+}
+
+static inline pgoff_t vvp_index(struct vvp_page *vvp)
+{
+	return vvp->vpg_cl.cpl_index;
+}
+
+struct vvp_device {
+	struct cl_device    vdv_cl;
+	struct super_block *vdv_sb;
+	struct cl_device   *vdv_next;
+};
+
+struct vvp_lock {
+	struct cl_lock_slice vlk_cl;
+};
+
+struct vvp_req {
+	struct cl_req_slice  vrq_cl;
+};
+
+void *ccc_key_init(const struct lu_context *ctx,
+		   struct lu_context_key *key);
+void ccc_key_fini(const struct lu_context *ctx,
+		  struct lu_context_key *key, void *data);
+
+void ccc_umount(const struct lu_env *env, struct cl_device *dev);
+
+static inline struct lu_device *vvp2lu_dev(struct vvp_device *vdv)
+{
+	return &vdv->vdv_cl.cd_lu_dev;
+}
+
+static inline struct vvp_device *lu2vvp_dev(const struct lu_device *d)
+{
+	return container_of0(d, struct vvp_device, vdv_cl.cd_lu_dev);
+}
+
+static inline struct vvp_device *cl2vvp_dev(const struct cl_device *d)
+{
+	return container_of0(d, struct vvp_device, vdv_cl);
+}
+
+static inline struct vvp_object *cl2vvp(const struct cl_object *obj)
+{
+	return container_of0(obj, struct vvp_object, vob_cl);
+}
+
+static inline struct vvp_object *lu2vvp(const struct lu_object *obj)
+{
+	return container_of0(obj, struct vvp_object, vob_cl.co_lu);
+}
+
+static inline struct inode *vvp_object_inode(const struct cl_object *obj)
+{
+	return cl2vvp(obj)->vob_inode;
+}
+
+int vvp_object_invariant(const struct cl_object *obj);
+struct vvp_object *cl_inode2vvp(struct inode *inode);
+
+static inline struct page *cl2vm_page(const struct cl_page_slice *slice)
+{
+	return cl2vvp_page(slice)->vpg_page;
+}
+
+static inline struct vvp_lock *cl2vvp_lock(const struct cl_lock_slice *slice)
+{
+	return container_of(slice, struct vvp_lock, vlk_cl);
+}
+
+# define CLOBINVRNT(env, clob, expr)					\
+	((void)sizeof(env), (void)sizeof(clob), (void)sizeof(!!(expr)))
+
+/**
+ * New interfaces to get and put lov_stripe_md from lov layer. This violates
+ * layering because lov_stripe_md is supposed to be a private data in lov.
+ *
+ * NB: If you find you have to use these interfaces for your new code, please
+ * think about it again. These interfaces may be removed in the future for
+ * better layering.
+ */
+struct lov_stripe_md *lov_lsm_get(struct cl_object *clobj);
+void lov_lsm_put(struct cl_object *clobj, struct lov_stripe_md *lsm);
+int lov_read_and_clear_async_rc(struct cl_object *clob);
+
+struct lov_stripe_md *ccc_inode_lsm_get(struct inode *inode);
+void ccc_inode_lsm_put(struct inode *inode, struct lov_stripe_md *lsm);
+
+int vvp_io_init(const struct lu_env *env, struct cl_object *obj,
+		struct cl_io *io);
+int vvp_io_write_commit(const struct lu_env *env, struct cl_io *io);
+int vvp_lock_init(const struct lu_env *env, struct cl_object *obj,
+		  struct cl_lock *lock, const struct cl_io *io);
 int vvp_page_init(const struct lu_env *env, struct cl_object *obj,
-		  struct cl_page *page, struct page *vmpage);
+		  struct cl_page *page, pgoff_t index);
+int vvp_req_init(const struct lu_env *env, struct cl_device *dev,
+		 struct cl_req *req);
 struct lu_object *vvp_object_alloc(const struct lu_env *env,
 				   const struct lu_object_header *hdr,
 				   struct lu_device *dev);
 
-struct ccc_object *cl_inode2ccc(struct inode *inode);
+int vvp_global_init(void);
+void vvp_global_fini(void);
 
 extern const struct file_operations vvp_dump_pgcache_file_ops;
 
diff --git a/drivers/staging/lustre/lustre/llite/vvp_io.c b/drivers/staging/lustre/lustre/llite/vvp_io.c
index 85a835976..5bf9592ae 100644
--- a/drivers/staging/lustre/lustre/llite/vvp_io.c
+++ b/drivers/staging/lustre/lustre/llite/vvp_io.c
@@ -44,21 +44,30 @@
 #include "../include/obd.h"
 #include "../include/lustre_lite.h"
 
+#include "llite_internal.h"
 #include "vvp_internal.h"
 
-static struct vvp_io *cl2vvp_io(const struct lu_env *env,
-				const struct cl_io_slice *slice);
+struct vvp_io *cl2vvp_io(const struct lu_env *env,
+			 const struct cl_io_slice *slice)
+{
+	struct vvp_io *vio;
+
+	vio = container_of(slice, struct vvp_io, vui_cl);
+	LASSERT(vio == vvp_env_io(env));
+
+	return vio;
+}
 
 /**
  * True, if \a io is a normal io, False for splice_{read,write}
  */
-int cl_is_normalio(const struct lu_env *env, const struct cl_io *io)
+static int cl_is_normalio(const struct lu_env *env, const struct cl_io *io)
 {
 	struct vvp_io *vio = vvp_env_io(env);
 
 	LASSERT(io->ci_type == CIT_READ || io->ci_type == CIT_WRITE);
 
-	return vio->cui_io_subtype == IO_NORMAL;
+	return vio->vui_io_subtype == IO_NORMAL;
 }
 
 /**
@@ -71,7 +80,7 @@ static bool can_populate_pages(const struct lu_env *env, struct cl_io *io,
 			       struct inode *inode)
 {
 	struct ll_inode_info	*lli = ll_i2info(inode);
-	struct ccc_io		*cio = ccc_env_io(env);
+	struct vvp_io		*vio = vvp_env_io(env);
 	bool rc = true;
 
 	switch (io->ci_type) {
@@ -80,7 +89,7 @@ static bool can_populate_pages(const struct lu_env *env, struct cl_io *io,
 		/* don't need lock here to check lli_layout_gen as we have held
 		 * extent lock and GROUP lock has to hold to swap layout
 		 */
-		if (ll_layout_version_get(lli) != cio->cui_layout_gen) {
+		if (ll_layout_version_get(lli) != vio->vui_layout_gen) {
 			io->ci_need_restart = 1;
 			/* this will return application a short read/write */
 			io->ci_continue = 0;
@@ -95,20 +104,187 @@ static bool can_populate_pages(const struct lu_env *env, struct cl_io *io,
 	return rc;
 }
 
+static void vvp_object_size_lock(struct cl_object *obj)
+{
+	struct inode *inode = vvp_object_inode(obj);
+
+	ll_inode_size_lock(inode);
+	cl_object_attr_lock(obj);
+}
+
+static void vvp_object_size_unlock(struct cl_object *obj)
+{
+	struct inode *inode = vvp_object_inode(obj);
+
+	cl_object_attr_unlock(obj);
+	ll_inode_size_unlock(inode);
+}
+
+/**
+ * Helper function that if necessary adjusts file size (inode->i_size), when
+ * position at the offset \a pos is accessed. File size can be arbitrary stale
+ * on a Lustre client, but client at least knows KMS. If accessed area is
+ * inside [0, KMS], set file size to KMS, otherwise glimpse file size.
+ *
+ * Locking: cl_isize_lock is used to serialize changes to inode size and to
+ * protect consistency between inode size and cl_object
+ * attributes. cl_object_size_lock() protects consistency between cl_attr's of
+ * top-object and sub-objects.
+ */
+static int vvp_prep_size(const struct lu_env *env, struct cl_object *obj,
+			 struct cl_io *io, loff_t start, size_t count,
+			 int *exceed)
+{
+	struct cl_attr *attr  = vvp_env_thread_attr(env);
+	struct inode   *inode = vvp_object_inode(obj);
+	loff_t	  pos   = start + count - 1;
+	loff_t kms;
+	int result;
+
+	/*
+	 * Consistency guarantees: following possibilities exist for the
+	 * relation between region being accessed and real file size at this
+	 * moment:
+	 *
+	 *  (A): the region is completely inside of the file;
+	 *
+	 *  (B-x): x bytes of region are inside of the file, the rest is
+	 *  outside;
+	 *
+	 *  (C): the region is completely outside of the file.
+	 *
+	 * This classification is stable under DLM lock already acquired by
+	 * the caller, because to change the class, other client has to take
+	 * DLM lock conflicting with our lock. Also, any updates to ->i_size
+	 * by other threads on this client are serialized by
+	 * ll_inode_size_lock(). This guarantees that short reads are handled
+	 * correctly in the face of concurrent writes and truncates.
+	 */
+	vvp_object_size_lock(obj);
+	result = cl_object_attr_get(env, obj, attr);
+	if (result == 0) {
+		kms = attr->cat_kms;
+		if (pos > kms) {
+			/*
+			 * A glimpse is necessary to determine whether we
+			 * return a short read (B) or some zeroes at the end
+			 * of the buffer (C)
+			 */
+			vvp_object_size_unlock(obj);
+			result = cl_glimpse_lock(env, io, inode, obj, 0);
+			if (result == 0 && exceed) {
+				/* If objective page index exceed end-of-file
+				 * page index, return directly. Do not expect
+				 * kernel will check such case correctly.
+				 * linux-2.6.18-128.1.1 miss to do that.
+				 * --bug 17336
+				 */
+				loff_t size = i_size_read(inode);
+				loff_t cur_index = start >> PAGE_SHIFT;
+				loff_t size_index = (size - 1) >> PAGE_SHIFT;
+
+				if ((size == 0 && cur_index != 0) ||
+				    size_index < cur_index)
+					*exceed = 1;
+			}
+			return result;
+		}
+		/*
+		 * region is within kms and, hence, within real file
+		 * size (A). We need to increase i_size to cover the
+		 * read region so that generic_file_read() will do its
+		 * job, but that doesn't mean the kms size is
+		 * _correct_, it is only the _minimum_ size. If
+		 * someone does a stat they will get the correct size
+		 * which will always be >= the kms value here.
+		 * b=11081
+		 */
+		if (i_size_read(inode) < kms) {
+			i_size_write(inode, kms);
+			CDEBUG(D_VFSTRACE, DFID " updating i_size %llu\n",
+			       PFID(lu_object_fid(&obj->co_lu)),
+			       (__u64)i_size_read(inode));
+		}
+	}
+
+	vvp_object_size_unlock(obj);
+
+	return result;
+}
+
 /*****************************************************************************
  *
  * io operations.
  *
  */
 
+static int vvp_io_one_lock_index(const struct lu_env *env, struct cl_io *io,
+				 __u32 enqflags, enum cl_lock_mode mode,
+			  pgoff_t start, pgoff_t end)
+{
+	struct vvp_io          *vio   = vvp_env_io(env);
+	struct cl_lock_descr   *descr = &vio->vui_link.cill_descr;
+	struct cl_object       *obj   = io->ci_obj;
+
+	CLOBINVRNT(env, obj, vvp_object_invariant(obj));
+
+	CDEBUG(D_VFSTRACE, "lock: %d [%lu, %lu]\n", mode, start, end);
+
+	memset(&vio->vui_link, 0, sizeof(vio->vui_link));
+
+	if (vio->vui_fd && (vio->vui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
+		descr->cld_mode = CLM_GROUP;
+		descr->cld_gid  = vio->vui_fd->fd_grouplock.lg_gid;
+	} else {
+		descr->cld_mode  = mode;
+	}
+	descr->cld_obj   = obj;
+	descr->cld_start = start;
+	descr->cld_end   = end;
+	descr->cld_enq_flags = enqflags;
+
+	cl_io_lock_add(env, io, &vio->vui_link);
+	return 0;
+}
+
+static int vvp_io_one_lock(const struct lu_env *env, struct cl_io *io,
+			   __u32 enqflags, enum cl_lock_mode mode,
+			   loff_t start, loff_t end)
+{
+	struct cl_object *obj = io->ci_obj;
+
+	return vvp_io_one_lock_index(env, io, enqflags, mode,
+				     cl_index(obj, start), cl_index(obj, end));
+}
+
+static int vvp_io_write_iter_init(const struct lu_env *env,
+				  const struct cl_io_slice *ios)
+{
+	struct vvp_io *vio = cl2vvp_io(env, ios);
+
+	cl_page_list_init(&vio->u.write.vui_queue);
+	vio->u.write.vui_written = 0;
+	vio->u.write.vui_from = 0;
+	vio->u.write.vui_to = PAGE_SIZE;
+
+	return 0;
+}
+
+static void vvp_io_write_iter_fini(const struct lu_env *env,
+				   const struct cl_io_slice *ios)
+{
+	struct vvp_io *vio = cl2vvp_io(env, ios);
+
+	LASSERT(vio->u.write.vui_queue.pl_nr == 0);
+}
+
 static int vvp_io_fault_iter_init(const struct lu_env *env,
 				  const struct cl_io_slice *ios)
 {
 	struct vvp_io *vio   = cl2vvp_io(env, ios);
-	struct inode  *inode = ccc_object_inode(ios->cis_obj);
+	struct inode  *inode = vvp_object_inode(ios->cis_obj);
 
-	LASSERT(inode ==
-		file_inode(cl2ccc_io(env, ios)->cui_fd->fd_file));
+	LASSERT(inode == file_inode(vio->vui_fd->fd_file));
 	vio->u.fault.ft_mtime = inode->i_mtime.tv_sec;
 	return 0;
 }
@@ -117,15 +293,16 @@ static void vvp_io_fini(const struct lu_env *env, const struct cl_io_slice *ios)
 {
 	struct cl_io     *io  = ios->cis_io;
 	struct cl_object *obj = io->ci_obj;
-	struct ccc_io    *cio = cl2ccc_io(env, ios);
+	struct vvp_io    *vio = cl2vvp_io(env, ios);
+	struct inode *inode = vvp_object_inode(obj);
 
-	CLOBINVRNT(env, obj, ccc_object_invariant(obj));
+	CLOBINVRNT(env, obj, vvp_object_invariant(obj));
 
 	CDEBUG(D_VFSTRACE, DFID
 	       " ignore/verify layout %d/%d, layout version %d restore needed %d\n",
 	       PFID(lu_object_fid(&obj->co_lu)),
 	       io->ci_ignore_layout, io->ci_verify_layout,
-	       cio->cui_layout_gen, io->ci_restore_needed);
+	       vio->vui_layout_gen, io->ci_restore_needed);
 
 	if (io->ci_restore_needed == 1) {
 		int	rc;
@@ -133,7 +310,7 @@ static void vvp_io_fini(const struct lu_env *env, const struct cl_io_slice *ios)
 		/* file was detected release, we need to restore it
 		 * before finishing the io
 		 */
-		rc = ll_layout_restore(ccc_object_inode(obj));
+		rc = ll_layout_restore(inode, 0, OBD_OBJECT_EOF);
 		/* if restore registration failed, no restart,
 		 * we will return -ENODATA
 		 */
@@ -159,16 +336,16 @@ static void vvp_io_fini(const struct lu_env *env, const struct cl_io_slice *ios)
 		__u32 gen = 0;
 
 		/* check layout version */
-		ll_layout_refresh(ccc_object_inode(obj), &gen);
-		io->ci_need_restart = cio->cui_layout_gen != gen;
+		ll_layout_refresh(inode, &gen);
+		io->ci_need_restart = vio->vui_layout_gen != gen;
 		if (io->ci_need_restart) {
 			CDEBUG(D_VFSTRACE,
 			       DFID" layout changed from %d to %d.\n",
 			       PFID(lu_object_fid(&obj->co_lu)),
-			       cio->cui_layout_gen, gen);
+			       vio->vui_layout_gen, gen);
 			/* today successful restore is the only possible case */
 			/* restore was done, clear restoring state */
-			ll_i2info(ccc_object_inode(obj))->lli_flags &=
+			ll_i2info(vvp_object_inode(obj))->lli_flags &=
 				~LLIF_FILE_RESTORING;
 		}
 	}
@@ -180,7 +357,7 @@ static void vvp_io_fault_fini(const struct lu_env *env,
 	struct cl_io   *io   = ios->cis_io;
 	struct cl_page *page = io->u.ci_fault.ft_page;
 
-	CLOBINVRNT(env, io->ci_obj, ccc_object_invariant(io->ci_obj));
+	CLOBINVRNT(env, io->ci_obj, vvp_object_invariant(io->ci_obj));
 
 	if (page) {
 		lu_ref_del(&page->cp_reference, "fault", io);
@@ -203,16 +380,16 @@ static enum cl_lock_mode vvp_mode_from_vma(struct vm_area_struct *vma)
 }
 
 static int vvp_mmap_locks(const struct lu_env *env,
-			  struct ccc_io *vio, struct cl_io *io)
+			  struct vvp_io *vio, struct cl_io *io)
 {
-	struct ccc_thread_info *cti = ccc_env_info(env);
+	struct vvp_thread_info *cti = vvp_env_info(env);
 	struct mm_struct       *mm = current->mm;
 	struct vm_area_struct  *vma;
-	struct cl_lock_descr   *descr = &cti->cti_descr;
+	struct cl_lock_descr   *descr = &cti->vti_descr;
 	ldlm_policy_data_t      policy;
 	unsigned long	   addr;
 	ssize_t		 count;
-	int		     result;
+	int		 result = 0;
 	struct iov_iter i;
 	struct iovec iov;
 
@@ -221,21 +398,21 @@ static int vvp_mmap_locks(const struct lu_env *env,
 	if (!cl_is_normalio(env, io))
 		return 0;
 
-	if (!vio->cui_iter) /* nfs or loop back device write */
+	if (!vio->vui_iter) /* nfs or loop back device write */
 		return 0;
 
 	/* No MM (e.g. NFS)? No vmas too. */
 	if (!mm)
 		return 0;
 
-	iov_for_each(iov, i, *(vio->cui_iter)) {
+	iov_for_each(iov, i, *vio->vui_iter) {
 		addr = (unsigned long)iov.iov_base;
 		count = iov.iov_len;
 		if (count == 0)
 			continue;
 
-		count += addr & (~CFS_PAGE_MASK);
-		addr &= CFS_PAGE_MASK;
+		count += addr & (~PAGE_MASK);
+		addr &= PAGE_MASK;
 
 		down_read(&mm->mmap_sem);
 		while ((vma = our_vma(mm, addr, count)) != NULL) {
@@ -244,10 +421,10 @@ static int vvp_mmap_locks(const struct lu_env *env,
 
 			if (ll_file_nolock(vma->vm_file)) {
 				/*
-				 * For no lock case, a lockless lock will be
-				 * generated.
+				 * For no lock case is not allowed for mmap
 				 */
-				flags = CEF_NEVER;
+				result = -EINVAL;
+				break;
 			}
 
 			/*
@@ -269,10 +446,8 @@ static int vvp_mmap_locks(const struct lu_env *env,
 			       descr->cld_mode, descr->cld_start,
 			       descr->cld_end);
 
-			if (result < 0) {
-				up_read(&mm->mmap_sem);
-				return result;
-			}
+			if (result < 0)
+				break;
 
 			if (vma->vm_end - addr >= count)
 				break;
@@ -281,26 +456,55 @@ static int vvp_mmap_locks(const struct lu_env *env,
 			addr = vma->vm_end;
 		}
 		up_read(&mm->mmap_sem);
+		if (result < 0)
+			break;
 	}
-	return 0;
+	return result;
+}
+
+static void vvp_io_advance(const struct lu_env *env,
+			   const struct cl_io_slice *ios,
+			   size_t nob)
+{
+	struct vvp_io    *vio = cl2vvp_io(env, ios);
+	struct cl_io     *io  = ios->cis_io;
+	struct cl_object *obj = ios->cis_io->ci_obj;
+
+	CLOBINVRNT(env, obj, vvp_object_invariant(obj));
+
+	if (!cl_is_normalio(env, io))
+		return;
+
+	iov_iter_reexpand(vio->vui_iter, vio->vui_tot_count  -= nob);
+}
+
+static void vvp_io_update_iov(const struct lu_env *env,
+			      struct vvp_io *vio, struct cl_io *io)
+{
+	size_t size = io->u.ci_rw.crw_count;
+
+	if (!cl_is_normalio(env, io) || !vio->vui_iter)
+		return;
+
+	iov_iter_truncate(vio->vui_iter, size);
 }
 
 static int vvp_io_rw_lock(const struct lu_env *env, struct cl_io *io,
 			  enum cl_lock_mode mode, loff_t start, loff_t end)
 {
-	struct ccc_io *cio = ccc_env_io(env);
+	struct vvp_io *vio = vvp_env_io(env);
 	int result;
 	int ast_flags = 0;
 
 	LASSERT(io->ci_type == CIT_READ || io->ci_type == CIT_WRITE);
 
-	ccc_io_update_iov(env, cio, io);
+	vvp_io_update_iov(env, vio, io);
 
 	if (io->u.ci_rw.crw_nonblock)
 		ast_flags |= CEF_NONBLOCK;
-	result = vvp_mmap_locks(env, cio, io);
+	result = vvp_mmap_locks(env, vio, io);
 	if (result == 0)
-		result = ccc_io_one_lock(env, io, ast_flags, mode, start, end);
+		result = vvp_io_one_lock(env, io, ast_flags, mode, start, end);
 	return result;
 }
 
@@ -325,9 +529,11 @@ static int vvp_io_fault_lock(const struct lu_env *env,
 	/*
 	 * XXX LDLM_FL_CBPENDING
 	 */
-	return ccc_io_one_lock_index
-		(env, io, 0, vvp_mode_from_vma(vio->u.fault.ft_vma),
-		 io->u.ci_fault.ft_index, io->u.ci_fault.ft_index);
+	return vvp_io_one_lock_index(env,
+				     io, 0,
+				     vvp_mode_from_vma(vio->u.fault.ft_vma),
+				     io->u.ci_fault.ft_index,
+				     io->u.ci_fault.ft_index);
 }
 
 static int vvp_io_write_lock(const struct lu_env *env,
@@ -354,14 +560,13 @@ static int vvp_io_setattr_iter_init(const struct lu_env *env,
 }
 
 /**
- * Implementation of cl_io_operations::cio_lock() method for CIT_SETATTR io.
+ * Implementation of cl_io_operations::vio_lock() method for CIT_SETATTR io.
  *
  * Handles "lockless io" mode when extent locking is done by server.
  */
 static int vvp_io_setattr_lock(const struct lu_env *env,
 			       const struct cl_io_slice *ios)
 {
-	struct ccc_io *cio = ccc_env_io(env);
 	struct cl_io  *io  = ios->cis_io;
 	__u64 new_size;
 	__u32 enqflags = 0;
@@ -378,8 +583,8 @@ static int vvp_io_setattr_lock(const struct lu_env *env,
 			return 0;
 		new_size = 0;
 	}
-	cio->u.setattr.cui_local_lock = SETATTR_EXTENT_LOCK;
-	return ccc_io_one_lock(env, io, enqflags, CLM_WRITE,
+
+	return vvp_io_one_lock(env, io, enqflags, CLM_WRITE,
 			       new_size, OBD_OBJECT_EOF);
 }
 
@@ -413,7 +618,7 @@ static int vvp_io_setattr_time(const struct lu_env *env,
 {
 	struct cl_io       *io    = ios->cis_io;
 	struct cl_object   *obj   = io->ci_obj;
-	struct cl_attr     *attr  = ccc_env_thread_attr(env);
+	struct cl_attr     *attr  = vvp_env_thread_attr(env);
 	int result;
 	unsigned valid = CAT_CTIME;
 
@@ -437,7 +642,7 @@ static int vvp_io_setattr_start(const struct lu_env *env,
 				const struct cl_io_slice *ios)
 {
 	struct cl_io	*io    = ios->cis_io;
-	struct inode	*inode = ccc_object_inode(io->ci_obj);
+	struct inode	*inode = vvp_object_inode(io->ci_obj);
 	int result = 0;
 
 	inode_lock(inode);
@@ -453,7 +658,7 @@ static void vvp_io_setattr_end(const struct lu_env *env,
 			       const struct cl_io_slice *ios)
 {
 	struct cl_io *io    = ios->cis_io;
-	struct inode *inode = ccc_object_inode(io->ci_obj);
+	struct inode *inode = vvp_object_inode(io->ci_obj);
 
 	if (cl_io_is_trunc(io))
 		/* Truncate in memory pages - they must be clean pages
@@ -474,27 +679,25 @@ static int vvp_io_read_start(const struct lu_env *env,
 			     const struct cl_io_slice *ios)
 {
 	struct vvp_io     *vio   = cl2vvp_io(env, ios);
-	struct ccc_io     *cio   = cl2ccc_io(env, ios);
 	struct cl_io      *io    = ios->cis_io;
 	struct cl_object  *obj   = io->ci_obj;
-	struct inode      *inode = ccc_object_inode(obj);
-	struct ll_ra_read *bead  = &vio->cui_bead;
-	struct file       *file  = cio->cui_fd->fd_file;
+	struct inode      *inode = vvp_object_inode(obj);
+	struct file       *file  = vio->vui_fd->fd_file;
 
 	int     result;
 	loff_t  pos = io->u.ci_rd.rd.crw_pos;
 	long    cnt = io->u.ci_rd.rd.crw_count;
-	long    tot = cio->cui_tot_count;
+	long    tot = vio->vui_tot_count;
 	int     exceed = 0;
 
-	CLOBINVRNT(env, obj, ccc_object_invariant(obj));
+	CLOBINVRNT(env, obj, vvp_object_invariant(obj));
 
 	CDEBUG(D_VFSTRACE, "read: -> [%lli, %lli)\n", pos, pos + cnt);
 
 	if (!can_populate_pages(env, io, inode))
 		return 0;
 
-	result = ccc_prep_size(env, obj, io, pos, tot, &exceed);
+	result = vvp_prep_size(env, obj, io, pos, tot, &exceed);
 	if (result != 0)
 		return result;
 	else if (exceed != 0)
@@ -505,30 +708,27 @@ static int vvp_io_read_start(const struct lu_env *env,
 			 inode->i_ino, cnt, pos, i_size_read(inode));
 
 	/* turn off the kernel's read-ahead */
-	cio->cui_fd->fd_file->f_ra.ra_pages = 0;
+	vio->vui_fd->fd_file->f_ra.ra_pages = 0;
 
 	/* initialize read-ahead window once per syscall */
-	if (!vio->cui_ra_window_set) {
-		vio->cui_ra_window_set = 1;
-		bead->lrr_start = cl_index(obj, pos);
-		/*
-		 * XXX: explicit PAGE_SIZE
-		 */
-		bead->lrr_count = cl_index(obj, tot + PAGE_SIZE - 1);
-		ll_ra_read_in(file, bead);
+	if (!vio->vui_ra_valid) {
+		vio->vui_ra_valid = true;
+		vio->vui_ra_start = cl_index(obj, pos);
+		vio->vui_ra_count = cl_index(obj, tot + PAGE_SIZE - 1);
+		ll_ras_enter(file);
 	}
 
 	/* BUG: 5972 */
 	file_accessed(file);
-	switch (vio->cui_io_subtype) {
+	switch (vio->vui_io_subtype) {
 	case IO_NORMAL:
-		LASSERT(cio->cui_iocb->ki_pos == pos);
-		result = generic_file_read_iter(cio->cui_iocb, cio->cui_iter);
+		LASSERT(vio->vui_iocb->ki_pos == pos);
+		result = generic_file_read_iter(vio->vui_iocb, vio->vui_iter);
 		break;
 	case IO_SPLICE:
 		result = generic_file_splice_read(file, &pos,
-						  vio->u.splice.cui_pipe, cnt,
-						  vio->u.splice.cui_flags);
+						  vio->u.splice.vui_pipe, cnt,
+						  vio->u.splice.vui_flags);
 		/* LU-1109: do splice read stripe by stripe otherwise if it
 		 * may make nfsd stuck if this read occupied all internal pipe
 		 * buffers.
@@ -536,7 +736,7 @@ static int vvp_io_read_start(const struct lu_env *env,
 		io->ci_continue = 0;
 		break;
 	default:
-		CERROR("Wrong IO type %u\n", vio->cui_io_subtype);
+		CERROR("Wrong IO type %u\n", vio->vui_io_subtype);
 		LBUG();
 	}
 
@@ -546,30 +746,201 @@ out:
 			io->ci_continue = 0;
 		io->ci_nob += result;
 		ll_rw_stats_tally(ll_i2sbi(inode), current->pid,
-				  cio->cui_fd, pos, result, READ);
+				  vio->vui_fd, pos, result, READ);
 		result = 0;
 	}
 	return result;
 }
 
-static void vvp_io_read_fini(const struct lu_env *env, const struct cl_io_slice *ios)
+static int vvp_io_commit_sync(const struct lu_env *env, struct cl_io *io,
+			      struct cl_page_list *plist, int from, int to)
 {
-	struct vvp_io *vio = cl2vvp_io(env, ios);
-	struct ccc_io *cio = cl2ccc_io(env, ios);
+	struct cl_2queue *queue = &io->ci_queue;
+	struct cl_page *page;
+	unsigned int bytes = 0;
+	int rc = 0;
 
-	if (vio->cui_ra_window_set)
-		ll_ra_read_ex(cio->cui_fd->fd_file, &vio->cui_bead);
+	if (plist->pl_nr == 0)
+		return 0;
 
-	vvp_io_fini(env, ios);
+	if (from > 0 || to != PAGE_SIZE) {
+		page = cl_page_list_first(plist);
+		if (plist->pl_nr == 1) {
+			cl_page_clip(env, page, from, to);
+		} else {
+			if (from > 0)
+				cl_page_clip(env, page, from, PAGE_SIZE);
+			if (to != PAGE_SIZE) {
+				page = cl_page_list_last(plist);
+				cl_page_clip(env, page, 0, to);
+			}
+		}
+	}
+
+	cl_2queue_init(queue);
+	cl_page_list_splice(plist, &queue->c2_qin);
+	rc = cl_io_submit_sync(env, io, CRT_WRITE, queue, 0);
+
+	/* plist is not sorted any more */
+	cl_page_list_splice(&queue->c2_qin, plist);
+	cl_page_list_splice(&queue->c2_qout, plist);
+	cl_2queue_fini(env, queue);
+
+	if (rc == 0) {
+		/* calculate bytes */
+		bytes = plist->pl_nr << PAGE_SHIFT;
+		bytes -= from + PAGE_SIZE - to;
+
+		while (plist->pl_nr > 0) {
+			page = cl_page_list_first(plist);
+			cl_page_list_del(env, plist, page);
+
+			cl_page_clip(env, page, 0, PAGE_SIZE);
+
+			SetPageUptodate(cl_page_vmpage(page));
+			cl_page_disown(env, io, page);
+
+			/* held in ll_cl_init() */
+			lu_ref_del(&page->cp_reference, "cl_io", io);
+			cl_page_put(env, page);
+		}
+	}
+
+	return bytes > 0 ? bytes : rc;
+}
+
+static void write_commit_callback(const struct lu_env *env, struct cl_io *io,
+				  struct cl_page *page)
+{
+	struct vvp_page *vpg;
+	struct page *vmpage = page->cp_vmpage;
+	struct cl_object *clob = cl_io_top(io)->ci_obj;
+
+	SetPageUptodate(vmpage);
+	set_page_dirty(vmpage);
+
+	vpg = cl2vvp_page(cl_object_page_slice(clob, page));
+	vvp_write_pending(cl2vvp(clob), vpg);
+
+	cl_page_disown(env, io, page);
+
+	/* held in ll_cl_init() */
+	lu_ref_del(&page->cp_reference, "cl_io", io);
+	cl_page_put(env, page);
+}
+
+/* make sure the page list is contiguous */
+static bool page_list_sanity_check(struct cl_object *obj,
+				   struct cl_page_list *plist)
+{
+	struct cl_page *page;
+	pgoff_t index = CL_PAGE_EOF;
+
+	cl_page_list_for_each(page, plist) {
+		struct vvp_page *vpg = cl_object_page_slice(obj, page);
+
+		if (index == CL_PAGE_EOF) {
+			index = vvp_index(vpg);
+			continue;
+		}
+
+		++index;
+		if (index == vvp_index(vpg))
+			continue;
+
+		return false;
+	}
+	return true;
+}
+
+/* Return how many bytes have queued or written */
+int vvp_io_write_commit(const struct lu_env *env, struct cl_io *io)
+{
+	struct cl_object *obj = io->ci_obj;
+	struct inode *inode = vvp_object_inode(obj);
+	struct vvp_io *vio = vvp_env_io(env);
+	struct cl_page_list *queue = &vio->u.write.vui_queue;
+	struct cl_page *page;
+	int rc = 0;
+	int bytes = 0;
+	unsigned int npages = vio->u.write.vui_queue.pl_nr;
+
+	if (npages == 0)
+		return 0;
+
+	CDEBUG(D_VFSTRACE, "commit async pages: %d, from %d, to %d\n",
+	       npages, vio->u.write.vui_from, vio->u.write.vui_to);
+
+	LASSERT(page_list_sanity_check(obj, queue));
+
+	/* submit IO with async write */
+	rc = cl_io_commit_async(env, io, queue,
+				vio->u.write.vui_from, vio->u.write.vui_to,
+				write_commit_callback);
+	npages -= queue->pl_nr; /* already committed pages */
+	if (npages > 0) {
+		/* calculate how many bytes were written */
+		bytes = npages << PAGE_SHIFT;
+
+		/* first page */
+		bytes -= vio->u.write.vui_from;
+		if (queue->pl_nr == 0) /* last page */
+			bytes -= PAGE_SIZE - vio->u.write.vui_to;
+		LASSERTF(bytes > 0, "bytes = %d, pages = %d\n", bytes, npages);
+
+		vio->u.write.vui_written += bytes;
+
+		CDEBUG(D_VFSTRACE, "Committed %d pages %d bytes, tot: %ld\n",
+		       npages, bytes, vio->u.write.vui_written);
+
+		/* the first page must have been written. */
+		vio->u.write.vui_from = 0;
+	}
+	LASSERT(page_list_sanity_check(obj, queue));
+	LASSERT(ergo(rc == 0, queue->pl_nr == 0));
+
+	/* out of quota, try sync write */
+	if (rc == -EDQUOT && !cl_io_is_mkwrite(io)) {
+		rc = vvp_io_commit_sync(env, io, queue,
+					vio->u.write.vui_from,
+					vio->u.write.vui_to);
+		if (rc > 0) {
+			vio->u.write.vui_written += rc;
+			rc = 0;
+		}
+	}
+
+	/* update inode size */
+	ll_merge_attr(env, inode);
+
+	/* Now the pages in queue were failed to commit, discard them
+	 * unless they were dirtied before.
+	 */
+	while (queue->pl_nr > 0) {
+		page = cl_page_list_first(queue);
+		cl_page_list_del(env, queue, page);
+
+		if (!PageDirty(cl_page_vmpage(page)))
+			cl_page_discard(env, io, page);
+
+		cl_page_disown(env, io, page);
+
+		/* held in ll_cl_init() */
+		lu_ref_del(&page->cp_reference, "cl_io", io);
+		cl_page_put(env, page);
+	}
+	cl_page_list_fini(env, queue);
+
+	return rc;
 }
 
 static int vvp_io_write_start(const struct lu_env *env,
 			      const struct cl_io_slice *ios)
 {
-	struct ccc_io      *cio   = cl2ccc_io(env, ios);
+	struct vvp_io      *vio   = cl2vvp_io(env, ios);
 	struct cl_io       *io    = ios->cis_io;
 	struct cl_object   *obj   = io->ci_obj;
-	struct inode       *inode = ccc_object_inode(obj);
+	struct inode       *inode = vvp_object_inode(obj);
 	ssize_t result = 0;
 	loff_t pos = io->u.ci_wr.wr.crw_pos;
 	size_t cnt = io->u.ci_wr.wr.crw_count;
@@ -582,25 +953,41 @@ static int vvp_io_write_start(const struct lu_env *env,
 		 * PARALLEL IO This has to be changed for parallel IO doing
 		 * out-of-order writes.
 		 */
+		ll_merge_attr(env, inode);
 		pos = io->u.ci_wr.wr.crw_pos = i_size_read(inode);
-		cio->cui_iocb->ki_pos = pos;
+		vio->vui_iocb->ki_pos = pos;
 	} else {
-		LASSERT(cio->cui_iocb->ki_pos == pos);
+		LASSERT(vio->vui_iocb->ki_pos == pos);
 	}
 
 	CDEBUG(D_VFSTRACE, "write: [%lli, %lli)\n", pos, pos + (long long)cnt);
 
-	if (!cio->cui_iter) /* from a temp io in ll_cl_init(). */
+	if (!vio->vui_iter) /* from a temp io in ll_cl_init(). */
 		result = 0;
 	else
-		result = generic_file_write_iter(cio->cui_iocb, cio->cui_iter);
+		result = generic_file_write_iter(vio->vui_iocb, vio->vui_iter);
+
+	if (result > 0) {
+		result = vvp_io_write_commit(env, io);
+		if (vio->u.write.vui_written > 0) {
+			result = vio->u.write.vui_written;
+			io->ci_nob += result;
 
+			CDEBUG(D_VFSTRACE, "write: nob %zd, result: %zd\n",
+			       io->ci_nob, result);
+		}
+	}
 	if (result > 0) {
+		struct ll_inode_info *lli = ll_i2info(inode);
+
+		spin_lock(&lli->lli_lock);
+		lli->lli_flags |= LLIF_DATA_MODIFIED;
+		spin_unlock(&lli->lli_lock);
+
 		if (result < cnt)
 			io->ci_continue = 0;
-		io->ci_nob += result;
 		ll_rw_stats_tally(ll_i2sbi(inode), current->pid,
-				  cio->cui_fd, pos, result, WRITE);
+				  vio->vui_fd, pos, result, WRITE);
 		result = 0;
 	}
 	return result;
@@ -608,10 +995,10 @@ static int vvp_io_write_start(const struct lu_env *env,
 
 static int vvp_io_kernel_fault(struct vvp_fault_io *cfio)
 {
-	struct vm_fault *vmf = cfio->fault.ft_vmf;
+	struct vm_fault *vmf = cfio->ft_vmf;
 
-	cfio->fault.ft_flags = filemap_fault(cfio->ft_vma, vmf);
-	cfio->fault.ft_flags_valid = 1;
+	cfio->ft_flags = filemap_fault(cfio->ft_vma, vmf);
+	cfio->ft_flags_valid = 1;
 
 	if (vmf->page) {
 		CDEBUG(D_PAGE,
@@ -619,39 +1006,51 @@ static int vvp_io_kernel_fault(struct vvp_fault_io *cfio)
 		       vmf->page, vmf->page->mapping, vmf->page->index,
 		       (long)vmf->page->flags, page_count(vmf->page),
 		       page_private(vmf->page), vmf->virtual_address);
-		if (unlikely(!(cfio->fault.ft_flags & VM_FAULT_LOCKED))) {
+		if (unlikely(!(cfio->ft_flags & VM_FAULT_LOCKED))) {
 			lock_page(vmf->page);
-			cfio->fault.ft_flags |= VM_FAULT_LOCKED;
+			cfio->ft_flags |= VM_FAULT_LOCKED;
 		}
 
 		cfio->ft_vmpage = vmf->page;
 		return 0;
 	}
 
-	if (cfio->fault.ft_flags & (VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV)) {
+	if (cfio->ft_flags & (VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV)) {
 		CDEBUG(D_PAGE, "got addr %p - SIGBUS\n", vmf->virtual_address);
 		return -EFAULT;
 	}
 
-	if (cfio->fault.ft_flags & VM_FAULT_OOM) {
+	if (cfio->ft_flags & VM_FAULT_OOM) {
 		CDEBUG(D_PAGE, "got addr %p - OOM\n", vmf->virtual_address);
 		return -ENOMEM;
 	}
 
-	if (cfio->fault.ft_flags & VM_FAULT_RETRY)
+	if (cfio->ft_flags & VM_FAULT_RETRY)
 		return -EAGAIN;
 
-	CERROR("Unknown error in page fault %d!\n", cfio->fault.ft_flags);
+	CERROR("Unknown error in page fault %d!\n", cfio->ft_flags);
 	return -EINVAL;
 }
 
+static void mkwrite_commit_callback(const struct lu_env *env, struct cl_io *io,
+				    struct cl_page *page)
+{
+	struct vvp_page *vpg;
+	struct cl_object *clob = cl_io_top(io)->ci_obj;
+
+	set_page_dirty(page->cp_vmpage);
+
+	vpg = cl2vvp_page(cl_object_page_slice(clob, page));
+	vvp_write_pending(cl2vvp(clob), vpg);
+}
+
 static int vvp_io_fault_start(const struct lu_env *env,
 			      const struct cl_io_slice *ios)
 {
 	struct vvp_io       *vio     = cl2vvp_io(env, ios);
 	struct cl_io	*io      = ios->cis_io;
 	struct cl_object    *obj     = io->ci_obj;
-	struct inode	*inode   = ccc_object_inode(obj);
+	struct inode        *inode   = vvp_object_inode(obj);
 	struct cl_fault_io  *fio     = &io->u.ci_fault;
 	struct vvp_fault_io *cfio    = &vio->u.fault;
 	loff_t	       offset;
@@ -659,7 +1058,7 @@ static int vvp_io_fault_start(const struct lu_env *env,
 	struct page	  *vmpage  = NULL;
 	struct cl_page      *page;
 	loff_t	       size;
-	pgoff_t	      last; /* last page in a file data region */
+	pgoff_t		     last_index;
 
 	if (fio->ft_executable &&
 	    inode->i_mtime.tv_sec != vio->u.fault.ft_mtime)
@@ -670,7 +1069,7 @@ static int vvp_io_fault_start(const struct lu_env *env,
 	/* offset of the last byte on the page */
 	offset = cl_offset(obj, fio->ft_index + 1) - 1;
 	LASSERT(cl_index(obj, offset) == fio->ft_index);
-	result = ccc_prep_size(env, obj, io, 0, offset + 1, NULL);
+	result = vvp_prep_size(env, obj, io, 0, offset + 1, NULL);
 	if (result != 0)
 		return result;
 
@@ -705,15 +1104,15 @@ static int vvp_io_fault_start(const struct lu_env *env,
 		goto out;
 	}
 
+	last_index = cl_index(obj, size - 1);
+
 	if (fio->ft_mkwrite) {
-		pgoff_t last_index;
 		/*
 		 * Capture the size while holding the lli_trunc_sem from above
 		 * we want to make sure that we complete the mkwrite action
 		 * while holding this lock. We need to make sure that we are
 		 * not past the end of the file.
 		 */
-		last_index = cl_index(obj, size - 1);
 		if (last_index < fio->ft_index) {
 			CDEBUG(D_PAGE,
 			       "llite: mkwrite and truncate race happened: %p: 0x%lx 0x%lx\n",
@@ -745,25 +1144,32 @@ static int vvp_io_fault_start(const struct lu_env *env,
 	 */
 	if (fio->ft_mkwrite) {
 		wait_on_page_writeback(vmpage);
-		if (set_page_dirty(vmpage)) {
-			struct ccc_page *cp;
+		if (!PageDirty(vmpage)) {
+			struct cl_page_list *plist = &io->ci_queue.c2_qin;
+			struct vvp_page *vpg = cl_object_page_slice(obj, page);
+			int to = PAGE_SIZE;
 
 			/* vvp_page_assume() calls wait_on_page_writeback(). */
 			cl_page_assume(env, io, page);
 
-			cp = cl2ccc_page(cl_page_at(page, &vvp_device_type));
-			vvp_write_pending(cl2ccc(obj), cp);
+			cl_page_list_init(plist);
+			cl_page_list_add(plist, page);
+
+			/* size fixup */
+			if (last_index == vvp_index(vpg))
+				to = size & ~PAGE_MASK;
 
 			/* Do not set Dirty bit here so that in case IO is
 			 * started before the page is really made dirty, we
 			 * still have chance to detect it.
 			 */
-			result = cl_page_cache_add(env, io, page, CRT_WRITE);
+			result = cl_io_commit_async(env, io, plist, 0, to,
+						    mkwrite_commit_callback);
 			LASSERT(cl_page_is_owned(page, io));
+			cl_page_list_fini(env, plist);
 
 			vmpage = NULL;
 			if (result < 0) {
-				cl_page_unmap(env, io, page);
 				cl_page_discard(env, io, page);
 				cl_page_disown(env, io, page);
 
@@ -773,20 +1179,20 @@ static int vvp_io_fault_start(const struct lu_env *env,
 				if (result == -EDQUOT)
 					result = -ENOSPC;
 				goto out;
-			} else
+			} else {
 				cl_page_disown(env, io, page);
+			}
 		}
 	}
 
-	last = cl_index(obj, size - 1);
 	/*
 	 * The ft_index is only used in the case of
 	 * a mkwrite action. We need to check
 	 * our assertions are correct, since
 	 * we should have caught this above
 	 */
-	LASSERT(!fio->ft_mkwrite || fio->ft_index <= last);
-	if (fio->ft_index == last)
+	LASSERT(!fio->ft_mkwrite || fio->ft_index <= last_index);
+	if (fio->ft_index == last_index)
 		/*
 		 * Last page is mapped partially.
 		 */
@@ -801,7 +1207,9 @@ out:
 	/* return unlocked vmpage to avoid deadlocking */
 	if (vmpage)
 		unlock_page(vmpage);
-	cfio->fault.ft_flags &= ~VM_FAULT_LOCKED;
+
+	cfio->ft_flags &= ~VM_FAULT_LOCKED;
+
 	return result;
 }
 
@@ -820,293 +1228,58 @@ static int vvp_io_read_page(const struct lu_env *env,
 			    const struct cl_page_slice *slice)
 {
 	struct cl_io	      *io     = ios->cis_io;
-	struct cl_object	  *obj    = slice->cpl_obj;
-	struct ccc_page	   *cp     = cl2ccc_page(slice);
+	struct vvp_page           *vpg    = cl2vvp_page(slice);
 	struct cl_page	    *page   = slice->cpl_page;
-	struct inode	      *inode  = ccc_object_inode(obj);
+	struct inode              *inode  = vvp_object_inode(slice->cpl_obj);
 	struct ll_sb_info	 *sbi    = ll_i2sbi(inode);
-	struct ll_file_data       *fd     = cl2ccc_io(env, ios)->cui_fd;
+	struct ll_file_data       *fd     = cl2vvp_io(env, ios)->vui_fd;
 	struct ll_readahead_state *ras    = &fd->fd_ras;
-	struct page		*vmpage = cp->cpg_page;
 	struct cl_2queue	  *queue  = &io->ci_queue;
-	int rc;
-
-	CLOBINVRNT(env, obj, ccc_object_invariant(obj));
-	LASSERT(slice->cpl_obj == obj);
 
 	if (sbi->ll_ra_info.ra_max_pages_per_file &&
 	    sbi->ll_ra_info.ra_max_pages)
-		ras_update(sbi, inode, ras, page->cp_index,
-			   cp->cpg_defer_uptodate);
-
-	/* Sanity check whether the page is protected by a lock. */
-	rc = cl_page_is_under_lock(env, io, page);
-	if (rc != -EBUSY) {
-		CL_PAGE_HEADER(D_WARNING, env, page, "%s: %d\n",
-			       rc == -ENODATA ? "without a lock" :
-			       "match failed", rc);
-		if (rc != -ENODATA)
-			return rc;
-	}
+		ras_update(sbi, inode, ras, vvp_index(vpg),
+			   vpg->vpg_defer_uptodate);
 
-	if (cp->cpg_defer_uptodate) {
-		cp->cpg_ra_used = 1;
+	if (vpg->vpg_defer_uptodate) {
+		vpg->vpg_ra_used = 1;
 		cl_page_export(env, page, 1);
 	}
 	/*
 	 * Add page into the queue even when it is marked uptodate above.
 	 * this will unlock it automatically as part of cl_page_list_disown().
 	 */
+
 	cl_page_list_add(&queue->c2_qin, page);
 	if (sbi->ll_ra_info.ra_max_pages_per_file &&
 	    sbi->ll_ra_info.ra_max_pages)
-		ll_readahead(env, io, ras,
-			     vmpage->mapping, &queue->c2_qin, fd->fd_flags);
+		ll_readahead(env, io, &queue->c2_qin, ras,
+			     vpg->vpg_defer_uptodate);
 
 	return 0;
 }
 
-static int vvp_page_sync_io(const struct lu_env *env, struct cl_io *io,
-			    struct cl_page *page, struct ccc_page *cp,
-			    enum cl_req_type crt)
+void vvp_io_end(const struct lu_env *env, const struct cl_io_slice *ios)
 {
-	struct cl_2queue  *queue;
-	int result;
-
-	LASSERT(io->ci_type == CIT_READ || io->ci_type == CIT_WRITE);
-
-	queue = &io->ci_queue;
-	cl_2queue_init_page(queue, page);
-
-	result = cl_io_submit_sync(env, io, crt, queue, 0);
-	LASSERT(cl_page_is_owned(page, io));
-
-	if (crt == CRT_READ)
-		/*
-		 * in CRT_WRITE case page is left locked even in case of
-		 * error.
-		 */
-		cl_page_list_disown(env, io, &queue->c2_qin);
-	cl_2queue_fini(env, queue);
-
-	return result;
-}
-
-/**
- * Prepare partially written-to page for a write.
- */
-static int vvp_io_prepare_partial(const struct lu_env *env, struct cl_io *io,
-				  struct cl_object *obj, struct cl_page *pg,
-				  struct ccc_page *cp,
-				  unsigned from, unsigned to)
-{
-	struct cl_attr *attr   = ccc_env_thread_attr(env);
-	loff_t	  offset = cl_offset(obj, pg->cp_index);
-	int	     result;
-
-	cl_object_attr_lock(obj);
-	result = cl_object_attr_get(env, obj, attr);
-	cl_object_attr_unlock(obj);
-	if (result == 0) {
-		/*
-		 * If are writing to a new page, no need to read old data.
-		 * The extent locking will have updated the KMS, and for our
-		 * purposes here we can treat it like i_size.
-		 */
-		if (attr->cat_kms <= offset) {
-			char *kaddr = kmap_atomic(cp->cpg_page);
-
-			memset(kaddr, 0, cl_page_size(obj));
-			kunmap_atomic(kaddr);
-		} else if (cp->cpg_defer_uptodate)
-			cp->cpg_ra_used = 1;
-		else
-			result = vvp_page_sync_io(env, io, pg, cp, CRT_READ);
-		/*
-		 * In older implementations, obdo_refresh_inode is called here
-		 * to update the inode because the write might modify the
-		 * object info at OST. However, this has been proven useless,
-		 * since LVB functions will be called when user space program
-		 * tries to retrieve inode attribute.  Also, see bug 15909 for
-		 * details. -jay
-		 */
-		if (result == 0)
-			cl_page_export(env, pg, 1);
-	}
-	return result;
-}
-
-static int vvp_io_prepare_write(const struct lu_env *env,
-				const struct cl_io_slice *ios,
-				const struct cl_page_slice *slice,
-				unsigned from, unsigned to)
-{
-	struct cl_object *obj    = slice->cpl_obj;
-	struct ccc_page  *cp     = cl2ccc_page(slice);
-	struct cl_page   *pg     = slice->cpl_page;
-	struct page       *vmpage = cp->cpg_page;
-
-	int result;
-
-	LINVRNT(cl_page_is_vmlocked(env, pg));
-	LASSERT(vmpage->mapping->host == ccc_object_inode(obj));
-
-	result = 0;
-
-	CL_PAGE_HEADER(D_PAGE, env, pg, "preparing: [%d, %d]\n", from, to);
-	if (!PageUptodate(vmpage)) {
-		/*
-		 * We're completely overwriting an existing page, so _don't_
-		 * set it up to date until commit_write
-		 */
-		if (from == 0 && to == PAGE_SIZE) {
-			CL_PAGE_HEADER(D_PAGE, env, pg, "full page write\n");
-			POISON_PAGE(page, 0x11);
-		} else
-			result = vvp_io_prepare_partial(env, ios->cis_io, obj,
-							pg, cp, from, to);
-	} else
-		CL_PAGE_HEADER(D_PAGE, env, pg, "uptodate\n");
-	return result;
-}
-
-static int vvp_io_commit_write(const struct lu_env *env,
-			       const struct cl_io_slice *ios,
-			       const struct cl_page_slice *slice,
-			       unsigned from, unsigned to)
-{
-	struct cl_object  *obj    = slice->cpl_obj;
-	struct cl_io      *io     = ios->cis_io;
-	struct ccc_page   *cp     = cl2ccc_page(slice);
-	struct cl_page    *pg     = slice->cpl_page;
-	struct inode      *inode  = ccc_object_inode(obj);
-	struct ll_sb_info *sbi    = ll_i2sbi(inode);
-	struct ll_inode_info *lli = ll_i2info(inode);
-	struct page	*vmpage = cp->cpg_page;
-
-	int    result;
-	int    tallyop;
-	loff_t size;
-
-	LINVRNT(cl_page_is_vmlocked(env, pg));
-	LASSERT(vmpage->mapping->host == inode);
-
-	LU_OBJECT_HEADER(D_INODE, env, &obj->co_lu, "committing page write\n");
-	CL_PAGE_HEADER(D_PAGE, env, pg, "committing: [%d, %d]\n", from, to);
-
-	/*
-	 * queue a write for some time in the future the first time we
-	 * dirty the page.
-	 *
-	 * This is different from what other file systems do: they usually
-	 * just mark page (and some of its buffers) dirty and rely on
-	 * balance_dirty_pages() to start a write-back. Lustre wants write-back
-	 * to be started earlier for the following reasons:
-	 *
-	 *     (1) with a large number of clients we need to limit the amount
-	 *     of cached data on the clients a lot;
-	 *
-	 *     (2) large compute jobs generally want compute-only then io-only
-	 *     and the IO should complete as quickly as possible;
-	 *
-	 *     (3) IO is batched up to the RPC size and is async until the
-	 *     client max cache is hit
-	 *     (/sys/fs/lustre/osc/OSC.../max_dirty_mb)
-	 *
-	 */
-	if (!PageDirty(vmpage)) {
-		tallyop = LPROC_LL_DIRTY_MISSES;
-		result = cl_page_cache_add(env, io, pg, CRT_WRITE);
-		if (result == 0) {
-			/* page was added into cache successfully. */
-			set_page_dirty(vmpage);
-			vvp_write_pending(cl2ccc(obj), cp);
-		} else if (result == -EDQUOT) {
-			pgoff_t last_index = i_size_read(inode) >> PAGE_SHIFT;
-			bool need_clip = true;
-
-			/*
-			 * Client ran out of disk space grant. Possible
-			 * strategies are:
-			 *
-			 *     (a) do a sync write, renewing grant;
-			 *
-			 *     (b) stop writing on this stripe, switch to the
-			 *     next one.
-			 *
-			 * (b) is a part of "parallel io" design that is the
-			 * ultimate goal. (a) is what "old" client did, and
-			 * what the new code continues to do for the time
-			 * being.
-			 */
-			if (last_index > pg->cp_index) {
-				to = PAGE_SIZE;
-				need_clip = false;
-			} else if (last_index == pg->cp_index) {
-				int size_to = i_size_read(inode) & ~CFS_PAGE_MASK;
-
-				if (to < size_to)
-					to = size_to;
-			}
-			if (need_clip)
-				cl_page_clip(env, pg, 0, to);
-			result = vvp_page_sync_io(env, io, pg, cp, CRT_WRITE);
-			if (result)
-				CERROR("Write page %lu of inode %p failed %d\n",
-				       pg->cp_index, inode, result);
-		}
-	} else {
-		tallyop = LPROC_LL_DIRTY_HITS;
-		result = 0;
-	}
-	ll_stats_ops_tally(sbi, tallyop, 1);
-
-	/* Inode should be marked DIRTY even if no new page was marked DIRTY
-	 * because page could have been not flushed between 2 modifications.
-	 * It is important the file is marked DIRTY as soon as the I/O is done
-	 * Indeed, when cache is flushed, file could be already closed and it
-	 * is too late to warn the MDT.
-	 * It is acceptable that file is marked DIRTY even if I/O is dropped
-	 * for some reasons before being flushed to OST.
-	 */
-	if (result == 0) {
-		spin_lock(&lli->lli_lock);
-		lli->lli_flags |= LLIF_DATA_MODIFIED;
-		spin_unlock(&lli->lli_lock);
-	}
-
-	size = cl_offset(obj, pg->cp_index) + to;
-
-	ll_inode_size_lock(inode);
-	if (result == 0) {
-		if (size > i_size_read(inode)) {
-			cl_isize_write_nolock(inode, size);
-			CDEBUG(D_VFSTRACE, DFID" updating i_size %lu\n",
-			       PFID(lu_object_fid(&obj->co_lu)),
-			       (unsigned long)size);
-		}
-		cl_page_export(env, pg, 1);
-	} else {
-		if (size > i_size_read(inode))
-			cl_page_discard(env, io, pg);
-	}
-	ll_inode_size_unlock(inode);
-	return result;
+	CLOBINVRNT(env, ios->cis_io->ci_obj,
+		   vvp_object_invariant(ios->cis_io->ci_obj));
 }
 
 static const struct cl_io_operations vvp_io_ops = {
 	.op = {
 		[CIT_READ] = {
-			.cio_fini      = vvp_io_read_fini,
+			.cio_fini	= vvp_io_fini,
 			.cio_lock      = vvp_io_read_lock,
 			.cio_start     = vvp_io_read_start,
-			.cio_advance   = ccc_io_advance
+			.cio_advance	= vvp_io_advance,
 		},
 		[CIT_WRITE] = {
 			.cio_fini      = vvp_io_fini,
+			.cio_iter_init = vvp_io_write_iter_init,
+			.cio_iter_fini = vvp_io_write_iter_fini,
 			.cio_lock      = vvp_io_write_lock,
 			.cio_start     = vvp_io_write_start,
-			.cio_advance   = ccc_io_advance
+			.cio_advance   = vvp_io_advance,
 		},
 		[CIT_SETATTR] = {
 			.cio_fini       = vvp_io_setattr_fini,
@@ -1120,7 +1293,7 @@ static const struct cl_io_operations vvp_io_ops = {
 			.cio_iter_init = vvp_io_fault_iter_init,
 			.cio_lock      = vvp_io_fault_lock,
 			.cio_start     = vvp_io_fault_start,
-			.cio_end       = ccc_io_end
+			.cio_end       = vvp_io_end,
 		},
 		[CIT_FSYNC] = {
 			.cio_start  = vvp_io_fsync_start,
@@ -1131,29 +1304,26 @@ static const struct cl_io_operations vvp_io_ops = {
 		}
 	},
 	.cio_read_page     = vvp_io_read_page,
-	.cio_prepare_write = vvp_io_prepare_write,
-	.cio_commit_write  = vvp_io_commit_write
 };
 
 int vvp_io_init(const struct lu_env *env, struct cl_object *obj,
 		struct cl_io *io)
 {
 	struct vvp_io      *vio   = vvp_env_io(env);
-	struct ccc_io      *cio   = ccc_env_io(env);
-	struct inode       *inode = ccc_object_inode(obj);
+	struct inode       *inode = vvp_object_inode(obj);
 	int		 result;
 
-	CLOBINVRNT(env, obj, ccc_object_invariant(obj));
+	CLOBINVRNT(env, obj, vvp_object_invariant(obj));
 
 	CDEBUG(D_VFSTRACE, DFID
 	       " ignore/verify layout %d/%d, layout version %d restore needed %d\n",
 	       PFID(lu_object_fid(&obj->co_lu)),
 	       io->ci_ignore_layout, io->ci_verify_layout,
-	       cio->cui_layout_gen, io->ci_restore_needed);
+	       vio->vui_layout_gen, io->ci_restore_needed);
 
-	CL_IO_SLICE_CLEAN(cio, cui_cl);
-	cl_io_slice_add(io, &cio->cui_cl, obj, &vvp_io_ops);
-	vio->cui_ra_window_set = 0;
+	CL_IO_SLICE_CLEAN(vio, vui_cl);
+	cl_io_slice_add(io, &vio->vui_cl, obj, &vvp_io_ops);
+	vio->vui_ra_valid = false;
 	result = 0;
 	if (io->ci_type == CIT_READ || io->ci_type == CIT_WRITE) {
 		size_t count;
@@ -1166,7 +1336,7 @@ int vvp_io_init(const struct lu_env *env, struct cl_object *obj,
 		if (count == 0)
 			result = 1;
 		else
-			cio->cui_tot_count = count;
+			vio->vui_tot_count = count;
 
 		/* for read/write, we store the jobid in the inode, and
 		 * it'll be fetched by osc when building RPC.
@@ -1192,7 +1362,7 @@ int vvp_io_init(const struct lu_env *env, struct cl_object *obj,
 	 * because it might not grant layout lock in IT_OPEN.
 	 */
 	if (result == 0 && !io->ci_ignore_layout) {
-		result = ll_layout_refresh(inode, &cio->cui_layout_gen);
+		result = ll_layout_refresh(inode, &vio->vui_layout_gen);
 		if (result == -ENOENT)
 			/* If the inode on MDS has been removed, but the objects
 			 * on OSTs haven't been destroyed (async unlink), layout
@@ -1208,11 +1378,3 @@ int vvp_io_init(const struct lu_env *env, struct cl_object *obj,
 
 	return result;
 }
-
-static struct vvp_io *cl2vvp_io(const struct lu_env *env,
-				const struct cl_io_slice *slice)
-{
-	/* Calling just for assertion */
-	cl2ccc_io(env, slice);
-	return vvp_env_io(env);
-}
diff --git a/drivers/staging/lustre/lustre/llite/vvp_lock.c b/drivers/staging/lustre/lustre/llite/vvp_lock.c
index ff0948043..f5bd6c22e 100644
--- a/drivers/staging/lustre/lustre/llite/vvp_lock.c
+++ b/drivers/staging/lustre/lustre/llite/vvp_lock.c
@@ -40,7 +40,7 @@
 
 #define DEBUG_SUBSYSTEM S_LLITE
 
-#include "../include/obd.h"
+#include "../include/obd_support.h"
 #include "../include/lustre_lite.h"
 
 #include "vvp_internal.h"
@@ -51,36 +51,41 @@
  *
  */
 
-/**
- * Estimates lock value for the purpose of managing the lock cache during
- * memory shortages.
- *
- * Locks for memory mapped files are almost infinitely precious, others are
- * junk. "Mapped locks" are heavy, but not infinitely heavy, so that they are
- * ordered within themselves by weights assigned from other layers.
- */
-static unsigned long vvp_lock_weigh(const struct lu_env *env,
-				    const struct cl_lock_slice *slice)
+static void vvp_lock_fini(const struct lu_env *env, struct cl_lock_slice *slice)
+{
+	struct vvp_lock *vlk = cl2vvp_lock(slice);
+
+	kmem_cache_free(vvp_lock_kmem, vlk);
+}
+
+static int vvp_lock_enqueue(const struct lu_env *env,
+			    const struct cl_lock_slice *slice,
+			    struct cl_io *unused, struct cl_sync_io *anchor)
 {
-	struct ccc_object *cob = cl2ccc(slice->cls_obj);
+	CLOBINVRNT(env, slice->cls_obj, vvp_object_invariant(slice->cls_obj));
 
-	return atomic_read(&cob->cob_mmap_cnt) > 0 ? ~0UL >> 2 : 0;
+	return 0;
 }
 
 static const struct cl_lock_operations vvp_lock_ops = {
-	.clo_delete    = ccc_lock_delete,
-	.clo_fini      = ccc_lock_fini,
-	.clo_enqueue   = ccc_lock_enqueue,
-	.clo_wait      = ccc_lock_wait,
-	.clo_use       = ccc_lock_use,
-	.clo_unuse     = ccc_lock_unuse,
-	.clo_fits_into = ccc_lock_fits_into,
-	.clo_state     = ccc_lock_state,
-	.clo_weigh     = vvp_lock_weigh
+	.clo_fini	= vvp_lock_fini,
+	.clo_enqueue	= vvp_lock_enqueue,
 };
 
 int vvp_lock_init(const struct lu_env *env, struct cl_object *obj,
-		  struct cl_lock *lock, const struct cl_io *io)
+		  struct cl_lock *lock, const struct cl_io *unused)
 {
-	return ccc_lock_init(env, obj, lock, io, &vvp_lock_ops);
+	struct vvp_lock *vlk;
+	int result;
+
+	CLOBINVRNT(env, obj, vvp_object_invariant(obj));
+
+	vlk = kmem_cache_zalloc(vvp_lock_kmem, GFP_NOFS);
+	if (vlk) {
+		cl_lock_slice_add(lock, &vlk->vlk_cl, obj, &vvp_lock_ops);
+		result = 0;
+	} else {
+		result = -ENOMEM;
+	}
+	return result;
 }
diff --git a/drivers/staging/lustre/lustre/llite/vvp_object.c b/drivers/staging/lustre/lustre/llite/vvp_object.c
index 03c887d8e..18c9df7eb 100644
--- a/drivers/staging/lustre/lustre/llite/vvp_object.c
+++ b/drivers/staging/lustre/lustre/llite/vvp_object.c
@@ -45,6 +45,7 @@
 #include "../include/obd.h"
 #include "../include/lustre_lite.h"
 
+#include "llite_internal.h"
 #include "vvp_internal.h"
 
 /*****************************************************************************
@@ -53,16 +54,25 @@
  *
  */
 
+int vvp_object_invariant(const struct cl_object *obj)
+{
+	struct inode *inode  = vvp_object_inode(obj);
+	struct ll_inode_info *lli = ll_i2info(inode);
+
+	return (S_ISREG(inode->i_mode) || inode->i_mode == 0) &&
+		lli->lli_clob == obj;
+}
+
 static int vvp_object_print(const struct lu_env *env, void *cookie,
 			    lu_printer_t p, const struct lu_object *o)
 {
-	struct ccc_object    *obj   = lu2ccc(o);
-	struct inode	 *inode = obj->cob_inode;
+	struct vvp_object    *obj   = lu2vvp(o);
+	struct inode	 *inode = obj->vob_inode;
 	struct ll_inode_info *lli;
 
 	(*p)(env, cookie, "(%s %d %d) inode: %p ",
-	     list_empty(&obj->cob_pending_list) ? "-" : "+",
-	     obj->cob_transient_pages, atomic_read(&obj->cob_mmap_cnt),
+	     list_empty(&obj->vob_pending_list) ? "-" : "+",
+	     obj->vob_transient_pages, atomic_read(&obj->vob_mmap_cnt),
 	     inode);
 	if (inode) {
 		lli = ll_i2info(inode);
@@ -77,7 +87,7 @@ static int vvp_object_print(const struct lu_env *env, void *cookie,
 static int vvp_attr_get(const struct lu_env *env, struct cl_object *obj,
 			struct cl_attr *attr)
 {
-	struct inode *inode = ccc_object_inode(obj);
+	struct inode *inode = vvp_object_inode(obj);
 
 	/*
 	 * lov overwrites most of these fields in
@@ -99,7 +109,7 @@ static int vvp_attr_get(const struct lu_env *env, struct cl_object *obj,
 static int vvp_attr_set(const struct lu_env *env, struct cl_object *obj,
 			const struct cl_attr *attr, unsigned valid)
 {
-	struct inode *inode = ccc_object_inode(obj);
+	struct inode *inode = vvp_object_inode(obj);
 
 	if (valid & CAT_UID)
 		inode->i_uid = make_kuid(&init_user_ns, attr->cat_uid);
@@ -112,7 +122,7 @@ static int vvp_attr_set(const struct lu_env *env, struct cl_object *obj,
 	if (valid & CAT_CTIME)
 		inode->i_ctime.tv_sec = attr->cat_ctime;
 	if (0 && valid & CAT_SIZE)
-		cl_isize_write_nolock(inode, attr->cat_size);
+		i_size_write(inode, attr->cat_size);
 	/* not currently necessary */
 	if (0 && valid & (CAT_UID|CAT_GID|CAT_SIZE))
 		mark_inode_dirty(inode);
@@ -165,6 +175,40 @@ static int vvp_conf_set(const struct lu_env *env, struct cl_object *obj,
 	return 0;
 }
 
+static int vvp_prune(const struct lu_env *env, struct cl_object *obj)
+{
+	struct inode *inode = vvp_object_inode(obj);
+	int rc;
+
+	rc = cl_sync_file_range(inode, 0, OBD_OBJECT_EOF, CL_FSYNC_LOCAL, 1);
+	if (rc < 0) {
+		CDEBUG(D_VFSTRACE, DFID ": writeback failed: %d\n",
+		       PFID(lu_object_fid(&obj->co_lu)), rc);
+		return rc;
+	}
+
+	truncate_inode_pages(inode->i_mapping, 0);
+	return 0;
+}
+
+static int vvp_object_glimpse(const struct lu_env *env,
+			      const struct cl_object *obj, struct ost_lvb *lvb)
+{
+	struct inode *inode = vvp_object_inode(obj);
+
+	lvb->lvb_mtime = LTIME_S(inode->i_mtime);
+	lvb->lvb_atime = LTIME_S(inode->i_atime);
+	lvb->lvb_ctime = LTIME_S(inode->i_ctime);
+	/*
+	 * LU-417: Add dirty pages block count lest i_blocks reports 0, some
+	 * "cp" or "tar" on remote node may think it's a completely sparse file
+	 * and skip it.
+	 */
+	if (lvb->lvb_size > 0 && lvb->lvb_blocks == 0)
+		lvb->lvb_blocks = dirty_cnt(inode);
+	return 0;
+}
+
 static const struct cl_object_operations vvp_ops = {
 	.coo_page_init = vvp_page_init,
 	.coo_lock_init = vvp_lock_init,
@@ -172,29 +216,94 @@ static const struct cl_object_operations vvp_ops = {
 	.coo_attr_get  = vvp_attr_get,
 	.coo_attr_set  = vvp_attr_set,
 	.coo_conf_set  = vvp_conf_set,
-	.coo_glimpse   = ccc_object_glimpse
+	.coo_prune     = vvp_prune,
+	.coo_glimpse   = vvp_object_glimpse
 };
 
+static int vvp_object_init0(const struct lu_env *env,
+			    struct vvp_object *vob,
+			    const struct cl_object_conf *conf)
+{
+	vob->vob_inode = conf->coc_inode;
+	vob->vob_transient_pages = 0;
+	cl_object_page_init(&vob->vob_cl, sizeof(struct vvp_page));
+	return 0;
+}
+
+static int vvp_object_init(const struct lu_env *env, struct lu_object *obj,
+			   const struct lu_object_conf *conf)
+{
+	struct vvp_device *dev = lu2vvp_dev(obj->lo_dev);
+	struct vvp_object *vob = lu2vvp(obj);
+	struct lu_object  *below;
+	struct lu_device  *under;
+	int result;
+
+	under = &dev->vdv_next->cd_lu_dev;
+	below = under->ld_ops->ldo_object_alloc(env, obj->lo_header, under);
+	if (below) {
+		const struct cl_object_conf *cconf;
+
+		cconf = lu2cl_conf(conf);
+		INIT_LIST_HEAD(&vob->vob_pending_list);
+		lu_object_add(obj, below);
+		result = vvp_object_init0(env, vob, cconf);
+	} else {
+		result = -ENOMEM;
+	}
+
+	return result;
+}
+
+static void vvp_object_free(const struct lu_env *env, struct lu_object *obj)
+{
+	struct vvp_object *vob = lu2vvp(obj);
+
+	lu_object_fini(obj);
+	lu_object_header_fini(obj->lo_header);
+	kmem_cache_free(vvp_object_kmem, vob);
+}
+
 static const struct lu_object_operations vvp_lu_obj_ops = {
-	.loo_object_init  = ccc_object_init,
-	.loo_object_free  = ccc_object_free,
-	.loo_object_print = vvp_object_print
+	.loo_object_init	= vvp_object_init,
+	.loo_object_free	= vvp_object_free,
+	.loo_object_print	= vvp_object_print,
 };
 
-struct ccc_object *cl_inode2ccc(struct inode *inode)
+struct vvp_object *cl_inode2vvp(struct inode *inode)
 {
-	struct cl_inode_info *lli = cl_i2info(inode);
+	struct ll_inode_info *lli = ll_i2info(inode);
 	struct cl_object     *obj = lli->lli_clob;
 	struct lu_object     *lu;
 
 	lu = lu_object_locate(obj->co_lu.lo_header, &vvp_device_type);
 	LASSERT(lu);
-	return lu2ccc(lu);
+	return lu2vvp(lu);
 }
 
 struct lu_object *vvp_object_alloc(const struct lu_env *env,
-				   const struct lu_object_header *hdr,
+				   const struct lu_object_header *unused,
 				   struct lu_device *dev)
 {
-	return ccc_object_alloc(env, hdr, dev, &vvp_ops, &vvp_lu_obj_ops);
+	struct vvp_object *vob;
+	struct lu_object  *obj;
+
+	vob = kmem_cache_zalloc(vvp_object_kmem, GFP_NOFS);
+	if (vob) {
+		struct cl_object_header *hdr;
+
+		obj = &vob->vob_cl.co_lu;
+		hdr = &vob->vob_header;
+		cl_object_header_init(hdr);
+		hdr->coh_page_bufsize = cfs_size_round(sizeof(struct cl_page));
+
+		lu_object_init(obj, &hdr->coh_lu, dev);
+		lu_object_add_top(&hdr->coh_lu, obj);
+
+		vob->vob_cl.co_ops = &vvp_ops;
+		obj->lo_ops = &vvp_lu_obj_ops;
+	} else {
+		obj = NULL;
+	}
+	return obj;
 }
diff --git a/drivers/staging/lustre/lustre/llite/vvp_page.c b/drivers/staging/lustre/lustre/llite/vvp_page.c
index 33ca3eb34..6cd2af7a9 100644
--- a/drivers/staging/lustre/lustre/llite/vvp_page.c
+++ b/drivers/staging/lustre/lustre/llite/vvp_page.c
@@ -41,9 +41,16 @@
 
 #define DEBUG_SUBSYSTEM S_LLITE
 
-#include "../include/obd.h"
+#include <linux/atomic.h>
+#include <linux/bitops.h>
+#include <linux/mm.h>
+#include <linux/mutex.h>
+#include <linux/page-flags.h>
+#include <linux/pagemap.h>
+
 #include "../include/lustre_lite.h"
 
+#include "llite_internal.h"
 #include "vvp_internal.h"
 
 /*****************************************************************************
@@ -52,9 +59,9 @@
  *
  */
 
-static void vvp_page_fini_common(struct ccc_page *cp)
+static void vvp_page_fini_common(struct vvp_page *vpg)
 {
-	struct page *vmpage = cp->cpg_page;
+	struct page *vmpage = vpg->vpg_page;
 
 	LASSERT(vmpage);
 	put_page(vmpage);
@@ -63,23 +70,23 @@ static void vvp_page_fini_common(struct ccc_page *cp)
 static void vvp_page_fini(const struct lu_env *env,
 			  struct cl_page_slice *slice)
 {
-	struct ccc_page *cp = cl2ccc_page(slice);
-	struct page *vmpage  = cp->cpg_page;
+	struct vvp_page *vpg     = cl2vvp_page(slice);
+	struct page     *vmpage  = vpg->vpg_page;
 
 	/*
 	 * vmpage->private was already cleared when page was moved into
 	 * VPG_FREEING state.
 	 */
 	LASSERT((struct cl_page *)vmpage->private != slice->cpl_page);
-	vvp_page_fini_common(cp);
+	vvp_page_fini_common(vpg);
 }
 
 static int vvp_page_own(const struct lu_env *env,
 			const struct cl_page_slice *slice, struct cl_io *io,
 			int nonblock)
 {
-	struct ccc_page *vpg    = cl2ccc_page(slice);
-	struct page      *vmpage = vpg->cpg_page;
+	struct vvp_page *vpg    = cl2vvp_page(slice);
+	struct page     *vmpage = vpg->vpg_page;
 
 	LASSERT(vmpage);
 	if (nonblock) {
@@ -96,6 +103,7 @@ static int vvp_page_own(const struct lu_env *env,
 
 	lock_page(vmpage);
 	wait_on_page_writeback(vmpage);
+
 	return 0;
 }
 
@@ -136,41 +144,15 @@ static void vvp_page_discard(const struct lu_env *env,
 			     struct cl_io *unused)
 {
 	struct page	   *vmpage  = cl2vm_page(slice);
-	struct address_space *mapping;
-	struct ccc_page      *cpg     = cl2ccc_page(slice);
+	struct vvp_page      *vpg     = cl2vvp_page(slice);
 
 	LASSERT(vmpage);
 	LASSERT(PageLocked(vmpage));
 
-	mapping = vmpage->mapping;
-
-	if (cpg->cpg_defer_uptodate && !cpg->cpg_ra_used)
-		ll_ra_stats_inc(mapping, RA_STAT_DISCARDED);
+	if (vpg->vpg_defer_uptodate && !vpg->vpg_ra_used)
+		ll_ra_stats_inc(vmpage->mapping->host, RA_STAT_DISCARDED);
 
-	/*
-	 * truncate_complete_page() calls
-	 * a_ops->invalidatepage()->cl_page_delete()->vvp_page_delete().
-	 */
-	truncate_complete_page(mapping, vmpage);
-}
-
-static int vvp_page_unmap(const struct lu_env *env,
-			  const struct cl_page_slice *slice,
-			  struct cl_io *unused)
-{
-	struct page *vmpage = cl2vm_page(slice);
-	__u64       offset;
-
-	LASSERT(vmpage);
-	LASSERT(PageLocked(vmpage));
-
-	offset = vmpage->index << PAGE_SHIFT;
-
-	/*
-	 * XXX is it safe to call this with the page lock held?
-	 */
-	ll_teardown_mmaps(vmpage->mapping, offset, offset + PAGE_SIZE);
-	return 0;
+	ll_invalidate_page(vmpage);
 }
 
 static void vvp_page_delete(const struct lu_env *env,
@@ -179,12 +161,20 @@ static void vvp_page_delete(const struct lu_env *env,
 	struct page       *vmpage = cl2vm_page(slice);
 	struct inode     *inode  = vmpage->mapping->host;
 	struct cl_object *obj    = slice->cpl_obj;
+	struct cl_page   *page   = slice->cpl_page;
+	int refc;
 
 	LASSERT(PageLocked(vmpage));
-	LASSERT((struct cl_page *)vmpage->private == slice->cpl_page);
-	LASSERT(inode == ccc_object_inode(obj));
+	LASSERT((struct cl_page *)vmpage->private == page);
+	LASSERT(inode == vvp_object_inode(obj));
 
-	vvp_write_complete(cl2ccc(obj), cl2ccc_page(slice));
+	vvp_write_complete(cl2vvp(obj), cl2vvp_page(slice));
+
+	/* Drop the reference count held in vvp_page_init */
+	refc = atomic_dec_return(&page->cp_ref);
+	LASSERTF(refc >= 1, "page = %p, refc = %d\n", page, refc);
+
+	ClearPageUptodate(vmpage);
 	ClearPagePrivate(vmpage);
 	vmpage->private = 0;
 	/*
@@ -237,7 +227,7 @@ static int vvp_page_prep_write(const struct lu_env *env,
 	if (!pg->cp_sync_io)
 		set_page_writeback(vmpage);
 
-	vvp_write_pending(cl2ccc(slice->cpl_obj), cl2ccc_page(slice));
+	vvp_write_pending(cl2vvp(slice->cpl_obj), cl2vvp_page(slice));
 
 	return 0;
 }
@@ -250,11 +240,11 @@ static int vvp_page_prep_write(const struct lu_env *env,
  */
 static void vvp_vmpage_error(struct inode *inode, struct page *vmpage, int ioret)
 {
-	struct ccc_object *obj = cl_inode2ccc(inode);
+	struct vvp_object *obj = cl_inode2vvp(inode);
 
 	if (ioret == 0) {
 		ClearPageError(vmpage);
-		obj->cob_discard_page_warned = 0;
+		obj->vob_discard_page_warned = 0;
 	} else {
 		SetPageError(vmpage);
 		if (ioret == -ENOSPC)
@@ -263,8 +253,8 @@ static void vvp_vmpage_error(struct inode *inode, struct page *vmpage, int ioret
 			set_bit(AS_EIO, &inode->i_mapping->flags);
 
 		if ((ioret == -ESHUTDOWN || ioret == -EINTR) &&
-		    obj->cob_discard_page_warned == 0) {
-			obj->cob_discard_page_warned = 1;
+		     obj->vob_discard_page_warned == 0) {
+			obj->vob_discard_page_warned = 1;
 			ll_dirty_page_discard_warn(vmpage, ioret);
 		}
 	}
@@ -274,22 +264,23 @@ static void vvp_page_completion_read(const struct lu_env *env,
 				     const struct cl_page_slice *slice,
 				     int ioret)
 {
-	struct ccc_page *cp     = cl2ccc_page(slice);
-	struct page      *vmpage = cp->cpg_page;
-	struct cl_page  *page   = cl_page_top(slice->cpl_page);
-	struct inode    *inode  = ccc_object_inode(page->cp_obj);
+	struct vvp_page *vpg    = cl2vvp_page(slice);
+	struct page     *vmpage = vpg->vpg_page;
+	struct cl_page  *page   = slice->cpl_page;
+	struct inode    *inode  = vvp_object_inode(page->cp_obj);
 
 	LASSERT(PageLocked(vmpage));
 	CL_PAGE_HEADER(D_PAGE, env, page, "completing READ with %d\n", ioret);
 
-	if (cp->cpg_defer_uptodate)
+	if (vpg->vpg_defer_uptodate)
 		ll_ra_count_put(ll_i2sbi(inode), 1);
 
 	if (ioret == 0)  {
-		if (!cp->cpg_defer_uptodate)
+		if (!vpg->vpg_defer_uptodate)
 			cl_page_export(env, page, 1);
-	} else
-		cp->cpg_defer_uptodate = 0;
+	} else {
+		vpg->vpg_defer_uptodate = 0;
+	}
 
 	if (!page->cp_sync_io)
 		unlock_page(vmpage);
@@ -299,9 +290,9 @@ static void vvp_page_completion_write(const struct lu_env *env,
 				      const struct cl_page_slice *slice,
 				      int ioret)
 {
-	struct ccc_page *cp     = cl2ccc_page(slice);
+	struct vvp_page *vpg     = cl2vvp_page(slice);
 	struct cl_page  *pg     = slice->cpl_page;
-	struct page      *vmpage = cp->cpg_page;
+	struct page     *vmpage = vpg->vpg_page;
 
 	CL_PAGE_HEADER(D_PAGE, env, pg, "completing WRITE with %d\n", ioret);
 
@@ -315,8 +306,8 @@ static void vvp_page_completion_write(const struct lu_env *env,
 	 * and then re-add the page into pending transfer queue.  -jay
 	 */
 
-	cp->cpg_write_queued = 0;
-	vvp_write_complete(cl2ccc(slice->cpl_obj), cp);
+	vpg->vpg_write_queued = 0;
+	vvp_write_complete(cl2vvp(slice->cpl_obj), vpg);
 
 	if (pg->cp_sync_io) {
 		LASSERT(PageLocked(vmpage));
@@ -327,7 +318,7 @@ static void vvp_page_completion_write(const struct lu_env *env,
 		 * Only mark the page error only when it's an async write
 		 * because applications won't wait for IO to finish.
 		 */
-		vvp_vmpage_error(ccc_object_inode(pg->cp_obj), vmpage, ioret);
+		vvp_vmpage_error(vvp_object_inode(pg->cp_obj), vmpage, ioret);
 
 		end_page_writeback(vmpage);
 	}
@@ -359,7 +350,7 @@ static int vvp_page_make_ready(const struct lu_env *env,
 		LASSERT(pg->cp_state == CPS_CACHED);
 		/* This actually clears the dirty bit in the radix tree. */
 		set_page_writeback(vmpage);
-		vvp_write_pending(cl2ccc(slice->cpl_obj), cl2ccc_page(slice));
+		vvp_write_pending(cl2vvp(slice->cpl_obj), cl2vvp_page(slice));
 		CL_PAGE_HEADER(D_PAGE, env, pg, "readied\n");
 	} else if (pg->cp_state == CPS_PAGEOUT) {
 		/* is it possible for osc_flush_async_page() to already
@@ -375,24 +366,51 @@ static int vvp_page_make_ready(const struct lu_env *env,
 	return result;
 }
 
+static int vvp_page_is_under_lock(const struct lu_env *env,
+				  const struct cl_page_slice *slice,
+				  struct cl_io *io, pgoff_t *max_index)
+{
+	if (io->ci_type == CIT_READ || io->ci_type == CIT_WRITE ||
+	    io->ci_type == CIT_FAULT) {
+		struct vvp_io *vio = vvp_env_io(env);
+
+		if (unlikely(vio->vui_fd->fd_flags & LL_FILE_GROUP_LOCKED))
+			*max_index = CL_PAGE_EOF;
+	}
+	return 0;
+}
+
 static int vvp_page_print(const struct lu_env *env,
 			  const struct cl_page_slice *slice,
 			  void *cookie, lu_printer_t printer)
 {
-	struct ccc_page *vp = cl2ccc_page(slice);
-	struct page      *vmpage = vp->cpg_page;
+	struct vvp_page *vpg = cl2vvp_page(slice);
+	struct page     *vmpage = vpg->vpg_page;
 
 	(*printer)(env, cookie, LUSTRE_VVP_NAME "-page@%p(%d:%d:%d) vm@%p ",
-		   vp, vp->cpg_defer_uptodate, vp->cpg_ra_used,
-		   vp->cpg_write_queued, vmpage);
+		   vpg, vpg->vpg_defer_uptodate, vpg->vpg_ra_used,
+		   vpg->vpg_write_queued, vmpage);
 	if (vmpage) {
 		(*printer)(env, cookie, "%lx %d:%d %lx %lu %slru",
 			   (long)vmpage->flags, page_count(vmpage),
 			   page_mapcount(vmpage), vmpage->private,
-			   page_index(vmpage),
+			   vmpage->index,
 			   list_empty(&vmpage->lru) ? "not-" : "");
 	}
+
 	(*printer)(env, cookie, "\n");
+
+	return 0;
+}
+
+static int vvp_page_fail(const struct lu_env *env,
+			 const struct cl_page_slice *slice)
+{
+	/*
+	 * Cached read?
+	 */
+	LBUG();
+
 	return 0;
 }
 
@@ -401,32 +419,38 @@ static const struct cl_page_operations vvp_page_ops = {
 	.cpo_assume	= vvp_page_assume,
 	.cpo_unassume      = vvp_page_unassume,
 	.cpo_disown	= vvp_page_disown,
-	.cpo_vmpage	= ccc_page_vmpage,
 	.cpo_discard       = vvp_page_discard,
 	.cpo_delete	= vvp_page_delete,
-	.cpo_unmap	 = vvp_page_unmap,
 	.cpo_export	= vvp_page_export,
 	.cpo_is_vmlocked   = vvp_page_is_vmlocked,
 	.cpo_fini	  = vvp_page_fini,
 	.cpo_print	 = vvp_page_print,
-	.cpo_is_under_lock = ccc_page_is_under_lock,
+	.cpo_is_under_lock = vvp_page_is_under_lock,
 	.io = {
 		[CRT_READ] = {
 			.cpo_prep	= vvp_page_prep_read,
 			.cpo_completion  = vvp_page_completion_read,
-			.cpo_make_ready  = ccc_fail,
+			.cpo_make_ready = vvp_page_fail,
 		},
 		[CRT_WRITE] = {
 			.cpo_prep	= vvp_page_prep_write,
 			.cpo_completion  = vvp_page_completion_write,
 			.cpo_make_ready  = vvp_page_make_ready,
-		}
-	}
+		},
+	},
 };
 
+static int vvp_transient_page_prep(const struct lu_env *env,
+				   const struct cl_page_slice *slice,
+				   struct cl_io *unused)
+{
+	/* transient page should always be sent. */
+	return 0;
+}
+
 static void vvp_transient_page_verify(const struct cl_page *page)
 {
-	struct inode *inode = ccc_object_inode(page->cp_obj);
+	struct inode *inode = vvp_object_inode(page->cp_obj);
 
 	LASSERT(!inode_trylock(inode));
 }
@@ -477,7 +501,7 @@ static void vvp_transient_page_discard(const struct lu_env *env,
 static int vvp_transient_page_is_vmlocked(const struct lu_env *env,
 					  const struct cl_page_slice *slice)
 {
-	struct inode    *inode = ccc_object_inode(slice->cpl_obj);
+	struct inode    *inode = vvp_object_inode(slice->cpl_obj);
 	int	locked;
 
 	locked = !inode_trylock(inode);
@@ -497,13 +521,13 @@ vvp_transient_page_completion(const struct lu_env *env,
 static void vvp_transient_page_fini(const struct lu_env *env,
 				    struct cl_page_slice *slice)
 {
-	struct ccc_page *cp = cl2ccc_page(slice);
+	struct vvp_page *vpg = cl2vvp_page(slice);
 	struct cl_page *clp = slice->cpl_page;
-	struct ccc_object *clobj = cl2ccc(clp->cp_obj);
+	struct vvp_object *clobj = cl2vvp(clp->cp_obj);
 
-	vvp_page_fini_common(cp);
-	LASSERT(!inode_trylock(clobj->cob_inode));
-	clobj->cob_transient_pages--;
+	vvp_page_fini_common(vpg);
+	LASSERT(!inode_trylock(clobj->vob_inode));
+	clobj->vob_transient_pages--;
 }
 
 static const struct cl_page_operations vvp_transient_page_ops = {
@@ -512,45 +536,48 @@ static const struct cl_page_operations vvp_transient_page_ops = {
 	.cpo_unassume      = vvp_transient_page_unassume,
 	.cpo_disown	= vvp_transient_page_disown,
 	.cpo_discard       = vvp_transient_page_discard,
-	.cpo_vmpage	= ccc_page_vmpage,
 	.cpo_fini	  = vvp_transient_page_fini,
 	.cpo_is_vmlocked   = vvp_transient_page_is_vmlocked,
 	.cpo_print	 = vvp_page_print,
-	.cpo_is_under_lock = ccc_page_is_under_lock,
+	.cpo_is_under_lock	= vvp_page_is_under_lock,
 	.io = {
 		[CRT_READ] = {
-			.cpo_prep	= ccc_transient_page_prep,
+			.cpo_prep	= vvp_transient_page_prep,
 			.cpo_completion  = vvp_transient_page_completion,
 		},
 		[CRT_WRITE] = {
-			.cpo_prep	= ccc_transient_page_prep,
+			.cpo_prep	= vvp_transient_page_prep,
 			.cpo_completion  = vvp_transient_page_completion,
 		}
 	}
 };
 
 int vvp_page_init(const struct lu_env *env, struct cl_object *obj,
-		  struct cl_page *page, struct page *vmpage)
+		struct cl_page *page, pgoff_t index)
 {
-	struct ccc_page *cpg = cl_object_page_slice(obj, page);
+	struct vvp_page *vpg = cl_object_page_slice(obj, page);
+	struct page     *vmpage = page->cp_vmpage;
 
-	CLOBINVRNT(env, obj, ccc_object_invariant(obj));
+	CLOBINVRNT(env, obj, vvp_object_invariant(obj));
 
-	cpg->cpg_page = vmpage;
+	vpg->vpg_page = vmpage;
 	get_page(vmpage);
 
-	INIT_LIST_HEAD(&cpg->cpg_pending_linkage);
+	INIT_LIST_HEAD(&vpg->vpg_pending_linkage);
 	if (page->cp_type == CPT_CACHEABLE) {
+		/* in cache, decref in vvp_page_delete */
+		atomic_inc(&page->cp_ref);
 		SetPagePrivate(vmpage);
 		vmpage->private = (unsigned long)page;
-		cl_page_slice_add(page, &cpg->cpg_cl, obj, &vvp_page_ops);
+		cl_page_slice_add(page, &vpg->vpg_cl, obj, index,
+				  &vvp_page_ops);
 	} else {
-		struct ccc_object *clobj = cl2ccc(obj);
+		struct vvp_object *clobj = cl2vvp(obj);
 
-		LASSERT(!inode_trylock(clobj->cob_inode));
-		cl_page_slice_add(page, &cpg->cpg_cl, obj,
+		LASSERT(!inode_trylock(clobj->vob_inode));
+		cl_page_slice_add(page, &vpg->vpg_cl, obj, index,
 				  &vvp_transient_page_ops);
-		clobj->cob_transient_pages++;
+		clobj->vob_transient_pages++;
 	}
 	return 0;
 }
diff --git a/drivers/staging/lustre/lustre/llite/vvp_req.c b/drivers/staging/lustre/lustre/llite/vvp_req.c
new file mode 100644
index 000000000..fb886291a
--- /dev/null
+++ b/drivers/staging/lustre/lustre/llite/vvp_req.c
@@ -0,0 +1,121 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.gnu.org/licenses/gpl-2.0.html
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2014, Intel Corporation.
+ */
+
+#define DEBUG_SUBSYSTEM S_LLITE
+
+#include "../include/lustre/lustre_idl.h"
+#include "../include/cl_object.h"
+#include "../include/obd.h"
+#include "../include/obd_support.h"
+#include "../include/lustre_lite.h"
+#include "llite_internal.h"
+#include "vvp_internal.h"
+
+static inline struct vvp_req *cl2vvp_req(const struct cl_req_slice *slice)
+{
+	return container_of0(slice, struct vvp_req, vrq_cl);
+}
+
+/**
+ * Implementation of struct cl_req_operations::cro_attr_set() for VVP
+ * layer. VVP is responsible for
+ *
+ *    - o_[mac]time
+ *
+ *    - o_mode
+ *
+ *    - o_parent_seq
+ *
+ *    - o_[ug]id
+ *
+ *    - o_parent_oid
+ *
+ *    - o_parent_ver
+ *
+ *    - o_ioepoch,
+ *
+ */
+void vvp_req_attr_set(const struct lu_env *env,
+		      const struct cl_req_slice *slice,
+		      const struct cl_object *obj,
+		      struct cl_req_attr *attr, u64 flags)
+{
+	struct inode *inode;
+	struct obdo  *oa;
+	u32	      valid_flags;
+
+	oa = attr->cra_oa;
+	inode = vvp_object_inode(obj);
+	valid_flags = OBD_MD_FLTYPE;
+
+	if (slice->crs_req->crq_type == CRT_WRITE) {
+		if (flags & OBD_MD_FLEPOCH) {
+			oa->o_valid |= OBD_MD_FLEPOCH;
+			oa->o_ioepoch = ll_i2info(inode)->lli_ioepoch;
+			valid_flags |= OBD_MD_FLMTIME | OBD_MD_FLCTIME |
+				       OBD_MD_FLUID | OBD_MD_FLGID;
+		}
+	}
+	obdo_from_inode(oa, inode, valid_flags & flags);
+	obdo_set_parent_fid(oa, &ll_i2info(inode)->lli_fid);
+	memcpy(attr->cra_jobid, ll_i2info(inode)->lli_jobid,
+	       JOBSTATS_JOBID_SIZE);
+}
+
+void vvp_req_completion(const struct lu_env *env,
+			const struct cl_req_slice *slice, int ioret)
+{
+	struct vvp_req *vrq;
+
+	if (ioret > 0)
+		cl_stats_tally(slice->crs_dev, slice->crs_req->crq_type, ioret);
+
+	vrq = cl2vvp_req(slice);
+	kmem_cache_free(vvp_req_kmem, vrq);
+}
+
+static const struct cl_req_operations vvp_req_ops = {
+	.cro_attr_set   = vvp_req_attr_set,
+	.cro_completion = vvp_req_completion
+};
+
+int vvp_req_init(const struct lu_env *env, struct cl_device *dev,
+		 struct cl_req *req)
+{
+	struct vvp_req *vrq;
+	int result;
+
+	vrq = kmem_cache_zalloc(vvp_req_kmem, GFP_NOFS);
+	if (vrq) {
+		cl_req_slice_add(req, &vrq->vrq_cl, dev, &vvp_req_ops);
+		result = 0;
+	} else {
+		result = -ENOMEM;
+	}
+	return result;
+}
diff --git a/drivers/staging/lustre/lustre/llite/xattr.c b/drivers/staging/lustre/lustre/llite/xattr.c
index b68dcc921..608014b0d 100644
--- a/drivers/staging/lustre/lustre/llite/xattr.c
+++ b/drivers/staging/lustre/lustre/llite/xattr.c
@@ -181,8 +181,9 @@ int ll_setxattr_common(struct inode *inode, const char *name,
 			size = rc;
 
 			pv = (const char *)new_value;
-		} else
+		} else {
 			return -EOPNOTSUPP;
+		}
 
 		valid |= rce_ops2valid(rce->rce_ops);
 	}
@@ -210,16 +211,14 @@ int ll_setxattr_common(struct inode *inode, const char *name,
 	return 0;
 }
 
-int ll_setxattr(struct dentry *dentry, const char *name,
-		const void *value, size_t size, int flags)
+int ll_setxattr(struct dentry *dentry, struct inode *inode,
+		const char *name, const void *value, size_t size, int flags)
 {
-	struct inode *inode = d_inode(dentry);
-
 	LASSERT(inode);
 	LASSERT(name);
 
-	CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), xattr %s\n",
-	       inode->i_ino, inode->i_generation, inode, name);
+	CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), xattr %s\n",
+	       PFID(ll_inode2fid(inode)), inode, name);
 
 	ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_SETXATTR, 1);
 
@@ -243,12 +242,12 @@ int ll_setxattr(struct dentry *dentry, const char *name,
 			lump->lmm_stripe_offset = -1;
 
 		if (lump && S_ISREG(inode->i_mode)) {
-			int flags = FMODE_WRITE;
+			__u64 it_flags = FMODE_WRITE;
 			int lum_size = (lump->lmm_magic == LOV_USER_MAGIC_V1) ?
 				sizeof(*lump) : sizeof(struct lov_user_md_v3);
 
-			rc = ll_lov_setstripe_ea_info(inode, dentry, flags, lump,
-						      lum_size);
+			rc = ll_lov_setstripe_ea_info(inode, dentry, it_flags,
+						      lump, lum_size);
 			/* b10667: rc always be 0 here for now */
 			rc = 0;
 		} else if (S_ISDIR(inode->i_mode)) {
@@ -272,8 +271,8 @@ int ll_removexattr(struct dentry *dentry, const char *name)
 	LASSERT(inode);
 	LASSERT(name);
 
-	CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), xattr %s\n",
-	       inode->i_ino, inode->i_generation, inode, name);
+	CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), xattr %s\n",
+	       PFID(ll_inode2fid(inode)), inode, name);
 
 	ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_REMOVEXATTR, 1);
 	return ll_setxattr_common(inode, name, NULL, 0, 0,
@@ -292,8 +291,8 @@ int ll_getxattr_common(struct inode *inode, const char *name,
 	struct rmtacl_ctl_entry *rce = NULL;
 	struct ll_inode_info *lli = ll_i2info(inode);
 
-	CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n",
-	       inode->i_ino, inode->i_generation, inode);
+	CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p)\n",
+	       PFID(ll_inode2fid(inode)), inode);
 
 	/* listxattr have slightly different behavior from of ext3:
 	 * without 'user_xattr' ext3 will list all xattr names but
@@ -338,7 +337,6 @@ int ll_getxattr_common(struct inode *inode, const char *name,
 	 */
 	if (xattr_type == XATTR_ACL_ACCESS_T &&
 	    !(sbi->ll_flags & LL_SBI_RMT_CLIENT)) {
-
 		struct posix_acl *acl;
 
 		spin_lock(&lli->lli_lock);
@@ -423,8 +421,7 @@ getxattr_nocache:
 	if (rce && rce->rce_ops == RMT_LSETFACL) {
 		ext_acl_xattr_header *acl;
 
-		acl = lustre_posix_acl_xattr_2ext(
-					(posix_acl_xattr_header *)buffer, rc);
+		acl = lustre_posix_acl_xattr_2ext(buffer, rc);
 		if (IS_ERR(acl)) {
 			rc = PTR_ERR(acl);
 			goto out;
@@ -451,16 +448,14 @@ out:
 	return rc;
 }
 
-ssize_t ll_getxattr(struct dentry *dentry, const char *name,
-		    void *buffer, size_t size)
+ssize_t ll_getxattr(struct dentry *dentry, struct inode *inode,
+		    const char *name, void *buffer, size_t size)
 {
-	struct inode *inode = d_inode(dentry);
-
 	LASSERT(inode);
 	LASSERT(name);
 
-	CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), xattr %s\n",
-	       inode->i_ino, inode->i_generation, inode, name);
+	CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), xattr %s\n",
+	       PFID(ll_inode2fid(inode)), inode, name);
 
 	ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_GETXATTR, 1);
 
@@ -554,8 +549,8 @@ ssize_t ll_listxattr(struct dentry *dentry, char *buffer, size_t size)
 
 	LASSERT(inode);
 
-	CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n",
-	       inode->i_ino, inode->i_generation, inode);
+	CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p)\n",
+	       PFID(ll_inode2fid(inode)), inode);
 
 	ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_LISTXATTR, 1);
 
diff --git a/drivers/staging/lustre/lustre/llite/xattr_cache.c b/drivers/staging/lustre/lustre/llite/xattr_cache.c
index 3480ce2bb..d7e17abbe 100644
--- a/drivers/staging/lustre/lustre/llite/xattr_cache.c
+++ b/drivers/staging/lustre/lustre/llite/xattr_cache.c
@@ -229,7 +229,6 @@ static int ll_xattr_cache_valid(struct ll_inode_info *lli)
  */
 static int ll_xattr_cache_destroy_locked(struct ll_inode_info *lli)
 {
-
 	if (!ll_xattr_cache_valid(lli))
 		return 0;
 
diff --git a/drivers/staging/lustre/lustre/lmv/lmv_internal.h b/drivers/staging/lustre/lustre/lmv/lmv_internal.h
index 8a0087190..7007e4c48 100644
--- a/drivers/staging/lustre/lustre/lmv/lmv_internal.h
+++ b/drivers/staging/lustre/lustre/lmv/lmv_internal.h
@@ -42,9 +42,6 @@
 
 #define LMV_MAX_TGT_COUNT 128
 
-#define lmv_init_lock(lmv)   mutex_lock(&lmv->init_mutex)
-#define lmv_init_unlock(lmv) mutex_unlock(&lmv->init_mutex)
-
 #define LL_IT2STR(it)					\
 	((it) ? ldlm_it2str((it)->it_op) : "0")
 
diff --git a/drivers/staging/lustre/lustre/lmv/lmv_obd.c b/drivers/staging/lustre/lustre/lmv/lmv_obd.c
index 9abb7c2b9..9e31f6b03 100644
--- a/drivers/staging/lustre/lustre/lmv/lmv_obd.c
+++ b/drivers/staging/lustre/lustre/lmv/lmv_obd.c
@@ -132,8 +132,9 @@ static int lmv_set_mdc_active(struct lmv_obd *lmv, struct obd_uuid *uuid,
 static struct obd_uuid *lmv_get_uuid(struct obd_export *exp)
 {
 	struct lmv_obd *lmv = &exp->exp_obd->u.lmv;
+	struct lmv_tgt_desc *tgt = lmv->tgts[0];
 
-	return obd_get_uuid(lmv->tgts[0]->ltd_exp);
+	return tgt ? obd_get_uuid(tgt->ltd_exp) : NULL;
 }
 
 static int lmv_notify(struct obd_device *obd, struct obd_device *watched,
@@ -249,7 +250,6 @@ static int lmv_connect(const struct lu_env *env,
 
 static void lmv_set_timeouts(struct obd_device *obd)
 {
-	struct lmv_tgt_desc   *tgt;
 	struct lmv_obd	*lmv;
 	int		    i;
 
@@ -261,8 +261,10 @@ static void lmv_set_timeouts(struct obd_device *obd)
 		return;
 
 	for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
+		struct lmv_tgt_desc *tgt = lmv->tgts[i];
+
 		tgt = lmv->tgts[i];
-		if (!tgt || !tgt->ltd_exp || tgt->ltd_active == 0)
+		if (!tgt || !tgt->ltd_exp || !tgt->ltd_active)
 			continue;
 
 		obd_set_info_async(NULL, tgt->ltd_exp, sizeof(KEY_INTERMDS),
@@ -302,13 +304,14 @@ static int lmv_init_ea_size(struct obd_export *exp, int easize,
 		return 0;
 
 	for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
-		if (!lmv->tgts[i] || !lmv->tgts[i]->ltd_exp ||
-		    lmv->tgts[i]->ltd_active == 0) {
+		struct lmv_tgt_desc *tgt = lmv->tgts[i];
+
+		if (!tgt || !tgt->ltd_exp || !tgt->ltd_active) {
 			CWARN("%s: NULL export for %d\n", obd->obd_name, i);
 			continue;
 		}
 
-		rc = md_init_ea_size(lmv->tgts[i]->ltd_exp, easize, def_easize,
+		rc = md_init_ea_size(tgt->ltd_exp, easize, def_easize,
 				     cookiesize, def_cookiesize);
 		if (rc) {
 			CERROR("%s: obd_init_ea_size() failed on MDT target %d: rc = %d\n",
@@ -425,7 +428,7 @@ static int lmv_add_target(struct obd_device *obd, struct obd_uuid *uuidp,
 
 	CDEBUG(D_CONFIG, "Target uuid: %s. index %d\n", uuidp->uuid, index);
 
-	lmv_init_lock(lmv);
+	mutex_lock(&lmv->lmv_init_mutex);
 
 	if (lmv->desc.ld_tgt_count == 0) {
 		struct obd_device *mdc_obd;
@@ -433,7 +436,7 @@ static int lmv_add_target(struct obd_device *obd, struct obd_uuid *uuidp,
 		mdc_obd = class_find_client_obd(uuidp, LUSTRE_MDC_NAME,
 						&obd->obd_uuid);
 		if (!mdc_obd) {
-			lmv_init_unlock(lmv);
+			mutex_unlock(&lmv->lmv_init_mutex);
 			CERROR("%s: Target %s not attached: rc = %d\n",
 			       obd->obd_name, uuidp->uuid, -EINVAL);
 			return -EINVAL;
@@ -445,7 +448,7 @@ static int lmv_add_target(struct obd_device *obd, struct obd_uuid *uuidp,
 		CERROR("%s: UUID %s already assigned at LOV target index %d: rc = %d\n",
 		       obd->obd_name,
 		       obd_uuid2str(&tgt->ltd_uuid), index, -EEXIST);
-		lmv_init_unlock(lmv);
+		mutex_unlock(&lmv->lmv_init_mutex);
 		return -EEXIST;
 	}
 
@@ -459,7 +462,7 @@ static int lmv_add_target(struct obd_device *obd, struct obd_uuid *uuidp,
 			newsize <<= 1;
 		newtgts = kcalloc(newsize, sizeof(*newtgts), GFP_NOFS);
 		if (!newtgts) {
-			lmv_init_unlock(lmv);
+			mutex_unlock(&lmv->lmv_init_mutex);
 			return -ENOMEM;
 		}
 
@@ -481,7 +484,7 @@ static int lmv_add_target(struct obd_device *obd, struct obd_uuid *uuidp,
 
 	tgt = kzalloc(sizeof(*tgt), GFP_NOFS);
 	if (!tgt) {
-		lmv_init_unlock(lmv);
+		mutex_unlock(&lmv->lmv_init_mutex);
 		return -ENOMEM;
 	}
 
@@ -507,7 +510,7 @@ static int lmv_add_target(struct obd_device *obd, struct obd_uuid *uuidp,
 		}
 	}
 
-	lmv_init_unlock(lmv);
+	mutex_unlock(&lmv->lmv_init_mutex);
 	return rc;
 }
 
@@ -522,18 +525,27 @@ int lmv_check_connect(struct obd_device *obd)
 	if (lmv->connected)
 		return 0;
 
-	lmv_init_lock(lmv);
+	mutex_lock(&lmv->lmv_init_mutex);
 	if (lmv->connected) {
-		lmv_init_unlock(lmv);
+		mutex_unlock(&lmv->lmv_init_mutex);
 		return 0;
 	}
 
 	if (lmv->desc.ld_tgt_count == 0) {
-		lmv_init_unlock(lmv);
+		mutex_unlock(&lmv->lmv_init_mutex);
 		CERROR("%s: no targets configured.\n", obd->obd_name);
 		return -EINVAL;
 	}
 
+	LASSERT(lmv->tgts);
+
+	if (!lmv->tgts[0]) {
+		mutex_unlock(&lmv->lmv_init_mutex);
+		CERROR("%s: no target configured for index 0.\n",
+		       obd->obd_name);
+		return -EINVAL;
+	}
+
 	CDEBUG(D_CONFIG, "Time to connect %s to %s\n",
 	       lmv->cluuid.uuid, obd->obd_name);
 
@@ -551,7 +563,7 @@ int lmv_check_connect(struct obd_device *obd)
 	lmv->connected = 1;
 	easize = lmv_get_easize(lmv);
 	lmv_init_ea_size(obd->obd_self_export, easize, 0, 0, 0);
-	lmv_init_unlock(lmv);
+	mutex_unlock(&lmv->lmv_init_mutex);
 	return 0;
 
  out_disc:
@@ -572,7 +584,7 @@ int lmv_check_connect(struct obd_device *obd)
 		}
 	}
 	class_disconnect(lmv->exp);
-	lmv_init_unlock(lmv);
+	mutex_unlock(&lmv->lmv_init_mutex);
 	return rc;
 }
 
@@ -796,6 +808,11 @@ static int lmv_hsm_ct_unregister(struct lmv_obd *lmv, unsigned int cmd, int len,
 
 	/* unregister request (call from llapi_hsm_copytool_fini) */
 	for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
+		struct lmv_tgt_desc *tgt = lmv->tgts[i];
+
+		if (!tgt || !tgt->ltd_exp)
+			continue;
+
 		/* best effort: try to clean as much as possible
 		 * (continue on error)
 		 */
@@ -825,20 +842,28 @@ static int lmv_hsm_ct_register(struct lmv_obd *lmv, unsigned int cmd, int len,
 	 * except if it because of inactive target.
 	 */
 	for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
-		err = obd_iocontrol(cmd, lmv->tgts[i]->ltd_exp, len, lk, uarg);
+		struct lmv_tgt_desc *tgt = lmv->tgts[i];
+
+		if (!tgt || !tgt->ltd_exp)
+			continue;
+
+		err = obd_iocontrol(cmd, tgt->ltd_exp, len, lk, uarg);
 		if (err) {
-			if (lmv->tgts[i]->ltd_active) {
+			if (tgt->ltd_active) {
 				/* permanent error */
 				CERROR("error: iocontrol MDC %s on MDTidx %d cmd %x: err = %d\n",
-				       lmv->tgts[i]->ltd_uuid.uuid,
-				       i, cmd, err);
+				       tgt->ltd_uuid.uuid, i, cmd, err);
 				rc = err;
 				lk->lk_flags |= LK_FLG_STOP;
 				/* unregister from previous MDS */
-				for (j = 0; j < i; j++)
-					obd_iocontrol(cmd,
-						      lmv->tgts[j]->ltd_exp,
-						      len, lk, uarg);
+				for (j = 0; j < i; j++) {
+					tgt = lmv->tgts[j];
+
+					if (!tgt || !tgt->ltd_exp)
+						continue;
+					obd_iocontrol(cmd, tgt->ltd_exp, len,
+						      lk, uarg);
+				}
 				return rc;
 			}
 			/* else: transient error.
@@ -877,6 +902,7 @@ static int lmv_iocontrol(unsigned int cmd, struct obd_export *exp,
 {
 	struct obd_device    *obddev = class_exp2obd(exp);
 	struct lmv_obd       *lmv = &obddev->u.lmv;
+	struct lmv_tgt_desc *tgt = NULL;
 	int		   i = 0;
 	int		   rc = 0;
 	int		   set = 0;
@@ -896,10 +922,11 @@ static int lmv_iocontrol(unsigned int cmd, struct obd_export *exp,
 		if (index >= count)
 			return -ENODEV;
 
-		if (!lmv->tgts[index] || lmv->tgts[index]->ltd_active == 0)
+		tgt = lmv->tgts[index];
+		if (!tgt || !tgt->ltd_active)
 			return -ENODATA;
 
-		mdc_obd = class_exp2obd(lmv->tgts[index]->ltd_exp);
+		mdc_obd = class_exp2obd(tgt->ltd_exp);
 		if (!mdc_obd)
 			return -EINVAL;
 
@@ -909,7 +936,7 @@ static int lmv_iocontrol(unsigned int cmd, struct obd_export *exp,
 				     (int)sizeof(struct obd_uuid))))
 			return -EFAULT;
 
-		rc = obd_statfs(NULL, lmv->tgts[index]->ltd_exp, &stat_buf,
+		rc = obd_statfs(NULL, tgt->ltd_exp, &stat_buf,
 				cfs_time_shift_64(-OBD_STATFS_CACHE_SECONDS),
 				0);
 		if (rc)
@@ -922,11 +949,10 @@ static int lmv_iocontrol(unsigned int cmd, struct obd_export *exp,
 	}
 	case OBD_IOC_QUOTACTL: {
 		struct if_quotactl *qctl = karg;
-		struct lmv_tgt_desc *tgt = NULL;
 		struct obd_quotactl *oqctl;
 
 		if (qctl->qc_valid == QC_MDTIDX) {
-			if (qctl->qc_idx < 0 || count <= qctl->qc_idx)
+			if (count <= qctl->qc_idx)
 				return -EINVAL;
 
 			tgt = lmv->tgts[qctl->qc_idx];
@@ -975,18 +1001,18 @@ static int lmv_iocontrol(unsigned int cmd, struct obd_export *exp,
 		if (icc->icc_mdtindex >= count)
 			return -ENODEV;
 
-		if (!lmv->tgts[icc->icc_mdtindex] ||
-		    !lmv->tgts[icc->icc_mdtindex]->ltd_exp ||
-		    lmv->tgts[icc->icc_mdtindex]->ltd_active == 0)
+		tgt = lmv->tgts[icc->icc_mdtindex];
+		if (!tgt || !tgt->ltd_exp || !tgt->ltd_active)
 			return -ENODEV;
-		rc = obd_iocontrol(cmd, lmv->tgts[icc->icc_mdtindex]->ltd_exp,
-				   sizeof(*icc), icc, NULL);
+		rc = obd_iocontrol(cmd, tgt->ltd_exp, sizeof(*icc), icc, NULL);
 		break;
 	}
 	case LL_IOC_GET_CONNECT_FLAGS: {
-		if (!lmv->tgts[0])
+		tgt = lmv->tgts[0];
+
+		if (!tgt || !tgt->ltd_exp)
 			return -ENODATA;
-		rc = obd_iocontrol(cmd, lmv->tgts[0]->ltd_exp, len, karg, uarg);
+		rc = obd_iocontrol(cmd, tgt->ltd_exp, len, karg, uarg);
 		break;
 	}
 	case OBD_IOC_FID2PATH: {
@@ -997,7 +1023,6 @@ static int lmv_iocontrol(unsigned int cmd, struct obd_export *exp,
 	case LL_IOC_HSM_STATE_SET:
 	case LL_IOC_HSM_ACTION: {
 		struct md_op_data	*op_data = karg;
-		struct lmv_tgt_desc	*tgt;
 
 		tgt = lmv_find_target(lmv, &op_data->op_fid1);
 		if (IS_ERR(tgt))
@@ -1011,7 +1036,6 @@ static int lmv_iocontrol(unsigned int cmd, struct obd_export *exp,
 	}
 	case LL_IOC_HSM_PROGRESS: {
 		const struct hsm_progress_kernel *hpk = karg;
-		struct lmv_tgt_desc	*tgt;
 
 		tgt = lmv_find_target(lmv, &hpk->hpk_fid);
 		if (IS_ERR(tgt))
@@ -1021,7 +1045,6 @@ static int lmv_iocontrol(unsigned int cmd, struct obd_export *exp,
 	}
 	case LL_IOC_HSM_REQUEST: {
 		struct hsm_user_request *hur = karg;
-		struct lmv_tgt_desc	*tgt;
 		unsigned int reqcount = hur->hur_request.hr_itemcount;
 
 		if (reqcount == 0)
@@ -1044,7 +1067,11 @@ static int lmv_iocontrol(unsigned int cmd, struct obd_export *exp,
 				int			rc1;
 				struct hsm_user_request *req;
 
-				nr = lmv_hsm_req_count(lmv, hur, lmv->tgts[i]);
+				tgt = lmv->tgts[i];
+				if (!tgt || !tgt->ltd_exp)
+					continue;
+
+				nr = lmv_hsm_req_count(lmv, hur, tgt);
 				if (nr == 0) /* nothing for this MDS */
 					continue;
 
@@ -1056,10 +1083,10 @@ static int lmv_iocontrol(unsigned int cmd, struct obd_export *exp,
 				if (!req)
 					return -ENOMEM;
 
-				lmv_hsm_req_build(lmv, hur, lmv->tgts[i], req);
+				lmv_hsm_req_build(lmv, hur, tgt, req);
 
-				rc1 = obd_iocontrol(cmd, lmv->tgts[i]->ltd_exp,
-						    reqlen, req, uarg);
+				rc1 = obd_iocontrol(cmd, tgt->ltd_exp, reqlen,
+						    req, uarg);
 				if (rc1 != 0 && rc == 0)
 					rc = rc1;
 				kvfree(req);
@@ -1103,27 +1130,27 @@ static int lmv_iocontrol(unsigned int cmd, struct obd_export *exp,
 			struct obd_device *mdc_obd;
 			int err;
 
-			if (!lmv->tgts[i] || !lmv->tgts[i]->ltd_exp)
+			tgt = lmv->tgts[i];
+			if (!tgt || !tgt->ltd_exp)
 				continue;
 			/* ll_umount_begin() sets force flag but for lmv, not
 			 * mdc. Let's pass it through
 			 */
-			mdc_obd = class_exp2obd(lmv->tgts[i]->ltd_exp);
+			mdc_obd = class_exp2obd(tgt->ltd_exp);
 			mdc_obd->obd_force = obddev->obd_force;
-			err = obd_iocontrol(cmd, lmv->tgts[i]->ltd_exp, len,
-					    karg, uarg);
+			err = obd_iocontrol(cmd, tgt->ltd_exp, len, karg, uarg);
 			if (err == -ENODATA && cmd == OBD_IOC_POLL_QUOTACHECK) {
 				return err;
 			} else if (err) {
-				if (lmv->tgts[i]->ltd_active) {
+				if (tgt->ltd_active) {
 					CERROR("error: iocontrol MDC %s on MDTidx %d cmd %x: err = %d\n",
-					       lmv->tgts[i]->ltd_uuid.uuid,
-					       i, cmd, err);
+					       tgt->ltd_uuid.uuid, i, cmd, err);
 					if (!rc)
 						rc = err;
 				}
-			} else
+			} else {
 				set = 1;
+			}
 		}
 		if (!set && !rc)
 			rc = -EIO;
@@ -1269,7 +1296,7 @@ static int lmv_setup(struct obd_device *obd, struct lustre_cfg *lcfg)
 	lmv->lmv_placement = PLACEMENT_CHAR_POLICY;
 
 	spin_lock_init(&lmv->lmv_lock);
-	mutex_init(&lmv->init_mutex);
+	mutex_init(&lmv->lmv_init_mutex);
 
 	lprocfs_lmv_init_vars(&lvars);
 
@@ -2071,7 +2098,7 @@ static void lmv_adjust_dirpages(struct page **pages, int ncfspgs, int nlupgs)
 			dp = (struct lu_dirpage *)((char *)dp + LU_PAGE_SIZE);
 
 			/* Check if we've reached the end of the CFS_PAGE. */
-			if (!((unsigned long)dp & ~CFS_PAGE_MASK))
+			if (!((unsigned long)dp & ~PAGE_MASK))
 				break;
 
 			/* Save the hash and flags of this lu_dirpage. */
@@ -2268,7 +2295,6 @@ static int lmv_get_info(const struct lu_env *env, struct obd_export *exp,
 
 	lmv = &obd->u.lmv;
 	if (keylen >= strlen("remote_flag") && !strcmp(key, "remote_flag")) {
-		struct lmv_tgt_desc *tgt;
 		int i;
 
 		rc = lmv_check_connect(obd);
@@ -2277,7 +2303,8 @@ static int lmv_get_info(const struct lu_env *env, struct obd_export *exp,
 
 		LASSERT(*vallen == sizeof(__u32));
 		for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
-			tgt = lmv->tgts[i];
+			struct lmv_tgt_desc *tgt = lmv->tgts[i];
+
 			/*
 			 * All tgts should be connected when this gets called.
 			 */
@@ -2466,12 +2493,13 @@ static int lmv_cancel_unused(struct obd_export *exp, const struct lu_fid *fid,
 	LASSERT(fid);
 
 	for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
-		if (!lmv->tgts[i] || !lmv->tgts[i]->ltd_exp ||
-		    lmv->tgts[i]->ltd_active == 0)
+		struct lmv_tgt_desc *tgt = lmv->tgts[i];
+
+		if (!tgt || !tgt->ltd_exp || !tgt->ltd_active)
 			continue;
 
-		err = md_cancel_unused(lmv->tgts[i]->ltd_exp, fid,
-				       policy, mode, flags, opaque);
+		err = md_cancel_unused(tgt->ltd_exp, fid, policy, mode, flags,
+				       opaque);
 		if (!rc)
 			rc = err;
 	}
@@ -2482,9 +2510,13 @@ static int lmv_set_lock_data(struct obd_export *exp, __u64 *lockh, void *data,
 			     __u64 *bits)
 {
 	struct lmv_obd	  *lmv = &exp->exp_obd->u.lmv;
+	struct lmv_tgt_desc *tgt = lmv->tgts[0];
 	int		      rc;
 
-	rc =  md_set_lock_data(lmv->tgts[0]->ltd_exp, lockh, data, bits);
+	if (!tgt || !tgt->ltd_exp)
+		return -EINVAL;
+
+	rc = md_set_lock_data(tgt->ltd_exp, lockh, data, bits);
 	return rc;
 }
 
@@ -2509,12 +2541,13 @@ static enum ldlm_mode lmv_lock_match(struct obd_export *exp, __u64 flags,
 	 * one fid was created in.
 	 */
 	for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
-		if (!lmv->tgts[i] || !lmv->tgts[i]->ltd_exp ||
-		    lmv->tgts[i]->ltd_active == 0)
+		struct lmv_tgt_desc *tgt = lmv->tgts[i];
+
+		if (!tgt || !tgt->ltd_exp || !tgt->ltd_active)
 			continue;
 
-		rc = md_lock_match(lmv->tgts[i]->ltd_exp, flags, fid,
-				   type, policy, mode, lockh);
+		rc = md_lock_match(tgt->ltd_exp, flags, fid, type, policy, mode,
+				   lockh);
 		if (rc)
 			return rc;
 	}
@@ -2529,18 +2562,24 @@ static int lmv_get_lustre_md(struct obd_export *exp,
 			     struct lustre_md *md)
 {
 	struct lmv_obd	  *lmv = &exp->exp_obd->u.lmv;
+	struct lmv_tgt_desc *tgt = lmv->tgts[0];
 
-	return md_get_lustre_md(lmv->tgts[0]->ltd_exp, req, dt_exp, md_exp, md);
+	if (!tgt || !tgt->ltd_exp)
+		return -EINVAL;
+	return md_get_lustre_md(tgt->ltd_exp, req, dt_exp, md_exp, md);
 }
 
 static int lmv_free_lustre_md(struct obd_export *exp, struct lustre_md *md)
 {
 	struct obd_device       *obd = exp->exp_obd;
 	struct lmv_obd	  *lmv = &obd->u.lmv;
+	struct lmv_tgt_desc *tgt = lmv->tgts[0];
 
 	if (md->mea)
 		obd_free_memmd(exp, (void *)&md->mea);
-	return md_free_lustre_md(lmv->tgts[0]->ltd_exp, md);
+	if (!tgt || !tgt->ltd_exp)
+		return -EINVAL;
+	return md_free_lustre_md(tgt->ltd_exp, md);
 }
 
 static int lmv_set_open_replay_data(struct obd_export *exp,
@@ -2649,7 +2688,8 @@ static int lmv_quotactl(struct obd_device *unused, struct obd_export *exp,
 	int		  rc = 0, i;
 	__u64		curspace, curinodes;
 
-	if (!lmv->desc.ld_tgt_count || !tgt->ltd_active) {
+	if (!tgt || !tgt->ltd_exp || !tgt->ltd_active ||
+	    !lmv->desc.ld_tgt_count) {
 		CERROR("master lmv inactive\n");
 		return -EIO;
 	}
@@ -2665,12 +2705,8 @@ static int lmv_quotactl(struct obd_device *unused, struct obd_export *exp,
 
 		tgt = lmv->tgts[i];
 
-		if (!tgt || !tgt->ltd_exp || tgt->ltd_active == 0)
+		if (!tgt || !tgt->ltd_exp || !tgt->ltd_active)
 			continue;
-		if (!tgt->ltd_active) {
-			CDEBUG(D_HA, "mdt %d is inactive.\n", i);
-			continue;
-		}
 
 		err = obd_quotactl(tgt->ltd_exp, oqctl);
 		if (err) {
diff --git a/drivers/staging/lustre/lustre/lov/lov_cl_internal.h b/drivers/staging/lustre/lustre/lov/lov_cl_internal.h
index 7dd3162b5..ac9744e88 100644
--- a/drivers/staging/lustre/lustre/lov/lov_cl_internal.h
+++ b/drivers/staging/lustre/lustre/lov/lov_cl_internal.h
@@ -73,19 +73,6 @@
  *     - top-page keeps a reference to its sub-page, and destroys it when it
  *       is destroyed.
  *
- *     - sub-lock keep a reference to its top-locks. Top-lock keeps a
- *       reference (and a hold, see cl_lock_hold()) on its sub-locks when it
- *       actively using them (that is, in cl_lock_state::CLS_QUEUING,
- *       cl_lock_state::CLS_ENQUEUED, cl_lock_state::CLS_HELD states). When
- *       moving into cl_lock_state::CLS_CACHED state, top-lock releases a
- *       hold. From this moment top-lock has only a 'weak' reference to its
- *       sub-locks. This reference is protected by top-lock
- *       cl_lock::cll_guard, and will be automatically cleared by the sub-lock
- *       when the latter is destroyed. When a sub-lock is canceled, a
- *       reference to it is removed from the top-lock array, and top-lock is
- *       moved into CLS_NEW state. It is guaranteed that all sub-locks exist
- *       while their top-lock is in CLS_HELD or CLS_CACHED states.
- *
  *     - IO's are not reference counted.
  *
  * To implement a connection between top and sub entities, lov layer is split
@@ -280,25 +267,18 @@ struct lov_object {
 	struct task_struct	*lo_owner;
 };
 
-/**
- * Flags that top-lock can set on each of its sub-locks.
- */
-enum lov_sub_flags {
-	/** Top-lock acquired a hold (cl_lock_hold()) on a sub-lock. */
-	LSF_HELD = 1 << 0
-};
-
 /**
  * State lov_lock keeps for each sub-lock.
  */
 struct lov_lock_sub {
 	/** sub-lock itself */
-	struct lovsub_lock  *sub_lock;
-	/** An array of per-sub-lock flags, taken from enum lov_sub_flags */
-	unsigned	     sub_flags;
+	struct cl_lock		sub_lock;
+	/** Set if the sublock has ever been enqueued, meaning it may
+	 * hold resources of underlying layers
+	 */
+	unsigned int		sub_is_enqueued:1,
+				sub_initialized:1;
 	int		  sub_stripe;
-	struct cl_lock_descr sub_descr;
-	struct cl_lock_descr sub_got;
 };
 
 /**
@@ -308,59 +288,8 @@ struct lov_lock {
 	struct cl_lock_slice   lls_cl;
 	/** Number of sub-locks in this lock */
 	int		    lls_nr;
-	/**
-	 * Number of existing sub-locks.
-	 */
-	unsigned	       lls_nr_filled;
-	/**
-	 * Set when sub-lock was canceled, while top-lock was being
-	 * used, or unused.
-	 */
-	unsigned int	       lls_cancel_race:1;
-	/**
-	 * An array of sub-locks
-	 *
-	 * There are two issues with managing sub-locks:
-	 *
-	 *     - sub-locks are concurrently canceled, and
-	 *
-	 *     - sub-locks are shared with other top-locks.
-	 *
-	 * To manage cancellation, top-lock acquires a hold on a sublock
-	 * (lov_sublock_adopt()) when the latter is inserted into
-	 * lov_lock::lls_sub[]. This hold is released (lov_sublock_release())
-	 * when top-lock is going into CLS_CACHED state or destroyed. Hold
-	 * prevents sub-lock from cancellation.
-	 *
-	 * Sub-lock sharing means, among other things, that top-lock that is
-	 * in the process of creation (i.e., not yet inserted into lock list)
-	 * is already accessible to other threads once at least one of its
-	 * sub-locks is created, see lov_lock_sub_init().
-	 *
-	 * Sub-lock can be in one of the following states:
-	 *
-	 *     - doesn't exist, lov_lock::lls_sub[]::sub_lock == NULL. Such
-	 *       sub-lock was either never created (top-lock is in CLS_NEW
-	 *       state), or it was created, then canceled, then destroyed
-	 *       (lov_lock_unlink() cleared sub-lock pointer in the top-lock).
-	 *
-	 *     - sub-lock exists and is on
-	 *       hold. (lov_lock::lls_sub[]::sub_flags & LSF_HELD). This is a
-	 *       normal state of a sub-lock in CLS_HELD and CLS_CACHED states
-	 *       of a top-lock.
-	 *
-	 *     - sub-lock exists, but is not held by the top-lock. This
-	 *       happens after top-lock released a hold on sub-locks before
-	 *       going into cache (lov_lock_unuse()).
-	 *
-	 * \todo To support wide-striping, array has to be replaced with a set
-	 * of queues to avoid scanning.
-	 */
-	struct lov_lock_sub   *lls_sub;
-	/**
-	 * Original description with which lock was enqueued.
-	 */
-	struct cl_lock_descr   lls_orig;
+	/** sublock array */
+	struct lov_lock_sub     lls_sub[0];
 };
 
 struct lov_page {
@@ -444,8 +373,9 @@ struct lov_thread_info {
 	struct cl_lock_descr    lti_ldescr;
 	struct ost_lvb	  lti_lvb;
 	struct cl_2queue	lti_cl2q;
-	struct cl_lock_closure  lti_closure;
+	struct cl_page_list     lti_plist;
 	wait_queue_t	  lti_waiter;
+	struct cl_attr          lti_attr;
 };
 
 /**
@@ -611,14 +541,13 @@ int lov_sublock_modify(const struct lu_env *env, struct lov_lock *lov,
 		       const struct cl_lock_descr *d, int idx);
 
 int lov_page_init(const struct lu_env *env, struct cl_object *ob,
-		  struct cl_page *page, struct page *vmpage);
+		  struct cl_page *page, pgoff_t index);
 int lovsub_page_init(const struct lu_env *env, struct cl_object *ob,
-		     struct cl_page *page, struct page *vmpage);
-
+		     struct cl_page *page, pgoff_t index);
 int lov_page_init_empty(const struct lu_env *env, struct cl_object *obj,
-			struct cl_page *page, struct page *vmpage);
+			struct cl_page *page, pgoff_t index);
 int lov_page_init_raid0(const struct lu_env *env, struct cl_object *obj,
-			struct cl_page *page, struct page *vmpage);
+			struct cl_page *page, pgoff_t index);
 struct lu_object *lov_object_alloc(const struct lu_env *env,
 				   const struct lu_object_header *hdr,
 				   struct lu_device *dev);
@@ -631,6 +560,7 @@ struct lov_lock_link *lov_lock_link_find(const struct lu_env *env,
 					 struct lovsub_lock *sub);
 struct lov_io_sub *lov_page_subio(const struct lu_env *env, struct lov_io *lio,
 				  const struct cl_page_slice *slice);
+int lov_page_stripe(const struct cl_page *page);
 
 #define lov_foreach_target(lov, var)		    \
 	for (var = 0; var < lov_targets_nr(lov); ++var)
@@ -789,11 +719,6 @@ static inline struct lovsub_req *cl2lovsub_req(const struct cl_req_slice *slice)
 	return container_of0(slice, struct lovsub_req, lsrq_cl);
 }
 
-static inline struct cl_page *lov_sub_page(const struct cl_page_slice *slice)
-{
-	return slice->cpl_page->cp_child;
-}
-
 static inline struct lov_io *cl2lov_io(const struct lu_env *env,
 				       const struct cl_io_slice *ios)
 {
diff --git a/drivers/staging/lustre/lustre/lov/lov_dev.c b/drivers/staging/lustre/lustre/lov/lov_dev.c
index 532ef87df..dae8e89bc 100644
--- a/drivers/staging/lustre/lustre/lov/lov_dev.c
+++ b/drivers/staging/lustre/lustre/lov/lov_dev.c
@@ -143,9 +143,7 @@ static void *lov_key_init(const struct lu_context *ctx,
 	struct lov_thread_info *info;
 
 	info = kmem_cache_zalloc(lov_thread_kmem, GFP_NOFS);
-	if (info)
-		INIT_LIST_HEAD(&info->lti_closure.clc_list);
-	else
+	if (!info)
 		info = ERR_PTR(-ENOMEM);
 	return info;
 }
@@ -155,7 +153,6 @@ static void lov_key_fini(const struct lu_context *ctx,
 {
 	struct lov_thread_info *info = data;
 
-	LINVRNT(list_empty(&info->lti_closure.clc_list));
 	kmem_cache_free(lov_thread_kmem, info);
 }
 
@@ -265,8 +262,9 @@ static int lov_req_init(const struct lu_env *env, struct cl_device *dev,
 	if (lr) {
 		cl_req_slice_add(req, &lr->lr_cl, dev, &lov_req_ops);
 		result = 0;
-	} else
+	} else {
 		result = -ENOMEM;
+	}
 	return result;
 }
 
@@ -335,14 +333,15 @@ static struct lov_device_emerg **lov_emerg_alloc(int nr)
 			cl_page_list_init(&em->emrg_page_list);
 			em->emrg_env = cl_env_alloc(&em->emrg_refcheck,
 						    LCT_REMEMBER | LCT_NOREF);
-			if (!IS_ERR(em->emrg_env))
+			if (!IS_ERR(em->emrg_env)) {
 				em->emrg_env->le_ctx.lc_cookie = 0x2;
-			else {
+			} else {
 				result = PTR_ERR(em->emrg_env);
 				em->emrg_env = NULL;
 			}
-		} else
+		} else {
 			result = -ENOMEM;
+		}
 	}
 	if (result != 0) {
 		lov_emerg_free(emerg, nr);
diff --git a/drivers/staging/lustre/lustre/lov/lov_ea.c b/drivers/staging/lustre/lustre/lov/lov_ea.c
index b6529401c..460f0fa5e 100644
--- a/drivers/staging/lustre/lustre/lov/lov_ea.c
+++ b/drivers/staging/lustre/lustre/lov/lov_ea.c
@@ -48,11 +48,6 @@
 
 #include "lov_internal.h"
 
-struct lovea_unpack_args {
-	struct lov_stripe_md *lsm;
-	int		   cursor;
-};
-
 static int lsm_lmm_verify_common(struct lov_mds_md *lmm, int lmm_bytes,
 				 __u16 stripe_count)
 {
diff --git a/drivers/staging/lustre/lustre/lov/lov_internal.h b/drivers/staging/lustre/lustre/lov/lov_internal.h
index 590f9326a..eef9afac8 100644
--- a/drivers/staging/lustre/lustre/lov/lov_internal.h
+++ b/drivers/staging/lustre/lustre/lov/lov_internal.h
@@ -72,6 +72,21 @@
 })
 #endif
 
+#define pool_tgt_size(p)	((p)->pool_obds.op_size)
+#define pool_tgt_count(p)	((p)->pool_obds.op_count)
+#define pool_tgt_array(p)	((p)->pool_obds.op_array)
+#define pool_tgt_rw_sem(p)	((p)->pool_obds.op_rw_sem)
+
+struct pool_desc {
+	char			 pool_name[LOV_MAXPOOLNAME + 1];
+	struct ost_pool		 pool_obds;
+	atomic_t		 pool_refcount;
+	struct hlist_node	 pool_hash;		/* access by poolname */
+	struct list_head	 pool_list;		/* serial access */
+	struct dentry		*pool_debugfs_entry;	/* file in debugfs */
+	struct obd_device	*pool_lobd;		/* owner */
+};
+
 struct lov_request {
 	struct obd_info	  rq_oi;
 	struct lov_request_set  *rq_rqset;
@@ -88,7 +103,6 @@ struct lov_request {
 };
 
 struct lov_request_set {
-	struct ldlm_enqueue_info	*set_ei;
 	struct obd_info			*set_oi;
 	atomic_t			set_refcount;
 	struct obd_export		*set_exp;
@@ -102,10 +116,8 @@ struct lov_request_set {
 	atomic_t			set_finish_checked;
 	struct llog_cookie		*set_cookies;
 	int				set_cookie_sent;
-	struct obd_trans_info		*set_oti;
 	struct list_head			set_list;
 	wait_queue_head_t			set_waitq;
-	spinlock_t			set_lock;
 };
 
 extern struct kmem_cache *lov_oinfo_slab;
@@ -114,12 +126,6 @@ extern struct lu_kmem_descr lov_caches[];
 
 void lov_finish_set(struct lov_request_set *set);
 
-static inline void lov_get_reqset(struct lov_request_set *set)
-{
-	LASSERT(atomic_read(&set->set_refcount) > 0);
-	atomic_inc(&set->set_refcount);
-}
-
 static inline void lov_put_reqset(struct lov_request_set *set)
 {
 	if (atomic_dec_and_test(&set->set_refcount))
@@ -146,10 +152,8 @@ int lov_stripe_intersects(struct lov_stripe_md *lsm, int stripeno,
 			  u64 start, u64 end,
 			  u64 *obd_start, u64 *obd_end);
 int lov_stripe_number(struct lov_stripe_md *lsm, u64 lov_off);
-
-/* lov_qos.c */
-#define LOV_USES_ASSIGNED_STRIPE	0
-#define LOV_USES_DEFAULT_STRIPE	 1
+pgoff_t lov_stripe_pgoff(struct lov_stripe_md *lsm, pgoff_t stripe_index,
+			 int stripe);
 
 /* lov_request.c */
 int lov_update_common_set(struct lov_request_set *set,
@@ -176,6 +180,8 @@ int lov_fini_statfs_set(struct lov_request_set *set);
 int lov_statfs_interpret(struct ptlrpc_request_set *rqset, void *data, int rc);
 
 /* lov_obd.c */
+void lov_stripe_lock(struct lov_stripe_md *md);
+void lov_stripe_unlock(struct lov_stripe_md *md);
 void lov_fix_desc(struct lov_desc *desc);
 void lov_fix_desc_stripe_size(__u64 *val);
 void lov_fix_desc_stripe_count(__u32 *val);
@@ -231,8 +237,6 @@ int lov_pool_new(struct obd_device *obd, char *poolname);
 int lov_pool_del(struct obd_device *obd, char *poolname);
 int lov_pool_add(struct obd_device *obd, char *poolname, char *ostname);
 int lov_pool_remove(struct obd_device *obd, char *poolname, char *ostname);
-struct pool_desc *lov_find_pool(struct lov_obd *lov, char *poolname);
-int lov_check_index_in_pool(__u32 idx, struct pool_desc *pool);
 void lov_pool_putref(struct pool_desc *pool);
 
 static inline struct lov_stripe_md *lsm_addref(struct lov_stripe_md *lsm)
diff --git a/drivers/staging/lustre/lustre/lov/lov_io.c b/drivers/staging/lustre/lustre/lov/lov_io.c
index 4296aacd8..86cb3f8f9 100644
--- a/drivers/staging/lustre/lustre/lov/lov_io.c
+++ b/drivers/staging/lustre/lustre/lov/lov_io.c
@@ -225,8 +225,9 @@ struct lov_io_sub *lov_sub_get(const struct lu_env *env,
 	if (!sub->sub_io_initialized) {
 		sub->sub_stripe = stripe;
 		rc = lov_io_sub_init(env, lio, sub);
-	} else
+	} else {
 		rc = 0;
+	}
 	if (rc == 0)
 		lov_sub_enter(sub);
 	else
@@ -245,13 +246,15 @@ void lov_sub_put(struct lov_io_sub *sub)
  *
  */
 
-static int lov_page_stripe(const struct cl_page *page)
+int lov_page_stripe(const struct cl_page *page)
 {
 	struct lovsub_object *subobj;
+	const struct cl_page_slice *slice;
+
+	slice = cl_page_at(page, &lovsub_device_type);
+	LASSERT(slice->cpl_obj);
 
-	subobj = lu2lovsub(
-		lu_object_locate(page->cp_child->cp_obj->co_lu.lo_header,
-				 &lovsub_device_type));
+	subobj = cl2lovsub(slice->cpl_obj);
 	return subobj->lso_index;
 }
 
@@ -274,10 +277,11 @@ struct lov_io_sub *lov_page_subio(const struct lu_env *env, struct lov_io *lio,
 static int lov_io_subio_init(const struct lu_env *env, struct lov_io *lio,
 			     struct cl_io *io)
 {
-	struct lov_stripe_md *lsm = lio->lis_object->lo_lsm;
+	struct lov_stripe_md *lsm;
 	int result;
 
 	LASSERT(lio->lis_object);
+	lsm = lio->lis_object->lo_lsm;
 
 	/*
 	 * Need to be optimized, we can't afford to allocate a piece of memory
@@ -292,8 +296,9 @@ static int lov_io_subio_init(const struct lu_env *env, struct lov_io *lio,
 		lio->lis_single_subio_index = -1;
 		lio->lis_active_subios = 0;
 		result = 0;
-	} else
+	} else {
 		result = -ENOMEM;
+	}
 	return result;
 }
 
@@ -411,8 +416,9 @@ static int lov_io_iter_init(const struct lu_env *env,
 			lov_sub_put(sub);
 			CDEBUG(D_VFSTRACE, "shrink: %d [%llu, %llu)\n",
 			       stripe, start, end);
-		} else
+		} else {
 			rc = PTR_ERR(sub);
+		}
 
 		if (!rc)
 			list_add_tail(&sub->sub_linkage, &lio->lis_active);
@@ -436,7 +442,6 @@ static int lov_io_rw_iter_init(const struct lu_env *env,
 
 	/* fast path for common case. */
 	if (lio->lis_nr_subios != 1 && !cl_io_is_append(io)) {
-
 		lov_do_div64(start, ssize);
 		next = (start + 1) * ssize;
 		if (next <= start * ssize)
@@ -543,13 +548,6 @@ static void lov_io_unlock(const struct lu_env *env,
 	LASSERT(rc == 0);
 }
 
-static struct cl_page_list *lov_io_submit_qin(struct lov_device *ld,
-					      struct cl_page_list *qin,
-					      int idx, int alloc)
-{
-	return alloc ? &qin[idx] : &ld->ld_emrg[idx]->emrg_page_list;
-}
-
 /**
  * lov implementation of cl_operations::cio_submit() method. It takes a list
  * of pages in \a queue, splits it into per-stripe sub-lists, invokes
@@ -569,25 +567,17 @@ static int lov_io_submit(const struct lu_env *env,
 			 const struct cl_io_slice *ios,
 			 enum cl_req_type crt, struct cl_2queue *queue)
 {
-	struct lov_io	  *lio = cl2lov_io(env, ios);
-	struct lov_object      *obj = lio->lis_object;
-	struct lov_device       *ld = lu2lov_dev(lov2cl(obj)->co_lu.lo_dev);
-	struct cl_page_list    *qin = &queue->c2_qin;
-	struct cl_2queue      *cl2q = &lov_env_info(env)->lti_cl2q;
-	struct cl_page_list *stripes_qin = NULL;
+	struct cl_page_list *qin = &queue->c2_qin;
+	struct lov_io *lio = cl2lov_io(env, ios);
+	struct lov_io_sub *sub;
+	struct cl_page_list *plist = &lov_env_info(env)->lti_plist;
 	struct cl_page *page;
-	struct cl_page *tmp;
 	int stripe;
 
-#define QIN(stripe) lov_io_submit_qin(ld, stripes_qin, stripe, alloc)
-
 	int rc = 0;
-	int alloc =
-		!(current->flags & PF_MEMALLOC);
 
 	if (lio->lis_active_subios == 1) {
 		int idx = lio->lis_single_subio_index;
-		struct lov_io_sub *sub;
 
 		LASSERT(idx < lio->lis_nr_subios);
 		sub = lov_sub_get(env, lio, idx);
@@ -600,119 +590,120 @@ static int lov_io_submit(const struct lu_env *env,
 	}
 
 	LASSERT(lio->lis_subs);
-	if (alloc) {
-		stripes_qin =
-			libcfs_kvzalloc(sizeof(*stripes_qin) *
-					lio->lis_nr_subios,
-					GFP_NOFS);
-		if (!stripes_qin)
-			return -ENOMEM;
-
-		for (stripe = 0; stripe < lio->lis_nr_subios; stripe++)
-			cl_page_list_init(&stripes_qin[stripe]);
-	} else {
-		/*
-		 * If we get here, it means pageout & swap doesn't help.
-		 * In order to not make things worse, even don't try to
-		 * allocate the memory with __GFP_NOWARN. -jay
-		 */
-		mutex_lock(&ld->ld_mutex);
-		lio->lis_mem_frozen = 1;
-	}
 
-	cl_2queue_init(cl2q);
-	cl_page_list_for_each_safe(page, tmp, qin) {
-		stripe = lov_page_stripe(page);
-		cl_page_list_move(QIN(stripe), qin, page);
-	}
+	cl_page_list_init(plist);
+	while (qin->pl_nr > 0) {
+		struct cl_2queue *cl2q = &lov_env_info(env)->lti_cl2q;
 
-	for (stripe = 0; stripe < lio->lis_nr_subios; stripe++) {
-		struct lov_io_sub   *sub;
-		struct cl_page_list *sub_qin = QIN(stripe);
+		cl_2queue_init(cl2q);
 
-		if (list_empty(&sub_qin->pl_pages))
-			continue;
+		page = cl_page_list_first(qin);
+		cl_page_list_move(&cl2q->c2_qin, qin, page);
+
+		stripe = lov_page_stripe(page);
+		while (qin->pl_nr > 0) {
+			page = cl_page_list_first(qin);
+			if (stripe != lov_page_stripe(page))
+				break;
+
+			cl_page_list_move(&cl2q->c2_qin, qin, page);
+		}
 
-		cl_page_list_splice(sub_qin, &cl2q->c2_qin);
 		sub = lov_sub_get(env, lio, stripe);
 		if (!IS_ERR(sub)) {
 			rc = cl_io_submit_rw(sub->sub_env, sub->sub_io,
 					     crt, cl2q);
 			lov_sub_put(sub);
-		} else
+		} else {
 			rc = PTR_ERR(sub);
-		cl_page_list_splice(&cl2q->c2_qin,  &queue->c2_qin);
+		}
+
+		cl_page_list_splice(&cl2q->c2_qin, plist);
 		cl_page_list_splice(&cl2q->c2_qout, &queue->c2_qout);
+		cl_2queue_fini(env, cl2q);
+
 		if (rc != 0)
 			break;
 	}
 
-	for (stripe = 0; stripe < lio->lis_nr_subios; stripe++) {
-		struct cl_page_list *sub_qin = QIN(stripe);
+	cl_page_list_splice(plist, qin);
+	cl_page_list_fini(env, plist);
 
-		if (list_empty(&sub_qin->pl_pages))
-			continue;
+	return rc;
+}
+
+static int lov_io_commit_async(const struct lu_env *env,
+			       const struct cl_io_slice *ios,
+			       struct cl_page_list *queue, int from, int to,
+			       cl_commit_cbt cb)
+{
+	struct cl_page_list *plist = &lov_env_info(env)->lti_plist;
+	struct lov_io *lio = cl2lov_io(env, ios);
+	struct lov_io_sub *sub;
+	struct cl_page *page;
+	int rc = 0;
+
+	if (lio->lis_active_subios == 1) {
+		int idx = lio->lis_single_subio_index;
 
-		cl_page_list_splice(sub_qin, qin);
+		LASSERT(idx < lio->lis_nr_subios);
+		sub = lov_sub_get(env, lio, idx);
+		LASSERT(!IS_ERR(sub));
+		LASSERT(sub->sub_io == &lio->lis_single_subio);
+		rc = cl_io_commit_async(sub->sub_env, sub->sub_io, queue,
+					from, to, cb);
+		lov_sub_put(sub);
+		return rc;
 	}
 
-	if (alloc) {
-		kvfree(stripes_qin);
-	} else {
-		int i;
+	LASSERT(lio->lis_subs);
 
-		for (i = 0; i < lio->lis_nr_subios; i++) {
-			struct cl_io *cio = lio->lis_subs[i].sub_io;
+	cl_page_list_init(plist);
+	while (queue->pl_nr > 0) {
+		int stripe_to = to;
+		int stripe;
 
-			if (cio && cio == &ld->ld_emrg[i]->emrg_subio)
-				lov_io_sub_fini(env, lio, &lio->lis_subs[i]);
+		LASSERT(plist->pl_nr == 0);
+		page = cl_page_list_first(queue);
+		cl_page_list_move(plist, queue, page);
+
+		stripe = lov_page_stripe(page);
+		while (queue->pl_nr > 0) {
+			page = cl_page_list_first(queue);
+			if (stripe != lov_page_stripe(page))
+				break;
+
+			cl_page_list_move(plist, queue, page);
 		}
-		lio->lis_mem_frozen = 0;
-		mutex_unlock(&ld->ld_mutex);
-	}
 
-	return rc;
-#undef QIN
-}
+		if (queue->pl_nr > 0) /* still has more pages */
+			stripe_to = PAGE_SIZE;
 
-static int lov_io_prepare_write(const struct lu_env *env,
-				const struct cl_io_slice *ios,
-				const struct cl_page_slice *slice,
-				unsigned from, unsigned to)
-{
-	struct lov_io     *lio      = cl2lov_io(env, ios);
-	struct cl_page    *sub_page = lov_sub_page(slice);
-	struct lov_io_sub *sub;
-	int result;
+		sub = lov_sub_get(env, lio, stripe);
+		if (!IS_ERR(sub)) {
+			rc = cl_io_commit_async(sub->sub_env, sub->sub_io,
+						plist, from, stripe_to, cb);
+			lov_sub_put(sub);
+		} else {
+			rc = PTR_ERR(sub);
+			break;
+		}
 
-	sub = lov_page_subio(env, lio, slice);
-	if (!IS_ERR(sub)) {
-		result = cl_io_prepare_write(sub->sub_env, sub->sub_io,
-					     sub_page, from, to);
-		lov_sub_put(sub);
-	} else
-		result = PTR_ERR(sub);
-	return result;
-}
+		if (plist->pl_nr > 0) /* short write */
+			break;
 
-static int lov_io_commit_write(const struct lu_env *env,
-			       const struct cl_io_slice *ios,
-			       const struct cl_page_slice *slice,
-			       unsigned from, unsigned to)
-{
-	struct lov_io     *lio      = cl2lov_io(env, ios);
-	struct cl_page    *sub_page = lov_sub_page(slice);
-	struct lov_io_sub *sub;
-	int result;
+		from = 0;
+	}
 
-	sub = lov_page_subio(env, lio, slice);
-	if (!IS_ERR(sub)) {
-		result = cl_io_commit_write(sub->sub_env, sub->sub_io,
-					    sub_page, from, to);
-		lov_sub_put(sub);
-	} else
-		result = PTR_ERR(sub);
-	return result;
+	/* for error case, add the page back into the qin list */
+	LASSERT(ergo(rc == 0, plist->pl_nr == 0));
+	while (plist->pl_nr > 0) {
+		/* error occurred, add the uncommitted pages back into queue */
+		page = cl_page_list_last(plist);
+		cl_page_list_move_head(queue, plist, page);
+	}
+
+	return rc;
 }
 
 static int lov_io_fault_start(const struct lu_env *env,
@@ -803,16 +794,8 @@ static const struct cl_io_operations lov_io_ops = {
 			.cio_fini   = lov_io_fini
 		}
 	},
-	.req_op = {
-		 [CRT_READ] = {
-			 .cio_submit    = lov_io_submit
-		 },
-		 [CRT_WRITE] = {
-			 .cio_submit    = lov_io_submit
-		 }
-	 },
-	.cio_prepare_write = lov_io_prepare_write,
-	.cio_commit_write  = lov_io_commit_write
+	.cio_submit                    = lov_io_submit,
+	.cio_commit_async              = lov_io_commit_async,
 };
 
 /*****************************************************************************
@@ -880,15 +863,8 @@ static const struct cl_io_operations lov_empty_io_ops = {
 			.cio_fini   = lov_empty_io_fini
 		}
 	},
-	.req_op = {
-		 [CRT_READ] = {
-			 .cio_submit    = LOV_EMPTY_IMPOSSIBLE
-		 },
-		 [CRT_WRITE] = {
-			 .cio_submit    = LOV_EMPTY_IMPOSSIBLE
-		 }
-	 },
-	.cio_commit_write = LOV_EMPTY_IMPOSSIBLE
+	.cio_submit                    = LOV_EMPTY_IMPOSSIBLE,
+	.cio_commit_async              = LOV_EMPTY_IMPOSSIBLE
 };
 
 int lov_io_init_raid0(const struct lu_env *env, struct cl_object *obj,
@@ -943,7 +919,7 @@ int lov_io_init_empty(const struct lu_env *env, struct cl_object *obj,
 	}
 
 	io->ci_result = result < 0 ? result : 0;
-	return result != 0;
+	return result;
 }
 
 int lov_io_init_released(const struct lu_env *env, struct cl_object *obj,
@@ -986,7 +962,7 @@ int lov_io_init_released(const struct lu_env *env, struct cl_object *obj,
 	}
 
 	io->ci_result = result < 0 ? result : 0;
-	return result != 0;
+	return result;
 }
 
 /** @} lov */
diff --git a/drivers/staging/lustre/lustre/lov/lov_lock.c b/drivers/staging/lustre/lustre/lov/lov_lock.c
index ae854bc25..1b203d18c 100644
--- a/drivers/staging/lustre/lustre/lov/lov_lock.c
+++ b/drivers/staging/lustre/lustre/lov/lov_lock.c
@@ -46,11 +46,6 @@
  *  @{
  */
 
-static struct cl_lock_closure *lov_closure_get(const struct lu_env *env,
-					       struct cl_lock *parent);
-
-static int lov_lock_unuse(const struct lu_env *env,
-			  const struct cl_lock_slice *slice);
 /*****************************************************************************
  *
  * Lov lock operations.
@@ -58,7 +53,7 @@ static int lov_lock_unuse(const struct lu_env *env,
  */
 
 static struct lov_sublock_env *lov_sublock_env_get(const struct lu_env *env,
-						   struct cl_lock *parent,
+						   const struct cl_lock *parent,
 						   struct lov_lock_sub *lls)
 {
 	struct lov_sublock_env *subenv;
@@ -100,184 +95,25 @@ static void lov_sublock_env_put(struct lov_sublock_env *subenv)
 		lov_sub_put(subenv->lse_sub);
 }
 
-static void lov_sublock_adopt(const struct lu_env *env, struct lov_lock *lck,
-			      struct cl_lock *sublock, int idx,
-			      struct lov_lock_link *link)
+static int lov_sublock_init(const struct lu_env *env,
+			    const struct cl_lock *parent,
+			    struct lov_lock_sub *lls)
 {
-	struct lovsub_lock *lsl;
-	struct cl_lock     *parent = lck->lls_cl.cls_lock;
-	int		 rc;
-
-	LASSERT(cl_lock_is_mutexed(parent));
-	LASSERT(cl_lock_is_mutexed(sublock));
-
-	lsl = cl2sub_lock(sublock);
-	/*
-	 * check that sub-lock doesn't have lock link to this top-lock.
-	 */
-	LASSERT(!lov_lock_link_find(env, lck, lsl));
-	LASSERT(idx < lck->lls_nr);
-
-	lck->lls_sub[idx].sub_lock = lsl;
-	lck->lls_nr_filled++;
-	LASSERT(lck->lls_nr_filled <= lck->lls_nr);
-	list_add_tail(&link->lll_list, &lsl->lss_parents);
-	link->lll_idx = idx;
-	link->lll_super = lck;
-	cl_lock_get(parent);
-	lu_ref_add(&parent->cll_reference, "lov-child", sublock);
-	lck->lls_sub[idx].sub_flags |= LSF_HELD;
-	cl_lock_user_add(env, sublock);
-
-	rc = lov_sublock_modify(env, lck, lsl, &sublock->cll_descr, idx);
-	LASSERT(rc == 0); /* there is no way this can fail, currently */
-}
-
-static struct cl_lock *lov_sublock_alloc(const struct lu_env *env,
-					 const struct cl_io *io,
-					 struct lov_lock *lck,
-					 int idx, struct lov_lock_link **out)
-{
-	struct cl_lock       *sublock;
-	struct cl_lock       *parent;
-	struct lov_lock_link *link;
-
-	LASSERT(idx < lck->lls_nr);
-
-	link = kmem_cache_zalloc(lov_lock_link_kmem, GFP_NOFS);
-	if (link) {
-		struct lov_sublock_env *subenv;
-		struct lov_lock_sub  *lls;
-		struct cl_lock_descr *descr;
-
-		parent = lck->lls_cl.cls_lock;
-		lls    = &lck->lls_sub[idx];
-		descr  = &lls->sub_got;
-
-		subenv = lov_sublock_env_get(env, parent, lls);
-		if (!IS_ERR(subenv)) {
-			/* CAVEAT: Don't try to add a field in lov_lock_sub
-			 * to remember the subio. This is because lock is able
-			 * to be cached, but this is not true for IO. This
-			 * further means a sublock might be referenced in
-			 * different io context. -jay
-			 */
-
-			sublock = cl_lock_hold(subenv->lse_env, subenv->lse_io,
-					       descr, "lov-parent", parent);
-			lov_sublock_env_put(subenv);
-		} else {
-			/* error occurs. */
-			sublock = (void *)subenv;
-		}
-
-		if (!IS_ERR(sublock))
-			*out = link;
-		else
-			kmem_cache_free(lov_lock_link_kmem, link);
-	} else
-		sublock = ERR_PTR(-ENOMEM);
-	return sublock;
-}
-
-static void lov_sublock_unlock(const struct lu_env *env,
-			       struct lovsub_lock *lsl,
-			       struct cl_lock_closure *closure,
-			       struct lov_sublock_env *subenv)
-{
-	lov_sublock_env_put(subenv);
-	lsl->lss_active = NULL;
-	cl_lock_disclosure(env, closure);
-}
-
-static int lov_sublock_lock(const struct lu_env *env,
-			    struct lov_lock *lck,
-			    struct lov_lock_sub *lls,
-			    struct cl_lock_closure *closure,
-			    struct lov_sublock_env **lsep)
-{
-	struct lovsub_lock *sublock;
-	struct cl_lock     *child;
-	int		 result = 0;
-
-	LASSERT(list_empty(&closure->clc_list));
-
-	sublock = lls->sub_lock;
-	child = sublock->lss_cl.cls_lock;
-	result = cl_lock_closure_build(env, child, closure);
-	if (result == 0) {
-		struct cl_lock *parent = closure->clc_origin;
-
-		LASSERT(cl_lock_is_mutexed(child));
-		sublock->lss_active = parent;
-
-		if (unlikely((child->cll_state == CLS_FREEING) ||
-			     (child->cll_flags & CLF_CANCELLED))) {
-			struct lov_lock_link *link;
-			/*
-			 * we could race with lock deletion which temporarily
-			 * put the lock in freeing state, bug 19080.
-			 */
-			LASSERT(!(lls->sub_flags & LSF_HELD));
-
-			link = lov_lock_link_find(env, lck, sublock);
-			LASSERT(link);
-			lov_lock_unlink(env, link, sublock);
-			lov_sublock_unlock(env, sublock, closure, NULL);
-			lck->lls_cancel_race = 1;
-			result = CLO_REPEAT;
-		} else if (lsep) {
-			struct lov_sublock_env *subenv;
+	struct lov_sublock_env *subenv;
+	int result;
 
-			subenv = lov_sublock_env_get(env, parent, lls);
-			if (IS_ERR(subenv)) {
-				lov_sublock_unlock(env, sublock,
-						   closure, NULL);
-				result = PTR_ERR(subenv);
-			} else {
-				*lsep = subenv;
-			}
-		}
+	subenv = lov_sublock_env_get(env, parent, lls);
+	if (!IS_ERR(subenv)) {
+		result = cl_lock_init(subenv->lse_env, &lls->sub_lock,
+				      subenv->lse_io);
+		lov_sublock_env_put(subenv);
+	} else {
+		/* error occurs. */
+		result = PTR_ERR(subenv);
 	}
 	return result;
 }
 
-/**
- * Updates the result of a top-lock operation from a result of sub-lock
- * sub-operations. Top-operations like lov_lock_{enqueue,use,unuse}() iterate
- * over sub-locks and lov_subresult() is used to calculate return value of a
- * top-operation. To this end, possible return values of sub-operations are
- * ordered as
- *
- *     - 0		  success
- *     - CLO_WAIT	   wait for event
- *     - CLO_REPEAT	 repeat top-operation
- *     - -ne		fundamental error
- *
- * Top-level return code can only go down through this list. CLO_REPEAT
- * overwrites CLO_WAIT, because lock mutex was released and sleeping condition
- * has to be rechecked by the upper layer.
- */
-static int lov_subresult(int result, int rc)
-{
-	int result_rank;
-	int rc_rank;
-
-	LASSERTF(result <= 0 || result == CLO_REPEAT || result == CLO_WAIT,
-		 "result = %d\n", result);
-	LASSERTF(rc <= 0 || rc == CLO_REPEAT || rc == CLO_WAIT,
-		 "rc = %d\n", rc);
-	CLASSERT(CLO_WAIT < CLO_REPEAT);
-
-	/* calculate ranks in the ordering above */
-	result_rank = result < 0 ? 1 + CLO_REPEAT : result;
-	rc_rank = rc < 0 ? 1 + CLO_REPEAT : rc;
-
-	if (result_rank < rc_rank)
-		result = rc;
-	return result;
-}
-
 /**
  * Creates sub-locks for a given lov_lock for the first time.
  *
@@ -286,8 +122,9 @@ static int lov_subresult(int result, int rc)
  * fact that top-lock (that is being created) can be accessed concurrently
  * through already created sub-locks (possibly shared with other top-locks).
  */
-static int lov_lock_sub_init(const struct lu_env *env,
-			     struct lov_lock *lck, const struct cl_io *io)
+static struct lov_lock *lov_lock_sub_init(const struct lu_env *env,
+					  const struct cl_object *obj,
+					  struct cl_lock *lock)
 {
 	int result = 0;
 	int i;
@@ -297,241 +134,86 @@ static int lov_lock_sub_init(const struct lu_env *env,
 	u64 file_start;
 	u64 file_end;
 
-	struct lov_object       *loo    = cl2lov(lck->lls_cl.cls_obj);
+	struct lov_object       *loo    = cl2lov(obj);
 	struct lov_layout_raid0 *r0     = lov_r0(loo);
-	struct cl_lock	  *parent = lck->lls_cl.cls_lock;
+	struct lov_lock		*lovlck;
 
-	lck->lls_orig = parent->cll_descr;
-	file_start = cl_offset(lov2cl(loo), parent->cll_descr.cld_start);
-	file_end   = cl_offset(lov2cl(loo), parent->cll_descr.cld_end + 1) - 1;
+	file_start = cl_offset(lov2cl(loo), lock->cll_descr.cld_start);
+	file_end   = cl_offset(lov2cl(loo), lock->cll_descr.cld_end + 1) - 1;
 
 	for (i = 0, nr = 0; i < r0->lo_nr; i++) {
 		/*
 		 * XXX for wide striping smarter algorithm is desirable,
 		 * breaking out of the loop, early.
 		 */
-		if (likely(r0->lo_sub[i]) &&
+		if (likely(r0->lo_sub[i]) && /* spare layout */
 		    lov_stripe_intersects(loo->lo_lsm, i,
 					  file_start, file_end, &start, &end))
 			nr++;
 	}
 	LASSERT(nr > 0);
-	lck->lls_sub = libcfs_kvzalloc(nr * sizeof(lck->lls_sub[0]), GFP_NOFS);
-	if (!lck->lls_sub)
-		return -ENOMEM;
+	lovlck = libcfs_kvzalloc(offsetof(struct lov_lock, lls_sub[nr]),
+				 GFP_NOFS);
+	if (!lovlck)
+		return ERR_PTR(-ENOMEM);
 
-	lck->lls_nr = nr;
-	/*
-	 * First, fill in sub-lock descriptions in
-	 * lck->lls_sub[].sub_descr. They are used by lov_sublock_alloc()
-	 * (called below in this function, and by lov_lock_enqueue()) to
-	 * create sub-locks. At this moment, no other thread can access
-	 * top-lock.
-	 */
+	lovlck->lls_nr = nr;
 	for (i = 0, nr = 0; i < r0->lo_nr; ++i) {
 		if (likely(r0->lo_sub[i]) &&
 		    lov_stripe_intersects(loo->lo_lsm, i,
 					  file_start, file_end, &start, &end)) {
+			struct lov_lock_sub *lls = &lovlck->lls_sub[nr];
 			struct cl_lock_descr *descr;
 
-			descr = &lck->lls_sub[nr].sub_descr;
+			descr = &lls->sub_lock.cll_descr;
 
 			LASSERT(!descr->cld_obj);
 			descr->cld_obj   = lovsub2cl(r0->lo_sub[i]);
 			descr->cld_start = cl_index(descr->cld_obj, start);
 			descr->cld_end   = cl_index(descr->cld_obj, end);
-			descr->cld_mode  = parent->cll_descr.cld_mode;
-			descr->cld_gid   = parent->cll_descr.cld_gid;
-			descr->cld_enq_flags   = parent->cll_descr.cld_enq_flags;
-			/* XXX has no effect */
-			lck->lls_sub[nr].sub_got = *descr;
-			lck->lls_sub[nr].sub_stripe = i;
+			descr->cld_mode  = lock->cll_descr.cld_mode;
+			descr->cld_gid   = lock->cll_descr.cld_gid;
+			descr->cld_enq_flags = lock->cll_descr.cld_enq_flags;
+			lls->sub_stripe = i;
+
+			/* initialize sub lock */
+			result = lov_sublock_init(env, lock, lls);
+			if (result < 0)
+				break;
+
+			lls->sub_initialized = 1;
 			nr++;
 		}
 	}
-	LASSERT(nr == lck->lls_nr);
-
-	/*
-	 * Some sub-locks can be missing at this point. This is not a problem,
-	 * because enqueue will create them anyway. Main duty of this function
-	 * is to fill in sub-lock descriptions in a race free manner.
-	 */
-	return result;
-}
+	LASSERT(ergo(result == 0, nr == lovlck->lls_nr));
 
-static int lov_sublock_release(const struct lu_env *env, struct lov_lock *lck,
-			       int i, int deluser, int rc)
-{
-	struct cl_lock *parent = lck->lls_cl.cls_lock;
-
-	LASSERT(cl_lock_is_mutexed(parent));
-
-	if (lck->lls_sub[i].sub_flags & LSF_HELD) {
-		struct cl_lock    *sublock;
-		int dying;
-
-		sublock = lck->lls_sub[i].sub_lock->lss_cl.cls_lock;
-		LASSERT(cl_lock_is_mutexed(sublock));
+	if (result != 0) {
+		for (i = 0; i < nr; ++i) {
+			if (!lovlck->lls_sub[i].sub_initialized)
+				break;
 
-		lck->lls_sub[i].sub_flags &= ~LSF_HELD;
-		if (deluser)
-			cl_lock_user_del(env, sublock);
-		/*
-		 * If the last hold is released, and cancellation is pending
-		 * for a sub-lock, release parent mutex, to avoid keeping it
-		 * while sub-lock is being paged out.
-		 */
-		dying = (sublock->cll_descr.cld_mode == CLM_PHANTOM ||
-			 sublock->cll_descr.cld_mode == CLM_GROUP ||
-			 (sublock->cll_flags & (CLF_CANCELPEND|CLF_DOOMED))) &&
-			sublock->cll_holds == 1;
-		if (dying)
-			cl_lock_mutex_put(env, parent);
-		cl_lock_unhold(env, sublock, "lov-parent", parent);
-		if (dying) {
-			cl_lock_mutex_get(env, parent);
-			rc = lov_subresult(rc, CLO_REPEAT);
+			cl_lock_fini(env, &lovlck->lls_sub[i].sub_lock);
 		}
-		/*
-		 * From now on lck->lls_sub[i].sub_lock is a "weak" pointer,
-		 * not backed by a reference on a
-		 * sub-lock. lovsub_lock_delete() will clear
-		 * lck->lls_sub[i].sub_lock under semaphores, just before
-		 * sub-lock is destroyed.
-		 */
+		kvfree(lovlck);
+		lovlck = ERR_PTR(result);
 	}
-	return rc;
-}
-
-static void lov_sublock_hold(const struct lu_env *env, struct lov_lock *lck,
-			     int i)
-{
-	struct cl_lock *parent = lck->lls_cl.cls_lock;
-
-	LASSERT(cl_lock_is_mutexed(parent));
-
-	if (!(lck->lls_sub[i].sub_flags & LSF_HELD)) {
-		struct cl_lock *sublock;
-
-		sublock = lck->lls_sub[i].sub_lock->lss_cl.cls_lock;
-		LASSERT(cl_lock_is_mutexed(sublock));
-		LASSERT(sublock->cll_state != CLS_FREEING);
 
-		lck->lls_sub[i].sub_flags |= LSF_HELD;
-
-		cl_lock_get_trust(sublock);
-		cl_lock_hold_add(env, sublock, "lov-parent", parent);
-		cl_lock_user_add(env, sublock);
-		cl_lock_put(env, sublock);
-	}
+	return lovlck;
 }
 
 static void lov_lock_fini(const struct lu_env *env,
 			  struct cl_lock_slice *slice)
 {
-	struct lov_lock *lck;
+	struct lov_lock *lovlck;
 	int i;
 
-	lck = cl2lov_lock(slice);
-	LASSERT(lck->lls_nr_filled == 0);
-	if (lck->lls_sub) {
-		for (i = 0; i < lck->lls_nr; ++i)
-			/*
-			 * No sub-locks exists at this point, as sub-lock has
-			 * a reference on its parent.
-			 */
-			LASSERT(!lck->lls_sub[i].sub_lock);
-		kvfree(lck->lls_sub);
+	lovlck = cl2lov_lock(slice);
+	for (i = 0; i < lovlck->lls_nr; ++i) {
+		LASSERT(!lovlck->lls_sub[i].sub_is_enqueued);
+		if (lovlck->lls_sub[i].sub_initialized)
+			cl_lock_fini(env, &lovlck->lls_sub[i].sub_lock);
 	}
-	kmem_cache_free(lov_lock_kmem, lck);
-}
-
-static int lov_lock_enqueue_wait(const struct lu_env *env,
-				 struct lov_lock *lck,
-				 struct cl_lock *sublock)
-{
-	struct cl_lock *lock = lck->lls_cl.cls_lock;
-	int	     result;
-
-	LASSERT(cl_lock_is_mutexed(lock));
-
-	cl_lock_mutex_put(env, lock);
-	result = cl_lock_enqueue_wait(env, sublock, 0);
-	cl_lock_mutex_get(env, lock);
-	return result ?: CLO_REPEAT;
-}
-
-/**
- * Tries to advance a state machine of a given sub-lock toward enqueuing of
- * the top-lock.
- *
- * \retval 0 if state-transition can proceed
- * \retval -ve otherwise.
- */
-static int lov_lock_enqueue_one(const struct lu_env *env, struct lov_lock *lck,
-				struct cl_lock *sublock,
-				struct cl_io *io, __u32 enqflags, int last)
-{
-	int result;
-
-	/* first, try to enqueue a sub-lock ... */
-	result = cl_enqueue_try(env, sublock, io, enqflags);
-	if ((sublock->cll_state == CLS_ENQUEUED) && !(enqflags & CEF_AGL)) {
-		/* if it is enqueued, try to `wait' on it---maybe it's already
-		 * granted
-		 */
-		result = cl_wait_try(env, sublock);
-		if (result == CLO_REENQUEUED)
-			result = CLO_WAIT;
-	}
-	/*
-	 * If CEF_ASYNC flag is set, then all sub-locks can be enqueued in
-	 * parallel, otherwise---enqueue has to wait until sub-lock is granted
-	 * before proceeding to the next one.
-	 */
-	if ((result == CLO_WAIT) && (sublock->cll_state <= CLS_HELD) &&
-	    (enqflags & CEF_ASYNC) && (!last || (enqflags & CEF_AGL)))
-		result = 0;
-	return result;
-}
-
-/**
- * Helper function for lov_lock_enqueue() that creates missing sub-lock.
- */
-static int lov_sublock_fill(const struct lu_env *env, struct cl_lock *parent,
-			    struct cl_io *io, struct lov_lock *lck, int idx)
-{
-	struct lov_lock_link *link = NULL;
-	struct cl_lock       *sublock;
-	int		   result;
-
-	LASSERT(parent->cll_depth == 1);
-	cl_lock_mutex_put(env, parent);
-	sublock = lov_sublock_alloc(env, io, lck, idx, &link);
-	if (!IS_ERR(sublock))
-		cl_lock_mutex_get(env, sublock);
-	cl_lock_mutex_get(env, parent);
-
-	if (!IS_ERR(sublock)) {
-		cl_lock_get_trust(sublock);
-		if (parent->cll_state == CLS_QUEUING &&
-		    !lck->lls_sub[idx].sub_lock) {
-			lov_sublock_adopt(env, lck, sublock, idx, link);
-		} else {
-			kmem_cache_free(lov_lock_link_kmem, link);
-			/* other thread allocated sub-lock, or enqueue is no
-			 * longer going on
-			 */
-			cl_lock_mutex_put(env, parent);
-			cl_lock_unhold(env, sublock, "lov-parent", parent);
-			cl_lock_mutex_get(env, parent);
-		}
-		cl_lock_mutex_put(env, sublock);
-		cl_lock_put(env, sublock);
-		result = CLO_REPEAT;
-	} else
-		result = PTR_ERR(sublock);
-	return result;
+	kvfree(lovlck);
 }
 
 /**
@@ -543,529 +225,59 @@ static int lov_sublock_fill(const struct lu_env *env, struct cl_lock *parent,
  */
 static int lov_lock_enqueue(const struct lu_env *env,
 			    const struct cl_lock_slice *slice,
-			    struct cl_io *io, __u32 enqflags)
+			    struct cl_io *io, struct cl_sync_io *anchor)
 {
-	struct cl_lock	 *lock    = slice->cls_lock;
-	struct lov_lock	*lck     = cl2lov_lock(slice);
-	struct cl_lock_closure *closure = lov_closure_get(env, lock);
+	struct cl_lock *lock = slice->cls_lock;
+	struct lov_lock *lovlck = cl2lov_lock(slice);
 	int i;
-	int result;
-	enum cl_lock_state minstate;
+	int rc = 0;
 
-	for (result = 0, minstate = CLS_FREEING, i = 0; i < lck->lls_nr; ++i) {
-		int rc;
-		struct lovsub_lock     *sub;
-		struct lov_lock_sub    *lls;
-		struct cl_lock	 *sublock;
+	for (i = 0; i < lovlck->lls_nr; ++i) {
+		struct lov_lock_sub  *lls = &lovlck->lls_sub[i];
 		struct lov_sublock_env *subenv;
 
-		if (lock->cll_state != CLS_QUEUING) {
-			/*
-			 * Lock might have left QUEUING state if previous
-			 * iteration released its mutex. Stop enqueing in this
-			 * case and let the upper layer to decide what to do.
-			 */
-			LASSERT(i > 0 && result != 0);
-			break;
-		}
-
-		lls = &lck->lls_sub[i];
-		sub = lls->sub_lock;
-		/*
-		 * Sub-lock might have been canceled, while top-lock was
-		 * cached.
-		 */
-		if (!sub) {
-			result = lov_sublock_fill(env, lock, io, lck, i);
-			/* lov_sublock_fill() released @lock mutex,
-			 * restart.
-			 */
+		subenv = lov_sublock_env_get(env, lock, lls);
+		if (IS_ERR(subenv)) {
+			rc = PTR_ERR(subenv);
 			break;
 		}
-		sublock = sub->lss_cl.cls_lock;
-		rc = lov_sublock_lock(env, lck, lls, closure, &subenv);
-		if (rc == 0) {
-			lov_sublock_hold(env, lck, i);
-			rc = lov_lock_enqueue_one(subenv->lse_env, lck, sublock,
-						  subenv->lse_io, enqflags,
-						  i == lck->lls_nr - 1);
-			minstate = min(minstate, sublock->cll_state);
-			if (rc == CLO_WAIT) {
-				switch (sublock->cll_state) {
-				case CLS_QUEUING:
-					/* take recursive mutex, the lock is
-					 * released in lov_lock_enqueue_wait.
-					 */
-					cl_lock_mutex_get(env, sublock);
-					lov_sublock_unlock(env, sub, closure,
-							   subenv);
-					rc = lov_lock_enqueue_wait(env, lck,
-								   sublock);
-					break;
-				case CLS_CACHED:
-					cl_lock_get(sublock);
-					/* take recursive mutex of sublock */
-					cl_lock_mutex_get(env, sublock);
-					/* need to release all locks in closure
-					 * otherwise it may deadlock. LU-2683.
-					 */
-					lov_sublock_unlock(env, sub, closure,
-							   subenv);
-					/* sublock and parent are held. */
-					rc = lov_sublock_release(env, lck, i,
-								 1, rc);
-					cl_lock_mutex_put(env, sublock);
-					cl_lock_put(env, sublock);
-					break;
-				default:
-					lov_sublock_unlock(env, sub, closure,
-							   subenv);
-					break;
-				}
-			} else {
-				LASSERT(!sublock->cll_conflict);
-				lov_sublock_unlock(env, sub, closure, subenv);
-			}
-		}
-		result = lov_subresult(result, rc);
-		if (result != 0)
+		rc = cl_lock_enqueue(subenv->lse_env, subenv->lse_io,
+				     &lls->sub_lock, anchor);
+		lov_sublock_env_put(subenv);
+		if (rc != 0)
 			break;
-	}
-	cl_lock_closure_fini(closure);
-	return result ?: minstate >= CLS_ENQUEUED ? 0 : CLO_WAIT;
-}
-
-static int lov_lock_unuse(const struct lu_env *env,
-			  const struct cl_lock_slice *slice)
-{
-	struct lov_lock	*lck     = cl2lov_lock(slice);
-	struct cl_lock_closure *closure = lov_closure_get(env, slice->cls_lock);
-	int i;
-	int result;
-
-	for (result = 0, i = 0; i < lck->lls_nr; ++i) {
-		int rc;
-		struct lovsub_lock     *sub;
-		struct cl_lock	 *sublock;
-		struct lov_lock_sub    *lls;
-		struct lov_sublock_env *subenv;
 
-		/* top-lock state cannot change concurrently, because single
-		 * thread (one that released the last hold) carries unlocking
-		 * to the completion.
-		 */
-		LASSERT(slice->cls_lock->cll_state == CLS_INTRANSIT);
-		lls = &lck->lls_sub[i];
-		sub = lls->sub_lock;
-		if (!sub)
-			continue;
-
-		sublock = sub->lss_cl.cls_lock;
-		rc = lov_sublock_lock(env, lck, lls, closure, &subenv);
-		if (rc == 0) {
-			if (lls->sub_flags & LSF_HELD) {
-				LASSERT(sublock->cll_state == CLS_HELD ||
-					sublock->cll_state == CLS_ENQUEUED);
-				rc = cl_unuse_try(subenv->lse_env, sublock);
-				rc = lov_sublock_release(env, lck, i, 0, rc);
-			}
-			lov_sublock_unlock(env, sub, closure, subenv);
-		}
-		result = lov_subresult(result, rc);
+		lls->sub_is_enqueued = 1;
 	}
-
-	if (result == 0 && lck->lls_cancel_race) {
-		lck->lls_cancel_race = 0;
-		result = -ESTALE;
-	}
-	cl_lock_closure_fini(closure);
-	return result;
+	return rc;
 }
 
 static void lov_lock_cancel(const struct lu_env *env,
 			    const struct cl_lock_slice *slice)
 {
-	struct lov_lock	*lck     = cl2lov_lock(slice);
-	struct cl_lock_closure *closure = lov_closure_get(env, slice->cls_lock);
+	struct cl_lock *lock = slice->cls_lock;
+	struct lov_lock *lovlck = cl2lov_lock(slice);
 	int i;
-	int result;
 
-	for (result = 0, i = 0; i < lck->lls_nr; ++i) {
-		int rc;
-		struct lovsub_lock     *sub;
-		struct cl_lock	 *sublock;
-		struct lov_lock_sub    *lls;
+	for (i = 0; i < lovlck->lls_nr; ++i) {
+		struct lov_lock_sub *lls = &lovlck->lls_sub[i];
+		struct cl_lock *sublock = &lls->sub_lock;
 		struct lov_sublock_env *subenv;
 
-		/* top-lock state cannot change concurrently, because single
-		 * thread (one that released the last hold) carries unlocking
-		 * to the completion.
-		 */
-		lls = &lck->lls_sub[i];
-		sub = lls->sub_lock;
-		if (!sub)
-			continue;
-
-		sublock = sub->lss_cl.cls_lock;
-		rc = lov_sublock_lock(env, lck, lls, closure, &subenv);
-		if (rc == 0) {
-			if (!(lls->sub_flags & LSF_HELD)) {
-				lov_sublock_unlock(env, sub, closure, subenv);
-				continue;
-			}
-
-			switch (sublock->cll_state) {
-			case CLS_HELD:
-				rc = cl_unuse_try(subenv->lse_env, sublock);
-				lov_sublock_release(env, lck, i, 0, 0);
-				break;
-			default:
-				lov_sublock_release(env, lck, i, 1, 0);
-				break;
-			}
-			lov_sublock_unlock(env, sub, closure, subenv);
-		}
-
-		if (rc == CLO_REPEAT) {
-			--i;
-			continue;
-		}
-
-		result = lov_subresult(result, rc);
-	}
-
-	if (result)
-		CL_LOCK_DEBUG(D_ERROR, env, slice->cls_lock,
-			      "lov_lock_cancel fails with %d.\n", result);
-
-	cl_lock_closure_fini(closure);
-}
-
-static int lov_lock_wait(const struct lu_env *env,
-			 const struct cl_lock_slice *slice)
-{
-	struct lov_lock	*lck     = cl2lov_lock(slice);
-	struct cl_lock_closure *closure = lov_closure_get(env, slice->cls_lock);
-	enum cl_lock_state      minstate;
-	int		     reenqueued;
-	int		     result;
-	int		     i;
-
-again:
-	for (result = 0, minstate = CLS_FREEING, i = 0, reenqueued = 0;
-	     i < lck->lls_nr; ++i) {
-		int rc;
-		struct lovsub_lock     *sub;
-		struct cl_lock	 *sublock;
-		struct lov_lock_sub    *lls;
-		struct lov_sublock_env *subenv;
-
-		lls = &lck->lls_sub[i];
-		sub = lls->sub_lock;
-		sublock = sub->lss_cl.cls_lock;
-		rc = lov_sublock_lock(env, lck, lls, closure, &subenv);
-		if (rc == 0) {
-			LASSERT(sublock->cll_state >= CLS_ENQUEUED);
-			if (sublock->cll_state < CLS_HELD)
-				rc = cl_wait_try(env, sublock);
-
-			minstate = min(minstate, sublock->cll_state);
-			lov_sublock_unlock(env, sub, closure, subenv);
-		}
-		if (rc == CLO_REENQUEUED) {
-			reenqueued++;
-			rc = 0;
-		}
-		result = lov_subresult(result, rc);
-		if (result != 0)
-			break;
-	}
-	/* Each sublock only can be reenqueued once, so will not loop
-	 * forever.
-	 */
-	if (result == 0 && reenqueued != 0)
-		goto again;
-	cl_lock_closure_fini(closure);
-	return result ?: minstate >= CLS_HELD ? 0 : CLO_WAIT;
-}
-
-static int lov_lock_use(const struct lu_env *env,
-			const struct cl_lock_slice *slice)
-{
-	struct lov_lock	*lck     = cl2lov_lock(slice);
-	struct cl_lock_closure *closure = lov_closure_get(env, slice->cls_lock);
-	int		     result;
-	int		     i;
-
-	LASSERT(slice->cls_lock->cll_state == CLS_INTRANSIT);
-
-	for (result = 0, i = 0; i < lck->lls_nr; ++i) {
-		int rc;
-		struct lovsub_lock     *sub;
-		struct cl_lock	 *sublock;
-		struct lov_lock_sub    *lls;
-		struct lov_sublock_env *subenv;
-
-		LASSERT(slice->cls_lock->cll_state == CLS_INTRANSIT);
-
-		lls = &lck->lls_sub[i];
-		sub = lls->sub_lock;
-		if (!sub) {
-			/*
-			 * Sub-lock might have been canceled, while top-lock was
-			 * cached.
-			 */
-			result = -ESTALE;
-			break;
-		}
-
-		sublock = sub->lss_cl.cls_lock;
-		rc = lov_sublock_lock(env, lck, lls, closure, &subenv);
-		if (rc == 0) {
-			LASSERT(sublock->cll_state != CLS_FREEING);
-			lov_sublock_hold(env, lck, i);
-			if (sublock->cll_state == CLS_CACHED) {
-				rc = cl_use_try(subenv->lse_env, sublock, 0);
-				if (rc != 0)
-					rc = lov_sublock_release(env, lck,
-								 i, 1, rc);
-			} else if (sublock->cll_state == CLS_NEW) {
-				/* Sub-lock might have been canceled, while
-				 * top-lock was cached.
-				 */
-				result = -ESTALE;
-				lov_sublock_release(env, lck, i, 1, result);
-			}
-			lov_sublock_unlock(env, sub, closure, subenv);
-		}
-		result = lov_subresult(result, rc);
-		if (result != 0)
-			break;
-	}
-
-	if (lck->lls_cancel_race) {
-		/*
-		 * If there is unlocking happened at the same time, then
-		 * sublock_lock state should be FREEING, and lov_sublock_lock
-		 * should return CLO_REPEAT. In this case, it should return
-		 * ESTALE, and up layer should reset the lock state to be NEW.
-		 */
-		lck->lls_cancel_race = 0;
-		LASSERT(result != 0);
-		result = -ESTALE;
-	}
-	cl_lock_closure_fini(closure);
-	return result;
-}
-
-/**
- * Check if the extent region \a descr is covered by \a child against the
- * specific \a stripe.
- */
-static int lov_lock_stripe_is_matching(const struct lu_env *env,
-				       struct lov_object *lov, int stripe,
-				       const struct cl_lock_descr *child,
-				       const struct cl_lock_descr *descr)
-{
-	struct lov_stripe_md *lsm = lov->lo_lsm;
-	u64 start;
-	u64 end;
-	int result;
-
-	if (lov_r0(lov)->lo_nr == 1)
-		return cl_lock_ext_match(child, descr);
-
-	/*
-	 * For a multi-stripes object:
-	 * - make sure the descr only covers child's stripe, and
-	 * - check if extent is matching.
-	 */
-	start = cl_offset(&lov->lo_cl, descr->cld_start);
-	end   = cl_offset(&lov->lo_cl, descr->cld_end + 1) - 1;
-	result = 0;
-	/* glimpse should work on the object with LOV EA hole. */
-	if (end - start <= lsm->lsm_stripe_size) {
-		int idx;
-
-		idx = lov_stripe_number(lsm, start);
-		if (idx == stripe ||
-		    unlikely(!lov_r0(lov)->lo_sub[idx])) {
-			idx = lov_stripe_number(lsm, end);
-			if (idx == stripe ||
-			    unlikely(!lov_r0(lov)->lo_sub[idx]))
-				result = 1;
-		}
-	}
-
-	if (result != 0) {
-		struct cl_lock_descr *subd = &lov_env_info(env)->lti_ldescr;
-		u64 sub_start;
-		u64 sub_end;
-
-		subd->cld_obj  = NULL;   /* don't need sub object at all */
-		subd->cld_mode = descr->cld_mode;
-		subd->cld_gid  = descr->cld_gid;
-		result = lov_stripe_intersects(lsm, stripe, start, end,
-					       &sub_start, &sub_end);
-		LASSERT(result);
-		subd->cld_start = cl_index(child->cld_obj, sub_start);
-		subd->cld_end   = cl_index(child->cld_obj, sub_end);
-		result = cl_lock_ext_match(child, subd);
-	}
-	return result;
-}
-
-/**
- * An implementation of cl_lock_operations::clo_fits_into() method.
- *
- * Checks whether a lock (given by \a slice) is suitable for \a
- * io. Multi-stripe locks can be used only for "quick" io, like truncate, or
- * O_APPEND write.
- *
- * \see ccc_lock_fits_into().
- */
-static int lov_lock_fits_into(const struct lu_env *env,
-			      const struct cl_lock_slice *slice,
-			      const struct cl_lock_descr *need,
-			      const struct cl_io *io)
-{
-	struct lov_lock   *lov = cl2lov_lock(slice);
-	struct lov_object *obj = cl2lov(slice->cls_obj);
-	int result;
-
-	LASSERT(cl_object_same(need->cld_obj, slice->cls_obj));
-	LASSERT(lov->lls_nr > 0);
-
-	/* for top lock, it's necessary to match enq flags otherwise it will
-	 * run into problem if a sublock is missing and reenqueue.
-	 */
-	if (need->cld_enq_flags != lov->lls_orig.cld_enq_flags)
-		return 0;
-
-	if (need->cld_mode == CLM_GROUP)
-		/*
-		 * always allow to match group lock.
-		 */
-		result = cl_lock_ext_match(&lov->lls_orig, need);
-	else if (lov->lls_nr == 1) {
-		struct cl_lock_descr *got = &lov->lls_sub[0].sub_got;
-
-		result = lov_lock_stripe_is_matching(env,
-						     cl2lov(slice->cls_obj),
-						     lov->lls_sub[0].sub_stripe,
-						     got, need);
-	} else if (io->ci_type != CIT_SETATTR && io->ci_type != CIT_MISC &&
-		   !cl_io_is_append(io) && need->cld_mode != CLM_PHANTOM)
-		/*
-		 * Multi-stripe locks are only suitable for `quick' IO and for
-		 * glimpse.
-		 */
-		result = 0;
-	else
-		/*
-		 * Most general case: multi-stripe existing lock, and
-		 * (potentially) multi-stripe @need lock. Check that @need is
-		 * covered by @lov's sub-locks.
-		 *
-		 * For now, ignore lock expansions made by the server, and
-		 * match against original lock extent.
-		 */
-		result = cl_lock_ext_match(&lov->lls_orig, need);
-	CDEBUG(D_DLMTRACE, DDESCR"/"DDESCR" %d %d/%d: %d\n",
-	       PDESCR(&lov->lls_orig), PDESCR(&lov->lls_sub[0].sub_got),
-	       lov->lls_sub[0].sub_stripe, lov->lls_nr, lov_r0(obj)->lo_nr,
-	       result);
-	return result;
-}
-
-void lov_lock_unlink(const struct lu_env *env,
-		     struct lov_lock_link *link, struct lovsub_lock *sub)
-{
-	struct lov_lock *lck    = link->lll_super;
-	struct cl_lock  *parent = lck->lls_cl.cls_lock;
-
-	LASSERT(cl_lock_is_mutexed(parent));
-	LASSERT(cl_lock_is_mutexed(sub->lss_cl.cls_lock));
-
-	list_del_init(&link->lll_list);
-	LASSERT(lck->lls_sub[link->lll_idx].sub_lock == sub);
-	/* yank this sub-lock from parent's array */
-	lck->lls_sub[link->lll_idx].sub_lock = NULL;
-	LASSERT(lck->lls_nr_filled > 0);
-	lck->lls_nr_filled--;
-	lu_ref_del(&parent->cll_reference, "lov-child", sub->lss_cl.cls_lock);
-	cl_lock_put(env, parent);
-	kmem_cache_free(lov_lock_link_kmem, link);
-}
-
-struct lov_lock_link *lov_lock_link_find(const struct lu_env *env,
-					 struct lov_lock *lck,
-					 struct lovsub_lock *sub)
-{
-	struct lov_lock_link *scan;
-
-	LASSERT(cl_lock_is_mutexed(sub->lss_cl.cls_lock));
-
-	list_for_each_entry(scan, &sub->lss_parents, lll_list) {
-		if (scan->lll_super == lck)
-			return scan;
-	}
-	return NULL;
-}
-
-/**
- * An implementation of cl_lock_operations::clo_delete() method. This is
- * invoked for "top-to-bottom" delete, when lock destruction starts from the
- * top-lock, e.g., as a result of inode destruction.
- *
- * Unlinks top-lock from all its sub-locks. Sub-locks are not deleted there:
- * this is done separately elsewhere:
- *
- *     - for inode destruction, lov_object_delete() calls cl_object_kill() for
- *       each sub-object, purging its locks;
- *
- *     - in other cases (e.g., a fatal error with a top-lock) sub-locks are
- *       left in the cache.
- */
-static void lov_lock_delete(const struct lu_env *env,
-			    const struct cl_lock_slice *slice)
-{
-	struct lov_lock	*lck     = cl2lov_lock(slice);
-	struct cl_lock_closure *closure = lov_closure_get(env, slice->cls_lock);
-	struct lov_lock_link   *link;
-	int		     rc;
-	int		     i;
-
-	LASSERT(slice->cls_lock->cll_state == CLS_FREEING);
-
-	for (i = 0; i < lck->lls_nr; ++i) {
-		struct lov_lock_sub *lls = &lck->lls_sub[i];
-		struct lovsub_lock  *lsl = lls->sub_lock;
-
-		if (!lsl) /* already removed */
+		if (!lls->sub_is_enqueued)
 			continue;
 
-		rc = lov_sublock_lock(env, lck, lls, closure, NULL);
-		if (rc == CLO_REPEAT) {
-			--i;
-			continue;
+		lls->sub_is_enqueued = 0;
+		subenv = lov_sublock_env_get(env, lock, lls);
+		if (!IS_ERR(subenv)) {
+			cl_lock_cancel(subenv->lse_env, sublock);
+			lov_sublock_env_put(subenv);
+		} else {
+			CL_LOCK_DEBUG(D_ERROR, env, slice->cls_lock,
+				      "lov_lock_cancel fails with %ld.\n",
+				      PTR_ERR(subenv));
 		}
-
-		LASSERT(rc == 0);
-		LASSERT(lsl->lss_cl.cls_lock->cll_state < CLS_FREEING);
-
-		if (lls->sub_flags & LSF_HELD)
-			lov_sublock_release(env, lck, i, 1, 0);
-
-		link = lov_lock_link_find(env, lck, lsl);
-		LASSERT(link);
-		lov_lock_unlink(env, link, lsl);
-		LASSERT(!lck->lls_sub[i].sub_lock);
-
-		lov_sublock_unlock(env, lsl, closure, NULL);
 	}
-
-	cl_lock_closure_fini(closure);
 }
 
 static int lov_lock_print(const struct lu_env *env, void *cookie,
@@ -1079,12 +291,8 @@ static int lov_lock_print(const struct lu_env *env, void *cookie,
 		struct lov_lock_sub *sub;
 
 		sub = &lck->lls_sub[i];
-		(*p)(env, cookie, "    %d %x: ", i, sub->sub_flags);
-		if (sub->sub_lock)
-			cl_lock_print(env, cookie, p,
-				      sub->sub_lock->lss_cl.cls_lock);
-		else
-			(*p)(env, cookie, "---\n");
+		(*p)(env, cookie, "    %d %x: ", i, sub->sub_is_enqueued);
+		cl_lock_print(env, cookie, p, &sub->sub_lock);
 	}
 	return 0;
 }
@@ -1092,12 +300,7 @@ static int lov_lock_print(const struct lu_env *env, void *cookie,
 static const struct cl_lock_operations lov_lock_ops = {
 	.clo_fini      = lov_lock_fini,
 	.clo_enqueue   = lov_lock_enqueue,
-	.clo_wait      = lov_lock_wait,
-	.clo_use       = lov_lock_use,
-	.clo_unuse     = lov_lock_unuse,
 	.clo_cancel    = lov_lock_cancel,
-	.clo_fits_into = lov_lock_fits_into,
-	.clo_delete    = lov_lock_delete,
 	.clo_print     = lov_lock_print
 };
 
@@ -1105,14 +308,13 @@ int lov_lock_init_raid0(const struct lu_env *env, struct cl_object *obj,
 			struct cl_lock *lock, const struct cl_io *io)
 {
 	struct lov_lock *lck;
-	int result;
+	int result = 0;
 
-	lck = kmem_cache_zalloc(lov_lock_kmem, GFP_NOFS);
-	if (lck) {
+	lck = lov_lock_sub_init(env, obj, lock);
+	if (!IS_ERR(lck))
 		cl_lock_slice_add(lock, &lck->lls_cl, obj, &lov_lock_ops);
-		result = lov_lock_sub_init(env, lck, io);
-	} else
-		result = -ENOMEM;
+	else
+		result = PTR_ERR(lck);
 	return result;
 }
 
@@ -1147,21 +349,9 @@ int lov_lock_init_empty(const struct lu_env *env, struct cl_object *obj,
 	lck = kmem_cache_zalloc(lov_lock_kmem, GFP_NOFS);
 	if (lck) {
 		cl_lock_slice_add(lock, &lck->lls_cl, obj, &lov_empty_lock_ops);
-		lck->lls_orig = lock->cll_descr;
 		result = 0;
 	}
 	return result;
 }
 
-static struct cl_lock_closure *lov_closure_get(const struct lu_env *env,
-					       struct cl_lock *parent)
-{
-	struct cl_lock_closure *closure;
-
-	closure = &lov_env_info(env)->lti_closure;
-	LASSERT(list_empty(&closure->clc_list));
-	cl_lock_closure_init(env, closure, parent, 1);
-	return closure;
-}
-
 /** @} lov */
diff --git a/drivers/staging/lustre/lustre/lov/lov_merge.c b/drivers/staging/lustre/lustre/lov/lov_merge.c
index 029cd4d62..56ef41d17 100644
--- a/drivers/staging/lustre/lustre/lov/lov_merge.c
+++ b/drivers/staging/lustre/lustre/lov/lov_merge.c
@@ -154,6 +154,7 @@ void lov_merge_attrs(struct obdo *tgt, struct obdo *src, u64 valid,
 	valid &= src->o_valid;
 
 	if (*set) {
+		tgt->o_valid &= valid;
 		if (valid & OBD_MD_FLSIZE) {
 			/* this handles sparse files properly */
 			u64 lov_size;
@@ -172,12 +173,22 @@ void lov_merge_attrs(struct obdo *tgt, struct obdo *src, u64 valid,
 			tgt->o_mtime = src->o_mtime;
 		if (valid & OBD_MD_FLDATAVERSION)
 			tgt->o_data_version += src->o_data_version;
+
+		/* handle flags */
+		if (valid & OBD_MD_FLFLAGS)
+			tgt->o_flags &= src->o_flags;
+		else
+			tgt->o_flags = 0;
 	} else {
 		memcpy(tgt, src, sizeof(*tgt));
 		tgt->o_oi = lsm->lsm_oi;
+		tgt->o_valid = valid;
 		if (valid & OBD_MD_FLSIZE)
 			tgt->o_size = lov_stripe_size(lsm, src->o_size,
 						      stripeno);
+		tgt->o_flags = 0;
+		if (valid & OBD_MD_FLFLAGS)
+			tgt->o_flags = src->o_flags;
 	}
 
 	/* data_version needs to be valid on all stripes to be correct! */
diff --git a/drivers/staging/lustre/lustre/lov/lov_obd.c b/drivers/staging/lustre/lustre/lov/lov_obd.c
index 5daa7faf4..e15ef2ece 100644
--- a/drivers/staging/lustre/lustre/lov/lov_obd.c
+++ b/drivers/staging/lustre/lustre/lov/lov_obd.c
@@ -54,7 +54,6 @@
 #include "../include/lprocfs_status.h"
 #include "../include/lustre_param.h"
 #include "../include/cl_object.h"
-#include "../include/lclient.h"		/* for cl_client_lru */
 #include "../include/lustre/ll_fiemap.h"
 #include "../include/lustre_fid.h"
 
@@ -124,7 +123,6 @@ static int lov_set_osc_active(struct obd_device *obd, struct obd_uuid *uuid,
 static int lov_notify(struct obd_device *obd, struct obd_device *watched,
 		      enum obd_notify_event ev, void *data);
 
-#define MAX_STRING_SIZE 128
 int lov_connect_obd(struct obd_device *obd, __u32 index, int activate,
 		    struct obd_connect_data *data)
 {
@@ -965,7 +963,6 @@ int lov_process_config_base(struct obd_device *obd, struct lustre_cfg *lcfg,
 		CERROR("Unknown command: %d\n", lcfg->lcfg_command);
 		rc = -EINVAL;
 		goto out;
-
 	}
 	}
 out:
@@ -1734,6 +1731,27 @@ static int lov_fiemap(struct lov_obd *lov, __u32 keylen, void *key,
 	unsigned int buffer_size = FIEMAP_BUFFER_SIZE;
 
 	if (!lsm_has_objects(lsm)) {
+		if (lsm && lsm_is_released(lsm) && (fm_key->fiemap.fm_start <
+		    fm_key->oa.o_size)) {
+			/*
+			 * released file, return a minimal FIEMAP if
+			 * request fits in file-size.
+			 */
+			fiemap->fm_mapped_extents = 1;
+			fiemap->fm_extents[0].fe_logical =
+					fm_key->fiemap.fm_start;
+			if (fm_key->fiemap.fm_start + fm_key->fiemap.fm_length <
+			    fm_key->oa.o_size) {
+				fiemap->fm_extents[0].fe_length =
+					fm_key->fiemap.fm_length;
+			} else {
+				fiemap->fm_extents[0].fe_length =
+					fm_key->oa.o_size - fm_key->fiemap.fm_start;
+				fiemap->fm_extents[0].fe_flags |=
+						(FIEMAP_EXTENT_UNKNOWN |
+						 FIEMAP_EXTENT_LAST);
+			}
+		}
 		rc = 0;
 		goto out;
 	}
@@ -2173,7 +2191,6 @@ void lov_stripe_lock(struct lov_stripe_md *md)
 	LASSERT(md->lsm_lock_owner == 0);
 	md->lsm_lock_owner = current_pid();
 }
-EXPORT_SYMBOL(lov_stripe_lock);
 
 void lov_stripe_unlock(struct lov_stripe_md *md)
 		__releases(&md->lsm_lock)
@@ -2182,7 +2199,6 @@ void lov_stripe_unlock(struct lov_stripe_md *md)
 	md->lsm_lock_owner = 0;
 	spin_unlock(&md->lsm_lock);
 }
-EXPORT_SYMBOL(lov_stripe_unlock);
 
 static int lov_quotactl(struct obd_device *obd, struct obd_export *exp,
 			struct obd_quotactl *oqctl)
diff --git a/drivers/staging/lustre/lustre/lov/lov_object.c b/drivers/staging/lustre/lustre/lov/lov_object.c
index 1f8ed95a6..561d493b2 100644
--- a/drivers/staging/lustre/lustre/lov/lov_object.c
+++ b/drivers/staging/lustre/lustre/lov/lov_object.c
@@ -67,7 +67,7 @@ struct lov_layout_operations {
 	int  (*llo_print)(const struct lu_env *env, void *cookie,
 			  lu_printer_t p, const struct lu_object *o);
 	int  (*llo_page_init)(const struct lu_env *env, struct cl_object *obj,
-			      struct cl_page *page, struct page *vmpage);
+			      struct cl_page *page, pgoff_t index);
 	int  (*llo_lock_init)(const struct lu_env *env,
 			      struct cl_object *obj, struct cl_lock *lock,
 			      const struct cl_io *io);
@@ -193,6 +193,18 @@ static int lov_init_sub(const struct lu_env *env, struct lov_object *lov,
 	return result;
 }
 
+static int lov_page_slice_fixup(struct lov_object *lov,
+				struct cl_object *stripe)
+{
+	struct cl_object_header *hdr = cl_object_header(&lov->lo_cl);
+	struct cl_object *o;
+
+	cl_object_for_each(o, stripe)
+		o->co_slice_off += hdr->coh_page_bufsize;
+
+	return cl_object_header(stripe)->coh_page_bufsize;
+}
+
 static int lov_init_raid0(const struct lu_env *env,
 			  struct lov_device *dev, struct lov_object *lov,
 			  const struct cl_object_conf *conf,
@@ -222,6 +234,8 @@ static int lov_init_raid0(const struct lu_env *env,
 	r0->lo_sub = libcfs_kvzalloc(r0->lo_nr * sizeof(r0->lo_sub[0]),
 				     GFP_NOFS);
 	if (r0->lo_sub) {
+		int psz = 0;
+
 		result = 0;
 		subconf->coc_inode = conf->coc_inode;
 		spin_lock_init(&r0->lo_sub_lock);
@@ -254,13 +268,24 @@ static int lov_init_raid0(const struct lu_env *env,
 				if (result == -EAGAIN) { /* try again */
 					--i;
 					result = 0;
+					continue;
 				}
 			} else {
 				result = PTR_ERR(stripe);
 			}
+
+			if (result == 0) {
+				int sz = lov_page_slice_fixup(lov, stripe);
+
+				LASSERT(ergo(psz > 0, psz == sz));
+				psz = sz;
+			}
 		}
-	} else
+		if (result == 0)
+			cl_object_header(&lov->lo_cl)->coh_page_bufsize += psz;
+	} else {
 		result = -ENOMEM;
+	}
 out:
 	return result;
 }
@@ -286,8 +311,6 @@ static int lov_delete_empty(const struct lu_env *env, struct lov_object *lov,
 	LASSERT(lov->lo_type == LLT_EMPTY || lov->lo_type == LLT_RELEASED);
 
 	lov_layout_wait(env, lov);
-
-	cl_object_prune(env, &lov->lo_cl);
 	return 0;
 }
 
@@ -355,7 +378,7 @@ static int lov_delete_raid0(const struct lu_env *env, struct lov_object *lov,
 			struct lovsub_object *los = r0->lo_sub[i];
 
 			if (los) {
-				cl_locks_prune(env, &los->lso_cl, 1);
+				cl_object_prune(env, &los->lso_cl);
 				/*
 				 * If top-level object is to be evicted from
 				 * the cache, so are its sub-objects.
@@ -364,7 +387,6 @@ static int lov_delete_raid0(const struct lu_env *env, struct lov_object *lov,
 			}
 		}
 	}
-	cl_object_prune(env, &lov->lo_cl);
 	return 0;
 }
 
@@ -666,7 +688,6 @@ static int lov_layout_change(const struct lu_env *unused,
 	const struct lov_layout_operations *old_ops;
 	const struct lov_layout_operations *new_ops;
 
-	struct cl_object_header *hdr = cl_object_header(&lov->lo_cl);
 	void *cookie;
 	struct lu_env *env;
 	int refcheck;
@@ -691,13 +712,15 @@ static int lov_layout_change(const struct lu_env *unused,
 	old_ops = &lov_dispatch[lov->lo_type];
 	new_ops = &lov_dispatch[llt];
 
+	result = cl_object_prune(env, &lov->lo_cl);
+	if (result != 0)
+		goto out;
+
 	result = old_ops->llo_delete(env, lov, &lov->u);
 	if (result == 0) {
 		old_ops->llo_fini(env, lov, &lov->u);
 
 		LASSERT(atomic_read(&lov->lo_active_ios) == 0);
-		LASSERT(!hdr->coh_tree.rnode);
-		LASSERT(hdr->coh_pages == 0);
 
 		lov->lo_type = LLT_EMPTY;
 		result = new_ops->llo_init(env,
@@ -713,6 +736,7 @@ static int lov_layout_change(const struct lu_env *unused,
 		}
 	}
 
+out:
 	cl_env_put(env, &refcheck);
 	cl_env_reexit(cookie);
 	return result;
@@ -793,7 +817,8 @@ static int lov_conf_set(const struct lu_env *env, struct cl_object *obj,
 		goto out;
 	}
 
-	lov->lo_layout_invalid = lov_layout_change(env, lov, conf);
+	result = lov_layout_change(env, lov, conf);
+	lov->lo_layout_invalid = result != 0;
 
 out:
 	lov_conf_unlock(lov);
@@ -825,10 +850,10 @@ static int lov_object_print(const struct lu_env *env, void *cookie,
 }
 
 int lov_page_init(const struct lu_env *env, struct cl_object *obj,
-		  struct cl_page *page, struct page *vmpage)
+		  struct cl_page *page, pgoff_t index)
 {
-	return LOV_2DISPATCH_NOLOCK(cl2lov(obj),
-				    llo_page_init, env, obj, page, vmpage);
+	return LOV_2DISPATCH_NOLOCK(cl2lov(obj), llo_page_init, env, obj, page,
+				    index);
 }
 
 /**
@@ -911,8 +936,9 @@ struct lu_object *lov_object_alloc(const struct lu_env *env,
 		 * for object with different layouts.
 		 */
 		obj->lo_ops = &lov_lu_obj_ops;
-	} else
+	} else {
 		obj = NULL;
+	}
 	return obj;
 }
 
diff --git a/drivers/staging/lustre/lustre/lov/lov_offset.c b/drivers/staging/lustre/lustre/lov/lov_offset.c
index ae83eb0f6..9302f06c3 100644
--- a/drivers/staging/lustre/lustre/lov/lov_offset.c
+++ b/drivers/staging/lustre/lustre/lov/lov_offset.c
@@ -66,6 +66,18 @@ u64 lov_stripe_size(struct lov_stripe_md *lsm, u64 ost_size, int stripeno)
 	return lov_size;
 }
 
+/**
+ * Compute file level page index by stripe level page offset
+ */
+pgoff_t lov_stripe_pgoff(struct lov_stripe_md *lsm, pgoff_t stripe_index,
+			 int stripe)
+{
+	loff_t offset;
+
+	offset = lov_stripe_size(lsm, stripe_index << PAGE_SHIFT, stripe);
+	return offset >> PAGE_SHIFT;
+}
+
 /* we have an offset in file backed by an lov and want to find out where
  * that offset lands in our given stripe of the file.  for the easy
  * case where the offset is within the stripe, we just have to scale the
diff --git a/drivers/staging/lustre/lustre/lov/lov_pack.c b/drivers/staging/lustre/lustre/lov/lov_pack.c
index 3925633a9..0215ea54d 100644
--- a/drivers/staging/lustre/lustre/lov/lov_pack.c
+++ b/drivers/staging/lustre/lustre/lov/lov_pack.c
@@ -136,7 +136,6 @@ int lov_packmd(struct obd_export *exp, struct lov_mds_md **lmmp,
 		CERROR("bad mem LOV MAGIC: 0x%08X != 0x%08X nor 0x%08X\n",
 		       lmm_magic, LOV_MAGIC_V1, LOV_MAGIC_V3);
 		return -EINVAL;
-
 	}
 
 	if (lsm) {
@@ -444,8 +443,7 @@ int lov_getstripe(struct obd_export *exp, struct lov_stripe_md *lsm,
 	if (lum.lmm_magic == LOV_USER_MAGIC) {
 		/* User request for v1, we need skip lmm_pool_name */
 		if (lmmk->lmm_magic == LOV_MAGIC_V3) {
-			memmove((char *)(&lmmk->lmm_stripe_count) +
-				sizeof(lmmk->lmm_stripe_count),
+			memmove(((struct lov_mds_md_v1 *)lmmk)->lmm_objects,
 				((struct lov_mds_md_v3 *)lmmk)->lmm_objects,
 				lmmk->lmm_stripe_count *
 				sizeof(struct lov_ost_data_v1));
@@ -457,9 +455,9 @@ int lov_getstripe(struct obd_export *exp, struct lov_stripe_md *lsm,
 	}
 
 	/* User wasn't expecting this many OST entries */
-	if (lum.lmm_stripe_count == 0)
+	if (lum.lmm_stripe_count == 0) {
 		lmm_size = lum_size;
-	else if (lum.lmm_stripe_count < lmmk->lmm_stripe_count) {
+	} else if (lum.lmm_stripe_count < lmmk->lmm_stripe_count) {
 		rc = -EOVERFLOW;
 		goto out_set;
 	}
diff --git a/drivers/staging/lustre/lustre/lov/lov_page.c b/drivers/staging/lustre/lustre/lov/lov_page.c
index fdcaf8047..0306f00c3 100644
--- a/drivers/staging/lustre/lustre/lov/lov_page.c
+++ b/drivers/staging/lustre/lustre/lov/lov_page.c
@@ -36,6 +36,7 @@
  * Implementation of cl_page for LOV layer.
  *
  *   Author: Nikita Danilov <nikita.danilov@sun.com>
+ *   Author: Jinshan Xiong <jinshan.xiong@intel.com>
  */
 
 #define DEBUG_SUBSYSTEM S_LOV
@@ -52,116 +53,66 @@
  *
  */
 
-static int lov_page_invariant(const struct cl_page_slice *slice)
+/**
+ * Adjust the stripe index by layout of raid0. @max_index is the maximum
+ * page index covered by an underlying DLM lock.
+ * This function converts max_index from stripe level to file level, and make
+ * sure it's not beyond one stripe.
+ */
+static int lov_raid0_page_is_under_lock(const struct lu_env *env,
+					const struct cl_page_slice *slice,
+					struct cl_io *unused,
+					pgoff_t *max_index)
 {
-	const struct cl_page  *page = slice->cpl_page;
-	const struct cl_page  *sub  = lov_sub_page(slice);
-
-	return ergo(sub,
-		    page->cp_child == sub &&
-		    sub->cp_parent == page &&
-		    page->cp_state == sub->cp_state);
-}
+	struct lov_object *loo = cl2lov(slice->cpl_obj);
+	struct lov_layout_raid0 *r0 = lov_r0(loo);
+	pgoff_t index = *max_index;
+	unsigned int pps; /* pages per stripe */
 
-static void lov_page_fini(const struct lu_env *env,
-			  struct cl_page_slice *slice)
-{
-	struct cl_page  *sub = lov_sub_page(slice);
+	CDEBUG(D_READA, "*max_index = %lu, nr = %d\n", index, r0->lo_nr);
+	if (index == 0) /* the page is not covered by any lock */
+		return 0;
 
-	LINVRNT(lov_page_invariant(slice));
+	if (r0->lo_nr == 1) /* single stripe file */
+		return 0;
 
-	if (sub) {
-		LASSERT(sub->cp_state == CPS_FREEING);
-		lu_ref_del(&sub->cp_reference, "lov", sub->cp_parent);
-		sub->cp_parent = NULL;
-		slice->cpl_page->cp_child = NULL;
-		cl_page_put(env, sub);
+	/* max_index is stripe level, convert it into file level */
+	if (index != CL_PAGE_EOF) {
+		int stripeno = lov_page_stripe(slice->cpl_page);
+		*max_index = lov_stripe_pgoff(loo->lo_lsm, index, stripeno);
 	}
-}
-
-static int lov_page_own(const struct lu_env *env,
-			const struct cl_page_slice *slice, struct cl_io *io,
-			int nonblock)
-{
-	struct lov_io     *lio = lov_env_io(env);
-	struct lov_io_sub *sub;
 
-	LINVRNT(lov_page_invariant(slice));
-	LINVRNT(!cl2lov_page(slice)->lps_invalid);
+	/* calculate the end of current stripe */
+	pps = loo->lo_lsm->lsm_stripe_size >> PAGE_SHIFT;
+	index = ((slice->cpl_index + pps) & ~(pps - 1)) - 1;
 
-	sub = lov_page_subio(env, lio, slice);
-	if (!IS_ERR(sub)) {
-		lov_sub_page(slice)->cp_owner = sub->sub_io;
-		lov_sub_put(sub);
-	} else
-		LBUG(); /* Arrgh */
+	/* never exceed the end of the stripe */
+	*max_index = min_t(pgoff_t, *max_index, index);
 	return 0;
 }
 
-static void lov_page_assume(const struct lu_env *env,
-			    const struct cl_page_slice *slice, struct cl_io *io)
-{
-	lov_page_own(env, slice, io, 0);
-}
-
-static int lov_page_cache_add(const struct lu_env *env,
-			      const struct cl_page_slice *slice,
-			      struct cl_io *io)
-{
-	struct lov_io     *lio = lov_env_io(env);
-	struct lov_io_sub *sub;
-	int rc = 0;
-
-	LINVRNT(lov_page_invariant(slice));
-	LINVRNT(!cl2lov_page(slice)->lps_invalid);
-
-	sub = lov_page_subio(env, lio, slice);
-	if (!IS_ERR(sub)) {
-		rc = cl_page_cache_add(sub->sub_env, sub->sub_io,
-				       slice->cpl_page->cp_child, CRT_WRITE);
-		lov_sub_put(sub);
-	} else {
-		rc = PTR_ERR(sub);
-		CL_PAGE_DEBUG(D_ERROR, env, slice->cpl_page, "rc = %d\n", rc);
-	}
-	return rc;
-}
-
-static int lov_page_print(const struct lu_env *env,
-			  const struct cl_page_slice *slice,
-			  void *cookie, lu_printer_t printer)
+static int lov_raid0_page_print(const struct lu_env *env,
+				const struct cl_page_slice *slice,
+				void *cookie, lu_printer_t printer)
 {
 	struct lov_page *lp = cl2lov_page(slice);
 
-	return (*printer)(env, cookie, LUSTRE_LOV_NAME"-page@%p\n", lp);
+	return (*printer)(env, cookie, LUSTRE_LOV_NAME "-page@%p, raid0\n", lp);
 }
 
-static const struct cl_page_operations lov_page_ops = {
-	.cpo_fini   = lov_page_fini,
-	.cpo_own    = lov_page_own,
-	.cpo_assume = lov_page_assume,
-	.io = {
-		[CRT_WRITE] = {
-			.cpo_cache_add = lov_page_cache_add
-		}
-	},
-	.cpo_print  = lov_page_print
+static const struct cl_page_operations lov_raid0_page_ops = {
+	.cpo_is_under_lock = lov_raid0_page_is_under_lock,
+	.cpo_print  = lov_raid0_page_print
 };
 
-static void lov_empty_page_fini(const struct lu_env *env,
-				struct cl_page_slice *slice)
-{
-	LASSERT(!slice->cpl_page->cp_child);
-}
-
 int lov_page_init_raid0(const struct lu_env *env, struct cl_object *obj,
-			struct cl_page *page, struct page *vmpage)
+			struct cl_page *page, pgoff_t index)
 {
 	struct lov_object *loo = cl2lov(obj);
 	struct lov_layout_raid0 *r0 = lov_r0(loo);
 	struct lov_io     *lio = lov_env_io(env);
-	struct cl_page    *subpage;
 	struct cl_object  *subobj;
+	struct cl_object  *o;
 	struct lov_io_sub *sub;
 	struct lov_page   *lpg = cl_object_page_slice(obj, page);
 	loff_t	     offset;
@@ -169,59 +120,57 @@ int lov_page_init_raid0(const struct lu_env *env, struct cl_object *obj,
 	int		stripe;
 	int		rc;
 
-	offset = cl_offset(obj, page->cp_index);
+	offset = cl_offset(obj, index);
 	stripe = lov_stripe_number(loo->lo_lsm, offset);
 	LASSERT(stripe < r0->lo_nr);
 	rc = lov_stripe_offset(loo->lo_lsm, offset, stripe, &suboff);
 	LASSERT(rc == 0);
 
-	lpg->lps_invalid = 1;
-	cl_page_slice_add(page, &lpg->lps_cl, obj, &lov_page_ops);
+	cl_page_slice_add(page, &lpg->lps_cl, obj, index, &lov_raid0_page_ops);
 
 	sub = lov_sub_get(env, lio, stripe);
-	if (IS_ERR(sub)) {
-		rc = PTR_ERR(sub);
-		goto out;
-	}
+	if (IS_ERR(sub))
+		return PTR_ERR(sub);
 
 	subobj = lovsub2cl(r0->lo_sub[stripe]);
-	subpage = cl_page_find_sub(sub->sub_env, subobj,
-				   cl_index(subobj, suboff), vmpage, page);
-	lov_sub_put(sub);
-	if (IS_ERR(subpage)) {
-		rc = PTR_ERR(subpage);
-		goto out;
-	}
-
-	if (likely(subpage->cp_parent == page)) {
-		lu_ref_add(&subpage->cp_reference, "lov", page);
-		lpg->lps_invalid = 0;
-		rc = 0;
-	} else {
-		CL_PAGE_DEBUG(D_ERROR, env, page, "parent page\n");
-		CL_PAGE_DEBUG(D_ERROR, env, subpage, "child page\n");
-		LASSERT(0);
+	list_for_each_entry(o, &subobj->co_lu.lo_header->loh_layers,
+			    co_lu.lo_linkage) {
+		if (o->co_ops->coo_page_init) {
+			rc = o->co_ops->coo_page_init(sub->sub_env, o, page,
+						      cl_index(subobj, suboff));
+			if (rc != 0)
+				break;
+		}
 	}
+	lov_sub_put(sub);
 
-out:
 	return rc;
 }
 
+static int lov_empty_page_print(const struct lu_env *env,
+				const struct cl_page_slice *slice,
+				void *cookie, lu_printer_t printer)
+{
+	struct lov_page *lp = cl2lov_page(slice);
+
+	return (*printer)(env, cookie, LUSTRE_LOV_NAME "-page@%p, empty.\n",
+			  lp);
+}
+
 static const struct cl_page_operations lov_empty_page_ops = {
-	.cpo_fini   = lov_empty_page_fini,
-	.cpo_print  = lov_page_print
+	.cpo_print = lov_empty_page_print
 };
 
 int lov_page_init_empty(const struct lu_env *env, struct cl_object *obj,
-			struct cl_page *page, struct page *vmpage)
+			struct cl_page *page, pgoff_t index)
 {
 	struct lov_page *lpg = cl_object_page_slice(obj, page);
 	void *addr;
 
-	cl_page_slice_add(page, &lpg->lps_cl, obj, &lov_empty_page_ops);
-	addr = kmap(vmpage);
+	cl_page_slice_add(page, &lpg->lps_cl, obj, index, &lov_empty_page_ops);
+	addr = kmap(page->cp_vmpage);
 	memset(addr, 0, cl_page_size(obj));
-	kunmap(vmpage);
+	kunmap(page->cp_vmpage);
 	cl_page_export(env, page, 1);
 	return 0;
 }
diff --git a/drivers/staging/lustre/lustre/lov/lov_pool.c b/drivers/staging/lustre/lustre/lov/lov_pool.c
index 9ae1d6f42..690292ece 100644
--- a/drivers/staging/lustre/lustre/lov/lov_pool.c
+++ b/drivers/staging/lustre/lustre/lov/lov_pool.c
@@ -65,7 +65,6 @@ void lov_pool_putref(struct pool_desc *pool)
 		LASSERT(hlist_unhashed(&pool->pool_hash));
 		LASSERT(list_empty(&pool->pool_list));
 		LASSERT(!pool->pool_debugfs_entry);
-		lov_ost_pool_free(&(pool->pool_rr.lqr_pool));
 		lov_ost_pool_free(&(pool->pool_obds));
 		kfree(pool);
 	}
@@ -424,11 +423,6 @@ int lov_pool_new(struct obd_device *obd, char *poolname)
 	if (rc)
 		goto out_err;
 
-	memset(&(new_pool->pool_rr), 0, sizeof(struct lov_qos_rr));
-	rc = lov_ost_pool_init(&new_pool->pool_rr.lqr_pool, 0);
-	if (rc)
-		goto out_free_pool_obds;
-
 	INIT_HLIST_NODE(&new_pool->pool_hash);
 
 	/* get ref for debugfs file */
@@ -469,13 +463,10 @@ out_err:
 	list_del_init(&new_pool->pool_list);
 	lov->lov_pool_count--;
 	spin_unlock(&obd->obd_dev_lock);
-
 	ldebugfs_remove(&new_pool->pool_debugfs_entry);
-
-	lov_ost_pool_free(&new_pool->pool_rr.lqr_pool);
-out_free_pool_obds:
 	lov_ost_pool_free(&new_pool->pool_obds);
 	kfree(new_pool);
+
 	return rc;
 }
 
@@ -543,8 +534,6 @@ int lov_pool_add(struct obd_device *obd, char *poolname, char *ostname)
 	if (rc)
 		goto out;
 
-	pool->pool_rr.lqr_dirty = 1;
-
 	CDEBUG(D_CONFIG, "Added %s to "LOV_POOLNAMEF" as member %d\n",
 	       ostname, poolname,  pool_tgt_count(pool));
 
@@ -589,8 +578,6 @@ int lov_pool_remove(struct obd_device *obd, char *poolname, char *ostname)
 
 	lov_ost_pool_remove(&pool->pool_obds, lov_idx);
 
-	pool->pool_rr.lqr_dirty = 1;
-
 	CDEBUG(D_CONFIG, "%s removed from "LOV_POOLNAMEF"\n", ostname,
 	       poolname);
 
@@ -599,50 +586,3 @@ out:
 	lov_pool_putref(pool);
 	return rc;
 }
-
-int lov_check_index_in_pool(__u32 idx, struct pool_desc *pool)
-{
-	int i, rc;
-
-	/* caller may no have a ref on pool if it got the pool
-	 * without calling lov_find_pool() (e.g. go through the lov pool
-	 * list)
-	 */
-	lov_pool_getref(pool);
-
-	down_read(&pool_tgt_rw_sem(pool));
-
-	for (i = 0; i < pool_tgt_count(pool); i++) {
-		if (pool_tgt_array(pool)[i] == idx) {
-			rc = 0;
-			goto out;
-		}
-	}
-	rc = -ENOENT;
-out:
-	up_read(&pool_tgt_rw_sem(pool));
-
-	lov_pool_putref(pool);
-	return rc;
-}
-
-struct pool_desc *lov_find_pool(struct lov_obd *lov, char *poolname)
-{
-	struct pool_desc *pool;
-
-	pool = NULL;
-	if (poolname[0] != '\0') {
-		pool = cfs_hash_lookup(lov->lov_pools_hash_body, poolname);
-		if (!pool)
-			CWARN("Request for an unknown pool ("LOV_POOLNAMEF")\n",
-			      poolname);
-		if (pool && (pool_tgt_count(pool) == 0)) {
-			CWARN("Request for an empty pool ("LOV_POOLNAMEF")\n",
-			      poolname);
-			/* pool is ignored, so we remove ref on it */
-			lov_pool_putref(pool);
-			pool = NULL;
-		}
-	}
-	return pool;
-}
diff --git a/drivers/staging/lustre/lustre/lov/lov_request.c b/drivers/staging/lustre/lustre/lov/lov_request.c
index 7178a02d6..1be4b921c 100644
--- a/drivers/staging/lustre/lustre/lov/lov_request.c
+++ b/drivers/staging/lustre/lustre/lov/lov_request.c
@@ -52,7 +52,6 @@ static void lov_init_set(struct lov_request_set *set)
 	INIT_LIST_HEAD(&set->set_list);
 	atomic_set(&set->set_refcount, 1);
 	init_waitqueue_head(&set->set_waitq);
-	spin_lock_init(&set->set_lock);
 }
 
 void lov_finish_set(struct lov_request_set *set)
@@ -235,7 +234,6 @@ out:
 	if (tmp_oa)
 		kmem_cache_free(obdo_cachep, tmp_oa);
 	return rc;
-
 }
 
 int lov_fini_getattr_set(struct lov_request_set *set)
@@ -363,7 +361,6 @@ int lov_prep_destroy_set(struct obd_export *exp, struct obd_info *oinfo,
 	set->set_oi = oinfo;
 	set->set_oi->oi_md = lsm;
 	set->set_oi->oi_oa = src_oa;
-	set->set_oti = oti;
 	if (oti && src_oa->o_valid & OBD_MD_FLCOOKIE)
 		set->set_cookies = oti->oti_logcookies;
 
@@ -480,7 +477,6 @@ int lov_prep_setattr_set(struct obd_export *exp, struct obd_info *oinfo,
 	lov_init_set(set);
 
 	set->set_exp = exp;
-	set->set_oti = oti;
 	set->set_oi = oinfo;
 	if (oti && oinfo->oi_oa->o_valid & OBD_MD_FLCOOKIE)
 		set->set_cookies = oti->oti_logcookies;
@@ -716,12 +712,15 @@ int lov_prep_statfs_set(struct obd_device *obd, struct obd_info *oinfo,
 		struct lov_request *req;
 
 		if (!lov->lov_tgts[i] ||
-		    (!lov_check_and_wait_active(lov, i) &&
-		     (oinfo->oi_flags & OBD_STATFS_NODELAY))) {
+		    (oinfo->oi_flags & OBD_STATFS_NODELAY &&
+		     !lov->lov_tgts[i]->ltd_active)) {
 			CDEBUG(D_HA, "lov idx %d inactive\n", i);
 			continue;
 		}
 
+		if (!lov->lov_tgts[i]->ltd_active)
+			lov_check_and_wait_active(lov, i);
+
 		/* skip targets that have been explicitly disabled by the
 		 * administrator
 		 */
diff --git a/drivers/staging/lustre/lustre/lov/lovsub_dev.c b/drivers/staging/lustre/lustre/lov/lovsub_dev.c
index c335c020f..35f6b1d66 100644
--- a/drivers/staging/lustre/lustre/lov/lovsub_dev.c
+++ b/drivers/staging/lustre/lustre/lov/lovsub_dev.c
@@ -151,8 +151,9 @@ static int lovsub_req_init(const struct lu_env *env, struct cl_device *dev,
 	if (lsr) {
 		cl_req_slice_add(req, &lsr->lsrq_cl, dev, &lovsub_req_ops);
 		result = 0;
-	} else
+	} else {
 		result = -ENOMEM;
+	}
 	return result;
 }
 
@@ -182,10 +183,12 @@ static struct lu_device *lovsub_device_alloc(const struct lu_env *env,
 			d = lovsub2lu_dev(lsd);
 			d->ld_ops	 = &lovsub_lu_ops;
 			lsd->acid_cl.cd_ops = &lovsub_cl_ops;
-		} else
+		} else {
 			d = ERR_PTR(result);
-	} else
+		}
+	} else {
 		d = ERR_PTR(-ENOMEM);
+	}
 	return d;
 }
 
diff --git a/drivers/staging/lustre/lustre/lov/lovsub_lock.c b/drivers/staging/lustre/lustre/lov/lovsub_lock.c
index 3bb0c9068..e92edfb61 100644
--- a/drivers/staging/lustre/lustre/lov/lovsub_lock.c
+++ b/drivers/staging/lustre/lustre/lov/lovsub_lock.c
@@ -62,391 +62,8 @@ static void lovsub_lock_fini(const struct lu_env *env,
 	kmem_cache_free(lovsub_lock_kmem, lsl);
 }
 
-static void lovsub_parent_lock(const struct lu_env *env, struct lov_lock *lov)
-{
-	struct cl_lock *parent;
-
-	parent = lov->lls_cl.cls_lock;
-	cl_lock_get(parent);
-	lu_ref_add(&parent->cll_reference, "lovsub-parent", current);
-	cl_lock_mutex_get(env, parent);
-}
-
-static void lovsub_parent_unlock(const struct lu_env *env, struct lov_lock *lov)
-{
-	struct cl_lock *parent;
-
-	parent = lov->lls_cl.cls_lock;
-	cl_lock_mutex_put(env, lov->lls_cl.cls_lock);
-	lu_ref_del(&parent->cll_reference, "lovsub-parent", current);
-	cl_lock_put(env, parent);
-}
-
-/**
- * Implements cl_lock_operations::clo_state() method for lovsub layer, which
- * method is called whenever sub-lock state changes. Propagates state change
- * to the top-locks.
- */
-static void lovsub_lock_state(const struct lu_env *env,
-			      const struct cl_lock_slice *slice,
-			      enum cl_lock_state state)
-{
-	struct lovsub_lock   *sub = cl2lovsub_lock(slice);
-	struct lov_lock_link *scan;
-
-	LASSERT(cl_lock_is_mutexed(slice->cls_lock));
-
-	list_for_each_entry(scan, &sub->lss_parents, lll_list) {
-		struct lov_lock *lov    = scan->lll_super;
-		struct cl_lock  *parent = lov->lls_cl.cls_lock;
-
-		if (sub->lss_active != parent) {
-			lovsub_parent_lock(env, lov);
-			cl_lock_signal(env, parent);
-			lovsub_parent_unlock(env, lov);
-		}
-	}
-}
-
-/**
- * Implementation of cl_lock_operation::clo_weigh() estimating lock weight by
- * asking parent lock.
- */
-static unsigned long lovsub_lock_weigh(const struct lu_env *env,
-				       const struct cl_lock_slice *slice)
-{
-	struct lovsub_lock *lock = cl2lovsub_lock(slice);
-	struct lov_lock    *lov;
-	unsigned long       dumbbell;
-
-	LASSERT(cl_lock_is_mutexed(slice->cls_lock));
-
-	if (!list_empty(&lock->lss_parents)) {
-		/*
-		 * It is not clear whether all parents have to be asked and
-		 * their estimations summed, or it is enough to ask one. For
-		 * the current usages, one is always enough.
-		 */
-		lov = container_of(lock->lss_parents.next,
-				   struct lov_lock_link, lll_list)->lll_super;
-
-		lovsub_parent_lock(env, lov);
-		dumbbell = cl_lock_weigh(env, lov->lls_cl.cls_lock);
-		lovsub_parent_unlock(env, lov);
-	} else
-		dumbbell = 0;
-
-	return dumbbell;
-}
-
-/**
- * Maps start/end offsets within a stripe, to offsets within a file.
- */
-static void lovsub_lock_descr_map(const struct cl_lock_descr *in,
-				  struct lov_object *lov,
-				  int stripe, struct cl_lock_descr *out)
-{
-	pgoff_t size; /* stripe size in pages */
-	pgoff_t skip; /* how many pages in every stripe are occupied by
-		       * "other" stripes
-		       */
-	pgoff_t start;
-	pgoff_t end;
-
-	start = in->cld_start;
-	end   = in->cld_end;
-
-	if (lov->lo_lsm->lsm_stripe_count > 1) {
-		size = cl_index(lov2cl(lov), lov->lo_lsm->lsm_stripe_size);
-		skip = (lov->lo_lsm->lsm_stripe_count - 1) * size;
-
-		/* XXX overflow check here? */
-		start += start/size * skip + stripe * size;
-
-		if (end != CL_PAGE_EOF) {
-			end += end/size * skip + stripe * size;
-			/*
-			 * And check for overflow...
-			 */
-			if (end < in->cld_end)
-				end = CL_PAGE_EOF;
-		}
-	}
-	out->cld_start = start;
-	out->cld_end   = end;
-}
-
-/**
- * Adjusts parent lock extent when a sub-lock is attached to a parent. This is
- * called in two ways:
- *
- *     - as part of receive call-back, when server returns granted extent to
- *       the client, and
- *
- *     - when top-lock finds existing sub-lock in the cache.
- *
- * Note, that lock mode is not propagated to the parent: i.e., if CLM_READ
- * top-lock matches CLM_WRITE sub-lock, top-lock is still CLM_READ.
- */
-int lov_sublock_modify(const struct lu_env *env, struct lov_lock *lov,
-		       struct lovsub_lock *sublock,
-		       const struct cl_lock_descr *d, int idx)
-{
-	struct cl_lock       *parent;
-	struct lovsub_object *subobj;
-	struct cl_lock_descr *pd;
-	struct cl_lock_descr *parent_descr;
-	int		   result;
-
-	parent       = lov->lls_cl.cls_lock;
-	parent_descr = &parent->cll_descr;
-	LASSERT(cl_lock_mode_match(d->cld_mode, parent_descr->cld_mode));
-
-	subobj = cl2lovsub(sublock->lss_cl.cls_obj);
-	pd     = &lov_env_info(env)->lti_ldescr;
-
-	pd->cld_obj  = parent_descr->cld_obj;
-	pd->cld_mode = parent_descr->cld_mode;
-	pd->cld_gid  = parent_descr->cld_gid;
-	lovsub_lock_descr_map(d, subobj->lso_super, subobj->lso_index, pd);
-	lov->lls_sub[idx].sub_got = *d;
-	/*
-	 * Notify top-lock about modification, if lock description changes
-	 * materially.
-	 */
-	if (!cl_lock_ext_match(parent_descr, pd))
-		result = cl_lock_modify(env, parent, pd);
-	else
-		result = 0;
-	return result;
-}
-
-static int lovsub_lock_modify(const struct lu_env *env,
-			      const struct cl_lock_slice *s,
-			      const struct cl_lock_descr *d)
-{
-	struct lovsub_lock   *lock   = cl2lovsub_lock(s);
-	struct lov_lock_link *scan;
-	struct lov_lock      *lov;
-	int result		   = 0;
-
-	LASSERT(cl_lock_mode_match(d->cld_mode,
-				   s->cls_lock->cll_descr.cld_mode));
-	list_for_each_entry(scan, &lock->lss_parents, lll_list) {
-		int rc;
-
-		lov = scan->lll_super;
-		lovsub_parent_lock(env, lov);
-		rc = lov_sublock_modify(env, lov, lock, d, scan->lll_idx);
-		lovsub_parent_unlock(env, lov);
-		result = result ?: rc;
-	}
-	return result;
-}
-
-static int lovsub_lock_closure(const struct lu_env *env,
-			       const struct cl_lock_slice *slice,
-			       struct cl_lock_closure *closure)
-{
-	struct lovsub_lock   *sub;
-	struct cl_lock       *parent;
-	struct lov_lock_link *scan;
-	int		   result;
-
-	LASSERT(cl_lock_is_mutexed(slice->cls_lock));
-
-	sub    = cl2lovsub_lock(slice);
-	result = 0;
-
-	list_for_each_entry(scan, &sub->lss_parents, lll_list) {
-		parent = scan->lll_super->lls_cl.cls_lock;
-		result = cl_lock_closure_build(env, parent, closure);
-		if (result != 0)
-			break;
-	}
-	return result;
-}
-
-/**
- * A helper function for lovsub_lock_delete() that deals with a given parent
- * top-lock.
- */
-static int lovsub_lock_delete_one(const struct lu_env *env,
-				  struct cl_lock *child, struct lov_lock *lov)
-{
-	struct cl_lock *parent;
-	int	     result;
-
-	parent = lov->lls_cl.cls_lock;
-	if (parent->cll_error)
-		return 0;
-
-	result = 0;
-	switch (parent->cll_state) {
-	case CLS_ENQUEUED:
-		/* See LU-1355 for the case that a glimpse lock is
-		 * interrupted by signal
-		 */
-		LASSERT(parent->cll_flags & CLF_CANCELLED);
-		break;
-	case CLS_QUEUING:
-	case CLS_FREEING:
-		cl_lock_signal(env, parent);
-		break;
-	case CLS_INTRANSIT:
-		/*
-		 * Here lies a problem: a sub-lock is canceled while top-lock
-		 * is being unlocked. Top-lock cannot be moved into CLS_NEW
-		 * state, because unlocking has to succeed eventually by
-		 * placing lock into CLS_CACHED (or failing it), see
-		 * cl_unuse_try(). Nor can top-lock be left in CLS_CACHED
-		 * state, because lov maintains an invariant that all
-		 * sub-locks exist in CLS_CACHED (this allows cached top-lock
-		 * to be reused immediately). Nor can we wait for top-lock
-		 * state to change, because this can be synchronous to the
-		 * current thread.
-		 *
-		 * We know for sure that lov_lock_unuse() will be called at
-		 * least one more time to finish un-using, so leave a mark on
-		 * the top-lock, that will be seen by the next call to
-		 * lov_lock_unuse().
-		 */
-		if (cl_lock_is_intransit(parent))
-			lov->lls_cancel_race = 1;
-		break;
-	case CLS_CACHED:
-		/*
-		 * if a sub-lock is canceled move its top-lock into CLS_NEW
-		 * state to preserve an invariant that a top-lock in
-		 * CLS_CACHED is immediately ready for re-use (i.e., has all
-		 * sub-locks), and so that next attempt to re-use the top-lock
-		 * enqueues missing sub-lock.
-		 */
-		cl_lock_state_set(env, parent, CLS_NEW);
-		/* fall through */
-	case CLS_NEW:
-		/*
-		 * if last sub-lock is canceled, destroy the top-lock (which
-		 * is now `empty') proactively.
-		 */
-		if (lov->lls_nr_filled == 0) {
-			/* ... but unfortunately, this cannot be done easily,
-			 * as cancellation of a top-lock might acquire mutices
-			 * of its other sub-locks, violating lock ordering,
-			 * see cl_lock_{cancel,delete}() preconditions.
-			 *
-			 * To work around this, the mutex of this sub-lock is
-			 * released, top-lock is destroyed, and sub-lock mutex
-			 * acquired again. The list of parents has to be
-			 * re-scanned from the beginning after this.
-			 *
-			 * Only do this if no mutices other than on @child and
-			 * @parent are held by the current thread.
-			 *
-			 * TODO: The lock modal here is too complex, because
-			 * the lock may be canceled and deleted by voluntarily:
-			 *    cl_lock_request
-			 *      -> osc_lock_enqueue_wait
-			 *	-> osc_lock_cancel_wait
-			 *	  -> cl_lock_delete
-			 *	    -> lovsub_lock_delete
-			 *	      -> cl_lock_cancel/delete
-			 *		-> ...
-			 *
-			 * The better choice is to spawn a kernel thread for
-			 * this purpose. -jay
-			 */
-			if (cl_lock_nr_mutexed(env) == 2) {
-				cl_lock_mutex_put(env, child);
-				cl_lock_cancel(env, parent);
-				cl_lock_delete(env, parent);
-				result = 1;
-			}
-		}
-		break;
-	case CLS_HELD:
-		CL_LOCK_DEBUG(D_ERROR, env, parent, "Delete CLS_HELD lock\n");
-	default:
-		CERROR("Impossible state: %d\n", parent->cll_state);
-		LBUG();
-		break;
-	}
-
-	return result;
-}
-
-/**
- * An implementation of cl_lock_operations::clo_delete() method. This is
- * invoked in "bottom-to-top" delete, when lock destruction starts from the
- * sub-lock (e.g, as a result of ldlm lock LRU policy).
- */
-static void lovsub_lock_delete(const struct lu_env *env,
-			       const struct cl_lock_slice *slice)
-{
-	struct cl_lock     *child = slice->cls_lock;
-	struct lovsub_lock *sub   = cl2lovsub_lock(slice);
-	int restart;
-
-	LASSERT(cl_lock_is_mutexed(child));
-
-	/*
-	 * Destruction of a sub-lock might take multiple iterations, because
-	 * when the last sub-lock of a given top-lock is deleted, top-lock is
-	 * canceled proactively, and this requires to release sub-lock
-	 * mutex. Once sub-lock mutex has been released, list of its parents
-	 * has to be re-scanned from the beginning.
-	 */
-	do {
-		struct lov_lock      *lov;
-		struct lov_lock_link *scan;
-		struct lov_lock_link *temp;
-		struct lov_lock_sub  *subdata;
-
-		restart = 0;
-		list_for_each_entry_safe(scan, temp,
-					 &sub->lss_parents, lll_list) {
-			lov     = scan->lll_super;
-			subdata = &lov->lls_sub[scan->lll_idx];
-			lovsub_parent_lock(env, lov);
-			subdata->sub_got = subdata->sub_descr;
-			lov_lock_unlink(env, scan, sub);
-			restart = lovsub_lock_delete_one(env, child, lov);
-			lovsub_parent_unlock(env, lov);
-
-			if (restart) {
-				cl_lock_mutex_get(env, child);
-				break;
-			}
-	       }
-	} while (restart);
-}
-
-static int lovsub_lock_print(const struct lu_env *env, void *cookie,
-			     lu_printer_t p, const struct cl_lock_slice *slice)
-{
-	struct lovsub_lock   *sub = cl2lovsub_lock(slice);
-	struct lov_lock      *lov;
-	struct lov_lock_link *scan;
-
-	list_for_each_entry(scan, &sub->lss_parents, lll_list) {
-		lov = scan->lll_super;
-		(*p)(env, cookie, "[%d %p ", scan->lll_idx, lov);
-		if (lov)
-			cl_lock_descr_print(env, cookie, p,
-					    &lov->lls_cl.cls_lock->cll_descr);
-		(*p)(env, cookie, "] ");
-	}
-	return 0;
-}
-
 static const struct cl_lock_operations lovsub_lock_ops = {
 	.clo_fini    = lovsub_lock_fini,
-	.clo_state   = lovsub_lock_state,
-	.clo_delete  = lovsub_lock_delete,
-	.clo_modify  = lovsub_lock_modify,
-	.clo_closure = lovsub_lock_closure,
-	.clo_weigh   = lovsub_lock_weigh,
-	.clo_print   = lovsub_lock_print
 };
 
 int lovsub_lock_init(const struct lu_env *env, struct cl_object *obj,
@@ -460,8 +77,9 @@ int lovsub_lock_init(const struct lu_env *env, struct cl_object *obj,
 		INIT_LIST_HEAD(&lsk->lss_parents);
 		cl_lock_slice_add(lock, &lsk->lss_cl, obj, &lovsub_lock_ops);
 		result = 0;
-	} else
+	} else {
 		result = -ENOMEM;
+	}
 	return result;
 }
 
diff --git a/drivers/staging/lustre/lustre/lov/lovsub_object.c b/drivers/staging/lustre/lustre/lov/lovsub_object.c
index 6c5430d93..bcaae1e5b 100644
--- a/drivers/staging/lustre/lustre/lov/lovsub_object.c
+++ b/drivers/staging/lustre/lustre/lov/lovsub_object.c
@@ -67,10 +67,10 @@ int lovsub_object_init(const struct lu_env *env, struct lu_object *obj,
 		lu_object_add(obj, below);
 		cl_object_page_init(lu2cl(obj), sizeof(struct lovsub_page));
 		result = 0;
-	} else
+	} else {
 		result = -ENOMEM;
+	}
 	return result;
-
 }
 
 static void lovsub_object_free(const struct lu_env *env, struct lu_object *obj)
@@ -154,8 +154,9 @@ struct lu_object *lovsub_object_alloc(const struct lu_env *env,
 		lu_object_add_top(&hdr->coh_lu, obj);
 		los->lso_cl.co_ops = &lovsub_ops;
 		obj->lo_ops = &lovsub_lu_obj_ops;
-	} else
+	} else {
 		obj = NULL;
+	}
 	return obj;
 }
 
diff --git a/drivers/staging/lustre/lustre/lov/lovsub_page.c b/drivers/staging/lustre/lustre/lov/lovsub_page.c
index 2d945532b..9badedcce 100644
--- a/drivers/staging/lustre/lustre/lov/lovsub_page.c
+++ b/drivers/staging/lustre/lustre/lov/lovsub_page.c
@@ -60,11 +60,11 @@ static const struct cl_page_operations lovsub_page_ops = {
 };
 
 int lovsub_page_init(const struct lu_env *env, struct cl_object *obj,
-		     struct cl_page *page, struct page *unused)
+		     struct cl_page *page, pgoff_t index)
 {
 	struct lovsub_page *lsb = cl_object_page_slice(obj, page);
 
-	cl_page_slice_add(page, &lsb->lsb_cl, obj, &lovsub_page_ops);
+	cl_page_slice_add(page, &lsb->lsb_cl, obj, index, &lovsub_page_ops);
 	return 0;
 }
 
diff --git a/drivers/staging/lustre/lustre/mdc/lproc_mdc.c b/drivers/staging/lustre/lustre/mdc/lproc_mdc.c
index 38f267a60..5c7a15dd7 100644
--- a/drivers/staging/lustre/lustre/mdc/lproc_mdc.c
+++ b/drivers/staging/lustre/lustre/mdc/lproc_mdc.c
@@ -49,9 +49,9 @@ static ssize_t max_rpcs_in_flight_show(struct kobject *kobj,
 					      obd_kobj);
 	struct client_obd *cli = &dev->u.cli;
 
-	client_obd_list_lock(&cli->cl_loi_list_lock);
+	spin_lock(&cli->cl_loi_list_lock);
 	len = sprintf(buf, "%u\n", cli->cl_max_rpcs_in_flight);
-	client_obd_list_unlock(&cli->cl_loi_list_lock);
+	spin_unlock(&cli->cl_loi_list_lock);
 
 	return len;
 }
@@ -74,9 +74,9 @@ static ssize_t max_rpcs_in_flight_store(struct kobject *kobj,
 	if (val < 1 || val > MDC_MAX_RIF_MAX)
 		return -ERANGE;
 
-	client_obd_list_lock(&cli->cl_loi_list_lock);
+	spin_lock(&cli->cl_loi_list_lock);
 	cli->cl_max_rpcs_in_flight = val;
-	client_obd_list_unlock(&cli->cl_loi_list_lock);
+	spin_unlock(&cli->cl_loi_list_lock);
 
 	return count;
 }
diff --git a/drivers/staging/lustre/lustre/mdc/mdc_lib.c b/drivers/staging/lustre/lustre/mdc/mdc_lib.c
index b3bfdcb73..856c54e03 100644
--- a/drivers/staging/lustre/lustre/mdc/mdc_lib.c
+++ b/drivers/staging/lustre/lustre/mdc/mdc_lib.c
@@ -279,8 +279,7 @@ static void mdc_setattr_pack_rec(struct mdt_rec_setattr *rec,
 	rec->sa_atime  = LTIME_S(op_data->op_attr.ia_atime);
 	rec->sa_mtime  = LTIME_S(op_data->op_attr.ia_mtime);
 	rec->sa_ctime  = LTIME_S(op_data->op_attr.ia_ctime);
-	rec->sa_attr_flags =
-			((struct ll_iattr *)&op_data->op_attr)->ia_attr_flags;
+	rec->sa_attr_flags = op_data->op_attr_flags;
 	if ((op_data->op_attr.ia_valid & ATTR_GID) &&
 	    in_group_p(op_data->op_attr.ia_gid))
 		rec->sa_suppgid =
@@ -439,7 +438,6 @@ void mdc_getattr_pack(struct ptlrpc_request *req, __u64 valid, int flags,
 		char *tmp = req_capsule_client_get(&req->rq_pill, &RMF_NAME);
 
 		LOGL0(op_data->op_name, op_data->op_namelen, tmp);
-
 	}
 }
 
@@ -455,7 +453,7 @@ static void mdc_hsm_release_pack(struct ptlrpc_request *req,
 		lock = ldlm_handle2lock(&op_data->op_lease_handle);
 		if (lock) {
 			data->cd_handle = lock->l_remote_handle;
-			ldlm_lock_put(lock);
+			LDLM_LOCK_PUT(lock);
 		}
 		ldlm_cli_cancel(&op_data->op_lease_handle, LCF_LOCAL);
 
@@ -481,9 +479,9 @@ static int mdc_req_avail(struct client_obd *cli, struct mdc_cache_waiter *mcw)
 {
 	int rc;
 
-	client_obd_list_lock(&cli->cl_loi_list_lock);
+	spin_lock(&cli->cl_loi_list_lock);
 	rc = list_empty(&mcw->mcw_entry);
-	client_obd_list_unlock(&cli->cl_loi_list_lock);
+	spin_unlock(&cli->cl_loi_list_lock);
 	return rc;
 };
 
@@ -497,23 +495,23 @@ int mdc_enter_request(struct client_obd *cli)
 	struct mdc_cache_waiter mcw;
 	struct l_wait_info lwi = LWI_INTR(LWI_ON_SIGNAL_NOOP, NULL);
 
-	client_obd_list_lock(&cli->cl_loi_list_lock);
+	spin_lock(&cli->cl_loi_list_lock);
 	if (cli->cl_r_in_flight >= cli->cl_max_rpcs_in_flight) {
 		list_add_tail(&mcw.mcw_entry, &cli->cl_cache_waiters);
 		init_waitqueue_head(&mcw.mcw_waitq);
-		client_obd_list_unlock(&cli->cl_loi_list_lock);
+		spin_unlock(&cli->cl_loi_list_lock);
 		rc = l_wait_event(mcw.mcw_waitq, mdc_req_avail(cli, &mcw),
 				  &lwi);
 		if (rc) {
-			client_obd_list_lock(&cli->cl_loi_list_lock);
+			spin_lock(&cli->cl_loi_list_lock);
 			if (list_empty(&mcw.mcw_entry))
 				cli->cl_r_in_flight--;
 			list_del_init(&mcw.mcw_entry);
-			client_obd_list_unlock(&cli->cl_loi_list_lock);
+			spin_unlock(&cli->cl_loi_list_lock);
 		}
 	} else {
 		cli->cl_r_in_flight++;
-		client_obd_list_unlock(&cli->cl_loi_list_lock);
+		spin_unlock(&cli->cl_loi_list_lock);
 	}
 	return rc;
 }
@@ -523,7 +521,7 @@ void mdc_exit_request(struct client_obd *cli)
 	struct list_head *l, *tmp;
 	struct mdc_cache_waiter *mcw;
 
-	client_obd_list_lock(&cli->cl_loi_list_lock);
+	spin_lock(&cli->cl_loi_list_lock);
 	cli->cl_r_in_flight--;
 	list_for_each_safe(l, tmp, &cli->cl_cache_waiters) {
 		if (cli->cl_r_in_flight >= cli->cl_max_rpcs_in_flight) {
@@ -538,5 +536,5 @@ void mdc_exit_request(struct client_obd *cli)
 	}
 	/* Empty waiting list? Decrease reqs in-flight number */
 
-	client_obd_list_unlock(&cli->cl_loi_list_lock);
+	spin_unlock(&cli->cl_loi_list_lock);
 }
diff --git a/drivers/staging/lustre/lustre/mdc/mdc_locks.c b/drivers/staging/lustre/lustre/mdc/mdc_locks.c
index 958a164f6..3b1bc9111 100644
--- a/drivers/staging/lustre/lustre/mdc/mdc_locks.c
+++ b/drivers/staging/lustre/lustre/mdc/mdc_locks.c
@@ -869,7 +869,9 @@ resend:
 		 * (explicits or automatically generated by Kernel to clean
 		 * current FLocks upon exit) that can't be trashed
 		 */
-		if ((rc == -EINTR) || (rc == -ETIMEDOUT))
+		if (((rc == -EINTR) || (rc == -ETIMEDOUT)) &&
+		    (einfo->ei_type == LDLM_FLOCK) &&
+		    (einfo->ei_mode == LCK_NL))
 			goto resend;
 		return rc;
 	}
@@ -963,7 +965,6 @@ static int mdc_finish_intent_lock(struct obd_export *exp,
 	if (fid_is_sane(&op_data->op_fid2) &&
 	    it->it_create_mode & M_CHECK_STALE &&
 	    it->it_op != IT_GETATTR) {
-
 		/* Also: did we find the same inode? */
 		/* sever can return one of two fids:
 		 * op_fid2 - new allocated fid - if file is created.
diff --git a/drivers/staging/lustre/lustre/mdc/mdc_request.c b/drivers/staging/lustre/lustre/mdc/mdc_request.c
index b91d3ff18..86b744536 100644
--- a/drivers/staging/lustre/lustre/mdc/mdc_request.c
+++ b/drivers/staging/lustre/lustre/mdc/mdc_request.c
@@ -142,9 +142,8 @@ static int mdc_getattr_common(struct obd_export *exp,
 
 	CDEBUG(D_NET, "mode: %o\n", body->mode);
 
+	mdc_update_max_ea_from_body(exp, body);
 	if (body->eadatasize != 0) {
-		mdc_update_max_ea_from_body(exp, body);
-
 		eadata = req_capsule_server_sized_get(pill, &RMF_MDT_MD,
 						      body->eadatasize);
 		if (!eadata)
@@ -1169,7 +1168,7 @@ static int mdc_ioc_hsm_progress(struct obd_export *exp,
 		goto out;
 	}
 
-	mdc_pack_body(req, NULL, OBD_MD_FLRMTPERM, 0, 0, 0);
+	mdc_pack_body(req, NULL, OBD_MD_FLRMTPERM, 0, -1, 0);
 
 	/* Copy hsm_progress struct */
 	req_hpk = req_capsule_client_get(&req->rq_pill, &RMF_MDS_HSM_PROGRESS);
@@ -1203,7 +1202,7 @@ static int mdc_ioc_hsm_ct_register(struct obd_import *imp, __u32 archives)
 		goto out;
 	}
 
-	mdc_pack_body(req, NULL, OBD_MD_FLRMTPERM, 0, 0, 0);
+	mdc_pack_body(req, NULL, OBD_MD_FLRMTPERM, 0, -1, 0);
 
 	/* Copy hsm_progress struct */
 	archive_mask = req_capsule_client_get(&req->rq_pill,
@@ -1278,7 +1277,7 @@ static int mdc_ioc_hsm_ct_unregister(struct obd_import *imp)
 		goto out;
 	}
 
-	mdc_pack_body(req, NULL, OBD_MD_FLRMTPERM, 0, 0, 0);
+	mdc_pack_body(req, NULL, OBD_MD_FLRMTPERM, 0, -1, 0);
 
 	ptlrpc_request_set_replen(req);
 
@@ -1395,7 +1394,7 @@ static int mdc_ioc_hsm_request(struct obd_export *exp,
 		return rc;
 	}
 
-	mdc_pack_body(req, NULL, OBD_MD_FLRMTPERM, 0, 0, 0);
+	mdc_pack_body(req, NULL, OBD_MD_FLRMTPERM, 0, -1, 0);
 
 	/* Copy hsm_request struct */
 	req_hr = req_capsule_client_get(&req->rq_pill, &RMF_MDS_HSM_REQUEST);
@@ -1952,7 +1951,7 @@ static void lustre_swab_hal(struct hsm_action_list *h)
 	__swab32s(&h->hal_count);
 	__swab32s(&h->hal_archive_id);
 	__swab64s(&h->hal_flags);
-	hai = hai_zero(h);
+	hai = hai_first(h);
 	for (i = 0; i < h->hal_count; i++, hai = hai_next(hai))
 		lustre_swab_hai(hai);
 }
@@ -2249,7 +2248,7 @@ static struct obd_uuid *mdc_get_uuid(struct obd_export *exp)
  * recovery, non zero value will be return if the lock can be canceled,
  * or zero returned for not
  */
-static int mdc_cancel_for_recovery(struct ldlm_lock *lock)
+static int mdc_cancel_weight(struct ldlm_lock *lock)
 {
 	if (lock->l_resource->lr_type != LDLM_IBITS)
 		return 0;
@@ -2314,12 +2313,14 @@ static int mdc_setup(struct obd_device *obd, struct lustre_cfg *cfg)
 		return -ENOMEM;
 	mdc_init_rpc_lock(cli->cl_rpc_lock);
 
-	ptlrpcd_addref();
+	rc = ptlrpcd_addref();
+	if (rc < 0)
+		goto err_rpc_lock;
 
 	cli->cl_close_lock = kzalloc(sizeof(*cli->cl_close_lock), GFP_NOFS);
 	if (!cli->cl_close_lock) {
 		rc = -ENOMEM;
-		goto err_rpc_lock;
+		goto err_ptlrpcd_decref;
 	}
 	mdc_init_rpc_lock(cli->cl_close_lock);
 
@@ -2331,7 +2332,7 @@ static int mdc_setup(struct obd_device *obd, struct lustre_cfg *cfg)
 	sptlrpc_lprocfs_cliobd_attach(obd);
 	ptlrpc_lprocfs_register_obd(obd);
 
-	ns_register_cancel(obd->obd_namespace, mdc_cancel_for_recovery);
+	ns_register_cancel(obd->obd_namespace, mdc_cancel_weight);
 
 	obd->obd_namespace->ns_lvbo = &inode_lvbo;
 
@@ -2345,9 +2346,10 @@ static int mdc_setup(struct obd_device *obd, struct lustre_cfg *cfg)
 
 err_close_lock:
 	kfree(cli->cl_close_lock);
+err_ptlrpcd_decref:
+	ptlrpcd_decref();
 err_rpc_lock:
 	kfree(cli->cl_rpc_lock);
-	ptlrpcd_decref();
 	return rc;
 }
 
diff --git a/drivers/staging/lustre/lustre/mgc/mgc_request.c b/drivers/staging/lustre/lustre/mgc/mgc_request.c
index 3924b095b..2311a437c 100644
--- a/drivers/staging/lustre/lustre/mgc/mgc_request.c
+++ b/drivers/staging/lustre/lustre/mgc/mgc_request.c
@@ -502,8 +502,12 @@ static void do_requeue(struct config_llog_data *cld)
 	 */
 	down_read(&cld->cld_mgcexp->exp_obd->u.cli.cl_sem);
 	if (cld->cld_mgcexp->exp_obd->u.cli.cl_conn_count != 0) {
+		int rc;
+
 		CDEBUG(D_MGC, "updating log %s\n", cld->cld_logname);
-		mgc_process_log(cld->cld_mgcexp->exp_obd, cld);
+		rc = mgc_process_log(cld->cld_mgcexp->exp_obd, cld);
+		if (rc && rc != -ENOENT)
+			CERROR("failed processing log: %d\n", rc);
 	} else {
 		CDEBUG(D_MGC, "disconnecting, won't update log %s\n",
 		       cld->cld_logname);
@@ -734,7 +738,9 @@ static int mgc_setup(struct obd_device *obd, struct lustre_cfg *lcfg)
 	struct task_struct *task;
 	int rc;
 
-	ptlrpcd_addref();
+	rc = ptlrpcd_addref();
+	if (rc < 0)
+		goto err_noref;
 
 	rc = client_obd_setup(obd, lcfg);
 	if (rc)
@@ -773,6 +779,7 @@ err_cleanup:
 	client_obd_cleanup(obd);
 err_decref:
 	ptlrpcd_decref();
+err_noref:
 	return rc;
 }
 
@@ -1720,7 +1727,6 @@ static int mgc_process_config(struct obd_device *obd, u32 len, void *buf)
 		CERROR("Unknown command: %d\n", lcfg->lcfg_command);
 		rc = -EINVAL;
 		goto out;
-
 	}
 	}
 out:
diff --git a/drivers/staging/lustre/lustre/obdclass/cl_io.c b/drivers/staging/lustre/lustre/obdclass/cl_io.c
index f5128b4f1..583fb5f33 100644
--- a/drivers/staging/lustre/lustre/obdclass/cl_io.c
+++ b/drivers/staging/lustre/lustre/obdclass/cl_io.c
@@ -36,6 +36,7 @@
  * Client IO.
  *
  *   Author: Nikita Danilov <nikita.danilov@sun.com>
+ *   Author: Jinshan Xiong <jinshan.xiong@intel.com>
  */
 
 #define DEBUG_SUBSYSTEM S_CLASS
@@ -132,6 +133,7 @@ void cl_io_fini(const struct lu_env *env, struct cl_io *io)
 	case CIT_WRITE:
 		break;
 	case CIT_FAULT:
+		break;
 	case CIT_FSYNC:
 		LASSERT(!io->ci_need_restart);
 		break;
@@ -159,7 +161,6 @@ static int cl_io_init0(const struct lu_env *env, struct cl_io *io,
 
 	io->ci_type = iot;
 	INIT_LIST_HEAD(&io->ci_lockset.cls_todo);
-	INIT_LIST_HEAD(&io->ci_lockset.cls_curr);
 	INIT_LIST_HEAD(&io->ci_lockset.cls_done);
 	INIT_LIST_HEAD(&io->ci_layers);
 
@@ -241,37 +242,7 @@ static int cl_lock_descr_sort(const struct cl_lock_descr *d0,
 			      const struct cl_lock_descr *d1)
 {
 	return lu_fid_cmp(lu_object_fid(&d0->cld_obj->co_lu),
-			  lu_object_fid(&d1->cld_obj->co_lu)) ?:
-		__diff_normalize(d0->cld_start, d1->cld_start);
-}
-
-static int cl_lock_descr_cmp(const struct cl_lock_descr *d0,
-			     const struct cl_lock_descr *d1)
-{
-	int ret;
-
-	ret = lu_fid_cmp(lu_object_fid(&d0->cld_obj->co_lu),
-			 lu_object_fid(&d1->cld_obj->co_lu));
-	if (ret)
-		return ret;
-	if (d0->cld_end < d1->cld_start)
-		return -1;
-	if (d0->cld_start > d0->cld_end)
-		return 1;
-	return 0;
-}
-
-static void cl_lock_descr_merge(struct cl_lock_descr *d0,
-				const struct cl_lock_descr *d1)
-{
-	d0->cld_start = min(d0->cld_start, d1->cld_start);
-	d0->cld_end = max(d0->cld_end, d1->cld_end);
-
-	if (d1->cld_mode == CLM_WRITE && d0->cld_mode != CLM_WRITE)
-		d0->cld_mode = CLM_WRITE;
-
-	if (d1->cld_mode == CLM_GROUP && d0->cld_mode != CLM_GROUP)
-		d0->cld_mode = CLM_GROUP;
+			  lu_object_fid(&d1->cld_obj->co_lu));
 }
 
 /*
@@ -320,33 +291,35 @@ static void cl_io_locks_sort(struct cl_io *io)
 	} while (!done);
 }
 
-/**
- * Check whether \a queue contains locks matching \a need.
- *
- * \retval +ve there is a matching lock in the \a queue
- * \retval   0 there are no matching locks in the \a queue
- */
-int cl_queue_match(const struct list_head *queue,
-		   const struct cl_lock_descr *need)
+static void cl_lock_descr_merge(struct cl_lock_descr *d0,
+				const struct cl_lock_descr *d1)
 {
-	struct cl_io_lock_link *scan;
+	d0->cld_start = min(d0->cld_start, d1->cld_start);
+	d0->cld_end = max(d0->cld_end, d1->cld_end);
 
-	list_for_each_entry(scan, queue, cill_linkage) {
-		if (cl_lock_descr_match(&scan->cill_descr, need))
-			return 1;
-	}
-	return 0;
+	if (d1->cld_mode == CLM_WRITE && d0->cld_mode != CLM_WRITE)
+		d0->cld_mode = CLM_WRITE;
+
+	if (d1->cld_mode == CLM_GROUP && d0->cld_mode != CLM_GROUP)
+		d0->cld_mode = CLM_GROUP;
 }
-EXPORT_SYMBOL(cl_queue_match);
 
-static int cl_queue_merge(const struct list_head *queue,
-			  const struct cl_lock_descr *need)
+static int cl_lockset_merge(const struct cl_lockset *set,
+			    const struct cl_lock_descr *need)
 {
 	struct cl_io_lock_link *scan;
 
-	list_for_each_entry(scan, queue, cill_linkage) {
-		if (cl_lock_descr_cmp(&scan->cill_descr, need))
+	list_for_each_entry(scan, &set->cls_todo, cill_linkage) {
+		if (!cl_object_same(scan->cill_descr.cld_obj, need->cld_obj))
 			continue;
+
+		/* Merge locks for the same object because ldlm lock server
+		 * may expand the lock extent, otherwise there is a deadlock
+		 * case if two conflicted locks are queueud for the same object
+		 * and lock server expands one lock to overlap the another.
+		 * The side effect is that it can generate a multi-stripe lock
+		 * that may cause casacading problem
+		 */
 		cl_lock_descr_merge(&scan->cill_descr, need);
 		CDEBUG(D_VFSTRACE, "lock: %d: [%lu, %lu]\n",
 		       scan->cill_descr.cld_mode, scan->cill_descr.cld_start,
@@ -356,87 +329,20 @@ static int cl_queue_merge(const struct list_head *queue,
 	return 0;
 }
 
-static int cl_lockset_match(const struct cl_lockset *set,
-			    const struct cl_lock_descr *need)
-{
-	return cl_queue_match(&set->cls_curr, need) ||
-	       cl_queue_match(&set->cls_done, need);
-}
-
-static int cl_lockset_merge(const struct cl_lockset *set,
-			    const struct cl_lock_descr *need)
-{
-	return cl_queue_merge(&set->cls_todo, need) ||
-	       cl_lockset_match(set, need);
-}
-
-static int cl_lockset_lock_one(const struct lu_env *env,
-			       struct cl_io *io, struct cl_lockset *set,
-			       struct cl_io_lock_link *link)
-{
-	struct cl_lock *lock;
-	int	     result;
-
-	lock = cl_lock_request(env, io, &link->cill_descr, "io", io);
-
-	if (!IS_ERR(lock)) {
-		link->cill_lock = lock;
-		list_move(&link->cill_linkage, &set->cls_curr);
-		if (!(link->cill_descr.cld_enq_flags & CEF_ASYNC)) {
-			result = cl_wait(env, lock);
-			if (result == 0)
-				list_move(&link->cill_linkage, &set->cls_done);
-		} else
-			result = 0;
-	} else
-		result = PTR_ERR(lock);
-	return result;
-}
-
-static void cl_lock_link_fini(const struct lu_env *env, struct cl_io *io,
-			      struct cl_io_lock_link *link)
-{
-	struct cl_lock *lock = link->cill_lock;
-
-	list_del_init(&link->cill_linkage);
-	if (lock) {
-		cl_lock_release(env, lock, "io", io);
-		link->cill_lock = NULL;
-	}
-	if (link->cill_fini)
-		link->cill_fini(env, link);
-}
-
 static int cl_lockset_lock(const struct lu_env *env, struct cl_io *io,
 			   struct cl_lockset *set)
 {
 	struct cl_io_lock_link *link;
 	struct cl_io_lock_link *temp;
-	struct cl_lock	 *lock;
 	int result;
 
 	result = 0;
 	list_for_each_entry_safe(link, temp, &set->cls_todo, cill_linkage) {
-		if (!cl_lockset_match(set, &link->cill_descr)) {
-			/* XXX some locking to guarantee that locks aren't
-			 * expanded in between.
-			 */
-			result = cl_lockset_lock_one(env, io, set, link);
-			if (result != 0)
-				break;
-		} else
-			cl_lock_link_fini(env, io, link);
-	}
-	if (result == 0) {
-		list_for_each_entry_safe(link, temp,
-					 &set->cls_curr, cill_linkage) {
-			lock = link->cill_lock;
-			result = cl_wait(env, lock);
-			if (result == 0)
-				list_move(&link->cill_linkage, &set->cls_done);
-			else
-				break;
-		}
+		result = cl_lock_request(env, io, &link->cill_lock);
+		if (result < 0)
+			break;
+
+		list_move(&link->cill_linkage, &set->cls_done);
 	}
 	return result;
 }
@@ -492,16 +398,19 @@ void cl_io_unlock(const struct lu_env *env, struct cl_io *io)
 
 	set = &io->ci_lockset;
 
-	list_for_each_entry_safe(link, temp, &set->cls_todo, cill_linkage)
-		cl_lock_link_fini(env, io, link);
-
-	list_for_each_entry_safe(link, temp, &set->cls_curr, cill_linkage)
-		cl_lock_link_fini(env, io, link);
+	list_for_each_entry_safe(link, temp, &set->cls_todo, cill_linkage) {
+		list_del_init(&link->cill_linkage);
+		if (link->cill_fini)
+			link->cill_fini(env, link);
+	}
 
 	list_for_each_entry_safe(link, temp, &set->cls_done, cill_linkage) {
-		cl_unuse(env, link->cill_lock);
-		cl_lock_link_fini(env, io, link);
+		list_del_init(&link->cill_linkage);
+		cl_lock_release(env, &link->cill_lock);
+		if (link->cill_fini)
+			link->cill_fini(env, link);
 	}
+
 	cl_io_for_each_reverse(scan, io) {
 		if (scan->cis_iop->op[io->ci_type].cio_unlock)
 			scan->cis_iop->op[io->ci_type].cio_unlock(env, scan);
@@ -595,9 +504,9 @@ int cl_io_lock_add(const struct lu_env *env, struct cl_io *io,
 {
 	int result;
 
-	if (cl_lockset_merge(&io->ci_lockset, &link->cill_descr))
+	if (cl_lockset_merge(&io->ci_lockset, &link->cill_descr)) {
 		result = 1;
-	else {
+	} else {
 		list_add(&link->cill_linkage, &io->ci_lockset.cls_todo);
 		result = 0;
 	}
@@ -627,8 +536,9 @@ int cl_io_lock_alloc_add(const struct lu_env *env, struct cl_io *io,
 		result = cl_io_lock_add(env, io, link);
 		if (result) /* lock match */
 			link->cill_fini(env, link);
-	} else
+	} else {
 		result = -ENOMEM;
+	}
 
 	return result;
 }
@@ -691,42 +601,6 @@ cl_io_slice_page(const struct cl_io_slice *ios, struct cl_page *page)
 	return slice;
 }
 
-/**
- * True iff \a page is within \a io range.
- */
-static int cl_page_in_io(const struct cl_page *page, const struct cl_io *io)
-{
-	int     result = 1;
-	loff_t  start;
-	loff_t  end;
-	pgoff_t idx;
-
-	idx = page->cp_index;
-	switch (io->ci_type) {
-	case CIT_READ:
-	case CIT_WRITE:
-		/*
-		 * check that [start, end) and [pos, pos + count) extents
-		 * overlap.
-		 */
-		if (!cl_io_is_append(io)) {
-			const struct cl_io_rw_common *crw = &(io->u.ci_rw);
-
-			start = cl_offset(page->cp_obj, idx);
-			end   = cl_offset(page->cp_obj, idx + 1);
-			result = crw->crw_pos < end &&
-				 start < crw->crw_pos + crw->crw_count;
-		}
-		break;
-	case CIT_FAULT:
-		result = io->u.ci_fault.ft_index == idx;
-		break;
-	default:
-		LBUG();
-	}
-	return result;
-}
-
 /**
  * Called by read io, when page has to be read from the server.
  *
@@ -742,7 +616,6 @@ int cl_io_read_page(const struct lu_env *env, struct cl_io *io,
 	LINVRNT(io->ci_type == CIT_READ || io->ci_type == CIT_FAULT);
 	LINVRNT(cl_page_is_owned(page, io));
 	LINVRNT(io->ci_state == CIS_IO_GOING || io->ci_state == CIS_LOCKED);
-	LINVRNT(cl_page_in_io(page, io));
 	LINVRNT(cl_io_invariant(io));
 
 	queue = &io->ci_queue;
@@ -769,7 +642,7 @@ int cl_io_read_page(const struct lu_env *env, struct cl_io *io,
 				break;
 		}
 	}
-	if (result == 0)
+	if (result == 0 && queue->c2_qin.pl_nr > 0)
 		result = cl_io_submit_rw(env, io, CRT_READ, queue);
 	/*
 	 * Unlock unsent pages in case of error.
@@ -781,77 +654,29 @@ int cl_io_read_page(const struct lu_env *env, struct cl_io *io,
 EXPORT_SYMBOL(cl_io_read_page);
 
 /**
- * Called by write io to prepare page to receive data from user buffer.
+ * Commit a list of contiguous pages into writeback cache.
  *
- * \see cl_io_operations::cio_prepare_write()
+ * \returns 0 if all pages committed, or errcode if error occurred.
+ * \see cl_io_operations::cio_commit_async()
  */
-int cl_io_prepare_write(const struct lu_env *env, struct cl_io *io,
-			struct cl_page *page, unsigned from, unsigned to)
+int cl_io_commit_async(const struct lu_env *env, struct cl_io *io,
+		       struct cl_page_list *queue, int from, int to,
+		       cl_commit_cbt cb)
 {
 	const struct cl_io_slice *scan;
 	int result = 0;
 
-	LINVRNT(io->ci_type == CIT_WRITE);
-	LINVRNT(cl_page_is_owned(page, io));
-	LINVRNT(io->ci_state == CIS_IO_GOING || io->ci_state == CIS_LOCKED);
-	LINVRNT(cl_io_invariant(io));
-	LASSERT(cl_page_in_io(page, io));
-
-	cl_io_for_each_reverse(scan, io) {
-		if (scan->cis_iop->cio_prepare_write) {
-			const struct cl_page_slice *slice;
-
-			slice = cl_io_slice_page(scan, page);
-			result = scan->cis_iop->cio_prepare_write(env, scan,
-								  slice,
-								  from, to);
-			if (result != 0)
-				break;
-		}
-	}
-	return result;
-}
-EXPORT_SYMBOL(cl_io_prepare_write);
-
-/**
- * Called by write io after user data were copied into a page.
- *
- * \see cl_io_operations::cio_commit_write()
- */
-int cl_io_commit_write(const struct lu_env *env, struct cl_io *io,
-		       struct cl_page *page, unsigned from, unsigned to)
-{
-	const struct cl_io_slice *scan;
-	int result = 0;
-
-	LINVRNT(io->ci_type == CIT_WRITE);
-	LINVRNT(io->ci_state == CIS_IO_GOING || io->ci_state == CIS_LOCKED);
-	LINVRNT(cl_io_invariant(io));
-	/*
-	 * XXX Uh... not nice. Top level cl_io_commit_write() call (vvp->lov)
-	 * already called cl_page_cache_add(), moving page into CPS_CACHED
-	 * state. Better (and more general) way of dealing with such situation
-	 * is needed.
-	 */
-	LASSERT(cl_page_is_owned(page, io) || page->cp_parent);
-	LASSERT(cl_page_in_io(page, io));
-
 	cl_io_for_each(scan, io) {
-		if (scan->cis_iop->cio_commit_write) {
-			const struct cl_page_slice *slice;
-
-			slice = cl_io_slice_page(scan, page);
-			result = scan->cis_iop->cio_commit_write(env, scan,
-								 slice,
-								 from, to);
-			if (result != 0)
-				break;
-		}
+		if (!scan->cis_iop->cio_commit_async)
+			continue;
+		result = scan->cis_iop->cio_commit_async(env, scan, queue,
+							 from, to, cb);
+		if (result != 0)
+			break;
 	}
-	LINVRNT(result <= 0);
 	return result;
 }
-EXPORT_SYMBOL(cl_io_commit_write);
+EXPORT_SYMBOL(cl_io_commit_async);
 
 /**
  * Submits a list of pages for immediate io.
@@ -869,13 +694,10 @@ int cl_io_submit_rw(const struct lu_env *env, struct cl_io *io,
 	const struct cl_io_slice *scan;
 	int result = 0;
 
-	LINVRNT(crt < ARRAY_SIZE(scan->cis_iop->req_op));
-
 	cl_io_for_each(scan, io) {
-		if (!scan->cis_iop->req_op[crt].cio_submit)
+		if (!scan->cis_iop->cio_submit)
 			continue;
-		result = scan->cis_iop->req_op[crt].cio_submit(env, scan, crt,
-							       queue);
+		result = scan->cis_iop->cio_submit(env, scan, crt, queue);
 		if (result != 0)
 			break;
 	}
@@ -887,6 +709,9 @@ int cl_io_submit_rw(const struct lu_env *env, struct cl_io *io,
 }
 EXPORT_SYMBOL(cl_io_submit_rw);
 
+static void cl_page_list_assume(const struct lu_env *env,
+				struct cl_io *io, struct cl_page_list *plist);
+
 /**
  * Submit a sync_io and wait for the IO to be finished, or error happens.
  * If \a timeout is zero, it means to wait for the IO unconditionally.
@@ -904,7 +729,7 @@ int cl_io_submit_sync(const struct lu_env *env, struct cl_io *io,
 		pg->cp_sync_io = anchor;
 	}
 
-	cl_sync_io_init(anchor, queue->c2_qin.pl_nr);
+	cl_sync_io_init(anchor, queue->c2_qin.pl_nr, &cl_sync_io_end);
 	rc = cl_io_submit_rw(env, io, iot, queue);
 	if (rc == 0) {
 		/*
@@ -915,12 +740,12 @@ int cl_io_submit_sync(const struct lu_env *env, struct cl_io *io,
 		 */
 		cl_page_list_for_each(pg, &queue->c2_qin) {
 			pg->cp_sync_io = NULL;
-			cl_sync_io_note(anchor, 1);
+			cl_sync_io_note(env, anchor, 1);
 		}
 
 		/* wait for the IO to be finished. */
-		rc = cl_sync_io_wait(env, io, &queue->c2_qout,
-				     anchor, timeout);
+		rc = cl_sync_io_wait(env, anchor, timeout);
+		cl_page_list_assume(env, io, &queue->c2_qout);
 	} else {
 		LASSERT(list_empty(&queue->c2_qout.pl_pages));
 		cl_page_list_for_each(pg, &queue->c2_qin)
@@ -930,26 +755,6 @@ int cl_io_submit_sync(const struct lu_env *env, struct cl_io *io,
 }
 EXPORT_SYMBOL(cl_io_submit_sync);
 
-/**
- * Cancel an IO which has been submitted by cl_io_submit_rw.
- */
-static int cl_io_cancel(const struct lu_env *env, struct cl_io *io,
-			struct cl_page_list *queue)
-{
-	struct cl_page *page;
-	int result = 0;
-
-	CERROR("Canceling ongoing page transmission\n");
-	cl_page_list_for_each(page, queue) {
-		int rc;
-
-		LINVRNT(cl_page_in_io(page, io));
-		rc = cl_page_cancel(env, page);
-		result = result ?: rc;
-	}
-	return result;
-}
-
 /**
  * Main io loop.
  *
@@ -1072,8 +877,8 @@ EXPORT_SYMBOL(cl_page_list_add);
 /**
  * Removes a page from a page list.
  */
-static void cl_page_list_del(const struct lu_env *env,
-			     struct cl_page_list *plist, struct cl_page *page)
+void cl_page_list_del(const struct lu_env *env, struct cl_page_list *plist,
+		      struct cl_page *page)
 {
 	LASSERT(plist->pl_nr > 0);
 	LINVRNT(plist->pl_owner == current);
@@ -1086,6 +891,7 @@ static void cl_page_list_del(const struct lu_env *env,
 	lu_ref_del_at(&page->cp_reference, &page->cp_queue_ref, "queue", plist);
 	cl_page_put(env, page);
 }
+EXPORT_SYMBOL(cl_page_list_del);
 
 /**
  * Moves a page from one page list to another.
@@ -1105,6 +911,24 @@ void cl_page_list_move(struct cl_page_list *dst, struct cl_page_list *src,
 }
 EXPORT_SYMBOL(cl_page_list_move);
 
+/**
+ * Moves a page from one page list to the head of another list.
+ */
+void cl_page_list_move_head(struct cl_page_list *dst, struct cl_page_list *src,
+			    struct cl_page *page)
+{
+	LASSERT(src->pl_nr > 0);
+	LINVRNT(dst->pl_owner == current);
+	LINVRNT(src->pl_owner == current);
+
+	list_move(&page->cp_batch, &dst->pl_pages);
+	--src->pl_nr;
+	++dst->pl_nr;
+	lu_ref_set_at(&page->cp_reference, &page->cp_queue_ref, "queue",
+		      src, dst);
+}
+EXPORT_SYMBOL(cl_page_list_move_head);
+
 /**
  * splice the cl_page_list, just as list head does
  */
@@ -1162,8 +986,7 @@ EXPORT_SYMBOL(cl_page_list_disown);
 /**
  * Releases pages from queue.
  */
-static void cl_page_list_fini(const struct lu_env *env,
-			      struct cl_page_list *plist)
+void cl_page_list_fini(const struct lu_env *env, struct cl_page_list *plist)
 {
 	struct cl_page *page;
 	struct cl_page *temp;
@@ -1174,6 +997,7 @@ static void cl_page_list_fini(const struct lu_env *env,
 		cl_page_list_del(env, plist, page);
 	LASSERT(plist->pl_nr == 0);
 }
+EXPORT_SYMBOL(cl_page_list_fini);
 
 /**
  * Assumes all pages in a queue.
@@ -1260,7 +1084,7 @@ EXPORT_SYMBOL(cl_2queue_init_page);
 /**
  * Returns top-level io.
  *
- * \see cl_object_top(), cl_page_top().
+ * \see cl_object_top()
  */
 struct cl_io *cl_io_top(struct cl_io *io)
 {
@@ -1323,19 +1147,14 @@ static int cl_req_init(const struct lu_env *env, struct cl_req *req,
 	int result;
 
 	result = 0;
-	page = cl_page_top(page);
-	do {
-		list_for_each_entry(slice, &page->cp_layers, cpl_linkage) {
-			dev = lu2cl_dev(slice->cpl_obj->co_lu.lo_dev);
-			if (dev->cd_ops->cdo_req_init) {
-				result = dev->cd_ops->cdo_req_init(env,
-								   dev, req);
-				if (result != 0)
-					break;
-			}
+	list_for_each_entry(slice, &page->cp_layers, cpl_linkage) {
+		dev = lu2cl_dev(slice->cpl_obj->co_lu.lo_dev);
+		if (dev->cd_ops->cdo_req_init) {
+			result = dev->cd_ops->cdo_req_init(env, dev, req);
+			if (result != 0)
+				break;
 		}
-		page = page->cp_child;
-	} while (page && result == 0);
+	}
 	return result;
 }
 
@@ -1384,14 +1203,16 @@ struct cl_req *cl_req_alloc(const struct lu_env *env, struct cl_page *page,
 		if (req->crq_o) {
 			req->crq_nrobjs = nr_objects;
 			result = cl_req_init(env, req, page);
-		} else
+		} else {
 			result = -ENOMEM;
+		}
 		if (result != 0) {
 			cl_req_completion(env, req, result);
 			req = ERR_PTR(result);
 		}
-	} else
+	} else {
 		req = ERR_PTR(-ENOMEM);
+	}
 	return req;
 }
 EXPORT_SYMBOL(cl_req_alloc);
@@ -1406,8 +1227,6 @@ void cl_req_page_add(const struct lu_env *env,
 	struct cl_req_obj *rqo;
 	int i;
 
-	page = cl_page_top(page);
-
 	LASSERT(list_empty(&page->cp_flight));
 	LASSERT(!page->cp_req);
 
@@ -1438,8 +1257,6 @@ void cl_req_page_done(const struct lu_env *env, struct cl_page *page)
 {
 	struct cl_req *req = page->cp_req;
 
-	page = cl_page_top(page);
-
 	LASSERT(!list_empty(&page->cp_flight));
 	LASSERT(req->crq_nrpages > 0);
 
@@ -1511,25 +1328,39 @@ void cl_req_attr_set(const struct lu_env *env, struct cl_req *req,
 }
 EXPORT_SYMBOL(cl_req_attr_set);
 
+/* cl_sync_io_callback assumes the caller must call cl_sync_io_wait() to
+ * wait for the IO to finish.
+ */
+void cl_sync_io_end(const struct lu_env *env, struct cl_sync_io *anchor)
+{
+	wake_up_all(&anchor->csi_waitq);
+
+	/* it's safe to nuke or reuse anchor now */
+	atomic_set(&anchor->csi_barrier, 0);
+}
+EXPORT_SYMBOL(cl_sync_io_end);
 
 /**
- * Initialize synchronous io wait anchor, for transfer of \a nrpages pages.
+ * Initialize synchronous io wait anchor
  */
-void cl_sync_io_init(struct cl_sync_io *anchor, int nrpages)
+void cl_sync_io_init(struct cl_sync_io *anchor, int nr,
+		     void (*end)(const struct lu_env *, struct cl_sync_io *))
 {
+	memset(anchor, 0, sizeof(*anchor));
 	init_waitqueue_head(&anchor->csi_waitq);
-	atomic_set(&anchor->csi_sync_nr, nrpages);
-	atomic_set(&anchor->csi_barrier, nrpages > 0);
+	atomic_set(&anchor->csi_sync_nr, nr);
+	atomic_set(&anchor->csi_barrier, nr > 0);
 	anchor->csi_sync_rc = 0;
+	anchor->csi_end_io = end;
+	LASSERT(end);
 }
 EXPORT_SYMBOL(cl_sync_io_init);
 
 /**
- * Wait until all transfer completes. Transfer completion routine has to call
- * cl_sync_io_note() for every page.
+ * Wait until all IO completes. Transfer completion routine has to call
+ * cl_sync_io_note() for every entity.
  */
-int cl_sync_io_wait(const struct lu_env *env, struct cl_io *io,
-		    struct cl_page_list *queue, struct cl_sync_io *anchor,
+int cl_sync_io_wait(const struct lu_env *env, struct cl_sync_io *anchor,
 		    long timeout)
 {
 	struct l_wait_info lwi = LWI_TIMEOUT_INTR(cfs_time_seconds(timeout),
@@ -1542,11 +1373,9 @@ int cl_sync_io_wait(const struct lu_env *env, struct cl_io *io,
 			  atomic_read(&anchor->csi_sync_nr) == 0,
 			  &lwi);
 	if (rc < 0) {
-		CERROR("SYNC IO failed with error: %d, try to cancel %d remaining pages\n",
+		CERROR("IO failed: %d, still wait for %d remaining entries\n",
 		       rc, atomic_read(&anchor->csi_sync_nr));
 
-		(void)cl_io_cancel(env, io, queue);
-
 		lwi = (struct l_wait_info) { 0 };
 		(void)l_wait_event(anchor->csi_waitq,
 				   atomic_read(&anchor->csi_sync_nr) == 0,
@@ -1555,14 +1384,12 @@ int cl_sync_io_wait(const struct lu_env *env, struct cl_io *io,
 		rc = anchor->csi_sync_rc;
 	}
 	LASSERT(atomic_read(&anchor->csi_sync_nr) == 0);
-	cl_page_list_assume(env, io, queue);
 
 	/* wait until cl_sync_io_note() has done wakeup */
 	while (unlikely(atomic_read(&anchor->csi_barrier) != 0)) {
 		cpu_relax();
 	}
 
-	POISON(anchor, 0x5a, sizeof(*anchor));
 	return rc;
 }
 EXPORT_SYMBOL(cl_sync_io_wait);
@@ -1570,7 +1397,8 @@ EXPORT_SYMBOL(cl_sync_io_wait);
 /**
  * Indicate that transfer of a single page completed.
  */
-void cl_sync_io_note(struct cl_sync_io *anchor, int ioret)
+void cl_sync_io_note(const struct lu_env *env, struct cl_sync_io *anchor,
+		     int ioret)
 {
 	if (anchor->csi_sync_rc == 0 && ioret < 0)
 		anchor->csi_sync_rc = ioret;
@@ -1581,9 +1409,9 @@ void cl_sync_io_note(struct cl_sync_io *anchor, int ioret)
 	 */
 	LASSERT(atomic_read(&anchor->csi_sync_nr) > 0);
 	if (atomic_dec_and_test(&anchor->csi_sync_nr)) {
-		wake_up_all(&anchor->csi_waitq);
-		/* it's safe to nuke or reuse anchor now */
-		atomic_set(&anchor->csi_barrier, 0);
+		LASSERT(anchor->csi_end_io);
+		anchor->csi_end_io(env, anchor);
+		/* Can't access anchor any more */
 	}
 }
 EXPORT_SYMBOL(cl_sync_io_note);
diff --git a/drivers/staging/lustre/lustre/obdclass/cl_lock.c b/drivers/staging/lustre/lustre/obdclass/cl_lock.c
index aec644eb4..26a576b63 100644
--- a/drivers/staging/lustre/lustre/obdclass/cl_lock.c
+++ b/drivers/staging/lustre/lustre/obdclass/cl_lock.c
@@ -36,6 +36,7 @@
  * Client Extent Lock.
  *
  *   Author: Nikita Danilov <nikita.danilov@sun.com>
+ *   Author: Jinshan Xiong <jinshan.xiong@intel.com>
  */
 
 #define DEBUG_SUBSYSTEM S_CLASS
@@ -47,2116 +48,187 @@
 #include "../include/cl_object.h"
 #include "cl_internal.h"
 
-/** Lock class of cl_lock::cll_guard */
-static struct lock_class_key cl_lock_guard_class;
-static struct kmem_cache *cl_lock_kmem;
-
-static struct lu_kmem_descr cl_lock_caches[] = {
-	{
-		.ckd_cache = &cl_lock_kmem,
-		.ckd_name  = "cl_lock_kmem",
-		.ckd_size  = sizeof (struct cl_lock)
-	},
-	{
-		.ckd_cache = NULL
-	}
-};
-
-#define CS_LOCK_INC(o, item)
-#define CS_LOCK_DEC(o, item)
-#define CS_LOCKSTATE_INC(o, state)
-#define CS_LOCKSTATE_DEC(o, state)
-
-/**
- * Basic lock invariant that is maintained at all times. Caller either has a
- * reference to \a lock, or somehow assures that \a lock cannot be freed.
- *
- * \see cl_lock_invariant()
- */
-static int cl_lock_invariant_trusted(const struct lu_env *env,
-				     const struct cl_lock *lock)
-{
-	return  ergo(lock->cll_state == CLS_FREEING, lock->cll_holds == 0) &&
-		atomic_read(&lock->cll_ref) >= lock->cll_holds &&
-		lock->cll_holds >= lock->cll_users &&
-		lock->cll_holds >= 0 &&
-		lock->cll_users >= 0 &&
-		lock->cll_depth >= 0;
-}
-
-/**
- * Stronger lock invariant, checking that caller has a reference on a lock.
- *
- * \see cl_lock_invariant_trusted()
- */
-static int cl_lock_invariant(const struct lu_env *env,
-			     const struct cl_lock *lock)
-{
-	int result;
-
-	result = atomic_read(&lock->cll_ref) > 0 &&
-		cl_lock_invariant_trusted(env, lock);
-	if (!result && env)
-		CL_LOCK_DEBUG(D_ERROR, env, lock, "invariant broken\n");
-	return result;
-}
-
-/**
- * Returns lock "nesting": 0 for a top-lock and 1 for a sub-lock.
- */
-static enum clt_nesting_level cl_lock_nesting(const struct cl_lock *lock)
-{
-	return cl_object_header(lock->cll_descr.cld_obj)->coh_nesting;
-}
-
-/**
- * Returns a set of counters for this lock, depending on a lock nesting.
- */
-static struct cl_thread_counters *cl_lock_counters(const struct lu_env *env,
-						   const struct cl_lock *lock)
-{
-	struct cl_thread_info *info;
-	enum clt_nesting_level nesting;
-
-	info = cl_env_info(env);
-	nesting = cl_lock_nesting(lock);
-	LASSERT(nesting < ARRAY_SIZE(info->clt_counters));
-	return &info->clt_counters[nesting];
-}
-
 static void cl_lock_trace0(int level, const struct lu_env *env,
-			   const char *prefix, const struct cl_lock *lock,
-			   const char *func, const int line)
-{
-	struct cl_object_header *h = cl_object_header(lock->cll_descr.cld_obj);
-
-	CDEBUG(level, "%s: %p@(%d %p %d %d %d %d %d %lx)(%p/%d/%d) at %s():%d\n",
-	       prefix, lock, atomic_read(&lock->cll_ref),
-	       lock->cll_guarder, lock->cll_depth,
-	       lock->cll_state, lock->cll_error, lock->cll_holds,
-	       lock->cll_users, lock->cll_flags,
-	       env, h->coh_nesting, cl_lock_nr_mutexed(env),
-	       func, line);
-}
-
-#define cl_lock_trace(level, env, prefix, lock)			 \
-	cl_lock_trace0(level, env, prefix, lock, __func__, __LINE__)
-
-#define RETIP ((unsigned long)__builtin_return_address(0))
-
-#ifdef CONFIG_LOCKDEP
-static struct lock_class_key cl_lock_key;
-
-static void cl_lock_lockdep_init(struct cl_lock *lock)
-{
-	lockdep_set_class_and_name(lock, &cl_lock_key, "EXT");
-}
-
-static void cl_lock_lockdep_acquire(const struct lu_env *env,
-				    struct cl_lock *lock, __u32 enqflags)
-{
-	cl_lock_counters(env, lock)->ctc_nr_locks_acquired++;
-	lock_map_acquire(&lock->dep_map);
-}
-
-static void cl_lock_lockdep_release(const struct lu_env *env,
-				    struct cl_lock *lock)
-{
-	cl_lock_counters(env, lock)->ctc_nr_locks_acquired--;
-	lock_release(&lock->dep_map, 0, RETIP);
-}
-
-#else /* !CONFIG_LOCKDEP */
-
-static void cl_lock_lockdep_init(struct cl_lock *lock)
-{}
-static void cl_lock_lockdep_acquire(const struct lu_env *env,
-				    struct cl_lock *lock, __u32 enqflags)
-{}
-static void cl_lock_lockdep_release(const struct lu_env *env,
-				    struct cl_lock *lock)
-{}
-
-#endif /* !CONFIG_LOCKDEP */
-
-/**
- * Adds lock slice to the compound lock.
- *
- * This is called by cl_object_operations::coo_lock_init() methods to add a
- * per-layer state to the lock. New state is added at the end of
- * cl_lock::cll_layers list, that is, it is at the bottom of the stack.
- *
- * \see cl_req_slice_add(), cl_page_slice_add(), cl_io_slice_add()
- */
-void cl_lock_slice_add(struct cl_lock *lock, struct cl_lock_slice *slice,
-		       struct cl_object *obj,
-		       const struct cl_lock_operations *ops)
-{
-	slice->cls_lock = lock;
-	list_add_tail(&slice->cls_linkage, &lock->cll_layers);
-	slice->cls_obj = obj;
-	slice->cls_ops = ops;
-}
-EXPORT_SYMBOL(cl_lock_slice_add);
-
-/**
- * Returns true iff a lock with the mode \a has provides at least the same
- * guarantees as a lock with the mode \a need.
- */
-int cl_lock_mode_match(enum cl_lock_mode has, enum cl_lock_mode need)
-{
-	LINVRNT(need == CLM_READ || need == CLM_WRITE ||
-		need == CLM_PHANTOM || need == CLM_GROUP);
-	LINVRNT(has == CLM_READ || has == CLM_WRITE ||
-		has == CLM_PHANTOM || has == CLM_GROUP);
-	CLASSERT(CLM_PHANTOM < CLM_READ);
-	CLASSERT(CLM_READ < CLM_WRITE);
-	CLASSERT(CLM_WRITE < CLM_GROUP);
-
-	if (has != CLM_GROUP)
-		return need <= has;
-	else
-		return need == has;
-}
-EXPORT_SYMBOL(cl_lock_mode_match);
-
-/**
- * Returns true iff extent portions of lock descriptions match.
- */
-int cl_lock_ext_match(const struct cl_lock_descr *has,
-		      const struct cl_lock_descr *need)
-{
-	return
-		has->cld_start <= need->cld_start &&
-		has->cld_end >= need->cld_end &&
-		cl_lock_mode_match(has->cld_mode, need->cld_mode) &&
-		(has->cld_mode != CLM_GROUP || has->cld_gid == need->cld_gid);
-}
-EXPORT_SYMBOL(cl_lock_ext_match);
-
-/**
- * Returns true iff a lock with the description \a has provides at least the
- * same guarantees as a lock with the description \a need.
- */
-int cl_lock_descr_match(const struct cl_lock_descr *has,
-			const struct cl_lock_descr *need)
-{
-	return
-		cl_object_same(has->cld_obj, need->cld_obj) &&
-		cl_lock_ext_match(has, need);
-}
-EXPORT_SYMBOL(cl_lock_descr_match);
-
-static void cl_lock_free(const struct lu_env *env, struct cl_lock *lock)
-{
-	struct cl_object *obj = lock->cll_descr.cld_obj;
-
-	LINVRNT(!cl_lock_is_mutexed(lock));
-
-	cl_lock_trace(D_DLMTRACE, env, "free lock", lock);
-	might_sleep();
-	while (!list_empty(&lock->cll_layers)) {
-		struct cl_lock_slice *slice;
-
-		slice = list_entry(lock->cll_layers.next,
-				   struct cl_lock_slice, cls_linkage);
-		list_del_init(lock->cll_layers.next);
-		slice->cls_ops->clo_fini(env, slice);
-	}
-	CS_LOCK_DEC(obj, total);
-	CS_LOCKSTATE_DEC(obj, lock->cll_state);
-	lu_object_ref_del_at(&obj->co_lu, &lock->cll_obj_ref, "cl_lock", lock);
-	cl_object_put(env, obj);
-	lu_ref_fini(&lock->cll_reference);
-	lu_ref_fini(&lock->cll_holders);
-	mutex_destroy(&lock->cll_guard);
-	kmem_cache_free(cl_lock_kmem, lock);
-}
-
-/**
- * Releases a reference on a lock.
- *
- * When last reference is released, lock is returned to the cache, unless it
- * is in cl_lock_state::CLS_FREEING state, in which case it is destroyed
- * immediately.
- *
- * \see cl_object_put(), cl_page_put()
- */
-void cl_lock_put(const struct lu_env *env, struct cl_lock *lock)
-{
-	struct cl_object	*obj;
-
-	LINVRNT(cl_lock_invariant(env, lock));
-	obj = lock->cll_descr.cld_obj;
-	LINVRNT(obj);
-
-	CDEBUG(D_TRACE, "releasing reference: %d %p %lu\n",
-	       atomic_read(&lock->cll_ref), lock, RETIP);
-
-	if (atomic_dec_and_test(&lock->cll_ref)) {
-		if (lock->cll_state == CLS_FREEING) {
-			LASSERT(list_empty(&lock->cll_linkage));
-			cl_lock_free(env, lock);
-		}
-		CS_LOCK_DEC(obj, busy);
-	}
-}
-EXPORT_SYMBOL(cl_lock_put);
-
-/**
- * Acquires an additional reference to a lock.
- *
- * This can be called only by caller already possessing a reference to \a
- * lock.
- *
- * \see cl_object_get(), cl_page_get()
- */
-void cl_lock_get(struct cl_lock *lock)
-{
-	LINVRNT(cl_lock_invariant(NULL, lock));
-	CDEBUG(D_TRACE, "acquiring reference: %d %p %lu\n",
-	       atomic_read(&lock->cll_ref), lock, RETIP);
-	atomic_inc(&lock->cll_ref);
-}
-EXPORT_SYMBOL(cl_lock_get);
-
-/**
- * Acquires a reference to a lock.
- *
- * This is much like cl_lock_get(), except that this function can be used to
- * acquire initial reference to the cached lock. Caller has to deal with all
- * possible races. Use with care!
- *
- * \see cl_page_get_trust()
- */
-void cl_lock_get_trust(struct cl_lock *lock)
-{
-	CDEBUG(D_TRACE, "acquiring trusted reference: %d %p %lu\n",
-	       atomic_read(&lock->cll_ref), lock, RETIP);
-	if (atomic_inc_return(&lock->cll_ref) == 1)
-		CS_LOCK_INC(lock->cll_descr.cld_obj, busy);
-}
-EXPORT_SYMBOL(cl_lock_get_trust);
-
-/**
- * Helper function destroying the lock that wasn't completely initialized.
- *
- * Other threads can acquire references to the top-lock through its
- * sub-locks. Hence, it cannot be cl_lock_free()-ed immediately.
- */
-static void cl_lock_finish(const struct lu_env *env, struct cl_lock *lock)
-{
-	cl_lock_mutex_get(env, lock);
-	cl_lock_cancel(env, lock);
-	cl_lock_delete(env, lock);
-	cl_lock_mutex_put(env, lock);
-	cl_lock_put(env, lock);
-}
-
-static struct cl_lock *cl_lock_alloc(const struct lu_env *env,
-				     struct cl_object *obj,
-				     const struct cl_io *io,
-				     const struct cl_lock_descr *descr)
-{
-	struct cl_lock	  *lock;
-	struct lu_object_header *head;
-
-	lock = kmem_cache_zalloc(cl_lock_kmem, GFP_NOFS);
-	if (lock) {
-		atomic_set(&lock->cll_ref, 1);
-		lock->cll_descr = *descr;
-		lock->cll_state = CLS_NEW;
-		cl_object_get(obj);
-		lu_object_ref_add_at(&obj->co_lu, &lock->cll_obj_ref, "cl_lock",
-				     lock);
-		INIT_LIST_HEAD(&lock->cll_layers);
-		INIT_LIST_HEAD(&lock->cll_linkage);
-		INIT_LIST_HEAD(&lock->cll_inclosure);
-		lu_ref_init(&lock->cll_reference);
-		lu_ref_init(&lock->cll_holders);
-		mutex_init(&lock->cll_guard);
-		lockdep_set_class(&lock->cll_guard, &cl_lock_guard_class);
-		init_waitqueue_head(&lock->cll_wq);
-		head = obj->co_lu.lo_header;
-		CS_LOCKSTATE_INC(obj, CLS_NEW);
-		CS_LOCK_INC(obj, total);
-		CS_LOCK_INC(obj, create);
-		cl_lock_lockdep_init(lock);
-		list_for_each_entry(obj, &head->loh_layers, co_lu.lo_linkage) {
-			int err;
-
-			err = obj->co_ops->coo_lock_init(env, obj, lock, io);
-			if (err != 0) {
-				cl_lock_finish(env, lock);
-				lock = ERR_PTR(err);
-				break;
-			}
-		}
-	} else
-		lock = ERR_PTR(-ENOMEM);
-	return lock;
-}
-
-/**
- * Transfer the lock into INTRANSIT state and return the original state.
- *
- * \pre  state: CLS_CACHED, CLS_HELD or CLS_ENQUEUED
- * \post state: CLS_INTRANSIT
- * \see CLS_INTRANSIT
- */
-static enum cl_lock_state cl_lock_intransit(const struct lu_env *env,
-					    struct cl_lock *lock)
-{
-	enum cl_lock_state state = lock->cll_state;
-
-	LASSERT(cl_lock_is_mutexed(lock));
-	LASSERT(state != CLS_INTRANSIT);
-	LASSERTF(state >= CLS_ENQUEUED && state <= CLS_CACHED,
-		 "Malformed lock state %d.\n", state);
-
-	cl_lock_state_set(env, lock, CLS_INTRANSIT);
-	lock->cll_intransit_owner = current;
-	cl_lock_hold_add(env, lock, "intransit", current);
-	return state;
-}
-
-/**
- *  Exit the intransit state and restore the lock state to the original state
- */
-static void cl_lock_extransit(const struct lu_env *env, struct cl_lock *lock,
-			      enum cl_lock_state state)
-{
-	LASSERT(cl_lock_is_mutexed(lock));
-	LASSERT(lock->cll_state == CLS_INTRANSIT);
-	LASSERT(state != CLS_INTRANSIT);
-	LASSERT(lock->cll_intransit_owner == current);
-
-	lock->cll_intransit_owner = NULL;
-	cl_lock_state_set(env, lock, state);
-	cl_lock_unhold(env, lock, "intransit", current);
-}
-
-/**
- * Checking whether the lock is intransit state
- */
-int cl_lock_is_intransit(struct cl_lock *lock)
-{
-	LASSERT(cl_lock_is_mutexed(lock));
-	return lock->cll_state == CLS_INTRANSIT &&
-	       lock->cll_intransit_owner != current;
-}
-EXPORT_SYMBOL(cl_lock_is_intransit);
-/**
- * Returns true iff lock is "suitable" for given io. E.g., locks acquired by
- * truncate and O_APPEND cannot be reused for read/non-append-write, as they
- * cover multiple stripes and can trigger cascading timeouts.
- */
-static int cl_lock_fits_into(const struct lu_env *env,
-			     const struct cl_lock *lock,
-			     const struct cl_lock_descr *need,
-			     const struct cl_io *io)
-{
-	const struct cl_lock_slice *slice;
-
-	LINVRNT(cl_lock_invariant_trusted(env, lock));
-	list_for_each_entry(slice, &lock->cll_layers, cls_linkage) {
-		if (slice->cls_ops->clo_fits_into &&
-		    !slice->cls_ops->clo_fits_into(env, slice, need, io))
-			return 0;
-	}
-	return 1;
-}
-
-static struct cl_lock *cl_lock_lookup(const struct lu_env *env,
-				      struct cl_object *obj,
-				      const struct cl_io *io,
-				      const struct cl_lock_descr *need)
-{
-	struct cl_lock	  *lock;
-	struct cl_object_header *head;
-
-	head = cl_object_header(obj);
-	assert_spin_locked(&head->coh_lock_guard);
-	CS_LOCK_INC(obj, lookup);
-	list_for_each_entry(lock, &head->coh_locks, cll_linkage) {
-		int matched;
-
-		matched = cl_lock_ext_match(&lock->cll_descr, need) &&
-			  lock->cll_state < CLS_FREEING &&
-			  lock->cll_error == 0 &&
-			  !(lock->cll_flags & CLF_CANCELLED) &&
-			  cl_lock_fits_into(env, lock, need, io);
-		CDEBUG(D_DLMTRACE, "has: "DDESCR"(%d) need: "DDESCR": %d\n",
-		       PDESCR(&lock->cll_descr), lock->cll_state, PDESCR(need),
-		       matched);
-		if (matched) {
-			cl_lock_get_trust(lock);
-			CS_LOCK_INC(obj, hit);
-			return lock;
-		}
-	}
-	return NULL;
-}
-
-/**
- * Returns a lock matching description \a need.
- *
- * This is the main entry point into the cl_lock caching interface. First, a
- * cache (implemented as a per-object linked list) is consulted. If lock is
- * found there, it is returned immediately. Otherwise new lock is allocated
- * and returned. In any case, additional reference to lock is acquired.
- *
- * \see cl_object_find(), cl_page_find()
- */
-static struct cl_lock *cl_lock_find(const struct lu_env *env,
-				    const struct cl_io *io,
-				    const struct cl_lock_descr *need)
-{
-	struct cl_object_header *head;
-	struct cl_object	*obj;
-	struct cl_lock	  *lock;
-
-	obj  = need->cld_obj;
-	head = cl_object_header(obj);
-
-	spin_lock(&head->coh_lock_guard);
-	lock = cl_lock_lookup(env, obj, io, need);
-	spin_unlock(&head->coh_lock_guard);
-
-	if (!lock) {
-		lock = cl_lock_alloc(env, obj, io, need);
-		if (!IS_ERR(lock)) {
-			struct cl_lock *ghost;
-
-			spin_lock(&head->coh_lock_guard);
-			ghost = cl_lock_lookup(env, obj, io, need);
-			if (!ghost) {
-				cl_lock_get_trust(lock);
-				list_add_tail(&lock->cll_linkage,
-					      &head->coh_locks);
-				spin_unlock(&head->coh_lock_guard);
-				CS_LOCK_INC(obj, busy);
-			} else {
-				spin_unlock(&head->coh_lock_guard);
-				/*
-				 * Other threads can acquire references to the
-				 * top-lock through its sub-locks. Hence, it
-				 * cannot be cl_lock_free()-ed immediately.
-				 */
-				cl_lock_finish(env, lock);
-				lock = ghost;
-			}
-		}
-	}
-	return lock;
-}
-
-/**
- * Returns existing lock matching given description. This is similar to
- * cl_lock_find() except that no new lock is created, and returned lock is
- * guaranteed to be in enum cl_lock_state::CLS_HELD state.
- */
-struct cl_lock *cl_lock_peek(const struct lu_env *env, const struct cl_io *io,
-			     const struct cl_lock_descr *need,
-			     const char *scope, const void *source)
-{
-	struct cl_object_header *head;
-	struct cl_object	*obj;
-	struct cl_lock	  *lock;
-
-	obj  = need->cld_obj;
-	head = cl_object_header(obj);
-
-	do {
-		spin_lock(&head->coh_lock_guard);
-		lock = cl_lock_lookup(env, obj, io, need);
-		spin_unlock(&head->coh_lock_guard);
-		if (!lock)
-			return NULL;
-
-		cl_lock_mutex_get(env, lock);
-		if (lock->cll_state == CLS_INTRANSIT)
-			/* Don't care return value. */
-			cl_lock_state_wait(env, lock);
-		if (lock->cll_state == CLS_FREEING) {
-			cl_lock_mutex_put(env, lock);
-			cl_lock_put(env, lock);
-			lock = NULL;
-		}
-	} while (!lock);
-
-	cl_lock_hold_add(env, lock, scope, source);
-	cl_lock_user_add(env, lock);
-	if (lock->cll_state == CLS_CACHED)
-		cl_use_try(env, lock, 1);
-	if (lock->cll_state == CLS_HELD) {
-		cl_lock_mutex_put(env, lock);
-		cl_lock_lockdep_acquire(env, lock, 0);
-		cl_lock_put(env, lock);
-	} else {
-		cl_unuse_try(env, lock);
-		cl_lock_unhold(env, lock, scope, source);
-		cl_lock_mutex_put(env, lock);
-		cl_lock_put(env, lock);
-		lock = NULL;
-	}
-
-	return lock;
-}
-EXPORT_SYMBOL(cl_lock_peek);
-
-/**
- * Returns a slice within a lock, corresponding to the given layer in the
- * device stack.
- *
- * \see cl_page_at()
- */
-const struct cl_lock_slice *cl_lock_at(const struct cl_lock *lock,
-				       const struct lu_device_type *dtype)
-{
-	const struct cl_lock_slice *slice;
-
-	LINVRNT(cl_lock_invariant_trusted(NULL, lock));
-
-	list_for_each_entry(slice, &lock->cll_layers, cls_linkage) {
-		if (slice->cls_obj->co_lu.lo_dev->ld_type == dtype)
-			return slice;
-	}
-	return NULL;
-}
-EXPORT_SYMBOL(cl_lock_at);
-
-static void cl_lock_mutex_tail(const struct lu_env *env, struct cl_lock *lock)
-{
-	struct cl_thread_counters *counters;
-
-	counters = cl_lock_counters(env, lock);
-	lock->cll_depth++;
-	counters->ctc_nr_locks_locked++;
-	lu_ref_add(&counters->ctc_locks_locked, "cll_guard", lock);
-	cl_lock_trace(D_TRACE, env, "got mutex", lock);
-}
-
-/**
- * Locks cl_lock object.
- *
- * This is used to manipulate cl_lock fields, and to serialize state
- * transitions in the lock state machine.
- *
- * \post cl_lock_is_mutexed(lock)
- *
- * \see cl_lock_mutex_put()
- */
-void cl_lock_mutex_get(const struct lu_env *env, struct cl_lock *lock)
-{
-	LINVRNT(cl_lock_invariant(env, lock));
-
-	if (lock->cll_guarder == current) {
-		LINVRNT(cl_lock_is_mutexed(lock));
-		LINVRNT(lock->cll_depth > 0);
-	} else {
-		struct cl_object_header *hdr;
-		struct cl_thread_info   *info;
-		int i;
-
-		LINVRNT(lock->cll_guarder != current);
-		hdr = cl_object_header(lock->cll_descr.cld_obj);
-		/*
-		 * Check that mutices are taken in the bottom-to-top order.
-		 */
-		info = cl_env_info(env);
-		for (i = 0; i < hdr->coh_nesting; ++i)
-			LASSERT(info->clt_counters[i].ctc_nr_locks_locked == 0);
-		mutex_lock_nested(&lock->cll_guard, hdr->coh_nesting);
-		lock->cll_guarder = current;
-		LINVRNT(lock->cll_depth == 0);
-	}
-	cl_lock_mutex_tail(env, lock);
-}
-EXPORT_SYMBOL(cl_lock_mutex_get);
-
-/**
- * Try-locks cl_lock object.
- *
- * \retval 0 \a lock was successfully locked
- *
- * \retval -EBUSY \a lock cannot be locked right now
- *
- * \post ergo(result == 0, cl_lock_is_mutexed(lock))
- *
- * \see cl_lock_mutex_get()
- */
-static int cl_lock_mutex_try(const struct lu_env *env, struct cl_lock *lock)
-{
-	int result;
-
-	LINVRNT(cl_lock_invariant_trusted(env, lock));
-
-	result = 0;
-	if (lock->cll_guarder == current) {
-		LINVRNT(lock->cll_depth > 0);
-		cl_lock_mutex_tail(env, lock);
-	} else if (mutex_trylock(&lock->cll_guard)) {
-		LINVRNT(lock->cll_depth == 0);
-		lock->cll_guarder = current;
-		cl_lock_mutex_tail(env, lock);
-	} else
-		result = -EBUSY;
-	return result;
-}
-
-/**
- {* Unlocks cl_lock object.
- *
- * \pre cl_lock_is_mutexed(lock)
- *
- * \see cl_lock_mutex_get()
- */
-void cl_lock_mutex_put(const struct lu_env *env, struct cl_lock *lock)
-{
-	struct cl_thread_counters *counters;
-
-	LINVRNT(cl_lock_invariant(env, lock));
-	LINVRNT(cl_lock_is_mutexed(lock));
-	LINVRNT(lock->cll_guarder == current);
-	LINVRNT(lock->cll_depth > 0);
-
-	counters = cl_lock_counters(env, lock);
-	LINVRNT(counters->ctc_nr_locks_locked > 0);
-
-	cl_lock_trace(D_TRACE, env, "put mutex", lock);
-	lu_ref_del(&counters->ctc_locks_locked, "cll_guard", lock);
-	counters->ctc_nr_locks_locked--;
-	if (--lock->cll_depth == 0) {
-		lock->cll_guarder = NULL;
-		mutex_unlock(&lock->cll_guard);
-	}
-}
-EXPORT_SYMBOL(cl_lock_mutex_put);
-
-/**
- * Returns true iff lock's mutex is owned by the current thread.
- */
-int cl_lock_is_mutexed(struct cl_lock *lock)
-{
-	return lock->cll_guarder == current;
-}
-EXPORT_SYMBOL(cl_lock_is_mutexed);
-
-/**
- * Returns number of cl_lock mutices held by the current thread (environment).
- */
-int cl_lock_nr_mutexed(const struct lu_env *env)
-{
-	struct cl_thread_info *info;
-	int i;
-	int locked;
-
-	/*
-	 * NOTE: if summation across all nesting levels (currently 2) proves
-	 *       too expensive, a summary counter can be added to
-	 *       struct cl_thread_info.
-	 */
-	info = cl_env_info(env);
-	for (i = 0, locked = 0; i < ARRAY_SIZE(info->clt_counters); ++i)
-		locked += info->clt_counters[i].ctc_nr_locks_locked;
-	return locked;
-}
-EXPORT_SYMBOL(cl_lock_nr_mutexed);
-
-static void cl_lock_cancel0(const struct lu_env *env, struct cl_lock *lock)
-{
-	LINVRNT(cl_lock_is_mutexed(lock));
-	LINVRNT(cl_lock_invariant(env, lock));
-	if (!(lock->cll_flags & CLF_CANCELLED)) {
-		const struct cl_lock_slice *slice;
-
-		lock->cll_flags |= CLF_CANCELLED;
-		list_for_each_entry_reverse(slice, &lock->cll_layers,
-					    cls_linkage) {
-			if (slice->cls_ops->clo_cancel)
-				slice->cls_ops->clo_cancel(env, slice);
-		}
-	}
-}
-
-static void cl_lock_delete0(const struct lu_env *env, struct cl_lock *lock)
-{
-	struct cl_object_header    *head;
-	const struct cl_lock_slice *slice;
-
-	LINVRNT(cl_lock_is_mutexed(lock));
-	LINVRNT(cl_lock_invariant(env, lock));
-
-	if (lock->cll_state < CLS_FREEING) {
-		bool in_cache;
-
-		LASSERT(lock->cll_state != CLS_INTRANSIT);
-		cl_lock_state_set(env, lock, CLS_FREEING);
-
-		head = cl_object_header(lock->cll_descr.cld_obj);
-
-		spin_lock(&head->coh_lock_guard);
-		in_cache = !list_empty(&lock->cll_linkage);
-		if (in_cache)
-			list_del_init(&lock->cll_linkage);
-		spin_unlock(&head->coh_lock_guard);
-
-		if (in_cache) /* coh_locks cache holds a refcount. */
-			cl_lock_put(env, lock);
-
-		/*
-		 * From now on, no new references to this lock can be acquired
-		 * by cl_lock_lookup().
-		 */
-		list_for_each_entry_reverse(slice, &lock->cll_layers,
-					    cls_linkage) {
-			if (slice->cls_ops->clo_delete)
-				slice->cls_ops->clo_delete(env, slice);
-		}
-		/*
-		 * From now on, no new references to this lock can be acquired
-		 * by layer-specific means (like a pointer from struct
-		 * ldlm_lock in osc, or a pointer from top-lock to sub-lock in
-		 * lov).
-		 *
-		 * Lock will be finally freed in cl_lock_put() when last of
-		 * existing references goes away.
-		 */
-	}
-}
-
-/**
- * Mod(ifie)s cl_lock::cll_holds counter for a given lock. Also, for a
- * top-lock (nesting == 0) accounts for this modification in the per-thread
- * debugging counters. Sub-lock holds can be released by a thread different
- * from one that acquired it.
- */
-static void cl_lock_hold_mod(const struct lu_env *env, struct cl_lock *lock,
-			     int delta)
-{
-	struct cl_thread_counters *counters;
-	enum clt_nesting_level     nesting;
-
-	lock->cll_holds += delta;
-	nesting = cl_lock_nesting(lock);
-	if (nesting == CNL_TOP) {
-		counters = &cl_env_info(env)->clt_counters[CNL_TOP];
-		counters->ctc_nr_held += delta;
-		LASSERT(counters->ctc_nr_held >= 0);
-	}
-}
-
-/**
- * Mod(ifie)s cl_lock::cll_users counter for a given lock. See
- * cl_lock_hold_mod() for the explanation of the debugging code.
- */
-static void cl_lock_used_mod(const struct lu_env *env, struct cl_lock *lock,
-			     int delta)
-{
-	struct cl_thread_counters *counters;
-	enum clt_nesting_level     nesting;
-
-	lock->cll_users += delta;
-	nesting = cl_lock_nesting(lock);
-	if (nesting == CNL_TOP) {
-		counters = &cl_env_info(env)->clt_counters[CNL_TOP];
-		counters->ctc_nr_used += delta;
-		LASSERT(counters->ctc_nr_used >= 0);
-	}
-}
-
-void cl_lock_hold_release(const struct lu_env *env, struct cl_lock *lock,
-			  const char *scope, const void *source)
-{
-	LINVRNT(cl_lock_is_mutexed(lock));
-	LINVRNT(cl_lock_invariant(env, lock));
-	LASSERT(lock->cll_holds > 0);
-
-	cl_lock_trace(D_DLMTRACE, env, "hold release lock", lock);
-	lu_ref_del(&lock->cll_holders, scope, source);
-	cl_lock_hold_mod(env, lock, -1);
-	if (lock->cll_holds == 0) {
-		CL_LOCK_ASSERT(lock->cll_state != CLS_HELD, env, lock);
-		if (lock->cll_descr.cld_mode == CLM_PHANTOM ||
-		    lock->cll_descr.cld_mode == CLM_GROUP ||
-		    lock->cll_state != CLS_CACHED)
-			/*
-			 * If lock is still phantom or grouplock when user is
-			 * done with it---destroy the lock.
-			 */
-			lock->cll_flags |= CLF_CANCELPEND|CLF_DOOMED;
-		if (lock->cll_flags & CLF_CANCELPEND) {
-			lock->cll_flags &= ~CLF_CANCELPEND;
-			cl_lock_cancel0(env, lock);
-		}
-		if (lock->cll_flags & CLF_DOOMED) {
-			/* no longer doomed: it's dead... Jim. */
-			lock->cll_flags &= ~CLF_DOOMED;
-			cl_lock_delete0(env, lock);
-		}
-	}
-}
-EXPORT_SYMBOL(cl_lock_hold_release);
-
-/**
- * Waits until lock state is changed.
- *
- * This function is called with cl_lock mutex locked, atomically releases
- * mutex and goes to sleep, waiting for a lock state change (signaled by
- * cl_lock_signal()), and re-acquires the mutex before return.
- *
- * This function is used to wait until lock state machine makes some progress
- * and to emulate synchronous operations on top of asynchronous lock
- * interface.
- *
- * \retval -EINTR wait was interrupted
- *
- * \retval 0 wait wasn't interrupted
- *
- * \pre cl_lock_is_mutexed(lock)
- *
- * \see cl_lock_signal()
- */
-int cl_lock_state_wait(const struct lu_env *env, struct cl_lock *lock)
-{
-	wait_queue_t waiter;
-	sigset_t blocked;
-	int result;
-
-	LINVRNT(cl_lock_is_mutexed(lock));
-	LINVRNT(cl_lock_invariant(env, lock));
-	LASSERT(lock->cll_depth == 1);
-	LASSERT(lock->cll_state != CLS_FREEING); /* too late to wait */
-
-	cl_lock_trace(D_DLMTRACE, env, "state wait lock", lock);
-	result = lock->cll_error;
-	if (result == 0) {
-		/* To avoid being interrupted by the 'non-fatal' signals
-		 * (SIGCHLD, for instance), we'd block them temporarily.
-		 * LU-305
-		 */
-		blocked = cfs_block_sigsinv(LUSTRE_FATAL_SIGS);
-
-		init_waitqueue_entry(&waiter, current);
-		add_wait_queue(&lock->cll_wq, &waiter);
-		set_current_state(TASK_INTERRUPTIBLE);
-		cl_lock_mutex_put(env, lock);
-
-		LASSERT(cl_lock_nr_mutexed(env) == 0);
-
-		/* Returning ERESTARTSYS instead of EINTR so syscalls
-		 * can be restarted if signals are pending here
-		 */
-		result = -ERESTARTSYS;
-		if (likely(!OBD_FAIL_CHECK(OBD_FAIL_LOCK_STATE_WAIT_INTR))) {
-			schedule();
-			if (!cfs_signal_pending())
-				result = 0;
-		}
-
-		cl_lock_mutex_get(env, lock);
-		set_current_state(TASK_RUNNING);
-		remove_wait_queue(&lock->cll_wq, &waiter);
-
-		/* Restore old blocked signals */
-		cfs_restore_sigs(blocked);
-	}
-	return result;
-}
-EXPORT_SYMBOL(cl_lock_state_wait);
-
-static void cl_lock_state_signal(const struct lu_env *env, struct cl_lock *lock,
-				 enum cl_lock_state state)
-{
-	const struct cl_lock_slice *slice;
-
-	LINVRNT(cl_lock_is_mutexed(lock));
-	LINVRNT(cl_lock_invariant(env, lock));
-
-	list_for_each_entry(slice, &lock->cll_layers, cls_linkage)
-		if (slice->cls_ops->clo_state)
-			slice->cls_ops->clo_state(env, slice, state);
-	wake_up_all(&lock->cll_wq);
-}
-
-/**
- * Notifies waiters that lock state changed.
- *
- * Wakes up all waiters sleeping in cl_lock_state_wait(), also notifies all
- * layers about state change by calling cl_lock_operations::clo_state()
- * top-to-bottom.
- */
-void cl_lock_signal(const struct lu_env *env, struct cl_lock *lock)
-{
-	cl_lock_trace(D_DLMTRACE, env, "state signal lock", lock);
-	cl_lock_state_signal(env, lock, lock->cll_state);
-}
-EXPORT_SYMBOL(cl_lock_signal);
-
-/**
- * Changes lock state.
- *
- * This function is invoked to notify layers that lock state changed, possible
- * as a result of an asynchronous event such as call-back reception.
- *
- * \post lock->cll_state == state
- *
- * \see cl_lock_operations::clo_state()
- */
-void cl_lock_state_set(const struct lu_env *env, struct cl_lock *lock,
-		       enum cl_lock_state state)
-{
-	LASSERT(lock->cll_state <= state ||
-		(lock->cll_state == CLS_CACHED &&
-		 (state == CLS_HELD || /* lock found in cache */
-		  state == CLS_NEW  ||   /* sub-lock canceled */
-		  state == CLS_INTRANSIT)) ||
-		/* lock is in transit state */
-		lock->cll_state == CLS_INTRANSIT);
-
-	if (lock->cll_state != state) {
-		CS_LOCKSTATE_DEC(lock->cll_descr.cld_obj, lock->cll_state);
-		CS_LOCKSTATE_INC(lock->cll_descr.cld_obj, state);
-
-		cl_lock_state_signal(env, lock, state);
-		lock->cll_state = state;
-	}
-}
-EXPORT_SYMBOL(cl_lock_state_set);
-
-static int cl_unuse_try_internal(const struct lu_env *env, struct cl_lock *lock)
-{
-	const struct cl_lock_slice *slice;
-	int result;
-
-	do {
-		result = 0;
-
-		LINVRNT(cl_lock_is_mutexed(lock));
-		LINVRNT(cl_lock_invariant(env, lock));
-		LASSERT(lock->cll_state == CLS_INTRANSIT);
-
-		result = -ENOSYS;
-		list_for_each_entry_reverse(slice, &lock->cll_layers,
-					    cls_linkage) {
-			if (slice->cls_ops->clo_unuse) {
-				result = slice->cls_ops->clo_unuse(env, slice);
-				if (result != 0)
-					break;
-			}
-		}
-		LASSERT(result != -ENOSYS);
-	} while (result == CLO_REPEAT);
-
-	return result;
-}
-
-/**
- * Yanks lock from the cache (cl_lock_state::CLS_CACHED state) by calling
- * cl_lock_operations::clo_use() top-to-bottom to notify layers.
- * @atomic = 1, it must unuse the lock to recovery the lock to keep the
- *  use process atomic
- */
-int cl_use_try(const struct lu_env *env, struct cl_lock *lock, int atomic)
-{
-	const struct cl_lock_slice *slice;
-	int result;
-	enum cl_lock_state state;
-
-	cl_lock_trace(D_DLMTRACE, env, "use lock", lock);
-
-	LASSERT(lock->cll_state == CLS_CACHED);
-	if (lock->cll_error)
-		return lock->cll_error;
-
-	result = -ENOSYS;
-	state = cl_lock_intransit(env, lock);
-	list_for_each_entry(slice, &lock->cll_layers, cls_linkage) {
-		if (slice->cls_ops->clo_use) {
-			result = slice->cls_ops->clo_use(env, slice);
-			if (result != 0)
-				break;
-		}
-	}
-	LASSERT(result != -ENOSYS);
-
-	LASSERTF(lock->cll_state == CLS_INTRANSIT, "Wrong state %d.\n",
-		 lock->cll_state);
-
-	if (result == 0) {
-		state = CLS_HELD;
-	} else {
-		if (result == -ESTALE) {
-			/*
-			 * ESTALE means sublock being cancelled
-			 * at this time, and set lock state to
-			 * be NEW here and ask the caller to repeat.
-			 */
-			state = CLS_NEW;
-			result = CLO_REPEAT;
-		}
-
-		/* @atomic means back-off-on-failure. */
-		if (atomic) {
-			int rc;
-
-			rc = cl_unuse_try_internal(env, lock);
-			/* Vet the results. */
-			if (rc < 0 && result > 0)
-				result = rc;
-		}
-
-	}
-	cl_lock_extransit(env, lock, state);
-	return result;
-}
-EXPORT_SYMBOL(cl_use_try);
-
-/**
- * Helper for cl_enqueue_try() that calls ->clo_enqueue() across all layers
- * top-to-bottom.
- */
-static int cl_enqueue_kick(const struct lu_env *env,
-			   struct cl_lock *lock,
-			   struct cl_io *io, __u32 flags)
-{
-	int result;
-	const struct cl_lock_slice *slice;
-
-	result = -ENOSYS;
-	list_for_each_entry(slice, &lock->cll_layers, cls_linkage) {
-		if (slice->cls_ops->clo_enqueue) {
-			result = slice->cls_ops->clo_enqueue(env,
-							     slice, io, flags);
-			if (result != 0)
-				break;
-		}
-	}
-	LASSERT(result != -ENOSYS);
-	return result;
-}
-
-/**
- * Tries to enqueue a lock.
- *
- * This function is called repeatedly by cl_enqueue() until either lock is
- * enqueued, or error occurs. This function does not block waiting for
- * networking communication to complete.
- *
- * \post ergo(result == 0, lock->cll_state == CLS_ENQUEUED ||
- *			 lock->cll_state == CLS_HELD)
- *
- * \see cl_enqueue() cl_lock_operations::clo_enqueue()
- * \see cl_lock_state::CLS_ENQUEUED
- */
-int cl_enqueue_try(const struct lu_env *env, struct cl_lock *lock,
-		   struct cl_io *io, __u32 flags)
-{
-	int result;
-
-	cl_lock_trace(D_DLMTRACE, env, "enqueue lock", lock);
-	do {
-		LINVRNT(cl_lock_is_mutexed(lock));
-
-		result = lock->cll_error;
-		if (result != 0)
-			break;
-
-		switch (lock->cll_state) {
-		case CLS_NEW:
-			cl_lock_state_set(env, lock, CLS_QUEUING);
-			/* fall-through */
-		case CLS_QUEUING:
-			/* kick layers. */
-			result = cl_enqueue_kick(env, lock, io, flags);
-			/* For AGL case, the cl_lock::cll_state may
-			 * become CLS_HELD already.
-			 */
-			if (result == 0 && lock->cll_state == CLS_QUEUING)
-				cl_lock_state_set(env, lock, CLS_ENQUEUED);
-			break;
-		case CLS_INTRANSIT:
-			LASSERT(cl_lock_is_intransit(lock));
-			result = CLO_WAIT;
-			break;
-		case CLS_CACHED:
-			/* yank lock from the cache. */
-			result = cl_use_try(env, lock, 0);
-			break;
-		case CLS_ENQUEUED:
-		case CLS_HELD:
-			result = 0;
-			break;
-		default:
-		case CLS_FREEING:
-			/*
-			 * impossible, only held locks with increased
-			 * ->cll_holds can be enqueued, and they cannot be
-			 * freed.
-			 */
-			LBUG();
-		}
-	} while (result == CLO_REPEAT);
-	return result;
-}
-EXPORT_SYMBOL(cl_enqueue_try);
-
-/**
- * Cancel the conflicting lock found during previous enqueue.
- *
- * \retval 0 conflicting lock has been canceled.
- * \retval -ve error code.
- */
-int cl_lock_enqueue_wait(const struct lu_env *env,
-			 struct cl_lock *lock,
-			 int keep_mutex)
-{
-	struct cl_lock  *conflict;
-	int	      rc = 0;
-
-	LASSERT(cl_lock_is_mutexed(lock));
-	LASSERT(lock->cll_state == CLS_QUEUING);
-	LASSERT(lock->cll_conflict);
-
-	conflict = lock->cll_conflict;
-	lock->cll_conflict = NULL;
-
-	cl_lock_mutex_put(env, lock);
-	LASSERT(cl_lock_nr_mutexed(env) == 0);
-
-	cl_lock_mutex_get(env, conflict);
-	cl_lock_trace(D_DLMTRACE, env, "enqueue wait", conflict);
-	cl_lock_cancel(env, conflict);
-	cl_lock_delete(env, conflict);
-
-	while (conflict->cll_state != CLS_FREEING) {
-		rc = cl_lock_state_wait(env, conflict);
-		if (rc != 0)
-			break;
-	}
-	cl_lock_mutex_put(env, conflict);
-	lu_ref_del(&conflict->cll_reference, "cancel-wait", lock);
-	cl_lock_put(env, conflict);
-
-	if (keep_mutex)
-		cl_lock_mutex_get(env, lock);
-
-	LASSERT(rc <= 0);
-	return rc;
-}
-EXPORT_SYMBOL(cl_lock_enqueue_wait);
-
-static int cl_enqueue_locked(const struct lu_env *env, struct cl_lock *lock,
-			     struct cl_io *io, __u32 enqflags)
+			   const char *prefix, const struct cl_lock *lock,
+			   const char *func, const int line)
 {
-	int result;
-
-	LINVRNT(cl_lock_is_mutexed(lock));
-	LINVRNT(cl_lock_invariant(env, lock));
-	LASSERT(lock->cll_holds > 0);
+	struct cl_object_header *h = cl_object_header(lock->cll_descr.cld_obj);
 
-	cl_lock_user_add(env, lock);
-	do {
-		result = cl_enqueue_try(env, lock, io, enqflags);
-		if (result == CLO_WAIT) {
-			if (lock->cll_conflict)
-				result = cl_lock_enqueue_wait(env, lock, 1);
-			else
-				result = cl_lock_state_wait(env, lock);
-			if (result == 0)
-				continue;
-		}
-		break;
-	} while (1);
-	if (result != 0)
-		cl_unuse_try(env, lock);
-	LASSERT(ergo(result == 0 && !(enqflags & CEF_AGL),
-		     lock->cll_state == CLS_ENQUEUED ||
-		     lock->cll_state == CLS_HELD));
-	return result;
+	CDEBUG(level, "%s: %p (%p/%d) at %s():%d\n",
+	       prefix, lock, env, h->coh_nesting, func, line);
 }
+#define cl_lock_trace(level, env, prefix, lock)				\
+	cl_lock_trace0(level, env, prefix, lock, __func__, __LINE__)
 
 /**
- * Tries to unlock a lock.
- *
- * This function is called to release underlying resource:
- * 1. for top lock, the resource is sublocks it held;
- * 2. for sublock, the resource is the reference to dlmlock.
+ * Adds lock slice to the compound lock.
  *
- * cl_unuse_try is a one-shot operation, so it must NOT return CLO_WAIT.
+ * This is called by cl_object_operations::coo_lock_init() methods to add a
+ * per-layer state to the lock. New state is added at the end of
+ * cl_lock::cll_layers list, that is, it is at the bottom of the stack.
  *
- * \see cl_unuse() cl_lock_operations::clo_unuse()
- * \see cl_lock_state::CLS_CACHED
+ * \see cl_req_slice_add(), cl_page_slice_add(), cl_io_slice_add()
  */
-int cl_unuse_try(const struct lu_env *env, struct cl_lock *lock)
+void cl_lock_slice_add(struct cl_lock *lock, struct cl_lock_slice *slice,
+		       struct cl_object *obj,
+		       const struct cl_lock_operations *ops)
 {
-	int			 result;
-	enum cl_lock_state	  state = CLS_NEW;
-
-	cl_lock_trace(D_DLMTRACE, env, "unuse lock", lock);
-
-	if (lock->cll_users > 1) {
-		cl_lock_user_del(env, lock);
-		return 0;
-	}
-
-	/* Only if the lock is in CLS_HELD or CLS_ENQUEUED state, it can hold
-	 * underlying resources.
-	 */
-	if (!(lock->cll_state == CLS_HELD || lock->cll_state == CLS_ENQUEUED)) {
-		cl_lock_user_del(env, lock);
-		return 0;
-	}
-
-	/*
-	 * New lock users (->cll_users) are not protecting unlocking
-	 * from proceeding. From this point, lock eventually reaches
-	 * CLS_CACHED, is reinitialized to CLS_NEW or fails into
-	 * CLS_FREEING.
-	 */
-	state = cl_lock_intransit(env, lock);
-
-	result = cl_unuse_try_internal(env, lock);
-	LASSERT(lock->cll_state == CLS_INTRANSIT);
-	LASSERT(result != CLO_WAIT);
-	cl_lock_user_del(env, lock);
-	if (result == 0 || result == -ESTALE) {
-		/*
-		 * Return lock back to the cache. This is the only
-		 * place where lock is moved into CLS_CACHED state.
-		 *
-		 * If one of ->clo_unuse() methods returned -ESTALE, lock
-		 * cannot be placed into cache and has to be
-		 * re-initialized. This happens e.g., when a sub-lock was
-		 * canceled while unlocking was in progress.
-		 */
-		if (state == CLS_HELD && result == 0)
-			state = CLS_CACHED;
-		else
-			state = CLS_NEW;
-		cl_lock_extransit(env, lock, state);
-
-		/*
-		 * Hide -ESTALE error.
-		 * If the lock is a glimpse lock, and it has multiple
-		 * stripes. Assuming that one of its sublock returned -ENAVAIL,
-		 * and other sublocks are matched write locks. In this case,
-		 * we can't set this lock to error because otherwise some of
-		 * its sublocks may not be canceled. This causes some dirty
-		 * pages won't be written to OSTs. -jay
-		 */
-		result = 0;
-	} else {
-		CERROR("result = %d, this is unlikely!\n", result);
-		state = CLS_NEW;
-		cl_lock_extransit(env, lock, state);
-	}
-	return result ?: lock->cll_error;
+	slice->cls_lock = lock;
+	list_add_tail(&slice->cls_linkage, &lock->cll_layers);
+	slice->cls_obj = obj;
+	slice->cls_ops = ops;
 }
-EXPORT_SYMBOL(cl_unuse_try);
+EXPORT_SYMBOL(cl_lock_slice_add);
 
-static void cl_unuse_locked(const struct lu_env *env, struct cl_lock *lock)
+void cl_lock_fini(const struct lu_env *env, struct cl_lock *lock)
 {
-	int result;
+	cl_lock_trace(D_DLMTRACE, env, "destroy lock", lock);
 
-	result = cl_unuse_try(env, lock);
-	if (result)
-		CL_LOCK_DEBUG(D_ERROR, env, lock, "unuse return %d\n", result);
-}
+	while (!list_empty(&lock->cll_layers)) {
+		struct cl_lock_slice *slice;
 
-/**
- * Unlocks a lock.
- */
-void cl_unuse(const struct lu_env *env, struct cl_lock *lock)
-{
-	cl_lock_mutex_get(env, lock);
-	cl_unuse_locked(env, lock);
-	cl_lock_mutex_put(env, lock);
-	cl_lock_lockdep_release(env, lock);
+		slice = list_entry(lock->cll_layers.next,
+				   struct cl_lock_slice, cls_linkage);
+		list_del_init(lock->cll_layers.next);
+		slice->cls_ops->clo_fini(env, slice);
+	}
+	POISON(lock, 0x5a, sizeof(*lock));
 }
-EXPORT_SYMBOL(cl_unuse);
+EXPORT_SYMBOL(cl_lock_fini);
 
-/**
- * Tries to wait for a lock.
- *
- * This function is called repeatedly by cl_wait() until either lock is
- * granted, or error occurs. This function does not block waiting for network
- * communication to complete.
- *
- * \see cl_wait() cl_lock_operations::clo_wait()
- * \see cl_lock_state::CLS_HELD
- */
-int cl_wait_try(const struct lu_env *env, struct cl_lock *lock)
+int cl_lock_init(const struct lu_env *env, struct cl_lock *lock,
+		 const struct cl_io *io)
 {
-	const struct cl_lock_slice *slice;
-	int			 result;
-
-	cl_lock_trace(D_DLMTRACE, env, "wait lock try", lock);
-	do {
-		LINVRNT(cl_lock_is_mutexed(lock));
-		LINVRNT(cl_lock_invariant(env, lock));
-		LASSERTF(lock->cll_state == CLS_QUEUING ||
-			 lock->cll_state == CLS_ENQUEUED ||
-			 lock->cll_state == CLS_HELD ||
-			 lock->cll_state == CLS_INTRANSIT,
-			 "lock state: %d\n", lock->cll_state);
-		LASSERT(lock->cll_users > 0);
-		LASSERT(lock->cll_holds > 0);
+	struct cl_object *obj = lock->cll_descr.cld_obj;
+	struct cl_object *scan;
+	int result = 0;
 
-		result = lock->cll_error;
-		if (result != 0)
-			break;
+	/* Make sure cl_lock::cll_descr is initialized. */
+	LASSERT(obj);
 
-		if (cl_lock_is_intransit(lock)) {
-			result = CLO_WAIT;
+	INIT_LIST_HEAD(&lock->cll_layers);
+	list_for_each_entry(scan, &obj->co_lu.lo_header->loh_layers,
+			    co_lu.lo_linkage) {
+		result = scan->co_ops->coo_lock_init(env, scan, lock, io);
+		if (result != 0) {
+			cl_lock_fini(env, lock);
 			break;
 		}
+	}
 
-		if (lock->cll_state == CLS_HELD)
-			/* nothing to do */
-			break;
-
-		result = -ENOSYS;
-		list_for_each_entry(slice, &lock->cll_layers, cls_linkage) {
-			if (slice->cls_ops->clo_wait) {
-				result = slice->cls_ops->clo_wait(env, slice);
-				if (result != 0)
-					break;
-			}
-		}
-		LASSERT(result != -ENOSYS);
-		if (result == 0) {
-			LASSERT(lock->cll_state != CLS_INTRANSIT);
-			cl_lock_state_set(env, lock, CLS_HELD);
-		}
-	} while (result == CLO_REPEAT);
 	return result;
 }
-EXPORT_SYMBOL(cl_wait_try);
+EXPORT_SYMBOL(cl_lock_init);
 
 /**
- * Waits until enqueued lock is granted.
- *
- * \pre current thread or io owns a hold on the lock
- * \pre ergo(result == 0, lock->cll_state == CLS_ENQUEUED ||
- *			lock->cll_state == CLS_HELD)
+ * Returns a slice with a lock, corresponding to the given layer in the
+ * device stack.
  *
- * \post ergo(result == 0, lock->cll_state == CLS_HELD)
- */
-int cl_wait(const struct lu_env *env, struct cl_lock *lock)
-{
-	int result;
-
-	cl_lock_mutex_get(env, lock);
-
-	LINVRNT(cl_lock_invariant(env, lock));
-	LASSERTF(lock->cll_state == CLS_ENQUEUED || lock->cll_state == CLS_HELD,
-		 "Wrong state %d\n", lock->cll_state);
-	LASSERT(lock->cll_holds > 0);
-
-	do {
-		result = cl_wait_try(env, lock);
-		if (result == CLO_WAIT) {
-			result = cl_lock_state_wait(env, lock);
-			if (result == 0)
-				continue;
-		}
-		break;
-	} while (1);
-	if (result < 0) {
-		cl_unuse_try(env, lock);
-		cl_lock_lockdep_release(env, lock);
-	}
-	cl_lock_trace(D_DLMTRACE, env, "wait lock", lock);
-	cl_lock_mutex_put(env, lock);
-	LASSERT(ergo(result == 0, lock->cll_state == CLS_HELD));
-	return result;
-}
-EXPORT_SYMBOL(cl_wait);
-
-/**
- * Executes cl_lock_operations::clo_weigh(), and sums results to estimate lock
- * value.
+ * \see cl_page_at()
  */
-unsigned long cl_lock_weigh(const struct lu_env *env, struct cl_lock *lock)
+const struct cl_lock_slice *cl_lock_at(const struct cl_lock *lock,
+				       const struct lu_device_type *dtype)
 {
 	const struct cl_lock_slice *slice;
-	unsigned long pound;
-	unsigned long ounce;
-
-	LINVRNT(cl_lock_is_mutexed(lock));
-	LINVRNT(cl_lock_invariant(env, lock));
 
-	pound = 0;
-	list_for_each_entry_reverse(slice, &lock->cll_layers, cls_linkage) {
-		if (slice->cls_ops->clo_weigh) {
-			ounce = slice->cls_ops->clo_weigh(env, slice);
-			pound += ounce;
-			if (pound < ounce) /* over-weight^Wflow */
-				pound = ~0UL;
-		}
+	list_for_each_entry(slice, &lock->cll_layers, cls_linkage) {
+		if (slice->cls_obj->co_lu.lo_dev->ld_type == dtype)
+			return slice;
 	}
-	return pound;
+	return NULL;
 }
-EXPORT_SYMBOL(cl_lock_weigh);
+EXPORT_SYMBOL(cl_lock_at);
 
-/**
- * Notifies layers that lock description changed.
- *
- * The server can grant client a lock different from one that was requested
- * (e.g., larger in extent). This method is called when actually granted lock
- * description becomes known to let layers to accommodate for changed lock
- * description.
- *
- * \see cl_lock_operations::clo_modify()
- */
-int cl_lock_modify(const struct lu_env *env, struct cl_lock *lock,
-		   const struct cl_lock_descr *desc)
+void cl_lock_cancel(const struct lu_env *env, struct cl_lock *lock)
 {
 	const struct cl_lock_slice *slice;
-	struct cl_object	   *obj = lock->cll_descr.cld_obj;
-	struct cl_object_header    *hdr = cl_object_header(obj);
-	int result;
-
-	cl_lock_trace(D_DLMTRACE, env, "modify lock", lock);
-	/* don't allow object to change */
-	LASSERT(obj == desc->cld_obj);
-	LINVRNT(cl_lock_is_mutexed(lock));
-	LINVRNT(cl_lock_invariant(env, lock));
 
+	cl_lock_trace(D_DLMTRACE, env, "cancel lock", lock);
 	list_for_each_entry_reverse(slice, &lock->cll_layers, cls_linkage) {
-		if (slice->cls_ops->clo_modify) {
-			result = slice->cls_ops->clo_modify(env, slice, desc);
-			if (result != 0)
-				return result;
-		}
-	}
-	CL_LOCK_DEBUG(D_DLMTRACE, env, lock, " -> "DDESCR"@"DFID"\n",
-		      PDESCR(desc), PFID(lu_object_fid(&desc->cld_obj->co_lu)));
-	/*
-	 * Just replace description in place. Nothing more is needed for
-	 * now. If locks were indexed according to their extent and/or mode,
-	 * that index would have to be updated here.
-	 */
-	spin_lock(&hdr->coh_lock_guard);
-	lock->cll_descr = *desc;
-	spin_unlock(&hdr->coh_lock_guard);
-	return 0;
-}
-EXPORT_SYMBOL(cl_lock_modify);
-
-/**
- * Initializes lock closure with a given origin.
- *
- * \see cl_lock_closure
- */
-void cl_lock_closure_init(const struct lu_env *env,
-			  struct cl_lock_closure *closure,
-			  struct cl_lock *origin, int wait)
-{
-	LINVRNT(cl_lock_is_mutexed(origin));
-	LINVRNT(cl_lock_invariant(env, origin));
-
-	INIT_LIST_HEAD(&closure->clc_list);
-	closure->clc_origin = origin;
-	closure->clc_wait   = wait;
-	closure->clc_nr     = 0;
-}
-EXPORT_SYMBOL(cl_lock_closure_init);
-
-/**
- * Builds a closure of \a lock.
- *
- * Building of a closure consists of adding initial lock (\a lock) into it,
- * and calling cl_lock_operations::clo_closure() methods of \a lock. These
- * methods might call cl_lock_closure_build() recursively again, adding more
- * locks to the closure, etc.
- *
- * \see cl_lock_closure
- */
-int cl_lock_closure_build(const struct lu_env *env, struct cl_lock *lock,
-			  struct cl_lock_closure *closure)
-{
-	const struct cl_lock_slice *slice;
-	int result;
-
-	LINVRNT(cl_lock_is_mutexed(closure->clc_origin));
-	LINVRNT(cl_lock_invariant(env, closure->clc_origin));
-
-	result = cl_lock_enclosure(env, lock, closure);
-	if (result == 0) {
-		list_for_each_entry(slice, &lock->cll_layers, cls_linkage) {
-			if (slice->cls_ops->clo_closure) {
-				result = slice->cls_ops->clo_closure(env, slice,
-								     closure);
-				if (result != 0)
-					break;
-			}
-		}
-	}
-	if (result != 0)
-		cl_lock_disclosure(env, closure);
-	return result;
-}
-EXPORT_SYMBOL(cl_lock_closure_build);
-
-/**
- * Adds new lock to a closure.
- *
- * Try-locks \a lock and if succeeded, adds it to the closure (never more than
- * once). If try-lock failed, returns CLO_REPEAT, after optionally waiting
- * until next try-lock is likely to succeed.
- */
-int cl_lock_enclosure(const struct lu_env *env, struct cl_lock *lock,
-		      struct cl_lock_closure *closure)
-{
-	int result = 0;
-
-	cl_lock_trace(D_DLMTRACE, env, "enclosure lock", lock);
-	if (!cl_lock_mutex_try(env, lock)) {
-		/*
-		 * If lock->cll_inclosure is not empty, lock is already in
-		 * this closure.
-		 */
-		if (list_empty(&lock->cll_inclosure)) {
-			cl_lock_get_trust(lock);
-			lu_ref_add(&lock->cll_reference, "closure", closure);
-			list_add(&lock->cll_inclosure, &closure->clc_list);
-			closure->clc_nr++;
-		} else
-			cl_lock_mutex_put(env, lock);
-		result = 0;
-	} else {
-		cl_lock_disclosure(env, closure);
-		if (closure->clc_wait) {
-			cl_lock_get_trust(lock);
-			lu_ref_add(&lock->cll_reference, "closure-w", closure);
-			cl_lock_mutex_put(env, closure->clc_origin);
-
-			LASSERT(cl_lock_nr_mutexed(env) == 0);
-			cl_lock_mutex_get(env, lock);
-			cl_lock_mutex_put(env, lock);
-
-			cl_lock_mutex_get(env, closure->clc_origin);
-			lu_ref_del(&lock->cll_reference, "closure-w", closure);
-			cl_lock_put(env, lock);
-		}
-		result = CLO_REPEAT;
-	}
-	return result;
-}
-EXPORT_SYMBOL(cl_lock_enclosure);
-
-/** Releases mutices of enclosed locks. */
-void cl_lock_disclosure(const struct lu_env *env,
-			struct cl_lock_closure *closure)
-{
-	struct cl_lock *scan;
-	struct cl_lock *temp;
-
-	cl_lock_trace(D_DLMTRACE, env, "disclosure lock", closure->clc_origin);
-	list_for_each_entry_safe(scan, temp, &closure->clc_list,
-				 cll_inclosure) {
-		list_del_init(&scan->cll_inclosure);
-		cl_lock_mutex_put(env, scan);
-		lu_ref_del(&scan->cll_reference, "closure", closure);
-		cl_lock_put(env, scan);
-		closure->clc_nr--;
-	}
-	LASSERT(closure->clc_nr == 0);
-}
-EXPORT_SYMBOL(cl_lock_disclosure);
-
-/** Finalizes a closure. */
-void cl_lock_closure_fini(struct cl_lock_closure *closure)
-{
-	LASSERT(closure->clc_nr == 0);
-	LASSERT(list_empty(&closure->clc_list));
-}
-EXPORT_SYMBOL(cl_lock_closure_fini);
-
-/**
- * Destroys this lock. Notifies layers (bottom-to-top) that lock is being
- * destroyed, then destroy the lock. If there are holds on the lock, postpone
- * destruction until all holds are released. This is called when a decision is
- * made to destroy the lock in the future. E.g., when a blocking AST is
- * received on it, or fatal communication error happens.
- *
- * Caller must have a reference on this lock to prevent a situation, when
- * deleted lock lingers in memory for indefinite time, because nobody calls
- * cl_lock_put() to finish it.
- *
- * \pre atomic_read(&lock->cll_ref) > 0
- * \pre ergo(cl_lock_nesting(lock) == CNL_TOP,
- *	   cl_lock_nr_mutexed(env) == 1)
- *      [i.e., if a top-lock is deleted, mutices of no other locks can be
- *      held, as deletion of sub-locks might require releasing a top-lock
- *      mutex]
- *
- * \see cl_lock_operations::clo_delete()
- * \see cl_lock::cll_holds
- */
-void cl_lock_delete(const struct lu_env *env, struct cl_lock *lock)
-{
-	LINVRNT(cl_lock_is_mutexed(lock));
-	LINVRNT(cl_lock_invariant(env, lock));
-	LASSERT(ergo(cl_lock_nesting(lock) == CNL_TOP,
-		     cl_lock_nr_mutexed(env) == 1));
-
-	cl_lock_trace(D_DLMTRACE, env, "delete lock", lock);
-	if (lock->cll_holds == 0)
-		cl_lock_delete0(env, lock);
-	else
-		lock->cll_flags |= CLF_DOOMED;
-}
-EXPORT_SYMBOL(cl_lock_delete);
-
-/**
- * Mark lock as irrecoverably failed, and mark it for destruction. This
- * happens when, e.g., server fails to grant a lock to us, or networking
- * time-out happens.
- *
- * \pre atomic_read(&lock->cll_ref) > 0
- *
- * \see clo_lock_delete()
- * \see cl_lock::cll_holds
- */
-void cl_lock_error(const struct lu_env *env, struct cl_lock *lock, int error)
-{
-	LINVRNT(cl_lock_is_mutexed(lock));
-	LINVRNT(cl_lock_invariant(env, lock));
-
-	if (lock->cll_error == 0 && error != 0) {
-		cl_lock_trace(D_DLMTRACE, env, "set lock error", lock);
-		lock->cll_error = error;
-		cl_lock_signal(env, lock);
-		cl_lock_cancel(env, lock);
-		cl_lock_delete(env, lock);
+		if (slice->cls_ops->clo_cancel)
+			slice->cls_ops->clo_cancel(env, slice);
 	}
 }
-EXPORT_SYMBOL(cl_lock_error);
-
-/**
- * Cancels this lock. Notifies layers
- * (bottom-to-top) that lock is being cancelled, then destroy the lock. If
- * there are holds on the lock, postpone cancellation until
- * all holds are released.
- *
- * Cancellation notification is delivered to layers at most once.
- *
- * \see cl_lock_operations::clo_cancel()
- * \see cl_lock::cll_holds
- */
-void cl_lock_cancel(const struct lu_env *env, struct cl_lock *lock)
-{
-	LINVRNT(cl_lock_is_mutexed(lock));
-	LINVRNT(cl_lock_invariant(env, lock));
-
-	cl_lock_trace(D_DLMTRACE, env, "cancel lock", lock);
-	if (lock->cll_holds == 0)
-		cl_lock_cancel0(env, lock);
-	else
-		lock->cll_flags |= CLF_CANCELPEND;
-}
 EXPORT_SYMBOL(cl_lock_cancel);
 
 /**
- * Finds an existing lock covering given index and optionally different from a
- * given \a except lock.
+ * Enqueue a lock.
+ * \param anchor: if we need to wait for resources before getting the lock,
+ *		  use @anchor for the purpose.
+ * \retval 0  enqueue successfully
+ * \retval <0 error code
  */
-struct cl_lock *cl_lock_at_pgoff(const struct lu_env *env,
-				 struct cl_object *obj, pgoff_t index,
-				 struct cl_lock *except,
-				 int pending, int canceld)
+int cl_lock_enqueue(const struct lu_env *env, struct cl_io *io,
+		    struct cl_lock *lock, struct cl_sync_io *anchor)
 {
-	struct cl_object_header *head;
-	struct cl_lock	  *scan;
-	struct cl_lock	  *lock;
-	struct cl_lock_descr    *need;
-
-	head = cl_object_header(obj);
-	need = &cl_env_info(env)->clt_descr;
-	lock = NULL;
+	const struct cl_lock_slice *slice;
+	int rc = -ENOSYS;
 
-	need->cld_mode = CLM_READ; /* CLM_READ matches both READ & WRITE, but
-				    * not PHANTOM
-				    */
-	need->cld_start = need->cld_end = index;
-	need->cld_enq_flags = 0;
+	list_for_each_entry(slice, &lock->cll_layers, cls_linkage) {
+		if (!slice->cls_ops->clo_enqueue)
+			continue;
 
-	spin_lock(&head->coh_lock_guard);
-	/* It is fine to match any group lock since there could be only one
-	 * with a uniq gid and it conflicts with all other lock modes too
-	 */
-	list_for_each_entry(scan, &head->coh_locks, cll_linkage) {
-		if (scan != except &&
-		    (scan->cll_descr.cld_mode == CLM_GROUP ||
-		    cl_lock_ext_match(&scan->cll_descr, need)) &&
-		    scan->cll_state >= CLS_HELD &&
-		    scan->cll_state < CLS_FREEING &&
-		    /*
-		     * This check is racy as the lock can be canceled right
-		     * after it is done, but this is fine, because page exists
-		     * already.
-		     */
-		    (canceld || !(scan->cll_flags & CLF_CANCELLED)) &&
-		    (pending || !(scan->cll_flags & CLF_CANCELPEND))) {
-			/* Don't increase cs_hit here since this
-			 * is just a helper function.
-			 */
-			cl_lock_get_trust(scan);
-			lock = scan;
+		rc = slice->cls_ops->clo_enqueue(env, slice, io, anchor);
+		if (rc != 0)
 			break;
 		}
-	}
-	spin_unlock(&head->coh_lock_guard);
-	return lock;
-}
-EXPORT_SYMBOL(cl_lock_at_pgoff);
-
-/**
- * Calculate the page offset at the layer of @lock.
- * At the time of this writing, @page is top page and @lock is sub lock.
- */
-static pgoff_t pgoff_at_lock(struct cl_page *page, struct cl_lock *lock)
-{
-	struct lu_device_type *dtype;
-	const struct cl_page_slice *slice;
-
-	dtype = lock->cll_descr.cld_obj->co_lu.lo_dev->ld_type;
-	slice = cl_page_at(page, dtype);
-	return slice->cpl_page->cp_index;
+	return rc;
 }
+EXPORT_SYMBOL(cl_lock_enqueue);
 
 /**
- * Check if page @page is covered by an extra lock or discard it.
+ * Main high-level entry point of cl_lock interface that finds existing or
+ * enqueues new lock matching given description.
  */
-static int check_and_discard_cb(const struct lu_env *env, struct cl_io *io,
-				struct cl_page *page, void *cbdata)
-{
-	struct cl_thread_info *info = cl_env_info(env);
-	struct cl_lock *lock = cbdata;
-	pgoff_t index = pgoff_at_lock(page, lock);
-
-	if (index >= info->clt_fn_index) {
-		struct cl_lock *tmp;
-
-		/* refresh non-overlapped index */
-		tmp = cl_lock_at_pgoff(env, lock->cll_descr.cld_obj, index,
-				       lock, 1, 0);
-		if (tmp) {
-			/* Cache the first-non-overlapped index so as to skip
-			 * all pages within [index, clt_fn_index). This
-			 * is safe because if tmp lock is canceled, it will
-			 * discard these pages.
-			 */
-			info->clt_fn_index = tmp->cll_descr.cld_end + 1;
-			if (tmp->cll_descr.cld_end == CL_PAGE_EOF)
-				info->clt_fn_index = CL_PAGE_EOF;
-			cl_lock_put(env, tmp);
-		} else if (cl_page_own(env, io, page) == 0) {
-			/* discard the page */
-			cl_page_unmap(env, io, page);
-			cl_page_discard(env, io, page);
-			cl_page_disown(env, io, page);
-		} else {
-			LASSERT(page->cp_state == CPS_FREEING);
-		}
-	}
-
-	info->clt_next_index = index + 1;
-	return CLP_GANG_OKAY;
-}
-
-static int discard_cb(const struct lu_env *env, struct cl_io *io,
-		      struct cl_page *page, void *cbdata)
+int cl_lock_request(const struct lu_env *env, struct cl_io *io,
+		    struct cl_lock *lock)
 {
-	struct cl_thread_info *info = cl_env_info(env);
-	struct cl_lock *lock   = cbdata;
+	struct cl_sync_io *anchor = NULL;
+	__u32 enq_flags = lock->cll_descr.cld_enq_flags;
+	int rc;
 
-	LASSERT(lock->cll_descr.cld_mode >= CLM_WRITE);
-	KLASSERT(ergo(page->cp_type == CPT_CACHEABLE,
-		      !PageWriteback(cl_page_vmpage(env, page))));
-	KLASSERT(ergo(page->cp_type == CPT_CACHEABLE,
-		      !PageDirty(cl_page_vmpage(env, page))));
+	rc = cl_lock_init(env, lock, io);
+	if (rc < 0)
+		return rc;
 
-	info->clt_next_index = pgoff_at_lock(page, lock) + 1;
-	if (cl_page_own(env, io, page) == 0) {
-		/* discard the page */
-		cl_page_unmap(env, io, page);
-		cl_page_discard(env, io, page);
-		cl_page_disown(env, io, page);
-	} else {
-		LASSERT(page->cp_state == CPS_FREEING);
+	if ((enq_flags & CEF_ASYNC) && !(enq_flags & CEF_AGL)) {
+		anchor = &cl_env_info(env)->clt_anchor;
+		cl_sync_io_init(anchor, 1, cl_sync_io_end);
 	}
 
-	return CLP_GANG_OKAY;
-}
-
-/**
- * Discard pages protected by the given lock. This function traverses radix
- * tree to find all covering pages and discard them. If a page is being covered
- * by other locks, it should remain in cache.
- *
- * If error happens on any step, the process continues anyway (the reasoning
- * behind this being that lock cancellation cannot be delayed indefinitely).
- */
-int cl_lock_discard_pages(const struct lu_env *env, struct cl_lock *lock)
-{
-	struct cl_thread_info *info  = cl_env_info(env);
-	struct cl_io	  *io    = &info->clt_io;
-	struct cl_lock_descr  *descr = &lock->cll_descr;
-	cl_page_gang_cb_t      cb;
-	int res;
-	int result;
-
-	LINVRNT(cl_lock_invariant(env, lock));
-
-	io->ci_obj = cl_object_top(descr->cld_obj);
-	io->ci_ignore_layout = 1;
-	result = cl_io_init(env, io, CIT_MISC, io->ci_obj);
-	if (result != 0)
-		goto out;
-
-	cb = descr->cld_mode == CLM_READ ? check_and_discard_cb : discard_cb;
-	info->clt_fn_index = info->clt_next_index = descr->cld_start;
-	do {
-		res = cl_page_gang_lookup(env, descr->cld_obj, io,
-					  info->clt_next_index, descr->cld_end,
-					  cb, (void *)lock);
-		if (info->clt_next_index > descr->cld_end)
-			break;
-
-		if (res == CLP_GANG_RESCHED)
-			cond_resched();
-	} while (res != CLP_GANG_OKAY);
-out:
-	cl_io_fini(env, io);
-	return result;
-}
-EXPORT_SYMBOL(cl_lock_discard_pages);
-
-/**
- * Eliminate all locks for a given object.
- *
- * Caller has to guarantee that no lock is in active use.
- *
- * \param cancel when this is set, cl_locks_prune() cancels locks before
- *	       destroying.
- */
-void cl_locks_prune(const struct lu_env *env, struct cl_object *obj, int cancel)
-{
-	struct cl_object_header *head;
-	struct cl_lock	  *lock;
-
-	head = cl_object_header(obj);
-	/*
-	 * If locks are destroyed without cancellation, all pages must be
-	 * already destroyed (as otherwise they will be left unprotected).
-	 */
-	LASSERT(ergo(!cancel,
-		     !head->coh_tree.rnode && head->coh_pages == 0));
+	rc = cl_lock_enqueue(env, io, lock, anchor);
 
-	spin_lock(&head->coh_lock_guard);
-	while (!list_empty(&head->coh_locks)) {
-		lock = container_of(head->coh_locks.next,
-				    struct cl_lock, cll_linkage);
-		cl_lock_get_trust(lock);
-		spin_unlock(&head->coh_lock_guard);
-		lu_ref_add(&lock->cll_reference, "prune", current);
-
-again:
-		cl_lock_mutex_get(env, lock);
-		if (lock->cll_state < CLS_FREEING) {
-			LASSERT(lock->cll_users <= 1);
-			if (unlikely(lock->cll_users == 1)) {
-				struct l_wait_info lwi = { 0 };
-
-				cl_lock_mutex_put(env, lock);
-				l_wait_event(lock->cll_wq,
-					     lock->cll_users == 0,
-					     &lwi);
-				goto again;
-			}
-
-			if (cancel)
-				cl_lock_cancel(env, lock);
-			cl_lock_delete(env, lock);
-		}
-		cl_lock_mutex_put(env, lock);
-		lu_ref_del(&lock->cll_reference, "prune", current);
-		cl_lock_put(env, lock);
-		spin_lock(&head->coh_lock_guard);
-	}
-	spin_unlock(&head->coh_lock_guard);
-}
-EXPORT_SYMBOL(cl_locks_prune);
-
-static struct cl_lock *cl_lock_hold_mutex(const struct lu_env *env,
-					  const struct cl_io *io,
-					  const struct cl_lock_descr *need,
-					  const char *scope, const void *source)
-{
-	struct cl_lock *lock;
+	if (anchor) {
+		int rc2;
 
-	while (1) {
-		lock = cl_lock_find(env, io, need);
-		if (IS_ERR(lock))
-			break;
-		cl_lock_mutex_get(env, lock);
-		if (lock->cll_state < CLS_FREEING &&
-		    !(lock->cll_flags & CLF_CANCELLED)) {
-			cl_lock_hold_mod(env, lock, 1);
-			lu_ref_add(&lock->cll_holders, scope, source);
-			lu_ref_add(&lock->cll_reference, scope, source);
-			break;
-		}
-		cl_lock_mutex_put(env, lock);
-		cl_lock_put(env, lock);
+		/* drop the reference count held at initialization time */
+		cl_sync_io_note(env, anchor, 0);
+		rc2 = cl_sync_io_wait(env, anchor, 0);
+		if (rc2 < 0 && rc == 0)
+			rc = rc2;
 	}
-	return lock;
-}
-
-/**
- * Returns a lock matching \a need description with a reference and a hold on
- * it.
- *
- * This is much like cl_lock_find(), except that cl_lock_hold() additionally
- * guarantees that lock is not in the CLS_FREEING state on return.
- */
-struct cl_lock *cl_lock_hold(const struct lu_env *env, const struct cl_io *io,
-			     const struct cl_lock_descr *need,
-			     const char *scope, const void *source)
-{
-	struct cl_lock *lock;
-
-	lock = cl_lock_hold_mutex(env, io, need, scope, source);
-	if (!IS_ERR(lock))
-		cl_lock_mutex_put(env, lock);
-	return lock;
-}
-EXPORT_SYMBOL(cl_lock_hold);
-
-/**
- * Main high-level entry point of cl_lock interface that finds existing or
- * enqueues new lock matching given description.
- */
-struct cl_lock *cl_lock_request(const struct lu_env *env, struct cl_io *io,
-				const struct cl_lock_descr *need,
-				const char *scope, const void *source)
-{
-	struct cl_lock       *lock;
-	int		   rc;
-	__u32		 enqflags = need->cld_enq_flags;
 
-	do {
-		lock = cl_lock_hold_mutex(env, io, need, scope, source);
-		if (IS_ERR(lock))
-			break;
+	if (rc < 0)
+		cl_lock_release(env, lock);
 
-		rc = cl_enqueue_locked(env, lock, io, enqflags);
-		if (rc == 0) {
-			if (cl_lock_fits_into(env, lock, need, io)) {
-				if (!(enqflags & CEF_AGL)) {
-					cl_lock_mutex_put(env, lock);
-					cl_lock_lockdep_acquire(env, lock,
-								enqflags);
-					break;
-				}
-				rc = 1;
-			}
-			cl_unuse_locked(env, lock);
-		}
-		cl_lock_trace(D_DLMTRACE, env,
-			      rc <= 0 ? "enqueue failed" : "agl succeed", lock);
-		cl_lock_hold_release(env, lock, scope, source);
-		cl_lock_mutex_put(env, lock);
-		lu_ref_del(&lock->cll_reference, scope, source);
-		cl_lock_put(env, lock);
-		if (rc > 0) {
-			LASSERT(enqflags & CEF_AGL);
-			lock = NULL;
-		} else if (rc != 0) {
-			lock = ERR_PTR(rc);
-		}
-	} while (rc == 0);
-	return lock;
+	return rc;
 }
 EXPORT_SYMBOL(cl_lock_request);
 
-/**
- * Adds a hold to a known lock.
- */
-void cl_lock_hold_add(const struct lu_env *env, struct cl_lock *lock,
-		      const char *scope, const void *source)
-{
-	LINVRNT(cl_lock_is_mutexed(lock));
-	LINVRNT(cl_lock_invariant(env, lock));
-	LASSERT(lock->cll_state != CLS_FREEING);
-
-	cl_lock_hold_mod(env, lock, 1);
-	cl_lock_get(lock);
-	lu_ref_add(&lock->cll_holders, scope, source);
-	lu_ref_add(&lock->cll_reference, scope, source);
-}
-EXPORT_SYMBOL(cl_lock_hold_add);
-
-/**
- * Releases a hold and a reference on a lock, on which caller acquired a
- * mutex.
- */
-void cl_lock_unhold(const struct lu_env *env, struct cl_lock *lock,
-		    const char *scope, const void *source)
-{
-	LINVRNT(cl_lock_invariant(env, lock));
-	cl_lock_hold_release(env, lock, scope, source);
-	lu_ref_del(&lock->cll_reference, scope, source);
-	cl_lock_put(env, lock);
-}
-EXPORT_SYMBOL(cl_lock_unhold);
-
 /**
  * Releases a hold and a reference on a lock, obtained by cl_lock_hold().
  */
-void cl_lock_release(const struct lu_env *env, struct cl_lock *lock,
-		     const char *scope, const void *source)
+void cl_lock_release(const struct lu_env *env, struct cl_lock *lock)
 {
-	LINVRNT(cl_lock_invariant(env, lock));
 	cl_lock_trace(D_DLMTRACE, env, "release lock", lock);
-	cl_lock_mutex_get(env, lock);
-	cl_lock_hold_release(env, lock, scope, source);
-	cl_lock_mutex_put(env, lock);
-	lu_ref_del(&lock->cll_reference, scope, source);
-	cl_lock_put(env, lock);
+	cl_lock_cancel(env, lock);
+	cl_lock_fini(env, lock);
 }
 EXPORT_SYMBOL(cl_lock_release);
 
-void cl_lock_user_add(const struct lu_env *env, struct cl_lock *lock)
-{
-	LINVRNT(cl_lock_is_mutexed(lock));
-	LINVRNT(cl_lock_invariant(env, lock));
-
-	cl_lock_used_mod(env, lock, 1);
-}
-EXPORT_SYMBOL(cl_lock_user_add);
-
-void cl_lock_user_del(const struct lu_env *env, struct cl_lock *lock)
-{
-	LINVRNT(cl_lock_is_mutexed(lock));
-	LINVRNT(cl_lock_invariant(env, lock));
-	LASSERT(lock->cll_users > 0);
-
-	cl_lock_used_mod(env, lock, -1);
-	if (lock->cll_users == 0)
-		wake_up_all(&lock->cll_wq);
-}
-EXPORT_SYMBOL(cl_lock_user_del);
-
 const char *cl_lock_mode_name(const enum cl_lock_mode mode)
 {
 	static const char *names[] = {
-		[CLM_PHANTOM] = "P",
 		[CLM_READ]    = "R",
 		[CLM_WRITE]   = "W",
 		[CLM_GROUP]   = "G"
@@ -2189,10 +261,8 @@ void cl_lock_print(const struct lu_env *env, void *cookie,
 		   lu_printer_t printer, const struct cl_lock *lock)
 {
 	const struct cl_lock_slice *slice;
-	(*printer)(env, cookie, "lock@%p[%d %d %d %d %d %08lx] ",
-		   lock, atomic_read(&lock->cll_ref),
-		   lock->cll_state, lock->cll_error, lock->cll_holds,
-		   lock->cll_users, lock->cll_flags);
+
+	(*printer)(env, cookie, "lock@%p", lock);
 	cl_lock_descr_print(env, cookie, printer, &lock->cll_descr);
 	(*printer)(env, cookie, " {\n");
 
@@ -2207,13 +277,3 @@ void cl_lock_print(const struct lu_env *env, void *cookie,
 	(*printer)(env, cookie, "} lock@%p\n", lock);
 }
 EXPORT_SYMBOL(cl_lock_print);
-
-int cl_lock_init(void)
-{
-	return lu_kmem_init(cl_lock_caches);
-}
-
-void cl_lock_fini(void)
-{
-	lu_kmem_fini(cl_lock_caches);
-}
diff --git a/drivers/staging/lustre/lustre/obdclass/cl_object.c b/drivers/staging/lustre/lustre/obdclass/cl_object.c
index 43e299d4d..5940f3031 100644
--- a/drivers/staging/lustre/lustre/obdclass/cl_object.c
+++ b/drivers/staging/lustre/lustre/obdclass/cl_object.c
@@ -36,6 +36,7 @@
  * Client Lustre Object.
  *
  *   Author: Nikita Danilov <nikita.danilov@sun.com>
+ *   Author: Jinshan Xiong <jinshan.xiong@intel.com>
  */
 
 /*
@@ -43,8 +44,6 @@
  *
  *  i_mutex
  *      PG_locked
- *	  ->coh_page_guard
- *	  ->coh_lock_guard
  *	  ->coh_attr_guard
  *	  ->ls_guard
  */
@@ -63,10 +62,6 @@
 
 static struct kmem_cache *cl_env_kmem;
 
-/** Lock class of cl_object_header::coh_page_guard */
-static struct lock_class_key cl_page_guard_class;
-/** Lock class of cl_object_header::coh_lock_guard */
-static struct lock_class_key cl_lock_guard_class;
 /** Lock class of cl_object_header::coh_attr_guard */
 static struct lock_class_key cl_attr_guard_class;
 
@@ -81,17 +76,9 @@ int cl_object_header_init(struct cl_object_header *h)
 
 	result = lu_object_header_init(&h->coh_lu);
 	if (result == 0) {
-		spin_lock_init(&h->coh_page_guard);
-		spin_lock_init(&h->coh_lock_guard);
 		spin_lock_init(&h->coh_attr_guard);
-		lockdep_set_class(&h->coh_page_guard, &cl_page_guard_class);
-		lockdep_set_class(&h->coh_lock_guard, &cl_lock_guard_class);
 		lockdep_set_class(&h->coh_attr_guard, &cl_attr_guard_class);
-		h->coh_pages = 0;
-		/* XXX hard coded GFP_* mask. */
-		INIT_RADIX_TREE(&h->coh_tree, GFP_ATOMIC);
-		INIT_LIST_HEAD(&h->coh_locks);
-		h->coh_page_bufsize = ALIGN(sizeof(struct cl_page), 8);
+		h->coh_page_bufsize = 0;
 	}
 	return result;
 }
@@ -145,7 +132,7 @@ EXPORT_SYMBOL(cl_object_get);
 /**
  * Returns the top-object for a given \a o.
  *
- * \see cl_page_top(), cl_io_top()
+ * \see cl_io_top()
  */
 struct cl_object *cl_object_top(struct cl_object *o)
 {
@@ -314,6 +301,29 @@ int cl_conf_set(const struct lu_env *env, struct cl_object *obj,
 }
 EXPORT_SYMBOL(cl_conf_set);
 
+/**
+ * Prunes caches of pages and locks for this object.
+ */
+int cl_object_prune(const struct lu_env *env, struct cl_object *obj)
+{
+	struct lu_object_header *top;
+	struct cl_object *o;
+	int result;
+
+	top = obj->co_lu.lo_header;
+	result = 0;
+	list_for_each_entry(o, &top->loh_layers, co_lu.lo_linkage) {
+		if (o->co_ops->coo_prune) {
+			result = o->co_ops->coo_prune(env, o);
+			if (result != 0)
+				break;
+		}
+	}
+
+	return result;
+}
+EXPORT_SYMBOL(cl_object_prune);
+
 /**
  * Helper function removing all object locks, and marking object for
  * deletion. All object pages must have been deleted at this point.
@@ -323,34 +333,12 @@ EXPORT_SYMBOL(cl_conf_set);
  */
 void cl_object_kill(const struct lu_env *env, struct cl_object *obj)
 {
-	struct cl_object_header *hdr;
-
-	hdr = cl_object_header(obj);
-	LASSERT(!hdr->coh_tree.rnode);
-	LASSERT(hdr->coh_pages == 0);
+	struct cl_object_header *hdr = cl_object_header(obj);
 
 	set_bit(LU_OBJECT_HEARD_BANSHEE, &hdr->coh_lu.loh_flags);
-	/*
-	 * Destroy all locks. Object destruction (including cl_inode_fini())
-	 * cannot cancel the locks, because in the case of a local client,
-	 * where client and server share the same thread running
-	 * prune_icache(), this can dead-lock with ldlm_cancel_handler()
-	 * waiting on __wait_on_freeing_inode().
-	 */
-	cl_locks_prune(env, obj, 0);
 }
 EXPORT_SYMBOL(cl_object_kill);
 
-/**
- * Prunes caches of pages and locks for this object.
- */
-void cl_object_prune(const struct lu_env *env, struct cl_object *obj)
-{
-	cl_pages_prune(env, obj);
-	cl_locks_prune(env, obj, 1);
-}
-EXPORT_SYMBOL(cl_object_prune);
-
 void cache_stats_init(struct cache_stats *cs, const char *name)
 {
 	int i;
@@ -383,6 +371,8 @@ static int cache_stats_print(const struct cache_stats *cs,
 	return 0;
 }
 
+static void cl_env_percpu_refill(void);
+
 /**
  * Initialize client site.
  *
@@ -397,11 +387,9 @@ int cl_site_init(struct cl_site *s, struct cl_device *d)
 	result = lu_site_init(&s->cs_lu, &d->cd_lu_dev);
 	if (result == 0) {
 		cache_stats_init(&s->cs_pages, "pages");
-		cache_stats_init(&s->cs_locks, "locks");
 		for (i = 0; i < ARRAY_SIZE(s->cs_pages_state); ++i)
 			atomic_set(&s->cs_pages_state[0], 0);
-		for (i = 0; i < ARRAY_SIZE(s->cs_locks_state); ++i)
-			atomic_set(&s->cs_locks_state[i], 0);
+		cl_env_percpu_refill();
 	}
 	return result;
 }
@@ -435,15 +423,6 @@ int cl_site_stats_print(const struct cl_site *site, struct seq_file *m)
 		[CPS_PAGEIN]  = "r",
 		[CPS_FREEING] = "f"
 	};
-	static const char *lstate[] = {
-		[CLS_NEW]       = "n",
-		[CLS_QUEUING]   = "q",
-		[CLS_ENQUEUED]  = "e",
-		[CLS_HELD]      = "h",
-		[CLS_INTRANSIT] = "t",
-		[CLS_CACHED]    = "c",
-		[CLS_FREEING]   = "f"
-	};
 /*
        lookup    hit  total   busy create
 pages: ...... ...... ...... ...... ...... [...... ...... ...... ......]
@@ -457,12 +436,6 @@ locks: ...... ...... ...... ...... ...... [...... ...... ...... ...... ......]
 		seq_printf(m, "%s: %u ", pstate[i],
 			   atomic_read(&site->cs_pages_state[i]));
 	seq_printf(m, "]\n");
-	cache_stats_print(&site->cs_locks, m, 0);
-	seq_printf(m, " [");
-	for (i = 0; i < ARRAY_SIZE(site->cs_locks_state); ++i)
-		seq_printf(m, "%s: %u ", lstate[i],
-			   atomic_read(&site->cs_locks_state[i]));
-	seq_printf(m, "]\n");
 	cache_stats_print(&cl_env_stats, m, 0);
 	seq_printf(m, "\n");
 	return 0;
@@ -492,6 +465,13 @@ EXPORT_SYMBOL(cl_site_stats_print);
  * bz20044, bz22683.
  */
 
+static LIST_HEAD(cl_envs);
+static unsigned int cl_envs_cached_nr;
+static unsigned int cl_envs_cached_max = 128; /* XXX: prototype: arbitrary limit
+					       * for now.
+					       */
+static DEFINE_SPINLOCK(cl_envs_guard);
+
 struct cl_env {
 	void	     *ce_magic;
 	struct lu_env     ce_lu;
@@ -674,8 +654,9 @@ static struct lu_env *cl_env_new(__u32 ctx_tags, __u32 ses_tags, void *debug)
 				lu_context_enter(&cle->ce_ses);
 				env->le_ses = &cle->ce_ses;
 				cl_env_init0(cle, debug);
-			} else
+			} else {
 				lu_env_fini(env);
+			}
 		}
 		if (rc != 0) {
 			kmem_cache_free(cl_env_kmem, cle);
@@ -684,8 +665,9 @@ static struct lu_env *cl_env_new(__u32 ctx_tags, __u32 ses_tags, void *debug)
 			CL_ENV_INC(create);
 			CL_ENV_INC(total);
 		}
-	} else
+	} else {
 		env = ERR_PTR(-ENOMEM);
+	}
 	return env;
 }
 
@@ -697,6 +679,39 @@ static void cl_env_fini(struct cl_env *cle)
 	kmem_cache_free(cl_env_kmem, cle);
 }
 
+static struct lu_env *cl_env_obtain(void *debug)
+{
+	struct cl_env *cle;
+	struct lu_env *env;
+
+	spin_lock(&cl_envs_guard);
+	LASSERT(equi(cl_envs_cached_nr == 0, list_empty(&cl_envs)));
+	if (cl_envs_cached_nr > 0) {
+		int rc;
+
+		cle = container_of(cl_envs.next, struct cl_env, ce_linkage);
+		list_del_init(&cle->ce_linkage);
+		cl_envs_cached_nr--;
+		spin_unlock(&cl_envs_guard);
+
+		env = &cle->ce_lu;
+		rc = lu_env_refill(env);
+		if (rc == 0) {
+			cl_env_init0(cle, debug);
+			lu_context_enter(&env->le_ctx);
+			lu_context_enter(&cle->ce_ses);
+		} else {
+			cl_env_fini(cle);
+			env = ERR_PTR(rc);
+		}
+	} else {
+		spin_unlock(&cl_envs_guard);
+		env = cl_env_new(lu_context_tags_default,
+				 lu_session_tags_default, debug);
+	}
+	return env;
+}
+
 static inline struct cl_env *cl_env_container(struct lu_env *env)
 {
 	return container_of(env, struct cl_env, ce_lu);
@@ -727,6 +742,8 @@ static struct lu_env *cl_env_peek(int *refcheck)
  * Returns lu_env: if there already is an environment associated with the
  * current thread, it is returned, otherwise, new environment is allocated.
  *
+ * Allocations are amortized through the global cache of environments.
+ *
  * \param refcheck pointer to a counter used to detect environment leaks. In
  * the usual case cl_env_get() and cl_env_put() are called in the same lexical
  * scope and pointer to the same integer is passed as \a refcheck. This is
@@ -740,10 +757,7 @@ struct lu_env *cl_env_get(int *refcheck)
 
 	env = cl_env_peek(refcheck);
 	if (!env) {
-		env = cl_env_new(lu_context_tags_default,
-				 lu_session_tags_default,
-				 __builtin_return_address(0));
-
+		env = cl_env_obtain(__builtin_return_address(0));
 		if (!IS_ERR(env)) {
 			struct cl_env *cle;
 
@@ -786,6 +800,32 @@ static void cl_env_exit(struct cl_env *cle)
 	lu_context_exit(&cle->ce_ses);
 }
 
+/**
+ * Finalizes and frees a given number of cached environments. This is done to
+ * (1) free some memory (not currently hooked into VM), or (2) release
+ * references to modules.
+ */
+unsigned int cl_env_cache_purge(unsigned int nr)
+{
+	struct cl_env *cle;
+
+	spin_lock(&cl_envs_guard);
+	for (; !list_empty(&cl_envs) && nr > 0; --nr) {
+		cle = container_of(cl_envs.next, struct cl_env, ce_linkage);
+		list_del_init(&cle->ce_linkage);
+		LASSERT(cl_envs_cached_nr > 0);
+		cl_envs_cached_nr--;
+		spin_unlock(&cl_envs_guard);
+
+		cl_env_fini(cle);
+		spin_lock(&cl_envs_guard);
+	}
+	LASSERT(equi(cl_envs_cached_nr == 0, list_empty(&cl_envs)));
+	spin_unlock(&cl_envs_guard);
+	return nr;
+}
+EXPORT_SYMBOL(cl_env_cache_purge);
+
 /**
  * Release an environment.
  *
@@ -808,7 +848,22 @@ void cl_env_put(struct lu_env *env, int *refcheck)
 		cl_env_detach(cle);
 		cle->ce_debug = NULL;
 		cl_env_exit(cle);
-		cl_env_fini(cle);
+		/*
+		 * Don't bother to take a lock here.
+		 *
+		 * Return environment to the cache only when it was allocated
+		 * with the standard tags.
+		 */
+		if (cl_envs_cached_nr < cl_envs_cached_max &&
+		    (env->le_ctx.lc_tags & ~LCT_HAS_EXIT) == LCT_CL_THREAD &&
+		    (env->le_ses->lc_tags & ~LCT_HAS_EXIT) == LCT_SESSION) {
+			spin_lock(&cl_envs_guard);
+			list_add(&cle->ce_linkage, &cl_envs);
+			cl_envs_cached_nr++;
+			spin_unlock(&cl_envs_guard);
+		} else {
+			cl_env_fini(cle);
+		}
 	}
 }
 EXPORT_SYMBOL(cl_env_put);
@@ -914,6 +969,104 @@ void cl_lvb2attr(struct cl_attr *attr, const struct ost_lvb *lvb)
 }
 EXPORT_SYMBOL(cl_lvb2attr);
 
+static struct cl_env cl_env_percpu[NR_CPUS];
+
+static int cl_env_percpu_init(void)
+{
+	struct cl_env *cle;
+	int tags = LCT_REMEMBER | LCT_NOREF;
+	int i, j;
+	int rc = 0;
+
+	for_each_possible_cpu(i) {
+		struct lu_env *env;
+
+		cle = &cl_env_percpu[i];
+		env = &cle->ce_lu;
+
+		INIT_LIST_HEAD(&cle->ce_linkage);
+		cle->ce_magic = &cl_env_init0;
+		rc = lu_env_init(env, LCT_CL_THREAD | tags);
+		if (rc == 0) {
+			rc = lu_context_init(&cle->ce_ses, LCT_SESSION | tags);
+			if (rc == 0) {
+				lu_context_enter(&cle->ce_ses);
+				env->le_ses = &cle->ce_ses;
+			} else {
+				lu_env_fini(env);
+			}
+		}
+		if (rc != 0)
+			break;
+	}
+	if (rc != 0) {
+		/* Indices 0 to i (excluding i) were correctly initialized,
+		 * thus we must uninitialize up to i, the rest are undefined.
+		 */
+		for (j = 0; j < i; j++) {
+			cle = &cl_env_percpu[i];
+			lu_context_exit(&cle->ce_ses);
+			lu_context_fini(&cle->ce_ses);
+			lu_env_fini(&cle->ce_lu);
+		}
+	}
+
+	return rc;
+}
+
+static void cl_env_percpu_fini(void)
+{
+	int i;
+
+	for_each_possible_cpu(i) {
+		struct cl_env *cle = &cl_env_percpu[i];
+
+		lu_context_exit(&cle->ce_ses);
+		lu_context_fini(&cle->ce_ses);
+		lu_env_fini(&cle->ce_lu);
+	}
+}
+
+static void cl_env_percpu_refill(void)
+{
+	int i;
+
+	for_each_possible_cpu(i)
+		lu_env_refill(&cl_env_percpu[i].ce_lu);
+}
+
+void cl_env_percpu_put(struct lu_env *env)
+{
+	struct cl_env *cle;
+	int cpu;
+
+	cpu = smp_processor_id();
+	cle = cl_env_container(env);
+	LASSERT(cle == &cl_env_percpu[cpu]);
+
+	cle->ce_ref--;
+	LASSERT(cle->ce_ref == 0);
+
+	CL_ENV_DEC(busy);
+	cl_env_detach(cle);
+	cle->ce_debug = NULL;
+
+	put_cpu();
+}
+EXPORT_SYMBOL(cl_env_percpu_put);
+
+struct lu_env *cl_env_percpu_get(void)
+{
+	struct cl_env *cle;
+
+	cle = &cl_env_percpu[get_cpu()];
+	cl_env_init0(cle, __builtin_return_address(0));
+
+	cl_env_attach(cle);
+	return &cle->ce_lu;
+}
+EXPORT_SYMBOL(cl_env_percpu_get);
+
 /*****************************************************************************
  *
  * Temporary prototype thing: mirror obd-devices into cl devices.
@@ -944,8 +1097,9 @@ struct cl_device *cl_type_setup(const struct lu_env *env, struct lu_site *site,
 			CERROR("can't init device '%s', %d\n", typename, rc);
 			d = ERR_PTR(rc);
 		}
-	} else
+	} else {
 		CERROR("Cannot allocate device: '%s'\n", typename);
+	}
 	return lu2cl_dev(d);
 }
 EXPORT_SYMBOL(cl_type_setup);
@@ -959,12 +1113,6 @@ void cl_stack_fini(const struct lu_env *env, struct cl_device *cl)
 }
 EXPORT_SYMBOL(cl_stack_fini);
 
-int  cl_lock_init(void);
-void cl_lock_fini(void);
-
-int  cl_page_init(void);
-void cl_page_fini(void);
-
 static struct lu_context_key cl_key;
 
 struct cl_thread_info *cl_env_info(const struct lu_env *env)
@@ -1059,17 +1207,13 @@ int cl_global_init(void)
 	if (result)
 		goto out_kmem;
 
-	result = cl_lock_init();
+	result = cl_env_percpu_init();
 	if (result)
+		/* no cl_env_percpu_fini on error */
 		goto out_context;
 
-	result = cl_page_init();
-	if (result)
-		goto out_lock;
-
 	return 0;
-out_lock:
-	cl_lock_fini();
+
 out_context:
 	lu_context_key_degister(&cl_key);
 out_kmem:
@@ -1084,8 +1228,7 @@ out_store:
  */
 void cl_global_fini(void)
 {
-	cl_lock_fini();
-	cl_page_fini();
+	cl_env_percpu_fini();
 	lu_context_key_degister(&cl_key);
 	lu_kmem_fini(cl_object_caches);
 	cl_env_store_fini();
diff --git a/drivers/staging/lustre/lustre/obdclass/cl_page.c b/drivers/staging/lustre/lustre/obdclass/cl_page.c
index 394580016..b754f516e 100644
--- a/drivers/staging/lustre/lustre/obdclass/cl_page.c
+++ b/drivers/staging/lustre/lustre/obdclass/cl_page.c
@@ -36,6 +36,7 @@
  * Client Lustre Page.
  *
  *   Author: Nikita Danilov <nikita.danilov@sun.com>
+ *   Author: Jinshan Xiong <jinshan.xiong@intel.com>
  */
 
 #define DEBUG_SUBSYSTEM S_CLASS
@@ -48,8 +49,7 @@
 #include "../include/cl_object.h"
 #include "cl_internal.h"
 
-static void cl_page_delete0(const struct lu_env *env, struct cl_page *pg,
-			    int radix);
+static void cl_page_delete0(const struct lu_env *env, struct cl_page *pg);
 
 # define PASSERT(env, page, expr)					   \
 	do {								   \
@@ -62,25 +62,12 @@ static void cl_page_delete0(const struct lu_env *env, struct cl_page *pg,
 # define PINVRNT(env, page, exp) \
 	((void)sizeof(env), (void)sizeof(page), (void)sizeof !!(exp))
 
-/**
- * Internal version of cl_page_top, it should be called if the page is
- * known to be not freed, says with page referenced, or radix tree lock held,
- * or page owned.
- */
-static struct cl_page *cl_page_top_trusted(struct cl_page *page)
-{
-	while (page->cp_parent)
-		page = page->cp_parent;
-	return page;
-}
-
 /**
  * Internal version of cl_page_get().
  *
  * This function can be used to obtain initial reference to previously
  * unreferenced cached object. It can be called only if concurrent page
- * reclamation is somehow prevented, e.g., by locking page radix-tree
- * (cl_object_header::hdr->coh_page_guard), or by keeping a lock on a VM page,
+ * reclamation is somehow prevented, e.g., by keeping a lock on a VM page,
  * associated with \a page.
  *
  * Use with care! Not exported.
@@ -103,142 +90,12 @@ cl_page_at_trusted(const struct cl_page *page,
 {
 	const struct cl_page_slice *slice;
 
-	page = cl_page_top_trusted((struct cl_page *)page);
-	do {
-		list_for_each_entry(slice, &page->cp_layers, cpl_linkage) {
-			if (slice->cpl_obj->co_lu.lo_dev->ld_type == dtype)
-				return slice;
-		}
-		page = page->cp_child;
-	} while (page);
-	return NULL;
-}
-
-/**
- * Returns a page with given index in the given object, or NULL if no page is
- * found. Acquires a reference on \a page.
- *
- * Locking: called under cl_object_header::coh_page_guard spin-lock.
- */
-struct cl_page *cl_page_lookup(struct cl_object_header *hdr, pgoff_t index)
-{
-	struct cl_page *page;
-
-	assert_spin_locked(&hdr->coh_page_guard);
-
-	page = radix_tree_lookup(&hdr->coh_tree, index);
-	if (page)
-		cl_page_get_trust(page);
-	return page;
-}
-EXPORT_SYMBOL(cl_page_lookup);
-
-/**
- * Returns a list of pages by a given [start, end] of \a obj.
- *
- * \param resched If not NULL, then we give up before hogging CPU for too
- * long and set *resched = 1, in that case caller should implement a retry
- * logic.
- *
- * Gang tree lookup (radix_tree_gang_lookup()) optimization is absolutely
- * crucial in the face of [offset, EOF] locks.
- *
- * Return at least one page in @queue unless there is no covered page.
- */
-int cl_page_gang_lookup(const struct lu_env *env, struct cl_object *obj,
-			struct cl_io *io, pgoff_t start, pgoff_t end,
-			cl_page_gang_cb_t cb, void *cbdata)
-{
-	struct cl_object_header *hdr;
-	struct cl_page	  *page;
-	struct cl_page	 **pvec;
-	const struct cl_page_slice  *slice;
-	const struct lu_device_type *dtype;
-	pgoff_t		  idx;
-	unsigned int	     nr;
-	unsigned int	     i;
-	unsigned int	     j;
-	int		      res = CLP_GANG_OKAY;
-	int		      tree_lock = 1;
-
-	idx = start;
-	hdr = cl_object_header(obj);
-	pvec = cl_env_info(env)->clt_pvec;
-	dtype = cl_object_top(obj)->co_lu.lo_dev->ld_type;
-	spin_lock(&hdr->coh_page_guard);
-	while ((nr = radix_tree_gang_lookup(&hdr->coh_tree, (void **)pvec,
-					    idx, CLT_PVEC_SIZE)) > 0) {
-		int end_of_region = 0;
-
-		idx = pvec[nr - 1]->cp_index + 1;
-		for (i = 0, j = 0; i < nr; ++i) {
-			page = pvec[i];
-			pvec[i] = NULL;
-
-			LASSERT(page->cp_type == CPT_CACHEABLE);
-			if (page->cp_index > end) {
-				end_of_region = 1;
-				break;
-			}
-			if (page->cp_state == CPS_FREEING)
-				continue;
-
-			slice = cl_page_at_trusted(page, dtype);
-			/*
-			 * Pages for lsm-less file has no underneath sub-page
-			 * for osc, in case of ...
-			 */
-			PASSERT(env, page, slice);
-
-			page = slice->cpl_page;
-			/*
-			 * Can safely call cl_page_get_trust() under
-			 * radix-tree spin-lock.
-			 *
-			 * XXX not true, because @page is from object another
-			 * than @hdr and protected by different tree lock.
-			 */
-			cl_page_get_trust(page);
-			lu_ref_add_atomic(&page->cp_reference,
-					  "gang_lookup", current);
-			pvec[j++] = page;
-		}
-
-		/*
-		 * Here a delicate locking dance is performed. Current thread
-		 * holds a reference to a page, but has to own it before it
-		 * can be placed into queue. Owning implies waiting, so
-		 * radix-tree lock is to be released. After a wait one has to
-		 * check that pages weren't truncated (cl_page_own() returns
-		 * error in the latter case).
-		 */
-		spin_unlock(&hdr->coh_page_guard);
-		tree_lock = 0;
-
-		for (i = 0; i < j; ++i) {
-			page = pvec[i];
-			if (res == CLP_GANG_OKAY)
-				res = (*cb)(env, io, page, cbdata);
-			lu_ref_del(&page->cp_reference,
-				   "gang_lookup", current);
-			cl_page_put(env, page);
-		}
-		if (nr < CLT_PVEC_SIZE || end_of_region)
-			break;
-
-		if (res == CLP_GANG_OKAY && need_resched())
-			res = CLP_GANG_RESCHED;
-		if (res != CLP_GANG_OKAY)
-			break;
-
-		spin_lock(&hdr->coh_page_guard);
-		tree_lock = 1;
+	list_for_each_entry(slice, &page->cp_layers, cpl_linkage) {
+		if (slice->cpl_obj->co_lu.lo_dev->ld_type == dtype)
+			return slice;
 	}
-	if (tree_lock)
-		spin_unlock(&hdr->coh_page_guard);
-	return res;
+	return NULL;
 }
-EXPORT_SYMBOL(cl_page_gang_lookup);
 
 static void cl_page_free(const struct lu_env *env, struct cl_page *page)
 {
@@ -247,17 +104,16 @@ static void cl_page_free(const struct lu_env *env, struct cl_page *page)
 	PASSERT(env, page, list_empty(&page->cp_batch));
 	PASSERT(env, page, !page->cp_owner);
 	PASSERT(env, page, !page->cp_req);
-	PASSERT(env, page, !page->cp_parent);
 	PASSERT(env, page, page->cp_state == CPS_FREEING);
 
-	might_sleep();
 	while (!list_empty(&page->cp_layers)) {
 		struct cl_page_slice *slice;
 
 		slice = list_entry(page->cp_layers.next,
 				   struct cl_page_slice, cpl_linkage);
 		list_del_init(page->cp_layers.next);
-		slice->cpl_ops->cpo_fini(env, slice);
+		if (unlikely(slice->cpl_ops->cpo_fini))
+			slice->cpl_ops->cpo_fini(env, slice);
 	}
 	lu_object_ref_del_at(&obj->co_lu, &page->cp_obj_ref, "cl_page", page);
 	cl_object_put(env, obj);
@@ -276,10 +132,10 @@ static inline void cl_page_state_set_trust(struct cl_page *page,
 	*(enum cl_page_state *)&page->cp_state = state;
 }
 
-static struct cl_page *cl_page_alloc(const struct lu_env *env,
-				     struct cl_object *o, pgoff_t ind,
-				     struct page *vmpage,
-				     enum cl_page_type type)
+struct cl_page *cl_page_alloc(const struct lu_env *env,
+			      struct cl_object *o, pgoff_t ind,
+			      struct page *vmpage,
+			      enum cl_page_type type)
 {
 	struct cl_page	  *page;
 	struct lu_object_header *head;
@@ -289,13 +145,11 @@ static struct cl_page *cl_page_alloc(const struct lu_env *env,
 		int result = 0;
 
 		atomic_set(&page->cp_ref, 1);
-		if (type == CPT_CACHEABLE) /* for radix tree */
-			atomic_inc(&page->cp_ref);
 		page->cp_obj = o;
 		cl_object_get(o);
 		lu_object_ref_add_at(&o->co_lu, &page->cp_obj_ref, "cl_page",
 				     page);
-		page->cp_index = ind;
+		page->cp_vmpage = vmpage;
 		cl_page_state_set_trust(page, CPS_CACHED);
 		page->cp_type = type;
 		INIT_LIST_HEAD(&page->cp_layers);
@@ -306,10 +160,10 @@ static struct cl_page *cl_page_alloc(const struct lu_env *env,
 		head = o->co_lu.lo_header;
 		list_for_each_entry(o, &head->loh_layers, co_lu.lo_linkage) {
 			if (o->co_ops->coo_page_init) {
-				result = o->co_ops->coo_page_init(env, o,
-								  page, vmpage);
+				result = o->co_ops->coo_page_init(env, o, page,
+								  ind);
 				if (result != 0) {
-					cl_page_delete0(env, page, 0);
+					cl_page_delete0(env, page);
 					cl_page_free(env, page);
 					page = ERR_PTR(result);
 					break;
@@ -321,6 +175,7 @@ static struct cl_page *cl_page_alloc(const struct lu_env *env,
 	}
 	return page;
 }
+EXPORT_SYMBOL(cl_page_alloc);
 
 /**
  * Returns a cl_page with index \a idx at the object \a o, and associated with
@@ -333,16 +188,13 @@ static struct cl_page *cl_page_alloc(const struct lu_env *env,
  *
  * \see cl_object_find(), cl_lock_find()
  */
-static struct cl_page *cl_page_find0(const struct lu_env *env,
-				     struct cl_object *o,
-				     pgoff_t idx, struct page *vmpage,
-				     enum cl_page_type type,
-				     struct cl_page *parent)
+struct cl_page *cl_page_find(const struct lu_env *env,
+			     struct cl_object *o,
+			     pgoff_t idx, struct page *vmpage,
+			     enum cl_page_type type)
 {
 	struct cl_page	  *page = NULL;
-	struct cl_page	  *ghost = NULL;
 	struct cl_object_header *hdr;
-	int err;
 
 	LASSERT(type == CPT_CACHEABLE || type == CPT_TRANSIENT);
 	might_sleep();
@@ -368,120 +220,25 @@ static struct cl_page *cl_page_find0(const struct lu_env *env,
 		 *       reference on it.
 		 */
 		page = cl_vmpage_page(vmpage, o);
-		PINVRNT(env, page,
-			ergo(page,
-			     cl_page_vmpage(env, page) == vmpage &&
-			     (void *)radix_tree_lookup(&hdr->coh_tree,
-						       idx) == page));
-	}
 
-	if (page)
-		return page;
+		if (page)
+			return page;
+	}
 
 	/* allocate and initialize cl_page */
 	page = cl_page_alloc(env, o, idx, vmpage, type);
-	if (IS_ERR(page))
-		return page;
-
-	if (type == CPT_TRANSIENT) {
-		if (parent) {
-			LASSERT(!page->cp_parent);
-			page->cp_parent = parent;
-			parent->cp_child = page;
-		}
-		return page;
-	}
-
-	/*
-	 * XXX optimization: use radix_tree_preload() here, and change tree
-	 * gfp mask to GFP_KERNEL in cl_object_header_init().
-	 */
-	spin_lock(&hdr->coh_page_guard);
-	err = radix_tree_insert(&hdr->coh_tree, idx, page);
-	if (err != 0) {
-		ghost = page;
-		/*
-		 * Noted by Jay: a lock on \a vmpage protects cl_page_find()
-		 * from this race, but
-		 *
-		 *     0. it's better to have cl_page interface "locally
-		 *     consistent" so that its correctness can be reasoned
-		 *     about without appealing to the (obscure world of) VM
-		 *     locking.
-		 *
-		 *     1. handling this race allows ->coh_tree to remain
-		 *     consistent even when VM locking is somehow busted,
-		 *     which is very useful during diagnosing and debugging.
-		 */
-		page = ERR_PTR(err);
-		CL_PAGE_DEBUG(D_ERROR, env, ghost,
-			      "fail to insert into radix tree: %d\n", err);
-	} else {
-		if (parent) {
-			LASSERT(!page->cp_parent);
-			page->cp_parent = parent;
-			parent->cp_child = page;
-		}
-		hdr->coh_pages++;
-	}
-	spin_unlock(&hdr->coh_page_guard);
-
-	if (unlikely(ghost)) {
-		cl_page_delete0(env, ghost, 0);
-		cl_page_free(env, ghost);
-	}
 	return page;
 }
-
-struct cl_page *cl_page_find(const struct lu_env *env, struct cl_object *o,
-			     pgoff_t idx, struct page *vmpage,
-			     enum cl_page_type type)
-{
-	return cl_page_find0(env, o, idx, vmpage, type, NULL);
-}
 EXPORT_SYMBOL(cl_page_find);
 
-struct cl_page *cl_page_find_sub(const struct lu_env *env, struct cl_object *o,
-				 pgoff_t idx, struct page *vmpage,
-				 struct cl_page *parent)
-{
-	return cl_page_find0(env, o, idx, vmpage, parent->cp_type, parent);
-}
-EXPORT_SYMBOL(cl_page_find_sub);
-
 static inline int cl_page_invariant(const struct cl_page *pg)
 {
-	struct cl_object_header *header;
-	struct cl_page	  *parent;
-	struct cl_page	  *child;
-	struct cl_io	    *owner;
-
 	/*
 	 * Page invariant is protected by a VM lock.
 	 */
 	LINVRNT(cl_page_is_vmlocked(NULL, pg));
 
-	header = cl_object_header(pg->cp_obj);
-	parent = pg->cp_parent;
-	child  = pg->cp_child;
-	owner  = pg->cp_owner;
-
-	return cl_page_in_use(pg) &&
-		ergo(parent, parent->cp_child == pg) &&
-		ergo(child, child->cp_parent == pg) &&
-		ergo(child, pg->cp_obj != child->cp_obj) &&
-		ergo(parent, pg->cp_obj != parent->cp_obj) &&
-		ergo(owner && parent,
-		     parent->cp_owner == pg->cp_owner->ci_parent) &&
-		ergo(owner && child, child->cp_owner->ci_parent == owner) &&
-		/*
-		 * Either page is early in initialization (has neither child
-		 * nor parent yet), or it is in the object radix tree.
-		 */
-		ergo(pg->cp_state < CPS_FREEING && pg->cp_type == CPT_CACHEABLE,
-		     (void *)radix_tree_lookup(&header->coh_tree,
-					       pg->cp_index) == pg ||
-		     (!child && !parent));
+	return cl_page_in_use_noref(pg);
 }
 
 static void cl_page_state_set0(const struct lu_env *env,
@@ -534,13 +291,9 @@ static void cl_page_state_set0(const struct lu_env *env,
 	old = page->cp_state;
 	PASSERT(env, page, allowed_transitions[old][state]);
 	CL_PAGE_HEADER(D_TRACE, env, page, "%d -> %d\n", old, state);
-	for (; page; page = page->cp_child) {
-		PASSERT(env, page, page->cp_state == old);
-		PASSERT(env, page,
-			equi(state == CPS_OWNED, page->cp_owner));
-
-		cl_page_state_set_trust(page, state);
-	}
+	PASSERT(env, page, page->cp_state == old);
+	PASSERT(env, page, equi(state == CPS_OWNED, page->cp_owner));
+	cl_page_state_set_trust(page, state);
 }
 
 static void cl_page_state_set(const struct lu_env *env,
@@ -574,8 +327,6 @@ EXPORT_SYMBOL(cl_page_get);
  */
 void cl_page_put(const struct lu_env *env, struct cl_page *page)
 {
-	PASSERT(env, page, atomic_read(&page->cp_ref) > !!page->cp_parent);
-
 	CL_PAGE_HEADER(D_TRACE, env, page, "%d\n",
 		       atomic_read(&page->cp_ref));
 
@@ -594,35 +345,11 @@ void cl_page_put(const struct lu_env *env, struct cl_page *page)
 }
 EXPORT_SYMBOL(cl_page_put);
 
-/**
- * Returns a VM page associated with a given cl_page.
- */
-struct page *cl_page_vmpage(const struct lu_env *env, struct cl_page *page)
-{
-	const struct cl_page_slice *slice;
-
-	/*
-	 * Find uppermost layer with ->cpo_vmpage() method, and return its
-	 * result.
-	 */
-	page = cl_page_top(page);
-	do {
-		list_for_each_entry(slice, &page->cp_layers, cpl_linkage) {
-			if (slice->cpl_ops->cpo_vmpage)
-				return slice->cpl_ops->cpo_vmpage(env, slice);
-		}
-		page = page->cp_child;
-	} while (page);
-	LBUG(); /* ->cpo_vmpage() has to be defined somewhere in the stack */
-}
-EXPORT_SYMBOL(cl_page_vmpage);
-
 /**
  * Returns a cl_page associated with a VM page, and given cl_object.
  */
 struct cl_page *cl_vmpage_page(struct page *vmpage, struct cl_object *obj)
 {
-	struct cl_page *top;
 	struct cl_page *page;
 
 	KLASSERT(PageLocked(vmpage));
@@ -633,36 +360,15 @@ struct cl_page *cl_vmpage_page(struct page *vmpage, struct cl_object *obj)
 	 *       bottom-to-top pass.
 	 */
 
-	/*
-	 * This loop assumes that ->private points to the top-most page. This
-	 * can be rectified easily.
-	 */
-	top = (struct cl_page *)vmpage->private;
-	if (!top)
-		return NULL;
-
-	for (page = top; page; page = page->cp_child) {
-		if (cl_object_same(page->cp_obj, obj)) {
-			cl_page_get_trust(page);
-			break;
-		}
+	page = (struct cl_page *)vmpage->private;
+	if (page) {
+		cl_page_get_trust(page);
+		LASSERT(page->cp_type == CPT_CACHEABLE);
 	}
-	LASSERT(ergo(page, page->cp_type == CPT_CACHEABLE));
 	return page;
 }
 EXPORT_SYMBOL(cl_vmpage_page);
 
-/**
- * Returns the top-page for a given page.
- *
- * \see cl_object_top(), cl_io_top()
- */
-struct cl_page *cl_page_top(struct cl_page *page)
-{
-	return cl_page_top_trusted(page);
-}
-EXPORT_SYMBOL(cl_page_top);
-
 const struct cl_page_slice *cl_page_at(const struct cl_page *page,
 				       const struct lu_device_type *dtype)
 {
@@ -682,26 +388,43 @@ EXPORT_SYMBOL(cl_page_at);
 	int		       (*__method)_proto;		    \
 									\
 	__result = 0;						   \
-	__page = cl_page_top(__page);				   \
-	do {							    \
-		list_for_each_entry(__scan, &__page->cp_layers,     \
-					cpl_linkage) {		  \
-			__method = *(void **)((char *)__scan->cpl_ops + \
-					      __op);		    \
-			if (__method) {					\
-				__result = (*__method)(__env, __scan,   \
-						       ## __VA_ARGS__); \
-				if (__result != 0)		      \
-					break;			  \
-			}					       \
-		}						       \
-		__page = __page->cp_child;			      \
-	} while (__page && __result == 0);			      \
+	list_for_each_entry(__scan, &__page->cp_layers, cpl_linkage) {  \
+		__method = *(void **)((char *)__scan->cpl_ops +  __op); \
+		if (__method) {						\
+			__result = (*__method)(__env, __scan, ## __VA_ARGS__); \
+			if (__result != 0)				\
+				break;					\
+		}							\
+	}								\
 	if (__result > 0)					       \
 		__result = 0;					   \
 	__result;						       \
 })
 
+#define CL_PAGE_INVOKE_REVERSE(_env, _page, _op, _proto, ...)		\
+({									\
+	const struct lu_env        *__env  = (_env);			\
+	struct cl_page             *__page = (_page);			\
+	const struct cl_page_slice *__scan;				\
+	int                         __result;				\
+	ptrdiff_t                   __op   = (_op);			\
+	int                       (*__method)_proto;			\
+									\
+	__result = 0;							\
+	list_for_each_entry_reverse(__scan, &__page->cp_layers,		\
+					cpl_linkage) {			\
+		__method = *(void **)((char *)__scan->cpl_ops +  __op);	\
+		if (__method) {						\
+			__result = (*__method)(__env, __scan, ## __VA_ARGS__); \
+			if (__result != 0)				\
+				break;					\
+		}							\
+	}								\
+	if (__result > 0)						\
+		__result = 0;						\
+	__result;							\
+})
+
 #define CL_PAGE_INVOID(_env, _page, _op, _proto, ...)		   \
 do {								    \
 	const struct lu_env	*__env  = (_env);		    \
@@ -710,18 +433,11 @@ do {								    \
 	ptrdiff_t		   __op   = (_op);		     \
 	void		      (*__method)_proto;		    \
 									\
-	__page = cl_page_top(__page);				   \
-	do {							    \
-		list_for_each_entry(__scan, &__page->cp_layers,     \
-					cpl_linkage) {		  \
-			__method = *(void **)((char *)__scan->cpl_ops + \
-					      __op);		    \
-			if (__method)				   \
-				(*__method)(__env, __scan,	      \
-					    ## __VA_ARGS__);	    \
-		}						       \
-		__page = __page->cp_child;			      \
-	} while (__page);					       \
+	list_for_each_entry(__scan, &__page->cp_layers, cpl_linkage) {	\
+		__method = *(void **)((char *)__scan->cpl_ops + __op);	\
+		if (__method)						\
+			(*__method)(__env, __scan, ## __VA_ARGS__);	\
+	}								\
 } while (0)
 
 #define CL_PAGE_INVOID_REVERSE(_env, _page, _op, _proto, ...)	       \
@@ -732,20 +448,11 @@ do {									\
 	ptrdiff_t		   __op   = (_op);			 \
 	void		      (*__method)_proto;			\
 									    \
-	/* get to the bottom page. */				       \
-	while (__page->cp_child)					    \
-		__page = __page->cp_child;				  \
-	do {								\
-		list_for_each_entry_reverse(__scan, &__page->cp_layers, \
-						cpl_linkage) {	      \
-			__method = *(void **)((char *)__scan->cpl_ops +     \
-					      __op);			\
-			if (__method)				       \
-				(*__method)(__env, __scan,		  \
-					    ## __VA_ARGS__);		\
-		}							   \
-		__page = __page->cp_parent;				 \
-	} while (__page);						   \
+	list_for_each_entry_reverse(__scan, &__page->cp_layers, cpl_linkage) { \
+		__method = *(void **)((char *)__scan->cpl_ops + __op);	\
+		if (__method)						\
+			(*__method)(__env, __scan, ## __VA_ARGS__);	\
+	}								\
 } while (0)
 
 static int cl_page_invoke(const struct lu_env *env,
@@ -771,20 +478,17 @@ static void cl_page_invoid(const struct lu_env *env,
 
 static void cl_page_owner_clear(struct cl_page *page)
 {
-	for (page = cl_page_top(page); page; page = page->cp_child) {
-		if (page->cp_owner) {
-			LASSERT(page->cp_owner->ci_owned_nr > 0);
-			page->cp_owner->ci_owned_nr--;
-			page->cp_owner = NULL;
-			page->cp_task = NULL;
-		}
+	if (page->cp_owner) {
+		LASSERT(page->cp_owner->ci_owned_nr > 0);
+		page->cp_owner->ci_owned_nr--;
+		page->cp_owner = NULL;
+		page->cp_task = NULL;
 	}
 }
 
 static void cl_page_owner_set(struct cl_page *page)
 {
-	for (page = cl_page_top(page); page; page = page->cp_child)
-		page->cp_owner->ci_owned_nr++;
+	page->cp_owner->ci_owned_nr++;
 }
 
 void cl_page_disown0(const struct lu_env *env,
@@ -794,7 +498,7 @@ void cl_page_disown0(const struct lu_env *env,
 
 	state = pg->cp_state;
 	PINVRNT(env, pg, state == CPS_OWNED || state == CPS_FREEING);
-	PINVRNT(env, pg, cl_page_invariant(pg));
+	PINVRNT(env, pg, cl_page_invariant(pg) || state == CPS_FREEING);
 	cl_page_owner_clear(pg);
 
 	if (state == CPS_OWNED)
@@ -815,8 +519,9 @@ void cl_page_disown0(const struct lu_env *env,
  */
 int cl_page_is_owned(const struct cl_page *pg, const struct cl_io *io)
 {
+	struct cl_io *top = cl_io_top((struct cl_io *)io);
 	LINVRNT(cl_object_same(pg->cp_obj, io->ci_obj));
-	return pg->cp_state == CPS_OWNED && pg->cp_owner == io;
+	return pg->cp_state == CPS_OWNED && pg->cp_owner == top;
 }
 EXPORT_SYMBOL(cl_page_is_owned);
 
@@ -847,7 +552,6 @@ static int cl_page_own0(const struct lu_env *env, struct cl_io *io,
 
 	PINVRNT(env, pg, !cl_page_is_owned(pg, io));
 
-	pg = cl_page_top(pg);
 	io = cl_io_top(io);
 
 	if (pg->cp_state == CPS_FREEING) {
@@ -861,7 +565,7 @@ static int cl_page_own0(const struct lu_env *env, struct cl_io *io,
 		if (result == 0) {
 			PASSERT(env, pg, !pg->cp_owner);
 			PASSERT(env, pg, !pg->cp_req);
-			pg->cp_owner = io;
+			pg->cp_owner = cl_io_top(io);
 			pg->cp_task  = current;
 			cl_page_owner_set(pg);
 			if (pg->cp_state != CPS_FREEING) {
@@ -914,12 +618,11 @@ void cl_page_assume(const struct lu_env *env,
 {
 	PINVRNT(env, pg, cl_object_same(pg->cp_obj, io->ci_obj));
 
-	pg = cl_page_top(pg);
 	io = cl_io_top(io);
 
 	cl_page_invoid(env, io, pg, CL_PAGE_OP(cpo_assume));
 	PASSERT(env, pg, !pg->cp_owner);
-	pg->cp_owner = io;
+	pg->cp_owner = cl_io_top(io);
 	pg->cp_task = current;
 	cl_page_owner_set(pg);
 	cl_page_state_set(env, pg, CPS_OWNED);
@@ -943,7 +646,6 @@ void cl_page_unassume(const struct lu_env *env,
 	PINVRNT(env, pg, cl_page_is_owned(pg, io));
 	PINVRNT(env, pg, cl_page_invariant(pg));
 
-	pg = cl_page_top(pg);
 	io = cl_io_top(io);
 	cl_page_owner_clear(pg);
 	cl_page_state_set(env, pg, CPS_CACHED);
@@ -968,9 +670,9 @@ EXPORT_SYMBOL(cl_page_unassume);
 void cl_page_disown(const struct lu_env *env,
 		    struct cl_io *io, struct cl_page *pg)
 {
-	PINVRNT(env, pg, cl_page_is_owned(pg, io));
+	PINVRNT(env, pg, cl_page_is_owned(pg, io) ||
+		pg->cp_state == CPS_FREEING);
 
-	pg = cl_page_top(pg);
 	io = cl_io_top(io);
 	cl_page_disown0(env, io, pg);
 }
@@ -1001,12 +703,8 @@ EXPORT_SYMBOL(cl_page_discard);
  * pages, e.g,. in a error handling cl_page_find()->cl_page_delete0()
  * path. Doesn't check page invariant.
  */
-static void cl_page_delete0(const struct lu_env *env, struct cl_page *pg,
-			    int radix)
+static void cl_page_delete0(const struct lu_env *env, struct cl_page *pg)
 {
-	struct cl_page *tmp = pg;
-
-	PASSERT(env, pg, pg == cl_page_top(pg));
 	PASSERT(env, pg, pg->cp_state != CPS_FREEING);
 
 	/*
@@ -1014,41 +712,11 @@ static void cl_page_delete0(const struct lu_env *env, struct cl_page *pg,
 	 */
 	cl_page_owner_clear(pg);
 
-	/*
-	 * unexport the page firstly before freeing it so that
-	 * the page content is considered to be invalid.
-	 * We have to do this because a CPS_FREEING cl_page may
-	 * be NOT under the protection of a cl_lock.
-	 * Afterwards, if this page is found by other threads, then this
-	 * page will be forced to reread.
-	 */
-	cl_page_export(env, pg, 0);
 	cl_page_state_set0(env, pg, CPS_FREEING);
 
-	CL_PAGE_INVOID(env, pg, CL_PAGE_OP(cpo_delete),
-		       (const struct lu_env *, const struct cl_page_slice *));
-
-	if (tmp->cp_type == CPT_CACHEABLE) {
-		if (!radix)
-			/* !radix means that @pg is not yet in the radix tree,
-			 * skip removing it.
-			 */
-			tmp = pg->cp_child;
-		for (; tmp; tmp = tmp->cp_child) {
-			void		    *value;
-			struct cl_object_header *hdr;
-
-			hdr = cl_object_header(tmp->cp_obj);
-			spin_lock(&hdr->coh_page_guard);
-			value = radix_tree_delete(&hdr->coh_tree,
-						  tmp->cp_index);
-			PASSERT(env, tmp, value == tmp);
-			PASSERT(env, tmp, hdr->coh_pages > 0);
-			hdr->coh_pages--;
-			spin_unlock(&hdr->coh_page_guard);
-			cl_page_put(env, tmp);
-		}
-	}
+	CL_PAGE_INVOID_REVERSE(env, pg, CL_PAGE_OP(cpo_delete),
+			       (const struct lu_env *,
+				const struct cl_page_slice *));
 }
 
 /**
@@ -1070,7 +738,6 @@ static void cl_page_delete0(const struct lu_env *env, struct cl_page *pg,
  * Once page reaches cl_page_state::CPS_FREEING, all remaining references will
  * drain after some time, at which point page will be recycled.
  *
- * \pre  pg == cl_page_top(pg)
  * \pre  VM page is locked
  * \post pg->cp_state == CPS_FREEING
  *
@@ -1079,29 +746,10 @@ static void cl_page_delete0(const struct lu_env *env, struct cl_page *pg,
 void cl_page_delete(const struct lu_env *env, struct cl_page *pg)
 {
 	PINVRNT(env, pg, cl_page_invariant(pg));
-	cl_page_delete0(env, pg, 1);
+	cl_page_delete0(env, pg);
 }
 EXPORT_SYMBOL(cl_page_delete);
 
-/**
- * Unmaps page from user virtual memory.
- *
- * Calls cl_page_operations::cpo_unmap() through all layers top-to-bottom. The
- * layer responsible for VM interaction has to unmap page from user space
- * virtual memory.
- *
- * \see cl_page_operations::cpo_unmap()
- */
-int cl_page_unmap(const struct lu_env *env,
-		  struct cl_io *io, struct cl_page *pg)
-{
-	PINVRNT(env, pg, cl_page_is_owned(pg, io));
-	PINVRNT(env, pg, cl_page_invariant(pg));
-
-	return cl_page_invoke(env, io, pg, CL_PAGE_OP(cpo_unmap));
-}
-EXPORT_SYMBOL(cl_page_unmap);
-
 /**
  * Marks page up-to-date.
  *
@@ -1129,7 +777,6 @@ int cl_page_is_vmlocked(const struct lu_env *env, const struct cl_page *pg)
 	int result;
 	const struct cl_page_slice *slice;
 
-	pg = cl_page_top_trusted((struct cl_page *)pg);
 	slice = container_of(pg->cp_layers.next,
 			     const struct cl_page_slice, cpl_linkage);
 	PASSERT(env, pg, slice->cpl_ops->cpo_is_vmlocked);
@@ -1241,7 +888,7 @@ void cl_page_completion(const struct lu_env *env,
 	cl_page_put(env, pg);
 
 	if (anchor)
-		cl_sync_io_note(anchor, ioret);
+		cl_sync_io_note(env, anchor, ioret);
 }
 EXPORT_SYMBOL(cl_page_completion);
 
@@ -1275,44 +922,6 @@ int cl_page_make_ready(const struct lu_env *env, struct cl_page *pg,
 }
 EXPORT_SYMBOL(cl_page_make_ready);
 
-/**
- * Notify layers that high level io decided to place this page into a cache
- * for future transfer.
- *
- * The layer implementing transfer engine (osc) has to register this page in
- * its queues.
- *
- * \pre  cl_page_is_owned(pg, io)
- * \post cl_page_is_owned(pg, io)
- *
- * \see cl_page_operations::cpo_cache_add()
- */
-int cl_page_cache_add(const struct lu_env *env, struct cl_io *io,
-		      struct cl_page *pg, enum cl_req_type crt)
-{
-	const struct cl_page_slice *scan;
-	int result = 0;
-
-	PINVRNT(env, pg, crt < CRT_NR);
-	PINVRNT(env, pg, cl_page_is_owned(pg, io));
-	PINVRNT(env, pg, cl_page_invariant(pg));
-
-	if (crt >= CRT_NR)
-		return -EINVAL;
-
-	list_for_each_entry(scan, &pg->cp_layers, cpl_linkage) {
-		if (!scan->cpl_ops->io[crt].cpo_cache_add)
-			continue;
-
-		result = scan->cpl_ops->io[crt].cpo_cache_add(env, scan, io);
-		if (result != 0)
-			break;
-	}
-	CL_PAGE_HEADER(D_TRACE, env, pg, "%d %d\n", crt, result);
-	return result;
-}
-EXPORT_SYMBOL(cl_page_cache_add);
-
 /**
  * Called if a pge is being written back by kernel's intention.
  *
@@ -1344,68 +953,21 @@ EXPORT_SYMBOL(cl_page_flush);
  * \see cl_page_operations::cpo_is_under_lock()
  */
 int cl_page_is_under_lock(const struct lu_env *env, struct cl_io *io,
-			  struct cl_page *page)
+			  struct cl_page *page, pgoff_t *max_index)
 {
 	int rc;
 
 	PINVRNT(env, page, cl_page_invariant(page));
 
-	rc = CL_PAGE_INVOKE(env, page, CL_PAGE_OP(cpo_is_under_lock),
-			    (const struct lu_env *,
-			     const struct cl_page_slice *, struct cl_io *),
-			    io);
-	PASSERT(env, page, rc != 0);
+	rc = CL_PAGE_INVOKE_REVERSE(env, page, CL_PAGE_OP(cpo_is_under_lock),
+				    (const struct lu_env *,
+				     const struct cl_page_slice *,
+				      struct cl_io *, pgoff_t *),
+				    io, max_index);
 	return rc;
 }
 EXPORT_SYMBOL(cl_page_is_under_lock);
 
-static int page_prune_cb(const struct lu_env *env, struct cl_io *io,
-			 struct cl_page *page, void *cbdata)
-{
-	cl_page_own(env, io, page);
-	cl_page_unmap(env, io, page);
-	cl_page_discard(env, io, page);
-	cl_page_disown(env, io, page);
-	return CLP_GANG_OKAY;
-}
-
-/**
- * Purges all cached pages belonging to the object \a obj.
- */
-int cl_pages_prune(const struct lu_env *env, struct cl_object *clobj)
-{
-	struct cl_thread_info   *info;
-	struct cl_object	*obj = cl_object_top(clobj);
-	struct cl_io	    *io;
-	int		      result;
-
-	info  = cl_env_info(env);
-	io    = &info->clt_io;
-
-	/*
-	 * initialize the io. This is ugly since we never do IO in this
-	 * function, we just make cl_page_list functions happy. -jay
-	 */
-	io->ci_obj = obj;
-	io->ci_ignore_layout = 1;
-	result = cl_io_init(env, io, CIT_MISC, obj);
-	if (result != 0) {
-		cl_io_fini(env, io);
-		return io->ci_result;
-	}
-
-	do {
-		result = cl_page_gang_lookup(env, obj, io, 0, CL_PAGE_EOF,
-					     page_prune_cb, NULL);
-		if (result == CLP_GANG_RESCHED)
-			cond_resched();
-	} while (result != CLP_GANG_OKAY);
-
-	cl_io_fini(env, io);
-	return result;
-}
-EXPORT_SYMBOL(cl_pages_prune);
-
 /**
  * Tells transfer engine that only part of a page is to be transmitted.
  *
@@ -1431,9 +993,8 @@ void cl_page_header_print(const struct lu_env *env, void *cookie,
 			  lu_printer_t printer, const struct cl_page *pg)
 {
 	(*printer)(env, cookie,
-		   "page@%p[%d %p:%lu ^%p_%p %d %d %d %p %p %#x]\n",
+		   "page@%p[%d %p %d %d %d %p %p %#x]\n",
 		   pg, atomic_read(&pg->cp_ref), pg->cp_obj,
-		   pg->cp_index, pg->cp_parent, pg->cp_child,
 		   pg->cp_state, pg->cp_error, pg->cp_type,
 		   pg->cp_owner, pg->cp_req, pg->cp_flags);
 }
@@ -1445,11 +1006,7 @@ EXPORT_SYMBOL(cl_page_header_print);
 void cl_page_print(const struct lu_env *env, void *cookie,
 		   lu_printer_t printer, const struct cl_page *pg)
 {
-	struct cl_page *scan;
-
-	for (scan = cl_page_top((struct cl_page *)pg); scan;
-	     scan = scan->cp_child)
-		cl_page_header_print(env, cookie, printer, scan);
+	cl_page_header_print(env, cookie, printer, pg);
 	CL_PAGE_INVOKE(env, (struct cl_page *)pg, CL_PAGE_OP(cpo_print),
 		       (const struct lu_env *env,
 			const struct cl_page_slice *slice,
@@ -1509,21 +1066,13 @@ EXPORT_SYMBOL(cl_page_size);
  * \see cl_lock_slice_add(), cl_req_slice_add(), cl_io_slice_add()
  */
 void cl_page_slice_add(struct cl_page *page, struct cl_page_slice *slice,
-		       struct cl_object *obj,
+		       struct cl_object *obj, pgoff_t index,
 		       const struct cl_page_operations *ops)
 {
 	list_add_tail(&slice->cpl_linkage, &page->cp_layers);
 	slice->cpl_obj  = obj;
+	slice->cpl_index = index;
 	slice->cpl_ops  = ops;
 	slice->cpl_page = page;
 }
 EXPORT_SYMBOL(cl_page_slice_add);
-
-int  cl_page_init(void)
-{
-	return 0;
-}
-
-void cl_page_fini(void)
-{
-}
diff --git a/drivers/staging/lustre/lustre/obdclass/class_obd.c b/drivers/staging/lustre/lustre/obdclass/class_obd.c
index c2cf01596..f48816af8 100644
--- a/drivers/staging/lustre/lustre/obdclass/class_obd.c
+++ b/drivers/staging/lustre/lustre/obdclass/class_obd.c
@@ -60,6 +60,8 @@ unsigned int obd_dump_on_eviction;
 EXPORT_SYMBOL(obd_dump_on_eviction);
 unsigned int obd_max_dirty_pages = 256;
 EXPORT_SYMBOL(obd_max_dirty_pages);
+atomic_t obd_unstable_pages;
+EXPORT_SYMBOL(obd_unstable_pages);
 atomic_t obd_dirty_pages;
 EXPORT_SYMBOL(obd_dirty_pages);
 unsigned int obd_timeout = OBD_TIMEOUT_DEFAULT;   /* seconds */
@@ -335,7 +337,6 @@ int class_handle_ioctl(unsigned int cmd, unsigned long arg)
 		err = 0;
 		goto out;
 	}
-
 	}
 
 	if (data->ioc_dev == OBD_DEV_BY_DEVNAME) {
@@ -461,7 +462,7 @@ static int obd_init_checks(void)
 		CWARN("LPD64 wrong length! strlen(%s)=%d != 2\n", buf, len);
 		ret = -EINVAL;
 	}
-	if ((u64val & ~CFS_PAGE_MASK) >= PAGE_SIZE) {
+	if ((u64val & ~PAGE_MASK) >= PAGE_SIZE) {
 		CWARN("mask failed: u64val %llu >= %llu\n", u64val,
 		      (__u64)PAGE_SIZE);
 		ret = -EINVAL;
diff --git a/drivers/staging/lustre/lustre/obdclass/debug.c b/drivers/staging/lustre/lustre/obdclass/debug.c
index 43a7f7a79..e4edfb2c0 100644
--- a/drivers/staging/lustre/lustre/obdclass/debug.c
+++ b/drivers/staging/lustre/lustre/obdclass/debug.c
@@ -68,8 +68,8 @@ int block_debug_check(char *who, void *addr, int end, __u64 off, __u64 id)
 
 	LASSERT(addr);
 
-	ne_off = le64_to_cpu (off);
-	id = le64_to_cpu (id);
+	ne_off = le64_to_cpu(off);
+	id = le64_to_cpu(id);
 	if (memcmp(addr, (char *)&ne_off, LPDS)) {
 		CDEBUG(D_ERROR, "%s: id %#llx offset %llu off: %#llx != %#llx\n",
 		       who, id, off, *(__u64 *)addr, ne_off);
diff --git a/drivers/staging/lustre/lustre/obdclass/genops.c b/drivers/staging/lustre/lustre/obdclass/genops.c
index cf97b8f06..d95f11d62 100644
--- a/drivers/staging/lustre/lustre/obdclass/genops.c
+++ b/drivers/staging/lustre/lustre/obdclass/genops.c
@@ -604,7 +604,6 @@ int obd_init_caches(void)
  out:
 	obd_cleanup_caches();
 	return -ENOMEM;
-
 }
 
 /* map connection to client */
diff --git a/drivers/staging/lustre/lustre/obdclass/linux/linux-module.c b/drivers/staging/lustre/lustre/obdclass/linux/linux-module.c
index 8eddf206f..2cd452246 100644
--- a/drivers/staging/lustre/lustre/obdclass/linux/linux-module.c
+++ b/drivers/staging/lustre/lustre/obdclass/linux/linux-module.c
@@ -158,9 +158,7 @@ int obd_ioctl_popdata(void __user *arg, void *data, int len)
 {
 	int err;
 
-	err = copy_to_user(arg, data, len);
-	if (err)
-		err = -EFAULT;
+	err = copy_to_user(arg, data, len) ? -EFAULT : 0;
 	return err;
 }
 EXPORT_SYMBOL(obd_ioctl_popdata);
diff --git a/drivers/staging/lustre/lustre/obdclass/llog.c b/drivers/staging/lustre/lustre/obdclass/llog.c
index 992573eae..79194d8cb 100644
--- a/drivers/staging/lustre/lustre/obdclass/llog.c
+++ b/drivers/staging/lustre/lustre/obdclass/llog.c
@@ -265,7 +265,6 @@ repeat:
 		for (rec = (struct llog_rec_hdr *)buf;
 		     (char *)rec < buf + LLOG_CHUNK_SIZE;
 		     rec = (struct llog_rec_hdr *)((char *)rec + rec->lrh_len)) {
-
 			CDEBUG(D_OTHER, "processing rec 0x%p type %#x\n",
 			       rec, rec->lrh_type);
 
diff --git a/drivers/staging/lustre/lustre/obdclass/lprocfs_status.c b/drivers/staging/lustre/lustre/obdclass/lprocfs_status.c
index d93f42fee..5a1eae1de 100644
--- a/drivers/staging/lustre/lustre/obdclass/lprocfs_status.c
+++ b/drivers/staging/lustre/lustre/obdclass/lprocfs_status.c
@@ -49,7 +49,7 @@
 static const char * const obd_connect_names[] = {
 	"read_only",
 	"lov_index",
-	"unused",
+	"connect_from_mds",
 	"write_grant",
 	"server_lock",
 	"version",
@@ -122,6 +122,56 @@ int obd_connect_flags2str(char *page, int count, __u64 flags, char *sep)
 }
 EXPORT_SYMBOL(obd_connect_flags2str);
 
+static void obd_connect_data_seqprint(struct seq_file *m,
+				      struct obd_connect_data *ocd)
+{
+	int flags;
+
+	LASSERT(ocd);
+	flags = ocd->ocd_connect_flags;
+
+	seq_printf(m, "    connect_data:\n"
+		   "       flags: %llx\n"
+		   "       instance: %u\n",
+		   ocd->ocd_connect_flags,
+		   ocd->ocd_instance);
+	if (flags & OBD_CONNECT_VERSION)
+		seq_printf(m, "       target_version: %u.%u.%u.%u\n",
+			   OBD_OCD_VERSION_MAJOR(ocd->ocd_version),
+			   OBD_OCD_VERSION_MINOR(ocd->ocd_version),
+			   OBD_OCD_VERSION_PATCH(ocd->ocd_version),
+			   OBD_OCD_VERSION_FIX(ocd->ocd_version));
+	if (flags & OBD_CONNECT_MDS)
+		seq_printf(m, "       mdt_index: %d\n", ocd->ocd_group);
+	if (flags & OBD_CONNECT_GRANT)
+		seq_printf(m, "       initial_grant: %d\n", ocd->ocd_grant);
+	if (flags & OBD_CONNECT_INDEX)
+		seq_printf(m, "       target_index: %u\n", ocd->ocd_index);
+	if (flags & OBD_CONNECT_BRW_SIZE)
+		seq_printf(m, "       max_brw_size: %d\n", ocd->ocd_brw_size);
+	if (flags & OBD_CONNECT_IBITS)
+		seq_printf(m, "       ibits_known: %llx\n",
+			   ocd->ocd_ibits_known);
+	if (flags & OBD_CONNECT_GRANT_PARAM)
+		seq_printf(m, "       grant_block_size: %d\n"
+			   "       grant_inode_size: %d\n"
+			   "       grant_extent_overhead: %d\n",
+			   ocd->ocd_blocksize,
+			   ocd->ocd_inodespace,
+			   ocd->ocd_grant_extent);
+	if (flags & OBD_CONNECT_TRANSNO)
+		seq_printf(m, "       first_transno: %llx\n",
+			   ocd->ocd_transno);
+	if (flags & OBD_CONNECT_CKSUM)
+		seq_printf(m, "       cksum_types: %#x\n",
+			   ocd->ocd_cksum_types);
+	if (flags & OBD_CONNECT_MAX_EASIZE)
+		seq_printf(m, "       max_easize: %d\n", ocd->ocd_max_easize);
+	if (flags & OBD_CONNECT_MAXBYTES)
+		seq_printf(m, "       max_object_bytes: %llx\n",
+			   ocd->ocd_maxbytes);
+}
+
 int lprocfs_read_frac_helper(char *buffer, unsigned long count, long val,
 			     int mult)
 {
@@ -624,6 +674,7 @@ int lprocfs_rd_import(struct seq_file *m, void *data)
 	struct obd_device		*obd	= data;
 	struct obd_import		*imp;
 	struct obd_import_conn		*conn;
+	struct obd_connect_data *ocd;
 	int				j;
 	int				k;
 	int				rw	= 0;
@@ -635,9 +686,9 @@ int lprocfs_rd_import(struct seq_file *m, void *data)
 		return rc;
 
 	imp = obd->u.cli.cl_import;
+	ocd = &imp->imp_connect_data;
 
-	seq_printf(m,
-		   "import:\n"
+	seq_printf(m, "import:\n"
 		   "    name: %s\n"
 		   "    target: %s\n"
 		   "    state: %s\n"
@@ -649,9 +700,9 @@ int lprocfs_rd_import(struct seq_file *m, void *data)
 		   imp->imp_connect_data.ocd_instance);
 	obd_connect_seq_flags2str(m, imp->imp_connect_data.ocd_connect_flags,
 				  ", ");
-	seq_printf(m,
-		   " ]\n"
-		   "    import_flags: [ ");
+	seq_printf(m, " ]\n");
+	obd_connect_data_seqprint(m, ocd);
+	seq_printf(m, "    import_flags: [ ");
 	obd_import_flags2str(imp, m);
 
 	seq_printf(m,
@@ -694,8 +745,9 @@ int lprocfs_rd_import(struct seq_file *m, void *data)
 
 		do_div(sum, ret.lc_count);
 		ret.lc_sum = sum;
-	} else
+	} else {
 		ret.lc_sum = 0;
+	}
 	seq_printf(m,
 		   "    rpcs:\n"
 		   "       inflight: %u\n"
@@ -1471,10 +1523,10 @@ EXPORT_SYMBOL(lprocfs_oh_tally);
 
 void lprocfs_oh_tally_log2(struct obd_histogram *oh, unsigned int value)
 {
-	unsigned int val;
+	unsigned int val = 0;
 
-	for (val = 0; ((1 << val) < value) && (val <= OBD_HIST_MAX); val++)
-		;
+	if (likely(value != 0))
+		val = min(fls(value - 1), OBD_HIST_MAX);
 
 	lprocfs_oh_tally(oh, val);
 }
diff --git a/drivers/staging/lustre/lustre/obdclass/lu_object.c b/drivers/staging/lustre/lustre/obdclass/lu_object.c
index 978568ada..e04385760 100644
--- a/drivers/staging/lustre/lustre/obdclass/lu_object.c
+++ b/drivers/staging/lustre/lustre/obdclass/lu_object.c
@@ -55,6 +55,7 @@
 #include "../include/lustre_disk.h"
 #include "../include/lustre_fid.h"
 #include "../include/lu_object.h"
+#include "../include/cl_object.h"
 #include "../include/lu_ref.h"
 #include <linux/list.h>
 
@@ -103,7 +104,6 @@ void lu_object_put(const struct lu_env *env, struct lu_object *o)
 
 	if (!cfs_hash_bd_dec_and_lock(site->ls_obj_hash, &bd, &top->loh_ref)) {
 		if (lu_object_is_dying(top)) {
-
 			/*
 			 * somebody may be waiting for this, currently only
 			 * used for cl_object, see cl_object_put_last().
@@ -357,7 +357,6 @@ int lu_site_purge(const struct lu_env *env, struct lu_site *s, int nr)
 
 			if (count > 0 && --count == 0)
 				break;
-
 		}
 		cfs_hash_bd_unlock(s->ls_obj_hash, &bd, 1);
 		cond_resched();
@@ -715,8 +714,9 @@ struct lu_object *lu_object_find_slice(const struct lu_env *env,
 		obj = lu_object_locate(top->lo_header, dev->ld_type);
 		if (!obj)
 			lu_object_put(env, top);
-	} else
+	} else {
 		obj = top;
+	}
 	return obj;
 }
 EXPORT_SYMBOL(lu_object_find_slice);
@@ -935,7 +935,7 @@ static void lu_dev_add_linkage(struct lu_site *s, struct lu_device *d)
  * Initialize site \a s, with \a d as the top level device.
  */
 #define LU_SITE_BITS_MIN    12
-#define LU_SITE_BITS_MAX    24
+#define LU_SITE_BITS_MAX    19
 /**
  * total 256 buckets, we don't want too many buckets because:
  * - consume too much memory
@@ -1468,6 +1468,7 @@ void lu_context_key_quiesce(struct lu_context_key *key)
 		/*
 		 * XXX layering violation.
 		 */
+		cl_env_cache_purge(~0);
 		key->lct_tags |= LCT_QUIESCENT;
 		/*
 		 * XXX memory barrier has to go here.
diff --git a/drivers/staging/lustre/lustre/obdclass/lustre_peer.c b/drivers/staging/lustre/lustre/obdclass/lustre_peer.c
index 5f812460b..b1abe023b 100644
--- a/drivers/staging/lustre/lustre/obdclass/lustre_peer.c
+++ b/drivers/staging/lustre/lustre/obdclass/lustre_peer.c
@@ -163,8 +163,9 @@ int class_del_uuid(const char *uuid)
 				break;
 			}
 		}
-	} else
+	} else {
 		list_splice_init(&g_uuid_list, &deathrow);
+	}
 	spin_unlock(&g_uuid_lock);
 
 	if (uuid && list_empty(&deathrow)) {
diff --git a/drivers/staging/lustre/lustre/obdclass/obd_config.c b/drivers/staging/lustre/lustre/obdclass/obd_config.c
index 5395e994d..cb1d65c3d 100644
--- a/drivers/staging/lustre/lustre/obdclass/obd_config.c
+++ b/drivers/staging/lustre/lustre/obdclass/obd_config.c
@@ -606,7 +606,7 @@ static int class_del_conn(struct obd_device *obd, struct lustre_cfg *lcfg)
 	return rc;
 }
 
-LIST_HEAD(lustre_profile_list);
+static LIST_HEAD(lustre_profile_list);
 
 struct lustre_profile *class_get_profile(const char *prof)
 {
@@ -961,7 +961,6 @@ int class_process_config(struct lustre_cfg *lcfg)
 	default: {
 		err = obd_process_config(obd, sizeof(*lcfg), lcfg);
 		goto out;
-
 	}
 	}
 out:
@@ -1001,7 +1000,13 @@ int class_process_proc_param(char *prefix, struct lprocfs_vars *lvars,
 	for (i = 1; i < lcfg->lcfg_bufcount; i++) {
 		key = lustre_cfg_buf(lcfg, i);
 		/* Strip off prefix */
-		class_match_param(key, prefix, &key);
+		if (class_match_param(key, prefix, &key)) {
+			/*
+			 * If the prefix doesn't match, return error so we
+			 * can pass it down the stack
+			 */
+			return -ENOSYS;
+		}
 		sval = strchr(key, '=');
 		if (!sval || (*(sval + 1) == 0)) {
 			CERROR("Can't parse param %s (missing '=')\n", key);
@@ -1034,18 +1039,14 @@ int class_process_proc_param(char *prefix, struct lprocfs_vars *lvars,
 			j++;
 		}
 		if (!matched) {
-			/* If the prefix doesn't match, return error so we
-			 * can pass it down the stack
-			 */
-			if (strnchr(key, keylen, '.'))
-				return -ENOSYS;
-			CERROR("%s: unknown param %s\n",
+			CERROR("%.*s: %s unknown param %s\n",
+			       (int)strlen(prefix) - 1, prefix,
 			       (char *)lustre_cfg_string(lcfg, 0), key);
 			/* rc = -EINVAL;	continue parsing other params */
 			skip++;
 		} else if (rc < 0) {
-			CERROR("writing proc entry %s err %d\n",
-			       var->name, rc);
+			CERROR("%s: error writing proc entry '%s': rc = %d\n",
+			       prefix, var->name, rc);
 			rc = 0;
 		} else {
 			CDEBUG(D_CONFIG, "%s.%.*s: Set parameter %.*s=%s\n",
@@ -1350,6 +1351,7 @@ static int class_config_parse_rec(struct llog_rec_hdr *rec, char *buf,
 					lustre_cfg_string(lcfg, i));
 		}
 	}
+	ptr += snprintf(ptr, end - ptr, "\n");
 	/* return consumed bytes */
 	rc = ptr - buf;
 	return rc;
@@ -1368,7 +1370,7 @@ int class_config_dump_handler(const struct lu_env *env,
 
 	if (rec->lrh_type == OBD_CFG_REC) {
 		class_config_parse_rec(rec, outstr, 256);
-		LCONSOLE(D_WARNING, "   %s\n", outstr);
+		LCONSOLE(D_WARNING, "   %s", outstr);
 	} else {
 		LCONSOLE(D_WARNING, "unhandled lrh_type: %#x\n", rec->lrh_type);
 		rc = -EINVAL;
diff --git a/drivers/staging/lustre/lustre/obdclass/obd_mount.c b/drivers/staging/lustre/lustre/obdclass/obd_mount.c
index d3e28a389..e0c90adc7 100644
--- a/drivers/staging/lustre/lustre/obdclass/obd_mount.c
+++ b/drivers/staging/lustre/lustre/obdclass/obd_mount.c
@@ -102,7 +102,7 @@ int lustre_process_log(struct super_block *sb, char *logname,
 		LCONSOLE_ERROR_MSG(0x15b, "%s: The configuration from log '%s' failed from the MGS (%d).  Make sure this client and the MGS are running compatible versions of Lustre.\n",
 				   mgc->obd_name, logname, rc);
 
-	if (rc)
+	else if (rc)
 		LCONSOLE_ERROR_MSG(0x15c, "%s: The configuration from log '%s' failed (%d). This may be the result of communication errors between this node and the MGS, a bad configuration, or other errors. See the syslog for more information.\n",
 				   mgc->obd_name, logname,
 				   rc);
@@ -307,7 +307,8 @@ int lustre_start_mgc(struct super_block *sb)
 	while (class_parse_nid(ptr, &nid, &ptr) == 0) {
 		rc = do_lcfg(mgcname, nid,
 			     LCFG_ADD_UUID, niduuid, NULL, NULL, NULL);
-		i++;
+		if (!rc)
+			i++;
 		/* Stop at the first failover nid */
 		if (*ptr == ':')
 			break;
@@ -345,16 +346,18 @@ int lustre_start_mgc(struct super_block *sb)
 		sprintf(niduuid, "%s_%x", mgcname, i);
 		j = 0;
 		while (class_parse_nid_quiet(ptr, &nid, &ptr) == 0) {
-			j++;
-			rc = do_lcfg(mgcname, nid,
-				     LCFG_ADD_UUID, niduuid, NULL, NULL, NULL);
+			rc = do_lcfg(mgcname, nid, LCFG_ADD_UUID, niduuid,
+				     NULL, NULL, NULL);
+			if (!rc)
+				++j;
 			if (*ptr == ':')
 				break;
 		}
 		if (j > 0) {
 			rc = do_lcfg(mgcname, 0, LCFG_ADD_CONN,
 				     niduuid, NULL, NULL, NULL);
-			i++;
+			if (!rc)
+				i++;
 		} else {
 			/* at ":/fsname" */
 			break;
diff --git a/drivers/staging/lustre/lustre/obdclass/obdo.c b/drivers/staging/lustre/lustre/obdclass/obdo.c
index e6436cb4a..748e33f01 100644
--- a/drivers/staging/lustre/lustre/obdclass/obdo.c
+++ b/drivers/staging/lustre/lustre/obdclass/obdo.c
@@ -185,8 +185,7 @@ void md_from_obdo(struct md_op_data *op_data, struct obdo *oa, u32 valid)
 		op_data->op_attr.ia_valid |= ATTR_BLOCKS;
 	}
 	if (valid & OBD_MD_FLFLAGS) {
-		((struct ll_iattr *)&op_data->op_attr)->ia_attr_flags =
-			oa->o_flags;
+		op_data->op_attr_flags = oa->o_flags;
 		op_data->op_attr.ia_valid |= ATTR_ATTR_FLAG;
 	}
 }
diff --git a/drivers/staging/lustre/lustre/obdecho/echo_client.c b/drivers/staging/lustre/lustre/obdecho/echo_client.c
index 1e83669c2..91ef06f17 100644
--- a/drivers/staging/lustre/lustre/obdecho/echo_client.c
+++ b/drivers/staging/lustre/lustre/obdecho/echo_client.c
@@ -81,7 +81,6 @@ struct echo_object_conf {
 struct echo_page {
 	struct cl_page_slice   ep_cl;
 	struct mutex		ep_lock;
-	struct page	    *ep_vmpage;
 };
 
 struct echo_lock {
@@ -164,15 +163,13 @@ static int cl_echo_object_put(struct echo_object *eco);
 static int cl_echo_object_brw(struct echo_object *eco, int rw, u64 offset,
 			      struct page **pages, int npages, int async);
 
-static struct echo_thread_info *echo_env_info(const struct lu_env *env);
-
 struct echo_thread_info {
 	struct echo_object_conf eti_conf;
 	struct lustre_md	eti_md;
 
 	struct cl_2queue	eti_queue;
 	struct cl_io	    eti_io;
-	struct cl_lock_descr    eti_descr;
+	struct cl_lock          eti_lock;
 	struct lu_fid	   eti_fid;
 	struct lu_fid		eti_fid2;
 };
@@ -219,12 +216,6 @@ static struct lu_kmem_descr echo_caches[] = {
  *
  * @{
  */
-static struct page *echo_page_vmpage(const struct lu_env *env,
-				     const struct cl_page_slice *slice)
-{
-	return cl2echo_page(slice)->ep_vmpage;
-}
-
 static int echo_page_own(const struct lu_env *env,
 			 const struct cl_page_slice *slice,
 			 struct cl_io *io, int nonblock)
@@ -273,12 +264,10 @@ static void echo_page_completion(const struct lu_env *env,
 static void echo_page_fini(const struct lu_env *env,
 			   struct cl_page_slice *slice)
 {
-	struct echo_page *ep    = cl2echo_page(slice);
 	struct echo_object *eco = cl2echo_obj(slice->cpl_obj);
-	struct page *vmpage      = ep->ep_vmpage;
 
 	atomic_dec(&eco->eo_npages);
-	put_page(vmpage);
+	put_page(slice->cpl_page->cp_vmpage);
 }
 
 static int echo_page_prep(const struct lu_env *env,
@@ -295,7 +284,8 @@ static int echo_page_print(const struct lu_env *env,
 	struct echo_page *ep = cl2echo_page(slice);
 
 	(*printer)(env, cookie, LUSTRE_ECHO_CLIENT_NAME"-page@%p %d vm@%p\n",
-		   ep, mutex_is_locked(&ep->ep_lock), ep->ep_vmpage);
+		   ep, mutex_is_locked(&ep->ep_lock),
+		   slice->cpl_page->cp_vmpage);
 	return 0;
 }
 
@@ -303,7 +293,6 @@ static const struct cl_page_operations echo_page_ops = {
 	.cpo_own	   = echo_page_own,
 	.cpo_disown	= echo_page_disown,
 	.cpo_discard       = echo_page_discard,
-	.cpo_vmpage	= echo_page_vmpage,
 	.cpo_fini	  = echo_page_fini,
 	.cpo_print	 = echo_page_print,
 	.cpo_is_vmlocked   = echo_page_is_vmlocked,
@@ -336,26 +325,8 @@ static void echo_lock_fini(const struct lu_env *env,
 	kmem_cache_free(echo_lock_kmem, ecl);
 }
 
-static void echo_lock_delete(const struct lu_env *env,
-			     const struct cl_lock_slice *slice)
-{
-	struct echo_lock *ecl      = cl2echo_lock(slice);
-
-	LASSERT(list_empty(&ecl->el_chain));
-}
-
-static int echo_lock_fits_into(const struct lu_env *env,
-			       const struct cl_lock_slice *slice,
-			       const struct cl_lock_descr *need,
-			       const struct cl_io *unused)
-{
-	return 1;
-}
-
 static struct cl_lock_operations echo_lock_ops = {
 	.clo_fini      = echo_lock_fini,
-	.clo_delete    = echo_lock_delete,
-	.clo_fits_into = echo_lock_fits_into
 };
 
 /** @} echo_lock */
@@ -367,15 +338,14 @@ static struct cl_lock_operations echo_lock_ops = {
  * @{
  */
 static int echo_page_init(const struct lu_env *env, struct cl_object *obj,
-			  struct cl_page *page, struct page *vmpage)
+			  struct cl_page *page, pgoff_t index)
 {
 	struct echo_page *ep = cl_object_page_slice(obj, page);
 	struct echo_object *eco = cl2echo_obj(obj);
 
-	ep->ep_vmpage = vmpage;
-	get_page(vmpage);
+	get_page(page->cp_vmpage);
 	mutex_init(&ep->ep_lock);
-	cl_page_slice_add(page, &ep->ep_cl, obj, &echo_page_ops);
+	cl_page_slice_add(page, &ep->ep_cl, obj, index, &echo_page_ops);
 	atomic_inc(&eco->eo_npages);
 	return 0;
 }
@@ -568,6 +538,8 @@ static struct lu_object *echo_object_alloc(const struct lu_env *env,
 
 		obj = &echo_obj2cl(eco)->co_lu;
 		cl_object_header_init(hdr);
+		hdr->coh_page_bufsize = cfs_size_round(sizeof(struct cl_page));
+
 		lu_object_init(obj, &hdr->coh_lu, dev);
 		lu_object_add_top(&hdr->coh_lu, obj);
 
@@ -694,8 +666,7 @@ static struct lu_device *echo_device_alloc(const struct lu_env *env,
 	struct obd_device  *obd = NULL; /* to keep compiler happy */
 	struct obd_device  *tgt;
 	const char *tgt_type_name;
-	int rc;
-	int cleanup = 0;
+	int rc, err;
 
 	ed = kzalloc(sizeof(*ed), GFP_NOFS);
 	if (!ed) {
@@ -703,16 +674,14 @@ static struct lu_device *echo_device_alloc(const struct lu_env *env,
 		goto out;
 	}
 
-	cleanup = 1;
 	cd = &ed->ed_cl;
 	rc = cl_device_init(cd, t);
 	if (rc)
-		goto out;
+		goto out_free;
 
 	cd->cd_lu_dev.ld_ops = &echo_device_lu_ops;
 	cd->cd_ops = &echo_device_cl_ops;
 
-	cleanup = 2;
 	obd = class_name2obd(lustre_cfg_string(cfg, 0));
 	LASSERT(obd);
 	LASSERT(env);
@@ -722,28 +691,25 @@ static struct lu_device *echo_device_alloc(const struct lu_env *env,
 		CERROR("Can not find tgt device %s\n",
 		       lustre_cfg_string(cfg, 1));
 		rc = -ENODEV;
-		goto out;
+		goto out_device_fini;
 	}
 
 	next = tgt->obd_lu_dev;
 	if (!strcmp(tgt->obd_type->typ_name, LUSTRE_MDT_NAME)) {
 		CERROR("echo MDT client must be run on server\n");
 		rc = -EOPNOTSUPP;
-		goto out;
+		goto out_device_fini;
 	}
 
 	rc = echo_site_init(env, ed);
 	if (rc)
-		goto out;
-
-	cleanup = 3;
+		goto out_device_fini;
 
 	rc = echo_client_setup(env, obd, cfg);
 	if (rc)
-		goto out;
+		goto out_site_fini;
 
 	ed->ed_ec = &obd->u.echo_client;
-	cleanup = 4;
 
 	/* if echo client is to be stacked upon ost device, the next is
 	 * NULL since ost is not a clio device so far
@@ -755,7 +721,7 @@ static struct lu_device *echo_device_alloc(const struct lu_env *env,
 	if (next) {
 		if (next->ld_site) {
 			rc = -EBUSY;
-			goto out;
+			goto out_cleanup;
 		}
 
 		next->ld_site = &ed->ed_site->cs_lu;
@@ -763,7 +729,7 @@ static struct lu_device *echo_device_alloc(const struct lu_env *env,
 						next->ld_type->ldt_name,
 							      NULL);
 		if (rc)
-			goto out;
+			goto out_cleanup;
 
 	} else {
 		LASSERT(strcmp(tgt_type_name, LUSTRE_OST_NAME) == 0);
@@ -771,27 +737,19 @@ static struct lu_device *echo_device_alloc(const struct lu_env *env,
 
 	ed->ed_next = next;
 	return &cd->cd_lu_dev;
-out:
-	switch (cleanup) {
-	case 4: {
-		int rc2;
-
-		rc2 = echo_client_cleanup(obd);
-		if (rc2)
-			CERROR("Cleanup obd device %s error(%d)\n",
-			       obd->obd_name, rc2);
-	}
 
-	case 3:
-		echo_site_fini(env, ed);
-	case 2:
-		cl_device_fini(&ed->ed_cl);
-	case 1:
-		kfree(ed);
-	case 0:
-	default:
-		break;
-	}
+out_cleanup:
+	err = echo_client_cleanup(obd);
+	if (err)
+		CERROR("Cleanup obd device %s error(%d)\n",
+		       obd->obd_name, err);
+out_site_fini:
+	echo_site_fini(env, ed);
+out_device_fini:
+	cl_device_fini(&ed->ed_cl);
+out_free:
+	kfree(ed);
+out:
 	return ERR_PTR(rc);
 }
 
@@ -819,16 +777,7 @@ static void echo_lock_release(const struct lu_env *env,
 {
 	struct cl_lock *clk = echo_lock2cl(ecl);
 
-	cl_lock_get(clk);
-	cl_unuse(env, clk);
-	cl_lock_release(env, clk, "ec enqueue", ecl->el_object);
-	if (!still_used) {
-		cl_lock_mutex_get(env, clk);
-		cl_lock_cancel(env, clk);
-		cl_lock_delete(env, clk);
-		cl_lock_mutex_put(env, clk);
-	}
-	cl_lock_put(env, clk);
+	cl_lock_release(env, clk);
 }
 
 static struct lu_device *echo_device_free(const struct lu_env *env,
@@ -1022,9 +971,11 @@ static int cl_echo_enqueue0(struct lu_env *env, struct echo_object *eco,
 
 	info = echo_env_info(env);
 	io = &info->eti_io;
-	descr = &info->eti_descr;
+	lck = &info->eti_lock;
 	obj = echo_obj2cl(eco);
 
+	memset(lck, 0, sizeof(*lck));
+	descr = &lck->cll_descr;
 	descr->cld_obj   = obj;
 	descr->cld_start = cl_index(obj, start);
 	descr->cld_end   = cl_index(obj, end);
@@ -1032,25 +983,20 @@ static int cl_echo_enqueue0(struct lu_env *env, struct echo_object *eco,
 	descr->cld_enq_flags = enqflags;
 	io->ci_obj = obj;
 
-	lck = cl_lock_request(env, io, descr, "ec enqueue", eco);
-	if (lck) {
+	rc = cl_lock_request(env, io, lck);
+	if (rc == 0) {
 		struct echo_client_obd *ec = eco->eo_dev->ed_ec;
 		struct echo_lock *el;
 
-		rc = cl_wait(env, lck);
-		if (rc == 0) {
-			el = cl2echo_lock(cl_lock_at(lck, &echo_device_type));
-			spin_lock(&ec->ec_lock);
-			if (list_empty(&el->el_chain)) {
-				list_add(&el->el_chain, &ec->ec_locks);
-				el->el_cookie = ++ec->ec_unique;
-			}
-			atomic_inc(&el->el_refcount);
-			*cookie = el->el_cookie;
-			spin_unlock(&ec->ec_lock);
-		} else {
-			cl_lock_release(env, lck, "ec enqueue", current);
+		el = cl2echo_lock(cl_lock_at(lck, &echo_device_type));
+		spin_lock(&ec->ec_lock);
+		if (list_empty(&el->el_chain)) {
+			list_add(&el->el_chain, &ec->ec_locks);
+			el->el_cookie = ++ec->ec_unique;
 		}
+		atomic_inc(&el->el_refcount);
+		*cookie = el->el_cookie;
+		spin_unlock(&ec->ec_lock);
 	}
 	return rc;
 }
@@ -1085,22 +1031,17 @@ static int cl_echo_cancel0(struct lu_env *env, struct echo_device *ed,
 	return 0;
 }
 
-static int cl_echo_async_brw(const struct lu_env *env, struct cl_io *io,
-			     enum cl_req_type unused, struct cl_2queue *queue)
+static void echo_commit_callback(const struct lu_env *env, struct cl_io *io,
+				 struct cl_page *page)
 {
-	struct cl_page *clp;
-	struct cl_page *temp;
-	int result = 0;
+	struct echo_thread_info *info;
+	struct cl_2queue *queue;
 
-	cl_page_list_for_each_safe(clp, temp, &queue->c2_qin) {
-		int rc;
+	info = echo_env_info(env);
+	LASSERT(io == &info->eti_io);
 
-		rc = cl_page_cache_add(env, io, clp, CRT_WRITE);
-		if (rc == 0)
-			continue;
-		result = result ?: rc;
-	}
-	return result;
+	queue = &info->eti_queue;
+	cl_page_list_add(&queue->c2_qout, page);
 }
 
 static int cl_echo_object_brw(struct echo_object *eco, int rw, u64 offset,
@@ -1119,7 +1060,7 @@ static int cl_echo_object_brw(struct echo_object *eco, int rw, u64 offset,
 	int rc;
 	int i;
 
-	LASSERT((offset & ~CFS_PAGE_MASK) == 0);
+	LASSERT((offset & ~PAGE_MASK) == 0);
 	LASSERT(ed->ed_next);
 	env = cl_env_get(&refcheck);
 	if (IS_ERR(env))
@@ -1179,7 +1120,9 @@ static int cl_echo_object_brw(struct echo_object *eco, int rw, u64 offset,
 
 		async = async && (typ == CRT_WRITE);
 		if (async)
-			rc = cl_echo_async_brw(env, io, typ, queue);
+			rc = cl_io_commit_async(env, io, &queue->c2_qin,
+						0, PAGE_SIZE,
+						echo_commit_callback);
 		else
 			rc = cl_io_submit_sync(env, io, typ, queue, 0);
 		CDEBUG(D_INFO, "echo_client %s write returns %d\n",
@@ -1387,7 +1330,7 @@ static int echo_client_kbrw(struct echo_device *ed, int rw, struct obdo *oa,
 	LASSERT(rw == OBD_BRW_WRITE || rw == OBD_BRW_READ);
 
 	if (count <= 0 ||
-	    (count & (~CFS_PAGE_MASK)) != 0)
+	    (count & (~PAGE_MASK)) != 0)
 		return -EINVAL;
 
 	/* XXX think again with misaligned I/O */
@@ -1409,7 +1352,6 @@ static int echo_client_kbrw(struct echo_device *ed, int rw, struct obdo *oa,
 	for (i = 0, pgp = pga, off = offset;
 	     i < npages;
 	     i++, pgp++, off += PAGE_SIZE) {
-
 		LASSERT(!pgp->pg);      /* for cleanup */
 
 		rc = -ENOMEM;
@@ -1470,7 +1412,7 @@ static int echo_client_prep_commit(const struct lu_env *env,
 	u64 npages, tot_pages;
 	int i, ret = 0, brw_flags = 0;
 
-	if (count <= 0 || (count & (~CFS_PAGE_MASK)) != 0)
+	if (count <= 0 || (count & (~PAGE_MASK)) != 0)
 		return -EINVAL;
 
 	npages = batch >> PAGE_SHIFT;
@@ -1886,7 +1828,6 @@ static int __init obdecho_init(void)
 static void /*__exit*/ obdecho_exit(void)
 {
 	echo_client_exit();
-
 }
 
 MODULE_AUTHOR("OpenSFS, Inc. <http://www.lustre.org/>");
diff --git a/drivers/staging/lustre/lustre/osc/lproc_osc.c b/drivers/staging/lustre/lustre/osc/lproc_osc.c
index a3358c39b..33a113213 100644
--- a/drivers/staging/lustre/lustre/osc/lproc_osc.c
+++ b/drivers/staging/lustre/lustre/osc/lproc_osc.c
@@ -121,9 +121,9 @@ static ssize_t max_rpcs_in_flight_store(struct kobject *kobj,
 		atomic_add(added, &osc_pool_req_count);
 	}
 
-	client_obd_list_lock(&cli->cl_loi_list_lock);
+	spin_lock(&cli->cl_loi_list_lock);
 	cli->cl_max_rpcs_in_flight = val;
-	client_obd_list_unlock(&cli->cl_loi_list_lock);
+	spin_unlock(&cli->cl_loi_list_lock);
 
 	return count;
 }
@@ -139,9 +139,9 @@ static ssize_t max_dirty_mb_show(struct kobject *kobj,
 	long val;
 	int mult;
 
-	client_obd_list_lock(&cli->cl_loi_list_lock);
+	spin_lock(&cli->cl_loi_list_lock);
 	val = cli->cl_dirty_max;
-	client_obd_list_unlock(&cli->cl_loi_list_lock);
+	spin_unlock(&cli->cl_loi_list_lock);
 
 	mult = 1 << 20;
 	return lprocfs_read_frac_helper(buf, PAGE_SIZE, val, mult);
@@ -169,10 +169,10 @@ static ssize_t max_dirty_mb_store(struct kobject *kobj,
 	    pages_number > totalram_pages / 4) /* 1/4 of RAM */
 		return -ERANGE;
 
-	client_obd_list_lock(&cli->cl_loi_list_lock);
+	spin_lock(&cli->cl_loi_list_lock);
 	cli->cl_dirty_max = (u32)(pages_number << PAGE_SHIFT);
 	osc_wake_cache_waiters(cli);
-	client_obd_list_unlock(&cli->cl_loi_list_lock);
+	spin_unlock(&cli->cl_loi_list_lock);
 
 	return count;
 }
@@ -222,8 +222,16 @@ static ssize_t osc_cached_mb_seq_write(struct file *file,
 		return -ERANGE;
 
 	rc = atomic_read(&cli->cl_lru_in_list) - pages_number;
-	if (rc > 0)
-		(void)osc_lru_shrink(cli, rc);
+	if (rc > 0) {
+		struct lu_env *env;
+		int refcheck;
+
+		env = cl_env_get(&refcheck);
+		if (!IS_ERR(env)) {
+			(void)osc_lru_shrink(env, cli, rc, true);
+			cl_env_put(env, &refcheck);
+		}
+	}
 
 	return count;
 }
@@ -239,9 +247,9 @@ static ssize_t cur_dirty_bytes_show(struct kobject *kobj,
 	struct client_obd *cli = &dev->u.cli;
 	int len;
 
-	client_obd_list_lock(&cli->cl_loi_list_lock);
+	spin_lock(&cli->cl_loi_list_lock);
 	len = sprintf(buf, "%lu\n", cli->cl_dirty);
-	client_obd_list_unlock(&cli->cl_loi_list_lock);
+	spin_unlock(&cli->cl_loi_list_lock);
 
 	return len;
 }
@@ -256,9 +264,9 @@ static ssize_t cur_grant_bytes_show(struct kobject *kobj,
 	struct client_obd *cli = &dev->u.cli;
 	int len;
 
-	client_obd_list_lock(&cli->cl_loi_list_lock);
+	spin_lock(&cli->cl_loi_list_lock);
 	len = sprintf(buf, "%lu\n", cli->cl_avail_grant);
-	client_obd_list_unlock(&cli->cl_loi_list_lock);
+	spin_unlock(&cli->cl_loi_list_lock);
 
 	return len;
 }
@@ -279,12 +287,12 @@ static ssize_t cur_grant_bytes_store(struct kobject *kobj,
 		return rc;
 
 	/* this is only for shrinking grant */
-	client_obd_list_lock(&cli->cl_loi_list_lock);
+	spin_lock(&cli->cl_loi_list_lock);
 	if (val >= cli->cl_avail_grant) {
-		client_obd_list_unlock(&cli->cl_loi_list_lock);
+		spin_unlock(&cli->cl_loi_list_lock);
 		return -EINVAL;
 	}
-	client_obd_list_unlock(&cli->cl_loi_list_lock);
+	spin_unlock(&cli->cl_loi_list_lock);
 
 	if (cli->cl_import->imp_state == LUSTRE_IMP_FULL)
 		rc = osc_shrink_grant_to_target(cli, val);
@@ -303,9 +311,9 @@ static ssize_t cur_lost_grant_bytes_show(struct kobject *kobj,
 	struct client_obd *cli = &dev->u.cli;
 	int len;
 
-	client_obd_list_lock(&cli->cl_loi_list_lock);
+	spin_lock(&cli->cl_loi_list_lock);
 	len = sprintf(buf, "%lu\n", cli->cl_lost_grant);
-	client_obd_list_unlock(&cli->cl_loi_list_lock);
+	spin_unlock(&cli->cl_loi_list_lock);
 
 	return len;
 }
@@ -577,14 +585,31 @@ static ssize_t max_pages_per_rpc_store(struct kobject *kobj,
 	if (val == 0 || val > ocd->ocd_brw_size >> PAGE_SHIFT) {
 		return -ERANGE;
 	}
-	client_obd_list_lock(&cli->cl_loi_list_lock);
+	spin_lock(&cli->cl_loi_list_lock);
 	cli->cl_max_pages_per_rpc = val;
-	client_obd_list_unlock(&cli->cl_loi_list_lock);
+	spin_unlock(&cli->cl_loi_list_lock);
 
 	return count;
 }
 LUSTRE_RW_ATTR(max_pages_per_rpc);
 
+static ssize_t unstable_stats_show(struct kobject *kobj,
+				   struct attribute *attr,
+				   char *buf)
+{
+	struct obd_device *dev = container_of(kobj, struct obd_device,
+					      obd_kobj);
+	struct client_obd *cli = &dev->u.cli;
+	int pages, mb;
+
+	pages = atomic_read(&cli->cl_unstable_count);
+	mb = (pages * PAGE_SIZE) >> 20;
+
+	return sprintf(buf, "unstable_pages: %8d\n"
+		       "unstable_mb:    %8d\n", pages, mb);
+}
+LUSTRE_RO_ATTR(unstable_stats);
+
 LPROC_SEQ_FOPS_RO_TYPE(osc, connect_flags);
 LPROC_SEQ_FOPS_RO_TYPE(osc, server_uuid);
 LPROC_SEQ_FOPS_RO_TYPE(osc, conn_uuid);
@@ -623,7 +648,7 @@ static int osc_rpc_stats_seq_show(struct seq_file *seq, void *v)
 
 	ktime_get_real_ts64(&now);
 
-	client_obd_list_lock(&cli->cl_loi_list_lock);
+	spin_lock(&cli->cl_loi_list_lock);
 
 	seq_printf(seq, "snapshot_time:	 %llu.%9lu (secs.usecs)\n",
 		   (s64)now.tv_sec, (unsigned long)now.tv_nsec);
@@ -707,7 +732,7 @@ static int osc_rpc_stats_seq_show(struct seq_file *seq, void *v)
 			break;
 	}
 
-	client_obd_list_unlock(&cli->cl_loi_list_lock);
+	spin_unlock(&cli->cl_loi_list_lock);
 
 	return 0;
 }
@@ -794,6 +819,7 @@ static struct attribute *osc_attrs[] = {
 	&lustre_attr_max_pages_per_rpc.attr,
 	&lustre_attr_max_rpcs_in_flight.attr,
 	&lustre_attr_resend_count.attr,
+	&lustre_attr_unstable_stats.attr,
 	NULL,
 };
 
diff --git a/drivers/staging/lustre/lustre/osc/osc_cache.c b/drivers/staging/lustre/lustre/osc/osc_cache.c
index 5f25bf83d..5a14bea96 100644
--- a/drivers/staging/lustre/lustre/osc/osc_cache.c
+++ b/drivers/staging/lustre/lustre/osc/osc_cache.c
@@ -76,6 +76,8 @@ static inline char *ext_flags(struct osc_extent *ext, char *flags)
 	*buf++ = ext->oe_rw ? 'r' : 'w';
 	if (ext->oe_intree)
 		*buf++ = 'i';
+	if (ext->oe_sync)
+		*buf++ = 'S';
 	if (ext->oe_srvlock)
 		*buf++ = 's';
 	if (ext->oe_hp)
@@ -121,9 +123,13 @@ static const char *oes_strings[] = {
 		__ext->oe_grants, __ext->oe_nr_pages,			      \
 		list_empty_marker(&__ext->oe_pages),			      \
 		waitqueue_active(&__ext->oe_waitq) ? '+' : '-',		      \
-		__ext->oe_osclock, __ext->oe_mppr, __ext->oe_owner,	      \
+		__ext->oe_dlmlock, __ext->oe_mppr, __ext->oe_owner,	      \
 		/* ----- part 4 ----- */				      \
 		## __VA_ARGS__);					      \
+	if (lvl == D_ERROR && __ext->oe_dlmlock)			      \
+		LDLM_ERROR(__ext->oe_dlmlock, "extent: %p\n", __ext);	      \
+	else								      \
+		LDLM_DEBUG(__ext->oe_dlmlock, "extent: %p\n", __ext);	      \
 } while (0)
 
 #undef EASSERTF
@@ -240,20 +246,25 @@ static int osc_extent_sanity_check0(struct osc_extent *ext,
 		goto out;
 	}
 
-	if (!ext->oe_osclock && ext->oe_grants > 0) {
+	if (ext->oe_sync && ext->oe_grants > 0) {
 		rc = 90;
 		goto out;
 	}
 
-	if (ext->oe_osclock) {
-		struct cl_lock_descr *descr;
+	if (ext->oe_dlmlock) {
+		struct ldlm_extent *extent;
 
-		descr = &ext->oe_osclock->cll_descr;
-		if (!(descr->cld_start <= ext->oe_start &&
-		      descr->cld_end >= ext->oe_max_end)) {
+		extent = &ext->oe_dlmlock->l_policy_data.l_extent;
+		if (!(extent->start <= cl_offset(osc2cl(obj), ext->oe_start) &&
+		      extent->end >= cl_offset(osc2cl(obj), ext->oe_max_end))) {
 			rc = 100;
 			goto out;
 		}
+
+		if (!(ext->oe_dlmlock->l_granted_mode & (LCK_PW | LCK_GROUP))) {
+			rc = 102;
+			goto out;
+		}
 	}
 
 	if (ext->oe_nr_pages > ext->oe_mppr) {
@@ -276,7 +287,7 @@ static int osc_extent_sanity_check0(struct osc_extent *ext,
 
 	page_count = 0;
 	list_for_each_entry(oap, &ext->oe_pages, oap_pending_item) {
-		pgoff_t index = oap2cl_page(oap)->cp_index;
+		pgoff_t index = osc_index(oap2osc(oap));
 		++page_count;
 		if (index > ext->oe_end || index < ext->oe_start) {
 			rc = 110;
@@ -359,7 +370,7 @@ static struct osc_extent *osc_extent_alloc(struct osc_object *obj)
 	ext->oe_state = OES_INV;
 	INIT_LIST_HEAD(&ext->oe_pages);
 	init_waitqueue_head(&ext->oe_waitq);
-	ext->oe_osclock = NULL;
+	ext->oe_dlmlock = NULL;
 
 	return ext;
 }
@@ -385,9 +396,11 @@ static void osc_extent_put(const struct lu_env *env, struct osc_extent *ext)
 		LASSERT(ext->oe_state == OES_INV);
 		LASSERT(!ext->oe_intree);
 
-		if (ext->oe_osclock) {
-			cl_lock_put(env, ext->oe_osclock);
-			ext->oe_osclock = NULL;
+		if (ext->oe_dlmlock) {
+			lu_ref_add(&ext->oe_dlmlock->l_reference,
+				   "osc_extent", ext);
+			LDLM_LOCK_PUT(ext->oe_dlmlock);
+			ext->oe_dlmlock = NULL;
 		}
 		osc_extent_free(ext);
 	}
@@ -543,7 +556,7 @@ static int osc_extent_merge(const struct lu_env *env, struct osc_extent *cur,
 	if (cur->oe_max_end != victim->oe_max_end)
 		return -ERANGE;
 
-	LASSERT(cur->oe_osclock == victim->oe_osclock);
+	LASSERT(cur->oe_dlmlock == victim->oe_dlmlock);
 	ppc_bits = osc_cli(obj)->cl_chunkbits - PAGE_SHIFT;
 	chunk_start = cur->oe_start >> ppc_bits;
 	chunk_end = cur->oe_end >> ppc_bits;
@@ -624,10 +637,10 @@ static inline int overlapped(struct osc_extent *ex1, struct osc_extent *ex2)
 static struct osc_extent *osc_extent_find(const struct lu_env *env,
 					  struct osc_object *obj, pgoff_t index,
 					  int *grants)
-
 {
 	struct client_obd *cli = osc_cli(obj);
-	struct cl_lock *lock;
+	struct osc_lock   *olck;
+	struct cl_lock_descr *descr;
 	struct osc_extent *cur;
 	struct osc_extent *ext;
 	struct osc_extent *conflict = NULL;
@@ -644,8 +657,12 @@ static struct osc_extent *osc_extent_find(const struct lu_env *env,
 	if (!cur)
 		return ERR_PTR(-ENOMEM);
 
-	lock = cl_lock_at_pgoff(env, osc2cl(obj), index, NULL, 1, 0);
-	LASSERT(lock->cll_descr.cld_mode >= CLM_WRITE);
+	olck = osc_env_io(env)->oi_write_osclock;
+	LASSERTF(olck, "page %lu is not covered by lock\n", index);
+	LASSERT(olck->ols_state == OLS_GRANTED);
+
+	descr = &olck->ols_cl.cls_lock->cll_descr;
+	LASSERT(descr->cld_mode >= CLM_WRITE);
 
 	LASSERT(cli->cl_chunkbits >= PAGE_SHIFT);
 	ppc_bits = cli->cl_chunkbits - PAGE_SHIFT;
@@ -657,19 +674,23 @@ static struct osc_extent *osc_extent_find(const struct lu_env *env,
 	max_pages = cli->cl_max_pages_per_rpc;
 	LASSERT((max_pages & ~chunk_mask) == 0);
 	max_end = index - (index % max_pages) + max_pages - 1;
-	max_end = min_t(pgoff_t, max_end, lock->cll_descr.cld_end);
+	max_end = min_t(pgoff_t, max_end, descr->cld_end);
 
 	/* initialize new extent by parameters so far */
 	cur->oe_max_end = max_end;
 	cur->oe_start = index & chunk_mask;
 	cur->oe_end = ((index + ~chunk_mask + 1) & chunk_mask) - 1;
-	if (cur->oe_start < lock->cll_descr.cld_start)
-		cur->oe_start = lock->cll_descr.cld_start;
+	if (cur->oe_start < descr->cld_start)
+		cur->oe_start = descr->cld_start;
 	if (cur->oe_end > max_end)
 		cur->oe_end = max_end;
-	cur->oe_osclock = lock;
 	cur->oe_grants = 0;
 	cur->oe_mppr = max_pages;
+	if (olck->ols_dlmlock) {
+		LASSERT(olck->ols_hold);
+		cur->oe_dlmlock = LDLM_LOCK_GET(olck->ols_dlmlock);
+		lu_ref_add(&olck->ols_dlmlock->l_reference, "osc_extent", cur);
+	}
 
 	/* grants has been allocated by caller */
 	LASSERTF(*grants >= chunksize + cli->cl_extent_tax,
@@ -691,7 +712,7 @@ restart:
 			break;
 
 		/* if covering by different locks, no chance to match */
-		if (lock != ext->oe_osclock) {
+		if (olck->ols_dlmlock != ext->oe_dlmlock) {
 			EASSERTF(!overlapped(ext, cur), ext,
 				 EXTSTR"\n", EXTPARA(cur));
 
@@ -795,7 +816,7 @@ restart:
 	if (found) {
 		LASSERT(!conflict);
 		if (!IS_ERR(found)) {
-			LASSERT(found->oe_osclock == cur->oe_osclock);
+			LASSERT(found->oe_dlmlock == cur->oe_dlmlock);
 			OSC_EXTENT_DUMP(D_CACHE, found,
 					"found caching ext for %lu.\n", index);
 		}
@@ -810,7 +831,7 @@ restart:
 		found = osc_extent_hold(cur);
 		osc_extent_insert(obj, cur);
 		OSC_EXTENT_DUMP(D_CACHE, cur, "add into tree %lu/%lu.\n",
-				index, lock->cll_descr.cld_end);
+				index, descr->cld_end);
 	}
 	osc_object_unlock(obj);
 
@@ -856,6 +877,8 @@ int osc_extent_finish(const struct lu_env *env, struct osc_extent *ext,
 
 	ext->oe_rc = rc ?: ext->oe_nr_pages;
 	EASSERT(ergo(rc == 0, ext->oe_state == OES_RPC), ext);
+
+	osc_lru_add_batch(cli, &ext->oe_pages);
 	list_for_each_entry_safe(oap, tmp, &ext->oe_pages, oap_pending_item) {
 		list_del_init(&oap->oap_rpc_item);
 		list_del_init(&oap->oap_pending_item);
@@ -877,10 +900,9 @@ int osc_extent_finish(const struct lu_env *env, struct osc_extent *ext,
 		 * span a whole chunk on the OST side, or our accounting goes
 		 * wrong.  Should match the code in filter_grant_check.
 		 */
-		int offset = oap->oap_page_off & ~CFS_PAGE_MASK;
-		int count = oap->oap_count + (offset & (blocksize - 1));
-		int end = (offset + oap->oap_count) & (blocksize - 1);
-
+		int offset = last_off & ~PAGE_MASK;
+		int count = last_count + (offset & (blocksize - 1));
+		int end = (offset + last_count) & (blocksize - 1);
 		if (end)
 			count += blocksize - end;
 
@@ -943,7 +965,7 @@ static int osc_extent_wait(const struct lu_env *env, struct osc_extent *ext,
 			"%s: wait ext to %d timedout, recovery in progress?\n",
 			osc_export(obj)->exp_obd->obd_name, state);
 
-		lwi = LWI_INTR(LWI_ON_SIGNAL_NOOP, NULL);
+		lwi = LWI_INTR(NULL, NULL);
 		rc = l_wait_event(ext->oe_waitq, extent_wait_cb(ext, state),
 				  &lwi);
 	}
@@ -990,19 +1012,19 @@ static int osc_extent_truncate(struct osc_extent *ext, pgoff_t trunc_index,
 
 	/* discard all pages with index greater then trunc_index */
 	list_for_each_entry_safe(oap, tmp, &ext->oe_pages, oap_pending_item) {
-		struct cl_page *sub = oap2cl_page(oap);
-		struct cl_page *page = cl_page_top(sub);
+		pgoff_t index = osc_index(oap2osc(oap));
+		struct cl_page *page = oap2cl_page(oap);
 
 		LASSERT(list_empty(&oap->oap_rpc_item));
 
 		/* only discard the pages with their index greater than
 		 * trunc_index, and ...
 		 */
-		if (sub->cp_index < trunc_index ||
-		    (sub->cp_index == trunc_index && partial)) {
+		if (index < trunc_index ||
+		    (index == trunc_index && partial)) {
 			/* accounting how many pages remaining in the chunk
 			 * so that we can calculate grants correctly. */
-			if (sub->cp_index >> ppc_bits == trunc_chunk)
+			if (index >> ppc_bits == trunc_chunk)
 				++pages_in_chunk;
 			continue;
 		}
@@ -1013,7 +1035,6 @@ static int osc_extent_truncate(struct osc_extent *ext, pgoff_t trunc_index,
 		lu_ref_add(&page->cp_reference, "truncate", current);
 
 		if (cl_page_own(env, io, page) == 0) {
-			cl_page_unmap(env, io, page);
 			cl_page_discard(env, io, page);
 			cl_page_disown(env, io, page);
 		} else {
@@ -1126,7 +1147,9 @@ static int osc_extent_make_ready(const struct lu_env *env,
 		last->oap_count = osc_refresh_count(env, last, OBD_BRW_WRITE);
 		LASSERT(last->oap_count > 0);
 		LASSERT(last->oap_page_off + last->oap_count <= PAGE_SIZE);
+		spin_lock(&last->oap_lock);
 		last->oap_async_flags |= ASYNC_COUNT_STABLE;
+		spin_unlock(&last->oap_lock);
 	}
 
 	/* for the rest of pages, we don't need to call osf_refresh_count()
@@ -1135,7 +1158,9 @@ static int osc_extent_make_ready(const struct lu_env *env,
 	list_for_each_entry(oap, &ext->oe_pages, oap_pending_item) {
 		if (!(oap->oap_async_flags & ASYNC_COUNT_STABLE)) {
 			oap->oap_count = PAGE_SIZE - oap->oap_page_off;
+			spin_lock(&last->oap_lock);
 			oap->oap_async_flags |= ASYNC_COUNT_STABLE;
+			spin_unlock(&last->oap_lock);
 		}
 	}
 
@@ -1256,7 +1281,7 @@ static int osc_make_ready(const struct lu_env *env, struct osc_async_page *oap,
 			  int cmd)
 {
 	struct osc_page *opg = oap2osc_page(oap);
-	struct cl_page *page = cl_page_top(oap2cl_page(oap));
+	struct cl_page  *page = oap2cl_page(oap);
 	int result;
 
 	LASSERT(cmd == OBD_BRW_WRITE); /* no cached reads */
@@ -1271,7 +1296,7 @@ static int osc_refresh_count(const struct lu_env *env,
 			     struct osc_async_page *oap, int cmd)
 {
 	struct osc_page *opg = oap2osc_page(oap);
-	struct cl_page *page = oap2cl_page(oap);
+	pgoff_t index = osc_index(oap2osc(oap));
 	struct cl_object *obj;
 	struct cl_attr *attr = &osc_env_info(env)->oti_attr;
 
@@ -1288,10 +1313,10 @@ static int osc_refresh_count(const struct lu_env *env,
 	if (result < 0)
 		return result;
 	kms = attr->cat_kms;
-	if (cl_offset(obj, page->cp_index) >= kms)
+	if (cl_offset(obj, index) >= kms)
 		/* catch race with truncate */
 		return 0;
-	else if (cl_offset(obj, page->cp_index + 1) > kms)
+	else if (cl_offset(obj, index + 1) > kms)
 		/* catch sub-page write at end of file */
 		return kms % PAGE_SIZE;
 	else
@@ -1302,14 +1327,16 @@ static int osc_completion(const struct lu_env *env, struct osc_async_page *oap,
 			  int cmd, int rc)
 {
 	struct osc_page *opg = oap2osc_page(oap);
-	struct cl_page *page = cl_page_top(oap2cl_page(oap));
+	struct cl_page    *page = oap2cl_page(oap);
 	struct osc_object *obj = cl2osc(opg->ops_cl.cpl_obj);
 	enum cl_req_type crt;
 	int srvlock;
 
 	cmd &= ~OBD_BRW_NOQUOTA;
-	LASSERT(equi(page->cp_state == CPS_PAGEIN,  cmd == OBD_BRW_READ));
-	LASSERT(equi(page->cp_state == CPS_PAGEOUT, cmd == OBD_BRW_WRITE));
+	LASSERTF(equi(page->cp_state == CPS_PAGEIN, cmd == OBD_BRW_READ),
+		 "cp_state:%u, cmd:%d\n", page->cp_state, cmd);
+	LASSERTF(equi(page->cp_state == CPS_PAGEOUT, cmd == OBD_BRW_WRITE),
+		 "cp_state:%u, cmd:%d\n", page->cp_state, cmd);
 	LASSERT(opg->ops_transfer_pinned);
 
 	/*
@@ -1358,22 +1385,28 @@ static int osc_completion(const struct lu_env *env, struct osc_async_page *oap,
 	return 0;
 }
 
-#define OSC_DUMP_GRANT(cli, fmt, args...) do {				      \
+#define OSC_DUMP_GRANT(lvl, cli, fmt, args...) do {			      \
 	struct client_obd *__tmp = (cli);				      \
-	CDEBUG(D_CACHE, "%s: { dirty: %ld/%ld dirty_pages: %d/%d "	      \
-	       "dropped: %ld avail: %ld, reserved: %ld, flight: %d } " fmt,   \
+	CDEBUG(lvl, "%s: grant { dirty: %ld/%ld dirty_pages: %d/%d "	      \
+	       "unstable_pages: %d/%d dropped: %ld avail: %ld, "	      \
+	       "reserved: %ld, flight: %d } lru {in list: %d, "		      \
+	       "left: %d, waiters: %d }" fmt,                                 \
 	       __tmp->cl_import->imp_obd->obd_name,			      \
 	       __tmp->cl_dirty, __tmp->cl_dirty_max,			      \
 	       atomic_read(&obd_dirty_pages), obd_max_dirty_pages,	      \
+	       atomic_read(&obd_unstable_pages), obd_max_dirty_pages,	      \
 	       __tmp->cl_lost_grant, __tmp->cl_avail_grant,		      \
-	       __tmp->cl_reserved_grant, __tmp->cl_w_in_flight, ##args);      \
+	       __tmp->cl_reserved_grant, __tmp->cl_w_in_flight,		      \
+	       atomic_read(&__tmp->cl_lru_in_list),			      \
+	       atomic_read(&__tmp->cl_lru_busy),			      \
+	       atomic_read(&__tmp->cl_lru_shrinkers), ##args);		      \
 } while (0)
 
 /* caller must hold loi_list_lock */
 static void osc_consume_write_grant(struct client_obd *cli,
 				    struct brw_page *pga)
 {
-	assert_spin_locked(&cli->cl_loi_list_lock.lock);
+	assert_spin_locked(&cli->cl_loi_list_lock);
 	LASSERT(!(pga->flag & OBD_BRW_FROM_GRANT));
 	atomic_inc(&obd_dirty_pages);
 	cli->cl_dirty += PAGE_SIZE;
@@ -1389,7 +1422,7 @@ static void osc_consume_write_grant(struct client_obd *cli,
 static void osc_release_write_grant(struct client_obd *cli,
 				    struct brw_page *pga)
 {
-	assert_spin_locked(&cli->cl_loi_list_lock.lock);
+	assert_spin_locked(&cli->cl_loi_list_lock);
 	if (!(pga->flag & OBD_BRW_FROM_GRANT)) {
 		return;
 	}
@@ -1408,7 +1441,7 @@ static void osc_release_write_grant(struct client_obd *cli,
  * To avoid sleeping with object lock held, it's good for us allocate enough
  * grants before entering into critical section.
  *
- * client_obd_list_lock held by caller
+ * spin_lock held by caller
  */
 static int osc_reserve_grant(struct client_obd *cli, unsigned int bytes)
 {
@@ -1442,11 +1475,11 @@ static void __osc_unreserve_grant(struct client_obd *cli,
 static void osc_unreserve_grant(struct client_obd *cli,
 				unsigned int reserved, unsigned int unused)
 {
-	client_obd_list_lock(&cli->cl_loi_list_lock);
+	spin_lock(&cli->cl_loi_list_lock);
 	__osc_unreserve_grant(cli, reserved, unused);
 	if (unused > 0)
 		osc_wake_cache_waiters(cli);
-	client_obd_list_unlock(&cli->cl_loi_list_lock);
+	spin_unlock(&cli->cl_loi_list_lock);
 }
 
 /**
@@ -1467,7 +1500,7 @@ static void osc_free_grant(struct client_obd *cli, unsigned int nr_pages,
 {
 	int grant = (1 << cli->cl_chunkbits) + cli->cl_extent_tax;
 
-	client_obd_list_lock(&cli->cl_loi_list_lock);
+	spin_lock(&cli->cl_loi_list_lock);
 	atomic_sub(nr_pages, &obd_dirty_pages);
 	cli->cl_dirty -= nr_pages << PAGE_SHIFT;
 	cli->cl_lost_grant += lost_grant;
@@ -1479,7 +1512,7 @@ static void osc_free_grant(struct client_obd *cli, unsigned int nr_pages,
 		cli->cl_avail_grant += grant;
 	}
 	osc_wake_cache_waiters(cli);
-	client_obd_list_unlock(&cli->cl_loi_list_lock);
+	spin_unlock(&cli->cl_loi_list_lock);
 	CDEBUG(D_CACHE, "lost %u grant: %lu avail: %lu dirty: %lu\n",
 	       lost_grant, cli->cl_lost_grant,
 	       cli->cl_avail_grant, cli->cl_dirty);
@@ -1491,9 +1524,9 @@ static void osc_free_grant(struct client_obd *cli, unsigned int nr_pages,
  */
 static void osc_exit_cache(struct client_obd *cli, struct osc_async_page *oap)
 {
-	client_obd_list_lock(&cli->cl_loi_list_lock);
+	spin_lock(&cli->cl_loi_list_lock);
 	osc_release_write_grant(cli, &oap->oap_brw_page);
-	client_obd_list_unlock(&cli->cl_loi_list_lock);
+	spin_unlock(&cli->cl_loi_list_lock);
 }
 
 /**
@@ -1506,14 +1539,15 @@ static int osc_enter_cache_try(struct client_obd *cli,
 {
 	int rc;
 
-	OSC_DUMP_GRANT(cli, "need:%d.\n", bytes);
+	OSC_DUMP_GRANT(D_CACHE, cli, "need:%d.\n", bytes);
 
 	rc = osc_reserve_grant(cli, bytes);
 	if (rc < 0)
 		return 0;
 
 	if (cli->cl_dirty + PAGE_SIZE <= cli->cl_dirty_max &&
-	    atomic_read(&obd_dirty_pages) + 1 <= obd_max_dirty_pages) {
+	    atomic_read(&obd_unstable_pages) + 1 +
+	    atomic_read(&obd_dirty_pages) <= obd_max_dirty_pages) {
 		osc_consume_write_grant(cli, &oap->oap_brw_page);
 		if (transient) {
 			cli->cl_dirty_transit += PAGE_SIZE;
@@ -1532,9 +1566,9 @@ static int ocw_granted(struct client_obd *cli, struct osc_cache_waiter *ocw)
 {
 	int rc;
 
-	client_obd_list_lock(&cli->cl_loi_list_lock);
+	spin_lock(&cli->cl_loi_list_lock);
 	rc = list_empty(&ocw->ocw_entry);
-	client_obd_list_unlock(&cli->cl_loi_list_lock);
+	spin_unlock(&cli->cl_loi_list_lock);
 	return rc;
 }
 
@@ -1551,12 +1585,13 @@ static int osc_enter_cache(const struct lu_env *env, struct client_obd *cli,
 	struct osc_object *osc = oap->oap_obj;
 	struct lov_oinfo *loi = osc->oo_oinfo;
 	struct osc_cache_waiter ocw;
-	struct l_wait_info lwi = LWI_INTR(LWI_ON_SIGNAL_NOOP, NULL);
+	struct l_wait_info lwi = LWI_TIMEOUT_INTR(cfs_time_seconds(600), NULL,
+						  LWI_ON_SIGNAL_NOOP, NULL);
 	int rc = -EDQUOT;
 
-	OSC_DUMP_GRANT(cli, "need:%d.\n", bytes);
+	OSC_DUMP_GRANT(D_CACHE, cli, "need:%d.\n", bytes);
 
-	client_obd_list_lock(&cli->cl_loi_list_lock);
+	spin_lock(&cli->cl_loi_list_lock);
 
 	/* force the caller to try sync io.  this can jump the list
 	 * of queued writes and create a discontiguous rpc stream
@@ -1587,7 +1622,7 @@ static int osc_enter_cache(const struct lu_env *env, struct client_obd *cli,
 	while (cli->cl_dirty > 0 || cli->cl_w_in_flight > 0) {
 		list_add_tail(&ocw.ocw_entry, &cli->cl_cache_waiters);
 		ocw.ocw_rc = 0;
-		client_obd_list_unlock(&cli->cl_loi_list_lock);
+		spin_unlock(&cli->cl_loi_list_lock);
 
 		osc_io_unplug_async(env, cli, NULL);
 
@@ -1596,10 +1631,17 @@ static int osc_enter_cache(const struct lu_env *env, struct client_obd *cli,
 
 		rc = l_wait_event(ocw.ocw_waitq, ocw_granted(cli, &ocw), &lwi);
 
-		client_obd_list_lock(&cli->cl_loi_list_lock);
+		spin_lock(&cli->cl_loi_list_lock);
 
-		/* l_wait_event is interrupted by signal */
+		/* l_wait_event is interrupted by signal, or timed out */
 		if (rc < 0) {
+			if (rc == -ETIMEDOUT) {
+				OSC_DUMP_GRANT(D_ERROR, cli,
+					       "try to reserve %d.\n", bytes);
+				osc_extent_tree_dump(D_ERROR, osc);
+				rc = -EDQUOT;
+			}
+
 			list_del_init(&ocw.ocw_entry);
 			goto out;
 		}
@@ -1615,8 +1657,8 @@ static int osc_enter_cache(const struct lu_env *env, struct client_obd *cli,
 		}
 	}
 out:
-	client_obd_list_unlock(&cli->cl_loi_list_lock);
-	OSC_DUMP_GRANT(cli, "returned %d.\n", rc);
+	spin_unlock(&cli->cl_loi_list_lock);
+	OSC_DUMP_GRANT(D_CACHE, cli, "returned %d.\n", rc);
 	return rc;
 }
 
@@ -1633,8 +1675,8 @@ void osc_wake_cache_waiters(struct client_obd *cli)
 		ocw->ocw_rc = -EDQUOT;
 		/* we can't dirty more */
 		if ((cli->cl_dirty + PAGE_SIZE > cli->cl_dirty_max) ||
-		    (atomic_read(&obd_dirty_pages) + 1 >
-		     obd_max_dirty_pages)) {
+		    (atomic_read(&obd_unstable_pages) + 1 +
+		     atomic_read(&obd_dirty_pages) > obd_max_dirty_pages)) {
 			CDEBUG(D_CACHE, "no dirty room: dirty: %ld osc max %ld, sys max %d\n",
 			       cli->cl_dirty,
 			       cli->cl_dirty_max, obd_max_dirty_pages);
@@ -1776,9 +1818,9 @@ static int osc_list_maint(struct client_obd *cli, struct osc_object *osc)
 {
 	int is_ready;
 
-	client_obd_list_lock(&cli->cl_loi_list_lock);
+	spin_lock(&cli->cl_loi_list_lock);
 	is_ready = __osc_list_maint(cli, osc);
-	client_obd_list_unlock(&cli->cl_loi_list_lock);
+	spin_unlock(&cli->cl_loi_list_lock);
 
 	return is_ready;
 }
@@ -1799,13 +1841,101 @@ static void osc_process_ar(struct osc_async_rc *ar, __u64 xid,
 		ar->ar_force_sync = 1;
 		ar->ar_min_xid = ptlrpc_sample_next_xid();
 		return;
-
 	}
 
 	if (ar->ar_force_sync && (xid >= ar->ar_min_xid))
 		ar->ar_force_sync = 0;
 }
 
+/**
+ * Performs "unstable" page accounting. This function balances the
+ * increment operations performed in osc_inc_unstable_pages. It is
+ * registered as the RPC request callback, and is executed when the
+ * bulk RPC is committed on the server. Thus at this point, the pages
+ * involved in the bulk transfer are no longer considered unstable.
+ */
+void osc_dec_unstable_pages(struct ptlrpc_request *req)
+{
+	struct client_obd *cli = &req->rq_import->imp_obd->u.cli;
+	struct ptlrpc_bulk_desc *desc = req->rq_bulk;
+	int page_count = desc->bd_iov_count;
+	int i;
+
+	/* No unstable page tracking */
+	if (!cli->cl_cache)
+		return;
+
+	LASSERT(page_count >= 0);
+
+	for (i = 0; i < page_count; i++)
+		dec_zone_page_state(desc->bd_iov[i].kiov_page, NR_UNSTABLE_NFS);
+
+	atomic_sub(page_count, &cli->cl_cache->ccc_unstable_nr);
+	LASSERT(atomic_read(&cli->cl_cache->ccc_unstable_nr) >= 0);
+
+	atomic_sub(page_count, &cli->cl_unstable_count);
+	LASSERT(atomic_read(&cli->cl_unstable_count) >= 0);
+
+	atomic_sub(page_count, &obd_unstable_pages);
+	LASSERT(atomic_read(&obd_unstable_pages) >= 0);
+
+	spin_lock(&req->rq_lock);
+	req->rq_committed = 1;
+	req->rq_unstable  = 0;
+	spin_unlock(&req->rq_lock);
+
+	wake_up_all(&cli->cl_cache->ccc_unstable_waitq);
+}
+
+/* "unstable" page accounting. See: osc_dec_unstable_pages. */
+void osc_inc_unstable_pages(struct ptlrpc_request *req)
+{
+	struct client_obd *cli = &req->rq_import->imp_obd->u.cli;
+	struct ptlrpc_bulk_desc *desc = req->rq_bulk;
+	long page_count = desc->bd_iov_count;
+	int i;
+
+	/* No unstable page tracking */
+	if (!cli->cl_cache)
+		return;
+
+	LASSERT(page_count >= 0);
+
+	for (i = 0; i < page_count; i++)
+		inc_zone_page_state(desc->bd_iov[i].kiov_page, NR_UNSTABLE_NFS);
+
+	LASSERT(atomic_read(&cli->cl_cache->ccc_unstable_nr) >= 0);
+	atomic_add(page_count, &cli->cl_cache->ccc_unstable_nr);
+
+	LASSERT(atomic_read(&cli->cl_unstable_count) >= 0);
+	atomic_add(page_count, &cli->cl_unstable_count);
+
+	LASSERT(atomic_read(&obd_unstable_pages) >= 0);
+	atomic_add(page_count, &obd_unstable_pages);
+
+	spin_lock(&req->rq_lock);
+
+	/*
+	 * If the request has already been committed (i.e. brw_commit
+	 * called via rq_commit_cb), we need to undo the unstable page
+	 * increments we just performed because rq_commit_cb wont be
+	 * called again. Otherwise, just set the commit callback so the
+	 * unstable page accounting is properly updated when the request
+	 * is committed
+	 */
+	if (req->rq_committed) {
+		/* Drop lock before calling osc_dec_unstable_pages */
+		spin_unlock(&req->rq_lock);
+		osc_dec_unstable_pages(req);
+		spin_lock(&req->rq_lock);
+	} else {
+		req->rq_unstable = 1;
+		req->rq_commit_cb = osc_dec_unstable_pages;
+	}
+
+	spin_unlock(&req->rq_lock);
+}
+
 /* this must be called holding the loi list lock to give coverage to exit_cache,
  * async_flag maintenance, and oap_request
  */
@@ -1817,6 +1947,9 @@ static void osc_ap_completion(const struct lu_env *env, struct client_obd *cli,
 	__u64 xid = 0;
 
 	if (oap->oap_request) {
+		if (!rc)
+			osc_inc_unstable_pages(oap->oap_request);
+
 		xid = ptlrpc_req_xid(oap->oap_request);
 		ptlrpc_req_finished(oap->oap_request);
 		oap->oap_request = NULL;
@@ -1829,10 +1962,10 @@ static void osc_ap_completion(const struct lu_env *env, struct client_obd *cli,
 	oap->oap_interrupted = 0;
 
 	if (oap->oap_cmd & OBD_BRW_WRITE && xid > 0) {
-		client_obd_list_lock(&cli->cl_loi_list_lock);
+		spin_lock(&cli->cl_loi_list_lock);
 		osc_process_ar(&cli->cl_ar, xid, rc);
 		osc_process_ar(&loi->loi_ar, xid, rc);
-		client_obd_list_unlock(&cli->cl_loi_list_lock);
+		spin_unlock(&cli->cl_loi_list_lock);
 	}
 
 	rc = osc_completion(env, oap, oap->oap_cmd, rc);
@@ -2133,9 +2266,8 @@ static void osc_check_rpcs(const struct lu_env *env, struct client_obd *cli)
 		}
 
 		cl_object_get(obj);
-		client_obd_list_unlock(&cli->cl_loi_list_lock);
-		lu_object_ref_add_at(&obj->co_lu, &link, "check",
-				     current);
+		spin_unlock(&cli->cl_loi_list_lock);
+		lu_object_ref_add_at(&obj->co_lu, &link, "check", current);
 
 		/* attempt some read/write balancing by alternating between
 		 * reads and writes in an object.  The makes_rpc checks here
@@ -2178,11 +2310,10 @@ static void osc_check_rpcs(const struct lu_env *env, struct client_obd *cli)
 		osc_object_unlock(osc);
 
 		osc_list_maint(cli, osc);
-		lu_object_ref_del_at(&obj->co_lu, &link, "check",
-				     current);
+		lu_object_ref_del_at(&obj->co_lu, &link, "check", current);
 		cl_object_put(env, obj);
 
-		client_obd_list_lock(&cli->cl_loi_list_lock);
+		spin_lock(&cli->cl_loi_list_lock);
 	}
 }
 
@@ -2199,9 +2330,9 @@ static int osc_io_unplug0(const struct lu_env *env, struct client_obd *cli,
 		 * potential stack overrun problem. LU-2859
 		 */
 		atomic_inc(&cli->cl_lru_shrinkers);
-		client_obd_list_lock(&cli->cl_loi_list_lock);
+		spin_lock(&cli->cl_loi_list_lock);
 		osc_check_rpcs(env, cli);
-		client_obd_list_unlock(&cli->cl_loi_list_lock);
+		spin_unlock(&cli->cl_loi_list_lock);
 		atomic_dec(&cli->cl_lru_shrinkers);
 	} else {
 		CDEBUG(D_CACHE, "Queue writeback work for client %p.\n", cli);
@@ -2238,7 +2369,7 @@ int osc_prep_async_page(struct osc_object *osc, struct osc_page *ops,
 
 	oap->oap_page = page;
 	oap->oap_obj_off = offset;
-	LASSERT(!(offset & ~CFS_PAGE_MASK));
+	LASSERT(!(offset & ~PAGE_MASK));
 
 	if (!client_is_remote(exp) && capable(CFS_CAP_SYS_RESOURCE))
 		oap->oap_brw_flags = OBD_BRW_NOQUOTA;
@@ -2306,16 +2437,23 @@ int osc_queue_async_io(const struct lu_env *env, struct cl_io *io,
 			return rc;
 	}
 
+	if (osc_over_unstable_soft_limit(cli))
+		brw_flags |= OBD_BRW_SOFT_SYNC;
+
 	oap->oap_cmd = cmd;
 	oap->oap_page_off = ops->ops_from;
 	oap->oap_count = ops->ops_to - ops->ops_from;
+	/*
+	 * No need to hold a lock here,
+	 * since this page is not in any list yet.
+	 */
 	oap->oap_async_flags = 0;
 	oap->oap_brw_flags = brw_flags;
 
 	OSC_IO_DEBUG(osc, "oap %p page %p added for cmd %d\n",
 		     oap, oap->oap_page, oap->oap_cmd & OBD_BRW_RWMASK);
 
-	index = oap2cl_page(oap)->cp_index;
+	index = osc_index(oap2osc(oap));
 
 	/* Add this page into extent by the following steps:
 	 * 1. if there exists an active extent for this IO, mostly this page
@@ -2334,9 +2472,9 @@ int osc_queue_async_io(const struct lu_env *env, struct cl_io *io,
 			grants = 0;
 
 		/* it doesn't need any grant to dirty this page */
-		client_obd_list_lock(&cli->cl_loi_list_lock);
+		spin_lock(&cli->cl_loi_list_lock);
 		rc = osc_enter_cache_try(cli, oap, grants, 0);
-		client_obd_list_unlock(&cli->cl_loi_list_lock);
+		spin_unlock(&cli->cl_loi_list_lock);
 		if (rc == 0) { /* try failed */
 			grants = 0;
 			need_release = 1;
@@ -2427,21 +2565,21 @@ int osc_teardown_async_page(const struct lu_env *env,
 	LASSERT(oap->oap_magic == OAP_MAGIC);
 
 	CDEBUG(D_INFO, "teardown oap %p page %p at index %lu.\n",
-	       oap, ops, oap2cl_page(oap)->cp_index);
+	       oap, ops, osc_index(oap2osc(oap)));
 
 	osc_object_lock(obj);
 	if (!list_empty(&oap->oap_rpc_item)) {
 		CDEBUG(D_CACHE, "oap %p is not in cache.\n", oap);
 		rc = -EBUSY;
 	} else if (!list_empty(&oap->oap_pending_item)) {
-		ext = osc_extent_lookup(obj, oap2cl_page(oap)->cp_index);
+		ext = osc_extent_lookup(obj, osc_index(oap2osc(oap)));
 		/* only truncated pages are allowed to be taken out.
 		 * See osc_extent_truncate() and osc_cache_truncate_start()
 		 * for details.
 		 */
 		if (ext && ext->oe_state != OES_TRUNC) {
 			OSC_EXTENT_DUMP(D_ERROR, ext, "trunc at %lu.\n",
-					oap2cl_page(oap)->cp_index);
+					osc_index(oap2osc(oap)));
 			rc = -EBUSY;
 		}
 	}
@@ -2464,7 +2602,7 @@ int osc_flush_async_page(const struct lu_env *env, struct cl_io *io,
 	struct osc_extent *ext = NULL;
 	struct osc_object *obj = cl2osc(ops->ops_cl.cpl_obj);
 	struct cl_page *cp = ops->ops_cl.cpl_page;
-	pgoff_t	index = cp->cp_index;
+	pgoff_t            index = osc_index(ops);
 	struct osc_async_page *oap = &ops->ops_oap;
 	bool unplug = false;
 	int rc = 0;
@@ -2479,8 +2617,7 @@ int osc_flush_async_page(const struct lu_env *env, struct cl_io *io,
 	switch (ext->oe_state) {
 	case OES_RPC:
 	case OES_LOCK_DONE:
-		CL_PAGE_DEBUG(D_ERROR, env, cl_page_top(cp),
-			      "flush an in-rpc page?\n");
+		CL_PAGE_DEBUG(D_ERROR, env, cp, "flush an in-rpc page?\n");
 		LASSERT(0);
 		break;
 	case OES_LOCKING:
@@ -2506,7 +2643,7 @@ int osc_flush_async_page(const struct lu_env *env, struct cl_io *io,
 		break;
 	}
 
-	rc = cl_page_prep(env, io, cl_page_top(cp), CRT_WRITE);
+	rc = cl_page_prep(env, io, cp, CRT_WRITE);
 	if (rc)
 		goto out;
 
@@ -2550,7 +2687,7 @@ int osc_cancel_async_page(const struct lu_env *env, struct osc_page *ops)
 	struct osc_extent *ext;
 	struct osc_extent *found = NULL;
 	struct list_head *plist;
-	pgoff_t index = oap2cl_page(oap)->cp_index;
+	pgoff_t index = osc_index(ops);
 	int rc = -EBUSY;
 	int cmd;
 
@@ -2613,12 +2750,12 @@ int osc_queue_sync_pages(const struct lu_env *env, struct osc_object *obj,
 	pgoff_t end = 0;
 
 	list_for_each_entry(oap, list, oap_pending_item) {
-		struct cl_page *cp = oap2cl_page(oap);
+		pgoff_t index = osc_index(oap2osc(oap));
 
-		if (cp->cp_index > end)
-			end = cp->cp_index;
-		if (cp->cp_index < start)
-			start = cp->cp_index;
+		if (index > end)
+			end = index;
+		if (index < start)
+			start = index;
 		++page_count;
 		mppr <<= (page_count > mppr);
 	}
@@ -2633,6 +2770,7 @@ int osc_queue_sync_pages(const struct lu_env *env, struct osc_object *obj,
 	}
 
 	ext->oe_rw = !!(cmd & OBD_BRW_READ);
+	ext->oe_sync = 1;
 	ext->oe_urgent = 1;
 	ext->oe_start = start;
 	ext->oe_end = ext->oe_max_end = end;
@@ -2988,7 +3126,200 @@ int osc_cache_writeback_range(const struct lu_env *env, struct osc_object *obj,
 			result = rc;
 	}
 
-	OSC_IO_DEBUG(obj, "cache page out.\n");
+	OSC_IO_DEBUG(obj, "pageout [%lu, %lu], %d.\n", start, end, result);
+	return result;
+}
+
+/**
+ * Returns a list of pages by a given [start, end] of \a obj.
+ *
+ * \param resched If not NULL, then we give up before hogging CPU for too
+ * long and set *resched = 1, in that case caller should implement a retry
+ * logic.
+ *
+ * Gang tree lookup (radix_tree_gang_lookup()) optimization is absolutely
+ * crucial in the face of [offset, EOF] locks.
+ *
+ * Return at least one page in @queue unless there is no covered page.
+ */
+int osc_page_gang_lookup(const struct lu_env *env, struct cl_io *io,
+			 struct osc_object *osc, pgoff_t start, pgoff_t end,
+			 osc_page_gang_cbt cb, void *cbdata)
+{
+	struct osc_page *ops;
+	void            **pvec;
+	pgoff_t         idx;
+	unsigned int    nr;
+	unsigned int    i;
+	unsigned int    j;
+	int             res = CLP_GANG_OKAY;
+	bool            tree_lock = true;
+
+	idx = start;
+	pvec = osc_env_info(env)->oti_pvec;
+	spin_lock(&osc->oo_tree_lock);
+	while ((nr = radix_tree_gang_lookup(&osc->oo_tree, pvec,
+					    idx, OTI_PVEC_SIZE)) > 0) {
+		struct cl_page *page;
+		bool end_of_region = false;
+
+		for (i = 0, j = 0; i < nr; ++i) {
+			ops = pvec[i];
+			pvec[i] = NULL;
+
+			idx = osc_index(ops);
+			if (idx > end) {
+				end_of_region = true;
+				break;
+			}
+
+			page = ops->ops_cl.cpl_page;
+			LASSERT(page->cp_type == CPT_CACHEABLE);
+			if (page->cp_state == CPS_FREEING)
+				continue;
+
+			cl_page_get(page);
+			lu_ref_add_atomic(&page->cp_reference,
+					  "gang_lookup", current);
+			pvec[j++] = ops;
+		}
+		++idx;
+
+		/*
+		 * Here a delicate locking dance is performed. Current thread
+		 * holds a reference to a page, but has to own it before it
+		 * can be placed into queue. Owning implies waiting, so
+		 * radix-tree lock is to be released. After a wait one has to
+		 * check that pages weren't truncated (cl_page_own() returns
+		 * error in the latter case).
+		 */
+		spin_unlock(&osc->oo_tree_lock);
+		tree_lock = false;
+
+		for (i = 0; i < j; ++i) {
+			ops = pvec[i];
+			if (res == CLP_GANG_OKAY)
+				res = (*cb)(env, io, ops, cbdata);
+
+			page = ops->ops_cl.cpl_page;
+			lu_ref_del(&page->cp_reference, "gang_lookup", current);
+			cl_page_put(env, page);
+		}
+		if (nr < OTI_PVEC_SIZE || end_of_region)
+			break;
+
+		if (res == CLP_GANG_OKAY && need_resched())
+			res = CLP_GANG_RESCHED;
+		if (res != CLP_GANG_OKAY)
+			break;
+
+		spin_lock(&osc->oo_tree_lock);
+		tree_lock = true;
+	}
+	if (tree_lock)
+		spin_unlock(&osc->oo_tree_lock);
+	return res;
+}
+
+/**
+ * Check if page @page is covered by an extra lock or discard it.
+ */
+static int check_and_discard_cb(const struct lu_env *env, struct cl_io *io,
+				struct osc_page *ops, void *cbdata)
+{
+	struct osc_thread_info *info = osc_env_info(env);
+	struct osc_object *osc = cbdata;
+	pgoff_t index;
+
+	index = osc_index(ops);
+	if (index >= info->oti_fn_index) {
+		struct ldlm_lock *tmp;
+		struct cl_page *page = ops->ops_cl.cpl_page;
+
+		/* refresh non-overlapped index */
+		tmp = osc_dlmlock_at_pgoff(env, osc, index, 0, 0);
+		if (tmp) {
+			__u64 end = tmp->l_policy_data.l_extent.end;
+			/* Cache the first-non-overlapped index so as to skip
+			 * all pages within [index, oti_fn_index). This is safe
+			 * because if tmp lock is canceled, it will discard
+			 * these pages.
+			 */
+			info->oti_fn_index = cl_index(osc2cl(osc), end + 1);
+			if (end == OBD_OBJECT_EOF)
+				info->oti_fn_index = CL_PAGE_EOF;
+			LDLM_LOCK_PUT(tmp);
+		} else if (cl_page_own(env, io, page) == 0) {
+			/* discard the page */
+			cl_page_discard(env, io, page);
+			cl_page_disown(env, io, page);
+		} else {
+			LASSERT(page->cp_state == CPS_FREEING);
+		}
+	}
+
+	info->oti_next_index = index + 1;
+	return CLP_GANG_OKAY;
+}
+
+static int discard_cb(const struct lu_env *env, struct cl_io *io,
+		      struct osc_page *ops, void *cbdata)
+{
+	struct osc_thread_info *info = osc_env_info(env);
+	struct cl_page *page = ops->ops_cl.cpl_page;
+
+	/* page is top page. */
+	info->oti_next_index = osc_index(ops) + 1;
+	if (cl_page_own(env, io, page) == 0) {
+		KLASSERT(ergo(page->cp_type == CPT_CACHEABLE,
+			      !PageDirty(cl_page_vmpage(page))));
+
+		/* discard the page */
+		cl_page_discard(env, io, page);
+		cl_page_disown(env, io, page);
+	} else {
+		LASSERT(page->cp_state == CPS_FREEING);
+	}
+
+	return CLP_GANG_OKAY;
+}
+
+/**
+ * Discard pages protected by the given lock. This function traverses radix
+ * tree to find all covering pages and discard them. If a page is being covered
+ * by other locks, it should remain in cache.
+ *
+ * If error happens on any step, the process continues anyway (the reasoning
+ * behind this being that lock cancellation cannot be delayed indefinitely).
+ */
+int osc_lock_discard_pages(const struct lu_env *env, struct osc_object *osc,
+			   pgoff_t start, pgoff_t end, enum cl_lock_mode mode)
+{
+	struct osc_thread_info *info = osc_env_info(env);
+	struct cl_io *io = &info->oti_io;
+	osc_page_gang_cbt cb;
+	int res;
+	int result;
+
+	io->ci_obj = cl_object_top(osc2cl(osc));
+	io->ci_ignore_layout = 1;
+	result = cl_io_init(env, io, CIT_MISC, io->ci_obj);
+	if (result != 0)
+		goto out;
+
+	cb = mode == CLM_READ ? check_and_discard_cb : discard_cb;
+	info->oti_fn_index = info->oti_next_index = start;
+	do {
+		res = osc_page_gang_lookup(env, io, osc,
+					   info->oti_next_index, end, cb, osc);
+		if (info->oti_next_index > end)
+			break;
+
+		if (res == CLP_GANG_RESCHED)
+			cond_resched();
+	} while (res != CLP_GANG_OKAY);
+out:
+	cl_io_fini(env, io);
 	return result;
 }
 
diff --git a/drivers/staging/lustre/lustre/osc/osc_cl_internal.h b/drivers/staging/lustre/lustre/osc/osc_cl_internal.h
index d55d04d04..ae19d396b 100644
--- a/drivers/staging/lustre/lustre/osc/osc_cl_internal.h
+++ b/drivers/staging/lustre/lustre/osc/osc_cl_internal.h
@@ -51,7 +51,6 @@
 #include "../include/obd.h"
 /* osc_build_res_name() */
 #include "../include/cl_object.h"
-#include "../include/lclient.h"
 #include "osc_internal.h"
 
 /** \defgroup osc osc
@@ -68,6 +67,9 @@ struct osc_io {
 	struct cl_io_slice oi_cl;
 	/** true if this io is lockless. */
 	int		oi_lockless;
+	/** how many LRU pages are reserved for this IO */
+	int oi_lru_reserved;
+
 	/** active extents, we know how many bytes is going to be written,
 	 * so having an active extent will prevent it from being fragmented
 	 */
@@ -77,6 +79,8 @@ struct osc_io {
 	 */
 	struct osc_extent *oi_trunc;
 
+	/** write osc_lock for this IO, used by osc_extent_find(). */
+	struct osc_lock   *oi_write_osclock;
 	struct obd_info    oi_info;
 	struct obdo	oi_oa;
 	struct osc_async_cbargs {
@@ -100,7 +104,7 @@ struct osc_session {
 	struct osc_io       os_io;
 };
 
-#define OTI_PVEC_SIZE 64
+#define OTI_PVEC_SIZE 256
 struct osc_thread_info {
 	struct ldlm_res_id      oti_resname;
 	ldlm_policy_data_t      oti_policy;
@@ -109,7 +113,13 @@ struct osc_thread_info {
 	struct lustre_handle    oti_handle;
 	struct cl_page_list     oti_plist;
 	struct cl_io		oti_io;
-	struct cl_page	       *oti_pvec[OTI_PVEC_SIZE];
+	void			*oti_pvec[OTI_PVEC_SIZE];
+	/**
+	 * Fields used by cl_lock_discard_pages().
+	 */
+	pgoff_t			oti_next_index;
+	pgoff_t			oti_fn_index; /* first non-overlapped index */
+	struct cl_sync_io	oti_anchor;
 };
 
 struct osc_object {
@@ -125,7 +135,7 @@ struct osc_object {
 	 */
 	struct list_head	 oo_inflight[CRT_NR];
 	/**
-	 * Lock, protecting ccc_object::cob_inflight, because a seat-belt is
+	 * Lock, protecting osc_page::ops_inflight, because a seat-belt is
 	 * locked during take-off and landing.
 	 */
 	spinlock_t	   oo_seatbelt;
@@ -159,6 +169,17 @@ struct osc_object {
 	 * oo_{read|write}_pages soon.
 	 */
 	spinlock_t	    oo_lock;
+
+	/**
+	 * Radix tree for caching pages
+	 */
+	struct radix_tree_root	oo_tree;
+	spinlock_t		oo_tree_lock;
+	unsigned long		oo_npages;
+
+	/* Protect osc_lock this osc_object has */
+	spinlock_t		oo_ol_spin;
+	struct list_head	oo_ol_list;
 };
 
 static inline void osc_object_lock(struct osc_object *obj)
@@ -198,8 +219,6 @@ enum osc_lock_state {
 	OLS_ENQUEUED,
 	OLS_UPCALL_RECEIVED,
 	OLS_GRANTED,
-	OLS_RELEASED,
-	OLS_BLOCKED,
 	OLS_CANCELLED
 };
 
@@ -208,10 +227,8 @@ enum osc_lock_state {
  *
  * Interaction with DLM.
  *
- * CLIO enqueues all DLM locks through ptlrpcd (that is, in "async" mode).
- *
  * Once receive upcall is invoked, osc_lock remembers a handle of DLM lock in
- * osc_lock::ols_handle and a pointer to that lock in osc_lock::ols_lock.
+ * osc_lock::ols_handle and a pointer to that lock in osc_lock::ols_dlmlock.
  *
  * This pointer is protected through a reference, acquired by
  * osc_lock_upcall0(). Also, an additional reference is acquired by
@@ -249,26 +266,27 @@ enum osc_lock_state {
  */
 struct osc_lock {
 	struct cl_lock_slice     ols_cl;
+	/** Internal lock to protect states, etc. */
+	spinlock_t		ols_lock;
+	/** Owner sleeps on this channel for state change */
+	struct cl_sync_io	*ols_owner;
+	/** waiting list for this lock to be cancelled */
+	struct list_head	ols_waiting_list;
+	/** wait entry of ols_waiting_list */
+	struct list_head	ols_wait_entry;
+	/** list entry for osc_object::oo_ol_list */
+	struct list_head	ols_nextlock_oscobj;
+
 	/** underlying DLM lock */
-	struct ldlm_lock	*ols_lock;
-	/** lock value block */
-	struct ost_lvb	   ols_lvb;
+	struct ldlm_lock	*ols_dlmlock;
 	/** DLM flags with which osc_lock::ols_lock was enqueued */
 	__u64		    ols_flags;
 	/** osc_lock::ols_lock handle */
 	struct lustre_handle     ols_handle;
 	struct ldlm_enqueue_info ols_einfo;
 	enum osc_lock_state      ols_state;
-
-	/**
-	 * How many pages are using this lock for io, currently only used by
-	 * read-ahead. If non-zero, the underlying dlm lock won't be cancelled
-	 * during recovery to avoid deadlock. see bz16774.
-	 *
-	 * \see osc_page::ops_lock
-	 * \see osc_page_addref_lock(), osc_page_putref_lock()
-	 */
-	atomic_t	     ols_pageref;
+	/** lock value block */
+	struct ost_lvb		ols_lvb;
 
 	/**
 	 * true, if ldlm_lock_addref() was called against
@@ -298,16 +316,6 @@ struct osc_lock {
 	 * If true, osc_lock_enqueue is able to tolerate the -EUSERS error.
 	 */
 				 ols_locklessable:1,
-	/**
-	 * set by osc_lock_use() to wait until blocking AST enters into
-	 * osc_ldlm_blocking_ast0(), so that cl_lock mutex can be used for
-	 * further synchronization.
-	 */
-				 ols_ast_wait:1,
-	/**
-	 * If the data of this lock has been flushed to server side.
-	 */
-				 ols_flush:1,
 	/**
 	 * if set, the osc_lock is a glimpse lock. For glimpse locks, we treat
 	 * the EVAVAIL error as tolerable, this will make upper logic happy
@@ -321,15 +329,6 @@ struct osc_lock {
 	 * For async glimpse lock.
 	 */
 				 ols_agl:1;
-	/**
-	 * IO that owns this lock. This field is used for a dead-lock
-	 * avoidance by osc_lock_enqueue_wait().
-	 *
-	 * XXX: unfortunately, the owner of a osc_lock is not unique,
-	 * the lock may have multiple users, if the lock is granted and
-	 * then matched.
-	 */
-	struct osc_io	   *ols_owner;
 };
 
 /**
@@ -369,18 +368,15 @@ struct osc_page {
 	 * Set if the page must be transferred with OBD_BRW_SRVLOCK.
 	 */
 			      ops_srvlock:1;
-	union {
-		/**
-		 * lru page list. ops_inflight and ops_lru are exclusive so
-		 * that they can share the same data.
-		 */
-		struct list_head	      ops_lru;
-		/**
-		 * Linkage into a per-osc_object list of pages in flight. For
-		 * debugging.
-		 */
-		struct list_head	    ops_inflight;
-	};
+	/**
+	 * lru page list. See osc_lru_{del|use}() in osc_page.c for usage.
+	 */
+	struct list_head	      ops_lru;
+	/**
+	 * Linkage into a per-osc_object list of pages in flight. For
+	 * debugging.
+	 */
+	struct list_head	    ops_inflight;
 	/**
 	 * Thread that submitted this page for transfer. For debugging.
 	 */
@@ -389,16 +385,6 @@ struct osc_page {
 	 * Submit time - the time when the page is starting RPC. For debugging.
 	 */
 	unsigned long	    ops_submit_time;
-
-	/**
-	 * A lock of which we hold a reference covers this page. Only used by
-	 * read-ahead: for a readahead page, we hold it's covering lock to
-	 * prevent it from being canceled during recovery.
-	 *
-	 * \see osc_lock::ols_pageref
-	 * \see osc_page_addref_lock(), osc_page_putref_lock().
-	 */
-	struct cl_lock       *ops_lock;
 };
 
 extern struct kmem_cache *osc_lock_kmem;
@@ -417,21 +403,22 @@ extern struct lu_context_key osc_session_key;
 int osc_lock_init(const struct lu_env *env,
 		  struct cl_object *obj, struct cl_lock *lock,
 		  const struct cl_io *io);
-int osc_io_init  (const struct lu_env *env,
-		  struct cl_object *obj, struct cl_io *io);
-int osc_req_init (const struct lu_env *env, struct cl_device *dev,
-		  struct cl_req *req);
+int osc_io_init(const struct lu_env *env,
+		struct cl_object *obj, struct cl_io *io);
+int osc_req_init(const struct lu_env *env, struct cl_device *dev,
+		 struct cl_req *req);
 struct lu_object *osc_object_alloc(const struct lu_env *env,
 				   const struct lu_object_header *hdr,
 				   struct lu_device *dev);
 int osc_page_init(const struct lu_env *env, struct cl_object *obj,
-		  struct cl_page *page, struct page *vmpage);
+		  struct cl_page *page, pgoff_t ind);
 
-void osc_index2policy  (ldlm_policy_data_t *policy, const struct cl_object *obj,
-			pgoff_t start, pgoff_t end);
-int  osc_lvb_print     (const struct lu_env *env, void *cookie,
-			lu_printer_t p, const struct ost_lvb *lvb);
+void osc_index2policy(ldlm_policy_data_t *policy, const struct cl_object *obj,
+		      pgoff_t start, pgoff_t end);
+int osc_lvb_print(const struct lu_env *env, void *cookie,
+		  lu_printer_t p, const struct ost_lvb *lvb);
 
+void osc_lru_add_batch(struct client_obd *cli, struct list_head *list);
 void osc_page_submit(const struct lu_env *env, struct osc_page *opg,
 		     enum cl_req_type crt, int brw_flags);
 int osc_cancel_async_page(const struct lu_env *env, struct osc_page *ops);
@@ -441,6 +428,8 @@ int osc_prep_async_page(struct osc_object *osc, struct osc_page *ops,
 			struct page *page, loff_t offset);
 int osc_queue_async_io(const struct lu_env *env, struct cl_io *io,
 		       struct osc_page *ops);
+int osc_page_cache_add(const struct lu_env *env,
+		       const struct cl_page_slice *slice, struct cl_io *io);
 int osc_teardown_async_page(const struct lu_env *env, struct osc_object *obj,
 			    struct osc_page *ops);
 int osc_flush_async_page(const struct lu_env *env, struct cl_io *io,
@@ -457,12 +446,13 @@ int osc_cache_wait_range(const struct lu_env *env, struct osc_object *obj,
 			 pgoff_t start, pgoff_t end);
 void osc_io_unplug(const struct lu_env *env, struct client_obd *cli,
 		   struct osc_object *osc);
+int lru_queue_work(const struct lu_env *env, void *data);
 
-void osc_object_set_contended  (struct osc_object *obj);
+void osc_object_set_contended(struct osc_object *obj);
 void osc_object_clear_contended(struct osc_object *obj);
-int  osc_object_is_contended   (struct osc_object *obj);
+int  osc_object_is_contended(struct osc_object *obj);
 
-int  osc_lock_is_lockless      (const struct osc_lock *olck);
+int  osc_lock_is_lockless(const struct osc_lock *olck);
 
 /*****************************************************************************
  *
@@ -558,6 +548,11 @@ static inline struct osc_page *oap2osc(struct osc_async_page *oap)
 	return container_of0(oap, struct osc_page, ops_oap);
 }
 
+static inline pgoff_t osc_index(struct osc_page *opg)
+{
+	return opg->ops_cl.cpl_index;
+}
+
 static inline struct cl_page *oap2cl_page(struct osc_async_page *oap)
 {
 	return oap2osc(oap)->ops_cl.cpl_page;
@@ -608,7 +603,7 @@ enum osc_extent_state {
  *
  * LOCKING ORDER
  * =============
- * page lock -> client_obd_list_lock -> object lock(osc_object::oo_lock)
+ * page lock -> cl_loi_list_lock -> object lock(osc_object::oo_lock)
  */
 struct osc_extent {
 	/** red-black tree node */
@@ -627,6 +622,8 @@ struct osc_extent {
 	unsigned int       oe_intree:1,
 	/** 0 is write, 1 is read */
 			   oe_rw:1,
+	/** sync extent, queued by osc_queue_sync_pages() */
+				oe_sync:1,
 			   oe_srvlock:1,
 			   oe_memalloc:1,
 	/** an ACTIVE extent is going to be truncated, so when this extent
@@ -675,7 +672,7 @@ struct osc_extent {
 	 */
 	wait_queue_head_t	oe_waitq;
 	/** lock covering this extent */
-	struct cl_lock    *oe_osclock;
+	struct ldlm_lock	*oe_dlmlock;
 	/** terminator of this extent. Must be true if this extent is in IO. */
 	struct task_struct	*oe_owner;
 	/** return value of writeback. If somebody is waiting for this extent,
@@ -690,6 +687,14 @@ int osc_extent_finish(const struct lu_env *env, struct osc_extent *ext,
 		      int sent, int rc);
 void osc_extent_release(const struct lu_env *env, struct osc_extent *ext);
 
+int osc_lock_discard_pages(const struct lu_env *env, struct osc_object *osc,
+			   pgoff_t start, pgoff_t end, enum cl_lock_mode mode);
+
+typedef int (*osc_page_gang_cbt)(const struct lu_env *, struct cl_io *,
+				 struct osc_page *, void *);
+int osc_page_gang_lookup(const struct lu_env *env, struct cl_io *io,
+			 struct osc_object *osc, pgoff_t start, pgoff_t end,
+			 osc_page_gang_cbt cb, void *cbdata);
 /** @} osc */
 
 #endif /* OSC_CL_INTERNAL_H */
diff --git a/drivers/staging/lustre/lustre/osc/osc_internal.h b/drivers/staging/lustre/lustre/osc/osc_internal.h
index ea695c209..7fad82781 100644
--- a/drivers/staging/lustre/lustre/osc/osc_internal.h
+++ b/drivers/staging/lustre/lustre/osc/osc_internal.h
@@ -83,6 +83,12 @@ struct osc_async_page {
 #define oap_count       oap_brw_page.count
 #define oap_brw_flags   oap_brw_page.flag
 
+static inline struct osc_async_page *brw_page2oap(struct brw_page *pga)
+{
+	return (struct osc_async_page *)container_of(pga, struct osc_async_page,
+						     oap_brw_page);
+}
+
 struct osc_cache_waiter {
 	struct list_head	      ocw_entry;
 	wait_queue_head_t	     ocw_waitq;
@@ -102,12 +108,14 @@ void osc_update_next_shrink(struct client_obd *cli);
 
 extern struct ptlrpc_request_set *PTLRPCD_SET;
 
+typedef int (*osc_enqueue_upcall_f)(void *cookie, struct lustre_handle *lockh,
+				    int rc);
+
 int osc_enqueue_base(struct obd_export *exp, struct ldlm_res_id *res_id,
 		     __u64 *flags, ldlm_policy_data_t *policy,
 		     struct ost_lvb *lvb, int kms_valid,
-		     obd_enqueue_update_f upcall,
+		     osc_enqueue_upcall_f upcall,
 		     void *cookie, struct ldlm_enqueue_info *einfo,
-		     struct lustre_handle *lockh,
 		     struct ptlrpc_request_set *rqset, int async, int agl);
 int osc_cancel_base(struct lustre_handle *lockh, __u32 mode);
 
@@ -130,9 +138,11 @@ int osc_sync_base(struct obd_export *exp, struct obd_info *oinfo,
 int osc_process_config_base(struct obd_device *obd, struct lustre_cfg *cfg);
 int osc_build_rpc(const struct lu_env *env, struct client_obd *cli,
 		  struct list_head *ext_list, int cmd);
-int osc_lru_shrink(struct client_obd *cli, int target);
+int osc_lru_shrink(const struct lu_env *env, struct client_obd *cli,
+		   int target, bool force);
+int osc_lru_reclaim(struct client_obd *cli);
 
-extern spinlock_t osc_ast_guard;
+unsigned long osc_ldlm_weigh_ast(struct ldlm_lock *dlmlock);
 
 int osc_setup(struct obd_device *obd, struct lustre_cfg *lcfg);
 
@@ -173,8 +183,6 @@ static inline struct osc_device *obd2osc_dev(const struct obd_device *d)
 	return container_of0(d->obd_lu_dev, struct osc_device, od_cl.cd_lu_dev);
 }
 
-int osc_dlm_lock_pageref(struct ldlm_lock *dlm);
-
 extern struct kmem_cache *osc_quota_kmem;
 struct osc_quota_info {
 	/** linkage for quota hash table */
@@ -192,5 +200,12 @@ int osc_quotactl(struct obd_device *unused, struct obd_export *exp,
 int osc_quotacheck(struct obd_device *unused, struct obd_export *exp,
 		   struct obd_quotactl *oqctl);
 int osc_quota_poll_check(struct obd_export *exp, struct if_quotacheck *qchk);
+void osc_inc_unstable_pages(struct ptlrpc_request *req);
+void osc_dec_unstable_pages(struct ptlrpc_request *req);
+int  osc_over_unstable_soft_limit(struct client_obd *cli);
+
+struct ldlm_lock *osc_dlmlock_at_pgoff(const struct lu_env *env,
+				       struct osc_object *obj, pgoff_t index,
+				       int pending, int canceling);
 
 #endif /* OSC_INTERNAL_H */
diff --git a/drivers/staging/lustre/lustre/osc/osc_io.c b/drivers/staging/lustre/lustre/osc/osc_io.c
index 6bd0a45d8..d534b0e0e 100644
--- a/drivers/staging/lustre/lustre/osc/osc_io.c
+++ b/drivers/staging/lustre/lustre/osc/osc_io.c
@@ -68,11 +68,15 @@ static struct osc_io *cl2osc_io(const struct lu_env *env,
 	return oio;
 }
 
-static struct osc_page *osc_cl_page_osc(struct cl_page *page)
+static struct osc_page *osc_cl_page_osc(struct cl_page *page,
+					struct osc_object *osc)
 {
 	const struct cl_page_slice *slice;
 
-	slice = cl_page_at(page, &osc_device_type);
+	if (osc)
+		slice = cl_object_page_slice(&osc->oo_cl, page);
+	else
+		slice = cl_page_at(page, &osc_device_type);
 	LASSERT(slice);
 
 	return cl2osc_page(slice);
@@ -137,7 +141,7 @@ static int osc_io_submit(const struct lu_env *env,
 		io = page->cp_owner;
 		LASSERT(io);
 
-		opg = osc_cl_page_osc(page);
+		opg = osc_cl_page_osc(page, osc);
 		oap = &opg->ops_oap;
 		LASSERT(osc == oap->oap_obj);
 
@@ -164,8 +168,10 @@ static int osc_io_submit(const struct lu_env *env,
 		}
 
 		cl_page_list_move(qout, qin, page);
+		spin_lock(&oap->oap_lock);
 		oap->oap_async_flags = ASYNC_URGENT|ASYNC_READY;
 		oap->oap_async_flags |= ASYNC_COUNT_STABLE;
+		spin_unlock(&oap->oap_lock);
 
 		osc_page_submit(env, opg, crt, brw_flags);
 		list_add_tail(&oap->oap_pending_item, &list);
@@ -185,6 +191,13 @@ static int osc_io_submit(const struct lu_env *env,
 	return qout->pl_nr > 0 ? 0 : result;
 }
 
+/**
+ * This is called when a page is accessed within file in a way that creates
+ * new page, if one were missing (i.e., if there were a hole at that place in
+ * the file, or accessed page is beyond the current file size).
+ *
+ * Expand stripe KMS if necessary.
+ */
 static void osc_page_touch_at(const struct lu_env *env,
 			      struct cl_object *obj, pgoff_t idx, unsigned to)
 {
@@ -208,7 +221,8 @@ static void osc_page_touch_at(const struct lu_env *env,
 	       kms > loi->loi_kms ? "" : "not ", loi->loi_kms, kms,
 	       loi->loi_lvb.lvb_size);
 
-	valid = 0;
+	attr->cat_mtime = attr->cat_ctime = LTIME_S(CURRENT_TIME);
+	valid = CAT_MTIME | CAT_CTIME;
 	if (kms > loi->loi_kms) {
 		attr->cat_kms = kms;
 		valid |= CAT_KMS;
@@ -221,91 +235,128 @@ static void osc_page_touch_at(const struct lu_env *env,
 	cl_object_attr_unlock(obj);
 }
 
-/**
- * This is called when a page is accessed within file in a way that creates
- * new page, if one were missing (i.e., if there were a hole at that place in
- * the file, or accessed page is beyond the current file size). Examples:
- * ->commit_write() and ->nopage() methods.
- *
- * Expand stripe KMS if necessary.
- */
-static void osc_page_touch(const struct lu_env *env,
-			   struct osc_page *opage, unsigned to)
-{
-	struct cl_page *page = opage->ops_cl.cpl_page;
-	struct cl_object *obj = opage->ops_cl.cpl_obj;
-
-	osc_page_touch_at(env, obj, page->cp_index, to);
-}
-
-/**
- * Implements cl_io_operations::cio_prepare_write() method for osc layer.
- *
- * \retval -EIO transfer initiated against this osc will most likely fail
- * \retval 0    transfer initiated against this osc will most likely succeed.
- *
- * The reason for this check is to immediately return an error to the caller
- * in the case of a deactivated import. Note, that import can be deactivated
- * later, while pages, dirtied by this IO, are still in the cache, but this is
- * irrelevant, because that would still return an error to the application (if
- * it does fsync), but many applications don't do fsync because of performance
- * issues, and we wanted to return an -EIO at write time to notify the
- * application.
- */
-static int osc_io_prepare_write(const struct lu_env *env,
-				const struct cl_io_slice *ios,
-				const struct cl_page_slice *slice,
-				unsigned from, unsigned to)
+static int osc_io_commit_async(const struct lu_env *env,
+			       const struct cl_io_slice *ios,
+			       struct cl_page_list *qin, int from, int to,
+			       cl_commit_cbt cb)
 {
-	struct osc_device *dev = lu2osc_dev(slice->cpl_obj->co_lu.lo_dev);
-	struct obd_import *imp = class_exp2cliimp(dev->od_exp);
+	struct cl_io *io = ios->cis_io;
 	struct osc_io *oio = cl2osc_io(env, ios);
+	struct osc_object *osc = cl2osc(ios->cis_obj);
+	struct cl_page *page;
+	struct cl_page *last_page;
+	struct osc_page *opg;
 	int result = 0;
 
-	/*
-	 * This implements OBD_BRW_CHECK logic from old client.
-	 */
+	LASSERT(qin->pl_nr > 0);
+
+	/* Handle partial page cases */
+	last_page = cl_page_list_last(qin);
+	if (oio->oi_lockless) {
+		page = cl_page_list_first(qin);
+		if (page == last_page) {
+			cl_page_clip(env, page, from, to);
+		} else {
+			if (from != 0)
+				cl_page_clip(env, page, from, PAGE_SIZE);
+			if (to != PAGE_SIZE)
+				cl_page_clip(env, last_page, 0, to);
+		}
+	}
+
+	while (qin->pl_nr > 0) {
+		struct osc_async_page *oap;
+
+		page = cl_page_list_first(qin);
+		opg = osc_cl_page_osc(page, osc);
+		oap = &opg->ops_oap;
+
+		if (!list_empty(&oap->oap_rpc_item)) {
+			CDEBUG(D_CACHE, "Busy oap %p page %p for submit.\n",
+			       oap, opg);
+			result = -EBUSY;
+			break;
+		}
+
+		/* The page may be already in dirty cache. */
+		if (list_empty(&oap->oap_pending_item)) {
+			result = osc_page_cache_add(env, &opg->ops_cl, io);
+			if (result != 0)
+				break;
+		}
+
+		osc_page_touch_at(env, osc2cl(osc), osc_index(opg),
+				  page == last_page ? to : PAGE_SIZE);
+
+		cl_page_list_del(env, qin, page);
 
-	if (!imp || imp->imp_invalid)
-		result = -EIO;
-	if (result == 0 && oio->oi_lockless)
-		/* this page contains `invalid' data, but who cares?
-		 * nobody can access the invalid data.
-		 * in osc_io_commit_write(), we're going to write exact
-		 * [from, to) bytes of this page to OST. -jay
+		(*cb)(env, io, page);
+		/* Can't access page any more. Page can be in transfer and
+		 * complete at any time.
 		 */
-		cl_page_export(env, slice->cpl_page, 1);
+	}
 
+	/* for sync write, kernel will wait for this page to be flushed before
+	 * osc_io_end() is called, so release it earlier.
+	 * for mkwrite(), it's known there is no further pages.
+	 */
+	if (cl_io_is_sync_write(io) && oio->oi_active) {
+		osc_extent_release(env, oio->oi_active);
+		oio->oi_active = NULL;
+	}
+
+	CDEBUG(D_INFO, "%d %d\n", qin->pl_nr, result);
 	return result;
 }
 
-static int osc_io_commit_write(const struct lu_env *env,
-			       const struct cl_io_slice *ios,
-			       const struct cl_page_slice *slice,
-			       unsigned from, unsigned to)
+static int osc_io_rw_iter_init(const struct lu_env *env,
+			       const struct cl_io_slice *ios)
 {
-	struct osc_io *oio = cl2osc_io(env, ios);
-	struct osc_page *opg = cl2osc_page(slice);
-	struct osc_object *obj = cl2osc(opg->ops_cl.cpl_obj);
-	struct osc_async_page *oap = &opg->ops_oap;
+	struct cl_io *io = ios->cis_io;
+	struct osc_io *oio = osc_env_io(env);
+	struct osc_object *osc = cl2osc(ios->cis_obj);
+	struct client_obd *cli = osc_cli(osc);
+	unsigned long c;
+	unsigned int npages;
+	unsigned int max_pages;
+
+	if (cl_io_is_append(io))
+		return 0;
+
+	npages = io->u.ci_rw.crw_count >> PAGE_SHIFT;
+	if (io->u.ci_rw.crw_pos & ~PAGE_MASK)
+		++npages;
+
+	max_pages = cli->cl_max_pages_per_rpc * cli->cl_max_rpcs_in_flight;
+	if (npages > max_pages)
+		npages = max_pages;
+
+	c = atomic_read(cli->cl_lru_left);
+	if (c < npages && osc_lru_reclaim(cli) > 0)
+		c = atomic_read(cli->cl_lru_left);
+	while (c >= npages) {
+		if (c == atomic_cmpxchg(cli->cl_lru_left, c, c - npages)) {
+			oio->oi_lru_reserved = npages;
+			break;
+		}
+		c = atomic_read(cli->cl_lru_left);
+	}
 
-	LASSERT(to > 0);
-	/*
-	 * XXX instead of calling osc_page_touch() here and in
-	 * osc_io_fault_start() it might be more logical to introduce
-	 * cl_page_touch() method, that generic cl_io_commit_write() and page
-	 * fault code calls.
-	 */
-	osc_page_touch(env, cl2osc_page(slice), to);
-	if (!client_is_remote(osc_export(obj)) &&
-	    capable(CFS_CAP_SYS_RESOURCE))
-		oap->oap_brw_flags |= OBD_BRW_NOQUOTA;
+	return 0;
+}
 
-	if (oio->oi_lockless)
-		/* see osc_io_prepare_write() for lockless io handling. */
-		cl_page_clip(env, slice->cpl_page, from, to);
+static void osc_io_rw_iter_fini(const struct lu_env *env,
+				const struct cl_io_slice *ios)
+{
+	struct osc_io *oio = osc_env_io(env);
+	struct osc_object *osc = cl2osc(ios->cis_obj);
+	struct client_obd *cli = osc_cli(osc);
 
-	return 0;
+	if (oio->oi_lru_reserved > 0) {
+		atomic_add(oio->oi_lru_reserved, cli->cl_lru_left);
+		oio->oi_lru_reserved = 0;
+	}
+	oio->oi_write_osclock = NULL;
 }
 
 static int osc_io_fault_start(const struct lu_env *env,
@@ -342,31 +393,21 @@ static int osc_async_upcall(void *a, int rc)
  * Checks that there are no pages being written in the extent being truncated.
  */
 static int trunc_check_cb(const struct lu_env *env, struct cl_io *io,
-			  struct cl_page *page, void *cbdata)
+			  struct osc_page *ops, void *cbdata)
 {
-	const struct cl_page_slice *slice;
-	struct osc_page *ops;
+	struct cl_page *page = ops->ops_cl.cpl_page;
 	struct osc_async_page *oap;
 	__u64 start = *(__u64 *)cbdata;
 
-	slice = cl_page_at(page, &osc_device_type);
-	LASSERT(slice);
-	ops = cl2osc_page(slice);
 	oap = &ops->ops_oap;
-
 	if (oap->oap_cmd & OBD_BRW_WRITE &&
 	    !list_empty(&oap->oap_pending_item))
 		CL_PAGE_DEBUG(D_ERROR, env, page, "exists %llu/%s.\n",
 			      start, current->comm);
 
-	{
-		struct page *vmpage = cl_page_vmpage(env, page);
-
-		if (PageLocked(vmpage))
-			CDEBUG(D_CACHE, "page %p index %lu locked for %d.\n",
-			       ops, page->cp_index,
-			       (oap->oap_cmd & OBD_BRW_RWMASK));
-	}
+	if (PageLocked(page->cp_vmpage))
+		CDEBUG(D_CACHE, "page %p index %lu locked for %d.\n",
+		       ops, osc_index(ops), oap->oap_cmd & OBD_BRW_RWMASK);
 
 	return CLP_GANG_OKAY;
 }
@@ -385,8 +426,9 @@ static void osc_trunc_check(const struct lu_env *env, struct cl_io *io,
 	/*
 	 * Complain if there are pages in the truncated region.
 	 */
-	cl_page_gang_lookup(env, clob, io, start + partial, CL_PAGE_EOF,
-			    trunc_check_cb, (void *)&size);
+	osc_page_gang_lookup(env, io, cl2osc(clob),
+			     start + partial, CL_PAGE_EOF,
+			     trunc_check_cb, (void *)&size);
 }
 
 static int osc_io_setattr_start(const struct lu_env *env,
@@ -650,6 +692,8 @@ static const struct cl_io_operations osc_io_ops = {
 			.cio_fini   = osc_io_fini
 		},
 		[CIT_WRITE] = {
+			.cio_iter_init = osc_io_rw_iter_init,
+			.cio_iter_fini = osc_io_rw_iter_fini,
 			.cio_start  = osc_io_write_start,
 			.cio_end    = osc_io_end,
 			.cio_fini   = osc_io_fini
@@ -672,16 +716,8 @@ static const struct cl_io_operations osc_io_ops = {
 			.cio_fini   = osc_io_fini
 		}
 	},
-	.req_op = {
-		 [CRT_READ] = {
-			 .cio_submit    = osc_io_submit
-		 },
-		 [CRT_WRITE] = {
-			 .cio_submit    = osc_io_submit
-		 }
-	 },
-	.cio_prepare_write = osc_io_prepare_write,
-	.cio_commit_write  = osc_io_commit_write
+	.cio_submit                 = osc_io_submit,
+	.cio_commit_async           = osc_io_commit_async
 };
 
 /*****************************************************************************
@@ -718,8 +754,7 @@ static void osc_req_attr_set(const struct lu_env *env,
 	struct lov_oinfo *oinfo;
 	struct cl_req *clerq;
 	struct cl_page *apage; /* _some_ page in @clerq */
-	struct cl_lock *lock;  /* _some_ lock protecting @apage */
-	struct osc_lock *olck;
+	struct ldlm_lock *lock;  /* _some_ lock protecting @apage */
 	struct osc_page *opg;
 	struct obdo *oa;
 	struct ost_lvb *lvb;
@@ -753,31 +788,32 @@ static void osc_req_attr_set(const struct lu_env *env,
 		LASSERT(!list_empty(&clerq->crq_pages));
 		apage = container_of(clerq->crq_pages.next,
 				     struct cl_page, cp_flight);
-		opg = osc_cl_page_osc(apage);
-		apage = opg->ops_cl.cpl_page; /* now apage is a sub-page */
-		lock = cl_lock_at_page(env, apage->cp_obj, apage, NULL, 1, 1);
-		if (!lock) {
-			struct cl_object_header *head;
-			struct cl_lock *scan;
-
-			head = cl_object_header(apage->cp_obj);
-			list_for_each_entry(scan, &head->coh_locks, cll_linkage)
-				CL_LOCK_DEBUG(D_ERROR, env, scan,
-					      "no cover page!\n");
-			CL_PAGE_DEBUG(D_ERROR, env, apage,
-				      "dump uncover page!\n");
+		opg = osc_cl_page_osc(apage, NULL);
+		lock = osc_dlmlock_at_pgoff(env, cl2osc(obj), osc_index(opg),
+					    1, 1);
+		if (!lock && !opg->ops_srvlock) {
+			struct ldlm_resource *res;
+			struct ldlm_res_id *resname;
+
+			CL_PAGE_DEBUG(D_ERROR, env, apage, "uncovered page!\n");
+
+			resname = &osc_env_info(env)->oti_resname;
+			ostid_build_res_name(&oinfo->loi_oi, resname);
+			res = ldlm_resource_get(
+				osc_export(cl2osc(obj))->exp_obd->obd_namespace,
+				NULL, resname, LDLM_EXTENT, 0);
+			ldlm_resource_dump(D_ERROR, res);
+
 			dump_stack();
 			LBUG();
 		}
 
-		olck = osc_lock_at(lock);
-		LASSERT(ergo(opg->ops_srvlock, !olck->ols_lock));
 		/* check for lockless io. */
-		if (olck->ols_lock) {
-			oa->o_handle = olck->ols_lock->l_remote_handle;
+		if (lock) {
+			oa->o_handle = lock->l_remote_handle;
 			oa->o_valid |= OBD_MD_FLHANDLE;
+			LDLM_LOCK_PUT(lock);
 		}
-		cl_lock_put(env, lock);
 	}
 }
 
@@ -807,8 +843,9 @@ int osc_req_init(const struct lu_env *env, struct cl_device *dev,
 	if (or) {
 		cl_req_slice_add(req, &or->or_cl, dev, &osc_req_ops);
 		result = 0;
-	} else
+	} else {
 		result = -ENOMEM;
+	}
 	return result;
 }
 
diff --git a/drivers/staging/lustre/lustre/osc/osc_lock.c b/drivers/staging/lustre/lustre/osc/osc_lock.c
index 013df9787..16f9cd9d3 100644
--- a/drivers/staging/lustre/lustre/osc/osc_lock.c
+++ b/drivers/staging/lustre/lustre/osc/osc_lock.c
@@ -36,6 +36,7 @@
  * Implementation of cl_lock for OSC layer.
  *
  *   Author: Nikita Danilov <nikita.danilov@sun.com>
+ *   Author: Jinshan Xiong <jinshan.xiong@intel.com>
  */
 
 #define DEBUG_SUBSYSTEM S_OSC
@@ -50,8 +51,6 @@
  *  @{
  */
 
-#define _PAGEREF_MAGIC  (-10000000)
-
 /*****************************************************************************
  *
  * Type conversions.
@@ -62,7 +61,6 @@ static const struct cl_lock_operations osc_lock_ops;
 static const struct cl_lock_operations osc_lock_lockless_ops;
 static void osc_lock_to_lockless(const struct lu_env *env,
 				 struct osc_lock *ols, int force);
-static int osc_lock_has_pages(struct osc_lock *olck);
 
 int osc_lock_is_lockless(const struct osc_lock *olck)
 {
@@ -90,11 +88,11 @@ static struct ldlm_lock *osc_handle_ptr(struct lustre_handle *handle)
 static int osc_lock_invariant(struct osc_lock *ols)
 {
 	struct ldlm_lock *lock = osc_handle_ptr(&ols->ols_handle);
-	struct ldlm_lock *olock = ols->ols_lock;
+	struct ldlm_lock *olock = ols->ols_dlmlock;
 	int handle_used = lustre_handle_is_used(&ols->ols_handle);
 
 	if (ergo(osc_lock_is_lockless(ols),
-		 ols->ols_locklessable && !ols->ols_lock))
+		 ols->ols_locklessable && !ols->ols_dlmlock))
 		return 1;
 
 	/*
@@ -111,7 +109,7 @@ static int osc_lock_invariant(struct osc_lock *ols)
 		  ergo(!lock, !olock)))
 		return 0;
 	/*
-	 * Check that ->ols_handle and ->ols_lock are consistent, but
+	 * Check that ->ols_handle and ->ols_dlmlock are consistent, but
 	 * take into account that they are set at the different time.
 	 */
 	if (!ergo(ols->ols_state == OLS_CANCELLED,
@@ -122,7 +120,7 @@ static int osc_lock_invariant(struct osc_lock *ols)
 	 * ast.
 	 */
 	if (!ergo(olock && ols->ols_state < OLS_CANCELLED,
-		  ((olock->l_flags & LDLM_FL_DESTROYED) == 0)))
+		  !ldlm_is_destroyed(olock)))
 		return 0;
 
 	if (!ergo(ols->ols_state == OLS_GRANTED,
@@ -138,117 +136,13 @@ static int osc_lock_invariant(struct osc_lock *ols)
  *
  */
 
-/**
- * Breaks a link between osc_lock and dlm_lock.
- */
-static void osc_lock_detach(const struct lu_env *env, struct osc_lock *olck)
-{
-	struct ldlm_lock *dlmlock;
-
-	spin_lock(&osc_ast_guard);
-	dlmlock = olck->ols_lock;
-	if (!dlmlock) {
-		spin_unlock(&osc_ast_guard);
-		return;
-	}
-
-	olck->ols_lock = NULL;
-	/* wb(); --- for all who checks (ols->ols_lock != NULL) before
-	 * call to osc_lock_detach()
-	 */
-	dlmlock->l_ast_data = NULL;
-	olck->ols_handle.cookie = 0ULL;
-	spin_unlock(&osc_ast_guard);
-
-	lock_res_and_lock(dlmlock);
-	if (dlmlock->l_granted_mode == dlmlock->l_req_mode) {
-		struct cl_object *obj = olck->ols_cl.cls_obj;
-		struct cl_attr *attr = &osc_env_info(env)->oti_attr;
-		__u64 old_kms;
-
-		cl_object_attr_lock(obj);
-		/* Must get the value under the lock to avoid possible races. */
-		old_kms = cl2osc(obj)->oo_oinfo->loi_kms;
-		/* Update the kms. Need to loop all granted locks.
-		 * Not a problem for the client
-		 */
-		attr->cat_kms = ldlm_extent_shift_kms(dlmlock, old_kms);
-
-		cl_object_attr_set(env, obj, attr, CAT_KMS);
-		cl_object_attr_unlock(obj);
-	}
-	unlock_res_and_lock(dlmlock);
-
-	/* release a reference taken in osc_lock_upcall0(). */
-	LASSERT(olck->ols_has_ref);
-	lu_ref_del(&dlmlock->l_reference, "osc_lock", olck);
-	LDLM_LOCK_RELEASE(dlmlock);
-	olck->ols_has_ref = 0;
-}
-
-static int osc_lock_unhold(struct osc_lock *ols)
-{
-	int result = 0;
-
-	if (ols->ols_hold) {
-		ols->ols_hold = 0;
-		result = osc_cancel_base(&ols->ols_handle,
-					 ols->ols_einfo.ei_mode);
-	}
-	return result;
-}
-
-static int osc_lock_unuse(const struct lu_env *env,
-			  const struct cl_lock_slice *slice)
-{
-	struct osc_lock *ols = cl2osc_lock(slice);
-
-	LINVRNT(osc_lock_invariant(ols));
-
-	switch (ols->ols_state) {
-	case OLS_NEW:
-		LASSERT(!ols->ols_hold);
-		LASSERT(ols->ols_agl);
-		return 0;
-	case OLS_UPCALL_RECEIVED:
-		osc_lock_unhold(ols);
-	case OLS_ENQUEUED:
-		LASSERT(!ols->ols_hold);
-		osc_lock_detach(env, ols);
-		ols->ols_state = OLS_NEW;
-		return 0;
-	case OLS_GRANTED:
-		LASSERT(!ols->ols_glimpse);
-		LASSERT(ols->ols_hold);
-		/*
-		 * Move lock into OLS_RELEASED state before calling
-		 * osc_cancel_base() so that possible synchronous cancellation
-		 * sees that lock is released.
-		 */
-		ols->ols_state = OLS_RELEASED;
-		return osc_lock_unhold(ols);
-	default:
-		CERROR("Impossible state: %d\n", ols->ols_state);
-		LBUG();
-	}
-}
-
 static void osc_lock_fini(const struct lu_env *env,
 			  struct cl_lock_slice *slice)
 {
 	struct osc_lock *ols = cl2osc_lock(slice);
 
 	LINVRNT(osc_lock_invariant(ols));
-	/*
-	 * ->ols_hold can still be true at this point if, for example, a
-	 * thread that requested a lock was killed (and released a reference
-	 * to the lock), before reply from a server was received. In this case
-	 * lock is destroyed immediately after upcall.
-	 */
-	osc_lock_unhold(ols);
-	LASSERT(!ols->ols_lock);
-	LASSERT(atomic_read(&ols->ols_pageref) == 0 ||
-		atomic_read(&ols->ols_pageref) == _PAGEREF_MAGIC);
+	LASSERT(!ols->ols_dlmlock);
 
 	kmem_cache_free(osc_lock_kmem, ols);
 }
@@ -275,54 +169,11 @@ static __u64 osc_enq2ldlm_flags(__u32 enqflags)
 		result |= LDLM_FL_HAS_INTENT;
 	if (enqflags & CEF_DISCARD_DATA)
 		result |= LDLM_FL_AST_DISCARD_DATA;
+	if (enqflags & CEF_PEEK)
+		result |= LDLM_FL_TEST_LOCK;
 	return result;
 }
 
-/**
- * Global spin-lock protecting consistency of ldlm_lock::l_ast_data
- * pointers. Initialized in osc_init().
- */
-spinlock_t osc_ast_guard;
-
-static struct osc_lock *osc_ast_data_get(struct ldlm_lock *dlm_lock)
-{
-	struct osc_lock *olck;
-
-	lock_res_and_lock(dlm_lock);
-	spin_lock(&osc_ast_guard);
-	olck = dlm_lock->l_ast_data;
-	if (olck) {
-		struct cl_lock *lock = olck->ols_cl.cls_lock;
-		/*
-		 * If osc_lock holds a reference on ldlm lock, return it even
-		 * when cl_lock is in CLS_FREEING state. This way
-		 *
-		 *	 osc_ast_data_get(dlmlock) == NULL
-		 *
-		 * guarantees that all osc references on dlmlock were
-		 * released. osc_dlm_blocking_ast0() relies on that.
-		 */
-		if (lock->cll_state < CLS_FREEING || olck->ols_has_ref) {
-			cl_lock_get_trust(lock);
-			lu_ref_add_atomic(&lock->cll_reference,
-					  "ast", current);
-		} else
-			olck = NULL;
-	}
-	spin_unlock(&osc_ast_guard);
-	unlock_res_and_lock(dlm_lock);
-	return olck;
-}
-
-static void osc_ast_data_put(const struct lu_env *env, struct osc_lock *olck)
-{
-	struct cl_lock *lock;
-
-	lock = olck->ols_cl.cls_lock;
-	lu_ref_del(&lock->cll_reference, "ast", current);
-	cl_lock_put(env, lock);
-}
-
 /**
  * Updates object attributes from a lock value block (lvb) received together
  * with the DLM lock reply from the server. Copy of osc_update_enqueue()
@@ -333,35 +184,30 @@ static void osc_ast_data_put(const struct lu_env *env, struct osc_lock *olck)
  *
  * Called under lock and resource spin-locks.
  */
-static void osc_lock_lvb_update(const struct lu_env *env, struct osc_lock *olck,
-				int rc)
+static void osc_lock_lvb_update(const struct lu_env *env,
+				struct osc_object *osc,
+				struct ldlm_lock *dlmlock,
+				struct ost_lvb *lvb)
 {
-	struct ost_lvb *lvb;
-	struct cl_object *obj;
-	struct lov_oinfo *oinfo;
-	struct cl_attr *attr;
+	struct cl_object *obj = osc2cl(osc);
+	struct lov_oinfo *oinfo = osc->oo_oinfo;
+	struct cl_attr *attr = &osc_env_info(env)->oti_attr;
 	unsigned valid;
 
-	if (!(olck->ols_flags & LDLM_FL_LVB_READY))
-		return;
-
-	lvb = &olck->ols_lvb;
-	obj = olck->ols_cl.cls_obj;
-	oinfo = cl2osc(obj)->oo_oinfo;
-	attr = &osc_env_info(env)->oti_attr;
 	valid = CAT_BLOCKS | CAT_ATIME | CAT_CTIME | CAT_MTIME | CAT_SIZE;
+	if (!lvb)
+		lvb = dlmlock->l_lvb_data;
+
 	cl_lvb2attr(attr, lvb);
 
 	cl_object_attr_lock(obj);
-	if (rc == 0) {
-		struct ldlm_lock *dlmlock;
+	if (dlmlock) {
 		__u64 size;
 
-		dlmlock = olck->ols_lock;
-
-		/* re-grab LVB from a dlm lock under DLM spin-locks. */
-		*lvb = *(struct ost_lvb *)dlmlock->l_lvb_data;
+		check_res_locked(dlmlock->l_resource);
+		LASSERT(lvb == dlmlock->l_lvb_data);
 		size = lvb->lvb_size;
+
 		/* Extend KMS up to the end of this lock and no further
 		 * A lock on [x,y] means a KMS of up to y + 1 bytes!
 		 */
@@ -378,102 +224,67 @@ static void osc_lock_lvb_update(const struct lu_env *env, struct osc_lock *olck,
 				   dlmlock->l_policy_data.l_extent.end);
 		}
 		ldlm_lock_allow_match_locked(dlmlock);
-	} else if (rc == -ENAVAIL && olck->ols_glimpse) {
-		CDEBUG(D_INODE, "glimpsed, setting rss=%llu; leaving kms=%llu\n",
-		       lvb->lvb_size, oinfo->loi_kms);
-	} else
-		valid = 0;
-
-	if (valid != 0)
-		cl_object_attr_set(env, obj, attr, valid);
+	}
 
+	cl_object_attr_set(env, obj, attr, valid);
 	cl_object_attr_unlock(obj);
 }
 
-/**
- * Called when a lock is granted, from an upcall (when server returned a
- * granted lock), or from completion AST, when server returned a blocked lock.
- *
- * Called under lock and resource spin-locks, that are released temporarily
- * here.
- */
-static void osc_lock_granted(const struct lu_env *env, struct osc_lock *olck,
-			     struct ldlm_lock *dlmlock, int rc)
+static void osc_lock_granted(const struct lu_env *env, struct osc_lock *oscl,
+			     struct lustre_handle *lockh, bool lvb_update)
 {
-	struct ldlm_extent *ext;
-	struct cl_lock *lock;
-	struct cl_lock_descr *descr;
+	struct ldlm_lock *dlmlock;
 
-	LASSERT(dlmlock->l_granted_mode == dlmlock->l_req_mode);
+	dlmlock = ldlm_handle2lock_long(lockh, 0);
+	LASSERT(dlmlock);
 
-	if (olck->ols_state < OLS_GRANTED) {
-		lock = olck->ols_cl.cls_lock;
-		ext = &dlmlock->l_policy_data.l_extent;
-		descr = &osc_env_info(env)->oti_descr;
-		descr->cld_obj = lock->cll_descr.cld_obj;
+	/* lock reference taken by ldlm_handle2lock_long() is
+	 * owned by osc_lock and released in osc_lock_detach()
+	 */
+	lu_ref_add(&dlmlock->l_reference, "osc_lock", oscl);
+	oscl->ols_has_ref = 1;
 
-		/* XXX check that ->l_granted_mode is valid. */
-		descr->cld_mode = osc_ldlm2cl_lock(dlmlock->l_granted_mode);
-		descr->cld_start = cl_index(descr->cld_obj, ext->start);
-		descr->cld_end = cl_index(descr->cld_obj, ext->end);
-		descr->cld_gid = ext->gid;
-		/*
-		 * tell upper layers the extent of the lock that was actually
-		 * granted
-		 */
-		olck->ols_state = OLS_GRANTED;
-		osc_lock_lvb_update(env, olck, rc);
-
-		/* release DLM spin-locks to allow cl_lock_{modify,signal}()
-		 * to take a semaphore on a parent lock. This is safe, because
-		 * spin-locks are needed to protect consistency of
-		 * dlmlock->l_*_mode and LVB, and we have finished processing
-		 * them.
+	LASSERT(!oscl->ols_dlmlock);
+	oscl->ols_dlmlock = dlmlock;
+
+	/* This may be a matched lock for glimpse request, do not hold
+	 * lock reference in that case.
+	 */
+	if (!oscl->ols_glimpse) {
+		/* hold a refc for non glimpse lock which will
+		 * be released in osc_lock_cancel()
 		 */
-		unlock_res_and_lock(dlmlock);
-		cl_lock_modify(env, lock, descr);
-		cl_lock_signal(env, lock);
-		LINVRNT(osc_lock_invariant(olck));
-		lock_res_and_lock(dlmlock);
+		lustre_handle_copy(&oscl->ols_handle, lockh);
+		ldlm_lock_addref(lockh, oscl->ols_einfo.ei_mode);
+		oscl->ols_hold = 1;
 	}
-}
-
-static void osc_lock_upcall0(const struct lu_env *env, struct osc_lock *olck)
-
-{
-	struct ldlm_lock *dlmlock;
-
-	dlmlock = ldlm_handle2lock_long(&olck->ols_handle, 0);
-	LASSERT(dlmlock);
 
+	/* Lock must have been granted. */
 	lock_res_and_lock(dlmlock);
-	spin_lock(&osc_ast_guard);
-	LASSERT(dlmlock->l_ast_data == olck);
-	LASSERT(!olck->ols_lock);
-	olck->ols_lock = dlmlock;
-	spin_unlock(&osc_ast_guard);
+	if (dlmlock->l_granted_mode == dlmlock->l_req_mode) {
+		struct ldlm_extent *ext = &dlmlock->l_policy_data.l_extent;
+		struct cl_lock_descr *descr = &oscl->ols_cl.cls_lock->cll_descr;
 
-	/*
-	 * Lock might be not yet granted. In this case, completion ast
-	 * (osc_ldlm_completion_ast()) comes later and finishes lock
-	 * granting.
-	 */
-	if (dlmlock->l_granted_mode == dlmlock->l_req_mode)
-		osc_lock_granted(env, olck, dlmlock, 0);
-	unlock_res_and_lock(dlmlock);
+		/* extend the lock extent, otherwise it will have problem when
+		 * we decide whether to grant a lockless lock.
+		 */
+		descr->cld_mode = osc_ldlm2cl_lock(dlmlock->l_granted_mode);
+		descr->cld_start = cl_index(descr->cld_obj, ext->start);
+		descr->cld_end = cl_index(descr->cld_obj, ext->end);
+		descr->cld_gid = ext->gid;
 
-	/*
-	 * osc_enqueue_interpret() decrefs asynchronous locks, counter
-	 * this.
-	 */
-	ldlm_lock_addref(&olck->ols_handle, olck->ols_einfo.ei_mode);
-	olck->ols_hold = 1;
+		/* no lvb update for matched lock */
+		if (lvb_update) {
+			LASSERT(oscl->ols_flags & LDLM_FL_LVB_READY);
+			osc_lock_lvb_update(env, cl2osc(oscl->ols_cl.cls_obj),
+					    dlmlock, NULL);
+		}
+		LINVRNT(osc_lock_invariant(oscl));
+	}
+	unlock_res_and_lock(dlmlock);
 
-	/* lock reference taken by ldlm_handle2lock_long() is owned by
-	 * osc_lock and released in osc_lock_detach()
-	 */
-	lu_ref_add(&dlmlock->l_reference, "osc_lock", olck);
-	olck->ols_has_ref = 1;
+	LASSERT(oscl->ols_state != OLS_GRANTED);
+	oscl->ols_state = OLS_GRANTED;
 }
 
 /**
@@ -481,143 +292,124 @@ static void osc_lock_upcall0(const struct lu_env *env, struct osc_lock *olck)
  * received from a server, or after osc_enqueue_base() matched a local DLM
  * lock.
  */
-static int osc_lock_upcall(void *cookie, int errcode)
+static int osc_lock_upcall(void *cookie, struct lustre_handle *lockh,
+			   int errcode)
 {
-	struct osc_lock	*olck = cookie;
-	struct cl_lock_slice *slice = &olck->ols_cl;
-	struct cl_lock *lock = slice->cls_lock;
+	struct osc_lock *oscl = cookie;
+	struct cl_lock_slice *slice = &oscl->ols_cl;
 	struct lu_env *env;
 	struct cl_env_nest nest;
+	int rc;
 
 	env = cl_env_nested_get(&nest);
-	if (!IS_ERR(env)) {
-		int rc;
+	/* should never happen, similar to osc_ldlm_blocking_ast(). */
+	LASSERT(!IS_ERR(env));
+
+	rc = ldlm_error2errno(errcode);
+	if (oscl->ols_state == OLS_ENQUEUED) {
+		oscl->ols_state = OLS_UPCALL_RECEIVED;
+	} else if (oscl->ols_state == OLS_CANCELLED) {
+		rc = -EIO;
+	} else {
+		CERROR("Impossible state: %d\n", oscl->ols_state);
+		LBUG();
+	}
 
-		cl_lock_mutex_get(env, lock);
+	if (rc == 0)
+		osc_lock_granted(env, oscl, lockh, errcode == ELDLM_OK);
 
-		LASSERT(lock->cll_state >= CLS_QUEUING);
-		if (olck->ols_state == OLS_ENQUEUED) {
-			olck->ols_state = OLS_UPCALL_RECEIVED;
-			rc = ldlm_error2errno(errcode);
-		} else if (olck->ols_state == OLS_CANCELLED) {
-			rc = -EIO;
-		} else {
-			CERROR("Impossible state: %d\n", olck->ols_state);
-			LBUG();
-		}
-		if (rc) {
-			struct ldlm_lock *dlmlock;
-
-			dlmlock = ldlm_handle2lock(&olck->ols_handle);
-			if (dlmlock) {
-				lock_res_and_lock(dlmlock);
-				spin_lock(&osc_ast_guard);
-				LASSERT(!olck->ols_lock);
-				dlmlock->l_ast_data = NULL;
-				olck->ols_handle.cookie = 0ULL;
-				spin_unlock(&osc_ast_guard);
-				ldlm_lock_fail_match_locked(dlmlock);
-				unlock_res_and_lock(dlmlock);
-				LDLM_LOCK_PUT(dlmlock);
-			}
-		} else {
-			if (olck->ols_glimpse)
-				olck->ols_glimpse = 0;
-			osc_lock_upcall0(env, olck);
-		}
+	/* Error handling, some errors are tolerable. */
+	if (oscl->ols_locklessable && rc == -EUSERS) {
+		/* This is a tolerable error, turn this lock into
+		 * lockless lock.
+		 */
+		osc_object_set_contended(cl2osc(slice->cls_obj));
+		LASSERT(slice->cls_ops == &osc_lock_ops);
+
+		/* Change this lock to ldlmlock-less lock. */
+		osc_lock_to_lockless(env, oscl, 1);
+		oscl->ols_state = OLS_GRANTED;
+		rc = 0;
+	} else if (oscl->ols_glimpse && rc == -ENAVAIL) {
+		LASSERT(oscl->ols_flags & LDLM_FL_LVB_READY);
+		osc_lock_lvb_update(env, cl2osc(slice->cls_obj),
+				    NULL, &oscl->ols_lvb);
+		/* Hide the error. */
+		rc = 0;
+	}
 
-		/* Error handling, some errors are tolerable. */
-		if (olck->ols_locklessable && rc == -EUSERS) {
-			/* This is a tolerable error, turn this lock into
-			 * lockless lock.
-			 */
-			osc_object_set_contended(cl2osc(slice->cls_obj));
-			LASSERT(slice->cls_ops == &osc_lock_ops);
+	if (oscl->ols_owner)
+		cl_sync_io_note(env, oscl->ols_owner, rc);
+	cl_env_nested_put(&nest, env);
 
-			/* Change this lock to ldlmlock-less lock. */
-			osc_lock_to_lockless(env, olck, 1);
-			olck->ols_state = OLS_GRANTED;
-			rc = 0;
-		} else if (olck->ols_glimpse && rc == -ENAVAIL) {
-			osc_lock_lvb_update(env, olck, rc);
-			cl_lock_delete(env, lock);
-			/* Hide the error. */
-			rc = 0;
-		}
-
-		if (rc == 0) {
-			/* For AGL case, the RPC sponsor may exits the cl_lock
-			*  processing without wait() called before related OSC
-			*  lock upcall(). So update the lock status according
-			*  to the enqueue result inside AGL upcall().
-			*/
-			if (olck->ols_agl) {
-				lock->cll_flags |= CLF_FROM_UPCALL;
-				cl_wait_try(env, lock);
-				lock->cll_flags &= ~CLF_FROM_UPCALL;
-				if (!olck->ols_glimpse)
-					olck->ols_agl = 0;
-			}
-			cl_lock_signal(env, lock);
-			/* del user for lock upcall cookie */
-			cl_unuse_try(env, lock);
-		} else {
-			/* del user for lock upcall cookie */
-			cl_lock_user_del(env, lock);
-			cl_lock_error(env, lock, rc);
-		}
+	return rc;
+}
 
-		/* release cookie reference, acquired by osc_lock_enqueue() */
-		cl_lock_hold_release(env, lock, "upcall", lock);
-		cl_lock_mutex_put(env, lock);
+static int osc_lock_upcall_agl(void *cookie, struct lustre_handle *lockh,
+			       int errcode)
+{
+	struct osc_object *osc = cookie;
+	struct ldlm_lock *dlmlock;
+	struct lu_env *env;
+	struct cl_env_nest nest;
 
-		lu_ref_del(&lock->cll_reference, "upcall", lock);
-		/* This maybe the last reference, so must be called after
-		 * cl_lock_mutex_put().
-		 */
-		cl_lock_put(env, lock);
+	env = cl_env_nested_get(&nest);
+	LASSERT(!IS_ERR(env));
 
-		cl_env_nested_put(&nest, env);
-	} else {
-		/* should never happen, similar to osc_ldlm_blocking_ast(). */
-		LBUG();
+	if (errcode == ELDLM_LOCK_MATCHED) {
+		errcode = ELDLM_OK;
+		goto out;
 	}
-	return errcode;
+
+	if (errcode != ELDLM_OK)
+		goto out;
+
+	dlmlock = ldlm_handle2lock(lockh);
+	LASSERT(dlmlock);
+
+	lock_res_and_lock(dlmlock);
+	LASSERT(dlmlock->l_granted_mode == dlmlock->l_req_mode);
+
+	/* there is no osc_lock associated with AGL lock */
+	osc_lock_lvb_update(env, osc, dlmlock, NULL);
+
+	unlock_res_and_lock(dlmlock);
+	LDLM_LOCK_PUT(dlmlock);
+
+out:
+	cl_object_put(env, osc2cl(osc));
+	cl_env_nested_put(&nest, env);
+	return ldlm_error2errno(errcode);
 }
 
-/**
- * Core of osc_dlm_blocking_ast() logic.
- */
-static void osc_lock_blocking(const struct lu_env *env,
-			      struct ldlm_lock *dlmlock,
-			      struct osc_lock *olck, int blocking)
+static int osc_lock_flush(struct osc_object *obj, pgoff_t start, pgoff_t end,
+			  enum cl_lock_mode mode, int discard)
 {
-	struct cl_lock *lock = olck->ols_cl.cls_lock;
+	struct lu_env *env;
+	struct cl_env_nest nest;
+	int rc = 0;
+	int rc2 = 0;
 
-	LASSERT(olck->ols_lock == dlmlock);
-	CLASSERT(OLS_BLOCKED < OLS_CANCELLED);
-	LASSERT(!osc_lock_is_lockless(olck));
+	env = cl_env_nested_get(&nest);
+	if (IS_ERR(env))
+		return PTR_ERR(env);
+
+	if (mode == CLM_WRITE) {
+		rc = osc_cache_writeback_range(env, obj, start, end, 1,
+					       discard);
+		CDEBUG(D_CACHE, "object %p: [%lu -> %lu] %d pages were %s.\n",
+		       obj, start, end, rc,
+		       discard ? "discarded" : "written back");
+		if (rc > 0)
+			rc = 0;
+	}
 
-	/*
-	 * Lock might be still addref-ed here, if e.g., blocking ast
-	 * is sent for a failed lock.
-	 */
-	osc_lock_unhold(olck);
+	rc2 = osc_lock_discard_pages(env, obj, start, end, mode);
+	if (rc == 0 && rc2 < 0)
+		rc = rc2;
 
-	if (blocking && olck->ols_state < OLS_BLOCKED)
-		/*
-		 * Move osc_lock into OLS_BLOCKED before canceling the lock,
-		 * because it recursively re-enters osc_lock_blocking(), with
-		 * the state set to OLS_CANCELLED.
-		 */
-		olck->ols_state = OLS_BLOCKED;
-	/*
-	 * cancel and destroy lock at least once no matter how blocking ast is
-	 * entered (see comment above osc_ldlm_blocking_ast() for use
-	 * cases). cl_lock_cancel() and cl_lock_delete() are idempotent.
-	 */
-	cl_lock_cancel(env, lock);
-	cl_lock_delete(env, lock);
+	cl_env_nested_put(&nest, env);
+	return rc;
 }
 
 /**
@@ -628,65 +420,63 @@ static int osc_dlm_blocking_ast0(const struct lu_env *env,
 				 struct ldlm_lock *dlmlock,
 				 void *data, int flag)
 {
-	struct osc_lock *olck;
-	struct cl_lock *lock;
-	int result;
-	int cancel;
-
-	LASSERT(flag == LDLM_CB_BLOCKING || flag == LDLM_CB_CANCELING);
-
-	cancel = 0;
-	olck = osc_ast_data_get(dlmlock);
-	if (olck) {
-		lock = olck->ols_cl.cls_lock;
-		cl_lock_mutex_get(env, lock);
-		LINVRNT(osc_lock_invariant(olck));
-		if (olck->ols_ast_wait) {
-			/* wake up osc_lock_use() */
-			cl_lock_signal(env, lock);
-			olck->ols_ast_wait = 0;
-		}
-		/*
-		 * Lock might have been canceled while this thread was
-		 * sleeping for lock mutex, but olck is pinned in memory.
-		 */
-		if (olck == dlmlock->l_ast_data) {
-			/*
-			 * NOTE: DLM sends blocking AST's for failed locks
-			 *       (that are still in pre-OLS_GRANTED state)
-			 *       too, and they have to be canceled otherwise
-			 *       DLM lock is never destroyed and stuck in
-			 *       the memory.
-			 *
-			 *       Alternatively, ldlm_cli_cancel() can be
-			 *       called here directly for osc_locks with
-			 *       ols_state < OLS_GRANTED to maintain an
-			 *       invariant that ->clo_cancel() is only called
-			 *       for locks that were granted.
-			 */
-			LASSERT(data == olck);
-			osc_lock_blocking(env, dlmlock,
-					  olck, flag == LDLM_CB_BLOCKING);
-		} else
-			cancel = 1;
-		cl_lock_mutex_put(env, lock);
-		osc_ast_data_put(env, olck);
-	} else
-		/*
-		 * DLM lock exists, but there is no cl_lock attached to it.
-		 * This is a `normal' race. cl_object and its cl_lock's can be
-		 * removed by memory pressure, together with all pages.
+	struct cl_object *obj = NULL;
+	int result = 0;
+	int discard;
+	enum cl_lock_mode mode = CLM_READ;
+
+	LASSERT(flag == LDLM_CB_CANCELING);
+
+	lock_res_and_lock(dlmlock);
+	if (dlmlock->l_granted_mode != dlmlock->l_req_mode) {
+		dlmlock->l_ast_data = NULL;
+		unlock_res_and_lock(dlmlock);
+		return 0;
+	}
+
+	discard = ldlm_is_discard_data(dlmlock);
+	if (dlmlock->l_granted_mode & (LCK_PW | LCK_GROUP))
+		mode = CLM_WRITE;
+
+	if (dlmlock->l_ast_data) {
+		obj = osc2cl(dlmlock->l_ast_data);
+		dlmlock->l_ast_data = NULL;
+
+		cl_object_get(obj);
+	}
+
+	unlock_res_and_lock(dlmlock);
+
+	/* if l_ast_data is NULL, the dlmlock was enqueued by AGL or
+	 * the object has been destroyed.
+	 */
+	if (obj) {
+		struct ldlm_extent *extent = &dlmlock->l_policy_data.l_extent;
+		struct cl_attr *attr = &osc_env_info(env)->oti_attr;
+		__u64 old_kms;
+
+		/* Destroy pages covered by the extent of the DLM lock */
+		result = osc_lock_flush(cl2osc(obj),
+					cl_index(obj, extent->start),
+					cl_index(obj, extent->end),
+					mode, discard);
+
+		/* losing a lock, update kms */
+		lock_res_and_lock(dlmlock);
+		cl_object_attr_lock(obj);
+		/* Must get the value under the lock to avoid race. */
+		old_kms = cl2osc(obj)->oo_oinfo->loi_kms;
+		/* Update the kms. Need to loop all granted locks.
+		 * Not a problem for the client
 		 */
-		cancel = (flag == LDLM_CB_BLOCKING);
+		attr->cat_kms = ldlm_extent_shift_kms(dlmlock, old_kms);
 
-	if (cancel) {
-		struct lustre_handle *lockh;
+		cl_object_attr_set(env, obj, attr, CAT_KMS);
+		cl_object_attr_unlock(obj);
+		unlock_res_and_lock(dlmlock);
 
-		lockh = &osc_env_info(env)->oti_handle;
-		ldlm_lock2handle(dlmlock, lockh);
-		result = ldlm_cli_cancel(lockh, LCF_ASYNC);
-	} else
-		result = 0;
+		cl_object_put(env, obj);
+	}
 	return result;
 }
 
@@ -736,107 +526,52 @@ static int osc_ldlm_blocking_ast(struct ldlm_lock *dlmlock,
 				 struct ldlm_lock_desc *new, void *data,
 				 int flag)
 {
-	struct lu_env *env;
-	struct cl_env_nest nest;
-	int result;
+	int result = 0;
 
-	/*
-	 * This can be called in the context of outer IO, e.g.,
-	 *
-	 *     cl_enqueue()->...
-	 *       ->osc_enqueue_base()->...
-	 *	 ->ldlm_prep_elc_req()->...
-	 *	   ->ldlm_cancel_callback()->...
-	 *	     ->osc_ldlm_blocking_ast()
-	 *
-	 * new environment has to be created to not corrupt outer context.
-	 */
-	env = cl_env_nested_get(&nest);
-	if (!IS_ERR(env)) {
-		result = osc_dlm_blocking_ast0(env, dlmlock, data, flag);
-		cl_env_nested_put(&nest, env);
-	} else {
-		result = PTR_ERR(env);
-		/*
-		 * XXX This should never happen, as cl_lock is
-		 * stuck. Pre-allocated environment a la vvp_inode_fini_env
-		 * should be used.
-		 */
-		LBUG();
-	}
-	if (result != 0) {
+	switch (flag) {
+	case LDLM_CB_BLOCKING: {
+		struct lustre_handle lockh;
+
+		ldlm_lock2handle(dlmlock, &lockh);
+		result = ldlm_cli_cancel(&lockh, LCF_ASYNC);
 		if (result == -ENODATA)
 			result = 0;
-		else
-			CERROR("BAST failed: %d\n", result);
+		break;
 	}
-	return result;
-}
+	case LDLM_CB_CANCELING: {
+		struct lu_env *env;
+		struct cl_env_nest nest;
 
-static int osc_ldlm_completion_ast(struct ldlm_lock *dlmlock,
-				   __u64 flags, void *data)
-{
-	struct cl_env_nest nest;
-	struct lu_env *env;
-	struct osc_lock *olck;
-	struct cl_lock *lock;
-	int result;
-	int dlmrc;
-
-	/* first, do dlm part of the work */
-	dlmrc = ldlm_completion_ast_async(dlmlock, flags, data);
-	/* then, notify cl_lock */
-	env = cl_env_nested_get(&nest);
-	if (!IS_ERR(env)) {
-		olck = osc_ast_data_get(dlmlock);
-		if (olck) {
-			lock = olck->ols_cl.cls_lock;
-			cl_lock_mutex_get(env, lock);
-			/*
-			 * ldlm_handle_cp_callback() copied LVB from request
-			 * to lock->l_lvb_data, store it in osc_lock.
-			 */
-			LASSERT(dlmlock->l_lvb_data);
-			lock_res_and_lock(dlmlock);
-			olck->ols_lvb = *(struct ost_lvb *)dlmlock->l_lvb_data;
-			if (!olck->ols_lock) {
-				/*
-				 * upcall (osc_lock_upcall()) hasn't yet been
-				 * called. Do nothing now, upcall will bind
-				 * olck to dlmlock and signal the waiters.
-				 *
-				 * This maintains an invariant that osc_lock
-				 * and ldlm_lock are always bound when
-				 * osc_lock is in OLS_GRANTED state.
-				 */
-			} else if (dlmlock->l_granted_mode ==
-				   dlmlock->l_req_mode) {
-				osc_lock_granted(env, olck, dlmlock, dlmrc);
-			}
-			unlock_res_and_lock(dlmlock);
+		/*
+		 * This can be called in the context of outer IO, e.g.,
+		 *
+		 *     osc_enqueue_base()->...
+		 *	 ->ldlm_prep_elc_req()->...
+		 *	   ->ldlm_cancel_callback()->...
+		 *	     ->osc_ldlm_blocking_ast()
+		 *
+		 * new environment has to be created to not corrupt outer
+		 * context.
+		 */
+		env = cl_env_nested_get(&nest);
+		if (IS_ERR(env)) {
+			result = PTR_ERR(env);
+			break;
+		}
 
-			if (dlmrc != 0) {
-				CL_LOCK_DEBUG(D_ERROR, env, lock,
-					      "dlmlock returned %d\n", dlmrc);
-				cl_lock_error(env, lock, dlmrc);
-			}
-			cl_lock_mutex_put(env, lock);
-			osc_ast_data_put(env, olck);
-			result = 0;
-		} else
-			result = -ELDLM_NO_LOCK_DATA;
+		result = osc_dlm_blocking_ast0(env, dlmlock, data, flag);
 		cl_env_nested_put(&nest, env);
-	} else
-		result = PTR_ERR(env);
-	return dlmrc ?: result;
+		break;
+		}
+	default:
+		LBUG();
+	}
+	return result;
 }
 
 static int osc_ldlm_glimpse_ast(struct ldlm_lock *dlmlock, void *data)
 {
 	struct ptlrpc_request *req = data;
-	struct osc_lock	*olck;
-	struct cl_lock *lock;
-	struct cl_object *obj;
 	struct cl_env_nest nest;
 	struct lu_env *env;
 	struct ost_lvb *lvb;
@@ -847,14 +582,16 @@ static int osc_ldlm_glimpse_ast(struct ldlm_lock *dlmlock, void *data)
 
 	env = cl_env_nested_get(&nest);
 	if (!IS_ERR(env)) {
-		/* osc_ast_data_get() has to go after environment is
-		 * allocated, because osc_ast_data() acquires a
-		 * reference to a lock, and it can only be released in
-		 * environment.
-		 */
-		olck = osc_ast_data_get(dlmlock);
-		if (olck) {
-			lock = olck->ols_cl.cls_lock;
+		struct cl_object *obj = NULL;
+
+		lock_res_and_lock(dlmlock);
+		if (dlmlock->l_ast_data) {
+			obj = osc2cl(dlmlock->l_ast_data);
+			cl_object_get(obj);
+		}
+		unlock_res_and_lock(dlmlock);
+
+		if (obj) {
 			/* Do not grab the mutex of cl_lock for glimpse.
 			 * See LU-1274 for details.
 			 * BTW, it's okay for cl_lock to be cancelled during
@@ -869,7 +606,6 @@ static int osc_ldlm_glimpse_ast(struct ldlm_lock *dlmlock, void *data)
 			result = req_capsule_server_pack(cap);
 			if (result == 0) {
 				lvb = req_capsule_server_get(cap, &RMF_DLM_LVB);
-				obj = lock->cll_descr.cld_obj;
 				result = cl_object_glimpse(env, obj, lvb);
 			}
 			if (!exp_connect_lvb_type(req->rq_export))
@@ -877,7 +613,7 @@ static int osc_ldlm_glimpse_ast(struct ldlm_lock *dlmlock, void *data)
 						   &RMF_DLM_LVB,
 						   sizeof(struct ost_lvb_v1),
 						   RCL_SERVER);
-			osc_ast_data_put(env, olck);
+			cl_object_put(env, obj);
 		} else {
 			/*
 			 * These errors are normal races, so we don't want to
@@ -888,44 +624,123 @@ static int osc_ldlm_glimpse_ast(struct ldlm_lock *dlmlock, void *data)
 			result = -ELDLM_NO_LOCK_DATA;
 		}
 		cl_env_nested_put(&nest, env);
-	} else
+	} else {
 		result = PTR_ERR(env);
+	}
 	req->rq_status = result;
 	return result;
 }
 
-static unsigned long osc_lock_weigh(const struct lu_env *env,
-				    const struct cl_lock_slice *slice)
+static int weigh_cb(const struct lu_env *env, struct cl_io *io,
+		    struct osc_page *ops, void *cbdata)
 {
-	/*
-	 * don't need to grab coh_page_guard since we don't care the exact #
-	 * of pages..
-	 */
-	return cl_object_header(slice->cls_obj)->coh_pages;
+	struct cl_page *page = ops->ops_cl.cpl_page;
+
+	if (cl_page_is_vmlocked(env, page) ||
+	    PageDirty(page->cp_vmpage) || PageWriteback(page->cp_vmpage)
+	   ) {
+		(*(unsigned long *)cbdata)++;
+		return CLP_GANG_ABORT;
+	}
+
+	return CLP_GANG_OKAY;
 }
 
-static void osc_lock_build_einfo(const struct lu_env *env,
-				 const struct cl_lock *clock,
-				 struct osc_lock *lock,
-				 struct ldlm_enqueue_info *einfo)
+static unsigned long osc_lock_weight(const struct lu_env *env,
+				     struct osc_object *oscobj,
+				     struct ldlm_extent *extent)
+{
+	struct cl_io *io = &osc_env_info(env)->oti_io;
+	struct cl_object *obj = cl_object_top(&oscobj->oo_cl);
+	unsigned long npages = 0;
+	int result;
+
+	io->ci_obj = obj;
+	io->ci_ignore_layout = 1;
+	result = cl_io_init(env, io, CIT_MISC, io->ci_obj);
+	if (result != 0)
+		return result;
+
+	do {
+		result = osc_page_gang_lookup(env, io, oscobj,
+					      cl_index(obj, extent->start),
+					      cl_index(obj, extent->end),
+					      weigh_cb, (void *)&npages);
+		if (result == CLP_GANG_ABORT)
+			break;
+		if (result == CLP_GANG_RESCHED)
+			cond_resched();
+	} while (result != CLP_GANG_OKAY);
+	cl_io_fini(env, io);
+
+	return npages;
+}
+
+/**
+ * Get the weight of dlm lock for early cancellation.
+ */
+unsigned long osc_ldlm_weigh_ast(struct ldlm_lock *dlmlock)
 {
-	enum cl_lock_mode mode;
+	struct cl_env_nest       nest;
+	struct lu_env           *env;
+	struct osc_object	*obj;
+	struct osc_lock		*oscl;
+	unsigned long            weight;
+	bool			 found = false;
+
+	might_sleep();
+	/*
+	 * osc_ldlm_weigh_ast has a complex context since it might be called
+	 * because of lock canceling, or from user's input. We have to make
+	 * a new environment for it. Probably it is implementation safe to use
+	 * the upper context because cl_lock_put don't modify environment
+	 * variables. But just in case ..
+	 */
+	env = cl_env_nested_get(&nest);
+	if (IS_ERR(env))
+		/* Mostly because lack of memory, do not eliminate this lock */
+		return 1;
 
-	mode = clock->cll_descr.cld_mode;
-	if (mode == CLM_PHANTOM)
+	LASSERT(dlmlock->l_resource->lr_type == LDLM_EXTENT);
+	obj = dlmlock->l_ast_data;
+	if (obj) {
+		weight = 1;
+		goto out;
+	}
+
+	spin_lock(&obj->oo_ol_spin);
+	list_for_each_entry(oscl, &obj->oo_ol_list, ols_nextlock_oscobj) {
+		if (oscl->ols_dlmlock && oscl->ols_dlmlock != dlmlock)
+			continue;
+		found = true;
+	}
+	spin_unlock(&obj->oo_ol_spin);
+	if (found) {
 		/*
-		 * For now, enqueue all glimpse locks in read mode. In the
-		 * future, client might choose to enqueue LCK_PW lock for
-		 * glimpse on a file opened for write.
+		 * If the lock is being used by an IO, definitely not cancel it.
 		 */
-		mode = CLM_READ;
+		weight = 1;
+		goto out;
+	}
+
+	weight = osc_lock_weight(env, obj, &dlmlock->l_policy_data.l_extent);
+
+out:
+	cl_env_nested_put(&nest, env);
+	return weight;
+}
 
+static void osc_lock_build_einfo(const struct lu_env *env,
+				 const struct cl_lock *lock,
+				 struct osc_object *osc,
+				 struct ldlm_enqueue_info *einfo)
+{
 	einfo->ei_type = LDLM_EXTENT;
-	einfo->ei_mode = osc_cl_lock2ldlm(mode);
+	einfo->ei_mode   = osc_cl_lock2ldlm(lock->cll_descr.cld_mode);
 	einfo->ei_cb_bl = osc_ldlm_blocking_ast;
-	einfo->ei_cb_cp = osc_ldlm_completion_ast;
+	einfo->ei_cb_cp  = ldlm_completion_ast;
 	einfo->ei_cb_gl = osc_ldlm_glimpse_ast;
-	einfo->ei_cbdata = lock; /* value to be put into ->l_ast_data */
+	einfo->ei_cbdata = osc; /* value to be put into ->l_ast_data */
 }
 
 /**
@@ -981,113 +796,100 @@ static void osc_lock_to_lockless(const struct lu_env *env,
 	LASSERT(ergo(ols->ols_glimpse, !osc_lock_is_lockless(ols)));
 }
 
-static int osc_lock_compatible(const struct osc_lock *qing,
-			       const struct osc_lock *qed)
+static bool osc_lock_compatible(const struct osc_lock *qing,
+				const struct osc_lock *qed)
 {
-	enum cl_lock_mode qing_mode;
-	enum cl_lock_mode qed_mode;
+	struct cl_lock_descr *qed_descr = &qed->ols_cl.cls_lock->cll_descr;
+	struct cl_lock_descr *qing_descr = &qing->ols_cl.cls_lock->cll_descr;
 
-	qing_mode = qing->ols_cl.cls_lock->cll_descr.cld_mode;
-	if (qed->ols_glimpse &&
-	    (qed->ols_state >= OLS_UPCALL_RECEIVED || qing_mode == CLM_READ))
-		return 1;
+	if (qed->ols_glimpse)
+		return true;
+
+	if (qing_descr->cld_mode == CLM_READ && qed_descr->cld_mode == CLM_READ)
+		return true;
+
+	if (qed->ols_state < OLS_GRANTED)
+		return true;
+
+	if (qed_descr->cld_mode  >= qing_descr->cld_mode &&
+	    qed_descr->cld_start <= qing_descr->cld_start &&
+	    qed_descr->cld_end   >= qing_descr->cld_end)
+		return true;
 
-	qed_mode = qed->ols_cl.cls_lock->cll_descr.cld_mode;
-	return ((qing_mode == CLM_READ) && (qed_mode == CLM_READ));
+	return false;
 }
 
-/**
- * Cancel all conflicting locks and wait for them to be destroyed.
- *
- * This function is used for two purposes:
- *
- *     - early cancel all conflicting locks before starting IO, and
- *
- *     - guarantee that pages added to the page cache by lockless IO are never
- *       covered by locks other than lockless IO lock, and, hence, are not
- *       visible to other threads.
- */
-static int osc_lock_enqueue_wait(const struct lu_env *env,
-				 const struct osc_lock *olck)
+static void osc_lock_wake_waiters(const struct lu_env *env,
+				  struct osc_object *osc,
+				  struct osc_lock *oscl)
 {
-	struct cl_lock *lock = olck->ols_cl.cls_lock;
-	struct cl_lock_descr *descr = &lock->cll_descr;
-	struct cl_object_header *hdr = cl_object_header(descr->cld_obj);
-	struct cl_lock *scan;
-	struct cl_lock *conflict = NULL;
-	int lockless = osc_lock_is_lockless(olck);
-	int rc = 0;
+	spin_lock(&osc->oo_ol_spin);
+	list_del_init(&oscl->ols_nextlock_oscobj);
+	spin_unlock(&osc->oo_ol_spin);
 
-	LASSERT(cl_lock_is_mutexed(lock));
+	spin_lock(&oscl->ols_lock);
+	while (!list_empty(&oscl->ols_waiting_list)) {
+		struct osc_lock *scan;
 
-	/* make it enqueue anyway for glimpse lock, because we actually
-	 * don't need to cancel any conflicting locks.
-	 */
-	if (olck->ols_glimpse)
-		return 0;
+		scan = list_entry(oscl->ols_waiting_list.next, struct osc_lock,
+				  ols_wait_entry);
+		list_del_init(&scan->ols_wait_entry);
 
-	spin_lock(&hdr->coh_lock_guard);
-	list_for_each_entry(scan, &hdr->coh_locks, cll_linkage) {
-		struct cl_lock_descr *cld = &scan->cll_descr;
-		const struct osc_lock *scan_ols;
+		cl_sync_io_note(env, scan->ols_owner, 0);
+	}
+	spin_unlock(&oscl->ols_lock);
+}
+
+static void osc_lock_enqueue_wait(const struct lu_env *env,
+				  struct osc_object *obj,
+				  struct osc_lock *oscl)
+{
+	struct osc_lock *tmp_oscl;
+	struct cl_lock_descr *need = &oscl->ols_cl.cls_lock->cll_descr;
+	struct cl_sync_io *waiter = &osc_env_info(env)->oti_anchor;
 
-		if (scan == lock)
+	spin_lock(&obj->oo_ol_spin);
+	list_add_tail(&oscl->ols_nextlock_oscobj, &obj->oo_ol_list);
+
+restart:
+	list_for_each_entry(tmp_oscl, &obj->oo_ol_list,
+			    ols_nextlock_oscobj) {
+		struct cl_lock_descr *descr;
+
+		if (tmp_oscl == oscl)
 			break;
 
-		if (scan->cll_state < CLS_QUEUING ||
-		    scan->cll_state == CLS_FREEING ||
-		    cld->cld_start > descr->cld_end ||
-		    cld->cld_end < descr->cld_start)
+		descr = &tmp_oscl->ols_cl.cls_lock->cll_descr;
+		if (descr->cld_start > need->cld_end ||
+		    descr->cld_end   < need->cld_start)
 			continue;
 
-		/* overlapped and living locks. */
+		/* We're not supposed to give up group lock */
+		if (descr->cld_mode == CLM_GROUP)
+			break;
 
-		/* We're not supposed to give up group lock. */
-		if (scan->cll_descr.cld_mode == CLM_GROUP) {
-			LASSERT(descr->cld_mode != CLM_GROUP ||
-				descr->cld_gid != scan->cll_descr.cld_gid);
+		if (!osc_lock_is_lockless(oscl) &&
+		    osc_lock_compatible(oscl, tmp_oscl))
 			continue;
-		}
 
-		scan_ols = osc_lock_at(scan);
+		/* wait for conflicting lock to be canceled */
+		cl_sync_io_init(waiter, 1, cl_sync_io_end);
+		oscl->ols_owner = waiter;
 
-		/* We need to cancel the compatible locks if we're enqueuing
-		 * a lockless lock, for example:
-		 * imagine that client has PR lock on [0, 1000], and thread T0
-		 * is doing lockless IO in [500, 1500] region. Concurrent
-		 * thread T1 can see lockless data in [500, 1000], which is
-		 * wrong, because these data are possibly stale.
-		 */
-		if (!lockless && osc_lock_compatible(olck, scan_ols))
-			continue;
+		spin_lock(&tmp_oscl->ols_lock);
+		/* add oscl into tmp's ols_waiting list */
+		list_add_tail(&oscl->ols_wait_entry,
+			      &tmp_oscl->ols_waiting_list);
+		spin_unlock(&tmp_oscl->ols_lock);
 
-		cl_lock_get_trust(scan);
-		conflict = scan;
-		break;
-	}
-	spin_unlock(&hdr->coh_lock_guard);
+		spin_unlock(&obj->oo_ol_spin);
+		(void)cl_sync_io_wait(env, waiter, 0);
 
-	if (conflict) {
-		if (lock->cll_descr.cld_mode == CLM_GROUP) {
-			/* we want a group lock but a previous lock request
-			 * conflicts, we do not wait but return 0 so the
-			 * request is send to the server
-			 */
-			CDEBUG(D_DLMTRACE, "group lock %p is conflicted with %p, no wait, send to server\n",
-			       lock, conflict);
-			cl_lock_put(env, conflict);
-			rc = 0;
-		} else {
-			CDEBUG(D_DLMTRACE, "lock %p is conflicted with %p, will wait\n",
-			       lock, conflict);
-			LASSERT(!lock->cll_conflict);
-			lu_ref_add(&conflict->cll_reference, "cancel-wait",
-				   lock);
-			lock->cll_conflict = conflict;
-			rc = CLO_WAIT;
-		}
+		spin_lock(&obj->oo_ol_spin);
+		oscl->ols_owner = NULL;
+		goto restart;
 	}
-	return rc;
+	spin_unlock(&obj->oo_ol_spin);
 }
 
 /**
@@ -1106,188 +908,122 @@ static int osc_lock_enqueue_wait(const struct lu_env *env,
  */
 static int osc_lock_enqueue(const struct lu_env *env,
 			    const struct cl_lock_slice *slice,
-			    struct cl_io *unused, __u32 enqflags)
+			    struct cl_io *unused, struct cl_sync_io *anchor)
 {
-	struct osc_lock *ols = cl2osc_lock(slice);
-	struct cl_lock *lock = ols->ols_cl.cls_lock;
+	struct osc_thread_info *info = osc_env_info(env);
+	struct osc_io *oio = osc_env_io(env);
+	struct osc_object *osc = cl2osc(slice->cls_obj);
+	struct osc_lock *oscl = cl2osc_lock(slice);
+	struct cl_lock *lock = slice->cls_lock;
+	struct ldlm_res_id *resname = &info->oti_resname;
+	ldlm_policy_data_t *policy = &info->oti_policy;
+	osc_enqueue_upcall_f upcall = osc_lock_upcall;
+	void *cookie = oscl;
+	bool async = false;
 	int result;
 
-	LASSERT(cl_lock_is_mutexed(lock));
-	LASSERTF(ols->ols_state == OLS_NEW,
-		 "Impossible state: %d\n", ols->ols_state);
-
-	LASSERTF(ergo(ols->ols_glimpse, lock->cll_descr.cld_mode <= CLM_READ),
-		 "lock = %p, ols = %p\n", lock, ols);
+	LASSERTF(ergo(oscl->ols_glimpse, lock->cll_descr.cld_mode <= CLM_READ),
+		 "lock = %p, ols = %p\n", lock, oscl);
 
-	result = osc_lock_enqueue_wait(env, ols);
-	if (result == 0) {
-		if (!osc_lock_is_lockless(ols)) {
-			struct osc_object *obj = cl2osc(slice->cls_obj);
-			struct osc_thread_info *info = osc_env_info(env);
-			struct ldlm_res_id *resname = &info->oti_resname;
-			ldlm_policy_data_t *policy = &info->oti_policy;
-			struct ldlm_enqueue_info *einfo = &ols->ols_einfo;
+	if (oscl->ols_state == OLS_GRANTED)
+		return 0;
 
-			/* lock will be passed as upcall cookie,
-			 * hold ref to prevent to be released.
-			 */
-			cl_lock_hold_add(env, lock, "upcall", lock);
-			/* a user for lock also */
-			cl_lock_user_add(env, lock);
-			ols->ols_state = OLS_ENQUEUED;
+	if (oscl->ols_flags & LDLM_FL_TEST_LOCK)
+		goto enqueue_base;
 
-			/*
-			 * XXX: this is possible blocking point as
-			 * ldlm_lock_match(LDLM_FL_LVB_READY) waits for
-			 * LDLM_CP_CALLBACK.
-			 */
-			ostid_build_res_name(&obj->oo_oinfo->loi_oi, resname);
-			osc_lock_build_policy(env, lock, policy);
-			result = osc_enqueue_base(osc_export(obj), resname,
-						  &ols->ols_flags, policy,
-						  &ols->ols_lvb,
-						  obj->oo_oinfo->loi_kms_valid,
-						  osc_lock_upcall,
-						  ols, einfo, &ols->ols_handle,
-						  PTLRPCD_SET, 1, ols->ols_agl);
-			if (result != 0) {
-				cl_lock_user_del(env, lock);
-				cl_lock_unhold(env, lock, "upcall", lock);
-				if (unlikely(result == -ECANCELED)) {
-					ols->ols_state = OLS_NEW;
-					result = 0;
-				}
-			}
-		} else {
-			ols->ols_state = OLS_GRANTED;
-			ols->ols_owner = osc_env_io(env);
-		}
+	if (oscl->ols_glimpse) {
+		LASSERT(equi(oscl->ols_agl, !anchor));
+		async = true;
+		goto enqueue_base;
 	}
-	LASSERT(ergo(ols->ols_glimpse, !osc_lock_is_lockless(ols)));
-	return result;
-}
 
-static int osc_lock_wait(const struct lu_env *env,
-			 const struct cl_lock_slice *slice)
-{
-	struct osc_lock *olck = cl2osc_lock(slice);
-	struct cl_lock *lock = olck->ols_cl.cls_lock;
-
-	LINVRNT(osc_lock_invariant(olck));
-
-	if (olck->ols_glimpse && olck->ols_state >= OLS_UPCALL_RECEIVED) {
-		if (olck->ols_flags & LDLM_FL_LVB_READY) {
-			return 0;
-		} else if (olck->ols_agl) {
-			if (lock->cll_flags & CLF_FROM_UPCALL)
-				/* It is from enqueue RPC reply upcall for
-				 * updating state. Do not re-enqueue.
-				 */
-				return -ENAVAIL;
-			olck->ols_state = OLS_NEW;
-		} else {
-			LASSERT(lock->cll_error);
-			return lock->cll_error;
-		}
+	osc_lock_enqueue_wait(env, osc, oscl);
+
+	/* we can grant lockless lock right after all conflicting locks
+	 * are canceled.
+	 */
+	if (osc_lock_is_lockless(oscl)) {
+		oscl->ols_state = OLS_GRANTED;
+		oio->oi_lockless = 1;
+		return 0;
 	}
 
-	if (olck->ols_state == OLS_NEW) {
-		int rc;
-
-		LASSERT(olck->ols_agl);
-		olck->ols_agl = 0;
-		olck->ols_flags &= ~LDLM_FL_BLOCK_NOWAIT;
-		rc = osc_lock_enqueue(env, slice, NULL, CEF_ASYNC | CEF_MUST);
-		if (rc != 0)
-			return rc;
-		else
-			return CLO_REENQUEUED;
+enqueue_base:
+	oscl->ols_state = OLS_ENQUEUED;
+	if (anchor) {
+		atomic_inc(&anchor->csi_sync_nr);
+		oscl->ols_owner = anchor;
 	}
 
-	LASSERT(equi(olck->ols_state >= OLS_UPCALL_RECEIVED &&
-		     lock->cll_error == 0, olck->ols_lock));
+	/**
+	 * DLM lock's ast data must be osc_object;
+	 * if glimpse or AGL lock, async of osc_enqueue_base() must be true,
+	 * DLM's enqueue callback set to osc_lock_upcall() with cookie as
+	 * osc_lock.
+	 */
+	ostid_build_res_name(&osc->oo_oinfo->loi_oi, resname);
+	osc_lock_build_einfo(env, lock, osc, &oscl->ols_einfo);
+	osc_lock_build_policy(env, lock, policy);
+	if (oscl->ols_agl) {
+		oscl->ols_einfo.ei_cbdata = NULL;
+		/* hold a reference for callback */
+		cl_object_get(osc2cl(osc));
+		upcall = osc_lock_upcall_agl;
+		cookie = osc;
+	}
+	result = osc_enqueue_base(osc_export(osc), resname, &oscl->ols_flags,
+				  policy, &oscl->ols_lvb,
+				  osc->oo_oinfo->loi_kms_valid,
+				  upcall, cookie,
+				  &oscl->ols_einfo, PTLRPCD_SET, async,
+				  oscl->ols_agl);
+	if (result != 0) {
+		oscl->ols_state = OLS_CANCELLED;
+		osc_lock_wake_waiters(env, osc, oscl);
 
-	return lock->cll_error ?: olck->ols_state >= OLS_GRANTED ? 0 : CLO_WAIT;
+		/* hide error for AGL lock. */
+		if (oscl->ols_agl) {
+			cl_object_put(env, osc2cl(osc));
+			result = 0;
+		}
+		if (anchor)
+			cl_sync_io_note(env, anchor, result);
+	} else {
+		if (osc_lock_is_lockless(oscl)) {
+			oio->oi_lockless = 1;
+		} else if (!async) {
+			LASSERT(oscl->ols_state == OLS_GRANTED);
+			LASSERT(oscl->ols_hold);
+			LASSERT(oscl->ols_dlmlock);
+		}
+	}
+	return result;
 }
 
 /**
- * An implementation of cl_lock_operations::clo_use() method that pins cached
- * lock.
+ * Breaks a link between osc_lock and dlm_lock.
  */
-static int osc_lock_use(const struct lu_env *env,
-			const struct cl_lock_slice *slice)
+static void osc_lock_detach(const struct lu_env *env, struct osc_lock *olck)
 {
-	struct osc_lock *olck = cl2osc_lock(slice);
-	int rc;
-
-	LASSERT(!olck->ols_hold);
+	struct ldlm_lock *dlmlock;
 
-	/*
-	 * Atomically check for LDLM_FL_CBPENDING and addref a lock if this
-	 * flag is not set. This protects us from a concurrent blocking ast.
-	 */
-	rc = ldlm_lock_addref_try(&olck->ols_handle, olck->ols_einfo.ei_mode);
-	if (rc == 0) {
-		olck->ols_hold = 1;
-		olck->ols_state = OLS_GRANTED;
-	} else {
-		struct cl_lock *lock;
+	dlmlock = olck->ols_dlmlock;
+	if (!dlmlock)
+		return;
 
-		/*
-		 * Lock is being cancelled somewhere within
-		 * ldlm_handle_bl_callback(): LDLM_FL_CBPENDING is already
-		 * set, but osc_ldlm_blocking_ast() hasn't yet acquired
-		 * cl_lock mutex.
-		 */
-		lock = slice->cls_lock;
-		LASSERT(lock->cll_state == CLS_INTRANSIT);
-		LASSERT(lock->cll_users > 0);
-		/* set a flag for osc_dlm_blocking_ast0() to signal the
-		 * lock.
-		 */
-		olck->ols_ast_wait = 1;
-		rc = CLO_WAIT;
+	if (olck->ols_hold) {
+		olck->ols_hold = 0;
+		osc_cancel_base(&olck->ols_handle, olck->ols_einfo.ei_mode);
+		olck->ols_handle.cookie = 0ULL;
 	}
-	return rc;
-}
 
-static int osc_lock_flush(struct osc_lock *ols, int discard)
-{
-	struct cl_lock *lock = ols->ols_cl.cls_lock;
-	struct cl_env_nest nest;
-	struct lu_env *env;
-	int result = 0;
-
-	env = cl_env_nested_get(&nest);
-	if (!IS_ERR(env)) {
-		struct osc_object *obj = cl2osc(ols->ols_cl.cls_obj);
-		struct cl_lock_descr *descr = &lock->cll_descr;
-		int rc = 0;
-
-		if (descr->cld_mode >= CLM_WRITE) {
-			result = osc_cache_writeback_range(env, obj,
-							   descr->cld_start,
-							   descr->cld_end,
-							   1, discard);
-			LDLM_DEBUG(ols->ols_lock,
-				   "lock %p: %d pages were %s.\n", lock, result,
-				   discard ? "discarded" : "written");
-			if (result > 0)
-				result = 0;
-		}
+	olck->ols_dlmlock = NULL;
 
-		rc = cl_lock_discard_pages(env, lock);
-		if (result == 0 && rc < 0)
-			result = rc;
-
-		cl_env_nested_put(&nest, env);
-	} else
-		result = PTR_ERR(env);
-	if (result == 0) {
-		ols->ols_flush = 1;
-		LINVRNT(!osc_lock_has_pages(ols));
-	}
-	return result;
+	/* release a reference taken in osc_lock_upcall(). */
+	LASSERT(olck->ols_has_ref);
+	lu_ref_del(&dlmlock->l_reference, "osc_lock", olck);
+	LDLM_LOCK_RELEASE(dlmlock);
+	olck->ols_has_ref = 0;
 }
 
 /**
@@ -1307,96 +1043,16 @@ static int osc_lock_flush(struct osc_lock *ols, int discard)
 static void osc_lock_cancel(const struct lu_env *env,
 			    const struct cl_lock_slice *slice)
 {
-	struct cl_lock *lock = slice->cls_lock;
-	struct osc_lock *olck = cl2osc_lock(slice);
-	struct ldlm_lock *dlmlock = olck->ols_lock;
-	int result = 0;
-	int discard;
-
-	LASSERT(cl_lock_is_mutexed(lock));
-	LINVRNT(osc_lock_invariant(olck));
-
-	if (dlmlock) {
-		int do_cancel;
-
-		discard = !!(dlmlock->l_flags & LDLM_FL_DISCARD_DATA);
-		if (olck->ols_state >= OLS_GRANTED)
-			result = osc_lock_flush(olck, discard);
-		osc_lock_unhold(olck);
-
-		lock_res_and_lock(dlmlock);
-		/* Now that we're the only user of dlm read/write reference,
-		 * mostly the ->l_readers + ->l_writers should be zero.
-		 * However, there is a corner case.
-		 * See bug 18829 for details.
-		 */
-		do_cancel = (dlmlock->l_readers == 0 &&
-			     dlmlock->l_writers == 0);
-		dlmlock->l_flags |= LDLM_FL_CBPENDING;
-		unlock_res_and_lock(dlmlock);
-		if (do_cancel)
-			result = ldlm_cli_cancel(&olck->ols_handle, LCF_ASYNC);
-		if (result < 0)
-			CL_LOCK_DEBUG(D_ERROR, env, lock,
-				      "lock %p cancel failure with error(%d)\n",
-				      lock, result);
-	}
-	olck->ols_state = OLS_CANCELLED;
-	olck->ols_flags &= ~LDLM_FL_LVB_READY;
-	osc_lock_detach(env, olck);
-}
-
-static int osc_lock_has_pages(struct osc_lock *olck)
-{
-	return 0;
-}
-
-static void osc_lock_delete(const struct lu_env *env,
-			    const struct cl_lock_slice *slice)
-{
-	struct osc_lock *olck;
+	struct osc_object *obj  = cl2osc(slice->cls_obj);
+	struct osc_lock *oscl = cl2osc_lock(slice);
 
-	olck = cl2osc_lock(slice);
-	if (olck->ols_glimpse) {
-		LASSERT(!olck->ols_hold);
-		LASSERT(!olck->ols_lock);
-		return;
-	}
+	LINVRNT(osc_lock_invariant(oscl));
 
-	LINVRNT(osc_lock_invariant(olck));
-	LINVRNT(!osc_lock_has_pages(olck));
+	osc_lock_detach(env, oscl);
+	oscl->ols_state = OLS_CANCELLED;
+	oscl->ols_flags &= ~LDLM_FL_LVB_READY;
 
-	osc_lock_unhold(olck);
-	osc_lock_detach(env, olck);
-}
-
-/**
- * Implements cl_lock_operations::clo_state() method for osc layer.
- *
- * Maintains osc_lock::ols_owner field.
- *
- * This assumes that lock always enters CLS_HELD (from some other state) in
- * the same IO context as one that requested the lock. This should not be a
- * problem, because context is by definition shared by all activity pertaining
- * to the same high-level IO.
- */
-static void osc_lock_state(const struct lu_env *env,
-			   const struct cl_lock_slice *slice,
-			   enum cl_lock_state state)
-{
-	struct osc_lock *lock = cl2osc_lock(slice);
-
-	/*
-	 * XXX multiple io contexts can use the lock at the same time.
-	 */
-	LINVRNT(osc_lock_invariant(lock));
-	if (state == CLS_HELD && slice->cls_lock->cll_state != CLS_HELD) {
-		struct osc_io *oio = osc_env_io(env);
-
-		LASSERT(!lock->ols_owner);
-		lock->ols_owner = oio;
-	} else if (state != CLS_HELD)
-		lock->ols_owner = NULL;
+	osc_lock_wake_waiters(env, obj, oscl);
 }
 
 static int osc_lock_print(const struct lu_env *env, void *cookie,
@@ -1404,221 +1060,161 @@ static int osc_lock_print(const struct lu_env *env, void *cookie,
 {
 	struct osc_lock *lock = cl2osc_lock(slice);
 
-	/*
-	 * XXX print ldlm lock and einfo properly.
-	 */
 	(*p)(env, cookie, "%p %#16llx %#llx %d %p ",
-	     lock->ols_lock, lock->ols_flags, lock->ols_handle.cookie,
+	     lock->ols_dlmlock, lock->ols_flags, lock->ols_handle.cookie,
 	     lock->ols_state, lock->ols_owner);
 	osc_lvb_print(env, cookie, p, &lock->ols_lvb);
 	return 0;
 }
 
-static int osc_lock_fits_into(const struct lu_env *env,
-			      const struct cl_lock_slice *slice,
-			      const struct cl_lock_descr *need,
-			      const struct cl_io *io)
-{
-	struct osc_lock *ols = cl2osc_lock(slice);
-
-	if (need->cld_enq_flags & CEF_NEVER)
-		return 0;
-
-	if (ols->ols_state >= OLS_CANCELLED)
-		return 0;
-
-	if (need->cld_mode == CLM_PHANTOM) {
-		if (ols->ols_agl)
-			return !(ols->ols_state > OLS_RELEASED);
-
-		/*
-		 * Note: the QUEUED lock can't be matched here, otherwise
-		 * it might cause the deadlocks.
-		 * In read_process,
-		 * P1: enqueued read lock, create sublock1
-		 * P2: enqueued write lock, create sublock2(conflicted
-		 *     with sublock1).
-		 * P1: Grant read lock.
-		 * P1: enqueued glimpse lock(with holding sublock1_read),
-		 *     matched with sublock2, waiting sublock2 to be granted.
-		 *     But sublock2 can not be granted, because P1
-		 *     will not release sublock1. Bang!
-		 */
-		if (ols->ols_state < OLS_GRANTED ||
-		    ols->ols_state > OLS_RELEASED)
-			return 0;
-	} else if (need->cld_enq_flags & CEF_MUST) {
-		/*
-		 * If the lock hasn't ever enqueued, it can't be matched
-		 * because enqueue process brings in many information
-		 * which can be used to determine things such as lockless,
-		 * CEF_MUST, etc.
-		 */
-		if (ols->ols_state < OLS_UPCALL_RECEIVED &&
-		    ols->ols_locklessable)
-			return 0;
-	}
-	return 1;
-}
-
 static const struct cl_lock_operations osc_lock_ops = {
 	.clo_fini    = osc_lock_fini,
 	.clo_enqueue = osc_lock_enqueue,
-	.clo_wait    = osc_lock_wait,
-	.clo_unuse   = osc_lock_unuse,
-	.clo_use     = osc_lock_use,
-	.clo_delete  = osc_lock_delete,
-	.clo_state   = osc_lock_state,
 	.clo_cancel  = osc_lock_cancel,
-	.clo_weigh   = osc_lock_weigh,
 	.clo_print   = osc_lock_print,
-	.clo_fits_into = osc_lock_fits_into,
 };
 
-static int osc_lock_lockless_unuse(const struct lu_env *env,
-				   const struct cl_lock_slice *slice)
-{
-	struct osc_lock *ols = cl2osc_lock(slice);
-	struct cl_lock *lock = slice->cls_lock;
-
-	LASSERT(ols->ols_state == OLS_GRANTED);
-	LINVRNT(osc_lock_invariant(ols));
-
-	cl_lock_cancel(env, lock);
-	cl_lock_delete(env, lock);
-	return 0;
-}
-
 static void osc_lock_lockless_cancel(const struct lu_env *env,
 				     const struct cl_lock_slice *slice)
 {
 	struct osc_lock *ols = cl2osc_lock(slice);
+	struct osc_object *osc = cl2osc(slice->cls_obj);
+	struct cl_lock_descr *descr = &slice->cls_lock->cll_descr;
 	int result;
 
-	result = osc_lock_flush(ols, 0);
+	LASSERT(!ols->ols_dlmlock);
+	result = osc_lock_flush(osc, descr->cld_start, descr->cld_end,
+				descr->cld_mode, 0);
 	if (result)
 		CERROR("Pages for lockless lock %p were not purged(%d)\n",
 		       ols, result);
-	ols->ols_state = OLS_CANCELLED;
-}
-
-static int osc_lock_lockless_wait(const struct lu_env *env,
-				  const struct cl_lock_slice *slice)
-{
-	struct osc_lock *olck = cl2osc_lock(slice);
-	struct cl_lock *lock = olck->ols_cl.cls_lock;
 
-	LINVRNT(osc_lock_invariant(olck));
-	LASSERT(olck->ols_state >= OLS_UPCALL_RECEIVED);
-
-	return lock->cll_error;
+	osc_lock_wake_waiters(env, osc, ols);
 }
 
-static void osc_lock_lockless_state(const struct lu_env *env,
-				    const struct cl_lock_slice *slice,
-				    enum cl_lock_state state)
-{
-	struct osc_lock *lock = cl2osc_lock(slice);
+static const struct cl_lock_operations osc_lock_lockless_ops = {
+	.clo_fini	= osc_lock_fini,
+	.clo_enqueue	= osc_lock_enqueue,
+	.clo_cancel	= osc_lock_lockless_cancel,
+	.clo_print	= osc_lock_print
+};
 
-	LINVRNT(osc_lock_invariant(lock));
-	if (state == CLS_HELD) {
-		struct osc_io *oio = osc_env_io(env);
+static void osc_lock_set_writer(const struct lu_env *env,
+				const struct cl_io *io,
+				struct cl_object *obj, struct osc_lock *oscl)
+{
+	struct cl_lock_descr *descr = &oscl->ols_cl.cls_lock->cll_descr;
+	pgoff_t io_start;
+	pgoff_t io_end;
 
-		LASSERT(ergo(lock->ols_owner, lock->ols_owner == oio));
-		lock->ols_owner = oio;
+	if (!cl_object_same(io->ci_obj, obj))
+		return;
 
-		/* set the io to be lockless if this lock is for io's
-		 * host object
-		 */
-		if (cl_object_same(oio->oi_cl.cis_obj, slice->cls_obj))
-			oio->oi_lockless = 1;
+	if (likely(io->ci_type == CIT_WRITE)) {
+		io_start = cl_index(obj, io->u.ci_rw.crw_pos);
+		io_end = cl_index(obj, io->u.ci_rw.crw_pos +
+				  io->u.ci_rw.crw_count - 1);
+		if (cl_io_is_append(io)) {
+			io_start = 0;
+			io_end = CL_PAGE_EOF;
+		}
+	} else {
+		LASSERT(cl_io_is_mkwrite(io));
+		io_start = io_end = io->u.ci_fault.ft_index;
 	}
-}
 
-static int osc_lock_lockless_fits_into(const struct lu_env *env,
-				       const struct cl_lock_slice *slice,
-				       const struct cl_lock_descr *need,
-				       const struct cl_io *io)
-{
-	struct osc_lock *lock = cl2osc_lock(slice);
-
-	if (!(need->cld_enq_flags & CEF_NEVER))
-		return 0;
+	if (descr->cld_mode >= CLM_WRITE &&
+	    descr->cld_start <= io_start && descr->cld_end >= io_end) {
+		struct osc_io *oio = osc_env_io(env);
 
-	/* lockless lock should only be used by its owning io. b22147 */
-	return (lock->ols_owner == osc_env_io(env));
+		/* There must be only one lock to match the write region */
+		LASSERT(!oio->oi_write_osclock);
+		oio->oi_write_osclock = oscl;
+	}
 }
 
-static const struct cl_lock_operations osc_lock_lockless_ops = {
-	.clo_fini      = osc_lock_fini,
-	.clo_enqueue   = osc_lock_enqueue,
-	.clo_wait      = osc_lock_lockless_wait,
-	.clo_unuse     = osc_lock_lockless_unuse,
-	.clo_state     = osc_lock_lockless_state,
-	.clo_fits_into = osc_lock_lockless_fits_into,
-	.clo_cancel    = osc_lock_lockless_cancel,
-	.clo_print     = osc_lock_print
-};
-
 int osc_lock_init(const struct lu_env *env,
 		  struct cl_object *obj, struct cl_lock *lock,
-		  const struct cl_io *unused)
+		  const struct cl_io *io)
 {
-	struct osc_lock *clk;
-	int result;
-
-	clk = kmem_cache_zalloc(osc_lock_kmem, GFP_NOFS);
-	if (clk) {
-		__u32 enqflags = lock->cll_descr.cld_enq_flags;
+	struct osc_lock *oscl;
+	__u32 enqflags = lock->cll_descr.cld_enq_flags;
+
+	oscl = kmem_cache_zalloc(osc_lock_kmem, GFP_NOFS);
+	if (!oscl)
+		return -ENOMEM;
+
+	oscl->ols_state = OLS_NEW;
+	spin_lock_init(&oscl->ols_lock);
+	INIT_LIST_HEAD(&oscl->ols_waiting_list);
+	INIT_LIST_HEAD(&oscl->ols_wait_entry);
+	INIT_LIST_HEAD(&oscl->ols_nextlock_oscobj);
+
+	oscl->ols_flags = osc_enq2ldlm_flags(enqflags);
+	oscl->ols_agl = !!(enqflags & CEF_AGL);
+	if (oscl->ols_agl)
+		oscl->ols_flags |= LDLM_FL_BLOCK_NOWAIT;
+	if (oscl->ols_flags & LDLM_FL_HAS_INTENT) {
+		oscl->ols_flags |= LDLM_FL_BLOCK_GRANTED;
+		oscl->ols_glimpse = 1;
+	}
 
-		osc_lock_build_einfo(env, lock, clk, &clk->ols_einfo);
-		atomic_set(&clk->ols_pageref, 0);
-		clk->ols_state = OLS_NEW;
+	cl_lock_slice_add(lock, &oscl->ols_cl, obj, &osc_lock_ops);
 
-		clk->ols_flags = osc_enq2ldlm_flags(enqflags);
-		clk->ols_agl = !!(enqflags & CEF_AGL);
-		if (clk->ols_agl)
-			clk->ols_flags |= LDLM_FL_BLOCK_NOWAIT;
-		if (clk->ols_flags & LDLM_FL_HAS_INTENT)
-			clk->ols_glimpse = 1;
+	if (!(enqflags & CEF_MUST))
+		/* try to convert this lock to a lockless lock */
+		osc_lock_to_lockless(env, oscl, (enqflags & CEF_NEVER));
+	if (oscl->ols_locklessable && !(enqflags & CEF_DISCARD_DATA))
+		oscl->ols_flags |= LDLM_FL_DENY_ON_CONTENTION;
 
-		cl_lock_slice_add(lock, &clk->ols_cl, obj, &osc_lock_ops);
+	if (io->ci_type == CIT_WRITE || cl_io_is_mkwrite(io))
+		osc_lock_set_writer(env, io, obj, oscl);
 
-		if (!(enqflags & CEF_MUST))
-			/* try to convert this lock to a lockless lock */
-			osc_lock_to_lockless(env, clk, (enqflags & CEF_NEVER));
-		if (clk->ols_locklessable && !(enqflags & CEF_DISCARD_DATA))
-			clk->ols_flags |= LDLM_FL_DENY_ON_CONTENTION;
 
-		LDLM_DEBUG_NOLOCK("lock %p, osc lock %p, flags %llx",
-				  lock, clk, clk->ols_flags);
+	LDLM_DEBUG_NOLOCK("lock %p, osc lock %p, flags %llx\n",
+			  lock, oscl, oscl->ols_flags);
 
-		result = 0;
-	} else
-		result = -ENOMEM;
-	return result;
+	return 0;
 }
 
-int osc_dlm_lock_pageref(struct ldlm_lock *dlm)
+/**
+ * Finds an existing lock covering given index and optionally different from a
+ * given \a except lock.
+ */
+struct ldlm_lock *osc_dlmlock_at_pgoff(const struct lu_env *env,
+				       struct osc_object *obj, pgoff_t index,
+				       int pending, int canceling)
 {
-	struct osc_lock *olock;
-	int rc = 0;
-
-	spin_lock(&osc_ast_guard);
-	olock = dlm->l_ast_data;
+	struct osc_thread_info *info = osc_env_info(env);
+	struct ldlm_res_id *resname = &info->oti_resname;
+	ldlm_policy_data_t *policy  = &info->oti_policy;
+	struct lustre_handle lockh;
+	struct ldlm_lock *lock = NULL;
+	enum ldlm_mode mode;
+	__u64 flags;
+
+	ostid_build_res_name(&obj->oo_oinfo->loi_oi, resname);
+	osc_index2policy(policy, osc2cl(obj), index, index);
+	policy->l_extent.gid = LDLM_GID_ANY;
+
+	flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_TEST_LOCK;
+	if (pending)
+		flags |= LDLM_FL_CBPENDING;
 	/*
-	 * there's a very rare race with osc_page_addref_lock(), but that
-	 * doesn't matter because in the worst case we don't cancel a lock
-	 * which we actually can, that's no harm.
+	 * It is fine to match any group lock since there could be only one
+	 * with a uniq gid and it conflicts with all other lock modes too
 	 */
-	if (olock &&
-	    atomic_add_return(_PAGEREF_MAGIC,
-			      &olock->ols_pageref) != _PAGEREF_MAGIC) {
-		atomic_sub(_PAGEREF_MAGIC, &olock->ols_pageref);
-		rc = 1;
+again:
+	mode = ldlm_lock_match(osc_export(obj)->exp_obd->obd_namespace,
+			       flags, resname, LDLM_EXTENT, policy,
+			       LCK_PR | LCK_PW | LCK_GROUP, &lockh, canceling);
+	if (mode != 0) {
+		lock = ldlm_handle2lock(&lockh);
+		/* RACE: the lock is cancelled so let's try again */
+		if (unlikely(!lock))
+			goto again;
 	}
-	spin_unlock(&osc_ast_guard);
-	return rc;
+	return lock;
 }
 
 /** @} osc */
diff --git a/drivers/staging/lustre/lustre/osc/osc_object.c b/drivers/staging/lustre/lustre/osc/osc_object.c
index 9d474fcdd..738ab10ab 100644
--- a/drivers/staging/lustre/lustre/osc/osc_object.c
+++ b/drivers/staging/lustre/lustre/osc/osc_object.c
@@ -36,6 +36,7 @@
  * Implementation of cl_object for OSC layer.
  *
  *   Author: Nikita Danilov <nikita.danilov@sun.com>
+ *   Author: Jinshan Xiong <jinshan.xiong@intel.com>
  */
 
 #define DEBUG_SUBSYSTEM S_OSC
@@ -94,6 +95,9 @@ static int osc_object_init(const struct lu_env *env, struct lu_object *obj,
 	atomic_set(&osc->oo_nr_reads, 0);
 	atomic_set(&osc->oo_nr_writes, 0);
 	spin_lock_init(&osc->oo_lock);
+	spin_lock_init(&osc->oo_tree_lock);
+	spin_lock_init(&osc->oo_ol_spin);
+	INIT_LIST_HEAD(&osc->oo_ol_list);
 
 	cl_object_page_init(lu2cl(obj), sizeof(struct osc_page));
 
@@ -120,6 +124,7 @@ static void osc_object_free(const struct lu_env *env, struct lu_object *obj)
 	LASSERT(list_empty(&osc->oo_reading_exts));
 	LASSERT(atomic_read(&osc->oo_nr_reads) == 0);
 	LASSERT(atomic_read(&osc->oo_nr_writes) == 0);
+	LASSERT(list_empty(&osc->oo_ol_list));
 
 	lu_object_fini(obj);
 	kmem_cache_free(osc_object_kmem, osc);
@@ -192,6 +197,32 @@ static int osc_object_glimpse(const struct lu_env *env,
 	return 0;
 }
 
+static int osc_object_ast_clear(struct ldlm_lock *lock, void *data)
+{
+	LASSERT(lock->l_granted_mode == lock->l_req_mode);
+	if (lock->l_ast_data == data)
+		lock->l_ast_data = NULL;
+	return LDLM_ITER_CONTINUE;
+}
+
+static int osc_object_prune(const struct lu_env *env, struct cl_object *obj)
+{
+	struct osc_object       *osc = cl2osc(obj);
+	struct ldlm_res_id      *resname = &osc_env_info(env)->oti_resname;
+
+	LASSERTF(osc->oo_npages == 0,
+		 DFID "still have %lu pages, obj: %p, osc: %p\n",
+		 PFID(lu_object_fid(&obj->co_lu)), osc->oo_npages, obj, osc);
+
+	/* DLM locks don't hold a reference of osc_object so we have to
+	 * clear it before the object is being destroyed.
+	 */
+	ostid_build_res_name(&osc->oo_oinfo->loi_oi, resname);
+	ldlm_resource_iterate(osc_export(osc)->exp_obd->obd_namespace, resname,
+			      osc_object_ast_clear, osc);
+	return 0;
+}
+
 void osc_object_set_contended(struct osc_object *obj)
 {
 	obj->oo_contention_time = cfs_time_current();
@@ -236,12 +267,12 @@ static const struct cl_object_operations osc_ops = {
 	.coo_io_init   = osc_io_init,
 	.coo_attr_get  = osc_attr_get,
 	.coo_attr_set  = osc_attr_set,
-	.coo_glimpse   = osc_object_glimpse
+	.coo_glimpse   = osc_object_glimpse,
+	.coo_prune     = osc_object_prune
 };
 
 static const struct lu_object_operations osc_lu_obj_ops = {
 	.loo_object_init      = osc_object_init,
-	.loo_object_delete    = NULL,
 	.loo_object_release   = NULL,
 	.loo_object_free      = osc_object_free,
 	.loo_object_print     = osc_object_print,
@@ -261,8 +292,9 @@ struct lu_object *osc_object_alloc(const struct lu_env *env,
 		lu_object_init(obj, NULL, dev);
 		osc->oo_cl.co_ops = &osc_ops;
 		obj->lo_ops = &osc_lu_obj_ops;
-	} else
+	} else {
 		obj = NULL;
+	}
 	return obj;
 }
 
diff --git a/drivers/staging/lustre/lustre/osc/osc_page.c b/drivers/staging/lustre/lustre/osc/osc_page.c
index ce9ddd515..c29c2eabe 100644
--- a/drivers/staging/lustre/lustre/osc/osc_page.c
+++ b/drivers/staging/lustre/lustre/osc/osc_page.c
@@ -36,14 +36,15 @@
  * Implementation of cl_page for OSC layer.
  *
  *   Author: Nikita Danilov <nikita.danilov@sun.com>
+ *   Author: Jinshan Xiong <jinshan.xiong@intel.com>
  */
 
 #define DEBUG_SUBSYSTEM S_OSC
 
 #include "osc_cl_internal.h"
 
-static void osc_lru_del(struct client_obd *cli, struct osc_page *opg, bool del);
-static void osc_lru_add(struct client_obd *cli, struct osc_page *opg);
+static void osc_lru_del(struct client_obd *cli, struct osc_page *opg);
+static void osc_lru_use(struct client_obd *cli, struct osc_page *opg);
 static int osc_lru_reserve(const struct lu_env *env, struct osc_object *obj,
 			   struct osc_page *opg);
 
@@ -63,18 +64,9 @@ static int osc_page_protected(const struct lu_env *env,
  * Page operations.
  *
  */
-static void osc_page_fini(const struct lu_env *env,
-			  struct cl_page_slice *slice)
-{
-	struct osc_page *opg = cl2osc_page(slice);
-
-	CDEBUG(D_TRACE, "%p\n", opg);
-	LASSERT(!opg->ops_lock);
-}
-
 static void osc_page_transfer_get(struct osc_page *opg, const char *label)
 {
-	struct cl_page *page = cl_page_top(opg->ops_cl.cpl_page);
+	struct cl_page *page = opg->ops_cl.cpl_page;
 
 	LASSERT(!opg->ops_transfer_pinned);
 	cl_page_get(page);
@@ -85,11 +77,11 @@ static void osc_page_transfer_get(struct osc_page *opg, const char *label)
 static void osc_page_transfer_put(const struct lu_env *env,
 				  struct osc_page *opg)
 {
-	struct cl_page *page = cl_page_top(opg->ops_cl.cpl_page);
+	struct cl_page *page = opg->ops_cl.cpl_page;
 
 	if (opg->ops_transfer_pinned) {
-		lu_ref_del(&page->cp_reference, "transfer", page);
 		opg->ops_transfer_pinned = 0;
+		lu_ref_del(&page->cp_reference, "transfer", page);
 		cl_page_put(env, page);
 	}
 }
@@ -104,10 +96,7 @@ static void osc_page_transfer_add(const struct lu_env *env,
 {
 	struct osc_object *obj = cl2osc(opg->ops_cl.cpl_obj);
 
-	/* ops_lru and ops_inflight share the same field, so take it from LRU
-	 * first and then use it as inflight.
-	 */
-	osc_lru_del(osc_cli(obj), opg, false);
+	osc_lru_use(osc_cli(obj), opg);
 
 	spin_lock(&obj->oo_seatbelt);
 	list_add(&opg->ops_inflight, &obj->oo_inflight[crt]);
@@ -115,11 +104,9 @@ static void osc_page_transfer_add(const struct lu_env *env,
 	spin_unlock(&obj->oo_seatbelt);
 }
 
-static int osc_page_cache_add(const struct lu_env *env,
-			      const struct cl_page_slice *slice,
-			      struct cl_io *io)
+int osc_page_cache_add(const struct lu_env *env,
+		       const struct cl_page_slice *slice, struct cl_io *io)
 {
-	struct osc_io *oio = osc_env_io(env);
 	struct osc_page *opg = cl2osc_page(slice);
 	int result;
 
@@ -132,17 +119,6 @@ static int osc_page_cache_add(const struct lu_env *env,
 	else
 		osc_page_transfer_add(env, opg, CRT_WRITE);
 
-	/* for sync write, kernel will wait for this page to be flushed before
-	 * osc_io_end() is called, so release it earlier.
-	 * for mkwrite(), it's known there is no further pages.
-	 */
-	if (cl_io_is_sync_write(io) || cl_io_is_mkwrite(io)) {
-		if (oio->oi_active) {
-			osc_extent_release(env, oio->oi_active);
-			oio->oi_active = NULL;
-		}
-	}
-
 	return result;
 }
 
@@ -154,102 +130,25 @@ void osc_index2policy(ldlm_policy_data_t *policy, const struct cl_object *obj,
 	policy->l_extent.end = cl_offset(obj, end + 1) - 1;
 }
 
-static int osc_page_addref_lock(const struct lu_env *env,
-				struct osc_page *opg,
-				struct cl_lock *lock)
-{
-	struct osc_lock *olock;
-	int rc;
-
-	LASSERT(!opg->ops_lock);
-
-	olock = osc_lock_at(lock);
-	if (atomic_inc_return(&olock->ols_pageref) <= 0) {
-		atomic_dec(&olock->ols_pageref);
-		rc = -ENODATA;
-	} else {
-		cl_lock_get(lock);
-		opg->ops_lock = lock;
-		rc = 0;
-	}
-	return rc;
-}
-
-static void osc_page_putref_lock(const struct lu_env *env,
-				 struct osc_page *opg)
-{
-	struct cl_lock *lock = opg->ops_lock;
-	struct osc_lock *olock;
-
-	LASSERT(lock);
-	olock = osc_lock_at(lock);
-
-	atomic_dec(&olock->ols_pageref);
-	opg->ops_lock = NULL;
-
-	cl_lock_put(env, lock);
-}
-
 static int osc_page_is_under_lock(const struct lu_env *env,
 				  const struct cl_page_slice *slice,
-				  struct cl_io *unused)
+				  struct cl_io *unused, pgoff_t *max_index)
 {
-	struct cl_lock *lock;
+	struct osc_page *opg = cl2osc_page(slice);
+	struct ldlm_lock *dlmlock;
 	int result = -ENODATA;
 
-	lock = cl_lock_at_page(env, slice->cpl_obj, slice->cpl_page,
-			       NULL, 1, 0);
-	if (lock) {
-		if (osc_page_addref_lock(env, cl2osc_page(slice), lock) == 0)
-			result = -EBUSY;
-		cl_lock_put(env, lock);
+	dlmlock = osc_dlmlock_at_pgoff(env, cl2osc(slice->cpl_obj),
+				       osc_index(opg), 1, 0);
+	if (dlmlock) {
+		*max_index = cl_index(slice->cpl_obj,
+				      dlmlock->l_policy_data.l_extent.end);
+		LDLM_LOCK_PUT(dlmlock);
+		result = 0;
 	}
 	return result;
 }
 
-static void osc_page_disown(const struct lu_env *env,
-			    const struct cl_page_slice *slice,
-			    struct cl_io *io)
-{
-	struct osc_page *opg = cl2osc_page(slice);
-
-	if (unlikely(opg->ops_lock))
-		osc_page_putref_lock(env, opg);
-}
-
-static void osc_page_completion_read(const struct lu_env *env,
-				     const struct cl_page_slice *slice,
-				     int ioret)
-{
-	struct osc_page *opg = cl2osc_page(slice);
-	struct osc_object *obj = cl2osc(opg->ops_cl.cpl_obj);
-
-	if (likely(opg->ops_lock))
-		osc_page_putref_lock(env, opg);
-	osc_lru_add(osc_cli(obj), opg);
-}
-
-static void osc_page_completion_write(const struct lu_env *env,
-				      const struct cl_page_slice *slice,
-				      int ioret)
-{
-	struct osc_page *opg = cl2osc_page(slice);
-	struct osc_object *obj = cl2osc(slice->cpl_obj);
-
-	osc_lru_add(osc_cli(obj), opg);
-}
-
-static int osc_page_fail(const struct lu_env *env,
-			 const struct cl_page_slice *slice,
-			 struct cl_io *unused)
-{
-	/*
-	 * Cached read?
-	 */
-	LBUG();
-	return 0;
-}
-
 static const char *osc_list(struct list_head *head)
 {
 	return list_empty(head) ? "-" : "+";
@@ -272,8 +171,8 @@ static int osc_page_print(const struct lu_env *env,
 	struct osc_object *obj = cl2osc(slice->cpl_obj);
 	struct client_obd *cli = &osc_export(obj)->exp_obd->u.cli;
 
-	return (*printer)(env, cookie, LUSTRE_OSC_NAME "-page@%p: 1< %#x %d %u %s %s > 2< %llu %u %u %#x %#x | %p %p %p > 3< %s %p %d %lu %d > 4< %d %d %d %lu %s | %s %s %s %s > 5< %s %s %s %s | %d %s | %d %s %s>\n",
-			  opg,
+	return (*printer)(env, cookie, LUSTRE_OSC_NAME "-page@%p %lu: 1< %#x %d %u %s %s > 2< %llu %u %u %#x %#x | %p %p %p > 3< %s %p %d %lu %d > 4< %d %d %d %lu %s | %s %s %s %s > 5< %s %s %s %s | %d %s | %d %s %s>\n",
+			  opg, osc_index(opg),
 			  /* 1 */
 			  oap->oap_magic, oap->oap_cmd,
 			  oap->oap_interrupted,
@@ -321,7 +220,7 @@ static void osc_page_delete(const struct lu_env *env,
 	osc_page_transfer_put(env, opg);
 	rc = osc_teardown_async_page(env, obj, opg);
 	if (rc) {
-		CL_PAGE_DEBUG(D_ERROR, env, cl_page_top(slice->cpl_page),
+		CL_PAGE_DEBUG(D_ERROR, env, slice->cpl_page,
 			      "Trying to teardown failed: %d\n", rc);
 		LASSERT(0);
 	}
@@ -334,7 +233,19 @@ static void osc_page_delete(const struct lu_env *env,
 	}
 	spin_unlock(&obj->oo_seatbelt);
 
-	osc_lru_del(osc_cli(obj), opg, true);
+	osc_lru_del(osc_cli(obj), opg);
+
+	if (slice->cpl_page->cp_type == CPT_CACHEABLE) {
+		void *value;
+
+		spin_lock(&obj->oo_tree_lock);
+		value = radix_tree_delete(&obj->oo_tree, osc_index(opg));
+		if (value)
+			--obj->oo_npages;
+		spin_unlock(&obj->oo_tree_lock);
+
+		LASSERT(ergo(value, value == opg));
+	}
 }
 
 static void osc_page_clip(const struct lu_env *env,
@@ -382,28 +293,16 @@ static int osc_page_flush(const struct lu_env *env,
 }
 
 static const struct cl_page_operations osc_page_ops = {
-	.cpo_fini	  = osc_page_fini,
 	.cpo_print	 = osc_page_print,
 	.cpo_delete	= osc_page_delete,
 	.cpo_is_under_lock = osc_page_is_under_lock,
-	.cpo_disown	= osc_page_disown,
-	.io = {
-		[CRT_READ] = {
-			.cpo_cache_add  = osc_page_fail,
-			.cpo_completion = osc_page_completion_read
-		},
-		[CRT_WRITE] = {
-			.cpo_cache_add  = osc_page_cache_add,
-			.cpo_completion = osc_page_completion_write
-		}
-	},
 	.cpo_clip	   = osc_page_clip,
 	.cpo_cancel	 = osc_page_cancel,
 	.cpo_flush	  = osc_page_flush
 };
 
 int osc_page_init(const struct lu_env *env, struct cl_object *obj,
-		  struct cl_page *page, struct page *vmpage)
+		  struct cl_page *page, pgoff_t index)
 {
 	struct osc_object *osc = cl2osc(obj);
 	struct osc_page *opg = cl_object_page_slice(obj, page);
@@ -412,13 +311,14 @@ int osc_page_init(const struct lu_env *env, struct cl_object *obj,
 	opg->ops_from = 0;
 	opg->ops_to = PAGE_SIZE;
 
-	result = osc_prep_async_page(osc, opg, vmpage,
-				     cl_offset(obj, page->cp_index));
+	result = osc_prep_async_page(osc, opg, page->cp_vmpage,
+				     cl_offset(obj, index));
 	if (result == 0) {
 		struct osc_io *oio = osc_env_io(env);
 
 		opg->ops_srvlock = osc_io_srvlock(oio);
-		cl_page_slice_add(page, &opg->ops_cl, obj, &osc_page_ops);
+		cl_page_slice_add(page, &opg->ops_cl, obj, index,
+				  &osc_page_ops);
 	}
 	/*
 	 * Cannot assert osc_page_protected() here as read-ahead
@@ -431,12 +331,47 @@ int osc_page_init(const struct lu_env *env, struct cl_object *obj,
 	INIT_LIST_HEAD(&opg->ops_lru);
 
 	/* reserve an LRU space for this page */
-	if (page->cp_type == CPT_CACHEABLE && result == 0)
+	if (page->cp_type == CPT_CACHEABLE && result == 0) {
 		result = osc_lru_reserve(env, osc, opg);
+		if (result == 0) {
+			spin_lock(&osc->oo_tree_lock);
+			result = radix_tree_insert(&osc->oo_tree, index, opg);
+			if (result == 0)
+				++osc->oo_npages;
+			spin_unlock(&osc->oo_tree_lock);
+			LASSERT(result == 0);
+		}
+	}
 
 	return result;
 }
 
+int osc_over_unstable_soft_limit(struct client_obd *cli)
+{
+	long obd_upages, obd_dpages, osc_upages;
+
+	/* Can't check cli->cl_unstable_count, therefore, no soft limit */
+	if (!cli)
+		return 0;
+
+	obd_upages = atomic_read(&obd_unstable_pages);
+	obd_dpages = atomic_read(&obd_dirty_pages);
+
+	osc_upages = atomic_read(&cli->cl_unstable_count);
+
+	/*
+	 * obd_max_dirty_pages is the max number of (dirty + unstable)
+	 * pages allowed at any given time. To simulate an unstable page
+	 * only limit, we subtract the current number of dirty pages
+	 * from this max. This difference is roughly the amount of pages
+	 * currently available for unstable pages. Thus, the soft limit
+	 * is half of that difference. Check osc_upages to ensure we don't
+	 * set SOFT_SYNC for OSCs without any outstanding unstable pages.
+	 */
+	return osc_upages &&
+	       obd_upages >= (obd_max_dirty_pages - obd_dpages) / 2;
+}
+
 /**
  * Helper function called by osc_io_submit() for every page in an immediate
  * transfer (i.e., transferred synchronously).
@@ -460,6 +395,9 @@ void osc_page_submit(const struct lu_env *env, struct osc_page *opg,
 	oap->oap_count = opg->ops_to - opg->ops_from;
 	oap->oap_brw_flags = brw_flags | OBD_BRW_SYNC;
 
+	if (osc_over_unstable_soft_limit(oap->oap_cli))
+		oap->oap_brw_flags |= OBD_BRW_SOFT_SYNC;
+
 	if (!client_is_remote(osc_export(obj)) &&
 	    capable(CFS_CAP_SYS_RESOURCE)) {
 		oap->oap_brw_flags |= OBD_BRW_NOQUOTA;
@@ -483,13 +421,12 @@ void osc_page_submit(const struct lu_env *env, struct osc_page *opg,
  */
 
 static DECLARE_WAIT_QUEUE_HEAD(osc_lru_waitq);
-static atomic_t osc_lru_waiters = ATOMIC_INIT(0);
 /* LRU pages are freed in batch mode. OSC should at least free this
  * number of pages to avoid running out of LRU budget, and..
  */
 static const int lru_shrink_min = 2 << (20 - PAGE_SHIFT);  /* 2M */
 /* free this number at most otherwise it will take too long time to finish. */
-static const int lru_shrink_max = 32 << (20 - PAGE_SHIFT); /* 32M */
+static const int lru_shrink_max = 8 << (20 - PAGE_SHIFT); /* 8M */
 
 /* Check if we can free LRU slots from this OSC. If there exists LRU waiters,
  * we should free slots aggressively. In this way, slots are freed in a steady
@@ -500,65 +437,142 @@ static const int lru_shrink_max = 32 << (20 - PAGE_SHIFT); /* 32M */
 static int osc_cache_too_much(struct client_obd *cli)
 {
 	struct cl_client_cache *cache = cli->cl_cache;
-	int pages = atomic_read(&cli->cl_lru_in_list) >> 1;
+	int pages = atomic_read(&cli->cl_lru_in_list);
+	unsigned long budget;
 
-	if (atomic_read(&osc_lru_waiters) > 0 &&
-	    atomic_read(cli->cl_lru_left) < lru_shrink_max)
-		/* drop lru pages aggressively */
-		return min(pages, lru_shrink_max);
+	budget = cache->ccc_lru_max / atomic_read(&cache->ccc_users);
 
 	/* if it's going to run out LRU slots, we should free some, but not
 	 * too much to maintain fairness among OSCs.
 	 */
 	if (atomic_read(cli->cl_lru_left) < cache->ccc_lru_max >> 4) {
-		unsigned long tmp;
+		if (pages >= budget)
+			return lru_shrink_max;
+		else if (pages >= budget / 2)
+			return lru_shrink_min;
+	} else if (pages >= budget * 2) {
+		return lru_shrink_min;
+	}
+	return 0;
+}
 
-		tmp = cache->ccc_lru_max / atomic_read(&cache->ccc_users);
-		if (pages > tmp)
-			return min(pages, lru_shrink_max);
+int lru_queue_work(const struct lu_env *env, void *data)
+{
+	struct client_obd *cli = data;
 
-		return pages > lru_shrink_min ? lru_shrink_min : 0;
-	}
+	CDEBUG(D_CACHE, "Run LRU work for client obd %p.\n", cli);
+
+	if (osc_cache_too_much(cli))
+		osc_lru_shrink(env, cli, lru_shrink_max, true);
 
 	return 0;
 }
 
-/* Return how many pages are not discarded in @pvec. */
-static int discard_pagevec(const struct lu_env *env, struct cl_io *io,
-			   struct cl_page **pvec, int max_index)
+void osc_lru_add_batch(struct client_obd *cli, struct list_head *plist)
+{
+	LIST_HEAD(lru);
+	struct osc_async_page *oap;
+	int npages = 0;
+
+	list_for_each_entry(oap, plist, oap_pending_item) {
+		struct osc_page *opg = oap2osc_page(oap);
+
+		if (!opg->ops_in_lru)
+			continue;
+
+		++npages;
+		LASSERT(list_empty(&opg->ops_lru));
+		list_add(&opg->ops_lru, &lru);
+	}
+
+	if (npages > 0) {
+		spin_lock(&cli->cl_lru_list_lock);
+		list_splice_tail(&lru, &cli->cl_lru_list);
+		atomic_sub(npages, &cli->cl_lru_busy);
+		atomic_add(npages, &cli->cl_lru_in_list);
+		spin_unlock(&cli->cl_lru_list_lock);
+
+		/* XXX: May set force to be true for better performance */
+		if (osc_cache_too_much(cli))
+			(void)ptlrpcd_queue_work(cli->cl_lru_work);
+	}
+}
+
+static void __osc_lru_del(struct client_obd *cli, struct osc_page *opg)
+{
+	LASSERT(atomic_read(&cli->cl_lru_in_list) > 0);
+	list_del_init(&opg->ops_lru);
+	atomic_dec(&cli->cl_lru_in_list);
+}
+
+/**
+ * Page is being destroyed. The page may be not in LRU list, if the transfer
+ * has never finished(error occurred).
+ */
+static void osc_lru_del(struct client_obd *cli, struct osc_page *opg)
+{
+	if (opg->ops_in_lru) {
+		spin_lock(&cli->cl_lru_list_lock);
+		if (!list_empty(&opg->ops_lru)) {
+			__osc_lru_del(cli, opg);
+		} else {
+			LASSERT(atomic_read(&cli->cl_lru_busy) > 0);
+			atomic_dec(&cli->cl_lru_busy);
+		}
+		spin_unlock(&cli->cl_lru_list_lock);
+
+		atomic_inc(cli->cl_lru_left);
+		/* this is a great place to release more LRU pages if
+		 * this osc occupies too many LRU pages and kernel is
+		 * stealing one of them.
+		 */
+		if (!memory_pressure_get())
+			(void)ptlrpcd_queue_work(cli->cl_lru_work);
+		wake_up(&osc_lru_waitq);
+	} else {
+		LASSERT(list_empty(&opg->ops_lru));
+	}
+}
+
+/**
+ * Delete page from LRUlist for redirty.
+ */
+static void osc_lru_use(struct client_obd *cli, struct osc_page *opg)
+{
+	/* If page is being transferred for the first time,
+	 * ops_lru should be empty
+	 */
+	if (opg->ops_in_lru && !list_empty(&opg->ops_lru)) {
+		spin_lock(&cli->cl_lru_list_lock);
+		__osc_lru_del(cli, opg);
+		spin_unlock(&cli->cl_lru_list_lock);
+		atomic_inc(&cli->cl_lru_busy);
+	}
+}
+
+static void discard_pagevec(const struct lu_env *env, struct cl_io *io,
+			    struct cl_page **pvec, int max_index)
 {
-	int count;
 	int i;
 
-	for (count = 0, i = 0; i < max_index; i++) {
+	for (i = 0; i < max_index; i++) {
 		struct cl_page *page = pvec[i];
 
-		if (cl_page_own_try(env, io, page) == 0) {
-			/* free LRU page only if nobody is using it.
-			 * This check is necessary to avoid freeing the pages
-			 * having already been removed from LRU and pinned
-			 * for IO.
-			 */
-			if (!cl_page_in_use(page)) {
-				cl_page_unmap(env, io, page);
-				cl_page_discard(env, io, page);
-				++count;
-			}
-			cl_page_disown(env, io, page);
-		}
+		LASSERT(cl_page_is_owned(page, io));
+		cl_page_discard(env, io, page);
+		cl_page_disown(env, io, page);
 		cl_page_put(env, page);
+
 		pvec[i] = NULL;
 	}
-	return max_index - count;
 }
 
 /**
  * Drop @target of pages from LRU at most.
  */
-int osc_lru_shrink(struct client_obd *cli, int target)
+int osc_lru_shrink(const struct lu_env *env, struct client_obd *cli,
+		   int target, bool force)
 {
-	struct cl_env_nest nest;
-	struct lu_env *env;
 	struct cl_io *io;
 	struct cl_object *clobj = NULL;
 	struct cl_page **pvec;
@@ -573,23 +587,31 @@ int osc_lru_shrink(struct client_obd *cli, int target)
 	if (atomic_read(&cli->cl_lru_in_list) == 0 || target <= 0)
 		return 0;
 
-	env = cl_env_nested_get(&nest);
-	if (IS_ERR(env))
-		return PTR_ERR(env);
+	if (!force) {
+		if (atomic_read(&cli->cl_lru_shrinkers) > 0)
+			return -EBUSY;
 
-	pvec = osc_env_info(env)->oti_pvec;
+		if (atomic_inc_return(&cli->cl_lru_shrinkers) > 1) {
+			atomic_dec(&cli->cl_lru_shrinkers);
+			return -EBUSY;
+		}
+	} else {
+		atomic_inc(&cli->cl_lru_shrinkers);
+	}
+
+	pvec = (struct cl_page **)osc_env_info(env)->oti_pvec;
 	io = &osc_env_info(env)->oti_io;
 
-	client_obd_list_lock(&cli->cl_lru_list_lock);
-	atomic_inc(&cli->cl_lru_shrinkers);
+	spin_lock(&cli->cl_lru_list_lock);
 	maxscan = min(target << 1, atomic_read(&cli->cl_lru_in_list));
 	list_for_each_entry_safe(opg, temp, &cli->cl_lru_list, ops_lru) {
 		struct cl_page *page;
+		bool will_free = false;
 
 		if (--maxscan < 0)
 			break;
 
-		page = cl_page_top(opg->ops_cl.cpl_page);
+		page = opg->ops_cl.cpl_page;
 		if (cl_page_in_use_noref(page)) {
 			list_move_tail(&opg->ops_lru, &cli->cl_lru_list);
 			continue;
@@ -600,10 +622,10 @@ int osc_lru_shrink(struct client_obd *cli, int target)
 			struct cl_object *tmp = page->cp_obj;
 
 			cl_object_get(tmp);
-			client_obd_list_unlock(&cli->cl_lru_list_lock);
+			spin_unlock(&cli->cl_lru_list_lock);
 
 			if (clobj) {
-				count -= discard_pagevec(env, io, pvec, index);
+				discard_pagevec(env, io, pvec, index);
 				index = 0;
 
 				cl_io_fini(env, io);
@@ -616,7 +638,7 @@ int osc_lru_shrink(struct client_obd *cli, int target)
 			io->ci_ignore_layout = 1;
 			rc = cl_io_init(env, io, CIT_MISC, clobj);
 
-			client_obd_list_lock(&cli->cl_lru_list_lock);
+			spin_lock(&cli->cl_lru_list_lock);
 
 			if (rc != 0)
 				break;
@@ -625,98 +647,54 @@ int osc_lru_shrink(struct client_obd *cli, int target)
 			continue;
 		}
 
-		/* move this page to the end of list as it will be discarded
-		 * soon. The page will be finally removed from LRU list in
-		 * osc_page_delete().
-		 */
-		list_move_tail(&opg->ops_lru, &cli->cl_lru_list);
+		if (cl_page_own_try(env, io, page) == 0) {
+			if (!cl_page_in_use_noref(page)) {
+				/* remove it from lru list earlier to avoid
+				 * lock contention
+				 */
+				__osc_lru_del(cli, opg);
+				opg->ops_in_lru = 0; /* will be discarded */
+
+				cl_page_get(page);
+				will_free = true;
+			} else {
+				cl_page_disown(env, io, page);
+			}
+		}
 
-		/* it's okay to grab a refcount here w/o holding lock because
-		 * it has to grab cl_lru_list_lock to delete the page.
-		 */
-		cl_page_get(page);
-		pvec[index++] = page;
-		if (++count >= target)
-			break;
+		if (!will_free) {
+			list_move_tail(&opg->ops_lru, &cli->cl_lru_list);
+			continue;
+		}
 
+		/* Don't discard and free the page with cl_lru_list held */
+		pvec[index++] = page;
 		if (unlikely(index == OTI_PVEC_SIZE)) {
-			client_obd_list_unlock(&cli->cl_lru_list_lock);
-			count -= discard_pagevec(env, io, pvec, index);
+			spin_unlock(&cli->cl_lru_list_lock);
+			discard_pagevec(env, io, pvec, index);
 			index = 0;
 
-			client_obd_list_lock(&cli->cl_lru_list_lock);
+			spin_lock(&cli->cl_lru_list_lock);
 		}
+
+		if (++count >= target)
+			break;
 	}
-	client_obd_list_unlock(&cli->cl_lru_list_lock);
+	spin_unlock(&cli->cl_lru_list_lock);
 
 	if (clobj) {
-		count -= discard_pagevec(env, io, pvec, index);
+		discard_pagevec(env, io, pvec, index);
 
 		cl_io_fini(env, io);
 		cl_object_put(env, clobj);
 	}
-	cl_env_nested_put(&nest, env);
 
 	atomic_dec(&cli->cl_lru_shrinkers);
-	return count > 0 ? count : rc;
-}
-
-static void osc_lru_add(struct client_obd *cli, struct osc_page *opg)
-{
-	bool wakeup = false;
-
-	if (!opg->ops_in_lru)
-		return;
-
-	atomic_dec(&cli->cl_lru_busy);
-	client_obd_list_lock(&cli->cl_lru_list_lock);
-	if (list_empty(&opg->ops_lru)) {
-		list_move_tail(&opg->ops_lru, &cli->cl_lru_list);
-		atomic_inc_return(&cli->cl_lru_in_list);
-		wakeup = atomic_read(&osc_lru_waiters) > 0;
-	}
-	client_obd_list_unlock(&cli->cl_lru_list_lock);
-
-	if (wakeup) {
-		osc_lru_shrink(cli, osc_cache_too_much(cli));
+	if (count > 0) {
+		atomic_add(count, cli->cl_lru_left);
 		wake_up_all(&osc_lru_waitq);
 	}
-}
-
-/* delete page from LRUlist. The page can be deleted from LRUlist for two
- * reasons: redirtied or deleted from page cache.
- */
-static void osc_lru_del(struct client_obd *cli, struct osc_page *opg, bool del)
-{
-	if (opg->ops_in_lru) {
-		client_obd_list_lock(&cli->cl_lru_list_lock);
-		if (!list_empty(&opg->ops_lru)) {
-			LASSERT(atomic_read(&cli->cl_lru_in_list) > 0);
-			list_del_init(&opg->ops_lru);
-			atomic_dec(&cli->cl_lru_in_list);
-			if (!del)
-				atomic_inc(&cli->cl_lru_busy);
-		} else if (del) {
-			LASSERT(atomic_read(&cli->cl_lru_busy) > 0);
-			atomic_dec(&cli->cl_lru_busy);
-		}
-		client_obd_list_unlock(&cli->cl_lru_list_lock);
-		if (del) {
-			atomic_inc(cli->cl_lru_left);
-			/* this is a great place to release more LRU pages if
-			 * this osc occupies too many LRU pages and kernel is
-			 * stealing one of them.
-			 * cl_lru_shrinkers is to avoid recursive call in case
-			 * we're already in the context of osc_lru_shrink().
-			 */
-			if (atomic_read(&cli->cl_lru_shrinkers) == 0 &&
-			    !memory_pressure_get())
-				osc_lru_shrink(cli, osc_cache_too_much(cli));
-			wake_up(&osc_lru_waitq);
-		}
-	} else {
-		LASSERT(list_empty(&opg->ops_lru));
-	}
+	return count > 0 ? count : rc;
 }
 
 static inline int max_to_shrink(struct client_obd *cli)
@@ -724,19 +702,28 @@ static inline int max_to_shrink(struct client_obd *cli)
 	return min(atomic_read(&cli->cl_lru_in_list) >> 1, lru_shrink_max);
 }
 
-static int osc_lru_reclaim(struct client_obd *cli)
+int osc_lru_reclaim(struct client_obd *cli)
 {
+	struct cl_env_nest nest;
+	struct lu_env *env;
 	struct cl_client_cache *cache = cli->cl_cache;
 	int max_scans;
-	int rc;
+	int rc = 0;
 
 	LASSERT(cache);
 
-	rc = osc_lru_shrink(cli, lru_shrink_min);
+	env = cl_env_nested_get(&nest);
+	if (IS_ERR(env))
+		return 0;
+
+	rc = osc_lru_shrink(env, cli, osc_cache_too_much(cli), false);
 	if (rc != 0) {
+		if (rc == -EBUSY)
+			rc = 0;
+
 		CDEBUG(D_CACHE, "%s: Free %d pages from own LRU: %p.\n",
 		       cli->cl_import->imp_obd->obd_name, rc, cli);
-		return rc;
+		goto out;
 	}
 
 	CDEBUG(D_CACHE, "%s: cli %p no free slots, pages: %d, busy: %d.\n",
@@ -764,10 +751,11 @@ static int osc_lru_reclaim(struct client_obd *cli)
 		       atomic_read(&cli->cl_lru_busy));
 
 		list_move_tail(&cli->cl_lru_osc, &cache->ccc_lru);
-		if (atomic_read(&cli->cl_lru_in_list) > 0) {
+		if (osc_cache_too_much(cli) > 0) {
 			spin_unlock(&cache->ccc_lru_lock);
 
-			rc = osc_lru_shrink(cli, max_to_shrink(cli));
+			rc = osc_lru_shrink(env, cli, osc_cache_too_much(cli),
+					    true);
 			spin_lock(&cache->ccc_lru_lock);
 			if (rc != 0)
 				break;
@@ -775,6 +763,8 @@ static int osc_lru_reclaim(struct client_obd *cli)
 	}
 	spin_unlock(&cache->ccc_lru_lock);
 
+out:
+	cl_env_nested_put(&nest, env);
 	CDEBUG(D_CACHE, "%s: cli %p freed %d pages.\n",
 	       cli->cl_import->imp_obd->obd_name, cli, rc);
 	return rc;
@@ -784,16 +774,20 @@ static int osc_lru_reserve(const struct lu_env *env, struct osc_object *obj,
 			   struct osc_page *opg)
 {
 	struct l_wait_info lwi = LWI_INTR(LWI_ON_SIGNAL_NOOP, NULL);
+	struct osc_io *oio = osc_env_io(env);
 	struct client_obd *cli = osc_cli(obj);
 	int rc = 0;
 
 	if (!cli->cl_cache) /* shall not be in LRU */
 		return 0;
 
+	if (oio->oi_lru_reserved > 0) {
+		--oio->oi_lru_reserved;
+		goto out;
+	}
+
 	LASSERT(atomic_read(cli->cl_lru_left) >= 0);
 	while (!atomic_add_unless(cli->cl_lru_left, -1, 0)) {
-		int gen;
-
 		/* run out of LRU spaces, try to drop some by itself */
 		rc = osc_lru_reclaim(cli);
 		if (rc < 0)
@@ -803,23 +797,15 @@ static int osc_lru_reserve(const struct lu_env *env, struct osc_object *obj,
 
 		cond_resched();
 
-		/* slowest case, all of caching pages are busy, notifying
-		 * other OSCs that we're lack of LRU slots.
-		 */
-		atomic_inc(&osc_lru_waiters);
-
-		gen = atomic_read(&cli->cl_lru_in_list);
 		rc = l_wait_event(osc_lru_waitq,
-				  atomic_read(cli->cl_lru_left) > 0 ||
-				  (atomic_read(&cli->cl_lru_in_list) > 0 &&
-				   gen != atomic_read(&cli->cl_lru_in_list)),
+				  atomic_read(cli->cl_lru_left) > 0,
 				  &lwi);
 
-		atomic_dec(&osc_lru_waiters);
 		if (rc < 0)
 			break;
 	}
 
+out:
 	if (rc >= 0) {
 		atomic_inc(&cli->cl_lru_busy);
 		opg->ops_in_lru = 1;
diff --git a/drivers/staging/lustre/lustre/osc/osc_request.c b/drivers/staging/lustre/lustre/osc/osc_request.c
index 30526ebca..47417f88f 100644
--- a/drivers/staging/lustre/lustre/osc/osc_request.c
+++ b/drivers/staging/lustre/lustre/osc/osc_request.c
@@ -92,12 +92,13 @@ struct osc_fsync_args {
 
 struct osc_enqueue_args {
 	struct obd_export	*oa_exp;
+	enum ldlm_type		oa_type;
+	enum ldlm_mode		oa_mode;
 	__u64		    *oa_flags;
-	obd_enqueue_update_f      oa_upcall;
+	osc_enqueue_upcall_f	oa_upcall;
 	void		     *oa_cookie;
 	struct ost_lvb	   *oa_lvb;
-	struct lustre_handle     *oa_lockh;
-	struct ldlm_enqueue_info *oa_ei;
+	struct lustre_handle	oa_lockh;
 	unsigned int	      oa_agl:1;
 };
 
@@ -801,21 +802,24 @@ static void osc_announce_cached(struct client_obd *cli, struct obdo *oa,
 	LASSERT(!(oa->o_valid & bits));
 
 	oa->o_valid |= bits;
-	client_obd_list_lock(&cli->cl_loi_list_lock);
+	spin_lock(&cli->cl_loi_list_lock);
 	oa->o_dirty = cli->cl_dirty;
 	if (unlikely(cli->cl_dirty - cli->cl_dirty_transit >
 		     cli->cl_dirty_max)) {
 		CERROR("dirty %lu - %lu > dirty_max %lu\n",
 		       cli->cl_dirty, cli->cl_dirty_transit, cli->cl_dirty_max);
 		oa->o_undirty = 0;
-	} else if (unlikely(atomic_read(&obd_dirty_pages) -
+	} else if (unlikely(atomic_read(&obd_unstable_pages) +
+			    atomic_read(&obd_dirty_pages) -
 			    atomic_read(&obd_dirty_transit_pages) >
 			    (long)(obd_max_dirty_pages + 1))) {
 		/* The atomic_read() allowing the atomic_inc() are
 		 * not covered by a lock thus they may safely race and trip
 		 * this CERROR() unless we add in a small fudge factor (+1).
 		 */
-		CERROR("dirty %d - %d > system dirty_max %d\n",
+		CERROR("%s: dirty %d + %d - %d > system dirty_max %d\n",
+		       cli->cl_import->imp_obd->obd_name,
+		       atomic_read(&obd_unstable_pages),
 		       atomic_read(&obd_dirty_pages),
 		       atomic_read(&obd_dirty_transit_pages),
 		       obd_max_dirty_pages);
@@ -833,10 +837,9 @@ static void osc_announce_cached(struct client_obd *cli, struct obdo *oa,
 	oa->o_grant = cli->cl_avail_grant + cli->cl_reserved_grant;
 	oa->o_dropped = cli->cl_lost_grant;
 	cli->cl_lost_grant = 0;
-	client_obd_list_unlock(&cli->cl_loi_list_lock);
+	spin_unlock(&cli->cl_loi_list_lock);
 	CDEBUG(D_CACHE, "dirty: %llu undirty: %u dropped %u grant: %llu\n",
 	       oa->o_dirty, oa->o_undirty, oa->o_dropped, oa->o_grant);
-
 }
 
 void osc_update_next_shrink(struct client_obd *cli)
@@ -849,9 +852,9 @@ void osc_update_next_shrink(struct client_obd *cli)
 
 static void __osc_update_grant(struct client_obd *cli, u64 grant)
 {
-	client_obd_list_lock(&cli->cl_loi_list_lock);
+	spin_lock(&cli->cl_loi_list_lock);
 	cli->cl_avail_grant += grant;
-	client_obd_list_unlock(&cli->cl_loi_list_lock);
+	spin_unlock(&cli->cl_loi_list_lock);
 }
 
 static void osc_update_grant(struct client_obd *cli, struct ost_body *body)
@@ -889,10 +892,10 @@ out:
 
 static void osc_shrink_grant_local(struct client_obd *cli, struct obdo *oa)
 {
-	client_obd_list_lock(&cli->cl_loi_list_lock);
+	spin_lock(&cli->cl_loi_list_lock);
 	oa->o_grant = cli->cl_avail_grant / 4;
 	cli->cl_avail_grant -= oa->o_grant;
-	client_obd_list_unlock(&cli->cl_loi_list_lock);
+	spin_unlock(&cli->cl_loi_list_lock);
 	if (!(oa->o_valid & OBD_MD_FLFLAGS)) {
 		oa->o_valid |= OBD_MD_FLFLAGS;
 		oa->o_flags = 0;
@@ -911,10 +914,10 @@ static int osc_shrink_grant(struct client_obd *cli)
 	__u64 target_bytes = (cli->cl_max_rpcs_in_flight + 1) *
 			     (cli->cl_max_pages_per_rpc << PAGE_SHIFT);
 
-	client_obd_list_lock(&cli->cl_loi_list_lock);
+	spin_lock(&cli->cl_loi_list_lock);
 	if (cli->cl_avail_grant <= target_bytes)
 		target_bytes = cli->cl_max_pages_per_rpc << PAGE_SHIFT;
-	client_obd_list_unlock(&cli->cl_loi_list_lock);
+	spin_unlock(&cli->cl_loi_list_lock);
 
 	return osc_shrink_grant_to_target(cli, target_bytes);
 }
@@ -924,7 +927,7 @@ int osc_shrink_grant_to_target(struct client_obd *cli, __u64 target_bytes)
 	int rc = 0;
 	struct ost_body	*body;
 
-	client_obd_list_lock(&cli->cl_loi_list_lock);
+	spin_lock(&cli->cl_loi_list_lock);
 	/* Don't shrink if we are already above or below the desired limit
 	 * We don't want to shrink below a single RPC, as that will negatively
 	 * impact block allocation and long-term performance.
@@ -933,10 +936,10 @@ int osc_shrink_grant_to_target(struct client_obd *cli, __u64 target_bytes)
 		target_bytes = cli->cl_max_pages_per_rpc << PAGE_SHIFT;
 
 	if (target_bytes >= cli->cl_avail_grant) {
-		client_obd_list_unlock(&cli->cl_loi_list_lock);
+		spin_unlock(&cli->cl_loi_list_lock);
 		return 0;
 	}
-	client_obd_list_unlock(&cli->cl_loi_list_lock);
+	spin_unlock(&cli->cl_loi_list_lock);
 
 	body = kzalloc(sizeof(*body), GFP_NOFS);
 	if (!body)
@@ -944,10 +947,10 @@ int osc_shrink_grant_to_target(struct client_obd *cli, __u64 target_bytes)
 
 	osc_announce_cached(cli, &body->oa, 0);
 
-	client_obd_list_lock(&cli->cl_loi_list_lock);
+	spin_lock(&cli->cl_loi_list_lock);
 	body->oa.o_grant = cli->cl_avail_grant - target_bytes;
 	cli->cl_avail_grant = target_bytes;
-	client_obd_list_unlock(&cli->cl_loi_list_lock);
+	spin_unlock(&cli->cl_loi_list_lock);
 	if (!(body->oa.o_valid & OBD_MD_FLFLAGS)) {
 		body->oa.o_valid |= OBD_MD_FLFLAGS;
 		body->oa.o_flags = 0;
@@ -1035,7 +1038,7 @@ static void osc_init_grant(struct client_obd *cli, struct obd_connect_data *ocd)
 	 * race is tolerable here: if we're evicted, but imp_state already
 	 * left EVICTED state, then cl_dirty must be 0 already.
 	 */
-	client_obd_list_lock(&cli->cl_loi_list_lock);
+	spin_lock(&cli->cl_loi_list_lock);
 	if (cli->cl_import->imp_state == LUSTRE_IMP_EVICTED)
 		cli->cl_avail_grant = ocd->ocd_grant;
 	else
@@ -1053,7 +1056,7 @@ static void osc_init_grant(struct client_obd *cli, struct obd_connect_data *ocd)
 
 	/* determine the appropriate chunk size used by osc_extent. */
 	cli->cl_chunkbits = max_t(int, PAGE_SHIFT, ocd->ocd_blocksize);
-	client_obd_list_unlock(&cli->cl_loi_list_lock);
+	spin_unlock(&cli->cl_loi_list_lock);
 
 	CDEBUG(D_CACHE, "%s, setting cl_avail_grant: %ld cl_lost_grant: %ld chunk bits: %d\n",
 	       cli->cl_import->imp_obd->obd_name,
@@ -1082,7 +1085,7 @@ static void handle_short_read(int nob_read, u32 page_count,
 		if (pga[i]->count > nob_read) {
 			/* EOF inside this page */
 			ptr = kmap(pga[i]->pg) +
-				(pga[i]->off & ~CFS_PAGE_MASK);
+				(pga[i]->off & ~PAGE_MASK);
 			memset(ptr + nob_read, 0, pga[i]->count - nob_read);
 			kunmap(pga[i]->pg);
 			page_count--;
@@ -1097,7 +1100,7 @@ static void handle_short_read(int nob_read, u32 page_count,
 
 	/* zero remaining pages */
 	while (page_count-- > 0) {
-		ptr = kmap(pga[i]->pg) + (pga[i]->off & ~CFS_PAGE_MASK);
+		ptr = kmap(pga[i]->pg) + (pga[i]->off & ~PAGE_MASK);
 		memset(ptr, 0, pga[i]->count);
 		kunmap(pga[i]->pg);
 		i++;
@@ -1144,7 +1147,8 @@ static inline int can_merge_pages(struct brw_page *p1, struct brw_page *p2)
 {
 	if (p1->flag != p2->flag) {
 		unsigned mask = ~(OBD_BRW_FROM_GRANT | OBD_BRW_NOCACHE |
-				  OBD_BRW_SYNC | OBD_BRW_ASYNC|OBD_BRW_NOQUOTA);
+				  OBD_BRW_SYNC | OBD_BRW_ASYNC |
+				  OBD_BRW_NOQUOTA | OBD_BRW_SOFT_SYNC);
 
 		/* warn if we try to combine flags that we don't know to be
 		 * safe to combine
@@ -1188,32 +1192,29 @@ static u32 osc_checksum_bulk(int nob, u32 pg_count,
 		if (i == 0 && opc == OST_READ &&
 		    OBD_FAIL_CHECK(OBD_FAIL_OSC_CHECKSUM_RECEIVE)) {
 			unsigned char *ptr = kmap(pga[i]->pg);
-			int off = pga[i]->off & ~CFS_PAGE_MASK;
+			int off = pga[i]->off & ~PAGE_MASK;
 
 			memcpy(ptr + off, "bad1", min(4, nob));
 			kunmap(pga[i]->pg);
 		}
 		cfs_crypto_hash_update_page(hdesc, pga[i]->pg,
-					    pga[i]->off & ~CFS_PAGE_MASK,
+					    pga[i]->off & ~PAGE_MASK,
 				  count);
 		CDEBUG(D_PAGE,
 		       "page %p map %p index %lu flags %lx count %u priv %0lx: off %d\n",
 		       pga[i]->pg, pga[i]->pg->mapping, pga[i]->pg->index,
 		       (long)pga[i]->pg->flags, page_count(pga[i]->pg),
 		       page_private(pga[i]->pg),
-		       (int)(pga[i]->off & ~CFS_PAGE_MASK));
+		       (int)(pga[i]->off & ~PAGE_MASK));
 
 		nob -= pga[i]->count;
 		pg_count--;
 		i++;
 	}
 
-	bufsize = 4;
+	bufsize = sizeof(cksum);
 	err = cfs_crypto_hash_final(hdesc, (unsigned char *)&cksum, &bufsize);
 
-	if (err)
-		cfs_crypto_hash_final(hdesc, NULL, NULL);
-
 	/* For sending we only compute the wrong checksum instead
 	 * of corrupting the data so it is still correct on a redo
 	 */
@@ -1312,7 +1313,7 @@ static int osc_brw_prep_request(int cmd, struct client_obd *cli,
 	pg_prev = pga[0];
 	for (requested_nob = i = 0; i < page_count; i++, niobuf++) {
 		struct brw_page *pg = pga[i];
-		int poff = pg->off & ~CFS_PAGE_MASK;
+		int poff = pg->off & ~PAGE_MASK;
 
 		LASSERT(pg->count > 0);
 		/* make sure there is no gap in the middle of page array */
@@ -1658,6 +1659,7 @@ static int osc_brw_redo_request(struct ptlrpc_request *request,
 	aa->aa_resends++;
 	new_req->rq_interpret_reply = request->rq_interpret_reply;
 	new_req->rq_async_args = request->rq_async_args;
+	new_req->rq_commit_cb = request->rq_commit_cb;
 	/* cap resend delay to the current request timeout, this is similar to
 	 * what ptlrpc does (see after_reply())
 	 */
@@ -1737,7 +1739,6 @@ static int brw_interpret(const struct lu_env *env,
 	struct osc_brw_async_args *aa = data;
 	struct osc_extent *ext;
 	struct osc_extent *tmp;
-	struct cl_object *obj = NULL;
 	struct client_obd *cli = aa->aa_cli;
 
 	rc = osc_brw_fini_request(req, rc);
@@ -1766,24 +1767,17 @@ static int brw_interpret(const struct lu_env *env,
 			rc = -EIO;
 	}
 
-	list_for_each_entry_safe(ext, tmp, &aa->aa_exts, oe_link) {
-		if (!obj && rc == 0) {
-			obj = osc2cl(ext->oe_obj);
-			cl_object_get(obj);
-		}
-
-		list_del_init(&ext->oe_link);
-		osc_extent_finish(env, ext, 1, rc);
-	}
-	LASSERT(list_empty(&aa->aa_exts));
-	LASSERT(list_empty(&aa->aa_oaps));
-
-	if (obj) {
+	if (rc == 0) {
 		struct obdo *oa = aa->aa_oa;
 		struct cl_attr *attr  = &osc_env_info(env)->oti_attr;
 		unsigned long valid = 0;
+		struct cl_object *obj;
+		struct osc_async_page *last;
+
+		last = brw_page2oap(aa->aa_ppga[aa->aa_page_count - 1]);
+		obj = osc2cl(last->oap_obj);
 
-		LASSERT(rc == 0);
+		cl_object_attr_lock(obj);
 		if (oa->o_valid & OBD_MD_FLBLOCKS) {
 			attr->cat_blocks = oa->o_blocks;
 			valid |= CAT_BLOCKS;
@@ -1800,21 +1794,45 @@ static int brw_interpret(const struct lu_env *env,
 			attr->cat_ctime = oa->o_ctime;
 			valid |= CAT_CTIME;
 		}
-		if (valid != 0) {
-			cl_object_attr_lock(obj);
-			cl_object_attr_set(env, obj, attr, valid);
-			cl_object_attr_unlock(obj);
+
+		if (lustre_msg_get_opc(req->rq_reqmsg) == OST_WRITE) {
+			struct lov_oinfo *loi = cl2osc(obj)->oo_oinfo;
+			loff_t last_off = last->oap_count + last->oap_obj_off;
+
+			/* Change file size if this is an out of quota or
+			 * direct IO write and it extends the file size
+			 */
+			if (loi->loi_lvb.lvb_size < last_off) {
+				attr->cat_size = last_off;
+				valid |= CAT_SIZE;
+			}
+			/* Extend KMS if it's not a lockless write */
+			if (loi->loi_kms < last_off &&
+			    oap2osc_page(last)->ops_srvlock == 0) {
+				attr->cat_kms = last_off;
+				valid |= CAT_KMS;
+			}
 		}
-		cl_object_put(env, obj);
+
+		if (valid != 0)
+			cl_object_attr_set(env, obj, attr, valid);
+		cl_object_attr_unlock(obj);
 	}
 	kmem_cache_free(obdo_cachep, aa->aa_oa);
 
+	list_for_each_entry_safe(ext, tmp, &aa->aa_exts, oe_link) {
+		list_del_init(&ext->oe_link);
+		osc_extent_finish(env, ext, 1, rc);
+	}
+	LASSERT(list_empty(&aa->aa_exts));
+	LASSERT(list_empty(&aa->aa_oaps));
+
 	cl_req_completion(env, aa->aa_clerq, rc < 0 ? rc :
 			  req->rq_bulk->bd_nob_transferred);
 	osc_release_ppga(aa->aa_ppga, aa->aa_page_count);
 	ptlrpc_lprocfs_brw(req, req->rq_bulk->bd_nob_transferred);
 
-	client_obd_list_lock(&cli->cl_loi_list_lock);
+	spin_lock(&cli->cl_loi_list_lock);
 	/* We need to decrement before osc_ap_completion->osc_wake_cache_waiters
 	 * is called so we know whether to go to sync BRWs or wait for more
 	 * RPCs to complete
@@ -1824,12 +1842,31 @@ static int brw_interpret(const struct lu_env *env,
 	else
 		cli->cl_r_in_flight--;
 	osc_wake_cache_waiters(cli);
-	client_obd_list_unlock(&cli->cl_loi_list_lock);
+	spin_unlock(&cli->cl_loi_list_lock);
 
 	osc_io_unplug(env, cli, NULL);
 	return rc;
 }
 
+static void brw_commit(struct ptlrpc_request *req)
+{
+	spin_lock(&req->rq_lock);
+	/*
+	 * If osc_inc_unstable_pages (via osc_extent_finish) races with
+	 * this called via the rq_commit_cb, I need to ensure
+	 * osc_dec_unstable_pages is still called. Otherwise unstable
+	 * pages may be leaked.
+	 */
+	if (req->rq_unstable) {
+		spin_unlock(&req->rq_lock);
+		osc_dec_unstable_pages(req);
+		spin_lock(&req->rq_lock);
+	} else {
+		req->rq_committed = 1;
+	}
+	spin_unlock(&req->rq_lock);
+}
+
 /**
  * Build an RPC by the list of extent @ext_list. The caller must ensure
  * that the total pages in this list are NOT over max pages per RPC.
@@ -1920,7 +1957,7 @@ int osc_build_rpc(const struct lu_env *env, struct client_obd *cli,
 		pga[i] = &oap->oap_brw_page;
 		pga[i]->off = oap->oap_obj_off + oap->oap_page_off;
 		CDEBUG(0, "put page %p index %lu oap %p flg %x to pga\n",
-		       pga[i]->pg, page_index(oap->oap_page), oap,
+		       pga[i]->pg, oap->oap_page->index, oap,
 		       pga[i]->flag);
 		i++;
 		cl_req_page_add(env, clerq, page);
@@ -1949,6 +1986,7 @@ int osc_build_rpc(const struct lu_env *env, struct client_obd *cli,
 		goto out;
 	}
 
+	req->rq_commit_cb = brw_commit;
 	req->rq_interpret_reply = brw_interpret;
 
 	if (mem_tight != 0)
@@ -1992,7 +2030,7 @@ int osc_build_rpc(const struct lu_env *env, struct client_obd *cli,
 	if (tmp)
 		tmp->oap_request = ptlrpc_request_addref(req);
 
-	client_obd_list_lock(&cli->cl_loi_list_lock);
+	spin_lock(&cli->cl_loi_list_lock);
 	starting_offset >>= PAGE_SHIFT;
 	if (cmd == OBD_BRW_READ) {
 		cli->cl_r_in_flight++;
@@ -2007,7 +2045,7 @@ int osc_build_rpc(const struct lu_env *env, struct client_obd *cli,
 		lprocfs_oh_tally_log2(&cli->cl_write_offset_hist,
 				      starting_offset + 1);
 	}
-	client_obd_list_unlock(&cli->cl_loi_list_lock);
+	spin_unlock(&cli->cl_loi_list_lock);
 
 	DEBUG_REQ(D_INODE, req, "%d pages, aa %p. now %dr/%dw in flight",
 		  page_count, aa, cli->cl_r_in_flight,
@@ -2055,14 +2093,12 @@ static int osc_set_lock_data_with_check(struct ldlm_lock *lock,
 	LASSERT(lock->l_glimpse_ast == einfo->ei_cb_gl);
 
 	lock_res_and_lock(lock);
-	spin_lock(&osc_ast_guard);
 
 	if (!lock->l_ast_data)
 		lock->l_ast_data = data;
 	if (lock->l_ast_data == data)
 		set = 1;
 
-	spin_unlock(&osc_ast_guard);
 	unlock_res_and_lock(lock);
 
 	return set;
@@ -2104,36 +2140,38 @@ static int osc_find_cbdata(struct obd_export *exp, struct lov_stripe_md *lsm,
 	return rc;
 }
 
-static int osc_enqueue_fini(struct ptlrpc_request *req, struct ost_lvb *lvb,
-			    obd_enqueue_update_f upcall, void *cookie,
-			    __u64 *flags, int agl, int rc)
+static int osc_enqueue_fini(struct ptlrpc_request *req,
+			    osc_enqueue_upcall_f upcall, void *cookie,
+			    struct lustre_handle *lockh, enum ldlm_mode mode,
+			    __u64 *flags, int agl, int errcode)
 {
-	int intent = *flags & LDLM_FL_HAS_INTENT;
-
-	if (intent) {
-		/* The request was created before ldlm_cli_enqueue call. */
-		if (rc == ELDLM_LOCK_ABORTED) {
-			struct ldlm_reply *rep;
+	bool intent = *flags & LDLM_FL_HAS_INTENT;
+	int rc;
 
-			rep = req_capsule_server_get(&req->rq_pill,
-						     &RMF_DLM_REP);
+	/* The request was created before ldlm_cli_enqueue call. */
+	if (intent && errcode == ELDLM_LOCK_ABORTED) {
+		struct ldlm_reply *rep;
 
-			rep->lock_policy_res1 =
-				ptlrpc_status_ntoh(rep->lock_policy_res1);
-			if (rep->lock_policy_res1)
-				rc = rep->lock_policy_res1;
-		}
-	}
+		rep = req_capsule_server_get(&req->rq_pill, &RMF_DLM_REP);
 
-	if ((intent != 0 && rc == ELDLM_LOCK_ABORTED && agl == 0) ||
-	    (rc == 0)) {
+		rep->lock_policy_res1 =
+			ptlrpc_status_ntoh(rep->lock_policy_res1);
+		if (rep->lock_policy_res1)
+			errcode = rep->lock_policy_res1;
+		if (!agl)
+			*flags |= LDLM_FL_LVB_READY;
+	} else if (errcode == ELDLM_OK) {
 		*flags |= LDLM_FL_LVB_READY;
-		CDEBUG(D_INODE, "got kms %llu blocks %llu mtime %llu\n",
-		       lvb->lvb_size, lvb->lvb_blocks, lvb->lvb_mtime);
 	}
 
 	/* Call the update callback. */
-	rc = (*upcall)(cookie, rc);
+	rc = (*upcall)(cookie, lockh, errcode);
+	/* release the reference taken in ldlm_cli_enqueue() */
+	if (errcode == ELDLM_LOCK_MATCHED)
+		errcode = ELDLM_OK;
+	if (errcode == ELDLM_OK && lustre_handle_is_used(lockh))
+		ldlm_lock_decref(lockh, mode);
+
 	return rc;
 }
 
@@ -2142,62 +2180,50 @@ static int osc_enqueue_interpret(const struct lu_env *env,
 				 struct osc_enqueue_args *aa, int rc)
 {
 	struct ldlm_lock *lock;
-	struct lustre_handle handle;
-	__u32 mode;
-	struct ost_lvb *lvb;
-	__u32 lvb_len;
-	__u64 *flags = aa->oa_flags;
-
-	/* Make a local copy of a lock handle and a mode, because aa->oa_*
-	 * might be freed anytime after lock upcall has been called.
-	 */
-	lustre_handle_copy(&handle, aa->oa_lockh);
-	mode = aa->oa_ei->ei_mode;
+	struct lustre_handle *lockh = &aa->oa_lockh;
+	enum ldlm_mode mode = aa->oa_mode;
+	struct ost_lvb *lvb = aa->oa_lvb;
+	__u32 lvb_len = sizeof(*lvb);
+	__u64 flags = 0;
+
 
 	/* ldlm_cli_enqueue is holding a reference on the lock, so it must
 	 * be valid.
 	 */
-	lock = ldlm_handle2lock(&handle);
+	lock = ldlm_handle2lock(lockh);
+	LASSERTF(lock, "lockh %llx, req %p, aa %p - client evicted?\n",
+		 lockh->cookie, req, aa);
 
 	/* Take an additional reference so that a blocking AST that
 	 * ldlm_cli_enqueue_fini() might post for a failed lock, is guaranteed
 	 * to arrive after an upcall has been executed by
 	 * osc_enqueue_fini().
 	 */
-	ldlm_lock_addref(&handle, mode);
+	ldlm_lock_addref(lockh, mode);
+
+	/* Let cl_lock_state_wait fail with -ERESTARTSYS to unuse sublocks. */
+	OBD_FAIL_TIMEOUT(OBD_FAIL_LDLM_ENQUEUE_HANG, 2);
 
 	/* Let CP AST to grant the lock first. */
 	OBD_FAIL_TIMEOUT(OBD_FAIL_OSC_CP_ENQ_RACE, 1);
 
-	if (aa->oa_agl && rc == ELDLM_LOCK_ABORTED) {
-		lvb = NULL;
-		lvb_len = 0;
-	} else {
-		lvb = aa->oa_lvb;
-		lvb_len = sizeof(*aa->oa_lvb);
+	if (aa->oa_agl) {
+		LASSERT(!aa->oa_lvb);
+		LASSERT(!aa->oa_flags);
+		aa->oa_flags = &flags;
 	}
 
 	/* Complete obtaining the lock procedure. */
-	rc = ldlm_cli_enqueue_fini(aa->oa_exp, req, aa->oa_ei->ei_type, 1,
-				   mode, flags, lvb, lvb_len, &handle, rc);
+	rc = ldlm_cli_enqueue_fini(aa->oa_exp, req, aa->oa_type, 1,
+				   aa->oa_mode, aa->oa_flags, lvb, lvb_len,
+				   lockh, rc);
 	/* Complete osc stuff. */
-	rc = osc_enqueue_fini(req, aa->oa_lvb, aa->oa_upcall, aa->oa_cookie,
-			      flags, aa->oa_agl, rc);
+	rc = osc_enqueue_fini(req, aa->oa_upcall, aa->oa_cookie, lockh, mode,
+			      aa->oa_flags, aa->oa_agl, rc);
 
 	OBD_FAIL_TIMEOUT(OBD_FAIL_OSC_CP_CANCEL_RACE, 10);
 
-	/* Release the lock for async request. */
-	if (lustre_handle_is_used(&handle) && rc == ELDLM_OK)
-		/*
-		 * Releases a reference taken by ldlm_cli_enqueue(), if it is
-		 * not already released by
-		 * ldlm_cli_enqueue_fini()->failed_lock_cleanup()
-		 */
-		ldlm_lock_decref(&handle, mode);
-
-	LASSERTF(lock, "lockh %p, req %p, aa %p - client evicted?\n",
-		 aa->oa_lockh, req, aa);
-	ldlm_lock_decref(&handle, mode);
+	ldlm_lock_decref(lockh, mode);
 	LDLM_LOCK_PUT(lock);
 	return rc;
 }
@@ -2209,29 +2235,29 @@ struct ptlrpc_request_set *PTLRPCD_SET = (void *)1;
  * other synchronous requests, however keeping some locks and trying to obtain
  * others may take a considerable amount of time in a case of ost failure; and
  * when other sync requests do not get released lock from a client, the client
- * is excluded from the cluster -- such scenarious make the life difficult, so
+ * is evicted from the cluster -- such scenaries make the life difficult, so
  * release locks just after they are obtained.
  */
 int osc_enqueue_base(struct obd_export *exp, struct ldlm_res_id *res_id,
 		     __u64 *flags, ldlm_policy_data_t *policy,
 		     struct ost_lvb *lvb, int kms_valid,
-		     obd_enqueue_update_f upcall, void *cookie,
+		     osc_enqueue_upcall_f upcall, void *cookie,
 		     struct ldlm_enqueue_info *einfo,
-		     struct lustre_handle *lockh,
 		     struct ptlrpc_request_set *rqset, int async, int agl)
 {
 	struct obd_device *obd = exp->exp_obd;
+	struct lustre_handle lockh = { 0 };
 	struct ptlrpc_request *req = NULL;
 	int intent = *flags & LDLM_FL_HAS_INTENT;
-	__u64 match_lvb = (agl != 0 ? 0 : LDLM_FL_LVB_READY);
+	__u64 match_lvb = agl ? 0 : LDLM_FL_LVB_READY;
 	enum ldlm_mode mode;
 	int rc;
 
 	/* Filesystem lock extents are extended to page boundaries so that
 	 * dealing with the page cache is a little smoother.
 	 */
-	policy->l_extent.start -= policy->l_extent.start & ~CFS_PAGE_MASK;
-	policy->l_extent.end |= ~CFS_PAGE_MASK;
+	policy->l_extent.start -= policy->l_extent.start & ~PAGE_MASK;
+	policy->l_extent.end |= ~PAGE_MASK;
 
 	/*
 	 * kms is not valid when either object is completely fresh (so that no
@@ -2259,64 +2285,46 @@ int osc_enqueue_base(struct obd_export *exp, struct ldlm_res_id *res_id,
 	if (einfo->ei_mode == LCK_PR)
 		mode |= LCK_PW;
 	mode = ldlm_lock_match(obd->obd_namespace, *flags | match_lvb, res_id,
-			       einfo->ei_type, policy, mode, lockh, 0);
+			       einfo->ei_type, policy, mode, &lockh, 0);
 	if (mode) {
-		struct ldlm_lock *matched = ldlm_handle2lock(lockh);
+		struct ldlm_lock *matched;
 
-		if ((agl != 0) && !(matched->l_flags & LDLM_FL_LVB_READY)) {
-			/* For AGL, if enqueue RPC is sent but the lock is not
-			 * granted, then skip to process this strpe.
-			 * Return -ECANCELED to tell the caller.
+		if (*flags & LDLM_FL_TEST_LOCK)
+			return ELDLM_OK;
+
+		matched = ldlm_handle2lock(&lockh);
+		if (agl) {
+			/* AGL enqueues DLM locks speculatively. Therefore if
+			 * it already exists a DLM lock, it wll just inform the
+			 * caller to cancel the AGL process for this stripe.
 			 */
-			ldlm_lock_decref(lockh, mode);
+			ldlm_lock_decref(&lockh, mode);
 			LDLM_LOCK_PUT(matched);
 			return -ECANCELED;
-		}
-
-		if (osc_set_lock_data_with_check(matched, einfo)) {
+		} else if (osc_set_lock_data_with_check(matched, einfo)) {
 			*flags |= LDLM_FL_LVB_READY;
-			/* addref the lock only if not async requests and PW
-			 * lock is matched whereas we asked for PR.
-			 */
-			if (!rqset && einfo->ei_mode != mode)
-				ldlm_lock_addref(lockh, LCK_PR);
-			if (intent) {
-				/* I would like to be able to ASSERT here that
-				 * rss <= kms, but I can't, for reasons which
-				 * are explained in lov_enqueue()
-				 */
-			}
-
-			/* We already have a lock, and it's referenced.
-			 *
-			 * At this point, the cl_lock::cll_state is CLS_QUEUING,
-			 * AGL upcall may change it to CLS_HELD directly.
-			 */
-			(*upcall)(cookie, ELDLM_OK);
+			/* We already have a lock, and it's referenced. */
+			(*upcall)(cookie, &lockh, ELDLM_LOCK_MATCHED);
 
-			if (einfo->ei_mode != mode)
-				ldlm_lock_decref(lockh, LCK_PW);
-			else if (rqset)
-				/* For async requests, decref the lock. */
-				ldlm_lock_decref(lockh, einfo->ei_mode);
+			ldlm_lock_decref(&lockh, mode);
 			LDLM_LOCK_PUT(matched);
 			return ELDLM_OK;
+		} else {
+			ldlm_lock_decref(&lockh, mode);
+			LDLM_LOCK_PUT(matched);
 		}
-
-		ldlm_lock_decref(lockh, mode);
-		LDLM_LOCK_PUT(matched);
 	}
 
- no_match:
+no_match:
+	if (*flags & LDLM_FL_TEST_LOCK)
+		return -ENOLCK;
 	if (intent) {
-		LIST_HEAD(cancels);
-
 		req = ptlrpc_request_alloc(class_exp2cliimp(exp),
 					   &RQF_LDLM_ENQUEUE_LVB);
 		if (!req)
 			return -ENOMEM;
 
-		rc = ldlm_prep_enqueue_req(exp, req, &cancels, 0);
+		rc = ldlm_prep_enqueue_req(exp, req, NULL, 0);
 		if (rc) {
 			ptlrpc_request_free(req);
 			return rc;
@@ -2331,21 +2339,31 @@ int osc_enqueue_base(struct obd_export *exp, struct ldlm_res_id *res_id,
 	*flags &= ~LDLM_FL_BLOCK_GRANTED;
 
 	rc = ldlm_cli_enqueue(exp, &req, einfo, res_id, policy, flags, lvb,
-			      sizeof(*lvb), LVB_T_OST, lockh, async);
-	if (rqset) {
+			      sizeof(*lvb), LVB_T_OST, &lockh, async);
+	if (async) {
 		if (!rc) {
 			struct osc_enqueue_args *aa;
 
-			CLASSERT (sizeof(*aa) <= sizeof(req->rq_async_args));
+			CLASSERT(sizeof(*aa) <= sizeof(req->rq_async_args));
 			aa = ptlrpc_req_async_args(req);
-			aa->oa_ei = einfo;
 			aa->oa_exp = exp;
-			aa->oa_flags  = flags;
+			aa->oa_mode = einfo->ei_mode;
+			aa->oa_type = einfo->ei_type;
+			lustre_handle_copy(&aa->oa_lockh, &lockh);
 			aa->oa_upcall = upcall;
 			aa->oa_cookie = cookie;
-			aa->oa_lvb    = lvb;
-			aa->oa_lockh  = lockh;
 			aa->oa_agl    = !!agl;
+			if (!agl) {
+				aa->oa_flags = flags;
+				aa->oa_lvb = lvb;
+			} else {
+				/* AGL is essentially to enqueue an DLM lock
+				* in advance, so we don't care about the
+				* result of AGL enqueue.
+				*/
+				aa->oa_lvb = NULL;
+				aa->oa_flags = NULL;
+			}
 
 			req->rq_interpret_reply =
 				(ptlrpc_interpterer_t)osc_enqueue_interpret;
@@ -2359,7 +2377,8 @@ int osc_enqueue_base(struct obd_export *exp, struct ldlm_res_id *res_id,
 		return rc;
 	}
 
-	rc = osc_enqueue_fini(req, lvb, upcall, cookie, flags, agl, rc);
+	rc = osc_enqueue_fini(req, upcall, cookie, &lockh, einfo->ei_mode,
+			      flags, agl, rc);
 	if (intent)
 		ptlrpc_req_finished(req);
 
@@ -2381,8 +2400,8 @@ int osc_match_base(struct obd_export *exp, struct ldlm_res_id *res_id,
 	/* Filesystem lock extents are extended to page boundaries so that
 	 * dealing with the page cache is a little smoother
 	 */
-	policy->l_extent.start -= policy->l_extent.start & ~CFS_PAGE_MASK;
-	policy->l_extent.end |= ~CFS_PAGE_MASK;
+	policy->l_extent.start -= policy->l_extent.start & ~PAGE_MASK;
+	policy->l_extent.end |= ~PAGE_MASK;
 
 	/* Next, search for already existing extent locks that will cover us */
 	/* If we're trying to read, we also search for an existing PW lock.  The
@@ -2493,7 +2512,7 @@ static int osc_statfs_async(struct obd_export *exp,
 	}
 
 	req->rq_interpret_reply = (ptlrpc_interpterer_t)osc_statfs_interpret;
-	CLASSERT (sizeof(*aa) <= sizeof(req->rq_async_args));
+	CLASSERT(sizeof(*aa) <= sizeof(req->rq_async_args));
 	aa = ptlrpc_req_async_args(req);
 	aa->aa_oi = oinfo;
 
@@ -2787,7 +2806,7 @@ out:
 			goto skip_locking;
 
 		policy.l_extent.start = fm_key->fiemap.fm_start &
-						CFS_PAGE_MASK;
+						PAGE_MASK;
 
 		if (OBD_OBJECT_EOF - fm_key->fiemap.fm_length <=
 		    fm_key->fiemap.fm_start + PAGE_SIZE - 1)
@@ -2795,7 +2814,7 @@ out:
 		else
 			policy.l_extent.end = (fm_key->fiemap.fm_start +
 				fm_key->fiemap.fm_length +
-				PAGE_SIZE - 1) & CFS_PAGE_MASK;
+				PAGE_SIZE - 1) & PAGE_MASK;
 
 		ostid_build_res_name(&fm_key->oa.o_oi, &res_id);
 		mode = ldlm_lock_match(exp->exp_obd->obd_namespace,
@@ -2913,7 +2932,7 @@ static int osc_set_info_async(const struct lu_env *env, struct obd_export *exp,
 		int nr = atomic_read(&cli->cl_lru_in_list) >> 1;
 		int target = *(int *)val;
 
-		nr = osc_lru_shrink(cli, min(nr, target));
+		nr = osc_lru_shrink(env, cli, min(nr, target), true);
 		*(int *)val -= nr;
 		return 0;
 	}
@@ -2992,12 +3011,12 @@ static int osc_reconnect(const struct lu_env *env,
 	if (data && (data->ocd_connect_flags & OBD_CONNECT_GRANT)) {
 		long lost_grant;
 
-		client_obd_list_lock(&cli->cl_loi_list_lock);
+		spin_lock(&cli->cl_loi_list_lock);
 		data->ocd_grant = (cli->cl_avail_grant + cli->cl_dirty) ?:
 				2 * cli_brw_size(obd);
 		lost_grant = cli->cl_lost_grant;
 		cli->cl_lost_grant = 0;
-		client_obd_list_unlock(&cli->cl_loi_list_lock);
+		spin_unlock(&cli->cl_loi_list_lock);
 
 		CDEBUG(D_RPCTRACE, "ocd_connect_flags: %#llx ocd_version: %d ocd_grant: %d, lost: %ld.\n",
 		       data->ocd_connect_flags,
@@ -3047,10 +3066,10 @@ static int osc_import_event(struct obd_device *obd,
 	switch (event) {
 	case IMP_EVENT_DISCON: {
 		cli = &obd->u.cli;
-		client_obd_list_lock(&cli->cl_loi_list_lock);
+		spin_lock(&cli->cl_loi_list_lock);
 		cli->cl_avail_grant = 0;
 		cli->cl_lost_grant = 0;
-		client_obd_list_unlock(&cli->cl_loi_list_lock);
+		spin_unlock(&cli->cl_loi_list_lock);
 		break;
 	}
 	case IMP_EVENT_INACTIVE: {
@@ -3073,8 +3092,9 @@ static int osc_import_event(struct obd_device *obd,
 
 			ldlm_namespace_cleanup(ns, LDLM_FL_LOCAL_ONLY);
 			cl_env_put(env, &refcheck);
-		} else
+		} else {
 			rc = PTR_ERR(env);
+		}
 		break;
 	}
 	case IMP_EVENT_ACTIVE: {
@@ -3116,20 +3136,14 @@ static int osc_import_event(struct obd_device *obd,
  * \retval zero the lock can't be canceled
  * \retval other ok to cancel
  */
-static int osc_cancel_for_recovery(struct ldlm_lock *lock)
+static int osc_cancel_weight(struct ldlm_lock *lock)
 {
-	check_res_locked(lock->l_resource);
-
 	/*
-	 * Cancel all unused extent lock in granted mode LCK_PR or LCK_CR.
-	 *
-	 * XXX as a future improvement, we can also cancel unused write lock
-	 * if it doesn't have dirty data and active mmaps.
+	 * Cancel all unused and granted extent lock.
 	 */
 	if (lock->l_resource->lr_type == LDLM_EXTENT &&
-	    (lock->l_granted_mode == LCK_PR ||
-	     lock->l_granted_mode == LCK_CR) &&
-	    (osc_dlm_lock_pageref(lock) == 0))
+	    lock->l_granted_mode == lock->l_req_mode &&
+	    osc_ldlm_weigh_ast(lock) == 0)
 		return 1;
 
 	return 0;
@@ -3170,6 +3184,14 @@ int osc_setup(struct obd_device *obd, struct lustre_cfg *lcfg)
 	}
 	cli->cl_writeback_work = handler;
 
+	handler = ptlrpcd_alloc_work(cli->cl_import, lru_queue_work, cli);
+	if (IS_ERR(handler)) {
+		rc = PTR_ERR(handler);
+		goto out_ptlrpcd_work;
+	}
+
+	cli->cl_lru_work = handler;
+
 	rc = osc_quota_setup(obd);
 	if (rc)
 		goto out_ptlrpcd_work;
@@ -3198,11 +3220,18 @@ int osc_setup(struct obd_device *obd, struct lustre_cfg *lcfg)
 	}
 
 	INIT_LIST_HEAD(&cli->cl_grant_shrink_list);
-	ns_register_cancel(obd->obd_namespace, osc_cancel_for_recovery);
+	ns_register_cancel(obd->obd_namespace, osc_cancel_weight);
 	return rc;
 
 out_ptlrpcd_work:
-	ptlrpcd_destroy_work(handler);
+	if (cli->cl_writeback_work) {
+		ptlrpcd_destroy_work(cli->cl_writeback_work);
+		cli->cl_writeback_work = NULL;
+	}
+	if (cli->cl_lru_work) {
+		ptlrpcd_destroy_work(cli->cl_lru_work);
+		cli->cl_lru_work = NULL;
+	}
 out_client_setup:
 	client_obd_cleanup(obd);
 out_ptlrpcd:
@@ -3241,6 +3270,10 @@ static int osc_precleanup(struct obd_device *obd, enum obd_cleanup_stage stage)
 			ptlrpcd_destroy_work(cli->cl_writeback_work);
 			cli->cl_writeback_work = NULL;
 		}
+		if (cli->cl_lru_work) {
+			ptlrpcd_destroy_work(cli->cl_lru_work);
+			cli->cl_lru_work = NULL;
+		}
 		obd_cleanup_client_import(obd);
 		ptlrpc_lprocfs_unregister_obd(obd);
 		lprocfs_obd_cleanup(obd);
@@ -3330,7 +3363,6 @@ static struct obd_ops osc_obd_ops = {
 };
 
 extern struct lu_kmem_descr osc_caches[];
-extern spinlock_t osc_ast_guard;
 extern struct lock_class_key osc_ast_guard_class;
 
 static int __init osc_init(void)
@@ -3357,9 +3389,6 @@ static int __init osc_init(void)
 	if (rc)
 		goto out_kmem;
 
-	spin_lock_init(&osc_ast_guard);
-	lockdep_set_class(&osc_ast_guard, &osc_ast_guard_class);
-
 	/* This is obviously too much memory, only prevent overflow here */
 	if (osc_reqpool_mem_max >= 1 << 12 || osc_reqpool_mem_max == 0) {
 		rc = -EINVAL;
diff --git a/drivers/staging/lustre/lustre/ptlrpc/client.c b/drivers/staging/lustre/lustre/ptlrpc/client.c
index cf3ac8eee..4b7912a2c 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/client.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/client.c
@@ -595,9 +595,9 @@ static int __ptlrpc_request_bufs_pack(struct ptlrpc_request *request,
 	struct obd_import *imp = request->rq_import;
 	int rc;
 
-	if (unlikely(ctx))
+	if (unlikely(ctx)) {
 		request->rq_cli_ctx = sptlrpc_cli_ctx_get(ctx);
-	else {
+	} else {
 		rc = sptlrpc_req_get_ctx(request);
 		if (rc)
 			goto out_free;
@@ -1082,7 +1082,6 @@ static int ptlrpc_console_allow(struct ptlrpc_request *req)
 	 */
 	if ((lustre_handle_is_used(&req->rq_import->imp_remote_handle)) &&
 	    (opc == OST_CONNECT || opc == MDS_CONNECT || opc == MGS_CONNECT)) {
-
 		/* Suppress timed out reconnect requests */
 		if (req->rq_timedout)
 			return 0;
@@ -2087,7 +2086,7 @@ int ptlrpc_set_wait(struct ptlrpc_request_set *set)
 		CDEBUG(D_RPCTRACE, "set %p going to sleep for %d seconds\n",
 		       set, timeout);
 
-		if (timeout == 0 && !cfs_signal_pending())
+		if (timeout == 0 && !signal_pending(current))
 			/*
 			 * No requests are in-flight (ether timed out
 			 * or delayed), so we can allow interrupts.
@@ -2114,7 +2113,7 @@ int ptlrpc_set_wait(struct ptlrpc_request_set *set)
 		 * it being ignored forever
 		 */
 		if (rc == -ETIMEDOUT && !lwi.lwi_allow_intr &&
-		    cfs_signal_pending()) {
+		    signal_pending(current)) {
 			sigset_t blocked_sigs =
 					   cfs_block_sigsinv(LUSTRE_FATAL_SIGS);
 
@@ -2124,7 +2123,7 @@ int ptlrpc_set_wait(struct ptlrpc_request_set *set)
 			 * important signals since ptlrpc set is not easily
 			 * reentrant from userspace again
 			 */
-			if (cfs_signal_pending())
+			if (signal_pending(current))
 				ptlrpc_interrupted_set(set);
 			cfs_restore_sigs(blocked_sigs);
 		}
diff --git a/drivers/staging/lustre/lustre/ptlrpc/events.c b/drivers/staging/lustre/lustre/ptlrpc/events.c
index 47be21ac9..fdcde9bbd 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/events.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/events.c
@@ -69,7 +69,6 @@ void request_out_callback(lnet_event_t *ev)
 		req->rq_req_unlink = 0;
 
 	if (ev->type == LNET_EVENT_UNLINK || ev->status != 0) {
-
 		/* Failed send: make it seem like the reply timed out, just
 		 * like failing sends in client.c does currently...
 		 */
diff --git a/drivers/staging/lustre/lustre/ptlrpc/import.c b/drivers/staging/lustre/lustre/ptlrpc/import.c
index cd94fed0f..a4f7544f4 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/import.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/import.c
@@ -1001,6 +1001,7 @@ finish:
 			return 0;
 		}
 	} else {
+		static bool warned;
 
 		spin_lock(&imp->imp_lock);
 		list_del(&imp->imp_conn_current->oic_item);
@@ -1021,7 +1022,7 @@ finish:
 			goto out;
 		}
 
-		if ((ocd->ocd_connect_flags & OBD_CONNECT_VERSION) &&
+		if (!warned && (ocd->ocd_connect_flags & OBD_CONNECT_VERSION) &&
 		    (ocd->ocd_version > LUSTRE_VERSION_CODE +
 					LUSTRE_VERSION_OFFSET_WARN ||
 		     ocd->ocd_version < LUSTRE_VERSION_CODE -
@@ -1029,10 +1030,8 @@ finish:
 			/* Sigh, some compilers do not like #ifdef in the middle
 			 * of macro arguments
 			 */
-			const char *older = "older. Consider upgrading server or downgrading client"
-				;
-			const char *newer = "newer than client version. Consider upgrading client"
-					    ;
+			const char *older = "older than client. Consider upgrading server";
+			const char *newer = "newer than client. Consider recompiling application";
 
 			LCONSOLE_WARN("Server %s version (%d.%d.%d.%d) is much %s (%s)\n",
 				      obd2cli_tgt(imp->imp_obd),
@@ -1042,6 +1041,7 @@ finish:
 				      OBD_OCD_VERSION_FIX(ocd->ocd_version),
 				      ocd->ocd_version > LUSTRE_VERSION_CODE ?
 				      newer : older, LUSTRE_VERSION_STRING);
+			warned = true;
 		}
 
 #if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(3, 2, 50, 0)
@@ -1370,7 +1370,6 @@ int ptlrpc_import_recovery_state_machine(struct obd_import *imp)
 			if (rc)
 				goto out;
 		}
-
 	}
 
 	if (imp->imp_state == LUSTRE_IMP_REPLAY_WAIT) {
@@ -1453,7 +1452,6 @@ int ptlrpc_disconnect_import(struct obd_import *imp, int noclose)
 				       back_to_sleep, LWI_ON_SIGNAL_NOOP, NULL);
 		rc = l_wait_event(imp->imp_recovery_waitq,
 				  !ptlrpc_import_in_recovery(imp), &lwi);
-
 	}
 
 	spin_lock(&imp->imp_lock);
diff --git a/drivers/staging/lustre/lustre/ptlrpc/layout.c b/drivers/staging/lustre/lustre/ptlrpc/layout.c
index 5b06901e5..c0ecd1625 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/layout.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/layout.c
@@ -160,6 +160,16 @@ static const struct req_msg_field *fld_query_server[] = {
 	&RMF_FLD_MDFLD
 };
 
+static const struct req_msg_field *fld_read_client[] = {
+	&RMF_PTLRPC_BODY,
+	&RMF_FLD_MDFLD
+};
+
+static const struct req_msg_field *fld_read_server[] = {
+	&RMF_PTLRPC_BODY,
+	&RMF_GENERIC_DATA
+};
+
 static const struct req_msg_field *mds_getattr_name_client[] = {
 	&RMF_PTLRPC_BODY,
 	&RMF_MDT_BODY,
@@ -566,7 +576,7 @@ static const struct req_msg_field *ost_get_info_generic_server[] = {
 
 static const struct req_msg_field *ost_get_info_generic_client[] = {
 	&RMF_PTLRPC_BODY,
-	&RMF_SETINFO_KEY
+	&RMF_GETINFO_KEY
 };
 
 static const struct req_msg_field *ost_get_last_id_server[] = {
@@ -574,6 +584,12 @@ static const struct req_msg_field *ost_get_last_id_server[] = {
 	&RMF_OBD_ID
 };
 
+static const struct req_msg_field *ost_get_last_fid_client[] = {
+	&RMF_PTLRPC_BODY,
+	&RMF_GETINFO_KEY,
+	&RMF_FID,
+};
+
 static const struct req_msg_field *ost_get_last_fid_server[] = {
 	&RMF_PTLRPC_BODY,
 	&RMF_FID,
@@ -643,6 +659,7 @@ static struct req_format *req_formats[] = {
 	&RQF_MGS_CONFIG_READ,
 	&RQF_SEQ_QUERY,
 	&RQF_FLD_QUERY,
+	&RQF_FLD_READ,
 	&RQF_MDS_CONNECT,
 	&RQF_MDS_DISCONNECT,
 	&RQF_MDS_GET_INFO,
@@ -696,7 +713,7 @@ static struct req_format *req_formats[] = {
 	&RQF_OST_BRW_WRITE,
 	&RQF_OST_STATFS,
 	&RQF_OST_SET_GRANT_INFO,
-	&RQF_OST_GET_INFO_GENERIC,
+	&RQF_OST_GET_INFO,
 	&RQF_OST_GET_INFO_LAST_ID,
 	&RQF_OST_GET_INFO_LAST_FID,
 	&RQF_OST_SET_INFO_LAST_FID,
@@ -1162,6 +1179,10 @@ struct req_format RQF_FLD_QUERY =
 	DEFINE_REQ_FMT0("FLD_QUERY", fld_query_client, fld_query_server);
 EXPORT_SYMBOL(RQF_FLD_QUERY);
 
+struct req_format RQF_FLD_READ =
+	DEFINE_REQ_FMT0("FLD_READ", fld_read_client, fld_read_server);
+EXPORT_SYMBOL(RQF_FLD_READ);
+
 struct req_format RQF_LOG_CANCEL =
 	DEFINE_REQ_FMT0("OBD_LOG_CANCEL", log_cancel_client, empty);
 EXPORT_SYMBOL(RQF_LOG_CANCEL);
@@ -1519,10 +1540,10 @@ struct req_format RQF_OST_SET_GRANT_INFO =
 			ost_body_only);
 EXPORT_SYMBOL(RQF_OST_SET_GRANT_INFO);
 
-struct req_format RQF_OST_GET_INFO_GENERIC =
+struct req_format RQF_OST_GET_INFO =
 	DEFINE_REQ_FMT0("OST_GET_INFO", ost_get_info_generic_client,
 			ost_get_info_generic_server);
-EXPORT_SYMBOL(RQF_OST_GET_INFO_GENERIC);
+EXPORT_SYMBOL(RQF_OST_GET_INFO);
 
 struct req_format RQF_OST_GET_INFO_LAST_ID =
 	DEFINE_REQ_FMT0("OST_GET_INFO_LAST_ID", ost_get_info_generic_client,
@@ -1530,7 +1551,7 @@ struct req_format RQF_OST_GET_INFO_LAST_ID =
 EXPORT_SYMBOL(RQF_OST_GET_INFO_LAST_ID);
 
 struct req_format RQF_OST_GET_INFO_LAST_FID =
-	DEFINE_REQ_FMT0("OST_GET_INFO_LAST_FID", obd_set_info_client,
+	DEFINE_REQ_FMT0("OST_GET_INFO_LAST_FID", ost_get_last_fid_client,
 			ost_get_last_fid_server);
 EXPORT_SYMBOL(RQF_OST_GET_INFO_LAST_FID);
 
diff --git a/drivers/staging/lustre/lustre/ptlrpc/lproc_ptlrpc.c b/drivers/staging/lustre/lustre/ptlrpc/lproc_ptlrpc.c
index c95a91ce2..64c0f1e17 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/lproc_ptlrpc.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/lproc_ptlrpc.c
@@ -131,6 +131,7 @@ static struct ll_rpc_opcode {
 	{ SEC_CTX_INIT_CONT, "sec_ctx_init_cont" },
 	{ SEC_CTX_FINI,     "sec_ctx_fini" },
 	{ FLD_QUERY,	"fld_query" },
+	{ FLD_READ,	"fld_read" },
 };
 
 static struct ll_eopcode {
@@ -679,11 +680,11 @@ static ssize_t ptlrpc_lprocfs_nrs_seq_write(struct file *file,
 	/**
 	 * The second token is either NULL, or an optional [reg|hp] string
 	 */
-	if (strcmp(cmd, "reg") == 0)
+	if (strcmp(cmd, "reg") == 0) {
 		queue = PTLRPC_NRS_QUEUE_REG;
-	else if (strcmp(cmd, "hp") == 0)
+	} else if (strcmp(cmd, "hp") == 0) {
 		queue = PTLRPC_NRS_QUEUE_HP;
-	else {
+	} else {
 		rc = -EINVAL;
 		goto out;
 	}
@@ -693,8 +694,9 @@ default_queue:
 	if (queue == PTLRPC_NRS_QUEUE_HP && !nrs_svc_has_hp(svc)) {
 		rc = -ENODEV;
 		goto out;
-	} else if (queue == PTLRPC_NRS_QUEUE_BOTH && !nrs_svc_has_hp(svc))
+	} else if (queue == PTLRPC_NRS_QUEUE_BOTH && !nrs_svc_has_hp(svc)) {
 		queue = PTLRPC_NRS_QUEUE_REG;
+	}
 
 	/**
 	 * Serialize NRS core lprocfs operations with policy registration/
@@ -1320,6 +1322,5 @@ int lprocfs_wr_pinger_recov(struct file *file, const char __user *buffer,
 	up_read(&obd->u.cli.cl_sem);
 
 	return count;
-
 }
 EXPORT_SYMBOL(lprocfs_wr_pinger_recov);
diff --git a/drivers/staging/lustre/lustre/ptlrpc/nrs.c b/drivers/staging/lustre/lustre/ptlrpc/nrs.c
index 710fb806f..c444f5168 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/nrs.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/nrs.c
@@ -975,7 +975,11 @@ static void nrs_svcpt_cleanup_locked(struct ptlrpc_service_part *svcpt)
 	LASSERT(mutex_is_locked(&nrs_core.nrs_mutex));
 
 again:
-	nrs = nrs_svcpt2nrs(svcpt, hp);
+	/* scp_nrs_hp could be NULL due to short of memory. */
+	nrs = hp ? svcpt->scp_nrs_hp : &svcpt->scp_nrs_reg;
+	/* check the nrs_svcpt to see if nrs is initialized. */
+	if (!nrs || !nrs->nrs_svcpt)
+		return;
 	nrs->nrs_stopping = 1;
 
 	list_for_each_entry_safe(policy, tmp, &nrs->nrs_policy_list, pol_list) {
@@ -1038,7 +1042,6 @@ static int nrs_policy_unregister_locked(struct ptlrpc_nrs_pol_desc *desc)
 	LASSERT(mutex_is_locked(&ptlrpc_all_services_mutex));
 
 	list_for_each_entry(svc, &ptlrpc_all_services, srv_list) {
-
 		if (!nrs_policy_compatible(svc, desc) ||
 		    unlikely(svc->srv_is_stopping))
 			continue;
diff --git a/drivers/staging/lustre/lustre/ptlrpc/pack_generic.c b/drivers/staging/lustre/lustre/ptlrpc/pack_generic.c
index 492d63fad..811acf6fc 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/pack_generic.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/pack_generic.c
@@ -1160,7 +1160,6 @@ __u32 lustre_msg_get_timeout(struct lustre_msg *msg)
 		if (!pb) {
 			CERROR("invalid msg %p: no ptlrpc body!\n", msg);
 			return 0;
-
 		}
 		return pb->pb_timeout;
 	}
@@ -1179,7 +1178,6 @@ __u32 lustre_msg_get_service_time(struct lustre_msg *msg)
 		if (!pb) {
 			CERROR("invalid msg %p: no ptlrpc body!\n", msg);
 			return 0;
-
 		}
 		return pb->pb_service_time;
 	}
@@ -1572,7 +1570,6 @@ static void lustre_swab_obdo(struct obdo *o)
 	CLASSERT(offsetof(typeof(*o), o_padding_4) != 0);
 	CLASSERT(offsetof(typeof(*o), o_padding_5) != 0);
 	CLASSERT(offsetof(typeof(*o), o_padding_6) != 0);
-
 }
 
 void lustre_swab_obd_statfs(struct obd_statfs *os)
diff --git a/drivers/staging/lustre/lustre/ptlrpc/ptlrpcd.c b/drivers/staging/lustre/lustre/ptlrpc/ptlrpcd.c
index db003f5da..76a355a9d 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/ptlrpcd.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/ptlrpcd.c
@@ -387,7 +387,8 @@ static int ptlrpcd(void *arg)
 {
 	struct ptlrpcd_ctl *pc = arg;
 	struct ptlrpc_request_set *set;
-	struct lu_env env = { .le_ses = NULL };
+	struct lu_context ses = { 0 };
+	struct lu_env env = { .le_ses = &ses };
 	int rc = 0;
 	int exit = 0;
 
@@ -416,6 +417,13 @@ static int ptlrpcd(void *arg)
 	 */
 	rc = lu_context_init(&env.le_ctx,
 			     LCT_CL_THREAD|LCT_REMEMBER|LCT_NOREF);
+	if (rc == 0) {
+		rc = lu_context_init(env.le_ses,
+				     LCT_SESSION | LCT_REMEMBER | LCT_NOREF);
+		if (rc != 0)
+			lu_context_fini(&env.le_ctx);
+	}
+
 	if (rc != 0)
 		goto failed;
 
@@ -436,9 +444,10 @@ static int ptlrpcd(void *arg)
 				  ptlrpc_expired_set, set);
 
 		lu_context_enter(&env.le_ctx);
-		l_wait_event(set->set_waitq,
-			     ptlrpcd_check(&env, pc), &lwi);
+		lu_context_enter(env.le_ses);
+		l_wait_event(set->set_waitq, ptlrpcd_check(&env, pc), &lwi);
 		lu_context_exit(&env.le_ctx);
+		lu_context_exit(env.le_ses);
 
 		/*
 		 * Abort inflight rpcs for forced stop case.
@@ -461,6 +470,7 @@ static int ptlrpcd(void *arg)
 	if (!list_empty(&set->set_requests))
 		ptlrpc_set_wait(set);
 	lu_context_fini(&env.le_ctx);
+	lu_context_fini(env.le_ses);
 
 	complete(&pc->pc_finishing);
 
@@ -899,8 +909,11 @@ int ptlrpcd_addref(void)
 	int rc = 0;
 
 	mutex_lock(&ptlrpcd_mutex);
-	if (++ptlrpcd_users == 1)
+	if (++ptlrpcd_users == 1) {
 		rc = ptlrpcd_init();
+		if (rc < 0)
+			ptlrpcd_users--;
+	}
 	mutex_unlock(&ptlrpcd_mutex);
 	return rc;
 }
diff --git a/drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c b/drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c
index d3872b8c9..02e6cda4c 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c
@@ -41,7 +41,6 @@
 #define DEBUG_SUBSYSTEM S_SEC
 
 #include "../../include/linux/libcfs/libcfs.h"
-#include <linux/crypto.h>
 
 #include "../include/obd.h"
 #include "../include/obd_cksum.h"
@@ -511,7 +510,6 @@ int sptlrpc_get_bulk_checksum(struct ptlrpc_bulk_desc *desc, __u8 alg,
 {
 	struct cfs_crypto_hash_desc *hdesc;
 	int hashsize;
-	char hashbuf[64];
 	unsigned int bufsize;
 	int i, err;
 
@@ -529,21 +527,23 @@ int sptlrpc_get_bulk_checksum(struct ptlrpc_bulk_desc *desc, __u8 alg,
 
 	for (i = 0; i < desc->bd_iov_count; i++) {
 		cfs_crypto_hash_update_page(hdesc, desc->bd_iov[i].kiov_page,
-				  desc->bd_iov[i].kiov_offset & ~CFS_PAGE_MASK,
+				  desc->bd_iov[i].kiov_offset & ~PAGE_MASK,
 				  desc->bd_iov[i].kiov_len);
 	}
+
 	if (hashsize > buflen) {
+		unsigned char hashbuf[CFS_CRYPTO_HASH_DIGESTSIZE_MAX];
+
 		bufsize = sizeof(hashbuf);
-		err = cfs_crypto_hash_final(hdesc, (unsigned char *)hashbuf,
-					    &bufsize);
+		LASSERTF(bufsize >= hashsize, "bufsize = %u < hashsize %u\n",
+			 bufsize, hashsize);
+		err = cfs_crypto_hash_final(hdesc, hashbuf, &bufsize);
 		memcpy(buf, hashbuf, buflen);
 	} else {
 		bufsize = buflen;
 		err = cfs_crypto_hash_final(hdesc, buf, &bufsize);
 	}
 
-	if (err)
-		cfs_crypto_hash_final(hdesc, NULL, NULL);
 	return err;
 }
 EXPORT_SYMBOL(sptlrpc_get_bulk_checksum);
diff --git a/drivers/staging/lustre/lustre/ptlrpc/sec_plain.c b/drivers/staging/lustre/lustre/ptlrpc/sec_plain.c
index 6276bf59c..37c9f4c45 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/sec_plain.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/sec_plain.c
@@ -162,7 +162,7 @@ static void corrupt_bulk_data(struct ptlrpc_bulk_desc *desc)
 			continue;
 
 		ptr = kmap(desc->bd_iov[i].kiov_page);
-		off = desc->bd_iov[i].kiov_offset & ~CFS_PAGE_MASK;
+		off = desc->bd_iov[i].kiov_offset & ~PAGE_MASK;
 		ptr[off] ^= 0x1;
 		kunmap(desc->bd_iov[i].kiov_page);
 		return;
diff --git a/drivers/staging/lustre/lustre/ptlrpc/service.c b/drivers/staging/lustre/lustre/ptlrpc/service.c
index 1bbd1d39c..17c7b9749 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/service.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/service.c
@@ -838,6 +838,11 @@ static void ptlrpc_server_finish_request(struct ptlrpc_service_part *svcpt,
 {
 	ptlrpc_server_hpreq_fini(req);
 
+	if (req->rq_session.lc_thread) {
+		lu_context_exit(&req->rq_session);
+		lu_context_fini(&req->rq_session);
+	}
+
 	ptlrpc_server_drop_request(req);
 }
 
@@ -1579,6 +1584,21 @@ ptlrpc_server_handle_req_in(struct ptlrpc_service_part *svcpt,
 	}
 
 	req->rq_svc_thread = thread;
+	if (thread) {
+		/* initialize request session, it is needed for request
+		 * processing by target
+		 */
+		rc = lu_context_init(&req->rq_session,
+				     LCT_SERVER_SESSION | LCT_NOREF);
+		if (rc) {
+			CERROR("%s: failure to initialize session: rc = %d\n",
+			       thread->t_name, rc);
+			goto err_req;
+		}
+		req->rq_session.lc_thread = thread;
+		lu_context_enter(&req->rq_session);
+		req->rq_svc_thread->t_env->le_ses = &req->rq_session;
+	}
 
 	ptlrpc_at_add_timed(req);
 
@@ -1612,7 +1632,6 @@ ptlrpc_server_handle_request(struct ptlrpc_service_part *svcpt,
 	struct timespec64 arrived;
 	unsigned long timediff_usecs;
 	unsigned long arrived_usecs;
-	int rc;
 	int fail_opc = 0;
 
 	request = ptlrpc_server_request_get(svcpt, false);
@@ -1649,21 +1668,6 @@ ptlrpc_server_handle_request(struct ptlrpc_service_part *svcpt,
 				    at_get(&svcpt->scp_at_estimate));
 	}
 
-	rc = lu_context_init(&request->rq_session, LCT_SESSION | LCT_NOREF);
-	if (rc) {
-		CERROR("Failure to initialize session: %d\n", rc);
-		goto out_req;
-	}
-	request->rq_session.lc_thread = thread;
-	request->rq_session.lc_cookie = 0x5;
-	lu_context_enter(&request->rq_session);
-
-	CDEBUG(D_NET, "got req %llu\n", request->rq_xid);
-
-	request->rq_svc_thread = thread;
-	if (thread)
-		request->rq_svc_thread->t_env->le_ses = &request->rq_session;
-
 	if (likely(request->rq_export)) {
 		if (unlikely(ptlrpc_check_req(request)))
 			goto put_conn;
@@ -1695,14 +1699,21 @@ ptlrpc_server_handle_request(struct ptlrpc_service_part *svcpt,
 	if (lustre_msg_get_opc(request->rq_reqmsg) != OBD_PING)
 		CFS_FAIL_TIMEOUT_MS(OBD_FAIL_PTLRPC_PAUSE_REQ, cfs_fail_val);
 
-	rc = svc->srv_ops.so_req_handler(request);
+	CDEBUG(D_NET, "got req %llu\n", request->rq_xid);
+
+	/* re-assign request and sesson thread to the current one */
+	request->rq_svc_thread = thread;
+	if (thread) {
+		LASSERT(request->rq_session.lc_thread);
+		request->rq_session.lc_thread = thread;
+		request->rq_session.lc_cookie = 0x55;
+		thread->t_env->le_ses = &request->rq_session;
+	}
+	svc->srv_ops.so_req_handler(request);
 
 	ptlrpc_rqphase_move(request, RQ_PHASE_COMPLETE);
 
 put_conn:
-	lu_context_exit(&request->rq_session);
-	lu_context_fini(&request->rq_session);
-
 	if (unlikely(ktime_get_real_seconds() > request->rq_deadline)) {
 		DEBUG_REQ(D_WARNING, request,
 			  "Request took longer than estimated (%lld:%llds); "
@@ -1756,7 +1767,6 @@ put_conn:
 			  request->rq_arrival_time.tv_sec);
 	}
 
-out_req:
 	ptlrpc_server_finish_active_request(svcpt, request);
 
 	return 1;
diff --git a/drivers/staging/lustre/lustre/ptlrpc/wiretest.c b/drivers/staging/lustre/lustre/ptlrpc/wiretest.c
index 3ffd2d91f..aacc81083 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/wiretest.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/wiretest.c
@@ -276,7 +276,9 @@ void lustre_assert_wire_constants(void)
 		 (long long)FLD_QUERY);
 	LASSERTF(FLD_FIRST_OPC == 900, "found %lld\n",
 		 (long long)FLD_FIRST_OPC);
-	LASSERTF(FLD_LAST_OPC == 901, "found %lld\n",
+	LASSERTF(FLD_READ == 901, "found %lld\n",
+		 (long long)FLD_READ);
+	LASSERTF(FLD_LAST_OPC == 902, "found %lld\n",
 		 (long long)FLD_LAST_OPC);
 	LASSERTF(SEQ_QUERY == 700, "found %lld\n",
 		 (long long)SEQ_QUERY);
@@ -1069,6 +1071,8 @@ void lustre_assert_wire_constants(void)
 		 OBD_CONNECT_PINGLESS);
 	LASSERTF(OBD_CONNECT_FLOCK_DEAD == 0x8000000000000ULL,
 		 "found 0x%.16llxULL\n", OBD_CONNECT_FLOCK_DEAD);
+	LASSERTF(OBD_CONNECT_OPEN_BY_FID == 0x20000000000000ULL,
+		 "found 0x%.16llxULL\n", OBD_CONNECT_OPEN_BY_FID);
 	LASSERTF(OBD_CKSUM_CRC32 == 0x00000001UL, "found 0x%.8xUL\n",
 		(unsigned)OBD_CKSUM_CRC32);
 	LASSERTF(OBD_CKSUM_ADLER == 0x00000002UL, "found 0x%.8xUL\n",
@@ -1639,6 +1643,12 @@ void lustre_assert_wire_constants(void)
 		OBD_BRW_ASYNC);
 	LASSERTF(OBD_BRW_MEMALLOC == 0x800, "found 0x%.8x\n",
 		OBD_BRW_MEMALLOC);
+	LASSERTF(OBD_BRW_OVER_USRQUOTA == 0x1000, "found 0x%.8x\n",
+		 OBD_BRW_OVER_USRQUOTA);
+	LASSERTF(OBD_BRW_OVER_GRPQUOTA == 0x2000, "found 0x%.8x\n",
+		 OBD_BRW_OVER_GRPQUOTA);
+	LASSERTF(OBD_BRW_SOFT_SYNC == 0x4000, "found 0x%.8x\n",
+		 OBD_BRW_SOFT_SYNC);
 
 	/* Checks for struct ost_body */
 	LASSERTF((int)sizeof(struct ost_body) == 208, "found %lld\n",
-- 
cgit v1.2.3-54-g00ecf