Initial import

author: André Fabian Silva Delgado <emulatorman@parabola.nu> 2015-08-05 17:04:01 -0300
committer: André Fabian Silva Delgado <emulatorman@parabola.nu> 2015-08-05 17:04:01 -0300
commit: 57f0f512b273f60d52568b8c6b77e17f5636edc0 (patch)
tree: 5e910f0e82173f4ef4f51111366a3f1299037a7b /drivers/staging/lustre/lustre/ptlrpc
29 files changed, 31743 insertions, 0 deletions
diff --git a/drivers/staging/lustre/lustre/ptlrpc/Makefile b/drivers/staging/lustre/lustre/ptlrpc/Makefile
new file mode 100644
index 000000000..fb50cd4c6
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ptlrpc/Makefile
@@ -0,0 +1,20 @@
+obj-$(CONFIG_LUSTRE_FS) += ptlrpc.o
+LDLM := ../../lustre/ldlm/
+
+ldlm_objs := $(LDLM)l_lock.o $(LDLM)ldlm_lock.o
+ldlm_objs += $(LDLM)ldlm_resource.o $(LDLM)ldlm_lib.o
+ldlm_objs += $(LDLM)ldlm_plain.o $(LDLM)ldlm_extent.o
+ldlm_objs += $(LDLM)ldlm_request.o $(LDLM)ldlm_lockd.o
+ldlm_objs += $(LDLM)ldlm_flock.o $(LDLM)ldlm_inodebits.o
+ldlm_objs += $(LDLM)ldlm_pool.o
+ldlm_objs += $(LDLM)interval_tree.o
+ptlrpc_objs := client.o recover.o connection.o niobuf.o pack_generic.o
+ptlrpc_objs += events.o ptlrpc_module.o service.o pinger.o
+ptlrpc_objs += llog_net.o llog_client.o import.o ptlrpcd.o
+ptlrpc_objs += pers.o lproc_ptlrpc.o wiretest.o layout.o
+ptlrpc_objs += sec.o sec_bulk.o sec_gc.o sec_config.o
+ptlrpc_objs += sec_null.o sec_plain.o nrs.o nrs_fifo.o
+
+ptlrpc-y := $(ldlm_objs) $(ptlrpc_objs)
+ptlrpc-$(CONFIG_PROC_FS) += sec_lproc.o
+ptlrpc-$(CONFIG_LUSTRE_TRANSLATE_ERRNOS) += errno.o
diff --git a/drivers/staging/lustre/lustre/ptlrpc/client.c b/drivers/staging/lustre/lustre/ptlrpc/client.c
new file mode 100644
index 000000000..0357f1d45
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ptlrpc/client.c
@@ -0,0 +1,3149 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+/** Implementation of client-side PortalRPC interfaces */
+
+#define DEBUG_SUBSYSTEM S_RPC
+
+#include "../include/obd_support.h"
+#include "../include/obd_class.h"
+#include "../include/lustre_lib.h"
+#include "../include/lustre_ha.h"
+#include "../include/lustre_import.h"
+#include "../include/lustre_req_layout.h"
+
+#include "ptlrpc_internal.h"
+
+static int ptlrpc_send_new_req(struct ptlrpc_request *req);
+static int ptlrpcd_check_work(struct ptlrpc_request *req);
+
+/**
+ * Initialize passed in client structure \a cl.
+ */
+void ptlrpc_init_client(int req_portal, int rep_portal, char *name,
+			struct ptlrpc_client *cl)
+{
+	cl->cli_request_portal = req_portal;
+	cl->cli_reply_portal   = rep_portal;
+	cl->cli_name	   = name;
+}
+EXPORT_SYMBOL(ptlrpc_init_client);
+
+/**
+ * Return PortalRPC connection for remote uud \a uuid
+ */
+struct ptlrpc_connection *ptlrpc_uuid_to_connection(struct obd_uuid *uuid)
+{
+	struct ptlrpc_connection *c;
+	lnet_nid_t		self;
+	lnet_process_id_t	 peer;
+	int		       err;
+
+	/* ptlrpc_uuid_to_peer() initializes its 2nd parameter
+	 * before accessing its values. */
+	/* coverity[uninit_use_in_call] */
+	err = ptlrpc_uuid_to_peer(uuid, &peer, &self);
+	if (err != 0) {
+		CNETERR("cannot find peer %s!\n", uuid->uuid);
+		return NULL;
+	}
+
+	c = ptlrpc_connection_get(peer, self, uuid);
+	if (c) {
+		memcpy(c->c_remote_uuid.uuid,
+		       uuid->uuid, sizeof(c->c_remote_uuid.uuid));
+	}
+
+	CDEBUG(D_INFO, "%s -> %p\n", uuid->uuid, c);
+
+	return c;
+}
+EXPORT_SYMBOL(ptlrpc_uuid_to_connection);
+
+/**
+ * Allocate and initialize new bulk descriptor on the sender.
+ * Returns pointer to the descriptor or NULL on error.
+ */
+struct ptlrpc_bulk_desc *ptlrpc_new_bulk(unsigned npages, unsigned max_brw,
+					 unsigned type, unsigned portal)
+{
+	struct ptlrpc_bulk_desc *desc;
+	int i;
+
+	OBD_ALLOC(desc, offsetof(struct ptlrpc_bulk_desc, bd_iov[npages]));
+	if (!desc)
+		return NULL;
+
+	spin_lock_init(&desc->bd_lock);
+	init_waitqueue_head(&desc->bd_waitq);
+	desc->bd_max_iov = npages;
+	desc->bd_iov_count = 0;
+	desc->bd_portal = portal;
+	desc->bd_type = type;
+	desc->bd_md_count = 0;
+	LASSERT(max_brw > 0);
+	desc->bd_md_max_brw = min(max_brw, PTLRPC_BULK_OPS_COUNT);
+	/* PTLRPC_BULK_OPS_COUNT is the compile-time transfer limit for this
+	 * node. Negotiated ocd_brw_size will always be <= this number. */
+	for (i = 0; i < PTLRPC_BULK_OPS_COUNT; i++)
+		LNetInvalidateHandle(&desc->bd_mds[i]);
+
+	return desc;
+}
+
+/**
+ * Prepare bulk descriptor for specified outgoing request \a req that
+ * can fit \a npages * pages. \a type is bulk type. \a portal is where
+ * the bulk to be sent. Used on client-side.
+ * Returns pointer to newly allocated initialized bulk descriptor or NULL on
+ * error.
+ */
+struct ptlrpc_bulk_desc *ptlrpc_prep_bulk_imp(struct ptlrpc_request *req,
+					      unsigned npages, unsigned max_brw,
+					      unsigned type, unsigned portal)
+{
+	struct obd_import *imp = req->rq_import;
+	struct ptlrpc_bulk_desc *desc;
+
+	LASSERT(type == BULK_PUT_SINK || type == BULK_GET_SOURCE);
+	desc = ptlrpc_new_bulk(npages, max_brw, type, portal);
+	if (desc == NULL)
+		return NULL;
+
+	desc->bd_import_generation = req->rq_import_generation;
+	desc->bd_import = class_import_get(imp);
+	desc->bd_req = req;
+
+	desc->bd_cbid.cbid_fn  = client_bulk_callback;
+	desc->bd_cbid.cbid_arg = desc;
+
+	/* This makes req own desc, and free it when she frees herself */
+	req->rq_bulk = desc;
+
+	return desc;
+}
+EXPORT_SYMBOL(ptlrpc_prep_bulk_imp);
+
+/**
+ * Add a page \a page to the bulk descriptor \a desc.
+ * Data to transfer in the page starts at offset \a pageoffset and
+ * amount of data to transfer from the page is \a len
+ */
+void __ptlrpc_prep_bulk_page(struct ptlrpc_bulk_desc *desc,
+			     struct page *page, int pageoffset, int len, int pin)
+{
+	LASSERT(desc->bd_iov_count < desc->bd_max_iov);
+	LASSERT(page != NULL);
+	LASSERT(pageoffset >= 0);
+	LASSERT(len > 0);
+	LASSERT(pageoffset + len <= PAGE_CACHE_SIZE);
+
+	desc->bd_nob += len;
+
+	if (pin)
+		page_cache_get(page);
+
+	ptlrpc_add_bulk_page(desc, page, pageoffset, len);
+}
+EXPORT_SYMBOL(__ptlrpc_prep_bulk_page);
+
+/**
+ * Uninitialize and free bulk descriptor \a desc.
+ * Works on bulk descriptors both from server and client side.
+ */
+void __ptlrpc_free_bulk(struct ptlrpc_bulk_desc *desc, int unpin)
+{
+	int i;
+
+	LASSERT(desc != NULL);
+	LASSERT(desc->bd_iov_count != LI_POISON); /* not freed already */
+	LASSERT(desc->bd_md_count == 0);	 /* network hands off */
+	LASSERT((desc->bd_export != NULL) ^ (desc->bd_import != NULL));
+
+	sptlrpc_enc_pool_put_pages(desc);
+
+	if (desc->bd_export)
+		class_export_put(desc->bd_export);
+	else
+		class_import_put(desc->bd_import);
+
+	if (unpin) {
+		for (i = 0; i < desc->bd_iov_count; i++)
+			page_cache_release(desc->bd_iov[i].kiov_page);
+	}
+
+	OBD_FREE(desc, offsetof(struct ptlrpc_bulk_desc,
+				bd_iov[desc->bd_max_iov]));
+}
+EXPORT_SYMBOL(__ptlrpc_free_bulk);
+
+/**
+ * Set server timelimit for this req, i.e. how long are we willing to wait
+ * for reply before timing out this request.
+ */
+void ptlrpc_at_set_req_timeout(struct ptlrpc_request *req)
+{
+	__u32 serv_est;
+	int idx;
+	struct imp_at *at;
+
+	LASSERT(req->rq_import);
+
+	if (AT_OFF) {
+		/* non-AT settings */
+		/**
+		 * \a imp_server_timeout means this is reverse import and
+		 * we send (currently only) ASTs to the client and cannot afford
+		 * to wait too long for the reply, otherwise the other client
+		 * (because of which we are sending this request) would
+		 * timeout waiting for us
+		 */
+		req->rq_timeout = req->rq_import->imp_server_timeout ?
+				  obd_timeout / 2 : obd_timeout;
+	} else {
+		at = &req->rq_import->imp_at;
+		idx = import_at_get_index(req->rq_import,
+					  req->rq_request_portal);
+		serv_est = at_get(&at->iat_service_estimate[idx]);
+		req->rq_timeout = at_est2timeout(serv_est);
+	}
+	/* We could get even fancier here, using history to predict increased
+	   loading... */
+
+	/* Let the server know what this RPC timeout is by putting it in the
+	   reqmsg*/
+	lustre_msg_set_timeout(req->rq_reqmsg, req->rq_timeout);
+}
+EXPORT_SYMBOL(ptlrpc_at_set_req_timeout);
+
+/* Adjust max service estimate based on server value */
+static void ptlrpc_at_adj_service(struct ptlrpc_request *req,
+				  unsigned int serv_est)
+{
+	int idx;
+	unsigned int oldse;
+	struct imp_at *at;
+
+	LASSERT(req->rq_import);
+	at = &req->rq_import->imp_at;
+
+	idx = import_at_get_index(req->rq_import, req->rq_request_portal);
+	/* max service estimates are tracked on the server side,
+	   so just keep minimal history here */
+	oldse = at_measured(&at->iat_service_estimate[idx], serv_est);
+	if (oldse != 0)
+		CDEBUG(D_ADAPTTO, "The RPC service estimate for %s ptl %d has changed from %d to %d\n",
+		       req->rq_import->imp_obd->obd_name, req->rq_request_portal,
+		       oldse, at_get(&at->iat_service_estimate[idx]));
+}
+
+/* Expected network latency per remote node (secs) */
+int ptlrpc_at_get_net_latency(struct ptlrpc_request *req)
+{
+	return AT_OFF ? 0 : at_get(&req->rq_import->imp_at.iat_net_latency);
+}
+
+/* Adjust expected network latency */
+static void ptlrpc_at_adj_net_latency(struct ptlrpc_request *req,
+				      unsigned int service_time)
+{
+	unsigned int nl, oldnl;
+	struct imp_at *at;
+	time_t now = get_seconds();
+
+	LASSERT(req->rq_import);
+
+	if (service_time > now - req->rq_sent + 3) {
+		/* bz16408, however, this can also happen if early reply
+		 * is lost and client RPC is expired and resent, early reply
+		 * or reply of original RPC can still be fit in reply buffer
+		 * of resent RPC, now client is measuring time from the
+		 * resent time, but server sent back service time of original
+		 * RPC.
+		 */
+		CDEBUG((lustre_msg_get_flags(req->rq_reqmsg) & MSG_RESENT) ?
+		       D_ADAPTTO : D_WARNING,
+		       "Reported service time %u > total measured time "
+		       CFS_DURATION_T"\n", service_time,
+		       cfs_time_sub(now, req->rq_sent));
+		return;
+	}
+
+	/* Network latency is total time less server processing time */
+	nl = max_t(int, now - req->rq_sent -
+			service_time, 0) + 1; /* st rounding */
+	at = &req->rq_import->imp_at;
+
+	oldnl = at_measured(&at->iat_net_latency, nl);
+	if (oldnl != 0)
+		CDEBUG(D_ADAPTTO, "The network latency for %s (nid %s) has changed from %d to %d\n",
+		       req->rq_import->imp_obd->obd_name,
+		       obd_uuid2str(
+			       &req->rq_import->imp_connection->c_remote_uuid),
+		       oldnl, at_get(&at->iat_net_latency));
+}
+
+static int unpack_reply(struct ptlrpc_request *req)
+{
+	int rc;
+
+	if (SPTLRPC_FLVR_POLICY(req->rq_flvr.sf_rpc) != SPTLRPC_POLICY_NULL) {
+		rc = ptlrpc_unpack_rep_msg(req, req->rq_replen);
+		if (rc) {
+			DEBUG_REQ(D_ERROR, req, "unpack_rep failed: %d", rc);
+			return -EPROTO;
+		}
+	}
+
+	rc = lustre_unpack_rep_ptlrpc_body(req, MSG_PTLRPC_BODY_OFF);
+	if (rc) {
+		DEBUG_REQ(D_ERROR, req, "unpack ptlrpc body failed: %d", rc);
+		return -EPROTO;
+	}
+	return 0;
+}
+
+/**
+ * Handle an early reply message, called with the rq_lock held.
+ * If anything goes wrong just ignore it - same as if it never happened
+ */
+static int ptlrpc_at_recv_early_reply(struct ptlrpc_request *req)
+{
+	struct ptlrpc_request *early_req;
+	time_t		 olddl;
+	int		    rc;
+
+	req->rq_early = 0;
+	spin_unlock(&req->rq_lock);
+
+	rc = sptlrpc_cli_unwrap_early_reply(req, &early_req);
+	if (rc) {
+		spin_lock(&req->rq_lock);
+		return rc;
+	}
+
+	rc = unpack_reply(early_req);
+	if (rc == 0) {
+		/* Expecting to increase the service time estimate here */
+		ptlrpc_at_adj_service(req,
+			lustre_msg_get_timeout(early_req->rq_repmsg));
+		ptlrpc_at_adj_net_latency(req,
+			lustre_msg_get_service_time(early_req->rq_repmsg));
+	}
+
+	sptlrpc_cli_finish_early_reply(early_req);
+
+	if (rc != 0) {
+		spin_lock(&req->rq_lock);
+		return rc;
+	}
+
+	/* Adjust the local timeout for this req */
+	ptlrpc_at_set_req_timeout(req);
+
+	spin_lock(&req->rq_lock);
+	olddl = req->rq_deadline;
+	/* server assumes it now has rq_timeout from when it sent the
+	 * early reply, so client should give it at least that long. */
+	req->rq_deadline = get_seconds() + req->rq_timeout +
+			   ptlrpc_at_get_net_latency(req);
+
+	DEBUG_REQ(D_ADAPTTO, req,
+		  "Early reply #%d, new deadline in " CFS_DURATION_T "s (" CFS_DURATION_T "s)",
+		  req->rq_early_count,
+		  cfs_time_sub(req->rq_deadline, get_seconds()),
+		  cfs_time_sub(req->rq_deadline, olddl));
+
+	return rc;
+}
+
+struct kmem_cache *request_cache;
+
+int ptlrpc_request_cache_init(void)
+{
+	request_cache = kmem_cache_create("ptlrpc_cache",
+					  sizeof(struct ptlrpc_request),
+					  0, SLAB_HWCACHE_ALIGN, NULL);
+	return request_cache == NULL ? -ENOMEM : 0;
+}
+
+void ptlrpc_request_cache_fini(void)
+{
+	kmem_cache_destroy(request_cache);
+}
+
+struct ptlrpc_request *ptlrpc_request_cache_alloc(gfp_t flags)
+{
+	struct ptlrpc_request *req;
+
+	OBD_SLAB_ALLOC_PTR_GFP(req, request_cache, flags);
+	return req;
+}
+
+void ptlrpc_request_cache_free(struct ptlrpc_request *req)
+{
+	OBD_SLAB_FREE_PTR(req, request_cache);
+}
+
+/**
+ * Wind down request pool \a pool.
+ * Frees all requests from the pool too
+ */
+void ptlrpc_free_rq_pool(struct ptlrpc_request_pool *pool)
+{
+	struct list_head *l, *tmp;
+	struct ptlrpc_request *req;
+
+	LASSERT(pool != NULL);
+
+	spin_lock(&pool->prp_lock);
+	list_for_each_safe(l, tmp, &pool->prp_req_list) {
+		req = list_entry(l, struct ptlrpc_request, rq_list);
+		list_del(&req->rq_list);
+		LASSERT(req->rq_reqbuf);
+		LASSERT(req->rq_reqbuf_len == pool->prp_rq_size);
+		OBD_FREE_LARGE(req->rq_reqbuf, pool->prp_rq_size);
+		ptlrpc_request_cache_free(req);
+	}
+	spin_unlock(&pool->prp_lock);
+	OBD_FREE(pool, sizeof(*pool));
+}
+EXPORT_SYMBOL(ptlrpc_free_rq_pool);
+
+/**
+ * Allocates, initializes and adds \a num_rq requests to the pool \a pool
+ */
+void ptlrpc_add_rqs_to_pool(struct ptlrpc_request_pool *pool, int num_rq)
+{
+	int i;
+	int size = 1;
+
+	while (size < pool->prp_rq_size)
+		size <<= 1;
+
+	LASSERTF(list_empty(&pool->prp_req_list) ||
+		 size == pool->prp_rq_size,
+		 "Trying to change pool size with nonempty pool from %d to %d bytes\n",
+		 pool->prp_rq_size, size);
+
+	spin_lock(&pool->prp_lock);
+	pool->prp_rq_size = size;
+	for (i = 0; i < num_rq; i++) {
+		struct ptlrpc_request *req;
+		struct lustre_msg *msg;
+
+		spin_unlock(&pool->prp_lock);
+		req = ptlrpc_request_cache_alloc(GFP_NOFS);
+		if (!req)
+			return;
+		OBD_ALLOC_LARGE(msg, size);
+		if (!msg) {
+			ptlrpc_request_cache_free(req);
+			return;
+		}
+		req->rq_reqbuf = msg;
+		req->rq_reqbuf_len = size;
+		req->rq_pool = pool;
+		spin_lock(&pool->prp_lock);
+		list_add_tail(&req->rq_list, &pool->prp_req_list);
+	}
+	spin_unlock(&pool->prp_lock);
+}
+EXPORT_SYMBOL(ptlrpc_add_rqs_to_pool);
+
+/**
+ * Create and initialize new request pool with given attributes:
+ * \a num_rq - initial number of requests to create for the pool
+ * \a msgsize - maximum message size possible for requests in thid pool
+ * \a populate_pool - function to be called when more requests need to be added
+ *		    to the pool
+ * Returns pointer to newly created pool or NULL on error.
+ */
+struct ptlrpc_request_pool *
+ptlrpc_init_rq_pool(int num_rq, int msgsize,
+		    void (*populate_pool)(struct ptlrpc_request_pool *, int))
+{
+	struct ptlrpc_request_pool *pool;
+
+	OBD_ALLOC(pool, sizeof(struct ptlrpc_request_pool));
+	if (!pool)
+		return NULL;
+
+	/* Request next power of two for the allocation, because internally
+	   kernel would do exactly this */
+
+	spin_lock_init(&pool->prp_lock);
+	INIT_LIST_HEAD(&pool->prp_req_list);
+	pool->prp_rq_size = msgsize + SPTLRPC_MAX_PAYLOAD;
+	pool->prp_populate = populate_pool;
+
+	populate_pool(pool, num_rq);
+
+	if (list_empty(&pool->prp_req_list)) {
+		/* have not allocated a single request for the pool */
+		OBD_FREE(pool, sizeof(struct ptlrpc_request_pool));
+		pool = NULL;
+	}
+	return pool;
+}
+EXPORT_SYMBOL(ptlrpc_init_rq_pool);
+
+/**
+ * Fetches one request from pool \a pool
+ */
+static struct ptlrpc_request *
+ptlrpc_prep_req_from_pool(struct ptlrpc_request_pool *pool)
+{
+	struct ptlrpc_request *request;
+	struct lustre_msg *reqbuf;
+
+	if (!pool)
+		return NULL;
+
+	spin_lock(&pool->prp_lock);
+
+	/* See if we have anything in a pool, and bail out if nothing,
+	 * in writeout path, where this matters, this is safe to do, because
+	 * nothing is lost in this case, and when some in-flight requests
+	 * complete, this code will be called again. */
+	if (unlikely(list_empty(&pool->prp_req_list))) {
+		spin_unlock(&pool->prp_lock);
+		return NULL;
+	}
+
+	request = list_entry(pool->prp_req_list.next, struct ptlrpc_request,
+				 rq_list);
+	list_del_init(&request->rq_list);
+	spin_unlock(&pool->prp_lock);
+
+	LASSERT(request->rq_reqbuf);
+	LASSERT(request->rq_pool);
+
+	reqbuf = request->rq_reqbuf;
+	memset(request, 0, sizeof(*request));
+	request->rq_reqbuf = reqbuf;
+	request->rq_reqbuf_len = pool->prp_rq_size;
+	request->rq_pool = pool;
+
+	return request;
+}
+
+/**
+ * Returns freed \a request to pool.
+ */
+static void __ptlrpc_free_req_to_pool(struct ptlrpc_request *request)
+{
+	struct ptlrpc_request_pool *pool = request->rq_pool;
+
+	spin_lock(&pool->prp_lock);
+	LASSERT(list_empty(&request->rq_list));
+	LASSERT(!request->rq_receiving_reply);
+	list_add_tail(&request->rq_list, &pool->prp_req_list);
+	spin_unlock(&pool->prp_lock);
+}
+
+static int __ptlrpc_request_bufs_pack(struct ptlrpc_request *request,
+				      __u32 version, int opcode,
+				      int count, __u32 *lengths, char **bufs,
+				      struct ptlrpc_cli_ctx *ctx)
+{
+	struct obd_import  *imp = request->rq_import;
+	int		 rc;
+
+	if (unlikely(ctx))
+		request->rq_cli_ctx = sptlrpc_cli_ctx_get(ctx);
+	else {
+		rc = sptlrpc_req_get_ctx(request);
+		if (rc)
+			goto out_free;
+	}
+
+	sptlrpc_req_set_flavor(request, opcode);
+
+	rc = lustre_pack_request(request, imp->imp_msg_magic, count,
+				 lengths, bufs);
+	if (rc) {
+		LASSERT(!request->rq_pool);
+		goto out_ctx;
+	}
+
+	lustre_msg_add_version(request->rq_reqmsg, version);
+	request->rq_send_state = LUSTRE_IMP_FULL;
+	request->rq_type = PTL_RPC_MSG_REQUEST;
+	request->rq_export = NULL;
+
+	request->rq_req_cbid.cbid_fn  = request_out_callback;
+	request->rq_req_cbid.cbid_arg = request;
+
+	request->rq_reply_cbid.cbid_fn  = reply_in_callback;
+	request->rq_reply_cbid.cbid_arg = request;
+
+	request->rq_reply_deadline = 0;
+	request->rq_phase = RQ_PHASE_NEW;
+	request->rq_next_phase = RQ_PHASE_UNDEFINED;
+
+	request->rq_request_portal = imp->imp_client->cli_request_portal;
+	request->rq_reply_portal = imp->imp_client->cli_reply_portal;
+
+	ptlrpc_at_set_req_timeout(request);
+
+	spin_lock_init(&request->rq_lock);
+	INIT_LIST_HEAD(&request->rq_list);
+	INIT_LIST_HEAD(&request->rq_timed_list);
+	INIT_LIST_HEAD(&request->rq_replay_list);
+	INIT_LIST_HEAD(&request->rq_ctx_chain);
+	INIT_LIST_HEAD(&request->rq_set_chain);
+	INIT_LIST_HEAD(&request->rq_history_list);
+	INIT_LIST_HEAD(&request->rq_exp_list);
+	init_waitqueue_head(&request->rq_reply_waitq);
+	init_waitqueue_head(&request->rq_set_waitq);
+	request->rq_xid = ptlrpc_next_xid();
+	atomic_set(&request->rq_refcount, 1);
+
+	lustre_msg_set_opc(request->rq_reqmsg, opcode);
+
+	return 0;
+out_ctx:
+	sptlrpc_cli_ctx_put(request->rq_cli_ctx, 1);
+out_free:
+	class_import_put(imp);
+	return rc;
+}
+
+int ptlrpc_request_bufs_pack(struct ptlrpc_request *request,
+			     __u32 version, int opcode, char **bufs,
+			     struct ptlrpc_cli_ctx *ctx)
+{
+	int count;
+
+	count = req_capsule_filled_sizes(&request->rq_pill, RCL_CLIENT);
+	return __ptlrpc_request_bufs_pack(request, version, opcode, count,
+					  request->rq_pill.rc_area[RCL_CLIENT],
+					  bufs, ctx);
+}
+EXPORT_SYMBOL(ptlrpc_request_bufs_pack);
+
+/**
+ * Pack request buffers for network transfer, performing necessary encryption
+ * steps if necessary.
+ */
+int ptlrpc_request_pack(struct ptlrpc_request *request,
+			__u32 version, int opcode)
+{
+	int rc;
+	rc = ptlrpc_request_bufs_pack(request, version, opcode, NULL, NULL);
+	if (rc)
+		return rc;
+
+	/* For some old 1.8 clients (< 1.8.7), they will LASSERT the size of
+	 * ptlrpc_body sent from server equal to local ptlrpc_body size, so we
+	 * have to send old ptlrpc_body to keep interoperability with these
+	 * clients.
+	 *
+	 * Only three kinds of server->client RPCs so far:
+	 *  - LDLM_BL_CALLBACK
+	 *  - LDLM_CP_CALLBACK
+	 *  - LDLM_GL_CALLBACK
+	 *
+	 * XXX This should be removed whenever we drop the interoperability with
+	 *     the these old clients.
+	 */
+	if (opcode == LDLM_BL_CALLBACK || opcode == LDLM_CP_CALLBACK ||
+	    opcode == LDLM_GL_CALLBACK)
+		req_capsule_shrink(&request->rq_pill, &RMF_PTLRPC_BODY,
+				   sizeof(struct ptlrpc_body_v2), RCL_CLIENT);
+
+	return rc;
+}
+EXPORT_SYMBOL(ptlrpc_request_pack);
+
+/**
+ * Helper function to allocate new request on import \a imp
+ * and possibly using existing request from pool \a pool if provided.
+ * Returns allocated request structure with import field filled or
+ * NULL on error.
+ */
+static inline
+struct ptlrpc_request *__ptlrpc_request_alloc(struct obd_import *imp,
+					      struct ptlrpc_request_pool *pool)
+{
+	struct ptlrpc_request *request = NULL;
+
+	if (pool)
+		request = ptlrpc_prep_req_from_pool(pool);
+
+	if (!request)
+		request = ptlrpc_request_cache_alloc(GFP_NOFS);
+
+	if (request) {
+		LASSERTF((unsigned long)imp > 0x1000, "%p", imp);
+		LASSERT(imp != LP_POISON);
+		LASSERTF((unsigned long)imp->imp_client > 0x1000, "%p",
+			imp->imp_client);
+		LASSERT(imp->imp_client != LP_POISON);
+
+		request->rq_import = class_import_get(imp);
+	} else {
+		CERROR("request allocation out of memory\n");
+	}
+
+	return request;
+}
+
+/**
+ * Helper function for creating a request.
+ * Calls __ptlrpc_request_alloc to allocate new request structure and inits
+ * buffer structures according to capsule template \a format.
+ * Returns allocated request structure pointer or NULL on error.
+ */
+static struct ptlrpc_request *
+ptlrpc_request_alloc_internal(struct obd_import *imp,
+			      struct ptlrpc_request_pool *pool,
+			      const struct req_format *format)
+{
+	struct ptlrpc_request *request;
+
+	request = __ptlrpc_request_alloc(imp, pool);
+	if (request == NULL)
+		return NULL;
+
+	req_capsule_init(&request->rq_pill, request, RCL_CLIENT);
+	req_capsule_set(&request->rq_pill, format);
+	return request;
+}
+
+/**
+ * Allocate new request structure for import \a imp and initialize its
+ * buffer structure according to capsule template \a format.
+ */
+struct ptlrpc_request *ptlrpc_request_alloc(struct obd_import *imp,
+					    const struct req_format *format)
+{
+	return ptlrpc_request_alloc_internal(imp, NULL, format);
+}
+EXPORT_SYMBOL(ptlrpc_request_alloc);
+
+/**
+ * Allocate new request structure for import \a imp from pool \a pool and
+ * initialize its buffer structure according to capsule template \a format.
+ */
+struct ptlrpc_request *ptlrpc_request_alloc_pool(struct obd_import *imp,
+					    struct ptlrpc_request_pool *pool,
+					    const struct req_format *format)
+{
+	return ptlrpc_request_alloc_internal(imp, pool, format);
+}
+EXPORT_SYMBOL(ptlrpc_request_alloc_pool);
+
+/**
+ * For requests not from pool, free memory of the request structure.
+ * For requests obtained from a pool earlier, return request back to pool.
+ */
+void ptlrpc_request_free(struct ptlrpc_request *request)
+{
+	if (request->rq_pool)
+		__ptlrpc_free_req_to_pool(request);
+	else
+		ptlrpc_request_cache_free(request);
+}
+EXPORT_SYMBOL(ptlrpc_request_free);
+
+/**
+ * Allocate new request for operation \a opcode and immediately pack it for
+ * network transfer.
+ * Only used for simple requests like OBD_PING where the only important
+ * part of the request is operation itself.
+ * Returns allocated request or NULL on error.
+ */
+struct ptlrpc_request *ptlrpc_request_alloc_pack(struct obd_import *imp,
+						const struct req_format *format,
+						__u32 version, int opcode)
+{
+	struct ptlrpc_request *req = ptlrpc_request_alloc(imp, format);
+	int		    rc;
+
+	if (req) {
+		rc = ptlrpc_request_pack(req, version, opcode);
+		if (rc) {
+			ptlrpc_request_free(req);
+			req = NULL;
+		}
+	}
+	return req;
+}
+EXPORT_SYMBOL(ptlrpc_request_alloc_pack);
+
+/**
+ * Prepare request (fetched from pool \a pool if not NULL) on import \a imp
+ * for operation \a opcode. Request would contain \a count buffers.
+ * Sizes of buffers are described in array \a lengths and buffers themselves
+ * are provided by a pointer \a bufs.
+ * Returns prepared request structure pointer or NULL on error.
+ */
+struct ptlrpc_request *
+ptlrpc_prep_req_pool(struct obd_import *imp,
+		     __u32 version, int opcode,
+		     int count, __u32 *lengths, char **bufs,
+		     struct ptlrpc_request_pool *pool)
+{
+	struct ptlrpc_request *request;
+	int		    rc;
+
+	request = __ptlrpc_request_alloc(imp, pool);
+	if (!request)
+		return NULL;
+
+	rc = __ptlrpc_request_bufs_pack(request, version, opcode, count,
+					lengths, bufs, NULL);
+	if (rc) {
+		ptlrpc_request_free(request);
+		request = NULL;
+	}
+	return request;
+}
+EXPORT_SYMBOL(ptlrpc_prep_req_pool);
+
+/**
+ * Same as ptlrpc_prep_req_pool, but without pool
+ */
+struct ptlrpc_request *
+ptlrpc_prep_req(struct obd_import *imp, __u32 version, int opcode, int count,
+		__u32 *lengths, char **bufs)
+{
+	return ptlrpc_prep_req_pool(imp, version, opcode, count, lengths, bufs,
+				    NULL);
+}
+EXPORT_SYMBOL(ptlrpc_prep_req);
+
+/**
+ * Allocate and initialize new request set structure.
+ * Returns a pointer to the newly allocated set structure or NULL on error.
+ */
+struct ptlrpc_request_set *ptlrpc_prep_set(void)
+{
+	struct ptlrpc_request_set *set;
+
+	OBD_ALLOC(set, sizeof(*set));
+	if (!set)
+		return NULL;
+	atomic_set(&set->set_refcount, 1);
+	INIT_LIST_HEAD(&set->set_requests);
+	init_waitqueue_head(&set->set_waitq);
+	atomic_set(&set->set_new_count, 0);
+	atomic_set(&set->set_remaining, 0);
+	spin_lock_init(&set->set_new_req_lock);
+	INIT_LIST_HEAD(&set->set_new_requests);
+	INIT_LIST_HEAD(&set->set_cblist);
+	set->set_max_inflight = UINT_MAX;
+	set->set_producer     = NULL;
+	set->set_producer_arg = NULL;
+	set->set_rc	   = 0;
+
+	return set;
+}
+EXPORT_SYMBOL(ptlrpc_prep_set);
+
+/**
+ * Allocate and initialize new request set structure with flow control
+ * extension. This extension allows to control the number of requests in-flight
+ * for the whole set. A callback function to generate requests must be provided
+ * and the request set will keep the number of requests sent over the wire to
+ * @max_inflight.
+ * Returns a pointer to the newly allocated set structure or NULL on error.
+ */
+struct ptlrpc_request_set *ptlrpc_prep_fcset(int max, set_producer_func func,
+					     void *arg)
+
+{
+	struct ptlrpc_request_set *set;
+
+	set = ptlrpc_prep_set();
+	if (!set)
+		return NULL;
+
+	set->set_max_inflight  = max;
+	set->set_producer      = func;
+	set->set_producer_arg  = arg;
+
+	return set;
+}
+EXPORT_SYMBOL(ptlrpc_prep_fcset);
+
+/**
+ * Wind down and free request set structure previously allocated with
+ * ptlrpc_prep_set.
+ * Ensures that all requests on the set have completed and removes
+ * all requests from the request list in a set.
+ * If any unsent request happen to be on the list, pretends that they got
+ * an error in flight and calls their completion handler.
+ */
+void ptlrpc_set_destroy(struct ptlrpc_request_set *set)
+{
+	struct list_head       *tmp;
+	struct list_head       *next;
+	int	       expected_phase;
+	int	       n = 0;
+
+	/* Requests on the set should either all be completed, or all be new */
+	expected_phase = (atomic_read(&set->set_remaining) == 0) ?
+			 RQ_PHASE_COMPLETE : RQ_PHASE_NEW;
+	list_for_each(tmp, &set->set_requests) {
+		struct ptlrpc_request *req =
+			list_entry(tmp, struct ptlrpc_request,
+				       rq_set_chain);
+
+		LASSERT(req->rq_phase == expected_phase);
+		n++;
+	}
+
+	LASSERTF(atomic_read(&set->set_remaining) == 0 ||
+		 atomic_read(&set->set_remaining) == n, "%d / %d\n",
+		 atomic_read(&set->set_remaining), n);
+
+	list_for_each_safe(tmp, next, &set->set_requests) {
+		struct ptlrpc_request *req =
+			list_entry(tmp, struct ptlrpc_request,
+				       rq_set_chain);
+		list_del_init(&req->rq_set_chain);
+
+		LASSERT(req->rq_phase == expected_phase);
+
+		if (req->rq_phase == RQ_PHASE_NEW) {
+			ptlrpc_req_interpret(NULL, req, -EBADR);
+			atomic_dec(&set->set_remaining);
+		}
+
+		spin_lock(&req->rq_lock);
+		req->rq_set = NULL;
+		req->rq_invalid_rqset = 0;
+		spin_unlock(&req->rq_lock);
+
+		ptlrpc_req_finished(req);
+	}
+
+	LASSERT(atomic_read(&set->set_remaining) == 0);
+
+	ptlrpc_reqset_put(set);
+}
+EXPORT_SYMBOL(ptlrpc_set_destroy);
+
+/**
+ * Add a callback function \a fn to the set.
+ * This function would be called when all requests on this set are completed.
+ * The function will be passed \a data argument.
+ */
+int ptlrpc_set_add_cb(struct ptlrpc_request_set *set,
+		      set_interpreter_func fn, void *data)
+{
+	struct ptlrpc_set_cbdata *cbdata;
+
+	OBD_ALLOC_PTR(cbdata);
+	if (cbdata == NULL)
+		return -ENOMEM;
+
+	cbdata->psc_interpret = fn;
+	cbdata->psc_data = data;
+	list_add_tail(&cbdata->psc_item, &set->set_cblist);
+
+	return 0;
+}
+EXPORT_SYMBOL(ptlrpc_set_add_cb);
+
+/**
+ * Add a new request to the general purpose request set.
+ * Assumes request reference from the caller.
+ */
+void ptlrpc_set_add_req(struct ptlrpc_request_set *set,
+			struct ptlrpc_request *req)
+{
+	LASSERT(list_empty(&req->rq_set_chain));
+
+	/* The set takes over the caller's request reference */
+	list_add_tail(&req->rq_set_chain, &set->set_requests);
+	req->rq_set = set;
+	atomic_inc(&set->set_remaining);
+	req->rq_queued_time = cfs_time_current();
+
+	if (req->rq_reqmsg != NULL)
+		lustre_msg_set_jobid(req->rq_reqmsg, NULL);
+
+	if (set->set_producer != NULL)
+		/* If the request set has a producer callback, the RPC must be
+		 * sent straight away */
+		ptlrpc_send_new_req(req);
+}
+EXPORT_SYMBOL(ptlrpc_set_add_req);
+
+/**
+ * Add a request to a request with dedicated server thread
+ * and wake the thread to make any necessary processing.
+ * Currently only used for ptlrpcd.
+ */
+void ptlrpc_set_add_new_req(struct ptlrpcd_ctl *pc,
+			   struct ptlrpc_request *req)
+{
+	struct ptlrpc_request_set *set = pc->pc_set;
+	int count, i;
+
+	LASSERT(req->rq_set == NULL);
+	LASSERT(test_bit(LIOD_STOP, &pc->pc_flags) == 0);
+
+	spin_lock(&set->set_new_req_lock);
+	/*
+	 * The set takes over the caller's request reference.
+	 */
+	req->rq_set = set;
+	req->rq_queued_time = cfs_time_current();
+	list_add_tail(&req->rq_set_chain, &set->set_new_requests);
+	count = atomic_inc_return(&set->set_new_count);
+	spin_unlock(&set->set_new_req_lock);
+
+	/* Only need to call wakeup once for the first entry. */
+	if (count == 1) {
+		wake_up(&set->set_waitq);
+
+		/* XXX: It maybe unnecessary to wakeup all the partners. But to
+		 *      guarantee the async RPC can be processed ASAP, we have
+		 *      no other better choice. It maybe fixed in future. */
+		for (i = 0; i < pc->pc_npartners; i++)
+			wake_up(&pc->pc_partners[i]->pc_set->set_waitq);
+	}
+}
+EXPORT_SYMBOL(ptlrpc_set_add_new_req);
+
+/**
+ * Based on the current state of the import, determine if the request
+ * can be sent, is an error, or should be delayed.
+ *
+ * Returns true if this request should be delayed. If false, and
+ * *status is set, then the request can not be sent and *status is the
+ * error code.  If false and status is 0, then request can be sent.
+ *
+ * The imp->imp_lock must be held.
+ */
+static int ptlrpc_import_delay_req(struct obd_import *imp,
+				   struct ptlrpc_request *req, int *status)
+{
+	int delay = 0;
+
+	LASSERT(status != NULL);
+	*status = 0;
+
+	if (req->rq_ctx_init || req->rq_ctx_fini) {
+		/* always allow ctx init/fini rpc go through */
+	} else if (imp->imp_state == LUSTRE_IMP_NEW) {
+		DEBUG_REQ(D_ERROR, req, "Uninitialized import.");
+		*status = -EIO;
+	} else if (imp->imp_state == LUSTRE_IMP_CLOSED) {
+		/* pings may safely race with umount */
+		DEBUG_REQ(lustre_msg_get_opc(req->rq_reqmsg) == OBD_PING ?
+			  D_HA : D_ERROR, req, "IMP_CLOSED ");
+		*status = -EIO;
+	} else if (ptlrpc_send_limit_expired(req)) {
+		/* probably doesn't need to be a D_ERROR after initial testing */
+		DEBUG_REQ(D_ERROR, req, "send limit expired ");
+		*status = -EIO;
+	} else if (req->rq_send_state == LUSTRE_IMP_CONNECTING &&
+		   imp->imp_state == LUSTRE_IMP_CONNECTING) {
+		/* allow CONNECT even if import is invalid */
+		if (atomic_read(&imp->imp_inval_count) != 0) {
+			DEBUG_REQ(D_ERROR, req, "invalidate in flight");
+			*status = -EIO;
+		}
+	} else if (imp->imp_invalid || imp->imp_obd->obd_no_recov) {
+		if (!imp->imp_deactive)
+			DEBUG_REQ(D_NET, req, "IMP_INVALID");
+		*status = -ESHUTDOWN; /* bz 12940 */
+	} else if (req->rq_import_generation != imp->imp_generation) {
+		DEBUG_REQ(D_ERROR, req, "req wrong generation:");
+		*status = -EIO;
+	} else if (req->rq_send_state != imp->imp_state) {
+		/* invalidate in progress - any requests should be drop */
+		if (atomic_read(&imp->imp_inval_count) != 0) {
+			DEBUG_REQ(D_ERROR, req, "invalidate in flight");
+			*status = -EIO;
+		} else if (imp->imp_dlm_fake || req->rq_no_delay) {
+			*status = -EWOULDBLOCK;
+		} else if (req->rq_allow_replay &&
+			  (imp->imp_state == LUSTRE_IMP_REPLAY ||
+			   imp->imp_state == LUSTRE_IMP_REPLAY_LOCKS ||
+			   imp->imp_state == LUSTRE_IMP_REPLAY_WAIT ||
+			   imp->imp_state == LUSTRE_IMP_RECOVER)) {
+			DEBUG_REQ(D_HA, req, "allow during recovery.\n");
+		} else {
+			delay = 1;
+		}
+	}
+
+	return delay;
+}
+
+/**
+ * Decide if the error message regarding provided request \a req
+ * should be printed to the console or not.
+ * Makes it's decision on request status and other properties.
+ * Returns 1 to print error on the system console or 0 if not.
+ */
+static int ptlrpc_console_allow(struct ptlrpc_request *req)
+{
+	__u32 opc;
+	int err;
+
+	LASSERT(req->rq_reqmsg != NULL);
+	opc = lustre_msg_get_opc(req->rq_reqmsg);
+
+	/* Suppress particular reconnect errors which are to be expected.  No
+	 * errors are suppressed for the initial connection on an import */
+	if ((lustre_handle_is_used(&req->rq_import->imp_remote_handle)) &&
+	    (opc == OST_CONNECT || opc == MDS_CONNECT || opc == MGS_CONNECT)) {
+
+		/* Suppress timed out reconnect requests */
+		if (req->rq_timedout)
+			return 0;
+
+		/* Suppress unavailable/again reconnect requests */
+		err = lustre_msg_get_status(req->rq_repmsg);
+		if (err == -ENODEV || err == -EAGAIN)
+			return 0;
+	}
+
+	return 1;
+}
+
+/**
+ * Check request processing status.
+ * Returns the status.
+ */
+static int ptlrpc_check_status(struct ptlrpc_request *req)
+{
+	int err;
+
+	err = lustre_msg_get_status(req->rq_repmsg);
+	if (lustre_msg_get_type(req->rq_repmsg) == PTL_RPC_MSG_ERR) {
+		struct obd_import *imp = req->rq_import;
+		__u32 opc = lustre_msg_get_opc(req->rq_reqmsg);
+		if (ptlrpc_console_allow(req))
+			LCONSOLE_ERROR_MSG(0x011, "%s: Communicating with %s, operation %s failed with %d.\n",
+					   imp->imp_obd->obd_name,
+					   libcfs_nid2str(
+						   imp->imp_connection->c_peer.nid),
+					   ll_opcode2str(opc), err);
+		return err < 0 ? err : -EINVAL;
+	}
+
+	if (err < 0) {
+		DEBUG_REQ(D_INFO, req, "status is %d", err);
+	} else if (err > 0) {
+		/* XXX: translate this error from net to host */
+		DEBUG_REQ(D_INFO, req, "status is %d", err);
+	}
+
+	return err;
+}
+
+/**
+ * save pre-versions of objects into request for replay.
+ * Versions are obtained from server reply.
+ * used for VBR.
+ */
+static void ptlrpc_save_versions(struct ptlrpc_request *req)
+{
+	struct lustre_msg *repmsg = req->rq_repmsg;
+	struct lustre_msg *reqmsg = req->rq_reqmsg;
+	__u64 *versions = lustre_msg_get_versions(repmsg);
+
+	if (lustre_msg_get_flags(req->rq_reqmsg) & MSG_REPLAY)
+		return;
+
+	LASSERT(versions);
+	lustre_msg_set_versions(reqmsg, versions);
+	CDEBUG(D_INFO, "Client save versions [%#llx/%#llx]\n",
+	       versions[0], versions[1]);
+}
+
+/**
+ * Callback function called when client receives RPC reply for \a req.
+ * Returns 0 on success or error code.
+ * The return value would be assigned to req->rq_status by the caller
+ * as request processing status.
+ * This function also decides if the request needs to be saved for later replay.
+ */
+static int after_reply(struct ptlrpc_request *req)
+{
+	struct obd_import *imp = req->rq_import;
+	struct obd_device *obd = req->rq_import->imp_obd;
+	int rc;
+	struct timeval work_start;
+	long timediff;
+
+	LASSERT(obd != NULL);
+	/* repbuf must be unlinked */
+	LASSERT(!req->rq_receiving_reply && !req->rq_reply_unlink);
+
+	if (req->rq_reply_truncate) {
+		if (ptlrpc_no_resend(req)) {
+			DEBUG_REQ(D_ERROR, req, "reply buffer overflow, expected: %d, actual size: %d",
+				  req->rq_nob_received, req->rq_repbuf_len);
+			return -EOVERFLOW;
+		}
+
+		sptlrpc_cli_free_repbuf(req);
+		/* Pass the required reply buffer size (include
+		 * space for early reply).
+		 * NB: no need to roundup because alloc_repbuf
+		 * will roundup it */
+		req->rq_replen       = req->rq_nob_received;
+		req->rq_nob_received = 0;
+		spin_lock(&req->rq_lock);
+		req->rq_resend       = 1;
+		spin_unlock(&req->rq_lock);
+		return 0;
+	}
+
+	/*
+	 * NB Until this point, the whole of the incoming message,
+	 * including buflens, status etc is in the sender's byte order.
+	 */
+	rc = sptlrpc_cli_unwrap_reply(req);
+	if (rc) {
+		DEBUG_REQ(D_ERROR, req, "unwrap reply failed (%d):", rc);
+		return rc;
+	}
+
+	/*
+	 * Security layer unwrap might ask resend this request.
+	 */
+	if (req->rq_resend)
+		return 0;
+
+	rc = unpack_reply(req);
+	if (rc)
+		return rc;
+
+	/* retry indefinitely on EINPROGRESS */
+	if (lustre_msg_get_status(req->rq_repmsg) == -EINPROGRESS &&
+	    ptlrpc_no_resend(req) == 0 && !req->rq_no_retry_einprogress) {
+		time_t	now = get_seconds();
+
+		DEBUG_REQ(D_RPCTRACE, req, "Resending request on EINPROGRESS");
+		spin_lock(&req->rq_lock);
+		req->rq_resend = 1;
+		spin_unlock(&req->rq_lock);
+		req->rq_nr_resend++;
+
+		/* allocate new xid to avoid reply reconstruction */
+		if (!req->rq_bulk) {
+			/* new xid is already allocated for bulk in
+			 * ptlrpc_check_set() */
+			req->rq_xid = ptlrpc_next_xid();
+			DEBUG_REQ(D_RPCTRACE, req, "Allocating new xid for resend on EINPROGRESS");
+		}
+
+		/* Readjust the timeout for current conditions */
+		ptlrpc_at_set_req_timeout(req);
+		/* delay resend to give a chance to the server to get ready.
+		 * The delay is increased by 1s on every resend and is capped to
+		 * the current request timeout (i.e. obd_timeout if AT is off,
+		 * or AT service time x 125% + 5s, see at_est2timeout) */
+		if (req->rq_nr_resend > req->rq_timeout)
+			req->rq_sent = now + req->rq_timeout;
+		else
+			req->rq_sent = now + req->rq_nr_resend;
+
+		return 0;
+	}
+
+	do_gettimeofday(&work_start);
+	timediff = cfs_timeval_sub(&work_start, &req->rq_arrival_time, NULL);
+	if (obd->obd_svc_stats != NULL) {
+		lprocfs_counter_add(obd->obd_svc_stats, PTLRPC_REQWAIT_CNTR,
+				    timediff);
+		ptlrpc_lprocfs_rpc_sent(req, timediff);
+	}
+
+	if (lustre_msg_get_type(req->rq_repmsg) != PTL_RPC_MSG_REPLY &&
+	    lustre_msg_get_type(req->rq_repmsg) != PTL_RPC_MSG_ERR) {
+		DEBUG_REQ(D_ERROR, req, "invalid packet received (type=%u)",
+			  lustre_msg_get_type(req->rq_repmsg));
+		return -EPROTO;
+	}
+
+	if (lustre_msg_get_opc(req->rq_reqmsg) != OBD_PING)
+		CFS_FAIL_TIMEOUT(OBD_FAIL_PTLRPC_PAUSE_REP, cfs_fail_val);
+	ptlrpc_at_adj_service(req, lustre_msg_get_timeout(req->rq_repmsg));
+	ptlrpc_at_adj_net_latency(req,
+				  lustre_msg_get_service_time(req->rq_repmsg));
+
+	rc = ptlrpc_check_status(req);
+	imp->imp_connect_error = rc;
+
+	if (rc) {
+		/*
+		 * Either we've been evicted, or the server has failed for
+		 * some reason. Try to reconnect, and if that fails, punt to
+		 * the upcall.
+		 */
+		if (ll_rpc_recoverable_error(rc)) {
+			if (req->rq_send_state != LUSTRE_IMP_FULL ||
+			    imp->imp_obd->obd_no_recov || imp->imp_dlm_fake) {
+				return rc;
+			}
+			ptlrpc_request_handle_notconn(req);
+			return rc;
+		}
+	} else {
+		/*
+		 * Let's look if server sent slv. Do it only for RPC with
+		 * rc == 0.
+		 */
+		ldlm_cli_update_pool(req);
+	}
+
+	/*
+	 * Store transno in reqmsg for replay.
+	 */
+	if (!(lustre_msg_get_flags(req->rq_reqmsg) & MSG_REPLAY)) {
+		req->rq_transno = lustre_msg_get_transno(req->rq_repmsg);
+		lustre_msg_set_transno(req->rq_reqmsg, req->rq_transno);
+	}
+
+	if (imp->imp_replayable) {
+		spin_lock(&imp->imp_lock);
+		/*
+		 * No point in adding already-committed requests to the replay
+		 * list, we will just remove them immediately. b=9829
+		 */
+		if (req->rq_transno != 0 &&
+		    (req->rq_transno >
+		     lustre_msg_get_last_committed(req->rq_repmsg) ||
+		     req->rq_replay)) {
+			/** version recovery */
+			ptlrpc_save_versions(req);
+			ptlrpc_retain_replayable_request(req, imp);
+		} else if (req->rq_commit_cb != NULL &&
+			   list_empty(&req->rq_replay_list)) {
+			/* NB: don't call rq_commit_cb if it's already on
+			 * rq_replay_list, ptlrpc_free_committed() will call
+			 * it later, see LU-3618 for details */
+			spin_unlock(&imp->imp_lock);
+			req->rq_commit_cb(req);
+			spin_lock(&imp->imp_lock);
+		}
+
+		/*
+		 * Replay-enabled imports return commit-status information.
+		 */
+		if (lustre_msg_get_last_committed(req->rq_repmsg)) {
+			imp->imp_peer_committed_transno =
+				lustre_msg_get_last_committed(req->rq_repmsg);
+		}
+
+		ptlrpc_free_committed(imp);
+
+		if (!list_empty(&imp->imp_replay_list)) {
+			struct ptlrpc_request *last;
+
+			last = list_entry(imp->imp_replay_list.prev,
+					      struct ptlrpc_request,
+					      rq_replay_list);
+			/*
+			 * Requests with rq_replay stay on the list even if no
+			 * commit is expected.
+			 */
+			if (last->rq_transno > imp->imp_peer_committed_transno)
+				ptlrpc_pinger_commit_expected(imp);
+		}
+
+		spin_unlock(&imp->imp_lock);
+	}
+
+	return rc;
+}
+
+/**
+ * Helper function to send request \a req over the network for the first time
+ * Also adjusts request phase.
+ * Returns 0 on success or error code.
+ */
+static int ptlrpc_send_new_req(struct ptlrpc_request *req)
+{
+	struct obd_import     *imp = req->rq_import;
+	int rc;
+
+	LASSERT(req->rq_phase == RQ_PHASE_NEW);
+	if (req->rq_sent && (req->rq_sent > get_seconds()) &&
+	    (!req->rq_generation_set ||
+	     req->rq_import_generation == imp->imp_generation))
+		return 0;
+
+	ptlrpc_rqphase_move(req, RQ_PHASE_RPC);
+
+	spin_lock(&imp->imp_lock);
+
+	if (!req->rq_generation_set)
+		req->rq_import_generation = imp->imp_generation;
+
+	if (ptlrpc_import_delay_req(imp, req, &rc)) {
+		spin_lock(&req->rq_lock);
+		req->rq_waiting = 1;
+		spin_unlock(&req->rq_lock);
+
+		DEBUG_REQ(D_HA, req, "req from PID %d waiting for recovery: (%s != %s)",
+			  lustre_msg_get_status(req->rq_reqmsg),
+			  ptlrpc_import_state_name(req->rq_send_state),
+			  ptlrpc_import_state_name(imp->imp_state));
+		LASSERT(list_empty(&req->rq_list));
+		list_add_tail(&req->rq_list, &imp->imp_delayed_list);
+		atomic_inc(&req->rq_import->imp_inflight);
+		spin_unlock(&imp->imp_lock);
+		return 0;
+	}
+
+	if (rc != 0) {
+		spin_unlock(&imp->imp_lock);
+		req->rq_status = rc;
+		ptlrpc_rqphase_move(req, RQ_PHASE_INTERPRET);
+		return rc;
+	}
+
+	LASSERT(list_empty(&req->rq_list));
+	list_add_tail(&req->rq_list, &imp->imp_sending_list);
+	atomic_inc(&req->rq_import->imp_inflight);
+	spin_unlock(&imp->imp_lock);
+
+	lustre_msg_set_status(req->rq_reqmsg, current_pid());
+
+	rc = sptlrpc_req_refresh_ctx(req, -1);
+	if (rc) {
+		if (req->rq_err) {
+			req->rq_status = rc;
+			return 1;
+		}
+		spin_lock(&req->rq_lock);
+		req->rq_wait_ctx = 1;
+		spin_unlock(&req->rq_lock);
+		return 0;
+	}
+
+	CDEBUG(D_RPCTRACE, "Sending RPC pname:cluuid:pid:xid:nid:opc %s:%s:%d:%llu:%s:%d\n",
+	       current_comm(),
+	       imp->imp_obd->obd_uuid.uuid,
+	       lustre_msg_get_status(req->rq_reqmsg), req->rq_xid,
+	       libcfs_nid2str(imp->imp_connection->c_peer.nid),
+	       lustre_msg_get_opc(req->rq_reqmsg));
+
+	rc = ptl_send_rpc(req, 0);
+	if (rc) {
+		DEBUG_REQ(D_HA, req, "send failed (%d); expect timeout", rc);
+		spin_lock(&req->rq_lock);
+		req->rq_net_err = 1;
+		spin_unlock(&req->rq_lock);
+		return rc;
+	}
+	return 0;
+}
+
+static inline int ptlrpc_set_producer(struct ptlrpc_request_set *set)
+{
+	int remaining, rc;
+
+	LASSERT(set->set_producer != NULL);
+
+	remaining = atomic_read(&set->set_remaining);
+
+	/* populate the ->set_requests list with requests until we
+	 * reach the maximum number of RPCs in flight for this set */
+	while (atomic_read(&set->set_remaining) < set->set_max_inflight) {
+		rc = set->set_producer(set, set->set_producer_arg);
+		if (rc == -ENOENT) {
+			/* no more RPC to produce */
+			set->set_producer     = NULL;
+			set->set_producer_arg = NULL;
+			return 0;
+		}
+	}
+
+	return (atomic_read(&set->set_remaining) - remaining);
+}
+
+/**
+ * this sends any unsent RPCs in \a set and returns 1 if all are sent
+ * and no more replies are expected.
+ * (it is possible to get less replies than requests sent e.g. due to timed out
+ * requests or requests that we had trouble to send out)
+ *
+ * NOTE: This function contains a potential schedule point (cond_resched()).
+ */
+int ptlrpc_check_set(const struct lu_env *env, struct ptlrpc_request_set *set)
+{
+	struct list_head *tmp, *next;
+	struct list_head comp_reqs;
+	int force_timer_recalc = 0;
+
+	if (atomic_read(&set->set_remaining) == 0)
+		return 1;
+
+	INIT_LIST_HEAD(&comp_reqs);
+	list_for_each_safe(tmp, next, &set->set_requests) {
+		struct ptlrpc_request *req =
+			list_entry(tmp, struct ptlrpc_request,
+				       rq_set_chain);
+		struct obd_import *imp = req->rq_import;
+		int unregistered = 0;
+		int rc = 0;
+
+		/* This schedule point is mainly for the ptlrpcd caller of this
+		 * function.  Most ptlrpc sets are not long-lived and unbounded
+		 * in length, but at the least the set used by the ptlrpcd is.
+		 * Since the processing time is unbounded, we need to insert an
+		 * explicit schedule point to make the thread well-behaved.
+		 */
+		cond_resched();
+
+		if (req->rq_phase == RQ_PHASE_NEW &&
+		    ptlrpc_send_new_req(req)) {
+			force_timer_recalc = 1;
+		}
+
+		/* delayed send - skip */
+		if (req->rq_phase == RQ_PHASE_NEW && req->rq_sent)
+			continue;
+
+		/* delayed resend - skip */
+		if (req->rq_phase == RQ_PHASE_RPC && req->rq_resend &&
+		    req->rq_sent > get_seconds())
+			continue;
+
+		if (!(req->rq_phase == RQ_PHASE_RPC ||
+		      req->rq_phase == RQ_PHASE_BULK ||
+		      req->rq_phase == RQ_PHASE_INTERPRET ||
+		      req->rq_phase == RQ_PHASE_UNREGISTERING ||
+		      req->rq_phase == RQ_PHASE_COMPLETE)) {
+			DEBUG_REQ(D_ERROR, req, "bad phase %x", req->rq_phase);
+			LBUG();
+		}
+
+		if (req->rq_phase == RQ_PHASE_UNREGISTERING) {
+			LASSERT(req->rq_next_phase != req->rq_phase);
+			LASSERT(req->rq_next_phase != RQ_PHASE_UNDEFINED);
+
+			/*
+			 * Skip processing until reply is unlinked. We
+			 * can't return to pool before that and we can't
+			 * call interpret before that. We need to make
+			 * sure that all rdma transfers finished and will
+			 * not corrupt any data.
+			 */
+			if (ptlrpc_client_recv_or_unlink(req) ||
+			    ptlrpc_client_bulk_active(req))
+				continue;
+
+			/*
+			 * Turn fail_loc off to prevent it from looping
+			 * forever.
+			 */
+			if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REPL_UNLINK)) {
+				OBD_FAIL_CHECK_ORSET(OBD_FAIL_PTLRPC_LONG_REPL_UNLINK,
+						     OBD_FAIL_ONCE);
+			}
+			if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_BULK_UNLINK)) {
+				OBD_FAIL_CHECK_ORSET(OBD_FAIL_PTLRPC_LONG_BULK_UNLINK,
+						     OBD_FAIL_ONCE);
+			}
+
+			/*
+			 * Move to next phase if reply was successfully
+			 * unlinked.
+			 */
+			ptlrpc_rqphase_move(req, req->rq_next_phase);
+		}
+
+		if (req->rq_phase == RQ_PHASE_COMPLETE) {
+			list_move_tail(&req->rq_set_chain, &comp_reqs);
+			continue;
+		}
+
+		if (req->rq_phase == RQ_PHASE_INTERPRET)
+			goto interpret;
+
+		/*
+		 * Note that this also will start async reply unlink.
+		 */
+		if (req->rq_net_err && !req->rq_timedout) {
+			ptlrpc_expire_one_request(req, 1);
+
+			/*
+			 * Check if we still need to wait for unlink.
+			 */
+			if (ptlrpc_client_recv_or_unlink(req) ||
+			    ptlrpc_client_bulk_active(req))
+				continue;
+			/* If there is no need to resend, fail it now. */
+			if (req->rq_no_resend) {
+				if (req->rq_status == 0)
+					req->rq_status = -EIO;
+				ptlrpc_rqphase_move(req, RQ_PHASE_INTERPRET);
+				goto interpret;
+			} else {
+				continue;
+			}
+		}
+
+		if (req->rq_err) {
+			spin_lock(&req->rq_lock);
+			req->rq_replied = 0;
+			spin_unlock(&req->rq_lock);
+			if (req->rq_status == 0)
+				req->rq_status = -EIO;
+			ptlrpc_rqphase_move(req, RQ_PHASE_INTERPRET);
+			goto interpret;
+		}
+
+		/* ptlrpc_set_wait->l_wait_event sets lwi_allow_intr
+		 * so it sets rq_intr regardless of individual rpc
+		 * timeouts. The synchronous IO waiting path sets
+		 * rq_intr irrespective of whether ptlrpcd
+		 * has seen a timeout.  Our policy is to only interpret
+		 * interrupted rpcs after they have timed out, so we
+		 * need to enforce that here.
+		 */
+
+		if (req->rq_intr && (req->rq_timedout || req->rq_waiting ||
+				     req->rq_wait_ctx)) {
+			req->rq_status = -EINTR;
+			ptlrpc_rqphase_move(req, RQ_PHASE_INTERPRET);
+			goto interpret;
+		}
+
+		if (req->rq_phase == RQ_PHASE_RPC) {
+			if (req->rq_timedout || req->rq_resend ||
+			    req->rq_waiting || req->rq_wait_ctx) {
+				int status;
+
+				if (!ptlrpc_unregister_reply(req, 1))
+					continue;
+
+				spin_lock(&imp->imp_lock);
+				if (ptlrpc_import_delay_req(imp, req,
+							    &status)) {
+					/* put on delay list - only if we wait
+					 * recovery finished - before send */
+					list_del_init(&req->rq_list);
+					list_add_tail(&req->rq_list,
+							  &imp->
+							  imp_delayed_list);
+					spin_unlock(&imp->imp_lock);
+					continue;
+				}
+
+				if (status != 0)  {
+					req->rq_status = status;
+					ptlrpc_rqphase_move(req,
+						RQ_PHASE_INTERPRET);
+					spin_unlock(&imp->imp_lock);
+					goto interpret;
+				}
+				if (ptlrpc_no_resend(req) &&
+				    !req->rq_wait_ctx) {
+					req->rq_status = -ENOTCONN;
+					ptlrpc_rqphase_move(req,
+							    RQ_PHASE_INTERPRET);
+					spin_unlock(&imp->imp_lock);
+					goto interpret;
+				}
+
+				list_del_init(&req->rq_list);
+				list_add_tail(&req->rq_list,
+						  &imp->imp_sending_list);
+
+				spin_unlock(&imp->imp_lock);
+
+				spin_lock(&req->rq_lock);
+				req->rq_waiting = 0;
+				spin_unlock(&req->rq_lock);
+
+				if (req->rq_timedout || req->rq_resend) {
+					/* This is re-sending anyways,
+					 * let's mark req as resend. */
+					spin_lock(&req->rq_lock);
+					req->rq_resend = 1;
+					spin_unlock(&req->rq_lock);
+					if (req->rq_bulk) {
+						__u64 old_xid;
+
+						if (!ptlrpc_unregister_bulk(req, 1))
+							continue;
+
+						/* ensure previous bulk fails */
+						old_xid = req->rq_xid;
+						req->rq_xid = ptlrpc_next_xid();
+						CDEBUG(D_HA, "resend bulk old x%llu new x%llu\n",
+						       old_xid, req->rq_xid);
+					}
+				}
+				/*
+				 * rq_wait_ctx is only touched by ptlrpcd,
+				 * so no lock is needed here.
+				 */
+				status = sptlrpc_req_refresh_ctx(req, -1);
+				if (status) {
+					if (req->rq_err) {
+						req->rq_status = status;
+						spin_lock(&req->rq_lock);
+						req->rq_wait_ctx = 0;
+						spin_unlock(&req->rq_lock);
+						force_timer_recalc = 1;
+					} else {
+						spin_lock(&req->rq_lock);
+						req->rq_wait_ctx = 1;
+						spin_unlock(&req->rq_lock);
+					}
+
+					continue;
+				} else {
+					spin_lock(&req->rq_lock);
+					req->rq_wait_ctx = 0;
+					spin_unlock(&req->rq_lock);
+				}
+
+				rc = ptl_send_rpc(req, 0);
+				if (rc) {
+					DEBUG_REQ(D_HA, req,
+						  "send failed: rc = %d", rc);
+					force_timer_recalc = 1;
+					spin_lock(&req->rq_lock);
+					req->rq_net_err = 1;
+					spin_unlock(&req->rq_lock);
+					continue;
+				}
+				/* need to reset the timeout */
+				force_timer_recalc = 1;
+			}
+
+			spin_lock(&req->rq_lock);
+
+			if (ptlrpc_client_early(req)) {
+				ptlrpc_at_recv_early_reply(req);
+				spin_unlock(&req->rq_lock);
+				continue;
+			}
+
+			/* Still waiting for a reply? */
+			if (ptlrpc_client_recv(req)) {
+				spin_unlock(&req->rq_lock);
+				continue;
+			}
+
+			/* Did we actually receive a reply? */
+			if (!ptlrpc_client_replied(req)) {
+				spin_unlock(&req->rq_lock);
+				continue;
+			}
+
+			spin_unlock(&req->rq_lock);
+
+			/* unlink from net because we are going to
+			 * swab in-place of reply buffer */
+			unregistered = ptlrpc_unregister_reply(req, 1);
+			if (!unregistered)
+				continue;
+
+			req->rq_status = after_reply(req);
+			if (req->rq_resend)
+				continue;
+
+			/* If there is no bulk associated with this request,
+			 * then we're done and should let the interpreter
+			 * process the reply. Similarly if the RPC returned
+			 * an error, and therefore the bulk will never arrive.
+			 */
+			if (req->rq_bulk == NULL || req->rq_status < 0) {
+				ptlrpc_rqphase_move(req, RQ_PHASE_INTERPRET);
+				goto interpret;
+			}
+
+			ptlrpc_rqphase_move(req, RQ_PHASE_BULK);
+		}
+
+		LASSERT(req->rq_phase == RQ_PHASE_BULK);
+		if (ptlrpc_client_bulk_active(req))
+			continue;
+
+		if (req->rq_bulk->bd_failure) {
+			/* The RPC reply arrived OK, but the bulk screwed
+			 * up!  Dead weird since the server told us the RPC
+			 * was good after getting the REPLY for her GET or
+			 * the ACK for her PUT. */
+			DEBUG_REQ(D_ERROR, req, "bulk transfer failed");
+			req->rq_status = -EIO;
+		}
+
+		ptlrpc_rqphase_move(req, RQ_PHASE_INTERPRET);
+
+interpret:
+		LASSERT(req->rq_phase == RQ_PHASE_INTERPRET);
+
+		/* This moves to "unregistering" phase we need to wait for
+		 * reply unlink. */
+		if (!unregistered && !ptlrpc_unregister_reply(req, 1)) {
+			/* start async bulk unlink too */
+			ptlrpc_unregister_bulk(req, 1);
+			continue;
+		}
+
+		if (!ptlrpc_unregister_bulk(req, 1))
+			continue;
+
+		/* When calling interpret receiving already should be
+		 * finished. */
+		LASSERT(!req->rq_receiving_reply);
+
+		ptlrpc_req_interpret(env, req, req->rq_status);
+
+		if (ptlrpcd_check_work(req)) {
+			atomic_dec(&set->set_remaining);
+			continue;
+		}
+		ptlrpc_rqphase_move(req, RQ_PHASE_COMPLETE);
+
+		CDEBUG(req->rq_reqmsg != NULL ? D_RPCTRACE : 0,
+		       "Completed RPC pname:cluuid:pid:xid:nid:opc %s:%s:%d:%llu:%s:%d\n",
+		       current_comm(), imp->imp_obd->obd_uuid.uuid,
+		       lustre_msg_get_status(req->rq_reqmsg), req->rq_xid,
+		       libcfs_nid2str(imp->imp_connection->c_peer.nid),
+		       lustre_msg_get_opc(req->rq_reqmsg));
+
+		spin_lock(&imp->imp_lock);
+		/* Request already may be not on sending or delaying list. This
+		 * may happen in the case of marking it erroneous for the case
+		 * ptlrpc_import_delay_req(req, status) find it impossible to
+		 * allow sending this rpc and returns *status != 0. */
+		if (!list_empty(&req->rq_list)) {
+			list_del_init(&req->rq_list);
+			atomic_dec(&imp->imp_inflight);
+		}
+		spin_unlock(&imp->imp_lock);
+
+		atomic_dec(&set->set_remaining);
+		wake_up_all(&imp->imp_recovery_waitq);
+
+		if (set->set_producer) {
+			/* produce a new request if possible */
+			if (ptlrpc_set_producer(set) > 0)
+				force_timer_recalc = 1;
+
+			/* free the request that has just been completed
+			 * in order not to pollute set->set_requests */
+			list_del_init(&req->rq_set_chain);
+			spin_lock(&req->rq_lock);
+			req->rq_set = NULL;
+			req->rq_invalid_rqset = 0;
+			spin_unlock(&req->rq_lock);
+
+			/* record rq_status to compute the final status later */
+			if (req->rq_status != 0)
+				set->set_rc = req->rq_status;
+			ptlrpc_req_finished(req);
+		} else {
+			list_move_tail(&req->rq_set_chain, &comp_reqs);
+		}
+	}
+
+	/* move completed request at the head of list so it's easier for
+	 * caller to find them */
+	list_splice(&comp_reqs, &set->set_requests);
+
+	/* If we hit an error, we want to recover promptly. */
+	return atomic_read(&set->set_remaining) == 0 || force_timer_recalc;
+}
+EXPORT_SYMBOL(ptlrpc_check_set);
+
+/**
+ * Time out request \a req. is \a async_unlink is set, that means do not wait
+ * until LNet actually confirms network buffer unlinking.
+ * Return 1 if we should give up further retrying attempts or 0 otherwise.
+ */
+int ptlrpc_expire_one_request(struct ptlrpc_request *req, int async_unlink)
+{
+	struct obd_import *imp = req->rq_import;
+	int rc = 0;
+
+	spin_lock(&req->rq_lock);
+	req->rq_timedout = 1;
+	spin_unlock(&req->rq_lock);
+
+	DEBUG_REQ(D_WARNING, req, "Request sent has %s: [sent "CFS_DURATION_T
+		  "/real "CFS_DURATION_T"]",
+		  req->rq_net_err ? "failed due to network error" :
+		     ((req->rq_real_sent == 0 ||
+		       time_before((unsigned long)req->rq_real_sent, (unsigned long)req->rq_sent) ||
+		       cfs_time_aftereq(req->rq_real_sent, req->rq_deadline)) ?
+		      "timed out for sent delay" : "timed out for slow reply"),
+		  req->rq_sent, req->rq_real_sent);
+
+	if (imp != NULL && obd_debug_peer_on_timeout)
+		LNetCtl(IOC_LIBCFS_DEBUG_PEER, &imp->imp_connection->c_peer);
+
+	ptlrpc_unregister_reply(req, async_unlink);
+	ptlrpc_unregister_bulk(req, async_unlink);
+
+	if (obd_dump_on_timeout)
+		libcfs_debug_dumplog();
+
+	if (imp == NULL) {
+		DEBUG_REQ(D_HA, req, "NULL import: already cleaned up?");
+		return 1;
+	}
+
+	atomic_inc(&imp->imp_timeouts);
+
+	/* The DLM server doesn't want recovery run on its imports. */
+	if (imp->imp_dlm_fake)
+		return 1;
+
+	/* If this request is for recovery or other primordial tasks,
+	 * then error it out here. */
+	if (req->rq_ctx_init || req->rq_ctx_fini ||
+	    req->rq_send_state != LUSTRE_IMP_FULL ||
+	    imp->imp_obd->obd_no_recov) {
+		DEBUG_REQ(D_RPCTRACE, req, "err -110, sent_state=%s (now=%s)",
+			  ptlrpc_import_state_name(req->rq_send_state),
+			  ptlrpc_import_state_name(imp->imp_state));
+		spin_lock(&req->rq_lock);
+		req->rq_status = -ETIMEDOUT;
+		req->rq_err = 1;
+		spin_unlock(&req->rq_lock);
+		return 1;
+	}
+
+	/* if a request can't be resent we can't wait for an answer after
+	   the timeout */
+	if (ptlrpc_no_resend(req)) {
+		DEBUG_REQ(D_RPCTRACE, req, "TIMEOUT-NORESEND:");
+		rc = 1;
+	}
+
+	ptlrpc_fail_import(imp, lustre_msg_get_conn_cnt(req->rq_reqmsg));
+
+	return rc;
+}
+
+/**
+ * Time out all uncompleted requests in request set pointed by \a data
+ * Callback used when waiting on sets with l_wait_event.
+ * Always returns 1.
+ */
+int ptlrpc_expired_set(void *data)
+{
+	struct ptlrpc_request_set *set = data;
+	struct list_head		*tmp;
+	time_t		     now = get_seconds();
+
+	LASSERT(set != NULL);
+
+	/*
+	 * A timeout expired. See which reqs it applies to...
+	 */
+	list_for_each(tmp, &set->set_requests) {
+		struct ptlrpc_request *req =
+			list_entry(tmp, struct ptlrpc_request,
+				       rq_set_chain);
+
+		/* don't expire request waiting for context */
+		if (req->rq_wait_ctx)
+			continue;
+
+		/* Request in-flight? */
+		if (!((req->rq_phase == RQ_PHASE_RPC &&
+		       !req->rq_waiting && !req->rq_resend) ||
+		      (req->rq_phase == RQ_PHASE_BULK)))
+			continue;
+
+		if (req->rq_timedout ||     /* already dealt with */
+		    req->rq_deadline > now) /* not expired */
+			continue;
+
+		/* Deal with this guy. Do it asynchronously to not block
+		 * ptlrpcd thread. */
+		ptlrpc_expire_one_request(req, 1);
+	}
+
+	/*
+	 * When waiting for a whole set, we always break out of the
+	 * sleep so we can recalculate the timeout, or enable interrupts
+	 * if everyone's timed out.
+	 */
+	return 1;
+}
+EXPORT_SYMBOL(ptlrpc_expired_set);
+
+/**
+ * Sets rq_intr flag in \a req under spinlock.
+ */
+void ptlrpc_mark_interrupted(struct ptlrpc_request *req)
+{
+	spin_lock(&req->rq_lock);
+	req->rq_intr = 1;
+	spin_unlock(&req->rq_lock);
+}
+EXPORT_SYMBOL(ptlrpc_mark_interrupted);
+
+/**
+ * Interrupts (sets interrupted flag) all uncompleted requests in
+ * a set \a data. Callback for l_wait_event for interruptible waits.
+ */
+void ptlrpc_interrupted_set(void *data)
+{
+	struct ptlrpc_request_set *set = data;
+	struct list_head *tmp;
+
+	LASSERT(set != NULL);
+	CDEBUG(D_RPCTRACE, "INTERRUPTED SET %p\n", set);
+
+	list_for_each(tmp, &set->set_requests) {
+		struct ptlrpc_request *req =
+			list_entry(tmp, struct ptlrpc_request,
+				       rq_set_chain);
+
+		if (req->rq_phase != RQ_PHASE_RPC &&
+		    req->rq_phase != RQ_PHASE_UNREGISTERING)
+			continue;
+
+		ptlrpc_mark_interrupted(req);
+	}
+}
+EXPORT_SYMBOL(ptlrpc_interrupted_set);
+
+/**
+ * Get the smallest timeout in the set; this does NOT set a timeout.
+ */
+int ptlrpc_set_next_timeout(struct ptlrpc_request_set *set)
+{
+	struct list_head	    *tmp;
+	time_t		 now = get_seconds();
+	int		    timeout = 0;
+	struct ptlrpc_request *req;
+	int		    deadline;
+
+	list_for_each(tmp, &set->set_requests) {
+		req = list_entry(tmp, struct ptlrpc_request, rq_set_chain);
+
+		/*
+		 * Request in-flight?
+		 */
+		if (!(((req->rq_phase == RQ_PHASE_RPC) && !req->rq_waiting) ||
+		      (req->rq_phase == RQ_PHASE_BULK) ||
+		      (req->rq_phase == RQ_PHASE_NEW)))
+			continue;
+
+		/*
+		 * Already timed out.
+		 */
+		if (req->rq_timedout)
+			continue;
+
+		/*
+		 * Waiting for ctx.
+		 */
+		if (req->rq_wait_ctx)
+			continue;
+
+		if (req->rq_phase == RQ_PHASE_NEW)
+			deadline = req->rq_sent;
+		else if (req->rq_phase == RQ_PHASE_RPC && req->rq_resend)
+			deadline = req->rq_sent;
+		else
+			deadline = req->rq_sent + req->rq_timeout;
+
+		if (deadline <= now)    /* actually expired already */
+			timeout = 1;    /* ASAP */
+		else if (timeout == 0 || timeout > deadline - now)
+			timeout = deadline - now;
+	}
+	return timeout;
+}
+EXPORT_SYMBOL(ptlrpc_set_next_timeout);
+
+/**
+ * Send all unset request from the set and then wait until all
+ * requests in the set complete (either get a reply, timeout, get an
+ * error or otherwise be interrupted).
+ * Returns 0 on success or error code otherwise.
+ */
+int ptlrpc_set_wait(struct ptlrpc_request_set *set)
+{
+	struct list_head	    *tmp;
+	struct ptlrpc_request *req;
+	struct l_wait_info     lwi;
+	int		    rc, timeout;
+
+	if (set->set_producer)
+		(void)ptlrpc_set_producer(set);
+	else
+		list_for_each(tmp, &set->set_requests) {
+			req = list_entry(tmp, struct ptlrpc_request,
+					     rq_set_chain);
+			if (req->rq_phase == RQ_PHASE_NEW)
+				(void)ptlrpc_send_new_req(req);
+		}
+
+	if (list_empty(&set->set_requests))
+		return 0;
+
+	do {
+		timeout = ptlrpc_set_next_timeout(set);
+
+		/* wait until all complete, interrupted, or an in-flight
+		 * req times out */
+		CDEBUG(D_RPCTRACE, "set %p going to sleep for %d seconds\n",
+		       set, timeout);
+
+		if (timeout == 0 && !cfs_signal_pending())
+			/*
+			 * No requests are in-flight (ether timed out
+			 * or delayed), so we can allow interrupts.
+			 * We still want to block for a limited time,
+			 * so we allow interrupts during the timeout.
+			 */
+			lwi = LWI_TIMEOUT_INTR_ALL(cfs_time_seconds(1),
+						   ptlrpc_expired_set,
+						   ptlrpc_interrupted_set, set);
+		else
+			/*
+			 * At least one request is in flight, so no
+			 * interrupts are allowed. Wait until all
+			 * complete, or an in-flight req times out.
+			 */
+			lwi = LWI_TIMEOUT(cfs_time_seconds(timeout ? timeout : 1),
+					  ptlrpc_expired_set, set);
+
+		rc = l_wait_event(set->set_waitq, ptlrpc_check_set(NULL, set), &lwi);
+
+		/* LU-769 - if we ignored the signal because it was already
+		 * pending when we started, we need to handle it now or we risk
+		 * it being ignored forever */
+		if (rc == -ETIMEDOUT && !lwi.lwi_allow_intr &&
+		    cfs_signal_pending()) {
+			sigset_t blocked_sigs =
+					   cfs_block_sigsinv(LUSTRE_FATAL_SIGS);
+
+			/* In fact we only interrupt for the "fatal" signals
+			 * like SIGINT or SIGKILL. We still ignore less
+			 * important signals since ptlrpc set is not easily
+			 * reentrant from userspace again */
+			if (cfs_signal_pending())
+				ptlrpc_interrupted_set(set);
+			cfs_restore_sigs(blocked_sigs);
+		}
+
+		LASSERT(rc == 0 || rc == -EINTR || rc == -ETIMEDOUT);
+
+		/* -EINTR => all requests have been flagged rq_intr so next
+		 * check completes.
+		 * -ETIMEDOUT => someone timed out.  When all reqs have
+		 * timed out, signals are enabled allowing completion with
+		 * EINTR.
+		 * I don't really care if we go once more round the loop in
+		 * the error cases -eeb. */
+		if (rc == 0 && atomic_read(&set->set_remaining) == 0) {
+			list_for_each(tmp, &set->set_requests) {
+				req = list_entry(tmp, struct ptlrpc_request,
+						     rq_set_chain);
+				spin_lock(&req->rq_lock);
+				req->rq_invalid_rqset = 1;
+				spin_unlock(&req->rq_lock);
+			}
+		}
+	} while (rc != 0 || atomic_read(&set->set_remaining) != 0);
+
+	LASSERT(atomic_read(&set->set_remaining) == 0);
+
+	rc = set->set_rc; /* rq_status of already freed requests if any */
+	list_for_each(tmp, &set->set_requests) {
+		req = list_entry(tmp, struct ptlrpc_request, rq_set_chain);
+
+		LASSERT(req->rq_phase == RQ_PHASE_COMPLETE);
+		if (req->rq_status != 0)
+			rc = req->rq_status;
+	}
+
+	if (set->set_interpret != NULL) {
+		int (*interpreter)(struct ptlrpc_request_set *set, void *, int) =
+			set->set_interpret;
+		rc = interpreter(set, set->set_arg, rc);
+	} else {
+		struct ptlrpc_set_cbdata *cbdata, *n;
+		int err;
+
+		list_for_each_entry_safe(cbdata, n,
+					 &set->set_cblist, psc_item) {
+			list_del_init(&cbdata->psc_item);
+			err = cbdata->psc_interpret(set, cbdata->psc_data, rc);
+			if (err && !rc)
+				rc = err;
+			OBD_FREE_PTR(cbdata);
+		}
+	}
+
+	return rc;
+}
+EXPORT_SYMBOL(ptlrpc_set_wait);
+
+/**
+ * Helper function for request freeing.
+ * Called when request count reached zero and request needs to be freed.
+ * Removes request from all sorts of sending/replay lists it might be on,
+ * frees network buffers if any are present.
+ * If \a locked is set, that means caller is already holding import imp_lock
+ * and so we no longer need to reobtain it (for certain lists manipulations)
+ */
+static void __ptlrpc_free_req(struct ptlrpc_request *request, int locked)
+{
+	if (request == NULL)
+		return;
+	LASSERTF(!request->rq_receiving_reply, "req %p\n", request);
+	LASSERTF(request->rq_rqbd == NULL, "req %p\n", request);/* client-side */
+	LASSERTF(list_empty(&request->rq_list), "req %p\n", request);
+	LASSERTF(list_empty(&request->rq_set_chain), "req %p\n", request);
+	LASSERTF(list_empty(&request->rq_exp_list), "req %p\n", request);
+	LASSERTF(!request->rq_replay, "req %p\n", request);
+
+	req_capsule_fini(&request->rq_pill);
+
+	/* We must take it off the imp_replay_list first.  Otherwise, we'll set
+	 * request->rq_reqmsg to NULL while osc_close is dereferencing it. */
+	if (request->rq_import != NULL) {
+		if (!locked)
+			spin_lock(&request->rq_import->imp_lock);
+		list_del_init(&request->rq_replay_list);
+		if (!locked)
+			spin_unlock(&request->rq_import->imp_lock);
+	}
+	LASSERTF(list_empty(&request->rq_replay_list), "req %p\n", request);
+
+	if (atomic_read(&request->rq_refcount) != 0) {
+		DEBUG_REQ(D_ERROR, request,
+			  "freeing request with nonzero refcount");
+		LBUG();
+	}
+
+	if (request->rq_repbuf != NULL)
+		sptlrpc_cli_free_repbuf(request);
+	if (request->rq_export != NULL) {
+		class_export_put(request->rq_export);
+		request->rq_export = NULL;
+	}
+	if (request->rq_import != NULL) {
+		class_import_put(request->rq_import);
+		request->rq_import = NULL;
+	}
+	if (request->rq_bulk != NULL)
+		ptlrpc_free_bulk_pin(request->rq_bulk);
+
+	if (request->rq_reqbuf != NULL || request->rq_clrbuf != NULL)
+		sptlrpc_cli_free_reqbuf(request);
+
+	if (request->rq_cli_ctx)
+		sptlrpc_req_put_ctx(request, !locked);
+
+	if (request->rq_pool)
+		__ptlrpc_free_req_to_pool(request);
+	else
+		ptlrpc_request_cache_free(request);
+}
+
+static int __ptlrpc_req_finished(struct ptlrpc_request *request, int locked);
+/**
+ * Drop one request reference. Must be called with import imp_lock held.
+ * When reference count drops to zero, request is freed.
+ */
+void ptlrpc_req_finished_with_imp_lock(struct ptlrpc_request *request)
+{
+	assert_spin_locked(&request->rq_import->imp_lock);
+	(void)__ptlrpc_req_finished(request, 1);
+}
+EXPORT_SYMBOL(ptlrpc_req_finished_with_imp_lock);
+
+/**
+ * Helper function
+ * Drops one reference count for request \a request.
+ * \a locked set indicates that caller holds import imp_lock.
+ * Frees the request when reference count reaches zero.
+ */
+static int __ptlrpc_req_finished(struct ptlrpc_request *request, int locked)
+{
+	if (request == NULL)
+		return 1;
+
+	if (request == LP_POISON ||
+	    request->rq_reqmsg == LP_POISON) {
+		CERROR("dereferencing freed request (bug 575)\n");
+		LBUG();
+		return 1;
+	}
+
+	DEBUG_REQ(D_INFO, request, "refcount now %u",
+		  atomic_read(&request->rq_refcount) - 1);
+
+	if (atomic_dec_and_test(&request->rq_refcount)) {
+		__ptlrpc_free_req(request, locked);
+		return 1;
+	}
+
+	return 0;
+}
+
+/**
+ * Drops one reference count for a request.
+ */
+void ptlrpc_req_finished(struct ptlrpc_request *request)
+{
+	__ptlrpc_req_finished(request, 0);
+}
+EXPORT_SYMBOL(ptlrpc_req_finished);
+
+/**
+ * Returns xid of a \a request
+ */
+__u64 ptlrpc_req_xid(struct ptlrpc_request *request)
+{
+	return request->rq_xid;
+}
+EXPORT_SYMBOL(ptlrpc_req_xid);
+
+/**
+ * Disengage the client's reply buffer from the network
+ * NB does _NOT_ unregister any client-side bulk.
+ * IDEMPOTENT, but _not_ safe against concurrent callers.
+ * The request owner (i.e. the thread doing the I/O) must call...
+ * Returns 0 on success or 1 if unregistering cannot be made.
+ */
+int ptlrpc_unregister_reply(struct ptlrpc_request *request, int async)
+{
+	int		rc;
+	wait_queue_head_t       *wq;
+	struct l_wait_info lwi;
+
+	/*
+	 * Might sleep.
+	 */
+	LASSERT(!in_interrupt());
+
+	/*
+	 * Let's setup deadline for reply unlink.
+	 */
+	if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REPL_UNLINK) &&
+	    async && request->rq_reply_deadline == 0)
+		request->rq_reply_deadline = get_seconds()+LONG_UNLINK;
+
+	/*
+	 * Nothing left to do.
+	 */
+	if (!ptlrpc_client_recv_or_unlink(request))
+		return 1;
+
+	LNetMDUnlink(request->rq_reply_md_h);
+
+	/*
+	 * Let's check it once again.
+	 */
+	if (!ptlrpc_client_recv_or_unlink(request))
+		return 1;
+
+	/*
+	 * Move to "Unregistering" phase as reply was not unlinked yet.
+	 */
+	ptlrpc_rqphase_move(request, RQ_PHASE_UNREGISTERING);
+
+	/*
+	 * Do not wait for unlink to finish.
+	 */
+	if (async)
+		return 0;
+
+	/*
+	 * We have to l_wait_event() whatever the result, to give liblustre
+	 * a chance to run reply_in_callback(), and to make sure we've
+	 * unlinked before returning a req to the pool.
+	 */
+	if (request->rq_set != NULL)
+		wq = &request->rq_set->set_waitq;
+	else
+		wq = &request->rq_reply_waitq;
+
+	for (;;) {
+		/* Network access will complete in finite time but the HUGE
+		 * timeout lets us CWARN for visibility of sluggish NALs */
+		lwi = LWI_TIMEOUT_INTERVAL(cfs_time_seconds(LONG_UNLINK),
+					   cfs_time_seconds(1), NULL, NULL);
+		rc = l_wait_event(*wq, !ptlrpc_client_recv_or_unlink(request),
+				  &lwi);
+		if (rc == 0) {
+			ptlrpc_rqphase_move(request, request->rq_next_phase);
+			return 1;
+		}
+
+		LASSERT(rc == -ETIMEDOUT);
+		DEBUG_REQ(D_WARNING, request,
+			  "Unexpectedly long timeout rvcng=%d unlnk=%d/%d",
+			  request->rq_receiving_reply,
+			  request->rq_req_unlink, request->rq_reply_unlink);
+	}
+	return 0;
+}
+EXPORT_SYMBOL(ptlrpc_unregister_reply);
+
+static void ptlrpc_free_request(struct ptlrpc_request *req)
+{
+	spin_lock(&req->rq_lock);
+	req->rq_replay = 0;
+	spin_unlock(&req->rq_lock);
+
+	if (req->rq_commit_cb != NULL)
+		req->rq_commit_cb(req);
+	list_del_init(&req->rq_replay_list);
+
+	__ptlrpc_req_finished(req, 1);
+}
+
+/**
+ * the request is committed and dropped from the replay list of its import
+ */
+void ptlrpc_request_committed(struct ptlrpc_request *req, int force)
+{
+	struct obd_import	*imp = req->rq_import;
+
+	spin_lock(&imp->imp_lock);
+	if (list_empty(&req->rq_replay_list)) {
+		spin_unlock(&imp->imp_lock);
+		return;
+	}
+
+	if (force || req->rq_transno <= imp->imp_peer_committed_transno)
+		ptlrpc_free_request(req);
+
+	spin_unlock(&imp->imp_lock);
+}
+EXPORT_SYMBOL(ptlrpc_request_committed);
+
+/**
+ * Iterates through replay_list on import and prunes
+ * all requests have transno smaller than last_committed for the
+ * import and don't have rq_replay set.
+ * Since requests are sorted in transno order, stops when meeting first
+ * transno bigger than last_committed.
+ * caller must hold imp->imp_lock
+ */
+void ptlrpc_free_committed(struct obd_import *imp)
+{
+	struct ptlrpc_request *req, *saved;
+	struct ptlrpc_request *last_req = NULL; /* temporary fire escape */
+	bool		       skip_committed_list = true;
+
+	LASSERT(imp != NULL);
+	assert_spin_locked(&imp->imp_lock);
+
+	if (imp->imp_peer_committed_transno == imp->imp_last_transno_checked &&
+	    imp->imp_generation == imp->imp_last_generation_checked) {
+		CDEBUG(D_INFO, "%s: skip recheck: last_committed %llu\n",
+		       imp->imp_obd->obd_name, imp->imp_peer_committed_transno);
+		return;
+	}
+	CDEBUG(D_RPCTRACE, "%s: committing for last_committed %llu gen %d\n",
+	       imp->imp_obd->obd_name, imp->imp_peer_committed_transno,
+	       imp->imp_generation);
+
+	if (imp->imp_generation != imp->imp_last_generation_checked)
+		skip_committed_list = false;
+
+	imp->imp_last_transno_checked = imp->imp_peer_committed_transno;
+	imp->imp_last_generation_checked = imp->imp_generation;
+
+	list_for_each_entry_safe(req, saved, &imp->imp_replay_list,
+				 rq_replay_list) {
+		/* XXX ok to remove when 1357 resolved - rread 05/29/03  */
+		LASSERT(req != last_req);
+		last_req = req;
+
+		if (req->rq_transno == 0) {
+			DEBUG_REQ(D_EMERG, req, "zero transno during replay");
+			LBUG();
+		}
+		if (req->rq_import_generation < imp->imp_generation) {
+			DEBUG_REQ(D_RPCTRACE, req, "free request with old gen");
+			goto free_req;
+		}
+
+		/* not yet committed */
+		if (req->rq_transno > imp->imp_peer_committed_transno) {
+			DEBUG_REQ(D_RPCTRACE, req, "stopping search");
+			break;
+		}
+
+		if (req->rq_replay) {
+			DEBUG_REQ(D_RPCTRACE, req, "keeping (FL_REPLAY)");
+			list_move_tail(&req->rq_replay_list,
+				       &imp->imp_committed_list);
+			continue;
+		}
+
+		DEBUG_REQ(D_INFO, req, "commit (last_committed %llu)",
+			  imp->imp_peer_committed_transno);
+free_req:
+		ptlrpc_free_request(req);
+	}
+	if (skip_committed_list)
+		return;
+
+	list_for_each_entry_safe(req, saved, &imp->imp_committed_list,
+				 rq_replay_list) {
+		LASSERT(req->rq_transno != 0);
+		if (req->rq_import_generation < imp->imp_generation) {
+			DEBUG_REQ(D_RPCTRACE, req, "free stale open request");
+			ptlrpc_free_request(req);
+		}
+	}
+}
+
+void ptlrpc_cleanup_client(struct obd_import *imp)
+{
+}
+EXPORT_SYMBOL(ptlrpc_cleanup_client);
+
+/**
+ * Schedule previously sent request for resend.
+ * For bulk requests we assign new xid (to avoid problems with
+ * lost replies and therefore several transfers landing into same buffer
+ * from different sending attempts).
+ */
+void ptlrpc_resend_req(struct ptlrpc_request *req)
+{
+	DEBUG_REQ(D_HA, req, "going to resend");
+	spin_lock(&req->rq_lock);
+
+	/* Request got reply but linked to the import list still.
+	   Let ptlrpc_check_set() to process it. */
+	if (ptlrpc_client_replied(req)) {
+		spin_unlock(&req->rq_lock);
+		DEBUG_REQ(D_HA, req, "it has reply, so skip it");
+		return;
+	}
+
+	lustre_msg_set_handle(req->rq_reqmsg, &(struct lustre_handle){ 0 });
+	req->rq_status = -EAGAIN;
+
+	req->rq_resend = 1;
+	req->rq_net_err = 0;
+	req->rq_timedout = 0;
+	if (req->rq_bulk) {
+		__u64 old_xid = req->rq_xid;
+
+		/* ensure previous bulk fails */
+		req->rq_xid = ptlrpc_next_xid();
+		CDEBUG(D_HA, "resend bulk old x%llu new x%llu\n",
+		       old_xid, req->rq_xid);
+	}
+	ptlrpc_client_wake_req(req);
+	spin_unlock(&req->rq_lock);
+}
+EXPORT_SYMBOL(ptlrpc_resend_req);
+
+/* XXX: this function and rq_status are currently unused */
+void ptlrpc_restart_req(struct ptlrpc_request *req)
+{
+	DEBUG_REQ(D_HA, req, "restarting (possibly-)completed request");
+	req->rq_status = -ERESTARTSYS;
+
+	spin_lock(&req->rq_lock);
+	req->rq_restart = 1;
+	req->rq_timedout = 0;
+	ptlrpc_client_wake_req(req);
+	spin_unlock(&req->rq_lock);
+}
+EXPORT_SYMBOL(ptlrpc_restart_req);
+
+/**
+ * Grab additional reference on a request \a req
+ */
+struct ptlrpc_request *ptlrpc_request_addref(struct ptlrpc_request *req)
+{
+	atomic_inc(&req->rq_refcount);
+	return req;
+}
+EXPORT_SYMBOL(ptlrpc_request_addref);
+
+/**
+ * Add a request to import replay_list.
+ * Must be called under imp_lock
+ */
+void ptlrpc_retain_replayable_request(struct ptlrpc_request *req,
+				      struct obd_import *imp)
+{
+	struct list_head *tmp;
+
+	assert_spin_locked(&imp->imp_lock);
+
+	if (req->rq_transno == 0) {
+		DEBUG_REQ(D_EMERG, req, "saving request with zero transno");
+		LBUG();
+	}
+
+	/* clear this for new requests that were resent as well
+	   as resent replayed requests. */
+	lustre_msg_clear_flags(req->rq_reqmsg, MSG_RESENT);
+
+	/* don't re-add requests that have been replayed */
+	if (!list_empty(&req->rq_replay_list))
+		return;
+
+	lustre_msg_add_flags(req->rq_reqmsg, MSG_REPLAY);
+
+	LASSERT(imp->imp_replayable);
+	/* Balanced in ptlrpc_free_committed, usually. */
+	ptlrpc_request_addref(req);
+	list_for_each_prev(tmp, &imp->imp_replay_list) {
+		struct ptlrpc_request *iter =
+			list_entry(tmp, struct ptlrpc_request,
+				       rq_replay_list);
+
+		/* We may have duplicate transnos if we create and then
+		 * open a file, or for closes retained if to match creating
+		 * opens, so use req->rq_xid as a secondary key.
+		 * (See bugs 684, 685, and 428.)
+		 * XXX no longer needed, but all opens need transnos!
+		 */
+		if (iter->rq_transno > req->rq_transno)
+			continue;
+
+		if (iter->rq_transno == req->rq_transno) {
+			LASSERT(iter->rq_xid != req->rq_xid);
+			if (iter->rq_xid > req->rq_xid)
+				continue;
+		}
+
+		list_add(&req->rq_replay_list, &iter->rq_replay_list);
+		return;
+	}
+
+	list_add(&req->rq_replay_list, &imp->imp_replay_list);
+}
+EXPORT_SYMBOL(ptlrpc_retain_replayable_request);
+
+/**
+ * Send request and wait until it completes.
+ * Returns request processing status.
+ */
+int ptlrpc_queue_wait(struct ptlrpc_request *req)
+{
+	struct ptlrpc_request_set *set;
+	int rc;
+
+	LASSERT(req->rq_set == NULL);
+	LASSERT(!req->rq_receiving_reply);
+
+	set = ptlrpc_prep_set();
+	if (set == NULL) {
+		CERROR("Unable to allocate ptlrpc set.");
+		return -ENOMEM;
+	}
+
+	/* for distributed debugging */
+	lustre_msg_set_status(req->rq_reqmsg, current_pid());
+
+	/* add a ref for the set (see comment in ptlrpc_set_add_req) */
+	ptlrpc_request_addref(req);
+	ptlrpc_set_add_req(set, req);
+	rc = ptlrpc_set_wait(set);
+	ptlrpc_set_destroy(set);
+
+	return rc;
+}
+EXPORT_SYMBOL(ptlrpc_queue_wait);
+
+struct ptlrpc_replay_async_args {
+	int praa_old_state;
+	int praa_old_status;
+};
+
+/**
+ * Callback used for replayed requests reply processing.
+ * In case of successful reply calls registered request replay callback.
+ * In case of error restart replay process.
+ */
+static int ptlrpc_replay_interpret(const struct lu_env *env,
+				   struct ptlrpc_request *req,
+				   void *data, int rc)
+{
+	struct ptlrpc_replay_async_args *aa = data;
+	struct obd_import *imp = req->rq_import;
+
+	atomic_dec(&imp->imp_replay_inflight);
+
+	if (!ptlrpc_client_replied(req)) {
+		CERROR("request replay timed out, restarting recovery\n");
+		rc = -ETIMEDOUT;
+		goto out;
+	}
+
+	if (lustre_msg_get_type(req->rq_repmsg) == PTL_RPC_MSG_ERR &&
+	    (lustre_msg_get_status(req->rq_repmsg) == -ENOTCONN ||
+	     lustre_msg_get_status(req->rq_repmsg) == -ENODEV)) {
+		rc = lustre_msg_get_status(req->rq_repmsg);
+		goto out;
+	}
+
+	/** VBR: check version failure */
+	if (lustre_msg_get_status(req->rq_repmsg) == -EOVERFLOW) {
+		/** replay was failed due to version mismatch */
+		DEBUG_REQ(D_WARNING, req, "Version mismatch during replay\n");
+		spin_lock(&imp->imp_lock);
+		imp->imp_vbr_failed = 1;
+		imp->imp_no_lock_replay = 1;
+		spin_unlock(&imp->imp_lock);
+		lustre_msg_set_status(req->rq_repmsg, aa->praa_old_status);
+	} else {
+		/** The transno had better not change over replay. */
+		LASSERTF(lustre_msg_get_transno(req->rq_reqmsg) ==
+			 lustre_msg_get_transno(req->rq_repmsg) ||
+			 lustre_msg_get_transno(req->rq_repmsg) == 0,
+			 "%#llx/%#llx\n",
+			 lustre_msg_get_transno(req->rq_reqmsg),
+			 lustre_msg_get_transno(req->rq_repmsg));
+	}
+
+	spin_lock(&imp->imp_lock);
+	/** if replays by version then gap occur on server, no trust to locks */
+	if (lustre_msg_get_flags(req->rq_repmsg) & MSG_VERSION_REPLAY)
+		imp->imp_no_lock_replay = 1;
+	imp->imp_last_replay_transno = lustre_msg_get_transno(req->rq_reqmsg);
+	spin_unlock(&imp->imp_lock);
+	LASSERT(imp->imp_last_replay_transno);
+
+	/* transaction number shouldn't be bigger than the latest replayed */
+	if (req->rq_transno > lustre_msg_get_transno(req->rq_reqmsg)) {
+		DEBUG_REQ(D_ERROR, req,
+			  "Reported transno %llu is bigger than the replayed one: %llu",
+			  req->rq_transno,
+			  lustre_msg_get_transno(req->rq_reqmsg));
+		rc = -EINVAL;
+		goto out;
+	}
+
+	DEBUG_REQ(D_HA, req, "got rep");
+
+	/* let the callback do fixups, possibly including in the request */
+	if (req->rq_replay_cb)
+		req->rq_replay_cb(req);
+
+	if (ptlrpc_client_replied(req) &&
+	    lustre_msg_get_status(req->rq_repmsg) != aa->praa_old_status) {
+		DEBUG_REQ(D_ERROR, req, "status %d, old was %d",
+			  lustre_msg_get_status(req->rq_repmsg),
+			  aa->praa_old_status);
+	} else {
+		/* Put it back for re-replay. */
+		lustre_msg_set_status(req->rq_repmsg, aa->praa_old_status);
+	}
+
+	/*
+	 * Errors while replay can set transno to 0, but
+	 * imp_last_replay_transno shouldn't be set to 0 anyway
+	 */
+	if (req->rq_transno == 0)
+		CERROR("Transno is 0 during replay!\n");
+
+	/* continue with recovery */
+	rc = ptlrpc_import_recovery_state_machine(imp);
+ out:
+	req->rq_send_state = aa->praa_old_state;
+
+	if (rc != 0)
+		/* this replay failed, so restart recovery */
+		ptlrpc_connect_import(imp);
+
+	return rc;
+}
+
+/**
+ * Prepares and queues request for replay.
+ * Adds it to ptlrpcd queue for actual sending.
+ * Returns 0 on success.
+ */
+int ptlrpc_replay_req(struct ptlrpc_request *req)
+{
+	struct ptlrpc_replay_async_args *aa;
+
+	LASSERT(req->rq_import->imp_state == LUSTRE_IMP_REPLAY);
+
+	LASSERT(sizeof(*aa) <= sizeof(req->rq_async_args));
+	aa = ptlrpc_req_async_args(req);
+	memset(aa, 0, sizeof(*aa));
+
+	/* Prepare request to be resent with ptlrpcd */
+	aa->praa_old_state = req->rq_send_state;
+	req->rq_send_state = LUSTRE_IMP_REPLAY;
+	req->rq_phase = RQ_PHASE_NEW;
+	req->rq_next_phase = RQ_PHASE_UNDEFINED;
+	if (req->rq_repmsg)
+		aa->praa_old_status = lustre_msg_get_status(req->rq_repmsg);
+	req->rq_status = 0;
+	req->rq_interpret_reply = ptlrpc_replay_interpret;
+	/* Readjust the timeout for current conditions */
+	ptlrpc_at_set_req_timeout(req);
+
+	/* Tell server the net_latency, so the server can calculate how long
+	 * it should wait for next replay */
+	lustre_msg_set_service_time(req->rq_reqmsg,
+				    ptlrpc_at_get_net_latency(req));
+	DEBUG_REQ(D_HA, req, "REPLAY");
+
+	atomic_inc(&req->rq_import->imp_replay_inflight);
+	ptlrpc_request_addref(req); /* ptlrpcd needs a ref */
+
+	ptlrpcd_add_req(req, PDL_POLICY_LOCAL, -1);
+	return 0;
+}
+EXPORT_SYMBOL(ptlrpc_replay_req);
+
+/**
+ * Aborts all in-flight request on import \a imp sending and delayed lists
+ */
+void ptlrpc_abort_inflight(struct obd_import *imp)
+{
+	struct list_head *tmp, *n;
+
+	/* Make sure that no new requests get processed for this import.
+	 * ptlrpc_{queue,set}_wait must (and does) hold imp_lock while testing
+	 * this flag and then putting requests on sending_list or delayed_list.
+	 */
+	spin_lock(&imp->imp_lock);
+
+	/* XXX locking?  Maybe we should remove each request with the list
+	 * locked?  Also, how do we know if the requests on the list are
+	 * being freed at this time?
+	 */
+	list_for_each_safe(tmp, n, &imp->imp_sending_list) {
+		struct ptlrpc_request *req =
+			list_entry(tmp, struct ptlrpc_request, rq_list);
+
+		DEBUG_REQ(D_RPCTRACE, req, "inflight");
+
+		spin_lock(&req->rq_lock);
+		if (req->rq_import_generation < imp->imp_generation) {
+			req->rq_err = 1;
+			req->rq_status = -EIO;
+			ptlrpc_client_wake_req(req);
+		}
+		spin_unlock(&req->rq_lock);
+	}
+
+	list_for_each_safe(tmp, n, &imp->imp_delayed_list) {
+		struct ptlrpc_request *req =
+			list_entry(tmp, struct ptlrpc_request, rq_list);
+
+		DEBUG_REQ(D_RPCTRACE, req, "aborting waiting req");
+
+		spin_lock(&req->rq_lock);
+		if (req->rq_import_generation < imp->imp_generation) {
+			req->rq_err = 1;
+			req->rq_status = -EIO;
+			ptlrpc_client_wake_req(req);
+		}
+		spin_unlock(&req->rq_lock);
+	}
+
+	/* Last chance to free reqs left on the replay list, but we
+	 * will still leak reqs that haven't committed.  */
+	if (imp->imp_replayable)
+		ptlrpc_free_committed(imp);
+
+	spin_unlock(&imp->imp_lock);
+}
+EXPORT_SYMBOL(ptlrpc_abort_inflight);
+
+/**
+ * Abort all uncompleted requests in request set \a set
+ */
+void ptlrpc_abort_set(struct ptlrpc_request_set *set)
+{
+	struct list_head *tmp, *pos;
+
+	LASSERT(set != NULL);
+
+	list_for_each_safe(pos, tmp, &set->set_requests) {
+		struct ptlrpc_request *req =
+			list_entry(pos, struct ptlrpc_request,
+				       rq_set_chain);
+
+		spin_lock(&req->rq_lock);
+		if (req->rq_phase != RQ_PHASE_RPC) {
+			spin_unlock(&req->rq_lock);
+			continue;
+		}
+
+		req->rq_err = 1;
+		req->rq_status = -EINTR;
+		ptlrpc_client_wake_req(req);
+		spin_unlock(&req->rq_lock);
+	}
+}
+
+static __u64 ptlrpc_last_xid;
+static spinlock_t ptlrpc_last_xid_lock;
+
+/**
+ * Initialize the XID for the node.  This is common among all requests on
+ * this node, and only requires the property that it is monotonically
+ * increasing.  It does not need to be sequential.  Since this is also used
+ * as the RDMA match bits, it is important that a single client NOT have
+ * the same match bits for two different in-flight requests, hence we do
+ * NOT want to have an XID per target or similar.
+ *
+ * To avoid an unlikely collision between match bits after a client reboot
+ * (which would deliver old data into the wrong RDMA buffer) initialize
+ * the XID based on the current time, assuming a maximum RPC rate of 1M RPC/s.
+ * If the time is clearly incorrect, we instead use a 62-bit random number.
+ * In the worst case the random number will overflow 1M RPCs per second in
+ * 9133 years, or permutations thereof.
+ */
+#define YEAR_2004 (1ULL << 30)
+void ptlrpc_init_xid(void)
+{
+	time_t now = get_seconds();
+
+	spin_lock_init(&ptlrpc_last_xid_lock);
+	if (now < YEAR_2004) {
+		cfs_get_random_bytes(&ptlrpc_last_xid, sizeof(ptlrpc_last_xid));
+		ptlrpc_last_xid >>= 2;
+		ptlrpc_last_xid |= (1ULL << 61);
+	} else {
+		ptlrpc_last_xid = (__u64)now << 20;
+	}
+
+	/* Always need to be aligned to a power-of-two for multi-bulk BRW */
+	CLASSERT((PTLRPC_BULK_OPS_COUNT & (PTLRPC_BULK_OPS_COUNT - 1)) == 0);
+	ptlrpc_last_xid &= PTLRPC_BULK_OPS_MASK;
+}
+
+/**
+ * Increase xid and returns resulting new value to the caller.
+ *
+ * Multi-bulk BRW RPCs consume multiple XIDs for each bulk transfer, starting
+ * at the returned xid, up to xid + PTLRPC_BULK_OPS_COUNT - 1. The BRW RPC
+ * itself uses the last bulk xid needed, so the server can determine the
+ * the number of bulk transfers from the RPC XID and a bitmask.  The starting
+ * xid must align to a power-of-two value.
+ *
+ * This is assumed to be true due to the initial ptlrpc_last_xid
+ * value also being initialized to a power-of-two value. LU-1431
+ */
+__u64 ptlrpc_next_xid(void)
+{
+	__u64 next;
+
+	spin_lock(&ptlrpc_last_xid_lock);
+	next = ptlrpc_last_xid + PTLRPC_BULK_OPS_COUNT;
+	ptlrpc_last_xid = next;
+	spin_unlock(&ptlrpc_last_xid_lock);
+
+	return next;
+}
+EXPORT_SYMBOL(ptlrpc_next_xid);
+
+/**
+ * Get a glimpse at what next xid value might have been.
+ * Returns possible next xid.
+ */
+__u64 ptlrpc_sample_next_xid(void)
+{
+#if BITS_PER_LONG == 32
+	/* need to avoid possible word tearing on 32-bit systems */
+	__u64 next;
+
+	spin_lock(&ptlrpc_last_xid_lock);
+	next = ptlrpc_last_xid + PTLRPC_BULK_OPS_COUNT;
+	spin_unlock(&ptlrpc_last_xid_lock);
+
+	return next;
+#else
+	/* No need to lock, since returned value is racy anyways */
+	return ptlrpc_last_xid + PTLRPC_BULK_OPS_COUNT;
+#endif
+}
+EXPORT_SYMBOL(ptlrpc_sample_next_xid);
+
+/**
+ * Functions for operating ptlrpc workers.
+ *
+ * A ptlrpc work is a function which will be running inside ptlrpc context.
+ * The callback shouldn't sleep otherwise it will block that ptlrpcd thread.
+ *
+ * 1. after a work is created, it can be used many times, that is:
+ *	 handler = ptlrpcd_alloc_work();
+ *	 ptlrpcd_queue_work();
+ *
+ *    queue it again when necessary:
+ *	 ptlrpcd_queue_work();
+ *	 ptlrpcd_destroy_work();
+ * 2. ptlrpcd_queue_work() can be called by multiple processes meanwhile, but
+ *    it will only be queued once in any time. Also as its name implies, it may
+ *    have delay before it really runs by ptlrpcd thread.
+ */
+struct ptlrpc_work_async_args {
+	int   (*cb)(const struct lu_env *, void *);
+	void   *cbdata;
+};
+
+static void ptlrpcd_add_work_req(struct ptlrpc_request *req)
+{
+	/* re-initialize the req */
+	req->rq_timeout		= obd_timeout;
+	req->rq_sent		= get_seconds();
+	req->rq_deadline	= req->rq_sent + req->rq_timeout;
+	req->rq_reply_deadline	= req->rq_deadline;
+	req->rq_phase		= RQ_PHASE_INTERPRET;
+	req->rq_next_phase	= RQ_PHASE_COMPLETE;
+	req->rq_xid		= ptlrpc_next_xid();
+	req->rq_import_generation = req->rq_import->imp_generation;
+
+	ptlrpcd_add_req(req, PDL_POLICY_ROUND, -1);
+}
+
+static int work_interpreter(const struct lu_env *env,
+			    struct ptlrpc_request *req, void *data, int rc)
+{
+	struct ptlrpc_work_async_args *arg = data;
+
+	LASSERT(ptlrpcd_check_work(req));
+	LASSERT(arg->cb != NULL);
+
+	rc = arg->cb(env, arg->cbdata);
+
+	list_del_init(&req->rq_set_chain);
+	req->rq_set = NULL;
+
+	if (atomic_dec_return(&req->rq_refcount) > 1) {
+		atomic_set(&req->rq_refcount, 2);
+		ptlrpcd_add_work_req(req);
+	}
+	return rc;
+}
+
+static int worker_format;
+
+static int ptlrpcd_check_work(struct ptlrpc_request *req)
+{
+	return req->rq_pill.rc_fmt == (void *)&worker_format;
+}
+
+/**
+ * Create a work for ptlrpc.
+ */
+void *ptlrpcd_alloc_work(struct obd_import *imp,
+			 int (*cb)(const struct lu_env *, void *), void *cbdata)
+{
+	struct ptlrpc_request	 *req = NULL;
+	struct ptlrpc_work_async_args *args;
+
+	might_sleep();
+
+	if (cb == NULL)
+		return ERR_PTR(-EINVAL);
+
+	/* copy some code from deprecated fakereq. */
+	req = ptlrpc_request_cache_alloc(GFP_NOFS);
+	if (req == NULL) {
+		CERROR("ptlrpc: run out of memory!\n");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	req->rq_send_state = LUSTRE_IMP_FULL;
+	req->rq_type = PTL_RPC_MSG_REQUEST;
+	req->rq_import = class_import_get(imp);
+	req->rq_export = NULL;
+	req->rq_interpret_reply = work_interpreter;
+	/* don't want reply */
+	req->rq_receiving_reply = 0;
+	req->rq_req_unlink = req->rq_reply_unlink = 0;
+	req->rq_no_delay = req->rq_no_resend = 1;
+	req->rq_pill.rc_fmt = (void *)&worker_format;
+
+	spin_lock_init(&req->rq_lock);
+	INIT_LIST_HEAD(&req->rq_list);
+	INIT_LIST_HEAD(&req->rq_replay_list);
+	INIT_LIST_HEAD(&req->rq_set_chain);
+	INIT_LIST_HEAD(&req->rq_history_list);
+	INIT_LIST_HEAD(&req->rq_exp_list);
+	init_waitqueue_head(&req->rq_reply_waitq);
+	init_waitqueue_head(&req->rq_set_waitq);
+	atomic_set(&req->rq_refcount, 1);
+
+	CLASSERT(sizeof(*args) <= sizeof(req->rq_async_args));
+	args = ptlrpc_req_async_args(req);
+	args->cb     = cb;
+	args->cbdata = cbdata;
+
+	return req;
+}
+EXPORT_SYMBOL(ptlrpcd_alloc_work);
+
+void ptlrpcd_destroy_work(void *handler)
+{
+	struct ptlrpc_request *req = handler;
+
+	if (req)
+		ptlrpc_req_finished(req);
+}
+EXPORT_SYMBOL(ptlrpcd_destroy_work);
+
+int ptlrpcd_queue_work(void *handler)
+{
+	struct ptlrpc_request *req = handler;
+
+	/*
+	 * Check if the req is already being queued.
+	 *
+	 * Here comes a trick: it lacks a way of checking if a req is being
+	 * processed reliably in ptlrpc. Here I have to use refcount of req
+	 * for this purpose. This is okay because the caller should use this
+	 * req as opaque data. - Jinshan
+	 */
+	LASSERT(atomic_read(&req->rq_refcount) > 0);
+	if (atomic_inc_return(&req->rq_refcount) == 2)
+		ptlrpcd_add_work_req(req);
+	return 0;
+}
+EXPORT_SYMBOL(ptlrpcd_queue_work);
diff --git a/drivers/staging/lustre/lustre/ptlrpc/connection.c b/drivers/staging/lustre/lustre/ptlrpc/connection.c
new file mode 100644
index 000000000..7e27397ce
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ptlrpc/connection.c
@@ -0,0 +1,241 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#define DEBUG_SUBSYSTEM S_RPC
+#include "../include/obd_support.h"
+#include "../include/obd_class.h"
+#include "../include/lustre_net.h"
+
+#include "ptlrpc_internal.h"
+
+static struct cfs_hash *conn_hash;
+static cfs_hash_ops_t conn_hash_ops;
+
+struct ptlrpc_connection *
+ptlrpc_connection_get(lnet_process_id_t peer, lnet_nid_t self,
+		      struct obd_uuid *uuid)
+{
+	struct ptlrpc_connection *conn, *conn2;
+
+	conn = cfs_hash_lookup(conn_hash, &peer);
+	if (conn)
+		goto out;
+
+	OBD_ALLOC_PTR(conn);
+	if (!conn)
+		return NULL;
+
+	conn->c_peer = peer;
+	conn->c_self = self;
+	INIT_HLIST_NODE(&conn->c_hash);
+	atomic_set(&conn->c_refcount, 1);
+	if (uuid)
+		obd_str2uuid(&conn->c_remote_uuid, uuid->uuid);
+
+	/*
+	 * Add the newly created conn to the hash, on key collision we
+	 * lost a racing addition and must destroy our newly allocated
+	 * connection.  The object which exists in the has will be
+	 * returned and may be compared against out object.
+	 */
+	/* In the function below, .hs_keycmp resolves to
+	 * conn_keycmp() */
+	/* coverity[overrun-buffer-val] */
+	conn2 = cfs_hash_findadd_unique(conn_hash, &peer, &conn->c_hash);
+	if (conn != conn2) {
+		OBD_FREE_PTR(conn);
+		conn = conn2;
+	}
+out:
+	CDEBUG(D_INFO, "conn=%p refcount %d to %s\n",
+	       conn, atomic_read(&conn->c_refcount),
+	       libcfs_nid2str(conn->c_peer.nid));
+	return conn;
+}
+EXPORT_SYMBOL(ptlrpc_connection_get);
+
+int ptlrpc_connection_put(struct ptlrpc_connection *conn)
+{
+	int rc = 0;
+
+	if (!conn)
+		return rc;
+
+	LASSERT(atomic_read(&conn->c_refcount) > 1);
+
+	/*
+	 * We do not remove connection from hashtable and
+	 * do not free it even if last caller released ref,
+	 * as we want to have it cached for the case it is
+	 * needed again.
+	 *
+	 * Deallocating it and later creating new connection
+	 * again would be wastful. This way we also avoid
+	 * expensive locking to protect things from get/put
+	 * race when found cached connection is freed by
+	 * ptlrpc_connection_put().
+	 *
+	 * It will be freed later in module unload time,
+	 * when ptlrpc_connection_fini()->lh_exit->conn_exit()
+	 * path is called.
+	 */
+	if (atomic_dec_return(&conn->c_refcount) == 1)
+		rc = 1;
+
+	CDEBUG(D_INFO, "PUT conn=%p refcount %d to %s\n",
+	       conn, atomic_read(&conn->c_refcount),
+	       libcfs_nid2str(conn->c_peer.nid));
+
+	return rc;
+}
+EXPORT_SYMBOL(ptlrpc_connection_put);
+
+struct ptlrpc_connection *
+ptlrpc_connection_addref(struct ptlrpc_connection *conn)
+{
+	atomic_inc(&conn->c_refcount);
+	CDEBUG(D_INFO, "conn=%p refcount %d to %s\n",
+	       conn, atomic_read(&conn->c_refcount),
+	       libcfs_nid2str(conn->c_peer.nid));
+
+	return conn;
+}
+EXPORT_SYMBOL(ptlrpc_connection_addref);
+
+int ptlrpc_connection_init(void)
+{
+	conn_hash = cfs_hash_create("CONN_HASH",
+				    HASH_CONN_CUR_BITS,
+				    HASH_CONN_MAX_BITS,
+				    HASH_CONN_BKT_BITS, 0,
+				    CFS_HASH_MIN_THETA,
+				    CFS_HASH_MAX_THETA,
+				    &conn_hash_ops, CFS_HASH_DEFAULT);
+	if (!conn_hash)
+		return -ENOMEM;
+
+	return 0;
+}
+EXPORT_SYMBOL(ptlrpc_connection_init);
+
+void ptlrpc_connection_fini(void)
+{
+	cfs_hash_putref(conn_hash);
+}
+EXPORT_SYMBOL(ptlrpc_connection_fini);
+
+/*
+ * Hash operations for net_peer<->connection
+ */
+static unsigned
+conn_hashfn(struct cfs_hash *hs, const void *key, unsigned mask)
+{
+	return cfs_hash_djb2_hash(key, sizeof(lnet_process_id_t), mask);
+}
+
+static int
+conn_keycmp(const void *key, struct hlist_node *hnode)
+{
+	struct ptlrpc_connection *conn;
+	const lnet_process_id_t *conn_key;
+
+	LASSERT(key != NULL);
+	conn_key = (lnet_process_id_t *)key;
+	conn = hlist_entry(hnode, struct ptlrpc_connection, c_hash);
+
+	return conn_key->nid == conn->c_peer.nid &&
+	       conn_key->pid == conn->c_peer.pid;
+}
+
+static void *
+conn_key(struct hlist_node *hnode)
+{
+	struct ptlrpc_connection *conn;
+
+	conn = hlist_entry(hnode, struct ptlrpc_connection, c_hash);
+	return &conn->c_peer;
+}
+
+static void *
+conn_object(struct hlist_node *hnode)
+{
+	return hlist_entry(hnode, struct ptlrpc_connection, c_hash);
+}
+
+static void
+conn_get(struct cfs_hash *hs, struct hlist_node *hnode)
+{
+	struct ptlrpc_connection *conn;
+
+	conn = hlist_entry(hnode, struct ptlrpc_connection, c_hash);
+	atomic_inc(&conn->c_refcount);
+}
+
+static void
+conn_put_locked(struct cfs_hash *hs, struct hlist_node *hnode)
+{
+	struct ptlrpc_connection *conn;
+
+	conn = hlist_entry(hnode, struct ptlrpc_connection, c_hash);
+	atomic_dec(&conn->c_refcount);
+}
+
+static void
+conn_exit(struct cfs_hash *hs, struct hlist_node *hnode)
+{
+	struct ptlrpc_connection *conn;
+
+	conn = hlist_entry(hnode, struct ptlrpc_connection, c_hash);
+	/*
+	 * Nothing should be left. Connection user put it and
+	 * connection also was deleted from table by this time
+	 * so we should have 0 refs.
+	 */
+	LASSERTF(atomic_read(&conn->c_refcount) == 0,
+		 "Busy connection with %d refs\n",
+		 atomic_read(&conn->c_refcount));
+	OBD_FREE_PTR(conn);
+}
+
+static cfs_hash_ops_t conn_hash_ops = {
+	.hs_hash	= conn_hashfn,
+	.hs_keycmp      = conn_keycmp,
+	.hs_key	 = conn_key,
+	.hs_object      = conn_object,
+	.hs_get	 = conn_get,
+	.hs_put_locked  = conn_put_locked,
+	.hs_exit	= conn_exit,
+};
diff --git a/drivers/staging/lustre/lustre/ptlrpc/errno.c b/drivers/staging/lustre/lustre/ptlrpc/errno.c
new file mode 100644
index 000000000..73f8374f1
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ptlrpc/errno.c
@@ -0,0 +1,380 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.gnu.org/licenses/gpl-2.0.txt
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (C) 2011 FUJITSU LIMITED.  All rights reserved.
+ *
+ * Copyright (c) 2013, Intel Corporation.
+ */
+
+#include "../../include/linux/libcfs/libcfs.h"
+#include "../include/lustre/lustre_errno.h"
+
+/*
+ * The two translation tables below must define a one-to-one mapping between
+ * host and network errnos.
+ *
+ * EWOULDBLOCK is equal to EAGAIN on all architectures except for parisc, which
+ * appears irrelevant.  Thus, existing references to EWOULDBLOCK are fine.
+ *
+ * EDEADLOCK is equal to EDEADLK on x86 but not on sparc, at least.  A sparc
+ * host has no context-free way to determine if a LUSTRE_EDEADLK represents an
+ * EDEADLK or an EDEADLOCK.  Therefore, all existing references to EDEADLOCK
+ * that need to be transferred on wire have been replaced with EDEADLK.
+ */
+static int lustre_errno_hton_mapping[] = {
+	[EPERM]			= LUSTRE_EPERM,
+	[ENOENT]		= LUSTRE_ENOENT,
+	[ESRCH]			= LUSTRE_ESRCH,
+	[EINTR]			= LUSTRE_EINTR,
+	[EIO]			= LUSTRE_EIO,
+	[ENXIO]			= LUSTRE_ENXIO,
+	[E2BIG]			= LUSTRE_E2BIG,
+	[ENOEXEC]		= LUSTRE_ENOEXEC,
+	[EBADF]			= LUSTRE_EBADF,
+	[ECHILD]		= LUSTRE_ECHILD,
+	[EAGAIN]		= LUSTRE_EAGAIN,
+	[ENOMEM]		= LUSTRE_ENOMEM,
+	[EACCES]		= LUSTRE_EACCES,
+	[EFAULT]		= LUSTRE_EFAULT,
+	[ENOTBLK]		= LUSTRE_ENOTBLK,
+	[EBUSY]			= LUSTRE_EBUSY,
+	[EEXIST]		= LUSTRE_EEXIST,
+	[EXDEV]			= LUSTRE_EXDEV,
+	[ENODEV]		= LUSTRE_ENODEV,
+	[ENOTDIR]		= LUSTRE_ENOTDIR,
+	[EISDIR]		= LUSTRE_EISDIR,
+	[EINVAL]		= LUSTRE_EINVAL,
+	[ENFILE]		= LUSTRE_ENFILE,
+	[EMFILE]		= LUSTRE_EMFILE,
+	[ENOTTY]		= LUSTRE_ENOTTY,
+	[ETXTBSY]		= LUSTRE_ETXTBSY,
+	[EFBIG]			= LUSTRE_EFBIG,
+	[ENOSPC]		= LUSTRE_ENOSPC,
+	[ESPIPE]		= LUSTRE_ESPIPE,
+	[EROFS]			= LUSTRE_EROFS,
+	[EMLINK]		= LUSTRE_EMLINK,
+	[EPIPE]			= LUSTRE_EPIPE,
+	[EDOM]			= LUSTRE_EDOM,
+	[ERANGE]		= LUSTRE_ERANGE,
+	[EDEADLK]		= LUSTRE_EDEADLK,
+	[ENAMETOOLONG]		= LUSTRE_ENAMETOOLONG,
+	[ENOLCK]		= LUSTRE_ENOLCK,
+	[ENOSYS]		= LUSTRE_ENOSYS,
+	[ENOTEMPTY]		= LUSTRE_ENOTEMPTY,
+	[ELOOP]			= LUSTRE_ELOOP,
+	[ENOMSG]		= LUSTRE_ENOMSG,
+	[EIDRM]			= LUSTRE_EIDRM,
+	[ECHRNG]		= LUSTRE_ECHRNG,
+	[EL2NSYNC]		= LUSTRE_EL2NSYNC,
+	[EL3HLT]		= LUSTRE_EL3HLT,
+	[EL3RST]		= LUSTRE_EL3RST,
+	[ELNRNG]		= LUSTRE_ELNRNG,
+	[EUNATCH]		= LUSTRE_EUNATCH,
+	[ENOCSI]		= LUSTRE_ENOCSI,
+	[EL2HLT]		= LUSTRE_EL2HLT,
+	[EBADE]			= LUSTRE_EBADE,
+	[EBADR]			= LUSTRE_EBADR,
+	[EXFULL]		= LUSTRE_EXFULL,
+	[ENOANO]		= LUSTRE_ENOANO,
+	[EBADRQC]		= LUSTRE_EBADRQC,
+	[EBADSLT]		= LUSTRE_EBADSLT,
+	[EBFONT]		= LUSTRE_EBFONT,
+	[ENOSTR]		= LUSTRE_ENOSTR,
+	[ENODATA]		= LUSTRE_ENODATA,
+	[ETIME]			= LUSTRE_ETIME,
+	[ENOSR]			= LUSTRE_ENOSR,
+	[ENONET]		= LUSTRE_ENONET,
+	[ENOPKG]		= LUSTRE_ENOPKG,
+	[EREMOTE]		= LUSTRE_EREMOTE,
+	[ENOLINK]		= LUSTRE_ENOLINK,
+	[EADV]			= LUSTRE_EADV,
+	[ESRMNT]		= LUSTRE_ESRMNT,
+	[ECOMM]			= LUSTRE_ECOMM,
+	[EPROTO]		= LUSTRE_EPROTO,
+	[EMULTIHOP]		= LUSTRE_EMULTIHOP,
+	[EDOTDOT]		= LUSTRE_EDOTDOT,
+	[EBADMSG]		= LUSTRE_EBADMSG,
+	[EOVERFLOW]		= LUSTRE_EOVERFLOW,
+	[ENOTUNIQ]		= LUSTRE_ENOTUNIQ,
+	[EBADFD]		= LUSTRE_EBADFD,
+	[EREMCHG]		= LUSTRE_EREMCHG,
+	[ELIBACC]		= LUSTRE_ELIBACC,
+	[ELIBBAD]		= LUSTRE_ELIBBAD,
+	[ELIBSCN]		= LUSTRE_ELIBSCN,
+	[ELIBMAX]		= LUSTRE_ELIBMAX,
+	[ELIBEXEC]		= LUSTRE_ELIBEXEC,
+	[EILSEQ]		= LUSTRE_EILSEQ,
+	[ERESTART]		= LUSTRE_ERESTART,
+	[ESTRPIPE]		= LUSTRE_ESTRPIPE,
+	[EUSERS]		= LUSTRE_EUSERS,
+	[ENOTSOCK]		= LUSTRE_ENOTSOCK,
+	[EDESTADDRREQ]		= LUSTRE_EDESTADDRREQ,
+	[EMSGSIZE]		= LUSTRE_EMSGSIZE,
+	[EPROTOTYPE]		= LUSTRE_EPROTOTYPE,
+	[ENOPROTOOPT]		= LUSTRE_ENOPROTOOPT,
+	[EPROTONOSUPPORT]	= LUSTRE_EPROTONOSUPPORT,
+	[ESOCKTNOSUPPORT]	= LUSTRE_ESOCKTNOSUPPORT,
+	[EOPNOTSUPP]		= LUSTRE_EOPNOTSUPP,
+	[EPFNOSUPPORT]		= LUSTRE_EPFNOSUPPORT,
+	[EAFNOSUPPORT]		= LUSTRE_EAFNOSUPPORT,
+	[EADDRINUSE]		= LUSTRE_EADDRINUSE,
+	[EADDRNOTAVAIL]		= LUSTRE_EADDRNOTAVAIL,
+	[ENETDOWN]		= LUSTRE_ENETDOWN,
+	[ENETUNREACH]		= LUSTRE_ENETUNREACH,
+	[ENETRESET]		= LUSTRE_ENETRESET,
+	[ECONNABORTED]		= LUSTRE_ECONNABORTED,
+	[ECONNRESET]		= LUSTRE_ECONNRESET,
+	[ENOBUFS]		= LUSTRE_ENOBUFS,
+	[EISCONN]		= LUSTRE_EISCONN,
+	[ENOTCONN]		= LUSTRE_ENOTCONN,
+	[ESHUTDOWN]		= LUSTRE_ESHUTDOWN,
+	[ETOOMANYREFS]		= LUSTRE_ETOOMANYREFS,
+	[ETIMEDOUT]		= LUSTRE_ETIMEDOUT,
+	[ECONNREFUSED]		= LUSTRE_ECONNREFUSED,
+	[EHOSTDOWN]		= LUSTRE_EHOSTDOWN,
+	[EHOSTUNREACH]		= LUSTRE_EHOSTUNREACH,
+	[EALREADY]		= LUSTRE_EALREADY,
+	[EINPROGRESS]		= LUSTRE_EINPROGRESS,
+	[ESTALE]		= LUSTRE_ESTALE,
+	[EUCLEAN]		= LUSTRE_EUCLEAN,
+	[ENOTNAM]		= LUSTRE_ENOTNAM,
+	[ENAVAIL]		= LUSTRE_ENAVAIL,
+	[EISNAM]		= LUSTRE_EISNAM,
+	[EREMOTEIO]		= LUSTRE_EREMOTEIO,
+	[EDQUOT]		= LUSTRE_EDQUOT,
+	[ENOMEDIUM]		= LUSTRE_ENOMEDIUM,
+	[EMEDIUMTYPE]		= LUSTRE_EMEDIUMTYPE,
+	[ECANCELED]		= LUSTRE_ECANCELED,
+	[ENOKEY]		= LUSTRE_ENOKEY,
+	[EKEYEXPIRED]		= LUSTRE_EKEYEXPIRED,
+	[EKEYREVOKED]		= LUSTRE_EKEYREVOKED,
+	[EKEYREJECTED]		= LUSTRE_EKEYREJECTED,
+	[EOWNERDEAD]		= LUSTRE_EOWNERDEAD,
+	[ENOTRECOVERABLE]	= LUSTRE_ENOTRECOVERABLE,
+	[ERESTARTSYS]		= LUSTRE_ERESTARTSYS,
+	[ERESTARTNOINTR]	= LUSTRE_ERESTARTNOINTR,
+	[ERESTARTNOHAND]	= LUSTRE_ERESTARTNOHAND,
+	[ENOIOCTLCMD]		= LUSTRE_ENOIOCTLCMD,
+	[ERESTART_RESTARTBLOCK]	= LUSTRE_ERESTART_RESTARTBLOCK,
+	[EBADHANDLE]		= LUSTRE_EBADHANDLE,
+	[ENOTSYNC]		= LUSTRE_ENOTSYNC,
+	[EBADCOOKIE]		= LUSTRE_EBADCOOKIE,
+	[ENOTSUPP]		= LUSTRE_ENOTSUPP,
+	[ETOOSMALL]		= LUSTRE_ETOOSMALL,
+	[ESERVERFAULT]		= LUSTRE_ESERVERFAULT,
+	[EBADTYPE]		= LUSTRE_EBADTYPE,
+	[EJUKEBOX]		= LUSTRE_EJUKEBOX,
+	[EIOCBQUEUED]		= LUSTRE_EIOCBQUEUED,
+};
+
+static int lustre_errno_ntoh_mapping[] = {
+	[LUSTRE_EPERM]			= EPERM,
+	[LUSTRE_ENOENT]			= ENOENT,
+	[LUSTRE_ESRCH]			= ESRCH,
+	[LUSTRE_EINTR]			= EINTR,
+	[LUSTRE_EIO]			= EIO,
+	[LUSTRE_ENXIO]			= ENXIO,
+	[LUSTRE_E2BIG]			= E2BIG,
+	[LUSTRE_ENOEXEC]		= ENOEXEC,
+	[LUSTRE_EBADF]			= EBADF,
+	[LUSTRE_ECHILD]			= ECHILD,
+	[LUSTRE_EAGAIN]			= EAGAIN,
+	[LUSTRE_ENOMEM]			= ENOMEM,
+	[LUSTRE_EACCES]			= EACCES,
+	[LUSTRE_EFAULT]			= EFAULT,
+	[LUSTRE_ENOTBLK]		= ENOTBLK,
+	[LUSTRE_EBUSY]			= EBUSY,
+	[LUSTRE_EEXIST]			= EEXIST,
+	[LUSTRE_EXDEV]			= EXDEV,
+	[LUSTRE_ENODEV]			= ENODEV,
+	[LUSTRE_ENOTDIR]		= ENOTDIR,
+	[LUSTRE_EISDIR]			= EISDIR,
+	[LUSTRE_EINVAL]			= EINVAL,
+	[LUSTRE_ENFILE]			= ENFILE,
+	[LUSTRE_EMFILE]			= EMFILE,
+	[LUSTRE_ENOTTY]			= ENOTTY,
+	[LUSTRE_ETXTBSY]		= ETXTBSY,
+	[LUSTRE_EFBIG]			= EFBIG,
+	[LUSTRE_ENOSPC]			= ENOSPC,
+	[LUSTRE_ESPIPE]			= ESPIPE,
+	[LUSTRE_EROFS]			= EROFS,
+	[LUSTRE_EMLINK]			= EMLINK,
+	[LUSTRE_EPIPE]			= EPIPE,
+	[LUSTRE_EDOM]			= EDOM,
+	[LUSTRE_ERANGE]			= ERANGE,
+	[LUSTRE_EDEADLK]		= EDEADLK,
+	[LUSTRE_ENAMETOOLONG]		= ENAMETOOLONG,
+	[LUSTRE_ENOLCK]			= ENOLCK,
+	[LUSTRE_ENOSYS]			= ENOSYS,
+	[LUSTRE_ENOTEMPTY]		= ENOTEMPTY,
+	[LUSTRE_ELOOP]			= ELOOP,
+	[LUSTRE_ENOMSG]			= ENOMSG,
+	[LUSTRE_EIDRM]			= EIDRM,
+	[LUSTRE_ECHRNG]			= ECHRNG,
+	[LUSTRE_EL2NSYNC]		= EL2NSYNC,
+	[LUSTRE_EL3HLT]			= EL3HLT,
+	[LUSTRE_EL3RST]			= EL3RST,
+	[LUSTRE_ELNRNG]			= ELNRNG,
+	[LUSTRE_EUNATCH]		= EUNATCH,
+	[LUSTRE_ENOCSI]			= ENOCSI,
+	[LUSTRE_EL2HLT]			= EL2HLT,
+	[LUSTRE_EBADE]			= EBADE,
+	[LUSTRE_EBADR]			= EBADR,
+	[LUSTRE_EXFULL]			= EXFULL,
+	[LUSTRE_ENOANO]			= ENOANO,
+	[LUSTRE_EBADRQC]		= EBADRQC,
+	[LUSTRE_EBADSLT]		= EBADSLT,
+	[LUSTRE_EBFONT]			= EBFONT,
+	[LUSTRE_ENOSTR]			= ENOSTR,
+	[LUSTRE_ENODATA]		= ENODATA,
+	[LUSTRE_ETIME]			= ETIME,
+	[LUSTRE_ENOSR]			= ENOSR,
+	[LUSTRE_ENONET]			= ENONET,
+	[LUSTRE_ENOPKG]			= ENOPKG,
+	[LUSTRE_EREMOTE]		= EREMOTE,
+	[LUSTRE_ENOLINK]		= ENOLINK,
+	[LUSTRE_EADV]			= EADV,
+	[LUSTRE_ESRMNT]			= ESRMNT,
+	[LUSTRE_ECOMM]			= ECOMM,
+	[LUSTRE_EPROTO]			= EPROTO,
+	[LUSTRE_EMULTIHOP]		= EMULTIHOP,
+	[LUSTRE_EDOTDOT]		= EDOTDOT,
+	[LUSTRE_EBADMSG]		= EBADMSG,
+	[LUSTRE_EOVERFLOW]		= EOVERFLOW,
+	[LUSTRE_ENOTUNIQ]		= ENOTUNIQ,
+	[LUSTRE_EBADFD]			= EBADFD,
+	[LUSTRE_EREMCHG]		= EREMCHG,
+	[LUSTRE_ELIBACC]		= ELIBACC,
+	[LUSTRE_ELIBBAD]		= ELIBBAD,
+	[LUSTRE_ELIBSCN]		= ELIBSCN,
+	[LUSTRE_ELIBMAX]		= ELIBMAX,
+	[LUSTRE_ELIBEXEC]		= ELIBEXEC,
+	[LUSTRE_EILSEQ]			= EILSEQ,
+	[LUSTRE_ERESTART]		= ERESTART,
+	[LUSTRE_ESTRPIPE]		= ESTRPIPE,
+	[LUSTRE_EUSERS]			= EUSERS,
+	[LUSTRE_ENOTSOCK]		= ENOTSOCK,
+	[LUSTRE_EDESTADDRREQ]		= EDESTADDRREQ,
+	[LUSTRE_EMSGSIZE]		= EMSGSIZE,
+	[LUSTRE_EPROTOTYPE]		= EPROTOTYPE,
+	[LUSTRE_ENOPROTOOPT]		= ENOPROTOOPT,
+	[LUSTRE_EPROTONOSUPPORT]	= EPROTONOSUPPORT,
+	[LUSTRE_ESOCKTNOSUPPORT]	= ESOCKTNOSUPPORT,
+	[LUSTRE_EOPNOTSUPP]		= EOPNOTSUPP,
+	[LUSTRE_EPFNOSUPPORT]		= EPFNOSUPPORT,
+	[LUSTRE_EAFNOSUPPORT]		= EAFNOSUPPORT,
+	[LUSTRE_EADDRINUSE]		= EADDRINUSE,
+	[LUSTRE_EADDRNOTAVAIL]		= EADDRNOTAVAIL,
+	[LUSTRE_ENETDOWN]		= ENETDOWN,
+	[LUSTRE_ENETUNREACH]		= ENETUNREACH,
+	[LUSTRE_ENETRESET]		= ENETRESET,
+	[LUSTRE_ECONNABORTED]		= ECONNABORTED,
+	[LUSTRE_ECONNRESET]		= ECONNRESET,
+	[LUSTRE_ENOBUFS]		= ENOBUFS,
+	[LUSTRE_EISCONN]		= EISCONN,
+	[LUSTRE_ENOTCONN]		= ENOTCONN,
+	[LUSTRE_ESHUTDOWN]		= ESHUTDOWN,
+	[LUSTRE_ETOOMANYREFS]		= ETOOMANYREFS,
+	[LUSTRE_ETIMEDOUT]		= ETIMEDOUT,
+	[LUSTRE_ECONNREFUSED]		= ECONNREFUSED,
+	[LUSTRE_EHOSTDOWN]		= EHOSTDOWN,
+	[LUSTRE_EHOSTUNREACH]		= EHOSTUNREACH,
+	[LUSTRE_EALREADY]		= EALREADY,
+	[LUSTRE_EINPROGRESS]		= EINPROGRESS,
+	[LUSTRE_ESTALE]			= ESTALE,
+	[LUSTRE_EUCLEAN]		= EUCLEAN,
+	[LUSTRE_ENOTNAM]		= ENOTNAM,
+	[LUSTRE_ENAVAIL]		= ENAVAIL,
+	[LUSTRE_EISNAM]			= EISNAM,
+	[LUSTRE_EREMOTEIO]		= EREMOTEIO,
+	[LUSTRE_EDQUOT]			= EDQUOT,
+	[LUSTRE_ENOMEDIUM]		= ENOMEDIUM,
+	[LUSTRE_EMEDIUMTYPE]		= EMEDIUMTYPE,
+	[LUSTRE_ECANCELED]		= ECANCELED,
+	[LUSTRE_ENOKEY]			= ENOKEY,
+	[LUSTRE_EKEYEXPIRED]		= EKEYEXPIRED,
+	[LUSTRE_EKEYREVOKED]		= EKEYREVOKED,
+	[LUSTRE_EKEYREJECTED]		= EKEYREJECTED,
+	[LUSTRE_EOWNERDEAD]		= EOWNERDEAD,
+	[LUSTRE_ENOTRECOVERABLE]	= ENOTRECOVERABLE,
+	[LUSTRE_ERESTARTSYS]		= ERESTARTSYS,
+	[LUSTRE_ERESTARTNOINTR]		= ERESTARTNOINTR,
+	[LUSTRE_ERESTARTNOHAND]		= ERESTARTNOHAND,
+	[LUSTRE_ENOIOCTLCMD]		= ENOIOCTLCMD,
+	[LUSTRE_ERESTART_RESTARTBLOCK]	= ERESTART_RESTARTBLOCK,
+	[LUSTRE_EBADHANDLE]		= EBADHANDLE,
+	[LUSTRE_ENOTSYNC]		= ENOTSYNC,
+	[LUSTRE_EBADCOOKIE]		= EBADCOOKIE,
+	[LUSTRE_ENOTSUPP]		= ENOTSUPP,
+	[LUSTRE_ETOOSMALL]		= ETOOSMALL,
+	[LUSTRE_ESERVERFAULT]		= ESERVERFAULT,
+	[LUSTRE_EBADTYPE]		= EBADTYPE,
+	[LUSTRE_EJUKEBOX]		= EJUKEBOX,
+	[LUSTRE_EIOCBQUEUED]		= EIOCBQUEUED,
+};
+
+unsigned int lustre_errno_hton(unsigned int h)
+{
+	unsigned int n;
+
+	if (h == 0) {
+		n = 0;
+	} else if (h < ARRAY_SIZE(lustre_errno_hton_mapping)) {
+		n = lustre_errno_hton_mapping[h];
+		if (n == 0)
+			goto generic;
+	} else {
+generic:
+		/*
+		 * A generic errno is better than the unknown one that could
+		 * mean anything to a different host.
+		 */
+		n = LUSTRE_EIO;
+	}
+
+	return n;
+}
+EXPORT_SYMBOL(lustre_errno_hton);
+
+unsigned int lustre_errno_ntoh(unsigned int n)
+{
+	unsigned int h;
+
+	if (n == 0) {
+		h = 0;
+	} else if (n < ARRAY_SIZE(lustre_errno_ntoh_mapping)) {
+		h = lustre_errno_ntoh_mapping[n];
+		if (h == 0)
+			goto generic;
+	} else {
+generic:
+		/*
+		 * Similar to the situation in lustre_errno_hton(), an unknown
+		 * network errno could coincide with anything.  Hence, it is
+		 * better to return a generic errno.
+		 */
+		h = EIO;
+	}
+
+	return h;
+}
+EXPORT_SYMBOL(lustre_errno_ntoh);
diff --git a/drivers/staging/lustre/lustre/ptlrpc/events.c b/drivers/staging/lustre/lustre/ptlrpc/events.c
new file mode 100644
index 000000000..7f8644e01
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ptlrpc/events.c
@@ -0,0 +1,585 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#define DEBUG_SUBSYSTEM S_RPC
+
+#include "../../include/linux/libcfs/libcfs.h"
+# ifdef __mips64__
+#  include <linux/kernel.h>
+# endif
+
+#include "../include/obd_class.h"
+#include "../include/lustre_net.h"
+#include "../include/lustre_sec.h"
+#include "ptlrpc_internal.h"
+
+lnet_handle_eq_t   ptlrpc_eq_h;
+
+/*
+ *  Client's outgoing request callback
+ */
+void request_out_callback(lnet_event_t *ev)
+{
+	struct ptlrpc_cb_id   *cbid = ev->md.user_ptr;
+	struct ptlrpc_request *req = cbid->cbid_arg;
+
+	LASSERT(ev->type == LNET_EVENT_SEND ||
+		ev->type == LNET_EVENT_UNLINK);
+	LASSERT(ev->unlinked);
+
+	DEBUG_REQ(D_NET, req, "type %d, status %d", ev->type, ev->status);
+
+	sptlrpc_request_out_callback(req);
+	spin_lock(&req->rq_lock);
+	req->rq_real_sent = get_seconds();
+	if (ev->unlinked)
+		req->rq_req_unlink = 0;
+
+	if (ev->type == LNET_EVENT_UNLINK || ev->status != 0) {
+
+		/* Failed send: make it seem like the reply timed out, just
+		 * like failing sends in client.c does currently...  */
+
+		req->rq_net_err = 1;
+		ptlrpc_client_wake_req(req);
+	}
+	spin_unlock(&req->rq_lock);
+
+	ptlrpc_req_finished(req);
+}
+
+/*
+ * Client's incoming reply callback
+ */
+void reply_in_callback(lnet_event_t *ev)
+{
+	struct ptlrpc_cb_id   *cbid = ev->md.user_ptr;
+	struct ptlrpc_request *req = cbid->cbid_arg;
+
+	DEBUG_REQ(D_NET, req, "type %d, status %d", ev->type, ev->status);
+
+	LASSERT(ev->type == LNET_EVENT_PUT || ev->type == LNET_EVENT_UNLINK);
+	LASSERT(ev->md.start == req->rq_repbuf);
+	LASSERT(ev->offset + ev->mlength <= req->rq_repbuf_len);
+	/* We've set LNET_MD_MANAGE_REMOTE for all outgoing requests
+	   for adaptive timeouts' early reply. */
+	LASSERT((ev->md.options & LNET_MD_MANAGE_REMOTE) != 0);
+
+	spin_lock(&req->rq_lock);
+
+	req->rq_receiving_reply = 0;
+	req->rq_early = 0;
+	if (ev->unlinked)
+		req->rq_reply_unlink = 0;
+
+	if (ev->status)
+		goto out_wake;
+
+	if (ev->type == LNET_EVENT_UNLINK) {
+		LASSERT(ev->unlinked);
+		DEBUG_REQ(D_NET, req, "unlink");
+		goto out_wake;
+	}
+
+	if (ev->mlength < ev->rlength) {
+		CDEBUG(D_RPCTRACE, "truncate req %p rpc %d - %d+%d\n", req,
+		       req->rq_replen, ev->rlength, ev->offset);
+		req->rq_reply_truncate = 1;
+		req->rq_replied = 1;
+		req->rq_status = -EOVERFLOW;
+		req->rq_nob_received = ev->rlength + ev->offset;
+		goto out_wake;
+	}
+
+	if ((ev->offset == 0) &&
+	    ((lustre_msghdr_get_flags(req->rq_reqmsg) & MSGHDR_AT_SUPPORT))) {
+		/* Early reply */
+		DEBUG_REQ(D_ADAPTTO, req,
+			  "Early reply received: mlen=%u offset=%d replen=%d replied=%d unlinked=%d",
+			  ev->mlength, ev->offset,
+			  req->rq_replen, req->rq_replied, ev->unlinked);
+
+		req->rq_early_count++; /* number received, client side */
+
+		if (req->rq_replied)   /* already got the real reply */
+			goto out_wake;
+
+		req->rq_early = 1;
+		req->rq_reply_off = ev->offset;
+		req->rq_nob_received = ev->mlength;
+		/* And we're still receiving */
+		req->rq_receiving_reply = 1;
+	} else {
+		/* Real reply */
+		req->rq_rep_swab_mask = 0;
+		req->rq_replied = 1;
+		/* Got reply, no resend required */
+		req->rq_resend = 0;
+		req->rq_reply_off = ev->offset;
+		req->rq_nob_received = ev->mlength;
+		/* LNetMDUnlink can't be called under the LNET_LOCK,
+		   so we must unlink in ptlrpc_unregister_reply */
+		DEBUG_REQ(D_INFO, req,
+			  "reply in flags=%x mlen=%u offset=%d replen=%d",
+			  lustre_msg_get_flags(req->rq_reqmsg),
+			  ev->mlength, ev->offset, req->rq_replen);
+	}
+
+	req->rq_import->imp_last_reply_time = get_seconds();
+
+out_wake:
+	/* NB don't unlock till after wakeup; req can disappear under us
+	 * since we don't have our own ref */
+	ptlrpc_client_wake_req(req);
+	spin_unlock(&req->rq_lock);
+}
+
+/*
+ * Client's bulk has been written/read
+ */
+void client_bulk_callback(lnet_event_t *ev)
+{
+	struct ptlrpc_cb_id     *cbid = ev->md.user_ptr;
+	struct ptlrpc_bulk_desc *desc = cbid->cbid_arg;
+	struct ptlrpc_request   *req;
+
+	LASSERT((desc->bd_type == BULK_PUT_SINK &&
+		 ev->type == LNET_EVENT_PUT) ||
+		(desc->bd_type == BULK_GET_SOURCE &&
+		 ev->type == LNET_EVENT_GET) ||
+		ev->type == LNET_EVENT_UNLINK);
+	LASSERT(ev->unlinked);
+
+	if (CFS_FAIL_CHECK_ORSET(OBD_FAIL_PTLRPC_CLIENT_BULK_CB, CFS_FAIL_ONCE))
+		ev->status = -EIO;
+
+	if (CFS_FAIL_CHECK_ORSET(OBD_FAIL_PTLRPC_CLIENT_BULK_CB2,
+				 CFS_FAIL_ONCE))
+		ev->status = -EIO;
+
+	CDEBUG((ev->status == 0) ? D_NET : D_ERROR,
+	       "event type %d, status %d, desc %p\n",
+	       ev->type, ev->status, desc);
+
+	spin_lock(&desc->bd_lock);
+	req = desc->bd_req;
+	LASSERT(desc->bd_md_count > 0);
+	desc->bd_md_count--;
+
+	if (ev->type != LNET_EVENT_UNLINK && ev->status == 0) {
+		desc->bd_nob_transferred += ev->mlength;
+		desc->bd_sender = ev->sender;
+	} else {
+		/* start reconnect and resend if network error hit */
+		spin_lock(&req->rq_lock);
+		req->rq_net_err = 1;
+		spin_unlock(&req->rq_lock);
+	}
+
+	if (ev->status != 0)
+		desc->bd_failure = 1;
+
+	/* NB don't unlock till after wakeup; desc can disappear under us
+	 * otherwise */
+	if (desc->bd_md_count == 0)
+		ptlrpc_client_wake_req(desc->bd_req);
+
+	spin_unlock(&desc->bd_lock);
+}
+
+/*
+ * We will have percpt request history list for ptlrpc service in upcoming
+ * patches because we don't want to be serialized by current per-service
+ * history operations. So we require history ID can (somehow) show arriving
+ * order w/o grabbing global lock, and user can sort them in userspace.
+ *
+ * This is how we generate history ID for ptlrpc_request:
+ * ----------------------------------------------------
+ * |  32 bits  |  16 bits  | (16 - X)bits  |  X bits  |
+ * ----------------------------------------------------
+ * |  seconds  | usec / 16 |   sequence    | CPT id   |
+ * ----------------------------------------------------
+ *
+ * it might not be precise but should be good enough.
+ */
+
+#define REQS_CPT_BITS(svcpt)	((svcpt)->scp_service->srv_cpt_bits)
+
+#define REQS_SEC_SHIFT		32
+#define REQS_USEC_SHIFT		16
+#define REQS_SEQ_SHIFT(svcpt)	REQS_CPT_BITS(svcpt)
+
+static void ptlrpc_req_add_history(struct ptlrpc_service_part *svcpt,
+				   struct ptlrpc_request *req)
+{
+	__u64	sec = req->rq_arrival_time.tv_sec;
+	__u32	usec = req->rq_arrival_time.tv_usec >> 4; /* usec / 16 */
+	__u64	new_seq;
+
+	/* set sequence ID for request and add it to history list,
+	 * it must be called with hold svcpt::scp_lock */
+
+	new_seq = (sec << REQS_SEC_SHIFT) |
+		  (usec << REQS_USEC_SHIFT) |
+		  (svcpt->scp_cpt < 0 ? 0 : svcpt->scp_cpt);
+
+	if (new_seq > svcpt->scp_hist_seq) {
+		/* This handles the initial case of scp_hist_seq == 0 or
+		 * we just jumped into a new time window */
+		svcpt->scp_hist_seq = new_seq;
+	} else {
+		LASSERT(REQS_SEQ_SHIFT(svcpt) < REQS_USEC_SHIFT);
+		/* NB: increase sequence number in current usec bucket,
+		 * however, it's possible that we used up all bits for
+		 * sequence and jumped into the next usec bucket (future time),
+		 * then we hope there will be less RPCs per bucket at some
+		 * point, and sequence will catch up again */
+		svcpt->scp_hist_seq += (1U << REQS_SEQ_SHIFT(svcpt));
+		new_seq = svcpt->scp_hist_seq;
+	}
+
+	req->rq_history_seq = new_seq;
+
+	list_add_tail(&req->rq_history_list, &svcpt->scp_hist_reqs);
+}
+
+/*
+ * Server's incoming request callback
+ */
+void request_in_callback(lnet_event_t *ev)
+{
+	struct ptlrpc_cb_id		  *cbid = ev->md.user_ptr;
+	struct ptlrpc_request_buffer_desc *rqbd = cbid->cbid_arg;
+	struct ptlrpc_service_part	  *svcpt = rqbd->rqbd_svcpt;
+	struct ptlrpc_service	     *service = svcpt->scp_service;
+	struct ptlrpc_request	     *req;
+
+	LASSERT(ev->type == LNET_EVENT_PUT ||
+		ev->type == LNET_EVENT_UNLINK);
+	LASSERT((char *)ev->md.start >= rqbd->rqbd_buffer);
+	LASSERT((char *)ev->md.start + ev->offset + ev->mlength <=
+		rqbd->rqbd_buffer + service->srv_buf_size);
+
+	CDEBUG((ev->status == 0) ? D_NET : D_ERROR,
+	       "event type %d, status %d, service %s\n",
+	       ev->type, ev->status, service->srv_name);
+
+	if (ev->unlinked) {
+		/* If this is the last request message to fit in the
+		 * request buffer we can use the request object embedded in
+		 * rqbd.  Note that if we failed to allocate a request,
+		 * we'd have to re-post the rqbd, which we can't do in this
+		 * context. */
+		req = &rqbd->rqbd_req;
+		memset(req, 0, sizeof(*req));
+	} else {
+		LASSERT(ev->type == LNET_EVENT_PUT);
+		if (ev->status != 0) {
+			/* We moaned above already... */
+			return;
+		}
+		req = ptlrpc_request_cache_alloc(GFP_ATOMIC);
+		if (req == NULL) {
+			CERROR("Can't allocate incoming request descriptor: Dropping %s RPC from %s\n",
+			       service->srv_name,
+			       libcfs_id2str(ev->initiator));
+			return;
+		}
+	}
+
+	/* NB we ABSOLUTELY RELY on req being zeroed, so pointers are NULL,
+	 * flags are reset and scalars are zero.  We only set the message
+	 * size to non-zero if this was a successful receive. */
+	req->rq_xid = ev->match_bits;
+	req->rq_reqbuf = ev->md.start + ev->offset;
+	if (ev->type == LNET_EVENT_PUT && ev->status == 0)
+		req->rq_reqdata_len = ev->mlength;
+	do_gettimeofday(&req->rq_arrival_time);
+	req->rq_peer = ev->initiator;
+	req->rq_self = ev->target.nid;
+	req->rq_rqbd = rqbd;
+	req->rq_phase = RQ_PHASE_NEW;
+	spin_lock_init(&req->rq_lock);
+	INIT_LIST_HEAD(&req->rq_timed_list);
+	INIT_LIST_HEAD(&req->rq_exp_list);
+	atomic_set(&req->rq_refcount, 1);
+	if (ev->type == LNET_EVENT_PUT)
+		CDEBUG(D_INFO, "incoming req@%p x%llu msgsize %u\n",
+		       req, req->rq_xid, ev->mlength);
+
+	CDEBUG(D_RPCTRACE, "peer: %s\n", libcfs_id2str(req->rq_peer));
+
+	spin_lock(&svcpt->scp_lock);
+
+	ptlrpc_req_add_history(svcpt, req);
+
+	if (ev->unlinked) {
+		svcpt->scp_nrqbds_posted--;
+		CDEBUG(D_INFO, "Buffer complete: %d buffers still posted\n",
+		       svcpt->scp_nrqbds_posted);
+
+		/* Normally, don't complain about 0 buffers posted; LNET won't
+		 * drop incoming reqs since we set the portal lazy */
+		if (test_req_buffer_pressure &&
+		    ev->type != LNET_EVENT_UNLINK &&
+		    svcpt->scp_nrqbds_posted == 0)
+			CWARN("All %s request buffers busy\n",
+			      service->srv_name);
+
+		/* req takes over the network's ref on rqbd */
+	} else {
+		/* req takes a ref on rqbd */
+		rqbd->rqbd_refcount++;
+	}
+
+	list_add_tail(&req->rq_list, &svcpt->scp_req_incoming);
+	svcpt->scp_nreqs_incoming++;
+
+	/* NB everything can disappear under us once the request
+	 * has been queued and we unlock, so do the wake now... */
+	wake_up(&svcpt->scp_waitq);
+
+	spin_unlock(&svcpt->scp_lock);
+}
+
+/*
+ *  Server's outgoing reply callback
+ */
+void reply_out_callback(lnet_event_t *ev)
+{
+	struct ptlrpc_cb_id	  *cbid = ev->md.user_ptr;
+	struct ptlrpc_reply_state *rs = cbid->cbid_arg;
+	struct ptlrpc_service_part *svcpt = rs->rs_svcpt;
+
+	LASSERT(ev->type == LNET_EVENT_SEND ||
+		ev->type == LNET_EVENT_ACK ||
+		ev->type == LNET_EVENT_UNLINK);
+
+	if (!rs->rs_difficult) {
+		/* 'Easy' replies have no further processing so I drop the
+		 * net's ref on 'rs' */
+		LASSERT(ev->unlinked);
+		ptlrpc_rs_decref(rs);
+		return;
+	}
+
+	LASSERT(rs->rs_on_net);
+
+	if (ev->unlinked) {
+		/* Last network callback. The net's ref on 'rs' stays put
+		 * until ptlrpc_handle_rs() is done with it */
+		spin_lock(&svcpt->scp_rep_lock);
+		spin_lock(&rs->rs_lock);
+
+		rs->rs_on_net = 0;
+		if (!rs->rs_no_ack ||
+		    rs->rs_transno <=
+		    rs->rs_export->exp_obd->obd_last_committed)
+			ptlrpc_schedule_difficult_reply(rs);
+
+		spin_unlock(&rs->rs_lock);
+		spin_unlock(&svcpt->scp_rep_lock);
+	}
+}
+
+
+static void ptlrpc_master_callback(lnet_event_t *ev)
+{
+	struct ptlrpc_cb_id *cbid = ev->md.user_ptr;
+	void (*callback)(lnet_event_t *ev) = cbid->cbid_fn;
+
+	/* Honestly, it's best to find out early. */
+	LASSERT(cbid->cbid_arg != LP_POISON);
+	LASSERT(callback == request_out_callback ||
+		callback == reply_in_callback ||
+		callback == client_bulk_callback ||
+		callback == request_in_callback ||
+		callback == reply_out_callback);
+
+	callback(ev);
+}
+
+int ptlrpc_uuid_to_peer(struct obd_uuid *uuid,
+			 lnet_process_id_t *peer, lnet_nid_t *self)
+{
+	int	       best_dist = 0;
+	__u32	     best_order = 0;
+	int	       count = 0;
+	int	       rc = -ENOENT;
+	int	       portals_compatibility;
+	int	       dist;
+	__u32	     order;
+	lnet_nid_t	dst_nid;
+	lnet_nid_t	src_nid;
+
+	portals_compatibility = LNetCtl(IOC_LIBCFS_PORTALS_COMPATIBILITY, NULL);
+
+	peer->pid = LUSTRE_SRV_LNET_PID;
+
+	/* Choose the matching UUID that's closest */
+	while (lustre_uuid_to_peer(uuid->uuid, &dst_nid, count++) == 0) {
+		dist = LNetDist(dst_nid, &src_nid, &order);
+		if (dist < 0)
+			continue;
+
+		if (dist == 0) {		/* local! use loopback LND */
+			peer->nid = *self = LNET_MKNID(LNET_MKNET(LOLND, 0), 0);
+			rc = 0;
+			break;
+		}
+
+		if (rc < 0 ||
+		    dist < best_dist ||
+		    (dist == best_dist && order < best_order)) {
+			best_dist = dist;
+			best_order = order;
+
+			if (portals_compatibility > 1) {
+				/* Strong portals compatibility: Zero the nid's
+				 * NET, so if I'm reading new config logs, or
+				 * getting configured by (new) lconf I can
+				 * still talk to old servers. */
+				dst_nid = LNET_MKNID(0, LNET_NIDADDR(dst_nid));
+				src_nid = LNET_MKNID(0, LNET_NIDADDR(src_nid));
+			}
+			peer->nid = dst_nid;
+			*self = src_nid;
+			rc = 0;
+		}
+	}
+
+	CDEBUG(D_NET, "%s->%s\n", uuid->uuid, libcfs_id2str(*peer));
+	return rc;
+}
+
+void ptlrpc_ni_fini(void)
+{
+	wait_queue_head_t	 waitq;
+	struct l_wait_info  lwi;
+	int		 rc;
+	int		 retries;
+
+	/* Wait for the event queue to become idle since there may still be
+	 * messages in flight with pending events (i.e. the fire-and-forget
+	 * messages == client requests and "non-difficult" server
+	 * replies */
+
+	for (retries = 0;; retries++) {
+		rc = LNetEQFree(ptlrpc_eq_h);
+		switch (rc) {
+		default:
+			LBUG();
+
+		case 0:
+			LNetNIFini();
+			return;
+
+		case -EBUSY:
+			if (retries != 0)
+				CWARN("Event queue still busy\n");
+
+			/* Wait for a bit */
+			init_waitqueue_head(&waitq);
+			lwi = LWI_TIMEOUT(cfs_time_seconds(2), NULL, NULL);
+			l_wait_event(waitq, 0, &lwi);
+			break;
+		}
+	}
+	/* notreached */
+}
+
+lnet_pid_t ptl_get_pid(void)
+{
+	lnet_pid_t	pid;
+
+	pid = LUSTRE_SRV_LNET_PID;
+	return pid;
+}
+
+int ptlrpc_ni_init(void)
+{
+	int	      rc;
+	lnet_pid_t       pid;
+
+	pid = ptl_get_pid();
+	CDEBUG(D_NET, "My pid is: %x\n", pid);
+
+	/* We're not passing any limits yet... */
+	rc = LNetNIInit(pid);
+	if (rc < 0) {
+		CDEBUG(D_NET, "Can't init network interface: %d\n", rc);
+		return -ENOENT;
+	}
+
+	/* CAVEAT EMPTOR: how we process portals events is _radically_
+	 * different depending on... */
+	/* kernel LNet calls our master callback when there are new event,
+	 * because we are guaranteed to get every event via callback,
+	 * so we just set EQ size to 0 to avoid overhead of serializing
+	 * enqueue/dequeue operations in LNet. */
+	rc = LNetEQAlloc(0, ptlrpc_master_callback, &ptlrpc_eq_h);
+	if (rc == 0)
+		return 0;
+
+	CERROR("Failed to allocate event queue: %d\n", rc);
+	LNetNIFini();
+
+	return -ENOMEM;
+}
+
+
+int ptlrpc_init_portals(void)
+{
+	int   rc = ptlrpc_ni_init();
+
+	if (rc != 0) {
+		CERROR("network initialisation failed\n");
+		return -EIO;
+	}
+	rc = ptlrpcd_addref();
+	if (rc == 0)
+		return 0;
+
+	CERROR("rpcd initialisation failed\n");
+	ptlrpc_ni_fini();
+	return rc;
+}
+
+void ptlrpc_exit_portals(void)
+{
+	ptlrpcd_decref();
+	ptlrpc_ni_fini();
+}
diff --git a/drivers/staging/lustre/lustre/ptlrpc/import.c b/drivers/staging/lustre/lustre/ptlrpc/import.c
new file mode 100644
index 000000000..d5fc689c0
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ptlrpc/import.c
@@ -0,0 +1,1642 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/ptlrpc/import.c
+ *
+ * Author: Mike Shaver <shaver@clusterfs.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_RPC
+
+#include "../include/obd_support.h"
+#include "../include/lustre_ha.h"
+#include "../include/lustre_net.h"
+#include "../include/lustre_import.h"
+#include "../include/lustre_export.h"
+#include "../include/obd.h"
+#include "../include/obd_cksum.h"
+#include "../include/obd_class.h"
+
+#include "ptlrpc_internal.h"
+
+struct ptlrpc_connect_async_args {
+	 __u64 pcaa_peer_committed;
+	int pcaa_initial_connect;
+};
+
+/**
+ * Updates import \a imp current state to provided \a state value
+ * Helper function. Must be called under imp_lock.
+ */
+static void __import_set_state(struct obd_import *imp,
+			       enum lustre_imp_state state)
+{
+	switch (state) {
+	case LUSTRE_IMP_CLOSED:
+	case LUSTRE_IMP_NEW:
+	case LUSTRE_IMP_DISCON:
+	case LUSTRE_IMP_CONNECTING:
+		break;
+	case LUSTRE_IMP_REPLAY_WAIT:
+		imp->imp_replay_state = LUSTRE_IMP_REPLAY_LOCKS;
+		break;
+	default:
+		imp->imp_replay_state = LUSTRE_IMP_REPLAY;
+	}
+
+	imp->imp_state = state;
+	imp->imp_state_hist[imp->imp_state_hist_idx].ish_state = state;
+	imp->imp_state_hist[imp->imp_state_hist_idx].ish_time =
+		get_seconds();
+	imp->imp_state_hist_idx = (imp->imp_state_hist_idx + 1) %
+		IMP_STATE_HIST_LEN;
+}
+
+/* A CLOSED import should remain so. */
+#define IMPORT_SET_STATE_NOLOCK(imp, state)				       \
+do {									       \
+	if (imp->imp_state != LUSTRE_IMP_CLOSED) {			       \
+		CDEBUG(D_HA, "%p %s: changing import state from %s to %s\n",   \
+		       imp, obd2cli_tgt(imp->imp_obd),			       \
+		       ptlrpc_import_state_name(imp->imp_state),	       \
+		       ptlrpc_import_state_name(state));		       \
+		__import_set_state(imp, state);				       \
+	}								       \
+} while (0)
+
+#define IMPORT_SET_STATE(imp, state)					\
+do {									\
+	spin_lock(&imp->imp_lock);					\
+	IMPORT_SET_STATE_NOLOCK(imp, state);				\
+	spin_unlock(&imp->imp_lock);					\
+} while (0)
+
+
+static int ptlrpc_connect_interpret(const struct lu_env *env,
+				    struct ptlrpc_request *request,
+				    void *data, int rc);
+int ptlrpc_import_recovery_state_machine(struct obd_import *imp);
+
+/* Only this function is allowed to change the import state when it is
+ * CLOSED. I would rather refcount the import and free it after
+ * disconnection like we do with exports. To do that, the client_obd
+ * will need to save the peer info somewhere other than in the import,
+ * though. */
+int ptlrpc_init_import(struct obd_import *imp)
+{
+	spin_lock(&imp->imp_lock);
+
+	imp->imp_generation++;
+	imp->imp_state =  LUSTRE_IMP_NEW;
+
+	spin_unlock(&imp->imp_lock);
+
+	return 0;
+}
+EXPORT_SYMBOL(ptlrpc_init_import);
+
+#define UUID_STR "_UUID"
+void deuuidify(char *uuid, const char *prefix, char **uuid_start, int *uuid_len)
+{
+	*uuid_start = !prefix || strncmp(uuid, prefix, strlen(prefix))
+		? uuid : uuid + strlen(prefix);
+
+	*uuid_len = strlen(*uuid_start);
+
+	if (*uuid_len < strlen(UUID_STR))
+		return;
+
+	if (!strncmp(*uuid_start + *uuid_len - strlen(UUID_STR),
+		    UUID_STR, strlen(UUID_STR)))
+		*uuid_len -= strlen(UUID_STR);
+}
+EXPORT_SYMBOL(deuuidify);
+
+/**
+ * Returns true if import was FULL, false if import was already not
+ * connected.
+ * @imp - import to be disconnected
+ * @conn_cnt - connection count (epoch) of the request that timed out
+ *	     and caused the disconnection.  In some cases, multiple
+ *	     inflight requests can fail to a single target (e.g. OST
+ *	     bulk requests) and if one has already caused a reconnection
+ *	     (increasing the import->conn_cnt) the older failure should
+ *	     not also cause a reconnection.  If zero it forces a reconnect.
+ */
+int ptlrpc_set_import_discon(struct obd_import *imp, __u32 conn_cnt)
+{
+	int rc = 0;
+
+	spin_lock(&imp->imp_lock);
+
+	if (imp->imp_state == LUSTRE_IMP_FULL &&
+	    (conn_cnt == 0 || conn_cnt == imp->imp_conn_cnt)) {
+		char *target_start;
+		int   target_len;
+
+		deuuidify(obd2cli_tgt(imp->imp_obd), NULL,
+			  &target_start, &target_len);
+
+		if (imp->imp_replayable) {
+			LCONSOLE_WARN("%s: Connection to %.*s (at %s) was lost; in progress operations using this service will wait for recovery to complete\n",
+				      imp->imp_obd->obd_name, target_len, target_start,
+				      libcfs_nid2str(imp->imp_connection->c_peer.nid));
+		} else {
+			LCONSOLE_ERROR_MSG(0x166, "%s: Connection to %.*s (at %s) was lost; in progress operations using this service will fail\n",
+					   imp->imp_obd->obd_name,
+					   target_len, target_start,
+					   libcfs_nid2str(imp->imp_connection->c_peer.nid));
+		}
+		IMPORT_SET_STATE_NOLOCK(imp, LUSTRE_IMP_DISCON);
+		spin_unlock(&imp->imp_lock);
+
+		if (obd_dump_on_timeout)
+			libcfs_debug_dumplog();
+
+		obd_import_event(imp->imp_obd, imp, IMP_EVENT_DISCON);
+		rc = 1;
+	} else {
+		spin_unlock(&imp->imp_lock);
+		CDEBUG(D_HA, "%s: import %p already %s (conn %u, was %u): %s\n",
+		       imp->imp_client->cli_name, imp,
+		       (imp->imp_state == LUSTRE_IMP_FULL &&
+			imp->imp_conn_cnt > conn_cnt) ?
+		       "reconnected" : "not connected", imp->imp_conn_cnt,
+		       conn_cnt, ptlrpc_import_state_name(imp->imp_state));
+	}
+
+	return rc;
+}
+
+/* Must be called with imp_lock held! */
+static void ptlrpc_deactivate_and_unlock_import(struct obd_import *imp)
+{
+	assert_spin_locked(&imp->imp_lock);
+
+	CDEBUG(D_HA, "setting import %s INVALID\n", obd2cli_tgt(imp->imp_obd));
+	imp->imp_invalid = 1;
+	imp->imp_generation++;
+	spin_unlock(&imp->imp_lock);
+
+	ptlrpc_abort_inflight(imp);
+	obd_import_event(imp->imp_obd, imp, IMP_EVENT_INACTIVE);
+}
+
+/*
+ * This acts as a barrier; all existing requests are rejected, and
+ * no new requests will be accepted until the import is valid again.
+ */
+void ptlrpc_deactivate_import(struct obd_import *imp)
+{
+	spin_lock(&imp->imp_lock);
+	ptlrpc_deactivate_and_unlock_import(imp);
+}
+EXPORT_SYMBOL(ptlrpc_deactivate_import);
+
+static unsigned int
+ptlrpc_inflight_deadline(struct ptlrpc_request *req, time_t now)
+{
+	long dl;
+
+	if (!(((req->rq_phase == RQ_PHASE_RPC) && !req->rq_waiting) ||
+	      (req->rq_phase == RQ_PHASE_BULK) ||
+	      (req->rq_phase == RQ_PHASE_NEW)))
+		return 0;
+
+	if (req->rq_timedout)
+		return 0;
+
+	if (req->rq_phase == RQ_PHASE_NEW)
+		dl = req->rq_sent;
+	else
+		dl = req->rq_deadline;
+
+	if (dl <= now)
+		return 0;
+
+	return dl - now;
+}
+
+static unsigned int ptlrpc_inflight_timeout(struct obd_import *imp)
+{
+	time_t now = get_seconds();
+	struct list_head *tmp, *n;
+	struct ptlrpc_request *req;
+	unsigned int timeout = 0;
+
+	spin_lock(&imp->imp_lock);
+	list_for_each_safe(tmp, n, &imp->imp_sending_list) {
+		req = list_entry(tmp, struct ptlrpc_request, rq_list);
+		timeout = max(ptlrpc_inflight_deadline(req, now), timeout);
+	}
+	spin_unlock(&imp->imp_lock);
+	return timeout;
+}
+
+/**
+ * This function will invalidate the import, if necessary, then block
+ * for all the RPC completions, and finally notify the obd to
+ * invalidate its state (ie cancel locks, clear pending requests,
+ * etc).
+ */
+void ptlrpc_invalidate_import(struct obd_import *imp)
+{
+	struct list_head *tmp, *n;
+	struct ptlrpc_request *req;
+	struct l_wait_info lwi;
+	unsigned int timeout;
+	int rc;
+
+	atomic_inc(&imp->imp_inval_count);
+
+	if (!imp->imp_invalid || imp->imp_obd->obd_no_recov)
+		ptlrpc_deactivate_import(imp);
+
+	CFS_FAIL_TIMEOUT(OBD_FAIL_MGS_CONNECT_NET, 3 * cfs_fail_val / 2);
+	LASSERT(imp->imp_invalid);
+
+	/* Wait forever until inflight == 0. We really can't do it another
+	 * way because in some cases we need to wait for very long reply
+	 * unlink. We can't do anything before that because there is really
+	 * no guarantee that some rdma transfer is not in progress right now. */
+	do {
+		/* Calculate max timeout for waiting on rpcs to error
+		 * out. Use obd_timeout if calculated value is smaller
+		 * than it. */
+		if (!OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REPL_UNLINK)) {
+			timeout = ptlrpc_inflight_timeout(imp);
+			timeout += timeout / 3;
+
+			if (timeout == 0)
+				timeout = obd_timeout;
+		} else {
+			/* decrease the interval to increase race condition */
+			timeout = 1;
+		}
+
+		CDEBUG(D_RPCTRACE,
+		       "Sleeping %d sec for inflight to error out\n",
+		       timeout);
+
+		/* Wait for all requests to error out and call completion
+		 * callbacks. Cap it at obd_timeout -- these should all
+		 * have been locally cancelled by ptlrpc_abort_inflight. */
+		lwi = LWI_TIMEOUT_INTERVAL(
+			cfs_timeout_cap(cfs_time_seconds(timeout)),
+			(timeout > 1)?cfs_time_seconds(1):cfs_time_seconds(1)/2,
+			NULL, NULL);
+		rc = l_wait_event(imp->imp_recovery_waitq,
+				  (atomic_read(&imp->imp_inflight) == 0),
+				  &lwi);
+		if (rc) {
+			const char *cli_tgt = obd2cli_tgt(imp->imp_obd);
+
+			CERROR("%s: rc = %d waiting for callback (%d != 0)\n",
+			       cli_tgt, rc,
+			       atomic_read(&imp->imp_inflight));
+
+			spin_lock(&imp->imp_lock);
+			if (atomic_read(&imp->imp_inflight) == 0) {
+				int count = atomic_read(&imp->imp_unregistering);
+
+				/* We know that "unregistering" rpcs only can
+				 * survive in sending or delaying lists (they
+				 * maybe waiting for long reply unlink in
+				 * sluggish nets). Let's check this. If there
+				 * is no inflight and unregistering != 0, this
+				 * is bug. */
+				LASSERTF(count == 0, "Some RPCs are still unregistering: %d\n",
+					 count);
+
+				/* Let's save one loop as soon as inflight have
+				 * dropped to zero. No new inflights possible at
+				 * this point. */
+				rc = 0;
+			} else {
+				list_for_each_safe(tmp, n,
+						       &imp->imp_sending_list) {
+					req = list_entry(tmp,
+							     struct ptlrpc_request,
+							     rq_list);
+					DEBUG_REQ(D_ERROR, req,
+						  "still on sending list");
+				}
+				list_for_each_safe(tmp, n,
+						       &imp->imp_delayed_list) {
+					req = list_entry(tmp,
+							     struct ptlrpc_request,
+							     rq_list);
+					DEBUG_REQ(D_ERROR, req,
+						  "still on delayed list");
+				}
+
+				CERROR("%s: RPCs in \"%s\" phase found (%d). Network is sluggish? Waiting them to error out.\n",
+				       cli_tgt,
+				       ptlrpc_phase2str(RQ_PHASE_UNREGISTERING),
+				       atomic_read(&imp->
+						   imp_unregistering));
+			}
+			spin_unlock(&imp->imp_lock);
+		  }
+	} while (rc != 0);
+
+	/*
+	 * Let's additionally check that no new rpcs added to import in
+	 * "invalidate" state.
+	 */
+	LASSERT(atomic_read(&imp->imp_inflight) == 0);
+	obd_import_event(imp->imp_obd, imp, IMP_EVENT_INVALIDATE);
+	sptlrpc_import_flush_all_ctx(imp);
+
+	atomic_dec(&imp->imp_inval_count);
+	wake_up_all(&imp->imp_recovery_waitq);
+}
+EXPORT_SYMBOL(ptlrpc_invalidate_import);
+
+/* unset imp_invalid */
+void ptlrpc_activate_import(struct obd_import *imp)
+{
+	struct obd_device *obd = imp->imp_obd;
+
+	spin_lock(&imp->imp_lock);
+	if (imp->imp_deactive != 0) {
+		spin_unlock(&imp->imp_lock);
+		return;
+	}
+
+	imp->imp_invalid = 0;
+	spin_unlock(&imp->imp_lock);
+	obd_import_event(obd, imp, IMP_EVENT_ACTIVE);
+}
+EXPORT_SYMBOL(ptlrpc_activate_import);
+
+static void ptlrpc_pinger_force(struct obd_import *imp)
+{
+	CDEBUG(D_HA, "%s: waking up pinger s:%s\n", obd2cli_tgt(imp->imp_obd),
+	       ptlrpc_import_state_name(imp->imp_state));
+
+	spin_lock(&imp->imp_lock);
+	imp->imp_force_verify = 1;
+	spin_unlock(&imp->imp_lock);
+
+	if (imp->imp_state != LUSTRE_IMP_CONNECTING)
+		ptlrpc_pinger_wake_up();
+}
+
+void ptlrpc_fail_import(struct obd_import *imp, __u32 conn_cnt)
+{
+	LASSERT(!imp->imp_dlm_fake);
+
+	if (ptlrpc_set_import_discon(imp, conn_cnt)) {
+		if (!imp->imp_replayable) {
+			CDEBUG(D_HA, "import %s@%s for %s not replayable, auto-deactivating\n",
+			       obd2cli_tgt(imp->imp_obd),
+			       imp->imp_connection->c_remote_uuid.uuid,
+			       imp->imp_obd->obd_name);
+			ptlrpc_deactivate_import(imp);
+		}
+
+		ptlrpc_pinger_force(imp);
+	}
+}
+EXPORT_SYMBOL(ptlrpc_fail_import);
+
+int ptlrpc_reconnect_import(struct obd_import *imp)
+{
+#ifdef ENABLE_PINGER
+	struct l_wait_info lwi;
+	int secs = cfs_time_seconds(obd_timeout);
+	int rc;
+
+	ptlrpc_pinger_force(imp);
+
+	CDEBUG(D_HA, "%s: recovery started, waiting %u seconds\n",
+	       obd2cli_tgt(imp->imp_obd), secs);
+
+	lwi = LWI_TIMEOUT(secs, NULL, NULL);
+	rc = l_wait_event(imp->imp_recovery_waitq,
+			  !ptlrpc_import_in_recovery(imp), &lwi);
+	CDEBUG(D_HA, "%s: recovery finished s:%s\n", obd2cli_tgt(imp->imp_obd),
+	       ptlrpc_import_state_name(imp->imp_state));
+	return rc;
+#else
+	ptlrpc_set_import_discon(imp, 0);
+	/* Force a new connect attempt */
+	ptlrpc_invalidate_import(imp);
+	/* Do a fresh connect next time by zeroing the handle */
+	ptlrpc_disconnect_import(imp, 1);
+	/* Wait for all invalidate calls to finish */
+	if (atomic_read(&imp->imp_inval_count) > 0) {
+		int rc;
+		struct l_wait_info lwi = LWI_INTR(LWI_ON_SIGNAL_NOOP, NULL);
+		rc = l_wait_event(imp->imp_recovery_waitq,
+				  (atomic_read(&imp->imp_inval_count) == 0),
+				  &lwi);
+		if (rc)
+			CERROR("Interrupted, inval=%d\n",
+			       atomic_read(&imp->imp_inval_count));
+	}
+
+	/* Allow reconnect attempts */
+	imp->imp_obd->obd_no_recov = 0;
+	/* Remove 'invalid' flag */
+	ptlrpc_activate_import(imp);
+	/* Attempt a new connect */
+	ptlrpc_recover_import(imp, NULL, 0);
+	return 0;
+#endif
+}
+EXPORT_SYMBOL(ptlrpc_reconnect_import);
+
+/**
+ * Connection on import \a imp is changed to another one (if more than one is
+ * present). We typically chose connection that we have not tried to connect to
+ * the longest
+ */
+static int import_select_connection(struct obd_import *imp)
+{
+	struct obd_import_conn *imp_conn = NULL, *conn;
+	struct obd_export *dlmexp;
+	char *target_start;
+	int target_len, tried_all = 1;
+
+	spin_lock(&imp->imp_lock);
+
+	if (list_empty(&imp->imp_conn_list)) {
+		CERROR("%s: no connections available\n",
+		       imp->imp_obd->obd_name);
+		spin_unlock(&imp->imp_lock);
+		return -EINVAL;
+	}
+
+	list_for_each_entry(conn, &imp->imp_conn_list, oic_item) {
+		CDEBUG(D_HA, "%s: connect to NID %s last attempt %llu\n",
+		       imp->imp_obd->obd_name,
+		       libcfs_nid2str(conn->oic_conn->c_peer.nid),
+		       conn->oic_last_attempt);
+
+		/* If we have not tried this connection since
+		   the last successful attempt, go with this one */
+		if ((conn->oic_last_attempt == 0) ||
+		    cfs_time_beforeq_64(conn->oic_last_attempt,
+				       imp->imp_last_success_conn)) {
+			imp_conn = conn;
+			tried_all = 0;
+			break;
+		}
+
+		/* If all of the connections have already been tried
+		   since the last successful connection; just choose the
+		   least recently used */
+		if (!imp_conn)
+			imp_conn = conn;
+		else if (cfs_time_before_64(conn->oic_last_attempt,
+					    imp_conn->oic_last_attempt))
+			imp_conn = conn;
+	}
+
+	/* if not found, simply choose the current one */
+	if (!imp_conn || imp->imp_force_reconnect) {
+		LASSERT(imp->imp_conn_current);
+		imp_conn = imp->imp_conn_current;
+		tried_all = 0;
+	}
+	LASSERT(imp_conn->oic_conn);
+
+	/* If we've tried everything, and we're back to the beginning of the
+	   list, increase our timeout and try again. It will be reset when
+	   we do finally connect. (FIXME: really we should wait for all network
+	   state associated with the last connection attempt to drain before
+	   trying to reconnect on it.) */
+	if (tried_all && (imp->imp_conn_list.next == &imp_conn->oic_item)) {
+		struct adaptive_timeout *at = &imp->imp_at.iat_net_latency;
+		if (at_get(at) < CONNECTION_SWITCH_MAX) {
+			at_measured(at, at_get(at) + CONNECTION_SWITCH_INC);
+			if (at_get(at) > CONNECTION_SWITCH_MAX)
+				at_reset(at, CONNECTION_SWITCH_MAX);
+		}
+		LASSERT(imp_conn->oic_last_attempt);
+		CDEBUG(D_HA, "%s: tried all connections, increasing latency to %ds\n",
+		       imp->imp_obd->obd_name, at_get(at));
+	}
+
+	imp_conn->oic_last_attempt = cfs_time_current_64();
+
+	/* switch connection, don't mind if it's same as the current one */
+	if (imp->imp_connection)
+		ptlrpc_connection_put(imp->imp_connection);
+	imp->imp_connection = ptlrpc_connection_addref(imp_conn->oic_conn);
+
+	dlmexp =  class_conn2export(&imp->imp_dlm_handle);
+	LASSERT(dlmexp != NULL);
+	if (dlmexp->exp_connection)
+		ptlrpc_connection_put(dlmexp->exp_connection);
+	dlmexp->exp_connection = ptlrpc_connection_addref(imp_conn->oic_conn);
+	class_export_put(dlmexp);
+
+	if (imp->imp_conn_current != imp_conn) {
+		if (imp->imp_conn_current) {
+			deuuidify(obd2cli_tgt(imp->imp_obd), NULL,
+				  &target_start, &target_len);
+
+			CDEBUG(D_HA, "%s: Connection changing to %.*s (at %s)\n",
+			       imp->imp_obd->obd_name,
+			       target_len, target_start,
+			       libcfs_nid2str(imp_conn->oic_conn->c_peer.nid));
+		}
+
+		imp->imp_conn_current = imp_conn;
+	}
+
+	CDEBUG(D_HA, "%s: import %p using connection %s/%s\n",
+	       imp->imp_obd->obd_name, imp, imp_conn->oic_uuid.uuid,
+	       libcfs_nid2str(imp_conn->oic_conn->c_peer.nid));
+
+	spin_unlock(&imp->imp_lock);
+
+	return 0;
+}
+
+/*
+ * must be called under imp_lock
+ */
+static int ptlrpc_first_transno(struct obd_import *imp, __u64 *transno)
+{
+	struct ptlrpc_request *req;
+	struct list_head *tmp;
+
+	/* The requests in committed_list always have smaller transnos than
+	 * the requests in replay_list */
+	if (!list_empty(&imp->imp_committed_list)) {
+		tmp = imp->imp_committed_list.next;
+		req = list_entry(tmp, struct ptlrpc_request, rq_replay_list);
+		*transno = req->rq_transno;
+		if (req->rq_transno == 0) {
+			DEBUG_REQ(D_ERROR, req,
+				  "zero transno in committed_list");
+			LBUG();
+		}
+		return 1;
+	}
+	if (!list_empty(&imp->imp_replay_list)) {
+		tmp = imp->imp_replay_list.next;
+		req = list_entry(tmp, struct ptlrpc_request, rq_replay_list);
+		*transno = req->rq_transno;
+		if (req->rq_transno == 0) {
+			DEBUG_REQ(D_ERROR, req, "zero transno in replay_list");
+			LBUG();
+		}
+		return 1;
+	}
+	return 0;
+}
+
+/**
+ * Attempt to (re)connect import \a imp. This includes all preparations,
+ * initializing CONNECT RPC request and passing it to ptlrpcd for
+ * actual sending.
+ * Returns 0 on success or error code.
+ */
+int ptlrpc_connect_import(struct obd_import *imp)
+{
+	struct obd_device *obd = imp->imp_obd;
+	int initial_connect = 0;
+	int set_transno = 0;
+	__u64 committed_before_reconnect = 0;
+	struct ptlrpc_request *request;
+	char *bufs[] = { NULL,
+			 obd2cli_tgt(imp->imp_obd),
+			 obd->obd_uuid.uuid,
+			 (char *)&imp->imp_dlm_handle,
+			 (char *)&imp->imp_connect_data };
+	struct ptlrpc_connect_async_args *aa;
+	int rc;
+
+	spin_lock(&imp->imp_lock);
+	if (imp->imp_state == LUSTRE_IMP_CLOSED) {
+		spin_unlock(&imp->imp_lock);
+		CERROR("can't connect to a closed import\n");
+		return -EINVAL;
+	} else if (imp->imp_state == LUSTRE_IMP_FULL) {
+		spin_unlock(&imp->imp_lock);
+		CERROR("already connected\n");
+		return 0;
+	} else if (imp->imp_state == LUSTRE_IMP_CONNECTING) {
+		spin_unlock(&imp->imp_lock);
+		CERROR("already connecting\n");
+		return -EALREADY;
+	}
+
+	IMPORT_SET_STATE_NOLOCK(imp, LUSTRE_IMP_CONNECTING);
+
+	imp->imp_conn_cnt++;
+	imp->imp_resend_replay = 0;
+
+	if (!lustre_handle_is_used(&imp->imp_remote_handle))
+		initial_connect = 1;
+	else
+		committed_before_reconnect = imp->imp_peer_committed_transno;
+
+	set_transno = ptlrpc_first_transno(imp,
+					   &imp->imp_connect_data.ocd_transno);
+	spin_unlock(&imp->imp_lock);
+
+	rc = import_select_connection(imp);
+	if (rc)
+		goto out;
+
+	rc = sptlrpc_import_sec_adapt(imp, NULL, NULL);
+	if (rc)
+		goto out;
+
+	/* Reset connect flags to the originally requested flags, in case
+	 * the server is updated on-the-fly we will get the new features. */
+	imp->imp_connect_data.ocd_connect_flags = imp->imp_connect_flags_orig;
+	/* Reset ocd_version each time so the server knows the exact versions */
+	imp->imp_connect_data.ocd_version = LUSTRE_VERSION_CODE;
+	imp->imp_msghdr_flags &= ~MSGHDR_AT_SUPPORT;
+	imp->imp_msghdr_flags &= ~MSGHDR_CKSUM_INCOMPAT18;
+
+	rc = obd_reconnect(NULL, imp->imp_obd->obd_self_export, obd,
+			   &obd->obd_uuid, &imp->imp_connect_data, NULL);
+	if (rc)
+		goto out;
+
+	request = ptlrpc_request_alloc(imp, &RQF_MDS_CONNECT);
+	if (request == NULL) {
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	rc = ptlrpc_request_bufs_pack(request, LUSTRE_OBD_VERSION,
+				      imp->imp_connect_op, bufs, NULL);
+	if (rc) {
+		ptlrpc_request_free(request);
+		goto out;
+	}
+
+	/* Report the rpc service time to the server so that it knows how long
+	 * to wait for clients to join recovery */
+	lustre_msg_set_service_time(request->rq_reqmsg,
+				    at_timeout2est(request->rq_timeout));
+
+	/* The amount of time we give the server to process the connect req.
+	 * import_select_connection will increase the net latency on
+	 * repeated reconnect attempts to cover slow networks.
+	 * We override/ignore the server rpc completion estimate here,
+	 * which may be large if this is a reconnect attempt */
+	request->rq_timeout = INITIAL_CONNECT_TIMEOUT;
+	lustre_msg_set_timeout(request->rq_reqmsg, request->rq_timeout);
+
+	lustre_msg_add_op_flags(request->rq_reqmsg, MSG_CONNECT_NEXT_VER);
+
+	request->rq_no_resend = request->rq_no_delay = 1;
+	request->rq_send_state = LUSTRE_IMP_CONNECTING;
+	/* Allow a slightly larger reply for future growth compatibility */
+	req_capsule_set_size(&request->rq_pill, &RMF_CONNECT_DATA, RCL_SERVER,
+			     sizeof(struct obd_connect_data)+16*sizeof(__u64));
+	ptlrpc_request_set_replen(request);
+	request->rq_interpret_reply = ptlrpc_connect_interpret;
+
+	CLASSERT(sizeof(*aa) <= sizeof(request->rq_async_args));
+	aa = ptlrpc_req_async_args(request);
+	memset(aa, 0, sizeof(*aa));
+
+	aa->pcaa_peer_committed = committed_before_reconnect;
+	aa->pcaa_initial_connect = initial_connect;
+
+	if (aa->pcaa_initial_connect) {
+		spin_lock(&imp->imp_lock);
+		imp->imp_replayable = 1;
+		spin_unlock(&imp->imp_lock);
+		lustre_msg_add_op_flags(request->rq_reqmsg,
+					MSG_CONNECT_INITIAL);
+	}
+
+	if (set_transno)
+		lustre_msg_add_op_flags(request->rq_reqmsg,
+					MSG_CONNECT_TRANSNO);
+
+	DEBUG_REQ(D_RPCTRACE, request, "(re)connect request (timeout %d)",
+		  request->rq_timeout);
+	ptlrpcd_add_req(request, PDL_POLICY_ROUND, -1);
+	rc = 0;
+out:
+	if (rc != 0) {
+		IMPORT_SET_STATE(imp, LUSTRE_IMP_DISCON);
+	}
+
+	return rc;
+}
+EXPORT_SYMBOL(ptlrpc_connect_import);
+
+static void ptlrpc_maybe_ping_import_soon(struct obd_import *imp)
+{
+	int force_verify;
+
+	spin_lock(&imp->imp_lock);
+	force_verify = imp->imp_force_verify != 0;
+	spin_unlock(&imp->imp_lock);
+
+	if (force_verify)
+		ptlrpc_pinger_wake_up();
+}
+
+static int ptlrpc_busy_reconnect(int rc)
+{
+	return (rc == -EBUSY) || (rc == -EAGAIN);
+}
+
+/**
+ * interpret_reply callback for connect RPCs.
+ * Looks into returned status of connect operation and decides
+ * what to do with the import - i.e enter recovery, promote it to
+ * full state for normal operations of disconnect it due to an error.
+ */
+static int ptlrpc_connect_interpret(const struct lu_env *env,
+				    struct ptlrpc_request *request,
+				    void *data, int rc)
+{
+	struct ptlrpc_connect_async_args *aa = data;
+	struct obd_import *imp = request->rq_import;
+	struct client_obd *cli = &imp->imp_obd->u.cli;
+	struct lustre_handle old_hdl;
+	__u64 old_connect_flags;
+	int msg_flags;
+	struct obd_connect_data *ocd;
+	struct obd_export *exp;
+	int ret;
+
+	spin_lock(&imp->imp_lock);
+	if (imp->imp_state == LUSTRE_IMP_CLOSED) {
+		imp->imp_connect_tried = 1;
+		spin_unlock(&imp->imp_lock);
+		return 0;
+	}
+
+	if (rc) {
+		/* if this reconnect to busy export - not need select new target
+		 * for connecting*/
+		imp->imp_force_reconnect = ptlrpc_busy_reconnect(rc);
+		spin_unlock(&imp->imp_lock);
+		ptlrpc_maybe_ping_import_soon(imp);
+		goto out;
+	}
+	spin_unlock(&imp->imp_lock);
+
+	LASSERT(imp->imp_conn_current);
+
+	msg_flags = lustre_msg_get_op_flags(request->rq_repmsg);
+
+	ret = req_capsule_get_size(&request->rq_pill, &RMF_CONNECT_DATA,
+				   RCL_SERVER);
+	/* server replied obd_connect_data is always bigger */
+	ocd = req_capsule_server_sized_get(&request->rq_pill,
+					   &RMF_CONNECT_DATA, ret);
+
+	if (ocd == NULL) {
+		CERROR("%s: no connect data from server\n",
+		       imp->imp_obd->obd_name);
+		rc = -EPROTO;
+		goto out;
+	}
+
+	spin_lock(&imp->imp_lock);
+
+	/* All imports are pingable */
+	imp->imp_pingable = 1;
+	imp->imp_force_reconnect = 0;
+	imp->imp_force_verify = 0;
+
+	imp->imp_connect_data = *ocd;
+
+	CDEBUG(D_HA, "%s: connect to target with instance %u\n",
+	       imp->imp_obd->obd_name, ocd->ocd_instance);
+	exp = class_conn2export(&imp->imp_dlm_handle);
+
+	spin_unlock(&imp->imp_lock);
+
+	/* check that server granted subset of flags we asked for. */
+	if ((ocd->ocd_connect_flags & imp->imp_connect_flags_orig) !=
+	    ocd->ocd_connect_flags) {
+		CERROR("%s: Server didn't granted asked subset of flags: asked=%#llx grranted=%#llx\n",
+		       imp->imp_obd->obd_name, imp->imp_connect_flags_orig,
+		       ocd->ocd_connect_flags);
+		rc = -EPROTO;
+		goto out;
+	}
+
+	if (!exp) {
+		/* This could happen if export is cleaned during the
+		   connect attempt */
+		CERROR("%s: missing export after connect\n",
+		       imp->imp_obd->obd_name);
+		rc = -ENODEV;
+		goto out;
+	}
+	old_connect_flags = exp_connect_flags(exp);
+	exp->exp_connect_data = *ocd;
+	imp->imp_obd->obd_self_export->exp_connect_data = *ocd;
+	class_export_put(exp);
+
+	obd_import_event(imp->imp_obd, imp, IMP_EVENT_OCD);
+
+	if (aa->pcaa_initial_connect) {
+		spin_lock(&imp->imp_lock);
+		if (msg_flags & MSG_CONNECT_REPLAYABLE) {
+			imp->imp_replayable = 1;
+			spin_unlock(&imp->imp_lock);
+			CDEBUG(D_HA, "connected to replayable target: %s\n",
+			       obd2cli_tgt(imp->imp_obd));
+		} else {
+			imp->imp_replayable = 0;
+			spin_unlock(&imp->imp_lock);
+		}
+
+		/* if applies, adjust the imp->imp_msg_magic here
+		 * according to reply flags */
+
+		imp->imp_remote_handle =
+				*lustre_msg_get_handle(request->rq_repmsg);
+
+		/* Initial connects are allowed for clients with non-random
+		 * uuids when servers are in recovery.  Simply signal the
+		 * servers replay is complete and wait in REPLAY_WAIT. */
+		if (msg_flags & MSG_CONNECT_RECOVERING) {
+			CDEBUG(D_HA, "connect to %s during recovery\n",
+			       obd2cli_tgt(imp->imp_obd));
+			IMPORT_SET_STATE(imp, LUSTRE_IMP_REPLAY_LOCKS);
+		} else {
+			IMPORT_SET_STATE(imp, LUSTRE_IMP_FULL);
+			ptlrpc_activate_import(imp);
+		}
+
+		rc = 0;
+		goto finish;
+	}
+
+	/* Determine what recovery state to move the import to. */
+	if (MSG_CONNECT_RECONNECT & msg_flags) {
+		memset(&old_hdl, 0, sizeof(old_hdl));
+		if (!memcmp(&old_hdl, lustre_msg_get_handle(request->rq_repmsg),
+			    sizeof(old_hdl))) {
+			LCONSOLE_WARN("Reconnect to %s (at @%s) failed due bad handle %#llx\n",
+				      obd2cli_tgt(imp->imp_obd),
+				      imp->imp_connection->c_remote_uuid.uuid,
+				      imp->imp_dlm_handle.cookie);
+			rc = -ENOTCONN;
+			goto out;
+		}
+
+		if (memcmp(&imp->imp_remote_handle,
+			   lustre_msg_get_handle(request->rq_repmsg),
+			   sizeof(imp->imp_remote_handle))) {
+			int level = msg_flags & MSG_CONNECT_RECOVERING ?
+				D_HA : D_WARNING;
+
+			/* Bug 16611/14775: if server handle have changed,
+			 * that means some sort of disconnection happened.
+			 * If the server is not in recovery, that also means it
+			 * already erased all of our state because of previous
+			 * eviction. If it is in recovery - we are safe to
+			 * participate since we can reestablish all of our state
+			 * with server again */
+			if ((MSG_CONNECT_RECOVERING & msg_flags)) {
+				CDEBUG(level, "%s@%s changed server handle from %#llx to %#llx but is still in recovery\n",
+				       obd2cli_tgt(imp->imp_obd),
+				       imp->imp_connection->c_remote_uuid.uuid,
+				       imp->imp_remote_handle.cookie,
+				       lustre_msg_get_handle(
+				       request->rq_repmsg)->cookie);
+			} else {
+				LCONSOLE_WARN("Evicted from %s (at %s) after server handle changed from %#llx to %#llx\n",
+					      obd2cli_tgt(imp->imp_obd),
+					      imp->imp_connection-> \
+					      c_remote_uuid.uuid,
+					      imp->imp_remote_handle.cookie,
+					      lustre_msg_get_handle(
+						      request->rq_repmsg)->cookie);
+			}
+
+
+			imp->imp_remote_handle =
+				     *lustre_msg_get_handle(request->rq_repmsg);
+
+			if (!(MSG_CONNECT_RECOVERING & msg_flags)) {
+				IMPORT_SET_STATE(imp, LUSTRE_IMP_EVICTED);
+				rc = 0;
+				goto finish;
+			}
+
+		} else {
+			CDEBUG(D_HA, "reconnected to %s@%s after partition\n",
+			       obd2cli_tgt(imp->imp_obd),
+			       imp->imp_connection->c_remote_uuid.uuid);
+		}
+
+		if (imp->imp_invalid) {
+			CDEBUG(D_HA, "%s: reconnected but import is invalid; marking evicted\n",
+			       imp->imp_obd->obd_name);
+			IMPORT_SET_STATE(imp, LUSTRE_IMP_EVICTED);
+		} else if (MSG_CONNECT_RECOVERING & msg_flags) {
+			CDEBUG(D_HA, "%s: reconnected to %s during replay\n",
+			       imp->imp_obd->obd_name,
+			       obd2cli_tgt(imp->imp_obd));
+
+			spin_lock(&imp->imp_lock);
+			imp->imp_resend_replay = 1;
+			spin_unlock(&imp->imp_lock);
+
+			IMPORT_SET_STATE(imp, imp->imp_replay_state);
+		} else {
+			IMPORT_SET_STATE(imp, LUSTRE_IMP_RECOVER);
+		}
+	} else if ((MSG_CONNECT_RECOVERING & msg_flags) && !imp->imp_invalid) {
+		LASSERT(imp->imp_replayable);
+		imp->imp_remote_handle =
+				*lustre_msg_get_handle(request->rq_repmsg);
+		imp->imp_last_replay_transno = 0;
+		IMPORT_SET_STATE(imp, LUSTRE_IMP_REPLAY);
+	} else {
+		DEBUG_REQ(D_HA, request, "%s: evicting (reconnect/recover flags not set: %x)",
+			  imp->imp_obd->obd_name, msg_flags);
+		imp->imp_remote_handle =
+				*lustre_msg_get_handle(request->rq_repmsg);
+		IMPORT_SET_STATE(imp, LUSTRE_IMP_EVICTED);
+	}
+
+	/* Sanity checks for a reconnected import. */
+	if (!(imp->imp_replayable) != !(msg_flags & MSG_CONNECT_REPLAYABLE)) {
+		CERROR("imp_replayable flag does not match server after reconnect. We should LBUG right here.\n");
+	}
+
+	if (lustre_msg_get_last_committed(request->rq_repmsg) > 0 &&
+	    lustre_msg_get_last_committed(request->rq_repmsg) <
+	    aa->pcaa_peer_committed) {
+		CERROR("%s went back in time (transno %lld was previously committed, server now claims %lld)!  See https://bugzilla.lustre.org/show_bug.cgi?id=9646\n",
+		       obd2cli_tgt(imp->imp_obd), aa->pcaa_peer_committed,
+		       lustre_msg_get_last_committed(request->rq_repmsg));
+	}
+
+finish:
+	rc = ptlrpc_import_recovery_state_machine(imp);
+	if (rc != 0) {
+		if (rc == -ENOTCONN) {
+			CDEBUG(D_HA, "evicted/aborted by %s@%s during recovery; invalidating and reconnecting\n",
+			       obd2cli_tgt(imp->imp_obd),
+			       imp->imp_connection->c_remote_uuid.uuid);
+			ptlrpc_connect_import(imp);
+			imp->imp_connect_tried = 1;
+			return 0;
+		}
+	} else {
+
+		spin_lock(&imp->imp_lock);
+		list_del(&imp->imp_conn_current->oic_item);
+		list_add(&imp->imp_conn_current->oic_item,
+			     &imp->imp_conn_list);
+		imp->imp_last_success_conn =
+			imp->imp_conn_current->oic_last_attempt;
+
+		spin_unlock(&imp->imp_lock);
+
+		if ((imp->imp_connect_flags_orig & OBD_CONNECT_IBITS) &&
+		    !(ocd->ocd_connect_flags & OBD_CONNECT_IBITS)) {
+			LCONSOLE_WARN("%s: MDS %s does not support ibits lock, either very old or invalid: requested %llx, replied %llx\n",
+				      imp->imp_obd->obd_name,
+				      imp->imp_connection->c_remote_uuid.uuid,
+				      imp->imp_connect_flags_orig,
+				      ocd->ocd_connect_flags);
+			rc = -EPROTO;
+			goto out;
+		}
+
+		if ((ocd->ocd_connect_flags & OBD_CONNECT_VERSION) &&
+		    (ocd->ocd_version > LUSTRE_VERSION_CODE +
+					LUSTRE_VERSION_OFFSET_WARN ||
+		     ocd->ocd_version < LUSTRE_VERSION_CODE -
+					LUSTRE_VERSION_OFFSET_WARN)) {
+			/* Sigh, some compilers do not like #ifdef in the middle
+			   of macro arguments */
+			const char *older = "older. Consider upgrading server or downgrading client"
+				;
+			const char *newer = "newer than client version. Consider upgrading client"
+					    ;
+
+			LCONSOLE_WARN("Server %s version (%d.%d.%d.%d) is much %s (%s)\n",
+				      obd2cli_tgt(imp->imp_obd),
+				      OBD_OCD_VERSION_MAJOR(ocd->ocd_version),
+				      OBD_OCD_VERSION_MINOR(ocd->ocd_version),
+				      OBD_OCD_VERSION_PATCH(ocd->ocd_version),
+				      OBD_OCD_VERSION_FIX(ocd->ocd_version),
+				      ocd->ocd_version > LUSTRE_VERSION_CODE ?
+				      newer : older, LUSTRE_VERSION_STRING);
+		}
+
+#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(3, 2, 50, 0)
+		/* Check if server has LU-1252 fix applied to not always swab
+		 * the IR MNE entries. Do this only once per connection.  This
+		 * fixup is version-limited, because we don't want to carry the
+		 * OBD_CONNECT_MNE_SWAB flag around forever, just so long as we
+		 * need interop with unpatched 2.2 servers.  For newer servers,
+		 * the client will do MNE swabbing only as needed.  LU-1644 */
+		if (unlikely((ocd->ocd_connect_flags & OBD_CONNECT_VERSION) &&
+			     !(ocd->ocd_connect_flags & OBD_CONNECT_MNE_SWAB) &&
+			     OBD_OCD_VERSION_MAJOR(ocd->ocd_version) == 2 &&
+			     OBD_OCD_VERSION_MINOR(ocd->ocd_version) == 2 &&
+			     OBD_OCD_VERSION_PATCH(ocd->ocd_version) < 55 &&
+			     strcmp(imp->imp_obd->obd_type->typ_name,
+				    LUSTRE_MGC_NAME) == 0))
+			imp->imp_need_mne_swab = 1;
+		else /* clear if server was upgraded since last connect */
+			imp->imp_need_mne_swab = 0;
+#else
+#warning "LU-1644: Remove old OBD_CONNECT_MNE_SWAB fixup and imp_need_mne_swab"
+#endif
+
+		if (ocd->ocd_connect_flags & OBD_CONNECT_CKSUM) {
+			/* We sent to the server ocd_cksum_types with bits set
+			 * for algorithms we understand. The server masked off
+			 * the checksum types it doesn't support */
+			if ((ocd->ocd_cksum_types &
+			     cksum_types_supported_client()) == 0) {
+				LCONSOLE_WARN("The negotiation of the checksum algorithm to use with server %s failed (%x/%x), disabling checksums\n",
+					      obd2cli_tgt(imp->imp_obd),
+					      ocd->ocd_cksum_types,
+					      cksum_types_supported_client());
+				cli->cl_checksum = 0;
+				cli->cl_supp_cksum_types = OBD_CKSUM_ADLER;
+			} else {
+				cli->cl_supp_cksum_types = ocd->ocd_cksum_types;
+			}
+		} else {
+			/* The server does not support OBD_CONNECT_CKSUM.
+			 * Enforce ADLER for backward compatibility*/
+			cli->cl_supp_cksum_types = OBD_CKSUM_ADLER;
+		}
+		cli->cl_cksum_type = cksum_type_select(cli->cl_supp_cksum_types);
+
+		if (ocd->ocd_connect_flags & OBD_CONNECT_BRW_SIZE)
+			cli->cl_max_pages_per_rpc =
+				min(ocd->ocd_brw_size >> PAGE_CACHE_SHIFT,
+				    cli->cl_max_pages_per_rpc);
+		else if (imp->imp_connect_op == MDS_CONNECT ||
+			 imp->imp_connect_op == MGS_CONNECT)
+			cli->cl_max_pages_per_rpc = 1;
+
+		/* Reset ns_connect_flags only for initial connect. It might be
+		 * changed in while using FS and if we reset it in reconnect
+		 * this leads to losing user settings done before such as
+		 * disable lru_resize, etc. */
+		if (old_connect_flags != exp_connect_flags(exp) ||
+		    aa->pcaa_initial_connect) {
+			CDEBUG(D_HA, "%s: Resetting ns_connect_flags to server flags: %#llx\n",
+			       imp->imp_obd->obd_name, ocd->ocd_connect_flags);
+			imp->imp_obd->obd_namespace->ns_connect_flags =
+				ocd->ocd_connect_flags;
+			imp->imp_obd->obd_namespace->ns_orig_connect_flags =
+				ocd->ocd_connect_flags;
+		}
+
+		if ((ocd->ocd_connect_flags & OBD_CONNECT_AT) &&
+		    (imp->imp_msg_magic == LUSTRE_MSG_MAGIC_V2))
+			/* We need a per-message support flag, because
+			   a. we don't know if the incoming connect reply
+			      supports AT or not (in reply_in_callback)
+			      until we unpack it.
+			   b. failovered server means export and flags are gone
+			      (in ptlrpc_send_reply).
+			   Can only be set when we know AT is supported at
+			   both ends */
+			imp->imp_msghdr_flags |= MSGHDR_AT_SUPPORT;
+		else
+			imp->imp_msghdr_flags &= ~MSGHDR_AT_SUPPORT;
+
+		if ((ocd->ocd_connect_flags & OBD_CONNECT_FULL20) &&
+		    (imp->imp_msg_magic == LUSTRE_MSG_MAGIC_V2))
+			imp->imp_msghdr_flags |= MSGHDR_CKSUM_INCOMPAT18;
+		else
+			imp->imp_msghdr_flags &= ~MSGHDR_CKSUM_INCOMPAT18;
+
+		LASSERT((cli->cl_max_pages_per_rpc <= PTLRPC_MAX_BRW_PAGES) &&
+			(cli->cl_max_pages_per_rpc > 0));
+	}
+
+out:
+	imp->imp_connect_tried = 1;
+
+	if (rc != 0) {
+		IMPORT_SET_STATE(imp, LUSTRE_IMP_DISCON);
+		if (rc == -EACCES) {
+			/*
+			 * Give up trying to reconnect
+			 * EACCES means client has no permission for connection
+			 */
+			imp->imp_obd->obd_no_recov = 1;
+			ptlrpc_deactivate_import(imp);
+		}
+
+		if (rc == -EPROTO) {
+			struct obd_connect_data *ocd;
+
+			/* reply message might not be ready */
+			if (request->rq_repmsg == NULL)
+				return -EPROTO;
+
+			ocd = req_capsule_server_get(&request->rq_pill,
+						     &RMF_CONNECT_DATA);
+			if (ocd &&
+			    (ocd->ocd_connect_flags & OBD_CONNECT_VERSION) &&
+			    (ocd->ocd_version != LUSTRE_VERSION_CODE)) {
+				/*
+				 * Actually servers are only supposed to refuse
+				 * connection from liblustre clients, so we
+				 * should never see this from VFS context
+				 */
+				LCONSOLE_ERROR_MSG(0x16a, "Server %s version (%d.%d.%d.%d) refused connection from this client with an incompatible version (%s).  Client must be recompiled\n",
+						   obd2cli_tgt(imp->imp_obd),
+						   OBD_OCD_VERSION_MAJOR(ocd->ocd_version),
+						   OBD_OCD_VERSION_MINOR(ocd->ocd_version),
+						   OBD_OCD_VERSION_PATCH(ocd->ocd_version),
+						   OBD_OCD_VERSION_FIX(ocd->ocd_version),
+						   LUSTRE_VERSION_STRING);
+				ptlrpc_deactivate_import(imp);
+				IMPORT_SET_STATE(imp, LUSTRE_IMP_CLOSED);
+			}
+			return -EPROTO;
+		}
+
+		ptlrpc_maybe_ping_import_soon(imp);
+
+		CDEBUG(D_HA, "recovery of %s on %s failed (%d)\n",
+		       obd2cli_tgt(imp->imp_obd),
+		       (char *)imp->imp_connection->c_remote_uuid.uuid, rc);
+	}
+
+	wake_up_all(&imp->imp_recovery_waitq);
+	return rc;
+}
+
+/**
+ * interpret callback for "completed replay" RPCs.
+ * \see signal_completed_replay
+ */
+static int completed_replay_interpret(const struct lu_env *env,
+				      struct ptlrpc_request *req,
+				      void *data, int rc)
+{
+	atomic_dec(&req->rq_import->imp_replay_inflight);
+	if (req->rq_status == 0 &&
+	    !req->rq_import->imp_vbr_failed) {
+		ptlrpc_import_recovery_state_machine(req->rq_import);
+	} else {
+		if (req->rq_import->imp_vbr_failed) {
+			CDEBUG(D_WARNING,
+			       "%s: version recovery fails, reconnecting\n",
+			       req->rq_import->imp_obd->obd_name);
+		} else {
+			CDEBUG(D_HA, "%s: LAST_REPLAY message error: %d, reconnecting\n",
+			       req->rq_import->imp_obd->obd_name,
+			       req->rq_status);
+		}
+		ptlrpc_connect_import(req->rq_import);
+	}
+
+	return 0;
+}
+
+/**
+ * Let server know that we have no requests to replay anymore.
+ * Achieved by just sending a PING request
+ */
+static int signal_completed_replay(struct obd_import *imp)
+{
+	struct ptlrpc_request *req;
+
+	if (unlikely(OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_FINISH_REPLAY)))
+		return 0;
+
+	LASSERT(atomic_read(&imp->imp_replay_inflight) == 0);
+	atomic_inc(&imp->imp_replay_inflight);
+
+	req = ptlrpc_request_alloc_pack(imp, &RQF_OBD_PING, LUSTRE_OBD_VERSION,
+					OBD_PING);
+	if (req == NULL) {
+		atomic_dec(&imp->imp_replay_inflight);
+		return -ENOMEM;
+	}
+
+	ptlrpc_request_set_replen(req);
+	req->rq_send_state = LUSTRE_IMP_REPLAY_WAIT;
+	lustre_msg_add_flags(req->rq_reqmsg,
+			     MSG_LOCK_REPLAY_DONE | MSG_REQ_REPLAY_DONE);
+	if (AT_OFF)
+		req->rq_timeout *= 3;
+	req->rq_interpret_reply = completed_replay_interpret;
+
+	ptlrpcd_add_req(req, PDL_POLICY_ROUND, -1);
+	return 0;
+}
+
+/**
+ * In kernel code all import invalidation happens in its own
+ * separate thread, so that whatever application happened to encounter
+ * a problem could still be killed or otherwise continue
+ */
+static int ptlrpc_invalidate_import_thread(void *data)
+{
+	struct obd_import *imp = data;
+
+	unshare_fs_struct();
+
+	CDEBUG(D_HA, "thread invalidate import %s to %s@%s\n",
+	       imp->imp_obd->obd_name, obd2cli_tgt(imp->imp_obd),
+	       imp->imp_connection->c_remote_uuid.uuid);
+
+	ptlrpc_invalidate_import(imp);
+
+	if (obd_dump_on_eviction) {
+		CERROR("dump the log upon eviction\n");
+		libcfs_debug_dumplog();
+	}
+
+	IMPORT_SET_STATE(imp, LUSTRE_IMP_RECOVER);
+	ptlrpc_import_recovery_state_machine(imp);
+
+	class_import_put(imp);
+	return 0;
+}
+
+/**
+ * This is the state machine for client-side recovery on import.
+ *
+ * Typically we have two possibly paths. If we came to server and it is not
+ * in recovery, we just enter IMP_EVICTED state, invalidate our import
+ * state and reconnect from scratch.
+ * If we came to server that is in recovery, we enter IMP_REPLAY import state.
+ * We go through our list of requests to replay and send them to server one by
+ * one.
+ * After sending all request from the list we change import state to
+ * IMP_REPLAY_LOCKS and re-request all the locks we believe we have from server
+ * and also all the locks we don't yet have and wait for server to grant us.
+ * After that we send a special "replay completed" request and change import
+ * state to IMP_REPLAY_WAIT.
+ * Upon receiving reply to that "replay completed" RPC we enter IMP_RECOVER
+ * state and resend all requests from sending list.
+ * After that we promote import to FULL state and send all delayed requests
+ * and import is fully operational after that.
+ *
+ */
+int ptlrpc_import_recovery_state_machine(struct obd_import *imp)
+{
+	int rc = 0;
+	int inflight;
+	char *target_start;
+	int target_len;
+
+	if (imp->imp_state == LUSTRE_IMP_EVICTED) {
+		deuuidify(obd2cli_tgt(imp->imp_obd), NULL,
+			  &target_start, &target_len);
+		/* Don't care about MGC eviction */
+		if (strcmp(imp->imp_obd->obd_type->typ_name,
+			   LUSTRE_MGC_NAME) != 0) {
+			LCONSOLE_ERROR_MSG(0x167, "%s: This client was evicted by %.*s; in progress operations using this service will fail.\n",
+					   imp->imp_obd->obd_name, target_len,
+					   target_start);
+		}
+		CDEBUG(D_HA, "evicted from %s@%s; invalidating\n",
+		       obd2cli_tgt(imp->imp_obd),
+		       imp->imp_connection->c_remote_uuid.uuid);
+		/* reset vbr_failed flag upon eviction */
+		spin_lock(&imp->imp_lock);
+		imp->imp_vbr_failed = 0;
+		spin_unlock(&imp->imp_lock);
+
+		{
+		struct task_struct *task;
+		/* bug 17802:  XXX client_disconnect_export vs connect request
+		 * race. if client will evicted at this time, we start
+		 * invalidate thread without reference to import and import can
+		 * be freed at same time. */
+		class_import_get(imp);
+		task = kthread_run(ptlrpc_invalidate_import_thread, imp,
+				     "ll_imp_inval");
+		if (IS_ERR(task)) {
+			class_import_put(imp);
+			CERROR("error starting invalidate thread: %d\n", rc);
+			rc = PTR_ERR(task);
+		} else {
+			rc = 0;
+		}
+		return rc;
+		}
+	}
+
+	if (imp->imp_state == LUSTRE_IMP_REPLAY) {
+		CDEBUG(D_HA, "replay requested by %s\n",
+		       obd2cli_tgt(imp->imp_obd));
+		rc = ptlrpc_replay_next(imp, &inflight);
+		if (inflight == 0 &&
+		    atomic_read(&imp->imp_replay_inflight) == 0) {
+			IMPORT_SET_STATE(imp, LUSTRE_IMP_REPLAY_LOCKS);
+			rc = ldlm_replay_locks(imp);
+			if (rc)
+				goto out;
+		}
+		rc = 0;
+	}
+
+	if (imp->imp_state == LUSTRE_IMP_REPLAY_LOCKS) {
+		if (atomic_read(&imp->imp_replay_inflight) == 0) {
+			IMPORT_SET_STATE(imp, LUSTRE_IMP_REPLAY_WAIT);
+			rc = signal_completed_replay(imp);
+			if (rc)
+				goto out;
+		}
+
+	}
+
+	if (imp->imp_state == LUSTRE_IMP_REPLAY_WAIT) {
+		if (atomic_read(&imp->imp_replay_inflight) == 0) {
+			IMPORT_SET_STATE(imp, LUSTRE_IMP_RECOVER);
+		}
+	}
+
+	if (imp->imp_state == LUSTRE_IMP_RECOVER) {
+		CDEBUG(D_HA, "reconnected to %s@%s\n",
+		       obd2cli_tgt(imp->imp_obd),
+		       imp->imp_connection->c_remote_uuid.uuid);
+
+		rc = ptlrpc_resend(imp);
+		if (rc)
+			goto out;
+		IMPORT_SET_STATE(imp, LUSTRE_IMP_FULL);
+		ptlrpc_activate_import(imp);
+
+		deuuidify(obd2cli_tgt(imp->imp_obd), NULL,
+			  &target_start, &target_len);
+		LCONSOLE_INFO("%s: Connection restored to %.*s (at %s)\n",
+			      imp->imp_obd->obd_name,
+			      target_len, target_start,
+			      libcfs_nid2str(imp->imp_connection->c_peer.nid));
+	}
+
+	if (imp->imp_state == LUSTRE_IMP_FULL) {
+		wake_up_all(&imp->imp_recovery_waitq);
+		ptlrpc_wake_delayed(imp);
+	}
+
+out:
+	return rc;
+}
+
+int ptlrpc_disconnect_import(struct obd_import *imp, int noclose)
+{
+	struct ptlrpc_request *req;
+	int rq_opc, rc = 0;
+
+	if (imp->imp_obd->obd_force)
+		goto set_state;
+
+	switch (imp->imp_connect_op) {
+	case OST_CONNECT:
+		rq_opc = OST_DISCONNECT;
+		break;
+	case MDS_CONNECT:
+		rq_opc = MDS_DISCONNECT;
+		break;
+	case MGS_CONNECT:
+		rq_opc = MGS_DISCONNECT;
+		break;
+	default:
+		rc = -EINVAL;
+		CERROR("%s: don't know how to disconnect from %s (connect_op %d): rc = %d\n",
+		       imp->imp_obd->obd_name, obd2cli_tgt(imp->imp_obd),
+		       imp->imp_connect_op, rc);
+		return rc;
+	}
+
+	if (ptlrpc_import_in_recovery(imp)) {
+		struct l_wait_info lwi;
+		long timeout;
+
+		if (AT_OFF) {
+			if (imp->imp_server_timeout)
+				timeout = cfs_time_seconds(obd_timeout / 2);
+			else
+				timeout = cfs_time_seconds(obd_timeout);
+		} else {
+			int idx = import_at_get_index(imp,
+				imp->imp_client->cli_request_portal);
+			timeout = cfs_time_seconds(
+				at_get(&imp->imp_at.iat_service_estimate[idx]));
+		}
+
+		lwi = LWI_TIMEOUT_INTR(cfs_timeout_cap(timeout),
+				       back_to_sleep, LWI_ON_SIGNAL_NOOP, NULL);
+		rc = l_wait_event(imp->imp_recovery_waitq,
+				  !ptlrpc_import_in_recovery(imp), &lwi);
+
+	}
+
+	spin_lock(&imp->imp_lock);
+	if (imp->imp_state != LUSTRE_IMP_FULL)
+		goto out;
+	spin_unlock(&imp->imp_lock);
+
+	req = ptlrpc_request_alloc_pack(imp, &RQF_MDS_DISCONNECT,
+					LUSTRE_OBD_VERSION, rq_opc);
+	if (req) {
+		/* We are disconnecting, do not retry a failed DISCONNECT rpc if
+		 * it fails.  We can get through the above with a down server
+		 * if the client doesn't know the server is gone yet. */
+		req->rq_no_resend = 1;
+
+		/* We want client umounts to happen quickly, no matter the
+		   server state... */
+		req->rq_timeout = min_t(int, req->rq_timeout,
+					INITIAL_CONNECT_TIMEOUT);
+
+		IMPORT_SET_STATE(imp, LUSTRE_IMP_CONNECTING);
+		req->rq_send_state =  LUSTRE_IMP_CONNECTING;
+		ptlrpc_request_set_replen(req);
+		rc = ptlrpc_queue_wait(req);
+		ptlrpc_req_finished(req);
+	}
+
+set_state:
+	spin_lock(&imp->imp_lock);
+out:
+	if (noclose)
+		IMPORT_SET_STATE_NOLOCK(imp, LUSTRE_IMP_DISCON);
+	else
+		IMPORT_SET_STATE_NOLOCK(imp, LUSTRE_IMP_CLOSED);
+	memset(&imp->imp_remote_handle, 0, sizeof(imp->imp_remote_handle));
+	spin_unlock(&imp->imp_lock);
+
+	if (rc == -ETIMEDOUT || rc == -ENOTCONN || rc == -ESHUTDOWN)
+		rc = 0;
+
+	return rc;
+}
+EXPORT_SYMBOL(ptlrpc_disconnect_import);
+
+void ptlrpc_cleanup_imp(struct obd_import *imp)
+{
+	spin_lock(&imp->imp_lock);
+	IMPORT_SET_STATE_NOLOCK(imp, LUSTRE_IMP_CLOSED);
+	imp->imp_generation++;
+	spin_unlock(&imp->imp_lock);
+	ptlrpc_abort_inflight(imp);
+}
+EXPORT_SYMBOL(ptlrpc_cleanup_imp);
+
+/* Adaptive Timeout utils */
+extern unsigned int at_min, at_max, at_history;
+
+/* Bin into timeslices using AT_BINS bins.
+   This gives us a max of the last binlimit*AT_BINS secs without the storage,
+   but still smoothing out a return to normalcy from a slow response.
+   (E.g. remember the maximum latency in each minute of the last 4 minutes.) */
+int at_measured(struct adaptive_timeout *at, unsigned int val)
+{
+	unsigned int old = at->at_current;
+	time_t now = get_seconds();
+	time_t binlimit = max_t(time_t, at_history / AT_BINS, 1);
+
+	LASSERT(at);
+	CDEBUG(D_OTHER, "add %u to %p time=%lu v=%u (%u %u %u %u)\n",
+	       val, at, now - at->at_binstart, at->at_current,
+	       at->at_hist[0], at->at_hist[1], at->at_hist[2], at->at_hist[3]);
+
+	if (val == 0)
+		/* 0's don't count, because we never want our timeout to
+		   drop to 0, and because 0 could mean an error */
+		return 0;
+
+	spin_lock(&at->at_lock);
+
+	if (unlikely(at->at_binstart == 0)) {
+		/* Special case to remove default from history */
+		at->at_current = val;
+		at->at_worst_ever = val;
+		at->at_worst_time = now;
+		at->at_hist[0] = val;
+		at->at_binstart = now;
+	} else if (now - at->at_binstart < binlimit) {
+		/* in bin 0 */
+		at->at_hist[0] = max(val, at->at_hist[0]);
+		at->at_current = max(val, at->at_current);
+	} else {
+		int i, shift;
+		unsigned int maxv = val;
+		/* move bins over */
+		shift = (now - at->at_binstart) / binlimit;
+		LASSERT(shift > 0);
+		for (i = AT_BINS - 1; i >= 0; i--) {
+			if (i >= shift) {
+				at->at_hist[i] = at->at_hist[i - shift];
+				maxv = max(maxv, at->at_hist[i]);
+			} else {
+				at->at_hist[i] = 0;
+			}
+		}
+		at->at_hist[0] = val;
+		at->at_current = maxv;
+		at->at_binstart += shift * binlimit;
+	}
+
+	if (at->at_current > at->at_worst_ever) {
+		at->at_worst_ever = at->at_current;
+		at->at_worst_time = now;
+	}
+
+	if (at->at_flags & AT_FLG_NOHIST)
+		/* Only keep last reported val; keeping the rest of the history
+		   for proc only */
+		at->at_current = val;
+
+	if (at_max > 0)
+		at->at_current =  min(at->at_current, at_max);
+	at->at_current =  max(at->at_current, at_min);
+
+	if (at->at_current != old)
+		CDEBUG(D_OTHER, "AT %p change: old=%u new=%u delta=%d (val=%u) hist %u %u %u %u\n",
+		       at,
+		       old, at->at_current, at->at_current - old, val,
+		       at->at_hist[0], at->at_hist[1], at->at_hist[2],
+		       at->at_hist[3]);
+
+	/* if we changed, report the old value */
+	old = (at->at_current != old) ? old : 0;
+
+	spin_unlock(&at->at_lock);
+	return old;
+}
+
+/* Find the imp_at index for a given portal; assign if space available */
+int import_at_get_index(struct obd_import *imp, int portal)
+{
+	struct imp_at *at = &imp->imp_at;
+	int i;
+
+	for (i = 0; i < IMP_AT_MAX_PORTALS; i++) {
+		if (at->iat_portal[i] == portal)
+			return i;
+		if (at->iat_portal[i] == 0)
+			/* unused */
+			break;
+	}
+
+	/* Not found in list, add it under a lock */
+	spin_lock(&imp->imp_lock);
+
+	/* Check unused under lock */
+	for (; i < IMP_AT_MAX_PORTALS; i++) {
+		if (at->iat_portal[i] == portal)
+			goto out;
+		if (at->iat_portal[i] == 0)
+			/* unused */
+			break;
+	}
+
+	/* Not enough portals? */
+	LASSERT(i < IMP_AT_MAX_PORTALS);
+
+	at->iat_portal[i] = portal;
+out:
+	spin_unlock(&imp->imp_lock);
+	return i;
+}
diff --git a/drivers/staging/lustre/lustre/ptlrpc/layout.c b/drivers/staging/lustre/lustre/ptlrpc/layout.c
new file mode 100644
index 000000000..a42335e26
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ptlrpc/layout.c
@@ -0,0 +1,2442 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/ptlrpc/layout.c
+ *
+ * Lustre Metadata Target (mdt) request handler
+ *
+ * Author: Nikita Danilov <nikita@clusterfs.com>
+ */
+/*
+ * This file contains the "capsule/pill" abstraction layered above PTLRPC.
+ *
+ * Every struct ptlrpc_request contains a "pill", which points to a description
+ * of the format that the request conforms to.
+ */
+
+#if !defined(__REQ_LAYOUT_USER__)
+
+#define DEBUG_SUBSYSTEM S_RPC
+
+#include <linux/module.h>
+
+/* LUSTRE_VERSION_CODE */
+#include "../include/lustre_ver.h"
+
+#include "../include/obd_support.h"
+/* lustre_swab_mdt_body */
+#include "../include/lustre/lustre_idl.h"
+/* obd2cli_tgt() (required by DEBUG_REQ()) */
+#include "../include/obd.h"
+
+/* __REQ_LAYOUT_USER__ */
+#endif
+/* struct ptlrpc_request, lustre_msg* */
+#include "../include/lustre_req_layout.h"
+#include "../include/lustre_acl.h"
+#include "../include/lustre_debug.h"
+
+/*
+ * RQFs (see below) refer to two struct req_msg_field arrays describing the
+ * client request and server reply, respectively.
+ */
+/* empty set of fields... for suitable definition of emptiness. */
+static const struct req_msg_field *empty[] = {
+	&RMF_PTLRPC_BODY
+};
+
+static const struct req_msg_field *mgs_target_info_only[] = {
+	&RMF_PTLRPC_BODY,
+	&RMF_MGS_TARGET_INFO
+};
+
+static const struct req_msg_field *mgs_set_info[] = {
+	&RMF_PTLRPC_BODY,
+	&RMF_MGS_SEND_PARAM
+};
+
+static const struct req_msg_field *mgs_config_read_client[] = {
+	&RMF_PTLRPC_BODY,
+	&RMF_MGS_CONFIG_BODY
+};
+
+static const struct req_msg_field *mgs_config_read_server[] = {
+	&RMF_PTLRPC_BODY,
+	&RMF_MGS_CONFIG_RES
+};
+
+static const struct req_msg_field *log_cancel_client[] = {
+	&RMF_PTLRPC_BODY,
+	&RMF_LOGCOOKIES
+};
+
+static const struct req_msg_field *mdt_body_only[] = {
+	&RMF_PTLRPC_BODY,
+	&RMF_MDT_BODY
+};
+
+static const struct req_msg_field *mdt_body_capa[] = {
+	&RMF_PTLRPC_BODY,
+	&RMF_MDT_BODY,
+	&RMF_CAPA1
+};
+
+static const struct req_msg_field *quotactl_only[] = {
+	&RMF_PTLRPC_BODY,
+	&RMF_OBD_QUOTACTL
+};
+
+static const struct req_msg_field *quota_body_only[] = {
+	&RMF_PTLRPC_BODY,
+	&RMF_QUOTA_BODY
+};
+
+static const struct req_msg_field *ldlm_intent_quota_client[] = {
+	&RMF_PTLRPC_BODY,
+	&RMF_DLM_REQ,
+	&RMF_LDLM_INTENT,
+	&RMF_QUOTA_BODY
+};
+
+static const struct req_msg_field *ldlm_intent_quota_server[] = {
+	&RMF_PTLRPC_BODY,
+	&RMF_DLM_REP,
+	&RMF_DLM_LVB,
+	&RMF_QUOTA_BODY
+};
+
+static const struct req_msg_field *mdt_close_client[] = {
+	&RMF_PTLRPC_BODY,
+	&RMF_MDT_EPOCH,
+	&RMF_REC_REINT,
+	&RMF_CAPA1
+};
+
+static const struct req_msg_field *mdt_release_close_client[] = {
+	&RMF_PTLRPC_BODY,
+	&RMF_MDT_EPOCH,
+	&RMF_REC_REINT,
+	&RMF_CAPA1,
+	&RMF_CLOSE_DATA
+};
+
+static const struct req_msg_field *obd_statfs_server[] = {
+	&RMF_PTLRPC_BODY,
+	&RMF_OBD_STATFS
+};
+
+static const struct req_msg_field *seq_query_client[] = {
+	&RMF_PTLRPC_BODY,
+	&RMF_SEQ_OPC,
+	&RMF_SEQ_RANGE
+};
+
+static const struct req_msg_field *seq_query_server[] = {
+	&RMF_PTLRPC_BODY,
+	&RMF_SEQ_RANGE
+};
+
+static const struct req_msg_field *fld_query_client[] = {
+	&RMF_PTLRPC_BODY,
+	&RMF_FLD_OPC,
+	&RMF_FLD_MDFLD
+};
+
+static const struct req_msg_field *fld_query_server[] = {
+	&RMF_PTLRPC_BODY,
+	&RMF_FLD_MDFLD
+};
+
+static const struct req_msg_field *mds_getattr_name_client[] = {
+	&RMF_PTLRPC_BODY,
+	&RMF_MDT_BODY,
+	&RMF_CAPA1,
+	&RMF_NAME
+};
+
+static const struct req_msg_field *mds_reint_client[] = {
+	&RMF_PTLRPC_BODY,
+	&RMF_REC_REINT
+};
+
+static const struct req_msg_field *mds_reint_create_client[] = {
+	&RMF_PTLRPC_BODY,
+	&RMF_REC_REINT,
+	&RMF_CAPA1,
+	&RMF_NAME
+};
+
+static const struct req_msg_field *mds_reint_create_slave_client[] = {
+	&RMF_PTLRPC_BODY,
+	&RMF_REC_REINT,
+	&RMF_CAPA1,
+	&RMF_NAME,
+	&RMF_EADATA,
+	&RMF_DLM_REQ
+};
+
+static const struct req_msg_field *mds_reint_create_rmt_acl_client[] = {
+	&RMF_PTLRPC_BODY,
+	&RMF_REC_REINT,
+	&RMF_CAPA1,
+	&RMF_NAME,
+	&RMF_EADATA,
+	&RMF_DLM_REQ
+};
+
+static const struct req_msg_field *mds_reint_create_sym_client[] = {
+	&RMF_PTLRPC_BODY,
+	&RMF_REC_REINT,
+	&RMF_CAPA1,
+	&RMF_NAME,
+	&RMF_SYMTGT,
+	&RMF_DLM_REQ
+};
+
+static const struct req_msg_field *mds_reint_open_client[] = {
+	&RMF_PTLRPC_BODY,
+	&RMF_REC_REINT,
+	&RMF_CAPA1,
+	&RMF_CAPA2,
+	&RMF_NAME,
+	&RMF_EADATA
+};
+
+static const struct req_msg_field *mds_reint_open_server[] = {
+	&RMF_PTLRPC_BODY,
+	&RMF_MDT_BODY,
+	&RMF_MDT_MD,
+	&RMF_ACL,
+	&RMF_CAPA1,
+	&RMF_CAPA2
+};
+
+static const struct req_msg_field *mds_reint_unlink_client[] = {
+	&RMF_PTLRPC_BODY,
+	&RMF_REC_REINT,
+	&RMF_CAPA1,
+	&RMF_NAME,
+	&RMF_DLM_REQ
+};
+
+static const struct req_msg_field *mds_reint_link_client[] = {
+	&RMF_PTLRPC_BODY,
+	&RMF_REC_REINT,
+	&RMF_CAPA1,
+	&RMF_CAPA2,
+	&RMF_NAME,
+	&RMF_DLM_REQ
+};
+
+static const struct req_msg_field *mds_reint_rename_client[] = {
+	&RMF_PTLRPC_BODY,
+	&RMF_REC_REINT,
+	&RMF_CAPA1,
+	&RMF_CAPA2,
+	&RMF_NAME,
+	&RMF_SYMTGT,
+	&RMF_DLM_REQ
+};
+
+static const struct req_msg_field *mds_last_unlink_server[] = {
+	&RMF_PTLRPC_BODY,
+	&RMF_MDT_BODY,
+	&RMF_MDT_MD,
+	&RMF_LOGCOOKIES,
+	&RMF_CAPA1,
+	&RMF_CAPA2
+};
+
+static const struct req_msg_field *mds_reint_setattr_client[] = {
+	&RMF_PTLRPC_BODY,
+	&RMF_REC_REINT,
+	&RMF_CAPA1,
+	&RMF_MDT_EPOCH,
+	&RMF_EADATA,
+	&RMF_LOGCOOKIES,
+	&RMF_DLM_REQ
+};
+
+static const struct req_msg_field *mds_reint_setxattr_client[] = {
+	&RMF_PTLRPC_BODY,
+	&RMF_REC_REINT,
+	&RMF_CAPA1,
+	&RMF_NAME,
+	&RMF_EADATA,
+	&RMF_DLM_REQ
+};
+
+static const struct req_msg_field *mdt_swap_layouts[] = {
+	&RMF_PTLRPC_BODY,
+	&RMF_MDT_BODY,
+	&RMF_SWAP_LAYOUTS,
+	&RMF_CAPA1,
+	&RMF_CAPA2,
+	&RMF_DLM_REQ
+};
+
+static const struct req_msg_field *obd_connect_client[] = {
+	&RMF_PTLRPC_BODY,
+	&RMF_TGTUUID,
+	&RMF_CLUUID,
+	&RMF_CONN,
+	&RMF_CONNECT_DATA
+};
+
+static const struct req_msg_field *obd_connect_server[] = {
+	&RMF_PTLRPC_BODY,
+	&RMF_CONNECT_DATA
+};
+
+static const struct req_msg_field *obd_set_info_client[] = {
+	&RMF_PTLRPC_BODY,
+	&RMF_SETINFO_KEY,
+	&RMF_SETINFO_VAL
+};
+
+static const struct req_msg_field *ost_grant_shrink_client[] = {
+	&RMF_PTLRPC_BODY,
+	&RMF_SETINFO_KEY,
+	&RMF_OST_BODY
+};
+
+static const struct req_msg_field *mds_getinfo_client[] = {
+	&RMF_PTLRPC_BODY,
+	&RMF_GETINFO_KEY,
+	&RMF_GETINFO_VALLEN
+};
+
+static const struct req_msg_field *mds_getinfo_server[] = {
+	&RMF_PTLRPC_BODY,
+	&RMF_GETINFO_VAL,
+};
+
+static const struct req_msg_field *ldlm_enqueue_client[] = {
+	&RMF_PTLRPC_BODY,
+	&RMF_DLM_REQ
+};
+
+static const struct req_msg_field *ldlm_enqueue_server[] = {
+	&RMF_PTLRPC_BODY,
+	&RMF_DLM_REP
+};
+
+static const struct req_msg_field *ldlm_enqueue_lvb_server[] = {
+	&RMF_PTLRPC_BODY,
+	&RMF_DLM_REP,
+	&RMF_DLM_LVB
+};
+
+static const struct req_msg_field *ldlm_cp_callback_client[] = {
+	&RMF_PTLRPC_BODY,
+	&RMF_DLM_REQ,
+	&RMF_DLM_LVB
+};
+
+static const struct req_msg_field *ldlm_gl_callback_desc_client[] = {
+	&RMF_PTLRPC_BODY,
+	&RMF_DLM_REQ,
+	&RMF_DLM_GL_DESC
+};
+
+static const struct req_msg_field *ldlm_gl_callback_server[] = {
+	&RMF_PTLRPC_BODY,
+	&RMF_DLM_LVB
+};
+
+static const struct req_msg_field *ldlm_intent_basic_client[] = {
+	&RMF_PTLRPC_BODY,
+	&RMF_DLM_REQ,
+	&RMF_LDLM_INTENT,
+};
+
+static const struct req_msg_field *ldlm_intent_client[] = {
+	&RMF_PTLRPC_BODY,
+	&RMF_DLM_REQ,
+	&RMF_LDLM_INTENT,
+	&RMF_REC_REINT
+};
+
+static const struct req_msg_field *ldlm_intent_server[] = {
+	&RMF_PTLRPC_BODY,
+	&RMF_DLM_REP,
+	&RMF_MDT_BODY,
+	&RMF_MDT_MD,
+	&RMF_ACL
+};
+
+static const struct req_msg_field *ldlm_intent_layout_client[] = {
+	&RMF_PTLRPC_BODY,
+	&RMF_DLM_REQ,
+	&RMF_LDLM_INTENT,
+	&RMF_LAYOUT_INTENT,
+	&RMF_EADATA /* for new layout to be set up */
+};
+static const struct req_msg_field *ldlm_intent_open_server[] = {
+	&RMF_PTLRPC_BODY,
+	&RMF_DLM_REP,
+	&RMF_MDT_BODY,
+	&RMF_MDT_MD,
+	&RMF_ACL,
+	&RMF_CAPA1,
+	&RMF_CAPA2
+};
+
+static const struct req_msg_field *ldlm_intent_getattr_client[] = {
+	&RMF_PTLRPC_BODY,
+	&RMF_DLM_REQ,
+	&RMF_LDLM_INTENT,
+	&RMF_MDT_BODY,     /* coincides with mds_getattr_name_client[] */
+	&RMF_CAPA1,
+	&RMF_NAME
+};
+
+static const struct req_msg_field *ldlm_intent_getattr_server[] = {
+	&RMF_PTLRPC_BODY,
+	&RMF_DLM_REP,
+	&RMF_MDT_BODY,
+	&RMF_MDT_MD,
+	&RMF_ACL,
+	&RMF_CAPA1
+};
+
+static const struct req_msg_field *ldlm_intent_create_client[] = {
+	&RMF_PTLRPC_BODY,
+	&RMF_DLM_REQ,
+	&RMF_LDLM_INTENT,
+	&RMF_REC_REINT,    /* coincides with mds_reint_create_client[] */
+	&RMF_CAPA1,
+	&RMF_NAME,
+	&RMF_EADATA
+};
+
+static const struct req_msg_field *ldlm_intent_open_client[] = {
+	&RMF_PTLRPC_BODY,
+	&RMF_DLM_REQ,
+	&RMF_LDLM_INTENT,
+	&RMF_REC_REINT,    /* coincides with mds_reint_open_client[] */
+	&RMF_CAPA1,
+	&RMF_CAPA2,
+	&RMF_NAME,
+	&RMF_EADATA
+};
+
+static const struct req_msg_field *ldlm_intent_unlink_client[] = {
+	&RMF_PTLRPC_BODY,
+	&RMF_DLM_REQ,
+	&RMF_LDLM_INTENT,
+	&RMF_REC_REINT,    /* coincides with mds_reint_unlink_client[] */
+	&RMF_CAPA1,
+	&RMF_NAME
+};
+
+static const struct req_msg_field *ldlm_intent_getxattr_client[] = {
+	&RMF_PTLRPC_BODY,
+	&RMF_DLM_REQ,
+	&RMF_LDLM_INTENT,
+	&RMF_MDT_BODY,
+	&RMF_CAPA1,
+};
+
+static const struct req_msg_field *ldlm_intent_getxattr_server[] = {
+	&RMF_PTLRPC_BODY,
+	&RMF_DLM_REP,
+	&RMF_MDT_BODY,
+	&RMF_MDT_MD,
+	&RMF_ACL, /* for req_capsule_extend/mdt_intent_policy */
+	&RMF_EADATA,
+	&RMF_EAVALS,
+	&RMF_EAVALS_LENS
+};
+
+static const struct req_msg_field *mds_getxattr_client[] = {
+	&RMF_PTLRPC_BODY,
+	&RMF_MDT_BODY,
+	&RMF_CAPA1,
+	&RMF_NAME,
+	&RMF_EADATA
+};
+
+static const struct req_msg_field *mds_getxattr_server[] = {
+	&RMF_PTLRPC_BODY,
+	&RMF_MDT_BODY,
+	&RMF_EADATA
+};
+
+static const struct req_msg_field *mds_getattr_server[] = {
+	&RMF_PTLRPC_BODY,
+	&RMF_MDT_BODY,
+	&RMF_MDT_MD,
+	&RMF_ACL,
+	&RMF_CAPA1,
+	&RMF_CAPA2
+};
+
+static const struct req_msg_field *mds_setattr_server[] = {
+	&RMF_PTLRPC_BODY,
+	&RMF_MDT_BODY,
+	&RMF_MDT_MD,
+	&RMF_ACL,
+	&RMF_CAPA1,
+	&RMF_CAPA2
+};
+
+static const struct req_msg_field *mds_update_client[] = {
+	&RMF_PTLRPC_BODY,
+	&RMF_UPDATE,
+};
+
+static const struct req_msg_field *mds_update_server[] = {
+	&RMF_PTLRPC_BODY,
+	&RMF_UPDATE_REPLY,
+};
+
+static const struct req_msg_field *llog_origin_handle_create_client[] = {
+	&RMF_PTLRPC_BODY,
+	&RMF_LLOGD_BODY,
+	&RMF_NAME
+};
+
+static const struct req_msg_field *llogd_body_only[] = {
+	&RMF_PTLRPC_BODY,
+	&RMF_LLOGD_BODY
+};
+
+static const struct req_msg_field *llog_log_hdr_only[] = {
+	&RMF_PTLRPC_BODY,
+	&RMF_LLOG_LOG_HDR
+};
+
+static const struct req_msg_field *llogd_conn_body_only[] = {
+	&RMF_PTLRPC_BODY,
+	&RMF_LLOGD_CONN_BODY
+};
+
+static const struct req_msg_field *llog_origin_handle_next_block_server[] = {
+	&RMF_PTLRPC_BODY,
+	&RMF_LLOGD_BODY,
+	&RMF_EADATA
+};
+
+static const struct req_msg_field *obd_idx_read_client[] = {
+	&RMF_PTLRPC_BODY,
+	&RMF_IDX_INFO
+};
+
+static const struct req_msg_field *obd_idx_read_server[] = {
+	&RMF_PTLRPC_BODY,
+	&RMF_IDX_INFO
+};
+
+static const struct req_msg_field *ost_body_only[] = {
+	&RMF_PTLRPC_BODY,
+	&RMF_OST_BODY
+};
+
+static const struct req_msg_field *ost_body_capa[] = {
+	&RMF_PTLRPC_BODY,
+	&RMF_OST_BODY,
+	&RMF_CAPA1
+};
+
+static const struct req_msg_field *ost_destroy_client[] = {
+	&RMF_PTLRPC_BODY,
+	&RMF_OST_BODY,
+	&RMF_DLM_REQ,
+	&RMF_CAPA1
+};
+
+
+static const struct req_msg_field *ost_brw_client[] = {
+	&RMF_PTLRPC_BODY,
+	&RMF_OST_BODY,
+	&RMF_OBD_IOOBJ,
+	&RMF_NIOBUF_REMOTE,
+	&RMF_CAPA1
+};
+
+static const struct req_msg_field *ost_brw_read_server[] = {
+	&RMF_PTLRPC_BODY,
+	&RMF_OST_BODY
+};
+
+static const struct req_msg_field *ost_brw_write_server[] = {
+	&RMF_PTLRPC_BODY,
+	&RMF_OST_BODY,
+	&RMF_RCS
+};
+
+static const struct req_msg_field *ost_get_info_generic_server[] = {
+	&RMF_PTLRPC_BODY,
+	&RMF_GENERIC_DATA,
+};
+
+static const struct req_msg_field *ost_get_info_generic_client[] = {
+	&RMF_PTLRPC_BODY,
+	&RMF_SETINFO_KEY
+};
+
+static const struct req_msg_field *ost_get_last_id_server[] = {
+	&RMF_PTLRPC_BODY,
+	&RMF_OBD_ID
+};
+
+static const struct req_msg_field *ost_get_last_fid_server[] = {
+	&RMF_PTLRPC_BODY,
+	&RMF_FID,
+};
+
+static const struct req_msg_field *ost_get_fiemap_client[] = {
+	&RMF_PTLRPC_BODY,
+	&RMF_FIEMAP_KEY,
+	&RMF_FIEMAP_VAL
+};
+
+static const struct req_msg_field *ost_get_fiemap_server[] = {
+	&RMF_PTLRPC_BODY,
+	&RMF_FIEMAP_VAL
+};
+
+static const struct req_msg_field *mdt_hsm_progress[] = {
+	&RMF_PTLRPC_BODY,
+	&RMF_MDT_BODY,
+	&RMF_MDS_HSM_PROGRESS,
+};
+
+static const struct req_msg_field *mdt_hsm_ct_register[] = {
+	&RMF_PTLRPC_BODY,
+	&RMF_MDT_BODY,
+	&RMF_MDS_HSM_ARCHIVE,
+};
+
+static const struct req_msg_field *mdt_hsm_ct_unregister[] = {
+	&RMF_PTLRPC_BODY,
+	&RMF_MDT_BODY,
+};
+
+static const struct req_msg_field *mdt_hsm_action_server[] = {
+	&RMF_PTLRPC_BODY,
+	&RMF_MDT_BODY,
+	&RMF_MDS_HSM_CURRENT_ACTION,
+};
+
+static const struct req_msg_field *mdt_hsm_state_get_server[] = {
+	&RMF_PTLRPC_BODY,
+	&RMF_MDT_BODY,
+	&RMF_HSM_USER_STATE,
+};
+
+static const struct req_msg_field *mdt_hsm_state_set[] = {
+	&RMF_PTLRPC_BODY,
+	&RMF_MDT_BODY,
+	&RMF_CAPA1,
+	&RMF_HSM_STATE_SET,
+};
+
+static const struct req_msg_field *mdt_hsm_request[] = {
+	&RMF_PTLRPC_BODY,
+	&RMF_MDT_BODY,
+	&RMF_MDS_HSM_REQUEST,
+	&RMF_MDS_HSM_USER_ITEM,
+	&RMF_GENERIC_DATA,
+};
+
+static struct req_format *req_formats[] = {
+	&RQF_OBD_PING,
+	&RQF_OBD_SET_INFO,
+	&RQF_OBD_IDX_READ,
+	&RQF_SEC_CTX,
+	&RQF_MGS_TARGET_REG,
+	&RQF_MGS_SET_INFO,
+	&RQF_MGS_CONFIG_READ,
+	&RQF_SEQ_QUERY,
+	&RQF_FLD_QUERY,
+	&RQF_MDS_CONNECT,
+	&RQF_MDS_DISCONNECT,
+	&RQF_MDS_GET_INFO,
+	&RQF_MDS_GETSTATUS,
+	&RQF_MDS_STATFS,
+	&RQF_MDS_GETATTR,
+	&RQF_MDS_GETATTR_NAME,
+	&RQF_MDS_GETXATTR,
+	&RQF_MDS_SYNC,
+	&RQF_MDS_CLOSE,
+	&RQF_MDS_RELEASE_CLOSE,
+	&RQF_MDS_PIN,
+	&RQF_MDS_UNPIN,
+	&RQF_MDS_READPAGE,
+	&RQF_MDS_WRITEPAGE,
+	&RQF_MDS_IS_SUBDIR,
+	&RQF_MDS_DONE_WRITING,
+	&RQF_MDS_REINT,
+	&RQF_MDS_REINT_CREATE,
+	&RQF_MDS_REINT_CREATE_RMT_ACL,
+	&RQF_MDS_REINT_CREATE_SLAVE,
+	&RQF_MDS_REINT_CREATE_SYM,
+	&RQF_MDS_REINT_OPEN,
+	&RQF_MDS_REINT_UNLINK,
+	&RQF_MDS_REINT_LINK,
+	&RQF_MDS_REINT_RENAME,
+	&RQF_MDS_REINT_SETATTR,
+	&RQF_MDS_REINT_SETXATTR,
+	&RQF_MDS_QUOTACHECK,
+	&RQF_MDS_QUOTACTL,
+	&RQF_MDS_HSM_PROGRESS,
+	&RQF_MDS_HSM_CT_REGISTER,
+	&RQF_MDS_HSM_CT_UNREGISTER,
+	&RQF_MDS_HSM_STATE_GET,
+	&RQF_MDS_HSM_STATE_SET,
+	&RQF_MDS_HSM_ACTION,
+	&RQF_MDS_HSM_REQUEST,
+	&RQF_MDS_SWAP_LAYOUTS,
+	&RQF_UPDATE_OBJ,
+	&RQF_QC_CALLBACK,
+	&RQF_OST_CONNECT,
+	&RQF_OST_DISCONNECT,
+	&RQF_OST_QUOTACHECK,
+	&RQF_OST_QUOTACTL,
+	&RQF_OST_GETATTR,
+	&RQF_OST_SETATTR,
+	&RQF_OST_CREATE,
+	&RQF_OST_PUNCH,
+	&RQF_OST_SYNC,
+	&RQF_OST_DESTROY,
+	&RQF_OST_BRW_READ,
+	&RQF_OST_BRW_WRITE,
+	&RQF_OST_STATFS,
+	&RQF_OST_SET_GRANT_INFO,
+	&RQF_OST_GET_INFO_GENERIC,
+	&RQF_OST_GET_INFO_LAST_ID,
+	&RQF_OST_GET_INFO_LAST_FID,
+	&RQF_OST_SET_INFO_LAST_FID,
+	&RQF_OST_GET_INFO_FIEMAP,
+	&RQF_LDLM_ENQUEUE,
+	&RQF_LDLM_ENQUEUE_LVB,
+	&RQF_LDLM_CONVERT,
+	&RQF_LDLM_CANCEL,
+	&RQF_LDLM_CALLBACK,
+	&RQF_LDLM_CP_CALLBACK,
+	&RQF_LDLM_BL_CALLBACK,
+	&RQF_LDLM_GL_CALLBACK,
+	&RQF_LDLM_GL_DESC_CALLBACK,
+	&RQF_LDLM_INTENT,
+	&RQF_LDLM_INTENT_BASIC,
+	&RQF_LDLM_INTENT_LAYOUT,
+	&RQF_LDLM_INTENT_GETATTR,
+	&RQF_LDLM_INTENT_OPEN,
+	&RQF_LDLM_INTENT_CREATE,
+	&RQF_LDLM_INTENT_UNLINK,
+	&RQF_LDLM_INTENT_GETXATTR,
+	&RQF_LDLM_INTENT_QUOTA,
+	&RQF_QUOTA_DQACQ,
+	&RQF_LOG_CANCEL,
+	&RQF_LLOG_ORIGIN_HANDLE_CREATE,
+	&RQF_LLOG_ORIGIN_HANDLE_DESTROY,
+	&RQF_LLOG_ORIGIN_HANDLE_NEXT_BLOCK,
+	&RQF_LLOG_ORIGIN_HANDLE_PREV_BLOCK,
+	&RQF_LLOG_ORIGIN_HANDLE_READ_HEADER,
+	&RQF_LLOG_ORIGIN_CONNECT,
+	&RQF_CONNECT,
+};
+
+struct req_msg_field {
+	const __u32 rmf_flags;
+	const char  *rmf_name;
+	/**
+	 * Field length. (-1) means "variable length".  If the
+	 * \a RMF_F_STRUCT_ARRAY flag is set the field is also variable-length,
+	 * but the actual size must be a whole multiple of \a rmf_size.
+	 */
+	const int   rmf_size;
+	void	(*rmf_swabber)(void *);
+	void	(*rmf_dumper)(void *);
+	int	 rmf_offset[ARRAY_SIZE(req_formats)][RCL_NR];
+};
+
+enum rmf_flags {
+	/**
+	 * The field is a string, must be NUL-terminated.
+	 */
+	RMF_F_STRING = 1 << 0,
+	/**
+	 * The field's buffer size need not match the declared \a rmf_size.
+	 */
+	RMF_F_NO_SIZE_CHECK = 1 << 1,
+	/**
+	 * The field's buffer size must be a whole multiple of the declared \a
+	 * rmf_size and the \a rmf_swabber function must work on the declared \a
+	 * rmf_size worth of bytes.
+	 */
+	RMF_F_STRUCT_ARRAY = 1 << 2
+};
+
+struct req_capsule;
+
+/*
+ * Request fields.
+ */
+#define DEFINE_MSGF(name, flags, size, swabber, dumper) {       \
+	.rmf_name    = (name),				  \
+	.rmf_flags   = (flags),				 \
+	.rmf_size    = (size),				  \
+	.rmf_swabber = (void (*)(void *))(swabber),	      \
+	.rmf_dumper  = (void (*)(void *))(dumper)		\
+}
+
+struct req_msg_field RMF_GENERIC_DATA =
+	DEFINE_MSGF("generic_data", 0,
+		    -1, NULL, NULL);
+EXPORT_SYMBOL(RMF_GENERIC_DATA);
+
+struct req_msg_field RMF_MGS_TARGET_INFO =
+	DEFINE_MSGF("mgs_target_info", 0,
+		    sizeof(struct mgs_target_info),
+		    lustre_swab_mgs_target_info, NULL);
+EXPORT_SYMBOL(RMF_MGS_TARGET_INFO);
+
+struct req_msg_field RMF_MGS_SEND_PARAM =
+	DEFINE_MSGF("mgs_send_param", 0,
+		    sizeof(struct mgs_send_param),
+		    NULL, NULL);
+EXPORT_SYMBOL(RMF_MGS_SEND_PARAM);
+
+struct req_msg_field RMF_MGS_CONFIG_BODY =
+	DEFINE_MSGF("mgs_config_read request", 0,
+		    sizeof(struct mgs_config_body),
+		    lustre_swab_mgs_config_body, NULL);
+EXPORT_SYMBOL(RMF_MGS_CONFIG_BODY);
+
+struct req_msg_field RMF_MGS_CONFIG_RES =
+	DEFINE_MSGF("mgs_config_read reply ", 0,
+		    sizeof(struct mgs_config_res),
+		    lustre_swab_mgs_config_res, NULL);
+EXPORT_SYMBOL(RMF_MGS_CONFIG_RES);
+
+struct req_msg_field RMF_U32 =
+	DEFINE_MSGF("generic u32", 0,
+		    sizeof(__u32), lustre_swab_generic_32s, NULL);
+EXPORT_SYMBOL(RMF_U32);
+
+struct req_msg_field RMF_SETINFO_VAL =
+	DEFINE_MSGF("setinfo_val", 0, -1, NULL, NULL);
+EXPORT_SYMBOL(RMF_SETINFO_VAL);
+
+struct req_msg_field RMF_GETINFO_KEY =
+	DEFINE_MSGF("getinfo_key", 0, -1, NULL, NULL);
+EXPORT_SYMBOL(RMF_GETINFO_KEY);
+
+struct req_msg_field RMF_GETINFO_VALLEN =
+	DEFINE_MSGF("getinfo_vallen", 0,
+		    sizeof(__u32), lustre_swab_generic_32s, NULL);
+EXPORT_SYMBOL(RMF_GETINFO_VALLEN);
+
+struct req_msg_field RMF_GETINFO_VAL =
+	DEFINE_MSGF("getinfo_val", 0, -1, NULL, NULL);
+EXPORT_SYMBOL(RMF_GETINFO_VAL);
+
+struct req_msg_field RMF_SEQ_OPC =
+	DEFINE_MSGF("seq_query_opc", 0,
+		    sizeof(__u32), lustre_swab_generic_32s, NULL);
+EXPORT_SYMBOL(RMF_SEQ_OPC);
+
+struct req_msg_field RMF_SEQ_RANGE =
+	DEFINE_MSGF("seq_query_range", 0,
+		    sizeof(struct lu_seq_range),
+		    lustre_swab_lu_seq_range, NULL);
+EXPORT_SYMBOL(RMF_SEQ_RANGE);
+
+struct req_msg_field RMF_FLD_OPC =
+	DEFINE_MSGF("fld_query_opc", 0,
+		    sizeof(__u32), lustre_swab_generic_32s, NULL);
+EXPORT_SYMBOL(RMF_FLD_OPC);
+
+struct req_msg_field RMF_FLD_MDFLD =
+	DEFINE_MSGF("fld_query_mdfld", 0,
+		    sizeof(struct lu_seq_range),
+		    lustre_swab_lu_seq_range, NULL);
+EXPORT_SYMBOL(RMF_FLD_MDFLD);
+
+struct req_msg_field RMF_MDT_BODY =
+	DEFINE_MSGF("mdt_body", 0,
+		    sizeof(struct mdt_body), lustre_swab_mdt_body, NULL);
+EXPORT_SYMBOL(RMF_MDT_BODY);
+
+struct req_msg_field RMF_OBD_QUOTACTL =
+	DEFINE_MSGF("obd_quotactl", 0,
+		    sizeof(struct obd_quotactl),
+		    lustre_swab_obd_quotactl, NULL);
+EXPORT_SYMBOL(RMF_OBD_QUOTACTL);
+
+struct req_msg_field RMF_QUOTA_BODY =
+	DEFINE_MSGF("quota_body", 0,
+		    sizeof(struct quota_body), lustre_swab_quota_body, NULL);
+EXPORT_SYMBOL(RMF_QUOTA_BODY);
+
+struct req_msg_field RMF_MDT_EPOCH =
+	DEFINE_MSGF("mdt_ioepoch", 0,
+		    sizeof(struct mdt_ioepoch), lustre_swab_mdt_ioepoch, NULL);
+EXPORT_SYMBOL(RMF_MDT_EPOCH);
+
+struct req_msg_field RMF_PTLRPC_BODY =
+	DEFINE_MSGF("ptlrpc_body", 0,
+		    sizeof(struct ptlrpc_body), lustre_swab_ptlrpc_body, NULL);
+EXPORT_SYMBOL(RMF_PTLRPC_BODY);
+
+struct req_msg_field RMF_CLOSE_DATA =
+	DEFINE_MSGF("data_version", 0,
+		    sizeof(struct close_data), lustre_swab_close_data, NULL);
+EXPORT_SYMBOL(RMF_CLOSE_DATA);
+
+struct req_msg_field RMF_OBD_STATFS =
+	DEFINE_MSGF("obd_statfs", 0,
+		    sizeof(struct obd_statfs), lustre_swab_obd_statfs, NULL);
+EXPORT_SYMBOL(RMF_OBD_STATFS);
+
+struct req_msg_field RMF_SETINFO_KEY =
+	DEFINE_MSGF("setinfo_key", 0, -1, NULL, NULL);
+EXPORT_SYMBOL(RMF_SETINFO_KEY);
+
+struct req_msg_field RMF_NAME =
+	DEFINE_MSGF("name", RMF_F_STRING, -1, NULL, NULL);
+EXPORT_SYMBOL(RMF_NAME);
+
+struct req_msg_field RMF_SYMTGT =
+	DEFINE_MSGF("symtgt", RMF_F_STRING, -1, NULL, NULL);
+EXPORT_SYMBOL(RMF_SYMTGT);
+
+struct req_msg_field RMF_TGTUUID =
+	DEFINE_MSGF("tgtuuid", RMF_F_STRING, sizeof(struct obd_uuid) - 1, NULL,
+	NULL);
+EXPORT_SYMBOL(RMF_TGTUUID);
+
+struct req_msg_field RMF_CLUUID =
+	DEFINE_MSGF("cluuid", RMF_F_STRING, sizeof(struct obd_uuid) - 1, NULL,
+	NULL);
+EXPORT_SYMBOL(RMF_CLUUID);
+
+struct req_msg_field RMF_STRING =
+	DEFINE_MSGF("string", RMF_F_STRING, -1, NULL, NULL);
+EXPORT_SYMBOL(RMF_STRING);
+
+struct req_msg_field RMF_LLOGD_BODY =
+	DEFINE_MSGF("llogd_body", 0,
+		    sizeof(struct llogd_body), lustre_swab_llogd_body, NULL);
+EXPORT_SYMBOL(RMF_LLOGD_BODY);
+
+struct req_msg_field RMF_LLOG_LOG_HDR =
+	DEFINE_MSGF("llog_log_hdr", 0,
+		    sizeof(struct llog_log_hdr), lustre_swab_llog_hdr, NULL);
+EXPORT_SYMBOL(RMF_LLOG_LOG_HDR);
+
+struct req_msg_field RMF_LLOGD_CONN_BODY =
+	DEFINE_MSGF("llogd_conn_body", 0,
+		    sizeof(struct llogd_conn_body),
+		    lustre_swab_llogd_conn_body, NULL);
+EXPORT_SYMBOL(RMF_LLOGD_CONN_BODY);
+
+/*
+ * connection handle received in MDS_CONNECT request.
+ *
+ * No swabbing needed because struct lustre_handle contains only a 64-bit cookie
+ * that the client does not interpret at all.
+ */
+struct req_msg_field RMF_CONN =
+	DEFINE_MSGF("conn", 0, sizeof(struct lustre_handle), NULL, NULL);
+EXPORT_SYMBOL(RMF_CONN);
+
+struct req_msg_field RMF_CONNECT_DATA =
+	DEFINE_MSGF("cdata",
+		    RMF_F_NO_SIZE_CHECK /* we allow extra space for interop */,
+		    sizeof(struct obd_connect_data),
+		    lustre_swab_connect, NULL);
+EXPORT_SYMBOL(RMF_CONNECT_DATA);
+
+struct req_msg_field RMF_DLM_REQ =
+	DEFINE_MSGF("dlm_req", RMF_F_NO_SIZE_CHECK /* ldlm_request_bufsize */,
+		    sizeof(struct ldlm_request),
+		    lustre_swab_ldlm_request, NULL);
+EXPORT_SYMBOL(RMF_DLM_REQ);
+
+struct req_msg_field RMF_DLM_REP =
+	DEFINE_MSGF("dlm_rep", 0,
+		    sizeof(struct ldlm_reply), lustre_swab_ldlm_reply, NULL);
+EXPORT_SYMBOL(RMF_DLM_REP);
+
+struct req_msg_field RMF_LDLM_INTENT =
+	DEFINE_MSGF("ldlm_intent", 0,
+		    sizeof(struct ldlm_intent), lustre_swab_ldlm_intent, NULL);
+EXPORT_SYMBOL(RMF_LDLM_INTENT);
+
+struct req_msg_field RMF_DLM_LVB =
+	DEFINE_MSGF("dlm_lvb", 0, -1, NULL, NULL);
+EXPORT_SYMBOL(RMF_DLM_LVB);
+
+struct req_msg_field RMF_DLM_GL_DESC =
+	DEFINE_MSGF("dlm_gl_desc", 0, sizeof(union ldlm_gl_desc),
+		    lustre_swab_gl_desc, NULL);
+EXPORT_SYMBOL(RMF_DLM_GL_DESC);
+
+struct req_msg_field RMF_MDT_MD =
+	DEFINE_MSGF("mdt_md", RMF_F_NO_SIZE_CHECK, MIN_MD_SIZE, NULL, NULL);
+EXPORT_SYMBOL(RMF_MDT_MD);
+
+struct req_msg_field RMF_REC_REINT =
+	DEFINE_MSGF("rec_reint", 0, sizeof(struct mdt_rec_reint),
+		    lustre_swab_mdt_rec_reint, NULL);
+EXPORT_SYMBOL(RMF_REC_REINT);
+
+/* FIXME: this length should be defined as a macro */
+struct req_msg_field RMF_EADATA = DEFINE_MSGF("eadata", 0, -1,
+						    NULL, NULL);
+EXPORT_SYMBOL(RMF_EADATA);
+
+struct req_msg_field RMF_EAVALS = DEFINE_MSGF("eavals", 0, -1, NULL, NULL);
+EXPORT_SYMBOL(RMF_EAVALS);
+
+struct req_msg_field RMF_ACL =
+	DEFINE_MSGF("acl", RMF_F_NO_SIZE_CHECK,
+		    LUSTRE_POSIX_ACL_MAX_SIZE, NULL, NULL);
+EXPORT_SYMBOL(RMF_ACL);
+
+/* FIXME: this should be made to use RMF_F_STRUCT_ARRAY */
+struct req_msg_field RMF_LOGCOOKIES =
+	DEFINE_MSGF("logcookies", RMF_F_NO_SIZE_CHECK /* multiple cookies */,
+		    sizeof(struct llog_cookie), NULL, NULL);
+EXPORT_SYMBOL(RMF_LOGCOOKIES);
+
+struct req_msg_field RMF_CAPA1 =
+	DEFINE_MSGF("capa", 0, sizeof(struct lustre_capa),
+		    lustre_swab_lustre_capa, NULL);
+EXPORT_SYMBOL(RMF_CAPA1);
+
+struct req_msg_field RMF_CAPA2 =
+	DEFINE_MSGF("capa", 0, sizeof(struct lustre_capa),
+		    lustre_swab_lustre_capa, NULL);
+EXPORT_SYMBOL(RMF_CAPA2);
+
+struct req_msg_field RMF_LAYOUT_INTENT =
+	DEFINE_MSGF("layout_intent", 0,
+		    sizeof(struct layout_intent), lustre_swab_layout_intent,
+		    NULL);
+EXPORT_SYMBOL(RMF_LAYOUT_INTENT);
+
+/*
+ * OST request field.
+ */
+struct req_msg_field RMF_OST_BODY =
+	DEFINE_MSGF("ost_body", 0,
+		    sizeof(struct ost_body), lustre_swab_ost_body, dump_ost_body);
+EXPORT_SYMBOL(RMF_OST_BODY);
+
+struct req_msg_field RMF_OBD_IOOBJ =
+	DEFINE_MSGF("obd_ioobj", RMF_F_STRUCT_ARRAY,
+		    sizeof(struct obd_ioobj), lustre_swab_obd_ioobj, dump_ioo);
+EXPORT_SYMBOL(RMF_OBD_IOOBJ);
+
+struct req_msg_field RMF_NIOBUF_REMOTE =
+	DEFINE_MSGF("niobuf_remote", RMF_F_STRUCT_ARRAY,
+		    sizeof(struct niobuf_remote), lustre_swab_niobuf_remote,
+		    dump_rniobuf);
+EXPORT_SYMBOL(RMF_NIOBUF_REMOTE);
+
+struct req_msg_field RMF_RCS =
+	DEFINE_MSGF("niobuf_remote", RMF_F_STRUCT_ARRAY, sizeof(__u32),
+		    lustre_swab_generic_32s, dump_rcs);
+EXPORT_SYMBOL(RMF_RCS);
+
+struct req_msg_field RMF_EAVALS_LENS =
+	DEFINE_MSGF("eavals_lens", RMF_F_STRUCT_ARRAY, sizeof(__u32),
+		lustre_swab_generic_32s, NULL);
+EXPORT_SYMBOL(RMF_EAVALS_LENS);
+
+struct req_msg_field RMF_OBD_ID =
+	DEFINE_MSGF("u64", 0,
+		    sizeof(u64), lustre_swab_ost_last_id, NULL);
+EXPORT_SYMBOL(RMF_OBD_ID);
+
+struct req_msg_field RMF_FID =
+	DEFINE_MSGF("fid", 0,
+		    sizeof(struct lu_fid), lustre_swab_lu_fid, NULL);
+EXPORT_SYMBOL(RMF_FID);
+
+struct req_msg_field RMF_OST_ID =
+	DEFINE_MSGF("ost_id", 0,
+		    sizeof(struct ost_id), lustre_swab_ost_id, NULL);
+EXPORT_SYMBOL(RMF_OST_ID);
+
+struct req_msg_field RMF_FIEMAP_KEY =
+	DEFINE_MSGF("fiemap", 0, sizeof(struct ll_fiemap_info_key),
+		    lustre_swab_fiemap, NULL);
+EXPORT_SYMBOL(RMF_FIEMAP_KEY);
+
+struct req_msg_field RMF_FIEMAP_VAL =
+	DEFINE_MSGF("fiemap", 0, -1, lustre_swab_fiemap, NULL);
+EXPORT_SYMBOL(RMF_FIEMAP_VAL);
+
+struct req_msg_field RMF_IDX_INFO =
+	DEFINE_MSGF("idx_info", 0, sizeof(struct idx_info),
+		    lustre_swab_idx_info, NULL);
+EXPORT_SYMBOL(RMF_IDX_INFO);
+struct req_msg_field RMF_HSM_USER_STATE =
+	DEFINE_MSGF("hsm_user_state", 0, sizeof(struct hsm_user_state),
+		    lustre_swab_hsm_user_state, NULL);
+EXPORT_SYMBOL(RMF_HSM_USER_STATE);
+
+struct req_msg_field RMF_HSM_STATE_SET =
+	DEFINE_MSGF("hsm_state_set", 0, sizeof(struct hsm_state_set),
+		    lustre_swab_hsm_state_set, NULL);
+EXPORT_SYMBOL(RMF_HSM_STATE_SET);
+
+struct req_msg_field RMF_MDS_HSM_PROGRESS =
+	DEFINE_MSGF("hsm_progress", 0, sizeof(struct hsm_progress_kernel),
+		    lustre_swab_hsm_progress_kernel, NULL);
+EXPORT_SYMBOL(RMF_MDS_HSM_PROGRESS);
+
+struct req_msg_field RMF_MDS_HSM_CURRENT_ACTION =
+	DEFINE_MSGF("hsm_current_action", 0, sizeof(struct hsm_current_action),
+		    lustre_swab_hsm_current_action, NULL);
+EXPORT_SYMBOL(RMF_MDS_HSM_CURRENT_ACTION);
+
+struct req_msg_field RMF_MDS_HSM_USER_ITEM =
+	DEFINE_MSGF("hsm_user_item", RMF_F_STRUCT_ARRAY,
+		    sizeof(struct hsm_user_item), lustre_swab_hsm_user_item,
+		    NULL);
+EXPORT_SYMBOL(RMF_MDS_HSM_USER_ITEM);
+
+struct req_msg_field RMF_MDS_HSM_ARCHIVE =
+	DEFINE_MSGF("hsm_archive", 0,
+		    sizeof(__u32), lustre_swab_generic_32s, NULL);
+EXPORT_SYMBOL(RMF_MDS_HSM_ARCHIVE);
+
+struct req_msg_field RMF_MDS_HSM_REQUEST =
+	DEFINE_MSGF("hsm_request", 0, sizeof(struct hsm_request),
+		    lustre_swab_hsm_request, NULL);
+EXPORT_SYMBOL(RMF_MDS_HSM_REQUEST);
+
+struct req_msg_field RMF_UPDATE = DEFINE_MSGF("update", 0, -1,
+					      lustre_swab_update_buf, NULL);
+EXPORT_SYMBOL(RMF_UPDATE);
+
+struct req_msg_field RMF_UPDATE_REPLY = DEFINE_MSGF("update_reply", 0, -1,
+						lustre_swab_update_reply_buf,
+						    NULL);
+EXPORT_SYMBOL(RMF_UPDATE_REPLY);
+
+struct req_msg_field RMF_SWAP_LAYOUTS =
+	DEFINE_MSGF("swap_layouts", 0, sizeof(struct  mdc_swap_layouts),
+		    lustre_swab_swap_layouts, NULL);
+EXPORT_SYMBOL(RMF_SWAP_LAYOUTS);
+/*
+ * Request formats.
+ */
+
+struct req_format {
+	const char *rf_name;
+	int	 rf_idx;
+	struct {
+		int			  nr;
+		const struct req_msg_field **d;
+	} rf_fields[RCL_NR];
+};
+
+#define DEFINE_REQ_FMT(name, client, client_nr, server, server_nr) {    \
+	.rf_name   = name,					      \
+	.rf_fields = {						  \
+		[RCL_CLIENT] = {					\
+			.nr = client_nr,				\
+			.d  = client				    \
+		},						      \
+		[RCL_SERVER] = {					\
+			.nr = server_nr,				\
+			.d  = server				    \
+		}						       \
+	}							       \
+}
+
+#define DEFINE_REQ_FMT0(name, client, server)				  \
+DEFINE_REQ_FMT(name, client, ARRAY_SIZE(client), server, ARRAY_SIZE(server))
+
+struct req_format RQF_OBD_PING =
+	DEFINE_REQ_FMT0("OBD_PING", empty, empty);
+EXPORT_SYMBOL(RQF_OBD_PING);
+
+struct req_format RQF_OBD_SET_INFO =
+	DEFINE_REQ_FMT0("OBD_SET_INFO", obd_set_info_client, empty);
+EXPORT_SYMBOL(RQF_OBD_SET_INFO);
+
+/* Read index file through the network */
+struct req_format RQF_OBD_IDX_READ =
+	DEFINE_REQ_FMT0("OBD_IDX_READ",
+			obd_idx_read_client, obd_idx_read_server);
+EXPORT_SYMBOL(RQF_OBD_IDX_READ);
+
+struct req_format RQF_SEC_CTX =
+	DEFINE_REQ_FMT0("SEC_CTX", empty, empty);
+EXPORT_SYMBOL(RQF_SEC_CTX);
+
+struct req_format RQF_MGS_TARGET_REG =
+	DEFINE_REQ_FMT0("MGS_TARGET_REG", mgs_target_info_only,
+			 mgs_target_info_only);
+EXPORT_SYMBOL(RQF_MGS_TARGET_REG);
+
+struct req_format RQF_MGS_SET_INFO =
+	DEFINE_REQ_FMT0("MGS_SET_INFO", mgs_set_info,
+			 mgs_set_info);
+EXPORT_SYMBOL(RQF_MGS_SET_INFO);
+
+struct req_format RQF_MGS_CONFIG_READ =
+	DEFINE_REQ_FMT0("MGS_CONFIG_READ", mgs_config_read_client,
+			 mgs_config_read_server);
+EXPORT_SYMBOL(RQF_MGS_CONFIG_READ);
+
+struct req_format RQF_SEQ_QUERY =
+	DEFINE_REQ_FMT0("SEQ_QUERY", seq_query_client, seq_query_server);
+EXPORT_SYMBOL(RQF_SEQ_QUERY);
+
+struct req_format RQF_FLD_QUERY =
+	DEFINE_REQ_FMT0("FLD_QUERY", fld_query_client, fld_query_server);
+EXPORT_SYMBOL(RQF_FLD_QUERY);
+
+struct req_format RQF_LOG_CANCEL =
+	DEFINE_REQ_FMT0("OBD_LOG_CANCEL", log_cancel_client, empty);
+EXPORT_SYMBOL(RQF_LOG_CANCEL);
+
+struct req_format RQF_MDS_QUOTACHECK =
+	DEFINE_REQ_FMT0("MDS_QUOTACHECK", quotactl_only, empty);
+EXPORT_SYMBOL(RQF_MDS_QUOTACHECK);
+
+struct req_format RQF_OST_QUOTACHECK =
+	DEFINE_REQ_FMT0("OST_QUOTACHECK", quotactl_only, empty);
+EXPORT_SYMBOL(RQF_OST_QUOTACHECK);
+
+struct req_format RQF_MDS_QUOTACTL =
+	DEFINE_REQ_FMT0("MDS_QUOTACTL", quotactl_only, quotactl_only);
+EXPORT_SYMBOL(RQF_MDS_QUOTACTL);
+
+struct req_format RQF_OST_QUOTACTL =
+	DEFINE_REQ_FMT0("OST_QUOTACTL", quotactl_only, quotactl_only);
+EXPORT_SYMBOL(RQF_OST_QUOTACTL);
+
+struct req_format RQF_QC_CALLBACK =
+	DEFINE_REQ_FMT0("QC_CALLBACK", quotactl_only, empty);
+EXPORT_SYMBOL(RQF_QC_CALLBACK);
+
+struct req_format RQF_QUOTA_DQACQ =
+	DEFINE_REQ_FMT0("QUOTA_DQACQ", quota_body_only, quota_body_only);
+EXPORT_SYMBOL(RQF_QUOTA_DQACQ);
+
+struct req_format RQF_LDLM_INTENT_QUOTA =
+	DEFINE_REQ_FMT0("LDLM_INTENT_QUOTA",
+			ldlm_intent_quota_client,
+			ldlm_intent_quota_server);
+EXPORT_SYMBOL(RQF_LDLM_INTENT_QUOTA);
+
+struct req_format RQF_MDS_GETSTATUS =
+	DEFINE_REQ_FMT0("MDS_GETSTATUS", mdt_body_only, mdt_body_capa);
+EXPORT_SYMBOL(RQF_MDS_GETSTATUS);
+
+struct req_format RQF_MDS_STATFS =
+	DEFINE_REQ_FMT0("MDS_STATFS", empty, obd_statfs_server);
+EXPORT_SYMBOL(RQF_MDS_STATFS);
+
+struct req_format RQF_MDS_SYNC =
+	DEFINE_REQ_FMT0("MDS_SYNC", mdt_body_capa, mdt_body_only);
+EXPORT_SYMBOL(RQF_MDS_SYNC);
+
+struct req_format RQF_MDS_GETATTR =
+	DEFINE_REQ_FMT0("MDS_GETATTR", mdt_body_capa, mds_getattr_server);
+EXPORT_SYMBOL(RQF_MDS_GETATTR);
+
+struct req_format RQF_MDS_GETXATTR =
+	DEFINE_REQ_FMT0("MDS_GETXATTR",
+			mds_getxattr_client, mds_getxattr_server);
+EXPORT_SYMBOL(RQF_MDS_GETXATTR);
+
+struct req_format RQF_MDS_GETATTR_NAME =
+	DEFINE_REQ_FMT0("MDS_GETATTR_NAME",
+			mds_getattr_name_client, mds_getattr_server);
+EXPORT_SYMBOL(RQF_MDS_GETATTR_NAME);
+
+struct req_format RQF_MDS_REINT =
+	DEFINE_REQ_FMT0("MDS_REINT", mds_reint_client, mdt_body_only);
+EXPORT_SYMBOL(RQF_MDS_REINT);
+
+struct req_format RQF_MDS_REINT_CREATE =
+	DEFINE_REQ_FMT0("MDS_REINT_CREATE",
+			mds_reint_create_client, mdt_body_capa);
+EXPORT_SYMBOL(RQF_MDS_REINT_CREATE);
+
+struct req_format RQF_MDS_REINT_CREATE_RMT_ACL =
+	DEFINE_REQ_FMT0("MDS_REINT_CREATE_RMT_ACL",
+			mds_reint_create_rmt_acl_client, mdt_body_capa);
+EXPORT_SYMBOL(RQF_MDS_REINT_CREATE_RMT_ACL);
+
+struct req_format RQF_MDS_REINT_CREATE_SLAVE =
+	DEFINE_REQ_FMT0("MDS_REINT_CREATE_EA",
+			mds_reint_create_slave_client, mdt_body_capa);
+EXPORT_SYMBOL(RQF_MDS_REINT_CREATE_SLAVE);
+
+struct req_format RQF_MDS_REINT_CREATE_SYM =
+	DEFINE_REQ_FMT0("MDS_REINT_CREATE_SYM",
+			mds_reint_create_sym_client, mdt_body_capa);
+EXPORT_SYMBOL(RQF_MDS_REINT_CREATE_SYM);
+
+struct req_format RQF_MDS_REINT_OPEN =
+	DEFINE_REQ_FMT0("MDS_REINT_OPEN",
+			mds_reint_open_client, mds_reint_open_server);
+EXPORT_SYMBOL(RQF_MDS_REINT_OPEN);
+
+struct req_format RQF_MDS_REINT_UNLINK =
+	DEFINE_REQ_FMT0("MDS_REINT_UNLINK", mds_reint_unlink_client,
+			mds_last_unlink_server);
+EXPORT_SYMBOL(RQF_MDS_REINT_UNLINK);
+
+struct req_format RQF_MDS_REINT_LINK =
+	DEFINE_REQ_FMT0("MDS_REINT_LINK",
+			mds_reint_link_client, mdt_body_only);
+EXPORT_SYMBOL(RQF_MDS_REINT_LINK);
+
+struct req_format RQF_MDS_REINT_RENAME =
+	DEFINE_REQ_FMT0("MDS_REINT_RENAME", mds_reint_rename_client,
+			mds_last_unlink_server);
+EXPORT_SYMBOL(RQF_MDS_REINT_RENAME);
+
+struct req_format RQF_MDS_REINT_SETATTR =
+	DEFINE_REQ_FMT0("MDS_REINT_SETATTR",
+			mds_reint_setattr_client, mds_setattr_server);
+EXPORT_SYMBOL(RQF_MDS_REINT_SETATTR);
+
+struct req_format RQF_MDS_REINT_SETXATTR =
+	DEFINE_REQ_FMT0("MDS_REINT_SETXATTR",
+			mds_reint_setxattr_client, mdt_body_only);
+EXPORT_SYMBOL(RQF_MDS_REINT_SETXATTR);
+
+struct req_format RQF_MDS_CONNECT =
+	DEFINE_REQ_FMT0("MDS_CONNECT",
+			obd_connect_client, obd_connect_server);
+EXPORT_SYMBOL(RQF_MDS_CONNECT);
+
+struct req_format RQF_MDS_DISCONNECT =
+	DEFINE_REQ_FMT0("MDS_DISCONNECT", empty, empty);
+EXPORT_SYMBOL(RQF_MDS_DISCONNECT);
+
+struct req_format RQF_MDS_GET_INFO =
+	DEFINE_REQ_FMT0("MDS_GET_INFO", mds_getinfo_client,
+			mds_getinfo_server);
+EXPORT_SYMBOL(RQF_MDS_GET_INFO);
+
+struct req_format RQF_UPDATE_OBJ =
+	DEFINE_REQ_FMT0("OBJECT_UPDATE_OBJ", mds_update_client,
+			mds_update_server);
+EXPORT_SYMBOL(RQF_UPDATE_OBJ);
+
+struct req_format RQF_LDLM_ENQUEUE =
+	DEFINE_REQ_FMT0("LDLM_ENQUEUE",
+			ldlm_enqueue_client, ldlm_enqueue_lvb_server);
+EXPORT_SYMBOL(RQF_LDLM_ENQUEUE);
+
+struct req_format RQF_LDLM_ENQUEUE_LVB =
+	DEFINE_REQ_FMT0("LDLM_ENQUEUE_LVB",
+			ldlm_enqueue_client, ldlm_enqueue_lvb_server);
+EXPORT_SYMBOL(RQF_LDLM_ENQUEUE_LVB);
+
+struct req_format RQF_LDLM_CONVERT =
+	DEFINE_REQ_FMT0("LDLM_CONVERT",
+			ldlm_enqueue_client, ldlm_enqueue_server);
+EXPORT_SYMBOL(RQF_LDLM_CONVERT);
+
+struct req_format RQF_LDLM_CANCEL =
+	DEFINE_REQ_FMT0("LDLM_CANCEL", ldlm_enqueue_client, empty);
+EXPORT_SYMBOL(RQF_LDLM_CANCEL);
+
+struct req_format RQF_LDLM_CALLBACK =
+	DEFINE_REQ_FMT0("LDLM_CALLBACK", ldlm_enqueue_client, empty);
+EXPORT_SYMBOL(RQF_LDLM_CALLBACK);
+
+struct req_format RQF_LDLM_CP_CALLBACK =
+	DEFINE_REQ_FMT0("LDLM_CP_CALLBACK", ldlm_cp_callback_client, empty);
+EXPORT_SYMBOL(RQF_LDLM_CP_CALLBACK);
+
+struct req_format RQF_LDLM_BL_CALLBACK =
+	DEFINE_REQ_FMT0("LDLM_BL_CALLBACK", ldlm_enqueue_client, empty);
+EXPORT_SYMBOL(RQF_LDLM_BL_CALLBACK);
+
+struct req_format RQF_LDLM_GL_CALLBACK =
+	DEFINE_REQ_FMT0("LDLM_GL_CALLBACK", ldlm_enqueue_client,
+			ldlm_gl_callback_server);
+EXPORT_SYMBOL(RQF_LDLM_GL_CALLBACK);
+
+struct req_format RQF_LDLM_GL_DESC_CALLBACK =
+	DEFINE_REQ_FMT0("LDLM_GL_CALLBACK", ldlm_gl_callback_desc_client,
+			ldlm_gl_callback_server);
+EXPORT_SYMBOL(RQF_LDLM_GL_DESC_CALLBACK);
+
+struct req_format RQF_LDLM_INTENT_BASIC =
+	DEFINE_REQ_FMT0("LDLM_INTENT_BASIC",
+			ldlm_intent_basic_client, ldlm_enqueue_lvb_server);
+EXPORT_SYMBOL(RQF_LDLM_INTENT_BASIC);
+
+struct req_format RQF_LDLM_INTENT =
+	DEFINE_REQ_FMT0("LDLM_INTENT",
+			ldlm_intent_client, ldlm_intent_server);
+EXPORT_SYMBOL(RQF_LDLM_INTENT);
+
+struct req_format RQF_LDLM_INTENT_LAYOUT =
+	DEFINE_REQ_FMT0("LDLM_INTENT_LAYOUT ",
+			ldlm_intent_layout_client, ldlm_enqueue_lvb_server);
+EXPORT_SYMBOL(RQF_LDLM_INTENT_LAYOUT);
+
+struct req_format RQF_LDLM_INTENT_GETATTR =
+	DEFINE_REQ_FMT0("LDLM_INTENT_GETATTR",
+			ldlm_intent_getattr_client, ldlm_intent_getattr_server);
+EXPORT_SYMBOL(RQF_LDLM_INTENT_GETATTR);
+
+struct req_format RQF_LDLM_INTENT_OPEN =
+	DEFINE_REQ_FMT0("LDLM_INTENT_OPEN",
+			ldlm_intent_open_client, ldlm_intent_open_server);
+EXPORT_SYMBOL(RQF_LDLM_INTENT_OPEN);
+
+struct req_format RQF_LDLM_INTENT_CREATE =
+	DEFINE_REQ_FMT0("LDLM_INTENT_CREATE",
+			ldlm_intent_create_client, ldlm_intent_getattr_server);
+EXPORT_SYMBOL(RQF_LDLM_INTENT_CREATE);
+
+struct req_format RQF_LDLM_INTENT_UNLINK =
+	DEFINE_REQ_FMT0("LDLM_INTENT_UNLINK",
+			ldlm_intent_unlink_client, ldlm_intent_server);
+EXPORT_SYMBOL(RQF_LDLM_INTENT_UNLINK);
+
+struct req_format RQF_LDLM_INTENT_GETXATTR =
+	DEFINE_REQ_FMT0("LDLM_INTENT_GETXATTR",
+			ldlm_intent_getxattr_client,
+			ldlm_intent_getxattr_server);
+EXPORT_SYMBOL(RQF_LDLM_INTENT_GETXATTR);
+
+struct req_format RQF_MDS_CLOSE =
+	DEFINE_REQ_FMT0("MDS_CLOSE",
+			mdt_close_client, mds_last_unlink_server);
+EXPORT_SYMBOL(RQF_MDS_CLOSE);
+
+struct req_format RQF_MDS_RELEASE_CLOSE =
+	DEFINE_REQ_FMT0("MDS_CLOSE",
+			mdt_release_close_client, mds_last_unlink_server);
+EXPORT_SYMBOL(RQF_MDS_RELEASE_CLOSE);
+
+struct req_format RQF_MDS_PIN =
+	DEFINE_REQ_FMT0("MDS_PIN",
+			mdt_body_capa, mdt_body_only);
+EXPORT_SYMBOL(RQF_MDS_PIN);
+
+struct req_format RQF_MDS_UNPIN =
+	DEFINE_REQ_FMT0("MDS_UNPIN", mdt_body_only, empty);
+EXPORT_SYMBOL(RQF_MDS_UNPIN);
+
+struct req_format RQF_MDS_DONE_WRITING =
+	DEFINE_REQ_FMT0("MDS_DONE_WRITING",
+			mdt_close_client, mdt_body_only);
+EXPORT_SYMBOL(RQF_MDS_DONE_WRITING);
+
+struct req_format RQF_MDS_READPAGE =
+	DEFINE_REQ_FMT0("MDS_READPAGE",
+			mdt_body_capa, mdt_body_only);
+EXPORT_SYMBOL(RQF_MDS_READPAGE);
+
+struct req_format RQF_MDS_HSM_ACTION =
+	DEFINE_REQ_FMT0("MDS_HSM_ACTION", mdt_body_capa, mdt_hsm_action_server);
+EXPORT_SYMBOL(RQF_MDS_HSM_ACTION);
+
+struct req_format RQF_MDS_HSM_PROGRESS =
+	DEFINE_REQ_FMT0("MDS_HSM_PROGRESS", mdt_hsm_progress, empty);
+EXPORT_SYMBOL(RQF_MDS_HSM_PROGRESS);
+
+struct req_format RQF_MDS_HSM_CT_REGISTER =
+	DEFINE_REQ_FMT0("MDS_HSM_CT_REGISTER", mdt_hsm_ct_register, empty);
+EXPORT_SYMBOL(RQF_MDS_HSM_CT_REGISTER);
+
+struct req_format RQF_MDS_HSM_CT_UNREGISTER =
+	DEFINE_REQ_FMT0("MDS_HSM_CT_UNREGISTER", mdt_hsm_ct_unregister, empty);
+EXPORT_SYMBOL(RQF_MDS_HSM_CT_UNREGISTER);
+
+struct req_format RQF_MDS_HSM_STATE_GET =
+	DEFINE_REQ_FMT0("MDS_HSM_STATE_GET",
+			mdt_body_capa, mdt_hsm_state_get_server);
+EXPORT_SYMBOL(RQF_MDS_HSM_STATE_GET);
+
+struct req_format RQF_MDS_HSM_STATE_SET =
+	DEFINE_REQ_FMT0("MDS_HSM_STATE_SET", mdt_hsm_state_set, empty);
+EXPORT_SYMBOL(RQF_MDS_HSM_STATE_SET);
+
+struct req_format RQF_MDS_HSM_REQUEST =
+	DEFINE_REQ_FMT0("MDS_HSM_REQUEST", mdt_hsm_request, empty);
+EXPORT_SYMBOL(RQF_MDS_HSM_REQUEST);
+
+struct req_format RQF_MDS_SWAP_LAYOUTS =
+	DEFINE_REQ_FMT0("MDS_SWAP_LAYOUTS",
+			mdt_swap_layouts, empty);
+EXPORT_SYMBOL(RQF_MDS_SWAP_LAYOUTS);
+
+/* This is for split */
+struct req_format RQF_MDS_WRITEPAGE =
+	DEFINE_REQ_FMT0("MDS_WRITEPAGE",
+			mdt_body_capa, mdt_body_only);
+EXPORT_SYMBOL(RQF_MDS_WRITEPAGE);
+
+struct req_format RQF_MDS_IS_SUBDIR =
+	DEFINE_REQ_FMT0("MDS_IS_SUBDIR",
+			mdt_body_only, mdt_body_only);
+EXPORT_SYMBOL(RQF_MDS_IS_SUBDIR);
+
+struct req_format RQF_LLOG_ORIGIN_HANDLE_CREATE =
+	DEFINE_REQ_FMT0("LLOG_ORIGIN_HANDLE_CREATE",
+			llog_origin_handle_create_client, llogd_body_only);
+EXPORT_SYMBOL(RQF_LLOG_ORIGIN_HANDLE_CREATE);
+
+struct req_format RQF_LLOG_ORIGIN_HANDLE_DESTROY =
+	DEFINE_REQ_FMT0("LLOG_ORIGIN_HANDLE_DESTROY",
+			llogd_body_only, llogd_body_only);
+EXPORT_SYMBOL(RQF_LLOG_ORIGIN_HANDLE_DESTROY);
+
+struct req_format RQF_LLOG_ORIGIN_HANDLE_NEXT_BLOCK =
+	DEFINE_REQ_FMT0("LLOG_ORIGIN_HANDLE_NEXT_BLOCK",
+			llogd_body_only, llog_origin_handle_next_block_server);
+EXPORT_SYMBOL(RQF_LLOG_ORIGIN_HANDLE_NEXT_BLOCK);
+
+struct req_format RQF_LLOG_ORIGIN_HANDLE_PREV_BLOCK =
+	DEFINE_REQ_FMT0("LLOG_ORIGIN_HANDLE_PREV_BLOCK",
+			llogd_body_only, llog_origin_handle_next_block_server);
+EXPORT_SYMBOL(RQF_LLOG_ORIGIN_HANDLE_PREV_BLOCK);
+
+struct req_format RQF_LLOG_ORIGIN_HANDLE_READ_HEADER =
+	DEFINE_REQ_FMT0("LLOG_ORIGIN_HANDLE_READ_HEADER",
+			llogd_body_only, llog_log_hdr_only);
+EXPORT_SYMBOL(RQF_LLOG_ORIGIN_HANDLE_READ_HEADER);
+
+struct req_format RQF_LLOG_ORIGIN_CONNECT =
+	DEFINE_REQ_FMT0("LLOG_ORIGIN_CONNECT", llogd_conn_body_only, empty);
+EXPORT_SYMBOL(RQF_LLOG_ORIGIN_CONNECT);
+
+struct req_format RQF_CONNECT =
+	DEFINE_REQ_FMT0("CONNECT", obd_connect_client, obd_connect_server);
+EXPORT_SYMBOL(RQF_CONNECT);
+
+struct req_format RQF_OST_CONNECT =
+	DEFINE_REQ_FMT0("OST_CONNECT",
+			obd_connect_client, obd_connect_server);
+EXPORT_SYMBOL(RQF_OST_CONNECT);
+
+struct req_format RQF_OST_DISCONNECT =
+	DEFINE_REQ_FMT0("OST_DISCONNECT", empty, empty);
+EXPORT_SYMBOL(RQF_OST_DISCONNECT);
+
+struct req_format RQF_OST_GETATTR =
+	DEFINE_REQ_FMT0("OST_GETATTR", ost_body_capa, ost_body_only);
+EXPORT_SYMBOL(RQF_OST_GETATTR);
+
+struct req_format RQF_OST_SETATTR =
+	DEFINE_REQ_FMT0("OST_SETATTR", ost_body_capa, ost_body_only);
+EXPORT_SYMBOL(RQF_OST_SETATTR);
+
+struct req_format RQF_OST_CREATE =
+	DEFINE_REQ_FMT0("OST_CREATE", ost_body_only, ost_body_only);
+EXPORT_SYMBOL(RQF_OST_CREATE);
+
+struct req_format RQF_OST_PUNCH =
+	DEFINE_REQ_FMT0("OST_PUNCH", ost_body_capa, ost_body_only);
+EXPORT_SYMBOL(RQF_OST_PUNCH);
+
+struct req_format RQF_OST_SYNC =
+	DEFINE_REQ_FMT0("OST_SYNC", ost_body_capa, ost_body_only);
+EXPORT_SYMBOL(RQF_OST_SYNC);
+
+struct req_format RQF_OST_DESTROY =
+	DEFINE_REQ_FMT0("OST_DESTROY", ost_destroy_client, ost_body_only);
+EXPORT_SYMBOL(RQF_OST_DESTROY);
+
+struct req_format RQF_OST_BRW_READ =
+	DEFINE_REQ_FMT0("OST_BRW_READ", ost_brw_client, ost_brw_read_server);
+EXPORT_SYMBOL(RQF_OST_BRW_READ);
+
+struct req_format RQF_OST_BRW_WRITE =
+	DEFINE_REQ_FMT0("OST_BRW_WRITE", ost_brw_client, ost_brw_write_server);
+EXPORT_SYMBOL(RQF_OST_BRW_WRITE);
+
+struct req_format RQF_OST_STATFS =
+	DEFINE_REQ_FMT0("OST_STATFS", empty, obd_statfs_server);
+EXPORT_SYMBOL(RQF_OST_STATFS);
+
+struct req_format RQF_OST_SET_GRANT_INFO =
+	DEFINE_REQ_FMT0("OST_SET_GRANT_INFO", ost_grant_shrink_client,
+			 ost_body_only);
+EXPORT_SYMBOL(RQF_OST_SET_GRANT_INFO);
+
+struct req_format RQF_OST_GET_INFO_GENERIC =
+	DEFINE_REQ_FMT0("OST_GET_INFO", ost_get_info_generic_client,
+					ost_get_info_generic_server);
+EXPORT_SYMBOL(RQF_OST_GET_INFO_GENERIC);
+
+struct req_format RQF_OST_GET_INFO_LAST_ID =
+	DEFINE_REQ_FMT0("OST_GET_INFO_LAST_ID", ost_get_info_generic_client,
+						ost_get_last_id_server);
+EXPORT_SYMBOL(RQF_OST_GET_INFO_LAST_ID);
+
+struct req_format RQF_OST_GET_INFO_LAST_FID =
+	DEFINE_REQ_FMT0("OST_GET_INFO_LAST_FID", obd_set_info_client,
+						 ost_get_last_fid_server);
+EXPORT_SYMBOL(RQF_OST_GET_INFO_LAST_FID);
+
+struct req_format RQF_OST_SET_INFO_LAST_FID =
+	DEFINE_REQ_FMT0("OST_SET_INFO_LAST_FID", obd_set_info_client,
+						 empty);
+EXPORT_SYMBOL(RQF_OST_SET_INFO_LAST_FID);
+
+struct req_format RQF_OST_GET_INFO_FIEMAP =
+	DEFINE_REQ_FMT0("OST_GET_INFO_FIEMAP", ost_get_fiemap_client,
+					       ost_get_fiemap_server);
+EXPORT_SYMBOL(RQF_OST_GET_INFO_FIEMAP);
+
+#if !defined(__REQ_LAYOUT_USER__)
+
+/* Convenience macro */
+#define FMT_FIELD(fmt, i, j) (fmt)->rf_fields[(i)].d[(j)]
+
+/**
+ * Initializes the capsule abstraction by computing and setting the \a rf_idx
+ * field of RQFs and the \a rmf_offset field of RMFs.
+ */
+int req_layout_init(void)
+{
+	int i;
+	int j;
+	int k;
+	struct req_format *rf = NULL;
+
+	for (i = 0; i < ARRAY_SIZE(req_formats); ++i) {
+		rf = req_formats[i];
+		rf->rf_idx = i;
+		for (j = 0; j < RCL_NR; ++j) {
+			LASSERT(rf->rf_fields[j].nr <= REQ_MAX_FIELD_NR);
+			for (k = 0; k < rf->rf_fields[j].nr; ++k) {
+				struct req_msg_field *field;
+
+				field = (typeof(field))rf->rf_fields[j].d[k];
+				LASSERT(!(field->rmf_flags & RMF_F_STRUCT_ARRAY)
+					|| field->rmf_size > 0);
+				LASSERT(field->rmf_offset[i][j] == 0);
+				/*
+				 * k + 1 to detect unused format/field
+				 * combinations.
+				 */
+				field->rmf_offset[i][j] = k + 1;
+			}
+		}
+	}
+	return 0;
+}
+EXPORT_SYMBOL(req_layout_init);
+
+void req_layout_fini(void)
+{
+}
+EXPORT_SYMBOL(req_layout_fini);
+
+/**
+ * Initializes the expected sizes of each RMF in a \a pill (\a rc_area) to -1.
+ *
+ * Actual/expected field sizes are set elsewhere in functions in this file:
+ * req_capsule_init(), req_capsule_server_pack(), req_capsule_set_size() and
+ * req_capsule_msg_size().  The \a rc_area information is used by.
+ * ptlrpc_request_set_replen().
+ */
+void req_capsule_init_area(struct req_capsule *pill)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(pill->rc_area[RCL_CLIENT]); i++) {
+		pill->rc_area[RCL_CLIENT][i] = -1;
+		pill->rc_area[RCL_SERVER][i] = -1;
+	}
+}
+EXPORT_SYMBOL(req_capsule_init_area);
+
+/**
+ * Initialize a pill.
+ *
+ * The \a location indicates whether the caller is executing on the client side
+ * (RCL_CLIENT) or server side (RCL_SERVER)..
+ */
+void req_capsule_init(struct req_capsule *pill,
+		      struct ptlrpc_request *req,
+		      enum req_location location)
+{
+	LASSERT(location == RCL_SERVER || location == RCL_CLIENT);
+
+	/*
+	 * Today all capsules are embedded in ptlrpc_request structs,
+	 * but just in case that ever isn't the case, we don't reach
+	 * into req unless req != NULL and pill is the one embedded in
+	 * the req.
+	 *
+	 * The req->rq_pill_init flag makes it safe to initialize a pill
+	 * twice, which might happen in the OST paths as a result of the
+	 * high-priority RPC queue getting peeked at before ost_handle()
+	 * handles an OST RPC.
+	 */
+	if (req != NULL && pill == &req->rq_pill && req->rq_pill_init)
+		return;
+
+	memset(pill, 0, sizeof(*pill));
+	pill->rc_req = req;
+	pill->rc_loc = location;
+	req_capsule_init_area(pill);
+
+	if (req != NULL && pill == &req->rq_pill)
+		req->rq_pill_init = 1;
+}
+EXPORT_SYMBOL(req_capsule_init);
+
+void req_capsule_fini(struct req_capsule *pill)
+{
+}
+EXPORT_SYMBOL(req_capsule_fini);
+
+static int __req_format_is_sane(const struct req_format *fmt)
+{
+	return
+		0 <= fmt->rf_idx && fmt->rf_idx < ARRAY_SIZE(req_formats) &&
+		req_formats[fmt->rf_idx] == fmt;
+}
+
+static struct lustre_msg *__req_msg(const struct req_capsule *pill,
+				    enum req_location loc)
+{
+	struct ptlrpc_request *req;
+
+	req = pill->rc_req;
+	return loc == RCL_CLIENT ? req->rq_reqmsg : req->rq_repmsg;
+}
+
+/**
+ * Set the format (\a fmt) of a \a pill; format changes are not allowed here
+ * (see req_capsule_extend()).
+ */
+void req_capsule_set(struct req_capsule *pill, const struct req_format *fmt)
+{
+	LASSERT(pill->rc_fmt == NULL || pill->rc_fmt == fmt);
+	LASSERT(__req_format_is_sane(fmt));
+
+	pill->rc_fmt = fmt;
+}
+EXPORT_SYMBOL(req_capsule_set);
+
+/**
+ * Fills in any parts of the \a rc_area of a \a pill that haven't been filled in
+ * yet.
+
+ * \a rc_area is an array of REQ_MAX_FIELD_NR elements, used to store sizes of
+ * variable-sized fields.  The field sizes come from the declared \a rmf_size
+ * field of a \a pill's \a rc_fmt's RMF's.
+ */
+int req_capsule_filled_sizes(struct req_capsule *pill,
+			   enum req_location loc)
+{
+	const struct req_format *fmt = pill->rc_fmt;
+	int		      i;
+
+	LASSERT(fmt != NULL);
+
+	for (i = 0; i < fmt->rf_fields[loc].nr; ++i) {
+		if (pill->rc_area[loc][i] == -1) {
+			pill->rc_area[loc][i] =
+					    fmt->rf_fields[loc].d[i]->rmf_size;
+			if (pill->rc_area[loc][i] == -1) {
+				/*
+				 * Skip the following fields.
+				 *
+				 * If this LASSERT() trips then you're missing a
+				 * call to req_capsule_set_size().
+				 */
+				LASSERT(loc != RCL_SERVER);
+				break;
+			}
+		}
+	}
+	return i;
+}
+EXPORT_SYMBOL(req_capsule_filled_sizes);
+
+/**
+ * Capsule equivalent of lustre_pack_request() and lustre_pack_reply().
+ *
+ * This function uses the \a pill's \a rc_area as filled in by
+ * req_capsule_set_size() or req_capsule_filled_sizes() (the latter is called by
+ * this function).
+ */
+int req_capsule_server_pack(struct req_capsule *pill)
+{
+	const struct req_format *fmt;
+	int		      count;
+	int		      rc;
+
+	LASSERT(pill->rc_loc == RCL_SERVER);
+	fmt = pill->rc_fmt;
+	LASSERT(fmt != NULL);
+
+	count = req_capsule_filled_sizes(pill, RCL_SERVER);
+	rc = lustre_pack_reply(pill->rc_req, count,
+			       pill->rc_area[RCL_SERVER], NULL);
+	if (rc != 0) {
+		DEBUG_REQ(D_ERROR, pill->rc_req,
+		       "Cannot pack %d fields in format `%s': ",
+		       count, fmt->rf_name);
+	}
+	return rc;
+}
+EXPORT_SYMBOL(req_capsule_server_pack);
+
+/**
+ * Returns the PTLRPC request or reply (\a loc) buffer offset of a \a pill
+ * corresponding to the given RMF (\a field).
+ */
+static int __req_capsule_offset(const struct req_capsule *pill,
+				const struct req_msg_field *field,
+				enum req_location loc)
+{
+	int offset;
+
+	offset = field->rmf_offset[pill->rc_fmt->rf_idx][loc];
+	LASSERTF(offset > 0, "%s:%s, off=%d, loc=%d\n",
+			    pill->rc_fmt->rf_name,
+			    field->rmf_name, offset, loc);
+	offset--;
+
+	LASSERT(0 <= offset && offset < REQ_MAX_FIELD_NR);
+	return offset;
+}
+
+/**
+ * Helper for __req_capsule_get(); swabs value / array of values and/or dumps
+ * them if desired.
+ */
+static
+void
+swabber_dumper_helper(struct req_capsule *pill,
+		      const struct req_msg_field *field,
+		      enum req_location loc,
+		      int offset,
+		      void *value, int len, int dump, void (*swabber)(void *))
+{
+	void    *p;
+	int     i;
+	int     n;
+	int     do_swab;
+	int     inout = loc == RCL_CLIENT;
+
+	swabber = swabber ?: field->rmf_swabber;
+
+	if (ptlrpc_buf_need_swab(pill->rc_req, inout, offset) &&
+	    swabber != NULL && value != NULL)
+		do_swab = 1;
+	else
+		do_swab = 0;
+
+	if (!field->rmf_dumper)
+		dump = 0;
+
+	if (!(field->rmf_flags & RMF_F_STRUCT_ARRAY)) {
+		if (dump) {
+			CDEBUG(D_RPCTRACE, "Dump of %sfield %s follows\n",
+			       do_swab ? "unswabbed " : "", field->rmf_name);
+			field->rmf_dumper(value);
+		}
+		if (!do_swab)
+			return;
+		swabber(value);
+		ptlrpc_buf_set_swabbed(pill->rc_req, inout, offset);
+		if (dump) {
+			CDEBUG(D_RPCTRACE, "Dump of swabbed field %s follows\n",
+			       field->rmf_name);
+			field->rmf_dumper(value);
+		}
+
+		return;
+	}
+
+	/*
+	 * We're swabbing an array; swabber() swabs a single array element, so
+	 * swab every element.
+	 */
+	LASSERT((len % field->rmf_size) == 0);
+	for (p = value, i = 0, n = len / field->rmf_size;
+	     i < n;
+	     i++, p += field->rmf_size) {
+		if (dump) {
+			CDEBUG(D_RPCTRACE, "Dump of %sarray field %s, element %d follows\n",
+			       do_swab ? "unswabbed " : "", field->rmf_name, i);
+			field->rmf_dumper(p);
+		}
+		if (!do_swab)
+			continue;
+		swabber(p);
+		if (dump) {
+			CDEBUG(D_RPCTRACE, "Dump of swabbed array field %s, element %d follows\n",
+			       field->rmf_name, i);
+			field->rmf_dumper(value);
+		}
+	}
+	if (do_swab)
+		ptlrpc_buf_set_swabbed(pill->rc_req, inout, offset);
+}
+
+/**
+ * Returns the pointer to a PTLRPC request or reply (\a loc) buffer of a \a pill
+ * corresponding to the given RMF (\a field).
+ *
+ * The buffer will be swabbed using the given \a swabber.  If \a swabber == NULL
+ * then the \a rmf_swabber from the RMF will be used.  Soon there will be no
+ * calls to __req_capsule_get() with a non-NULL \a swabber; \a swabber will then
+ * be removed.  Fields with the \a RMF_F_STRUCT_ARRAY flag set will have each
+ * element of the array swabbed.
+ */
+static void *__req_capsule_get(struct req_capsule *pill,
+			       const struct req_msg_field *field,
+			       enum req_location loc,
+			       void (*swabber)(void *),
+			       int dump)
+{
+	const struct req_format *fmt;
+	struct lustre_msg       *msg;
+	void		    *value;
+	int		      len;
+	int		      offset;
+
+	void *(*getter)(struct lustre_msg *m, int n, int minlen);
+
+	static const char *rcl_names[RCL_NR] = {
+		[RCL_CLIENT] = "client",
+		[RCL_SERVER] = "server"
+	};
+
+	LASSERT(pill != NULL);
+	LASSERT(pill != LP_POISON);
+	fmt = pill->rc_fmt;
+	LASSERT(fmt != NULL);
+	LASSERT(fmt != LP_POISON);
+	LASSERT(__req_format_is_sane(fmt));
+
+	offset = __req_capsule_offset(pill, field, loc);
+
+	msg = __req_msg(pill, loc);
+	LASSERT(msg != NULL);
+
+	getter = (field->rmf_flags & RMF_F_STRING) ?
+		(typeof(getter))lustre_msg_string : lustre_msg_buf;
+
+	if (field->rmf_flags & RMF_F_STRUCT_ARRAY) {
+		/*
+		 * We've already asserted that field->rmf_size > 0 in
+		 * req_layout_init().
+		 */
+		len = lustre_msg_buflen(msg, offset);
+		if ((len % field->rmf_size) != 0) {
+			CERROR("%s: array field size mismatch %d modulo %d != 0 (%d)\n",
+			       field->rmf_name, len, field->rmf_size, loc);
+			return NULL;
+		}
+	} else if (pill->rc_area[loc][offset] != -1) {
+		len = pill->rc_area[loc][offset];
+	} else {
+		len = max(field->rmf_size, 0);
+	}
+	value = getter(msg, offset, len);
+
+	if (value == NULL) {
+		DEBUG_REQ(D_ERROR, pill->rc_req,
+			  "Wrong buffer for field `%s' (%d of %d) in format `%s': %d vs. %d (%s)\n",
+			  field->rmf_name, offset, lustre_msg_bufcount(msg),
+			  fmt->rf_name, lustre_msg_buflen(msg, offset), len,
+			  rcl_names[loc]);
+	} else {
+		swabber_dumper_helper(pill, field, loc, offset, value, len,
+				      dump, swabber);
+	}
+
+	return value;
+}
+
+/**
+ * Dump a request and/or reply
+ */
+static void __req_capsule_dump(struct req_capsule *pill, enum req_location loc)
+{
+	const struct    req_format *fmt;
+	const struct    req_msg_field *field;
+	int	     len;
+	int	     i;
+
+	fmt = pill->rc_fmt;
+
+	DEBUG_REQ(D_RPCTRACE, pill->rc_req, "BEGIN REQ CAPSULE DUMP\n");
+	for (i = 0; i < fmt->rf_fields[loc].nr; ++i) {
+		field = FMT_FIELD(fmt, loc, i);
+		if (field->rmf_dumper == NULL) {
+			/*
+			 * FIXME Add a default hex dumper for fields that don't
+			 * have a specific dumper
+			 */
+			len = req_capsule_get_size(pill, field, loc);
+			CDEBUG(D_RPCTRACE, "Field %s has no dumper function; field size is %d\n",
+			       field->rmf_name, len);
+		} else {
+			/* It's the dumping side-effect that we're interested in */
+			(void) __req_capsule_get(pill, field, loc, NULL, 1);
+		}
+	}
+	CDEBUG(D_RPCTRACE, "END REQ CAPSULE DUMP\n");
+}
+
+/**
+ * Dump a request.
+ */
+void req_capsule_client_dump(struct req_capsule *pill)
+{
+	__req_capsule_dump(pill, RCL_CLIENT);
+}
+EXPORT_SYMBOL(req_capsule_client_dump);
+
+/**
+ * Dump a reply
+ */
+void req_capsule_server_dump(struct req_capsule *pill)
+{
+	__req_capsule_dump(pill, RCL_SERVER);
+}
+EXPORT_SYMBOL(req_capsule_server_dump);
+
+/**
+ * Trivial wrapper around __req_capsule_get(), that returns the PTLRPC request
+ * buffer corresponding to the given RMF (\a field) of a \a pill.
+ */
+void *req_capsule_client_get(struct req_capsule *pill,
+			     const struct req_msg_field *field)
+{
+	return __req_capsule_get(pill, field, RCL_CLIENT, NULL, 0);
+}
+EXPORT_SYMBOL(req_capsule_client_get);
+
+/**
+ * Same as req_capsule_client_get(), but with a \a swabber argument.
+ *
+ * Currently unused; will be removed when req_capsule_server_swab_get() is
+ * unused too.
+ */
+void *req_capsule_client_swab_get(struct req_capsule *pill,
+				  const struct req_msg_field *field,
+				  void *swabber)
+{
+	return __req_capsule_get(pill, field, RCL_CLIENT, swabber, 0);
+}
+EXPORT_SYMBOL(req_capsule_client_swab_get);
+
+/**
+ * Utility that combines req_capsule_set_size() and req_capsule_client_get().
+ *
+ * First the \a pill's request \a field's size is set (\a rc_area) using
+ * req_capsule_set_size() with the given \a len.  Then the actual buffer is
+ * returned.
+ */
+void *req_capsule_client_sized_get(struct req_capsule *pill,
+				   const struct req_msg_field *field,
+				   int len)
+{
+	req_capsule_set_size(pill, field, RCL_CLIENT, len);
+	return __req_capsule_get(pill, field, RCL_CLIENT, NULL, 0);
+}
+EXPORT_SYMBOL(req_capsule_client_sized_get);
+
+/**
+ * Trivial wrapper around __req_capsule_get(), that returns the PTLRPC reply
+ * buffer corresponding to the given RMF (\a field) of a \a pill.
+ */
+void *req_capsule_server_get(struct req_capsule *pill,
+			     const struct req_msg_field *field)
+{
+	return __req_capsule_get(pill, field, RCL_SERVER, NULL, 0);
+}
+EXPORT_SYMBOL(req_capsule_server_get);
+
+/**
+ * Same as req_capsule_server_get(), but with a \a swabber argument.
+ *
+ * Ideally all swabbing should be done pursuant to RMF definitions, with no
+ * swabbing done outside this capsule abstraction.
+ */
+void *req_capsule_server_swab_get(struct req_capsule *pill,
+				  const struct req_msg_field *field,
+				  void *swabber)
+{
+	return __req_capsule_get(pill, field, RCL_SERVER, swabber, 0);
+}
+EXPORT_SYMBOL(req_capsule_server_swab_get);
+
+/**
+ * Utility that combines req_capsule_set_size() and req_capsule_server_get().
+ *
+ * First the \a pill's request \a field's size is set (\a rc_area) using
+ * req_capsule_set_size() with the given \a len.  Then the actual buffer is
+ * returned.
+ */
+void *req_capsule_server_sized_get(struct req_capsule *pill,
+				   const struct req_msg_field *field,
+				   int len)
+{
+	req_capsule_set_size(pill, field, RCL_SERVER, len);
+	return __req_capsule_get(pill, field, RCL_SERVER, NULL, 0);
+}
+EXPORT_SYMBOL(req_capsule_server_sized_get);
+
+void *req_capsule_server_sized_swab_get(struct req_capsule *pill,
+					const struct req_msg_field *field,
+					int len, void *swabber)
+{
+	req_capsule_set_size(pill, field, RCL_SERVER, len);
+	return __req_capsule_get(pill, field, RCL_SERVER, swabber, 0);
+}
+EXPORT_SYMBOL(req_capsule_server_sized_swab_get);
+
+/**
+ * Returns the buffer of a \a pill corresponding to the given \a field from the
+ * request (if the caller is executing on the server-side) or reply (if the
+ * caller is executing on the client-side).
+ *
+ * This function convenient for use is code that could be executed on the
+ * client and server alike.
+ */
+const void *req_capsule_other_get(struct req_capsule *pill,
+				  const struct req_msg_field *field)
+{
+	return __req_capsule_get(pill, field, pill->rc_loc ^ 1, NULL, 0);
+}
+EXPORT_SYMBOL(req_capsule_other_get);
+
+/**
+ * Set the size of the PTLRPC request/reply (\a loc) buffer for the given \a
+ * field of the given \a pill.
+ *
+ * This function must be used when constructing variable sized fields of a
+ * request or reply.
+ */
+void req_capsule_set_size(struct req_capsule *pill,
+			  const struct req_msg_field *field,
+			  enum req_location loc, int size)
+{
+	LASSERT(loc == RCL_SERVER || loc == RCL_CLIENT);
+
+	if ((size != field->rmf_size) &&
+	    (field->rmf_size != -1) &&
+	    !(field->rmf_flags & RMF_F_NO_SIZE_CHECK) &&
+	    (size > 0)) {
+		if ((field->rmf_flags & RMF_F_STRUCT_ARRAY) &&
+		    (size % field->rmf_size != 0)) {
+			CERROR("%s: array field size mismatch %d %% %d != 0 (%d)\n",
+			       field->rmf_name, size, field->rmf_size, loc);
+			LBUG();
+		} else if (!(field->rmf_flags & RMF_F_STRUCT_ARRAY) &&
+		    size < field->rmf_size) {
+			CERROR("%s: field size mismatch %d != %d (%d)\n",
+			       field->rmf_name, size, field->rmf_size, loc);
+			LBUG();
+		}
+	}
+
+	pill->rc_area[loc][__req_capsule_offset(pill, field, loc)] = size;
+}
+EXPORT_SYMBOL(req_capsule_set_size);
+
+/**
+ * Return the actual PTLRPC buffer length of a request or reply (\a loc)
+ * for the given \a pill's given \a field.
+ *
+ * NB: this function doesn't correspond with req_capsule_set_size(), which
+ * actually sets the size in pill.rc_area[loc][offset], but this function
+ * returns the message buflen[offset], maybe we should use another name.
+ */
+int req_capsule_get_size(const struct req_capsule *pill,
+			 const struct req_msg_field *field,
+			 enum req_location loc)
+{
+	LASSERT(loc == RCL_SERVER || loc == RCL_CLIENT);
+
+	return lustre_msg_buflen(__req_msg(pill, loc),
+				 __req_capsule_offset(pill, field, loc));
+}
+EXPORT_SYMBOL(req_capsule_get_size);
+
+/**
+ * Wrapper around lustre_msg_size() that returns the PTLRPC size needed for the
+ * given \a pill's request or reply (\a loc) given the field size recorded in
+ * the \a pill's rc_area.
+ *
+ * See also req_capsule_set_size().
+ */
+int req_capsule_msg_size(struct req_capsule *pill, enum req_location loc)
+{
+	return lustre_msg_size(pill->rc_req->rq_import->imp_msg_magic,
+			       pill->rc_fmt->rf_fields[loc].nr,
+			       pill->rc_area[loc]);
+}
+
+/**
+ * While req_capsule_msg_size() computes the size of a PTLRPC request or reply
+ * (\a loc) given a \a pill's \a rc_area, this function computes the size of a
+ * PTLRPC request or reply given only an RQF (\a fmt).
+ *
+ * This function should not be used for formats which contain variable size
+ * fields.
+ */
+int req_capsule_fmt_size(__u32 magic, const struct req_format *fmt,
+			 enum req_location loc)
+{
+	int size, i = 0;
+
+	/*
+	 * This function should probably LASSERT() that fmt has no fields with
+	 * RMF_F_STRUCT_ARRAY in rmf_flags, since we can't know here how many
+	 * elements in the array there will ultimately be, but then, we could
+	 * assume that there will be at least one element, and that's just what
+	 * we do.
+	 */
+	size = lustre_msg_hdr_size(magic, fmt->rf_fields[loc].nr);
+	if (size < 0)
+		return size;
+
+	for (; i < fmt->rf_fields[loc].nr; ++i)
+		if (fmt->rf_fields[loc].d[i]->rmf_size != -1)
+			size += cfs_size_round(fmt->rf_fields[loc].d[i]->
+					       rmf_size);
+	return size;
+}
+
+/**
+ * Changes the format of an RPC.
+ *
+ * The pill must already have been initialized, which means that it already has
+ * a request format.  The new format \a fmt must be an extension of the pill's
+ * old format.  Specifically: the new format must have as many request and reply
+ * fields as the old one, and all fields shared by the old and new format must
+ * be at least as large in the new format.
+ *
+ * The new format's fields may be of different "type" than the old format, but
+ * only for fields that are "opaque" blobs: fields which have a) have no
+ * \a rmf_swabber, b) \a rmf_flags == 0 or RMF_F_NO_SIZE_CHECK, and c) \a
+ * rmf_size == -1 or \a rmf_flags == RMF_F_NO_SIZE_CHECK.  For example,
+ * OBD_SET_INFO has a key field and an opaque value field that gets interpreted
+ * according to the key field.  When the value, according to the key, contains a
+ * structure (or array thereof) to be swabbed, the format should be changed to
+ * one where the value field has \a rmf_size/rmf_flags/rmf_swabber set
+ * accordingly.
+ */
+void req_capsule_extend(struct req_capsule *pill, const struct req_format *fmt)
+{
+	int i;
+	int j;
+
+	const struct req_format *old;
+
+	LASSERT(pill->rc_fmt != NULL);
+	LASSERT(__req_format_is_sane(fmt));
+
+	old = pill->rc_fmt;
+	/*
+	 * Sanity checking...
+	 */
+	for (i = 0; i < RCL_NR; ++i) {
+		LASSERT(fmt->rf_fields[i].nr >= old->rf_fields[i].nr);
+		for (j = 0; j < old->rf_fields[i].nr - 1; ++j) {
+			const struct req_msg_field *ofield = FMT_FIELD(old, i, j);
+
+			/* "opaque" fields can be transmogrified */
+			if (ofield->rmf_swabber == NULL &&
+			    (ofield->rmf_flags & ~RMF_F_NO_SIZE_CHECK) == 0 &&
+			    (ofield->rmf_size == -1 ||
+			    ofield->rmf_flags == RMF_F_NO_SIZE_CHECK))
+				continue;
+			LASSERT(FMT_FIELD(fmt, i, j) == FMT_FIELD(old, i, j));
+		}
+		/*
+		 * Last field in old format can be shorter than in new.
+		 */
+		LASSERT(FMT_FIELD(fmt, i, j)->rmf_size >=
+			FMT_FIELD(old, i, j)->rmf_size);
+	}
+
+	pill->rc_fmt = fmt;
+}
+EXPORT_SYMBOL(req_capsule_extend);
+
+/**
+ * This function returns a non-zero value if the given \a field is present in
+ * the format (\a rc_fmt) of \a pill's PTLRPC request or reply (\a loc), else it
+ * returns 0.
+ */
+int req_capsule_has_field(const struct req_capsule *pill,
+			  const struct req_msg_field *field,
+			  enum req_location loc)
+{
+	LASSERT(loc == RCL_SERVER || loc == RCL_CLIENT);
+
+	return field->rmf_offset[pill->rc_fmt->rf_idx][loc];
+}
+EXPORT_SYMBOL(req_capsule_has_field);
+
+/**
+ * Returns a non-zero value if the given \a field is present in the given \a
+ * pill's PTLRPC request or reply (\a loc), else it returns 0.
+ */
+int req_capsule_field_present(const struct req_capsule *pill,
+			      const struct req_msg_field *field,
+			      enum req_location loc)
+{
+	int offset;
+
+	LASSERT(loc == RCL_SERVER || loc == RCL_CLIENT);
+	LASSERT(req_capsule_has_field(pill, field, loc));
+
+	offset = __req_capsule_offset(pill, field, loc);
+	return lustre_msg_bufcount(__req_msg(pill, loc)) > offset;
+}
+EXPORT_SYMBOL(req_capsule_field_present);
+
+/**
+ * This function shrinks the size of the _buffer_ of the \a pill's PTLRPC
+ * request or reply (\a loc).
+ *
+ * This is not the opposite of req_capsule_extend().
+ */
+void req_capsule_shrink(struct req_capsule *pill,
+			const struct req_msg_field *field,
+			unsigned int newlen,
+			enum req_location loc)
+{
+	const struct req_format *fmt;
+	struct lustre_msg       *msg;
+	int		      len;
+	int		      offset;
+
+	fmt = pill->rc_fmt;
+	LASSERT(fmt != NULL);
+	LASSERT(__req_format_is_sane(fmt));
+	LASSERT(req_capsule_has_field(pill, field, loc));
+	LASSERT(req_capsule_field_present(pill, field, loc));
+
+	offset = __req_capsule_offset(pill, field, loc);
+
+	msg = __req_msg(pill, loc);
+	len = lustre_msg_buflen(msg, offset);
+	LASSERTF(newlen <= len, "%s:%s, oldlen=%d, newlen=%d\n",
+				fmt->rf_name, field->rmf_name, len, newlen);
+
+	if (loc == RCL_CLIENT)
+		pill->rc_req->rq_reqlen = lustre_shrink_msg(msg, offset, newlen,
+							    1);
+	else
+		pill->rc_req->rq_replen = lustre_shrink_msg(msg, offset, newlen,
+							    1);
+}
+EXPORT_SYMBOL(req_capsule_shrink);
+
+int req_capsule_server_grow(struct req_capsule *pill,
+			    const struct req_msg_field *field,
+			    unsigned int newlen)
+{
+	struct ptlrpc_reply_state *rs = pill->rc_req->rq_reply_state, *nrs;
+	char *from, *to;
+	int offset, len, rc;
+
+	LASSERT(pill->rc_fmt != NULL);
+	LASSERT(__req_format_is_sane(pill->rc_fmt));
+	LASSERT(req_capsule_has_field(pill, field, RCL_SERVER));
+	LASSERT(req_capsule_field_present(pill, field, RCL_SERVER));
+
+	len = req_capsule_get_size(pill, field, RCL_SERVER);
+	offset = __req_capsule_offset(pill, field, RCL_SERVER);
+	if (pill->rc_req->rq_repbuf_len >=
+	    lustre_packed_msg_size(pill->rc_req->rq_repmsg) - len + newlen)
+		CERROR("Inplace repack might be done\n");
+
+	pill->rc_req->rq_reply_state = NULL;
+	req_capsule_set_size(pill, field, RCL_SERVER, newlen);
+	rc = req_capsule_server_pack(pill);
+	if (rc) {
+		/* put old rs back, the caller will decide what to do */
+		pill->rc_req->rq_reply_state = rs;
+		return rc;
+	}
+	nrs = pill->rc_req->rq_reply_state;
+	/* Now we need only buffers, copy first chunk */
+	to = lustre_msg_buf(nrs->rs_msg, 0, 0);
+	from = lustre_msg_buf(rs->rs_msg, 0, 0);
+	len = (char *)lustre_msg_buf(rs->rs_msg, offset, 0) - from;
+	memcpy(to, from, len);
+	/* check if we have tail and copy it too */
+	if (rs->rs_msg->lm_bufcount > offset + 1) {
+		to = lustre_msg_buf(nrs->rs_msg, offset + 1, 0);
+		from = lustre_msg_buf(rs->rs_msg, offset + 1, 0);
+		offset = rs->rs_msg->lm_bufcount - 1;
+		len = (char *)lustre_msg_buf(rs->rs_msg, offset, 0) +
+		      cfs_size_round(rs->rs_msg->lm_buflens[offset]) - from;
+		memcpy(to, from, len);
+	}
+	/* drop old reply if everything is fine */
+	if (rs->rs_difficult) {
+		/* copy rs data */
+		int i;
+
+		nrs->rs_difficult = 1;
+		nrs->rs_no_ack = rs->rs_no_ack;
+		for (i = 0; i < rs->rs_nlocks; i++) {
+			nrs->rs_locks[i] = rs->rs_locks[i];
+			nrs->rs_modes[i] = rs->rs_modes[i];
+			nrs->rs_nlocks++;
+		}
+		rs->rs_nlocks = 0;
+		rs->rs_difficult = 0;
+		rs->rs_no_ack = 0;
+	}
+	ptlrpc_rs_decref(rs);
+	return 0;
+}
+EXPORT_SYMBOL(req_capsule_server_grow);
+/* __REQ_LAYOUT_USER__ */
+#endif
diff --git a/drivers/staging/lustre/lustre/ptlrpc/llog_client.c b/drivers/staging/lustre/lustre/ptlrpc/llog_client.c
new file mode 100644
index 000000000..e9baf5bbe
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ptlrpc/llog_client.c
@@ -0,0 +1,366 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/ptlrpc/llog_client.c
+ *
+ * remote api for llog - client side
+ *
+ * Author: Andreas Dilger <adilger@clusterfs.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_LOG
+
+#include "../../include/linux/libcfs/libcfs.h"
+
+#include "../include/obd_class.h"
+#include "../include/lustre_log.h"
+#include "../include/lustre_net.h"
+#include <linux/list.h>
+
+#define LLOG_CLIENT_ENTRY(ctxt, imp) do {			     \
+	mutex_lock(&ctxt->loc_mutex);			     \
+	if (ctxt->loc_imp) {					  \
+		imp = class_import_get(ctxt->loc_imp);		\
+	} else {						      \
+		CERROR("ctxt->loc_imp == NULL for context idx %d."    \
+		       "Unable to complete MDS/OSS recovery,"	 \
+		       "but I'll try again next time.  Not fatal.\n", \
+		       ctxt->loc_idx);				\
+		imp = NULL;					   \
+		mutex_unlock(&ctxt->loc_mutex);		   \
+		return (-EINVAL);				     \
+	}							     \
+	mutex_unlock(&ctxt->loc_mutex);			   \
+} while (0)
+
+#define LLOG_CLIENT_EXIT(ctxt, imp) do {			      \
+	mutex_lock(&ctxt->loc_mutex);			     \
+	if (ctxt->loc_imp != imp)				     \
+		CWARN("loc_imp has changed from %p to %p\n",	  \
+		       ctxt->loc_imp, imp);			   \
+	class_import_put(imp);					\
+	mutex_unlock(&ctxt->loc_mutex);			   \
+} while (0)
+
+/* This is a callback from the llog_* functions.
+ * Assumes caller has already pushed us into the kernel context. */
+static int llog_client_open(const struct lu_env *env,
+			    struct llog_handle *lgh, struct llog_logid *logid,
+			    char *name, enum llog_open_param open_param)
+{
+	struct obd_import     *imp;
+	struct llogd_body     *body;
+	struct llog_ctxt      *ctxt = lgh->lgh_ctxt;
+	struct ptlrpc_request *req = NULL;
+	int		    rc;
+
+	LLOG_CLIENT_ENTRY(ctxt, imp);
+
+	/* client cannot create llog */
+	LASSERTF(open_param != LLOG_OPEN_NEW, "%#x\n", open_param);
+	LASSERT(lgh);
+
+	req = ptlrpc_request_alloc(imp, &RQF_LLOG_ORIGIN_HANDLE_CREATE);
+	if (req == NULL) {
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	if (name)
+		req_capsule_set_size(&req->rq_pill, &RMF_NAME, RCL_CLIENT,
+				     strlen(name) + 1);
+
+	rc = ptlrpc_request_pack(req, LUSTRE_LOG_VERSION,
+				 LLOG_ORIGIN_HANDLE_CREATE);
+	if (rc) {
+		ptlrpc_request_free(req);
+		req = NULL;
+		goto out;
+	}
+	ptlrpc_request_set_replen(req);
+
+	body = req_capsule_client_get(&req->rq_pill, &RMF_LLOGD_BODY);
+	if (logid)
+		body->lgd_logid = *logid;
+	body->lgd_ctxt_idx = ctxt->loc_idx - 1;
+
+	if (name) {
+		char *tmp;
+		tmp = req_capsule_client_sized_get(&req->rq_pill, &RMF_NAME,
+						   strlen(name) + 1);
+		LASSERT(tmp);
+		strcpy(tmp, name);
+	}
+
+	rc = ptlrpc_queue_wait(req);
+	if (rc)
+		goto out;
+
+	body = req_capsule_server_get(&req->rq_pill, &RMF_LLOGD_BODY);
+	if (body == NULL) {
+		rc = -EFAULT;
+		goto out;
+	}
+
+	lgh->lgh_id = body->lgd_logid;
+	lgh->lgh_ctxt = ctxt;
+out:
+	LLOG_CLIENT_EXIT(ctxt, imp);
+	ptlrpc_req_finished(req);
+	return rc;
+}
+
+static int llog_client_destroy(const struct lu_env *env,
+			       struct llog_handle *loghandle)
+{
+	struct obd_import     *imp;
+	struct ptlrpc_request *req = NULL;
+	struct llogd_body     *body;
+	int		    rc;
+
+	LLOG_CLIENT_ENTRY(loghandle->lgh_ctxt, imp);
+	req = ptlrpc_request_alloc_pack(imp, &RQF_LLOG_ORIGIN_HANDLE_DESTROY,
+					LUSTRE_LOG_VERSION,
+					LLOG_ORIGIN_HANDLE_DESTROY);
+	if (req == NULL) {
+		rc = -ENOMEM;
+		goto err_exit;
+	}
+
+	body = req_capsule_client_get(&req->rq_pill, &RMF_LLOGD_BODY);
+	body->lgd_logid = loghandle->lgh_id;
+	body->lgd_llh_flags = loghandle->lgh_hdr->llh_flags;
+
+	if (!(body->lgd_llh_flags & LLOG_F_IS_PLAIN))
+		CERROR("%s: wrong llog flags %x\n", imp->imp_obd->obd_name,
+		       body->lgd_llh_flags);
+
+	ptlrpc_request_set_replen(req);
+	rc = ptlrpc_queue_wait(req);
+
+	ptlrpc_req_finished(req);
+err_exit:
+	LLOG_CLIENT_EXIT(loghandle->lgh_ctxt, imp);
+	return rc;
+}
+
+
+static int llog_client_next_block(const struct lu_env *env,
+				  struct llog_handle *loghandle,
+				  int *cur_idx, int next_idx,
+				  __u64 *cur_offset, void *buf, int len)
+{
+	struct obd_import     *imp;
+	struct ptlrpc_request *req = NULL;
+	struct llogd_body     *body;
+	void		  *ptr;
+	int		    rc;
+
+	LLOG_CLIENT_ENTRY(loghandle->lgh_ctxt, imp);
+	req = ptlrpc_request_alloc_pack(imp, &RQF_LLOG_ORIGIN_HANDLE_NEXT_BLOCK,
+					LUSTRE_LOG_VERSION,
+					LLOG_ORIGIN_HANDLE_NEXT_BLOCK);
+	if (req == NULL) {
+		rc = -ENOMEM;
+		goto err_exit;
+	}
+
+	body = req_capsule_client_get(&req->rq_pill, &RMF_LLOGD_BODY);
+	body->lgd_logid = loghandle->lgh_id;
+	body->lgd_ctxt_idx = loghandle->lgh_ctxt->loc_idx - 1;
+	body->lgd_llh_flags = loghandle->lgh_hdr->llh_flags;
+	body->lgd_index = next_idx;
+	body->lgd_saved_index = *cur_idx;
+	body->lgd_len = len;
+	body->lgd_cur_offset = *cur_offset;
+
+	req_capsule_set_size(&req->rq_pill, &RMF_EADATA, RCL_SERVER, len);
+	ptlrpc_request_set_replen(req);
+	rc = ptlrpc_queue_wait(req);
+	if (rc)
+		goto out;
+
+	body = req_capsule_server_get(&req->rq_pill, &RMF_LLOGD_BODY);
+	if (body == NULL) {
+		rc = -EFAULT;
+		goto out;
+	}
+
+	/* The log records are swabbed as they are processed */
+	ptr = req_capsule_server_get(&req->rq_pill, &RMF_EADATA);
+	if (ptr == NULL) {
+		rc = -EFAULT;
+		goto out;
+	}
+
+	*cur_idx = body->lgd_saved_index;
+	*cur_offset = body->lgd_cur_offset;
+
+	memcpy(buf, ptr, len);
+out:
+	ptlrpc_req_finished(req);
+err_exit:
+	LLOG_CLIENT_EXIT(loghandle->lgh_ctxt, imp);
+	return rc;
+}
+
+static int llog_client_prev_block(const struct lu_env *env,
+				  struct llog_handle *loghandle,
+				  int prev_idx, void *buf, int len)
+{
+	struct obd_import     *imp;
+	struct ptlrpc_request *req = NULL;
+	struct llogd_body     *body;
+	void		  *ptr;
+	int		    rc;
+
+	LLOG_CLIENT_ENTRY(loghandle->lgh_ctxt, imp);
+	req = ptlrpc_request_alloc_pack(imp, &RQF_LLOG_ORIGIN_HANDLE_PREV_BLOCK,
+					LUSTRE_LOG_VERSION,
+					LLOG_ORIGIN_HANDLE_PREV_BLOCK);
+	if (req == NULL) {
+		rc = -ENOMEM;
+		goto err_exit;
+	}
+
+	body = req_capsule_client_get(&req->rq_pill, &RMF_LLOGD_BODY);
+	body->lgd_logid = loghandle->lgh_id;
+	body->lgd_ctxt_idx = loghandle->lgh_ctxt->loc_idx - 1;
+	body->lgd_llh_flags = loghandle->lgh_hdr->llh_flags;
+	body->lgd_index = prev_idx;
+	body->lgd_len = len;
+
+	req_capsule_set_size(&req->rq_pill, &RMF_EADATA, RCL_SERVER, len);
+	ptlrpc_request_set_replen(req);
+
+	rc = ptlrpc_queue_wait(req);
+	if (rc)
+		goto out;
+
+	body = req_capsule_server_get(&req->rq_pill, &RMF_LLOGD_BODY);
+	if (body == NULL) {
+		rc = -EFAULT;
+		goto out;
+	}
+
+	ptr = req_capsule_server_get(&req->rq_pill, &RMF_EADATA);
+	if (ptr == NULL) {
+		rc = -EFAULT;
+		goto out;
+	}
+
+	memcpy(buf, ptr, len);
+out:
+	ptlrpc_req_finished(req);
+err_exit:
+	LLOG_CLIENT_EXIT(loghandle->lgh_ctxt, imp);
+	return rc;
+}
+
+static int llog_client_read_header(const struct lu_env *env,
+				   struct llog_handle *handle)
+{
+	struct obd_import     *imp;
+	struct ptlrpc_request *req = NULL;
+	struct llogd_body     *body;
+	struct llog_log_hdr   *hdr;
+	struct llog_rec_hdr   *llh_hdr;
+	int		    rc;
+
+	LLOG_CLIENT_ENTRY(handle->lgh_ctxt, imp);
+	req = ptlrpc_request_alloc_pack(imp, &RQF_LLOG_ORIGIN_HANDLE_READ_HEADER,
+					LUSTRE_LOG_VERSION,
+					LLOG_ORIGIN_HANDLE_READ_HEADER);
+	if (req == NULL) {
+		rc = -ENOMEM;
+		goto err_exit;
+	}
+
+	body = req_capsule_client_get(&req->rq_pill, &RMF_LLOGD_BODY);
+	body->lgd_logid = handle->lgh_id;
+	body->lgd_ctxt_idx = handle->lgh_ctxt->loc_idx - 1;
+	body->lgd_llh_flags = handle->lgh_hdr->llh_flags;
+
+	ptlrpc_request_set_replen(req);
+	rc = ptlrpc_queue_wait(req);
+	if (rc)
+		goto out;
+
+	hdr = req_capsule_server_get(&req->rq_pill, &RMF_LLOG_LOG_HDR);
+	if (hdr == NULL) {
+		rc = -EFAULT;
+		goto out;
+	}
+
+	memcpy(handle->lgh_hdr, hdr, sizeof(*hdr));
+	handle->lgh_last_idx = handle->lgh_hdr->llh_tail.lrt_index;
+
+	/* sanity checks */
+	llh_hdr = &handle->lgh_hdr->llh_hdr;
+	if (llh_hdr->lrh_type != LLOG_HDR_MAGIC) {
+		CERROR("bad log header magic: %#x (expecting %#x)\n",
+		       llh_hdr->lrh_type, LLOG_HDR_MAGIC);
+		rc = -EIO;
+	} else if (llh_hdr->lrh_len != LLOG_CHUNK_SIZE) {
+		CERROR("incorrectly sized log header: %#x (expecting %#x)\n",
+		       llh_hdr->lrh_len, LLOG_CHUNK_SIZE);
+		CERROR("you may need to re-run lconf --write_conf.\n");
+		rc = -EIO;
+	}
+out:
+	ptlrpc_req_finished(req);
+err_exit:
+	LLOG_CLIENT_EXIT(handle->lgh_ctxt, imp);
+	return rc;
+}
+
+static int llog_client_close(const struct lu_env *env,
+			     struct llog_handle *handle)
+{
+	/* this doesn't call LLOG_ORIGIN_HANDLE_CLOSE because
+	   the servers all close the file at the end of every
+	   other LLOG_ RPC. */
+	return 0;
+}
+
+struct llog_operations llog_client_ops = {
+	.lop_next_block		= llog_client_next_block,
+	.lop_prev_block		= llog_client_prev_block,
+	.lop_read_header	= llog_client_read_header,
+	.lop_open		= llog_client_open,
+	.lop_destroy		= llog_client_destroy,
+	.lop_close		= llog_client_close,
+};
+EXPORT_SYMBOL(llog_client_ops);
diff --git a/drivers/staging/lustre/lustre/ptlrpc/llog_net.c b/drivers/staging/lustre/lustre/ptlrpc/llog_net.c
new file mode 100644
index 000000000..dac66f5b3
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ptlrpc/llog_net.c
@@ -0,0 +1,72 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/ptlrpc/llog_net.c
+ *
+ * OST<->MDS recovery logging infrastructure.
+ *
+ * Invariants in implementation:
+ * - we do not share logs among different OST<->MDS connections, so that
+ *   if an OST or MDS fails it need only look at log(s) relevant to itself
+ *
+ * Author: Andreas Dilger <adilger@clusterfs.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_LOG
+
+#include "../../include/linux/libcfs/libcfs.h"
+
+#include "../include/obd_class.h"
+#include "../include/lustre_log.h"
+#include <linux/list.h>
+
+int llog_initiator_connect(struct llog_ctxt *ctxt)
+{
+	struct obd_import *new_imp;
+
+	LASSERT(ctxt);
+	new_imp = ctxt->loc_obd->u.cli.cl_import;
+	LASSERTF(ctxt->loc_imp == NULL || ctxt->loc_imp == new_imp,
+		 "%p - %p\n", ctxt->loc_imp, new_imp);
+	mutex_lock(&ctxt->loc_mutex);
+	if (ctxt->loc_imp != new_imp) {
+		if (ctxt->loc_imp)
+			class_import_put(ctxt->loc_imp);
+		ctxt->loc_imp = class_import_get(new_imp);
+	}
+	mutex_unlock(&ctxt->loc_mutex);
+	return 0;
+}
+EXPORT_SYMBOL(llog_initiator_connect);
diff --git a/drivers/staging/lustre/lustre/ptlrpc/lproc_ptlrpc.c b/drivers/staging/lustre/lustre/ptlrpc/lproc_ptlrpc.c
new file mode 100644
index 000000000..9533ab976
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ptlrpc/lproc_ptlrpc.c
@@ -0,0 +1,1366 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+#define DEBUG_SUBSYSTEM S_CLASS
+
+
+#include "../include/obd_support.h"
+#include "../include/obd.h"
+#include "../include/lprocfs_status.h"
+#include "../include/lustre/lustre_idl.h"
+#include "../include/lustre_net.h"
+#include "../include/obd_class.h"
+#include "ptlrpc_internal.h"
+
+
+static struct ll_rpc_opcode {
+	__u32       opcode;
+	const char *opname;
+} ll_rpc_opcode_table[LUSTRE_MAX_OPCODES] = {
+	{ OST_REPLY,	"ost_reply" },
+	{ OST_GETATTR,      "ost_getattr" },
+	{ OST_SETATTR,      "ost_setattr" },
+	{ OST_READ,	 "ost_read" },
+	{ OST_WRITE,	"ost_write" },
+	{ OST_CREATE ,      "ost_create" },
+	{ OST_DESTROY,      "ost_destroy" },
+	{ OST_GET_INFO,     "ost_get_info" },
+	{ OST_CONNECT,      "ost_connect" },
+	{ OST_DISCONNECT,   "ost_disconnect" },
+	{ OST_PUNCH,	"ost_punch" },
+	{ OST_OPEN,	 "ost_open" },
+	{ OST_CLOSE,	"ost_close" },
+	{ OST_STATFS,       "ost_statfs" },
+	{ 14,		NULL },    /* formerly OST_SAN_READ */
+	{ 15,		NULL },    /* formerly OST_SAN_WRITE */
+	{ OST_SYNC,	 "ost_sync" },
+	{ OST_SET_INFO,     "ost_set_info" },
+	{ OST_QUOTACHECK,   "ost_quotacheck" },
+	{ OST_QUOTACTL,     "ost_quotactl" },
+	{ OST_QUOTA_ADJUST_QUNIT, "ost_quota_adjust_qunit" },
+	{ MDS_GETATTR,      "mds_getattr" },
+	{ MDS_GETATTR_NAME, "mds_getattr_lock" },
+	{ MDS_CLOSE,	"mds_close" },
+	{ MDS_REINT,	"mds_reint" },
+	{ MDS_READPAGE,     "mds_readpage" },
+	{ MDS_CONNECT,      "mds_connect" },
+	{ MDS_DISCONNECT,   "mds_disconnect" },
+	{ MDS_GETSTATUS,    "mds_getstatus" },
+	{ MDS_STATFS,       "mds_statfs" },
+	{ MDS_PIN,	  "mds_pin" },
+	{ MDS_UNPIN,	"mds_unpin" },
+	{ MDS_SYNC,	 "mds_sync" },
+	{ MDS_DONE_WRITING, "mds_done_writing" },
+	{ MDS_SET_INFO,     "mds_set_info" },
+	{ MDS_QUOTACHECK,   "mds_quotacheck" },
+	{ MDS_QUOTACTL,     "mds_quotactl" },
+	{ MDS_GETXATTR,     "mds_getxattr" },
+	{ MDS_SETXATTR,     "mds_setxattr" },
+	{ MDS_WRITEPAGE,    "mds_writepage" },
+	{ MDS_IS_SUBDIR,    "mds_is_subdir" },
+	{ MDS_GET_INFO,     "mds_get_info" },
+	{ MDS_HSM_STATE_GET, "mds_hsm_state_get" },
+	{ MDS_HSM_STATE_SET, "mds_hsm_state_set" },
+	{ MDS_HSM_ACTION,   "mds_hsm_action" },
+	{ MDS_HSM_PROGRESS, "mds_hsm_progress" },
+	{ MDS_HSM_REQUEST,  "mds_hsm_request" },
+	{ MDS_HSM_CT_REGISTER, "mds_hsm_ct_register" },
+	{ MDS_HSM_CT_UNREGISTER, "mds_hsm_ct_unregister" },
+	{ MDS_SWAP_LAYOUTS,	"mds_swap_layouts" },
+	{ LDLM_ENQUEUE,     "ldlm_enqueue" },
+	{ LDLM_CONVERT,     "ldlm_convert" },
+	{ LDLM_CANCEL,      "ldlm_cancel" },
+	{ LDLM_BL_CALLBACK, "ldlm_bl_callback" },
+	{ LDLM_CP_CALLBACK, "ldlm_cp_callback" },
+	{ LDLM_GL_CALLBACK, "ldlm_gl_callback" },
+	{ LDLM_SET_INFO,    "ldlm_set_info" },
+	{ MGS_CONNECT,      "mgs_connect" },
+	{ MGS_DISCONNECT,   "mgs_disconnect" },
+	{ MGS_EXCEPTION,    "mgs_exception" },
+	{ MGS_TARGET_REG,   "mgs_target_reg" },
+	{ MGS_TARGET_DEL,   "mgs_target_del" },
+	{ MGS_SET_INFO,     "mgs_set_info" },
+	{ MGS_CONFIG_READ,  "mgs_config_read" },
+	{ OBD_PING,	 "obd_ping" },
+	{ OBD_LOG_CANCEL,	"llog_cancel" },
+	{ OBD_QC_CALLBACK,  "obd_quota_callback" },
+	{ OBD_IDX_READ,	    "dt_index_read" },
+	{ LLOG_ORIGIN_HANDLE_CREATE,	 "llog_origin_handle_open" },
+	{ LLOG_ORIGIN_HANDLE_NEXT_BLOCK, "llog_origin_handle_next_block" },
+	{ LLOG_ORIGIN_HANDLE_READ_HEADER, "llog_origin_handle_read_header" },
+	{ LLOG_ORIGIN_HANDLE_WRITE_REC,  "llog_origin_handle_write_rec" },
+	{ LLOG_ORIGIN_HANDLE_CLOSE,      "llog_origin_handle_close" },
+	{ LLOG_ORIGIN_CONNECT,	   "llog_origin_connect" },
+	{ LLOG_CATINFO,		  "llog_catinfo" },
+	{ LLOG_ORIGIN_HANDLE_PREV_BLOCK, "llog_origin_handle_prev_block" },
+	{ LLOG_ORIGIN_HANDLE_DESTROY,    "llog_origin_handle_destroy" },
+	{ QUOTA_DQACQ,      "quota_acquire" },
+	{ QUOTA_DQREL,      "quota_release" },
+	{ SEQ_QUERY,	"seq_query" },
+	{ SEC_CTX_INIT,     "sec_ctx_init" },
+	{ SEC_CTX_INIT_CONT, "sec_ctx_init_cont" },
+	{ SEC_CTX_FINI,     "sec_ctx_fini" },
+	{ FLD_QUERY,	"fld_query" },
+	{ UPDATE_OBJ,	    "update_obj" },
+};
+
+static struct ll_eopcode {
+	__u32       opcode;
+	const char *opname;
+} ll_eopcode_table[EXTRA_LAST_OPC] = {
+	{ LDLM_GLIMPSE_ENQUEUE, "ldlm_glimpse_enqueue" },
+	{ LDLM_PLAIN_ENQUEUE,   "ldlm_plain_enqueue" },
+	{ LDLM_EXTENT_ENQUEUE,  "ldlm_extent_enqueue" },
+	{ LDLM_FLOCK_ENQUEUE,   "ldlm_flock_enqueue" },
+	{ LDLM_IBITS_ENQUEUE,   "ldlm_ibits_enqueue" },
+	{ MDS_REINT_SETATTR,    "mds_reint_setattr" },
+	{ MDS_REINT_CREATE,     "mds_reint_create" },
+	{ MDS_REINT_LINK,       "mds_reint_link" },
+	{ MDS_REINT_UNLINK,     "mds_reint_unlink" },
+	{ MDS_REINT_RENAME,     "mds_reint_rename" },
+	{ MDS_REINT_OPEN,       "mds_reint_open" },
+	{ MDS_REINT_SETXATTR,   "mds_reint_setxattr" },
+	{ BRW_READ_BYTES,       "read_bytes" },
+	{ BRW_WRITE_BYTES,      "write_bytes" },
+};
+
+const char *ll_opcode2str(__u32 opcode)
+{
+	/* When one of the assertions below fail, chances are that:
+	 *     1) A new opcode was added in include/lustre/lustre_idl.h,
+	 *	but is missing from the table above.
+	 * or  2) The opcode space was renumbered or rearranged,
+	 *	and the opcode_offset() function in
+	 *	ptlrpc_internal.h needs to be modified.
+	 */
+	__u32 offset = opcode_offset(opcode);
+	LASSERTF(offset < LUSTRE_MAX_OPCODES,
+		 "offset %u >= LUSTRE_MAX_OPCODES %u\n",
+		 offset, LUSTRE_MAX_OPCODES);
+	LASSERTF(ll_rpc_opcode_table[offset].opcode == opcode,
+		 "ll_rpc_opcode_table[%u].opcode %u != opcode %u\n",
+		 offset, ll_rpc_opcode_table[offset].opcode, opcode);
+	return ll_rpc_opcode_table[offset].opname;
+}
+
+static const char *ll_eopcode2str(__u32 opcode)
+{
+	LASSERT(ll_eopcode_table[opcode].opcode == opcode);
+	return ll_eopcode_table[opcode].opname;
+}
+
+#if defined(CONFIG_PROC_FS)
+static void ptlrpc_lprocfs_register(struct proc_dir_entry *root, char *dir,
+				    char *name,
+				    struct proc_dir_entry **procroot_ret,
+				    struct lprocfs_stats **stats_ret)
+{
+	struct proc_dir_entry *svc_procroot;
+	struct lprocfs_stats *svc_stats;
+	int i, rc;
+	unsigned int svc_counter_config = LPROCFS_CNTR_AVGMINMAX |
+					  LPROCFS_CNTR_STDDEV;
+
+	LASSERT(*procroot_ret == NULL);
+	LASSERT(*stats_ret == NULL);
+
+	svc_stats = lprocfs_alloc_stats(EXTRA_MAX_OPCODES+LUSTRE_MAX_OPCODES,
+					0);
+	if (svc_stats == NULL)
+		return;
+
+	if (dir) {
+		svc_procroot = lprocfs_register(dir, root, NULL, NULL);
+		if (IS_ERR(svc_procroot)) {
+			lprocfs_free_stats(&svc_stats);
+			return;
+		}
+	} else {
+		svc_procroot = root;
+	}
+
+	lprocfs_counter_init(svc_stats, PTLRPC_REQWAIT_CNTR,
+			     svc_counter_config, "req_waittime", "usec");
+	lprocfs_counter_init(svc_stats, PTLRPC_REQQDEPTH_CNTR,
+			     svc_counter_config, "req_qdepth", "reqs");
+	lprocfs_counter_init(svc_stats, PTLRPC_REQACTIVE_CNTR,
+			     svc_counter_config, "req_active", "reqs");
+	lprocfs_counter_init(svc_stats, PTLRPC_TIMEOUT,
+			     svc_counter_config, "req_timeout", "sec");
+	lprocfs_counter_init(svc_stats, PTLRPC_REQBUF_AVAIL_CNTR,
+			     svc_counter_config, "reqbuf_avail", "bufs");
+	for (i = 0; i < EXTRA_LAST_OPC; i++) {
+		char *units;
+
+		switch (i) {
+		case BRW_WRITE_BYTES:
+		case BRW_READ_BYTES:
+			units = "bytes";
+			break;
+		default:
+			units = "reqs";
+			break;
+		}
+		lprocfs_counter_init(svc_stats, PTLRPC_LAST_CNTR + i,
+				     svc_counter_config,
+				     ll_eopcode2str(i), units);
+	}
+	for (i = 0; i < LUSTRE_MAX_OPCODES; i++) {
+		__u32 opcode = ll_rpc_opcode_table[i].opcode;
+		lprocfs_counter_init(svc_stats,
+				     EXTRA_MAX_OPCODES + i, svc_counter_config,
+				     ll_opcode2str(opcode), "usec");
+	}
+
+	rc = lprocfs_register_stats(svc_procroot, name, svc_stats);
+	if (rc < 0) {
+		if (dir)
+			lprocfs_remove(&svc_procroot);
+		lprocfs_free_stats(&svc_stats);
+	} else {
+		if (dir)
+			*procroot_ret = svc_procroot;
+		*stats_ret = svc_stats;
+	}
+}
+
+static int
+ptlrpc_lprocfs_req_history_len_seq_show(struct seq_file *m, void *v)
+{
+	struct ptlrpc_service *svc = m->private;
+	struct ptlrpc_service_part *svcpt;
+	int	total = 0;
+	int	i;
+
+	ptlrpc_service_for_each_part(svcpt, i, svc)
+		total += svcpt->scp_hist_nrqbds;
+
+	seq_printf(m, "%d\n", total);
+	return 0;
+}
+LPROC_SEQ_FOPS_RO(ptlrpc_lprocfs_req_history_len);
+
+static int
+ptlrpc_lprocfs_req_history_max_seq_show(struct seq_file *m, void *n)
+{
+	struct ptlrpc_service *svc = m->private;
+	struct ptlrpc_service_part *svcpt;
+	int	total = 0;
+	int	i;
+
+	ptlrpc_service_for_each_part(svcpt, i, svc)
+		total += svc->srv_hist_nrqbds_cpt_max;
+
+	seq_printf(m, "%d\n", total);
+	return 0;
+}
+
+static ssize_t
+ptlrpc_lprocfs_req_history_max_seq_write(struct file *file,
+					const char __user *buffer,
+					size_t count, loff_t *off)
+{
+	struct ptlrpc_service *svc = ((struct seq_file *)file->private_data)->private;
+	int			    bufpages;
+	int			    val;
+	int			    rc;
+
+	rc = lprocfs_write_helper(buffer, count, &val);
+	if (rc < 0)
+		return rc;
+
+	if (val < 0)
+		return -ERANGE;
+
+	/* This sanity check is more of an insanity check; we can still
+	 * hose a kernel by allowing the request history to grow too
+	 * far. */
+	bufpages = (svc->srv_buf_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+	if (val > totalram_pages / (2 * bufpages))
+		return -ERANGE;
+
+	spin_lock(&svc->srv_lock);
+
+	if (val == 0)
+		svc->srv_hist_nrqbds_cpt_max = 0;
+	else
+		svc->srv_hist_nrqbds_cpt_max = max(1, (val / svc->srv_ncpts));
+
+	spin_unlock(&svc->srv_lock);
+
+	return count;
+}
+LPROC_SEQ_FOPS(ptlrpc_lprocfs_req_history_max);
+
+static int
+ptlrpc_lprocfs_threads_min_seq_show(struct seq_file *m, void *n)
+{
+	struct ptlrpc_service *svc = m->private;
+
+	seq_printf(m, "%d\n", svc->srv_nthrs_cpt_init * svc->srv_ncpts);
+	return 0;
+}
+
+static ssize_t
+ptlrpc_lprocfs_threads_min_seq_write(struct file *file,
+					const char __user *buffer,
+					size_t count, loff_t *off)
+{
+	struct ptlrpc_service *svc = ((struct seq_file *)file->private_data)->private;
+	int	val;
+	int	rc = lprocfs_write_helper(buffer, count, &val);
+
+	if (rc < 0)
+		return rc;
+
+	if (val / svc->srv_ncpts < PTLRPC_NTHRS_INIT)
+		return -ERANGE;
+
+	spin_lock(&svc->srv_lock);
+	if (val > svc->srv_nthrs_cpt_limit * svc->srv_ncpts) {
+		spin_unlock(&svc->srv_lock);
+		return -ERANGE;
+	}
+
+	svc->srv_nthrs_cpt_init = val / svc->srv_ncpts;
+
+	spin_unlock(&svc->srv_lock);
+
+	return count;
+}
+LPROC_SEQ_FOPS(ptlrpc_lprocfs_threads_min);
+
+static int
+ptlrpc_lprocfs_threads_started_seq_show(struct seq_file *m, void *n)
+{
+	struct ptlrpc_service *svc = m->private;
+	struct ptlrpc_service_part *svcpt;
+	int	total = 0;
+	int	i;
+
+	ptlrpc_service_for_each_part(svcpt, i, svc)
+		total += svcpt->scp_nthrs_running;
+
+	seq_printf(m, "%d\n", total);
+	return 0;
+}
+LPROC_SEQ_FOPS_RO(ptlrpc_lprocfs_threads_started);
+
+static int
+ptlrpc_lprocfs_threads_max_seq_show(struct seq_file *m, void *n)
+{
+	struct ptlrpc_service *svc = m->private;
+
+	seq_printf(m, "%d\n", svc->srv_nthrs_cpt_limit * svc->srv_ncpts);
+	return 0;
+}
+
+static ssize_t
+ptlrpc_lprocfs_threads_max_seq_write(struct file *file,
+				const char __user *buffer,
+				size_t count, loff_t *off)
+{
+	struct ptlrpc_service *svc = ((struct seq_file *)file->private_data)->private;
+	int	val;
+	int	rc = lprocfs_write_helper(buffer, count, &val);
+
+	if (rc < 0)
+		return rc;
+
+	if (val / svc->srv_ncpts < PTLRPC_NTHRS_INIT)
+		return -ERANGE;
+
+	spin_lock(&svc->srv_lock);
+	if (val < svc->srv_nthrs_cpt_init * svc->srv_ncpts) {
+		spin_unlock(&svc->srv_lock);
+		return -ERANGE;
+	}
+
+	svc->srv_nthrs_cpt_limit = val / svc->srv_ncpts;
+
+	spin_unlock(&svc->srv_lock);
+
+	return count;
+}
+LPROC_SEQ_FOPS(ptlrpc_lprocfs_threads_max);
+
+/**
+ * \addtogoup nrs
+ * @{
+ */
+extern struct nrs_core nrs_core;
+
+/**
+ * Translates \e ptlrpc_nrs_pol_state values to human-readable strings.
+ *
+ * \param[in] state The policy state
+ */
+static const char *nrs_state2str(enum ptlrpc_nrs_pol_state state)
+{
+	switch (state) {
+	default:
+		LBUG();
+	case NRS_POL_STATE_INVALID:
+		return "invalid";
+	case NRS_POL_STATE_STOPPED:
+		return "stopped";
+	case NRS_POL_STATE_STOPPING:
+		return "stopping";
+	case NRS_POL_STATE_STARTING:
+		return "starting";
+	case NRS_POL_STATE_STARTED:
+		return "started";
+	}
+}
+
+/**
+ * Obtains status information for \a policy.
+ *
+ * Information is copied in \a info.
+ *
+ * \param[in] policy The policy
+ * \param[out] info  Holds returned status information
+ */
+void nrs_policy_get_info_locked(struct ptlrpc_nrs_policy *policy,
+				struct ptlrpc_nrs_pol_info *info)
+{
+	LASSERT(policy != NULL);
+	LASSERT(info != NULL);
+	assert_spin_locked(&policy->pol_nrs->nrs_lock);
+
+	memcpy(info->pi_name, policy->pol_desc->pd_name, NRS_POL_NAME_MAX);
+
+	info->pi_fallback    = !!(policy->pol_flags & PTLRPC_NRS_FL_FALLBACK);
+	info->pi_state	     = policy->pol_state;
+	/**
+	 * XXX: These are accessed without holding
+	 * ptlrpc_service_part::scp_req_lock.
+	 */
+	info->pi_req_queued  = policy->pol_req_queued;
+	info->pi_req_started = policy->pol_req_started;
+}
+
+/**
+ * Reads and prints policy status information for all policies of a PTLRPC
+ * service.
+ */
+static int ptlrpc_lprocfs_nrs_seq_show(struct seq_file *m, void *n)
+{
+	struct ptlrpc_service	       *svc = m->private;
+	struct ptlrpc_service_part     *svcpt;
+	struct ptlrpc_nrs	       *nrs;
+	struct ptlrpc_nrs_policy       *policy;
+	struct ptlrpc_nrs_pol_info     *infos;
+	struct ptlrpc_nrs_pol_info	tmp;
+	unsigned			num_pols;
+	unsigned			pol_idx = 0;
+	bool				hp = false;
+	int				i;
+	int				rc = 0;
+
+	/**
+	 * Serialize NRS core lprocfs operations with policy registration/
+	 * unregistration.
+	 */
+	mutex_lock(&nrs_core.nrs_mutex);
+
+	/**
+	 * Use the first service partition's regular NRS head in order to obtain
+	 * the number of policies registered with NRS heads of this service. All
+	 * service partitions will have the same number of policies.
+	 */
+	nrs = nrs_svcpt2nrs(svc->srv_parts[0], false);
+
+	spin_lock(&nrs->nrs_lock);
+	num_pols = svc->srv_parts[0]->scp_nrs_reg.nrs_num_pols;
+	spin_unlock(&nrs->nrs_lock);
+
+	OBD_ALLOC(infos, num_pols * sizeof(*infos));
+	if (infos == NULL) {
+		rc = -ENOMEM;
+		goto out;
+	}
+again:
+
+	ptlrpc_service_for_each_part(svcpt, i, svc) {
+		nrs = nrs_svcpt2nrs(svcpt, hp);
+		spin_lock(&nrs->nrs_lock);
+
+		pol_idx = 0;
+
+		list_for_each_entry(policy, &nrs->nrs_policy_list,
+					pol_list) {
+			LASSERT(pol_idx < num_pols);
+
+			nrs_policy_get_info_locked(policy, &tmp);
+			/**
+			 * Copy values when handling the first service
+			 * partition.
+			 */
+			if (i == 0) {
+				memcpy(infos[pol_idx].pi_name, tmp.pi_name,
+				       NRS_POL_NAME_MAX);
+				memcpy(&infos[pol_idx].pi_state, &tmp.pi_state,
+				       sizeof(tmp.pi_state));
+				infos[pol_idx].pi_fallback = tmp.pi_fallback;
+				/**
+				 * For the rest of the service partitions
+				 * sanity-check the values we get.
+				 */
+			} else {
+				LASSERT(strncmp(infos[pol_idx].pi_name,
+						tmp.pi_name,
+						NRS_POL_NAME_MAX) == 0);
+				/**
+				 * Not asserting ptlrpc_nrs_pol_info::pi_state,
+				 * because it may be different between
+				 * instances of the same policy in different
+				 * service partitions.
+				 */
+				LASSERT(infos[pol_idx].pi_fallback ==
+					tmp.pi_fallback);
+			}
+
+			infos[pol_idx].pi_req_queued += tmp.pi_req_queued;
+			infos[pol_idx].pi_req_started += tmp.pi_req_started;
+
+			pol_idx++;
+		}
+		spin_unlock(&nrs->nrs_lock);
+	}
+
+	/**
+	 * Policy status information output is in YAML format.
+	 * For example:
+	 *
+	 *	regular_requests:
+	 *	  - name: fifo
+	 *	    state: started
+	 *	    fallback: yes
+	 *	    queued: 0
+	 *	    active: 0
+	 *
+	 *	  - name: crrn
+	 *	    state: started
+	 *	    fallback: no
+	 *	    queued: 2015
+	 *	    active: 384
+	 *
+	 *	high_priority_requests:
+	 *	  - name: fifo
+	 *	    state: started
+	 *	    fallback: yes
+	 *	    queued: 0
+	 *	    active: 2
+	 *
+	 *	  - name: crrn
+	 *	    state: stopped
+	 *	    fallback: no
+	 *	    queued: 0
+	 *	    active: 0
+	 */
+	seq_printf(m, "%s\n",
+		      !hp ?  "\nregular_requests:" : "high_priority_requests:");
+
+	for (pol_idx = 0; pol_idx < num_pols; pol_idx++) {
+		seq_printf(m,  "  - name: %s\n"
+			       "    state: %s\n"
+			       "    fallback: %s\n"
+			       "    queued: %-20d\n"
+			       "    active: %-20d\n\n",
+			       infos[pol_idx].pi_name,
+			       nrs_state2str(infos[pol_idx].pi_state),
+			       infos[pol_idx].pi_fallback ? "yes" : "no",
+			       (int)infos[pol_idx].pi_req_queued,
+			       (int)infos[pol_idx].pi_req_started);
+	}
+
+	if (!hp && nrs_svc_has_hp(svc)) {
+		memset(infos, 0, num_pols * sizeof(*infos));
+
+		/**
+		 * Redo the processing for the service's HP NRS heads' policies.
+		 */
+		hp = true;
+		goto again;
+	}
+
+out:
+	if (infos)
+		OBD_FREE(infos, num_pols * sizeof(*infos));
+
+	mutex_unlock(&nrs_core.nrs_mutex);
+
+	return rc;
+}
+
+/**
+ * The longest valid command string is the maximum policy name size, plus the
+ * length of the " reg" substring
+ */
+#define LPROCFS_NRS_WR_MAX_CMD	(NRS_POL_NAME_MAX + sizeof(" reg") - 1)
+
+/**
+ * Starts and stops a given policy on a PTLRPC service.
+ *
+ * Commands consist of the policy name, followed by an optional [reg|hp] token;
+ * if the optional token is omitted, the operation is performed on both the
+ * regular and high-priority (if the service has one) NRS head.
+ */
+static ssize_t ptlrpc_lprocfs_nrs_seq_write(struct file *file,
+					const char __user *buffer,
+					size_t count, loff_t *off)
+{
+	struct ptlrpc_service *svc = ((struct seq_file *)file->private_data)->private;
+	enum ptlrpc_nrs_queue_type	queue = PTLRPC_NRS_QUEUE_BOTH;
+	char			       *cmd;
+	char			       *cmd_copy = NULL;
+	char			       *token;
+	int				rc = 0;
+
+	if (count >= LPROCFS_NRS_WR_MAX_CMD) {
+		rc = -EINVAL;
+		goto out;
+	}
+
+	OBD_ALLOC(cmd, LPROCFS_NRS_WR_MAX_CMD);
+	if (cmd == NULL) {
+		rc = -ENOMEM;
+		goto out;
+	}
+	/**
+	 * strsep() modifies its argument, so keep a copy
+	 */
+	cmd_copy = cmd;
+
+	if (copy_from_user(cmd, buffer, count)) {
+		rc = -EFAULT;
+		goto out;
+	}
+
+	cmd[count] = '\0';
+
+	token = strsep(&cmd, " ");
+
+	if (strlen(token) > NRS_POL_NAME_MAX - 1) {
+		rc = -EINVAL;
+		goto out;
+	}
+
+	/**
+	 * No [reg|hp] token has been specified
+	 */
+	if (cmd == NULL)
+		goto default_queue;
+
+	/**
+	 * The second token is either NULL, or an optional [reg|hp] string
+	 */
+	if (strcmp(cmd, "reg") == 0)
+		queue = PTLRPC_NRS_QUEUE_REG;
+	else if (strcmp(cmd, "hp") == 0)
+		queue = PTLRPC_NRS_QUEUE_HP;
+	else {
+		rc = -EINVAL;
+		goto out;
+	}
+
+default_queue:
+
+	if (queue == PTLRPC_NRS_QUEUE_HP && !nrs_svc_has_hp(svc)) {
+		rc = -ENODEV;
+		goto out;
+	} else if (queue == PTLRPC_NRS_QUEUE_BOTH && !nrs_svc_has_hp(svc))
+		queue = PTLRPC_NRS_QUEUE_REG;
+
+	/**
+	 * Serialize NRS core lprocfs operations with policy registration/
+	 * unregistration.
+	 */
+	mutex_lock(&nrs_core.nrs_mutex);
+
+	rc = ptlrpc_nrs_policy_control(svc, queue, token, PTLRPC_NRS_CTL_START,
+				       false, NULL);
+
+	mutex_unlock(&nrs_core.nrs_mutex);
+out:
+	if (cmd_copy)
+		OBD_FREE(cmd_copy, LPROCFS_NRS_WR_MAX_CMD);
+
+	return rc < 0 ? rc : count;
+}
+LPROC_SEQ_FOPS(ptlrpc_lprocfs_nrs);
+
+/** @} nrs */
+
+struct ptlrpc_srh_iterator {
+	int			srhi_idx;
+	__u64			srhi_seq;
+	struct ptlrpc_request	*srhi_req;
+};
+
+static int
+ptlrpc_lprocfs_svc_req_history_seek(struct ptlrpc_service_part *svcpt,
+				    struct ptlrpc_srh_iterator *srhi,
+				    __u64 seq)
+{
+	struct list_head		*e;
+	struct ptlrpc_request	*req;
+
+	if (srhi->srhi_req != NULL &&
+	    srhi->srhi_seq > svcpt->scp_hist_seq_culled &&
+	    srhi->srhi_seq <= seq) {
+		/* If srhi_req was set previously, hasn't been culled and
+		 * we're searching for a seq on or after it (i.e. more
+		 * recent), search from it onwards.
+		 * Since the service history is LRU (i.e. culled reqs will
+		 * be near the head), we shouldn't have to do long
+		 * re-scans */
+		LASSERTF(srhi->srhi_seq == srhi->srhi_req->rq_history_seq,
+			 "%s:%d: seek seq %llu, request seq %llu\n",
+			 svcpt->scp_service->srv_name, svcpt->scp_cpt,
+			 srhi->srhi_seq, srhi->srhi_req->rq_history_seq);
+		LASSERTF(!list_empty(&svcpt->scp_hist_reqs),
+			 "%s:%d: seek offset %llu, request seq %llu, last culled %llu\n",
+			 svcpt->scp_service->srv_name, svcpt->scp_cpt,
+			 seq, srhi->srhi_seq, svcpt->scp_hist_seq_culled);
+		e = &srhi->srhi_req->rq_history_list;
+	} else {
+		/* search from start */
+		e = svcpt->scp_hist_reqs.next;
+	}
+
+	while (e != &svcpt->scp_hist_reqs) {
+		req = list_entry(e, struct ptlrpc_request, rq_history_list);
+
+		if (req->rq_history_seq >= seq) {
+			srhi->srhi_seq = req->rq_history_seq;
+			srhi->srhi_req = req;
+			return 0;
+		}
+		e = e->next;
+	}
+
+	return -ENOENT;
+}
+
+/*
+ * ptlrpc history sequence is used as "position" of seq_file, in some case,
+ * seq_read() will increase "position" to indicate reading the next
+ * element, however, low bits of history sequence are reserved for CPT id
+ * (check the details from comments before ptlrpc_req_add_history), which
+ * means seq_read() might change CPT id of history sequence and never
+ * finish reading of requests on a CPT. To make it work, we have to shift
+ * CPT id to high bits and timestamp to low bits, so seq_read() will only
+ * increase timestamp which can correctly indicate the next position.
+ */
+
+/* convert seq_file pos to cpt */
+#define PTLRPC_REQ_POS2CPT(svc, pos)			\
+	((svc)->srv_cpt_bits == 0 ? 0 :			\
+	 (__u64)(pos) >> (64 - (svc)->srv_cpt_bits))
+
+/* make up seq_file pos from cpt */
+#define PTLRPC_REQ_CPT2POS(svc, cpt)			\
+	((svc)->srv_cpt_bits == 0 ? 0 :			\
+	 (cpt) << (64 - (svc)->srv_cpt_bits))
+
+/* convert sequence to position */
+#define PTLRPC_REQ_SEQ2POS(svc, seq)			\
+	((svc)->srv_cpt_bits == 0 ? (seq) :		\
+	 ((seq) >> (svc)->srv_cpt_bits) |		\
+	 ((seq) << (64 - (svc)->srv_cpt_bits)))
+
+/* convert position to sequence */
+#define PTLRPC_REQ_POS2SEQ(svc, pos)			\
+	((svc)->srv_cpt_bits == 0 ? (pos) :		\
+	 ((__u64)(pos) << (svc)->srv_cpt_bits) |	\
+	 ((__u64)(pos) >> (64 - (svc)->srv_cpt_bits)))
+
+static void *
+ptlrpc_lprocfs_svc_req_history_start(struct seq_file *s, loff_t *pos)
+{
+	struct ptlrpc_service		*svc = s->private;
+	struct ptlrpc_service_part	*svcpt;
+	struct ptlrpc_srh_iterator	*srhi;
+	unsigned int			cpt;
+	int				rc;
+	int				i;
+
+	if (sizeof(loff_t) != sizeof(__u64)) { /* can't support */
+		CWARN("Failed to read request history because size of loff_t %d can't match size of u64\n",
+		      (int)sizeof(loff_t));
+		return NULL;
+	}
+
+	OBD_ALLOC(srhi, sizeof(*srhi));
+	if (srhi == NULL)
+		return NULL;
+
+	srhi->srhi_seq = 0;
+	srhi->srhi_req = NULL;
+
+	cpt = PTLRPC_REQ_POS2CPT(svc, *pos);
+
+	ptlrpc_service_for_each_part(svcpt, i, svc) {
+		if (i < cpt) /* skip */
+			continue;
+		if (i > cpt) /* make up the lowest position for this CPT */
+			*pos = PTLRPC_REQ_CPT2POS(svc, i);
+
+		spin_lock(&svcpt->scp_lock);
+		rc = ptlrpc_lprocfs_svc_req_history_seek(svcpt, srhi,
+				PTLRPC_REQ_POS2SEQ(svc, *pos));
+		spin_unlock(&svcpt->scp_lock);
+		if (rc == 0) {
+			*pos = PTLRPC_REQ_SEQ2POS(svc, srhi->srhi_seq);
+			srhi->srhi_idx = i;
+			return srhi;
+		}
+	}
+
+	OBD_FREE(srhi, sizeof(*srhi));
+	return NULL;
+}
+
+static void
+ptlrpc_lprocfs_svc_req_history_stop(struct seq_file *s, void *iter)
+{
+	struct ptlrpc_srh_iterator *srhi = iter;
+
+	if (srhi != NULL)
+		OBD_FREE(srhi, sizeof(*srhi));
+}
+
+static void *
+ptlrpc_lprocfs_svc_req_history_next(struct seq_file *s,
+				    void *iter, loff_t *pos)
+{
+	struct ptlrpc_service		*svc = s->private;
+	struct ptlrpc_srh_iterator	*srhi = iter;
+	struct ptlrpc_service_part	*svcpt;
+	__u64				seq;
+	int				rc;
+	int				i;
+
+	for (i = srhi->srhi_idx; i < svc->srv_ncpts; i++) {
+		svcpt = svc->srv_parts[i];
+
+		if (i > srhi->srhi_idx) { /* reset iterator for a new CPT */
+			srhi->srhi_req = NULL;
+			seq = srhi->srhi_seq = 0;
+		} else { /* the next sequence */
+			seq = srhi->srhi_seq + (1 << svc->srv_cpt_bits);
+		}
+
+		spin_lock(&svcpt->scp_lock);
+		rc = ptlrpc_lprocfs_svc_req_history_seek(svcpt, srhi, seq);
+		spin_unlock(&svcpt->scp_lock);
+		if (rc == 0) {
+			*pos = PTLRPC_REQ_SEQ2POS(svc, srhi->srhi_seq);
+			srhi->srhi_idx = i;
+			return srhi;
+		}
+	}
+
+	OBD_FREE(srhi, sizeof(*srhi));
+	return NULL;
+}
+
+/* common ost/mdt so_req_printer */
+void target_print_req(void *seq_file, struct ptlrpc_request *req)
+{
+	/* Called holding srv_lock with irqs disabled.
+	 * Print specific req contents and a newline.
+	 * CAVEAT EMPTOR: check request message length before printing!!!
+	 * You might have received any old crap so you must be just as
+	 * careful here as the service's request parser!!! */
+	struct seq_file *sf = seq_file;
+
+	switch (req->rq_phase) {
+	case RQ_PHASE_NEW:
+		/* still awaiting a service thread's attention, or rejected
+		 * because the generic request message didn't unpack */
+		seq_printf(sf, "<not swabbed>\n");
+		break;
+	case RQ_PHASE_INTERPRET:
+		/* being handled, so basic msg swabbed, and opc is valid
+		 * but racing with mds_handle() */
+	case RQ_PHASE_COMPLETE:
+		/* been handled by mds_handle() reply state possibly still
+		 * volatile */
+		seq_printf(sf, "opc %d\n", lustre_msg_get_opc(req->rq_reqmsg));
+		break;
+	default:
+		DEBUG_REQ(D_ERROR, req, "bad phase %d", req->rq_phase);
+	}
+}
+EXPORT_SYMBOL(target_print_req);
+
+static int ptlrpc_lprocfs_svc_req_history_show(struct seq_file *s, void *iter)
+{
+	struct ptlrpc_service		*svc = s->private;
+	struct ptlrpc_srh_iterator	*srhi = iter;
+	struct ptlrpc_service_part	*svcpt;
+	struct ptlrpc_request		*req;
+	int				rc;
+
+	LASSERT(srhi->srhi_idx < svc->srv_ncpts);
+
+	svcpt = svc->srv_parts[srhi->srhi_idx];
+
+	spin_lock(&svcpt->scp_lock);
+
+	rc = ptlrpc_lprocfs_svc_req_history_seek(svcpt, srhi, srhi->srhi_seq);
+
+	if (rc == 0) {
+		req = srhi->srhi_req;
+
+		/* Print common req fields.
+		 * CAVEAT EMPTOR: we're racing with the service handler
+		 * here.  The request could contain any old crap, so you
+		 * must be just as careful as the service's request
+		 * parser. Currently I only print stuff here I know is OK
+		 * to look at coz it was set up in request_in_callback()!!! */
+		seq_printf(s, "%lld:%s:%s:x%llu:%d:%s:%ld:%lds(%+lds) ",
+			   req->rq_history_seq, libcfs_nid2str(req->rq_self),
+			   libcfs_id2str(req->rq_peer), req->rq_xid,
+			   req->rq_reqlen, ptlrpc_rqphase2str(req),
+			   req->rq_arrival_time.tv_sec,
+			   req->rq_sent - req->rq_arrival_time.tv_sec,
+			   req->rq_sent - req->rq_deadline);
+		if (svc->srv_ops.so_req_printer == NULL)
+			seq_printf(s, "\n");
+		else
+			svc->srv_ops.so_req_printer(s, srhi->srhi_req);
+	}
+
+	spin_unlock(&svcpt->scp_lock);
+	return rc;
+}
+
+static int
+ptlrpc_lprocfs_svc_req_history_open(struct inode *inode, struct file *file)
+{
+	static struct seq_operations sops = {
+		.start = ptlrpc_lprocfs_svc_req_history_start,
+		.stop  = ptlrpc_lprocfs_svc_req_history_stop,
+		.next  = ptlrpc_lprocfs_svc_req_history_next,
+		.show  = ptlrpc_lprocfs_svc_req_history_show,
+	};
+	struct seq_file       *seqf;
+	int		    rc;
+
+	rc = seq_open(file, &sops);
+	if (rc)
+		return rc;
+
+	seqf = file->private_data;
+	seqf->private = PDE_DATA(inode);
+	return 0;
+}
+
+/* See also lprocfs_rd_timeouts */
+static int ptlrpc_lprocfs_timeouts_seq_show(struct seq_file *m, void *n)
+{
+	struct ptlrpc_service		*svc = m->private;
+	struct ptlrpc_service_part	*svcpt;
+	struct dhms			ts;
+	time_t				worstt;
+	unsigned int			cur;
+	unsigned int			worst;
+	int				i;
+
+	if (AT_OFF) {
+		seq_printf(m, "adaptive timeouts off, using obd_timeout %u\n",
+			       obd_timeout);
+		return 0;
+	}
+
+	ptlrpc_service_for_each_part(svcpt, i, svc) {
+		cur	= at_get(&svcpt->scp_at_estimate);
+		worst	= svcpt->scp_at_estimate.at_worst_ever;
+		worstt	= svcpt->scp_at_estimate.at_worst_time;
+		s2dhms(&ts, get_seconds() - worstt);
+
+		seq_printf(m, "%10s : cur %3u  worst %3u (at %ld, "
+			      DHMS_FMT" ago) ", "service",
+			      cur, worst, worstt, DHMS_VARS(&ts));
+
+		lprocfs_at_hist_helper(m, &svcpt->scp_at_estimate);
+	}
+
+	return 0;
+}
+LPROC_SEQ_FOPS_RO(ptlrpc_lprocfs_timeouts);
+
+static int ptlrpc_lprocfs_hp_ratio_seq_show(struct seq_file *m, void *v)
+{
+	struct ptlrpc_service *svc = m->private;
+	seq_printf(m, "%d", svc->srv_hpreq_ratio);
+	return 0;
+}
+
+static ssize_t ptlrpc_lprocfs_hp_ratio_seq_write(struct file *file,
+					     const char __user *buffer,
+					     size_t count,
+					     loff_t *off)
+{
+	struct ptlrpc_service *svc = ((struct seq_file *)file->private_data)->private;
+	int	rc;
+	int	val;
+
+	rc = lprocfs_write_helper(buffer, count, &val);
+	if (rc < 0)
+		return rc;
+
+	if (val < 0)
+		return -ERANGE;
+
+	spin_lock(&svc->srv_lock);
+	svc->srv_hpreq_ratio = val;
+	spin_unlock(&svc->srv_lock);
+
+	return count;
+}
+LPROC_SEQ_FOPS(ptlrpc_lprocfs_hp_ratio);
+
+void ptlrpc_lprocfs_register_service(struct proc_dir_entry *entry,
+				     struct ptlrpc_service *svc)
+{
+	struct lprocfs_vars lproc_vars[] = {
+		{.name       = "high_priority_ratio",
+		 .fops	     = &ptlrpc_lprocfs_hp_ratio_fops,
+		 .data       = svc},
+		{.name       = "req_buffer_history_len",
+		 .fops	     = &ptlrpc_lprocfs_req_history_len_fops,
+		 .data       = svc},
+		{.name       = "req_buffer_history_max",
+		 .fops	     = &ptlrpc_lprocfs_req_history_max_fops,
+		 .data       = svc},
+		{.name       = "threads_min",
+		 .fops	     = &ptlrpc_lprocfs_threads_min_fops,
+		 .data       = svc},
+		{.name       = "threads_max",
+		 .fops	     = &ptlrpc_lprocfs_threads_max_fops,
+		 .data       = svc},
+		{.name       = "threads_started",
+		 .fops	     = &ptlrpc_lprocfs_threads_started_fops,
+		 .data       = svc},
+		{.name       = "timeouts",
+		 .fops	     = &ptlrpc_lprocfs_timeouts_fops,
+		 .data       = svc},
+		{.name       = "nrs_policies",
+		 .fops	     = &ptlrpc_lprocfs_nrs_fops,
+		 .data	     = svc},
+		{NULL}
+	};
+	static const struct file_operations req_history_fops = {
+		.owner       = THIS_MODULE,
+		.open	= ptlrpc_lprocfs_svc_req_history_open,
+		.read	= seq_read,
+		.llseek      = seq_lseek,
+		.release     = lprocfs_seq_release,
+	};
+
+	int rc;
+
+	ptlrpc_lprocfs_register(entry, svc->srv_name,
+				"stats", &svc->srv_procroot,
+				&svc->srv_stats);
+
+	if (svc->srv_procroot == NULL)
+		return;
+
+	lprocfs_add_vars(svc->srv_procroot, lproc_vars, NULL);
+
+	rc = lprocfs_seq_create(svc->srv_procroot, "req_history",
+				0400, &req_history_fops, svc);
+	if (rc)
+		CWARN("Error adding the req_history file\n");
+}
+
+void ptlrpc_lprocfs_register_obd(struct obd_device *obddev)
+{
+	ptlrpc_lprocfs_register(obddev->obd_proc_entry, NULL, "stats",
+				&obddev->obd_svc_procroot,
+				&obddev->obd_svc_stats);
+}
+EXPORT_SYMBOL(ptlrpc_lprocfs_register_obd);
+
+void ptlrpc_lprocfs_rpc_sent(struct ptlrpc_request *req, long amount)
+{
+	struct lprocfs_stats *svc_stats;
+	__u32 op = lustre_msg_get_opc(req->rq_reqmsg);
+	int opc = opcode_offset(op);
+
+	svc_stats = req->rq_import->imp_obd->obd_svc_stats;
+	if (svc_stats == NULL || opc <= 0)
+		return;
+	LASSERT(opc < LUSTRE_MAX_OPCODES);
+	if (!(op == LDLM_ENQUEUE || op == MDS_REINT))
+		lprocfs_counter_add(svc_stats, opc + EXTRA_MAX_OPCODES, amount);
+}
+
+void ptlrpc_lprocfs_brw(struct ptlrpc_request *req, int bytes)
+{
+	struct lprocfs_stats *svc_stats;
+	int idx;
+
+	if (!req->rq_import)
+		return;
+	svc_stats = req->rq_import->imp_obd->obd_svc_stats;
+	if (!svc_stats)
+		return;
+	idx = lustre_msg_get_opc(req->rq_reqmsg);
+	switch (idx) {
+	case OST_READ:
+		idx = BRW_READ_BYTES + PTLRPC_LAST_CNTR;
+		break;
+	case OST_WRITE:
+		idx = BRW_WRITE_BYTES + PTLRPC_LAST_CNTR;
+		break;
+	default:
+		LASSERTF(0, "unsupported opcode %u\n", idx);
+		break;
+	}
+
+	lprocfs_counter_add(svc_stats, idx, bytes);
+}
+
+EXPORT_SYMBOL(ptlrpc_lprocfs_brw);
+
+void ptlrpc_lprocfs_unregister_service(struct ptlrpc_service *svc)
+{
+	if (svc->srv_procroot != NULL)
+		lprocfs_remove(&svc->srv_procroot);
+
+	if (svc->srv_stats)
+		lprocfs_free_stats(&svc->srv_stats);
+}
+
+void ptlrpc_lprocfs_unregister_obd(struct obd_device *obd)
+{
+	if (obd->obd_svc_procroot)
+		lprocfs_remove(&obd->obd_svc_procroot);
+
+	if (obd->obd_svc_stats)
+		lprocfs_free_stats(&obd->obd_svc_stats);
+}
+EXPORT_SYMBOL(ptlrpc_lprocfs_unregister_obd);
+
+
+#define BUFLEN (UUID_MAX + 5)
+
+int lprocfs_wr_evict_client(struct file *file, const char __user *buffer,
+			    size_t count, loff_t *off)
+{
+	struct obd_device *obd = ((struct seq_file *)file->private_data)->private;
+	char	      *kbuf;
+	char	      *tmpbuf;
+
+	OBD_ALLOC(kbuf, BUFLEN);
+	if (kbuf == NULL)
+		return -ENOMEM;
+
+	/*
+	 * OBD_ALLOC() will zero kbuf, but we only copy BUFLEN - 1
+	 * bytes into kbuf, to ensure that the string is NUL-terminated.
+	 * UUID_MAX should include a trailing NUL already.
+	 */
+	if (copy_from_user(kbuf, buffer,
+			       min_t(unsigned long, BUFLEN - 1, count))) {
+		count = -EFAULT;
+		goto out;
+	}
+	tmpbuf = cfs_firststr(kbuf, min_t(unsigned long, BUFLEN - 1, count));
+	/* Kludge code(deadlock situation): the lprocfs lock has been held
+	 * since the client is evicted by writing client's
+	 * uuid/nid to procfs "evict_client" entry. However,
+	 * obd_export_evict_by_uuid() will call lprocfs_remove() to destroy
+	 * the proc entries under the being destroyed export{}, so I have
+	 * to drop the lock at first here.
+	 * - jay, jxiong@clusterfs.com */
+	class_incref(obd, __func__, current);
+
+	if (strncmp(tmpbuf, "nid:", 4) == 0)
+		obd_export_evict_by_nid(obd, tmpbuf + 4);
+	else if (strncmp(tmpbuf, "uuid:", 5) == 0)
+		obd_export_evict_by_uuid(obd, tmpbuf + 5);
+	else
+		obd_export_evict_by_uuid(obd, tmpbuf);
+
+	class_decref(obd, __func__, current);
+
+out:
+	OBD_FREE(kbuf, BUFLEN);
+	return count;
+}
+EXPORT_SYMBOL(lprocfs_wr_evict_client);
+
+#undef BUFLEN
+
+int lprocfs_wr_ping(struct file *file, const char __user *buffer,
+		    size_t count, loff_t *off)
+{
+	struct obd_device *obd = ((struct seq_file *)file->private_data)->private;
+	struct ptlrpc_request *req;
+	int		    rc;
+
+	LPROCFS_CLIMP_CHECK(obd);
+	req = ptlrpc_prep_ping(obd->u.cli.cl_import);
+	LPROCFS_CLIMP_EXIT(obd);
+	if (req == NULL)
+		return -ENOMEM;
+
+	req->rq_send_state = LUSTRE_IMP_FULL;
+
+	rc = ptlrpc_queue_wait(req);
+
+	ptlrpc_req_finished(req);
+	if (rc >= 0)
+		return count;
+	return rc;
+}
+EXPORT_SYMBOL(lprocfs_wr_ping);
+
+/* Write the connection UUID to this file to attempt to connect to that node.
+ * The connection UUID is a node's primary NID. For example,
+ * "echo connection=192.168.0.1@tcp0::instance > .../import".
+ */
+int lprocfs_wr_import(struct file *file, const char __user *buffer,
+		      size_t count, loff_t *off)
+{
+	struct obd_device *obd = ((struct seq_file *)file->private_data)->private;
+	struct obd_import *imp = obd->u.cli.cl_import;
+	char *kbuf = NULL;
+	char *uuid;
+	char *ptr;
+	int do_reconn = 1;
+	const char prefix[] = "connection=";
+	const int prefix_len = sizeof(prefix) - 1;
+
+	if (count > PAGE_CACHE_SIZE - 1 || count <= prefix_len)
+		return -EINVAL;
+
+	OBD_ALLOC(kbuf, count + 1);
+	if (kbuf == NULL)
+		return -ENOMEM;
+
+	if (copy_from_user(kbuf, buffer, count)) {
+		count = -EFAULT;
+		goto out;
+	}
+
+	kbuf[count] = 0;
+
+	/* only support connection=uuid::instance now */
+	if (strncmp(prefix, kbuf, prefix_len) != 0) {
+		count = -EINVAL;
+		goto out;
+	}
+
+	uuid = kbuf + prefix_len;
+	ptr = strstr(uuid, "::");
+	if (ptr) {
+		__u32 inst;
+		char *endptr;
+
+		*ptr = 0;
+		do_reconn = 0;
+		ptr += strlen("::");
+		inst = simple_strtol(ptr, &endptr, 10);
+		if (*endptr) {
+			CERROR("config: wrong instance # %s\n", ptr);
+		} else if (inst != imp->imp_connect_data.ocd_instance) {
+			CDEBUG(D_INFO, "IR: %s is connecting to an obsoleted target(%u/%u), reconnecting...\n",
+			       imp->imp_obd->obd_name,
+			       imp->imp_connect_data.ocd_instance, inst);
+			do_reconn = 1;
+		} else {
+			CDEBUG(D_INFO, "IR: %s has already been connecting to new target(%u)\n",
+			       imp->imp_obd->obd_name, inst);
+		}
+	}
+
+	if (do_reconn)
+		ptlrpc_recover_import(imp, uuid, 1);
+
+out:
+	OBD_FREE(kbuf, count + 1);
+	return count;
+}
+EXPORT_SYMBOL(lprocfs_wr_import);
+
+int lprocfs_rd_pinger_recov(struct seq_file *m, void *n)
+{
+	struct obd_device *obd = m->private;
+	struct obd_import *imp = obd->u.cli.cl_import;
+
+	LPROCFS_CLIMP_CHECK(obd);
+	seq_printf(m, "%d\n", !imp->imp_no_pinger_recover);
+	LPROCFS_CLIMP_EXIT(obd);
+
+	return 0;
+}
+EXPORT_SYMBOL(lprocfs_rd_pinger_recov);
+
+int lprocfs_wr_pinger_recov(struct file *file, const char __user *buffer,
+		      size_t count, loff_t *off)
+{
+	struct obd_device *obd = ((struct seq_file *)file->private_data)->private;
+	struct client_obd *cli = &obd->u.cli;
+	struct obd_import *imp = cli->cl_import;
+	int rc, val;
+
+	rc = lprocfs_write_helper(buffer, count, &val);
+	if (rc < 0)
+		return rc;
+
+	if (val != 0 && val != 1)
+		return -ERANGE;
+
+	LPROCFS_CLIMP_CHECK(obd);
+	spin_lock(&imp->imp_lock);
+	imp->imp_no_pinger_recover = !val;
+	spin_unlock(&imp->imp_lock);
+	LPROCFS_CLIMP_EXIT(obd);
+
+	return count;
+
+}
+EXPORT_SYMBOL(lprocfs_wr_pinger_recov);
+
+#endif /* CONFIG_PROC_FS */
diff --git a/drivers/staging/lustre/lustre/ptlrpc/niobuf.c b/drivers/staging/lustre/lustre/ptlrpc/niobuf.c
new file mode 100644
index 000000000..2fa258558
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ptlrpc/niobuf.c
@@ -0,0 +1,731 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#define DEBUG_SUBSYSTEM S_RPC
+#include "../include/obd_support.h"
+#include "../include/lustre_net.h"
+#include "../include/lustre_lib.h"
+#include "../include/obd.h"
+#include "../include/obd_class.h"
+#include "ptlrpc_internal.h"
+
+/**
+ * Helper function. Sends \a len bytes from \a base at offset \a offset
+ * over \a conn connection to portal \a portal.
+ * Returns 0 on success or error code.
+ */
+static int ptl_send_buf(lnet_handle_md_t *mdh, void *base, int len,
+			lnet_ack_req_t ack, struct ptlrpc_cb_id *cbid,
+			struct ptlrpc_connection *conn, int portal, __u64 xid,
+			unsigned int offset)
+{
+	int	      rc;
+	lnet_md_t	 md;
+
+	LASSERT(portal != 0);
+	LASSERT(conn != NULL);
+	CDEBUG(D_INFO, "conn=%p id %s\n", conn, libcfs_id2str(conn->c_peer));
+	md.start     = base;
+	md.length    = len;
+	md.threshold = (ack == LNET_ACK_REQ) ? 2 : 1;
+	md.options   = PTLRPC_MD_OPTIONS;
+	md.user_ptr  = cbid;
+	md.eq_handle = ptlrpc_eq_h;
+
+	if (unlikely(ack == LNET_ACK_REQ &&
+		     OBD_FAIL_CHECK_ORSET(OBD_FAIL_PTLRPC_ACK,
+					  OBD_FAIL_ONCE))) {
+		/* don't ask for the ack to simulate failing client */
+		ack = LNET_NOACK_REQ;
+	}
+
+	rc = LNetMDBind(md, LNET_UNLINK, mdh);
+	if (unlikely(rc != 0)) {
+		CERROR("LNetMDBind failed: %d\n", rc);
+		LASSERT(rc == -ENOMEM);
+		return -ENOMEM;
+	}
+
+	CDEBUG(D_NET, "Sending %d bytes to portal %d, xid %lld, offset %u\n",
+	       len, portal, xid, offset);
+
+	rc = LNetPut(conn->c_self, *mdh, ack,
+		     conn->c_peer, portal, xid, offset, 0);
+	if (unlikely(rc != 0)) {
+		int rc2;
+		/* We're going to get an UNLINK event when I unlink below,
+		 * which will complete just like any other failed send, so
+		 * I fall through and return success here! */
+		CERROR("LNetPut(%s, %d, %lld) failed: %d\n",
+		       libcfs_id2str(conn->c_peer), portal, xid, rc);
+		rc2 = LNetMDUnlink(*mdh);
+		LASSERTF(rc2 == 0, "rc2 = %d\n", rc2);
+	}
+
+	return 0;
+}
+
+static void mdunlink_iterate_helper(lnet_handle_md_t *bd_mds, int count)
+{
+	int i;
+
+	for (i = 0; i < count; i++)
+		LNetMDUnlink(bd_mds[i]);
+}
+
+
+/**
+ * Register bulk at the sender for later transfer.
+ * Returns 0 on success or error code.
+ */
+int ptlrpc_register_bulk(struct ptlrpc_request *req)
+{
+	struct ptlrpc_bulk_desc *desc = req->rq_bulk;
+	lnet_process_id_t peer;
+	int rc = 0;
+	int rc2;
+	int posted_md;
+	int total_md;
+	__u64 xid;
+	lnet_handle_me_t  me_h;
+	lnet_md_t	 md;
+
+	if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_BULK_GET_NET))
+		return 0;
+
+	/* NB no locking required until desc is on the network */
+	LASSERT(desc->bd_nob > 0);
+	LASSERT(desc->bd_md_count == 0);
+	LASSERT(desc->bd_md_max_brw <= PTLRPC_BULK_OPS_COUNT);
+	LASSERT(desc->bd_iov_count <= PTLRPC_MAX_BRW_PAGES);
+	LASSERT(desc->bd_req != NULL);
+	LASSERT(desc->bd_type == BULK_PUT_SINK ||
+		desc->bd_type == BULK_GET_SOURCE);
+
+	/* cleanup the state of the bulk for it will be reused */
+	if (req->rq_resend || req->rq_send_state == LUSTRE_IMP_REPLAY)
+		desc->bd_nob_transferred = 0;
+	else
+		LASSERT(desc->bd_nob_transferred == 0);
+
+	desc->bd_failure = 0;
+
+	peer = desc->bd_import->imp_connection->c_peer;
+
+	LASSERT(desc->bd_cbid.cbid_fn == client_bulk_callback);
+	LASSERT(desc->bd_cbid.cbid_arg == desc);
+
+	/* An XID is only used for a single request from the client.
+	 * For retried bulk transfers, a new XID will be allocated in
+	 * in ptlrpc_check_set() if it needs to be resent, so it is not
+	 * using the same RDMA match bits after an error.
+	 *
+	 * For multi-bulk RPCs, rq_xid is the last XID needed for bulks. The
+	 * first bulk XID is power-of-two aligned before rq_xid. LU-1431 */
+	xid = req->rq_xid & ~((__u64)desc->bd_md_max_brw - 1);
+	LASSERTF(!(desc->bd_registered &&
+		   req->rq_send_state != LUSTRE_IMP_REPLAY) ||
+		 xid != desc->bd_last_xid,
+		 "registered: %d  rq_xid: %llu bd_last_xid: %llu\n",
+		 desc->bd_registered, xid, desc->bd_last_xid);
+
+	total_md = (desc->bd_iov_count + LNET_MAX_IOV - 1) / LNET_MAX_IOV;
+	desc->bd_registered = 1;
+	desc->bd_last_xid = xid;
+	desc->bd_md_count = total_md;
+	md.user_ptr = &desc->bd_cbid;
+	md.eq_handle = ptlrpc_eq_h;
+	md.threshold = 1;		       /* PUT or GET */
+
+	for (posted_md = 0; posted_md < total_md; posted_md++, xid++) {
+		md.options = PTLRPC_MD_OPTIONS |
+			     ((desc->bd_type == BULK_GET_SOURCE) ?
+			      LNET_MD_OP_GET : LNET_MD_OP_PUT);
+		ptlrpc_fill_bulk_md(&md, desc, posted_md);
+
+		rc = LNetMEAttach(desc->bd_portal, peer, xid, 0,
+				  LNET_UNLINK, LNET_INS_AFTER, &me_h);
+		if (rc != 0) {
+			CERROR("%s: LNetMEAttach failed x%llu/%d: rc = %d\n",
+			       desc->bd_import->imp_obd->obd_name, xid,
+			       posted_md, rc);
+			break;
+		}
+
+		/* About to let the network at it... */
+		rc = LNetMDAttach(me_h, md, LNET_UNLINK,
+				  &desc->bd_mds[posted_md]);
+		if (rc != 0) {
+			CERROR("%s: LNetMDAttach failed x%llu/%d: rc = %d\n",
+			       desc->bd_import->imp_obd->obd_name, xid,
+			       posted_md, rc);
+			rc2 = LNetMEUnlink(me_h);
+			LASSERT(rc2 == 0);
+			break;
+		}
+	}
+
+	if (rc != 0) {
+		LASSERT(rc == -ENOMEM);
+		spin_lock(&desc->bd_lock);
+		desc->bd_md_count -= total_md - posted_md;
+		spin_unlock(&desc->bd_lock);
+		LASSERT(desc->bd_md_count >= 0);
+		mdunlink_iterate_helper(desc->bd_mds, desc->bd_md_max_brw);
+		req->rq_status = -ENOMEM;
+		return -ENOMEM;
+	}
+
+	/* Set rq_xid to matchbits of the final bulk so that server can
+	 * infer the number of bulks that were prepared */
+	req->rq_xid = --xid;
+	LASSERTF(desc->bd_last_xid == (req->rq_xid & PTLRPC_BULK_OPS_MASK),
+		 "bd_last_xid = x%llu, rq_xid = x%llu\n",
+		 desc->bd_last_xid, req->rq_xid);
+
+	spin_lock(&desc->bd_lock);
+	/* Holler if peer manages to touch buffers before he knows the xid */
+	if (desc->bd_md_count != total_md)
+		CWARN("%s: Peer %s touched %d buffers while I registered\n",
+		      desc->bd_import->imp_obd->obd_name, libcfs_id2str(peer),
+		      total_md - desc->bd_md_count);
+	spin_unlock(&desc->bd_lock);
+
+	CDEBUG(D_NET, "Setup %u bulk %s buffers: %u pages %u bytes, xid x%#llx-%#llx, portal %u\n",
+	       desc->bd_md_count,
+	       desc->bd_type == BULK_GET_SOURCE ? "get-source" : "put-sink",
+	       desc->bd_iov_count, desc->bd_nob,
+	       desc->bd_last_xid, req->rq_xid, desc->bd_portal);
+
+	return 0;
+}
+EXPORT_SYMBOL(ptlrpc_register_bulk);
+
+/**
+ * Disconnect a bulk desc from the network. Idempotent. Not
+ * thread-safe (i.e. only interlocks with completion callback).
+ * Returns 1 on success or 0 if network unregistration failed for whatever
+ * reason.
+ */
+int ptlrpc_unregister_bulk(struct ptlrpc_request *req, int async)
+{
+	struct ptlrpc_bulk_desc *desc = req->rq_bulk;
+	wait_queue_head_t	     *wq;
+	struct l_wait_info       lwi;
+	int		      rc;
+
+	LASSERT(!in_interrupt());     /* might sleep */
+
+	/* Let's setup deadline for reply unlink. */
+	if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_BULK_UNLINK) &&
+	    async && req->rq_bulk_deadline == 0)
+		req->rq_bulk_deadline = get_seconds() + LONG_UNLINK;
+
+	if (ptlrpc_client_bulk_active(req) == 0)	/* completed or */
+		return 1;				/* never registered */
+
+	LASSERT(desc->bd_req == req);  /* bd_req NULL until registered */
+
+	/* the unlink ensures the callback happens ASAP and is the last
+	 * one.  If it fails, it must be because completion just happened,
+	 * but we must still l_wait_event() in this case to give liblustre
+	 * a chance to run client_bulk_callback() */
+	mdunlink_iterate_helper(desc->bd_mds, desc->bd_md_max_brw);
+
+	if (ptlrpc_client_bulk_active(req) == 0)	/* completed or */
+		return 1;				/* never registered */
+
+	/* Move to "Unregistering" phase as bulk was not unlinked yet. */
+	ptlrpc_rqphase_move(req, RQ_PHASE_UNREGISTERING);
+
+	/* Do not wait for unlink to finish. */
+	if (async)
+		return 0;
+
+	if (req->rq_set != NULL)
+		wq = &req->rq_set->set_waitq;
+	else
+		wq = &req->rq_reply_waitq;
+
+	for (;;) {
+		/* Network access will complete in finite time but the HUGE
+		 * timeout lets us CWARN for visibility of sluggish NALs */
+		lwi = LWI_TIMEOUT_INTERVAL(cfs_time_seconds(LONG_UNLINK),
+					   cfs_time_seconds(1), NULL, NULL);
+		rc = l_wait_event(*wq, !ptlrpc_client_bulk_active(req), &lwi);
+		if (rc == 0) {
+			ptlrpc_rqphase_move(req, req->rq_next_phase);
+			return 1;
+		}
+
+		LASSERT(rc == -ETIMEDOUT);
+		DEBUG_REQ(D_WARNING, req, "Unexpectedly long timeout: desc %p",
+			  desc);
+	}
+	return 0;
+}
+EXPORT_SYMBOL(ptlrpc_unregister_bulk);
+
+static void ptlrpc_at_set_reply(struct ptlrpc_request *req, int flags)
+{
+	struct ptlrpc_service_part	*svcpt = req->rq_rqbd->rqbd_svcpt;
+	struct ptlrpc_service		*svc = svcpt->scp_service;
+	int service_time = max_t(int, get_seconds() -
+				 req->rq_arrival_time.tv_sec, 1);
+
+	if (!(flags & PTLRPC_REPLY_EARLY) &&
+	    (req->rq_type != PTL_RPC_MSG_ERR) &&
+	    (req->rq_reqmsg != NULL) &&
+	    !(lustre_msg_get_flags(req->rq_reqmsg) &
+	      (MSG_RESENT | MSG_REPLAY |
+	       MSG_REQ_REPLAY_DONE | MSG_LOCK_REPLAY_DONE))) {
+		/* early replies, errors and recovery requests don't count
+		 * toward our service time estimate */
+		int oldse = at_measured(&svcpt->scp_at_estimate, service_time);
+
+		if (oldse != 0) {
+			DEBUG_REQ(D_ADAPTTO, req,
+				  "svc %s changed estimate from %d to %d",
+				  svc->srv_name, oldse,
+				  at_get(&svcpt->scp_at_estimate));
+		}
+	}
+	/* Report actual service time for client latency calc */
+	lustre_msg_set_service_time(req->rq_repmsg, service_time);
+	/* Report service time estimate for future client reqs, but report 0
+	 * (to be ignored by client) if it's a error reply during recovery.
+	 * (bz15815) */
+	if (req->rq_type == PTL_RPC_MSG_ERR &&
+	    (req->rq_export == NULL || req->rq_export->exp_obd->obd_recovering))
+		lustre_msg_set_timeout(req->rq_repmsg, 0);
+	else
+		lustre_msg_set_timeout(req->rq_repmsg,
+				       at_get(&svcpt->scp_at_estimate));
+
+	if (req->rq_reqmsg &&
+	    !(lustre_msghdr_get_flags(req->rq_reqmsg) & MSGHDR_AT_SUPPORT)) {
+		CDEBUG(D_ADAPTTO, "No early reply support: flags=%#x req_flags=%#x magic=%d:%x/%x len=%d\n",
+		       flags, lustre_msg_get_flags(req->rq_reqmsg),
+		       lustre_msg_is_v1(req->rq_reqmsg),
+		       lustre_msg_get_magic(req->rq_reqmsg),
+		       lustre_msg_get_magic(req->rq_repmsg), req->rq_replen);
+	}
+}
+
+/**
+ * Send request reply from request \a req reply buffer.
+ * \a flags defines reply types
+ * Returns 0 on success or error code
+ */
+int ptlrpc_send_reply(struct ptlrpc_request *req, int flags)
+{
+	struct ptlrpc_reply_state *rs = req->rq_reply_state;
+	struct ptlrpc_connection  *conn;
+	int			rc;
+
+	/* We must already have a reply buffer (only ptlrpc_error() may be
+	 * called without one). The reply generated by sptlrpc layer (e.g.
+	 * error notify, etc.) might have NULL rq->reqmsg; Otherwise we must
+	 * have a request buffer which is either the actual (swabbed) incoming
+	 * request, or a saved copy if this is a req saved in
+	 * target_queue_final_reply().
+	 */
+	LASSERT(req->rq_no_reply == 0);
+	LASSERT(req->rq_reqbuf != NULL);
+	LASSERT(rs != NULL);
+	LASSERT((flags & PTLRPC_REPLY_MAYBE_DIFFICULT) || !rs->rs_difficult);
+	LASSERT(req->rq_repmsg != NULL);
+	LASSERT(req->rq_repmsg == rs->rs_msg);
+	LASSERT(rs->rs_cb_id.cbid_fn == reply_out_callback);
+	LASSERT(rs->rs_cb_id.cbid_arg == rs);
+
+	/* There may be no rq_export during failover */
+
+	if (unlikely(req->rq_export && req->rq_export->exp_obd &&
+		     req->rq_export->exp_obd->obd_fail)) {
+		/* Failed obd's only send ENODEV */
+		req->rq_type = PTL_RPC_MSG_ERR;
+		req->rq_status = -ENODEV;
+		CDEBUG(D_HA, "sending ENODEV from failed obd %d\n",
+		       req->rq_export->exp_obd->obd_minor);
+	}
+
+	/* In order to keep interoperability with the client (< 2.3) which
+	 * doesn't have pb_jobid in ptlrpc_body, We have to shrink the
+	 * ptlrpc_body in reply buffer to ptlrpc_body_v2, otherwise, the
+	 * reply buffer on client will be overflow.
+	 *
+	 * XXX Remove this whenever we drop the interoperability with
+	 * such client.
+	 */
+	req->rq_replen = lustre_shrink_msg(req->rq_repmsg, 0,
+					   sizeof(struct ptlrpc_body_v2), 1);
+
+	if (req->rq_type != PTL_RPC_MSG_ERR)
+		req->rq_type = PTL_RPC_MSG_REPLY;
+
+	lustre_msg_set_type(req->rq_repmsg, req->rq_type);
+	lustre_msg_set_status(req->rq_repmsg,
+			      ptlrpc_status_hton(req->rq_status));
+	lustre_msg_set_opc(req->rq_repmsg,
+		req->rq_reqmsg ? lustre_msg_get_opc(req->rq_reqmsg) : 0);
+
+	target_pack_pool_reply(req);
+
+	ptlrpc_at_set_reply(req, flags);
+
+	if (req->rq_export == NULL || req->rq_export->exp_connection == NULL)
+		conn = ptlrpc_connection_get(req->rq_peer, req->rq_self, NULL);
+	else
+		conn = ptlrpc_connection_addref(req->rq_export->exp_connection);
+
+	if (unlikely(conn == NULL)) {
+		CERROR("not replying on NULL connection\n"); /* bug 9635 */
+		return -ENOTCONN;
+	}
+	ptlrpc_rs_addref(rs);		   /* +1 ref for the network */
+
+	rc = sptlrpc_svc_wrap_reply(req);
+	if (unlikely(rc))
+		goto out;
+
+	req->rq_sent = get_seconds();
+
+	rc = ptl_send_buf(&rs->rs_md_h, rs->rs_repbuf, rs->rs_repdata_len,
+			  (rs->rs_difficult && !rs->rs_no_ack) ?
+			  LNET_ACK_REQ : LNET_NOACK_REQ,
+			  &rs->rs_cb_id, conn,
+			  ptlrpc_req2svc(req)->srv_rep_portal,
+			  req->rq_xid, req->rq_reply_off);
+out:
+	if (unlikely(rc != 0))
+		ptlrpc_req_drop_rs(req);
+	ptlrpc_connection_put(conn);
+	return rc;
+}
+EXPORT_SYMBOL(ptlrpc_send_reply);
+
+int ptlrpc_reply(struct ptlrpc_request *req)
+{
+	if (req->rq_no_reply)
+		return 0;
+	return ptlrpc_send_reply(req, 0);
+}
+EXPORT_SYMBOL(ptlrpc_reply);
+
+/**
+ * For request \a req send an error reply back. Create empty
+ * reply buffers if necessary.
+ */
+int ptlrpc_send_error(struct ptlrpc_request *req, int may_be_difficult)
+{
+	int rc;
+
+	if (req->rq_no_reply)
+		return 0;
+
+	if (!req->rq_repmsg) {
+		rc = lustre_pack_reply(req, 1, NULL, NULL);
+		if (rc)
+			return rc;
+	}
+
+	if (req->rq_status != -ENOSPC && req->rq_status != -EACCES &&
+	    req->rq_status != -EPERM && req->rq_status != -ENOENT &&
+	    req->rq_status != -EINPROGRESS && req->rq_status != -EDQUOT)
+		req->rq_type = PTL_RPC_MSG_ERR;
+
+	rc = ptlrpc_send_reply(req, may_be_difficult);
+	return rc;
+}
+EXPORT_SYMBOL(ptlrpc_send_error);
+
+int ptlrpc_error(struct ptlrpc_request *req)
+{
+	return ptlrpc_send_error(req, 0);
+}
+EXPORT_SYMBOL(ptlrpc_error);
+
+/**
+ * Send request \a request.
+ * if \a noreply is set, don't expect any reply back and don't set up
+ * reply buffers.
+ * Returns 0 on success or error code.
+ */
+int ptl_send_rpc(struct ptlrpc_request *request, int noreply)
+{
+	int rc;
+	int rc2;
+	int mpflag = 0;
+	struct ptlrpc_connection *connection;
+	lnet_handle_me_t  reply_me_h;
+	lnet_md_t	 reply_md;
+	struct obd_device *obd = request->rq_import->imp_obd;
+
+	if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_DROP_RPC))
+		return 0;
+
+	LASSERT(request->rq_type == PTL_RPC_MSG_REQUEST);
+	LASSERT(request->rq_wait_ctx == 0);
+
+	/* If this is a re-transmit, we're required to have disengaged
+	 * cleanly from the previous attempt */
+	LASSERT(!request->rq_receiving_reply);
+	LASSERT(!((lustre_msg_get_flags(request->rq_reqmsg) & MSG_REPLAY) &&
+		(request->rq_import->imp_state == LUSTRE_IMP_FULL)));
+
+	if (unlikely(obd != NULL && obd->obd_fail)) {
+		CDEBUG(D_HA, "muting rpc for failed imp obd %s\n",
+			obd->obd_name);
+		/* this prevents us from waiting in ptlrpc_queue_wait */
+		spin_lock(&request->rq_lock);
+		request->rq_err = 1;
+		spin_unlock(&request->rq_lock);
+		request->rq_status = -ENODEV;
+		return -ENODEV;
+	}
+
+	connection = request->rq_import->imp_connection;
+
+	lustre_msg_set_handle(request->rq_reqmsg,
+			      &request->rq_import->imp_remote_handle);
+	lustre_msg_set_type(request->rq_reqmsg, PTL_RPC_MSG_REQUEST);
+	lustre_msg_set_conn_cnt(request->rq_reqmsg,
+				request->rq_import->imp_conn_cnt);
+	lustre_msghdr_set_flags(request->rq_reqmsg,
+				request->rq_import->imp_msghdr_flags);
+
+	if (request->rq_resend)
+		lustre_msg_add_flags(request->rq_reqmsg, MSG_RESENT);
+
+	if (request->rq_memalloc)
+		mpflag = cfs_memory_pressure_get_and_set();
+
+	rc = sptlrpc_cli_wrap_request(request);
+	if (rc)
+		goto out;
+
+	/* bulk register should be done after wrap_request() */
+	if (request->rq_bulk != NULL) {
+		rc = ptlrpc_register_bulk(request);
+		if (rc != 0)
+			goto out;
+	}
+
+	if (!noreply) {
+		LASSERT(request->rq_replen != 0);
+		if (request->rq_repbuf == NULL) {
+			LASSERT(request->rq_repdata == NULL);
+			LASSERT(request->rq_repmsg == NULL);
+			rc = sptlrpc_cli_alloc_repbuf(request,
+						      request->rq_replen);
+			if (rc) {
+				/* this prevents us from looping in
+				 * ptlrpc_queue_wait */
+				spin_lock(&request->rq_lock);
+				request->rq_err = 1;
+				spin_unlock(&request->rq_lock);
+				request->rq_status = rc;
+				goto cleanup_bulk;
+			}
+		} else {
+			request->rq_repdata = NULL;
+			request->rq_repmsg = NULL;
+		}
+
+		rc = LNetMEAttach(request->rq_reply_portal,/*XXX FIXME bug 249*/
+				  connection->c_peer, request->rq_xid, 0,
+				  LNET_UNLINK, LNET_INS_AFTER, &reply_me_h);
+		if (rc != 0) {
+			CERROR("LNetMEAttach failed: %d\n", rc);
+			LASSERT(rc == -ENOMEM);
+			rc = -ENOMEM;
+			goto cleanup_bulk;
+		}
+	}
+
+	spin_lock(&request->rq_lock);
+	/* If the MD attach succeeds, there _will_ be a reply_in callback */
+	request->rq_receiving_reply = !noreply;
+	request->rq_req_unlink = 1;
+	/* We are responsible for unlinking the reply buffer */
+	request->rq_reply_unlink = !noreply;
+	/* Clear any flags that may be present from previous sends. */
+	request->rq_replied = 0;
+	request->rq_err = 0;
+	request->rq_timedout = 0;
+	request->rq_net_err = 0;
+	request->rq_resend = 0;
+	request->rq_restart = 0;
+	request->rq_reply_truncate = 0;
+	spin_unlock(&request->rq_lock);
+
+	if (!noreply) {
+		reply_md.start     = request->rq_repbuf;
+		reply_md.length    = request->rq_repbuf_len;
+		/* Allow multiple early replies */
+		reply_md.threshold = LNET_MD_THRESH_INF;
+		/* Manage remote for early replies */
+		reply_md.options   = PTLRPC_MD_OPTIONS | LNET_MD_OP_PUT |
+			LNET_MD_MANAGE_REMOTE |
+			LNET_MD_TRUNCATE; /* allow to make EOVERFLOW error */;
+		reply_md.user_ptr  = &request->rq_reply_cbid;
+		reply_md.eq_handle = ptlrpc_eq_h;
+
+		/* We must see the unlink callback to unset rq_reply_unlink,
+		   so we can't auto-unlink */
+		rc = LNetMDAttach(reply_me_h, reply_md, LNET_RETAIN,
+				  &request->rq_reply_md_h);
+		if (rc != 0) {
+			CERROR("LNetMDAttach failed: %d\n", rc);
+			LASSERT(rc == -ENOMEM);
+			spin_lock(&request->rq_lock);
+			/* ...but the MD attach didn't succeed... */
+			request->rq_receiving_reply = 0;
+			spin_unlock(&request->rq_lock);
+			rc = -ENOMEM;
+			goto cleanup_me;
+		}
+
+		CDEBUG(D_NET, "Setup reply buffer: %u bytes, xid %llu, portal %u\n",
+		       request->rq_repbuf_len, request->rq_xid,
+		       request->rq_reply_portal);
+	}
+
+	/* add references on request for request_out_callback */
+	ptlrpc_request_addref(request);
+	if (obd != NULL && obd->obd_svc_stats != NULL)
+		lprocfs_counter_add(obd->obd_svc_stats, PTLRPC_REQACTIVE_CNTR,
+			atomic_read(&request->rq_import->imp_inflight));
+
+	OBD_FAIL_TIMEOUT(OBD_FAIL_PTLRPC_DELAY_SEND, request->rq_timeout + 5);
+
+	do_gettimeofday(&request->rq_arrival_time);
+	request->rq_sent = get_seconds();
+	/* We give the server rq_timeout secs to process the req, and
+	   add the network latency for our local timeout. */
+	request->rq_deadline = request->rq_sent + request->rq_timeout +
+		ptlrpc_at_get_net_latency(request);
+
+	ptlrpc_pinger_sending_on_import(request->rq_import);
+
+	DEBUG_REQ(D_INFO, request, "send flg=%x",
+		  lustre_msg_get_flags(request->rq_reqmsg));
+	rc = ptl_send_buf(&request->rq_req_md_h,
+			  request->rq_reqbuf, request->rq_reqdata_len,
+			  LNET_NOACK_REQ, &request->rq_req_cbid,
+			  connection,
+			  request->rq_request_portal,
+			  request->rq_xid, 0);
+	if (rc == 0)
+		goto out;
+
+	ptlrpc_req_finished(request);
+	if (noreply)
+		goto out;
+
+ cleanup_me:
+	/* MEUnlink is safe; the PUT didn't even get off the ground, and
+	 * nobody apart from the PUT's target has the right nid+XID to
+	 * access the reply buffer. */
+	rc2 = LNetMEUnlink(reply_me_h);
+	LASSERT(rc2 == 0);
+	/* UNLINKED callback called synchronously */
+	LASSERT(!request->rq_receiving_reply);
+
+ cleanup_bulk:
+	/* We do sync unlink here as there was no real transfer here so
+	 * the chance to have long unlink to sluggish net is smaller here. */
+	ptlrpc_unregister_bulk(request, 0);
+ out:
+	if (request->rq_memalloc)
+		cfs_memory_pressure_restore(mpflag);
+	return rc;
+}
+EXPORT_SYMBOL(ptl_send_rpc);
+
+/**
+ * Register request buffer descriptor for request receiving.
+ */
+int ptlrpc_register_rqbd(struct ptlrpc_request_buffer_desc *rqbd)
+{
+	struct ptlrpc_service	  *service = rqbd->rqbd_svcpt->scp_service;
+	static lnet_process_id_t  match_id = {LNET_NID_ANY, LNET_PID_ANY};
+	int			  rc;
+	lnet_md_t		 md;
+	lnet_handle_me_t	  me_h;
+
+	CDEBUG(D_NET, "LNetMEAttach: portal %d\n",
+	       service->srv_req_portal);
+
+	if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_RQBD))
+		return -ENOMEM;
+
+	/* NB: CPT affinity service should use new LNet flag LNET_INS_LOCAL,
+	 * which means buffer can only be attached on local CPT, and LND
+	 * threads can find it by grabbing a local lock */
+	rc = LNetMEAttach(service->srv_req_portal,
+			  match_id, 0, ~0, LNET_UNLINK,
+			  rqbd->rqbd_svcpt->scp_cpt >= 0 ?
+			  LNET_INS_LOCAL : LNET_INS_AFTER, &me_h);
+	if (rc != 0) {
+		CERROR("LNetMEAttach failed: %d\n", rc);
+		return -ENOMEM;
+	}
+
+	LASSERT(rqbd->rqbd_refcount == 0);
+	rqbd->rqbd_refcount = 1;
+
+	md.start     = rqbd->rqbd_buffer;
+	md.length    = service->srv_buf_size;
+	md.max_size  = service->srv_max_req_size;
+	md.threshold = LNET_MD_THRESH_INF;
+	md.options   = PTLRPC_MD_OPTIONS | LNET_MD_OP_PUT | LNET_MD_MAX_SIZE;
+	md.user_ptr  = &rqbd->rqbd_cbid;
+	md.eq_handle = ptlrpc_eq_h;
+
+	rc = LNetMDAttach(me_h, md, LNET_UNLINK, &rqbd->rqbd_md_h);
+	if (rc == 0)
+		return 0;
+
+	CERROR("LNetMDAttach failed: %d;\n", rc);
+	LASSERT(rc == -ENOMEM);
+	rc = LNetMEUnlink(me_h);
+	LASSERT(rc == 0);
+	rqbd->rqbd_refcount = 0;
+
+	return -ENOMEM;
+}
diff --git a/drivers/staging/lustre/lustre/ptlrpc/nrs.c b/drivers/staging/lustre/lustre/ptlrpc/nrs.c
new file mode 100644
index 000000000..81ad74732
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ptlrpc/nrs.c
@@ -0,0 +1,1754 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License version 2 for more details.  A copy is
+ * included in the COPYING file that accompanied this code.
+
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2011 Intel Corporation
+ *
+ * Copyright 2012 Xyratex Technology Limited
+ */
+/*
+ * lustre/ptlrpc/nrs.c
+ *
+ * Network Request Scheduler (NRS)
+ *
+ * Allows to reorder the handling of RPCs at servers.
+ *
+ * Author: Liang Zhen <liang@whamcloud.com>
+ * Author: Nikitas Angelinas <nikitas_angelinas@xyratex.com>
+ */
+/**
+ * \addtogoup nrs
+ * @{
+ */
+
+#define DEBUG_SUBSYSTEM S_RPC
+#include "../include/obd_support.h"
+#include "../include/obd_class.h"
+#include "../include/lustre_net.h"
+#include "../include/lprocfs_status.h"
+#include "../../include/linux/libcfs/libcfs.h"
+#include "ptlrpc_internal.h"
+
+/* XXX: This is just for liblustre. Remove the #if defined directive when the
+ * "cfs_" prefix is dropped from cfs_list_head. */
+extern struct list_head ptlrpc_all_services;
+
+/**
+ * NRS core object.
+ */
+struct nrs_core nrs_core;
+
+static int nrs_policy_init(struct ptlrpc_nrs_policy *policy)
+{
+	return policy->pol_desc->pd_ops->op_policy_init != NULL ?
+	       policy->pol_desc->pd_ops->op_policy_init(policy) : 0;
+}
+
+static void nrs_policy_fini(struct ptlrpc_nrs_policy *policy)
+{
+	LASSERT(policy->pol_ref == 0);
+	LASSERT(policy->pol_req_queued == 0);
+
+	if (policy->pol_desc->pd_ops->op_policy_fini != NULL)
+		policy->pol_desc->pd_ops->op_policy_fini(policy);
+}
+
+static int nrs_policy_ctl_locked(struct ptlrpc_nrs_policy *policy,
+				 enum ptlrpc_nrs_ctl opc, void *arg)
+{
+	/**
+	 * The policy may be stopped, but the lprocfs files and
+	 * ptlrpc_nrs_policy instances remain present until unregistration time.
+	 * Do not perform the ctl operation if the policy is stopped, as
+	 * policy->pol_private will be NULL in such a case.
+	 */
+	if (policy->pol_state == NRS_POL_STATE_STOPPED)
+		return -ENODEV;
+
+	return policy->pol_desc->pd_ops->op_policy_ctl != NULL ?
+	       policy->pol_desc->pd_ops->op_policy_ctl(policy, opc, arg) :
+	       -ENOSYS;
+}
+
+static void nrs_policy_stop0(struct ptlrpc_nrs_policy *policy)
+{
+	struct ptlrpc_nrs *nrs = policy->pol_nrs;
+
+	if (policy->pol_desc->pd_ops->op_policy_stop != NULL) {
+		spin_unlock(&nrs->nrs_lock);
+
+		policy->pol_desc->pd_ops->op_policy_stop(policy);
+
+		spin_lock(&nrs->nrs_lock);
+	}
+
+	LASSERT(list_empty(&policy->pol_list_queued));
+	LASSERT(policy->pol_req_queued == 0 &&
+		policy->pol_req_started == 0);
+
+	policy->pol_private = NULL;
+
+	policy->pol_state = NRS_POL_STATE_STOPPED;
+
+	if (atomic_dec_and_test(&policy->pol_desc->pd_refs))
+		module_put(policy->pol_desc->pd_owner);
+}
+
+static int nrs_policy_stop_locked(struct ptlrpc_nrs_policy *policy)
+{
+	struct ptlrpc_nrs *nrs = policy->pol_nrs;
+
+	if (nrs->nrs_policy_fallback == policy && !nrs->nrs_stopping)
+		return -EPERM;
+
+	if (policy->pol_state == NRS_POL_STATE_STARTING)
+		return -EAGAIN;
+
+	/* In progress or already stopped */
+	if (policy->pol_state != NRS_POL_STATE_STARTED)
+		return 0;
+
+	policy->pol_state = NRS_POL_STATE_STOPPING;
+
+	/* Immediately make it invisible */
+	if (nrs->nrs_policy_primary == policy) {
+		nrs->nrs_policy_primary = NULL;
+
+	} else {
+		LASSERT(nrs->nrs_policy_fallback == policy);
+		nrs->nrs_policy_fallback = NULL;
+	}
+
+	/* I have the only refcount */
+	if (policy->pol_ref == 1)
+		nrs_policy_stop0(policy);
+
+	return 0;
+}
+
+/**
+ * Transitions the \a nrs NRS head's primary policy to
+ * ptlrpc_nrs_pol_state::NRS_POL_STATE_STOPPING and if the policy has no
+ * pending usage references, to ptlrpc_nrs_pol_state::NRS_POL_STATE_STOPPED.
+ *
+ * \param[in] nrs the NRS head to carry out this operation on
+ */
+static void nrs_policy_stop_primary(struct ptlrpc_nrs *nrs)
+{
+	struct ptlrpc_nrs_policy *tmp = nrs->nrs_policy_primary;
+
+	if (tmp == NULL)
+		return;
+
+	nrs->nrs_policy_primary = NULL;
+
+	LASSERT(tmp->pol_state == NRS_POL_STATE_STARTED);
+	tmp->pol_state = NRS_POL_STATE_STOPPING;
+
+	if (tmp->pol_ref == 0)
+		nrs_policy_stop0(tmp);
+}
+
+/**
+ * Transitions a policy across the ptlrpc_nrs_pol_state range of values, in
+ * response to an lprocfs command to start a policy.
+ *
+ * If a primary policy different to the current one is specified, this function
+ * will transition the new policy to the
+ * ptlrpc_nrs_pol_state::NRS_POL_STATE_STARTING and then to
+ * ptlrpc_nrs_pol_state::NRS_POL_STATE_STARTED, and will then transition
+ * the old primary policy (if there is one) to
+ * ptlrpc_nrs_pol_state::NRS_POL_STATE_STOPPING, and if there are no outstanding
+ * references on the policy to ptlrpc_nrs_pol_stae::NRS_POL_STATE_STOPPED.
+ *
+ * If the fallback policy is specified, this is taken to indicate an instruction
+ * to stop the current primary policy, without substituting it with another
+ * primary policy, so the primary policy (if any) is transitioned to
+ * ptlrpc_nrs_pol_state::NRS_POL_STATE_STOPPING, and if there are no outstanding
+ * references on the policy to ptlrpc_nrs_pol_stae::NRS_POL_STATE_STOPPED. In
+ * this case, the fallback policy is only left active in the NRS head.
+ */
+static int nrs_policy_start_locked(struct ptlrpc_nrs_policy *policy)
+{
+	struct ptlrpc_nrs      *nrs = policy->pol_nrs;
+	int			rc = 0;
+
+	/**
+	 * Don't allow multiple starting which is too complex, and has no real
+	 * benefit.
+	 */
+	if (nrs->nrs_policy_starting)
+		return -EAGAIN;
+
+	LASSERT(policy->pol_state != NRS_POL_STATE_STARTING);
+
+	if (policy->pol_state == NRS_POL_STATE_STOPPING)
+		return -EAGAIN;
+
+	if (policy->pol_flags & PTLRPC_NRS_FL_FALLBACK) {
+		/**
+		 * This is for cases in which the user sets the policy to the
+		 * fallback policy (currently fifo for all services); i.e. the
+		 * user is resetting the policy to the default; so we stop the
+		 * primary policy, if any.
+		 */
+		if (policy == nrs->nrs_policy_fallback) {
+			nrs_policy_stop_primary(nrs);
+			return 0;
+		}
+
+		/**
+		 * If we reach here, we must be setting up the fallback policy
+		 * at service startup time, and only a single policy with the
+		 * nrs_policy_flags::PTLRPC_NRS_FL_FALLBACK flag set can
+		 * register with NRS core.
+		 */
+		LASSERT(nrs->nrs_policy_fallback == NULL);
+	} else {
+		/**
+		 * Shouldn't start primary policy if w/o fallback policy.
+		 */
+		if (nrs->nrs_policy_fallback == NULL)
+			return -EPERM;
+
+		if (policy->pol_state == NRS_POL_STATE_STARTED)
+			return 0;
+	}
+
+	/**
+	 * Increase the module usage count for policies registering from other
+	 * modules.
+	 */
+	if (atomic_inc_return(&policy->pol_desc->pd_refs) == 1 &&
+	    !try_module_get(policy->pol_desc->pd_owner)) {
+		atomic_dec(&policy->pol_desc->pd_refs);
+		CERROR("NRS: cannot get module for policy %s; is it alive?\n",
+		       policy->pol_desc->pd_name);
+		return -ENODEV;
+	}
+
+	/**
+	 * Serialize policy starting across the NRS head
+	 */
+	nrs->nrs_policy_starting = 1;
+
+	policy->pol_state = NRS_POL_STATE_STARTING;
+
+	if (policy->pol_desc->pd_ops->op_policy_start) {
+		spin_unlock(&nrs->nrs_lock);
+
+		rc = policy->pol_desc->pd_ops->op_policy_start(policy);
+
+		spin_lock(&nrs->nrs_lock);
+		if (rc != 0) {
+			if (atomic_dec_and_test(&policy->pol_desc->pd_refs))
+				module_put(policy->pol_desc->pd_owner);
+
+			policy->pol_state = NRS_POL_STATE_STOPPED;
+			goto out;
+		}
+	}
+
+	policy->pol_state = NRS_POL_STATE_STARTED;
+
+	if (policy->pol_flags & PTLRPC_NRS_FL_FALLBACK) {
+		/**
+		 * This path is only used at PTLRPC service setup time.
+		 */
+		nrs->nrs_policy_fallback = policy;
+	} else {
+		/*
+		 * Try to stop the current primary policy if there is one.
+		 */
+		nrs_policy_stop_primary(nrs);
+
+		/**
+		 * And set the newly-started policy as the primary one.
+		 */
+		nrs->nrs_policy_primary = policy;
+	}
+
+out:
+	nrs->nrs_policy_starting = 0;
+
+	return rc;
+}
+
+/**
+ * Increases the policy's usage reference count.
+ */
+static inline void nrs_policy_get_locked(struct ptlrpc_nrs_policy *policy)
+{
+	policy->pol_ref++;
+}
+
+/**
+ * Decreases the policy's usage reference count, and stops the policy in case it
+ * was already stopping and have no more outstanding usage references (which
+ * indicates it has no more queued or started requests, and can be safely
+ * stopped).
+ */
+static void nrs_policy_put_locked(struct ptlrpc_nrs_policy *policy)
+{
+	LASSERT(policy->pol_ref > 0);
+
+	policy->pol_ref--;
+	if (unlikely(policy->pol_ref == 0 &&
+	    policy->pol_state == NRS_POL_STATE_STOPPING))
+		nrs_policy_stop0(policy);
+}
+
+static void nrs_policy_put(struct ptlrpc_nrs_policy *policy)
+{
+	spin_lock(&policy->pol_nrs->nrs_lock);
+	nrs_policy_put_locked(policy);
+	spin_unlock(&policy->pol_nrs->nrs_lock);
+}
+
+/**
+ * Find and return a policy by name.
+ */
+static struct ptlrpc_nrs_policy *nrs_policy_find_locked(struct ptlrpc_nrs *nrs,
+							 char *name)
+{
+	struct ptlrpc_nrs_policy *tmp;
+
+	list_for_each_entry(tmp, &nrs->nrs_policy_list, pol_list) {
+		if (strncmp(tmp->pol_desc->pd_name, name,
+			    NRS_POL_NAME_MAX) == 0) {
+			nrs_policy_get_locked(tmp);
+			return tmp;
+		}
+	}
+	return NULL;
+}
+
+/**
+ * Release references for the resource hierarchy moving upwards towards the
+ * policy instance resource.
+ */
+static void nrs_resource_put(struct ptlrpc_nrs_resource *res)
+{
+	struct ptlrpc_nrs_policy *policy = res->res_policy;
+
+	if (policy->pol_desc->pd_ops->op_res_put != NULL) {
+		struct ptlrpc_nrs_resource *parent;
+
+		for (; res != NULL; res = parent) {
+			parent = res->res_parent;
+			policy->pol_desc->pd_ops->op_res_put(policy, res);
+		}
+	}
+}
+
+/**
+ * Obtains references for each resource in the resource hierarchy for request
+ * \a nrq if it is to be handled by \a policy.
+ *
+ * \param[in] policy	  the policy
+ * \param[in] nrq	  the request
+ * \param[in] moving_req  denotes whether this is a call to the function by
+ *			  ldlm_lock_reorder_req(), in order to move \a nrq to
+ *			  the high-priority NRS head; we should not sleep when
+ *			  set.
+ *
+ * \retval NULL		  resource hierarchy references not obtained
+ * \retval valid-pointer  the bottom level of the resource hierarchy
+ *
+ * \see ptlrpc_nrs_pol_ops::op_res_get()
+ */
+static
+struct ptlrpc_nrs_resource *nrs_resource_get(struct ptlrpc_nrs_policy *policy,
+					      struct ptlrpc_nrs_request *nrq,
+					      bool moving_req)
+{
+	/**
+	 * Set to NULL to traverse the resource hierarchy from the top.
+	 */
+	struct ptlrpc_nrs_resource *res = NULL;
+	struct ptlrpc_nrs_resource *tmp = NULL;
+	int			    rc;
+
+	while (1) {
+		rc = policy->pol_desc->pd_ops->op_res_get(policy, nrq, res,
+							  &tmp, moving_req);
+		if (rc < 0) {
+			if (res != NULL)
+				nrs_resource_put(res);
+			return NULL;
+		}
+
+		LASSERT(tmp != NULL);
+		tmp->res_parent = res;
+		tmp->res_policy = policy;
+		res = tmp;
+		tmp = NULL;
+		/**
+		 * Return once we have obtained a reference to the bottom level
+		 * of the resource hierarchy.
+		 */
+		if (rc > 0)
+			return res;
+	}
+}
+
+/**
+ * Obtains resources for the resource hierarchies and policy references for
+ * the fallback and current primary policy (if any), that will later be used
+ * to handle request \a nrq.
+ *
+ * \param[in]  nrs  the NRS head instance that will be handling request \a nrq.
+ * \param[in]  nrq  the request that is being handled.
+ * \param[out] resp the array where references to the resource hierarchy are
+ *		    stored.
+ * \param[in]  moving_req  is set when obtaining resources while moving a
+ *			   request from a policy on the regular NRS head to a
+ *			   policy on the HP NRS head (via
+ *			   ldlm_lock_reorder_req()). It signifies that
+ *			   allocations to get resources should be atomic; for
+ *			   a full explanation, see comment in
+ *			   ptlrpc_nrs_pol_ops::op_res_get().
+ */
+static void nrs_resource_get_safe(struct ptlrpc_nrs *nrs,
+				  struct ptlrpc_nrs_request *nrq,
+				  struct ptlrpc_nrs_resource **resp,
+				  bool moving_req)
+{
+	struct ptlrpc_nrs_policy   *primary = NULL;
+	struct ptlrpc_nrs_policy   *fallback = NULL;
+
+	memset(resp, 0, sizeof(resp[0]) * NRS_RES_MAX);
+
+	/**
+	 * Obtain policy references.
+	 */
+	spin_lock(&nrs->nrs_lock);
+
+	fallback = nrs->nrs_policy_fallback;
+	nrs_policy_get_locked(fallback);
+
+	primary = nrs->nrs_policy_primary;
+	if (primary != NULL)
+		nrs_policy_get_locked(primary);
+
+	spin_unlock(&nrs->nrs_lock);
+
+	/**
+	 * Obtain resource hierarchy references.
+	 */
+	resp[NRS_RES_FALLBACK] = nrs_resource_get(fallback, nrq, moving_req);
+	LASSERT(resp[NRS_RES_FALLBACK] != NULL);
+
+	if (primary != NULL) {
+		resp[NRS_RES_PRIMARY] = nrs_resource_get(primary, nrq,
+							 moving_req);
+		/**
+		 * A primary policy may exist which may not wish to serve a
+		 * particular request for different reasons; release the
+		 * reference on the policy as it will not be used for this
+		 * request.
+		 */
+		if (resp[NRS_RES_PRIMARY] == NULL)
+			nrs_policy_put(primary);
+	}
+}
+
+/**
+ * Releases references to resource hierarchies and policies, because they are no
+ * longer required; used when request handling has been completed, or the
+ * request is moving to the high priority NRS head.
+ *
+ * \param resp	the resource hierarchy that is being released
+ *
+ * \see ptlrpcnrs_req_hp_move()
+ * \see ptlrpc_nrs_req_finalize()
+ */
+static void nrs_resource_put_safe(struct ptlrpc_nrs_resource **resp)
+{
+	struct ptlrpc_nrs_policy *pols[NRS_RES_MAX];
+	struct ptlrpc_nrs	 *nrs = NULL;
+	int			  i;
+
+	for (i = 0; i < NRS_RES_MAX; i++) {
+		if (resp[i] != NULL) {
+			pols[i] = resp[i]->res_policy;
+			nrs_resource_put(resp[i]);
+			resp[i] = NULL;
+		} else {
+			pols[i] = NULL;
+		}
+	}
+
+	for (i = 0; i < NRS_RES_MAX; i++) {
+		if (pols[i] == NULL)
+			continue;
+
+		if (nrs == NULL) {
+			nrs = pols[i]->pol_nrs;
+			spin_lock(&nrs->nrs_lock);
+		}
+		nrs_policy_put_locked(pols[i]);
+	}
+
+	if (nrs != NULL)
+		spin_unlock(&nrs->nrs_lock);
+}
+
+/**
+ * Obtains an NRS request from \a policy for handling or examination; the
+ * request should be removed in the 'handling' case.
+ *
+ * Calling into this function implies we already know the policy has a request
+ * waiting to be handled.
+ *
+ * \param[in] policy the policy from which a request
+ * \param[in] peek   when set, signifies that we just want to examine the
+ *		     request, and not handle it, so the request is not removed
+ *		     from the policy.
+ * \param[in] force  when set, it will force a policy to return a request if it
+ *		     has one pending
+ *
+ * \retval the NRS request to be handled
+ */
+static inline
+struct ptlrpc_nrs_request *nrs_request_get(struct ptlrpc_nrs_policy *policy,
+					    bool peek, bool force)
+{
+	struct ptlrpc_nrs_request *nrq;
+
+	LASSERT(policy->pol_req_queued > 0);
+
+	nrq = policy->pol_desc->pd_ops->op_req_get(policy, peek, force);
+
+	LASSERT(ergo(nrq != NULL, nrs_request_policy(nrq) == policy));
+
+	return nrq;
+}
+
+/**
+ * Enqueues request \a nrq for later handling, via one one the policies for
+ * which resources where earlier obtained via nrs_resource_get_safe(). The
+ * function attempts to enqueue the request first on the primary policy
+ * (if any), since this is the preferred choice.
+ *
+ * \param nrq the request being enqueued
+ *
+ * \see nrs_resource_get_safe()
+ */
+static inline void nrs_request_enqueue(struct ptlrpc_nrs_request *nrq)
+{
+	struct ptlrpc_nrs_policy *policy;
+	int			  rc;
+	int			  i;
+
+	/**
+	 * Try in descending order, because the primary policy (if any) is
+	 * the preferred choice.
+	 */
+	for (i = NRS_RES_MAX - 1; i >= 0; i--) {
+		if (nrq->nr_res_ptrs[i] == NULL)
+			continue;
+
+		nrq->nr_res_idx = i;
+		policy = nrq->nr_res_ptrs[i]->res_policy;
+
+		rc = policy->pol_desc->pd_ops->op_req_enqueue(policy, nrq);
+		if (rc == 0) {
+			policy->pol_nrs->nrs_req_queued++;
+			policy->pol_req_queued++;
+			return;
+		}
+	}
+	/**
+	 * Should never get here, as at least the primary policy's
+	 * ptlrpc_nrs_pol_ops::op_req_enqueue() implementation should always
+	 * succeed.
+	 */
+	LBUG();
+}
+
+/**
+ * Called when a request has been handled
+ *
+ * \param[in] nrs the request that has been handled; can be used for
+ *		  job/resource control.
+ *
+ * \see ptlrpc_nrs_req_stop_nolock()
+ */
+static inline void nrs_request_stop(struct ptlrpc_nrs_request *nrq)
+{
+	struct ptlrpc_nrs_policy *policy = nrs_request_policy(nrq);
+
+	if (policy->pol_desc->pd_ops->op_req_stop)
+		policy->pol_desc->pd_ops->op_req_stop(policy, nrq);
+
+	LASSERT(policy->pol_nrs->nrs_req_started > 0);
+	LASSERT(policy->pol_req_started > 0);
+
+	policy->pol_nrs->nrs_req_started--;
+	policy->pol_req_started--;
+}
+
+/**
+ * Handler for operations that can be carried out on policies.
+ *
+ * Handles opcodes that are common to all policy types within NRS core, and
+ * passes any unknown opcodes to the policy-specific control function.
+ *
+ * \param[in]	  nrs  the NRS head this policy belongs to.
+ * \param[in]	  name the human-readable policy name; should be the same as
+ *		       ptlrpc_nrs_pol_desc::pd_name.
+ * \param[in]	  opc  the opcode of the operation being carried out.
+ * \param[in,out] arg  can be used to pass information in and out between when
+ *		       carrying an operation; usually data that is private to
+ *		       the policy at some level, or generic policy status
+ *		       information.
+ *
+ * \retval -ve error condition
+ * \retval   0 operation was carried out successfully
+ */
+static int nrs_policy_ctl(struct ptlrpc_nrs *nrs, char *name,
+			  enum ptlrpc_nrs_ctl opc, void *arg)
+{
+	struct ptlrpc_nrs_policy       *policy;
+	int				rc = 0;
+
+	spin_lock(&nrs->nrs_lock);
+
+	policy = nrs_policy_find_locked(nrs, name);
+	if (policy == NULL) {
+		rc = -ENOENT;
+		goto out;
+	}
+
+	switch (opc) {
+		/**
+		 * Unknown opcode, pass it down to the policy-specific control
+		 * function for handling.
+		 */
+	default:
+		rc = nrs_policy_ctl_locked(policy, opc, arg);
+		break;
+
+		/**
+		 * Start \e policy
+		 */
+	case PTLRPC_NRS_CTL_START:
+		rc = nrs_policy_start_locked(policy);
+		break;
+	}
+out:
+	if (policy != NULL)
+		nrs_policy_put_locked(policy);
+
+	spin_unlock(&nrs->nrs_lock);
+
+	return rc;
+}
+
+/**
+ * Unregisters a policy by name.
+ *
+ * \param[in] nrs  the NRS head this policy belongs to.
+ * \param[in] name the human-readable policy name; should be the same as
+ *		   ptlrpc_nrs_pol_desc::pd_name
+ *
+ * \retval -ve error
+ * \retval   0 success
+ */
+static int nrs_policy_unregister(struct ptlrpc_nrs *nrs, char *name)
+{
+	struct ptlrpc_nrs_policy *policy = NULL;
+
+	spin_lock(&nrs->nrs_lock);
+
+	policy = nrs_policy_find_locked(nrs, name);
+	if (policy == NULL) {
+		spin_unlock(&nrs->nrs_lock);
+
+		CERROR("Can't find NRS policy %s\n", name);
+		return -ENOENT;
+	}
+
+	if (policy->pol_ref > 1) {
+		CERROR("Policy %s is busy with %d references\n", name,
+		       (int)policy->pol_ref);
+		nrs_policy_put_locked(policy);
+
+		spin_unlock(&nrs->nrs_lock);
+		return -EBUSY;
+	}
+
+	LASSERT(policy->pol_req_queued == 0);
+	LASSERT(policy->pol_req_started == 0);
+
+	if (policy->pol_state != NRS_POL_STATE_STOPPED) {
+		nrs_policy_stop_locked(policy);
+		LASSERT(policy->pol_state == NRS_POL_STATE_STOPPED);
+	}
+
+	list_del(&policy->pol_list);
+	nrs->nrs_num_pols--;
+
+	nrs_policy_put_locked(policy);
+
+	spin_unlock(&nrs->nrs_lock);
+
+	nrs_policy_fini(policy);
+
+	LASSERT(policy->pol_private == NULL);
+	OBD_FREE_PTR(policy);
+
+	return 0;
+}
+
+/**
+ * Register a policy from \policy descriptor \a desc with NRS head \a nrs.
+ *
+ * \param[in] nrs   the NRS head on which the policy will be registered.
+ * \param[in] desc  the policy descriptor from which the information will be
+ *		    obtained to register the policy.
+ *
+ * \retval -ve error
+ * \retval   0 success
+ */
+static int nrs_policy_register(struct ptlrpc_nrs *nrs,
+			       struct ptlrpc_nrs_pol_desc *desc)
+{
+	struct ptlrpc_nrs_policy       *policy;
+	struct ptlrpc_nrs_policy       *tmp;
+	struct ptlrpc_service_part     *svcpt = nrs->nrs_svcpt;
+	int				rc;
+
+	LASSERT(svcpt != NULL);
+	LASSERT(desc->pd_ops != NULL);
+	LASSERT(desc->pd_ops->op_res_get != NULL);
+	LASSERT(desc->pd_ops->op_req_get != NULL);
+	LASSERT(desc->pd_ops->op_req_enqueue != NULL);
+	LASSERT(desc->pd_ops->op_req_dequeue != NULL);
+	LASSERT(desc->pd_compat != NULL);
+
+	OBD_CPT_ALLOC_GFP(policy, svcpt->scp_service->srv_cptable,
+			  svcpt->scp_cpt, sizeof(*policy), GFP_NOFS);
+	if (policy == NULL)
+		return -ENOMEM;
+
+	policy->pol_nrs     = nrs;
+	policy->pol_desc    = desc;
+	policy->pol_state   = NRS_POL_STATE_STOPPED;
+	policy->pol_flags   = desc->pd_flags;
+
+	INIT_LIST_HEAD(&policy->pol_list);
+	INIT_LIST_HEAD(&policy->pol_list_queued);
+
+	rc = nrs_policy_init(policy);
+	if (rc != 0) {
+		OBD_FREE_PTR(policy);
+		return rc;
+	}
+
+	spin_lock(&nrs->nrs_lock);
+
+	tmp = nrs_policy_find_locked(nrs, policy->pol_desc->pd_name);
+	if (tmp != NULL) {
+		CERROR("NRS policy %s has been registered, can't register it for %s\n",
+		       policy->pol_desc->pd_name,
+		       svcpt->scp_service->srv_name);
+		nrs_policy_put_locked(tmp);
+
+		spin_unlock(&nrs->nrs_lock);
+		nrs_policy_fini(policy);
+		OBD_FREE_PTR(policy);
+
+		return -EEXIST;
+	}
+
+	list_add_tail(&policy->pol_list, &nrs->nrs_policy_list);
+	nrs->nrs_num_pols++;
+
+	if (policy->pol_flags & PTLRPC_NRS_FL_REG_START)
+		rc = nrs_policy_start_locked(policy);
+
+	spin_unlock(&nrs->nrs_lock);
+
+	if (rc != 0)
+		(void) nrs_policy_unregister(nrs, policy->pol_desc->pd_name);
+
+	return rc;
+}
+
+/**
+ * Enqueue request \a req using one of the policies its resources are referring
+ * to.
+ *
+ * \param[in] req the request to enqueue.
+ */
+static void ptlrpc_nrs_req_add_nolock(struct ptlrpc_request *req)
+{
+	struct ptlrpc_nrs_policy       *policy;
+
+	LASSERT(req->rq_nrq.nr_initialized);
+	LASSERT(!req->rq_nrq.nr_enqueued);
+
+	nrs_request_enqueue(&req->rq_nrq);
+	req->rq_nrq.nr_enqueued = 1;
+
+	policy = nrs_request_policy(&req->rq_nrq);
+	/**
+	 * Add the policy to the NRS head's list of policies with enqueued
+	 * requests, if it has not been added there.
+	 */
+	if (unlikely(list_empty(&policy->pol_list_queued)))
+		list_add_tail(&policy->pol_list_queued,
+				  &policy->pol_nrs->nrs_policy_queued);
+}
+
+/**
+ * Enqueue a request on the high priority NRS head.
+ *
+ * \param req the request to enqueue.
+ */
+static void ptlrpc_nrs_hpreq_add_nolock(struct ptlrpc_request *req)
+{
+	int	opc = lustre_msg_get_opc(req->rq_reqmsg);
+
+	spin_lock(&req->rq_lock);
+	req->rq_hp = 1;
+	ptlrpc_nrs_req_add_nolock(req);
+	if (opc != OBD_PING)
+		DEBUG_REQ(D_NET, req, "high priority req");
+	spin_unlock(&req->rq_lock);
+}
+
+/**
+ * Returns a boolean predicate indicating whether the policy described by
+ * \a desc is adequate for use with service \a svc.
+ *
+ * \param[in] svc  the service
+ * \param[in] desc the policy descriptor
+ *
+ * \retval false the policy is not compatible with the service
+ * \retval true	 the policy is compatible with the service
+ */
+static inline bool nrs_policy_compatible(const struct ptlrpc_service *svc,
+					 const struct ptlrpc_nrs_pol_desc *desc)
+{
+	return desc->pd_compat(svc, desc);
+}
+
+/**
+ * Registers all compatible policies in nrs_core.nrs_policies, for NRS head
+ * \a nrs.
+ *
+ * \param[in] nrs the NRS head
+ *
+ * \retval -ve error
+ * \retval   0 success
+ *
+ * \pre mutex_is_locked(&nrs_core.nrs_mutex)
+ *
+ * \see ptlrpc_service_nrs_setup()
+ */
+static int nrs_register_policies_locked(struct ptlrpc_nrs *nrs)
+{
+	struct ptlrpc_nrs_pol_desc *desc;
+	/* for convenience */
+	struct ptlrpc_service_part	 *svcpt = nrs->nrs_svcpt;
+	struct ptlrpc_service		 *svc = svcpt->scp_service;
+	int				  rc = -EINVAL;
+
+	LASSERT(mutex_is_locked(&nrs_core.nrs_mutex));
+
+	list_for_each_entry(desc, &nrs_core.nrs_policies, pd_list) {
+		if (nrs_policy_compatible(svc, desc)) {
+			rc = nrs_policy_register(nrs, desc);
+			if (rc != 0) {
+				CERROR("Failed to register NRS policy %s for partition %d of service %s: %d\n",
+				       desc->pd_name, svcpt->scp_cpt,
+				       svc->srv_name, rc);
+				/**
+				 * Fail registration if any of the policies'
+				 * registration fails.
+				 */
+				break;
+			}
+		}
+	}
+
+	return rc;
+}
+
+/**
+ * Initializes NRS head \a nrs of service partition \a svcpt, and registers all
+ * compatible policies in NRS core, with the NRS head.
+ *
+ * \param[in] nrs   the NRS head
+ * \param[in] svcpt the PTLRPC service partition to setup
+ *
+ * \retval -ve error
+ * \retval   0 success
+ *
+ * \pre mutex_is_locked(&nrs_core.nrs_mutex)
+ */
+static int nrs_svcpt_setup_locked0(struct ptlrpc_nrs *nrs,
+				   struct ptlrpc_service_part *svcpt)
+{
+	enum ptlrpc_nrs_queue_type	queue;
+
+	LASSERT(mutex_is_locked(&nrs_core.nrs_mutex));
+
+	if (nrs == &svcpt->scp_nrs_reg)
+		queue = PTLRPC_NRS_QUEUE_REG;
+	else if (nrs == svcpt->scp_nrs_hp)
+		queue = PTLRPC_NRS_QUEUE_HP;
+	else
+		LBUG();
+
+	nrs->nrs_svcpt = svcpt;
+	nrs->nrs_queue_type = queue;
+	spin_lock_init(&nrs->nrs_lock);
+	INIT_LIST_HEAD(&nrs->nrs_policy_list);
+	INIT_LIST_HEAD(&nrs->nrs_policy_queued);
+
+	return nrs_register_policies_locked(nrs);
+}
+
+/**
+ * Allocates a regular and optionally a high-priority NRS head (if the service
+ * handles high-priority RPCs), and then registers all available compatible
+ * policies on those NRS heads.
+ *
+ * \param[in,out] svcpt the PTLRPC service partition to setup
+ *
+ * \pre mutex_is_locked(&nrs_core.nrs_mutex)
+ */
+static int nrs_svcpt_setup_locked(struct ptlrpc_service_part *svcpt)
+{
+	struct ptlrpc_nrs	       *nrs;
+	int				rc;
+
+	LASSERT(mutex_is_locked(&nrs_core.nrs_mutex));
+
+	/**
+	 * Initialize the regular NRS head.
+	 */
+	nrs = nrs_svcpt2nrs(svcpt, false);
+	rc = nrs_svcpt_setup_locked0(nrs, svcpt);
+	if (rc < 0)
+		goto out;
+
+	/**
+	 * Optionally allocate a high-priority NRS head.
+	 */
+	if (svcpt->scp_service->srv_ops.so_hpreq_handler == NULL)
+		goto out;
+
+	OBD_CPT_ALLOC_PTR(svcpt->scp_nrs_hp,
+			  svcpt->scp_service->srv_cptable,
+			  svcpt->scp_cpt);
+	if (svcpt->scp_nrs_hp == NULL) {
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	nrs = nrs_svcpt2nrs(svcpt, true);
+	rc = nrs_svcpt_setup_locked0(nrs, svcpt);
+
+out:
+	return rc;
+}
+
+/**
+ * Unregisters all policies on all available NRS heads in a service partition;
+ * called at PTLRPC service unregistration time.
+ *
+ * \param[in] svcpt the PTLRPC service partition
+ *
+ * \pre mutex_is_locked(&nrs_core.nrs_mutex)
+ */
+static void nrs_svcpt_cleanup_locked(struct ptlrpc_service_part *svcpt)
+{
+	struct ptlrpc_nrs	       *nrs;
+	struct ptlrpc_nrs_policy       *policy;
+	struct ptlrpc_nrs_policy       *tmp;
+	int				rc;
+	bool				hp = false;
+
+	LASSERT(mutex_is_locked(&nrs_core.nrs_mutex));
+
+again:
+	nrs = nrs_svcpt2nrs(svcpt, hp);
+	nrs->nrs_stopping = 1;
+
+	list_for_each_entry_safe(policy, tmp, &nrs->nrs_policy_list,
+				     pol_list) {
+		rc = nrs_policy_unregister(nrs, policy->pol_desc->pd_name);
+		LASSERT(rc == 0);
+	}
+
+	/**
+	 * If the service partition has an HP NRS head, clean that up as well.
+	 */
+	if (!hp && nrs_svcpt_has_hp(svcpt)) {
+		hp = true;
+		goto again;
+	}
+
+	if (hp)
+		OBD_FREE_PTR(nrs);
+}
+
+/**
+ * Returns the descriptor for a policy as identified by by \a name.
+ *
+ * \param[in] name the policy name
+ *
+ * \retval the policy descriptor
+ * \retval NULL
+ */
+static struct ptlrpc_nrs_pol_desc *nrs_policy_find_desc_locked(const char *name)
+{
+	struct ptlrpc_nrs_pol_desc     *tmp;
+
+	list_for_each_entry(tmp, &nrs_core.nrs_policies, pd_list) {
+		if (strncmp(tmp->pd_name, name, NRS_POL_NAME_MAX) == 0)
+			return tmp;
+	}
+	return NULL;
+}
+
+/**
+ * Removes the policy from all supported NRS heads of all partitions of all
+ * PTLRPC services.
+ *
+ * \param[in] desc the policy descriptor to unregister
+ *
+ * \retval -ve error
+ * \retval  0  successfully unregistered policy on all supported NRS heads
+ *
+ * \pre mutex_is_locked(&nrs_core.nrs_mutex)
+ * \pre mutex_is_locked(&ptlrpc_all_services_mutex)
+ */
+static int nrs_policy_unregister_locked(struct ptlrpc_nrs_pol_desc *desc)
+{
+	struct ptlrpc_nrs	       *nrs;
+	struct ptlrpc_service	       *svc;
+	struct ptlrpc_service_part     *svcpt;
+	int				i;
+	int				rc = 0;
+
+	LASSERT(mutex_is_locked(&nrs_core.nrs_mutex));
+	LASSERT(mutex_is_locked(&ptlrpc_all_services_mutex));
+
+	list_for_each_entry(svc, &ptlrpc_all_services, srv_list) {
+
+		if (!nrs_policy_compatible(svc, desc) ||
+		    unlikely(svc->srv_is_stopping))
+			continue;
+
+		ptlrpc_service_for_each_part(svcpt, i, svc) {
+			bool hp = false;
+
+again:
+			nrs = nrs_svcpt2nrs(svcpt, hp);
+			rc = nrs_policy_unregister(nrs, desc->pd_name);
+			/**
+			 * Ignore -ENOENT as the policy may not have registered
+			 * successfully on all service partitions.
+			 */
+			if (rc == -ENOENT) {
+				rc = 0;
+			} else if (rc != 0) {
+				CERROR("Failed to unregister NRS policy %s for partition %d of service %s: %d\n",
+				       desc->pd_name, svcpt->scp_cpt,
+				       svcpt->scp_service->srv_name, rc);
+				return rc;
+			}
+
+			if (!hp && nrs_svc_has_hp(svc)) {
+				hp = true;
+				goto again;
+			}
+		}
+
+		if (desc->pd_ops->op_lprocfs_fini != NULL)
+			desc->pd_ops->op_lprocfs_fini(svc);
+	}
+
+	return rc;
+}
+
+/**
+ * Registers a new policy with NRS core.
+ *
+ * The function will only succeed if policy registration with all compatible
+ * service partitions (if any) is successful.
+ *
+ * N.B. This function should be called either at ptlrpc module initialization
+ *	time when registering a policy that ships with NRS core, or in a
+ *	module's init() function for policies registering from other modules.
+ *
+ * \param[in] conf configuration information for the new policy to register
+ *
+ * \retval -ve error
+ * \retval   0 success
+ */
+int ptlrpc_nrs_policy_register(struct ptlrpc_nrs_pol_conf *conf)
+{
+	struct ptlrpc_service	       *svc;
+	struct ptlrpc_nrs_pol_desc     *desc;
+	int				rc = 0;
+
+	LASSERT(conf != NULL);
+	LASSERT(conf->nc_ops != NULL);
+	LASSERT(conf->nc_compat != NULL);
+	LASSERT(ergo(conf->nc_compat == nrs_policy_compat_one,
+		conf->nc_compat_svc_name != NULL));
+	LASSERT(ergo((conf->nc_flags & PTLRPC_NRS_FL_REG_EXTERN) != 0,
+		     conf->nc_owner != NULL));
+
+	conf->nc_name[NRS_POL_NAME_MAX - 1] = '\0';
+
+	/**
+	 * External policies are not allowed to start immediately upon
+	 * registration, as there is a relatively higher chance that their
+	 * registration might fail. In such a case, some policy instances may
+	 * already have requests queued wen unregistration needs to happen as
+	 * part o cleanup; since there is currently no way to drain requests
+	 * from a policy unless the service is unregistering, we just disallow
+	 * this.
+	 */
+	if ((conf->nc_flags & PTLRPC_NRS_FL_REG_EXTERN) &&
+	    (conf->nc_flags & (PTLRPC_NRS_FL_FALLBACK |
+			       PTLRPC_NRS_FL_REG_START))) {
+		CERROR("NRS: failing to register policy %s. Please check policy flags; external policies cannot act as fallback policies, or be started immediately upon registration without interaction with lprocfs\n",
+		       conf->nc_name);
+		return -EINVAL;
+	}
+
+	mutex_lock(&nrs_core.nrs_mutex);
+
+	if (nrs_policy_find_desc_locked(conf->nc_name) != NULL) {
+		CERROR("NRS: failing to register policy %s which has already been registered with NRS core!\n",
+		       conf->nc_name);
+		rc = -EEXIST;
+		goto fail;
+	}
+
+	OBD_ALLOC_PTR(desc);
+	if (desc == NULL) {
+		rc = -ENOMEM;
+		goto fail;
+	}
+
+	strncpy(desc->pd_name, conf->nc_name, NRS_POL_NAME_MAX);
+	desc->pd_ops		 = conf->nc_ops;
+	desc->pd_compat		 = conf->nc_compat;
+	desc->pd_compat_svc_name = conf->nc_compat_svc_name;
+	if ((conf->nc_flags & PTLRPC_NRS_FL_REG_EXTERN) != 0)
+		desc->pd_owner	 = conf->nc_owner;
+	desc->pd_flags		 = conf->nc_flags;
+	atomic_set(&desc->pd_refs, 0);
+
+	/**
+	 * For policies that are held in the same module as NRS (currently
+	 * ptlrpc), do not register the policy with all compatible services,
+	 * as the services will not have started at this point, since we are
+	 * calling from ptlrpc module initialization code. In such cases each
+	 * service will register all compatible policies later, via
+	 * ptlrpc_service_nrs_setup().
+	 */
+	if ((conf->nc_flags & PTLRPC_NRS_FL_REG_EXTERN) == 0)
+		goto internal;
+
+	/**
+	 * Register the new policy on all compatible services
+	 */
+	mutex_lock(&ptlrpc_all_services_mutex);
+
+	list_for_each_entry(svc, &ptlrpc_all_services, srv_list) {
+		struct ptlrpc_service_part     *svcpt;
+		int				i;
+		int				rc2;
+
+		if (!nrs_policy_compatible(svc, desc) ||
+		    unlikely(svc->srv_is_stopping))
+			continue;
+
+		ptlrpc_service_for_each_part(svcpt, i, svc) {
+			struct ptlrpc_nrs      *nrs;
+			bool			hp = false;
+again:
+			nrs = nrs_svcpt2nrs(svcpt, hp);
+			rc = nrs_policy_register(nrs, desc);
+			if (rc != 0) {
+				CERROR("Failed to register NRS policy %s for partition %d of service %s: %d\n",
+				       desc->pd_name, svcpt->scp_cpt,
+				       svcpt->scp_service->srv_name, rc);
+
+				rc2 = nrs_policy_unregister_locked(desc);
+				/**
+				 * Should not fail at this point
+				 */
+				LASSERT(rc2 == 0);
+				mutex_unlock(&ptlrpc_all_services_mutex);
+				OBD_FREE_PTR(desc);
+				goto fail;
+			}
+
+			if (!hp && nrs_svc_has_hp(svc)) {
+				hp = true;
+				goto again;
+			}
+		}
+
+		/**
+		 * No need to take a reference to other modules here, as we
+		 * will be calling from the module's init() function.
+		 */
+		if (desc->pd_ops->op_lprocfs_init != NULL) {
+			rc = desc->pd_ops->op_lprocfs_init(svc);
+			if (rc != 0) {
+				rc2 = nrs_policy_unregister_locked(desc);
+				/**
+				 * Should not fail at this point
+				 */
+				LASSERT(rc2 == 0);
+				mutex_unlock(&ptlrpc_all_services_mutex);
+				OBD_FREE_PTR(desc);
+				goto fail;
+			}
+		}
+	}
+
+	mutex_unlock(&ptlrpc_all_services_mutex);
+internal:
+	list_add_tail(&desc->pd_list, &nrs_core.nrs_policies);
+fail:
+	mutex_unlock(&nrs_core.nrs_mutex);
+
+	return rc;
+}
+EXPORT_SYMBOL(ptlrpc_nrs_policy_register);
+
+/**
+ * Unregisters a previously registered policy with NRS core. All instances of
+ * the policy on all NRS heads of all supported services are removed.
+ *
+ * N.B. This function should only be called from a module's exit() function.
+ *	Although it can be used for policies that ship alongside NRS core, the
+ *	function is primarily intended for policies that register externally,
+ *	from other modules.
+ *
+ * \param[in] conf configuration information for the policy to unregister
+ *
+ * \retval -ve error
+ * \retval   0 success
+ */
+int ptlrpc_nrs_policy_unregister(struct ptlrpc_nrs_pol_conf *conf)
+{
+	struct ptlrpc_nrs_pol_desc	*desc;
+	int				 rc;
+
+	LASSERT(conf != NULL);
+
+	if (conf->nc_flags & PTLRPC_NRS_FL_FALLBACK) {
+		CERROR("Unable to unregister a fallback policy, unless the PTLRPC service is stopping.\n");
+		return -EPERM;
+	}
+
+	conf->nc_name[NRS_POL_NAME_MAX - 1] = '\0';
+
+	mutex_lock(&nrs_core.nrs_mutex);
+
+	desc = nrs_policy_find_desc_locked(conf->nc_name);
+	if (desc == NULL) {
+		CERROR("Failing to unregister NRS policy %s which has not been registered with NRS core!\n",
+		       conf->nc_name);
+		rc = -ENOENT;
+		goto not_exist;
+	}
+
+	mutex_lock(&ptlrpc_all_services_mutex);
+
+	rc = nrs_policy_unregister_locked(desc);
+	if (rc < 0) {
+		if (rc == -EBUSY)
+			CERROR("Please first stop policy %s on all service partitions and then retry to unregister the policy.\n",
+			       conf->nc_name);
+		goto fail;
+	}
+
+	CDEBUG(D_INFO, "Unregistering policy %s from NRS core.\n",
+	       conf->nc_name);
+
+	list_del(&desc->pd_list);
+	OBD_FREE_PTR(desc);
+
+fail:
+	mutex_unlock(&ptlrpc_all_services_mutex);
+
+not_exist:
+	mutex_unlock(&nrs_core.nrs_mutex);
+
+	return rc;
+}
+EXPORT_SYMBOL(ptlrpc_nrs_policy_unregister);
+
+/**
+ * Setup NRS heads on all service partitions of service \a svc, and register
+ * all compatible policies on those NRS heads.
+ *
+ * To be called from within ptl
+ * \param[in] svc the service to setup
+ *
+ * \retval -ve error, the calling logic should eventually call
+ *		      ptlrpc_service_nrs_cleanup() to undo any work performed
+ *		      by this function.
+ *
+ * \see ptlrpc_register_service()
+ * \see ptlrpc_service_nrs_cleanup()
+ */
+int ptlrpc_service_nrs_setup(struct ptlrpc_service *svc)
+{
+	struct ptlrpc_service_part	       *svcpt;
+	const struct ptlrpc_nrs_pol_desc       *desc;
+	int					i;
+	int					rc = 0;
+
+	mutex_lock(&nrs_core.nrs_mutex);
+
+	/**
+	 * Initialize NRS heads on all service CPTs.
+	 */
+	ptlrpc_service_for_each_part(svcpt, i, svc) {
+		rc = nrs_svcpt_setup_locked(svcpt);
+		if (rc != 0)
+			goto failed;
+	}
+
+	/**
+	 * Set up lprocfs interfaces for all supported policies for the
+	 * service.
+	 */
+	list_for_each_entry(desc, &nrs_core.nrs_policies, pd_list) {
+		if (!nrs_policy_compatible(svc, desc))
+			continue;
+
+		if (desc->pd_ops->op_lprocfs_init != NULL) {
+			rc = desc->pd_ops->op_lprocfs_init(svc);
+			if (rc != 0)
+				goto failed;
+		}
+	}
+
+failed:
+
+	mutex_unlock(&nrs_core.nrs_mutex);
+
+	return rc;
+}
+
+/**
+ * Unregisters all policies on all service partitions of service \a svc.
+ *
+ * \param[in] svc the PTLRPC service to unregister
+ */
+void ptlrpc_service_nrs_cleanup(struct ptlrpc_service *svc)
+{
+	struct ptlrpc_service_part	     *svcpt;
+	const struct ptlrpc_nrs_pol_desc     *desc;
+	int				      i;
+
+	mutex_lock(&nrs_core.nrs_mutex);
+
+	/**
+	 * Clean up NRS heads on all service partitions
+	 */
+	ptlrpc_service_for_each_part(svcpt, i, svc)
+		nrs_svcpt_cleanup_locked(svcpt);
+
+	/**
+	 * Clean up lprocfs interfaces for all supported policies for the
+	 * service.
+	 */
+	list_for_each_entry(desc, &nrs_core.nrs_policies, pd_list) {
+		if (!nrs_policy_compatible(svc, desc))
+			continue;
+
+		if (desc->pd_ops->op_lprocfs_fini != NULL)
+			desc->pd_ops->op_lprocfs_fini(svc);
+	}
+
+	mutex_unlock(&nrs_core.nrs_mutex);
+}
+
+/**
+ * Obtains NRS head resources for request \a req.
+ *
+ * These could be either on the regular or HP NRS head of \a svcpt; resources
+ * taken on the regular head can later be swapped for HP head resources by
+ * ldlm_lock_reorder_req().
+ *
+ * \param[in] svcpt the service partition
+ * \param[in] req   the request
+ * \param[in] hp    which NRS head of \a svcpt to use
+ */
+void ptlrpc_nrs_req_initialize(struct ptlrpc_service_part *svcpt,
+			       struct ptlrpc_request *req, bool hp)
+{
+	struct ptlrpc_nrs	*nrs = nrs_svcpt2nrs(svcpt, hp);
+
+	memset(&req->rq_nrq, 0, sizeof(req->rq_nrq));
+	nrs_resource_get_safe(nrs, &req->rq_nrq, req->rq_nrq.nr_res_ptrs,
+			      false);
+
+	/**
+	 * It is fine to access \e nr_initialized without locking as there is
+	 * no contention at this early stage.
+	 */
+	req->rq_nrq.nr_initialized = 1;
+}
+
+/**
+ * Releases resources for a request; is called after the request has been
+ * handled.
+ *
+ * \param[in] req the request
+ *
+ * \see ptlrpc_server_finish_request()
+ */
+void ptlrpc_nrs_req_finalize(struct ptlrpc_request *req)
+{
+	if (req->rq_nrq.nr_initialized) {
+		nrs_resource_put_safe(req->rq_nrq.nr_res_ptrs);
+		/* no protection on bit nr_initialized because no
+		 * contention at this late stage */
+		req->rq_nrq.nr_finalized = 1;
+	}
+}
+
+void ptlrpc_nrs_req_stop_nolock(struct ptlrpc_request *req)
+{
+	if (req->rq_nrq.nr_started)
+		nrs_request_stop(&req->rq_nrq);
+}
+
+/**
+ * Enqueues request \a req on either the regular or high-priority NRS head
+ * of service partition \a svcpt.
+ *
+ * \param[in] svcpt the service partition
+ * \param[in] req   the request to be enqueued
+ * \param[in] hp    whether to enqueue the request on the regular or
+ *		    high-priority NRS head.
+ */
+void ptlrpc_nrs_req_add(struct ptlrpc_service_part *svcpt,
+			struct ptlrpc_request *req, bool hp)
+{
+	spin_lock(&svcpt->scp_req_lock);
+
+	if (hp)
+		ptlrpc_nrs_hpreq_add_nolock(req);
+	else
+		ptlrpc_nrs_req_add_nolock(req);
+
+	spin_unlock(&svcpt->scp_req_lock);
+}
+
+static void nrs_request_removed(struct ptlrpc_nrs_policy *policy)
+{
+	LASSERT(policy->pol_nrs->nrs_req_queued > 0);
+	LASSERT(policy->pol_req_queued > 0);
+
+	policy->pol_nrs->nrs_req_queued--;
+	policy->pol_req_queued--;
+
+	/**
+	 * If the policy has no more requests queued, remove it from
+	 * ptlrpc_nrs::nrs_policy_queued.
+	 */
+	if (unlikely(policy->pol_req_queued == 0)) {
+		list_del_init(&policy->pol_list_queued);
+
+		/**
+		 * If there are other policies with queued requests, move the
+		 * current policy to the end so that we can round robin over
+		 * all policies and drain the requests.
+		 */
+	} else if (policy->pol_req_queued != policy->pol_nrs->nrs_req_queued) {
+		LASSERT(policy->pol_req_queued <
+			policy->pol_nrs->nrs_req_queued);
+
+		list_move_tail(&policy->pol_list_queued,
+				   &policy->pol_nrs->nrs_policy_queued);
+	}
+}
+
+/**
+ * Obtains a request for handling from an NRS head of service partition
+ * \a svcpt.
+ *
+ * \param[in] svcpt the service partition
+ * \param[in] hp    whether to obtain a request from the regular or
+ *		    high-priority NRS head.
+ * \param[in] peek  when set, signifies that we just want to examine the
+ *		    request, and not handle it, so the request is not removed
+ *		    from the policy.
+ * \param[in] force when set, it will force a policy to return a request if it
+ *		    has one pending
+ *
+ * \retval the	request to be handled
+ * \retval NULL the head has no requests to serve
+ */
+struct ptlrpc_request *
+ptlrpc_nrs_req_get_nolock0(struct ptlrpc_service_part *svcpt, bool hp,
+			   bool peek, bool force)
+{
+	struct ptlrpc_nrs	  *nrs = nrs_svcpt2nrs(svcpt, hp);
+	struct ptlrpc_nrs_policy  *policy;
+	struct ptlrpc_nrs_request *nrq;
+
+	/**
+	 * Always try to drain requests from all NRS polices even if they are
+	 * inactive, because the user can change policy status at runtime.
+	 */
+	list_for_each_entry(policy, &nrs->nrs_policy_queued,
+				pol_list_queued) {
+		nrq = nrs_request_get(policy, peek, force);
+		if (nrq != NULL) {
+			if (likely(!peek)) {
+				nrq->nr_started = 1;
+
+				policy->pol_req_started++;
+				policy->pol_nrs->nrs_req_started++;
+
+				nrs_request_removed(policy);
+			}
+
+			return container_of(nrq, struct ptlrpc_request, rq_nrq);
+		}
+	}
+
+	return NULL;
+}
+
+/**
+ * Dequeues request \a req from the policy it has been enqueued on.
+ *
+ * \param[in] req the request
+ */
+void ptlrpc_nrs_req_del_nolock(struct ptlrpc_request *req)
+{
+	struct ptlrpc_nrs_policy *policy = nrs_request_policy(&req->rq_nrq);
+
+	policy->pol_desc->pd_ops->op_req_dequeue(policy, &req->rq_nrq);
+
+	req->rq_nrq.nr_enqueued = 0;
+
+	nrs_request_removed(policy);
+}
+
+/**
+ * Returns whether there are any requests currently enqueued on any of the
+ * policies of service partition's \a svcpt NRS head specified by \a hp. Should
+ * be called while holding ptlrpc_service_part::scp_req_lock to get a reliable
+ * result.
+ *
+ * \param[in] svcpt the service partition to enquire.
+ * \param[in] hp    whether the regular or high-priority NRS head is to be
+ *		    enquired.
+ *
+ * \retval false the indicated NRS head has no enqueued requests.
+ * \retval true	 the indicated NRS head has some enqueued requests.
+ */
+bool ptlrpc_nrs_req_pending_nolock(struct ptlrpc_service_part *svcpt, bool hp)
+{
+	struct ptlrpc_nrs *nrs = nrs_svcpt2nrs(svcpt, hp);
+
+	return nrs->nrs_req_queued > 0;
+};
+
+/**
+ * Moves request \a req from the regular to the high-priority NRS head.
+ *
+ * \param[in] req the request to move
+ */
+void ptlrpc_nrs_req_hp_move(struct ptlrpc_request *req)
+{
+	struct ptlrpc_service_part	*svcpt = req->rq_rqbd->rqbd_svcpt;
+	struct ptlrpc_nrs_request	*nrq = &req->rq_nrq;
+	struct ptlrpc_nrs_resource	*res1[NRS_RES_MAX];
+	struct ptlrpc_nrs_resource	*res2[NRS_RES_MAX];
+
+	/**
+	 * Obtain the high-priority NRS head resources.
+	 */
+	nrs_resource_get_safe(nrs_svcpt2nrs(svcpt, true), nrq, res1, true);
+
+	spin_lock(&svcpt->scp_req_lock);
+
+	if (!ptlrpc_nrs_req_can_move(req))
+		goto out;
+
+	ptlrpc_nrs_req_del_nolock(req);
+
+	memcpy(res2, nrq->nr_res_ptrs, NRS_RES_MAX * sizeof(res2[0]));
+	memcpy(nrq->nr_res_ptrs, res1, NRS_RES_MAX * sizeof(res1[0]));
+
+	ptlrpc_nrs_hpreq_add_nolock(req);
+
+	memcpy(res1, res2, NRS_RES_MAX * sizeof(res1[0]));
+out:
+	spin_unlock(&svcpt->scp_req_lock);
+
+	/**
+	 * Release either the regular NRS head resources if we moved the
+	 * request, or the high-priority NRS head resources if we took a
+	 * reference earlier in this function and ptlrpc_nrs_req_can_move()
+	 * returned false.
+	 */
+	nrs_resource_put_safe(res1);
+}
+
+/**
+ * Carries out a control operation \a opc on the policy identified by the
+ * human-readable \a name, on either all partitions, or only on the first
+ * partition of service \a svc.
+ *
+ * \param[in]	  svc	 the service the policy belongs to.
+ * \param[in]	  queue  whether to carry out the command on the policy which
+ *			 belongs to the regular, high-priority, or both NRS
+ *			 heads of service partitions of \a svc.
+ * \param[in]	  name   the policy to act upon, by human-readable name
+ * \param[in]	  opc	 the opcode of the operation to carry out
+ * \param[in]	  single when set, the operation will only be carried out on the
+ *			 NRS heads of the first service partition of \a svc.
+ *			 This is useful for some policies which e.g. share
+ *			 identical values on the same parameters of different
+ *			 service partitions; when reading these parameters via
+ *			 lprocfs, these policies may just want to obtain and
+ *			 print out the values from the first service partition.
+ *			 Storing these values centrally elsewhere then could be
+ *			 another solution for this.
+ * \param[in,out] arg	 can be used as a generic in/out buffer between control
+ *			 operations and the user environment.
+ *
+ *\retval -ve error condition
+ *\retval   0 operation was carried out successfully
+ */
+int ptlrpc_nrs_policy_control(const struct ptlrpc_service *svc,
+			      enum ptlrpc_nrs_queue_type queue, char *name,
+			      enum ptlrpc_nrs_ctl opc, bool single, void *arg)
+{
+	struct ptlrpc_service_part     *svcpt;
+	int				i;
+	int				rc = 0;
+
+	LASSERT(opc != PTLRPC_NRS_CTL_INVALID);
+
+	if ((queue & PTLRPC_NRS_QUEUE_BOTH) == 0)
+		return -EINVAL;
+
+	ptlrpc_service_for_each_part(svcpt, i, svc) {
+		if ((queue & PTLRPC_NRS_QUEUE_REG) != 0) {
+			rc = nrs_policy_ctl(nrs_svcpt2nrs(svcpt, false), name,
+					    opc, arg);
+			if (rc != 0 || (queue == PTLRPC_NRS_QUEUE_REG &&
+					single))
+				goto out;
+		}
+
+		if ((queue & PTLRPC_NRS_QUEUE_HP) != 0) {
+			/**
+			 * XXX: We could optionally check for
+			 * nrs_svc_has_hp(svc) here, and return an error if it
+			 * is false. Right now we rely on the policies' lprocfs
+			 * handlers that call the present function to make this
+			 * check; if they fail to do so, they might hit the
+			 * assertion inside nrs_svcpt2nrs() below.
+			 */
+			rc = nrs_policy_ctl(nrs_svcpt2nrs(svcpt, true), name,
+					    opc, arg);
+			if (rc != 0 || single)
+				goto out;
+		}
+	}
+out:
+	return rc;
+}
+
+
+/* ptlrpc/nrs_fifo.c */
+extern struct ptlrpc_nrs_pol_conf nrs_conf_fifo;
+
+/**
+ * Adds all policies that ship with the ptlrpc module, to NRS core's list of
+ * policies \e nrs_core.nrs_policies.
+ *
+ * \retval 0 all policies have been registered successfully
+ * \retval -ve error
+ */
+int ptlrpc_nrs_init(void)
+{
+	int	rc;
+
+	mutex_init(&nrs_core.nrs_mutex);
+	INIT_LIST_HEAD(&nrs_core.nrs_policies);
+
+	rc = ptlrpc_nrs_policy_register(&nrs_conf_fifo);
+	if (rc != 0)
+		goto fail;
+
+
+	return rc;
+fail:
+	/**
+	 * Since no PTLRPC services have been started at this point, all we need
+	 * to do for cleanup is to free the descriptors.
+	 */
+	ptlrpc_nrs_fini();
+
+	return rc;
+}
+
+/**
+ * Removes all policy descriptors from nrs_core::nrs_policies, and frees the
+ * policy descriptors.
+ *
+ * Since all PTLRPC services are stopped at this point, there are no more
+ * instances of any policies, because each service will have stopped its policy
+ * instances in ptlrpc_service_nrs_cleanup(), so we just need to free the
+ * descriptors here.
+ */
+void ptlrpc_nrs_fini(void)
+{
+	struct ptlrpc_nrs_pol_desc *desc;
+	struct ptlrpc_nrs_pol_desc *tmp;
+
+	list_for_each_entry_safe(desc, tmp, &nrs_core.nrs_policies,
+				     pd_list) {
+		list_del_init(&desc->pd_list);
+		OBD_FREE_PTR(desc);
+	}
+}
+
+/** @} nrs */
diff --git a/drivers/staging/lustre/lustre/ptlrpc/nrs_fifo.c b/drivers/staging/lustre/lustre/ptlrpc/nrs_fifo.c
new file mode 100644
index 000000000..eb40c01db
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ptlrpc/nrs_fifo.c
@@ -0,0 +1,270 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License version 2 for more details.  A copy is
+ * included in the COPYING file that accompanied this code.
+
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2011 Intel Corporation
+ *
+ * Copyright 2012 Xyratex Technology Limited
+ */
+/*
+ * lustre/ptlrpc/nrs_fifo.c
+ *
+ * Network Request Scheduler (NRS) FIFO policy
+ *
+ * Handles RPCs in a FIFO manner, as received from the network. This policy is
+ * a logical wrapper around previous, non-NRS functionality. It is used as the
+ * default and fallback policy for all types of RPCs on all PTLRPC service
+ * partitions, for both regular and high-priority NRS heads. Default here means
+ * the policy is the one enabled at PTLRPC service partition startup time, and
+ * fallback means the policy is used to handle RPCs that are not handled
+ * successfully or are not handled at all by any primary policy that may be
+ * enabled on a given NRS head.
+ *
+ * Author: Liang Zhen <liang@whamcloud.com>
+ * Author: Nikitas Angelinas <nikitas_angelinas@xyratex.com>
+ */
+/**
+ * \addtogoup nrs
+ * @{
+ */
+
+#define DEBUG_SUBSYSTEM S_RPC
+#include "../include/obd_support.h"
+#include "../include/obd_class.h"
+#include "../../include/linux/libcfs/libcfs.h"
+#include "ptlrpc_internal.h"
+
+/**
+ * \name fifo
+ *
+ * The FIFO policy is a logical wrapper around previous, non-NRS functionality.
+ * It schedules RPCs in the same order as they are queued from LNet.
+ *
+ * @{
+ */
+
+#define NRS_POL_NAME_FIFO	"fifo"
+
+/**
+ * Is called before the policy transitions into
+ * ptlrpc_nrs_pol_state::NRS_POL_STATE_STARTED; allocates and initializes a
+ * policy-specific private data structure.
+ *
+ * \param[in] policy The policy to start
+ *
+ * \retval -ENOMEM OOM error
+ * \retval  0	   success
+ *
+ * \see nrs_policy_register()
+ * \see nrs_policy_ctl()
+ */
+static int nrs_fifo_start(struct ptlrpc_nrs_policy *policy)
+{
+	struct nrs_fifo_head *head;
+
+	OBD_CPT_ALLOC_PTR(head, nrs_pol2cptab(policy), nrs_pol2cptid(policy));
+	if (head == NULL)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&head->fh_list);
+	policy->pol_private = head;
+	return 0;
+}
+
+/**
+ * Is called before the policy transitions into
+ * ptlrpc_nrs_pol_state::NRS_POL_STATE_STOPPED; deallocates the policy-specific
+ * private data structure.
+ *
+ * \param[in] policy The policy to stop
+ *
+ * \see nrs_policy_stop0()
+ */
+static void nrs_fifo_stop(struct ptlrpc_nrs_policy *policy)
+{
+	struct nrs_fifo_head *head = policy->pol_private;
+
+	LASSERT(head != NULL);
+	LASSERT(list_empty(&head->fh_list));
+
+	OBD_FREE_PTR(head);
+}
+
+/**
+ * Is called for obtaining a FIFO policy resource.
+ *
+ * \param[in]  policy	  The policy on which the request is being asked for
+ * \param[in]  nrq	  The request for which resources are being taken
+ * \param[in]  parent	  Parent resource, unused in this policy
+ * \param[out] resp	  Resources references are placed in this array
+ * \param[in]  moving_req Signifies limited caller context; unused in this
+ *			  policy
+ *
+ * \retval 1 The FIFO policy only has a one-level resource hierarchy, as since
+ *	     it implements a simple scheduling algorithm in which request
+ *	     priority is determined on the request arrival order, it does not
+ *	     need to maintain a set of resources that would otherwise be used
+ *	     to calculate a request's priority.
+ *
+ * \see nrs_resource_get_safe()
+ */
+static int nrs_fifo_res_get(struct ptlrpc_nrs_policy *policy,
+			    struct ptlrpc_nrs_request *nrq,
+			    const struct ptlrpc_nrs_resource *parent,
+			    struct ptlrpc_nrs_resource **resp, bool moving_req)
+{
+	/**
+	 * Just return the resource embedded inside nrs_fifo_head, and end this
+	 * resource hierarchy reference request.
+	 */
+	*resp = &((struct nrs_fifo_head *)policy->pol_private)->fh_res;
+	return 1;
+}
+
+/**
+ * Called when getting a request from the FIFO policy for handling, or just
+ * peeking; removes the request from the policy when it is to be handled.
+ *
+ * \param[in] policy The policy
+ * \param[in] peek   When set, signifies that we just want to examine the
+ *		     request, and not handle it, so the request is not removed
+ *		     from the policy.
+ * \param[in] force  Force the policy to return a request; unused in this
+ *		     policy
+ *
+ * \retval The request to be handled; this is the next request in the FIFO
+ *	   queue
+ *
+ * \see ptlrpc_nrs_req_get_nolock()
+ * \see nrs_request_get()
+ */
+static
+struct ptlrpc_nrs_request *nrs_fifo_req_get(struct ptlrpc_nrs_policy *policy,
+					     bool peek, bool force)
+{
+	struct nrs_fifo_head	  *head = policy->pol_private;
+	struct ptlrpc_nrs_request *nrq;
+
+	nrq = unlikely(list_empty(&head->fh_list)) ? NULL :
+	      list_entry(head->fh_list.next, struct ptlrpc_nrs_request,
+			     nr_u.fifo.fr_list);
+
+	if (likely(!peek && nrq != NULL)) {
+		struct ptlrpc_request *req = container_of(nrq,
+							  struct ptlrpc_request,
+							  rq_nrq);
+
+		list_del_init(&nrq->nr_u.fifo.fr_list);
+
+		CDEBUG(D_RPCTRACE, "NRS start %s request from %s, seq: %llu\n",
+		       policy->pol_desc->pd_name, libcfs_id2str(req->rq_peer),
+		       nrq->nr_u.fifo.fr_sequence);
+	}
+
+	return nrq;
+}
+
+/**
+ * Adds request \a nrq to \a policy's list of queued requests
+ *
+ * \param[in] policy The policy
+ * \param[in] nrq    The request to add
+ *
+ * \retval 0 success; nrs_request_enqueue() assumes this function will always
+ *		      succeed
+ */
+static int nrs_fifo_req_add(struct ptlrpc_nrs_policy *policy,
+			    struct ptlrpc_nrs_request *nrq)
+{
+	struct nrs_fifo_head *head;
+
+	head = container_of(nrs_request_resource(nrq), struct nrs_fifo_head,
+			    fh_res);
+	/**
+	 * Only used for debugging
+	 */
+	nrq->nr_u.fifo.fr_sequence = head->fh_sequence++;
+	list_add_tail(&nrq->nr_u.fifo.fr_list, &head->fh_list);
+
+	return 0;
+}
+
+/**
+ * Removes request \a nrq from \a policy's list of queued requests.
+ *
+ * \param[in] policy The policy
+ * \param[in] nrq    The request to remove
+ */
+static void nrs_fifo_req_del(struct ptlrpc_nrs_policy *policy,
+			     struct ptlrpc_nrs_request *nrq)
+{
+	LASSERT(!list_empty(&nrq->nr_u.fifo.fr_list));
+	list_del_init(&nrq->nr_u.fifo.fr_list);
+}
+
+/**
+ * Prints a debug statement right before the request \a nrq stops being
+ * handled.
+ *
+ * \param[in] policy The policy handling the request
+ * \param[in] nrq    The request being handled
+ *
+ * \see ptlrpc_server_finish_request()
+ * \see ptlrpc_nrs_req_stop_nolock()
+ */
+static void nrs_fifo_req_stop(struct ptlrpc_nrs_policy *policy,
+			      struct ptlrpc_nrs_request *nrq)
+{
+	struct ptlrpc_request *req = container_of(nrq, struct ptlrpc_request,
+						  rq_nrq);
+
+	CDEBUG(D_RPCTRACE, "NRS stop %s request from %s, seq: %llu\n",
+	       policy->pol_desc->pd_name, libcfs_id2str(req->rq_peer),
+	       nrq->nr_u.fifo.fr_sequence);
+}
+
+/**
+ * FIFO policy operations
+ */
+static const struct ptlrpc_nrs_pol_ops nrs_fifo_ops = {
+	.op_policy_start	= nrs_fifo_start,
+	.op_policy_stop		= nrs_fifo_stop,
+	.op_res_get		= nrs_fifo_res_get,
+	.op_req_get		= nrs_fifo_req_get,
+	.op_req_enqueue		= nrs_fifo_req_add,
+	.op_req_dequeue		= nrs_fifo_req_del,
+	.op_req_stop		= nrs_fifo_req_stop,
+};
+
+/**
+ * FIFO policy configuration
+ */
+struct ptlrpc_nrs_pol_conf nrs_conf_fifo = {
+	.nc_name		= NRS_POL_NAME_FIFO,
+	.nc_ops			= &nrs_fifo_ops,
+	.nc_compat		= nrs_policy_compat_all,
+	.nc_flags		= PTLRPC_NRS_FL_FALLBACK |
+				  PTLRPC_NRS_FL_REG_START
+};
+
+/** @} fifo */
+
+/** @} nrs */
diff --git a/drivers/staging/lustre/lustre/ptlrpc/pack_generic.c b/drivers/staging/lustre/lustre/ptlrpc/pack_generic.c
new file mode 100644
index 000000000..b51af9bf3
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ptlrpc/pack_generic.c
@@ -0,0 +1,2536 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/ptlrpc/pack_generic.c
+ *
+ * (Un)packing of OST requests
+ *
+ * Author: Peter J. Braam <braam@clusterfs.com>
+ * Author: Phil Schwan <phil@clusterfs.com>
+ * Author: Eric Barton <eeb@clusterfs.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_RPC
+
+#include "../../include/linux/libcfs/libcfs.h"
+
+#include "../include/obd_support.h"
+#include "../include/obd_class.h"
+#include "../include/lustre_net.h"
+#include "../include/obd_cksum.h"
+#include "../include/lustre/ll_fiemap.h"
+
+static inline int lustre_msg_hdr_size_v2(int count)
+{
+	return cfs_size_round(offsetof(struct lustre_msg_v2,
+				       lm_buflens[count]));
+}
+
+int lustre_msg_hdr_size(__u32 magic, int count)
+{
+	switch (magic) {
+	case LUSTRE_MSG_MAGIC_V2:
+		return lustre_msg_hdr_size_v2(count);
+	default:
+		LASSERTF(0, "incorrect message magic: %08x\n", magic);
+		return -EINVAL;
+	}
+}
+EXPORT_SYMBOL(lustre_msg_hdr_size);
+
+void ptlrpc_buf_set_swabbed(struct ptlrpc_request *req, const int inout,
+			    int index)
+{
+	if (inout)
+		lustre_set_req_swabbed(req, index);
+	else
+		lustre_set_rep_swabbed(req, index);
+}
+EXPORT_SYMBOL(ptlrpc_buf_set_swabbed);
+
+int ptlrpc_buf_need_swab(struct ptlrpc_request *req, const int inout,
+			 int index)
+{
+	if (inout)
+		return (ptlrpc_req_need_swab(req) &&
+			!lustre_req_swabbed(req, index));
+	else
+		return (ptlrpc_rep_need_swab(req) &&
+			!lustre_rep_swabbed(req, index));
+}
+EXPORT_SYMBOL(ptlrpc_buf_need_swab);
+
+static inline int lustre_msg_check_version_v2(struct lustre_msg_v2 *msg,
+					      __u32 version)
+{
+	__u32 ver = lustre_msg_get_version(msg);
+	return (ver & LUSTRE_VERSION_MASK) != version;
+}
+
+int lustre_msg_check_version(struct lustre_msg *msg, __u32 version)
+{
+	switch (msg->lm_magic) {
+	case LUSTRE_MSG_MAGIC_V1:
+		CERROR("msg v1 not supported - please upgrade you system\n");
+		return -EINVAL;
+	case LUSTRE_MSG_MAGIC_V2:
+		return lustre_msg_check_version_v2(msg, version);
+	default:
+		CERROR("incorrect message magic: %08x\n", msg->lm_magic);
+		return 0;
+	}
+}
+EXPORT_SYMBOL(lustre_msg_check_version);
+
+/* early reply size */
+int lustre_msg_early_size(void)
+{
+	static int size;
+	if (!size) {
+		/* Always reply old ptlrpc_body_v2 to keep interoperability
+		 * with the old client (< 2.3) which doesn't have pb_jobid
+		 * in the ptlrpc_body.
+		 *
+		 * XXX Remove this whenever we drop interoperability with such
+		 *     client.
+		 */
+		__u32 pblen = sizeof(struct ptlrpc_body_v2);
+		size = lustre_msg_size(LUSTRE_MSG_MAGIC_V2, 1, &pblen);
+	}
+	return size;
+}
+EXPORT_SYMBOL(lustre_msg_early_size);
+
+int lustre_msg_size_v2(int count, __u32 *lengths)
+{
+	int size;
+	int i;
+
+	size = lustre_msg_hdr_size_v2(count);
+	for (i = 0; i < count; i++)
+		size += cfs_size_round(lengths[i]);
+
+	return size;
+}
+EXPORT_SYMBOL(lustre_msg_size_v2);
+
+/* This returns the size of the buffer that is required to hold a lustre_msg
+ * with the given sub-buffer lengths.
+ * NOTE: this should only be used for NEW requests, and should always be
+ *       in the form of a v2 request.  If this is a connection to a v1
+ *       target then the first buffer will be stripped because the ptlrpc
+ *       data is part of the lustre_msg_v1 header. b=14043 */
+int lustre_msg_size(__u32 magic, int count, __u32 *lens)
+{
+	__u32 size[] = { sizeof(struct ptlrpc_body) };
+
+	if (!lens) {
+		LASSERT(count == 1);
+		lens = size;
+	}
+
+	LASSERT(count > 0);
+	LASSERT(lens[MSG_PTLRPC_BODY_OFF] >= sizeof(struct ptlrpc_body_v2));
+
+	switch (magic) {
+	case LUSTRE_MSG_MAGIC_V2:
+		return lustre_msg_size_v2(count, lens);
+	default:
+		LASSERTF(0, "incorrect message magic: %08x\n", magic);
+		return -EINVAL;
+	}
+}
+EXPORT_SYMBOL(lustre_msg_size);
+
+/* This is used to determine the size of a buffer that was already packed
+ * and will correctly handle the different message formats. */
+int lustre_packed_msg_size(struct lustre_msg *msg)
+{
+	switch (msg->lm_magic) {
+	case LUSTRE_MSG_MAGIC_V2:
+		return lustre_msg_size_v2(msg->lm_bufcount, msg->lm_buflens);
+	default:
+		CERROR("incorrect message magic: %08x\n", msg->lm_magic);
+		return 0;
+	}
+}
+EXPORT_SYMBOL(lustre_packed_msg_size);
+
+void lustre_init_msg_v2(struct lustre_msg_v2 *msg, int count, __u32 *lens,
+			char **bufs)
+{
+	char *ptr;
+	int i;
+
+	msg->lm_bufcount = count;
+	/* XXX: lm_secflvr uninitialized here */
+	msg->lm_magic = LUSTRE_MSG_MAGIC_V2;
+
+	for (i = 0; i < count; i++)
+		msg->lm_buflens[i] = lens[i];
+
+	if (bufs == NULL)
+		return;
+
+	ptr = (char *)msg + lustre_msg_hdr_size_v2(count);
+	for (i = 0; i < count; i++) {
+		char *tmp = bufs[i];
+		LOGL(tmp, lens[i], ptr);
+	}
+}
+EXPORT_SYMBOL(lustre_init_msg_v2);
+
+static int lustre_pack_request_v2(struct ptlrpc_request *req,
+				  int count, __u32 *lens, char **bufs)
+{
+	int reqlen, rc;
+
+	reqlen = lustre_msg_size_v2(count, lens);
+
+	rc = sptlrpc_cli_alloc_reqbuf(req, reqlen);
+	if (rc)
+		return rc;
+
+	req->rq_reqlen = reqlen;
+
+	lustre_init_msg_v2(req->rq_reqmsg, count, lens, bufs);
+	lustre_msg_add_version(req->rq_reqmsg, PTLRPC_MSG_VERSION);
+	return 0;
+}
+
+int lustre_pack_request(struct ptlrpc_request *req, __u32 magic, int count,
+			__u32 *lens, char **bufs)
+{
+	__u32 size[] = { sizeof(struct ptlrpc_body) };
+
+	if (!lens) {
+		LASSERT(count == 1);
+		lens = size;
+	}
+
+	LASSERT(count > 0);
+	LASSERT(lens[MSG_PTLRPC_BODY_OFF] == sizeof(struct ptlrpc_body));
+
+	/* only use new format, we don't need to be compatible with 1.4 */
+	return lustre_pack_request_v2(req, count, lens, bufs);
+}
+EXPORT_SYMBOL(lustre_pack_request);
+
+#if RS_DEBUG
+LIST_HEAD(ptlrpc_rs_debug_lru);
+spinlock_t ptlrpc_rs_debug_lock;
+
+#define PTLRPC_RS_DEBUG_LRU_ADD(rs)					\
+do {									\
+	spin_lock(&ptlrpc_rs_debug_lock);				\
+	list_add_tail(&(rs)->rs_debug_list, &ptlrpc_rs_debug_lru);	\
+	spin_unlock(&ptlrpc_rs_debug_lock);				\
+} while (0)
+
+#define PTLRPC_RS_DEBUG_LRU_DEL(rs)					\
+do {									\
+	spin_lock(&ptlrpc_rs_debug_lock);				\
+	list_del(&(rs)->rs_debug_list);				\
+	spin_unlock(&ptlrpc_rs_debug_lock);				\
+} while (0)
+#else
+# define PTLRPC_RS_DEBUG_LRU_ADD(rs) do {} while (0)
+# define PTLRPC_RS_DEBUG_LRU_DEL(rs) do {} while (0)
+#endif
+
+struct ptlrpc_reply_state *
+lustre_get_emerg_rs(struct ptlrpc_service_part *svcpt)
+{
+	struct ptlrpc_reply_state *rs = NULL;
+
+	spin_lock(&svcpt->scp_rep_lock);
+
+	/* See if we have anything in a pool, and wait if nothing */
+	while (list_empty(&svcpt->scp_rep_idle)) {
+		struct l_wait_info	lwi;
+		int			rc;
+
+		spin_unlock(&svcpt->scp_rep_lock);
+		/* If we cannot get anything for some long time, we better
+		 * bail out instead of waiting infinitely */
+		lwi = LWI_TIMEOUT(cfs_time_seconds(10), NULL, NULL);
+		rc = l_wait_event(svcpt->scp_rep_waitq,
+				  !list_empty(&svcpt->scp_rep_idle), &lwi);
+		if (rc != 0)
+			goto out;
+		spin_lock(&svcpt->scp_rep_lock);
+	}
+
+	rs = list_entry(svcpt->scp_rep_idle.next,
+			    struct ptlrpc_reply_state, rs_list);
+	list_del(&rs->rs_list);
+
+	spin_unlock(&svcpt->scp_rep_lock);
+
+	memset(rs, 0, svcpt->scp_service->srv_max_reply_size);
+	rs->rs_size = svcpt->scp_service->srv_max_reply_size;
+	rs->rs_svcpt = svcpt;
+	rs->rs_prealloc = 1;
+out:
+	return rs;
+}
+
+void lustre_put_emerg_rs(struct ptlrpc_reply_state *rs)
+{
+	struct ptlrpc_service_part *svcpt = rs->rs_svcpt;
+
+	spin_lock(&svcpt->scp_rep_lock);
+	list_add(&rs->rs_list, &svcpt->scp_rep_idle);
+	spin_unlock(&svcpt->scp_rep_lock);
+	wake_up(&svcpt->scp_rep_waitq);
+}
+
+int lustre_pack_reply_v2(struct ptlrpc_request *req, int count,
+			 __u32 *lens, char **bufs, int flags)
+{
+	struct ptlrpc_reply_state *rs;
+	int			msg_len, rc;
+
+	LASSERT(req->rq_reply_state == NULL);
+
+	if ((flags & LPRFL_EARLY_REPLY) == 0) {
+		spin_lock(&req->rq_lock);
+		req->rq_packed_final = 1;
+		spin_unlock(&req->rq_lock);
+	}
+
+	msg_len = lustre_msg_size_v2(count, lens);
+	rc = sptlrpc_svc_alloc_rs(req, msg_len);
+	if (rc)
+		return rc;
+
+	rs = req->rq_reply_state;
+	atomic_set(&rs->rs_refcount, 1);    /* 1 ref for rq_reply_state */
+	rs->rs_cb_id.cbid_fn = reply_out_callback;
+	rs->rs_cb_id.cbid_arg = rs;
+	rs->rs_svcpt = req->rq_rqbd->rqbd_svcpt;
+	INIT_LIST_HEAD(&rs->rs_exp_list);
+	INIT_LIST_HEAD(&rs->rs_obd_list);
+	INIT_LIST_HEAD(&rs->rs_list);
+	spin_lock_init(&rs->rs_lock);
+
+	req->rq_replen = msg_len;
+	req->rq_reply_state = rs;
+	req->rq_repmsg = rs->rs_msg;
+
+	lustre_init_msg_v2(rs->rs_msg, count, lens, bufs);
+	lustre_msg_add_version(rs->rs_msg, PTLRPC_MSG_VERSION);
+
+	PTLRPC_RS_DEBUG_LRU_ADD(rs);
+
+	return 0;
+}
+EXPORT_SYMBOL(lustre_pack_reply_v2);
+
+int lustre_pack_reply_flags(struct ptlrpc_request *req, int count, __u32 *lens,
+			    char **bufs, int flags)
+{
+	int rc = 0;
+	__u32 size[] = { sizeof(struct ptlrpc_body) };
+
+	if (!lens) {
+		LASSERT(count == 1);
+		lens = size;
+	}
+
+	LASSERT(count > 0);
+	LASSERT(lens[MSG_PTLRPC_BODY_OFF] == sizeof(struct ptlrpc_body));
+
+	switch (req->rq_reqmsg->lm_magic) {
+	case LUSTRE_MSG_MAGIC_V2:
+		rc = lustre_pack_reply_v2(req, count, lens, bufs, flags);
+		break;
+	default:
+		LASSERTF(0, "incorrect message magic: %08x\n",
+			 req->rq_reqmsg->lm_magic);
+		rc = -EINVAL;
+	}
+	if (rc != 0)
+		CERROR("lustre_pack_reply failed: rc=%d size=%d\n", rc,
+		       lustre_msg_size(req->rq_reqmsg->lm_magic, count, lens));
+	return rc;
+}
+EXPORT_SYMBOL(lustre_pack_reply_flags);
+
+int lustre_pack_reply(struct ptlrpc_request *req, int count, __u32 *lens,
+		      char **bufs)
+{
+	return lustre_pack_reply_flags(req, count, lens, bufs, 0);
+}
+EXPORT_SYMBOL(lustre_pack_reply);
+
+void *lustre_msg_buf_v2(struct lustre_msg_v2 *m, int n, int min_size)
+{
+	int i, offset, buflen, bufcount;
+
+	LASSERT(m != NULL);
+	LASSERT(n >= 0);
+
+	bufcount = m->lm_bufcount;
+	if (unlikely(n >= bufcount)) {
+		CDEBUG(D_INFO, "msg %p buffer[%d] not present (count %d)\n",
+		       m, n, bufcount);
+		return NULL;
+	}
+
+	buflen = m->lm_buflens[n];
+	if (unlikely(buflen < min_size)) {
+		CERROR("msg %p buffer[%d] size %d too small (required %d, opc=%d)\n",
+		       m, n, buflen, min_size,
+		       n == MSG_PTLRPC_BODY_OFF ? -1 : lustre_msg_get_opc(m));
+		return NULL;
+	}
+
+	offset = lustre_msg_hdr_size_v2(bufcount);
+	for (i = 0; i < n; i++)
+		offset += cfs_size_round(m->lm_buflens[i]);
+
+	return (char *)m + offset;
+}
+
+void *lustre_msg_buf(struct lustre_msg *m, int n, int min_size)
+{
+	switch (m->lm_magic) {
+	case LUSTRE_MSG_MAGIC_V2:
+		return lustre_msg_buf_v2(m, n, min_size);
+	default:
+		LASSERTF(0, "incorrect message magic: %08x(msg:%p)\n", m->lm_magic, m);
+		return NULL;
+	}
+}
+EXPORT_SYMBOL(lustre_msg_buf);
+
+int lustre_shrink_msg_v2(struct lustre_msg_v2 *msg, int segment,
+			 unsigned int newlen, int move_data)
+{
+	char   *tail = NULL, *newpos;
+	int     tail_len = 0, n;
+
+	LASSERT(msg);
+	LASSERT(msg->lm_bufcount > segment);
+	LASSERT(msg->lm_buflens[segment] >= newlen);
+
+	if (msg->lm_buflens[segment] == newlen)
+		goto out;
+
+	if (move_data && msg->lm_bufcount > segment + 1) {
+		tail = lustre_msg_buf_v2(msg, segment + 1, 0);
+		for (n = segment + 1; n < msg->lm_bufcount; n++)
+			tail_len += cfs_size_round(msg->lm_buflens[n]);
+	}
+
+	msg->lm_buflens[segment] = newlen;
+
+	if (tail && tail_len) {
+		newpos = lustre_msg_buf_v2(msg, segment + 1, 0);
+		LASSERT(newpos <= tail);
+		if (newpos != tail)
+			memmove(newpos, tail, tail_len);
+	}
+out:
+	return lustre_msg_size_v2(msg->lm_bufcount, msg->lm_buflens);
+}
+
+/*
+ * for @msg, shrink @segment to size @newlen. if @move_data is non-zero,
+ * we also move data forward from @segment + 1.
+ *
+ * if @newlen == 0, we remove the segment completely, but we still keep the
+ * totally bufcount the same to save possible data moving. this will leave a
+ * unused segment with size 0 at the tail, but that's ok.
+ *
+ * return new msg size after shrinking.
+ *
+ * CAUTION:
+ * + if any buffers higher than @segment has been filled in, must call shrink
+ *   with non-zero @move_data.
+ * + caller should NOT keep pointers to msg buffers which higher than @segment
+ *   after call shrink.
+ */
+int lustre_shrink_msg(struct lustre_msg *msg, int segment,
+		      unsigned int newlen, int move_data)
+{
+	switch (msg->lm_magic) {
+	case LUSTRE_MSG_MAGIC_V2:
+		return lustre_shrink_msg_v2(msg, segment, newlen, move_data);
+	default:
+		LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic);
+	}
+}
+EXPORT_SYMBOL(lustre_shrink_msg);
+
+void lustre_free_reply_state(struct ptlrpc_reply_state *rs)
+{
+	PTLRPC_RS_DEBUG_LRU_DEL(rs);
+
+	LASSERT(atomic_read(&rs->rs_refcount) == 0);
+	LASSERT(!rs->rs_difficult || rs->rs_handled);
+	LASSERT(!rs->rs_on_net);
+	LASSERT(!rs->rs_scheduled);
+	LASSERT(rs->rs_export == NULL);
+	LASSERT(rs->rs_nlocks == 0);
+	LASSERT(list_empty(&rs->rs_exp_list));
+	LASSERT(list_empty(&rs->rs_obd_list));
+
+	sptlrpc_svc_free_rs(rs);
+}
+EXPORT_SYMBOL(lustre_free_reply_state);
+
+static int lustre_unpack_msg_v2(struct lustre_msg_v2 *m, int len)
+{
+	int swabbed, required_len, i;
+
+	/* Now we know the sender speaks my language. */
+	required_len = lustre_msg_hdr_size_v2(0);
+	if (len < required_len) {
+		/* can't even look inside the message */
+		CERROR("message length %d too small for lustre_msg\n", len);
+		return -EINVAL;
+	}
+
+	swabbed = (m->lm_magic == LUSTRE_MSG_MAGIC_V2_SWABBED);
+
+	if (swabbed) {
+		__swab32s(&m->lm_magic);
+		__swab32s(&m->lm_bufcount);
+		__swab32s(&m->lm_secflvr);
+		__swab32s(&m->lm_repsize);
+		__swab32s(&m->lm_cksum);
+		__swab32s(&m->lm_flags);
+		CLASSERT(offsetof(typeof(*m), lm_padding_2) != 0);
+		CLASSERT(offsetof(typeof(*m), lm_padding_3) != 0);
+	}
+
+	required_len = lustre_msg_hdr_size_v2(m->lm_bufcount);
+	if (len < required_len) {
+		/* didn't receive all the buffer lengths */
+		CERROR("message length %d too small for %d buflens\n",
+		       len, m->lm_bufcount);
+		return -EINVAL;
+	}
+
+	for (i = 0; i < m->lm_bufcount; i++) {
+		if (swabbed)
+			__swab32s(&m->lm_buflens[i]);
+		required_len += cfs_size_round(m->lm_buflens[i]);
+	}
+
+	if (len < required_len) {
+		CERROR("len: %d, required_len %d\n", len, required_len);
+		CERROR("bufcount: %d\n", m->lm_bufcount);
+		for (i = 0; i < m->lm_bufcount; i++)
+			CERROR("buffer %d length %d\n", i, m->lm_buflens[i]);
+		return -EINVAL;
+	}
+
+	return swabbed;
+}
+
+int __lustre_unpack_msg(struct lustre_msg *m, int len)
+{
+	int required_len, rc;
+
+	/* We can provide a slightly better error log, if we check the
+	 * message magic and version first.  In the future, struct
+	 * lustre_msg may grow, and we'd like to log a version mismatch,
+	 * rather than a short message.
+	 *
+	 */
+	required_len = offsetof(struct lustre_msg, lm_magic) +
+		       sizeof(m->lm_magic);
+	if (len < required_len) {
+		/* can't even look inside the message */
+		CERROR("message length %d too small for magic/version check\n",
+		       len);
+		return -EINVAL;
+	}
+
+	rc = lustre_unpack_msg_v2(m, len);
+
+	return rc;
+}
+EXPORT_SYMBOL(__lustre_unpack_msg);
+
+int ptlrpc_unpack_req_msg(struct ptlrpc_request *req, int len)
+{
+	int rc;
+	rc = __lustre_unpack_msg(req->rq_reqmsg, len);
+	if (rc == 1) {
+		lustre_set_req_swabbed(req, MSG_PTLRPC_HEADER_OFF);
+		rc = 0;
+	}
+	return rc;
+}
+EXPORT_SYMBOL(ptlrpc_unpack_req_msg);
+
+int ptlrpc_unpack_rep_msg(struct ptlrpc_request *req, int len)
+{
+	int rc;
+	rc = __lustre_unpack_msg(req->rq_repmsg, len);
+	if (rc == 1) {
+		lustre_set_rep_swabbed(req, MSG_PTLRPC_HEADER_OFF);
+		rc = 0;
+	}
+	return rc;
+}
+EXPORT_SYMBOL(ptlrpc_unpack_rep_msg);
+
+static inline int lustre_unpack_ptlrpc_body_v2(struct ptlrpc_request *req,
+					       const int inout, int offset)
+{
+	struct ptlrpc_body *pb;
+	struct lustre_msg_v2 *m = inout ? req->rq_reqmsg : req->rq_repmsg;
+
+	pb = lustre_msg_buf_v2(m, offset, sizeof(struct ptlrpc_body_v2));
+	if (!pb) {
+		CERROR("error unpacking ptlrpc body\n");
+		return -EFAULT;
+	}
+	if (ptlrpc_buf_need_swab(req, inout, offset)) {
+		lustre_swab_ptlrpc_body(pb);
+		ptlrpc_buf_set_swabbed(req, inout, offset);
+	}
+
+	if ((pb->pb_version & ~LUSTRE_VERSION_MASK) != PTLRPC_MSG_VERSION) {
+		CERROR("wrong lustre_msg version %08x\n", pb->pb_version);
+		return -EINVAL;
+	}
+
+	if (!inout)
+		pb->pb_status = ptlrpc_status_ntoh(pb->pb_status);
+
+	return 0;
+}
+
+int lustre_unpack_req_ptlrpc_body(struct ptlrpc_request *req, int offset)
+{
+	switch (req->rq_reqmsg->lm_magic) {
+	case LUSTRE_MSG_MAGIC_V2:
+		return lustre_unpack_ptlrpc_body_v2(req, 1, offset);
+	default:
+		CERROR("bad lustre msg magic: %08x\n",
+		       req->rq_reqmsg->lm_magic);
+		return -EINVAL;
+	}
+}
+
+int lustre_unpack_rep_ptlrpc_body(struct ptlrpc_request *req, int offset)
+{
+	switch (req->rq_repmsg->lm_magic) {
+	case LUSTRE_MSG_MAGIC_V2:
+		return lustre_unpack_ptlrpc_body_v2(req, 0, offset);
+	default:
+		CERROR("bad lustre msg magic: %08x\n",
+		       req->rq_repmsg->lm_magic);
+		return -EINVAL;
+	}
+}
+
+static inline int lustre_msg_buflen_v2(struct lustre_msg_v2 *m, int n)
+{
+	if (n >= m->lm_bufcount)
+		return 0;
+
+	return m->lm_buflens[n];
+}
+
+/**
+ * lustre_msg_buflen - return the length of buffer \a n in message \a m
+ * \param m lustre_msg (request or reply) to look at
+ * \param n message index (base 0)
+ *
+ * returns zero for non-existent message indices
+ */
+int lustre_msg_buflen(struct lustre_msg *m, int n)
+{
+	switch (m->lm_magic) {
+	case LUSTRE_MSG_MAGIC_V2:
+		return lustre_msg_buflen_v2(m, n);
+	default:
+		CERROR("incorrect message magic: %08x\n", m->lm_magic);
+		return -EINVAL;
+	}
+}
+EXPORT_SYMBOL(lustre_msg_buflen);
+
+static inline void
+lustre_msg_set_buflen_v2(struct lustre_msg_v2 *m, int n, int len)
+{
+	if (n >= m->lm_bufcount)
+		LBUG();
+
+	m->lm_buflens[n] = len;
+}
+
+void lustre_msg_set_buflen(struct lustre_msg *m, int n, int len)
+{
+	switch (m->lm_magic) {
+	case LUSTRE_MSG_MAGIC_V2:
+		lustre_msg_set_buflen_v2(m, n, len);
+		return;
+	default:
+		LASSERTF(0, "incorrect message magic: %08x\n", m->lm_magic);
+	}
+}
+
+EXPORT_SYMBOL(lustre_msg_set_buflen);
+
+/* NB return the bufcount for lustre_msg_v2 format, so if message is packed
+ * in V1 format, the result is one bigger. (add struct ptlrpc_body). */
+int lustre_msg_bufcount(struct lustre_msg *m)
+{
+	switch (m->lm_magic) {
+	case LUSTRE_MSG_MAGIC_V2:
+		return m->lm_bufcount;
+	default:
+		CERROR("incorrect message magic: %08x\n", m->lm_magic);
+		return -EINVAL;
+	}
+}
+EXPORT_SYMBOL(lustre_msg_bufcount);
+
+char *lustre_msg_string(struct lustre_msg *m, int index, int max_len)
+{
+	/* max_len == 0 means the string should fill the buffer */
+	char *str;
+	int slen, blen;
+
+	switch (m->lm_magic) {
+	case LUSTRE_MSG_MAGIC_V2:
+		str = lustre_msg_buf_v2(m, index, 0);
+		blen = lustre_msg_buflen_v2(m, index);
+		break;
+	default:
+		LASSERTF(0, "incorrect message magic: %08x\n", m->lm_magic);
+	}
+
+	if (str == NULL) {
+		CERROR("can't unpack string in msg %p buffer[%d]\n", m, index);
+		return NULL;
+	}
+
+	slen = strnlen(str, blen);
+
+	if (slen == blen) {		     /* not NULL terminated */
+		CERROR("can't unpack non-NULL terminated string in msg %p buffer[%d] len %d\n",
+		       m, index, blen);
+		return NULL;
+	}
+
+	if (max_len == 0) {
+		if (slen != blen - 1) {
+			CERROR("can't unpack short string in msg %p buffer[%d] len %d: strlen %d\n",
+			       m, index, blen, slen);
+			return NULL;
+		}
+	} else if (slen > max_len) {
+		CERROR("can't unpack oversized string in msg %p buffer[%d] len %d strlen %d: max %d expected\n",
+		       m, index, blen, slen, max_len);
+		return NULL;
+	}
+
+	return str;
+}
+EXPORT_SYMBOL(lustre_msg_string);
+
+/* Wrap up the normal fixed length cases */
+static inline void *__lustre_swab_buf(struct lustre_msg *msg, int index,
+				      int min_size, void *swabber)
+{
+	void *ptr = NULL;
+
+	LASSERT(msg != NULL);
+	switch (msg->lm_magic) {
+	case LUSTRE_MSG_MAGIC_V2:
+		ptr = lustre_msg_buf_v2(msg, index, min_size);
+		break;
+	default:
+		CERROR("incorrect message magic: %08x\n", msg->lm_magic);
+	}
+
+	if (ptr && swabber)
+		((void (*)(void *))swabber)(ptr);
+
+	return ptr;
+}
+
+static inline struct ptlrpc_body *lustre_msg_ptlrpc_body(struct lustre_msg *msg)
+{
+	return lustre_msg_buf_v2(msg, MSG_PTLRPC_BODY_OFF,
+				 sizeof(struct ptlrpc_body_v2));
+}
+
+__u32 lustre_msghdr_get_flags(struct lustre_msg *msg)
+{
+	switch (msg->lm_magic) {
+	case LUSTRE_MSG_MAGIC_V1:
+	case LUSTRE_MSG_MAGIC_V1_SWABBED:
+		return 0;
+	case LUSTRE_MSG_MAGIC_V2:
+		/* already in host endian */
+		return msg->lm_flags;
+	default:
+		LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic);
+		return 0;
+	}
+}
+EXPORT_SYMBOL(lustre_msghdr_get_flags);
+
+void lustre_msghdr_set_flags(struct lustre_msg *msg, __u32 flags)
+{
+	switch (msg->lm_magic) {
+	case LUSTRE_MSG_MAGIC_V1:
+		return;
+	case LUSTRE_MSG_MAGIC_V2:
+		msg->lm_flags = flags;
+		return;
+	default:
+		LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic);
+	}
+}
+
+__u32 lustre_msg_get_flags(struct lustre_msg *msg)
+{
+	switch (msg->lm_magic) {
+	case LUSTRE_MSG_MAGIC_V2: {
+		struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+		if (!pb) {
+			CERROR("invalid msg %p: no ptlrpc body!\n", msg);
+			return 0;
+		}
+		return pb->pb_flags;
+	}
+	default:
+		/* flags might be printed in debug code while message
+		 * uninitialized */
+		return 0;
+	}
+}
+EXPORT_SYMBOL(lustre_msg_get_flags);
+
+void lustre_msg_add_flags(struct lustre_msg *msg, int flags)
+{
+	switch (msg->lm_magic) {
+	case LUSTRE_MSG_MAGIC_V2: {
+		struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+		LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg);
+		pb->pb_flags |= flags;
+		return;
+	}
+	default:
+		LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic);
+	}
+}
+EXPORT_SYMBOL(lustre_msg_add_flags);
+
+void lustre_msg_set_flags(struct lustre_msg *msg, int flags)
+{
+	switch (msg->lm_magic) {
+	case LUSTRE_MSG_MAGIC_V2: {
+		struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+		LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg);
+		pb->pb_flags = flags;
+		return;
+	}
+	default:
+		LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic);
+	}
+}
+EXPORT_SYMBOL(lustre_msg_set_flags);
+
+void lustre_msg_clear_flags(struct lustre_msg *msg, int flags)
+{
+	switch (msg->lm_magic) {
+	case LUSTRE_MSG_MAGIC_V2: {
+		struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+		LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg);
+		pb->pb_flags &= ~(MSG_GEN_FLAG_MASK & flags);
+		return;
+	}
+	default:
+		LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic);
+	}
+}
+EXPORT_SYMBOL(lustre_msg_clear_flags);
+
+__u32 lustre_msg_get_op_flags(struct lustre_msg *msg)
+{
+	switch (msg->lm_magic) {
+	case LUSTRE_MSG_MAGIC_V2: {
+		struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+		if (!pb) {
+			CERROR("invalid msg %p: no ptlrpc body!\n", msg);
+			return 0;
+		}
+		return pb->pb_op_flags;
+	}
+	default:
+		return 0;
+	}
+}
+EXPORT_SYMBOL(lustre_msg_get_op_flags);
+
+void lustre_msg_add_op_flags(struct lustre_msg *msg, int flags)
+{
+	switch (msg->lm_magic) {
+	case LUSTRE_MSG_MAGIC_V2: {
+		struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+		LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg);
+		pb->pb_op_flags |= flags;
+		return;
+	}
+	default:
+		LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic);
+	}
+}
+EXPORT_SYMBOL(lustre_msg_add_op_flags);
+
+void lustre_msg_set_op_flags(struct lustre_msg *msg, int flags)
+{
+	switch (msg->lm_magic) {
+	case LUSTRE_MSG_MAGIC_V2: {
+		struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+		LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg);
+		pb->pb_op_flags |= flags;
+		return;
+	}
+	default:
+		LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic);
+	}
+}
+EXPORT_SYMBOL(lustre_msg_set_op_flags);
+
+struct lustre_handle *lustre_msg_get_handle(struct lustre_msg *msg)
+{
+	switch (msg->lm_magic) {
+	case LUSTRE_MSG_MAGIC_V2: {
+		struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+		if (!pb) {
+			CERROR("invalid msg %p: no ptlrpc body!\n", msg);
+			return NULL;
+		}
+		return &pb->pb_handle;
+	}
+	default:
+		CERROR("incorrect message magic: %08x\n", msg->lm_magic);
+		return NULL;
+	}
+}
+EXPORT_SYMBOL(lustre_msg_get_handle);
+
+__u32 lustre_msg_get_type(struct lustre_msg *msg)
+{
+	switch (msg->lm_magic) {
+	case LUSTRE_MSG_MAGIC_V2: {
+		struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+		if (!pb) {
+			CERROR("invalid msg %p: no ptlrpc body!\n", msg);
+			return PTL_RPC_MSG_ERR;
+		}
+		return pb->pb_type;
+	}
+	default:
+		CERROR("incorrect message magic: %08x\n", msg->lm_magic);
+		return PTL_RPC_MSG_ERR;
+	}
+}
+EXPORT_SYMBOL(lustre_msg_get_type);
+
+__u32 lustre_msg_get_version(struct lustre_msg *msg)
+{
+	switch (msg->lm_magic) {
+	case LUSTRE_MSG_MAGIC_V2: {
+		struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+		if (!pb) {
+			CERROR("invalid msg %p: no ptlrpc body!\n", msg);
+			return 0;
+		}
+		return pb->pb_version;
+	}
+	default:
+		CERROR("incorrect message magic: %08x\n", msg->lm_magic);
+		return 0;
+	}
+}
+EXPORT_SYMBOL(lustre_msg_get_version);
+
+void lustre_msg_add_version(struct lustre_msg *msg, int version)
+{
+	switch (msg->lm_magic) {
+	case LUSTRE_MSG_MAGIC_V2: {
+		struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+		LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg);
+		pb->pb_version |= version;
+		return;
+	}
+	default:
+		LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic);
+	}
+}
+EXPORT_SYMBOL(lustre_msg_add_version);
+
+__u32 lustre_msg_get_opc(struct lustre_msg *msg)
+{
+	switch (msg->lm_magic) {
+	case LUSTRE_MSG_MAGIC_V2: {
+		struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+		if (!pb) {
+			CERROR("invalid msg %p: no ptlrpc body!\n", msg);
+			return 0;
+		}
+		return pb->pb_opc;
+	}
+	default:
+		CERROR("incorrect message magic: %08x(msg:%p)\n", msg->lm_magic, msg);
+		LBUG();
+		return 0;
+	}
+}
+EXPORT_SYMBOL(lustre_msg_get_opc);
+
+__u64 lustre_msg_get_last_xid(struct lustre_msg *msg)
+{
+	switch (msg->lm_magic) {
+	case LUSTRE_MSG_MAGIC_V2: {
+		struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+		if (!pb) {
+			CERROR("invalid msg %p: no ptlrpc body!\n", msg);
+			return 0;
+		}
+		return pb->pb_last_xid;
+	}
+	default:
+		CERROR("incorrect message magic: %08x\n", msg->lm_magic);
+		return 0;
+	}
+}
+EXPORT_SYMBOL(lustre_msg_get_last_xid);
+
+__u64 lustre_msg_get_last_committed(struct lustre_msg *msg)
+{
+	switch (msg->lm_magic) {
+	case LUSTRE_MSG_MAGIC_V2: {
+		struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+		if (!pb) {
+			CERROR("invalid msg %p: no ptlrpc body!\n", msg);
+			return 0;
+		}
+		return pb->pb_last_committed;
+	}
+	default:
+		CERROR("incorrect message magic: %08x\n", msg->lm_magic);
+		return 0;
+	}
+}
+EXPORT_SYMBOL(lustre_msg_get_last_committed);
+
+__u64 *lustre_msg_get_versions(struct lustre_msg *msg)
+{
+	switch (msg->lm_magic) {
+	case LUSTRE_MSG_MAGIC_V1:
+		return NULL;
+	case LUSTRE_MSG_MAGIC_V2: {
+		struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+		if (!pb) {
+			CERROR("invalid msg %p: no ptlrpc body!\n", msg);
+			return NULL;
+		}
+		return pb->pb_pre_versions;
+	}
+	default:
+		CERROR("incorrect message magic: %08x\n", msg->lm_magic);
+		return NULL;
+	}
+}
+EXPORT_SYMBOL(lustre_msg_get_versions);
+
+__u64 lustre_msg_get_transno(struct lustre_msg *msg)
+{
+	switch (msg->lm_magic) {
+	case LUSTRE_MSG_MAGIC_V2: {
+		struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+		if (!pb) {
+			CERROR("invalid msg %p: no ptlrpc body!\n", msg);
+			return 0;
+		}
+		return pb->pb_transno;
+	}
+	default:
+		CERROR("incorrect message magic: %08x\n", msg->lm_magic);
+		return 0;
+	}
+}
+EXPORT_SYMBOL(lustre_msg_get_transno);
+
+int lustre_msg_get_status(struct lustre_msg *msg)
+{
+	switch (msg->lm_magic) {
+	case LUSTRE_MSG_MAGIC_V2: {
+		struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+		if (!pb) {
+			CERROR("invalid msg %p: no ptlrpc body!\n", msg);
+			return -EINVAL;
+		}
+		return pb->pb_status;
+	}
+	default:
+		/* status might be printed in debug code while message
+		 * uninitialized */
+		return -EINVAL;
+	}
+}
+EXPORT_SYMBOL(lustre_msg_get_status);
+
+__u64 lustre_msg_get_slv(struct lustre_msg *msg)
+{
+	switch (msg->lm_magic) {
+	case LUSTRE_MSG_MAGIC_V2: {
+		struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+		if (!pb) {
+			CERROR("invalid msg %p: no ptlrpc body!\n", msg);
+			return -EINVAL;
+		}
+		return pb->pb_slv;
+	}
+	default:
+		CERROR("invalid msg magic %08x\n", msg->lm_magic);
+		return -EINVAL;
+	}
+}
+EXPORT_SYMBOL(lustre_msg_get_slv);
+
+
+void lustre_msg_set_slv(struct lustre_msg *msg, __u64 slv)
+{
+	switch (msg->lm_magic) {
+	case LUSTRE_MSG_MAGIC_V2: {
+		struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+		if (!pb) {
+			CERROR("invalid msg %p: no ptlrpc body!\n", msg);
+			return;
+		}
+		pb->pb_slv = slv;
+		return;
+	}
+	default:
+		CERROR("invalid msg magic %x\n", msg->lm_magic);
+		return;
+	}
+}
+EXPORT_SYMBOL(lustre_msg_set_slv);
+
+__u32 lustre_msg_get_limit(struct lustre_msg *msg)
+{
+	switch (msg->lm_magic) {
+	case LUSTRE_MSG_MAGIC_V2: {
+		struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+		if (!pb) {
+			CERROR("invalid msg %p: no ptlrpc body!\n", msg);
+			return -EINVAL;
+		}
+		return pb->pb_limit;
+	}
+	default:
+		CERROR("invalid msg magic %x\n", msg->lm_magic);
+		return -EINVAL;
+	}
+}
+EXPORT_SYMBOL(lustre_msg_get_limit);
+
+
+void lustre_msg_set_limit(struct lustre_msg *msg, __u64 limit)
+{
+	switch (msg->lm_magic) {
+	case LUSTRE_MSG_MAGIC_V2: {
+		struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+		if (!pb) {
+			CERROR("invalid msg %p: no ptlrpc body!\n", msg);
+			return;
+		}
+		pb->pb_limit = limit;
+		return;
+	}
+	default:
+		CERROR("invalid msg magic %08x\n", msg->lm_magic);
+		return;
+	}
+}
+EXPORT_SYMBOL(lustre_msg_set_limit);
+
+__u32 lustre_msg_get_conn_cnt(struct lustre_msg *msg)
+{
+	switch (msg->lm_magic) {
+	case LUSTRE_MSG_MAGIC_V2: {
+		struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+		if (!pb) {
+			CERROR("invalid msg %p: no ptlrpc body!\n", msg);
+			return 0;
+		}
+		return pb->pb_conn_cnt;
+	}
+	default:
+		CERROR("incorrect message magic: %08x\n", msg->lm_magic);
+		return 0;
+	}
+}
+EXPORT_SYMBOL(lustre_msg_get_conn_cnt);
+
+int lustre_msg_is_v1(struct lustre_msg *msg)
+{
+	switch (msg->lm_magic) {
+	case LUSTRE_MSG_MAGIC_V1:
+	case LUSTRE_MSG_MAGIC_V1_SWABBED:
+		return 1;
+	default:
+		return 0;
+	}
+}
+EXPORT_SYMBOL(lustre_msg_is_v1);
+
+__u32 lustre_msg_get_magic(struct lustre_msg *msg)
+{
+	switch (msg->lm_magic) {
+	case LUSTRE_MSG_MAGIC_V2:
+		return msg->lm_magic;
+	default:
+		CERROR("incorrect message magic: %08x\n", msg->lm_magic);
+		return 0;
+	}
+}
+EXPORT_SYMBOL(lustre_msg_get_magic);
+
+__u32 lustre_msg_get_timeout(struct lustre_msg *msg)
+{
+	switch (msg->lm_magic) {
+	case LUSTRE_MSG_MAGIC_V1:
+	case LUSTRE_MSG_MAGIC_V1_SWABBED:
+		return 0;
+	case LUSTRE_MSG_MAGIC_V2: {
+		struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+		if (!pb) {
+			CERROR("invalid msg %p: no ptlrpc body!\n", msg);
+			return 0;
+
+		}
+		return pb->pb_timeout;
+	}
+	default:
+		CERROR("incorrect message magic: %08x\n", msg->lm_magic);
+		return 0;
+	}
+}
+
+__u32 lustre_msg_get_service_time(struct lustre_msg *msg)
+{
+	switch (msg->lm_magic) {
+	case LUSTRE_MSG_MAGIC_V1:
+	case LUSTRE_MSG_MAGIC_V1_SWABBED:
+		return 0;
+	case LUSTRE_MSG_MAGIC_V2: {
+		struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+		if (!pb) {
+			CERROR("invalid msg %p: no ptlrpc body!\n", msg);
+			return 0;
+
+		}
+		return pb->pb_service_time;
+	}
+	default:
+		CERROR("incorrect message magic: %08x\n", msg->lm_magic);
+		return 0;
+	}
+}
+
+char *lustre_msg_get_jobid(struct lustre_msg *msg)
+{
+	switch (msg->lm_magic) {
+	case LUSTRE_MSG_MAGIC_V1:
+	case LUSTRE_MSG_MAGIC_V1_SWABBED:
+		return NULL;
+	case LUSTRE_MSG_MAGIC_V2: {
+		struct ptlrpc_body *pb =
+			lustre_msg_buf_v2(msg, MSG_PTLRPC_BODY_OFF,
+					  sizeof(struct ptlrpc_body));
+		if (!pb)
+			return NULL;
+
+		return pb->pb_jobid;
+	}
+	default:
+		CERROR("incorrect message magic: %08x\n", msg->lm_magic);
+		return NULL;
+	}
+}
+EXPORT_SYMBOL(lustre_msg_get_jobid);
+
+__u32 lustre_msg_get_cksum(struct lustre_msg *msg)
+{
+	switch (msg->lm_magic) {
+	case LUSTRE_MSG_MAGIC_V2:
+		return msg->lm_cksum;
+	default:
+		CERROR("incorrect message magic: %08x\n", msg->lm_magic);
+		return 0;
+	}
+}
+
+__u32 lustre_msg_calc_cksum(struct lustre_msg *msg)
+{
+	switch (msg->lm_magic) {
+	case LUSTRE_MSG_MAGIC_V2: {
+		struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+		__u32 crc;
+		unsigned int hsize = 4;
+		cfs_crypto_hash_digest(CFS_HASH_ALG_CRC32, (unsigned char *)pb,
+				   lustre_msg_buflen(msg, MSG_PTLRPC_BODY_OFF),
+				   NULL, 0, (unsigned char *)&crc, &hsize);
+		return crc;
+	}
+	default:
+		CERROR("incorrect message magic: %08x\n", msg->lm_magic);
+		return 0;
+	}
+}
+
+void lustre_msg_set_handle(struct lustre_msg *msg, struct lustre_handle *handle)
+{
+	switch (msg->lm_magic) {
+	case LUSTRE_MSG_MAGIC_V2: {
+		struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+		LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg);
+		pb->pb_handle = *handle;
+		return;
+	}
+	default:
+		LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic);
+	}
+}
+EXPORT_SYMBOL(lustre_msg_set_handle);
+
+void lustre_msg_set_type(struct lustre_msg *msg, __u32 type)
+{
+	switch (msg->lm_magic) {
+	case LUSTRE_MSG_MAGIC_V2: {
+		struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+		LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg);
+		pb->pb_type = type;
+		return;
+	}
+	default:
+		LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic);
+	}
+}
+EXPORT_SYMBOL(lustre_msg_set_type);
+
+void lustre_msg_set_opc(struct lustre_msg *msg, __u32 opc)
+{
+	switch (msg->lm_magic) {
+	case LUSTRE_MSG_MAGIC_V2: {
+		struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+		LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg);
+		pb->pb_opc = opc;
+		return;
+	}
+	default:
+		LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic);
+	}
+}
+EXPORT_SYMBOL(lustre_msg_set_opc);
+
+void lustre_msg_set_last_xid(struct lustre_msg *msg, __u64 last_xid)
+{
+	switch (msg->lm_magic) {
+	case LUSTRE_MSG_MAGIC_V2: {
+		struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+		LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg);
+		pb->pb_last_xid = last_xid;
+		return;
+	}
+	default:
+		LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic);
+	}
+}
+EXPORT_SYMBOL(lustre_msg_set_last_xid);
+
+void lustre_msg_set_last_committed(struct lustre_msg *msg, __u64 last_committed)
+{
+	switch (msg->lm_magic) {
+	case LUSTRE_MSG_MAGIC_V2: {
+		struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+		LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg);
+		pb->pb_last_committed = last_committed;
+		return;
+	}
+	default:
+		LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic);
+	}
+}
+EXPORT_SYMBOL(lustre_msg_set_last_committed);
+
+void lustre_msg_set_versions(struct lustre_msg *msg, __u64 *versions)
+{
+	switch (msg->lm_magic) {
+	case LUSTRE_MSG_MAGIC_V1:
+		return;
+	case LUSTRE_MSG_MAGIC_V2: {
+		struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+		LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg);
+		pb->pb_pre_versions[0] = versions[0];
+		pb->pb_pre_versions[1] = versions[1];
+		pb->pb_pre_versions[2] = versions[2];
+		pb->pb_pre_versions[3] = versions[3];
+		return;
+	}
+	default:
+		LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic);
+	}
+}
+EXPORT_SYMBOL(lustre_msg_set_versions);
+
+void lustre_msg_set_transno(struct lustre_msg *msg, __u64 transno)
+{
+	switch (msg->lm_magic) {
+	case LUSTRE_MSG_MAGIC_V2: {
+		struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+		LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg);
+		pb->pb_transno = transno;
+		return;
+	}
+	default:
+		LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic);
+	}
+}
+EXPORT_SYMBOL(lustre_msg_set_transno);
+
+void lustre_msg_set_status(struct lustre_msg *msg, __u32 status)
+{
+	switch (msg->lm_magic) {
+	case LUSTRE_MSG_MAGIC_V2: {
+		struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+		LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg);
+		pb->pb_status = status;
+		return;
+	}
+	default:
+		LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic);
+	}
+}
+EXPORT_SYMBOL(lustre_msg_set_status);
+
+void lustre_msg_set_conn_cnt(struct lustre_msg *msg, __u32 conn_cnt)
+{
+	switch (msg->lm_magic) {
+	case LUSTRE_MSG_MAGIC_V2: {
+		struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+		LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg);
+		pb->pb_conn_cnt = conn_cnt;
+		return;
+	}
+	default:
+		LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic);
+	}
+}
+EXPORT_SYMBOL(lustre_msg_set_conn_cnt);
+
+void lustre_msg_set_timeout(struct lustre_msg *msg, __u32 timeout)
+{
+	switch (msg->lm_magic) {
+	case LUSTRE_MSG_MAGIC_V1:
+		return;
+	case LUSTRE_MSG_MAGIC_V2: {
+		struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+		LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg);
+		pb->pb_timeout = timeout;
+		return;
+	}
+	default:
+		LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic);
+	}
+}
+
+void lustre_msg_set_service_time(struct lustre_msg *msg, __u32 service_time)
+{
+	switch (msg->lm_magic) {
+	case LUSTRE_MSG_MAGIC_V1:
+		return;
+	case LUSTRE_MSG_MAGIC_V2: {
+		struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+		LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg);
+		pb->pb_service_time = service_time;
+		return;
+	}
+	default:
+		LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic);
+	}
+}
+
+void lustre_msg_set_jobid(struct lustre_msg *msg, char *jobid)
+{
+	switch (msg->lm_magic) {
+	case LUSTRE_MSG_MAGIC_V1:
+		return;
+	case LUSTRE_MSG_MAGIC_V2: {
+		__u32 opc = lustre_msg_get_opc(msg);
+		struct ptlrpc_body *pb;
+
+		/* Don't set jobid for ldlm ast RPCs, they've been shrunk.
+		 * See the comment in ptlrpc_request_pack(). */
+		if (!opc || opc == LDLM_BL_CALLBACK ||
+		    opc == LDLM_CP_CALLBACK || opc == LDLM_GL_CALLBACK)
+			return;
+
+		pb = lustre_msg_buf_v2(msg, MSG_PTLRPC_BODY_OFF,
+				       sizeof(struct ptlrpc_body));
+		LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg);
+
+		if (jobid != NULL)
+			memcpy(pb->pb_jobid, jobid, JOBSTATS_JOBID_SIZE);
+		else if (pb->pb_jobid[0] == '\0')
+			lustre_get_jobid(pb->pb_jobid);
+		return;
+	}
+	default:
+		LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic);
+	}
+}
+EXPORT_SYMBOL(lustre_msg_set_jobid);
+
+void lustre_msg_set_cksum(struct lustre_msg *msg, __u32 cksum)
+{
+	switch (msg->lm_magic) {
+	case LUSTRE_MSG_MAGIC_V1:
+		return;
+	case LUSTRE_MSG_MAGIC_V2:
+		msg->lm_cksum = cksum;
+		return;
+	default:
+		LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic);
+	}
+}
+
+
+void ptlrpc_request_set_replen(struct ptlrpc_request *req)
+{
+	int count = req_capsule_filled_sizes(&req->rq_pill, RCL_SERVER);
+
+	req->rq_replen = lustre_msg_size(req->rq_reqmsg->lm_magic, count,
+					 req->rq_pill.rc_area[RCL_SERVER]);
+	if (req->rq_reqmsg->lm_magic == LUSTRE_MSG_MAGIC_V2)
+		req->rq_reqmsg->lm_repsize = req->rq_replen;
+}
+EXPORT_SYMBOL(ptlrpc_request_set_replen);
+
+void ptlrpc_req_set_repsize(struct ptlrpc_request *req, int count, __u32 *lens)
+{
+	req->rq_replen = lustre_msg_size(req->rq_reqmsg->lm_magic, count, lens);
+	if (req->rq_reqmsg->lm_magic == LUSTRE_MSG_MAGIC_V2)
+		req->rq_reqmsg->lm_repsize = req->rq_replen;
+}
+EXPORT_SYMBOL(ptlrpc_req_set_repsize);
+
+/**
+ * Send a remote set_info_async.
+ *
+ * This may go from client to server or server to client.
+ */
+int do_set_info_async(struct obd_import *imp,
+		      int opcode, int version,
+		      u32 keylen, void *key,
+		      u32 vallen, void *val,
+		      struct ptlrpc_request_set *set)
+{
+	struct ptlrpc_request *req;
+	char		  *tmp;
+	int		    rc;
+
+	req = ptlrpc_request_alloc(imp, &RQF_OBD_SET_INFO);
+	if (req == NULL)
+		return -ENOMEM;
+
+	req_capsule_set_size(&req->rq_pill, &RMF_SETINFO_KEY,
+			     RCL_CLIENT, keylen);
+	req_capsule_set_size(&req->rq_pill, &RMF_SETINFO_VAL,
+			     RCL_CLIENT, vallen);
+	rc = ptlrpc_request_pack(req, version, opcode);
+	if (rc) {
+		ptlrpc_request_free(req);
+		return rc;
+	}
+
+	tmp = req_capsule_client_get(&req->rq_pill, &RMF_SETINFO_KEY);
+	memcpy(tmp, key, keylen);
+	tmp = req_capsule_client_get(&req->rq_pill, &RMF_SETINFO_VAL);
+	memcpy(tmp, val, vallen);
+
+	ptlrpc_request_set_replen(req);
+
+	if (set) {
+		ptlrpc_set_add_req(set, req);
+		ptlrpc_check_set(NULL, set);
+	} else {
+		rc = ptlrpc_queue_wait(req);
+		ptlrpc_req_finished(req);
+	}
+
+	return rc;
+}
+EXPORT_SYMBOL(do_set_info_async);
+
+/* byte flipping routines for all wire types declared in
+ * lustre_idl.h implemented here.
+ */
+void lustre_swab_ptlrpc_body(struct ptlrpc_body *b)
+{
+	__swab32s(&b->pb_type);
+	__swab32s(&b->pb_version);
+	__swab32s(&b->pb_opc);
+	__swab32s(&b->pb_status);
+	__swab64s(&b->pb_last_xid);
+	__swab64s(&b->pb_last_seen);
+	__swab64s(&b->pb_last_committed);
+	__swab64s(&b->pb_transno);
+	__swab32s(&b->pb_flags);
+	__swab32s(&b->pb_op_flags);
+	__swab32s(&b->pb_conn_cnt);
+	__swab32s(&b->pb_timeout);
+	__swab32s(&b->pb_service_time);
+	__swab32s(&b->pb_limit);
+	__swab64s(&b->pb_slv);
+	__swab64s(&b->pb_pre_versions[0]);
+	__swab64s(&b->pb_pre_versions[1]);
+	__swab64s(&b->pb_pre_versions[2]);
+	__swab64s(&b->pb_pre_versions[3]);
+	CLASSERT(offsetof(typeof(*b), pb_padding) != 0);
+	/* While we need to maintain compatibility between
+	 * clients and servers without ptlrpc_body_v2 (< 2.3)
+	 * do not swab any fields beyond pb_jobid, as we are
+	 * using this swab function for both ptlrpc_body
+	 * and ptlrpc_body_v2. */
+	CLASSERT(offsetof(typeof(*b), pb_jobid) != 0);
+}
+EXPORT_SYMBOL(lustre_swab_ptlrpc_body);
+
+void lustre_swab_connect(struct obd_connect_data *ocd)
+{
+	__swab64s(&ocd->ocd_connect_flags);
+	__swab32s(&ocd->ocd_version);
+	__swab32s(&ocd->ocd_grant);
+	__swab64s(&ocd->ocd_ibits_known);
+	__swab32s(&ocd->ocd_index);
+	__swab32s(&ocd->ocd_brw_size);
+	/* ocd_blocksize and ocd_inodespace don't need to be swabbed because
+	 * they are 8-byte values */
+	__swab16s(&ocd->ocd_grant_extent);
+	__swab32s(&ocd->ocd_unused);
+	__swab64s(&ocd->ocd_transno);
+	__swab32s(&ocd->ocd_group);
+	__swab32s(&ocd->ocd_cksum_types);
+	__swab32s(&ocd->ocd_instance);
+	/* Fields after ocd_cksum_types are only accessible by the receiver
+	 * if the corresponding flag in ocd_connect_flags is set. Accessing
+	 * any field after ocd_maxbytes on the receiver without a valid flag
+	 * may result in out-of-bound memory access and kernel oops. */
+	if (ocd->ocd_connect_flags & OBD_CONNECT_MAX_EASIZE)
+		__swab32s(&ocd->ocd_max_easize);
+	if (ocd->ocd_connect_flags & OBD_CONNECT_MAXBYTES)
+		__swab64s(&ocd->ocd_maxbytes);
+	CLASSERT(offsetof(typeof(*ocd), padding1) != 0);
+	CLASSERT(offsetof(typeof(*ocd), padding2) != 0);
+	CLASSERT(offsetof(typeof(*ocd), padding3) != 0);
+	CLASSERT(offsetof(typeof(*ocd), padding4) != 0);
+	CLASSERT(offsetof(typeof(*ocd), padding5) != 0);
+	CLASSERT(offsetof(typeof(*ocd), padding6) != 0);
+	CLASSERT(offsetof(typeof(*ocd), padding7) != 0);
+	CLASSERT(offsetof(typeof(*ocd), padding8) != 0);
+	CLASSERT(offsetof(typeof(*ocd), padding9) != 0);
+	CLASSERT(offsetof(typeof(*ocd), paddingA) != 0);
+	CLASSERT(offsetof(typeof(*ocd), paddingB) != 0);
+	CLASSERT(offsetof(typeof(*ocd), paddingC) != 0);
+	CLASSERT(offsetof(typeof(*ocd), paddingD) != 0);
+	CLASSERT(offsetof(typeof(*ocd), paddingE) != 0);
+	CLASSERT(offsetof(typeof(*ocd), paddingF) != 0);
+}
+
+void lustre_swab_obdo(struct obdo  *o)
+{
+	__swab64s(&o->o_valid);
+	lustre_swab_ost_id(&o->o_oi);
+	__swab64s(&o->o_parent_seq);
+	__swab64s(&o->o_size);
+	__swab64s(&o->o_mtime);
+	__swab64s(&o->o_atime);
+	__swab64s(&o->o_ctime);
+	__swab64s(&o->o_blocks);
+	__swab64s(&o->o_grant);
+	__swab32s(&o->o_blksize);
+	__swab32s(&o->o_mode);
+	__swab32s(&o->o_uid);
+	__swab32s(&o->o_gid);
+	__swab32s(&o->o_flags);
+	__swab32s(&o->o_nlink);
+	__swab32s(&o->o_parent_oid);
+	__swab32s(&o->o_misc);
+	__swab64s(&o->o_ioepoch);
+	__swab32s(&o->o_stripe_idx);
+	__swab32s(&o->o_parent_ver);
+	/* o_handle is opaque */
+	/* o_lcookie is swabbed elsewhere */
+	__swab32s(&o->o_uid_h);
+	__swab32s(&o->o_gid_h);
+	__swab64s(&o->o_data_version);
+	CLASSERT(offsetof(typeof(*o), o_padding_4) != 0);
+	CLASSERT(offsetof(typeof(*o), o_padding_5) != 0);
+	CLASSERT(offsetof(typeof(*o), o_padding_6) != 0);
+
+}
+EXPORT_SYMBOL(lustre_swab_obdo);
+
+void lustre_swab_obd_statfs(struct obd_statfs *os)
+{
+	__swab64s(&os->os_type);
+	__swab64s(&os->os_blocks);
+	__swab64s(&os->os_bfree);
+	__swab64s(&os->os_bavail);
+	__swab64s(&os->os_files);
+	__swab64s(&os->os_ffree);
+	/* no need to swab os_fsid */
+	__swab32s(&os->os_bsize);
+	__swab32s(&os->os_namelen);
+	__swab64s(&os->os_maxbytes);
+	__swab32s(&os->os_state);
+	CLASSERT(offsetof(typeof(*os), os_fprecreated) != 0);
+	CLASSERT(offsetof(typeof(*os), os_spare2) != 0);
+	CLASSERT(offsetof(typeof(*os), os_spare3) != 0);
+	CLASSERT(offsetof(typeof(*os), os_spare4) != 0);
+	CLASSERT(offsetof(typeof(*os), os_spare5) != 0);
+	CLASSERT(offsetof(typeof(*os), os_spare6) != 0);
+	CLASSERT(offsetof(typeof(*os), os_spare7) != 0);
+	CLASSERT(offsetof(typeof(*os), os_spare8) != 0);
+	CLASSERT(offsetof(typeof(*os), os_spare9) != 0);
+}
+EXPORT_SYMBOL(lustre_swab_obd_statfs);
+
+void lustre_swab_obd_ioobj(struct obd_ioobj *ioo)
+{
+	lustre_swab_ost_id(&ioo->ioo_oid);
+	__swab32s(&ioo->ioo_max_brw);
+	__swab32s(&ioo->ioo_bufcnt);
+}
+EXPORT_SYMBOL(lustre_swab_obd_ioobj);
+
+void lustre_swab_niobuf_remote(struct niobuf_remote *nbr)
+{
+	__swab64s(&nbr->offset);
+	__swab32s(&nbr->len);
+	__swab32s(&nbr->flags);
+}
+EXPORT_SYMBOL(lustre_swab_niobuf_remote);
+
+void lustre_swab_ost_body(struct ost_body *b)
+{
+	lustre_swab_obdo(&b->oa);
+}
+EXPORT_SYMBOL(lustre_swab_ost_body);
+
+void lustre_swab_ost_last_id(u64 *id)
+{
+	__swab64s(id);
+}
+EXPORT_SYMBOL(lustre_swab_ost_last_id);
+
+void lustre_swab_generic_32s(__u32 *val)
+{
+	__swab32s(val);
+}
+EXPORT_SYMBOL(lustre_swab_generic_32s);
+
+void lustre_swab_gl_desc(union ldlm_gl_desc *desc)
+{
+	lustre_swab_lu_fid(&desc->lquota_desc.gl_id.qid_fid);
+	__swab64s(&desc->lquota_desc.gl_flags);
+	__swab64s(&desc->lquota_desc.gl_ver);
+	__swab64s(&desc->lquota_desc.gl_hardlimit);
+	__swab64s(&desc->lquota_desc.gl_softlimit);
+	__swab64s(&desc->lquota_desc.gl_time);
+	CLASSERT(offsetof(typeof(desc->lquota_desc), gl_pad2) != 0);
+}
+
+void lustre_swab_ost_lvb_v1(struct ost_lvb_v1 *lvb)
+{
+	__swab64s(&lvb->lvb_size);
+	__swab64s(&lvb->lvb_mtime);
+	__swab64s(&lvb->lvb_atime);
+	__swab64s(&lvb->lvb_ctime);
+	__swab64s(&lvb->lvb_blocks);
+}
+EXPORT_SYMBOL(lustre_swab_ost_lvb_v1);
+
+void lustre_swab_ost_lvb(struct ost_lvb *lvb)
+{
+	__swab64s(&lvb->lvb_size);
+	__swab64s(&lvb->lvb_mtime);
+	__swab64s(&lvb->lvb_atime);
+	__swab64s(&lvb->lvb_ctime);
+	__swab64s(&lvb->lvb_blocks);
+	__swab32s(&lvb->lvb_mtime_ns);
+	__swab32s(&lvb->lvb_atime_ns);
+	__swab32s(&lvb->lvb_ctime_ns);
+	__swab32s(&lvb->lvb_padding);
+}
+EXPORT_SYMBOL(lustre_swab_ost_lvb);
+
+void lustre_swab_lquota_lvb(struct lquota_lvb *lvb)
+{
+	__swab64s(&lvb->lvb_flags);
+	__swab64s(&lvb->lvb_id_may_rel);
+	__swab64s(&lvb->lvb_id_rel);
+	__swab64s(&lvb->lvb_id_qunit);
+	__swab64s(&lvb->lvb_pad1);
+}
+EXPORT_SYMBOL(lustre_swab_lquota_lvb);
+
+void lustre_swab_mdt_body(struct mdt_body *b)
+{
+	lustre_swab_lu_fid(&b->fid1);
+	lustre_swab_lu_fid(&b->fid2);
+	/* handle is opaque */
+	__swab64s(&b->valid);
+	__swab64s(&b->size);
+	__swab64s(&b->mtime);
+	__swab64s(&b->atime);
+	__swab64s(&b->ctime);
+	__swab64s(&b->blocks);
+	__swab64s(&b->ioepoch);
+	__swab64s(&b->t_state);
+	__swab32s(&b->fsuid);
+	__swab32s(&b->fsgid);
+	__swab32s(&b->capability);
+	__swab32s(&b->mode);
+	__swab32s(&b->uid);
+	__swab32s(&b->gid);
+	__swab32s(&b->flags);
+	__swab32s(&b->rdev);
+	__swab32s(&b->nlink);
+	CLASSERT(offsetof(typeof(*b), unused2) != 0);
+	__swab32s(&b->suppgid);
+	__swab32s(&b->eadatasize);
+	__swab32s(&b->aclsize);
+	__swab32s(&b->max_mdsize);
+	__swab32s(&b->max_cookiesize);
+	__swab32s(&b->uid_h);
+	__swab32s(&b->gid_h);
+	CLASSERT(offsetof(typeof(*b), padding_5) != 0);
+}
+EXPORT_SYMBOL(lustre_swab_mdt_body);
+
+void lustre_swab_mdt_ioepoch(struct mdt_ioepoch *b)
+{
+	/* handle is opaque */
+	 __swab64s(&b->ioepoch);
+	 __swab32s(&b->flags);
+	 CLASSERT(offsetof(typeof(*b), padding) != 0);
+}
+EXPORT_SYMBOL(lustre_swab_mdt_ioepoch);
+
+void lustre_swab_mgs_target_info(struct mgs_target_info *mti)
+{
+	int i;
+	__swab32s(&mti->mti_lustre_ver);
+	__swab32s(&mti->mti_stripe_index);
+	__swab32s(&mti->mti_config_ver);
+	__swab32s(&mti->mti_flags);
+	__swab32s(&mti->mti_instance);
+	__swab32s(&mti->mti_nid_count);
+	CLASSERT(sizeof(lnet_nid_t) == sizeof(__u64));
+	for (i = 0; i < MTI_NIDS_MAX; i++)
+		__swab64s(&mti->mti_nids[i]);
+}
+EXPORT_SYMBOL(lustre_swab_mgs_target_info);
+
+void lustre_swab_mgs_nidtbl_entry(struct mgs_nidtbl_entry *entry)
+{
+	int i;
+
+	__swab64s(&entry->mne_version);
+	__swab32s(&entry->mne_instance);
+	__swab32s(&entry->mne_index);
+	__swab32s(&entry->mne_length);
+
+	/* mne_nid_(count|type) must be one byte size because we're gonna
+	 * access it w/o swapping. */
+	CLASSERT(sizeof(entry->mne_nid_count) == sizeof(__u8));
+	CLASSERT(sizeof(entry->mne_nid_type) == sizeof(__u8));
+
+	/* remove this assertion if ipv6 is supported. */
+	LASSERT(entry->mne_nid_type == 0);
+	for (i = 0; i < entry->mne_nid_count; i++) {
+		CLASSERT(sizeof(lnet_nid_t) == sizeof(__u64));
+		__swab64s(&entry->u.nids[i]);
+	}
+}
+EXPORT_SYMBOL(lustre_swab_mgs_nidtbl_entry);
+
+void lustre_swab_mgs_config_body(struct mgs_config_body *body)
+{
+	__swab64s(&body->mcb_offset);
+	__swab32s(&body->mcb_units);
+	__swab16s(&body->mcb_type);
+}
+EXPORT_SYMBOL(lustre_swab_mgs_config_body);
+
+void lustre_swab_mgs_config_res(struct mgs_config_res *body)
+{
+	__swab64s(&body->mcr_offset);
+	__swab64s(&body->mcr_size);
+}
+EXPORT_SYMBOL(lustre_swab_mgs_config_res);
+
+static void lustre_swab_obd_dqinfo(struct obd_dqinfo *i)
+{
+	__swab64s(&i->dqi_bgrace);
+	__swab64s(&i->dqi_igrace);
+	__swab32s(&i->dqi_flags);
+	__swab32s(&i->dqi_valid);
+}
+
+static void lustre_swab_obd_dqblk(struct obd_dqblk *b)
+{
+	__swab64s(&b->dqb_ihardlimit);
+	__swab64s(&b->dqb_isoftlimit);
+	__swab64s(&b->dqb_curinodes);
+	__swab64s(&b->dqb_bhardlimit);
+	__swab64s(&b->dqb_bsoftlimit);
+	__swab64s(&b->dqb_curspace);
+	__swab64s(&b->dqb_btime);
+	__swab64s(&b->dqb_itime);
+	__swab32s(&b->dqb_valid);
+	CLASSERT(offsetof(typeof(*b), dqb_padding) != 0);
+}
+
+void lustre_swab_obd_quotactl(struct obd_quotactl *q)
+{
+	__swab32s(&q->qc_cmd);
+	__swab32s(&q->qc_type);
+	__swab32s(&q->qc_id);
+	__swab32s(&q->qc_stat);
+	lustre_swab_obd_dqinfo(&q->qc_dqinfo);
+	lustre_swab_obd_dqblk(&q->qc_dqblk);
+}
+EXPORT_SYMBOL(lustre_swab_obd_quotactl);
+
+void lustre_swab_mdt_remote_perm(struct mdt_remote_perm *p)
+{
+	__swab32s(&p->rp_uid);
+	__swab32s(&p->rp_gid);
+	__swab32s(&p->rp_fsuid);
+	__swab32s(&p->rp_fsuid_h);
+	__swab32s(&p->rp_fsgid);
+	__swab32s(&p->rp_fsgid_h);
+	__swab32s(&p->rp_access_perm);
+	__swab32s(&p->rp_padding);
+};
+EXPORT_SYMBOL(lustre_swab_mdt_remote_perm);
+
+void lustre_swab_fid2path(struct getinfo_fid2path *gf)
+{
+	lustre_swab_lu_fid(&gf->gf_fid);
+	__swab64s(&gf->gf_recno);
+	__swab32s(&gf->gf_linkno);
+	__swab32s(&gf->gf_pathlen);
+}
+EXPORT_SYMBOL(lustre_swab_fid2path);
+
+void lustre_swab_fiemap_extent(struct ll_fiemap_extent *fm_extent)
+{
+	__swab64s(&fm_extent->fe_logical);
+	__swab64s(&fm_extent->fe_physical);
+	__swab64s(&fm_extent->fe_length);
+	__swab32s(&fm_extent->fe_flags);
+	__swab32s(&fm_extent->fe_device);
+}
+
+void lustre_swab_fiemap(struct ll_user_fiemap *fiemap)
+{
+	int i;
+
+	__swab64s(&fiemap->fm_start);
+	__swab64s(&fiemap->fm_length);
+	__swab32s(&fiemap->fm_flags);
+	__swab32s(&fiemap->fm_mapped_extents);
+	__swab32s(&fiemap->fm_extent_count);
+	__swab32s(&fiemap->fm_reserved);
+
+	for (i = 0; i < fiemap->fm_mapped_extents; i++)
+		lustre_swab_fiemap_extent(&fiemap->fm_extents[i]);
+}
+EXPORT_SYMBOL(lustre_swab_fiemap);
+
+void lustre_swab_idx_info(struct idx_info *ii)
+{
+	__swab32s(&ii->ii_magic);
+	__swab32s(&ii->ii_flags);
+	__swab16s(&ii->ii_count);
+	__swab32s(&ii->ii_attrs);
+	lustre_swab_lu_fid(&ii->ii_fid);
+	__swab64s(&ii->ii_version);
+	__swab64s(&ii->ii_hash_start);
+	__swab64s(&ii->ii_hash_end);
+	__swab16s(&ii->ii_keysize);
+	__swab16s(&ii->ii_recsize);
+}
+
+void lustre_swab_lip_header(struct lu_idxpage *lip)
+{
+	/* swab header */
+	__swab32s(&lip->lip_magic);
+	__swab16s(&lip->lip_flags);
+	__swab16s(&lip->lip_nr);
+}
+EXPORT_SYMBOL(lustre_swab_lip_header);
+
+void lustre_swab_mdt_rec_reint (struct mdt_rec_reint *rr)
+{
+	__swab32s(&rr->rr_opcode);
+	__swab32s(&rr->rr_cap);
+	__swab32s(&rr->rr_fsuid);
+	/* rr_fsuid_h is unused */
+	__swab32s(&rr->rr_fsgid);
+	/* rr_fsgid_h is unused */
+	__swab32s(&rr->rr_suppgid1);
+	/* rr_suppgid1_h is unused */
+	__swab32s(&rr->rr_suppgid2);
+	/* rr_suppgid2_h is unused */
+	lustre_swab_lu_fid(&rr->rr_fid1);
+	lustre_swab_lu_fid(&rr->rr_fid2);
+	__swab64s(&rr->rr_mtime);
+	__swab64s(&rr->rr_atime);
+	__swab64s(&rr->rr_ctime);
+	__swab64s(&rr->rr_size);
+	__swab64s(&rr->rr_blocks);
+	__swab32s(&rr->rr_bias);
+	__swab32s(&rr->rr_mode);
+	__swab32s(&rr->rr_flags);
+	__swab32s(&rr->rr_flags_h);
+	__swab32s(&rr->rr_umask);
+
+	CLASSERT(offsetof(typeof(*rr), rr_padding_4) != 0);
+};
+EXPORT_SYMBOL(lustre_swab_mdt_rec_reint);
+
+void lustre_swab_lov_desc(struct lov_desc *ld)
+{
+	__swab32s(&ld->ld_tgt_count);
+	__swab32s(&ld->ld_active_tgt_count);
+	__swab32s(&ld->ld_default_stripe_count);
+	__swab32s(&ld->ld_pattern);
+	__swab64s(&ld->ld_default_stripe_size);
+	__swab64s(&ld->ld_default_stripe_offset);
+	__swab32s(&ld->ld_qos_maxage);
+	/* uuid endian insensitive */
+}
+EXPORT_SYMBOL(lustre_swab_lov_desc);
+
+void lustre_swab_lmv_desc(struct lmv_desc *ld)
+{
+	__swab32s(&ld->ld_tgt_count);
+	__swab32s(&ld->ld_active_tgt_count);
+	__swab32s(&ld->ld_default_stripe_count);
+	__swab32s(&ld->ld_pattern);
+	__swab64s(&ld->ld_default_hash_size);
+	__swab32s(&ld->ld_qos_maxage);
+	/* uuid endian insensitive */
+}
+
+void lustre_swab_lmv_stripe_md(struct lmv_stripe_md *mea)
+{
+	__swab32s(&mea->mea_magic);
+	__swab32s(&mea->mea_count);
+	__swab32s(&mea->mea_master);
+	CLASSERT(offsetof(typeof(*mea), mea_padding) != 0);
+}
+
+void lustre_swab_lmv_user_md(struct lmv_user_md *lum)
+{
+	int i;
+
+	__swab32s(&lum->lum_magic);
+	__swab32s(&lum->lum_stripe_count);
+	__swab32s(&lum->lum_stripe_offset);
+	__swab32s(&lum->lum_hash_type);
+	__swab32s(&lum->lum_type);
+	CLASSERT(offsetof(typeof(*lum), lum_padding1) != 0);
+	CLASSERT(offsetof(typeof(*lum), lum_padding2) != 0);
+	CLASSERT(offsetof(typeof(*lum), lum_padding3) != 0);
+
+	for (i = 0; i < lum->lum_stripe_count; i++) {
+		__swab32s(&lum->lum_objects[i].lum_mds);
+		lustre_swab_lu_fid(&lum->lum_objects[i].lum_fid);
+	}
+
+}
+EXPORT_SYMBOL(lustre_swab_lmv_user_md);
+
+static void print_lum(struct lov_user_md *lum)
+{
+	CDEBUG(D_OTHER, "lov_user_md %p:\n", lum);
+	CDEBUG(D_OTHER, "\tlmm_magic: %#x\n", lum->lmm_magic);
+	CDEBUG(D_OTHER, "\tlmm_pattern: %#x\n", lum->lmm_pattern);
+	CDEBUG(D_OTHER, "\tlmm_object_id: %llu\n", lmm_oi_id(&lum->lmm_oi));
+	CDEBUG(D_OTHER, "\tlmm_object_gr: %llu\n", lmm_oi_seq(&lum->lmm_oi));
+	CDEBUG(D_OTHER, "\tlmm_stripe_size: %#x\n", lum->lmm_stripe_size);
+	CDEBUG(D_OTHER, "\tlmm_stripe_count: %#x\n", lum->lmm_stripe_count);
+	CDEBUG(D_OTHER, "\tlmm_stripe_offset/lmm_layout_gen: %#x\n",
+			lum->lmm_stripe_offset);
+}
+
+static void lustre_swab_lmm_oi(struct ost_id *oi)
+{
+	__swab64s(&oi->oi.oi_id);
+	__swab64s(&oi->oi.oi_seq);
+}
+
+static void lustre_swab_lov_user_md_common(struct lov_user_md_v1 *lum)
+{
+	__swab32s(&lum->lmm_magic);
+	__swab32s(&lum->lmm_pattern);
+	lustre_swab_lmm_oi(&lum->lmm_oi);
+	__swab32s(&lum->lmm_stripe_size);
+	__swab16s(&lum->lmm_stripe_count);
+	__swab16s(&lum->lmm_stripe_offset);
+	print_lum(lum);
+}
+
+void lustre_swab_lov_user_md_v1(struct lov_user_md_v1 *lum)
+{
+	CDEBUG(D_IOCTL, "swabbing lov_user_md v1\n");
+	lustre_swab_lov_user_md_common(lum);
+}
+EXPORT_SYMBOL(lustre_swab_lov_user_md_v1);
+
+void lustre_swab_lov_user_md_v3(struct lov_user_md_v3 *lum)
+{
+	CDEBUG(D_IOCTL, "swabbing lov_user_md v3\n");
+	lustre_swab_lov_user_md_common((struct lov_user_md_v1 *)lum);
+	/* lmm_pool_name nothing to do with char */
+}
+EXPORT_SYMBOL(lustre_swab_lov_user_md_v3);
+
+void lustre_swab_lov_mds_md(struct lov_mds_md *lmm)
+{
+	CDEBUG(D_IOCTL, "swabbing lov_mds_md\n");
+	__swab32s(&lmm->lmm_magic);
+	__swab32s(&lmm->lmm_pattern);
+	lustre_swab_lmm_oi(&lmm->lmm_oi);
+	__swab32s(&lmm->lmm_stripe_size);
+	__swab16s(&lmm->lmm_stripe_count);
+	__swab16s(&lmm->lmm_layout_gen);
+}
+EXPORT_SYMBOL(lustre_swab_lov_mds_md);
+
+void lustre_swab_lov_user_md_objects(struct lov_user_ost_data *lod,
+				     int stripe_count)
+{
+	int i;
+
+	for (i = 0; i < stripe_count; i++) {
+		lustre_swab_ost_id(&(lod[i].l_ost_oi));
+		__swab32s(&(lod[i].l_ost_gen));
+		__swab32s(&(lod[i].l_ost_idx));
+	}
+}
+EXPORT_SYMBOL(lustre_swab_lov_user_md_objects);
+
+void lustre_swab_ldlm_res_id(struct ldlm_res_id *id)
+{
+	int  i;
+
+	for (i = 0; i < RES_NAME_SIZE; i++)
+		__swab64s(&id->name[i]);
+}
+EXPORT_SYMBOL(lustre_swab_ldlm_res_id);
+
+void lustre_swab_ldlm_policy_data(ldlm_wire_policy_data_t *d)
+{
+	/* the lock data is a union and the first two fields are always an
+	 * extent so it's ok to process an LDLM_EXTENT and LDLM_FLOCK lock
+	 * data the same way. */
+	__swab64s(&d->l_extent.start);
+	__swab64s(&d->l_extent.end);
+	__swab64s(&d->l_extent.gid);
+	__swab64s(&d->l_flock.lfw_owner);
+	__swab32s(&d->l_flock.lfw_pid);
+}
+EXPORT_SYMBOL(lustre_swab_ldlm_policy_data);
+
+void lustre_swab_ldlm_intent(struct ldlm_intent *i)
+{
+	__swab64s(&i->opc);
+}
+EXPORT_SYMBOL(lustre_swab_ldlm_intent);
+
+void lustre_swab_ldlm_resource_desc(struct ldlm_resource_desc *r)
+{
+	__swab32s(&r->lr_type);
+	CLASSERT(offsetof(typeof(*r), lr_padding) != 0);
+	lustre_swab_ldlm_res_id(&r->lr_name);
+}
+EXPORT_SYMBOL(lustre_swab_ldlm_resource_desc);
+
+void lustre_swab_ldlm_lock_desc(struct ldlm_lock_desc *l)
+{
+	lustre_swab_ldlm_resource_desc(&l->l_resource);
+	__swab32s(&l->l_req_mode);
+	__swab32s(&l->l_granted_mode);
+	lustre_swab_ldlm_policy_data(&l->l_policy_data);
+}
+EXPORT_SYMBOL(lustre_swab_ldlm_lock_desc);
+
+void lustre_swab_ldlm_request(struct ldlm_request *rq)
+{
+	__swab32s(&rq->lock_flags);
+	lustre_swab_ldlm_lock_desc(&rq->lock_desc);
+	__swab32s(&rq->lock_count);
+	/* lock_handle[] opaque */
+}
+EXPORT_SYMBOL(lustre_swab_ldlm_request);
+
+void lustre_swab_ldlm_reply(struct ldlm_reply *r)
+{
+	__swab32s(&r->lock_flags);
+	CLASSERT(offsetof(typeof(*r), lock_padding) != 0);
+	lustre_swab_ldlm_lock_desc(&r->lock_desc);
+	/* lock_handle opaque */
+	__swab64s(&r->lock_policy_res1);
+	__swab64s(&r->lock_policy_res2);
+}
+EXPORT_SYMBOL(lustre_swab_ldlm_reply);
+
+void lustre_swab_quota_body(struct quota_body *b)
+{
+	lustre_swab_lu_fid(&b->qb_fid);
+	lustre_swab_lu_fid((struct lu_fid *)&b->qb_id);
+	__swab32s(&b->qb_flags);
+	__swab64s(&b->qb_count);
+	__swab64s(&b->qb_usage);
+	__swab64s(&b->qb_slv_ver);
+}
+
+/* Dump functions */
+void dump_ioo(struct obd_ioobj *ioo)
+{
+	CDEBUG(D_RPCTRACE,
+	       "obd_ioobj: ioo_oid=" DOSTID ", ioo_max_brw=%#x, ioo_bufct=%d\n",
+	       POSTID(&ioo->ioo_oid), ioo->ioo_max_brw,
+	       ioo->ioo_bufcnt);
+}
+EXPORT_SYMBOL(dump_ioo);
+
+void dump_rniobuf(struct niobuf_remote *nb)
+{
+	CDEBUG(D_RPCTRACE, "niobuf_remote: offset=%llu, len=%d, flags=%x\n",
+	       nb->offset, nb->len, nb->flags);
+}
+EXPORT_SYMBOL(dump_rniobuf);
+
+void dump_obdo(struct obdo *oa)
+{
+	__u32 valid = oa->o_valid;
+
+	CDEBUG(D_RPCTRACE, "obdo: o_valid = %08x\n", valid);
+	if (valid & OBD_MD_FLID)
+		CDEBUG(D_RPCTRACE, "obdo: id = "DOSTID"\n", POSTID(&oa->o_oi));
+	if (valid & OBD_MD_FLFID)
+		CDEBUG(D_RPCTRACE, "obdo: o_parent_seq = %#llx\n",
+		       oa->o_parent_seq);
+	if (valid & OBD_MD_FLSIZE)
+		CDEBUG(D_RPCTRACE, "obdo: o_size = %lld\n", oa->o_size);
+	if (valid & OBD_MD_FLMTIME)
+		CDEBUG(D_RPCTRACE, "obdo: o_mtime = %lld\n", oa->o_mtime);
+	if (valid & OBD_MD_FLATIME)
+		CDEBUG(D_RPCTRACE, "obdo: o_atime = %lld\n", oa->o_atime);
+	if (valid & OBD_MD_FLCTIME)
+		CDEBUG(D_RPCTRACE, "obdo: o_ctime = %lld\n", oa->o_ctime);
+	if (valid & OBD_MD_FLBLOCKS)   /* allocation of space */
+		CDEBUG(D_RPCTRACE, "obdo: o_blocks = %lld\n", oa->o_blocks);
+	if (valid & OBD_MD_FLGRANT)
+		CDEBUG(D_RPCTRACE, "obdo: o_grant = %lld\n", oa->o_grant);
+	if (valid & OBD_MD_FLBLKSZ)
+		CDEBUG(D_RPCTRACE, "obdo: o_blksize = %d\n", oa->o_blksize);
+	if (valid & (OBD_MD_FLTYPE | OBD_MD_FLMODE))
+		CDEBUG(D_RPCTRACE, "obdo: o_mode = %o\n",
+		       oa->o_mode & ((valid & OBD_MD_FLTYPE ?  S_IFMT : 0) |
+				     (valid & OBD_MD_FLMODE ? ~S_IFMT : 0)));
+	if (valid & OBD_MD_FLUID)
+		CDEBUG(D_RPCTRACE, "obdo: o_uid = %u\n", oa->o_uid);
+	if (valid & OBD_MD_FLUID)
+		CDEBUG(D_RPCTRACE, "obdo: o_uid_h = %u\n", oa->o_uid_h);
+	if (valid & OBD_MD_FLGID)
+		CDEBUG(D_RPCTRACE, "obdo: o_gid = %u\n", oa->o_gid);
+	if (valid & OBD_MD_FLGID)
+		CDEBUG(D_RPCTRACE, "obdo: o_gid_h = %u\n", oa->o_gid_h);
+	if (valid & OBD_MD_FLFLAGS)
+		CDEBUG(D_RPCTRACE, "obdo: o_flags = %x\n", oa->o_flags);
+	if (valid & OBD_MD_FLNLINK)
+		CDEBUG(D_RPCTRACE, "obdo: o_nlink = %u\n", oa->o_nlink);
+	else if (valid & OBD_MD_FLCKSUM)
+		CDEBUG(D_RPCTRACE, "obdo: o_checksum (o_nlink) = %u\n",
+		       oa->o_nlink);
+	if (valid & OBD_MD_FLGENER)
+		CDEBUG(D_RPCTRACE, "obdo: o_parent_oid = %x\n",
+		       oa->o_parent_oid);
+	if (valid & OBD_MD_FLEPOCH)
+		CDEBUG(D_RPCTRACE, "obdo: o_ioepoch = %lld\n",
+		       oa->o_ioepoch);
+	if (valid & OBD_MD_FLFID) {
+		CDEBUG(D_RPCTRACE, "obdo: o_stripe_idx = %u\n",
+		       oa->o_stripe_idx);
+		CDEBUG(D_RPCTRACE, "obdo: o_parent_ver = %x\n",
+		       oa->o_parent_ver);
+	}
+	if (valid & OBD_MD_FLHANDLE)
+		CDEBUG(D_RPCTRACE, "obdo: o_handle = %lld\n",
+		       oa->o_handle.cookie);
+	if (valid & OBD_MD_FLCOOKIE)
+		CDEBUG(D_RPCTRACE, "obdo: o_lcookie = (llog_cookie dumping not yet implemented)\n");
+}
+EXPORT_SYMBOL(dump_obdo);
+
+void dump_ost_body(struct ost_body *ob)
+{
+	dump_obdo(&ob->oa);
+}
+EXPORT_SYMBOL(dump_ost_body);
+
+void dump_rcs(__u32 *rc)
+{
+	CDEBUG(D_RPCTRACE, "rmf_rcs: %d\n", *rc);
+}
+EXPORT_SYMBOL(dump_rcs);
+
+static inline int req_ptlrpc_body_swabbed(struct ptlrpc_request *req)
+{
+	LASSERT(req->rq_reqmsg);
+
+	switch (req->rq_reqmsg->lm_magic) {
+	case LUSTRE_MSG_MAGIC_V2:
+		return lustre_req_swabbed(req, MSG_PTLRPC_BODY_OFF);
+	default:
+		CERROR("bad lustre msg magic: %#08X\n",
+		       req->rq_reqmsg->lm_magic);
+	}
+	return 0;
+}
+
+static inline int rep_ptlrpc_body_swabbed(struct ptlrpc_request *req)
+{
+	LASSERT(req->rq_repmsg);
+
+	switch (req->rq_repmsg->lm_magic) {
+	case LUSTRE_MSG_MAGIC_V2:
+		return lustre_rep_swabbed(req, MSG_PTLRPC_BODY_OFF);
+	default:
+		/* uninitialized yet */
+		return 0;
+	}
+}
+
+void _debug_req(struct ptlrpc_request *req,
+		struct libcfs_debug_msg_data *msgdata,
+		const char *fmt, ...)
+{
+	int req_ok = req->rq_reqmsg != NULL;
+	int rep_ok = req->rq_repmsg != NULL;
+	lnet_nid_t nid = LNET_NID_ANY;
+	va_list args;
+
+	if (ptlrpc_req_need_swab(req)) {
+		req_ok = req_ok && req_ptlrpc_body_swabbed(req);
+		rep_ok = rep_ok && rep_ptlrpc_body_swabbed(req);
+	}
+
+	if (req->rq_import && req->rq_import->imp_connection)
+		nid = req->rq_import->imp_connection->c_peer.nid;
+	else if (req->rq_export && req->rq_export->exp_connection)
+		nid = req->rq_export->exp_connection->c_peer.nid;
+
+	va_start(args, fmt);
+	libcfs_debug_vmsg2(msgdata, fmt, args,
+			   " req@%p x%llu/t%lld(%lld) o%d->%s@%s:%d/%d lens %d/%d e %d to %d dl " CFS_TIME_T " ref %d fl " REQ_FLAGS_FMT "/%x/%x rc %d/%d\n",
+			   req, req->rq_xid, req->rq_transno,
+			   req_ok ? lustre_msg_get_transno(req->rq_reqmsg) : 0,
+			   req_ok ? lustre_msg_get_opc(req->rq_reqmsg) : -1,
+			   req->rq_import ?
+			   req->rq_import->imp_obd->obd_name :
+			   req->rq_export ?
+			   req->rq_export->exp_client_uuid.uuid :
+			   "<?>",
+			   libcfs_nid2str(nid),
+			   req->rq_request_portal, req->rq_reply_portal,
+			   req->rq_reqlen, req->rq_replen,
+			   req->rq_early_count, req->rq_timedout,
+			   req->rq_deadline,
+			   atomic_read(&req->rq_refcount),
+			   DEBUG_REQ_FLAGS(req),
+			   req_ok ? lustre_msg_get_flags(req->rq_reqmsg) : -1,
+			   rep_ok ? lustre_msg_get_flags(req->rq_repmsg) : -1,
+			   req->rq_status,
+			   rep_ok ? lustre_msg_get_status(req->rq_repmsg) : -1);
+	va_end(args);
+}
+EXPORT_SYMBOL(_debug_req);
+
+void lustre_swab_lustre_capa(struct lustre_capa *c)
+{
+	lustre_swab_lu_fid(&c->lc_fid);
+	__swab64s(&c->lc_opc);
+	__swab64s(&c->lc_uid);
+	__swab64s(&c->lc_gid);
+	__swab32s(&c->lc_flags);
+	__swab32s(&c->lc_keyid);
+	__swab32s(&c->lc_timeout);
+	__swab32s(&c->lc_expiry);
+}
+EXPORT_SYMBOL(lustre_swab_lustre_capa);
+
+void lustre_swab_lustre_capa_key(struct lustre_capa_key *k)
+{
+	__swab64s(&k->lk_seq);
+	__swab32s(&k->lk_keyid);
+	CLASSERT(offsetof(typeof(*k), lk_padding) != 0);
+}
+EXPORT_SYMBOL(lustre_swab_lustre_capa_key);
+
+void lustre_swab_hsm_user_state(struct hsm_user_state *state)
+{
+	__swab32s(&state->hus_states);
+	__swab32s(&state->hus_archive_id);
+}
+EXPORT_SYMBOL(lustre_swab_hsm_user_state);
+
+void lustre_swab_hsm_state_set(struct hsm_state_set *hss)
+{
+	__swab32s(&hss->hss_valid);
+	__swab64s(&hss->hss_setmask);
+	__swab64s(&hss->hss_clearmask);
+	__swab32s(&hss->hss_archive_id);
+}
+EXPORT_SYMBOL(lustre_swab_hsm_state_set);
+
+void lustre_swab_hsm_extent(struct hsm_extent *extent)
+{
+	__swab64s(&extent->offset);
+	__swab64s(&extent->length);
+}
+
+void lustre_swab_hsm_current_action(struct hsm_current_action *action)
+{
+	__swab32s(&action->hca_state);
+	__swab32s(&action->hca_action);
+	lustre_swab_hsm_extent(&action->hca_location);
+}
+EXPORT_SYMBOL(lustre_swab_hsm_current_action);
+
+void lustre_swab_hsm_user_item(struct hsm_user_item *hui)
+{
+	lustre_swab_lu_fid(&hui->hui_fid);
+	lustre_swab_hsm_extent(&hui->hui_extent);
+}
+EXPORT_SYMBOL(lustre_swab_hsm_user_item);
+
+void lustre_swab_layout_intent(struct layout_intent *li)
+{
+	__swab32s(&li->li_opc);
+	__swab32s(&li->li_flags);
+	__swab64s(&li->li_start);
+	__swab64s(&li->li_end);
+}
+EXPORT_SYMBOL(lustre_swab_layout_intent);
+
+void lustre_swab_hsm_progress_kernel(struct hsm_progress_kernel *hpk)
+{
+	lustre_swab_lu_fid(&hpk->hpk_fid);
+	__swab64s(&hpk->hpk_cookie);
+	__swab64s(&hpk->hpk_extent.offset);
+	__swab64s(&hpk->hpk_extent.length);
+	__swab16s(&hpk->hpk_flags);
+	__swab16s(&hpk->hpk_errval);
+}
+EXPORT_SYMBOL(lustre_swab_hsm_progress_kernel);
+
+void lustre_swab_hsm_request(struct hsm_request *hr)
+{
+	__swab32s(&hr->hr_action);
+	__swab32s(&hr->hr_archive_id);
+	__swab64s(&hr->hr_flags);
+	__swab32s(&hr->hr_itemcount);
+	__swab32s(&hr->hr_data_len);
+}
+EXPORT_SYMBOL(lustre_swab_hsm_request);
+
+void lustre_swab_update_buf(struct update_buf *ub)
+{
+	__swab32s(&ub->ub_magic);
+	__swab32s(&ub->ub_count);
+}
+EXPORT_SYMBOL(lustre_swab_update_buf);
+
+void lustre_swab_update_reply_buf(struct update_reply *ur)
+{
+	int i;
+
+	__swab32s(&ur->ur_version);
+	__swab32s(&ur->ur_count);
+	for (i = 0; i < ur->ur_count; i++)
+		__swab32s(&ur->ur_lens[i]);
+}
+EXPORT_SYMBOL(lustre_swab_update_reply_buf);
+
+void lustre_swab_swap_layouts(struct mdc_swap_layouts *msl)
+{
+	__swab64s(&msl->msl_flags);
+}
+EXPORT_SYMBOL(lustre_swab_swap_layouts);
+
+void lustre_swab_close_data(struct close_data *cd)
+{
+	lustre_swab_lu_fid(&cd->cd_fid);
+	__swab64s(&cd->cd_data_version);
+}
+EXPORT_SYMBOL(lustre_swab_close_data);
diff --git a/drivers/staging/lustre/lustre/ptlrpc/pers.c b/drivers/staging/lustre/lustre/ptlrpc/pers.c
new file mode 100644
index 000000000..e1334c24e
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ptlrpc/pers.c
@@ -0,0 +1,75 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#define DEBUG_SUBSYSTEM S_RPC
+
+#include "../include/obd_support.h"
+#include "../include/obd_class.h"
+#include "../include/lustre_lib.h"
+#include "../include/lustre_ha.h"
+#include "../include/lustre_import.h"
+
+#include "ptlrpc_internal.h"
+
+
+void ptlrpc_fill_bulk_md(lnet_md_t *md, struct ptlrpc_bulk_desc *desc,
+			 int mdidx)
+{
+	CLASSERT(PTLRPC_MAX_BRW_PAGES < LI_POISON);
+
+	LASSERT(mdidx < desc->bd_md_max_brw);
+	LASSERT(desc->bd_iov_count <= PTLRPC_MAX_BRW_PAGES);
+	LASSERT(!(md->options & (LNET_MD_IOVEC | LNET_MD_KIOV |
+				 LNET_MD_PHYS)));
+
+	md->options |= LNET_MD_KIOV;
+	md->length = max(0, desc->bd_iov_count - mdidx * LNET_MAX_IOV);
+	md->length = min_t(unsigned int, LNET_MAX_IOV, md->length);
+	if (desc->bd_enc_iov)
+		md->start = &desc->bd_enc_iov[mdidx * LNET_MAX_IOV];
+	else
+		md->start = &desc->bd_iov[mdidx * LNET_MAX_IOV];
+}
+
+void ptlrpc_add_bulk_page(struct ptlrpc_bulk_desc *desc, struct page *page,
+			  int pageoffset, int len)
+{
+	lnet_kiov_t *kiov = &desc->bd_iov[desc->bd_iov_count];
+
+	kiov->kiov_page = page;
+	kiov->kiov_offset = pageoffset;
+	kiov->kiov_len = len;
+
+	desc->bd_iov_count++;
+}
diff --git a/drivers/staging/lustre/lustre/ptlrpc/pinger.c b/drivers/staging/lustre/lustre/ptlrpc/pinger.c
new file mode 100644
index 000000000..9dbda9332
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ptlrpc/pinger.c
@@ -0,0 +1,678 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/ptlrpc/pinger.c
+ *
+ * Portal-RPC reconnection and replay operations, for use in recovery.
+ */
+
+#define DEBUG_SUBSYSTEM S_RPC
+
+#include "../include/obd_support.h"
+#include "../include/obd_class.h"
+#include "ptlrpc_internal.h"
+
+static int suppress_pings;
+module_param(suppress_pings, int, 0644);
+MODULE_PARM_DESC(suppress_pings, "Suppress pings");
+
+struct mutex pinger_mutex;
+static LIST_HEAD(pinger_imports);
+static struct list_head timeout_list = LIST_HEAD_INIT(timeout_list);
+
+int ptlrpc_pinger_suppress_pings(void)
+{
+	return suppress_pings;
+}
+EXPORT_SYMBOL(ptlrpc_pinger_suppress_pings);
+
+struct ptlrpc_request *
+ptlrpc_prep_ping(struct obd_import *imp)
+{
+	struct ptlrpc_request *req;
+
+	req = ptlrpc_request_alloc_pack(imp, &RQF_OBD_PING,
+					LUSTRE_OBD_VERSION, OBD_PING);
+	if (req) {
+		ptlrpc_request_set_replen(req);
+		req->rq_no_resend = req->rq_no_delay = 1;
+	}
+	return req;
+}
+
+int ptlrpc_obd_ping(struct obd_device *obd)
+{
+	int rc;
+	struct ptlrpc_request *req;
+
+	req = ptlrpc_prep_ping(obd->u.cli.cl_import);
+	if (req == NULL)
+		return -ENOMEM;
+
+	req->rq_send_state = LUSTRE_IMP_FULL;
+
+	rc = ptlrpc_queue_wait(req);
+
+	ptlrpc_req_finished(req);
+
+	return rc;
+}
+EXPORT_SYMBOL(ptlrpc_obd_ping);
+
+int ptlrpc_ping(struct obd_import *imp)
+{
+	struct ptlrpc_request *req;
+
+	req = ptlrpc_prep_ping(imp);
+	if (req == NULL) {
+		CERROR("OOM trying to ping %s->%s\n",
+		       imp->imp_obd->obd_uuid.uuid,
+		       obd2cli_tgt(imp->imp_obd));
+		return -ENOMEM;
+	}
+
+	DEBUG_REQ(D_INFO, req, "pinging %s->%s",
+		  imp->imp_obd->obd_uuid.uuid, obd2cli_tgt(imp->imp_obd));
+	ptlrpcd_add_req(req, PDL_POLICY_ROUND, -1);
+
+	return 0;
+}
+
+void ptlrpc_update_next_ping(struct obd_import *imp, int soon)
+{
+	int time = soon ? PING_INTERVAL_SHORT : PING_INTERVAL;
+	if (imp->imp_state == LUSTRE_IMP_DISCON) {
+		int dtime = max_t(int, CONNECTION_SWITCH_MIN,
+				  AT_OFF ? 0 :
+				  at_get(&imp->imp_at.iat_net_latency));
+		time = min(time, dtime);
+	}
+	imp->imp_next_ping = cfs_time_shift(time);
+}
+
+void ptlrpc_ping_import_soon(struct obd_import *imp)
+{
+	imp->imp_next_ping = cfs_time_current();
+}
+
+static inline int imp_is_deactive(struct obd_import *imp)
+{
+	return (imp->imp_deactive ||
+		OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_IMP_DEACTIVE));
+}
+
+static inline int ptlrpc_next_reconnect(struct obd_import *imp)
+{
+	if (imp->imp_server_timeout)
+		return cfs_time_shift(obd_timeout / 2);
+	else
+		return cfs_time_shift(obd_timeout);
+}
+
+long pinger_check_timeout(unsigned long time)
+{
+	struct timeout_item *item;
+	unsigned long timeout = PING_INTERVAL;
+
+	/* The timeout list is a increase order sorted list */
+	mutex_lock(&pinger_mutex);
+	list_for_each_entry(item, &timeout_list, ti_chain) {
+		int ti_timeout = item->ti_timeout;
+		if (timeout > ti_timeout)
+			timeout = ti_timeout;
+		break;
+	}
+	mutex_unlock(&pinger_mutex);
+
+	return cfs_time_sub(cfs_time_add(time, cfs_time_seconds(timeout)),
+					 cfs_time_current());
+}
+
+static bool ir_up;
+
+void ptlrpc_pinger_ir_up(void)
+{
+	CDEBUG(D_HA, "IR up\n");
+	ir_up = true;
+}
+EXPORT_SYMBOL(ptlrpc_pinger_ir_up);
+
+void ptlrpc_pinger_ir_down(void)
+{
+	CDEBUG(D_HA, "IR down\n");
+	ir_up = false;
+}
+EXPORT_SYMBOL(ptlrpc_pinger_ir_down);
+
+static void ptlrpc_pinger_process_import(struct obd_import *imp,
+					 unsigned long this_ping)
+{
+	int level;
+	int force;
+	int force_next;
+	int suppress;
+
+	spin_lock(&imp->imp_lock);
+
+	level = imp->imp_state;
+	force = imp->imp_force_verify;
+	force_next = imp->imp_force_next_verify;
+	/*
+	 * This will be used below only if the import is "FULL".
+	 */
+	suppress = ir_up && OCD_HAS_FLAG(&imp->imp_connect_data, PINGLESS);
+
+	imp->imp_force_verify = 0;
+
+	if (cfs_time_aftereq(imp->imp_next_ping - 5 * CFS_TICK, this_ping) &&
+	    !force) {
+		spin_unlock(&imp->imp_lock);
+		return;
+	}
+
+	imp->imp_force_next_verify = 0;
+
+	spin_unlock(&imp->imp_lock);
+
+	CDEBUG(level == LUSTRE_IMP_FULL ? D_INFO : D_HA, "%s->%s: level %s/%u force %u force_next %u deactive %u pingable %u suppress %u\n",
+	       imp->imp_obd->obd_uuid.uuid, obd2cli_tgt(imp->imp_obd),
+	       ptlrpc_import_state_name(level), level, force, force_next,
+	       imp->imp_deactive, imp->imp_pingable, suppress);
+
+	if (level == LUSTRE_IMP_DISCON && !imp_is_deactive(imp)) {
+		/* wait for a while before trying recovery again */
+		imp->imp_next_ping = ptlrpc_next_reconnect(imp);
+		if (!imp->imp_no_pinger_recover)
+			ptlrpc_initiate_recovery(imp);
+	} else if (level != LUSTRE_IMP_FULL ||
+		   imp->imp_obd->obd_no_recov ||
+		   imp_is_deactive(imp)) {
+		CDEBUG(D_HA, "%s->%s: not pinging (in recovery or recovery disabled: %s)\n",
+		       imp->imp_obd->obd_uuid.uuid, obd2cli_tgt(imp->imp_obd),
+		       ptlrpc_import_state_name(level));
+		if (force) {
+			spin_lock(&imp->imp_lock);
+			imp->imp_force_verify = 1;
+			spin_unlock(&imp->imp_lock);
+		}
+	} else if ((imp->imp_pingable && !suppress) || force_next || force) {
+		ptlrpc_ping(imp);
+	}
+}
+
+static int ptlrpc_pinger_main(void *arg)
+{
+	struct ptlrpc_thread *thread = (struct ptlrpc_thread *)arg;
+
+	/* Record that the thread is running */
+	thread_set_flags(thread, SVC_RUNNING);
+	wake_up(&thread->t_ctl_waitq);
+
+	/* And now, loop forever, pinging as needed. */
+	while (1) {
+		unsigned long this_ping = cfs_time_current();
+		struct l_wait_info lwi;
+		long time_to_next_wake;
+		struct timeout_item *item;
+		struct list_head *iter;
+
+		mutex_lock(&pinger_mutex);
+		list_for_each_entry(item, &timeout_list, ti_chain) {
+			item->ti_cb(item, item->ti_cb_data);
+		}
+		list_for_each(iter, &pinger_imports) {
+			struct obd_import *imp =
+				list_entry(iter, struct obd_import,
+					       imp_pinger_chain);
+
+			ptlrpc_pinger_process_import(imp, this_ping);
+			/* obd_timeout might have changed */
+			if (imp->imp_pingable && imp->imp_next_ping &&
+			    cfs_time_after(imp->imp_next_ping,
+					   cfs_time_add(this_ping,
+							cfs_time_seconds(PING_INTERVAL))))
+				ptlrpc_update_next_ping(imp, 0);
+		}
+		mutex_unlock(&pinger_mutex);
+		/* update memory usage info */
+		obd_update_maxusage();
+
+		/* Wait until the next ping time, or until we're stopped. */
+		time_to_next_wake = pinger_check_timeout(this_ping);
+		/* The ping sent by ptlrpc_send_rpc may get sent out
+		   say .01 second after this.
+		   ptlrpc_pinger_sending_on_import will then set the
+		   next ping time to next_ping + .01 sec, which means
+		   we will SKIP the next ping at next_ping, and the
+		   ping will get sent 2 timeouts from now!  Beware. */
+		CDEBUG(D_INFO, "next wakeup in "CFS_DURATION_T" ("
+		       CFS_TIME_T")\n", time_to_next_wake,
+		       cfs_time_add(this_ping,
+				    cfs_time_seconds(PING_INTERVAL)));
+		if (time_to_next_wake > 0) {
+			lwi = LWI_TIMEOUT(max_t(long, time_to_next_wake,
+						cfs_time_seconds(1)),
+					  NULL, NULL);
+			l_wait_event(thread->t_ctl_waitq,
+				     thread_is_stopping(thread) ||
+				     thread_is_event(thread),
+				     &lwi);
+			if (thread_test_and_clear_flags(thread, SVC_STOPPING)) {
+				break;
+			} else {
+				/* woken after adding import to reset timer */
+				thread_test_and_clear_flags(thread, SVC_EVENT);
+			}
+		}
+	}
+
+	thread_set_flags(thread, SVC_STOPPED);
+	wake_up(&thread->t_ctl_waitq);
+
+	CDEBUG(D_NET, "pinger thread exiting, process %d\n", current_pid());
+	return 0;
+}
+
+static struct ptlrpc_thread pinger_thread;
+
+int ptlrpc_start_pinger(void)
+{
+	struct l_wait_info lwi = { 0 };
+	int rc;
+
+	if (!thread_is_init(&pinger_thread) &&
+	    !thread_is_stopped(&pinger_thread))
+		return -EALREADY;
+
+	init_waitqueue_head(&pinger_thread.t_ctl_waitq);
+
+	strcpy(pinger_thread.t_name, "ll_ping");
+
+	/* CLONE_VM and CLONE_FILES just avoid a needless copy, because we
+	 * just drop the VM and FILES in cfs_daemonize_ctxt() right away. */
+	rc = PTR_ERR(kthread_run(ptlrpc_pinger_main, &pinger_thread,
+				 "%s", pinger_thread.t_name));
+	if (IS_ERR_VALUE(rc)) {
+		CERROR("cannot start thread: %d\n", rc);
+		return rc;
+	}
+	l_wait_event(pinger_thread.t_ctl_waitq,
+		     thread_is_running(&pinger_thread), &lwi);
+
+	if (suppress_pings)
+		CWARN("Pings will be suppressed at the request of the administrator.  The configuration shall meet the additional requirements described in the manual.  (Search for the \"suppress_pings\" kernel module parameter.)\n");
+
+	return 0;
+}
+
+int ptlrpc_pinger_remove_timeouts(void);
+
+int ptlrpc_stop_pinger(void)
+{
+	struct l_wait_info lwi = { 0 };
+	int rc = 0;
+
+	if (thread_is_init(&pinger_thread) ||
+	    thread_is_stopped(&pinger_thread))
+		return -EALREADY;
+
+	ptlrpc_pinger_remove_timeouts();
+	thread_set_flags(&pinger_thread, SVC_STOPPING);
+	wake_up(&pinger_thread.t_ctl_waitq);
+
+	l_wait_event(pinger_thread.t_ctl_waitq,
+		     thread_is_stopped(&pinger_thread), &lwi);
+
+	return rc;
+}
+
+void ptlrpc_pinger_sending_on_import(struct obd_import *imp)
+{
+	ptlrpc_update_next_ping(imp, 0);
+}
+EXPORT_SYMBOL(ptlrpc_pinger_sending_on_import);
+
+void ptlrpc_pinger_commit_expected(struct obd_import *imp)
+{
+	ptlrpc_update_next_ping(imp, 1);
+	assert_spin_locked(&imp->imp_lock);
+	/*
+	 * Avoid reading stale imp_connect_data.  When not sure if pings are
+	 * expected or not on next connection, we assume they are not and force
+	 * one anyway to guarantee the chance of updating
+	 * imp_peer_committed_transno.
+	 */
+	if (imp->imp_state != LUSTRE_IMP_FULL ||
+	    OCD_HAS_FLAG(&imp->imp_connect_data, PINGLESS))
+		imp->imp_force_next_verify = 1;
+}
+
+int ptlrpc_pinger_add_import(struct obd_import *imp)
+{
+	if (!list_empty(&imp->imp_pinger_chain))
+		return -EALREADY;
+
+	mutex_lock(&pinger_mutex);
+	CDEBUG(D_HA, "adding pingable import %s->%s\n",
+	       imp->imp_obd->obd_uuid.uuid, obd2cli_tgt(imp->imp_obd));
+	/* if we add to pinger we want recovery on this import */
+	imp->imp_obd->obd_no_recov = 0;
+	ptlrpc_update_next_ping(imp, 0);
+	/* XXX sort, blah blah */
+	list_add_tail(&imp->imp_pinger_chain, &pinger_imports);
+	class_import_get(imp);
+
+	ptlrpc_pinger_wake_up();
+	mutex_unlock(&pinger_mutex);
+
+	return 0;
+}
+EXPORT_SYMBOL(ptlrpc_pinger_add_import);
+
+int ptlrpc_pinger_del_import(struct obd_import *imp)
+{
+	if (list_empty(&imp->imp_pinger_chain))
+		return -ENOENT;
+
+	mutex_lock(&pinger_mutex);
+	list_del_init(&imp->imp_pinger_chain);
+	CDEBUG(D_HA, "removing pingable import %s->%s\n",
+	       imp->imp_obd->obd_uuid.uuid, obd2cli_tgt(imp->imp_obd));
+	/* if we remove from pinger we don't want recovery on this import */
+	imp->imp_obd->obd_no_recov = 1;
+	class_import_put(imp);
+	mutex_unlock(&pinger_mutex);
+	return 0;
+}
+EXPORT_SYMBOL(ptlrpc_pinger_del_import);
+
+/**
+ * Register a timeout callback to the pinger list, and the callback will
+ * be called when timeout happens.
+ */
+struct timeout_item *ptlrpc_new_timeout(int time, enum timeout_event event,
+					timeout_cb_t cb, void *data)
+{
+	struct timeout_item *ti;
+
+	OBD_ALLOC_PTR(ti);
+	if (!ti)
+		return NULL;
+
+	INIT_LIST_HEAD(&ti->ti_obd_list);
+	INIT_LIST_HEAD(&ti->ti_chain);
+	ti->ti_timeout = time;
+	ti->ti_event = event;
+	ti->ti_cb = cb;
+	ti->ti_cb_data = data;
+
+	return ti;
+}
+
+/**
+ * Register timeout event on the pinger thread.
+ * Note: the timeout list is an sorted list with increased timeout value.
+ */
+static struct timeout_item*
+ptlrpc_pinger_register_timeout(int time, enum timeout_event event,
+			       timeout_cb_t cb, void *data)
+{
+	struct timeout_item *item, *tmp;
+
+	LASSERT(mutex_is_locked(&pinger_mutex));
+
+	list_for_each_entry(item, &timeout_list, ti_chain)
+		if (item->ti_event == event)
+			goto out;
+
+	item = ptlrpc_new_timeout(time, event, cb, data);
+	if (item) {
+		list_for_each_entry_reverse(tmp, &timeout_list, ti_chain) {
+			if (tmp->ti_timeout < time) {
+				list_add(&item->ti_chain, &tmp->ti_chain);
+				goto out;
+			}
+		}
+		list_add(&item->ti_chain, &timeout_list);
+	}
+out:
+	return item;
+}
+
+/* Add a client_obd to the timeout event list, when timeout(@time)
+ * happens, the callback(@cb) will be called.
+ */
+int ptlrpc_add_timeout_client(int time, enum timeout_event event,
+			      timeout_cb_t cb, void *data,
+			      struct list_head *obd_list)
+{
+	struct timeout_item *ti;
+
+	mutex_lock(&pinger_mutex);
+	ti = ptlrpc_pinger_register_timeout(time, event, cb, data);
+	if (!ti) {
+		mutex_unlock(&pinger_mutex);
+		return -EINVAL;
+	}
+	list_add(obd_list, &ti->ti_obd_list);
+	mutex_unlock(&pinger_mutex);
+	return 0;
+}
+EXPORT_SYMBOL(ptlrpc_add_timeout_client);
+
+int ptlrpc_del_timeout_client(struct list_head *obd_list,
+			      enum timeout_event event)
+{
+	struct timeout_item *ti = NULL, *item;
+
+	if (list_empty(obd_list))
+		return 0;
+	mutex_lock(&pinger_mutex);
+	list_del_init(obd_list);
+	/**
+	 * If there are no obd attached to the timeout event
+	 * list, remove this timeout event from the pinger
+	 */
+	list_for_each_entry(item, &timeout_list, ti_chain) {
+		if (item->ti_event == event) {
+			ti = item;
+			break;
+		}
+	}
+	LASSERTF(ti != NULL, "ti is NULL !\n");
+	if (list_empty(&ti->ti_obd_list)) {
+		list_del(&ti->ti_chain);
+		OBD_FREE_PTR(ti);
+	}
+	mutex_unlock(&pinger_mutex);
+	return 0;
+}
+EXPORT_SYMBOL(ptlrpc_del_timeout_client);
+
+int ptlrpc_pinger_remove_timeouts(void)
+{
+	struct timeout_item *item, *tmp;
+
+	mutex_lock(&pinger_mutex);
+	list_for_each_entry_safe(item, tmp, &timeout_list, ti_chain) {
+		LASSERT(list_empty(&item->ti_obd_list));
+		list_del(&item->ti_chain);
+		OBD_FREE_PTR(item);
+	}
+	mutex_unlock(&pinger_mutex);
+	return 0;
+}
+
+void ptlrpc_pinger_wake_up(void)
+{
+	thread_add_flags(&pinger_thread, SVC_EVENT);
+	wake_up(&pinger_thread.t_ctl_waitq);
+}
+
+/* Ping evictor thread */
+#define PET_READY     1
+#define PET_TERMINATE 2
+
+static int pet_refcount;
+static int	       pet_state;
+static wait_queue_head_t       pet_waitq;
+LIST_HEAD(pet_list);
+static DEFINE_SPINLOCK(pet_lock);
+
+int ping_evictor_wake(struct obd_export *exp)
+{
+	struct obd_device *obd;
+
+	spin_lock(&pet_lock);
+	if (pet_state != PET_READY) {
+		/* eventually the new obd will call here again. */
+		spin_unlock(&pet_lock);
+		return 1;
+	}
+
+	obd = class_exp2obd(exp);
+	if (list_empty(&obd->obd_evict_list)) {
+		class_incref(obd, "evictor", obd);
+		list_add(&obd->obd_evict_list, &pet_list);
+	}
+	spin_unlock(&pet_lock);
+
+	wake_up(&pet_waitq);
+	return 0;
+}
+
+static int ping_evictor_main(void *arg)
+{
+	struct obd_device *obd;
+	struct obd_export *exp;
+	struct l_wait_info lwi = { 0 };
+	time_t expire_time;
+
+	unshare_fs_struct();
+
+	CDEBUG(D_HA, "Starting Ping Evictor\n");
+	pet_state = PET_READY;
+	while (1) {
+		l_wait_event(pet_waitq, (!list_empty(&pet_list)) ||
+			     (pet_state == PET_TERMINATE), &lwi);
+
+		/* loop until all obd's will be removed */
+		if ((pet_state == PET_TERMINATE) && list_empty(&pet_list))
+			break;
+
+		/* we only get here if pet_exp != NULL, and the end of this
+		 * loop is the only place which sets it NULL again, so lock
+		 * is not strictly necessary. */
+		spin_lock(&pet_lock);
+		obd = list_entry(pet_list.next, struct obd_device,
+				     obd_evict_list);
+		spin_unlock(&pet_lock);
+
+		expire_time = get_seconds() - PING_EVICT_TIMEOUT;
+
+		CDEBUG(D_HA, "evicting all exports of obd %s older than %ld\n",
+		       obd->obd_name, expire_time);
+
+		/* Exports can't be deleted out of the list while we hold
+		 * the obd lock (class_unlink_export), which means we can't
+		 * lose the last ref on the export.  If they've already been
+		 * removed from the list, we won't find them here. */
+		spin_lock(&obd->obd_dev_lock);
+		while (!list_empty(&obd->obd_exports_timed)) {
+			exp = list_entry(obd->obd_exports_timed.next,
+					     struct obd_export,
+					     exp_obd_chain_timed);
+			if (expire_time > exp->exp_last_request_time) {
+				class_export_get(exp);
+				spin_unlock(&obd->obd_dev_lock);
+				LCONSOLE_WARN("%s: haven't heard from client %s (at %s) in %ld seconds. I think it's dead, and I am evicting it. exp %p, cur %ld expire %ld last %ld\n",
+					      obd->obd_name,
+					      obd_uuid2str(&exp->exp_client_uuid),
+					      obd_export_nid2str(exp),
+					      (long)(get_seconds() -
+						     exp->exp_last_request_time),
+					      exp, (long)get_seconds(),
+					      (long)expire_time,
+					      (long)exp->exp_last_request_time);
+				CDEBUG(D_HA, "Last request was at %ld\n",
+				       exp->exp_last_request_time);
+				class_fail_export(exp);
+				class_export_put(exp);
+				spin_lock(&obd->obd_dev_lock);
+			} else {
+				/* List is sorted, so everyone below is ok */
+				break;
+			}
+		}
+		spin_unlock(&obd->obd_dev_lock);
+
+		spin_lock(&pet_lock);
+		list_del_init(&obd->obd_evict_list);
+		spin_unlock(&pet_lock);
+
+		class_decref(obd, "evictor", obd);
+	}
+	CDEBUG(D_HA, "Exiting Ping Evictor\n");
+
+	return 0;
+}
+
+void ping_evictor_start(void)
+{
+	struct task_struct *task;
+
+	if (++pet_refcount > 1)
+		return;
+
+	init_waitqueue_head(&pet_waitq);
+
+	task = kthread_run(ping_evictor_main, NULL, "ll_evictor");
+	if (IS_ERR(task)) {
+		pet_refcount--;
+		CERROR("Cannot start ping evictor thread: %ld\n",
+			PTR_ERR(task));
+	}
+}
+EXPORT_SYMBOL(ping_evictor_start);
+
+void ping_evictor_stop(void)
+{
+	if (--pet_refcount > 0)
+		return;
+
+	pet_state = PET_TERMINATE;
+	wake_up(&pet_waitq);
+}
+EXPORT_SYMBOL(ping_evictor_stop);
diff --git a/drivers/staging/lustre/lustre/ptlrpc/ptlrpc_internal.h b/drivers/staging/lustre/lustre/ptlrpc/ptlrpc_internal.h
new file mode 100644
index 000000000..a66dc3c6d
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ptlrpc/ptlrpc_internal.h
@@ -0,0 +1,312 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+/* Intramodule declarations for ptlrpc. */
+
+#ifndef PTLRPC_INTERNAL_H
+#define PTLRPC_INTERNAL_H
+
+#include "../ldlm/ldlm_internal.h"
+
+struct ldlm_namespace;
+struct obd_import;
+struct ldlm_res_id;
+struct ptlrpc_request_set;
+extern int test_req_buffer_pressure;
+extern struct mutex ptlrpc_all_services_mutex;
+
+int ptlrpc_start_thread(struct ptlrpc_service_part *svcpt, int wait);
+/* ptlrpcd.c */
+int ptlrpcd_start(int index, int max, const char *name, struct ptlrpcd_ctl *pc);
+
+/* client.c */
+struct ptlrpc_bulk_desc *ptlrpc_new_bulk(unsigned npages, unsigned max_brw,
+					 unsigned type, unsigned portal);
+int ptlrpc_request_cache_init(void);
+void ptlrpc_request_cache_fini(void);
+struct ptlrpc_request *ptlrpc_request_cache_alloc(gfp_t flags);
+void ptlrpc_request_cache_free(struct ptlrpc_request *req);
+void ptlrpc_init_xid(void);
+
+/* events.c */
+int ptlrpc_init_portals(void);
+void ptlrpc_exit_portals(void);
+
+void ptlrpc_request_handle_notconn(struct ptlrpc_request *);
+void lustre_assert_wire_constants(void);
+int ptlrpc_import_in_recovery(struct obd_import *imp);
+int ptlrpc_set_import_discon(struct obd_import *imp, __u32 conn_cnt);
+void ptlrpc_handle_failed_import(struct obd_import *imp);
+int ptlrpc_replay_next(struct obd_import *imp, int *inflight);
+void ptlrpc_initiate_recovery(struct obd_import *imp);
+
+int lustre_unpack_req_ptlrpc_body(struct ptlrpc_request *req, int offset);
+int lustre_unpack_rep_ptlrpc_body(struct ptlrpc_request *req, int offset);
+
+#if defined(CONFIG_PROC_FS)
+void ptlrpc_lprocfs_register_service(struct proc_dir_entry *proc_entry,
+				     struct ptlrpc_service *svc);
+void ptlrpc_lprocfs_unregister_service(struct ptlrpc_service *svc);
+void ptlrpc_lprocfs_rpc_sent(struct ptlrpc_request *req, long amount);
+void ptlrpc_lprocfs_do_request_stat(struct ptlrpc_request *req,
+				     long q_usec, long work_usec);
+#else
+#define ptlrpc_lprocfs_register_service(params...) do {} while (0)
+#define ptlrpc_lprocfs_unregister_service(params...) do {} while (0)
+#define ptlrpc_lprocfs_rpc_sent(params...) do {} while (0)
+#define ptlrpc_lprocfs_do_request_stat(params...) do {} while (0)
+#endif /* CONFIG_PROC_FS */
+
+/* NRS */
+
+/**
+ * NRS core object.
+ *
+ * Holds NRS core fields.
+ */
+struct nrs_core {
+	/**
+	 * Protects nrs_core::nrs_policies, serializes external policy
+	 * registration/unregistration, and NRS core lprocfs operations.
+	 */
+	struct mutex nrs_mutex;
+	/* XXX: This is just for liblustre. Remove the #if defined directive
+	 * when the * "cfs_" prefix is dropped from cfs_list_head. */
+	/**
+	 * List of all policy descriptors registered with NRS core; protected
+	 * by nrs_core::nrs_mutex.
+	 */
+	struct list_head nrs_policies;
+
+};
+
+int ptlrpc_service_nrs_setup(struct ptlrpc_service *svc);
+void ptlrpc_service_nrs_cleanup(struct ptlrpc_service *svc);
+
+void ptlrpc_nrs_req_initialize(struct ptlrpc_service_part *svcpt,
+			       struct ptlrpc_request *req, bool hp);
+void ptlrpc_nrs_req_finalize(struct ptlrpc_request *req);
+void ptlrpc_nrs_req_stop_nolock(struct ptlrpc_request *req);
+void ptlrpc_nrs_req_add(struct ptlrpc_service_part *svcpt,
+			struct ptlrpc_request *req, bool hp);
+
+struct ptlrpc_request *
+ptlrpc_nrs_req_get_nolock0(struct ptlrpc_service_part *svcpt, bool hp,
+			   bool peek, bool force);
+
+static inline struct ptlrpc_request *
+ptlrpc_nrs_req_get_nolock(struct ptlrpc_service_part *svcpt, bool hp,
+			  bool force)
+{
+	return ptlrpc_nrs_req_get_nolock0(svcpt, hp, false, force);
+}
+
+static inline struct ptlrpc_request *
+ptlrpc_nrs_req_peek_nolock(struct ptlrpc_service_part *svcpt, bool hp)
+{
+	return ptlrpc_nrs_req_get_nolock0(svcpt, hp, true, false);
+}
+
+void ptlrpc_nrs_req_del_nolock(struct ptlrpc_request *req);
+bool ptlrpc_nrs_req_pending_nolock(struct ptlrpc_service_part *svcpt, bool hp);
+
+int ptlrpc_nrs_policy_control(const struct ptlrpc_service *svc,
+			      enum ptlrpc_nrs_queue_type queue, char *name,
+			      enum ptlrpc_nrs_ctl opc, bool single, void *arg);
+
+int ptlrpc_nrs_init(void);
+void ptlrpc_nrs_fini(void);
+
+static inline bool nrs_svcpt_has_hp(const struct ptlrpc_service_part *svcpt)
+{
+	return svcpt->scp_nrs_hp != NULL;
+}
+
+static inline bool nrs_svc_has_hp(const struct ptlrpc_service *svc)
+{
+	/**
+	 * If the first service partition has an HP NRS head, all service
+	 * partitions will.
+	 */
+	return nrs_svcpt_has_hp(svc->srv_parts[0]);
+}
+
+static inline
+struct ptlrpc_nrs *nrs_svcpt2nrs(struct ptlrpc_service_part *svcpt, bool hp)
+{
+	LASSERT(ergo(hp, nrs_svcpt_has_hp(svcpt)));
+	return hp ? svcpt->scp_nrs_hp : &svcpt->scp_nrs_reg;
+}
+
+static inline int nrs_pol2cptid(const struct ptlrpc_nrs_policy *policy)
+{
+	return policy->pol_nrs->nrs_svcpt->scp_cpt;
+}
+
+static inline
+struct ptlrpc_service *nrs_pol2svc(struct ptlrpc_nrs_policy *policy)
+{
+	return policy->pol_nrs->nrs_svcpt->scp_service;
+}
+
+static inline
+struct ptlrpc_service_part *nrs_pol2svcpt(struct ptlrpc_nrs_policy *policy)
+{
+	return policy->pol_nrs->nrs_svcpt;
+}
+
+static inline
+struct cfs_cpt_table *nrs_pol2cptab(struct ptlrpc_nrs_policy *policy)
+{
+	return nrs_pol2svc(policy)->srv_cptable;
+}
+
+static inline struct ptlrpc_nrs_resource *
+nrs_request_resource(struct ptlrpc_nrs_request *nrq)
+{
+	LASSERT(nrq->nr_initialized);
+	LASSERT(!nrq->nr_finalized);
+
+	return nrq->nr_res_ptrs[nrq->nr_res_idx];
+}
+
+static inline
+struct ptlrpc_nrs_policy *nrs_request_policy(struct ptlrpc_nrs_request *nrq)
+{
+	return nrs_request_resource(nrq)->res_policy;
+}
+
+#define NRS_LPROCFS_QUANTUM_NAME_REG	"reg_quantum:"
+#define NRS_LPROCFS_QUANTUM_NAME_HP	"hp_quantum:"
+
+/**
+ * the maximum size of nrs_crrn_client::cc_quantum and nrs_orr_data::od_quantum.
+ */
+#define LPROCFS_NRS_QUANTUM_MAX		65535
+
+/**
+ * Max valid command string is the size of the labels, plus "65535" twice, plus
+ * a separating space character.
+ */
+#define LPROCFS_NRS_WR_QUANTUM_MAX_CMD					       \
+ sizeof(NRS_LPROCFS_QUANTUM_NAME_REG __stringify(LPROCFS_NRS_QUANTUM_MAX) " "  \
+	NRS_LPROCFS_QUANTUM_NAME_HP __stringify(LPROCFS_NRS_QUANTUM_MAX))
+
+/* recovd_thread.c */
+
+int ptlrpc_expire_one_request(struct ptlrpc_request *req, int async_unlink);
+
+/* pers.c */
+void ptlrpc_fill_bulk_md(lnet_md_t *md, struct ptlrpc_bulk_desc *desc,
+			 int mdcnt);
+void ptlrpc_add_bulk_page(struct ptlrpc_bulk_desc *desc, struct page *page,
+			  int pageoffset, int len);
+
+/* pack_generic.c */
+struct ptlrpc_reply_state *
+lustre_get_emerg_rs(struct ptlrpc_service_part *svcpt);
+void lustre_put_emerg_rs(struct ptlrpc_reply_state *rs);
+
+/* pinger.c */
+int ptlrpc_start_pinger(void);
+int ptlrpc_stop_pinger(void);
+void ptlrpc_pinger_sending_on_import(struct obd_import *imp);
+void ptlrpc_pinger_commit_expected(struct obd_import *imp);
+void ptlrpc_pinger_wake_up(void);
+void ptlrpc_ping_import_soon(struct obd_import *imp);
+int ping_evictor_wake(struct obd_export *exp);
+
+/* sec_null.c */
+int  sptlrpc_null_init(void);
+void sptlrpc_null_fini(void);
+
+/* sec_plain.c */
+int  sptlrpc_plain_init(void);
+void sptlrpc_plain_fini(void);
+
+/* sec_bulk.c */
+int  sptlrpc_enc_pool_init(void);
+void sptlrpc_enc_pool_fini(void);
+int sptlrpc_proc_enc_pool_seq_show(struct seq_file *m, void *v);
+
+/* sec_lproc.c */
+#if defined(CONFIG_PROC_FS)
+int  sptlrpc_lproc_init(void);
+void sptlrpc_lproc_fini(void);
+#else
+static inline int sptlrpc_lproc_init(void)
+{ return 0; }
+static inline void sptlrpc_lproc_fini(void) {}
+#endif
+
+/* sec_gc.c */
+int sptlrpc_gc_init(void);
+void sptlrpc_gc_fini(void);
+
+/* sec_config.c */
+void sptlrpc_conf_choose_flavor(enum lustre_sec_part from,
+				enum lustre_sec_part to,
+				struct obd_uuid *target,
+				lnet_nid_t nid,
+				struct sptlrpc_flavor *sf);
+int  sptlrpc_conf_init(void);
+void sptlrpc_conf_fini(void);
+
+/* sec.c */
+int  sptlrpc_init(void);
+void sptlrpc_fini(void);
+
+static inline int ll_rpc_recoverable_error(int rc)
+{
+	return (rc == -ENOTCONN || rc == -ENODEV);
+}
+
+static inline int tgt_mod_init(void)
+{
+	return 0;
+}
+
+static inline void tgt_mod_exit(void)
+{
+	return;
+}
+
+static inline void ptlrpc_reqset_put(struct ptlrpc_request_set *set)
+{
+	if (atomic_dec_and_test(&set->set_refcount))
+		OBD_FREE_PTR(set);
+}
+#endif /* PTLRPC_INTERNAL_H */
diff --git a/drivers/staging/lustre/lustre/ptlrpc/ptlrpc_module.c b/drivers/staging/lustre/lustre/ptlrpc/ptlrpc_module.c
new file mode 100644
index 000000000..5268887ca
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ptlrpc/ptlrpc_module.c
@@ -0,0 +1,171 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#define DEBUG_SUBSYSTEM S_RPC
+
+
+#include "../include/obd_support.h"
+#include "../include/obd_class.h"
+#include "../include/lustre_net.h"
+#include "../include/lustre_req_layout.h"
+
+#include "ptlrpc_internal.h"
+
+extern spinlock_t ptlrpc_last_xid_lock;
+#if RS_DEBUG
+extern spinlock_t ptlrpc_rs_debug_lock;
+#endif
+extern struct mutex pinger_mutex;
+extern struct mutex ptlrpcd_mutex;
+
+__init int ptlrpc_init(void)
+{
+	int rc, cleanup_phase = 0;
+
+	lustre_assert_wire_constants();
+#if RS_DEBUG
+	spin_lock_init(&ptlrpc_rs_debug_lock);
+#endif
+	mutex_init(&ptlrpc_all_services_mutex);
+	mutex_init(&pinger_mutex);
+	mutex_init(&ptlrpcd_mutex);
+	ptlrpc_init_xid();
+
+	rc = req_layout_init();
+	if (rc)
+		return rc;
+
+	rc = ptlrpc_hr_init();
+	if (rc)
+		return rc;
+
+	cleanup_phase = 1;
+	rc = ptlrpc_request_cache_init();
+	if (rc)
+		goto cleanup;
+
+	cleanup_phase = 2;
+	rc = ptlrpc_init_portals();
+	if (rc)
+		goto cleanup;
+
+	cleanup_phase = 3;
+
+	rc = ptlrpc_connection_init();
+	if (rc)
+		goto cleanup;
+
+	cleanup_phase = 4;
+	ptlrpc_put_connection_superhack = ptlrpc_connection_put;
+
+	rc = ptlrpc_start_pinger();
+	if (rc)
+		goto cleanup;
+
+	cleanup_phase = 5;
+	rc = ldlm_init();
+	if (rc)
+		goto cleanup;
+
+	cleanup_phase = 6;
+	rc = sptlrpc_init();
+	if (rc)
+		goto cleanup;
+
+	cleanup_phase = 7;
+	rc = ptlrpc_nrs_init();
+	if (rc)
+		goto cleanup;
+
+	cleanup_phase = 8;
+	rc = tgt_mod_init();
+	if (rc)
+		goto cleanup;
+	return 0;
+
+cleanup:
+	switch (cleanup_phase) {
+	case 8:
+		ptlrpc_nrs_fini();
+		/* Fall through */
+	case 7:
+		sptlrpc_fini();
+		/* Fall through */
+	case 6:
+		ldlm_exit();
+		/* Fall through */
+	case 5:
+		ptlrpc_stop_pinger();
+		/* Fall through */
+	case 4:
+		ptlrpc_connection_fini();
+		/* Fall through */
+	case 3:
+		ptlrpc_exit_portals();
+		/* Fall through */
+	case 2:
+		ptlrpc_request_cache_fini();
+		/* Fall through */
+	case 1:
+		ptlrpc_hr_fini();
+		req_layout_fini();
+		/* Fall through */
+	default: ;
+	}
+
+	return rc;
+}
+
+static void __exit ptlrpc_exit(void)
+{
+	tgt_mod_exit();
+	ptlrpc_nrs_fini();
+	sptlrpc_fini();
+	ldlm_exit();
+	ptlrpc_stop_pinger();
+	ptlrpc_exit_portals();
+	ptlrpc_request_cache_fini();
+	ptlrpc_hr_fini();
+	ptlrpc_connection_fini();
+}
+
+MODULE_AUTHOR("Sun Microsystems, Inc. <http://www.lustre.org/>");
+MODULE_DESCRIPTION("Lustre Request Processor and Lock Management");
+MODULE_LICENSE("GPL");
+MODULE_VERSION("1.0.0");
+
+module_init(ptlrpc_init);
+module_exit(ptlrpc_exit);
diff --git a/drivers/staging/lustre/lustre/ptlrpc/ptlrpcd.c b/drivers/staging/lustre/lustre/ptlrpc/ptlrpcd.c
new file mode 100644
index 000000000..0c178ec0e
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ptlrpc/ptlrpcd.c
@@ -0,0 +1,811 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/ptlrpc/ptlrpcd.c
+ */
+
+/** \defgroup ptlrpcd PortalRPC daemon
+ *
+ * ptlrpcd is a special thread with its own set where other user might add
+ * requests when they don't want to wait for their completion.
+ * PtlRPCD will take care of sending such requests and then processing their
+ * replies and calling completion callbacks as necessary.
+ * The callbacks are called directly from ptlrpcd context.
+ * It is important to never significantly block (esp. on RPCs!) within such
+ * completion handler or a deadlock might occur where ptlrpcd enters some
+ * callback that attempts to send another RPC and wait for it to return,
+ * during which time ptlrpcd is completely blocked, so e.g. if import
+ * fails, recovery cannot progress because connection requests are also
+ * sent by ptlrpcd.
+ *
+ * @{
+ */
+
+#define DEBUG_SUBSYSTEM S_RPC
+
+#include "../../include/linux/libcfs/libcfs.h"
+
+#include "../include/lustre_net.h"
+#include "../include/lustre_lib.h"
+#include "../include/lustre_ha.h"
+#include "../include/obd_class.h"	/* for obd_zombie */
+#include "../include/obd_support.h"	/* for OBD_FAIL_CHECK */
+#include "../include/cl_object.h"	/* cl_env_{get,put}() */
+#include "../include/lprocfs_status.h"
+
+#include "ptlrpc_internal.h"
+
+struct ptlrpcd {
+	int		pd_size;
+	int		pd_index;
+	int		pd_nthreads;
+	struct ptlrpcd_ctl pd_thread_rcv;
+	struct ptlrpcd_ctl pd_threads[0];
+};
+
+static int max_ptlrpcds;
+module_param(max_ptlrpcds, int, 0644);
+MODULE_PARM_DESC(max_ptlrpcds, "Max ptlrpcd thread count to be started.");
+
+static int ptlrpcd_bind_policy = PDB_POLICY_PAIR;
+module_param(ptlrpcd_bind_policy, int, 0644);
+MODULE_PARM_DESC(ptlrpcd_bind_policy, "Ptlrpcd threads binding mode.");
+static struct ptlrpcd *ptlrpcds;
+
+struct mutex ptlrpcd_mutex;
+static int ptlrpcd_users;
+
+void ptlrpcd_wake(struct ptlrpc_request *req)
+{
+	struct ptlrpc_request_set *rq_set = req->rq_set;
+
+	LASSERT(rq_set != NULL);
+
+	wake_up(&rq_set->set_waitq);
+}
+EXPORT_SYMBOL(ptlrpcd_wake);
+
+static struct ptlrpcd_ctl *
+ptlrpcd_select_pc(struct ptlrpc_request *req, pdl_policy_t policy, int index)
+{
+	int idx = 0;
+
+	if (req != NULL && req->rq_send_state != LUSTRE_IMP_FULL)
+		return &ptlrpcds->pd_thread_rcv;
+
+	switch (policy) {
+	case PDL_POLICY_SAME:
+		idx = smp_processor_id() % ptlrpcds->pd_nthreads;
+		break;
+	case PDL_POLICY_LOCAL:
+		/* Before CPU partition patches available, process it the same
+		 * as "PDL_POLICY_ROUND". */
+# ifdef CFS_CPU_MODE_NUMA
+# warning "fix this code to use new CPU partition APIs"
+# endif
+		/* Fall through to PDL_POLICY_ROUND until the CPU
+		 * CPU partition patches are available. */
+		index = -1;
+	case PDL_POLICY_PREFERRED:
+		if (index >= 0 && index < num_online_cpus()) {
+			idx = index % ptlrpcds->pd_nthreads;
+			break;
+		}
+		/* Fall through to PDL_POLICY_ROUND for bad index. */
+	default:
+		/* Fall through to PDL_POLICY_ROUND for unknown policy. */
+	case PDL_POLICY_ROUND:
+		/* We do not care whether it is strict load balance. */
+		idx = ptlrpcds->pd_index + 1;
+		if (idx == smp_processor_id())
+			idx++;
+		idx %= ptlrpcds->pd_nthreads;
+		ptlrpcds->pd_index = idx;
+		break;
+	}
+
+	return &ptlrpcds->pd_threads[idx];
+}
+
+/**
+ * Move all request from an existing request set to the ptlrpcd queue.
+ * All requests from the set must be in phase RQ_PHASE_NEW.
+ */
+void ptlrpcd_add_rqset(struct ptlrpc_request_set *set)
+{
+	struct list_head *tmp, *pos;
+	struct ptlrpcd_ctl *pc;
+	struct ptlrpc_request_set *new;
+	int count, i;
+
+	pc = ptlrpcd_select_pc(NULL, PDL_POLICY_LOCAL, -1);
+	new = pc->pc_set;
+
+	list_for_each_safe(pos, tmp, &set->set_requests) {
+		struct ptlrpc_request *req =
+			list_entry(pos, struct ptlrpc_request,
+				       rq_set_chain);
+
+		LASSERT(req->rq_phase == RQ_PHASE_NEW);
+		req->rq_set = new;
+		req->rq_queued_time = cfs_time_current();
+	}
+
+	spin_lock(&new->set_new_req_lock);
+	list_splice_init(&set->set_requests, &new->set_new_requests);
+	i = atomic_read(&set->set_remaining);
+	count = atomic_add_return(i, &new->set_new_count);
+	atomic_set(&set->set_remaining, 0);
+	spin_unlock(&new->set_new_req_lock);
+	if (count == i) {
+		wake_up(&new->set_waitq);
+
+		/* XXX: It maybe unnecessary to wakeup all the partners. But to
+		 *      guarantee the async RPC can be processed ASAP, we have
+		 *      no other better choice. It maybe fixed in future. */
+		for (i = 0; i < pc->pc_npartners; i++)
+			wake_up(&pc->pc_partners[i]->pc_set->set_waitq);
+	}
+}
+EXPORT_SYMBOL(ptlrpcd_add_rqset);
+
+/**
+ * Return transferred RPCs count.
+ */
+static int ptlrpcd_steal_rqset(struct ptlrpc_request_set *des,
+			       struct ptlrpc_request_set *src)
+{
+	struct list_head *tmp, *pos;
+	struct ptlrpc_request *req;
+	int rc = 0;
+
+	spin_lock(&src->set_new_req_lock);
+	if (likely(!list_empty(&src->set_new_requests))) {
+		list_for_each_safe(pos, tmp, &src->set_new_requests) {
+			req = list_entry(pos, struct ptlrpc_request,
+					     rq_set_chain);
+			req->rq_set = des;
+		}
+		list_splice_init(&src->set_new_requests,
+				     &des->set_requests);
+		rc = atomic_read(&src->set_new_count);
+		atomic_add(rc, &des->set_remaining);
+		atomic_set(&src->set_new_count, 0);
+	}
+	spin_unlock(&src->set_new_req_lock);
+	return rc;
+}
+
+/**
+ * Requests that are added to the ptlrpcd queue are sent via
+ * ptlrpcd_check->ptlrpc_check_set().
+ */
+void ptlrpcd_add_req(struct ptlrpc_request *req, pdl_policy_t policy, int idx)
+{
+	struct ptlrpcd_ctl *pc;
+
+	if (req->rq_reqmsg)
+		lustre_msg_set_jobid(req->rq_reqmsg, NULL);
+
+	spin_lock(&req->rq_lock);
+	if (req->rq_invalid_rqset) {
+		struct l_wait_info lwi = LWI_TIMEOUT(cfs_time_seconds(5),
+						     back_to_sleep, NULL);
+
+		req->rq_invalid_rqset = 0;
+		spin_unlock(&req->rq_lock);
+		l_wait_event(req->rq_set_waitq, (req->rq_set == NULL), &lwi);
+	} else if (req->rq_set) {
+		/* If we have a valid "rq_set", just reuse it to avoid double
+		 * linked. */
+		LASSERT(req->rq_phase == RQ_PHASE_NEW);
+		LASSERT(req->rq_send_state == LUSTRE_IMP_REPLAY);
+
+		/* ptlrpc_check_set will decrease the count */
+		atomic_inc(&req->rq_set->set_remaining);
+		spin_unlock(&req->rq_lock);
+		wake_up(&req->rq_set->set_waitq);
+		return;
+	} else {
+		spin_unlock(&req->rq_lock);
+	}
+
+	pc = ptlrpcd_select_pc(req, policy, idx);
+
+	DEBUG_REQ(D_INFO, req, "add req [%p] to pc [%s:%d]",
+		  req, pc->pc_name, pc->pc_index);
+
+	ptlrpc_set_add_new_req(pc, req);
+}
+EXPORT_SYMBOL(ptlrpcd_add_req);
+
+static inline void ptlrpc_reqset_get(struct ptlrpc_request_set *set)
+{
+	atomic_inc(&set->set_refcount);
+}
+
+/**
+ * Check if there is more work to do on ptlrpcd set.
+ * Returns 1 if yes.
+ */
+static int ptlrpcd_check(struct lu_env *env, struct ptlrpcd_ctl *pc)
+{
+	struct list_head *tmp, *pos;
+	struct ptlrpc_request *req;
+	struct ptlrpc_request_set *set = pc->pc_set;
+	int rc = 0;
+	int rc2;
+
+	if (atomic_read(&set->set_new_count)) {
+		spin_lock(&set->set_new_req_lock);
+		if (likely(!list_empty(&set->set_new_requests))) {
+			list_splice_init(&set->set_new_requests,
+					     &set->set_requests);
+			atomic_add(atomic_read(&set->set_new_count),
+				       &set->set_remaining);
+			atomic_set(&set->set_new_count, 0);
+			/*
+			 * Need to calculate its timeout.
+			 */
+			rc = 1;
+		}
+		spin_unlock(&set->set_new_req_lock);
+	}
+
+	/* We should call lu_env_refill() before handling new requests to make
+	 * sure that env key the requests depending on really exists.
+	 */
+	rc2 = lu_env_refill(env);
+	if (rc2 != 0) {
+		/*
+		 * XXX This is very awkward situation, because
+		 * execution can neither continue (request
+		 * interpreters assume that env is set up), nor repeat
+		 * the loop (as this potentially results in a tight
+		 * loop of -ENOMEM's).
+		 *
+		 * Fortunately, refill only ever does something when
+		 * new modules are loaded, i.e., early during boot up.
+		 */
+		CERROR("Failure to refill session: %d\n", rc2);
+		return rc;
+	}
+
+	if (atomic_read(&set->set_remaining))
+		rc |= ptlrpc_check_set(env, set);
+
+	/* NB: ptlrpc_check_set has already moved completed request at the
+	 * head of seq::set_requests */
+	list_for_each_safe(pos, tmp, &set->set_requests) {
+		req = list_entry(pos, struct ptlrpc_request, rq_set_chain);
+		if (req->rq_phase != RQ_PHASE_COMPLETE)
+			break;
+
+		list_del_init(&req->rq_set_chain);
+		req->rq_set = NULL;
+		ptlrpc_req_finished(req);
+	}
+
+	if (rc == 0) {
+		/*
+		 * If new requests have been added, make sure to wake up.
+		 */
+		rc = atomic_read(&set->set_new_count);
+
+		/* If we have nothing to do, check whether we can take some
+		 * work from our partner threads. */
+		if (rc == 0 && pc->pc_npartners > 0) {
+			struct ptlrpcd_ctl *partner;
+			struct ptlrpc_request_set *ps;
+			int first = pc->pc_cursor;
+
+			do {
+				partner = pc->pc_partners[pc->pc_cursor++];
+				if (pc->pc_cursor >= pc->pc_npartners)
+					pc->pc_cursor = 0;
+				if (partner == NULL)
+					continue;
+
+				spin_lock(&partner->pc_lock);
+				ps = partner->pc_set;
+				if (ps == NULL) {
+					spin_unlock(&partner->pc_lock);
+					continue;
+				}
+
+				ptlrpc_reqset_get(ps);
+				spin_unlock(&partner->pc_lock);
+
+				if (atomic_read(&ps->set_new_count)) {
+					rc = ptlrpcd_steal_rqset(set, ps);
+					if (rc > 0)
+						CDEBUG(D_RPCTRACE, "transfer %d async RPCs [%d->%d]\n",
+						       rc, partner->pc_index,
+						       pc->pc_index);
+				}
+				ptlrpc_reqset_put(ps);
+			} while (rc == 0 && pc->pc_cursor != first);
+		}
+	}
+
+	return rc;
+}
+
+/**
+ * Main ptlrpcd thread.
+ * ptlrpc's code paths like to execute in process context, so we have this
+ * thread which spins on a set which contains the rpcs and sends them.
+ *
+ */
+static int ptlrpcd(void *arg)
+{
+	struct ptlrpcd_ctl *pc = arg;
+	struct ptlrpc_request_set *set = pc->pc_set;
+	struct lu_env env = { .le_ses = NULL };
+	int rc, exit = 0;
+
+	unshare_fs_struct();
+#if defined(CONFIG_SMP)
+	if (test_bit(LIOD_BIND, &pc->pc_flags)) {
+		int index = pc->pc_index;
+
+		if (index >= 0 && index < num_possible_cpus()) {
+			while (!cpu_online(index)) {
+				if (++index >= num_possible_cpus())
+					index = 0;
+			}
+			set_cpus_allowed_ptr(current,
+					cpumask_of_node(cpu_to_node(index)));
+		}
+	}
+#endif
+	/*
+	 * XXX So far only "client" ptlrpcd uses an environment. In
+	 * the future, ptlrpcd thread (or a thread-set) has to given
+	 * an argument, describing its "scope".
+	 */
+	rc = lu_context_init(&env.le_ctx,
+			     LCT_CL_THREAD|LCT_REMEMBER|LCT_NOREF);
+	complete(&pc->pc_starting);
+
+	if (rc != 0)
+		return rc;
+
+	/*
+	 * This mainloop strongly resembles ptlrpc_set_wait() except that our
+	 * set never completes.  ptlrpcd_check() calls ptlrpc_check_set() when
+	 * there are requests in the set. New requests come in on the set's
+	 * new_req_list and ptlrpcd_check() moves them into the set.
+	 */
+	do {
+		struct l_wait_info lwi;
+		int timeout;
+
+		timeout = ptlrpc_set_next_timeout(set);
+		lwi = LWI_TIMEOUT(cfs_time_seconds(timeout ? timeout : 1),
+				  ptlrpc_expired_set, set);
+
+		lu_context_enter(&env.le_ctx);
+		l_wait_event(set->set_waitq,
+			     ptlrpcd_check(&env, pc), &lwi);
+		lu_context_exit(&env.le_ctx);
+
+		/*
+		 * Abort inflight rpcs for forced stop case.
+		 */
+		if (test_bit(LIOD_STOP, &pc->pc_flags)) {
+			if (test_bit(LIOD_FORCE, &pc->pc_flags))
+				ptlrpc_abort_set(set);
+			exit++;
+		}
+
+		/*
+		 * Let's make one more loop to make sure that ptlrpcd_check()
+		 * copied all raced new rpcs into the set so we can kill them.
+		 */
+	} while (exit < 2);
+
+	/*
+	 * Wait for inflight requests to drain.
+	 */
+	if (!list_empty(&set->set_requests))
+		ptlrpc_set_wait(set);
+	lu_context_fini(&env.le_ctx);
+
+	complete(&pc->pc_finishing);
+
+	return 0;
+}
+
+/* XXX: We want multiple CPU cores to share the async RPC load. So we start many
+ *      ptlrpcd threads. We also want to reduce the ptlrpcd overhead caused by
+ *      data transfer cross-CPU cores. So we bind ptlrpcd thread to specified
+ *      CPU core. But binding all ptlrpcd threads maybe cause response delay
+ *      because of some CPU core(s) busy with other loads.
+ *
+ *      For example: "ls -l", some async RPCs for statahead are assigned to
+ *      ptlrpcd_0, and ptlrpcd_0 is bound to CPU_0, but CPU_0 may be quite busy
+ *      with other non-ptlrpcd, like "ls -l" itself (we want to the "ls -l"
+ *      thread, statahead thread, and ptlrpcd thread can run in parallel), under
+ *      such case, the statahead async RPCs can not be processed in time, it is
+ *      unexpected. If ptlrpcd_0 can be re-scheduled on other CPU core, it may
+ *      be better. But it breaks former data transfer policy.
+ *
+ *      So we shouldn't be blind for avoiding the data transfer. We make some
+ *      compromise: divide the ptlrpcd threads pool into two parts. One part is
+ *      for bound mode, each ptlrpcd thread in this part is bound to some CPU
+ *      core. The other part is for free mode, all the ptlrpcd threads in the
+ *      part can be scheduled on any CPU core. We specify some partnership
+ *      between bound mode ptlrpcd thread(s) and free mode ptlrpcd thread(s),
+ *      and the async RPC load within the partners are shared.
+ *
+ *      It can partly avoid data transfer cross-CPU (if the bound mode ptlrpcd
+ *      thread can be scheduled in time), and try to guarantee the async RPC
+ *      processed ASAP (as long as the free mode ptlrpcd thread can be scheduled
+ *      on any CPU core).
+ *
+ *      As for how to specify the partnership between bound mode ptlrpcd
+ *      thread(s) and free mode ptlrpcd thread(s), the simplest way is to use
+ *      <free bound> pair. In future, we can specify some more complex
+ *      partnership based on the patches for CPU partition. But before such
+ *      patches are available, we prefer to use the simplest one.
+ */
+# ifdef CFS_CPU_MODE_NUMA
+# warning "fix ptlrpcd_bind() to use new CPU partition APIs"
+# endif
+static int ptlrpcd_bind(int index, int max)
+{
+	struct ptlrpcd_ctl *pc;
+	int rc = 0;
+#if defined(CONFIG_NUMA)
+	cpumask_t mask;
+#endif
+
+	LASSERT(index <= max - 1);
+	pc = &ptlrpcds->pd_threads[index];
+	switch (ptlrpcd_bind_policy) {
+	case PDB_POLICY_NONE:
+		pc->pc_npartners = -1;
+		break;
+	case PDB_POLICY_FULL:
+		pc->pc_npartners = 0;
+		set_bit(LIOD_BIND, &pc->pc_flags);
+		break;
+	case PDB_POLICY_PAIR:
+		LASSERT(max % 2 == 0);
+		pc->pc_npartners = 1;
+		break;
+	case PDB_POLICY_NEIGHBOR:
+#if defined(CONFIG_NUMA)
+	{
+		int i;
+		cpumask_copy(&mask, cpumask_of_node(cpu_to_node(index)));
+		for (i = max; i < num_online_cpus(); i++)
+			cpumask_clear_cpu(i, &mask);
+		pc->pc_npartners = cpumask_weight(&mask) - 1;
+		set_bit(LIOD_BIND, &pc->pc_flags);
+	}
+#else
+		LASSERT(max >= 3);
+		pc->pc_npartners = 2;
+#endif
+		break;
+	default:
+		CERROR("unknown ptlrpcd bind policy %d\n", ptlrpcd_bind_policy);
+		rc = -EINVAL;
+	}
+
+	if (rc == 0 && pc->pc_npartners > 0) {
+		OBD_ALLOC(pc->pc_partners,
+			  sizeof(struct ptlrpcd_ctl *) * pc->pc_npartners);
+		if (pc->pc_partners == NULL) {
+			pc->pc_npartners = 0;
+			rc = -ENOMEM;
+		} else {
+			switch (ptlrpcd_bind_policy) {
+			case PDB_POLICY_PAIR:
+				if (index & 0x1) {
+					set_bit(LIOD_BIND, &pc->pc_flags);
+					pc->pc_partners[0] = &ptlrpcds->
+						pd_threads[index - 1];
+					ptlrpcds->pd_threads[index - 1].
+						pc_partners[0] = pc;
+				}
+				break;
+			case PDB_POLICY_NEIGHBOR:
+#if defined(CONFIG_NUMA)
+			{
+				struct ptlrpcd_ctl *ppc;
+				int i, pidx;
+				/* partners are cores in the same NUMA node.
+				 * setup partnership only with ptlrpcd threads
+				 * that are already initialized
+				 */
+				for (pidx = 0, i = 0; i < index; i++) {
+					if (cpumask_test_cpu(i, &mask)) {
+						ppc = &ptlrpcds->pd_threads[i];
+						pc->pc_partners[pidx++] = ppc;
+						ppc->pc_partners[ppc->
+							  pc_npartners++] = pc;
+					}
+				}
+				/* adjust number of partners to the number
+				 * of partnership really setup */
+				pc->pc_npartners = pidx;
+			}
+#else
+				if (index & 0x1)
+					set_bit(LIOD_BIND, &pc->pc_flags);
+				if (index > 0) {
+					pc->pc_partners[0] = &ptlrpcds->
+						pd_threads[index - 1];
+					ptlrpcds->pd_threads[index - 1].
+						pc_partners[1] = pc;
+					if (index == max - 1) {
+						pc->pc_partners[1] =
+						&ptlrpcds->pd_threads[0];
+						ptlrpcds->pd_threads[0].
+						pc_partners[0] = pc;
+					}
+				}
+#endif
+				break;
+			}
+		}
+	}
+
+	return rc;
+}
+
+
+int ptlrpcd_start(int index, int max, const char *name, struct ptlrpcd_ctl *pc)
+{
+	int rc;
+
+	/*
+	 * Do not allow start second thread for one pc.
+	 */
+	if (test_and_set_bit(LIOD_START, &pc->pc_flags)) {
+		CWARN("Starting second thread (%s) for same pc %p\n",
+		      name, pc);
+		return 0;
+	}
+
+	pc->pc_index = index;
+	init_completion(&pc->pc_starting);
+	init_completion(&pc->pc_finishing);
+	spin_lock_init(&pc->pc_lock);
+	strlcpy(pc->pc_name, name, sizeof(pc->pc_name));
+	pc->pc_set = ptlrpc_prep_set();
+	if (pc->pc_set == NULL) {
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	/*
+	 * So far only "client" ptlrpcd uses an environment. In the future,
+	 * ptlrpcd thread (or a thread-set) has to be given an argument,
+	 * describing its "scope".
+	 */
+	rc = lu_context_init(&pc->pc_env.le_ctx, LCT_CL_THREAD|LCT_REMEMBER);
+	if (rc != 0)
+		goto out_set;
+
+	{
+		struct task_struct *task;
+		if (index >= 0) {
+			rc = ptlrpcd_bind(index, max);
+			if (rc < 0)
+				goto out_env;
+		}
+
+		task = kthread_run(ptlrpcd, pc, "%s", pc->pc_name);
+		if (IS_ERR(task)) {
+			rc = PTR_ERR(task);
+			goto out_env;
+		}
+
+		wait_for_completion(&pc->pc_starting);
+	}
+	return 0;
+
+out_env:
+	lu_context_fini(&pc->pc_env.le_ctx);
+
+out_set:
+	if (pc->pc_set != NULL) {
+		struct ptlrpc_request_set *set = pc->pc_set;
+
+		spin_lock(&pc->pc_lock);
+		pc->pc_set = NULL;
+		spin_unlock(&pc->pc_lock);
+		ptlrpc_set_destroy(set);
+	}
+	clear_bit(LIOD_BIND, &pc->pc_flags);
+
+out:
+	clear_bit(LIOD_START, &pc->pc_flags);
+	return rc;
+}
+
+void ptlrpcd_stop(struct ptlrpcd_ctl *pc, int force)
+{
+	if (!test_bit(LIOD_START, &pc->pc_flags)) {
+		CWARN("Thread for pc %p was not started\n", pc);
+		return;
+	}
+
+	set_bit(LIOD_STOP, &pc->pc_flags);
+	if (force)
+		set_bit(LIOD_FORCE, &pc->pc_flags);
+	wake_up(&pc->pc_set->set_waitq);
+}
+
+void ptlrpcd_free(struct ptlrpcd_ctl *pc)
+{
+	struct ptlrpc_request_set *set = pc->pc_set;
+
+	if (!test_bit(LIOD_START, &pc->pc_flags)) {
+		CWARN("Thread for pc %p was not started\n", pc);
+		goto out;
+	}
+
+	wait_for_completion(&pc->pc_finishing);
+	lu_context_fini(&pc->pc_env.le_ctx);
+
+	spin_lock(&pc->pc_lock);
+	pc->pc_set = NULL;
+	spin_unlock(&pc->pc_lock);
+	ptlrpc_set_destroy(set);
+
+	clear_bit(LIOD_START, &pc->pc_flags);
+	clear_bit(LIOD_STOP, &pc->pc_flags);
+	clear_bit(LIOD_FORCE, &pc->pc_flags);
+	clear_bit(LIOD_BIND, &pc->pc_flags);
+
+out:
+	if (pc->pc_npartners > 0) {
+		LASSERT(pc->pc_partners != NULL);
+
+		OBD_FREE(pc->pc_partners,
+			 sizeof(struct ptlrpcd_ctl *) * pc->pc_npartners);
+		pc->pc_partners = NULL;
+	}
+	pc->pc_npartners = 0;
+}
+
+static void ptlrpcd_fini(void)
+{
+	int i;
+
+	if (ptlrpcds != NULL) {
+		for (i = 0; i < ptlrpcds->pd_nthreads; i++)
+			ptlrpcd_stop(&ptlrpcds->pd_threads[i], 0);
+		for (i = 0; i < ptlrpcds->pd_nthreads; i++)
+			ptlrpcd_free(&ptlrpcds->pd_threads[i]);
+		ptlrpcd_stop(&ptlrpcds->pd_thread_rcv, 0);
+		ptlrpcd_free(&ptlrpcds->pd_thread_rcv);
+		OBD_FREE(ptlrpcds, ptlrpcds->pd_size);
+		ptlrpcds = NULL;
+	}
+}
+
+static int ptlrpcd_init(void)
+{
+	int nthreads = num_online_cpus();
+	char name[16];
+	int size, i = -1, j, rc = 0;
+
+	if (max_ptlrpcds > 0 && max_ptlrpcds < nthreads)
+		nthreads = max_ptlrpcds;
+	if (nthreads < 2)
+		nthreads = 2;
+	if (nthreads < 3 && ptlrpcd_bind_policy == PDB_POLICY_NEIGHBOR)
+		ptlrpcd_bind_policy = PDB_POLICY_PAIR;
+	else if (nthreads % 2 != 0 && ptlrpcd_bind_policy == PDB_POLICY_PAIR)
+		nthreads &= ~1; /* make sure it is even */
+
+	size = offsetof(struct ptlrpcd, pd_threads[nthreads]);
+	OBD_ALLOC(ptlrpcds, size);
+	if (ptlrpcds == NULL) {
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	snprintf(name, sizeof(name), "ptlrpcd_rcv");
+	set_bit(LIOD_RECOVERY, &ptlrpcds->pd_thread_rcv.pc_flags);
+	rc = ptlrpcd_start(-1, nthreads, name, &ptlrpcds->pd_thread_rcv);
+	if (rc < 0)
+		goto out;
+
+	/* XXX: We start nthreads ptlrpc daemons. Each of them can process any
+	 *      non-recovery async RPC to improve overall async RPC efficiency.
+	 *
+	 *      But there are some issues with async I/O RPCs and async non-I/O
+	 *      RPCs processed in the same set under some cases. The ptlrpcd may
+	 *      be blocked by some async I/O RPC(s), then will cause other async
+	 *      non-I/O RPC(s) can not be processed in time.
+	 *
+	 *      Maybe we should distinguish blocked async RPCs from non-blocked
+	 *      async RPCs, and process them in different ptlrpcd sets to avoid
+	 *      unnecessary dependency. But how to distribute async RPCs load
+	 *      among all the ptlrpc daemons becomes another trouble. */
+	for (i = 0; i < nthreads; i++) {
+		snprintf(name, sizeof(name), "ptlrpcd_%d", i);
+		rc = ptlrpcd_start(i, nthreads, name, &ptlrpcds->pd_threads[i]);
+		if (rc < 0)
+			goto out;
+	}
+
+	ptlrpcds->pd_size = size;
+	ptlrpcds->pd_index = 0;
+	ptlrpcds->pd_nthreads = nthreads;
+
+out:
+	if (rc != 0 && ptlrpcds != NULL) {
+		for (j = 0; j <= i; j++)
+			ptlrpcd_stop(&ptlrpcds->pd_threads[j], 0);
+		for (j = 0; j <= i; j++)
+			ptlrpcd_free(&ptlrpcds->pd_threads[j]);
+		ptlrpcd_stop(&ptlrpcds->pd_thread_rcv, 0);
+		ptlrpcd_free(&ptlrpcds->pd_thread_rcv);
+		OBD_FREE(ptlrpcds, size);
+		ptlrpcds = NULL;
+	}
+
+	return 0;
+}
+
+int ptlrpcd_addref(void)
+{
+	int rc = 0;
+
+	mutex_lock(&ptlrpcd_mutex);
+	if (++ptlrpcd_users == 1)
+		rc = ptlrpcd_init();
+	mutex_unlock(&ptlrpcd_mutex);
+	return rc;
+}
+EXPORT_SYMBOL(ptlrpcd_addref);
+
+void ptlrpcd_decref(void)
+{
+	mutex_lock(&ptlrpcd_mutex);
+	if (--ptlrpcd_users == 0)
+		ptlrpcd_fini();
+	mutex_unlock(&ptlrpcd_mutex);
+}
+EXPORT_SYMBOL(ptlrpcd_decref);
+/** @} ptlrpcd */
diff --git a/drivers/staging/lustre/lustre/ptlrpc/recover.c b/drivers/staging/lustre/lustre/ptlrpc/recover.c
new file mode 100644
index 000000000..7b1d72947
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ptlrpc/recover.c
@@ -0,0 +1,379 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/ptlrpc/recover.c
+ *
+ * Author: Mike Shaver <shaver@clusterfs.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_RPC
+#include "../../include/linux/libcfs/libcfs.h"
+
+#include "../include/obd_support.h"
+#include "../include/lustre_ha.h"
+#include "../include/lustre_net.h"
+#include "../include/lustre_import.h"
+#include "../include/lustre_export.h"
+#include "../include/obd.h"
+#include "../include/obd_class.h"
+#include <linux/list.h>
+
+#include "ptlrpc_internal.h"
+
+/**
+ * Start recovery on disconnected import.
+ * This is done by just attempting a connect
+ */
+void ptlrpc_initiate_recovery(struct obd_import *imp)
+{
+	CDEBUG(D_HA, "%s: starting recovery\n", obd2cli_tgt(imp->imp_obd));
+	ptlrpc_connect_import(imp);
+}
+
+/**
+ * Identify what request from replay list needs to be replayed next
+ * (based on what we have already replayed) and send it to server.
+ */
+int ptlrpc_replay_next(struct obd_import *imp, int *inflight)
+{
+	int rc = 0;
+	struct list_head *tmp, *pos;
+	struct ptlrpc_request *req = NULL;
+	__u64 last_transno;
+
+	*inflight = 0;
+
+	/* It might have committed some after we last spoke, so make sure we
+	 * get rid of them now.
+	 */
+	spin_lock(&imp->imp_lock);
+	imp->imp_last_transno_checked = 0;
+	ptlrpc_free_committed(imp);
+	last_transno = imp->imp_last_replay_transno;
+	spin_unlock(&imp->imp_lock);
+
+	CDEBUG(D_HA, "import %p from %s committed %llu last %llu\n",
+	       imp, obd2cli_tgt(imp->imp_obd),
+	       imp->imp_peer_committed_transno, last_transno);
+
+	/* Do I need to hold a lock across this iteration?  We shouldn't be
+	 * racing with any additions to the list, because we're in recovery
+	 * and are therefore not processing additional requests to add.  Calls
+	 * to ptlrpc_free_committed might commit requests, but nothing "newer"
+	 * than the one we're replaying (it can't be committed until it's
+	 * replayed, and we're doing that here).  l_f_e_safe protects against
+	 * problems with the current request being committed, in the unlikely
+	 * event of that race.  So, in conclusion, I think that it's safe to
+	 * perform this list-walk without the imp_lock held.
+	 *
+	 * But, the {mdc,osc}_replay_open callbacks both iterate
+	 * request lists, and have comments saying they assume the
+	 * imp_lock is being held by ptlrpc_replay, but it's not. it's
+	 * just a little race...
+	 */
+
+	/* Replay all the committed open requests on committed_list first */
+	if (!list_empty(&imp->imp_committed_list)) {
+		tmp = imp->imp_committed_list.prev;
+		req = list_entry(tmp, struct ptlrpc_request,
+				     rq_replay_list);
+
+		/* The last request on committed_list hasn't been replayed */
+		if (req->rq_transno > last_transno) {
+			/* Since the imp_committed_list is immutable before
+			 * all of it's requests being replayed, it's safe to
+			 * use a cursor to accelerate the search */
+			imp->imp_replay_cursor = imp->imp_replay_cursor->next;
+
+			while (imp->imp_replay_cursor !=
+			       &imp->imp_committed_list) {
+				req = list_entry(imp->imp_replay_cursor,
+						 struct ptlrpc_request,
+						 rq_replay_list);
+				if (req->rq_transno > last_transno)
+					break;
+
+				req = NULL;
+				imp->imp_replay_cursor =
+					imp->imp_replay_cursor->next;
+			}
+		} else {
+			/* All requests on committed_list have been replayed */
+			imp->imp_replay_cursor = &imp->imp_committed_list;
+			req = NULL;
+		}
+	}
+
+	/* All the requests in committed list have been replayed, let's replay
+	 * the imp_replay_list */
+	if (req == NULL) {
+		list_for_each_safe(tmp, pos, &imp->imp_replay_list) {
+			req = list_entry(tmp, struct ptlrpc_request,
+					 rq_replay_list);
+
+			if (req->rq_transno > last_transno)
+				break;
+			req = NULL;
+		}
+	}
+
+	/* If need to resend the last sent transno (because a reconnect
+	 * has occurred), then stop on the matching req and send it again.
+	 * If, however, the last sent transno has been committed then we
+	 * continue replay from the next request. */
+	if (req != NULL && imp->imp_resend_replay)
+		lustre_msg_add_flags(req->rq_reqmsg, MSG_RESENT);
+
+	spin_lock(&imp->imp_lock);
+	imp->imp_resend_replay = 0;
+	spin_unlock(&imp->imp_lock);
+
+	if (req != NULL) {
+		rc = ptlrpc_replay_req(req);
+		if (rc) {
+			CERROR("recovery replay error %d for req %llu\n",
+			       rc, req->rq_xid);
+			return rc;
+		}
+		*inflight = 1;
+	}
+	return rc;
+}
+
+/**
+ * Schedule resending of request on sending_list. This is done after
+ * we completed replaying of requests and locks.
+ */
+int ptlrpc_resend(struct obd_import *imp)
+{
+	struct ptlrpc_request *req, *next;
+
+	/* As long as we're in recovery, nothing should be added to the sending
+	 * list, so we don't need to hold the lock during this iteration and
+	 * resend process.
+	 */
+	/* Well... what if lctl recover is called twice at the same time?
+	 */
+	spin_lock(&imp->imp_lock);
+	if (imp->imp_state != LUSTRE_IMP_RECOVER) {
+		spin_unlock(&imp->imp_lock);
+		return -1;
+	}
+
+	list_for_each_entry_safe(req, next, &imp->imp_sending_list,
+				     rq_list) {
+		LASSERTF((long)req > PAGE_CACHE_SIZE && req != LP_POISON,
+			 "req %p bad\n", req);
+		LASSERTF(req->rq_type != LI_POISON, "req %p freed\n", req);
+		if (!ptlrpc_no_resend(req))
+			ptlrpc_resend_req(req);
+	}
+	spin_unlock(&imp->imp_lock);
+
+	return 0;
+}
+EXPORT_SYMBOL(ptlrpc_resend);
+
+/**
+ * Go through all requests in delayed list and wake their threads
+ * for resending
+ */
+void ptlrpc_wake_delayed(struct obd_import *imp)
+{
+	struct list_head *tmp, *pos;
+	struct ptlrpc_request *req;
+
+	spin_lock(&imp->imp_lock);
+	list_for_each_safe(tmp, pos, &imp->imp_delayed_list) {
+		req = list_entry(tmp, struct ptlrpc_request, rq_list);
+
+		DEBUG_REQ(D_HA, req, "waking (set %p):", req->rq_set);
+		ptlrpc_client_wake_req(req);
+	}
+	spin_unlock(&imp->imp_lock);
+}
+EXPORT_SYMBOL(ptlrpc_wake_delayed);
+
+void ptlrpc_request_handle_notconn(struct ptlrpc_request *failed_req)
+{
+	struct obd_import *imp = failed_req->rq_import;
+
+	CDEBUG(D_HA, "import %s of %s@%s abruptly disconnected: reconnecting\n",
+	       imp->imp_obd->obd_name, obd2cli_tgt(imp->imp_obd),
+	       imp->imp_connection->c_remote_uuid.uuid);
+
+	if (ptlrpc_set_import_discon(imp,
+			      lustre_msg_get_conn_cnt(failed_req->rq_reqmsg))) {
+		if (!imp->imp_replayable) {
+			CDEBUG(D_HA, "import %s@%s for %s not replayable, auto-deactivating\n",
+			       obd2cli_tgt(imp->imp_obd),
+			       imp->imp_connection->c_remote_uuid.uuid,
+			       imp->imp_obd->obd_name);
+			ptlrpc_deactivate_import(imp);
+		}
+		/* to control recovery via lctl {disable|enable}_recovery */
+		if (imp->imp_deactive == 0)
+			ptlrpc_connect_import(imp);
+	}
+
+	/* Wait for recovery to complete and resend. If evicted, then
+	   this request will be errored out later.*/
+	spin_lock(&failed_req->rq_lock);
+	if (!failed_req->rq_no_resend)
+		failed_req->rq_resend = 1;
+	spin_unlock(&failed_req->rq_lock);
+}
+
+/**
+ * Administratively active/deactive a client.
+ * This should only be called by the ioctl interface, currently
+ *  - the lctl deactivate and activate commands
+ *  - echo 0/1 >> /proc/osc/XXX/active
+ *  - client umount -f (ll_umount_begin)
+ */
+int ptlrpc_set_import_active(struct obd_import *imp, int active)
+{
+	struct obd_device *obd = imp->imp_obd;
+	int rc = 0;
+
+	LASSERT(obd);
+
+	/* When deactivating, mark import invalid, and abort in-flight
+	 * requests. */
+	if (!active) {
+		LCONSOLE_WARN("setting import %s INACTIVE by administrator request\n",
+			      obd2cli_tgt(imp->imp_obd));
+
+		/* set before invalidate to avoid messages about imp_inval
+		 * set without imp_deactive in ptlrpc_import_delay_req */
+		spin_lock(&imp->imp_lock);
+		imp->imp_deactive = 1;
+		spin_unlock(&imp->imp_lock);
+
+		obd_import_event(imp->imp_obd, imp, IMP_EVENT_DEACTIVATE);
+
+		ptlrpc_invalidate_import(imp);
+	}
+
+	/* When activating, mark import valid, and attempt recovery */
+	if (active) {
+		CDEBUG(D_HA, "setting import %s VALID\n",
+		       obd2cli_tgt(imp->imp_obd));
+
+		spin_lock(&imp->imp_lock);
+		imp->imp_deactive = 0;
+		spin_unlock(&imp->imp_lock);
+		obd_import_event(imp->imp_obd, imp, IMP_EVENT_ACTIVATE);
+
+		rc = ptlrpc_recover_import(imp, NULL, 0);
+	}
+
+	return rc;
+}
+EXPORT_SYMBOL(ptlrpc_set_import_active);
+
+/* Attempt to reconnect an import */
+int ptlrpc_recover_import(struct obd_import *imp, char *new_uuid, int async)
+{
+	int rc = 0;
+
+	spin_lock(&imp->imp_lock);
+	if (imp->imp_state == LUSTRE_IMP_NEW || imp->imp_deactive ||
+	    atomic_read(&imp->imp_inval_count))
+		rc = -EINVAL;
+	spin_unlock(&imp->imp_lock);
+	if (rc)
+		goto out;
+
+	/* force import to be disconnected. */
+	ptlrpc_set_import_discon(imp, 0);
+
+	if (new_uuid) {
+		struct obd_uuid uuid;
+
+		/* intruct import to use new uuid */
+		obd_str2uuid(&uuid, new_uuid);
+		rc = import_set_conn_priority(imp, &uuid);
+		if (rc)
+			goto out;
+	}
+
+	/* Check if reconnect is already in progress */
+	spin_lock(&imp->imp_lock);
+	if (imp->imp_state != LUSTRE_IMP_DISCON) {
+		imp->imp_force_verify = 1;
+		rc = -EALREADY;
+	}
+	spin_unlock(&imp->imp_lock);
+	if (rc)
+		goto out;
+
+	rc = ptlrpc_connect_import(imp);
+	if (rc)
+		goto out;
+
+	if (!async) {
+		struct l_wait_info lwi;
+		int secs = cfs_time_seconds(obd_timeout);
+
+		CDEBUG(D_HA, "%s: recovery started, waiting %u seconds\n",
+		       obd2cli_tgt(imp->imp_obd), secs);
+
+		lwi = LWI_TIMEOUT(secs, NULL, NULL);
+		rc = l_wait_event(imp->imp_recovery_waitq,
+				  !ptlrpc_import_in_recovery(imp), &lwi);
+		CDEBUG(D_HA, "%s: recovery finished\n",
+		       obd2cli_tgt(imp->imp_obd));
+	}
+
+out:
+	return rc;
+}
+EXPORT_SYMBOL(ptlrpc_recover_import);
+
+int ptlrpc_import_in_recovery(struct obd_import *imp)
+{
+	int in_recovery = 1;
+
+	spin_lock(&imp->imp_lock);
+	if (imp->imp_state == LUSTRE_IMP_FULL ||
+	    imp->imp_state == LUSTRE_IMP_CLOSED ||
+	    imp->imp_state == LUSTRE_IMP_DISCON ||
+	    imp->imp_obd->obd_no_recov)
+		in_recovery = 0;
+	spin_unlock(&imp->imp_lock);
+
+	return in_recovery;
+}
diff --git a/drivers/staging/lustre/lustre/ptlrpc/sec.c b/drivers/staging/lustre/lustre/ptlrpc/sec.c
new file mode 100644
index 000000000..21e9dc9d5
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ptlrpc/sec.c
@@ -0,0 +1,2459 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/ptlrpc/sec.c
+ *
+ * Author: Eric Mei <ericm@clusterfs.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_SEC
+
+#include "../../include/linux/libcfs/libcfs.h"
+#include <linux/crypto.h>
+#include <linux/key.h>
+
+#include "../include/obd.h"
+#include "../include/obd_class.h"
+#include "../include/obd_support.h"
+#include "../include/lustre_net.h"
+#include "../include/lustre_import.h"
+#include "../include/lustre_dlm.h"
+#include "../include/lustre_sec.h"
+
+#include "ptlrpc_internal.h"
+
+/***********************************************
+ * policy registers			    *
+ ***********************************************/
+
+static rwlock_t policy_lock;
+static struct ptlrpc_sec_policy *policies[SPTLRPC_POLICY_MAX] = {
+	NULL,
+};
+
+int sptlrpc_register_policy(struct ptlrpc_sec_policy *policy)
+{
+	__u16 number = policy->sp_policy;
+
+	LASSERT(policy->sp_name);
+	LASSERT(policy->sp_cops);
+	LASSERT(policy->sp_sops);
+
+	if (number >= SPTLRPC_POLICY_MAX)
+		return -EINVAL;
+
+	write_lock(&policy_lock);
+	if (unlikely(policies[number])) {
+		write_unlock(&policy_lock);
+		return -EALREADY;
+	}
+	policies[number] = policy;
+	write_unlock(&policy_lock);
+
+	CDEBUG(D_SEC, "%s: registered\n", policy->sp_name);
+	return 0;
+}
+EXPORT_SYMBOL(sptlrpc_register_policy);
+
+int sptlrpc_unregister_policy(struct ptlrpc_sec_policy *policy)
+{
+	__u16 number = policy->sp_policy;
+
+	LASSERT(number < SPTLRPC_POLICY_MAX);
+
+	write_lock(&policy_lock);
+	if (unlikely(policies[number] == NULL)) {
+		write_unlock(&policy_lock);
+		CERROR("%s: already unregistered\n", policy->sp_name);
+		return -EINVAL;
+	}
+
+	LASSERT(policies[number] == policy);
+	policies[number] = NULL;
+	write_unlock(&policy_lock);
+
+	CDEBUG(D_SEC, "%s: unregistered\n", policy->sp_name);
+	return 0;
+}
+EXPORT_SYMBOL(sptlrpc_unregister_policy);
+
+static
+struct ptlrpc_sec_policy *sptlrpc_wireflavor2policy(__u32 flavor)
+{
+	static DEFINE_MUTEX(load_mutex);
+	static atomic_t       loaded = ATOMIC_INIT(0);
+	struct ptlrpc_sec_policy *policy;
+	__u16		     number = SPTLRPC_FLVR_POLICY(flavor);
+	__u16		     flag = 0;
+
+	if (number >= SPTLRPC_POLICY_MAX)
+		return NULL;
+
+	while (1) {
+		read_lock(&policy_lock);
+		policy = policies[number];
+		if (policy && !try_module_get(policy->sp_owner))
+			policy = NULL;
+		if (policy == NULL)
+			flag = atomic_read(&loaded);
+		read_unlock(&policy_lock);
+
+		if (policy != NULL || flag != 0 ||
+		    number != SPTLRPC_POLICY_GSS)
+			break;
+
+		/* try to load gss module, once */
+		mutex_lock(&load_mutex);
+		if (atomic_read(&loaded) == 0) {
+			if (request_module("ptlrpc_gss") == 0)
+				CDEBUG(D_SEC,
+				       "module ptlrpc_gss loaded on demand\n");
+			else
+				CERROR("Unable to load module ptlrpc_gss\n");
+
+			atomic_set(&loaded, 1);
+		}
+		mutex_unlock(&load_mutex);
+	}
+
+	return policy;
+}
+
+__u32 sptlrpc_name2flavor_base(const char *name)
+{
+	if (!strcmp(name, "null"))
+		return SPTLRPC_FLVR_NULL;
+	if (!strcmp(name, "plain"))
+		return SPTLRPC_FLVR_PLAIN;
+	if (!strcmp(name, "krb5n"))
+		return SPTLRPC_FLVR_KRB5N;
+	if (!strcmp(name, "krb5a"))
+		return SPTLRPC_FLVR_KRB5A;
+	if (!strcmp(name, "krb5i"))
+		return SPTLRPC_FLVR_KRB5I;
+	if (!strcmp(name, "krb5p"))
+		return SPTLRPC_FLVR_KRB5P;
+
+	return SPTLRPC_FLVR_INVALID;
+}
+EXPORT_SYMBOL(sptlrpc_name2flavor_base);
+
+const char *sptlrpc_flavor2name_base(__u32 flvr)
+{
+	__u32   base = SPTLRPC_FLVR_BASE(flvr);
+
+	if (base == SPTLRPC_FLVR_BASE(SPTLRPC_FLVR_NULL))
+		return "null";
+	else if (base == SPTLRPC_FLVR_BASE(SPTLRPC_FLVR_PLAIN))
+		return "plain";
+	else if (base == SPTLRPC_FLVR_BASE(SPTLRPC_FLVR_KRB5N))
+		return "krb5n";
+	else if (base == SPTLRPC_FLVR_BASE(SPTLRPC_FLVR_KRB5A))
+		return "krb5a";
+	else if (base == SPTLRPC_FLVR_BASE(SPTLRPC_FLVR_KRB5I))
+		return "krb5i";
+	else if (base == SPTLRPC_FLVR_BASE(SPTLRPC_FLVR_KRB5P))
+		return "krb5p";
+
+	CERROR("invalid wire flavor 0x%x\n", flvr);
+	return "invalid";
+}
+EXPORT_SYMBOL(sptlrpc_flavor2name_base);
+
+char *sptlrpc_flavor2name_bulk(struct sptlrpc_flavor *sf,
+			       char *buf, int bufsize)
+{
+	if (SPTLRPC_FLVR_POLICY(sf->sf_rpc) == SPTLRPC_POLICY_PLAIN)
+		snprintf(buf, bufsize, "hash:%s",
+			 sptlrpc_get_hash_name(sf->u_bulk.hash.hash_alg));
+	else
+		snprintf(buf, bufsize, "%s",
+			 sptlrpc_flavor2name_base(sf->sf_rpc));
+
+	buf[bufsize - 1] = '\0';
+	return buf;
+}
+EXPORT_SYMBOL(sptlrpc_flavor2name_bulk);
+
+char *sptlrpc_flavor2name(struct sptlrpc_flavor *sf, char *buf, int bufsize)
+{
+	strlcpy(buf, sptlrpc_flavor2name_base(sf->sf_rpc), bufsize);
+
+	/*
+	 * currently we don't support customized bulk specification for
+	 * flavors other than plain
+	 */
+	if (SPTLRPC_FLVR_POLICY(sf->sf_rpc) == SPTLRPC_POLICY_PLAIN) {
+		char bspec[16];
+
+		bspec[0] = '-';
+		sptlrpc_flavor2name_bulk(sf, &bspec[1], sizeof(bspec) - 1);
+		strlcat(buf, bspec, bufsize);
+	}
+
+	return buf;
+}
+EXPORT_SYMBOL(sptlrpc_flavor2name);
+
+char *sptlrpc_secflags2str(__u32 flags, char *buf, int bufsize)
+{
+	buf[0] = '\0';
+
+	if (flags & PTLRPC_SEC_FL_REVERSE)
+		strlcat(buf, "reverse,", bufsize);
+	if (flags & PTLRPC_SEC_FL_ROOTONLY)
+		strlcat(buf, "rootonly,", bufsize);
+	if (flags & PTLRPC_SEC_FL_UDESC)
+		strlcat(buf, "udesc,", bufsize);
+	if (flags & PTLRPC_SEC_FL_BULK)
+		strlcat(buf, "bulk,", bufsize);
+	if (buf[0] == '\0')
+		strlcat(buf, "-,", bufsize);
+
+	return buf;
+}
+EXPORT_SYMBOL(sptlrpc_secflags2str);
+
+/**************************************************
+ * client context APIs			    *
+ **************************************************/
+
+static
+struct ptlrpc_cli_ctx *get_my_ctx(struct ptlrpc_sec *sec)
+{
+	struct vfs_cred vcred;
+	int create = 1, remove_dead = 1;
+
+	LASSERT(sec);
+	LASSERT(sec->ps_policy->sp_cops->lookup_ctx);
+
+	if (sec->ps_flvr.sf_flags & (PTLRPC_SEC_FL_REVERSE |
+				     PTLRPC_SEC_FL_ROOTONLY)) {
+		vcred.vc_uid = 0;
+		vcred.vc_gid = 0;
+		if (sec->ps_flvr.sf_flags & PTLRPC_SEC_FL_REVERSE) {
+			create = 0;
+			remove_dead = 0;
+		}
+	} else {
+		vcred.vc_uid = from_kuid(&init_user_ns, current_uid());
+		vcred.vc_gid = from_kgid(&init_user_ns, current_gid());
+	}
+
+	return sec->ps_policy->sp_cops->lookup_ctx(sec, &vcred,
+						   create, remove_dead);
+}
+
+struct ptlrpc_cli_ctx *sptlrpc_cli_ctx_get(struct ptlrpc_cli_ctx *ctx)
+{
+	atomic_inc(&ctx->cc_refcount);
+	return ctx;
+}
+EXPORT_SYMBOL(sptlrpc_cli_ctx_get);
+
+void sptlrpc_cli_ctx_put(struct ptlrpc_cli_ctx *ctx, int sync)
+{
+	struct ptlrpc_sec *sec = ctx->cc_sec;
+
+	LASSERT(sec);
+	LASSERT_ATOMIC_POS(&ctx->cc_refcount);
+
+	if (!atomic_dec_and_test(&ctx->cc_refcount))
+		return;
+
+	sec->ps_policy->sp_cops->release_ctx(sec, ctx, sync);
+}
+EXPORT_SYMBOL(sptlrpc_cli_ctx_put);
+
+/**
+ * Expire the client context immediately.
+ *
+ * \pre Caller must hold at least 1 reference on the \a ctx.
+ */
+void sptlrpc_cli_ctx_expire(struct ptlrpc_cli_ctx *ctx)
+{
+	LASSERT(ctx->cc_ops->force_die);
+	ctx->cc_ops->force_die(ctx, 0);
+}
+EXPORT_SYMBOL(sptlrpc_cli_ctx_expire);
+
+/**
+ * To wake up the threads who are waiting for this client context. Called
+ * after some status change happened on \a ctx.
+ */
+void sptlrpc_cli_ctx_wakeup(struct ptlrpc_cli_ctx *ctx)
+{
+	struct ptlrpc_request *req, *next;
+
+	spin_lock(&ctx->cc_lock);
+	list_for_each_entry_safe(req, next, &ctx->cc_req_list,
+				     rq_ctx_chain) {
+		list_del_init(&req->rq_ctx_chain);
+		ptlrpc_client_wake_req(req);
+	}
+	spin_unlock(&ctx->cc_lock);
+}
+EXPORT_SYMBOL(sptlrpc_cli_ctx_wakeup);
+
+int sptlrpc_cli_ctx_display(struct ptlrpc_cli_ctx *ctx, char *buf, int bufsize)
+{
+	LASSERT(ctx->cc_ops);
+
+	if (ctx->cc_ops->display == NULL)
+		return 0;
+
+	return ctx->cc_ops->display(ctx, buf, bufsize);
+}
+
+static int import_sec_check_expire(struct obd_import *imp)
+{
+	int     adapt = 0;
+
+	spin_lock(&imp->imp_lock);
+	if (imp->imp_sec_expire &&
+	    imp->imp_sec_expire < get_seconds()) {
+		adapt = 1;
+		imp->imp_sec_expire = 0;
+	}
+	spin_unlock(&imp->imp_lock);
+
+	if (!adapt)
+		return 0;
+
+	CDEBUG(D_SEC, "found delayed sec adapt expired, do it now\n");
+	return sptlrpc_import_sec_adapt(imp, NULL, NULL);
+}
+
+static int import_sec_validate_get(struct obd_import *imp,
+				   struct ptlrpc_sec **sec)
+{
+	int     rc;
+
+	if (unlikely(imp->imp_sec_expire)) {
+		rc = import_sec_check_expire(imp);
+		if (rc)
+			return rc;
+	}
+
+	*sec = sptlrpc_import_sec_ref(imp);
+	if (*sec == NULL) {
+		CERROR("import %p (%s) with no sec\n",
+		       imp, ptlrpc_import_state_name(imp->imp_state));
+		return -EACCES;
+	}
+
+	if (unlikely((*sec)->ps_dying)) {
+		CERROR("attempt to use dying sec %p\n", sec);
+		sptlrpc_sec_put(*sec);
+		return -EACCES;
+	}
+
+	return 0;
+}
+
+/**
+ * Given a \a req, find or allocate a appropriate context for it.
+ * \pre req->rq_cli_ctx == NULL.
+ *
+ * \retval 0 succeed, and req->rq_cli_ctx is set.
+ * \retval -ev error number, and req->rq_cli_ctx == NULL.
+ */
+int sptlrpc_req_get_ctx(struct ptlrpc_request *req)
+{
+	struct obd_import *imp = req->rq_import;
+	struct ptlrpc_sec *sec;
+	int		rc;
+
+	LASSERT(!req->rq_cli_ctx);
+	LASSERT(imp);
+
+	rc = import_sec_validate_get(imp, &sec);
+	if (rc)
+		return rc;
+
+	req->rq_cli_ctx = get_my_ctx(sec);
+
+	sptlrpc_sec_put(sec);
+
+	if (!req->rq_cli_ctx) {
+		CERROR("req %p: fail to get context\n", req);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+/**
+ * Drop the context for \a req.
+ * \pre req->rq_cli_ctx != NULL.
+ * \post req->rq_cli_ctx == NULL.
+ *
+ * If \a sync == 0, this function should return quickly without sleep;
+ * otherwise it might trigger and wait for the whole process of sending
+ * an context-destroying rpc to server.
+ */
+void sptlrpc_req_put_ctx(struct ptlrpc_request *req, int sync)
+{
+	LASSERT(req);
+	LASSERT(req->rq_cli_ctx);
+
+	/* request might be asked to release earlier while still
+	 * in the context waiting list.
+	 */
+	if (!list_empty(&req->rq_ctx_chain)) {
+		spin_lock(&req->rq_cli_ctx->cc_lock);
+		list_del_init(&req->rq_ctx_chain);
+		spin_unlock(&req->rq_cli_ctx->cc_lock);
+	}
+
+	sptlrpc_cli_ctx_put(req->rq_cli_ctx, sync);
+	req->rq_cli_ctx = NULL;
+}
+
+static
+int sptlrpc_req_ctx_switch(struct ptlrpc_request *req,
+			   struct ptlrpc_cli_ctx *oldctx,
+			   struct ptlrpc_cli_ctx *newctx)
+{
+	struct sptlrpc_flavor   old_flvr;
+	char		   *reqmsg = NULL; /* to workaround old gcc */
+	int		     reqmsg_size;
+	int		     rc = 0;
+
+	LASSERT(req->rq_reqmsg);
+	LASSERT(req->rq_reqlen);
+	LASSERT(req->rq_replen);
+
+	CDEBUG(D_SEC, "req %p: switch ctx %p(%u->%s) -> %p(%u->%s), switch sec %p(%s) -> %p(%s)\n",
+	       req,
+	       oldctx, oldctx->cc_vcred.vc_uid, sec2target_str(oldctx->cc_sec),
+	       newctx, newctx->cc_vcred.vc_uid, sec2target_str(newctx->cc_sec),
+	       oldctx->cc_sec, oldctx->cc_sec->ps_policy->sp_name,
+	       newctx->cc_sec, newctx->cc_sec->ps_policy->sp_name);
+
+	/* save flavor */
+	old_flvr = req->rq_flvr;
+
+	/* save request message */
+	reqmsg_size = req->rq_reqlen;
+	if (reqmsg_size != 0) {
+		OBD_ALLOC_LARGE(reqmsg, reqmsg_size);
+		if (reqmsg == NULL)
+			return -ENOMEM;
+		memcpy(reqmsg, req->rq_reqmsg, reqmsg_size);
+	}
+
+	/* release old req/rep buf */
+	req->rq_cli_ctx = oldctx;
+	sptlrpc_cli_free_reqbuf(req);
+	sptlrpc_cli_free_repbuf(req);
+	req->rq_cli_ctx = newctx;
+
+	/* recalculate the flavor */
+	sptlrpc_req_set_flavor(req, 0);
+
+	/* alloc new request buffer
+	 * we don't need to alloc reply buffer here, leave it to the
+	 * rest procedure of ptlrpc */
+	if (reqmsg_size != 0) {
+		rc = sptlrpc_cli_alloc_reqbuf(req, reqmsg_size);
+		if (!rc) {
+			LASSERT(req->rq_reqmsg);
+			memcpy(req->rq_reqmsg, reqmsg, reqmsg_size);
+		} else {
+			CWARN("failed to alloc reqbuf: %d\n", rc);
+			req->rq_flvr = old_flvr;
+		}
+
+		OBD_FREE_LARGE(reqmsg, reqmsg_size);
+	}
+	return rc;
+}
+
+/**
+ * If current context of \a req is dead somehow, e.g. we just switched flavor
+ * thus marked original contexts dead, we'll find a new context for it. if
+ * no switch is needed, \a req will end up with the same context.
+ *
+ * \note a request must have a context, to keep other parts of code happy.
+ * In any case of failure during the switching, we must restore the old one.
+ */
+int sptlrpc_req_replace_dead_ctx(struct ptlrpc_request *req)
+{
+	struct ptlrpc_cli_ctx *oldctx = req->rq_cli_ctx;
+	struct ptlrpc_cli_ctx *newctx;
+	int		    rc;
+
+	LASSERT(oldctx);
+
+	sptlrpc_cli_ctx_get(oldctx);
+	sptlrpc_req_put_ctx(req, 0);
+
+	rc = sptlrpc_req_get_ctx(req);
+	if (unlikely(rc)) {
+		LASSERT(!req->rq_cli_ctx);
+
+		/* restore old ctx */
+		req->rq_cli_ctx = oldctx;
+		return rc;
+	}
+
+	newctx = req->rq_cli_ctx;
+	LASSERT(newctx);
+
+	if (unlikely(newctx == oldctx &&
+		     test_bit(PTLRPC_CTX_DEAD_BIT, &oldctx->cc_flags))) {
+		/*
+		 * still get the old dead ctx, usually means system too busy
+		 */
+		CDEBUG(D_SEC,
+		       "ctx (%p, fl %lx) doesn't switch, relax a little bit\n",
+		       newctx, newctx->cc_flags);
+
+		set_current_state(TASK_INTERRUPTIBLE);
+		schedule_timeout(HZ);
+	} else {
+		/*
+		 * it's possible newctx == oldctx if we're switching
+		 * subflavor with the same sec.
+		 */
+		rc = sptlrpc_req_ctx_switch(req, oldctx, newctx);
+		if (rc) {
+			/* restore old ctx */
+			sptlrpc_req_put_ctx(req, 0);
+			req->rq_cli_ctx = oldctx;
+			return rc;
+		}
+
+		LASSERT(req->rq_cli_ctx == newctx);
+	}
+
+	sptlrpc_cli_ctx_put(oldctx, 1);
+	return 0;
+}
+EXPORT_SYMBOL(sptlrpc_req_replace_dead_ctx);
+
+static
+int ctx_check_refresh(struct ptlrpc_cli_ctx *ctx)
+{
+	if (cli_ctx_is_refreshed(ctx))
+		return 1;
+	return 0;
+}
+
+static
+int ctx_refresh_timeout(void *data)
+{
+	struct ptlrpc_request *req = data;
+	int rc;
+
+	/* conn_cnt is needed in expire_one_request */
+	lustre_msg_set_conn_cnt(req->rq_reqmsg, req->rq_import->imp_conn_cnt);
+
+	rc = ptlrpc_expire_one_request(req, 1);
+	/* if we started recovery, we should mark this ctx dead; otherwise
+	 * in case of lgssd died nobody would retire this ctx, following
+	 * connecting will still find the same ctx thus cause deadlock.
+	 * there's an assumption that expire time of the request should be
+	 * later than the context refresh expire time.
+	 */
+	if (rc == 0)
+		req->rq_cli_ctx->cc_ops->force_die(req->rq_cli_ctx, 0);
+	return rc;
+}
+
+static
+void ctx_refresh_interrupt(void *data)
+{
+	struct ptlrpc_request *req = data;
+
+	spin_lock(&req->rq_lock);
+	req->rq_intr = 1;
+	spin_unlock(&req->rq_lock);
+}
+
+static
+void req_off_ctx_list(struct ptlrpc_request *req, struct ptlrpc_cli_ctx *ctx)
+{
+	spin_lock(&ctx->cc_lock);
+	if (!list_empty(&req->rq_ctx_chain))
+		list_del_init(&req->rq_ctx_chain);
+	spin_unlock(&ctx->cc_lock);
+}
+
+/**
+ * To refresh the context of \req, if it's not up-to-date.
+ * \param timeout
+ * - < 0: don't wait
+ * - = 0: wait until success or fatal error occur
+ * - > 0: timeout value (in seconds)
+ *
+ * The status of the context could be subject to be changed by other threads
+ * at any time. We allow this race, but once we return with 0, the caller will
+ * suppose it's uptodated and keep using it until the owning rpc is done.
+ *
+ * \retval 0 only if the context is uptodated.
+ * \retval -ev error number.
+ */
+int sptlrpc_req_refresh_ctx(struct ptlrpc_request *req, long timeout)
+{
+	struct ptlrpc_cli_ctx  *ctx = req->rq_cli_ctx;
+	struct ptlrpc_sec      *sec;
+	struct l_wait_info      lwi;
+	int		     rc;
+
+	LASSERT(ctx);
+
+	if (req->rq_ctx_init || req->rq_ctx_fini)
+		return 0;
+
+	/*
+	 * during the process a request's context might change type even
+	 * (e.g. from gss ctx to null ctx), so each loop we need to re-check
+	 * everything
+	 */
+again:
+	rc = import_sec_validate_get(req->rq_import, &sec);
+	if (rc)
+		return rc;
+
+	if (sec->ps_flvr.sf_rpc != req->rq_flvr.sf_rpc) {
+		CDEBUG(D_SEC, "req %p: flavor has changed %x -> %x\n",
+		      req, req->rq_flvr.sf_rpc, sec->ps_flvr.sf_rpc);
+		req_off_ctx_list(req, ctx);
+		sptlrpc_req_replace_dead_ctx(req);
+		ctx = req->rq_cli_ctx;
+	}
+	sptlrpc_sec_put(sec);
+
+	if (cli_ctx_is_eternal(ctx))
+		return 0;
+
+	if (unlikely(test_bit(PTLRPC_CTX_NEW_BIT, &ctx->cc_flags))) {
+		LASSERT(ctx->cc_ops->refresh);
+		ctx->cc_ops->refresh(ctx);
+	}
+	LASSERT(test_bit(PTLRPC_CTX_NEW_BIT, &ctx->cc_flags) == 0);
+
+	LASSERT(ctx->cc_ops->validate);
+	if (ctx->cc_ops->validate(ctx) == 0) {
+		req_off_ctx_list(req, ctx);
+		return 0;
+	}
+
+	if (unlikely(test_bit(PTLRPC_CTX_ERROR_BIT, &ctx->cc_flags))) {
+		spin_lock(&req->rq_lock);
+		req->rq_err = 1;
+		spin_unlock(&req->rq_lock);
+		req_off_ctx_list(req, ctx);
+		return -EPERM;
+	}
+
+	/*
+	 * There's a subtle issue for resending RPCs, suppose following
+	 * situation:
+	 *  1. the request was sent to server.
+	 *  2. recovery was kicked start, after finished the request was
+	 *     marked as resent.
+	 *  3. resend the request.
+	 *  4. old reply from server received, we accept and verify the reply.
+	 *     this has to be success, otherwise the error will be aware
+	 *     by application.
+	 *  5. new reply from server received, dropped by LNet.
+	 *
+	 * Note the xid of old & new request is the same. We can't simply
+	 * change xid for the resent request because the server replies on
+	 * it for reply reconstruction.
+	 *
+	 * Commonly the original context should be uptodate because we
+	 * have a expiry nice time; server will keep its context because
+	 * we at least hold a ref of old context which prevent context
+	 * destroying RPC being sent. So server still can accept the request
+	 * and finish the RPC. But if that's not the case:
+	 *  1. If server side context has been trimmed, a NO_CONTEXT will
+	 *     be returned, gss_cli_ctx_verify/unseal will switch to new
+	 *     context by force.
+	 *  2. Current context never be refreshed, then we are fine: we
+	 *     never really send request with old context before.
+	 */
+	if (test_bit(PTLRPC_CTX_UPTODATE_BIT, &ctx->cc_flags) &&
+	    unlikely(req->rq_reqmsg) &&
+	    lustre_msg_get_flags(req->rq_reqmsg) & MSG_RESENT) {
+		req_off_ctx_list(req, ctx);
+		return 0;
+	}
+
+	if (unlikely(test_bit(PTLRPC_CTX_DEAD_BIT, &ctx->cc_flags))) {
+		req_off_ctx_list(req, ctx);
+		/*
+		 * don't switch ctx if import was deactivated
+		 */
+		if (req->rq_import->imp_deactive) {
+			spin_lock(&req->rq_lock);
+			req->rq_err = 1;
+			spin_unlock(&req->rq_lock);
+			return -EINTR;
+		}
+
+		rc = sptlrpc_req_replace_dead_ctx(req);
+		if (rc) {
+			LASSERT(ctx == req->rq_cli_ctx);
+			CERROR("req %p: failed to replace dead ctx %p: %d\n",
+			       req, ctx, rc);
+			spin_lock(&req->rq_lock);
+			req->rq_err = 1;
+			spin_unlock(&req->rq_lock);
+			return rc;
+		}
+
+		ctx = req->rq_cli_ctx;
+		goto again;
+	}
+
+	/*
+	 * Now we're sure this context is during upcall, add myself into
+	 * waiting list
+	 */
+	spin_lock(&ctx->cc_lock);
+	if (list_empty(&req->rq_ctx_chain))
+		list_add(&req->rq_ctx_chain, &ctx->cc_req_list);
+	spin_unlock(&ctx->cc_lock);
+
+	if (timeout < 0)
+		return -EWOULDBLOCK;
+
+	/* Clear any flags that may be present from previous sends */
+	LASSERT(req->rq_receiving_reply == 0);
+	spin_lock(&req->rq_lock);
+	req->rq_err = 0;
+	req->rq_timedout = 0;
+	req->rq_resend = 0;
+	req->rq_restart = 0;
+	spin_unlock(&req->rq_lock);
+
+	lwi = LWI_TIMEOUT_INTR(timeout * HZ, ctx_refresh_timeout,
+			       ctx_refresh_interrupt, req);
+	rc = l_wait_event(req->rq_reply_waitq, ctx_check_refresh(ctx), &lwi);
+
+	/*
+	 * following cases could lead us here:
+	 * - successfully refreshed;
+	 * - interrupted;
+	 * - timedout, and we don't want recover from the failure;
+	 * - timedout, and waked up upon recovery finished;
+	 * - someone else mark this ctx dead by force;
+	 * - someone invalidate the req and call ptlrpc_client_wake_req(),
+	 *   e.g. ptlrpc_abort_inflight();
+	 */
+	if (!cli_ctx_is_refreshed(ctx)) {
+		/* timed out or interrupted */
+		req_off_ctx_list(req, ctx);
+
+		LASSERT(rc != 0);
+		return rc;
+	}
+
+	goto again;
+}
+
+/**
+ * Initialize flavor settings for \a req, according to \a opcode.
+ *
+ * \note this could be called in two situations:
+ * - new request from ptlrpc_pre_req(), with proper @opcode
+ * - old request which changed ctx in the middle, with @opcode == 0
+ */
+void sptlrpc_req_set_flavor(struct ptlrpc_request *req, int opcode)
+{
+	struct ptlrpc_sec *sec;
+
+	LASSERT(req->rq_import);
+	LASSERT(req->rq_cli_ctx);
+	LASSERT(req->rq_cli_ctx->cc_sec);
+	LASSERT(req->rq_bulk_read == 0 || req->rq_bulk_write == 0);
+
+	/* special security flags according to opcode */
+	switch (opcode) {
+	case OST_READ:
+	case MDS_READPAGE:
+	case MGS_CONFIG_READ:
+	case OBD_IDX_READ:
+		req->rq_bulk_read = 1;
+		break;
+	case OST_WRITE:
+	case MDS_WRITEPAGE:
+		req->rq_bulk_write = 1;
+		break;
+	case SEC_CTX_INIT:
+		req->rq_ctx_init = 1;
+		break;
+	case SEC_CTX_FINI:
+		req->rq_ctx_fini = 1;
+		break;
+	case 0:
+		/* init/fini rpc won't be resend, so can't be here */
+		LASSERT(req->rq_ctx_init == 0);
+		LASSERT(req->rq_ctx_fini == 0);
+
+		/* cleanup flags, which should be recalculated */
+		req->rq_pack_udesc = 0;
+		req->rq_pack_bulk = 0;
+		break;
+	}
+
+	sec = req->rq_cli_ctx->cc_sec;
+
+	spin_lock(&sec->ps_lock);
+	req->rq_flvr = sec->ps_flvr;
+	spin_unlock(&sec->ps_lock);
+
+	/* force SVC_NULL for context initiation rpc, SVC_INTG for context
+	 * destruction rpc */
+	if (unlikely(req->rq_ctx_init))
+		flvr_set_svc(&req->rq_flvr.sf_rpc, SPTLRPC_SVC_NULL);
+	else if (unlikely(req->rq_ctx_fini))
+		flvr_set_svc(&req->rq_flvr.sf_rpc, SPTLRPC_SVC_INTG);
+
+	/* user descriptor flag, null security can't do it anyway */
+	if ((sec->ps_flvr.sf_flags & PTLRPC_SEC_FL_UDESC) &&
+	    (req->rq_flvr.sf_rpc != SPTLRPC_FLVR_NULL))
+		req->rq_pack_udesc = 1;
+
+	/* bulk security flag */
+	if ((req->rq_bulk_read || req->rq_bulk_write) &&
+	    sptlrpc_flavor_has_bulk(&req->rq_flvr))
+		req->rq_pack_bulk = 1;
+}
+
+void sptlrpc_request_out_callback(struct ptlrpc_request *req)
+{
+	if (SPTLRPC_FLVR_SVC(req->rq_flvr.sf_rpc) != SPTLRPC_SVC_PRIV)
+		return;
+
+	LASSERT(req->rq_clrbuf);
+	if (req->rq_pool || !req->rq_reqbuf)
+		return;
+
+	OBD_FREE(req->rq_reqbuf, req->rq_reqbuf_len);
+	req->rq_reqbuf = NULL;
+	req->rq_reqbuf_len = 0;
+}
+
+/**
+ * Given an import \a imp, check whether current user has a valid context
+ * or not. We may create a new context and try to refresh it, and try
+ * repeatedly try in case of non-fatal errors. Return 0 means success.
+ */
+int sptlrpc_import_check_ctx(struct obd_import *imp)
+{
+	struct ptlrpc_sec     *sec;
+	struct ptlrpc_cli_ctx *ctx;
+	struct ptlrpc_request *req = NULL;
+	int rc;
+
+	might_sleep();
+
+	sec = sptlrpc_import_sec_ref(imp);
+	ctx = get_my_ctx(sec);
+	sptlrpc_sec_put(sec);
+
+	if (!ctx)
+		return -ENOMEM;
+
+	if (cli_ctx_is_eternal(ctx) ||
+	    ctx->cc_ops->validate(ctx) == 0) {
+		sptlrpc_cli_ctx_put(ctx, 1);
+		return 0;
+	}
+
+	if (cli_ctx_is_error(ctx)) {
+		sptlrpc_cli_ctx_put(ctx, 1);
+		return -EACCES;
+	}
+
+	req = ptlrpc_request_cache_alloc(GFP_NOFS);
+	if (!req)
+		return -ENOMEM;
+
+	spin_lock_init(&req->rq_lock);
+	atomic_set(&req->rq_refcount, 10000);
+	INIT_LIST_HEAD(&req->rq_ctx_chain);
+	init_waitqueue_head(&req->rq_reply_waitq);
+	init_waitqueue_head(&req->rq_set_waitq);
+	req->rq_import = imp;
+	req->rq_flvr = sec->ps_flvr;
+	req->rq_cli_ctx = ctx;
+
+	rc = sptlrpc_req_refresh_ctx(req, 0);
+	LASSERT(list_empty(&req->rq_ctx_chain));
+	sptlrpc_cli_ctx_put(req->rq_cli_ctx, 1);
+	ptlrpc_request_cache_free(req);
+
+	return rc;
+}
+
+/**
+ * Used by ptlrpc client, to perform the pre-defined security transformation
+ * upon the request message of \a req. After this function called,
+ * req->rq_reqmsg is still accessible as clear text.
+ */
+int sptlrpc_cli_wrap_request(struct ptlrpc_request *req)
+{
+	struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx;
+	int rc = 0;
+
+	LASSERT(ctx);
+	LASSERT(ctx->cc_sec);
+	LASSERT(req->rq_reqbuf || req->rq_clrbuf);
+
+	/* we wrap bulk request here because now we can be sure
+	 * the context is uptodate.
+	 */
+	if (req->rq_bulk) {
+		rc = sptlrpc_cli_wrap_bulk(req, req->rq_bulk);
+		if (rc)
+			return rc;
+	}
+
+	switch (SPTLRPC_FLVR_SVC(req->rq_flvr.sf_rpc)) {
+	case SPTLRPC_SVC_NULL:
+	case SPTLRPC_SVC_AUTH:
+	case SPTLRPC_SVC_INTG:
+		LASSERT(ctx->cc_ops->sign);
+		rc = ctx->cc_ops->sign(ctx, req);
+		break;
+	case SPTLRPC_SVC_PRIV:
+		LASSERT(ctx->cc_ops->seal);
+		rc = ctx->cc_ops->seal(ctx, req);
+		break;
+	default:
+		LBUG();
+	}
+
+	if (rc == 0) {
+		LASSERT(req->rq_reqdata_len);
+		LASSERT(req->rq_reqdata_len % 8 == 0);
+		LASSERT(req->rq_reqdata_len <= req->rq_reqbuf_len);
+	}
+
+	return rc;
+}
+
+static int do_cli_unwrap_reply(struct ptlrpc_request *req)
+{
+	struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx;
+	int		    rc;
+
+	LASSERT(ctx);
+	LASSERT(ctx->cc_sec);
+	LASSERT(req->rq_repbuf);
+	LASSERT(req->rq_repdata);
+	LASSERT(req->rq_repmsg == NULL);
+
+	req->rq_rep_swab_mask = 0;
+
+	rc = __lustre_unpack_msg(req->rq_repdata, req->rq_repdata_len);
+	switch (rc) {
+	case 1:
+		lustre_set_rep_swabbed(req, MSG_PTLRPC_HEADER_OFF);
+	case 0:
+		break;
+	default:
+		CERROR("failed unpack reply: x%llu\n", req->rq_xid);
+		return -EPROTO;
+	}
+
+	if (req->rq_repdata_len < sizeof(struct lustre_msg)) {
+		CERROR("replied data length %d too small\n",
+		       req->rq_repdata_len);
+		return -EPROTO;
+	}
+
+	if (SPTLRPC_FLVR_POLICY(req->rq_repdata->lm_secflvr) !=
+	    SPTLRPC_FLVR_POLICY(req->rq_flvr.sf_rpc)) {
+		CERROR("reply policy %u doesn't match request policy %u\n",
+		       SPTLRPC_FLVR_POLICY(req->rq_repdata->lm_secflvr),
+		       SPTLRPC_FLVR_POLICY(req->rq_flvr.sf_rpc));
+		return -EPROTO;
+	}
+
+	switch (SPTLRPC_FLVR_SVC(req->rq_flvr.sf_rpc)) {
+	case SPTLRPC_SVC_NULL:
+	case SPTLRPC_SVC_AUTH:
+	case SPTLRPC_SVC_INTG:
+		LASSERT(ctx->cc_ops->verify);
+		rc = ctx->cc_ops->verify(ctx, req);
+		break;
+	case SPTLRPC_SVC_PRIV:
+		LASSERT(ctx->cc_ops->unseal);
+		rc = ctx->cc_ops->unseal(ctx, req);
+		break;
+	default:
+		LBUG();
+	}
+	LASSERT(rc || req->rq_repmsg || req->rq_resend);
+
+	if (SPTLRPC_FLVR_POLICY(req->rq_flvr.sf_rpc) != SPTLRPC_POLICY_NULL &&
+	    !req->rq_ctx_init)
+		req->rq_rep_swab_mask = 0;
+	return rc;
+}
+
+/**
+ * Used by ptlrpc client, to perform security transformation upon the reply
+ * message of \a req. After return successfully, req->rq_repmsg points to
+ * the reply message in clear text.
+ *
+ * \pre the reply buffer should have been un-posted from LNet, so nothing is
+ * going to change.
+ */
+int sptlrpc_cli_unwrap_reply(struct ptlrpc_request *req)
+{
+	LASSERT(req->rq_repbuf);
+	LASSERT(req->rq_repdata == NULL);
+	LASSERT(req->rq_repmsg == NULL);
+	LASSERT(req->rq_reply_off + req->rq_nob_received <= req->rq_repbuf_len);
+
+	if (req->rq_reply_off == 0 &&
+	    (lustre_msghdr_get_flags(req->rq_reqmsg) & MSGHDR_AT_SUPPORT)) {
+		CERROR("real reply with offset 0\n");
+		return -EPROTO;
+	}
+
+	if (req->rq_reply_off % 8 != 0) {
+		CERROR("reply at odd offset %u\n", req->rq_reply_off);
+		return -EPROTO;
+	}
+
+	req->rq_repdata = (struct lustre_msg *)
+				(req->rq_repbuf + req->rq_reply_off);
+	req->rq_repdata_len = req->rq_nob_received;
+
+	return do_cli_unwrap_reply(req);
+}
+
+/**
+ * Used by ptlrpc client, to perform security transformation upon the early
+ * reply message of \a req. We expect the rq_reply_off is 0, and
+ * rq_nob_received is the early reply size.
+ *
+ * Because the receive buffer might be still posted, the reply data might be
+ * changed at any time, no matter we're holding rq_lock or not. For this reason
+ * we allocate a separate ptlrpc_request and reply buffer for early reply
+ * processing.
+ *
+ * \retval 0 success, \a req_ret is filled with a duplicated ptlrpc_request.
+ * Later the caller must call sptlrpc_cli_finish_early_reply() on the returned
+ * \a *req_ret to release it.
+ * \retval -ev error number, and \a req_ret will not be set.
+ */
+int sptlrpc_cli_unwrap_early_reply(struct ptlrpc_request *req,
+				   struct ptlrpc_request **req_ret)
+{
+	struct ptlrpc_request  *early_req;
+	char		   *early_buf;
+	int		     early_bufsz, early_size;
+	int		     rc;
+
+	early_req = ptlrpc_request_cache_alloc(GFP_NOFS);
+	if (early_req == NULL)
+		return -ENOMEM;
+
+	early_size = req->rq_nob_received;
+	early_bufsz = size_roundup_power2(early_size);
+	OBD_ALLOC_LARGE(early_buf, early_bufsz);
+	if (early_buf == NULL) {
+		rc = -ENOMEM;
+		goto err_req;
+	}
+
+	/* sanity checkings and copy data out, do it inside spinlock */
+	spin_lock(&req->rq_lock);
+
+	if (req->rq_replied) {
+		spin_unlock(&req->rq_lock);
+		rc = -EALREADY;
+		goto err_buf;
+	}
+
+	LASSERT(req->rq_repbuf);
+	LASSERT(req->rq_repdata == NULL);
+	LASSERT(req->rq_repmsg == NULL);
+
+	if (req->rq_reply_off != 0) {
+		CERROR("early reply with offset %u\n", req->rq_reply_off);
+		spin_unlock(&req->rq_lock);
+		rc = -EPROTO;
+		goto err_buf;
+	}
+
+	if (req->rq_nob_received != early_size) {
+		/* even another early arrived the size should be the same */
+		CERROR("data size has changed from %u to %u\n",
+		       early_size, req->rq_nob_received);
+		spin_unlock(&req->rq_lock);
+		rc = -EINVAL;
+		goto err_buf;
+	}
+
+	if (req->rq_nob_received < sizeof(struct lustre_msg)) {
+		CERROR("early reply length %d too small\n",
+		       req->rq_nob_received);
+		spin_unlock(&req->rq_lock);
+		rc = -EALREADY;
+		goto err_buf;
+	}
+
+	memcpy(early_buf, req->rq_repbuf, early_size);
+	spin_unlock(&req->rq_lock);
+
+	spin_lock_init(&early_req->rq_lock);
+	early_req->rq_cli_ctx = sptlrpc_cli_ctx_get(req->rq_cli_ctx);
+	early_req->rq_flvr = req->rq_flvr;
+	early_req->rq_repbuf = early_buf;
+	early_req->rq_repbuf_len = early_bufsz;
+	early_req->rq_repdata = (struct lustre_msg *) early_buf;
+	early_req->rq_repdata_len = early_size;
+	early_req->rq_early = 1;
+	early_req->rq_reqmsg = req->rq_reqmsg;
+
+	rc = do_cli_unwrap_reply(early_req);
+	if (rc) {
+		DEBUG_REQ(D_ADAPTTO, early_req,
+			  "error %d unwrap early reply", rc);
+		goto err_ctx;
+	}
+
+	LASSERT(early_req->rq_repmsg);
+	*req_ret = early_req;
+	return 0;
+
+err_ctx:
+	sptlrpc_cli_ctx_put(early_req->rq_cli_ctx, 1);
+err_buf:
+	OBD_FREE_LARGE(early_buf, early_bufsz);
+err_req:
+	ptlrpc_request_cache_free(early_req);
+	return rc;
+}
+
+/**
+ * Used by ptlrpc client, to release a processed early reply \a early_req.
+ *
+ * \pre \a early_req was obtained from calling sptlrpc_cli_unwrap_early_reply().
+ */
+void sptlrpc_cli_finish_early_reply(struct ptlrpc_request *early_req)
+{
+	LASSERT(early_req->rq_repbuf);
+	LASSERT(early_req->rq_repdata);
+	LASSERT(early_req->rq_repmsg);
+
+	sptlrpc_cli_ctx_put(early_req->rq_cli_ctx, 1);
+	OBD_FREE_LARGE(early_req->rq_repbuf, early_req->rq_repbuf_len);
+	ptlrpc_request_cache_free(early_req);
+}
+
+/**************************************************
+ * sec ID					 *
+ **************************************************/
+
+/*
+ * "fixed" sec (e.g. null) use sec_id < 0
+ */
+static atomic_t sptlrpc_sec_id = ATOMIC_INIT(1);
+
+int sptlrpc_get_next_secid(void)
+{
+	return atomic_inc_return(&sptlrpc_sec_id);
+}
+EXPORT_SYMBOL(sptlrpc_get_next_secid);
+
+/**************************************************
+ * client side high-level security APIs	   *
+ **************************************************/
+
+static int sec_cop_flush_ctx_cache(struct ptlrpc_sec *sec, uid_t uid,
+				   int grace, int force)
+{
+	struct ptlrpc_sec_policy *policy = sec->ps_policy;
+
+	LASSERT(policy->sp_cops);
+	LASSERT(policy->sp_cops->flush_ctx_cache);
+
+	return policy->sp_cops->flush_ctx_cache(sec, uid, grace, force);
+}
+
+static void sec_cop_destroy_sec(struct ptlrpc_sec *sec)
+{
+	struct ptlrpc_sec_policy *policy = sec->ps_policy;
+
+	LASSERT_ATOMIC_ZERO(&sec->ps_refcount);
+	LASSERT_ATOMIC_ZERO(&sec->ps_nctx);
+	LASSERT(policy->sp_cops->destroy_sec);
+
+	CDEBUG(D_SEC, "%s@%p: being destroyed\n", sec->ps_policy->sp_name, sec);
+
+	policy->sp_cops->destroy_sec(sec);
+	sptlrpc_policy_put(policy);
+}
+
+void sptlrpc_sec_destroy(struct ptlrpc_sec *sec)
+{
+	sec_cop_destroy_sec(sec);
+}
+EXPORT_SYMBOL(sptlrpc_sec_destroy);
+
+static void sptlrpc_sec_kill(struct ptlrpc_sec *sec)
+{
+	LASSERT_ATOMIC_POS(&sec->ps_refcount);
+
+	if (sec->ps_policy->sp_cops->kill_sec) {
+		sec->ps_policy->sp_cops->kill_sec(sec);
+
+		sec_cop_flush_ctx_cache(sec, -1, 1, 1);
+	}
+}
+
+struct ptlrpc_sec *sptlrpc_sec_get(struct ptlrpc_sec *sec)
+{
+	if (sec)
+		atomic_inc(&sec->ps_refcount);
+
+	return sec;
+}
+EXPORT_SYMBOL(sptlrpc_sec_get);
+
+void sptlrpc_sec_put(struct ptlrpc_sec *sec)
+{
+	if (sec) {
+		LASSERT_ATOMIC_POS(&sec->ps_refcount);
+
+		if (atomic_dec_and_test(&sec->ps_refcount)) {
+			sptlrpc_gc_del_sec(sec);
+			sec_cop_destroy_sec(sec);
+		}
+	}
+}
+EXPORT_SYMBOL(sptlrpc_sec_put);
+
+/*
+ * policy module is responsible for taking reference of import
+ */
+static
+struct ptlrpc_sec *sptlrpc_sec_create(struct obd_import *imp,
+				       struct ptlrpc_svc_ctx *svc_ctx,
+				       struct sptlrpc_flavor *sf,
+				       enum lustre_sec_part sp)
+{
+	struct ptlrpc_sec_policy *policy;
+	struct ptlrpc_sec	*sec;
+	char		      str[32];
+
+	if (svc_ctx) {
+		LASSERT(imp->imp_dlm_fake == 1);
+
+		CDEBUG(D_SEC, "%s %s: reverse sec using flavor %s\n",
+		       imp->imp_obd->obd_type->typ_name,
+		       imp->imp_obd->obd_name,
+		       sptlrpc_flavor2name(sf, str, sizeof(str)));
+
+		policy = sptlrpc_policy_get(svc_ctx->sc_policy);
+		sf->sf_flags |= PTLRPC_SEC_FL_REVERSE | PTLRPC_SEC_FL_ROOTONLY;
+	} else {
+		LASSERT(imp->imp_dlm_fake == 0);
+
+		CDEBUG(D_SEC, "%s %s: select security flavor %s\n",
+		       imp->imp_obd->obd_type->typ_name,
+		       imp->imp_obd->obd_name,
+		       sptlrpc_flavor2name(sf, str, sizeof(str)));
+
+		policy = sptlrpc_wireflavor2policy(sf->sf_rpc);
+		if (!policy) {
+			CERROR("invalid flavor 0x%x\n", sf->sf_rpc);
+			return NULL;
+		}
+	}
+
+	sec = policy->sp_cops->create_sec(imp, svc_ctx, sf);
+	if (sec) {
+		atomic_inc(&sec->ps_refcount);
+
+		sec->ps_part = sp;
+
+		if (sec->ps_gc_interval && policy->sp_cops->gc_ctx)
+			sptlrpc_gc_add_sec(sec);
+	} else {
+		sptlrpc_policy_put(policy);
+	}
+
+	return sec;
+}
+
+struct ptlrpc_sec *sptlrpc_import_sec_ref(struct obd_import *imp)
+{
+	struct ptlrpc_sec *sec;
+
+	spin_lock(&imp->imp_lock);
+	sec = sptlrpc_sec_get(imp->imp_sec);
+	spin_unlock(&imp->imp_lock);
+
+	return sec;
+}
+EXPORT_SYMBOL(sptlrpc_import_sec_ref);
+
+static void sptlrpc_import_sec_install(struct obd_import *imp,
+				       struct ptlrpc_sec *sec)
+{
+	struct ptlrpc_sec *old_sec;
+
+	LASSERT_ATOMIC_POS(&sec->ps_refcount);
+
+	spin_lock(&imp->imp_lock);
+	old_sec = imp->imp_sec;
+	imp->imp_sec = sec;
+	spin_unlock(&imp->imp_lock);
+
+	if (old_sec) {
+		sptlrpc_sec_kill(old_sec);
+
+		/* balance the ref taken by this import */
+		sptlrpc_sec_put(old_sec);
+	}
+}
+
+static inline
+int flavor_equal(struct sptlrpc_flavor *sf1, struct sptlrpc_flavor *sf2)
+{
+	return (memcmp(sf1, sf2, sizeof(*sf1)) == 0);
+}
+
+static inline
+void flavor_copy(struct sptlrpc_flavor *dst, struct sptlrpc_flavor *src)
+{
+	*dst = *src;
+}
+
+static void sptlrpc_import_sec_adapt_inplace(struct obd_import *imp,
+					     struct ptlrpc_sec *sec,
+					     struct sptlrpc_flavor *sf)
+{
+	char    str1[32], str2[32];
+
+	if (sec->ps_flvr.sf_flags != sf->sf_flags)
+		CDEBUG(D_SEC, "changing sec flags: %s -> %s\n",
+		       sptlrpc_secflags2str(sec->ps_flvr.sf_flags,
+					    str1, sizeof(str1)),
+		       sptlrpc_secflags2str(sf->sf_flags,
+					    str2, sizeof(str2)));
+
+	spin_lock(&sec->ps_lock);
+	flavor_copy(&sec->ps_flvr, sf);
+	spin_unlock(&sec->ps_lock);
+}
+
+/**
+ * To get an appropriate ptlrpc_sec for the \a imp, according to the current
+ * configuration. Upon called, imp->imp_sec may or may not be NULL.
+ *
+ *  - regular import: \a svc_ctx should be NULL and \a flvr is ignored;
+ *  - reverse import: \a svc_ctx and \a flvr are obtained from incoming request.
+ */
+int sptlrpc_import_sec_adapt(struct obd_import *imp,
+			     struct ptlrpc_svc_ctx *svc_ctx,
+			     struct sptlrpc_flavor *flvr)
+{
+	struct ptlrpc_connection   *conn;
+	struct sptlrpc_flavor       sf;
+	struct ptlrpc_sec	  *sec, *newsec;
+	enum lustre_sec_part	sp;
+	char			str[24];
+	int			 rc = 0;
+
+	might_sleep();
+
+	if (imp == NULL)
+		return 0;
+
+	conn = imp->imp_connection;
+
+	if (svc_ctx == NULL) {
+		struct client_obd *cliobd = &imp->imp_obd->u.cli;
+		/*
+		 * normal import, determine flavor from rule set, except
+		 * for mgc the flavor is predetermined.
+		 */
+		if (cliobd->cl_sp_me == LUSTRE_SP_MGC)
+			sf = cliobd->cl_flvr_mgc;
+		else
+			sptlrpc_conf_choose_flavor(cliobd->cl_sp_me,
+						   cliobd->cl_sp_to,
+						   &cliobd->cl_target_uuid,
+						   conn->c_self, &sf);
+
+		sp = imp->imp_obd->u.cli.cl_sp_me;
+	} else {
+		/* reverse import, determine flavor from incoming request */
+		sf = *flvr;
+
+		if (sf.sf_rpc != SPTLRPC_FLVR_NULL)
+			sf.sf_flags = PTLRPC_SEC_FL_REVERSE |
+				      PTLRPC_SEC_FL_ROOTONLY;
+
+		sp = sptlrpc_target_sec_part(imp->imp_obd);
+	}
+
+	sec = sptlrpc_import_sec_ref(imp);
+	if (sec) {
+		char    str2[24];
+
+		if (flavor_equal(&sf, &sec->ps_flvr))
+			goto out;
+
+		CDEBUG(D_SEC, "import %s->%s: changing flavor %s -> %s\n",
+		       imp->imp_obd->obd_name,
+		       obd_uuid2str(&conn->c_remote_uuid),
+		       sptlrpc_flavor2name(&sec->ps_flvr, str, sizeof(str)),
+		       sptlrpc_flavor2name(&sf, str2, sizeof(str2)));
+
+		if (SPTLRPC_FLVR_POLICY(sf.sf_rpc) ==
+		    SPTLRPC_FLVR_POLICY(sec->ps_flvr.sf_rpc) &&
+		    SPTLRPC_FLVR_MECH(sf.sf_rpc) ==
+		    SPTLRPC_FLVR_MECH(sec->ps_flvr.sf_rpc)) {
+			sptlrpc_import_sec_adapt_inplace(imp, sec, &sf);
+			goto out;
+		}
+	} else if (SPTLRPC_FLVR_BASE(sf.sf_rpc) !=
+		   SPTLRPC_FLVR_BASE(SPTLRPC_FLVR_NULL)) {
+		CDEBUG(D_SEC, "import %s->%s netid %x: select flavor %s\n",
+		       imp->imp_obd->obd_name,
+		       obd_uuid2str(&conn->c_remote_uuid),
+		       LNET_NIDNET(conn->c_self),
+		       sptlrpc_flavor2name(&sf, str, sizeof(str)));
+	}
+
+	mutex_lock(&imp->imp_sec_mutex);
+
+	newsec = sptlrpc_sec_create(imp, svc_ctx, &sf, sp);
+	if (newsec) {
+		sptlrpc_import_sec_install(imp, newsec);
+	} else {
+		CERROR("import %s->%s: failed to create new sec\n",
+		       imp->imp_obd->obd_name,
+		       obd_uuid2str(&conn->c_remote_uuid));
+		rc = -EPERM;
+	}
+
+	mutex_unlock(&imp->imp_sec_mutex);
+out:
+	sptlrpc_sec_put(sec);
+	return rc;
+}
+
+void sptlrpc_import_sec_put(struct obd_import *imp)
+{
+	if (imp->imp_sec) {
+		sptlrpc_sec_kill(imp->imp_sec);
+
+		sptlrpc_sec_put(imp->imp_sec);
+		imp->imp_sec = NULL;
+	}
+}
+
+static void import_flush_ctx_common(struct obd_import *imp,
+				    uid_t uid, int grace, int force)
+{
+	struct ptlrpc_sec *sec;
+
+	if (imp == NULL)
+		return;
+
+	sec = sptlrpc_import_sec_ref(imp);
+	if (sec == NULL)
+		return;
+
+	sec_cop_flush_ctx_cache(sec, uid, grace, force);
+	sptlrpc_sec_put(sec);
+}
+
+void sptlrpc_import_flush_root_ctx(struct obd_import *imp)
+{
+	/* it's important to use grace mode, see explain in
+	 * sptlrpc_req_refresh_ctx() */
+	import_flush_ctx_common(imp, 0, 1, 1);
+}
+
+void sptlrpc_import_flush_my_ctx(struct obd_import *imp)
+{
+	import_flush_ctx_common(imp, from_kuid(&init_user_ns, current_uid()),
+				1, 1);
+}
+EXPORT_SYMBOL(sptlrpc_import_flush_my_ctx);
+
+void sptlrpc_import_flush_all_ctx(struct obd_import *imp)
+{
+	import_flush_ctx_common(imp, -1, 1, 1);
+}
+EXPORT_SYMBOL(sptlrpc_import_flush_all_ctx);
+
+/**
+ * Used by ptlrpc client to allocate request buffer of \a req. Upon return
+ * successfully, req->rq_reqmsg points to a buffer with size \a msgsize.
+ */
+int sptlrpc_cli_alloc_reqbuf(struct ptlrpc_request *req, int msgsize)
+{
+	struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx;
+	struct ptlrpc_sec_policy *policy;
+	int rc;
+
+	LASSERT(ctx);
+	LASSERT(ctx->cc_sec);
+	LASSERT(ctx->cc_sec->ps_policy);
+	LASSERT(req->rq_reqmsg == NULL);
+	LASSERT_ATOMIC_POS(&ctx->cc_refcount);
+
+	policy = ctx->cc_sec->ps_policy;
+	rc = policy->sp_cops->alloc_reqbuf(ctx->cc_sec, req, msgsize);
+	if (!rc) {
+		LASSERT(req->rq_reqmsg);
+		LASSERT(req->rq_reqbuf || req->rq_clrbuf);
+
+		/* zeroing preallocated buffer */
+		if (req->rq_pool)
+			memset(req->rq_reqmsg, 0, msgsize);
+	}
+
+	return rc;
+}
+
+/**
+ * Used by ptlrpc client to free request buffer of \a req. After this
+ * req->rq_reqmsg is set to NULL and should not be accessed anymore.
+ */
+void sptlrpc_cli_free_reqbuf(struct ptlrpc_request *req)
+{
+	struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx;
+	struct ptlrpc_sec_policy *policy;
+
+	LASSERT(ctx);
+	LASSERT(ctx->cc_sec);
+	LASSERT(ctx->cc_sec->ps_policy);
+	LASSERT_ATOMIC_POS(&ctx->cc_refcount);
+
+	if (req->rq_reqbuf == NULL && req->rq_clrbuf == NULL)
+		return;
+
+	policy = ctx->cc_sec->ps_policy;
+	policy->sp_cops->free_reqbuf(ctx->cc_sec, req);
+	req->rq_reqmsg = NULL;
+}
+
+/*
+ * NOTE caller must guarantee the buffer size is enough for the enlargement
+ */
+void _sptlrpc_enlarge_msg_inplace(struct lustre_msg *msg,
+				  int segment, int newsize)
+{
+	void   *src, *dst;
+	int     oldsize, oldmsg_size, movesize;
+
+	LASSERT(segment < msg->lm_bufcount);
+	LASSERT(msg->lm_buflens[segment] <= newsize);
+
+	if (msg->lm_buflens[segment] == newsize)
+		return;
+
+	/* nothing to do if we are enlarging the last segment */
+	if (segment == msg->lm_bufcount - 1) {
+		msg->lm_buflens[segment] = newsize;
+		return;
+	}
+
+	oldsize = msg->lm_buflens[segment];
+
+	src = lustre_msg_buf(msg, segment + 1, 0);
+	msg->lm_buflens[segment] = newsize;
+	dst = lustre_msg_buf(msg, segment + 1, 0);
+	msg->lm_buflens[segment] = oldsize;
+
+	/* move from segment + 1 to end segment */
+	LASSERT(msg->lm_magic == LUSTRE_MSG_MAGIC_V2);
+	oldmsg_size = lustre_msg_size_v2(msg->lm_bufcount, msg->lm_buflens);
+	movesize = oldmsg_size - ((unsigned long) src - (unsigned long) msg);
+	LASSERT(movesize >= 0);
+
+	if (movesize)
+		memmove(dst, src, movesize);
+
+	/* note we don't clear the ares where old data live, not secret */
+
+	/* finally set new segment size */
+	msg->lm_buflens[segment] = newsize;
+}
+EXPORT_SYMBOL(_sptlrpc_enlarge_msg_inplace);
+
+/**
+ * Used by ptlrpc client to enlarge the \a segment of request message pointed
+ * by req->rq_reqmsg to size \a newsize, all previously filled-in data will be
+ * preserved after the enlargement. this must be called after original request
+ * buffer being allocated.
+ *
+ * \note after this be called, rq_reqmsg and rq_reqlen might have been changed,
+ * so caller should refresh its local pointers if needed.
+ */
+int sptlrpc_cli_enlarge_reqbuf(struct ptlrpc_request *req,
+			       int segment, int newsize)
+{
+	struct ptlrpc_cli_ctx    *ctx = req->rq_cli_ctx;
+	struct ptlrpc_sec_cops   *cops;
+	struct lustre_msg	*msg = req->rq_reqmsg;
+
+	LASSERT(ctx);
+	LASSERT(msg);
+	LASSERT(msg->lm_bufcount > segment);
+	LASSERT(msg->lm_buflens[segment] <= newsize);
+
+	if (msg->lm_buflens[segment] == newsize)
+		return 0;
+
+	cops = ctx->cc_sec->ps_policy->sp_cops;
+	LASSERT(cops->enlarge_reqbuf);
+	return cops->enlarge_reqbuf(ctx->cc_sec, req, segment, newsize);
+}
+EXPORT_SYMBOL(sptlrpc_cli_enlarge_reqbuf);
+
+/**
+ * Used by ptlrpc client to allocate reply buffer of \a req.
+ *
+ * \note After this, req->rq_repmsg is still not accessible.
+ */
+int sptlrpc_cli_alloc_repbuf(struct ptlrpc_request *req, int msgsize)
+{
+	struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx;
+	struct ptlrpc_sec_policy *policy;
+
+	LASSERT(ctx);
+	LASSERT(ctx->cc_sec);
+	LASSERT(ctx->cc_sec->ps_policy);
+
+	if (req->rq_repbuf)
+		return 0;
+
+	policy = ctx->cc_sec->ps_policy;
+	return policy->sp_cops->alloc_repbuf(ctx->cc_sec, req, msgsize);
+}
+
+/**
+ * Used by ptlrpc client to free reply buffer of \a req. After this
+ * req->rq_repmsg is set to NULL and should not be accessed anymore.
+ */
+void sptlrpc_cli_free_repbuf(struct ptlrpc_request *req)
+{
+	struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx;
+	struct ptlrpc_sec_policy *policy;
+
+	LASSERT(ctx);
+	LASSERT(ctx->cc_sec);
+	LASSERT(ctx->cc_sec->ps_policy);
+	LASSERT_ATOMIC_POS(&ctx->cc_refcount);
+
+	if (req->rq_repbuf == NULL)
+		return;
+	LASSERT(req->rq_repbuf_len);
+
+	policy = ctx->cc_sec->ps_policy;
+	policy->sp_cops->free_repbuf(ctx->cc_sec, req);
+	req->rq_repmsg = NULL;
+}
+
+int sptlrpc_cli_install_rvs_ctx(struct obd_import *imp,
+				struct ptlrpc_cli_ctx *ctx)
+{
+	struct ptlrpc_sec_policy *policy = ctx->cc_sec->ps_policy;
+
+	if (!policy->sp_cops->install_rctx)
+		return 0;
+	return policy->sp_cops->install_rctx(imp, ctx->cc_sec, ctx);
+}
+
+int sptlrpc_svc_install_rvs_ctx(struct obd_import *imp,
+				struct ptlrpc_svc_ctx *ctx)
+{
+	struct ptlrpc_sec_policy *policy = ctx->sc_policy;
+
+	if (!policy->sp_sops->install_rctx)
+		return 0;
+	return policy->sp_sops->install_rctx(imp, ctx);
+}
+
+/****************************************
+ * server side security		 *
+ ****************************************/
+
+static int flavor_allowed(struct sptlrpc_flavor *exp,
+			  struct ptlrpc_request *req)
+{
+	struct sptlrpc_flavor *flvr = &req->rq_flvr;
+
+	if (exp->sf_rpc == SPTLRPC_FLVR_ANY || exp->sf_rpc == flvr->sf_rpc)
+		return 1;
+
+	if ((req->rq_ctx_init || req->rq_ctx_fini) &&
+	    SPTLRPC_FLVR_POLICY(exp->sf_rpc) ==
+	    SPTLRPC_FLVR_POLICY(flvr->sf_rpc) &&
+	    SPTLRPC_FLVR_MECH(exp->sf_rpc) == SPTLRPC_FLVR_MECH(flvr->sf_rpc))
+		return 1;
+
+	return 0;
+}
+
+#define EXP_FLVR_UPDATE_EXPIRE      (OBD_TIMEOUT_DEFAULT + 10)
+
+/**
+ * Given an export \a exp, check whether the flavor of incoming \a req
+ * is allowed by the export \a exp. Main logic is about taking care of
+ * changing configurations. Return 0 means success.
+ */
+int sptlrpc_target_export_check(struct obd_export *exp,
+				struct ptlrpc_request *req)
+{
+	struct sptlrpc_flavor   flavor;
+
+	if (exp == NULL)
+		return 0;
+
+	/* client side export has no imp_reverse, skip
+	 * FIXME maybe we should check flavor this as well??? */
+	if (exp->exp_imp_reverse == NULL)
+		return 0;
+
+	/* don't care about ctx fini rpc */
+	if (req->rq_ctx_fini)
+		return 0;
+
+	spin_lock(&exp->exp_lock);
+
+	/* if flavor just changed (exp->exp_flvr_changed != 0), we wait for
+	 * the first req with the new flavor, then treat it as current flavor,
+	 * adapt reverse sec according to it.
+	 * note the first rpc with new flavor might not be with root ctx, in
+	 * which case delay the sec_adapt by leaving exp_flvr_adapt == 1. */
+	if (unlikely(exp->exp_flvr_changed) &&
+	    flavor_allowed(&exp->exp_flvr_old[1], req)) {
+		/* make the new flavor as "current", and old ones as
+		 * about-to-expire */
+		CDEBUG(D_SEC, "exp %p: just changed: %x->%x\n", exp,
+		       exp->exp_flvr.sf_rpc, exp->exp_flvr_old[1].sf_rpc);
+		flavor = exp->exp_flvr_old[1];
+		exp->exp_flvr_old[1] = exp->exp_flvr_old[0];
+		exp->exp_flvr_expire[1] = exp->exp_flvr_expire[0];
+		exp->exp_flvr_old[0] = exp->exp_flvr;
+		exp->exp_flvr_expire[0] = get_seconds() +
+					  EXP_FLVR_UPDATE_EXPIRE;
+		exp->exp_flvr = flavor;
+
+		/* flavor change finished */
+		exp->exp_flvr_changed = 0;
+		LASSERT(exp->exp_flvr_adapt == 1);
+
+		/* if it's gss, we only interested in root ctx init */
+		if (req->rq_auth_gss &&
+		    !(req->rq_ctx_init &&
+		      (req->rq_auth_usr_root || req->rq_auth_usr_mdt ||
+		       req->rq_auth_usr_ost))) {
+			spin_unlock(&exp->exp_lock);
+			CDEBUG(D_SEC, "is good but not root(%d:%d:%d:%d:%d)\n",
+			       req->rq_auth_gss, req->rq_ctx_init,
+			       req->rq_auth_usr_root, req->rq_auth_usr_mdt,
+			       req->rq_auth_usr_ost);
+			return 0;
+		}
+
+		exp->exp_flvr_adapt = 0;
+		spin_unlock(&exp->exp_lock);
+
+		return sptlrpc_import_sec_adapt(exp->exp_imp_reverse,
+						req->rq_svc_ctx, &flavor);
+	}
+
+	/* if it equals to the current flavor, we accept it, but need to
+	 * dealing with reverse sec/ctx */
+	if (likely(flavor_allowed(&exp->exp_flvr, req))) {
+		/* most cases should return here, we only interested in
+		 * gss root ctx init */
+		if (!req->rq_auth_gss || !req->rq_ctx_init ||
+		    (!req->rq_auth_usr_root && !req->rq_auth_usr_mdt &&
+		     !req->rq_auth_usr_ost)) {
+			spin_unlock(&exp->exp_lock);
+			return 0;
+		}
+
+		/* if flavor just changed, we should not proceed, just leave
+		 * it and current flavor will be discovered and replaced
+		 * shortly, and let _this_ rpc pass through */
+		if (exp->exp_flvr_changed) {
+			LASSERT(exp->exp_flvr_adapt);
+			spin_unlock(&exp->exp_lock);
+			return 0;
+		}
+
+		if (exp->exp_flvr_adapt) {
+			exp->exp_flvr_adapt = 0;
+			CDEBUG(D_SEC, "exp %p (%x|%x|%x): do delayed adapt\n",
+			       exp, exp->exp_flvr.sf_rpc,
+			       exp->exp_flvr_old[0].sf_rpc,
+			       exp->exp_flvr_old[1].sf_rpc);
+			flavor = exp->exp_flvr;
+			spin_unlock(&exp->exp_lock);
+
+			return sptlrpc_import_sec_adapt(exp->exp_imp_reverse,
+							req->rq_svc_ctx,
+							&flavor);
+		} else {
+			CDEBUG(D_SEC, "exp %p (%x|%x|%x): is current flavor, install rvs ctx\n",
+			       exp, exp->exp_flvr.sf_rpc,
+			       exp->exp_flvr_old[0].sf_rpc,
+			       exp->exp_flvr_old[1].sf_rpc);
+			spin_unlock(&exp->exp_lock);
+
+			return sptlrpc_svc_install_rvs_ctx(exp->exp_imp_reverse,
+							   req->rq_svc_ctx);
+		}
+	}
+
+	if (exp->exp_flvr_expire[0]) {
+		if (exp->exp_flvr_expire[0] >= get_seconds()) {
+			if (flavor_allowed(&exp->exp_flvr_old[0], req)) {
+				CDEBUG(D_SEC, "exp %p (%x|%x|%x): match the middle one (" CFS_DURATION_T ")\n", exp,
+				       exp->exp_flvr.sf_rpc,
+				       exp->exp_flvr_old[0].sf_rpc,
+				       exp->exp_flvr_old[1].sf_rpc,
+				       exp->exp_flvr_expire[0] -
+				       get_seconds());
+				spin_unlock(&exp->exp_lock);
+				return 0;
+			}
+		} else {
+			CDEBUG(D_SEC, "mark middle expired\n");
+			exp->exp_flvr_expire[0] = 0;
+		}
+		CDEBUG(D_SEC, "exp %p (%x|%x|%x): %x not match middle\n", exp,
+		       exp->exp_flvr.sf_rpc,
+		       exp->exp_flvr_old[0].sf_rpc, exp->exp_flvr_old[1].sf_rpc,
+		       req->rq_flvr.sf_rpc);
+	}
+
+	/* now it doesn't match the current flavor, the only chance we can
+	 * accept it is match the old flavors which is not expired. */
+	if (exp->exp_flvr_changed == 0 && exp->exp_flvr_expire[1]) {
+		if (exp->exp_flvr_expire[1] >= get_seconds()) {
+			if (flavor_allowed(&exp->exp_flvr_old[1], req)) {
+				CDEBUG(D_SEC, "exp %p (%x|%x|%x): match the oldest one (" CFS_DURATION_T ")\n",
+				       exp,
+				       exp->exp_flvr.sf_rpc,
+				       exp->exp_flvr_old[0].sf_rpc,
+				       exp->exp_flvr_old[1].sf_rpc,
+				       exp->exp_flvr_expire[1] -
+				       get_seconds());
+				spin_unlock(&exp->exp_lock);
+				return 0;
+			}
+		} else {
+			CDEBUG(D_SEC, "mark oldest expired\n");
+			exp->exp_flvr_expire[1] = 0;
+		}
+		CDEBUG(D_SEC, "exp %p (%x|%x|%x): %x not match found\n",
+		       exp, exp->exp_flvr.sf_rpc,
+		       exp->exp_flvr_old[0].sf_rpc, exp->exp_flvr_old[1].sf_rpc,
+		       req->rq_flvr.sf_rpc);
+	} else {
+		CDEBUG(D_SEC, "exp %p (%x|%x|%x): skip the last one\n",
+		       exp, exp->exp_flvr.sf_rpc, exp->exp_flvr_old[0].sf_rpc,
+		       exp->exp_flvr_old[1].sf_rpc);
+	}
+
+	spin_unlock(&exp->exp_lock);
+
+	CWARN("exp %p(%s): req %p (%u|%u|%u|%u|%u|%u) with unauthorized flavor %x, expect %x|%x(%+ld)|%x(%+ld)\n",
+	      exp, exp->exp_obd->obd_name,
+	      req, req->rq_auth_gss, req->rq_ctx_init, req->rq_ctx_fini,
+	      req->rq_auth_usr_root, req->rq_auth_usr_mdt, req->rq_auth_usr_ost,
+	      req->rq_flvr.sf_rpc,
+	      exp->exp_flvr.sf_rpc,
+	      exp->exp_flvr_old[0].sf_rpc,
+	      exp->exp_flvr_expire[0] ?
+	      (unsigned long) (exp->exp_flvr_expire[0] -
+			       get_seconds()) : 0,
+	      exp->exp_flvr_old[1].sf_rpc,
+	      exp->exp_flvr_expire[1] ?
+	      (unsigned long) (exp->exp_flvr_expire[1] -
+			       get_seconds()) : 0);
+	return -EACCES;
+}
+EXPORT_SYMBOL(sptlrpc_target_export_check);
+
+void sptlrpc_target_update_exp_flavor(struct obd_device *obd,
+				      struct sptlrpc_rule_set *rset)
+{
+	struct obd_export       *exp;
+	struct sptlrpc_flavor    new_flvr;
+
+	LASSERT(obd);
+
+	spin_lock(&obd->obd_dev_lock);
+
+	list_for_each_entry(exp, &obd->obd_exports, exp_obd_chain) {
+		if (exp->exp_connection == NULL)
+			continue;
+
+		/* note if this export had just been updated flavor
+		 * (exp_flvr_changed == 1), this will override the
+		 * previous one. */
+		spin_lock(&exp->exp_lock);
+		sptlrpc_target_choose_flavor(rset, exp->exp_sp_peer,
+					     exp->exp_connection->c_peer.nid,
+					     &new_flvr);
+		if (exp->exp_flvr_changed ||
+		    !flavor_equal(&new_flvr, &exp->exp_flvr)) {
+			exp->exp_flvr_old[1] = new_flvr;
+			exp->exp_flvr_expire[1] = 0;
+			exp->exp_flvr_changed = 1;
+			exp->exp_flvr_adapt = 1;
+
+			CDEBUG(D_SEC, "exp %p (%s): updated flavor %x->%x\n",
+			       exp, sptlrpc_part2name(exp->exp_sp_peer),
+			       exp->exp_flvr.sf_rpc,
+			       exp->exp_flvr_old[1].sf_rpc);
+		}
+		spin_unlock(&exp->exp_lock);
+	}
+
+	spin_unlock(&obd->obd_dev_lock);
+}
+EXPORT_SYMBOL(sptlrpc_target_update_exp_flavor);
+
+static int sptlrpc_svc_check_from(struct ptlrpc_request *req, int svc_rc)
+{
+	/* peer's claim is unreliable unless gss is being used */
+	if (!req->rq_auth_gss || svc_rc == SECSVC_DROP)
+		return svc_rc;
+
+	switch (req->rq_sp_from) {
+	case LUSTRE_SP_CLI:
+		if (req->rq_auth_usr_mdt || req->rq_auth_usr_ost) {
+			DEBUG_REQ(D_ERROR, req, "faked source CLI");
+			svc_rc = SECSVC_DROP;
+		}
+		break;
+	case LUSTRE_SP_MDT:
+		if (!req->rq_auth_usr_mdt) {
+			DEBUG_REQ(D_ERROR, req, "faked source MDT");
+			svc_rc = SECSVC_DROP;
+		}
+		break;
+	case LUSTRE_SP_OST:
+		if (!req->rq_auth_usr_ost) {
+			DEBUG_REQ(D_ERROR, req, "faked source OST");
+			svc_rc = SECSVC_DROP;
+		}
+		break;
+	case LUSTRE_SP_MGS:
+	case LUSTRE_SP_MGC:
+		if (!req->rq_auth_usr_root && !req->rq_auth_usr_mdt &&
+		    !req->rq_auth_usr_ost) {
+			DEBUG_REQ(D_ERROR, req, "faked source MGC/MGS");
+			svc_rc = SECSVC_DROP;
+		}
+		break;
+	case LUSTRE_SP_ANY:
+	default:
+		DEBUG_REQ(D_ERROR, req, "invalid source %u", req->rq_sp_from);
+		svc_rc = SECSVC_DROP;
+	}
+
+	return svc_rc;
+}
+
+/**
+ * Used by ptlrpc server, to perform transformation upon request message of
+ * incoming \a req. This must be the first thing to do with a incoming
+ * request in ptlrpc layer.
+ *
+ * \retval SECSVC_OK success, and req->rq_reqmsg point to request message in
+ * clear text, size is req->rq_reqlen; also req->rq_svc_ctx is set.
+ * \retval SECSVC_COMPLETE success, the request has been fully processed, and
+ * reply message has been prepared.
+ * \retval SECSVC_DROP failed, this request should be dropped.
+ */
+int sptlrpc_svc_unwrap_request(struct ptlrpc_request *req)
+{
+	struct ptlrpc_sec_policy *policy;
+	struct lustre_msg	*msg = req->rq_reqbuf;
+	int		       rc;
+
+	LASSERT(msg);
+	LASSERT(req->rq_reqmsg == NULL);
+	LASSERT(req->rq_repmsg == NULL);
+	LASSERT(req->rq_svc_ctx == NULL);
+
+	req->rq_req_swab_mask = 0;
+
+	rc = __lustre_unpack_msg(msg, req->rq_reqdata_len);
+	switch (rc) {
+	case 1:
+		lustre_set_req_swabbed(req, MSG_PTLRPC_HEADER_OFF);
+	case 0:
+		break;
+	default:
+		CERROR("error unpacking request from %s x%llu\n",
+		       libcfs_id2str(req->rq_peer), req->rq_xid);
+		return SECSVC_DROP;
+	}
+
+	req->rq_flvr.sf_rpc = WIRE_FLVR(msg->lm_secflvr);
+	req->rq_sp_from = LUSTRE_SP_ANY;
+	req->rq_auth_uid = -1;
+	req->rq_auth_mapped_uid = -1;
+
+	policy = sptlrpc_wireflavor2policy(req->rq_flvr.sf_rpc);
+	if (!policy) {
+		CERROR("unsupported rpc flavor %x\n", req->rq_flvr.sf_rpc);
+		return SECSVC_DROP;
+	}
+
+	LASSERT(policy->sp_sops->accept);
+	rc = policy->sp_sops->accept(req);
+	sptlrpc_policy_put(policy);
+	LASSERT(req->rq_reqmsg || rc != SECSVC_OK);
+	LASSERT(req->rq_svc_ctx || rc == SECSVC_DROP);
+
+	/*
+	 * if it's not null flavor (which means embedded packing msg),
+	 * reset the swab mask for the coming inner msg unpacking.
+	 */
+	if (SPTLRPC_FLVR_POLICY(req->rq_flvr.sf_rpc) != SPTLRPC_POLICY_NULL)
+		req->rq_req_swab_mask = 0;
+
+	/* sanity check for the request source */
+	rc = sptlrpc_svc_check_from(req, rc);
+	return rc;
+}
+
+/**
+ * Used by ptlrpc server, to allocate reply buffer for \a req. If succeed,
+ * req->rq_reply_state is set, and req->rq_reply_state->rs_msg point to
+ * a buffer of \a msglen size.
+ */
+int sptlrpc_svc_alloc_rs(struct ptlrpc_request *req, int msglen)
+{
+	struct ptlrpc_sec_policy *policy;
+	struct ptlrpc_reply_state *rs;
+	int rc;
+
+	LASSERT(req->rq_svc_ctx);
+	LASSERT(req->rq_svc_ctx->sc_policy);
+
+	policy = req->rq_svc_ctx->sc_policy;
+	LASSERT(policy->sp_sops->alloc_rs);
+
+	rc = policy->sp_sops->alloc_rs(req, msglen);
+	if (unlikely(rc == -ENOMEM)) {
+		struct ptlrpc_service_part *svcpt = req->rq_rqbd->rqbd_svcpt;
+		if (svcpt->scp_service->srv_max_reply_size <
+		   msglen + sizeof(struct ptlrpc_reply_state)) {
+			/* Just return failure if the size is too big */
+			CERROR("size of message is too big (%zd), %d allowed",
+				msglen + sizeof(struct ptlrpc_reply_state),
+				svcpt->scp_service->srv_max_reply_size);
+			return -ENOMEM;
+		}
+
+		/* failed alloc, try emergency pool */
+		rs = lustre_get_emerg_rs(svcpt);
+		if (rs == NULL)
+			return -ENOMEM;
+
+		req->rq_reply_state = rs;
+		rc = policy->sp_sops->alloc_rs(req, msglen);
+		if (rc) {
+			lustre_put_emerg_rs(rs);
+			req->rq_reply_state = NULL;
+		}
+	}
+
+	LASSERT(rc != 0 ||
+		(req->rq_reply_state && req->rq_reply_state->rs_msg));
+
+	return rc;
+}
+
+/**
+ * Used by ptlrpc server, to perform transformation upon reply message.
+ *
+ * \post req->rq_reply_off is set to appropriate server-controlled reply offset.
+ * \post req->rq_repmsg and req->rq_reply_state->rs_msg becomes inaccessible.
+ */
+int sptlrpc_svc_wrap_reply(struct ptlrpc_request *req)
+{
+	struct ptlrpc_sec_policy *policy;
+	int rc;
+
+	LASSERT(req->rq_svc_ctx);
+	LASSERT(req->rq_svc_ctx->sc_policy);
+
+	policy = req->rq_svc_ctx->sc_policy;
+	LASSERT(policy->sp_sops->authorize);
+
+	rc = policy->sp_sops->authorize(req);
+	LASSERT(rc || req->rq_reply_state->rs_repdata_len);
+
+	return rc;
+}
+
+/**
+ * Used by ptlrpc server, to free reply_state.
+ */
+void sptlrpc_svc_free_rs(struct ptlrpc_reply_state *rs)
+{
+	struct ptlrpc_sec_policy *policy;
+	unsigned int prealloc;
+
+	LASSERT(rs->rs_svc_ctx);
+	LASSERT(rs->rs_svc_ctx->sc_policy);
+
+	policy = rs->rs_svc_ctx->sc_policy;
+	LASSERT(policy->sp_sops->free_rs);
+
+	prealloc = rs->rs_prealloc;
+	policy->sp_sops->free_rs(rs);
+
+	if (prealloc)
+		lustre_put_emerg_rs(rs);
+}
+
+void sptlrpc_svc_ctx_addref(struct ptlrpc_request *req)
+{
+	struct ptlrpc_svc_ctx *ctx = req->rq_svc_ctx;
+
+	if (ctx != NULL)
+		atomic_inc(&ctx->sc_refcount);
+}
+
+void sptlrpc_svc_ctx_decref(struct ptlrpc_request *req)
+{
+	struct ptlrpc_svc_ctx *ctx = req->rq_svc_ctx;
+
+	if (ctx == NULL)
+		return;
+
+	LASSERT_ATOMIC_POS(&ctx->sc_refcount);
+	if (atomic_dec_and_test(&ctx->sc_refcount)) {
+		if (ctx->sc_policy->sp_sops->free_ctx)
+			ctx->sc_policy->sp_sops->free_ctx(ctx);
+	}
+	req->rq_svc_ctx = NULL;
+}
+
+void sptlrpc_svc_ctx_invalidate(struct ptlrpc_request *req)
+{
+	struct ptlrpc_svc_ctx *ctx = req->rq_svc_ctx;
+
+	if (ctx == NULL)
+		return;
+
+	LASSERT_ATOMIC_POS(&ctx->sc_refcount);
+	if (ctx->sc_policy->sp_sops->invalidate_ctx)
+		ctx->sc_policy->sp_sops->invalidate_ctx(ctx);
+}
+EXPORT_SYMBOL(sptlrpc_svc_ctx_invalidate);
+
+/****************************************
+ * bulk security			*
+ ****************************************/
+
+/**
+ * Perform transformation upon bulk data pointed by \a desc. This is called
+ * before transforming the request message.
+ */
+int sptlrpc_cli_wrap_bulk(struct ptlrpc_request *req,
+			  struct ptlrpc_bulk_desc *desc)
+{
+	struct ptlrpc_cli_ctx *ctx;
+
+	LASSERT(req->rq_bulk_read || req->rq_bulk_write);
+
+	if (!req->rq_pack_bulk)
+		return 0;
+
+	ctx = req->rq_cli_ctx;
+	if (ctx->cc_ops->wrap_bulk)
+		return ctx->cc_ops->wrap_bulk(ctx, req, desc);
+	return 0;
+}
+EXPORT_SYMBOL(sptlrpc_cli_wrap_bulk);
+
+/**
+ * This is called after unwrap the reply message.
+ * return nob of actual plain text size received, or error code.
+ */
+int sptlrpc_cli_unwrap_bulk_read(struct ptlrpc_request *req,
+				 struct ptlrpc_bulk_desc *desc,
+				 int nob)
+{
+	struct ptlrpc_cli_ctx  *ctx;
+	int		     rc;
+
+	LASSERT(req->rq_bulk_read && !req->rq_bulk_write);
+
+	if (!req->rq_pack_bulk)
+		return desc->bd_nob_transferred;
+
+	ctx = req->rq_cli_ctx;
+	if (ctx->cc_ops->unwrap_bulk) {
+		rc = ctx->cc_ops->unwrap_bulk(ctx, req, desc);
+		if (rc < 0)
+			return rc;
+	}
+	return desc->bd_nob_transferred;
+}
+EXPORT_SYMBOL(sptlrpc_cli_unwrap_bulk_read);
+
+/**
+ * This is called after unwrap the reply message.
+ * return 0 for success or error code.
+ */
+int sptlrpc_cli_unwrap_bulk_write(struct ptlrpc_request *req,
+				  struct ptlrpc_bulk_desc *desc)
+{
+	struct ptlrpc_cli_ctx  *ctx;
+	int		     rc;
+
+	LASSERT(!req->rq_bulk_read && req->rq_bulk_write);
+
+	if (!req->rq_pack_bulk)
+		return 0;
+
+	ctx = req->rq_cli_ctx;
+	if (ctx->cc_ops->unwrap_bulk) {
+		rc = ctx->cc_ops->unwrap_bulk(ctx, req, desc);
+		if (rc < 0)
+			return rc;
+	}
+
+	/*
+	 * if everything is going right, nob should equals to nob_transferred.
+	 * in case of privacy mode, nob_transferred needs to be adjusted.
+	 */
+	if (desc->bd_nob != desc->bd_nob_transferred) {
+		CERROR("nob %d doesn't match transferred nob %d",
+		       desc->bd_nob, desc->bd_nob_transferred);
+		return -EPROTO;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(sptlrpc_cli_unwrap_bulk_write);
+
+
+/****************************************
+ * user descriptor helpers	      *
+ ****************************************/
+
+int sptlrpc_current_user_desc_size(void)
+{
+	int ngroups;
+
+	ngroups = current_ngroups;
+
+	if (ngroups > LUSTRE_MAX_GROUPS)
+		ngroups = LUSTRE_MAX_GROUPS;
+	return sptlrpc_user_desc_size(ngroups);
+}
+EXPORT_SYMBOL(sptlrpc_current_user_desc_size);
+
+int sptlrpc_pack_user_desc(struct lustre_msg *msg, int offset)
+{
+	struct ptlrpc_user_desc *pud;
+
+	pud = lustre_msg_buf(msg, offset, 0);
+
+	pud->pud_uid = from_kuid(&init_user_ns, current_uid());
+	pud->pud_gid = from_kgid(&init_user_ns, current_gid());
+	pud->pud_fsuid = from_kuid(&init_user_ns, current_fsuid());
+	pud->pud_fsgid = from_kgid(&init_user_ns, current_fsgid());
+	pud->pud_cap = cfs_curproc_cap_pack();
+	pud->pud_ngroups = (msg->lm_buflens[offset] - sizeof(*pud)) / 4;
+
+	task_lock(current);
+	if (pud->pud_ngroups > current_ngroups)
+		pud->pud_ngroups = current_ngroups;
+	memcpy(pud->pud_groups, current_cred()->group_info->blocks[0],
+	       pud->pud_ngroups * sizeof(__u32));
+	task_unlock(current);
+
+	return 0;
+}
+EXPORT_SYMBOL(sptlrpc_pack_user_desc);
+
+int sptlrpc_unpack_user_desc(struct lustre_msg *msg, int offset, int swabbed)
+{
+	struct ptlrpc_user_desc *pud;
+	int		      i;
+
+	pud = lustre_msg_buf(msg, offset, sizeof(*pud));
+	if (!pud)
+		return -EINVAL;
+
+	if (swabbed) {
+		__swab32s(&pud->pud_uid);
+		__swab32s(&pud->pud_gid);
+		__swab32s(&pud->pud_fsuid);
+		__swab32s(&pud->pud_fsgid);
+		__swab32s(&pud->pud_cap);
+		__swab32s(&pud->pud_ngroups);
+	}
+
+	if (pud->pud_ngroups > LUSTRE_MAX_GROUPS) {
+		CERROR("%u groups is too large\n", pud->pud_ngroups);
+		return -EINVAL;
+	}
+
+	if (sizeof(*pud) + pud->pud_ngroups * sizeof(__u32) >
+	    msg->lm_buflens[offset]) {
+		CERROR("%u groups are claimed but bufsize only %u\n",
+		       pud->pud_ngroups, msg->lm_buflens[offset]);
+		return -EINVAL;
+	}
+
+	if (swabbed) {
+		for (i = 0; i < pud->pud_ngroups; i++)
+			__swab32s(&pud->pud_groups[i]);
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(sptlrpc_unpack_user_desc);
+
+/****************************************
+ * misc helpers			 *
+ ****************************************/
+
+const char *sec2target_str(struct ptlrpc_sec *sec)
+{
+	if (!sec || !sec->ps_import || !sec->ps_import->imp_obd)
+		return "*";
+	if (sec_is_reverse(sec))
+		return "c";
+	return obd_uuid2str(&sec->ps_import->imp_obd->u.cli.cl_target_uuid);
+}
+EXPORT_SYMBOL(sec2target_str);
+
+/*
+ * return true if the bulk data is protected
+ */
+int sptlrpc_flavor_has_bulk(struct sptlrpc_flavor *flvr)
+{
+	switch (SPTLRPC_FLVR_BULK_SVC(flvr->sf_rpc)) {
+	case SPTLRPC_BULK_SVC_INTG:
+	case SPTLRPC_BULK_SVC_PRIV:
+		return 1;
+	default:
+		return 0;
+	}
+}
+EXPORT_SYMBOL(sptlrpc_flavor_has_bulk);
+
+/****************************************
+ * crypto API helper/alloc blkciper     *
+ ****************************************/
+
+/****************************************
+ * initialize/finalize		  *
+ ****************************************/
+
+int sptlrpc_init(void)
+{
+	int rc;
+
+	rwlock_init(&policy_lock);
+
+	rc = sptlrpc_gc_init();
+	if (rc)
+		goto out;
+
+	rc = sptlrpc_conf_init();
+	if (rc)
+		goto out_gc;
+
+	rc = sptlrpc_enc_pool_init();
+	if (rc)
+		goto out_conf;
+
+	rc = sptlrpc_null_init();
+	if (rc)
+		goto out_pool;
+
+	rc = sptlrpc_plain_init();
+	if (rc)
+		goto out_null;
+
+	rc = sptlrpc_lproc_init();
+	if (rc)
+		goto out_plain;
+
+	return 0;
+
+out_plain:
+	sptlrpc_plain_fini();
+out_null:
+	sptlrpc_null_fini();
+out_pool:
+	sptlrpc_enc_pool_fini();
+out_conf:
+	sptlrpc_conf_fini();
+out_gc:
+	sptlrpc_gc_fini();
+out:
+	return rc;
+}
+
+void sptlrpc_fini(void)
+{
+	sptlrpc_lproc_fini();
+	sptlrpc_plain_fini();
+	sptlrpc_null_fini();
+	sptlrpc_enc_pool_fini();
+	sptlrpc_conf_fini();
+	sptlrpc_gc_fini();
+}
diff --git a/drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c b/drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c
new file mode 100644
index 000000000..c05a8554d
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c
@@ -0,0 +1,884 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/ptlrpc/sec_bulk.c
+ *
+ * Author: Eric Mei <ericm@clusterfs.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_SEC
+
+#include "../../include/linux/libcfs/libcfs.h"
+#include <linux/crypto.h>
+
+#include "../include/obd.h"
+#include "../include/obd_cksum.h"
+#include "../include/obd_class.h"
+#include "../include/obd_support.h"
+#include "../include/lustre_net.h"
+#include "../include/lustre_import.h"
+#include "../include/lustre_dlm.h"
+#include "../include/lustre_sec.h"
+
+#include "ptlrpc_internal.h"
+
+/****************************************
+ * bulk encryption page pools	   *
+ ****************************************/
+
+
+#define POINTERS_PER_PAGE	(PAGE_CACHE_SIZE / sizeof(void *))
+#define PAGES_PER_POOL		(POINTERS_PER_PAGE)
+
+#define IDLE_IDX_MAX	    (100)
+#define IDLE_IDX_WEIGHT	 (3)
+
+#define CACHE_QUIESCENT_PERIOD  (20)
+
+static struct ptlrpc_enc_page_pool {
+	/*
+	 * constants
+	 */
+	unsigned long    epp_max_pages;   /* maximum pages can hold, const */
+	unsigned int     epp_max_pools;   /* number of pools, const */
+
+	/*
+	 * wait queue in case of not enough free pages.
+	 */
+	wait_queue_head_t      epp_waitq;       /* waiting threads */
+	unsigned int     epp_waitqlen;    /* wait queue length */
+	unsigned long    epp_pages_short; /* # of pages wanted of in-q users */
+	unsigned int     epp_growing:1;   /* during adding pages */
+
+	/*
+	 * indicating how idle the pools are, from 0 to MAX_IDLE_IDX
+	 * this is counted based on each time when getting pages from
+	 * the pools, not based on time. which means in case that system
+	 * is idled for a while but the idle_idx might still be low if no
+	 * activities happened in the pools.
+	 */
+	unsigned long    epp_idle_idx;
+
+	/* last shrink time due to mem tight */
+	long	     epp_last_shrink;
+	long	     epp_last_access;
+
+	/*
+	 * in-pool pages bookkeeping
+	 */
+	spinlock_t	 epp_lock;	   /* protect following fields */
+	unsigned long    epp_total_pages; /* total pages in pools */
+	unsigned long    epp_free_pages;  /* current pages available */
+
+	/*
+	 * statistics
+	 */
+	unsigned long    epp_st_max_pages;      /* # of pages ever reached */
+	unsigned int     epp_st_grows;	  /* # of grows */
+	unsigned int     epp_st_grow_fails;     /* # of add pages failures */
+	unsigned int     epp_st_shrinks;	/* # of shrinks */
+	unsigned long    epp_st_access;	 /* # of access */
+	unsigned long    epp_st_missings;       /* # of cache missing */
+	unsigned long    epp_st_lowfree;	/* lowest free pages reached */
+	unsigned int     epp_st_max_wqlen;      /* highest waitqueue length */
+	unsigned long       epp_st_max_wait;       /* in jiffies */
+	/*
+	 * pointers to pools
+	 */
+	struct page    ***epp_pools;
+} page_pools;
+
+/*
+ * /proc/fs/lustre/sptlrpc/encrypt_page_pools
+ */
+int sptlrpc_proc_enc_pool_seq_show(struct seq_file *m, void *v)
+{
+	spin_lock(&page_pools.epp_lock);
+
+	seq_printf(m,
+		   "physical pages:	  %lu\n"
+		   "pages per pool:	  %lu\n"
+		   "max pages:	       %lu\n"
+		   "max pools:	       %u\n"
+		   "total pages:	     %lu\n"
+		   "total free:	      %lu\n"
+		   "idle index:	      %lu/100\n"
+		   "last shrink:	     %lds\n"
+		   "last access:	     %lds\n"
+		   "max pages reached:       %lu\n"
+		   "grows:		   %u\n"
+		   "grows failure:	   %u\n"
+		   "shrinks:		 %u\n"
+		   "cache access:	    %lu\n"
+		   "cache missing:	   %lu\n"
+		   "low free mark:	   %lu\n"
+		   "max waitqueue depth:     %u\n"
+		   "max wait time:	   " CFS_TIME_T "/%u\n",
+		   totalram_pages,
+		   PAGES_PER_POOL,
+		   page_pools.epp_max_pages,
+		   page_pools.epp_max_pools,
+		   page_pools.epp_total_pages,
+		   page_pools.epp_free_pages,
+		   page_pools.epp_idle_idx,
+		   get_seconds() - page_pools.epp_last_shrink,
+		   get_seconds() - page_pools.epp_last_access,
+		   page_pools.epp_st_max_pages,
+		   page_pools.epp_st_grows,
+		   page_pools.epp_st_grow_fails,
+		   page_pools.epp_st_shrinks,
+		   page_pools.epp_st_access,
+		   page_pools.epp_st_missings,
+		   page_pools.epp_st_lowfree,
+		   page_pools.epp_st_max_wqlen,
+		   page_pools.epp_st_max_wait,
+		   HZ);
+
+	spin_unlock(&page_pools.epp_lock);
+
+	return 0;
+}
+
+static void enc_pools_release_free_pages(long npages)
+{
+	int     p_idx, g_idx;
+	int     p_idx_max1, p_idx_max2;
+
+	LASSERT(npages > 0);
+	LASSERT(npages <= page_pools.epp_free_pages);
+	LASSERT(page_pools.epp_free_pages <= page_pools.epp_total_pages);
+
+	/* max pool index before the release */
+	p_idx_max2 = (page_pools.epp_total_pages - 1) / PAGES_PER_POOL;
+
+	page_pools.epp_free_pages -= npages;
+	page_pools.epp_total_pages -= npages;
+
+	/* max pool index after the release */
+	p_idx_max1 = page_pools.epp_total_pages == 0 ? -1 :
+		     ((page_pools.epp_total_pages - 1) / PAGES_PER_POOL);
+
+	p_idx = page_pools.epp_free_pages / PAGES_PER_POOL;
+	g_idx = page_pools.epp_free_pages % PAGES_PER_POOL;
+	LASSERT(page_pools.epp_pools[p_idx]);
+
+	while (npages--) {
+		LASSERT(page_pools.epp_pools[p_idx]);
+		LASSERT(page_pools.epp_pools[p_idx][g_idx] != NULL);
+
+		__free_page(page_pools.epp_pools[p_idx][g_idx]);
+		page_pools.epp_pools[p_idx][g_idx] = NULL;
+
+		if (++g_idx == PAGES_PER_POOL) {
+			p_idx++;
+			g_idx = 0;
+		}
+	}
+
+	/* free unused pools */
+	while (p_idx_max1 < p_idx_max2) {
+		LASSERT(page_pools.epp_pools[p_idx_max2]);
+		OBD_FREE(page_pools.epp_pools[p_idx_max2], PAGE_CACHE_SIZE);
+		page_pools.epp_pools[p_idx_max2] = NULL;
+		p_idx_max2--;
+	}
+}
+
+/*
+ * we try to keep at least PTLRPC_MAX_BRW_PAGES pages in the pool.
+ */
+static unsigned long enc_pools_shrink_count(struct shrinker *s,
+					    struct shrink_control *sc)
+{
+	/*
+	 * if no pool access for a long time, we consider it's fully idle.
+	 * a little race here is fine.
+	 */
+	if (unlikely(get_seconds() - page_pools.epp_last_access >
+		     CACHE_QUIESCENT_PERIOD)) {
+		spin_lock(&page_pools.epp_lock);
+		page_pools.epp_idle_idx = IDLE_IDX_MAX;
+		spin_unlock(&page_pools.epp_lock);
+	}
+
+	LASSERT(page_pools.epp_idle_idx <= IDLE_IDX_MAX);
+	return max((int)page_pools.epp_free_pages - PTLRPC_MAX_BRW_PAGES, 0) *
+		(IDLE_IDX_MAX - page_pools.epp_idle_idx) / IDLE_IDX_MAX;
+}
+
+/*
+ * we try to keep at least PTLRPC_MAX_BRW_PAGES pages in the pool.
+ */
+static unsigned long enc_pools_shrink_scan(struct shrinker *s,
+					   struct shrink_control *sc)
+{
+	spin_lock(&page_pools.epp_lock);
+	sc->nr_to_scan = min_t(unsigned long, sc->nr_to_scan,
+			      page_pools.epp_free_pages - PTLRPC_MAX_BRW_PAGES);
+	if (sc->nr_to_scan > 0) {
+		enc_pools_release_free_pages(sc->nr_to_scan);
+		CDEBUG(D_SEC, "released %ld pages, %ld left\n",
+		       (long)sc->nr_to_scan, page_pools.epp_free_pages);
+
+		page_pools.epp_st_shrinks++;
+		page_pools.epp_last_shrink = get_seconds();
+	}
+	spin_unlock(&page_pools.epp_lock);
+
+	/*
+	 * if no pool access for a long time, we consider it's fully idle.
+	 * a little race here is fine.
+	 */
+	if (unlikely(get_seconds() - page_pools.epp_last_access >
+		     CACHE_QUIESCENT_PERIOD)) {
+		spin_lock(&page_pools.epp_lock);
+		page_pools.epp_idle_idx = IDLE_IDX_MAX;
+		spin_unlock(&page_pools.epp_lock);
+	}
+
+	LASSERT(page_pools.epp_idle_idx <= IDLE_IDX_MAX);
+	return sc->nr_to_scan;
+}
+
+static inline
+int npages_to_npools(unsigned long npages)
+{
+	return (int) ((npages + PAGES_PER_POOL - 1) / PAGES_PER_POOL);
+}
+
+/*
+ * return how many pages cleaned up.
+ */
+static unsigned long enc_pools_cleanup(struct page ***pools, int npools)
+{
+	unsigned long cleaned = 0;
+	int	   i, j;
+
+	for (i = 0; i < npools; i++) {
+		if (pools[i]) {
+			for (j = 0; j < PAGES_PER_POOL; j++) {
+				if (pools[i][j]) {
+					__free_page(pools[i][j]);
+					cleaned++;
+				}
+			}
+			OBD_FREE(pools[i], PAGE_CACHE_SIZE);
+			pools[i] = NULL;
+		}
+	}
+
+	return cleaned;
+}
+
+/*
+ * merge @npools pointed by @pools which contains @npages new pages
+ * into current pools.
+ *
+ * we have options to avoid most memory copy with some tricks. but we choose
+ * the simplest way to avoid complexity. It's not frequently called.
+ */
+static void enc_pools_insert(struct page ***pools, int npools, int npages)
+{
+	int     freeslot;
+	int     op_idx, np_idx, og_idx, ng_idx;
+	int     cur_npools, end_npools;
+
+	LASSERT(npages > 0);
+	LASSERT(page_pools.epp_total_pages+npages <= page_pools.epp_max_pages);
+	LASSERT(npages_to_npools(npages) == npools);
+	LASSERT(page_pools.epp_growing);
+
+	spin_lock(&page_pools.epp_lock);
+
+	/*
+	 * (1) fill all the free slots of current pools.
+	 */
+	/* free slots are those left by rent pages, and the extra ones with
+	 * index >= total_pages, locate at the tail of last pool. */
+	freeslot = page_pools.epp_total_pages % PAGES_PER_POOL;
+	if (freeslot != 0)
+		freeslot = PAGES_PER_POOL - freeslot;
+	freeslot += page_pools.epp_total_pages - page_pools.epp_free_pages;
+
+	op_idx = page_pools.epp_free_pages / PAGES_PER_POOL;
+	og_idx = page_pools.epp_free_pages % PAGES_PER_POOL;
+	np_idx = npools - 1;
+	ng_idx = (npages - 1) % PAGES_PER_POOL;
+
+	while (freeslot) {
+		LASSERT(page_pools.epp_pools[op_idx][og_idx] == NULL);
+		LASSERT(pools[np_idx][ng_idx] != NULL);
+
+		page_pools.epp_pools[op_idx][og_idx] = pools[np_idx][ng_idx];
+		pools[np_idx][ng_idx] = NULL;
+
+		freeslot--;
+
+		if (++og_idx == PAGES_PER_POOL) {
+			op_idx++;
+			og_idx = 0;
+		}
+		if (--ng_idx < 0) {
+			if (np_idx == 0)
+				break;
+			np_idx--;
+			ng_idx = PAGES_PER_POOL - 1;
+		}
+	}
+
+	/*
+	 * (2) add pools if needed.
+	 */
+	cur_npools = (page_pools.epp_total_pages + PAGES_PER_POOL - 1) /
+		     PAGES_PER_POOL;
+	end_npools = (page_pools.epp_total_pages + npages + PAGES_PER_POOL - 1)
+		     / PAGES_PER_POOL;
+	LASSERT(end_npools <= page_pools.epp_max_pools);
+
+	np_idx = 0;
+	while (cur_npools < end_npools) {
+		LASSERT(page_pools.epp_pools[cur_npools] == NULL);
+		LASSERT(np_idx < npools);
+		LASSERT(pools[np_idx] != NULL);
+
+		page_pools.epp_pools[cur_npools++] = pools[np_idx];
+		pools[np_idx++] = NULL;
+	}
+
+	page_pools.epp_total_pages += npages;
+	page_pools.epp_free_pages += npages;
+	page_pools.epp_st_lowfree = page_pools.epp_free_pages;
+
+	if (page_pools.epp_total_pages > page_pools.epp_st_max_pages)
+		page_pools.epp_st_max_pages = page_pools.epp_total_pages;
+
+	CDEBUG(D_SEC, "add %d pages to total %lu\n", npages,
+	       page_pools.epp_total_pages);
+
+	spin_unlock(&page_pools.epp_lock);
+}
+
+static int enc_pools_add_pages(int npages)
+{
+	static DEFINE_MUTEX(add_pages_mutex);
+	struct page   ***pools;
+	int	     npools, alloced = 0;
+	int	     i, j, rc = -ENOMEM;
+
+	if (npages < PTLRPC_MAX_BRW_PAGES)
+		npages = PTLRPC_MAX_BRW_PAGES;
+
+	mutex_lock(&add_pages_mutex);
+
+	if (npages + page_pools.epp_total_pages > page_pools.epp_max_pages)
+		npages = page_pools.epp_max_pages - page_pools.epp_total_pages;
+	LASSERT(npages > 0);
+
+	page_pools.epp_st_grows++;
+
+	npools = npages_to_npools(npages);
+	OBD_ALLOC(pools, npools * sizeof(*pools));
+	if (pools == NULL)
+		goto out;
+
+	for (i = 0; i < npools; i++) {
+		OBD_ALLOC(pools[i], PAGE_CACHE_SIZE);
+		if (pools[i] == NULL)
+			goto out_pools;
+
+		for (j = 0; j < PAGES_PER_POOL && alloced < npages; j++) {
+			pools[i][j] = alloc_page(GFP_NOFS |
+						     __GFP_HIGHMEM);
+			if (pools[i][j] == NULL)
+				goto out_pools;
+
+			alloced++;
+		}
+	}
+	LASSERT(alloced == npages);
+
+	enc_pools_insert(pools, npools, npages);
+	CDEBUG(D_SEC, "added %d pages into pools\n", npages);
+	rc = 0;
+
+out_pools:
+	enc_pools_cleanup(pools, npools);
+	OBD_FREE(pools, npools * sizeof(*pools));
+out:
+	if (rc) {
+		page_pools.epp_st_grow_fails++;
+		CERROR("Failed to allocate %d enc pages\n", npages);
+	}
+
+	mutex_unlock(&add_pages_mutex);
+	return rc;
+}
+
+static inline void enc_pools_wakeup(void)
+{
+	assert_spin_locked(&page_pools.epp_lock);
+	LASSERT(page_pools.epp_waitqlen >= 0);
+
+	if (unlikely(page_pools.epp_waitqlen)) {
+		LASSERT(waitqueue_active(&page_pools.epp_waitq));
+		wake_up_all(&page_pools.epp_waitq);
+	}
+}
+
+static int enc_pools_should_grow(int page_needed, long now)
+{
+	/* don't grow if someone else is growing the pools right now,
+	 * or the pools has reached its full capacity
+	 */
+	if (page_pools.epp_growing ||
+	    page_pools.epp_total_pages == page_pools.epp_max_pages)
+		return 0;
+
+	/* if total pages is not enough, we need to grow */
+	if (page_pools.epp_total_pages < page_needed)
+		return 1;
+
+	/*
+	 * we wanted to return 0 here if there was a shrink just happened
+	 * moment ago, but this may cause deadlock if both client and ost
+	 * live on single node.
+	 */
+#if 0
+	if (now - page_pools.epp_last_shrink < 2)
+		return 0;
+#endif
+
+	/*
+	 * here we perhaps need consider other factors like wait queue
+	 * length, idle index, etc. ?
+	 */
+
+	/* grow the pools in any other cases */
+	return 1;
+}
+
+/*
+ * we allocate the requested pages atomically.
+ */
+int sptlrpc_enc_pool_get_pages(struct ptlrpc_bulk_desc *desc)
+{
+	wait_queue_t  waitlink;
+	unsigned long   this_idle = -1;
+	unsigned long      tick = 0;
+	long	    now;
+	int	     p_idx, g_idx;
+	int	     i;
+
+	LASSERT(desc->bd_iov_count > 0);
+	LASSERT(desc->bd_iov_count <= page_pools.epp_max_pages);
+
+	/* resent bulk, enc iov might have been allocated previously */
+	if (desc->bd_enc_iov != NULL)
+		return 0;
+
+	OBD_ALLOC(desc->bd_enc_iov,
+		  desc->bd_iov_count * sizeof(*desc->bd_enc_iov));
+	if (desc->bd_enc_iov == NULL)
+		return -ENOMEM;
+
+	spin_lock(&page_pools.epp_lock);
+
+	page_pools.epp_st_access++;
+again:
+	if (unlikely(page_pools.epp_free_pages < desc->bd_iov_count)) {
+		if (tick == 0)
+			tick = cfs_time_current();
+
+		now = get_seconds();
+
+		page_pools.epp_st_missings++;
+		page_pools.epp_pages_short += desc->bd_iov_count;
+
+		if (enc_pools_should_grow(desc->bd_iov_count, now)) {
+			page_pools.epp_growing = 1;
+
+			spin_unlock(&page_pools.epp_lock);
+			enc_pools_add_pages(page_pools.epp_pages_short / 2);
+			spin_lock(&page_pools.epp_lock);
+
+			page_pools.epp_growing = 0;
+
+			enc_pools_wakeup();
+		} else {
+			if (++page_pools.epp_waitqlen >
+			    page_pools.epp_st_max_wqlen)
+				page_pools.epp_st_max_wqlen =
+						page_pools.epp_waitqlen;
+
+			set_current_state(TASK_UNINTERRUPTIBLE);
+			init_waitqueue_entry(&waitlink, current);
+			add_wait_queue(&page_pools.epp_waitq, &waitlink);
+
+			spin_unlock(&page_pools.epp_lock);
+			schedule();
+			remove_wait_queue(&page_pools.epp_waitq, &waitlink);
+			LASSERT(page_pools.epp_waitqlen > 0);
+			spin_lock(&page_pools.epp_lock);
+			page_pools.epp_waitqlen--;
+		}
+
+		LASSERT(page_pools.epp_pages_short >= desc->bd_iov_count);
+		page_pools.epp_pages_short -= desc->bd_iov_count;
+
+		this_idle = 0;
+		goto again;
+	}
+
+	/* record max wait time */
+	if (unlikely(tick != 0)) {
+		tick = cfs_time_current() - tick;
+		if (tick > page_pools.epp_st_max_wait)
+			page_pools.epp_st_max_wait = tick;
+	}
+
+	/* proceed with rest of allocation */
+	page_pools.epp_free_pages -= desc->bd_iov_count;
+
+	p_idx = page_pools.epp_free_pages / PAGES_PER_POOL;
+	g_idx = page_pools.epp_free_pages % PAGES_PER_POOL;
+
+	for (i = 0; i < desc->bd_iov_count; i++) {
+		LASSERT(page_pools.epp_pools[p_idx][g_idx] != NULL);
+		desc->bd_enc_iov[i].kiov_page =
+					page_pools.epp_pools[p_idx][g_idx];
+		page_pools.epp_pools[p_idx][g_idx] = NULL;
+
+		if (++g_idx == PAGES_PER_POOL) {
+			p_idx++;
+			g_idx = 0;
+		}
+	}
+
+	if (page_pools.epp_free_pages < page_pools.epp_st_lowfree)
+		page_pools.epp_st_lowfree = page_pools.epp_free_pages;
+
+	/*
+	 * new idle index = (old * weight + new) / (weight + 1)
+	 */
+	if (this_idle == -1) {
+		this_idle = page_pools.epp_free_pages * IDLE_IDX_MAX /
+			    page_pools.epp_total_pages;
+	}
+	page_pools.epp_idle_idx = (page_pools.epp_idle_idx * IDLE_IDX_WEIGHT +
+				   this_idle) /
+				  (IDLE_IDX_WEIGHT + 1);
+
+	page_pools.epp_last_access = get_seconds();
+
+	spin_unlock(&page_pools.epp_lock);
+	return 0;
+}
+EXPORT_SYMBOL(sptlrpc_enc_pool_get_pages);
+
+void sptlrpc_enc_pool_put_pages(struct ptlrpc_bulk_desc *desc)
+{
+	int     p_idx, g_idx;
+	int     i;
+
+	if (desc->bd_enc_iov == NULL)
+		return;
+
+	LASSERT(desc->bd_iov_count > 0);
+
+	spin_lock(&page_pools.epp_lock);
+
+	p_idx = page_pools.epp_free_pages / PAGES_PER_POOL;
+	g_idx = page_pools.epp_free_pages % PAGES_PER_POOL;
+
+	LASSERT(page_pools.epp_free_pages + desc->bd_iov_count <=
+		page_pools.epp_total_pages);
+	LASSERT(page_pools.epp_pools[p_idx]);
+
+	for (i = 0; i < desc->bd_iov_count; i++) {
+		LASSERT(desc->bd_enc_iov[i].kiov_page != NULL);
+		LASSERT(g_idx != 0 || page_pools.epp_pools[p_idx]);
+		LASSERT(page_pools.epp_pools[p_idx][g_idx] == NULL);
+
+		page_pools.epp_pools[p_idx][g_idx] =
+					desc->bd_enc_iov[i].kiov_page;
+
+		if (++g_idx == PAGES_PER_POOL) {
+			p_idx++;
+			g_idx = 0;
+		}
+	}
+
+	page_pools.epp_free_pages += desc->bd_iov_count;
+
+	enc_pools_wakeup();
+
+	spin_unlock(&page_pools.epp_lock);
+
+	OBD_FREE(desc->bd_enc_iov,
+		 desc->bd_iov_count * sizeof(*desc->bd_enc_iov));
+	desc->bd_enc_iov = NULL;
+}
+EXPORT_SYMBOL(sptlrpc_enc_pool_put_pages);
+
+/*
+ * we don't do much stuff for add_user/del_user anymore, except adding some
+ * initial pages in add_user() if current pools are empty, rest would be
+ * handled by the pools's self-adaption.
+ */
+int sptlrpc_enc_pool_add_user(void)
+{
+	int     need_grow = 0;
+
+	spin_lock(&page_pools.epp_lock);
+	if (page_pools.epp_growing == 0 && page_pools.epp_total_pages == 0) {
+		page_pools.epp_growing = 1;
+		need_grow = 1;
+	}
+	spin_unlock(&page_pools.epp_lock);
+
+	if (need_grow) {
+		enc_pools_add_pages(PTLRPC_MAX_BRW_PAGES +
+				    PTLRPC_MAX_BRW_PAGES);
+
+		spin_lock(&page_pools.epp_lock);
+		page_pools.epp_growing = 0;
+		enc_pools_wakeup();
+		spin_unlock(&page_pools.epp_lock);
+	}
+	return 0;
+}
+EXPORT_SYMBOL(sptlrpc_enc_pool_add_user);
+
+int sptlrpc_enc_pool_del_user(void)
+{
+	return 0;
+}
+EXPORT_SYMBOL(sptlrpc_enc_pool_del_user);
+
+static inline void enc_pools_alloc(void)
+{
+	LASSERT(page_pools.epp_max_pools);
+	OBD_ALLOC_LARGE(page_pools.epp_pools,
+			page_pools.epp_max_pools *
+			sizeof(*page_pools.epp_pools));
+}
+
+static inline void enc_pools_free(void)
+{
+	LASSERT(page_pools.epp_max_pools);
+	LASSERT(page_pools.epp_pools);
+
+	OBD_FREE_LARGE(page_pools.epp_pools,
+		       page_pools.epp_max_pools *
+		       sizeof(*page_pools.epp_pools));
+}
+
+static struct shrinker pools_shrinker = {
+	.count_objects	= enc_pools_shrink_count,
+	.scan_objects	= enc_pools_shrink_scan,
+	.seeks		= DEFAULT_SEEKS,
+};
+
+int sptlrpc_enc_pool_init(void)
+{
+	/*
+	 * maximum capacity is 1/8 of total physical memory.
+	 * is the 1/8 a good number?
+	 */
+	page_pools.epp_max_pages = totalram_pages / 8;
+	page_pools.epp_max_pools = npages_to_npools(page_pools.epp_max_pages);
+
+	init_waitqueue_head(&page_pools.epp_waitq);
+	page_pools.epp_waitqlen = 0;
+	page_pools.epp_pages_short = 0;
+
+	page_pools.epp_growing = 0;
+
+	page_pools.epp_idle_idx = 0;
+	page_pools.epp_last_shrink = get_seconds();
+	page_pools.epp_last_access = get_seconds();
+
+	spin_lock_init(&page_pools.epp_lock);
+	page_pools.epp_total_pages = 0;
+	page_pools.epp_free_pages = 0;
+
+	page_pools.epp_st_max_pages = 0;
+	page_pools.epp_st_grows = 0;
+	page_pools.epp_st_grow_fails = 0;
+	page_pools.epp_st_shrinks = 0;
+	page_pools.epp_st_access = 0;
+	page_pools.epp_st_missings = 0;
+	page_pools.epp_st_lowfree = 0;
+	page_pools.epp_st_max_wqlen = 0;
+	page_pools.epp_st_max_wait = 0;
+
+	enc_pools_alloc();
+	if (page_pools.epp_pools == NULL)
+		return -ENOMEM;
+
+	register_shrinker(&pools_shrinker);
+
+	return 0;
+}
+
+void sptlrpc_enc_pool_fini(void)
+{
+	unsigned long cleaned, npools;
+
+	LASSERT(page_pools.epp_pools);
+	LASSERT(page_pools.epp_total_pages == page_pools.epp_free_pages);
+
+	unregister_shrinker(&pools_shrinker);
+
+	npools = npages_to_npools(page_pools.epp_total_pages);
+	cleaned = enc_pools_cleanup(page_pools.epp_pools, npools);
+	LASSERT(cleaned == page_pools.epp_total_pages);
+
+	enc_pools_free();
+
+	if (page_pools.epp_st_access > 0) {
+		CDEBUG(D_SEC,
+		       "max pages %lu, grows %u, grow fails %u, shrinks %u, access %lu, missing %lu, max qlen %u, max wait "
+		       CFS_TIME_T"/%d\n",
+		       page_pools.epp_st_max_pages, page_pools.epp_st_grows,
+		       page_pools.epp_st_grow_fails,
+		       page_pools.epp_st_shrinks, page_pools.epp_st_access,
+		       page_pools.epp_st_missings, page_pools.epp_st_max_wqlen,
+		       page_pools.epp_st_max_wait, HZ);
+	}
+}
+
+
+static int cfs_hash_alg_id[] = {
+	[BULK_HASH_ALG_NULL]	= CFS_HASH_ALG_NULL,
+	[BULK_HASH_ALG_ADLER32]	= CFS_HASH_ALG_ADLER32,
+	[BULK_HASH_ALG_CRC32]	= CFS_HASH_ALG_CRC32,
+	[BULK_HASH_ALG_MD5]	= CFS_HASH_ALG_MD5,
+	[BULK_HASH_ALG_SHA1]	= CFS_HASH_ALG_SHA1,
+	[BULK_HASH_ALG_SHA256]	= CFS_HASH_ALG_SHA256,
+	[BULK_HASH_ALG_SHA384]	= CFS_HASH_ALG_SHA384,
+	[BULK_HASH_ALG_SHA512]	= CFS_HASH_ALG_SHA512,
+};
+const char *sptlrpc_get_hash_name(__u8 hash_alg)
+{
+	return cfs_crypto_hash_name(cfs_hash_alg_id[hash_alg]);
+}
+EXPORT_SYMBOL(sptlrpc_get_hash_name);
+
+__u8 sptlrpc_get_hash_alg(const char *algname)
+{
+	return cfs_crypto_hash_alg(algname);
+}
+EXPORT_SYMBOL(sptlrpc_get_hash_alg);
+
+int bulk_sec_desc_unpack(struct lustre_msg *msg, int offset, int swabbed)
+{
+	struct ptlrpc_bulk_sec_desc *bsd;
+	int			  size = msg->lm_buflens[offset];
+
+	bsd = lustre_msg_buf(msg, offset, sizeof(*bsd));
+	if (bsd == NULL) {
+		CERROR("Invalid bulk sec desc: size %d\n", size);
+		return -EINVAL;
+	}
+
+	if (swabbed)
+		__swab32s(&bsd->bsd_nob);
+
+	if (unlikely(bsd->bsd_version != 0)) {
+		CERROR("Unexpected version %u\n", bsd->bsd_version);
+		return -EPROTO;
+	}
+
+	if (unlikely(bsd->bsd_type >= SPTLRPC_BULK_MAX)) {
+		CERROR("Invalid type %u\n", bsd->bsd_type);
+		return -EPROTO;
+	}
+
+	/* FIXME more sanity check here */
+
+	if (unlikely(bsd->bsd_svc != SPTLRPC_BULK_SVC_NULL &&
+		     bsd->bsd_svc != SPTLRPC_BULK_SVC_INTG &&
+		     bsd->bsd_svc != SPTLRPC_BULK_SVC_PRIV)) {
+		CERROR("Invalid svc %u\n", bsd->bsd_svc);
+		return -EPROTO;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(bulk_sec_desc_unpack);
+
+int sptlrpc_get_bulk_checksum(struct ptlrpc_bulk_desc *desc, __u8 alg,
+			      void *buf, int buflen)
+{
+	struct cfs_crypto_hash_desc	*hdesc;
+	int				hashsize;
+	char				hashbuf[64];
+	unsigned int			bufsize;
+	int				i, err;
+
+	LASSERT(alg > BULK_HASH_ALG_NULL && alg < BULK_HASH_ALG_MAX);
+	LASSERT(buflen >= 4);
+
+	hdesc = cfs_crypto_hash_init(cfs_hash_alg_id[alg], NULL, 0);
+	if (IS_ERR(hdesc)) {
+		CERROR("Unable to initialize checksum hash %s\n",
+		       cfs_crypto_hash_name(cfs_hash_alg_id[alg]));
+		return PTR_ERR(hdesc);
+	}
+
+	hashsize = cfs_crypto_hash_digestsize(cfs_hash_alg_id[alg]);
+
+	for (i = 0; i < desc->bd_iov_count; i++) {
+		cfs_crypto_hash_update_page(hdesc, desc->bd_iov[i].kiov_page,
+				  desc->bd_iov[i].kiov_offset & ~CFS_PAGE_MASK,
+				  desc->bd_iov[i].kiov_len);
+	}
+	if (hashsize > buflen) {
+		bufsize = sizeof(hashbuf);
+		err = cfs_crypto_hash_final(hdesc, (unsigned char *)hashbuf,
+					    &bufsize);
+		memcpy(buf, hashbuf, buflen);
+	} else {
+		bufsize = buflen;
+		err = cfs_crypto_hash_final(hdesc, (unsigned char *)buf,
+					    &bufsize);
+	}
+
+	if (err)
+		cfs_crypto_hash_final(hdesc, NULL, NULL);
+	return err;
+}
+EXPORT_SYMBOL(sptlrpc_get_bulk_checksum);
diff --git a/drivers/staging/lustre/lustre/ptlrpc/sec_config.c b/drivers/staging/lustre/lustre/ptlrpc/sec_config.c
new file mode 100644
index 000000000..56ba9e4e5
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ptlrpc/sec_config.c
@@ -0,0 +1,901 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#define DEBUG_SUBSYSTEM S_SEC
+
+#include "../../include/linux/libcfs/libcfs.h"
+#include <linux/crypto.h>
+#include <linux/key.h>
+
+#include "../include/obd.h"
+#include "../include/obd_support.h"
+#include "../include/lustre_import.h"
+#include "../include/lustre_param.h"
+#include "../include/lustre_sec.h"
+
+#include "ptlrpc_internal.h"
+
+const char *sptlrpc_part2name(enum lustre_sec_part part)
+{
+	switch (part) {
+	case LUSTRE_SP_CLI:
+		return "cli";
+	case LUSTRE_SP_MDT:
+		return "mdt";
+	case LUSTRE_SP_OST:
+		return "ost";
+	case LUSTRE_SP_MGC:
+		return "mgc";
+	case LUSTRE_SP_MGS:
+		return "mgs";
+	case LUSTRE_SP_ANY:
+		return "any";
+	default:
+		return "err";
+	}
+}
+EXPORT_SYMBOL(sptlrpc_part2name);
+
+enum lustre_sec_part sptlrpc_target_sec_part(struct obd_device *obd)
+{
+	const char *type = obd->obd_type->typ_name;
+
+	if (!strcmp(type, LUSTRE_MDT_NAME))
+		return LUSTRE_SP_MDT;
+	if (!strcmp(type, LUSTRE_OST_NAME))
+		return LUSTRE_SP_OST;
+	if (!strcmp(type, LUSTRE_MGS_NAME))
+		return LUSTRE_SP_MGS;
+
+	CERROR("unknown target %p(%s)\n", obd, type);
+	return LUSTRE_SP_ANY;
+}
+EXPORT_SYMBOL(sptlrpc_target_sec_part);
+
+/****************************************
+ * user supplied flavor string parsing  *
+ ****************************************/
+
+/*
+ * format: <base_flavor>[-<bulk_type:alg_spec>]
+ */
+int sptlrpc_parse_flavor(const char *str, struct sptlrpc_flavor *flvr)
+{
+	char	    buf[32];
+	char	   *bulk, *alg;
+
+	memset(flvr, 0, sizeof(*flvr));
+
+	if (str == NULL || str[0] == '\0') {
+		flvr->sf_rpc = SPTLRPC_FLVR_INVALID;
+		return 0;
+	}
+
+	strncpy(buf, str, sizeof(buf));
+	buf[sizeof(buf) - 1] = '\0';
+
+	bulk = strchr(buf, '-');
+	if (bulk)
+		*bulk++ = '\0';
+
+	flvr->sf_rpc = sptlrpc_name2flavor_base(buf);
+	if (flvr->sf_rpc == SPTLRPC_FLVR_INVALID)
+		goto err_out;
+
+	/*
+	 * currently only base flavor "plain" can have bulk specification.
+	 */
+	if (flvr->sf_rpc == SPTLRPC_FLVR_PLAIN) {
+		flvr->u_bulk.hash.hash_alg = BULK_HASH_ALG_ADLER32;
+		if (bulk) {
+			/*
+			 * format: plain-hash:<hash_alg>
+			 */
+			alg = strchr(bulk, ':');
+			if (alg == NULL)
+				goto err_out;
+			*alg++ = '\0';
+
+			if (strcmp(bulk, "hash"))
+				goto err_out;
+
+			flvr->u_bulk.hash.hash_alg = sptlrpc_get_hash_alg(alg);
+			if (flvr->u_bulk.hash.hash_alg >= BULK_HASH_ALG_MAX)
+				goto err_out;
+		}
+
+		if (flvr->u_bulk.hash.hash_alg == BULK_HASH_ALG_NULL)
+			flvr_set_bulk_svc(&flvr->sf_rpc, SPTLRPC_BULK_SVC_NULL);
+		else
+			flvr_set_bulk_svc(&flvr->sf_rpc, SPTLRPC_BULK_SVC_INTG);
+	} else {
+		if (bulk)
+			goto err_out;
+	}
+
+	flvr->sf_flags = 0;
+	return 0;
+
+err_out:
+	CERROR("invalid flavor string: %s\n", str);
+	return -EINVAL;
+}
+EXPORT_SYMBOL(sptlrpc_parse_flavor);
+
+/****************************************
+ * configure rules		      *
+ ****************************************/
+
+static void get_default_flavor(struct sptlrpc_flavor *sf)
+{
+	memset(sf, 0, sizeof(*sf));
+
+	sf->sf_rpc = SPTLRPC_FLVR_NULL;
+	sf->sf_flags = 0;
+}
+
+static void sptlrpc_rule_init(struct sptlrpc_rule *rule)
+{
+	rule->sr_netid = LNET_NIDNET(LNET_NID_ANY);
+	rule->sr_from = LUSTRE_SP_ANY;
+	rule->sr_to = LUSTRE_SP_ANY;
+	rule->sr_padding = 0;
+
+	get_default_flavor(&rule->sr_flvr);
+}
+
+/*
+ * format: network[.direction]=flavor
+ */
+int sptlrpc_parse_rule(char *param, struct sptlrpc_rule *rule)
+{
+	char	   *flavor, *dir;
+	int	     rc;
+
+	sptlrpc_rule_init(rule);
+
+	flavor = strchr(param, '=');
+	if (flavor == NULL) {
+		CERROR("invalid param, no '='\n");
+		return -EINVAL;
+	}
+	*flavor++ = '\0';
+
+	dir = strchr(param, '.');
+	if (dir)
+		*dir++ = '\0';
+
+	/* 1.1 network */
+	if (strcmp(param, "default")) {
+		rule->sr_netid = libcfs_str2net(param);
+		if (rule->sr_netid == LNET_NIDNET(LNET_NID_ANY)) {
+			CERROR("invalid network name: %s\n", param);
+			return -EINVAL;
+		}
+	}
+
+	/* 1.2 direction */
+	if (dir) {
+		if (!strcmp(dir, "mdt2ost")) {
+			rule->sr_from = LUSTRE_SP_MDT;
+			rule->sr_to = LUSTRE_SP_OST;
+		} else if (!strcmp(dir, "mdt2mdt")) {
+			rule->sr_from = LUSTRE_SP_MDT;
+			rule->sr_to = LUSTRE_SP_MDT;
+		} else if (!strcmp(dir, "cli2ost")) {
+			rule->sr_from = LUSTRE_SP_CLI;
+			rule->sr_to = LUSTRE_SP_OST;
+		} else if (!strcmp(dir, "cli2mdt")) {
+			rule->sr_from = LUSTRE_SP_CLI;
+			rule->sr_to = LUSTRE_SP_MDT;
+		} else {
+			CERROR("invalid rule dir segment: %s\n", dir);
+			return -EINVAL;
+		}
+	}
+
+	/* 2.1 flavor */
+	rc = sptlrpc_parse_flavor(flavor, &rule->sr_flvr);
+	if (rc)
+		return -EINVAL;
+
+	return 0;
+}
+EXPORT_SYMBOL(sptlrpc_parse_rule);
+
+void sptlrpc_rule_set_free(struct sptlrpc_rule_set *rset)
+{
+	LASSERT(rset->srs_nslot ||
+		(rset->srs_nrule == 0 && rset->srs_rules == NULL));
+
+	if (rset->srs_nslot) {
+		OBD_FREE(rset->srs_rules,
+			 rset->srs_nslot * sizeof(*rset->srs_rules));
+		sptlrpc_rule_set_init(rset);
+	}
+}
+EXPORT_SYMBOL(sptlrpc_rule_set_free);
+
+/*
+ * return 0 if the rule set could accommodate one more rule.
+ */
+int sptlrpc_rule_set_expand(struct sptlrpc_rule_set *rset)
+{
+	struct sptlrpc_rule *rules;
+	int nslot;
+
+	might_sleep();
+
+	if (rset->srs_nrule < rset->srs_nslot)
+		return 0;
+
+	nslot = rset->srs_nslot + 8;
+
+	/* better use realloc() if available */
+	OBD_ALLOC(rules, nslot * sizeof(*rset->srs_rules));
+	if (rules == NULL)
+		return -ENOMEM;
+
+	if (rset->srs_nrule) {
+		LASSERT(rset->srs_nslot && rset->srs_rules);
+		memcpy(rules, rset->srs_rules,
+		       rset->srs_nrule * sizeof(*rset->srs_rules));
+
+		OBD_FREE(rset->srs_rules,
+			 rset->srs_nslot * sizeof(*rset->srs_rules));
+	}
+
+	rset->srs_rules = rules;
+	rset->srs_nslot = nslot;
+	return 0;
+}
+EXPORT_SYMBOL(sptlrpc_rule_set_expand);
+
+static inline int rule_spec_dir(struct sptlrpc_rule *rule)
+{
+	return (rule->sr_from != LUSTRE_SP_ANY ||
+		rule->sr_to != LUSTRE_SP_ANY);
+}
+static inline int rule_spec_net(struct sptlrpc_rule *rule)
+{
+	return (rule->sr_netid != LNET_NIDNET(LNET_NID_ANY));
+}
+static inline int rule_match_dir(struct sptlrpc_rule *r1,
+				 struct sptlrpc_rule *r2)
+{
+	return (r1->sr_from == r2->sr_from && r1->sr_to == r2->sr_to);
+}
+static inline int rule_match_net(struct sptlrpc_rule *r1,
+				 struct sptlrpc_rule *r2)
+{
+	return (r1->sr_netid == r2->sr_netid);
+}
+
+/*
+ * merge @rule into @rset.
+ * the @rset slots might be expanded.
+ */
+int sptlrpc_rule_set_merge(struct sptlrpc_rule_set *rset,
+			   struct sptlrpc_rule *rule)
+{
+	struct sptlrpc_rule      *p = rset->srs_rules;
+	int		       spec_dir, spec_net;
+	int		       rc, n, match = 0;
+
+	might_sleep();
+
+	spec_net = rule_spec_net(rule);
+	spec_dir = rule_spec_dir(rule);
+
+	for (n = 0; n < rset->srs_nrule; n++) {
+		p = &rset->srs_rules[n];
+
+		/* test network match, if failed:
+		 * - spec rule: skip rules which is also spec rule match, until
+		 *   we hit a wild rule, which means no more chance
+		 * - wild rule: skip until reach the one which is also wild
+		 *   and matches
+		 */
+		if (!rule_match_net(p, rule)) {
+			if (spec_net) {
+				if (rule_spec_net(p))
+					continue;
+				else
+					break;
+			} else {
+				continue;
+			}
+		}
+
+		/* test dir match, same logic as net matching */
+		if (!rule_match_dir(p, rule)) {
+			if (spec_dir) {
+				if (rule_spec_dir(p))
+					continue;
+				else
+					break;
+			} else {
+				continue;
+			}
+		}
+
+		/* find a match */
+		match = 1;
+		break;
+	}
+
+	if (match) {
+		LASSERT(n >= 0 && n < rset->srs_nrule);
+
+		if (rule->sr_flvr.sf_rpc == SPTLRPC_FLVR_INVALID) {
+			/* remove this rule */
+			if (n < rset->srs_nrule - 1)
+				memmove(&rset->srs_rules[n],
+					&rset->srs_rules[n + 1],
+					(rset->srs_nrule - n - 1) *
+					sizeof(*rule));
+			rset->srs_nrule--;
+		} else {
+			/* override the rule */
+			memcpy(&rset->srs_rules[n], rule, sizeof(*rule));
+		}
+	} else {
+		LASSERT(n >= 0 && n <= rset->srs_nrule);
+
+		if (rule->sr_flvr.sf_rpc != SPTLRPC_FLVR_INVALID) {
+			rc = sptlrpc_rule_set_expand(rset);
+			if (rc)
+				return rc;
+
+			if (n < rset->srs_nrule)
+				memmove(&rset->srs_rules[n + 1],
+					&rset->srs_rules[n],
+					(rset->srs_nrule - n) * sizeof(*rule));
+			memcpy(&rset->srs_rules[n], rule, sizeof(*rule));
+			rset->srs_nrule++;
+		} else {
+			CDEBUG(D_CONFIG, "ignore the unmatched deletion\n");
+		}
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(sptlrpc_rule_set_merge);
+
+/**
+ * given from/to/nid, determine a matching flavor in ruleset.
+ * return 1 if a match found, otherwise return 0.
+ */
+int sptlrpc_rule_set_choose(struct sptlrpc_rule_set *rset,
+			    enum lustre_sec_part from,
+			    enum lustre_sec_part to,
+			    lnet_nid_t nid,
+			    struct sptlrpc_flavor *sf)
+{
+	struct sptlrpc_rule    *r;
+	int		     n;
+
+	for (n = 0; n < rset->srs_nrule; n++) {
+		r = &rset->srs_rules[n];
+
+		if (LNET_NIDNET(nid) != LNET_NIDNET(LNET_NID_ANY) &&
+		    r->sr_netid != LNET_NIDNET(LNET_NID_ANY) &&
+		    LNET_NIDNET(nid) != r->sr_netid)
+			continue;
+
+		if (from != LUSTRE_SP_ANY && r->sr_from != LUSTRE_SP_ANY &&
+		    from != r->sr_from)
+			continue;
+
+		if (to != LUSTRE_SP_ANY && r->sr_to != LUSTRE_SP_ANY &&
+		    to != r->sr_to)
+			continue;
+
+		*sf = r->sr_flvr;
+		return 1;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(sptlrpc_rule_set_choose);
+
+void sptlrpc_rule_set_dump(struct sptlrpc_rule_set *rset)
+{
+	struct sptlrpc_rule *r;
+	int     n;
+
+	for (n = 0; n < rset->srs_nrule; n++) {
+		r = &rset->srs_rules[n];
+		CDEBUG(D_SEC, "<%02d> from %x to %x, net %x, rpc %x\n", n,
+		       r->sr_from, r->sr_to, r->sr_netid, r->sr_flvr.sf_rpc);
+	}
+}
+EXPORT_SYMBOL(sptlrpc_rule_set_dump);
+
+/**********************************
+ * sptlrpc configuration support  *
+ **********************************/
+
+struct sptlrpc_conf_tgt {
+	struct list_head	      sct_list;
+	char		    sct_name[MAX_OBD_NAME];
+	struct sptlrpc_rule_set sct_rset;
+};
+
+struct sptlrpc_conf {
+	struct list_head	      sc_list;
+	char		    sc_fsname[MTI_NAME_MAXLEN];
+	unsigned int	    sc_modified;  /* modified during updating */
+	unsigned int	    sc_updated:1, /* updated copy from MGS */
+				sc_local:1;   /* local copy from target */
+	struct sptlrpc_rule_set sc_rset;      /* fs general rules */
+	struct list_head	      sc_tgts;      /* target-specific rules */
+};
+
+static struct mutex sptlrpc_conf_lock;
+static LIST_HEAD(sptlrpc_confs);
+
+static inline int is_hex(char c)
+{
+	return ((c >= '0' && c <= '9') ||
+		(c >= 'a' && c <= 'f'));
+}
+
+static void target2fsname(const char *tgt, char *fsname, int buflen)
+{
+	const char     *ptr;
+	int	     len;
+
+	ptr = strrchr(tgt, '-');
+	if (ptr) {
+		if ((strncmp(ptr, "-MDT", 4) != 0 &&
+		     strncmp(ptr, "-OST", 4) != 0) ||
+		    !is_hex(ptr[4]) || !is_hex(ptr[5]) ||
+		    !is_hex(ptr[6]) || !is_hex(ptr[7]))
+			ptr = NULL;
+	}
+
+	/* if we didn't find the pattern, treat the whole string as fsname */
+	if (ptr == NULL)
+		len = strlen(tgt);
+	else
+		len = ptr - tgt;
+
+	len = min(len, buflen - 1);
+	memcpy(fsname, tgt, len);
+	fsname[len] = '\0';
+}
+
+static void sptlrpc_conf_free_rsets(struct sptlrpc_conf *conf)
+{
+	struct sptlrpc_conf_tgt *conf_tgt, *conf_tgt_next;
+
+	sptlrpc_rule_set_free(&conf->sc_rset);
+
+	list_for_each_entry_safe(conf_tgt, conf_tgt_next,
+				     &conf->sc_tgts, sct_list) {
+		sptlrpc_rule_set_free(&conf_tgt->sct_rset);
+		list_del(&conf_tgt->sct_list);
+		OBD_FREE_PTR(conf_tgt);
+	}
+	LASSERT(list_empty(&conf->sc_tgts));
+
+	conf->sc_updated = 0;
+	conf->sc_local = 0;
+}
+
+static void sptlrpc_conf_free(struct sptlrpc_conf *conf)
+{
+	CDEBUG(D_SEC, "free sptlrpc conf %s\n", conf->sc_fsname);
+
+	sptlrpc_conf_free_rsets(conf);
+	list_del(&conf->sc_list);
+	OBD_FREE_PTR(conf);
+}
+
+static
+struct sptlrpc_conf_tgt *sptlrpc_conf_get_tgt(struct sptlrpc_conf *conf,
+					      const char *name,
+					      int create)
+{
+	struct sptlrpc_conf_tgt *conf_tgt;
+
+	list_for_each_entry(conf_tgt, &conf->sc_tgts, sct_list) {
+		if (strcmp(conf_tgt->sct_name, name) == 0)
+			return conf_tgt;
+	}
+
+	if (!create)
+		return NULL;
+
+	OBD_ALLOC_PTR(conf_tgt);
+	if (conf_tgt) {
+		strlcpy(conf_tgt->sct_name, name, sizeof(conf_tgt->sct_name));
+		sptlrpc_rule_set_init(&conf_tgt->sct_rset);
+		list_add(&conf_tgt->sct_list, &conf->sc_tgts);
+	}
+
+	return conf_tgt;
+}
+
+static
+struct sptlrpc_conf *sptlrpc_conf_get(const char *fsname,
+				      int create)
+{
+	struct sptlrpc_conf *conf;
+
+	list_for_each_entry(conf, &sptlrpc_confs, sc_list) {
+		if (strcmp(conf->sc_fsname, fsname) == 0)
+			return conf;
+	}
+
+	if (!create)
+		return NULL;
+
+	OBD_ALLOC_PTR(conf);
+	if (conf == NULL)
+		return NULL;
+
+	strcpy(conf->sc_fsname, fsname);
+	sptlrpc_rule_set_init(&conf->sc_rset);
+	INIT_LIST_HEAD(&conf->sc_tgts);
+	list_add(&conf->sc_list, &sptlrpc_confs);
+
+	CDEBUG(D_SEC, "create sptlrpc conf %s\n", conf->sc_fsname);
+	return conf;
+}
+
+/**
+ * caller must hold conf_lock already.
+ */
+static int sptlrpc_conf_merge_rule(struct sptlrpc_conf *conf,
+				   const char *target,
+				   struct sptlrpc_rule *rule)
+{
+	struct sptlrpc_conf_tgt  *conf_tgt;
+	struct sptlrpc_rule_set  *rule_set;
+
+	/* fsname == target means general rules for the whole fs */
+	if (strcmp(conf->sc_fsname, target) == 0) {
+		rule_set = &conf->sc_rset;
+	} else {
+		conf_tgt = sptlrpc_conf_get_tgt(conf, target, 1);
+		if (conf_tgt) {
+			rule_set = &conf_tgt->sct_rset;
+		} else {
+			CERROR("out of memory, can't merge rule!\n");
+			return -ENOMEM;
+		}
+	}
+
+	return sptlrpc_rule_set_merge(rule_set, rule);
+}
+
+/**
+ * process one LCFG_SPTLRPC_CONF record. if \a conf is NULL, we
+ * find one through the target name in the record inside conf_lock;
+ * otherwise means caller already hold conf_lock.
+ */
+static int __sptlrpc_process_config(struct lustre_cfg *lcfg,
+				    struct sptlrpc_conf *conf)
+{
+	char		   *target, *param;
+	char		    fsname[MTI_NAME_MAXLEN];
+	struct sptlrpc_rule     rule;
+	int		     rc;
+
+	target = lustre_cfg_string(lcfg, 1);
+	if (target == NULL) {
+		CERROR("missing target name\n");
+		return -EINVAL;
+	}
+
+	param = lustre_cfg_string(lcfg, 2);
+	if (param == NULL) {
+		CERROR("missing parameter\n");
+		return -EINVAL;
+	}
+
+	CDEBUG(D_SEC, "processing rule: %s.%s\n", target, param);
+
+	/* parse rule to make sure the format is correct */
+	if (strncmp(param, PARAM_SRPC_FLVR, sizeof(PARAM_SRPC_FLVR) - 1) != 0) {
+		CERROR("Invalid sptlrpc parameter: %s\n", param);
+		return -EINVAL;
+	}
+	param += sizeof(PARAM_SRPC_FLVR) - 1;
+
+	rc = sptlrpc_parse_rule(param, &rule);
+	if (rc)
+		return -EINVAL;
+
+	if (conf == NULL) {
+		target2fsname(target, fsname, sizeof(fsname));
+
+		mutex_lock(&sptlrpc_conf_lock);
+		conf = sptlrpc_conf_get(fsname, 0);
+		if (conf == NULL) {
+			CERROR("can't find conf\n");
+			rc = -ENOMEM;
+		} else {
+			rc = sptlrpc_conf_merge_rule(conf, target, &rule);
+		}
+		mutex_unlock(&sptlrpc_conf_lock);
+	} else {
+		LASSERT(mutex_is_locked(&sptlrpc_conf_lock));
+		rc = sptlrpc_conf_merge_rule(conf, target, &rule);
+	}
+
+	if (rc == 0)
+		conf->sc_modified++;
+
+	return rc;
+}
+
+int sptlrpc_process_config(struct lustre_cfg *lcfg)
+{
+	return __sptlrpc_process_config(lcfg, NULL);
+}
+EXPORT_SYMBOL(sptlrpc_process_config);
+
+static int logname2fsname(const char *logname, char *buf, int buflen)
+{
+	char   *ptr;
+	int     len;
+
+	ptr = strrchr(logname, '-');
+	if (ptr == NULL || strcmp(ptr, "-sptlrpc")) {
+		CERROR("%s is not a sptlrpc config log\n", logname);
+		return -EINVAL;
+	}
+
+	len = min((int) (ptr - logname), buflen - 1);
+
+	memcpy(buf, logname, len);
+	buf[len] = '\0';
+	return 0;
+}
+
+void sptlrpc_conf_log_update_begin(const char *logname)
+{
+	struct sptlrpc_conf *conf;
+	char		 fsname[16];
+
+	if (logname2fsname(logname, fsname, sizeof(fsname)))
+		return;
+
+	mutex_lock(&sptlrpc_conf_lock);
+
+	conf = sptlrpc_conf_get(fsname, 0);
+	if (conf) {
+		if (conf->sc_local) {
+			LASSERT(conf->sc_updated == 0);
+			sptlrpc_conf_free_rsets(conf);
+		}
+		conf->sc_modified = 0;
+	}
+
+	mutex_unlock(&sptlrpc_conf_lock);
+}
+EXPORT_SYMBOL(sptlrpc_conf_log_update_begin);
+
+/**
+ * mark a config log has been updated
+ */
+void sptlrpc_conf_log_update_end(const char *logname)
+{
+	struct sptlrpc_conf *conf;
+	char		 fsname[16];
+
+	if (logname2fsname(logname, fsname, sizeof(fsname)))
+		return;
+
+	mutex_lock(&sptlrpc_conf_lock);
+
+	conf = sptlrpc_conf_get(fsname, 0);
+	if (conf) {
+		/*
+		 * if original state is not updated, make sure the
+		 * modified counter > 0 to enforce updating local copy.
+		 */
+		if (conf->sc_updated == 0)
+			conf->sc_modified++;
+
+		conf->sc_updated = 1;
+	}
+
+	mutex_unlock(&sptlrpc_conf_lock);
+}
+EXPORT_SYMBOL(sptlrpc_conf_log_update_end);
+
+void sptlrpc_conf_log_start(const char *logname)
+{
+	char		 fsname[16];
+
+	if (logname2fsname(logname, fsname, sizeof(fsname)))
+		return;
+
+	mutex_lock(&sptlrpc_conf_lock);
+	sptlrpc_conf_get(fsname, 1);
+	mutex_unlock(&sptlrpc_conf_lock);
+}
+EXPORT_SYMBOL(sptlrpc_conf_log_start);
+
+void sptlrpc_conf_log_stop(const char *logname)
+{
+	struct sptlrpc_conf *conf;
+	char		 fsname[16];
+
+	if (logname2fsname(logname, fsname, sizeof(fsname)))
+		return;
+
+	mutex_lock(&sptlrpc_conf_lock);
+	conf = sptlrpc_conf_get(fsname, 0);
+	if (conf)
+		sptlrpc_conf_free(conf);
+	mutex_unlock(&sptlrpc_conf_lock);
+}
+EXPORT_SYMBOL(sptlrpc_conf_log_stop);
+
+static inline void flavor_set_flags(struct sptlrpc_flavor *sf,
+				    enum lustre_sec_part from,
+				    enum lustre_sec_part to,
+				    unsigned int fl_udesc)
+{
+	/*
+	 * null flavor doesn't need to set any flavor, and in fact
+	 * we'd better not do that because everybody share a single sec.
+	 */
+	if (sf->sf_rpc == SPTLRPC_FLVR_NULL)
+		return;
+
+	if (from == LUSTRE_SP_MDT) {
+		/* MDT->MDT; MDT->OST */
+		sf->sf_flags |= PTLRPC_SEC_FL_ROOTONLY;
+	} else if (from == LUSTRE_SP_CLI && to == LUSTRE_SP_OST) {
+		/* CLI->OST */
+		sf->sf_flags |= PTLRPC_SEC_FL_ROOTONLY | PTLRPC_SEC_FL_BULK;
+	} else if (from == LUSTRE_SP_CLI && to == LUSTRE_SP_MDT) {
+		/* CLI->MDT */
+		if (fl_udesc && sf->sf_rpc != SPTLRPC_FLVR_NULL)
+			sf->sf_flags |= PTLRPC_SEC_FL_UDESC;
+	}
+}
+
+void sptlrpc_conf_choose_flavor(enum lustre_sec_part from,
+				enum lustre_sec_part to,
+				struct obd_uuid *target,
+				lnet_nid_t nid,
+				struct sptlrpc_flavor *sf)
+{
+	struct sptlrpc_conf     *conf;
+	struct sptlrpc_conf_tgt *conf_tgt;
+	char		     name[MTI_NAME_MAXLEN];
+	int		      len, rc = 0;
+
+	target2fsname(target->uuid, name, sizeof(name));
+
+	mutex_lock(&sptlrpc_conf_lock);
+
+	conf = sptlrpc_conf_get(name, 0);
+	if (conf == NULL)
+		goto out;
+
+	/* convert uuid name (supposed end with _UUID) to target name */
+	len = strlen(target->uuid);
+	LASSERT(len > 5);
+	memcpy(name, target->uuid, len - 5);
+	name[len - 5] = '\0';
+
+	conf_tgt = sptlrpc_conf_get_tgt(conf, name, 0);
+	if (conf_tgt) {
+		rc = sptlrpc_rule_set_choose(&conf_tgt->sct_rset,
+					     from, to, nid, sf);
+		if (rc)
+			goto out;
+	}
+
+	rc = sptlrpc_rule_set_choose(&conf->sc_rset, from, to, nid, sf);
+out:
+	mutex_unlock(&sptlrpc_conf_lock);
+
+	if (rc == 0)
+		get_default_flavor(sf);
+
+	flavor_set_flags(sf, from, to, 1);
+}
+
+/**
+ * called by target devices, determine the expected flavor from
+ * certain peer (from, nid).
+ */
+void sptlrpc_target_choose_flavor(struct sptlrpc_rule_set *rset,
+				  enum lustre_sec_part from,
+				  lnet_nid_t nid,
+				  struct sptlrpc_flavor *sf)
+{
+	if (sptlrpc_rule_set_choose(rset, from, LUSTRE_SP_ANY, nid, sf) == 0)
+		get_default_flavor(sf);
+}
+EXPORT_SYMBOL(sptlrpc_target_choose_flavor);
+
+#define SEC_ADAPT_DELAY	 (10)
+
+/**
+ * called by client devices, notify the sptlrpc config has changed and
+ * do import_sec_adapt later.
+ */
+void sptlrpc_conf_client_adapt(struct obd_device *obd)
+{
+	struct obd_import  *imp;
+
+	LASSERT(strcmp(obd->obd_type->typ_name, LUSTRE_MDC_NAME) == 0 ||
+		strcmp(obd->obd_type->typ_name, LUSTRE_OSC_NAME) == 0);
+	CDEBUG(D_SEC, "obd %s\n", obd->u.cli.cl_target_uuid.uuid);
+
+	/* serialize with connect/disconnect import */
+	down_read(&obd->u.cli.cl_sem);
+
+	imp = obd->u.cli.cl_import;
+	if (imp) {
+		spin_lock(&imp->imp_lock);
+		if (imp->imp_sec)
+			imp->imp_sec_expire = get_seconds() +
+				SEC_ADAPT_DELAY;
+		spin_unlock(&imp->imp_lock);
+	}
+
+	up_read(&obd->u.cli.cl_sem);
+}
+EXPORT_SYMBOL(sptlrpc_conf_client_adapt);
+
+int  sptlrpc_conf_init(void)
+{
+	mutex_init(&sptlrpc_conf_lock);
+	return 0;
+}
+
+void sptlrpc_conf_fini(void)
+{
+	struct sptlrpc_conf  *conf, *conf_next;
+
+	mutex_lock(&sptlrpc_conf_lock);
+	list_for_each_entry_safe(conf, conf_next, &sptlrpc_confs, sc_list) {
+		sptlrpc_conf_free(conf);
+	}
+	LASSERT(list_empty(&sptlrpc_confs));
+	mutex_unlock(&sptlrpc_conf_lock);
+}
diff --git a/drivers/staging/lustre/lustre/ptlrpc/sec_gc.c b/drivers/staging/lustre/lustre/ptlrpc/sec_gc.c
new file mode 100644
index 000000000..81de68edb
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ptlrpc/sec_gc.c
@@ -0,0 +1,252 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/ptlrpc/sec_gc.c
+ *
+ * Author: Eric Mei <ericm@clusterfs.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_SEC
+
+#include "../../include/linux/libcfs/libcfs.h"
+
+#include "../include/obd_support.h"
+#include "../include/obd_class.h"
+#include "../include/lustre_net.h"
+#include "../include/lustre_sec.h"
+
+#include "ptlrpc_internal.h"
+
+#define SEC_GC_INTERVAL (30 * 60)
+
+
+static struct mutex sec_gc_mutex;
+static LIST_HEAD(sec_gc_list);
+static spinlock_t sec_gc_list_lock;
+
+static LIST_HEAD(sec_gc_ctx_list);
+static spinlock_t sec_gc_ctx_list_lock;
+
+static struct ptlrpc_thread sec_gc_thread;
+static atomic_t sec_gc_wait_del = ATOMIC_INIT(0);
+
+
+void sptlrpc_gc_add_sec(struct ptlrpc_sec *sec)
+{
+	LASSERT(sec->ps_policy->sp_cops->gc_ctx);
+	LASSERT(sec->ps_gc_interval > 0);
+	LASSERT(list_empty(&sec->ps_gc_list));
+
+	sec->ps_gc_next = get_seconds() + sec->ps_gc_interval;
+
+	spin_lock(&sec_gc_list_lock);
+	list_add_tail(&sec_gc_list, &sec->ps_gc_list);
+	spin_unlock(&sec_gc_list_lock);
+
+	CDEBUG(D_SEC, "added sec %p(%s)\n", sec, sec->ps_policy->sp_name);
+}
+EXPORT_SYMBOL(sptlrpc_gc_add_sec);
+
+void sptlrpc_gc_del_sec(struct ptlrpc_sec *sec)
+{
+	if (list_empty(&sec->ps_gc_list))
+		return;
+
+	might_sleep();
+
+	/* signal before list_del to make iteration in gc thread safe */
+	atomic_inc(&sec_gc_wait_del);
+
+	spin_lock(&sec_gc_list_lock);
+	list_del_init(&sec->ps_gc_list);
+	spin_unlock(&sec_gc_list_lock);
+
+	/* barrier */
+	mutex_lock(&sec_gc_mutex);
+	mutex_unlock(&sec_gc_mutex);
+
+	atomic_dec(&sec_gc_wait_del);
+
+	CDEBUG(D_SEC, "del sec %p(%s)\n", sec, sec->ps_policy->sp_name);
+}
+EXPORT_SYMBOL(sptlrpc_gc_del_sec);
+
+void sptlrpc_gc_add_ctx(struct ptlrpc_cli_ctx *ctx)
+{
+	LASSERT(list_empty(&ctx->cc_gc_chain));
+
+	CDEBUG(D_SEC, "hand over ctx %p(%u->%s)\n",
+	       ctx, ctx->cc_vcred.vc_uid, sec2target_str(ctx->cc_sec));
+	spin_lock(&sec_gc_ctx_list_lock);
+	list_add(&ctx->cc_gc_chain, &sec_gc_ctx_list);
+	spin_unlock(&sec_gc_ctx_list_lock);
+
+	thread_add_flags(&sec_gc_thread, SVC_SIGNAL);
+	wake_up(&sec_gc_thread.t_ctl_waitq);
+}
+EXPORT_SYMBOL(sptlrpc_gc_add_ctx);
+
+static void sec_process_ctx_list(void)
+{
+	struct ptlrpc_cli_ctx *ctx;
+
+	spin_lock(&sec_gc_ctx_list_lock);
+
+	while (!list_empty(&sec_gc_ctx_list)) {
+		ctx = list_entry(sec_gc_ctx_list.next,
+				     struct ptlrpc_cli_ctx, cc_gc_chain);
+		list_del_init(&ctx->cc_gc_chain);
+		spin_unlock(&sec_gc_ctx_list_lock);
+
+		LASSERT(ctx->cc_sec);
+		LASSERT(atomic_read(&ctx->cc_refcount) == 1);
+		CDEBUG(D_SEC, "gc pick up ctx %p(%u->%s)\n",
+		       ctx, ctx->cc_vcred.vc_uid, sec2target_str(ctx->cc_sec));
+		sptlrpc_cli_ctx_put(ctx, 1);
+
+		spin_lock(&sec_gc_ctx_list_lock);
+	}
+
+	spin_unlock(&sec_gc_ctx_list_lock);
+}
+
+static void sec_do_gc(struct ptlrpc_sec *sec)
+{
+	LASSERT(sec->ps_policy->sp_cops->gc_ctx);
+
+	if (unlikely(sec->ps_gc_next == 0)) {
+		CDEBUG(D_SEC, "sec %p(%s) has 0 gc time\n",
+		      sec, sec->ps_policy->sp_name);
+		return;
+	}
+
+	CDEBUG(D_SEC, "check on sec %p(%s)\n", sec, sec->ps_policy->sp_name);
+
+	if (cfs_time_after(sec->ps_gc_next, get_seconds()))
+		return;
+
+	sec->ps_policy->sp_cops->gc_ctx(sec);
+	sec->ps_gc_next = get_seconds() + sec->ps_gc_interval;
+}
+
+static int sec_gc_main(void *arg)
+{
+	struct ptlrpc_thread *thread = (struct ptlrpc_thread *) arg;
+	struct l_wait_info    lwi;
+
+	unshare_fs_struct();
+
+	/* Record that the thread is running */
+	thread_set_flags(thread, SVC_RUNNING);
+	wake_up(&thread->t_ctl_waitq);
+
+	while (1) {
+		struct ptlrpc_sec *sec;
+
+		thread_clear_flags(thread, SVC_SIGNAL);
+		sec_process_ctx_list();
+again:
+		/* go through sec list do gc.
+		 * FIXME here we iterate through the whole list each time which
+		 * is not optimal. we perhaps want to use balanced binary tree
+		 * to trace each sec as order of expiry time.
+		 * another issue here is we wakeup as fixed interval instead of
+		 * according to each sec's expiry time */
+		mutex_lock(&sec_gc_mutex);
+		list_for_each_entry(sec, &sec_gc_list, ps_gc_list) {
+			/* if someone is waiting to be deleted, let it
+			 * proceed as soon as possible. */
+			if (atomic_read(&sec_gc_wait_del)) {
+				CDEBUG(D_SEC, "deletion pending, start over\n");
+				mutex_unlock(&sec_gc_mutex);
+				goto again;
+			}
+
+			sec_do_gc(sec);
+		}
+		mutex_unlock(&sec_gc_mutex);
+
+		/* check ctx list again before sleep */
+		sec_process_ctx_list();
+
+		lwi = LWI_TIMEOUT(SEC_GC_INTERVAL * HZ, NULL, NULL);
+		l_wait_event(thread->t_ctl_waitq,
+			     thread_is_stopping(thread) ||
+			     thread_is_signal(thread),
+			     &lwi);
+
+		if (thread_test_and_clear_flags(thread, SVC_STOPPING))
+			break;
+	}
+
+	thread_set_flags(thread, SVC_STOPPED);
+	wake_up(&thread->t_ctl_waitq);
+	return 0;
+}
+
+int sptlrpc_gc_init(void)
+{
+	struct l_wait_info lwi = { 0 };
+	struct task_struct *task;
+
+	mutex_init(&sec_gc_mutex);
+	spin_lock_init(&sec_gc_list_lock);
+	spin_lock_init(&sec_gc_ctx_list_lock);
+
+	/* initialize thread control */
+	memset(&sec_gc_thread, 0, sizeof(sec_gc_thread));
+	init_waitqueue_head(&sec_gc_thread.t_ctl_waitq);
+
+	task = kthread_run(sec_gc_main, &sec_gc_thread, "sptlrpc_gc");
+	if (IS_ERR(task)) {
+		CERROR("can't start gc thread: %ld\n", PTR_ERR(task));
+		return PTR_ERR(task);
+	}
+
+	l_wait_event(sec_gc_thread.t_ctl_waitq,
+		     thread_is_running(&sec_gc_thread), &lwi);
+	return 0;
+}
+
+void sptlrpc_gc_fini(void)
+{
+	struct l_wait_info lwi = { 0 };
+
+	thread_set_flags(&sec_gc_thread, SVC_STOPPING);
+	wake_up(&sec_gc_thread.t_ctl_waitq);
+
+	l_wait_event(sec_gc_thread.t_ctl_waitq,
+		     thread_is_stopped(&sec_gc_thread), &lwi);
+}
diff --git a/drivers/staging/lustre/lustre/ptlrpc/sec_lproc.c b/drivers/staging/lustre/lustre/ptlrpc/sec_lproc.c
new file mode 100644
index 000000000..0d08145a6
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ptlrpc/sec_lproc.c
@@ -0,0 +1,199 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/ptlrpc/sec_lproc.c
+ *
+ * Author: Eric Mei <ericm@clusterfs.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_SEC
+
+#include "../../include/linux/libcfs/libcfs.h"
+#include <linux/crypto.h>
+
+#include "../include/obd.h"
+#include "../include/obd_class.h"
+#include "../include/obd_support.h"
+#include "../include/lustre_net.h"
+#include "../include/lustre_import.h"
+#include "../include/lustre_dlm.h"
+#include "../include/lustre_sec.h"
+
+#include "ptlrpc_internal.h"
+
+
+struct proc_dir_entry *sptlrpc_proc_root = NULL;
+EXPORT_SYMBOL(sptlrpc_proc_root);
+
+static char *sec_flags2str(unsigned long flags, char *buf, int bufsize)
+{
+	buf[0] = '\0';
+
+	if (flags & PTLRPC_SEC_FL_REVERSE)
+		strlcat(buf, "reverse,", bufsize);
+	if (flags & PTLRPC_SEC_FL_ROOTONLY)
+		strlcat(buf, "rootonly,", bufsize);
+	if (flags & PTLRPC_SEC_FL_UDESC)
+		strlcat(buf, "udesc,", bufsize);
+	if (flags & PTLRPC_SEC_FL_BULK)
+		strlcat(buf, "bulk,", bufsize);
+	if (buf[0] == '\0')
+		strlcat(buf, "-,", bufsize);
+
+	return buf;
+}
+
+static int sptlrpc_info_lprocfs_seq_show(struct seq_file *seq, void *v)
+{
+	struct obd_device *dev = seq->private;
+	struct client_obd *cli = &dev->u.cli;
+	struct ptlrpc_sec *sec = NULL;
+	char	       str[32];
+
+	LASSERT(strcmp(dev->obd_type->typ_name, LUSTRE_OSC_NAME) == 0 ||
+		strcmp(dev->obd_type->typ_name, LUSTRE_MDC_NAME) == 0 ||
+		strcmp(dev->obd_type->typ_name, LUSTRE_MGC_NAME) == 0);
+
+	if (cli->cl_import)
+		sec = sptlrpc_import_sec_ref(cli->cl_import);
+	if (sec == NULL)
+		goto out;
+
+	sec_flags2str(sec->ps_flvr.sf_flags, str, sizeof(str));
+
+	seq_printf(seq, "rpc flavor:    %s\n",
+		   sptlrpc_flavor2name_base(sec->ps_flvr.sf_rpc));
+	seq_printf(seq, "bulk flavor:   %s\n",
+		   sptlrpc_flavor2name_bulk(&sec->ps_flvr, str, sizeof(str)));
+	seq_printf(seq, "flags:	 %s\n",
+		   sec_flags2str(sec->ps_flvr.sf_flags, str, sizeof(str)));
+	seq_printf(seq, "id:	    %d\n", sec->ps_id);
+	seq_printf(seq, "refcount:      %d\n",
+		   atomic_read(&sec->ps_refcount));
+	seq_printf(seq, "nctx:	  %d\n", atomic_read(&sec->ps_nctx));
+	seq_printf(seq, "gc internal    %ld\n", sec->ps_gc_interval);
+	seq_printf(seq, "gc next	%ld\n",
+		   sec->ps_gc_interval ?
+		   sec->ps_gc_next - get_seconds() : 0);
+
+	sptlrpc_sec_put(sec);
+out:
+	return 0;
+}
+LPROC_SEQ_FOPS_RO(sptlrpc_info_lprocfs);
+
+static int sptlrpc_ctxs_lprocfs_seq_show(struct seq_file *seq, void *v)
+{
+	struct obd_device *dev = seq->private;
+	struct client_obd *cli = &dev->u.cli;
+	struct ptlrpc_sec *sec = NULL;
+
+	LASSERT(strcmp(dev->obd_type->typ_name, LUSTRE_OSC_NAME) == 0 ||
+		strcmp(dev->obd_type->typ_name, LUSTRE_MDC_NAME) == 0 ||
+		strcmp(dev->obd_type->typ_name, LUSTRE_MGC_NAME) == 0);
+
+	if (cli->cl_import)
+		sec = sptlrpc_import_sec_ref(cli->cl_import);
+	if (sec == NULL)
+		goto out;
+
+	if (sec->ps_policy->sp_cops->display)
+		sec->ps_policy->sp_cops->display(sec, seq);
+
+	sptlrpc_sec_put(sec);
+out:
+	return 0;
+}
+LPROC_SEQ_FOPS_RO(sptlrpc_ctxs_lprocfs);
+
+int sptlrpc_lprocfs_cliobd_attach(struct obd_device *dev)
+{
+	int     rc;
+
+	if (strcmp(dev->obd_type->typ_name, LUSTRE_OSC_NAME) != 0 &&
+	    strcmp(dev->obd_type->typ_name, LUSTRE_MDC_NAME) != 0 &&
+	    strcmp(dev->obd_type->typ_name, LUSTRE_MGC_NAME) != 0) {
+		CERROR("can't register lproc for obd type %s\n",
+		       dev->obd_type->typ_name);
+		return -EINVAL;
+	}
+
+	rc = lprocfs_obd_seq_create(dev, "srpc_info", 0444,
+				    &sptlrpc_info_lprocfs_fops, dev);
+	if (rc) {
+		CERROR("create proc entry srpc_info for %s: %d\n",
+		       dev->obd_name, rc);
+		return rc;
+	}
+
+	rc = lprocfs_obd_seq_create(dev, "srpc_contexts", 0444,
+				    &sptlrpc_ctxs_lprocfs_fops, dev);
+	if (rc) {
+		CERROR("create proc entry srpc_contexts for %s: %d\n",
+		       dev->obd_name, rc);
+		return rc;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(sptlrpc_lprocfs_cliobd_attach);
+
+LPROC_SEQ_FOPS_RO(sptlrpc_proc_enc_pool);
+static struct lprocfs_vars sptlrpc_lprocfs_vars[] = {
+	{ "encrypt_page_pools", &sptlrpc_proc_enc_pool_fops },
+	{ NULL }
+};
+
+int sptlrpc_lproc_init(void)
+{
+	int     rc;
+
+	LASSERT(sptlrpc_proc_root == NULL);
+
+	sptlrpc_proc_root = lprocfs_register("sptlrpc", proc_lustre_root,
+					     sptlrpc_lprocfs_vars, NULL);
+	if (IS_ERR(sptlrpc_proc_root)) {
+		rc = PTR_ERR(sptlrpc_proc_root);
+		sptlrpc_proc_root = NULL;
+		return rc;
+	}
+	return 0;
+}
+
+void sptlrpc_lproc_fini(void)
+{
+	if (sptlrpc_proc_root) {
+		lprocfs_remove(&sptlrpc_proc_root);
+		sptlrpc_proc_root = NULL;
+	}
+}
diff --git a/drivers/staging/lustre/lustre/ptlrpc/sec_null.c b/drivers/staging/lustre/lustre/ptlrpc/sec_null.c
new file mode 100644
index 000000000..4e132435b
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ptlrpc/sec_null.c
@@ -0,0 +1,458 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/ptlrpc/sec_null.c
+ *
+ * Author: Eric Mei <ericm@clusterfs.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_SEC
+
+
+#include "../include/obd_support.h"
+#include "../include/obd_cksum.h"
+#include "../include/obd_class.h"
+#include "../include/lustre_net.h"
+#include "../include/lustre_sec.h"
+
+static struct ptlrpc_sec_policy null_policy;
+static struct ptlrpc_sec	null_sec;
+static struct ptlrpc_cli_ctx    null_cli_ctx;
+static struct ptlrpc_svc_ctx    null_svc_ctx;
+
+/*
+ * we can temporarily use the topmost 8-bits of lm_secflvr to identify
+ * the source sec part.
+ */
+static inline
+void null_encode_sec_part(struct lustre_msg *msg, enum lustre_sec_part sp)
+{
+	msg->lm_secflvr |= (((__u32) sp) & 0xFF) << 24;
+}
+
+static inline
+enum lustre_sec_part null_decode_sec_part(struct lustre_msg *msg)
+{
+	return (msg->lm_secflvr >> 24) & 0xFF;
+}
+
+static int null_ctx_refresh(struct ptlrpc_cli_ctx *ctx)
+{
+	/* should never reach here */
+	LBUG();
+	return 0;
+}
+
+static
+int null_ctx_sign(struct ptlrpc_cli_ctx *ctx, struct ptlrpc_request *req)
+{
+	req->rq_reqbuf->lm_secflvr = SPTLRPC_FLVR_NULL;
+
+	if (!req->rq_import->imp_dlm_fake) {
+		struct obd_device *obd = req->rq_import->imp_obd;
+		null_encode_sec_part(req->rq_reqbuf,
+				     obd->u.cli.cl_sp_me);
+	}
+	req->rq_reqdata_len = req->rq_reqlen;
+	return 0;
+}
+
+static
+int null_ctx_verify(struct ptlrpc_cli_ctx *ctx, struct ptlrpc_request *req)
+{
+	__u32   cksums, cksumc;
+
+	LASSERT(req->rq_repdata);
+
+	req->rq_repmsg = req->rq_repdata;
+	req->rq_replen = req->rq_repdata_len;
+
+	if (req->rq_early) {
+		cksums = lustre_msg_get_cksum(req->rq_repdata);
+		cksumc = lustre_msg_calc_cksum(req->rq_repmsg);
+		if (cksumc != cksums) {
+			CDEBUG(D_SEC,
+			       "early reply checksum mismatch: %08x != %08x\n",
+			       cksumc, cksums);
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
+static
+struct ptlrpc_sec *null_create_sec(struct obd_import *imp,
+				   struct ptlrpc_svc_ctx *svc_ctx,
+				   struct sptlrpc_flavor *sf)
+{
+	LASSERT(SPTLRPC_FLVR_POLICY(sf->sf_rpc) == SPTLRPC_POLICY_NULL);
+
+	/* general layer has take a module reference for us, because we never
+	 * really destroy the sec, simply release the reference here.
+	 */
+	sptlrpc_policy_put(&null_policy);
+	return &null_sec;
+}
+
+static
+void null_destroy_sec(struct ptlrpc_sec *sec)
+{
+	LASSERT(sec == &null_sec);
+}
+
+static
+struct ptlrpc_cli_ctx *null_lookup_ctx(struct ptlrpc_sec *sec,
+				       struct vfs_cred *vcred,
+				       int create, int remove_dead)
+{
+	atomic_inc(&null_cli_ctx.cc_refcount);
+	return &null_cli_ctx;
+}
+
+static
+int null_flush_ctx_cache(struct ptlrpc_sec *sec,
+			 uid_t uid,
+			 int grace, int force)
+{
+	return 0;
+}
+
+static
+int null_alloc_reqbuf(struct ptlrpc_sec *sec,
+		      struct ptlrpc_request *req,
+		      int msgsize)
+{
+	if (!req->rq_reqbuf) {
+		int alloc_size = size_roundup_power2(msgsize);
+
+		LASSERT(!req->rq_pool);
+		OBD_ALLOC_LARGE(req->rq_reqbuf, alloc_size);
+		if (!req->rq_reqbuf)
+			return -ENOMEM;
+
+		req->rq_reqbuf_len = alloc_size;
+	} else {
+		LASSERT(req->rq_pool);
+		LASSERT(req->rq_reqbuf_len >= msgsize);
+		memset(req->rq_reqbuf, 0, msgsize);
+	}
+
+	req->rq_reqmsg = req->rq_reqbuf;
+	return 0;
+}
+
+static
+void null_free_reqbuf(struct ptlrpc_sec *sec,
+		      struct ptlrpc_request *req)
+{
+	if (!req->rq_pool) {
+		LASSERTF(req->rq_reqmsg == req->rq_reqbuf,
+			 "req %p: reqmsg %p is not reqbuf %p in null sec\n",
+			 req, req->rq_reqmsg, req->rq_reqbuf);
+		LASSERTF(req->rq_reqbuf_len >= req->rq_reqlen,
+			 "req %p: reqlen %d should smaller than buflen %d\n",
+			 req, req->rq_reqlen, req->rq_reqbuf_len);
+
+		OBD_FREE_LARGE(req->rq_reqbuf, req->rq_reqbuf_len);
+		req->rq_reqbuf = NULL;
+		req->rq_reqbuf_len = 0;
+	}
+}
+
+static
+int null_alloc_repbuf(struct ptlrpc_sec *sec,
+		      struct ptlrpc_request *req,
+		      int msgsize)
+{
+	/* add space for early replied */
+	msgsize += lustre_msg_early_size();
+
+	msgsize = size_roundup_power2(msgsize);
+
+	OBD_ALLOC_LARGE(req->rq_repbuf, msgsize);
+	if (!req->rq_repbuf)
+		return -ENOMEM;
+
+	req->rq_repbuf_len = msgsize;
+	return 0;
+}
+
+static
+void null_free_repbuf(struct ptlrpc_sec *sec,
+		      struct ptlrpc_request *req)
+{
+	LASSERT(req->rq_repbuf);
+
+	OBD_FREE_LARGE(req->rq_repbuf, req->rq_repbuf_len);
+	req->rq_repbuf = NULL;
+	req->rq_repbuf_len = 0;
+}
+
+static
+int null_enlarge_reqbuf(struct ptlrpc_sec *sec,
+			struct ptlrpc_request *req,
+			int segment, int newsize)
+{
+	struct lustre_msg      *newbuf;
+	struct lustre_msg      *oldbuf = req->rq_reqmsg;
+	int		     oldsize, newmsg_size, alloc_size;
+
+	LASSERT(req->rq_reqbuf);
+	LASSERT(req->rq_reqbuf == req->rq_reqmsg);
+	LASSERT(req->rq_reqbuf_len >= req->rq_reqlen);
+	LASSERT(req->rq_reqlen == lustre_packed_msg_size(oldbuf));
+
+	/* compute new message size */
+	oldsize = req->rq_reqbuf->lm_buflens[segment];
+	req->rq_reqbuf->lm_buflens[segment] = newsize;
+	newmsg_size = lustre_packed_msg_size(oldbuf);
+	req->rq_reqbuf->lm_buflens[segment] = oldsize;
+
+	/* request from pool should always have enough buffer */
+	LASSERT(!req->rq_pool || req->rq_reqbuf_len >= newmsg_size);
+
+	if (req->rq_reqbuf_len < newmsg_size) {
+		alloc_size = size_roundup_power2(newmsg_size);
+
+		OBD_ALLOC_LARGE(newbuf, alloc_size);
+		if (newbuf == NULL)
+			return -ENOMEM;
+
+		/* Must lock this, so that otherwise unprotected change of
+		 * rq_reqmsg is not racing with parallel processing of
+		 * imp_replay_list traversing threads. See LU-3333
+		 * This is a bandaid at best, we really need to deal with this
+		 * in request enlarging code before unpacking that's already
+		 * there */
+		if (req->rq_import)
+			spin_lock(&req->rq_import->imp_lock);
+		memcpy(newbuf, req->rq_reqbuf, req->rq_reqlen);
+
+		OBD_FREE_LARGE(req->rq_reqbuf, req->rq_reqbuf_len);
+		req->rq_reqbuf = req->rq_reqmsg = newbuf;
+		req->rq_reqbuf_len = alloc_size;
+
+		if (req->rq_import)
+			spin_unlock(&req->rq_import->imp_lock);
+	}
+
+	_sptlrpc_enlarge_msg_inplace(req->rq_reqmsg, segment, newsize);
+	req->rq_reqlen = newmsg_size;
+
+	return 0;
+}
+
+static struct ptlrpc_svc_ctx null_svc_ctx = {
+	.sc_refcount    = ATOMIC_INIT(1),
+	.sc_policy      = &null_policy,
+};
+
+static
+int null_accept(struct ptlrpc_request *req)
+{
+	LASSERT(SPTLRPC_FLVR_POLICY(req->rq_flvr.sf_rpc) ==
+		SPTLRPC_POLICY_NULL);
+
+	if (req->rq_flvr.sf_rpc != SPTLRPC_FLVR_NULL) {
+		CERROR("Invalid rpc flavor 0x%x\n", req->rq_flvr.sf_rpc);
+		return SECSVC_DROP;
+	}
+
+	req->rq_sp_from = null_decode_sec_part(req->rq_reqbuf);
+
+	req->rq_reqmsg = req->rq_reqbuf;
+	req->rq_reqlen = req->rq_reqdata_len;
+
+	req->rq_svc_ctx = &null_svc_ctx;
+	atomic_inc(&req->rq_svc_ctx->sc_refcount);
+
+	return SECSVC_OK;
+}
+
+static
+int null_alloc_rs(struct ptlrpc_request *req, int msgsize)
+{
+	struct ptlrpc_reply_state *rs;
+	int rs_size = sizeof(*rs) + msgsize;
+
+	LASSERT(msgsize % 8 == 0);
+
+	rs = req->rq_reply_state;
+
+	if (rs) {
+		/* pre-allocated */
+		LASSERT(rs->rs_size >= rs_size);
+	} else {
+		OBD_ALLOC_LARGE(rs, rs_size);
+		if (rs == NULL)
+			return -ENOMEM;
+
+		rs->rs_size = rs_size;
+	}
+
+	rs->rs_svc_ctx = req->rq_svc_ctx;
+	atomic_inc(&req->rq_svc_ctx->sc_refcount);
+
+	rs->rs_repbuf = (struct lustre_msg *) (rs + 1);
+	rs->rs_repbuf_len = rs_size - sizeof(*rs);
+	rs->rs_msg = rs->rs_repbuf;
+
+	req->rq_reply_state = rs;
+	return 0;
+}
+
+static
+void null_free_rs(struct ptlrpc_reply_state *rs)
+{
+	LASSERT_ATOMIC_GT(&rs->rs_svc_ctx->sc_refcount, 1);
+	atomic_dec(&rs->rs_svc_ctx->sc_refcount);
+
+	if (!rs->rs_prealloc)
+		OBD_FREE_LARGE(rs, rs->rs_size);
+}
+
+static
+int null_authorize(struct ptlrpc_request *req)
+{
+	struct ptlrpc_reply_state *rs = req->rq_reply_state;
+
+	LASSERT(rs);
+
+	rs->rs_repbuf->lm_secflvr = SPTLRPC_FLVR_NULL;
+	rs->rs_repdata_len = req->rq_replen;
+
+	if (likely(req->rq_packed_final)) {
+		if (lustre_msghdr_get_flags(req->rq_reqmsg) & MSGHDR_AT_SUPPORT)
+			req->rq_reply_off = lustre_msg_early_size();
+		else
+			req->rq_reply_off = 0;
+	} else {
+		__u32 cksum;
+
+		cksum = lustre_msg_calc_cksum(rs->rs_repbuf);
+		lustre_msg_set_cksum(rs->rs_repbuf, cksum);
+		req->rq_reply_off = 0;
+	}
+
+	return 0;
+}
+
+static struct ptlrpc_ctx_ops null_ctx_ops = {
+	.refresh		= null_ctx_refresh,
+	.sign		   = null_ctx_sign,
+	.verify		 = null_ctx_verify,
+};
+
+static struct ptlrpc_sec_cops null_sec_cops = {
+	.create_sec	     = null_create_sec,
+	.destroy_sec	    = null_destroy_sec,
+	.lookup_ctx	     = null_lookup_ctx,
+	.flush_ctx_cache	= null_flush_ctx_cache,
+	.alloc_reqbuf	   = null_alloc_reqbuf,
+	.alloc_repbuf	   = null_alloc_repbuf,
+	.free_reqbuf	    = null_free_reqbuf,
+	.free_repbuf	    = null_free_repbuf,
+	.enlarge_reqbuf	 = null_enlarge_reqbuf,
+};
+
+static struct ptlrpc_sec_sops null_sec_sops = {
+	.accept		 = null_accept,
+	.alloc_rs	       = null_alloc_rs,
+	.authorize	      = null_authorize,
+	.free_rs		= null_free_rs,
+};
+
+static struct ptlrpc_sec_policy null_policy = {
+	.sp_owner	       = THIS_MODULE,
+	.sp_name		= "sec.null",
+	.sp_policy	      = SPTLRPC_POLICY_NULL,
+	.sp_cops		= &null_sec_cops,
+	.sp_sops		= &null_sec_sops,
+};
+
+static void null_init_internal(void)
+{
+	static HLIST_HEAD(__list);
+
+	null_sec.ps_policy = &null_policy;
+	atomic_set(&null_sec.ps_refcount, 1);     /* always busy */
+	null_sec.ps_id = -1;
+	null_sec.ps_import = NULL;
+	null_sec.ps_flvr.sf_rpc = SPTLRPC_FLVR_NULL;
+	null_sec.ps_flvr.sf_flags = 0;
+	null_sec.ps_part = LUSTRE_SP_ANY;
+	null_sec.ps_dying = 0;
+	spin_lock_init(&null_sec.ps_lock);
+	atomic_set(&null_sec.ps_nctx, 1);	 /* for "null_cli_ctx" */
+	INIT_LIST_HEAD(&null_sec.ps_gc_list);
+	null_sec.ps_gc_interval = 0;
+	null_sec.ps_gc_next = 0;
+
+	hlist_add_head(&null_cli_ctx.cc_cache, &__list);
+	atomic_set(&null_cli_ctx.cc_refcount, 1);    /* for hash */
+	null_cli_ctx.cc_sec = &null_sec;
+	null_cli_ctx.cc_ops = &null_ctx_ops;
+	null_cli_ctx.cc_expire = 0;
+	null_cli_ctx.cc_flags = PTLRPC_CTX_CACHED | PTLRPC_CTX_ETERNAL |
+				PTLRPC_CTX_UPTODATE;
+	null_cli_ctx.cc_vcred.vc_uid = 0;
+	spin_lock_init(&null_cli_ctx.cc_lock);
+	INIT_LIST_HEAD(&null_cli_ctx.cc_req_list);
+	INIT_LIST_HEAD(&null_cli_ctx.cc_gc_chain);
+}
+
+int sptlrpc_null_init(void)
+{
+	int rc;
+
+	null_init_internal();
+
+	rc = sptlrpc_register_policy(&null_policy);
+	if (rc)
+		CERROR("failed to register %s: %d\n", null_policy.sp_name, rc);
+
+	return rc;
+}
+
+void sptlrpc_null_fini(void)
+{
+	int rc;
+
+	rc = sptlrpc_unregister_policy(&null_policy);
+	if (rc)
+		CERROR("failed to unregister %s: %d\n",
+		       null_policy.sp_name, rc);
+}
diff --git a/drivers/staging/lustre/lustre/ptlrpc/sec_plain.c b/drivers/staging/lustre/lustre/ptlrpc/sec_plain.c
new file mode 100644
index 000000000..a79cd5301
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ptlrpc/sec_plain.c
@@ -0,0 +1,1013 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/ptlrpc/sec_plain.c
+ *
+ * Author: Eric Mei <ericm@clusterfs.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_SEC
+
+
+#include "../include/obd_support.h"
+#include "../include/obd_cksum.h"
+#include "../include/obd_class.h"
+#include "../include/lustre_net.h"
+#include "../include/lustre_sec.h"
+
+struct plain_sec {
+	struct ptlrpc_sec       pls_base;
+	rwlock_t	    pls_lock;
+	struct ptlrpc_cli_ctx  *pls_ctx;
+};
+
+static inline struct plain_sec *sec2plsec(struct ptlrpc_sec *sec)
+{
+	return container_of(sec, struct plain_sec, pls_base);
+}
+
+static struct ptlrpc_sec_policy plain_policy;
+static struct ptlrpc_ctx_ops    plain_ctx_ops;
+static struct ptlrpc_svc_ctx    plain_svc_ctx;
+
+static unsigned int plain_at_offset;
+
+/*
+ * for simplicity, plain policy rpc use fixed layout.
+ */
+#define PLAIN_PACK_SEGMENTS	     (4)
+
+#define PLAIN_PACK_HDR_OFF	      (0)
+#define PLAIN_PACK_MSG_OFF	      (1)
+#define PLAIN_PACK_USER_OFF	     (2)
+#define PLAIN_PACK_BULK_OFF	     (3)
+
+#define PLAIN_FL_USER		   (0x01)
+#define PLAIN_FL_BULK		   (0x02)
+
+struct plain_header {
+	__u8	    ph_ver;	    /* 0 */
+	__u8	    ph_flags;
+	__u8	    ph_sp;	     /* source */
+	__u8	    ph_bulk_hash_alg;  /* complete flavor desc */
+	__u8	    ph_pad[4];
+};
+
+struct plain_bulk_token {
+	__u8	    pbt_hash[8];
+};
+
+#define PLAIN_BSD_SIZE \
+	(sizeof(struct ptlrpc_bulk_sec_desc) + sizeof(struct plain_bulk_token))
+
+/****************************************
+ * bulk checksum helpers		*
+ ****************************************/
+
+static int plain_unpack_bsd(struct lustre_msg *msg, int swabbed)
+{
+	struct ptlrpc_bulk_sec_desc *bsd;
+
+	if (bulk_sec_desc_unpack(msg, PLAIN_PACK_BULK_OFF, swabbed))
+		return -EPROTO;
+
+	bsd = lustre_msg_buf(msg, PLAIN_PACK_BULK_OFF, PLAIN_BSD_SIZE);
+	if (bsd == NULL) {
+		CERROR("bulk sec desc has short size %d\n",
+		       lustre_msg_buflen(msg, PLAIN_PACK_BULK_OFF));
+		return -EPROTO;
+	}
+
+	if (bsd->bsd_svc != SPTLRPC_BULK_SVC_NULL &&
+	    bsd->bsd_svc != SPTLRPC_BULK_SVC_INTG) {
+		CERROR("invalid bulk svc %u\n", bsd->bsd_svc);
+		return -EPROTO;
+	}
+
+	return 0;
+}
+
+static int plain_generate_bulk_csum(struct ptlrpc_bulk_desc *desc,
+				    __u8 hash_alg,
+				    struct plain_bulk_token *token)
+{
+	if (hash_alg == BULK_HASH_ALG_NULL)
+		return 0;
+
+	memset(token->pbt_hash, 0, sizeof(token->pbt_hash));
+	return sptlrpc_get_bulk_checksum(desc, hash_alg, token->pbt_hash,
+					 sizeof(token->pbt_hash));
+}
+
+static int plain_verify_bulk_csum(struct ptlrpc_bulk_desc *desc,
+				  __u8 hash_alg,
+				  struct plain_bulk_token *tokenr)
+{
+	struct plain_bulk_token tokenv;
+	int		     rc;
+
+	if (hash_alg == BULK_HASH_ALG_NULL)
+		return 0;
+
+	memset(&tokenv.pbt_hash, 0, sizeof(tokenv.pbt_hash));
+	rc = sptlrpc_get_bulk_checksum(desc, hash_alg, tokenv.pbt_hash,
+				       sizeof(tokenv.pbt_hash));
+	if (rc)
+		return rc;
+
+	if (memcmp(tokenr->pbt_hash, tokenv.pbt_hash, sizeof(tokenr->pbt_hash)))
+		return -EACCES;
+	return 0;
+}
+
+static void corrupt_bulk_data(struct ptlrpc_bulk_desc *desc)
+{
+	char	   *ptr;
+	unsigned int    off, i;
+
+	for (i = 0; i < desc->bd_iov_count; i++) {
+		if (desc->bd_iov[i].kiov_len == 0)
+			continue;
+
+		ptr = kmap(desc->bd_iov[i].kiov_page);
+		off = desc->bd_iov[i].kiov_offset & ~CFS_PAGE_MASK;
+		ptr[off] ^= 0x1;
+		kunmap(desc->bd_iov[i].kiov_page);
+		return;
+	}
+}
+
+/****************************************
+ * cli_ctx apis			 *
+ ****************************************/
+
+static
+int plain_ctx_refresh(struct ptlrpc_cli_ctx *ctx)
+{
+	/* should never reach here */
+	LBUG();
+	return 0;
+}
+
+static
+int plain_ctx_validate(struct ptlrpc_cli_ctx *ctx)
+{
+	return 0;
+}
+
+static
+int plain_ctx_sign(struct ptlrpc_cli_ctx *ctx, struct ptlrpc_request *req)
+{
+	struct lustre_msg   *msg = req->rq_reqbuf;
+	struct plain_header *phdr;
+
+	msg->lm_secflvr = req->rq_flvr.sf_rpc;
+
+	phdr = lustre_msg_buf(msg, PLAIN_PACK_HDR_OFF, 0);
+	phdr->ph_ver = 0;
+	phdr->ph_flags = 0;
+	phdr->ph_sp = ctx->cc_sec->ps_part;
+	phdr->ph_bulk_hash_alg = req->rq_flvr.u_bulk.hash.hash_alg;
+
+	if (req->rq_pack_udesc)
+		phdr->ph_flags |= PLAIN_FL_USER;
+	if (req->rq_pack_bulk)
+		phdr->ph_flags |= PLAIN_FL_BULK;
+
+	req->rq_reqdata_len = lustre_msg_size_v2(msg->lm_bufcount,
+						 msg->lm_buflens);
+	return 0;
+}
+
+static
+int plain_ctx_verify(struct ptlrpc_cli_ctx *ctx, struct ptlrpc_request *req)
+{
+	struct lustre_msg   *msg = req->rq_repdata;
+	struct plain_header *phdr;
+	__u32		cksum;
+	int		  swabbed;
+
+	if (msg->lm_bufcount != PLAIN_PACK_SEGMENTS) {
+		CERROR("unexpected reply buf count %u\n", msg->lm_bufcount);
+		return -EPROTO;
+	}
+
+	swabbed = ptlrpc_rep_need_swab(req);
+
+	phdr = lustre_msg_buf(msg, PLAIN_PACK_HDR_OFF, sizeof(*phdr));
+	if (phdr == NULL) {
+		CERROR("missing plain header\n");
+		return -EPROTO;
+	}
+
+	if (phdr->ph_ver != 0) {
+		CERROR("Invalid header version\n");
+		return -EPROTO;
+	}
+
+	/* expect no user desc in reply */
+	if (phdr->ph_flags & PLAIN_FL_USER) {
+		CERROR("Unexpected udesc flag in reply\n");
+		return -EPROTO;
+	}
+
+	if (phdr->ph_bulk_hash_alg != req->rq_flvr.u_bulk.hash.hash_alg) {
+		CERROR("reply bulk flavor %u != %u\n", phdr->ph_bulk_hash_alg,
+		       req->rq_flvr.u_bulk.hash.hash_alg);
+		return -EPROTO;
+	}
+
+	if (unlikely(req->rq_early)) {
+		unsigned int hsize = 4;
+
+		cfs_crypto_hash_digest(CFS_HASH_ALG_CRC32,
+				lustre_msg_buf(msg, PLAIN_PACK_MSG_OFF, 0),
+				lustre_msg_buflen(msg, PLAIN_PACK_MSG_OFF),
+				NULL, 0, (unsigned char *)&cksum, &hsize);
+		if (cksum != msg->lm_cksum) {
+			CDEBUG(D_SEC,
+			       "early reply checksum mismatch: %08x != %08x\n",
+			       cpu_to_le32(cksum), msg->lm_cksum);
+			return -EINVAL;
+		}
+	} else {
+		/* whether we sent with bulk or not, we expect the same
+		 * in reply, except for early reply */
+		if (!req->rq_early &&
+		    !equi(req->rq_pack_bulk == 1,
+			  phdr->ph_flags & PLAIN_FL_BULK)) {
+			CERROR("%s bulk checksum in reply\n",
+			       req->rq_pack_bulk ? "Missing" : "Unexpected");
+			return -EPROTO;
+		}
+
+		if (phdr->ph_flags & PLAIN_FL_BULK) {
+			if (plain_unpack_bsd(msg, swabbed))
+				return -EPROTO;
+		}
+	}
+
+	req->rq_repmsg = lustre_msg_buf(msg, PLAIN_PACK_MSG_OFF, 0);
+	req->rq_replen = lustre_msg_buflen(msg, PLAIN_PACK_MSG_OFF);
+	return 0;
+}
+
+static
+int plain_cli_wrap_bulk(struct ptlrpc_cli_ctx *ctx,
+			struct ptlrpc_request *req,
+			struct ptlrpc_bulk_desc *desc)
+{
+	struct ptlrpc_bulk_sec_desc *bsd;
+	struct plain_bulk_token     *token;
+	int			  rc;
+
+	LASSERT(req->rq_pack_bulk);
+	LASSERT(req->rq_reqbuf->lm_bufcount == PLAIN_PACK_SEGMENTS);
+
+	bsd = lustre_msg_buf(req->rq_reqbuf, PLAIN_PACK_BULK_OFF, 0);
+	token = (struct plain_bulk_token *) bsd->bsd_data;
+
+	bsd->bsd_version = 0;
+	bsd->bsd_flags = 0;
+	bsd->bsd_type = SPTLRPC_BULK_DEFAULT;
+	bsd->bsd_svc = SPTLRPC_FLVR_BULK_SVC(req->rq_flvr.sf_rpc);
+
+	if (bsd->bsd_svc == SPTLRPC_BULK_SVC_NULL)
+		return 0;
+
+	if (req->rq_bulk_read)
+		return 0;
+
+	rc = plain_generate_bulk_csum(desc, req->rq_flvr.u_bulk.hash.hash_alg,
+				      token);
+	if (rc) {
+		CERROR("bulk write: failed to compute checksum: %d\n", rc);
+	} else {
+		/*
+		 * for sending we only compute the wrong checksum instead
+		 * of corrupting the data so it is still correct on a redo
+		 */
+		if (OBD_FAIL_CHECK(OBD_FAIL_OSC_CHECKSUM_SEND) &&
+		    req->rq_flvr.u_bulk.hash.hash_alg != BULK_HASH_ALG_NULL)
+			token->pbt_hash[0] ^= 0x1;
+	}
+
+	return rc;
+}
+
+static
+int plain_cli_unwrap_bulk(struct ptlrpc_cli_ctx *ctx,
+			  struct ptlrpc_request *req,
+			  struct ptlrpc_bulk_desc *desc)
+{
+	struct ptlrpc_bulk_sec_desc *bsdv;
+	struct plain_bulk_token     *tokenv;
+	int			  rc;
+	int			  i, nob;
+
+	LASSERT(req->rq_pack_bulk);
+	LASSERT(req->rq_reqbuf->lm_bufcount == PLAIN_PACK_SEGMENTS);
+	LASSERT(req->rq_repdata->lm_bufcount == PLAIN_PACK_SEGMENTS);
+
+	bsdv = lustre_msg_buf(req->rq_repdata, PLAIN_PACK_BULK_OFF, 0);
+	tokenv = (struct plain_bulk_token *) bsdv->bsd_data;
+
+	if (req->rq_bulk_write) {
+		if (bsdv->bsd_flags & BSD_FL_ERR)
+			return -EIO;
+		return 0;
+	}
+
+	/* fix the actual data size */
+	for (i = 0, nob = 0; i < desc->bd_iov_count; i++) {
+		if (desc->bd_iov[i].kiov_len + nob > desc->bd_nob_transferred) {
+			desc->bd_iov[i].kiov_len =
+				desc->bd_nob_transferred - nob;
+		}
+		nob += desc->bd_iov[i].kiov_len;
+	}
+
+	rc = plain_verify_bulk_csum(desc, req->rq_flvr.u_bulk.hash.hash_alg,
+				    tokenv);
+	if (rc)
+		CERROR("bulk read: client verify failed: %d\n", rc);
+
+	return rc;
+}
+
+/****************************************
+ * sec apis			     *
+ ****************************************/
+
+static
+struct ptlrpc_cli_ctx *plain_sec_install_ctx(struct plain_sec *plsec)
+{
+	struct ptlrpc_cli_ctx  *ctx, *ctx_new;
+
+	OBD_ALLOC_PTR(ctx_new);
+
+	write_lock(&plsec->pls_lock);
+
+	ctx = plsec->pls_ctx;
+	if (ctx) {
+		atomic_inc(&ctx->cc_refcount);
+
+		if (ctx_new)
+			OBD_FREE_PTR(ctx_new);
+	} else if (ctx_new) {
+		ctx = ctx_new;
+
+		atomic_set(&ctx->cc_refcount, 1); /* for cache */
+		ctx->cc_sec = &plsec->pls_base;
+		ctx->cc_ops = &plain_ctx_ops;
+		ctx->cc_expire = 0;
+		ctx->cc_flags = PTLRPC_CTX_CACHED | PTLRPC_CTX_UPTODATE;
+		ctx->cc_vcred.vc_uid = 0;
+		spin_lock_init(&ctx->cc_lock);
+		INIT_LIST_HEAD(&ctx->cc_req_list);
+		INIT_LIST_HEAD(&ctx->cc_gc_chain);
+
+		plsec->pls_ctx = ctx;
+		atomic_inc(&plsec->pls_base.ps_nctx);
+		atomic_inc(&plsec->pls_base.ps_refcount);
+
+		atomic_inc(&ctx->cc_refcount); /* for caller */
+	}
+
+	write_unlock(&plsec->pls_lock);
+
+	return ctx;
+}
+
+static
+void plain_destroy_sec(struct ptlrpc_sec *sec)
+{
+	struct plain_sec       *plsec = sec2plsec(sec);
+
+	LASSERT(sec->ps_policy == &plain_policy);
+	LASSERT(sec->ps_import);
+	LASSERT(atomic_read(&sec->ps_refcount) == 0);
+	LASSERT(atomic_read(&sec->ps_nctx) == 0);
+	LASSERT(plsec->pls_ctx == NULL);
+
+	class_import_put(sec->ps_import);
+
+	OBD_FREE_PTR(plsec);
+}
+
+static
+void plain_kill_sec(struct ptlrpc_sec *sec)
+{
+	sec->ps_dying = 1;
+}
+
+static
+struct ptlrpc_sec *plain_create_sec(struct obd_import *imp,
+				    struct ptlrpc_svc_ctx *svc_ctx,
+				    struct sptlrpc_flavor *sf)
+{
+	struct plain_sec       *plsec;
+	struct ptlrpc_sec      *sec;
+	struct ptlrpc_cli_ctx  *ctx;
+
+	LASSERT(SPTLRPC_FLVR_POLICY(sf->sf_rpc) == SPTLRPC_POLICY_PLAIN);
+
+	OBD_ALLOC_PTR(plsec);
+	if (plsec == NULL)
+		return NULL;
+
+	/*
+	 * initialize plain_sec
+	 */
+	rwlock_init(&plsec->pls_lock);
+	plsec->pls_ctx = NULL;
+
+	sec = &plsec->pls_base;
+	sec->ps_policy = &plain_policy;
+	atomic_set(&sec->ps_refcount, 0);
+	atomic_set(&sec->ps_nctx, 0);
+	sec->ps_id = sptlrpc_get_next_secid();
+	sec->ps_import = class_import_get(imp);
+	sec->ps_flvr = *sf;
+	spin_lock_init(&sec->ps_lock);
+	INIT_LIST_HEAD(&sec->ps_gc_list);
+	sec->ps_gc_interval = 0;
+	sec->ps_gc_next = 0;
+
+	/* install ctx immediately if this is a reverse sec */
+	if (svc_ctx) {
+		ctx = plain_sec_install_ctx(plsec);
+		if (ctx == NULL) {
+			plain_destroy_sec(sec);
+			return NULL;
+		}
+		sptlrpc_cli_ctx_put(ctx, 1);
+	}
+
+	return sec;
+}
+
+static
+struct ptlrpc_cli_ctx *plain_lookup_ctx(struct ptlrpc_sec *sec,
+					struct vfs_cred *vcred,
+					int create, int remove_dead)
+{
+	struct plain_sec       *plsec = sec2plsec(sec);
+	struct ptlrpc_cli_ctx  *ctx;
+
+	read_lock(&plsec->pls_lock);
+	ctx = plsec->pls_ctx;
+	if (ctx)
+		atomic_inc(&ctx->cc_refcount);
+	read_unlock(&plsec->pls_lock);
+
+	if (unlikely(ctx == NULL))
+		ctx = plain_sec_install_ctx(plsec);
+
+	return ctx;
+}
+
+static
+void plain_release_ctx(struct ptlrpc_sec *sec,
+		       struct ptlrpc_cli_ctx *ctx, int sync)
+{
+	LASSERT(atomic_read(&sec->ps_refcount) > 0);
+	LASSERT(atomic_read(&sec->ps_nctx) > 0);
+	LASSERT(atomic_read(&ctx->cc_refcount) == 0);
+	LASSERT(ctx->cc_sec == sec);
+
+	OBD_FREE_PTR(ctx);
+
+	atomic_dec(&sec->ps_nctx);
+	sptlrpc_sec_put(sec);
+}
+
+static
+int plain_flush_ctx_cache(struct ptlrpc_sec *sec,
+			  uid_t uid, int grace, int force)
+{
+	struct plain_sec       *plsec = sec2plsec(sec);
+	struct ptlrpc_cli_ctx  *ctx;
+
+	/* do nothing unless caller want to flush for 'all' */
+	if (uid != -1)
+		return 0;
+
+	write_lock(&plsec->pls_lock);
+	ctx = plsec->pls_ctx;
+	plsec->pls_ctx = NULL;
+	write_unlock(&plsec->pls_lock);
+
+	if (ctx)
+		sptlrpc_cli_ctx_put(ctx, 1);
+	return 0;
+}
+
+static
+int plain_alloc_reqbuf(struct ptlrpc_sec *sec,
+		       struct ptlrpc_request *req,
+		       int msgsize)
+{
+	__u32 buflens[PLAIN_PACK_SEGMENTS] = { 0, };
+	int   alloc_len;
+
+	buflens[PLAIN_PACK_HDR_OFF] = sizeof(struct plain_header);
+	buflens[PLAIN_PACK_MSG_OFF] = msgsize;
+
+	if (req->rq_pack_udesc)
+		buflens[PLAIN_PACK_USER_OFF] = sptlrpc_current_user_desc_size();
+
+	if (req->rq_pack_bulk) {
+		LASSERT(req->rq_bulk_read || req->rq_bulk_write);
+		buflens[PLAIN_PACK_BULK_OFF] = PLAIN_BSD_SIZE;
+	}
+
+	alloc_len = lustre_msg_size_v2(PLAIN_PACK_SEGMENTS, buflens);
+
+	if (!req->rq_reqbuf) {
+		LASSERT(!req->rq_pool);
+
+		alloc_len = size_roundup_power2(alloc_len);
+		OBD_ALLOC_LARGE(req->rq_reqbuf, alloc_len);
+		if (!req->rq_reqbuf)
+			return -ENOMEM;
+
+		req->rq_reqbuf_len = alloc_len;
+	} else {
+		LASSERT(req->rq_pool);
+		LASSERT(req->rq_reqbuf_len >= alloc_len);
+		memset(req->rq_reqbuf, 0, alloc_len);
+	}
+
+	lustre_init_msg_v2(req->rq_reqbuf, PLAIN_PACK_SEGMENTS, buflens, NULL);
+	req->rq_reqmsg = lustre_msg_buf(req->rq_reqbuf, PLAIN_PACK_MSG_OFF, 0);
+
+	if (req->rq_pack_udesc)
+		sptlrpc_pack_user_desc(req->rq_reqbuf, PLAIN_PACK_USER_OFF);
+
+	return 0;
+}
+
+static
+void plain_free_reqbuf(struct ptlrpc_sec *sec,
+		       struct ptlrpc_request *req)
+{
+	if (!req->rq_pool) {
+		OBD_FREE_LARGE(req->rq_reqbuf, req->rq_reqbuf_len);
+		req->rq_reqbuf = NULL;
+		req->rq_reqbuf_len = 0;
+	}
+}
+
+static
+int plain_alloc_repbuf(struct ptlrpc_sec *sec,
+		       struct ptlrpc_request *req,
+		       int msgsize)
+{
+	__u32 buflens[PLAIN_PACK_SEGMENTS] = { 0, };
+	int alloc_len;
+
+	buflens[PLAIN_PACK_HDR_OFF] = sizeof(struct plain_header);
+	buflens[PLAIN_PACK_MSG_OFF] = msgsize;
+
+	if (req->rq_pack_bulk) {
+		LASSERT(req->rq_bulk_read || req->rq_bulk_write);
+		buflens[PLAIN_PACK_BULK_OFF] = PLAIN_BSD_SIZE;
+	}
+
+	alloc_len = lustre_msg_size_v2(PLAIN_PACK_SEGMENTS, buflens);
+
+	/* add space for early reply */
+	alloc_len += plain_at_offset;
+
+	alloc_len = size_roundup_power2(alloc_len);
+
+	OBD_ALLOC_LARGE(req->rq_repbuf, alloc_len);
+	if (!req->rq_repbuf)
+		return -ENOMEM;
+
+	req->rq_repbuf_len = alloc_len;
+	return 0;
+}
+
+static
+void plain_free_repbuf(struct ptlrpc_sec *sec,
+		       struct ptlrpc_request *req)
+{
+	OBD_FREE_LARGE(req->rq_repbuf, req->rq_repbuf_len);
+	req->rq_repbuf = NULL;
+	req->rq_repbuf_len = 0;
+}
+
+static
+int plain_enlarge_reqbuf(struct ptlrpc_sec *sec,
+			 struct ptlrpc_request *req,
+			 int segment, int newsize)
+{
+	struct lustre_msg      *newbuf;
+	int		     oldsize;
+	int		     newmsg_size, newbuf_size;
+
+	LASSERT(req->rq_reqbuf);
+	LASSERT(req->rq_reqbuf_len >= req->rq_reqlen);
+	LASSERT(lustre_msg_buf(req->rq_reqbuf, PLAIN_PACK_MSG_OFF, 0) ==
+		req->rq_reqmsg);
+
+	/* compute new embedded msg size.  */
+	oldsize = req->rq_reqmsg->lm_buflens[segment];
+	req->rq_reqmsg->lm_buflens[segment] = newsize;
+	newmsg_size = lustre_msg_size_v2(req->rq_reqmsg->lm_bufcount,
+					 req->rq_reqmsg->lm_buflens);
+	req->rq_reqmsg->lm_buflens[segment] = oldsize;
+
+	/* compute new wrapper msg size.  */
+	oldsize = req->rq_reqbuf->lm_buflens[PLAIN_PACK_MSG_OFF];
+	req->rq_reqbuf->lm_buflens[PLAIN_PACK_MSG_OFF] = newmsg_size;
+	newbuf_size = lustre_msg_size_v2(req->rq_reqbuf->lm_bufcount,
+					 req->rq_reqbuf->lm_buflens);
+	req->rq_reqbuf->lm_buflens[PLAIN_PACK_MSG_OFF] = oldsize;
+
+	/* request from pool should always have enough buffer */
+	LASSERT(!req->rq_pool || req->rq_reqbuf_len >= newbuf_size);
+
+	if (req->rq_reqbuf_len < newbuf_size) {
+		newbuf_size = size_roundup_power2(newbuf_size);
+
+		OBD_ALLOC_LARGE(newbuf, newbuf_size);
+		if (newbuf == NULL)
+			return -ENOMEM;
+
+		/* Must lock this, so that otherwise unprotected change of
+		 * rq_reqmsg is not racing with parallel processing of
+		 * imp_replay_list traversing threads. See LU-3333
+		 * This is a bandaid at best, we really need to deal with this
+		 * in request enlarging code before unpacking that's already
+		 * there */
+		if (req->rq_import)
+			spin_lock(&req->rq_import->imp_lock);
+
+		memcpy(newbuf, req->rq_reqbuf, req->rq_reqbuf_len);
+
+		OBD_FREE_LARGE(req->rq_reqbuf, req->rq_reqbuf_len);
+		req->rq_reqbuf = newbuf;
+		req->rq_reqbuf_len = newbuf_size;
+		req->rq_reqmsg = lustre_msg_buf(req->rq_reqbuf,
+						PLAIN_PACK_MSG_OFF, 0);
+
+		if (req->rq_import)
+			spin_unlock(&req->rq_import->imp_lock);
+	}
+
+	_sptlrpc_enlarge_msg_inplace(req->rq_reqbuf, PLAIN_PACK_MSG_OFF,
+				     newmsg_size);
+	_sptlrpc_enlarge_msg_inplace(req->rq_reqmsg, segment, newsize);
+
+	req->rq_reqlen = newmsg_size;
+	return 0;
+}
+
+/****************************************
+ * service apis			 *
+ ****************************************/
+
+static struct ptlrpc_svc_ctx plain_svc_ctx = {
+	.sc_refcount    = ATOMIC_INIT(1),
+	.sc_policy      = &plain_policy,
+};
+
+static
+int plain_accept(struct ptlrpc_request *req)
+{
+	struct lustre_msg   *msg = req->rq_reqbuf;
+	struct plain_header *phdr;
+	int		  swabbed;
+
+	LASSERT(SPTLRPC_FLVR_POLICY(req->rq_flvr.sf_rpc) ==
+		SPTLRPC_POLICY_PLAIN);
+
+	if (SPTLRPC_FLVR_BASE(req->rq_flvr.sf_rpc) !=
+	    SPTLRPC_FLVR_BASE(SPTLRPC_FLVR_PLAIN) ||
+	    SPTLRPC_FLVR_BULK_TYPE(req->rq_flvr.sf_rpc) !=
+	    SPTLRPC_FLVR_BULK_TYPE(SPTLRPC_FLVR_PLAIN)) {
+		CERROR("Invalid rpc flavor %x\n", req->rq_flvr.sf_rpc);
+		return SECSVC_DROP;
+	}
+
+	if (msg->lm_bufcount < PLAIN_PACK_SEGMENTS) {
+		CERROR("unexpected request buf count %u\n", msg->lm_bufcount);
+		return SECSVC_DROP;
+	}
+
+	swabbed = ptlrpc_req_need_swab(req);
+
+	phdr = lustre_msg_buf(msg, PLAIN_PACK_HDR_OFF, sizeof(*phdr));
+	if (phdr == NULL) {
+		CERROR("missing plain header\n");
+		return -EPROTO;
+	}
+
+	if (phdr->ph_ver != 0) {
+		CERROR("Invalid header version\n");
+		return -EPROTO;
+	}
+
+	if (phdr->ph_bulk_hash_alg >= BULK_HASH_ALG_MAX) {
+		CERROR("invalid hash algorithm: %u\n", phdr->ph_bulk_hash_alg);
+		return -EPROTO;
+	}
+
+	req->rq_sp_from = phdr->ph_sp;
+	req->rq_flvr.u_bulk.hash.hash_alg = phdr->ph_bulk_hash_alg;
+
+	if (phdr->ph_flags & PLAIN_FL_USER) {
+		if (sptlrpc_unpack_user_desc(msg, PLAIN_PACK_USER_OFF,
+					     swabbed)) {
+			CERROR("Mal-formed user descriptor\n");
+			return SECSVC_DROP;
+		}
+
+		req->rq_pack_udesc = 1;
+		req->rq_user_desc = lustre_msg_buf(msg, PLAIN_PACK_USER_OFF, 0);
+	}
+
+	if (phdr->ph_flags & PLAIN_FL_BULK) {
+		if (plain_unpack_bsd(msg, swabbed))
+			return SECSVC_DROP;
+
+		req->rq_pack_bulk = 1;
+	}
+
+	req->rq_reqmsg = lustre_msg_buf(msg, PLAIN_PACK_MSG_OFF, 0);
+	req->rq_reqlen = msg->lm_buflens[PLAIN_PACK_MSG_OFF];
+
+	req->rq_svc_ctx = &plain_svc_ctx;
+	atomic_inc(&req->rq_svc_ctx->sc_refcount);
+
+	return SECSVC_OK;
+}
+
+static
+int plain_alloc_rs(struct ptlrpc_request *req, int msgsize)
+{
+	struct ptlrpc_reply_state   *rs;
+	__u32			buflens[PLAIN_PACK_SEGMENTS] = { 0, };
+	int			  rs_size = sizeof(*rs);
+
+	LASSERT(msgsize % 8 == 0);
+
+	buflens[PLAIN_PACK_HDR_OFF] = sizeof(struct plain_header);
+	buflens[PLAIN_PACK_MSG_OFF] = msgsize;
+
+	if (req->rq_pack_bulk && (req->rq_bulk_read || req->rq_bulk_write))
+		buflens[PLAIN_PACK_BULK_OFF] = PLAIN_BSD_SIZE;
+
+	rs_size += lustre_msg_size_v2(PLAIN_PACK_SEGMENTS, buflens);
+
+	rs = req->rq_reply_state;
+
+	if (rs) {
+		/* pre-allocated */
+		LASSERT(rs->rs_size >= rs_size);
+	} else {
+		OBD_ALLOC_LARGE(rs, rs_size);
+		if (rs == NULL)
+			return -ENOMEM;
+
+		rs->rs_size = rs_size;
+	}
+
+	rs->rs_svc_ctx = req->rq_svc_ctx;
+	atomic_inc(&req->rq_svc_ctx->sc_refcount);
+	rs->rs_repbuf = (struct lustre_msg *) (rs + 1);
+	rs->rs_repbuf_len = rs_size - sizeof(*rs);
+
+	lustre_init_msg_v2(rs->rs_repbuf, PLAIN_PACK_SEGMENTS, buflens, NULL);
+	rs->rs_msg = lustre_msg_buf_v2(rs->rs_repbuf, PLAIN_PACK_MSG_OFF, 0);
+
+	req->rq_reply_state = rs;
+	return 0;
+}
+
+static
+void plain_free_rs(struct ptlrpc_reply_state *rs)
+{
+	LASSERT(atomic_read(&rs->rs_svc_ctx->sc_refcount) > 1);
+	atomic_dec(&rs->rs_svc_ctx->sc_refcount);
+
+	if (!rs->rs_prealloc)
+		OBD_FREE_LARGE(rs, rs->rs_size);
+}
+
+static
+int plain_authorize(struct ptlrpc_request *req)
+{
+	struct ptlrpc_reply_state *rs = req->rq_reply_state;
+	struct lustre_msg_v2      *msg = rs->rs_repbuf;
+	struct plain_header       *phdr;
+	int			len;
+
+	LASSERT(rs);
+	LASSERT(msg);
+
+	if (req->rq_replen != msg->lm_buflens[PLAIN_PACK_MSG_OFF])
+		len = lustre_shrink_msg(msg, PLAIN_PACK_MSG_OFF,
+					req->rq_replen, 1);
+	else
+		len = lustre_msg_size_v2(msg->lm_bufcount, msg->lm_buflens);
+
+	msg->lm_secflvr = req->rq_flvr.sf_rpc;
+
+	phdr = lustre_msg_buf(msg, PLAIN_PACK_HDR_OFF, 0);
+	phdr->ph_ver = 0;
+	phdr->ph_flags = 0;
+	phdr->ph_bulk_hash_alg = req->rq_flvr.u_bulk.hash.hash_alg;
+
+	if (req->rq_pack_bulk)
+		phdr->ph_flags |= PLAIN_FL_BULK;
+
+	rs->rs_repdata_len = len;
+
+	if (likely(req->rq_packed_final)) {
+		if (lustre_msghdr_get_flags(req->rq_reqmsg) & MSGHDR_AT_SUPPORT)
+			req->rq_reply_off = plain_at_offset;
+		else
+			req->rq_reply_off = 0;
+	} else {
+		unsigned int hsize = 4;
+
+		cfs_crypto_hash_digest(CFS_HASH_ALG_CRC32,
+			lustre_msg_buf(msg, PLAIN_PACK_MSG_OFF, 0),
+			lustre_msg_buflen(msg, PLAIN_PACK_MSG_OFF),
+			NULL, 0, (unsigned char *)&msg->lm_cksum, &hsize);
+			req->rq_reply_off = 0;
+	}
+
+	return 0;
+}
+
+static
+int plain_svc_unwrap_bulk(struct ptlrpc_request *req,
+			  struct ptlrpc_bulk_desc *desc)
+{
+	struct ptlrpc_reply_state   *rs = req->rq_reply_state;
+	struct ptlrpc_bulk_sec_desc *bsdr, *bsdv;
+	struct plain_bulk_token     *tokenr;
+	int			  rc;
+
+	LASSERT(req->rq_bulk_write);
+	LASSERT(req->rq_pack_bulk);
+
+	bsdr = lustre_msg_buf(req->rq_reqbuf, PLAIN_PACK_BULK_OFF, 0);
+	tokenr = (struct plain_bulk_token *) bsdr->bsd_data;
+	bsdv = lustre_msg_buf(rs->rs_repbuf, PLAIN_PACK_BULK_OFF, 0);
+
+	bsdv->bsd_version = 0;
+	bsdv->bsd_type = SPTLRPC_BULK_DEFAULT;
+	bsdv->bsd_svc = bsdr->bsd_svc;
+	bsdv->bsd_flags = 0;
+
+	if (bsdr->bsd_svc == SPTLRPC_BULK_SVC_NULL)
+		return 0;
+
+	rc = plain_verify_bulk_csum(desc, req->rq_flvr.u_bulk.hash.hash_alg,
+				    tokenr);
+	if (rc) {
+		bsdv->bsd_flags |= BSD_FL_ERR;
+		CERROR("bulk write: server verify failed: %d\n", rc);
+	}
+
+	return rc;
+}
+
+static
+int plain_svc_wrap_bulk(struct ptlrpc_request *req,
+			struct ptlrpc_bulk_desc *desc)
+{
+	struct ptlrpc_reply_state   *rs = req->rq_reply_state;
+	struct ptlrpc_bulk_sec_desc *bsdr, *bsdv;
+	struct plain_bulk_token     *tokenv;
+	int			  rc;
+
+	LASSERT(req->rq_bulk_read);
+	LASSERT(req->rq_pack_bulk);
+
+	bsdr = lustre_msg_buf(req->rq_reqbuf, PLAIN_PACK_BULK_OFF, 0);
+	bsdv = lustre_msg_buf(rs->rs_repbuf, PLAIN_PACK_BULK_OFF, 0);
+	tokenv = (struct plain_bulk_token *) bsdv->bsd_data;
+
+	bsdv->bsd_version = 0;
+	bsdv->bsd_type = SPTLRPC_BULK_DEFAULT;
+	bsdv->bsd_svc = bsdr->bsd_svc;
+	bsdv->bsd_flags = 0;
+
+	if (bsdr->bsd_svc == SPTLRPC_BULK_SVC_NULL)
+		return 0;
+
+	rc = plain_generate_bulk_csum(desc, req->rq_flvr.u_bulk.hash.hash_alg,
+				      tokenv);
+	if (rc) {
+		CERROR("bulk read: server failed to compute checksum: %d\n",
+		       rc);
+	} else {
+		if (OBD_FAIL_CHECK(OBD_FAIL_OSC_CHECKSUM_RECEIVE))
+			corrupt_bulk_data(desc);
+	}
+
+	return rc;
+}
+
+static struct ptlrpc_ctx_ops plain_ctx_ops = {
+	.refresh		= plain_ctx_refresh,
+	.validate	       = plain_ctx_validate,
+	.sign		   = plain_ctx_sign,
+	.verify		 = plain_ctx_verify,
+	.wrap_bulk	      = plain_cli_wrap_bulk,
+	.unwrap_bulk	    = plain_cli_unwrap_bulk,
+};
+
+static struct ptlrpc_sec_cops plain_sec_cops = {
+	.create_sec	     = plain_create_sec,
+	.destroy_sec	    = plain_destroy_sec,
+	.kill_sec	       = plain_kill_sec,
+	.lookup_ctx	     = plain_lookup_ctx,
+	.release_ctx	    = plain_release_ctx,
+	.flush_ctx_cache	= plain_flush_ctx_cache,
+	.alloc_reqbuf	   = plain_alloc_reqbuf,
+	.free_reqbuf	    = plain_free_reqbuf,
+	.alloc_repbuf	   = plain_alloc_repbuf,
+	.free_repbuf	    = plain_free_repbuf,
+	.enlarge_reqbuf	 = plain_enlarge_reqbuf,
+};
+
+static struct ptlrpc_sec_sops plain_sec_sops = {
+	.accept		 = plain_accept,
+	.alloc_rs	       = plain_alloc_rs,
+	.authorize	      = plain_authorize,
+	.free_rs		= plain_free_rs,
+	.unwrap_bulk	    = plain_svc_unwrap_bulk,
+	.wrap_bulk	      = plain_svc_wrap_bulk,
+};
+
+static struct ptlrpc_sec_policy plain_policy = {
+	.sp_owner	       = THIS_MODULE,
+	.sp_name		= "plain",
+	.sp_policy	      = SPTLRPC_POLICY_PLAIN,
+	.sp_cops		= &plain_sec_cops,
+	.sp_sops		= &plain_sec_sops,
+};
+
+int sptlrpc_plain_init(void)
+{
+	__u32 buflens[PLAIN_PACK_SEGMENTS] = { 0, };
+	int rc;
+
+	buflens[PLAIN_PACK_MSG_OFF] = lustre_msg_early_size();
+	plain_at_offset = lustre_msg_size_v2(PLAIN_PACK_SEGMENTS, buflens);
+
+	rc = sptlrpc_register_policy(&plain_policy);
+	if (rc)
+		CERROR("failed to register: %d\n", rc);
+
+	return rc;
+}
+
+void sptlrpc_plain_fini(void)
+{
+	int rc;
+
+	rc = sptlrpc_unregister_policy(&plain_policy);
+	if (rc)
+		CERROR("cannot unregister: %d\n", rc);
+}
diff --git a/drivers/staging/lustre/lustre/ptlrpc/service.c b/drivers/staging/lustre/lustre/ptlrpc/service.c
new file mode 100644
index 000000000..8e6142151
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ptlrpc/service.c
@@ -0,0 +1,3105 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2010, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#define DEBUG_SUBSYSTEM S_RPC
+#include "../include/obd_support.h"
+#include "../include/obd_class.h"
+#include "../include/lustre_net.h"
+#include "../include/lu_object.h"
+#include "../../include/linux/lnet/types.h"
+#include "ptlrpc_internal.h"
+
+/* The following are visible and mutable through /sys/module/ptlrpc */
+int test_req_buffer_pressure = 0;
+module_param(test_req_buffer_pressure, int, 0444);
+MODULE_PARM_DESC(test_req_buffer_pressure, "set non-zero to put pressure on request buffer pools");
+module_param(at_min, int, 0644);
+MODULE_PARM_DESC(at_min, "Adaptive timeout minimum (sec)");
+module_param(at_max, int, 0644);
+MODULE_PARM_DESC(at_max, "Adaptive timeout maximum (sec)");
+module_param(at_history, int, 0644);
+MODULE_PARM_DESC(at_history,
+		 "Adaptive timeouts remember the slowest event that took place within this period (sec)");
+module_param(at_early_margin, int, 0644);
+MODULE_PARM_DESC(at_early_margin, "How soon before an RPC deadline to send an early reply");
+module_param(at_extra, int, 0644);
+MODULE_PARM_DESC(at_extra, "How much extra time to give with each early reply");
+
+
+/* forward ref */
+static int ptlrpc_server_post_idle_rqbds(struct ptlrpc_service_part *svcpt);
+static void ptlrpc_server_hpreq_fini(struct ptlrpc_request *req);
+static void ptlrpc_at_remove_timed(struct ptlrpc_request *req);
+
+/** Holds a list of all PTLRPC services */
+LIST_HEAD(ptlrpc_all_services);
+/** Used to protect the \e ptlrpc_all_services list */
+struct mutex ptlrpc_all_services_mutex;
+
+struct ptlrpc_request_buffer_desc *
+ptlrpc_alloc_rqbd(struct ptlrpc_service_part *svcpt)
+{
+	struct ptlrpc_service		  *svc = svcpt->scp_service;
+	struct ptlrpc_request_buffer_desc *rqbd;
+
+	OBD_CPT_ALLOC_PTR(rqbd, svc->srv_cptable, svcpt->scp_cpt);
+	if (rqbd == NULL)
+		return NULL;
+
+	rqbd->rqbd_svcpt = svcpt;
+	rqbd->rqbd_refcount = 0;
+	rqbd->rqbd_cbid.cbid_fn = request_in_callback;
+	rqbd->rqbd_cbid.cbid_arg = rqbd;
+	INIT_LIST_HEAD(&rqbd->rqbd_reqs);
+	OBD_CPT_ALLOC_LARGE(rqbd->rqbd_buffer, svc->srv_cptable,
+			    svcpt->scp_cpt, svc->srv_buf_size);
+	if (rqbd->rqbd_buffer == NULL) {
+		OBD_FREE_PTR(rqbd);
+		return NULL;
+	}
+
+	spin_lock(&svcpt->scp_lock);
+	list_add(&rqbd->rqbd_list, &svcpt->scp_rqbd_idle);
+	svcpt->scp_nrqbds_total++;
+	spin_unlock(&svcpt->scp_lock);
+
+	return rqbd;
+}
+
+void
+ptlrpc_free_rqbd(struct ptlrpc_request_buffer_desc *rqbd)
+{
+	struct ptlrpc_service_part *svcpt = rqbd->rqbd_svcpt;
+
+	LASSERT(rqbd->rqbd_refcount == 0);
+	LASSERT(list_empty(&rqbd->rqbd_reqs));
+
+	spin_lock(&svcpt->scp_lock);
+	list_del(&rqbd->rqbd_list);
+	svcpt->scp_nrqbds_total--;
+	spin_unlock(&svcpt->scp_lock);
+
+	OBD_FREE_LARGE(rqbd->rqbd_buffer, svcpt->scp_service->srv_buf_size);
+	OBD_FREE_PTR(rqbd);
+}
+
+int
+ptlrpc_grow_req_bufs(struct ptlrpc_service_part *svcpt, int post)
+{
+	struct ptlrpc_service		  *svc = svcpt->scp_service;
+	struct ptlrpc_request_buffer_desc *rqbd;
+	int				rc = 0;
+	int				i;
+
+	if (svcpt->scp_rqbd_allocating)
+		goto try_post;
+
+	spin_lock(&svcpt->scp_lock);
+	/* check again with lock */
+	if (svcpt->scp_rqbd_allocating) {
+		/* NB: we might allow more than one thread in the future */
+		LASSERT(svcpt->scp_rqbd_allocating == 1);
+		spin_unlock(&svcpt->scp_lock);
+		goto try_post;
+	}
+
+	svcpt->scp_rqbd_allocating++;
+	spin_unlock(&svcpt->scp_lock);
+
+
+	for (i = 0; i < svc->srv_nbuf_per_group; i++) {
+		/* NB: another thread might have recycled enough rqbds, we
+		 * need to make sure it wouldn't over-allocate, see LU-1212. */
+		if (svcpt->scp_nrqbds_posted >= svc->srv_nbuf_per_group)
+			break;
+
+		rqbd = ptlrpc_alloc_rqbd(svcpt);
+
+		if (rqbd == NULL) {
+			CERROR("%s: Can't allocate request buffer\n",
+			       svc->srv_name);
+			rc = -ENOMEM;
+			break;
+		}
+	}
+
+	spin_lock(&svcpt->scp_lock);
+
+	LASSERT(svcpt->scp_rqbd_allocating == 1);
+	svcpt->scp_rqbd_allocating--;
+
+	spin_unlock(&svcpt->scp_lock);
+
+	CDEBUG(D_RPCTRACE,
+	       "%s: allocate %d new %d-byte reqbufs (%d/%d left), rc = %d\n",
+	       svc->srv_name, i, svc->srv_buf_size, svcpt->scp_nrqbds_posted,
+	       svcpt->scp_nrqbds_total, rc);
+
+ try_post:
+	if (post && rc == 0)
+		rc = ptlrpc_server_post_idle_rqbds(svcpt);
+
+	return rc;
+}
+
+/**
+ * Part of Rep-Ack logic.
+ * Puts a lock and its mode into reply state associated to request reply.
+ */
+void
+ptlrpc_save_lock(struct ptlrpc_request *req,
+		 struct lustre_handle *lock, int mode, int no_ack)
+{
+	struct ptlrpc_reply_state *rs = req->rq_reply_state;
+	int			idx;
+
+	LASSERT(rs != NULL);
+	LASSERT(rs->rs_nlocks < RS_MAX_LOCKS);
+
+	if (req->rq_export->exp_disconnected) {
+		ldlm_lock_decref(lock, mode);
+	} else {
+		idx = rs->rs_nlocks++;
+		rs->rs_locks[idx] = *lock;
+		rs->rs_modes[idx] = mode;
+		rs->rs_difficult = 1;
+		rs->rs_no_ack = !!no_ack;
+	}
+}
+EXPORT_SYMBOL(ptlrpc_save_lock);
+
+
+struct ptlrpc_hr_partition;
+
+struct ptlrpc_hr_thread {
+	int				hrt_id;		/* thread ID */
+	spinlock_t			hrt_lock;
+	wait_queue_head_t			hrt_waitq;
+	struct list_head			hrt_queue;	/* RS queue */
+	struct ptlrpc_hr_partition	*hrt_partition;
+};
+
+struct ptlrpc_hr_partition {
+	/* # of started threads */
+	atomic_t			hrp_nstarted;
+	/* # of stopped threads */
+	atomic_t			hrp_nstopped;
+	/* cpu partition id */
+	int				hrp_cpt;
+	/* round-robin rotor for choosing thread */
+	int				hrp_rotor;
+	/* total number of threads on this partition */
+	int				hrp_nthrs;
+	/* threads table */
+	struct ptlrpc_hr_thread		*hrp_thrs;
+};
+
+#define HRT_RUNNING 0
+#define HRT_STOPPING 1
+
+struct ptlrpc_hr_service {
+	/* CPU partition table, it's just cfs_cpt_table for now */
+	struct cfs_cpt_table		*hr_cpt_table;
+	/** controller sleep waitq */
+	wait_queue_head_t			hr_waitq;
+	unsigned int			hr_stopping;
+	/** roundrobin rotor for non-affinity service */
+	unsigned int			hr_rotor;
+	/* partition data */
+	struct ptlrpc_hr_partition	**hr_partitions;
+};
+
+struct rs_batch {
+	struct list_head			rsb_replies;
+	unsigned int			rsb_n_replies;
+	struct ptlrpc_service_part	*rsb_svcpt;
+};
+
+/** reply handling service. */
+static struct ptlrpc_hr_service		ptlrpc_hr;
+
+/**
+ * maximum number of replies scheduled in one batch
+ */
+#define MAX_SCHEDULED 256
+
+/**
+ * Initialize a reply batch.
+ *
+ * \param b batch
+ */
+static void rs_batch_init(struct rs_batch *b)
+{
+	memset(b, 0, sizeof(*b));
+	INIT_LIST_HEAD(&b->rsb_replies);
+}
+
+/**
+ * Choose an hr thread to dispatch requests to.
+ */
+static struct ptlrpc_hr_thread *
+ptlrpc_hr_select(struct ptlrpc_service_part *svcpt)
+{
+	struct ptlrpc_hr_partition	*hrp;
+	unsigned int			rotor;
+
+	if (svcpt->scp_cpt >= 0 &&
+	    svcpt->scp_service->srv_cptable == ptlrpc_hr.hr_cpt_table) {
+		/* directly match partition */
+		hrp = ptlrpc_hr.hr_partitions[svcpt->scp_cpt];
+
+	} else {
+		rotor = ptlrpc_hr.hr_rotor++;
+		rotor %= cfs_cpt_number(ptlrpc_hr.hr_cpt_table);
+
+		hrp = ptlrpc_hr.hr_partitions[rotor];
+	}
+
+	rotor = hrp->hrp_rotor++;
+	return &hrp->hrp_thrs[rotor % hrp->hrp_nthrs];
+}
+
+/**
+ * Dispatch all replies accumulated in the batch to one from
+ * dedicated reply handling threads.
+ *
+ * \param b batch
+ */
+static void rs_batch_dispatch(struct rs_batch *b)
+{
+	if (b->rsb_n_replies != 0) {
+		struct ptlrpc_hr_thread	*hrt;
+
+		hrt = ptlrpc_hr_select(b->rsb_svcpt);
+
+		spin_lock(&hrt->hrt_lock);
+		list_splice_init(&b->rsb_replies, &hrt->hrt_queue);
+		spin_unlock(&hrt->hrt_lock);
+
+		wake_up(&hrt->hrt_waitq);
+		b->rsb_n_replies = 0;
+	}
+}
+
+/**
+ * Add a reply to a batch.
+ * Add one reply object to a batch, schedule batched replies if overload.
+ *
+ * \param b batch
+ * \param rs reply
+ */
+static void rs_batch_add(struct rs_batch *b, struct ptlrpc_reply_state *rs)
+{
+	struct ptlrpc_service_part *svcpt = rs->rs_svcpt;
+
+	if (svcpt != b->rsb_svcpt || b->rsb_n_replies >= MAX_SCHEDULED) {
+		if (b->rsb_svcpt != NULL) {
+			rs_batch_dispatch(b);
+			spin_unlock(&b->rsb_svcpt->scp_rep_lock);
+		}
+		spin_lock(&svcpt->scp_rep_lock);
+		b->rsb_svcpt = svcpt;
+	}
+	spin_lock(&rs->rs_lock);
+	rs->rs_scheduled_ever = 1;
+	if (rs->rs_scheduled == 0) {
+		list_move(&rs->rs_list, &b->rsb_replies);
+		rs->rs_scheduled = 1;
+		b->rsb_n_replies++;
+	}
+	rs->rs_committed = 1;
+	spin_unlock(&rs->rs_lock);
+}
+
+/**
+ * Reply batch finalization.
+ * Dispatch remaining replies from the batch
+ * and release remaining spinlock.
+ *
+ * \param b batch
+ */
+static void rs_batch_fini(struct rs_batch *b)
+{
+	if (b->rsb_svcpt != NULL) {
+		rs_batch_dispatch(b);
+		spin_unlock(&b->rsb_svcpt->scp_rep_lock);
+	}
+}
+
+#define DECLARE_RS_BATCH(b)     struct rs_batch b
+
+
+/**
+ * Put reply state into a queue for processing because we received
+ * ACK from the client
+ */
+void ptlrpc_dispatch_difficult_reply(struct ptlrpc_reply_state *rs)
+{
+	struct ptlrpc_hr_thread *hrt;
+
+	LASSERT(list_empty(&rs->rs_list));
+
+	hrt = ptlrpc_hr_select(rs->rs_svcpt);
+
+	spin_lock(&hrt->hrt_lock);
+	list_add_tail(&rs->rs_list, &hrt->hrt_queue);
+	spin_unlock(&hrt->hrt_lock);
+
+	wake_up(&hrt->hrt_waitq);
+}
+
+void
+ptlrpc_schedule_difficult_reply(struct ptlrpc_reply_state *rs)
+{
+	assert_spin_locked(&rs->rs_svcpt->scp_rep_lock);
+	assert_spin_locked(&rs->rs_lock);
+	LASSERT(rs->rs_difficult);
+	rs->rs_scheduled_ever = 1;  /* flag any notification attempt */
+
+	if (rs->rs_scheduled) {     /* being set up or already notified */
+		return;
+	}
+
+	rs->rs_scheduled = 1;
+	list_del_init(&rs->rs_list);
+	ptlrpc_dispatch_difficult_reply(rs);
+}
+EXPORT_SYMBOL(ptlrpc_schedule_difficult_reply);
+
+void ptlrpc_commit_replies(struct obd_export *exp)
+{
+	struct ptlrpc_reply_state *rs, *nxt;
+	DECLARE_RS_BATCH(batch);
+
+	rs_batch_init(&batch);
+	/* Find any replies that have been committed and get their service
+	 * to attend to complete them. */
+
+	/* CAVEAT EMPTOR: spinlock ordering!!! */
+	spin_lock(&exp->exp_uncommitted_replies_lock);
+	list_for_each_entry_safe(rs, nxt, &exp->exp_uncommitted_replies,
+				     rs_obd_list) {
+		LASSERT(rs->rs_difficult);
+		/* VBR: per-export last_committed */
+		LASSERT(rs->rs_export);
+		if (rs->rs_transno <= exp->exp_last_committed) {
+			list_del_init(&rs->rs_obd_list);
+			rs_batch_add(&batch, rs);
+		}
+	}
+	spin_unlock(&exp->exp_uncommitted_replies_lock);
+	rs_batch_fini(&batch);
+}
+EXPORT_SYMBOL(ptlrpc_commit_replies);
+
+static int
+ptlrpc_server_post_idle_rqbds(struct ptlrpc_service_part *svcpt)
+{
+	struct ptlrpc_request_buffer_desc *rqbd;
+	int				  rc;
+	int				  posted = 0;
+
+	for (;;) {
+		spin_lock(&svcpt->scp_lock);
+
+		if (list_empty(&svcpt->scp_rqbd_idle)) {
+			spin_unlock(&svcpt->scp_lock);
+			return posted;
+		}
+
+		rqbd = list_entry(svcpt->scp_rqbd_idle.next,
+				      struct ptlrpc_request_buffer_desc,
+				      rqbd_list);
+		list_del(&rqbd->rqbd_list);
+
+		/* assume we will post successfully */
+		svcpt->scp_nrqbds_posted++;
+		list_add(&rqbd->rqbd_list, &svcpt->scp_rqbd_posted);
+
+		spin_unlock(&svcpt->scp_lock);
+
+		rc = ptlrpc_register_rqbd(rqbd);
+		if (rc != 0)
+			break;
+
+		posted = 1;
+	}
+
+	spin_lock(&svcpt->scp_lock);
+
+	svcpt->scp_nrqbds_posted--;
+	list_del(&rqbd->rqbd_list);
+	list_add_tail(&rqbd->rqbd_list, &svcpt->scp_rqbd_idle);
+
+	/* Don't complain if no request buffers are posted right now; LNET
+	 * won't drop requests because we set the portal lazy! */
+
+	spin_unlock(&svcpt->scp_lock);
+
+	return -1;
+}
+
+static void ptlrpc_at_timer(unsigned long castmeharder)
+{
+	struct ptlrpc_service_part *svcpt;
+
+	svcpt = (struct ptlrpc_service_part *)castmeharder;
+
+	svcpt->scp_at_check = 1;
+	svcpt->scp_at_checktime = cfs_time_current();
+	wake_up(&svcpt->scp_waitq);
+}
+
+static void
+ptlrpc_server_nthreads_check(struct ptlrpc_service *svc,
+			     struct ptlrpc_service_conf *conf)
+{
+	struct ptlrpc_service_thr_conf	*tc = &conf->psc_thr;
+	unsigned			init;
+	unsigned			total;
+	unsigned			nthrs;
+	int				weight;
+
+	/*
+	 * Common code for estimating & validating threads number.
+	 * CPT affinity service could have percpt thread-pool instead
+	 * of a global thread-pool, which means user might not always
+	 * get the threads number they give it in conf::tc_nthrs_user
+	 * even they did set. It's because we need to validate threads
+	 * number for each CPT to guarantee each pool will have enough
+	 * threads to keep the service healthy.
+	 */
+	init = PTLRPC_NTHRS_INIT + (svc->srv_ops.so_hpreq_handler != NULL);
+	init = max_t(int, init, tc->tc_nthrs_init);
+
+	/* NB: please see comments in lustre_lnet.h for definition
+	 * details of these members */
+	LASSERT(tc->tc_nthrs_max != 0);
+
+	if (tc->tc_nthrs_user != 0) {
+		/* In case there is a reason to test a service with many
+		 * threads, we give a less strict check here, it can
+		 * be up to 8 * nthrs_max */
+		total = min(tc->tc_nthrs_max * 8, tc->tc_nthrs_user);
+		nthrs = total / svc->srv_ncpts;
+		init  = max(init, nthrs);
+		goto out;
+	}
+
+	total = tc->tc_nthrs_max;
+	if (tc->tc_nthrs_base == 0) {
+		/* don't care about base threads number per partition,
+		 * this is most for non-affinity service */
+		nthrs = total / svc->srv_ncpts;
+		goto out;
+	}
+
+	nthrs = tc->tc_nthrs_base;
+	if (svc->srv_ncpts == 1) {
+		int	i;
+
+		/* NB: Increase the base number if it's single partition
+		 * and total number of cores/HTs is larger or equal to 4.
+		 * result will always < 2 * nthrs_base */
+		weight = cfs_cpt_weight(svc->srv_cptable, CFS_CPT_ANY);
+		for (i = 1; (weight >> (i + 1)) != 0 && /* >= 4 cores/HTs */
+			    (tc->tc_nthrs_base >> i) != 0; i++)
+			nthrs += tc->tc_nthrs_base >> i;
+	}
+
+	if (tc->tc_thr_factor != 0) {
+		int	  factor = tc->tc_thr_factor;
+		const int fade = 4;
+
+		/*
+		 * User wants to increase number of threads with for
+		 * each CPU core/HT, most likely the factor is larger then
+		 * one thread/core because service threads are supposed to
+		 * be blocked by lock or wait for IO.
+		 */
+		/*
+		 * Amdahl's law says that adding processors wouldn't give
+		 * a linear increasing of parallelism, so it's nonsense to
+		 * have too many threads no matter how many cores/HTs
+		 * there are.
+		 */
+		/* weight is # of HTs */
+		if (cpumask_weight(topology_thread_cpumask(0)) > 1) {
+			/* depress thread factor for hyper-thread */
+			factor = factor - (factor >> 1) + (factor >> 3);
+		}
+
+		weight = cfs_cpt_weight(svc->srv_cptable, 0);
+		LASSERT(weight > 0);
+
+		for (; factor > 0 && weight > 0; factor--, weight -= fade)
+			nthrs += min(weight, fade) * factor;
+	}
+
+	if (nthrs * svc->srv_ncpts > tc->tc_nthrs_max) {
+		nthrs = max(tc->tc_nthrs_base,
+			    tc->tc_nthrs_max / svc->srv_ncpts);
+	}
+ out:
+	nthrs = max(nthrs, tc->tc_nthrs_init);
+	svc->srv_nthrs_cpt_limit = nthrs;
+	svc->srv_nthrs_cpt_init = init;
+
+	if (nthrs * svc->srv_ncpts > tc->tc_nthrs_max) {
+		CDEBUG(D_OTHER, "%s: This service may have more threads (%d) than the given soft limit (%d)\n",
+		       svc->srv_name, nthrs * svc->srv_ncpts,
+		       tc->tc_nthrs_max);
+	}
+}
+
+/**
+ * Initialize percpt data for a service
+ */
+static int
+ptlrpc_service_part_init(struct ptlrpc_service *svc,
+			 struct ptlrpc_service_part *svcpt, int cpt)
+{
+	struct ptlrpc_at_array	*array;
+	int			size;
+	int			index;
+	int			rc;
+
+	svcpt->scp_cpt = cpt;
+	INIT_LIST_HEAD(&svcpt->scp_threads);
+
+	/* rqbd and incoming request queue */
+	spin_lock_init(&svcpt->scp_lock);
+	INIT_LIST_HEAD(&svcpt->scp_rqbd_idle);
+	INIT_LIST_HEAD(&svcpt->scp_rqbd_posted);
+	INIT_LIST_HEAD(&svcpt->scp_req_incoming);
+	init_waitqueue_head(&svcpt->scp_waitq);
+	/* history request & rqbd list */
+	INIT_LIST_HEAD(&svcpt->scp_hist_reqs);
+	INIT_LIST_HEAD(&svcpt->scp_hist_rqbds);
+
+	/* active requests and hp requests */
+	spin_lock_init(&svcpt->scp_req_lock);
+
+	/* reply states */
+	spin_lock_init(&svcpt->scp_rep_lock);
+	INIT_LIST_HEAD(&svcpt->scp_rep_active);
+	INIT_LIST_HEAD(&svcpt->scp_rep_idle);
+	init_waitqueue_head(&svcpt->scp_rep_waitq);
+	atomic_set(&svcpt->scp_nreps_difficult, 0);
+
+	/* adaptive timeout */
+	spin_lock_init(&svcpt->scp_at_lock);
+	array = &svcpt->scp_at_array;
+
+	size = at_est2timeout(at_max);
+	array->paa_size     = size;
+	array->paa_count    = 0;
+	array->paa_deadline = -1;
+
+	/* allocate memory for scp_at_array (ptlrpc_at_array) */
+	OBD_CPT_ALLOC(array->paa_reqs_array,
+		      svc->srv_cptable, cpt, sizeof(struct list_head) * size);
+	if (array->paa_reqs_array == NULL)
+		return -ENOMEM;
+
+	for (index = 0; index < size; index++)
+		INIT_LIST_HEAD(&array->paa_reqs_array[index]);
+
+	OBD_CPT_ALLOC(array->paa_reqs_count,
+		      svc->srv_cptable, cpt, sizeof(__u32) * size);
+	if (array->paa_reqs_count == NULL)
+		goto failed;
+
+	cfs_timer_init(&svcpt->scp_at_timer, ptlrpc_at_timer, svcpt);
+	/* At SOW, service time should be quick; 10s seems generous. If client
+	 * timeout is less than this, we'll be sending an early reply. */
+	at_init(&svcpt->scp_at_estimate, 10, 0);
+
+	/* assign this before call ptlrpc_grow_req_bufs */
+	svcpt->scp_service = svc;
+	/* Now allocate the request buffers, but don't post them now */
+	rc = ptlrpc_grow_req_bufs(svcpt, 0);
+	/* We shouldn't be under memory pressure at startup, so
+	 * fail if we can't allocate all our buffers at this time. */
+	if (rc != 0)
+		goto failed;
+
+	return 0;
+
+ failed:
+	if (array->paa_reqs_count != NULL) {
+		OBD_FREE(array->paa_reqs_count, sizeof(__u32) * size);
+		array->paa_reqs_count = NULL;
+	}
+
+	if (array->paa_reqs_array != NULL) {
+		OBD_FREE(array->paa_reqs_array,
+			 sizeof(struct list_head) * array->paa_size);
+		array->paa_reqs_array = NULL;
+	}
+
+	return -ENOMEM;
+}
+
+/**
+ * Initialize service on a given portal.
+ * This includes starting serving threads , allocating and posting rqbds and
+ * so on.
+ */
+struct ptlrpc_service *
+ptlrpc_register_service(struct ptlrpc_service_conf *conf,
+			struct proc_dir_entry *proc_entry)
+{
+	struct ptlrpc_service_cpt_conf	*cconf = &conf->psc_cpt;
+	struct ptlrpc_service		*service;
+	struct ptlrpc_service_part	*svcpt;
+	struct cfs_cpt_table		*cptable;
+	__u32				*cpts = NULL;
+	int				ncpts;
+	int				cpt;
+	int				rc;
+	int				i;
+
+	LASSERT(conf->psc_buf.bc_nbufs > 0);
+	LASSERT(conf->psc_buf.bc_buf_size >=
+		conf->psc_buf.bc_req_max_size + SPTLRPC_MAX_PAYLOAD);
+	LASSERT(conf->psc_thr.tc_ctx_tags != 0);
+
+	cptable = cconf->cc_cptable;
+	if (cptable == NULL)
+		cptable = cfs_cpt_table;
+
+	if (!conf->psc_thr.tc_cpu_affinity) {
+		ncpts = 1;
+	} else {
+		ncpts = cfs_cpt_number(cptable);
+		if (cconf->cc_pattern != NULL) {
+			struct cfs_expr_list	*el;
+
+			rc = cfs_expr_list_parse(cconf->cc_pattern,
+						 strlen(cconf->cc_pattern),
+						 0, ncpts - 1, &el);
+			if (rc != 0) {
+				CERROR("%s: invalid CPT pattern string: %s",
+				       conf->psc_name, cconf->cc_pattern);
+				return ERR_PTR(-EINVAL);
+			}
+
+			rc = cfs_expr_list_values(el, ncpts, &cpts);
+			cfs_expr_list_free(el);
+			if (rc <= 0) {
+				CERROR("%s: failed to parse CPT array %s: %d\n",
+				       conf->psc_name, cconf->cc_pattern, rc);
+				if (cpts != NULL)
+					OBD_FREE(cpts, sizeof(*cpts) * ncpts);
+				return ERR_PTR(rc < 0 ? rc : -EINVAL);
+			}
+			ncpts = rc;
+		}
+	}
+
+	OBD_ALLOC(service, offsetof(struct ptlrpc_service, srv_parts[ncpts]));
+	if (service == NULL) {
+		if (cpts != NULL)
+			OBD_FREE(cpts, sizeof(*cpts) * ncpts);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	service->srv_cptable		= cptable;
+	service->srv_cpts		= cpts;
+	service->srv_ncpts		= ncpts;
+
+	service->srv_cpt_bits = 0; /* it's zero already, easy to read... */
+	while ((1 << service->srv_cpt_bits) < cfs_cpt_number(cptable))
+		service->srv_cpt_bits++;
+
+	/* public members */
+	spin_lock_init(&service->srv_lock);
+	service->srv_name		= conf->psc_name;
+	service->srv_watchdog_factor	= conf->psc_watchdog_factor;
+	INIT_LIST_HEAD(&service->srv_list); /* for safety of cleanup */
+
+	/* buffer configuration */
+	service->srv_nbuf_per_group	= test_req_buffer_pressure ?
+					  1 : conf->psc_buf.bc_nbufs;
+	service->srv_max_req_size	= conf->psc_buf.bc_req_max_size +
+					  SPTLRPC_MAX_PAYLOAD;
+	service->srv_buf_size		= conf->psc_buf.bc_buf_size;
+	service->srv_rep_portal		= conf->psc_buf.bc_rep_portal;
+	service->srv_req_portal		= conf->psc_buf.bc_req_portal;
+
+	/* Increase max reply size to next power of two */
+	service->srv_max_reply_size = 1;
+	while (service->srv_max_reply_size <
+	       conf->psc_buf.bc_rep_max_size + SPTLRPC_MAX_PAYLOAD)
+		service->srv_max_reply_size <<= 1;
+
+	service->srv_thread_name	= conf->psc_thr.tc_thr_name;
+	service->srv_ctx_tags		= conf->psc_thr.tc_ctx_tags;
+	service->srv_hpreq_ratio	= PTLRPC_SVC_HP_RATIO;
+	service->srv_ops		= conf->psc_ops;
+
+	for (i = 0; i < ncpts; i++) {
+		if (!conf->psc_thr.tc_cpu_affinity)
+			cpt = CFS_CPT_ANY;
+		else
+			cpt = cpts != NULL ? cpts[i] : i;
+
+		OBD_CPT_ALLOC(svcpt, cptable, cpt, sizeof(*svcpt));
+		if (svcpt == NULL) {
+			rc = -ENOMEM;
+			goto failed;
+		}
+
+		service->srv_parts[i] = svcpt;
+		rc = ptlrpc_service_part_init(service, svcpt, cpt);
+		if (rc != 0)
+			goto failed;
+	}
+
+	ptlrpc_server_nthreads_check(service, conf);
+
+	rc = LNetSetLazyPortal(service->srv_req_portal);
+	LASSERT(rc == 0);
+
+	mutex_lock(&ptlrpc_all_services_mutex);
+	list_add(&service->srv_list, &ptlrpc_all_services);
+	mutex_unlock(&ptlrpc_all_services_mutex);
+
+	if (proc_entry != NULL)
+		ptlrpc_lprocfs_register_service(proc_entry, service);
+
+	rc = ptlrpc_service_nrs_setup(service);
+	if (rc != 0)
+		goto failed;
+
+	CDEBUG(D_NET, "%s: Started, listening on portal %d\n",
+	       service->srv_name, service->srv_req_portal);
+
+	rc = ptlrpc_start_threads(service);
+	if (rc != 0) {
+		CERROR("Failed to start threads for service %s: %d\n",
+		       service->srv_name, rc);
+		goto failed;
+	}
+
+	return service;
+failed:
+	ptlrpc_unregister_service(service);
+	return ERR_PTR(rc);
+}
+EXPORT_SYMBOL(ptlrpc_register_service);
+
+/**
+ * to actually free the request, must be called without holding svc_lock.
+ * note it's caller's responsibility to unlink req->rq_list.
+ */
+static void ptlrpc_server_free_request(struct ptlrpc_request *req)
+{
+	LASSERT(atomic_read(&req->rq_refcount) == 0);
+	LASSERT(list_empty(&req->rq_timed_list));
+
+	 /* DEBUG_REQ() assumes the reply state of a request with a valid
+	  * ref will not be destroyed until that reference is dropped. */
+	ptlrpc_req_drop_rs(req);
+
+	sptlrpc_svc_ctx_decref(req);
+
+	if (req != &req->rq_rqbd->rqbd_req) {
+		/* NB request buffers use an embedded
+		 * req if the incoming req unlinked the
+		 * MD; this isn't one of them! */
+		ptlrpc_request_cache_free(req);
+	}
+}
+
+/**
+ * drop a reference count of the request. if it reaches 0, we either
+ * put it into history list, or free it immediately.
+ */
+void ptlrpc_server_drop_request(struct ptlrpc_request *req)
+{
+	struct ptlrpc_request_buffer_desc *rqbd = req->rq_rqbd;
+	struct ptlrpc_service_part	  *svcpt = rqbd->rqbd_svcpt;
+	struct ptlrpc_service		  *svc = svcpt->scp_service;
+	int				refcount;
+	struct list_head			*tmp;
+	struct list_head			*nxt;
+
+	if (!atomic_dec_and_test(&req->rq_refcount))
+		return;
+
+	if (req->rq_at_linked) {
+		spin_lock(&svcpt->scp_at_lock);
+		/* recheck with lock, in case it's unlinked by
+		 * ptlrpc_at_check_timed() */
+		if (likely(req->rq_at_linked))
+			ptlrpc_at_remove_timed(req);
+		spin_unlock(&svcpt->scp_at_lock);
+	}
+
+	LASSERT(list_empty(&req->rq_timed_list));
+
+	/* finalize request */
+	if (req->rq_export) {
+		class_export_put(req->rq_export);
+		req->rq_export = NULL;
+	}
+
+	spin_lock(&svcpt->scp_lock);
+
+	list_add(&req->rq_list, &rqbd->rqbd_reqs);
+
+	refcount = --(rqbd->rqbd_refcount);
+	if (refcount == 0) {
+		/* request buffer is now idle: add to history */
+		list_del(&rqbd->rqbd_list);
+
+		list_add_tail(&rqbd->rqbd_list, &svcpt->scp_hist_rqbds);
+		svcpt->scp_hist_nrqbds++;
+
+		/* cull some history?
+		 * I expect only about 1 or 2 rqbds need to be recycled here */
+		while (svcpt->scp_hist_nrqbds > svc->srv_hist_nrqbds_cpt_max) {
+			rqbd = list_entry(svcpt->scp_hist_rqbds.next,
+					      struct ptlrpc_request_buffer_desc,
+					      rqbd_list);
+
+			list_del(&rqbd->rqbd_list);
+			svcpt->scp_hist_nrqbds--;
+
+			/* remove rqbd's reqs from svc's req history while
+			 * I've got the service lock */
+			list_for_each(tmp, &rqbd->rqbd_reqs) {
+				req = list_entry(tmp, struct ptlrpc_request,
+						     rq_list);
+				/* Track the highest culled req seq */
+				if (req->rq_history_seq >
+				    svcpt->scp_hist_seq_culled) {
+					svcpt->scp_hist_seq_culled =
+						req->rq_history_seq;
+				}
+				list_del(&req->rq_history_list);
+			}
+
+			spin_unlock(&svcpt->scp_lock);
+
+			list_for_each_safe(tmp, nxt, &rqbd->rqbd_reqs) {
+				req = list_entry(rqbd->rqbd_reqs.next,
+						     struct ptlrpc_request,
+						     rq_list);
+				list_del(&req->rq_list);
+				ptlrpc_server_free_request(req);
+			}
+
+			spin_lock(&svcpt->scp_lock);
+			/*
+			 * now all reqs including the embedded req has been
+			 * disposed, schedule request buffer for re-use.
+			 */
+			LASSERT(atomic_read(&rqbd->rqbd_req.rq_refcount) ==
+				0);
+			list_add_tail(&rqbd->rqbd_list,
+					  &svcpt->scp_rqbd_idle);
+		}
+
+		spin_unlock(&svcpt->scp_lock);
+	} else if (req->rq_reply_state && req->rq_reply_state->rs_prealloc) {
+		/* If we are low on memory, we are not interested in history */
+		list_del(&req->rq_list);
+		list_del_init(&req->rq_history_list);
+
+		/* Track the highest culled req seq */
+		if (req->rq_history_seq > svcpt->scp_hist_seq_culled)
+			svcpt->scp_hist_seq_culled = req->rq_history_seq;
+
+		spin_unlock(&svcpt->scp_lock);
+
+		ptlrpc_server_free_request(req);
+	} else {
+		spin_unlock(&svcpt->scp_lock);
+	}
+}
+
+/** Change request export and move hp request from old export to new */
+void ptlrpc_request_change_export(struct ptlrpc_request *req,
+				  struct obd_export *export)
+{
+	if (req->rq_export != NULL) {
+		if (!list_empty(&req->rq_exp_list)) {
+			/* remove rq_exp_list from last export */
+			spin_lock_bh(&req->rq_export->exp_rpc_lock);
+			list_del_init(&req->rq_exp_list);
+			spin_unlock_bh(&req->rq_export->exp_rpc_lock);
+
+			/* export has one reference already, so it`s safe to
+			 * add req to export queue here and get another
+			 * reference for request later */
+			spin_lock_bh(&export->exp_rpc_lock);
+			list_add(&req->rq_exp_list, &export->exp_hp_rpcs);
+			spin_unlock_bh(&export->exp_rpc_lock);
+		}
+		class_export_rpc_dec(req->rq_export);
+		class_export_put(req->rq_export);
+	}
+
+	/* request takes one export refcount */
+	req->rq_export = class_export_get(export);
+	class_export_rpc_inc(export);
+
+	return;
+}
+
+/**
+ * to finish a request: stop sending more early replies, and release
+ * the request.
+ */
+static void ptlrpc_server_finish_request(struct ptlrpc_service_part *svcpt,
+					 struct ptlrpc_request *req)
+{
+	ptlrpc_server_hpreq_fini(req);
+
+	ptlrpc_server_drop_request(req);
+}
+
+/**
+ * to finish a active request: stop sending more early replies, and release
+ * the request. should be called after we finished handling the request.
+ */
+static void ptlrpc_server_finish_active_request(
+					struct ptlrpc_service_part *svcpt,
+					struct ptlrpc_request *req)
+{
+	spin_lock(&svcpt->scp_req_lock);
+	ptlrpc_nrs_req_stop_nolock(req);
+	svcpt->scp_nreqs_active--;
+	if (req->rq_hp)
+		svcpt->scp_nhreqs_active--;
+	spin_unlock(&svcpt->scp_req_lock);
+
+	ptlrpc_nrs_req_finalize(req);
+
+	if (req->rq_export != NULL)
+		class_export_rpc_dec(req->rq_export);
+
+	ptlrpc_server_finish_request(svcpt, req);
+}
+
+/**
+ * This function makes sure dead exports are evicted in a timely manner.
+ * This function is only called when some export receives a message (i.e.,
+ * the network is up.)
+ */
+static void ptlrpc_update_export_timer(struct obd_export *exp, long extra_delay)
+{
+	struct obd_export *oldest_exp;
+	time_t oldest_time, new_time;
+
+	LASSERT(exp);
+
+	/* Compensate for slow machines, etc, by faking our request time
+	   into the future.  Although this can break the strict time-ordering
+	   of the list, we can be really lazy here - we don't have to evict
+	   at the exact right moment.  Eventually, all silent exports
+	   will make it to the top of the list. */
+
+	/* Do not pay attention on 1sec or smaller renewals. */
+	new_time = get_seconds() + extra_delay;
+	if (exp->exp_last_request_time + 1 /*second */ >= new_time)
+		return;
+
+	exp->exp_last_request_time = new_time;
+
+	/* exports may get disconnected from the chain even though the
+	   export has references, so we must keep the spin lock while
+	   manipulating the lists */
+	spin_lock(&exp->exp_obd->obd_dev_lock);
+
+	if (list_empty(&exp->exp_obd_chain_timed)) {
+		/* this one is not timed */
+		spin_unlock(&exp->exp_obd->obd_dev_lock);
+		return;
+	}
+
+	list_move_tail(&exp->exp_obd_chain_timed,
+			   &exp->exp_obd->obd_exports_timed);
+
+	oldest_exp = list_entry(exp->exp_obd->obd_exports_timed.next,
+				    struct obd_export, exp_obd_chain_timed);
+	oldest_time = oldest_exp->exp_last_request_time;
+	spin_unlock(&exp->exp_obd->obd_dev_lock);
+
+	if (exp->exp_obd->obd_recovering) {
+		/* be nice to everyone during recovery */
+		return;
+	}
+
+	/* Note - racing to start/reset the obd_eviction timer is safe */
+	if (exp->exp_obd->obd_eviction_timer == 0) {
+		/* Check if the oldest entry is expired. */
+		if (get_seconds() > (oldest_time + PING_EVICT_TIMEOUT +
+					      extra_delay)) {
+			/* We need a second timer, in case the net was down and
+			 * it just came back. Since the pinger may skip every
+			 * other PING_INTERVAL (see note in ptlrpc_pinger_main),
+			 * we better wait for 3. */
+			exp->exp_obd->obd_eviction_timer =
+				get_seconds() + 3 * PING_INTERVAL;
+			CDEBUG(D_HA, "%s: Think about evicting %s from "CFS_TIME_T"\n",
+			       exp->exp_obd->obd_name,
+			       obd_export_nid2str(oldest_exp), oldest_time);
+		}
+	} else {
+		if (get_seconds() >
+		    (exp->exp_obd->obd_eviction_timer + extra_delay)) {
+			/* The evictor won't evict anyone who we've heard from
+			 * recently, so we don't have to check before we start
+			 * it. */
+			if (!ping_evictor_wake(exp))
+				exp->exp_obd->obd_eviction_timer = 0;
+		}
+	}
+}
+
+/**
+ * Sanity check request \a req.
+ * Return 0 if all is ok, error code otherwise.
+ */
+static int ptlrpc_check_req(struct ptlrpc_request *req)
+{
+	struct obd_device *obd = req->rq_export->exp_obd;
+	int rc = 0;
+
+	if (unlikely(lustre_msg_get_conn_cnt(req->rq_reqmsg) <
+		     req->rq_export->exp_conn_cnt)) {
+		DEBUG_REQ(D_RPCTRACE, req,
+			  "DROPPING req from old connection %d < %d",
+			  lustre_msg_get_conn_cnt(req->rq_reqmsg),
+			  req->rq_export->exp_conn_cnt);
+		return -EEXIST;
+	}
+	if (unlikely(obd == NULL || obd->obd_fail)) {
+		/*
+		 * Failing over, don't handle any more reqs, send
+		 * error response instead.
+		 */
+		CDEBUG(D_RPCTRACE, "Dropping req %p for failed obd %s\n",
+		       req, (obd != NULL) ? obd->obd_name : "unknown");
+		rc = -ENODEV;
+	} else if (lustre_msg_get_flags(req->rq_reqmsg) &
+		   (MSG_REPLAY | MSG_REQ_REPLAY_DONE) &&
+		   !obd->obd_recovering) {
+			DEBUG_REQ(D_ERROR, req,
+				  "Invalid replay without recovery");
+			class_fail_export(req->rq_export);
+			rc = -ENODEV;
+	} else if (lustre_msg_get_transno(req->rq_reqmsg) != 0 &&
+		   !obd->obd_recovering) {
+			DEBUG_REQ(D_ERROR, req, "Invalid req with transno %llu without recovery",
+				  lustre_msg_get_transno(req->rq_reqmsg));
+			class_fail_export(req->rq_export);
+			rc = -ENODEV;
+	}
+
+	if (unlikely(rc < 0)) {
+		req->rq_status = rc;
+		ptlrpc_error(req);
+	}
+	return rc;
+}
+
+static void ptlrpc_at_set_timer(struct ptlrpc_service_part *svcpt)
+{
+	struct ptlrpc_at_array *array = &svcpt->scp_at_array;
+	__s32 next;
+
+	if (array->paa_count == 0) {
+		cfs_timer_disarm(&svcpt->scp_at_timer);
+		return;
+	}
+
+	/* Set timer for closest deadline */
+	next = (__s32)(array->paa_deadline - get_seconds() -
+		       at_early_margin);
+	if (next <= 0) {
+		ptlrpc_at_timer((unsigned long)svcpt);
+	} else {
+		cfs_timer_arm(&svcpt->scp_at_timer, cfs_time_shift(next));
+		CDEBUG(D_INFO, "armed %s at %+ds\n",
+		       svcpt->scp_service->srv_name, next);
+	}
+}
+
+/* Add rpc to early reply check list */
+static int ptlrpc_at_add_timed(struct ptlrpc_request *req)
+{
+	struct ptlrpc_service_part *svcpt = req->rq_rqbd->rqbd_svcpt;
+	struct ptlrpc_at_array *array = &svcpt->scp_at_array;
+	struct ptlrpc_request *rq = NULL;
+	__u32 index;
+
+	if (AT_OFF)
+		return 0;
+
+	if (req->rq_no_reply)
+		return 0;
+
+	if ((lustre_msghdr_get_flags(req->rq_reqmsg) & MSGHDR_AT_SUPPORT) == 0)
+		return -ENOSYS;
+
+	spin_lock(&svcpt->scp_at_lock);
+	LASSERT(list_empty(&req->rq_timed_list));
+
+	index = (unsigned long)req->rq_deadline % array->paa_size;
+	if (array->paa_reqs_count[index] > 0) {
+		/* latest rpcs will have the latest deadlines in the list,
+		 * so search backward. */
+		list_for_each_entry_reverse(rq,
+						&array->paa_reqs_array[index],
+						rq_timed_list) {
+			if (req->rq_deadline >= rq->rq_deadline) {
+				list_add(&req->rq_timed_list,
+					     &rq->rq_timed_list);
+				break;
+			}
+		}
+	}
+
+	/* Add the request at the head of the list */
+	if (list_empty(&req->rq_timed_list))
+		list_add(&req->rq_timed_list,
+			     &array->paa_reqs_array[index]);
+
+	spin_lock(&req->rq_lock);
+	req->rq_at_linked = 1;
+	spin_unlock(&req->rq_lock);
+	req->rq_at_index = index;
+	array->paa_reqs_count[index]++;
+	array->paa_count++;
+	if (array->paa_count == 1 || array->paa_deadline > req->rq_deadline) {
+		array->paa_deadline = req->rq_deadline;
+		ptlrpc_at_set_timer(svcpt);
+	}
+	spin_unlock(&svcpt->scp_at_lock);
+
+	return 0;
+}
+
+static void
+ptlrpc_at_remove_timed(struct ptlrpc_request *req)
+{
+	struct ptlrpc_at_array *array;
+
+	array = &req->rq_rqbd->rqbd_svcpt->scp_at_array;
+
+	/* NB: must call with hold svcpt::scp_at_lock */
+	LASSERT(!list_empty(&req->rq_timed_list));
+	list_del_init(&req->rq_timed_list);
+
+	spin_lock(&req->rq_lock);
+	req->rq_at_linked = 0;
+	spin_unlock(&req->rq_lock);
+
+	array->paa_reqs_count[req->rq_at_index]--;
+	array->paa_count--;
+}
+
+static int ptlrpc_at_send_early_reply(struct ptlrpc_request *req)
+{
+	struct ptlrpc_service_part *svcpt = req->rq_rqbd->rqbd_svcpt;
+	struct ptlrpc_request *reqcopy;
+	struct lustre_msg *reqmsg;
+	long olddl = req->rq_deadline - get_seconds();
+	time_t newdl;
+	int rc;
+
+	/* deadline is when the client expects us to reply, margin is the
+	   difference between clients' and servers' expectations */
+	DEBUG_REQ(D_ADAPTTO, req,
+		  "%ssending early reply (deadline %+lds, margin %+lds) for %d+%d",
+		  AT_OFF ? "AT off - not " : "",
+		  olddl, olddl - at_get(&svcpt->scp_at_estimate),
+		  at_get(&svcpt->scp_at_estimate), at_extra);
+
+	if (AT_OFF)
+		return 0;
+
+	if (olddl < 0) {
+		DEBUG_REQ(D_WARNING, req, "Already past deadline (%+lds), not sending early reply. Consider increasing at_early_margin (%d)?",
+			  olddl, at_early_margin);
+
+		/* Return an error so we're not re-added to the timed list. */
+		return -ETIMEDOUT;
+	}
+
+	if (!(lustre_msghdr_get_flags(req->rq_reqmsg) & MSGHDR_AT_SUPPORT)) {
+		DEBUG_REQ(D_INFO, req, "Wanted to ask client for more time, but no AT support");
+		return -ENOSYS;
+	}
+
+	if (req->rq_export &&
+	    lustre_msg_get_flags(req->rq_reqmsg) &
+	    (MSG_REPLAY | MSG_REQ_REPLAY_DONE | MSG_LOCK_REPLAY_DONE)) {
+		/* During recovery, we don't want to send too many early
+		 * replies, but on the other hand we want to make sure the
+		 * client has enough time to resend if the rpc is lost. So
+		 * during the recovery period send at least 4 early replies,
+		 * spacing them every at_extra if we can. at_estimate should
+		 * always equal this fixed value during recovery. */
+		at_measured(&svcpt->scp_at_estimate, min(at_extra,
+			    req->rq_export->exp_obd->obd_recovery_timeout / 4));
+	} else {
+		/* Fake our processing time into the future to ask the clients
+		 * for some extra amount of time */
+		at_measured(&svcpt->scp_at_estimate, at_extra +
+			    get_seconds() -
+			    req->rq_arrival_time.tv_sec);
+
+		/* Check to see if we've actually increased the deadline -
+		 * we may be past adaptive_max */
+		if (req->rq_deadline >= req->rq_arrival_time.tv_sec +
+		    at_get(&svcpt->scp_at_estimate)) {
+			DEBUG_REQ(D_WARNING, req, "Couldn't add any time (%ld/%ld), not sending early reply\n",
+				  olddl, req->rq_arrival_time.tv_sec +
+				  at_get(&svcpt->scp_at_estimate) -
+				  get_seconds());
+			return -ETIMEDOUT;
+		}
+	}
+	newdl = get_seconds() + at_get(&svcpt->scp_at_estimate);
+
+	reqcopy = ptlrpc_request_cache_alloc(GFP_NOFS);
+	if (reqcopy == NULL)
+		return -ENOMEM;
+	OBD_ALLOC_LARGE(reqmsg, req->rq_reqlen);
+	if (!reqmsg) {
+		rc = -ENOMEM;
+		goto out_free;
+	}
+
+	*reqcopy = *req;
+	reqcopy->rq_reply_state = NULL;
+	reqcopy->rq_rep_swab_mask = 0;
+	reqcopy->rq_pack_bulk = 0;
+	reqcopy->rq_pack_udesc = 0;
+	reqcopy->rq_packed_final = 0;
+	sptlrpc_svc_ctx_addref(reqcopy);
+	/* We only need the reqmsg for the magic */
+	reqcopy->rq_reqmsg = reqmsg;
+	memcpy(reqmsg, req->rq_reqmsg, req->rq_reqlen);
+
+	LASSERT(atomic_read(&req->rq_refcount));
+	/** if it is last refcount then early reply isn't needed */
+	if (atomic_read(&req->rq_refcount) == 1) {
+		DEBUG_REQ(D_ADAPTTO, reqcopy, "Normal reply already sent out, abort sending early reply\n");
+		rc = -EINVAL;
+		goto out;
+	}
+
+	/* Connection ref */
+	reqcopy->rq_export = class_conn2export(
+				     lustre_msg_get_handle(reqcopy->rq_reqmsg));
+	if (reqcopy->rq_export == NULL) {
+		rc = -ENODEV;
+		goto out;
+	}
+
+	/* RPC ref */
+	class_export_rpc_inc(reqcopy->rq_export);
+	if (reqcopy->rq_export->exp_obd &&
+	    reqcopy->rq_export->exp_obd->obd_fail) {
+		rc = -ENODEV;
+		goto out_put;
+	}
+
+	rc = lustre_pack_reply_flags(reqcopy, 1, NULL, NULL, LPRFL_EARLY_REPLY);
+	if (rc)
+		goto out_put;
+
+	rc = ptlrpc_send_reply(reqcopy, PTLRPC_REPLY_EARLY);
+
+	if (!rc) {
+		/* Adjust our own deadline to what we told the client */
+		req->rq_deadline = newdl;
+		req->rq_early_count++; /* number sent, server side */
+	} else {
+		DEBUG_REQ(D_ERROR, req, "Early reply send failed %d", rc);
+	}
+
+	/* Free the (early) reply state from lustre_pack_reply.
+	   (ptlrpc_send_reply takes it's own rs ref, so this is safe here) */
+	ptlrpc_req_drop_rs(reqcopy);
+
+out_put:
+	class_export_rpc_dec(reqcopy->rq_export);
+	class_export_put(reqcopy->rq_export);
+out:
+	sptlrpc_svc_ctx_decref(reqcopy);
+	OBD_FREE_LARGE(reqmsg, req->rq_reqlen);
+out_free:
+	ptlrpc_request_cache_free(reqcopy);
+	return rc;
+}
+
+/* Send early replies to everybody expiring within at_early_margin
+   asking for at_extra time */
+static int ptlrpc_at_check_timed(struct ptlrpc_service_part *svcpt)
+{
+	struct ptlrpc_at_array *array = &svcpt->scp_at_array;
+	struct ptlrpc_request *rq, *n;
+	struct list_head work_list;
+	__u32  index, count;
+	time_t deadline;
+	time_t now = get_seconds();
+	long delay;
+	int first, counter = 0;
+
+	spin_lock(&svcpt->scp_at_lock);
+	if (svcpt->scp_at_check == 0) {
+		spin_unlock(&svcpt->scp_at_lock);
+		return 0;
+	}
+	delay = cfs_time_sub(cfs_time_current(), svcpt->scp_at_checktime);
+	svcpt->scp_at_check = 0;
+
+	if (array->paa_count == 0) {
+		spin_unlock(&svcpt->scp_at_lock);
+		return 0;
+	}
+
+	/* The timer went off, but maybe the nearest rpc already completed. */
+	first = array->paa_deadline - now;
+	if (first > at_early_margin) {
+		/* We've still got plenty of time.  Reset the timer. */
+		ptlrpc_at_set_timer(svcpt);
+		spin_unlock(&svcpt->scp_at_lock);
+		return 0;
+	}
+
+	/* We're close to a timeout, and we don't know how much longer the
+	   server will take. Send early replies to everyone expiring soon. */
+	INIT_LIST_HEAD(&work_list);
+	deadline = -1;
+	index = (unsigned long)array->paa_deadline % array->paa_size;
+	count = array->paa_count;
+	while (count > 0) {
+		count -= array->paa_reqs_count[index];
+		list_for_each_entry_safe(rq, n,
+					     &array->paa_reqs_array[index],
+					     rq_timed_list) {
+			if (rq->rq_deadline > now + at_early_margin) {
+				/* update the earliest deadline */
+				if (deadline == -1 ||
+				    rq->rq_deadline < deadline)
+					deadline = rq->rq_deadline;
+				break;
+			}
+
+			ptlrpc_at_remove_timed(rq);
+			/**
+			 * ptlrpc_server_drop_request() may drop
+			 * refcount to 0 already. Let's check this and
+			 * don't add entry to work_list
+			 */
+			if (likely(atomic_inc_not_zero(&rq->rq_refcount)))
+				list_add(&rq->rq_timed_list, &work_list);
+			counter++;
+		}
+
+		if (++index >= array->paa_size)
+			index = 0;
+	}
+	array->paa_deadline = deadline;
+	/* we have a new earliest deadline, restart the timer */
+	ptlrpc_at_set_timer(svcpt);
+
+	spin_unlock(&svcpt->scp_at_lock);
+
+	CDEBUG(D_ADAPTTO, "timeout in %+ds, asking for %d secs on %d early replies\n",
+	       first, at_extra, counter);
+	if (first < 0) {
+		/* We're already past request deadlines before we even get a
+		   chance to send early replies */
+		LCONSOLE_WARN("%s: This server is not able to keep up with request traffic (cpu-bound).\n",
+			      svcpt->scp_service->srv_name);
+		CWARN("earlyQ=%d reqQ=%d recA=%d, svcEst=%d, delay=" CFS_DURATION_T "(jiff)\n",
+		      counter, svcpt->scp_nreqs_incoming,
+		      svcpt->scp_nreqs_active,
+		      at_get(&svcpt->scp_at_estimate), delay);
+	}
+
+	/* we took additional refcount so entries can't be deleted from list, no
+	 * locking is needed */
+	while (!list_empty(&work_list)) {
+		rq = list_entry(work_list.next, struct ptlrpc_request,
+				    rq_timed_list);
+		list_del_init(&rq->rq_timed_list);
+
+		if (ptlrpc_at_send_early_reply(rq) == 0)
+			ptlrpc_at_add_timed(rq);
+
+		ptlrpc_server_drop_request(rq);
+	}
+
+	return 1; /* return "did_something" for liblustre */
+}
+
+/**
+ * Put the request to the export list if the request may become
+ * a high priority one.
+ */
+static int ptlrpc_server_hpreq_init(struct ptlrpc_service_part *svcpt,
+				    struct ptlrpc_request *req)
+{
+	int rc = 0;
+
+	if (svcpt->scp_service->srv_ops.so_hpreq_handler) {
+		rc = svcpt->scp_service->srv_ops.so_hpreq_handler(req);
+		if (rc < 0)
+			return rc;
+		LASSERT(rc == 0);
+	}
+	if (req->rq_export && req->rq_ops) {
+		/* Perform request specific check. We should do this check
+		 * before the request is added into exp_hp_rpcs list otherwise
+		 * it may hit swab race at LU-1044. */
+		if (req->rq_ops->hpreq_check) {
+			rc = req->rq_ops->hpreq_check(req);
+			/**
+			 * XXX: Out of all current
+			 * ptlrpc_hpreq_ops::hpreq_check(), only
+			 * ldlm_cancel_hpreq_check() can return an error code;
+			 * other functions assert in similar places, which seems
+			 * odd. What also does not seem right is that handlers
+			 * for those RPCs do not assert on the same checks, but
+			 * rather handle the error cases. e.g. see
+			 * ost_rw_hpreq_check(), and ost_brw_read(),
+			 * ost_brw_write().
+			 */
+			if (rc < 0)
+				return rc;
+			LASSERT(rc == 0 || rc == 1);
+		}
+
+		spin_lock_bh(&req->rq_export->exp_rpc_lock);
+		list_add(&req->rq_exp_list,
+			     &req->rq_export->exp_hp_rpcs);
+		spin_unlock_bh(&req->rq_export->exp_rpc_lock);
+	}
+
+	ptlrpc_nrs_req_initialize(svcpt, req, rc);
+
+	return rc;
+}
+
+/** Remove the request from the export list. */
+static void ptlrpc_server_hpreq_fini(struct ptlrpc_request *req)
+{
+	if (req->rq_export && req->rq_ops) {
+		/* refresh lock timeout again so that client has more
+		 * room to send lock cancel RPC. */
+		if (req->rq_ops->hpreq_fini)
+			req->rq_ops->hpreq_fini(req);
+
+		spin_lock_bh(&req->rq_export->exp_rpc_lock);
+		list_del_init(&req->rq_exp_list);
+		spin_unlock_bh(&req->rq_export->exp_rpc_lock);
+	}
+}
+
+static int ptlrpc_hpreq_check(struct ptlrpc_request *req)
+{
+	return 1;
+}
+
+static struct ptlrpc_hpreq_ops ptlrpc_hpreq_common = {
+	.hpreq_check       = ptlrpc_hpreq_check,
+};
+
+/* Hi-Priority RPC check by RPC operation code. */
+int ptlrpc_hpreq_handler(struct ptlrpc_request *req)
+{
+	int opc = lustre_msg_get_opc(req->rq_reqmsg);
+
+	/* Check for export to let only reconnects for not yet evicted
+	 * export to become a HP rpc. */
+	if ((req->rq_export != NULL) &&
+	    (opc == OBD_PING || opc == MDS_CONNECT || opc == OST_CONNECT))
+		req->rq_ops = &ptlrpc_hpreq_common;
+
+	return 0;
+}
+EXPORT_SYMBOL(ptlrpc_hpreq_handler);
+
+static int ptlrpc_server_request_add(struct ptlrpc_service_part *svcpt,
+				     struct ptlrpc_request *req)
+{
+	int	rc;
+
+	rc = ptlrpc_server_hpreq_init(svcpt, req);
+	if (rc < 0)
+		return rc;
+
+	ptlrpc_nrs_req_add(svcpt, req, !!rc);
+
+	return 0;
+}
+
+/**
+ * Allow to handle high priority request
+ * User can call it w/o any lock but need to hold
+ * ptlrpc_service_part::scp_req_lock to get reliable result
+ */
+static bool ptlrpc_server_allow_high(struct ptlrpc_service_part *svcpt,
+				     bool force)
+{
+	int running = svcpt->scp_nthrs_running;
+
+	if (!nrs_svcpt_has_hp(svcpt))
+		return false;
+
+	if (force)
+		return true;
+
+	if (unlikely(svcpt->scp_service->srv_req_portal == MDS_REQUEST_PORTAL &&
+		     CFS_FAIL_PRECHECK(OBD_FAIL_PTLRPC_CANCEL_RESEND))) {
+		/* leave just 1 thread for normal RPCs */
+		running = PTLRPC_NTHRS_INIT;
+		if (svcpt->scp_service->srv_ops.so_hpreq_handler != NULL)
+			running += 1;
+	}
+
+	if (svcpt->scp_nreqs_active >= running - 1)
+		return false;
+
+	if (svcpt->scp_nhreqs_active == 0)
+		return true;
+
+	return !ptlrpc_nrs_req_pending_nolock(svcpt, false) ||
+	       svcpt->scp_hreq_count < svcpt->scp_service->srv_hpreq_ratio;
+}
+
+static bool ptlrpc_server_high_pending(struct ptlrpc_service_part *svcpt,
+				       bool force)
+{
+	return ptlrpc_server_allow_high(svcpt, force) &&
+	       ptlrpc_nrs_req_pending_nolock(svcpt, true);
+}
+
+/**
+ * Only allow normal priority requests on a service that has a high-priority
+ * queue if forced (i.e. cleanup), if there are other high priority requests
+ * already being processed (i.e. those threads can service more high-priority
+ * requests), or if there are enough idle threads that a later thread can do
+ * a high priority request.
+ * User can call it w/o any lock but need to hold
+ * ptlrpc_service_part::scp_req_lock to get reliable result
+ */
+static bool ptlrpc_server_allow_normal(struct ptlrpc_service_part *svcpt,
+				       bool force)
+{
+	int running = svcpt->scp_nthrs_running;
+	if (unlikely(svcpt->scp_service->srv_req_portal == MDS_REQUEST_PORTAL &&
+		     CFS_FAIL_PRECHECK(OBD_FAIL_PTLRPC_CANCEL_RESEND))) {
+		/* leave just 1 thread for normal RPCs */
+		running = PTLRPC_NTHRS_INIT;
+		if (svcpt->scp_service->srv_ops.so_hpreq_handler != NULL)
+			running += 1;
+	}
+
+	if (force ||
+	    svcpt->scp_nreqs_active < running - 2)
+		return true;
+
+	if (svcpt->scp_nreqs_active >= running - 1)
+		return false;
+
+	return svcpt->scp_nhreqs_active > 0 || !nrs_svcpt_has_hp(svcpt);
+}
+
+static bool ptlrpc_server_normal_pending(struct ptlrpc_service_part *svcpt,
+					 bool force)
+{
+	return ptlrpc_server_allow_normal(svcpt, force) &&
+	       ptlrpc_nrs_req_pending_nolock(svcpt, false);
+}
+
+/**
+ * Returns true if there are requests available in incoming
+ * request queue for processing and it is allowed to fetch them.
+ * User can call it w/o any lock but need to hold ptlrpc_service::scp_req_lock
+ * to get reliable result
+ * \see ptlrpc_server_allow_normal
+ * \see ptlrpc_server_allow high
+ */
+static inline bool
+ptlrpc_server_request_pending(struct ptlrpc_service_part *svcpt, bool force)
+{
+	return ptlrpc_server_high_pending(svcpt, force) ||
+	       ptlrpc_server_normal_pending(svcpt, force);
+}
+
+/**
+ * Fetch a request for processing from queue of unprocessed requests.
+ * Favors high-priority requests.
+ * Returns a pointer to fetched request.
+ */
+static struct ptlrpc_request *
+ptlrpc_server_request_get(struct ptlrpc_service_part *svcpt, bool force)
+{
+	struct ptlrpc_request *req = NULL;
+
+	spin_lock(&svcpt->scp_req_lock);
+
+	if (ptlrpc_server_high_pending(svcpt, force)) {
+		req = ptlrpc_nrs_req_get_nolock(svcpt, true, force);
+		if (req != NULL) {
+			svcpt->scp_hreq_count++;
+			goto got_request;
+		}
+	}
+
+	if (ptlrpc_server_normal_pending(svcpt, force)) {
+		req = ptlrpc_nrs_req_get_nolock(svcpt, false, force);
+		if (req != NULL) {
+			svcpt->scp_hreq_count = 0;
+			goto got_request;
+		}
+	}
+
+	spin_unlock(&svcpt->scp_req_lock);
+	return NULL;
+
+got_request:
+	svcpt->scp_nreqs_active++;
+	if (req->rq_hp)
+		svcpt->scp_nhreqs_active++;
+
+	spin_unlock(&svcpt->scp_req_lock);
+
+	if (likely(req->rq_export))
+		class_export_rpc_inc(req->rq_export);
+
+	return req;
+}
+
+/**
+ * Handle freshly incoming reqs, add to timed early reply list,
+ * pass on to regular request queue.
+ * All incoming requests pass through here before getting into
+ * ptlrpc_server_handle_req later on.
+ */
+static int
+ptlrpc_server_handle_req_in(struct ptlrpc_service_part *svcpt,
+			    struct ptlrpc_thread *thread)
+{
+	struct ptlrpc_service	*svc = svcpt->scp_service;
+	struct ptlrpc_request	*req;
+	__u32			deadline;
+	int			rc;
+
+	spin_lock(&svcpt->scp_lock);
+	if (list_empty(&svcpt->scp_req_incoming)) {
+		spin_unlock(&svcpt->scp_lock);
+		return 0;
+	}
+
+	req = list_entry(svcpt->scp_req_incoming.next,
+			     struct ptlrpc_request, rq_list);
+	list_del_init(&req->rq_list);
+	svcpt->scp_nreqs_incoming--;
+	/* Consider this still a "queued" request as far as stats are
+	 * concerned */
+	spin_unlock(&svcpt->scp_lock);
+
+	/* go through security check/transform */
+	rc = sptlrpc_svc_unwrap_request(req);
+	switch (rc) {
+	case SECSVC_OK:
+		break;
+	case SECSVC_COMPLETE:
+		target_send_reply(req, 0, OBD_FAIL_MDS_ALL_REPLY_NET);
+		goto err_req;
+	case SECSVC_DROP:
+		goto err_req;
+	default:
+		LBUG();
+	}
+
+	/*
+	 * for null-flavored rpc, msg has been unpacked by sptlrpc, although
+	 * redo it wouldn't be harmful.
+	 */
+	if (SPTLRPC_FLVR_POLICY(req->rq_flvr.sf_rpc) != SPTLRPC_POLICY_NULL) {
+		rc = ptlrpc_unpack_req_msg(req, req->rq_reqlen);
+		if (rc != 0) {
+			CERROR("error unpacking request: ptl %d from %s x%llu\n",
+			       svc->srv_req_portal, libcfs_id2str(req->rq_peer),
+			       req->rq_xid);
+			goto err_req;
+		}
+	}
+
+	rc = lustre_unpack_req_ptlrpc_body(req, MSG_PTLRPC_BODY_OFF);
+	if (rc) {
+		CERROR("error unpacking ptlrpc body: ptl %d from %s x%llu\n",
+		       svc->srv_req_portal, libcfs_id2str(req->rq_peer),
+		       req->rq_xid);
+		goto err_req;
+	}
+
+	if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_DROP_REQ_OPC) &&
+	    lustre_msg_get_opc(req->rq_reqmsg) == cfs_fail_val) {
+		CERROR("drop incoming rpc opc %u, x%llu\n",
+		       cfs_fail_val, req->rq_xid);
+		goto err_req;
+	}
+
+	rc = -EINVAL;
+	if (lustre_msg_get_type(req->rq_reqmsg) != PTL_RPC_MSG_REQUEST) {
+		CERROR("wrong packet type received (type=%u) from %s\n",
+		       lustre_msg_get_type(req->rq_reqmsg),
+		       libcfs_id2str(req->rq_peer));
+		goto err_req;
+	}
+
+	switch (lustre_msg_get_opc(req->rq_reqmsg)) {
+	case MDS_WRITEPAGE:
+	case OST_WRITE:
+		req->rq_bulk_write = 1;
+		break;
+	case MDS_READPAGE:
+	case OST_READ:
+	case MGS_CONFIG_READ:
+		req->rq_bulk_read = 1;
+		break;
+	}
+
+	CDEBUG(D_RPCTRACE, "got req x%llu\n", req->rq_xid);
+
+	req->rq_export = class_conn2export(
+		lustre_msg_get_handle(req->rq_reqmsg));
+	if (req->rq_export) {
+		rc = ptlrpc_check_req(req);
+		if (rc == 0) {
+			rc = sptlrpc_target_export_check(req->rq_export, req);
+			if (rc)
+				DEBUG_REQ(D_ERROR, req, "DROPPING req with illegal security flavor,");
+		}
+
+		if (rc)
+			goto err_req;
+		ptlrpc_update_export_timer(req->rq_export, 0);
+	}
+
+	/* req_in handling should/must be fast */
+	if (get_seconds() - req->rq_arrival_time.tv_sec > 5)
+		DEBUG_REQ(D_WARNING, req, "Slow req_in handling "CFS_DURATION_T"s",
+			  cfs_time_sub(get_seconds(),
+				       req->rq_arrival_time.tv_sec));
+
+	/* Set rpc server deadline and add it to the timed list */
+	deadline = (lustre_msghdr_get_flags(req->rq_reqmsg) &
+		    MSGHDR_AT_SUPPORT) ?
+		   /* The max time the client expects us to take */
+		   lustre_msg_get_timeout(req->rq_reqmsg) : obd_timeout;
+	req->rq_deadline = req->rq_arrival_time.tv_sec + deadline;
+	if (unlikely(deadline == 0)) {
+		DEBUG_REQ(D_ERROR, req, "Dropping request with 0 timeout");
+		goto err_req;
+	}
+
+	req->rq_svc_thread = thread;
+
+	ptlrpc_at_add_timed(req);
+
+	/* Move it over to the request processing queue */
+	rc = ptlrpc_server_request_add(svcpt, req);
+	if (rc)
+		goto err_req;
+
+	wake_up(&svcpt->scp_waitq);
+	return 1;
+
+err_req:
+	ptlrpc_server_finish_request(svcpt, req);
+
+	return 1;
+}
+
+/**
+ * Main incoming request handling logic.
+ * Calls handler function from service to do actual processing.
+ */
+static int
+ptlrpc_server_handle_request(struct ptlrpc_service_part *svcpt,
+			     struct ptlrpc_thread *thread)
+{
+	struct ptlrpc_service *svc = svcpt->scp_service;
+	struct ptlrpc_request *request;
+	struct timeval	 work_start;
+	struct timeval	 work_end;
+	long		   timediff;
+	int		    rc;
+	int		    fail_opc = 0;
+
+	request = ptlrpc_server_request_get(svcpt, false);
+	if (request == NULL)
+		return 0;
+
+	if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_HPREQ_NOTIMEOUT))
+		fail_opc = OBD_FAIL_PTLRPC_HPREQ_NOTIMEOUT;
+	else if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_HPREQ_TIMEOUT))
+		fail_opc = OBD_FAIL_PTLRPC_HPREQ_TIMEOUT;
+
+	if (unlikely(fail_opc)) {
+		if (request->rq_export && request->rq_ops)
+			OBD_FAIL_TIMEOUT(fail_opc, 4);
+	}
+
+	ptlrpc_rqphase_move(request, RQ_PHASE_INTERPRET);
+
+	if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_DUMP_LOG))
+		libcfs_debug_dumplog();
+
+	do_gettimeofday(&work_start);
+	timediff = cfs_timeval_sub(&work_start, &request->rq_arrival_time,
+				   NULL);
+	if (likely(svc->srv_stats != NULL)) {
+		lprocfs_counter_add(svc->srv_stats, PTLRPC_REQWAIT_CNTR,
+				    timediff);
+		lprocfs_counter_add(svc->srv_stats, PTLRPC_REQQDEPTH_CNTR,
+				    svcpt->scp_nreqs_incoming);
+		lprocfs_counter_add(svc->srv_stats, PTLRPC_REQACTIVE_CNTR,
+				    svcpt->scp_nreqs_active);
+		lprocfs_counter_add(svc->srv_stats, PTLRPC_TIMEOUT,
+				    at_get(&svcpt->scp_at_estimate));
+	}
+
+	rc = lu_context_init(&request->rq_session, LCT_SESSION | LCT_NOREF);
+	if (rc) {
+		CERROR("Failure to initialize session: %d\n", rc);
+		goto out_req;
+	}
+	request->rq_session.lc_thread = thread;
+	request->rq_session.lc_cookie = 0x5;
+	lu_context_enter(&request->rq_session);
+
+	CDEBUG(D_NET, "got req %llu\n", request->rq_xid);
+
+	request->rq_svc_thread = thread;
+	if (thread)
+		request->rq_svc_thread->t_env->le_ses = &request->rq_session;
+
+	if (likely(request->rq_export)) {
+		if (unlikely(ptlrpc_check_req(request)))
+			goto put_conn;
+		ptlrpc_update_export_timer(request->rq_export, timediff >> 19);
+	}
+
+	/* Discard requests queued for longer than the deadline.
+	   The deadline is increased if we send an early reply. */
+	if (get_seconds() > request->rq_deadline) {
+		DEBUG_REQ(D_ERROR, request, "Dropping timed-out request from %s: deadline " CFS_DURATION_T ":" CFS_DURATION_T "s ago\n",
+			  libcfs_id2str(request->rq_peer),
+			  cfs_time_sub(request->rq_deadline,
+				       request->rq_arrival_time.tv_sec),
+			  cfs_time_sub(get_seconds(),
+				       request->rq_deadline));
+		goto put_conn;
+	}
+
+	CDEBUG(D_RPCTRACE, "Handling RPC pname:cluuid+ref:pid:xid:nid:opc %s:%s+%d:%d:x%llu:%s:%d\n",
+	       current_comm(),
+	       (request->rq_export ?
+		(char *)request->rq_export->exp_client_uuid.uuid : "0"),
+	       (request->rq_export ?
+		atomic_read(&request->rq_export->exp_refcount) : -99),
+	       lustre_msg_get_status(request->rq_reqmsg), request->rq_xid,
+	       libcfs_id2str(request->rq_peer),
+	       lustre_msg_get_opc(request->rq_reqmsg));
+
+	if (lustre_msg_get_opc(request->rq_reqmsg) != OBD_PING)
+		CFS_FAIL_TIMEOUT_MS(OBD_FAIL_PTLRPC_PAUSE_REQ, cfs_fail_val);
+
+	rc = svc->srv_ops.so_req_handler(request);
+
+	ptlrpc_rqphase_move(request, RQ_PHASE_COMPLETE);
+
+put_conn:
+	lu_context_exit(&request->rq_session);
+	lu_context_fini(&request->rq_session);
+
+	if (unlikely(get_seconds() > request->rq_deadline)) {
+		DEBUG_REQ(D_WARNING, request,
+			  "Request took longer than estimated ("
+				CFS_DURATION_T":"CFS_DURATION_T
+				"s); client may timeout.",
+			  cfs_time_sub(request->rq_deadline,
+				       request->rq_arrival_time.tv_sec),
+			  cfs_time_sub(get_seconds(),
+				       request->rq_deadline));
+	}
+
+	do_gettimeofday(&work_end);
+	timediff = cfs_timeval_sub(&work_end, &work_start, NULL);
+	CDEBUG(D_RPCTRACE, "Handled RPC pname:cluuid+ref:pid:xid:nid:opc %s:%s+%d:%d:x%llu:%s:%d Request processed in %ldus (%ldus total) trans %llu rc %d/%d\n",
+	       current_comm(),
+	       (request->rq_export ?
+		(char *)request->rq_export->exp_client_uuid.uuid : "0"),
+	       (request->rq_export ?
+		atomic_read(&request->rq_export->exp_refcount) : -99),
+	       lustre_msg_get_status(request->rq_reqmsg),
+	       request->rq_xid,
+	       libcfs_id2str(request->rq_peer),
+	       lustre_msg_get_opc(request->rq_reqmsg),
+	       timediff,
+	       cfs_timeval_sub(&work_end, &request->rq_arrival_time, NULL),
+	       (request->rq_repmsg ?
+		lustre_msg_get_transno(request->rq_repmsg) :
+		request->rq_transno),
+	       request->rq_status,
+	       (request->rq_repmsg ?
+		lustre_msg_get_status(request->rq_repmsg) : -999));
+	if (likely(svc->srv_stats != NULL && request->rq_reqmsg != NULL)) {
+		__u32 op = lustre_msg_get_opc(request->rq_reqmsg);
+		int opc = opcode_offset(op);
+		if (opc > 0 && !(op == LDLM_ENQUEUE || op == MDS_REINT)) {
+			LASSERT(opc < LUSTRE_MAX_OPCODES);
+			lprocfs_counter_add(svc->srv_stats,
+					    opc + EXTRA_MAX_OPCODES,
+					    timediff);
+		}
+	}
+	if (unlikely(request->rq_early_count)) {
+		DEBUG_REQ(D_ADAPTTO, request,
+			  "sent %d early replies before finishing in "
+			  CFS_DURATION_T"s",
+			  request->rq_early_count,
+			  cfs_time_sub(work_end.tv_sec,
+			  request->rq_arrival_time.tv_sec));
+	}
+
+out_req:
+	ptlrpc_server_finish_active_request(svcpt, request);
+
+	return 1;
+}
+
+/**
+ * An internal function to process a single reply state object.
+ */
+static int
+ptlrpc_handle_rs(struct ptlrpc_reply_state *rs)
+{
+	struct ptlrpc_service_part *svcpt = rs->rs_svcpt;
+	struct ptlrpc_service     *svc = svcpt->scp_service;
+	struct obd_export	 *exp;
+	int			nlocks;
+	int			been_handled;
+
+	exp = rs->rs_export;
+
+	LASSERT(rs->rs_difficult);
+	LASSERT(rs->rs_scheduled);
+	LASSERT(list_empty(&rs->rs_list));
+
+	spin_lock(&exp->exp_lock);
+	/* Noop if removed already */
+	list_del_init(&rs->rs_exp_list);
+	spin_unlock(&exp->exp_lock);
+
+	/* The disk commit callback holds exp_uncommitted_replies_lock while it
+	 * iterates over newly committed replies, removing them from
+	 * exp_uncommitted_replies.  It then drops this lock and schedules the
+	 * replies it found for handling here.
+	 *
+	 * We can avoid contention for exp_uncommitted_replies_lock between the
+	 * HRT threads and further commit callbacks by checking rs_committed
+	 * which is set in the commit callback while it holds both
+	 * rs_lock and exp_uncommitted_reples.
+	 *
+	 * If we see rs_committed clear, the commit callback _may_ not have
+	 * handled this reply yet and we race with it to grab
+	 * exp_uncommitted_replies_lock before removing the reply from
+	 * exp_uncommitted_replies.  Note that if we lose the race and the
+	 * reply has already been removed, list_del_init() is a noop.
+	 *
+	 * If we see rs_committed set, we know the commit callback is handling,
+	 * or has handled this reply since store reordering might allow us to
+	 * see rs_committed set out of sequence.  But since this is done
+	 * holding rs_lock, we can be sure it has all completed once we hold
+	 * rs_lock, which we do right next.
+	 */
+	if (!rs->rs_committed) {
+		spin_lock(&exp->exp_uncommitted_replies_lock);
+		list_del_init(&rs->rs_obd_list);
+		spin_unlock(&exp->exp_uncommitted_replies_lock);
+	}
+
+	spin_lock(&rs->rs_lock);
+
+	been_handled = rs->rs_handled;
+	rs->rs_handled = 1;
+
+	nlocks = rs->rs_nlocks;		 /* atomic "steal", but */
+	rs->rs_nlocks = 0;		      /* locks still on rs_locks! */
+
+	if (nlocks == 0 && !been_handled) {
+		/* If we see this, we should already have seen the warning
+		 * in mds_steal_ack_locks()  */
+		CDEBUG(D_HA, "All locks stolen from rs %p x%lld.t%lld o%d NID %s\n",
+		       rs,
+		       rs->rs_xid, rs->rs_transno, rs->rs_opc,
+		       libcfs_nid2str(exp->exp_connection->c_peer.nid));
+	}
+
+	if ((!been_handled && rs->rs_on_net) || nlocks > 0) {
+		spin_unlock(&rs->rs_lock);
+
+		if (!been_handled && rs->rs_on_net) {
+			LNetMDUnlink(rs->rs_md_h);
+			/* Ignore return code; we're racing with completion */
+		}
+
+		while (nlocks-- > 0)
+			ldlm_lock_decref(&rs->rs_locks[nlocks],
+					 rs->rs_modes[nlocks]);
+
+		spin_lock(&rs->rs_lock);
+	}
+
+	rs->rs_scheduled = 0;
+
+	if (!rs->rs_on_net) {
+		/* Off the net */
+		spin_unlock(&rs->rs_lock);
+
+		class_export_put(exp);
+		rs->rs_export = NULL;
+		ptlrpc_rs_decref(rs);
+		if (atomic_dec_and_test(&svcpt->scp_nreps_difficult) &&
+		    svc->srv_is_stopping)
+			wake_up_all(&svcpt->scp_waitq);
+		return 1;
+	}
+
+	/* still on the net; callback will schedule */
+	spin_unlock(&rs->rs_lock);
+	return 1;
+}
+
+
+static void
+ptlrpc_check_rqbd_pool(struct ptlrpc_service_part *svcpt)
+{
+	int avail = svcpt->scp_nrqbds_posted;
+	int low_water = test_req_buffer_pressure ? 0 :
+			svcpt->scp_service->srv_nbuf_per_group / 2;
+
+	/* NB I'm not locking; just looking. */
+
+	/* CAVEAT EMPTOR: We might be allocating buffers here because we've
+	 * allowed the request history to grow out of control.  We could put a
+	 * sanity check on that here and cull some history if we need the
+	 * space. */
+
+	if (avail <= low_water)
+		ptlrpc_grow_req_bufs(svcpt, 1);
+
+	if (svcpt->scp_service->srv_stats) {
+		lprocfs_counter_add(svcpt->scp_service->srv_stats,
+				    PTLRPC_REQBUF_AVAIL_CNTR, avail);
+	}
+}
+
+static int
+ptlrpc_retry_rqbds(void *arg)
+{
+	struct ptlrpc_service_part *svcpt = (struct ptlrpc_service_part *)arg;
+
+	svcpt->scp_rqbd_timeout = 0;
+	return -ETIMEDOUT;
+}
+
+static inline int
+ptlrpc_threads_enough(struct ptlrpc_service_part *svcpt)
+{
+	return svcpt->scp_nreqs_active <
+	       svcpt->scp_nthrs_running - 1 -
+	       (svcpt->scp_service->srv_ops.so_hpreq_handler != NULL);
+}
+
+/**
+ * allowed to create more threads
+ * user can call it w/o any lock but need to hold
+ * ptlrpc_service_part::scp_lock to get reliable result
+ */
+static inline int
+ptlrpc_threads_increasable(struct ptlrpc_service_part *svcpt)
+{
+	return svcpt->scp_nthrs_running +
+	       svcpt->scp_nthrs_starting <
+	       svcpt->scp_service->srv_nthrs_cpt_limit;
+}
+
+/**
+ * too many requests and allowed to create more threads
+ */
+static inline int
+ptlrpc_threads_need_create(struct ptlrpc_service_part *svcpt)
+{
+	return !ptlrpc_threads_enough(svcpt) &&
+		ptlrpc_threads_increasable(svcpt);
+}
+
+static inline int
+ptlrpc_thread_stopping(struct ptlrpc_thread *thread)
+{
+	return thread_is_stopping(thread) ||
+	       thread->t_svcpt->scp_service->srv_is_stopping;
+}
+
+static inline int
+ptlrpc_rqbd_pending(struct ptlrpc_service_part *svcpt)
+{
+	return !list_empty(&svcpt->scp_rqbd_idle) &&
+	       svcpt->scp_rqbd_timeout == 0;
+}
+
+static inline int
+ptlrpc_at_check(struct ptlrpc_service_part *svcpt)
+{
+	return svcpt->scp_at_check;
+}
+
+/**
+ * requests wait on preprocessing
+ * user can call it w/o any lock but need to hold
+ * ptlrpc_service_part::scp_lock to get reliable result
+ */
+static inline int
+ptlrpc_server_request_incoming(struct ptlrpc_service_part *svcpt)
+{
+	return !list_empty(&svcpt->scp_req_incoming);
+}
+
+static __attribute__((__noinline__)) int
+ptlrpc_wait_event(struct ptlrpc_service_part *svcpt,
+		  struct ptlrpc_thread *thread)
+{
+	/* Don't exit while there are replies to be handled */
+	struct l_wait_info lwi = LWI_TIMEOUT(svcpt->scp_rqbd_timeout,
+					     ptlrpc_retry_rqbds, svcpt);
+
+	/* XXX: Add this back when libcfs watchdog is merged upstream
+	lc_watchdog_disable(thread->t_watchdog);
+	 */
+
+	cond_resched();
+
+	l_wait_event_exclusive_head(svcpt->scp_waitq,
+				ptlrpc_thread_stopping(thread) ||
+				ptlrpc_server_request_incoming(svcpt) ||
+				ptlrpc_server_request_pending(svcpt, false) ||
+				ptlrpc_rqbd_pending(svcpt) ||
+				ptlrpc_at_check(svcpt), &lwi);
+
+	if (ptlrpc_thread_stopping(thread))
+		return -EINTR;
+
+	/*
+	lc_watchdog_touch(thread->t_watchdog,
+			  ptlrpc_server_get_timeout(svcpt));
+	 */
+	return 0;
+}
+
+/**
+ * Main thread body for service threads.
+ * Waits in a loop waiting for new requests to process to appear.
+ * Every time an incoming requests is added to its queue, a waitq
+ * is woken up and one of the threads will handle it.
+ */
+static int ptlrpc_main(void *arg)
+{
+	struct ptlrpc_thread		*thread = (struct ptlrpc_thread *)arg;
+	struct ptlrpc_service_part	*svcpt = thread->t_svcpt;
+	struct ptlrpc_service		*svc = svcpt->scp_service;
+	struct ptlrpc_reply_state	*rs;
+	struct group_info *ginfo = NULL;
+	struct lu_env *env;
+	int counter = 0, rc = 0;
+
+	thread->t_pid = current_pid();
+	unshare_fs_struct();
+
+	/* NB: we will call cfs_cpt_bind() for all threads, because we
+	 * might want to run lustre server only on a subset of system CPUs,
+	 * in that case ->scp_cpt is CFS_CPT_ANY */
+	rc = cfs_cpt_bind(svc->srv_cptable, svcpt->scp_cpt);
+	if (rc != 0) {
+		CWARN("%s: failed to bind %s on CPT %d\n",
+		      svc->srv_name, thread->t_name, svcpt->scp_cpt);
+	}
+
+	ginfo = groups_alloc(0);
+	if (!ginfo) {
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	set_current_groups(ginfo);
+	put_group_info(ginfo);
+
+	if (svc->srv_ops.so_thr_init != NULL) {
+		rc = svc->srv_ops.so_thr_init(thread);
+		if (rc)
+			goto out;
+	}
+
+	OBD_ALLOC_PTR(env);
+	if (env == NULL) {
+		rc = -ENOMEM;
+		goto out_srv_fini;
+	}
+
+	rc = lu_context_init(&env->le_ctx,
+			     svc->srv_ctx_tags|LCT_REMEMBER|LCT_NOREF);
+	if (rc)
+		goto out_srv_fini;
+
+	thread->t_env = env;
+	env->le_ctx.lc_thread = thread;
+	env->le_ctx.lc_cookie = 0x6;
+
+	while (!list_empty(&svcpt->scp_rqbd_idle)) {
+		rc = ptlrpc_server_post_idle_rqbds(svcpt);
+		if (rc >= 0)
+			continue;
+
+		CERROR("Failed to post rqbd for %s on CPT %d: %d\n",
+			svc->srv_name, svcpt->scp_cpt, rc);
+		goto out_srv_fini;
+	}
+
+	/* Alloc reply state structure for this one */
+	OBD_ALLOC_LARGE(rs, svc->srv_max_reply_size);
+	if (!rs) {
+		rc = -ENOMEM;
+		goto out_srv_fini;
+	}
+
+	spin_lock(&svcpt->scp_lock);
+
+	LASSERT(thread_is_starting(thread));
+	thread_clear_flags(thread, SVC_STARTING);
+
+	LASSERT(svcpt->scp_nthrs_starting == 1);
+	svcpt->scp_nthrs_starting--;
+
+	/* SVC_STOPPING may already be set here if someone else is trying
+	 * to stop the service while this new thread has been dynamically
+	 * forked. We still set SVC_RUNNING to let our creator know that
+	 * we are now running, however we will exit as soon as possible */
+	thread_add_flags(thread, SVC_RUNNING);
+	svcpt->scp_nthrs_running++;
+	spin_unlock(&svcpt->scp_lock);
+
+	/* wake up our creator in case he's still waiting. */
+	wake_up(&thread->t_ctl_waitq);
+
+	/*
+	thread->t_watchdog = lc_watchdog_add(ptlrpc_server_get_timeout(svcpt),
+					     NULL, NULL);
+	 */
+
+	spin_lock(&svcpt->scp_rep_lock);
+	list_add(&rs->rs_list, &svcpt->scp_rep_idle);
+	wake_up(&svcpt->scp_rep_waitq);
+	spin_unlock(&svcpt->scp_rep_lock);
+
+	CDEBUG(D_NET, "service thread %d (#%d) started\n", thread->t_id,
+	       svcpt->scp_nthrs_running);
+
+	/* XXX maintain a list of all managed devices: insert here */
+	while (!ptlrpc_thread_stopping(thread)) {
+		if (ptlrpc_wait_event(svcpt, thread))
+			break;
+
+		ptlrpc_check_rqbd_pool(svcpt);
+
+		if (ptlrpc_threads_need_create(svcpt)) {
+			/* Ignore return code - we tried... */
+			ptlrpc_start_thread(svcpt, 0);
+		}
+
+		/* Process all incoming reqs before handling any */
+		if (ptlrpc_server_request_incoming(svcpt)) {
+			lu_context_enter(&env->le_ctx);
+			env->le_ses = NULL;
+			ptlrpc_server_handle_req_in(svcpt, thread);
+			lu_context_exit(&env->le_ctx);
+
+			/* but limit ourselves in case of flood */
+			if (counter++ < 100)
+				continue;
+			counter = 0;
+		}
+
+		if (ptlrpc_at_check(svcpt))
+			ptlrpc_at_check_timed(svcpt);
+
+		if (ptlrpc_server_request_pending(svcpt, false)) {
+			lu_context_enter(&env->le_ctx);
+			ptlrpc_server_handle_request(svcpt, thread);
+			lu_context_exit(&env->le_ctx);
+		}
+
+		if (ptlrpc_rqbd_pending(svcpt) &&
+		    ptlrpc_server_post_idle_rqbds(svcpt) < 0) {
+			/* I just failed to repost request buffers.
+			 * Wait for a timeout (unless something else
+			 * happens) before I try again */
+			svcpt->scp_rqbd_timeout = cfs_time_seconds(1) / 10;
+			CDEBUG(D_RPCTRACE, "Posted buffers: %d\n",
+			       svcpt->scp_nrqbds_posted);
+		}
+	}
+
+	/*
+	lc_watchdog_delete(thread->t_watchdog);
+	thread->t_watchdog = NULL;
+	*/
+
+out_srv_fini:
+	/*
+	 * deconstruct service specific state created by ptlrpc_start_thread()
+	 */
+	if (svc->srv_ops.so_thr_done != NULL)
+		svc->srv_ops.so_thr_done(thread);
+
+	if (env != NULL) {
+		lu_context_fini(&env->le_ctx);
+		OBD_FREE_PTR(env);
+	}
+out:
+	CDEBUG(D_RPCTRACE, "service thread [ %p : %u ] %d exiting: rc %d\n",
+	       thread, thread->t_pid, thread->t_id, rc);
+
+	spin_lock(&svcpt->scp_lock);
+	if (thread_test_and_clear_flags(thread, SVC_STARTING))
+		svcpt->scp_nthrs_starting--;
+
+	if (thread_test_and_clear_flags(thread, SVC_RUNNING)) {
+		/* must know immediately */
+		svcpt->scp_nthrs_running--;
+	}
+
+	thread->t_id = rc;
+	thread_add_flags(thread, SVC_STOPPED);
+
+	wake_up(&thread->t_ctl_waitq);
+	spin_unlock(&svcpt->scp_lock);
+
+	return rc;
+}
+
+static int hrt_dont_sleep(struct ptlrpc_hr_thread *hrt,
+			  struct list_head *replies)
+{
+	int result;
+
+	spin_lock(&hrt->hrt_lock);
+
+	list_splice_init(&hrt->hrt_queue, replies);
+	result = ptlrpc_hr.hr_stopping || !list_empty(replies);
+
+	spin_unlock(&hrt->hrt_lock);
+	return result;
+}
+
+/**
+ * Main body of "handle reply" function.
+ * It processes acked reply states
+ */
+static int ptlrpc_hr_main(void *arg)
+{
+	struct ptlrpc_hr_thread		*hrt = (struct ptlrpc_hr_thread *)arg;
+	struct ptlrpc_hr_partition	*hrp = hrt->hrt_partition;
+	LIST_HEAD			(replies);
+	char				threadname[20];
+	int				rc;
+
+	snprintf(threadname, sizeof(threadname), "ptlrpc_hr%02d_%03d",
+		 hrp->hrp_cpt, hrt->hrt_id);
+	unshare_fs_struct();
+
+	rc = cfs_cpt_bind(ptlrpc_hr.hr_cpt_table, hrp->hrp_cpt);
+	if (rc != 0) {
+		CWARN("Failed to bind %s on CPT %d of CPT table %p: rc = %d\n",
+		      threadname, hrp->hrp_cpt, ptlrpc_hr.hr_cpt_table, rc);
+	}
+
+	atomic_inc(&hrp->hrp_nstarted);
+	wake_up(&ptlrpc_hr.hr_waitq);
+
+	while (!ptlrpc_hr.hr_stopping) {
+		l_wait_condition(hrt->hrt_waitq, hrt_dont_sleep(hrt, &replies));
+
+		while (!list_empty(&replies)) {
+			struct ptlrpc_reply_state *rs;
+
+			rs = list_entry(replies.prev,
+					    struct ptlrpc_reply_state,
+					    rs_list);
+			list_del_init(&rs->rs_list);
+			ptlrpc_handle_rs(rs);
+		}
+	}
+
+	atomic_inc(&hrp->hrp_nstopped);
+	wake_up(&ptlrpc_hr.hr_waitq);
+
+	return 0;
+}
+
+static void ptlrpc_stop_hr_threads(void)
+{
+	struct ptlrpc_hr_partition	*hrp;
+	int				i;
+	int				j;
+
+	ptlrpc_hr.hr_stopping = 1;
+
+	cfs_percpt_for_each(hrp, i, ptlrpc_hr.hr_partitions) {
+		if (hrp->hrp_thrs == NULL)
+			continue; /* uninitialized */
+		for (j = 0; j < hrp->hrp_nthrs; j++)
+			wake_up_all(&hrp->hrp_thrs[j].hrt_waitq);
+	}
+
+	cfs_percpt_for_each(hrp, i, ptlrpc_hr.hr_partitions) {
+		if (hrp->hrp_thrs == NULL)
+			continue; /* uninitialized */
+		wait_event(ptlrpc_hr.hr_waitq,
+			       atomic_read(&hrp->hrp_nstopped) ==
+			       atomic_read(&hrp->hrp_nstarted));
+	}
+}
+
+static int ptlrpc_start_hr_threads(void)
+{
+	struct ptlrpc_hr_partition	*hrp;
+	int				i;
+	int				j;
+
+	cfs_percpt_for_each(hrp, i, ptlrpc_hr.hr_partitions) {
+		int	rc = 0;
+
+		for (j = 0; j < hrp->hrp_nthrs; j++) {
+			struct	ptlrpc_hr_thread *hrt = &hrp->hrp_thrs[j];
+			rc = PTR_ERR(kthread_run(ptlrpc_hr_main,
+						 &hrp->hrp_thrs[j],
+						 "ptlrpc_hr%02d_%03d",
+						 hrp->hrp_cpt,
+						 hrt->hrt_id));
+			if (IS_ERR_VALUE(rc))
+				break;
+		}
+		wait_event(ptlrpc_hr.hr_waitq,
+			       atomic_read(&hrp->hrp_nstarted) == j);
+		if (!IS_ERR_VALUE(rc))
+			continue;
+
+		CERROR("Reply handling thread %d:%d Failed on starting: rc = %d\n",
+		       i, j, rc);
+		ptlrpc_stop_hr_threads();
+		return rc;
+	}
+	return 0;
+}
+
+static void ptlrpc_svcpt_stop_threads(struct ptlrpc_service_part *svcpt)
+{
+	struct l_wait_info	lwi = { 0 };
+	struct ptlrpc_thread	*thread;
+	LIST_HEAD		(zombie);
+
+	CDEBUG(D_INFO, "Stopping threads for service %s\n",
+	       svcpt->scp_service->srv_name);
+
+	spin_lock(&svcpt->scp_lock);
+	/* let the thread know that we would like it to stop asap */
+	list_for_each_entry(thread, &svcpt->scp_threads, t_link) {
+		CDEBUG(D_INFO, "Stopping thread %s #%u\n",
+		       svcpt->scp_service->srv_thread_name, thread->t_id);
+		thread_add_flags(thread, SVC_STOPPING);
+	}
+
+	wake_up_all(&svcpt->scp_waitq);
+
+	while (!list_empty(&svcpt->scp_threads)) {
+		thread = list_entry(svcpt->scp_threads.next,
+					struct ptlrpc_thread, t_link);
+		if (thread_is_stopped(thread)) {
+			list_del(&thread->t_link);
+			list_add(&thread->t_link, &zombie);
+			continue;
+		}
+		spin_unlock(&svcpt->scp_lock);
+
+		CDEBUG(D_INFO, "waiting for stopping-thread %s #%u\n",
+		       svcpt->scp_service->srv_thread_name, thread->t_id);
+		l_wait_event(thread->t_ctl_waitq,
+			     thread_is_stopped(thread), &lwi);
+
+		spin_lock(&svcpt->scp_lock);
+	}
+
+	spin_unlock(&svcpt->scp_lock);
+
+	while (!list_empty(&zombie)) {
+		thread = list_entry(zombie.next,
+					struct ptlrpc_thread, t_link);
+		list_del(&thread->t_link);
+		OBD_FREE_PTR(thread);
+	}
+}
+
+/**
+ * Stops all threads of a particular service \a svc
+ */
+void ptlrpc_stop_all_threads(struct ptlrpc_service *svc)
+{
+	struct ptlrpc_service_part *svcpt;
+	int			   i;
+
+	ptlrpc_service_for_each_part(svcpt, i, svc) {
+		if (svcpt->scp_service != NULL)
+			ptlrpc_svcpt_stop_threads(svcpt);
+	}
+}
+EXPORT_SYMBOL(ptlrpc_stop_all_threads);
+
+int ptlrpc_start_threads(struct ptlrpc_service *svc)
+{
+	int	rc = 0;
+	int	i;
+	int	j;
+
+	/* We require 2 threads min, see note in ptlrpc_server_handle_request */
+	LASSERT(svc->srv_nthrs_cpt_init >= PTLRPC_NTHRS_INIT);
+
+	for (i = 0; i < svc->srv_ncpts; i++) {
+		for (j = 0; j < svc->srv_nthrs_cpt_init; j++) {
+			rc = ptlrpc_start_thread(svc->srv_parts[i], 1);
+			if (rc == 0)
+				continue;
+
+			if (rc != -EMFILE)
+				goto failed;
+			/* We have enough threads, don't start more. b=15759 */
+			break;
+		}
+	}
+
+	return 0;
+ failed:
+	CERROR("cannot start %s thread #%d_%d: rc %d\n",
+	       svc->srv_thread_name, i, j, rc);
+	ptlrpc_stop_all_threads(svc);
+	return rc;
+}
+EXPORT_SYMBOL(ptlrpc_start_threads);
+
+int ptlrpc_start_thread(struct ptlrpc_service_part *svcpt, int wait)
+{
+	struct l_wait_info	lwi = { 0 };
+	struct ptlrpc_thread	*thread;
+	struct ptlrpc_service	*svc;
+	int			rc;
+
+	LASSERT(svcpt != NULL);
+
+	svc = svcpt->scp_service;
+
+	CDEBUG(D_RPCTRACE, "%s[%d] started %d min %d max %d\n",
+	       svc->srv_name, svcpt->scp_cpt, svcpt->scp_nthrs_running,
+	       svc->srv_nthrs_cpt_init, svc->srv_nthrs_cpt_limit);
+
+ again:
+	if (unlikely(svc->srv_is_stopping))
+		return -ESRCH;
+
+	if (!ptlrpc_threads_increasable(svcpt) ||
+	    (OBD_FAIL_CHECK(OBD_FAIL_TGT_TOOMANY_THREADS) &&
+	     svcpt->scp_nthrs_running == svc->srv_nthrs_cpt_init - 1))
+		return -EMFILE;
+
+	OBD_CPT_ALLOC_PTR(thread, svc->srv_cptable, svcpt->scp_cpt);
+	if (thread == NULL)
+		return -ENOMEM;
+	init_waitqueue_head(&thread->t_ctl_waitq);
+
+	spin_lock(&svcpt->scp_lock);
+	if (!ptlrpc_threads_increasable(svcpt)) {
+		spin_unlock(&svcpt->scp_lock);
+		OBD_FREE_PTR(thread);
+		return -EMFILE;
+	}
+
+	if (svcpt->scp_nthrs_starting != 0) {
+		/* serialize starting because some modules (obdfilter)
+		 * might require unique and contiguous t_id */
+		LASSERT(svcpt->scp_nthrs_starting == 1);
+		spin_unlock(&svcpt->scp_lock);
+		OBD_FREE_PTR(thread);
+		if (wait) {
+			CDEBUG(D_INFO, "Waiting for creating thread %s #%d\n",
+			       svc->srv_thread_name, svcpt->scp_thr_nextid);
+			schedule();
+			goto again;
+		}
+
+		CDEBUG(D_INFO, "Creating thread %s #%d race, retry later\n",
+		       svc->srv_thread_name, svcpt->scp_thr_nextid);
+		return -EAGAIN;
+	}
+
+	svcpt->scp_nthrs_starting++;
+	thread->t_id = svcpt->scp_thr_nextid++;
+	thread_add_flags(thread, SVC_STARTING);
+	thread->t_svcpt = svcpt;
+
+	list_add(&thread->t_link, &svcpt->scp_threads);
+	spin_unlock(&svcpt->scp_lock);
+
+	if (svcpt->scp_cpt >= 0) {
+		snprintf(thread->t_name, sizeof(thread->t_name), "%s%02d_%03d",
+			 svc->srv_thread_name, svcpt->scp_cpt, thread->t_id);
+	} else {
+		snprintf(thread->t_name, sizeof(thread->t_name), "%s_%04d",
+			 svc->srv_thread_name, thread->t_id);
+	}
+
+	CDEBUG(D_RPCTRACE, "starting thread '%s'\n", thread->t_name);
+	rc = PTR_ERR(kthread_run(ptlrpc_main, thread, "%s", thread->t_name));
+	if (IS_ERR_VALUE(rc)) {
+		CERROR("cannot start thread '%s': rc %d\n",
+		       thread->t_name, rc);
+		spin_lock(&svcpt->scp_lock);
+		--svcpt->scp_nthrs_starting;
+		if (thread_is_stopping(thread)) {
+			/* this ptlrpc_thread is being handled
+			 * by ptlrpc_svcpt_stop_threads now
+			 */
+			thread_add_flags(thread, SVC_STOPPED);
+			wake_up(&thread->t_ctl_waitq);
+			spin_unlock(&svcpt->scp_lock);
+		} else {
+			list_del(&thread->t_link);
+			spin_unlock(&svcpt->scp_lock);
+			OBD_FREE_PTR(thread);
+		}
+		return rc;
+	}
+
+	if (!wait)
+		return 0;
+
+	l_wait_event(thread->t_ctl_waitq,
+		     thread_is_running(thread) || thread_is_stopped(thread),
+		     &lwi);
+
+	rc = thread_is_stopped(thread) ? thread->t_id : 0;
+	return rc;
+}
+
+int ptlrpc_hr_init(void)
+{
+	struct ptlrpc_hr_partition	*hrp;
+	struct ptlrpc_hr_thread		*hrt;
+	int				rc;
+	int				i;
+	int				j;
+	int				weight;
+
+	memset(&ptlrpc_hr, 0, sizeof(ptlrpc_hr));
+	ptlrpc_hr.hr_cpt_table = cfs_cpt_table;
+
+	ptlrpc_hr.hr_partitions = cfs_percpt_alloc(ptlrpc_hr.hr_cpt_table,
+						   sizeof(*hrp));
+	if (ptlrpc_hr.hr_partitions == NULL)
+		return -ENOMEM;
+
+	init_waitqueue_head(&ptlrpc_hr.hr_waitq);
+
+	weight = cpumask_weight(topology_thread_cpumask(0));
+
+	cfs_percpt_for_each(hrp, i, ptlrpc_hr.hr_partitions) {
+		hrp->hrp_cpt = i;
+
+		atomic_set(&hrp->hrp_nstarted, 0);
+		atomic_set(&hrp->hrp_nstopped, 0);
+
+		hrp->hrp_nthrs = cfs_cpt_weight(ptlrpc_hr.hr_cpt_table, i);
+		hrp->hrp_nthrs /= weight;
+
+		LASSERT(hrp->hrp_nthrs > 0);
+		OBD_CPT_ALLOC(hrp->hrp_thrs, ptlrpc_hr.hr_cpt_table, i,
+			      hrp->hrp_nthrs * sizeof(*hrt));
+		if (hrp->hrp_thrs == NULL) {
+			rc = -ENOMEM;
+			goto out;
+		}
+
+		for (j = 0; j < hrp->hrp_nthrs; j++) {
+			hrt = &hrp->hrp_thrs[j];
+
+			hrt->hrt_id = j;
+			hrt->hrt_partition = hrp;
+			init_waitqueue_head(&hrt->hrt_waitq);
+			spin_lock_init(&hrt->hrt_lock);
+			INIT_LIST_HEAD(&hrt->hrt_queue);
+		}
+	}
+
+	rc = ptlrpc_start_hr_threads();
+out:
+	if (rc != 0)
+		ptlrpc_hr_fini();
+	return rc;
+}
+
+void ptlrpc_hr_fini(void)
+{
+	struct ptlrpc_hr_partition	*hrp;
+	int				i;
+
+	if (ptlrpc_hr.hr_partitions == NULL)
+		return;
+
+	ptlrpc_stop_hr_threads();
+
+	cfs_percpt_for_each(hrp, i, ptlrpc_hr.hr_partitions) {
+		if (hrp->hrp_thrs != NULL) {
+			OBD_FREE(hrp->hrp_thrs,
+				 hrp->hrp_nthrs * sizeof(hrp->hrp_thrs[0]));
+		}
+	}
+
+	cfs_percpt_free(ptlrpc_hr.hr_partitions);
+	ptlrpc_hr.hr_partitions = NULL;
+}
+
+
+/**
+ * Wait until all already scheduled replies are processed.
+ */
+static void ptlrpc_wait_replies(struct ptlrpc_service_part *svcpt)
+{
+	while (1) {
+		int rc;
+		struct l_wait_info lwi = LWI_TIMEOUT(cfs_time_seconds(10),
+						     NULL, NULL);
+
+		rc = l_wait_event(svcpt->scp_waitq,
+		     atomic_read(&svcpt->scp_nreps_difficult) == 0, &lwi);
+		if (rc == 0)
+			break;
+		CWARN("Unexpectedly long timeout %s %p\n",
+		      svcpt->scp_service->srv_name, svcpt->scp_service);
+	}
+}
+
+static void
+ptlrpc_service_del_atimer(struct ptlrpc_service *svc)
+{
+	struct ptlrpc_service_part	*svcpt;
+	int				i;
+
+	/* early disarm AT timer... */
+	ptlrpc_service_for_each_part(svcpt, i, svc) {
+		if (svcpt->scp_service != NULL)
+			cfs_timer_disarm(&svcpt->scp_at_timer);
+	}
+}
+
+static void
+ptlrpc_service_unlink_rqbd(struct ptlrpc_service *svc)
+{
+	struct ptlrpc_service_part	  *svcpt;
+	struct ptlrpc_request_buffer_desc *rqbd;
+	struct l_wait_info		  lwi;
+	int				  rc;
+	int				  i;
+
+	/* All history will be culled when the next request buffer is
+	 * freed in ptlrpc_service_purge_all() */
+	svc->srv_hist_nrqbds_cpt_max = 0;
+
+	rc = LNetClearLazyPortal(svc->srv_req_portal);
+	LASSERT(rc == 0);
+
+	ptlrpc_service_for_each_part(svcpt, i, svc) {
+		if (svcpt->scp_service == NULL)
+			break;
+
+		/* Unlink all the request buffers.  This forces a 'final'
+		 * event with its 'unlink' flag set for each posted rqbd */
+		list_for_each_entry(rqbd, &svcpt->scp_rqbd_posted,
+					rqbd_list) {
+			rc = LNetMDUnlink(rqbd->rqbd_md_h);
+			LASSERT(rc == 0 || rc == -ENOENT);
+		}
+	}
+
+	ptlrpc_service_for_each_part(svcpt, i, svc) {
+		if (svcpt->scp_service == NULL)
+			break;
+
+		/* Wait for the network to release any buffers
+		 * it's currently filling */
+		spin_lock(&svcpt->scp_lock);
+		while (svcpt->scp_nrqbds_posted != 0) {
+			spin_unlock(&svcpt->scp_lock);
+			/* Network access will complete in finite time but
+			 * the HUGE timeout lets us CWARN for visibility
+			 * of sluggish NALs */
+			lwi = LWI_TIMEOUT_INTERVAL(
+					cfs_time_seconds(LONG_UNLINK),
+					cfs_time_seconds(1), NULL, NULL);
+			rc = l_wait_event(svcpt->scp_waitq,
+					  svcpt->scp_nrqbds_posted == 0, &lwi);
+			if (rc == -ETIMEDOUT) {
+				CWARN("Service %s waiting for request buffers\n",
+				      svcpt->scp_service->srv_name);
+			}
+			spin_lock(&svcpt->scp_lock);
+		}
+		spin_unlock(&svcpt->scp_lock);
+	}
+}
+
+static void
+ptlrpc_service_purge_all(struct ptlrpc_service *svc)
+{
+	struct ptlrpc_service_part		*svcpt;
+	struct ptlrpc_request_buffer_desc	*rqbd;
+	struct ptlrpc_request			*req;
+	struct ptlrpc_reply_state		*rs;
+	int					i;
+
+	ptlrpc_service_for_each_part(svcpt, i, svc) {
+		if (svcpt->scp_service == NULL)
+			break;
+
+		spin_lock(&svcpt->scp_rep_lock);
+		while (!list_empty(&svcpt->scp_rep_active)) {
+			rs = list_entry(svcpt->scp_rep_active.next,
+					    struct ptlrpc_reply_state, rs_list);
+			spin_lock(&rs->rs_lock);
+			ptlrpc_schedule_difficult_reply(rs);
+			spin_unlock(&rs->rs_lock);
+		}
+		spin_unlock(&svcpt->scp_rep_lock);
+
+		/* purge the request queue.  NB No new replies (rqbds
+		 * all unlinked) and no service threads, so I'm the only
+		 * thread noodling the request queue now */
+		while (!list_empty(&svcpt->scp_req_incoming)) {
+			req = list_entry(svcpt->scp_req_incoming.next,
+					     struct ptlrpc_request, rq_list);
+
+			list_del(&req->rq_list);
+			svcpt->scp_nreqs_incoming--;
+			ptlrpc_server_finish_request(svcpt, req);
+		}
+
+		while (ptlrpc_server_request_pending(svcpt, true)) {
+			req = ptlrpc_server_request_get(svcpt, true);
+			ptlrpc_server_finish_active_request(svcpt, req);
+		}
+
+		LASSERT(list_empty(&svcpt->scp_rqbd_posted));
+		LASSERT(svcpt->scp_nreqs_incoming == 0);
+		LASSERT(svcpt->scp_nreqs_active == 0);
+		/* history should have been culled by
+		 * ptlrpc_server_finish_request */
+		LASSERT(svcpt->scp_hist_nrqbds == 0);
+
+		/* Now free all the request buffers since nothing
+		 * references them any more... */
+
+		while (!list_empty(&svcpt->scp_rqbd_idle)) {
+			rqbd = list_entry(svcpt->scp_rqbd_idle.next,
+					      struct ptlrpc_request_buffer_desc,
+					      rqbd_list);
+			ptlrpc_free_rqbd(rqbd);
+		}
+		ptlrpc_wait_replies(svcpt);
+
+		while (!list_empty(&svcpt->scp_rep_idle)) {
+			rs = list_entry(svcpt->scp_rep_idle.next,
+					    struct ptlrpc_reply_state,
+					    rs_list);
+			list_del(&rs->rs_list);
+			OBD_FREE_LARGE(rs, svc->srv_max_reply_size);
+		}
+	}
+}
+
+static void
+ptlrpc_service_free(struct ptlrpc_service *svc)
+{
+	struct ptlrpc_service_part	*svcpt;
+	struct ptlrpc_at_array		*array;
+	int				i;
+
+	ptlrpc_service_for_each_part(svcpt, i, svc) {
+		if (svcpt->scp_service == NULL)
+			break;
+
+		/* In case somebody rearmed this in the meantime */
+		cfs_timer_disarm(&svcpt->scp_at_timer);
+		array = &svcpt->scp_at_array;
+
+		if (array->paa_reqs_array != NULL) {
+			OBD_FREE(array->paa_reqs_array,
+				 sizeof(struct list_head) * array->paa_size);
+			array->paa_reqs_array = NULL;
+		}
+
+		if (array->paa_reqs_count != NULL) {
+			OBD_FREE(array->paa_reqs_count,
+				 sizeof(__u32) * array->paa_size);
+			array->paa_reqs_count = NULL;
+		}
+	}
+
+	ptlrpc_service_for_each_part(svcpt, i, svc)
+		OBD_FREE_PTR(svcpt);
+
+	if (svc->srv_cpts != NULL)
+		cfs_expr_list_values_free(svc->srv_cpts, svc->srv_ncpts);
+
+	OBD_FREE(svc, offsetof(struct ptlrpc_service,
+			       srv_parts[svc->srv_ncpts]));
+}
+
+int ptlrpc_unregister_service(struct ptlrpc_service *service)
+{
+	CDEBUG(D_NET, "%s: tearing down\n", service->srv_name);
+
+	service->srv_is_stopping = 1;
+
+	mutex_lock(&ptlrpc_all_services_mutex);
+	list_del_init(&service->srv_list);
+	mutex_unlock(&ptlrpc_all_services_mutex);
+
+	ptlrpc_service_del_atimer(service);
+	ptlrpc_stop_all_threads(service);
+
+	ptlrpc_service_unlink_rqbd(service);
+	ptlrpc_service_purge_all(service);
+	ptlrpc_service_nrs_cleanup(service);
+
+	ptlrpc_lprocfs_unregister_service(service);
+
+	ptlrpc_service_free(service);
+
+	return 0;
+}
+EXPORT_SYMBOL(ptlrpc_unregister_service);
+
+/**
+ * Returns 0 if the service is healthy.
+ *
+ * Right now, it just checks to make sure that requests aren't languishing
+ * in the queue.  We'll use this health check to govern whether a node needs
+ * to be shot, so it's intentionally non-aggressive. */
+int ptlrpc_svcpt_health_check(struct ptlrpc_service_part *svcpt)
+{
+	struct ptlrpc_request		*request = NULL;
+	struct timeval			right_now;
+	long				timediff;
+
+	do_gettimeofday(&right_now);
+
+	spin_lock(&svcpt->scp_req_lock);
+	/* How long has the next entry been waiting? */
+	if (ptlrpc_server_high_pending(svcpt, true))
+		request = ptlrpc_nrs_req_peek_nolock(svcpt, true);
+	else if (ptlrpc_server_normal_pending(svcpt, true))
+		request = ptlrpc_nrs_req_peek_nolock(svcpt, false);
+
+	if (request == NULL) {
+		spin_unlock(&svcpt->scp_req_lock);
+		return 0;
+	}
+
+	timediff = cfs_timeval_sub(&right_now, &request->rq_arrival_time, NULL);
+	spin_unlock(&svcpt->scp_req_lock);
+
+	if ((timediff / ONE_MILLION) >
+	    (AT_OFF ? obd_timeout * 3 / 2 : at_max)) {
+		CERROR("%s: unhealthy - request has been waiting %lds\n",
+		       svcpt->scp_service->srv_name, timediff / ONE_MILLION);
+		return -1;
+	}
+
+	return 0;
+}
+
+int
+ptlrpc_service_health_check(struct ptlrpc_service *svc)
+{
+	struct ptlrpc_service_part	*svcpt;
+	int				i;
+
+	if (svc == NULL)
+		return 0;
+
+	ptlrpc_service_for_each_part(svcpt, i, svc) {
+		int rc = ptlrpc_svcpt_health_check(svcpt);
+
+		if (rc != 0)
+			return rc;
+	}
+	return 0;
+}
+EXPORT_SYMBOL(ptlrpc_service_health_check);
diff --git a/drivers/staging/lustre/lustre/ptlrpc/wiretest.c b/drivers/staging/lustre/lustre/ptlrpc/wiretest.c
new file mode 100644
index 000000000..d6d92046c
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ptlrpc/wiretest.c
@@ -0,0 +1,4492 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#define DEBUG_SUBSYSTEM S_RPC
+
+#include <linux/fs.h>
+#include <linux/posix_acl_xattr.h>
+
+#include "../include/obd_support.h"
+#include "../include/obd_class.h"
+#include "../include/lustre_net.h"
+#include "../include/lustre_disk.h"
+void lustre_assert_wire_constants(void)
+{
+	 /* Wire protocol assertions generated by 'wirecheck'
+	  * (make -C lustre/utils newwiretest)
+	  * running on Linux centos6-bis 2.6.32-358.0.1.el6-head
+	  * #3 SMP Wed Apr 17 17:37:43 CEST 2013
+	  * with gcc version 4.4.6 20110731 (Red Hat 4.4.6-3) (GCC)
+	  */
+
+	/* Constants... */
+	LASSERTF(PTL_RPC_MSG_REQUEST == 4711, "found %lld\n",
+		 (long long)PTL_RPC_MSG_REQUEST);
+	LASSERTF(PTL_RPC_MSG_ERR == 4712, "found %lld\n",
+		 (long long)PTL_RPC_MSG_ERR);
+	LASSERTF(PTL_RPC_MSG_REPLY == 4713, "found %lld\n",
+		 (long long)PTL_RPC_MSG_REPLY);
+	LASSERTF(MDS_DIR_END_OFF == 0xfffffffffffffffeULL, "found 0x%.16llxULL\n",
+		 MDS_DIR_END_OFF);
+	LASSERTF(DEAD_HANDLE_MAGIC == 0xdeadbeefcafebabeULL, "found 0x%.16llxULL\n",
+		 DEAD_HANDLE_MAGIC);
+	CLASSERT(MTI_NAME_MAXLEN == 64);
+	LASSERTF(OST_REPLY == 0, "found %lld\n",
+		 (long long)OST_REPLY);
+	LASSERTF(OST_GETATTR == 1, "found %lld\n",
+		 (long long)OST_GETATTR);
+	LASSERTF(OST_SETATTR == 2, "found %lld\n",
+		 (long long)OST_SETATTR);
+	LASSERTF(OST_READ == 3, "found %lld\n",
+		 (long long)OST_READ);
+	LASSERTF(OST_WRITE == 4, "found %lld\n",
+		 (long long)OST_WRITE);
+	LASSERTF(OST_CREATE == 5, "found %lld\n",
+		 (long long)OST_CREATE);
+	LASSERTF(OST_DESTROY == 6, "found %lld\n",
+		 (long long)OST_DESTROY);
+	LASSERTF(OST_GET_INFO == 7, "found %lld\n",
+		 (long long)OST_GET_INFO);
+	LASSERTF(OST_CONNECT == 8, "found %lld\n",
+		 (long long)OST_CONNECT);
+	LASSERTF(OST_DISCONNECT == 9, "found %lld\n",
+		 (long long)OST_DISCONNECT);
+	LASSERTF(OST_PUNCH == 10, "found %lld\n",
+		 (long long)OST_PUNCH);
+	LASSERTF(OST_OPEN == 11, "found %lld\n",
+		 (long long)OST_OPEN);
+	LASSERTF(OST_CLOSE == 12, "found %lld\n",
+		 (long long)OST_CLOSE);
+	LASSERTF(OST_STATFS == 13, "found %lld\n",
+		 (long long)OST_STATFS);
+	LASSERTF(OST_SYNC == 16, "found %lld\n",
+		 (long long)OST_SYNC);
+	LASSERTF(OST_SET_INFO == 17, "found %lld\n",
+		 (long long)OST_SET_INFO);
+	LASSERTF(OST_QUOTACHECK == 18, "found %lld\n",
+		 (long long)OST_QUOTACHECK);
+	LASSERTF(OST_QUOTACTL == 19, "found %lld\n",
+		 (long long)OST_QUOTACTL);
+	LASSERTF(OST_QUOTA_ADJUST_QUNIT == 20, "found %lld\n",
+		 (long long)OST_QUOTA_ADJUST_QUNIT);
+	LASSERTF(OST_LAST_OPC == 21, "found %lld\n",
+		 (long long)OST_LAST_OPC);
+	LASSERTF(OBD_OBJECT_EOF == 0xffffffffffffffffULL, "found 0x%.16llxULL\n",
+		 OBD_OBJECT_EOF);
+	LASSERTF(OST_MIN_PRECREATE == 32, "found %lld\n",
+		 (long long)OST_MIN_PRECREATE);
+	LASSERTF(OST_MAX_PRECREATE == 20000, "found %lld\n",
+		 (long long)OST_MAX_PRECREATE);
+	LASSERTF(OST_LVB_ERR_INIT == 0xffbadbad80000000ULL, "found 0x%.16llxULL\n",
+		 OST_LVB_ERR_INIT);
+	LASSERTF(OST_LVB_ERR_MASK == 0xffbadbad00000000ULL, "found 0x%.16llxULL\n",
+		 OST_LVB_ERR_MASK);
+	LASSERTF(MDS_FIRST_OPC == 33, "found %lld\n",
+		 (long long)MDS_FIRST_OPC);
+	LASSERTF(MDS_GETATTR == 33, "found %lld\n",
+		 (long long)MDS_GETATTR);
+	LASSERTF(MDS_GETATTR_NAME == 34, "found %lld\n",
+		 (long long)MDS_GETATTR_NAME);
+	LASSERTF(MDS_CLOSE == 35, "found %lld\n",
+		 (long long)MDS_CLOSE);
+	LASSERTF(MDS_REINT == 36, "found %lld\n",
+		 (long long)MDS_REINT);
+	LASSERTF(MDS_READPAGE == 37, "found %lld\n",
+		 (long long)MDS_READPAGE);
+	LASSERTF(MDS_CONNECT == 38, "found %lld\n",
+		 (long long)MDS_CONNECT);
+	LASSERTF(MDS_DISCONNECT == 39, "found %lld\n",
+		 (long long)MDS_DISCONNECT);
+	LASSERTF(MDS_GETSTATUS == 40, "found %lld\n",
+		 (long long)MDS_GETSTATUS);
+	LASSERTF(MDS_STATFS == 41, "found %lld\n",
+		 (long long)MDS_STATFS);
+	LASSERTF(MDS_PIN == 42, "found %lld\n",
+		 (long long)MDS_PIN);
+	LASSERTF(MDS_UNPIN == 43, "found %lld\n",
+		 (long long)MDS_UNPIN);
+	LASSERTF(MDS_SYNC == 44, "found %lld\n",
+		 (long long)MDS_SYNC);
+	LASSERTF(MDS_DONE_WRITING == 45, "found %lld\n",
+		 (long long)MDS_DONE_WRITING);
+	LASSERTF(MDS_SET_INFO == 46, "found %lld\n",
+		 (long long)MDS_SET_INFO);
+	LASSERTF(MDS_QUOTACHECK == 47, "found %lld\n",
+		 (long long)MDS_QUOTACHECK);
+	LASSERTF(MDS_QUOTACTL == 48, "found %lld\n",
+		 (long long)MDS_QUOTACTL);
+	LASSERTF(MDS_GETXATTR == 49, "found %lld\n",
+		 (long long)MDS_GETXATTR);
+	LASSERTF(MDS_SETXATTR == 50, "found %lld\n",
+		 (long long)MDS_SETXATTR);
+	LASSERTF(MDS_WRITEPAGE == 51, "found %lld\n",
+		 (long long)MDS_WRITEPAGE);
+	LASSERTF(MDS_IS_SUBDIR == 52, "found %lld\n",
+		 (long long)MDS_IS_SUBDIR);
+	LASSERTF(MDS_GET_INFO == 53, "found %lld\n",
+		 (long long)MDS_GET_INFO);
+	LASSERTF(MDS_HSM_STATE_GET == 54, "found %lld\n",
+		 (long long)MDS_HSM_STATE_GET);
+	LASSERTF(MDS_HSM_STATE_SET == 55, "found %lld\n",
+		 (long long)MDS_HSM_STATE_SET);
+	LASSERTF(MDS_HSM_ACTION == 56, "found %lld\n",
+		 (long long)MDS_HSM_ACTION);
+	LASSERTF(MDS_HSM_PROGRESS == 57, "found %lld\n",
+		 (long long)MDS_HSM_PROGRESS);
+	LASSERTF(MDS_HSM_REQUEST == 58, "found %lld\n",
+		 (long long)MDS_HSM_REQUEST);
+	LASSERTF(MDS_HSM_CT_REGISTER == 59, "found %lld\n",
+		 (long long)MDS_HSM_CT_REGISTER);
+	LASSERTF(MDS_HSM_CT_UNREGISTER == 60, "found %lld\n",
+		 (long long)MDS_HSM_CT_UNREGISTER);
+	LASSERTF(MDS_SWAP_LAYOUTS == 61, "found %lld\n",
+		 (long long)MDS_SWAP_LAYOUTS);
+	LASSERTF(MDS_LAST_OPC == 62, "found %lld\n",
+		 (long long)MDS_LAST_OPC);
+	LASSERTF(REINT_SETATTR == 1, "found %lld\n",
+		 (long long)REINT_SETATTR);
+	LASSERTF(REINT_CREATE == 2, "found %lld\n",
+		 (long long)REINT_CREATE);
+	LASSERTF(REINT_LINK == 3, "found %lld\n",
+		 (long long)REINT_LINK);
+	LASSERTF(REINT_UNLINK == 4, "found %lld\n",
+		 (long long)REINT_UNLINK);
+	LASSERTF(REINT_RENAME == 5, "found %lld\n",
+		 (long long)REINT_RENAME);
+	LASSERTF(REINT_OPEN == 6, "found %lld\n",
+		 (long long)REINT_OPEN);
+	LASSERTF(REINT_SETXATTR == 7, "found %lld\n",
+		 (long long)REINT_SETXATTR);
+	LASSERTF(REINT_RMENTRY == 8, "found %lld\n",
+		 (long long)REINT_RMENTRY);
+	LASSERTF(REINT_MAX == 9, "found %lld\n",
+		 (long long)REINT_MAX);
+	LASSERTF(DISP_IT_EXECD == 0x00000001UL, "found 0x%.8xUL\n",
+		(unsigned)DISP_IT_EXECD);
+	LASSERTF(DISP_LOOKUP_EXECD == 0x00000002UL, "found 0x%.8xUL\n",
+		(unsigned)DISP_LOOKUP_EXECD);
+	LASSERTF(DISP_LOOKUP_NEG == 0x00000004UL, "found 0x%.8xUL\n",
+		(unsigned)DISP_LOOKUP_NEG);
+	LASSERTF(DISP_LOOKUP_POS == 0x00000008UL, "found 0x%.8xUL\n",
+		(unsigned)DISP_LOOKUP_POS);
+	LASSERTF(DISP_OPEN_CREATE == 0x00000010UL, "found 0x%.8xUL\n",
+		(unsigned)DISP_OPEN_CREATE);
+	LASSERTF(DISP_OPEN_OPEN == 0x00000020UL, "found 0x%.8xUL\n",
+		(unsigned)DISP_OPEN_OPEN);
+	LASSERTF(DISP_ENQ_COMPLETE == 0x00400000UL, "found 0x%.8xUL\n",
+		(unsigned)DISP_ENQ_COMPLETE);
+	LASSERTF(DISP_ENQ_OPEN_REF == 0x00800000UL, "found 0x%.8xUL\n",
+		(unsigned)DISP_ENQ_OPEN_REF);
+	LASSERTF(DISP_ENQ_CREATE_REF == 0x01000000UL, "found 0x%.8xUL\n",
+		(unsigned)DISP_ENQ_CREATE_REF);
+	LASSERTF(DISP_OPEN_LOCK == 0x02000000UL, "found 0x%.8xUL\n",
+		(unsigned)DISP_OPEN_LOCK);
+	LASSERTF(MDS_STATUS_CONN == 1, "found %lld\n",
+		 (long long)MDS_STATUS_CONN);
+	LASSERTF(MDS_STATUS_LOV == 2, "found %lld\n",
+		 (long long)MDS_STATUS_LOV);
+	LASSERTF(LUSTRE_BFLAG_UNCOMMITTED_WRITES == 1, "found %lld\n",
+		 (long long)LUSTRE_BFLAG_UNCOMMITTED_WRITES);
+	LASSERTF(MF_SOM_CHANGE == 0x00000001UL, "found 0x%.8xUL\n",
+		(unsigned)MF_SOM_CHANGE);
+	LASSERTF(MF_EPOCH_OPEN == 0x00000002UL, "found 0x%.8xUL\n",
+		(unsigned)MF_EPOCH_OPEN);
+	LASSERTF(MF_EPOCH_CLOSE == 0x00000004UL, "found 0x%.8xUL\n",
+		(unsigned)MF_EPOCH_CLOSE);
+	LASSERTF(MF_MDC_CANCEL_FID1 == 0x00000008UL, "found 0x%.8xUL\n",
+		(unsigned)MF_MDC_CANCEL_FID1);
+	LASSERTF(MF_MDC_CANCEL_FID2 == 0x00000010UL, "found 0x%.8xUL\n",
+		(unsigned)MF_MDC_CANCEL_FID2);
+	LASSERTF(MF_MDC_CANCEL_FID3 == 0x00000020UL, "found 0x%.8xUL\n",
+		(unsigned)MF_MDC_CANCEL_FID3);
+	LASSERTF(MF_MDC_CANCEL_FID4 == 0x00000040UL, "found 0x%.8xUL\n",
+		(unsigned)MF_MDC_CANCEL_FID4);
+	LASSERTF(MF_SOM_AU == 0x00000080UL, "found 0x%.8xUL\n",
+		(unsigned)MF_SOM_AU);
+	LASSERTF(MF_GETATTR_LOCK == 0x00000100UL, "found 0x%.8xUL\n",
+		(unsigned)MF_GETATTR_LOCK);
+	LASSERTF(MDS_ATTR_MODE == 0x0000000000000001ULL, "found 0x%.16llxULL\n",
+			(long long)MDS_ATTR_MODE);
+	LASSERTF(MDS_ATTR_UID == 0x0000000000000002ULL, "found 0x%.16llxULL\n",
+			(long long)MDS_ATTR_UID);
+	LASSERTF(MDS_ATTR_GID == 0x0000000000000004ULL, "found 0x%.16llxULL\n",
+			(long long)MDS_ATTR_GID);
+	LASSERTF(MDS_ATTR_SIZE == 0x0000000000000008ULL, "found 0x%.16llxULL\n",
+			(long long)MDS_ATTR_SIZE);
+	LASSERTF(MDS_ATTR_ATIME == 0x0000000000000010ULL, "found 0x%.16llxULL\n",
+			(long long)MDS_ATTR_ATIME);
+	LASSERTF(MDS_ATTR_MTIME == 0x0000000000000020ULL, "found 0x%.16llxULL\n",
+			(long long)MDS_ATTR_MTIME);
+	LASSERTF(MDS_ATTR_CTIME == 0x0000000000000040ULL, "found 0x%.16llxULL\n",
+			(long long)MDS_ATTR_CTIME);
+	LASSERTF(MDS_ATTR_ATIME_SET == 0x0000000000000080ULL, "found 0x%.16llxULL\n",
+			(long long)MDS_ATTR_ATIME_SET);
+	LASSERTF(MDS_ATTR_MTIME_SET == 0x0000000000000100ULL, "found 0x%.16llxULL\n",
+			(long long)MDS_ATTR_MTIME_SET);
+	LASSERTF(MDS_ATTR_FORCE == 0x0000000000000200ULL, "found 0x%.16llxULL\n",
+			(long long)MDS_ATTR_FORCE);
+	LASSERTF(MDS_ATTR_ATTR_FLAG == 0x0000000000000400ULL, "found 0x%.16llxULL\n",
+			(long long)MDS_ATTR_ATTR_FLAG);
+	LASSERTF(MDS_ATTR_KILL_SUID == 0x0000000000000800ULL, "found 0x%.16llxULL\n",
+			(long long)MDS_ATTR_KILL_SUID);
+	LASSERTF(MDS_ATTR_KILL_SGID == 0x0000000000001000ULL, "found 0x%.16llxULL\n",
+			(long long)MDS_ATTR_KILL_SGID);
+	LASSERTF(MDS_ATTR_CTIME_SET == 0x0000000000002000ULL, "found 0x%.16llxULL\n",
+			(long long)MDS_ATTR_CTIME_SET);
+	LASSERTF(MDS_ATTR_FROM_OPEN == 0x0000000000004000ULL, "found 0x%.16llxULL\n",
+			(long long)MDS_ATTR_FROM_OPEN);
+	LASSERTF(MDS_ATTR_BLOCKS == 0x0000000000008000ULL, "found 0x%.16llxULL\n",
+			(long long)MDS_ATTR_BLOCKS);
+	LASSERTF(FLD_QUERY == 900, "found %lld\n",
+		 (long long)FLD_QUERY);
+	LASSERTF(FLD_FIRST_OPC == 900, "found %lld\n",
+		 (long long)FLD_FIRST_OPC);
+	LASSERTF(FLD_LAST_OPC == 901, "found %lld\n",
+		 (long long)FLD_LAST_OPC);
+	LASSERTF(SEQ_QUERY == 700, "found %lld\n",
+		 (long long)SEQ_QUERY);
+	LASSERTF(SEQ_FIRST_OPC == 700, "found %lld\n",
+		 (long long)SEQ_FIRST_OPC);
+	LASSERTF(SEQ_LAST_OPC == 701, "found %lld\n",
+		 (long long)SEQ_LAST_OPC);
+	LASSERTF(SEQ_ALLOC_SUPER == 0, "found %lld\n",
+		 (long long)SEQ_ALLOC_SUPER);
+	LASSERTF(SEQ_ALLOC_META == 1, "found %lld\n",
+		 (long long)SEQ_ALLOC_META);
+	LASSERTF(LDLM_ENQUEUE == 101, "found %lld\n",
+		 (long long)LDLM_ENQUEUE);
+	LASSERTF(LDLM_CONVERT == 102, "found %lld\n",
+		 (long long)LDLM_CONVERT);
+	LASSERTF(LDLM_CANCEL == 103, "found %lld\n",
+		 (long long)LDLM_CANCEL);
+	LASSERTF(LDLM_BL_CALLBACK == 104, "found %lld\n",
+		 (long long)LDLM_BL_CALLBACK);
+	LASSERTF(LDLM_CP_CALLBACK == 105, "found %lld\n",
+		 (long long)LDLM_CP_CALLBACK);
+	LASSERTF(LDLM_GL_CALLBACK == 106, "found %lld\n",
+		 (long long)LDLM_GL_CALLBACK);
+	LASSERTF(LDLM_SET_INFO == 107, "found %lld\n",
+		 (long long)LDLM_SET_INFO);
+	LASSERTF(LDLM_LAST_OPC == 108, "found %lld\n",
+		 (long long)LDLM_LAST_OPC);
+	LASSERTF(LCK_MINMODE == 0, "found %lld\n",
+		 (long long)LCK_MINMODE);
+	LASSERTF(LCK_EX == 1, "found %lld\n",
+		 (long long)LCK_EX);
+	LASSERTF(LCK_PW == 2, "found %lld\n",
+		 (long long)LCK_PW);
+	LASSERTF(LCK_PR == 4, "found %lld\n",
+		 (long long)LCK_PR);
+	LASSERTF(LCK_CW == 8, "found %lld\n",
+		 (long long)LCK_CW);
+	LASSERTF(LCK_CR == 16, "found %lld\n",
+		 (long long)LCK_CR);
+	LASSERTF(LCK_NL == 32, "found %lld\n",
+		 (long long)LCK_NL);
+	LASSERTF(LCK_GROUP == 64, "found %lld\n",
+		 (long long)LCK_GROUP);
+	LASSERTF(LCK_COS == 128, "found %lld\n",
+		 (long long)LCK_COS);
+	LASSERTF(LCK_MAXMODE == 129, "found %lld\n",
+		 (long long)LCK_MAXMODE);
+	LASSERTF(LCK_MODE_NUM == 8, "found %lld\n",
+		 (long long)LCK_MODE_NUM);
+	CLASSERT(LDLM_PLAIN == 10);
+	CLASSERT(LDLM_EXTENT == 11);
+	CLASSERT(LDLM_FLOCK == 12);
+	CLASSERT(LDLM_IBITS == 13);
+	CLASSERT(LDLM_MAX_TYPE == 14);
+	CLASSERT(LUSTRE_RES_ID_SEQ_OFF == 0);
+	CLASSERT(LUSTRE_RES_ID_VER_OID_OFF == 1);
+	LASSERTF(UPDATE_OBJ == 1000, "found %lld\n",
+		 (long long)UPDATE_OBJ);
+	LASSERTF(UPDATE_LAST_OPC == 1001, "found %lld\n",
+		 (long long)UPDATE_LAST_OPC);
+	CLASSERT(LUSTRE_RES_ID_QUOTA_SEQ_OFF == 2);
+	CLASSERT(LUSTRE_RES_ID_QUOTA_VER_OID_OFF == 3);
+	CLASSERT(LUSTRE_RES_ID_HSH_OFF == 3);
+	CLASSERT(LQUOTA_TYPE_USR == 0);
+	CLASSERT(LQUOTA_TYPE_GRP == 1);
+	CLASSERT(LQUOTA_RES_MD == 1);
+	CLASSERT(LQUOTA_RES_DT == 2);
+	LASSERTF(OBD_PING == 400, "found %lld\n",
+		 (long long)OBD_PING);
+	LASSERTF(OBD_LOG_CANCEL == 401, "found %lld\n",
+		 (long long)OBD_LOG_CANCEL);
+	LASSERTF(OBD_QC_CALLBACK == 402, "found %lld\n",
+		 (long long)OBD_QC_CALLBACK);
+	LASSERTF(OBD_IDX_READ == 403, "found %lld\n",
+		 (long long)OBD_IDX_READ);
+	LASSERTF(OBD_LAST_OPC == 404, "found %lld\n",
+		 (long long)OBD_LAST_OPC);
+	LASSERTF(QUOTA_DQACQ == 601, "found %lld\n",
+		 (long long)QUOTA_DQACQ);
+	LASSERTF(QUOTA_DQREL == 602, "found %lld\n",
+		 (long long)QUOTA_DQREL);
+	LASSERTF(QUOTA_LAST_OPC == 603, "found %lld\n",
+		 (long long)QUOTA_LAST_OPC);
+	LASSERTF(MGS_CONNECT == 250, "found %lld\n",
+		 (long long)MGS_CONNECT);
+	LASSERTF(MGS_DISCONNECT == 251, "found %lld\n",
+		 (long long)MGS_DISCONNECT);
+	LASSERTF(MGS_EXCEPTION == 252, "found %lld\n",
+		 (long long)MGS_EXCEPTION);
+	LASSERTF(MGS_TARGET_REG == 253, "found %lld\n",
+		 (long long)MGS_TARGET_REG);
+	LASSERTF(MGS_TARGET_DEL == 254, "found %lld\n",
+		 (long long)MGS_TARGET_DEL);
+	LASSERTF(MGS_SET_INFO == 255, "found %lld\n",
+		 (long long)MGS_SET_INFO);
+	LASSERTF(MGS_LAST_OPC == 257, "found %lld\n",
+		 (long long)MGS_LAST_OPC);
+	LASSERTF(SEC_CTX_INIT == 801, "found %lld\n",
+		 (long long)SEC_CTX_INIT);
+	LASSERTF(SEC_CTX_INIT_CONT == 802, "found %lld\n",
+		 (long long)SEC_CTX_INIT_CONT);
+	LASSERTF(SEC_CTX_FINI == 803, "found %lld\n",
+		 (long long)SEC_CTX_FINI);
+	LASSERTF(SEC_LAST_OPC == 804, "found %lld\n",
+		 (long long)SEC_LAST_OPC);
+	/* Sizes and Offsets */
+
+	/* Checks for struct obd_uuid */
+	LASSERTF((int)sizeof(struct obd_uuid) == 40, "found %lld\n",
+		 (long long)(int)sizeof(struct obd_uuid));
+
+	/* Checks for struct lu_seq_range */
+	LASSERTF((int)sizeof(struct lu_seq_range) == 24, "found %lld\n",
+		 (long long)(int)sizeof(struct lu_seq_range));
+	LASSERTF((int)offsetof(struct lu_seq_range, lsr_start) == 0, "found %lld\n",
+		 (long long)(int)offsetof(struct lu_seq_range, lsr_start));
+	LASSERTF((int)sizeof(((struct lu_seq_range *)0)->lsr_start) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct lu_seq_range *)0)->lsr_start));
+	LASSERTF((int)offsetof(struct lu_seq_range, lsr_end) == 8, "found %lld\n",
+		 (long long)(int)offsetof(struct lu_seq_range, lsr_end));
+	LASSERTF((int)sizeof(((struct lu_seq_range *)0)->lsr_end) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct lu_seq_range *)0)->lsr_end));
+	LASSERTF((int)offsetof(struct lu_seq_range, lsr_index) == 16, "found %lld\n",
+		 (long long)(int)offsetof(struct lu_seq_range, lsr_index));
+	LASSERTF((int)sizeof(((struct lu_seq_range *)0)->lsr_index) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct lu_seq_range *)0)->lsr_index));
+	LASSERTF((int)offsetof(struct lu_seq_range, lsr_flags) == 20, "found %lld\n",
+		 (long long)(int)offsetof(struct lu_seq_range, lsr_flags));
+	LASSERTF((int)sizeof(((struct lu_seq_range *)0)->lsr_flags) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct lu_seq_range *)0)->lsr_flags));
+	LASSERTF(LU_SEQ_RANGE_MDT == 0, "found %lld\n",
+		 (long long)LU_SEQ_RANGE_MDT);
+	LASSERTF(LU_SEQ_RANGE_OST == 1, "found %lld\n",
+		 (long long)LU_SEQ_RANGE_OST);
+
+	/* Checks for struct lustre_mdt_attrs */
+	LASSERTF((int)sizeof(struct lustre_mdt_attrs) == 24, "found %lld\n",
+		 (long long)(int)sizeof(struct lustre_mdt_attrs));
+	LASSERTF((int)offsetof(struct lustre_mdt_attrs, lma_compat) == 0, "found %lld\n",
+		 (long long)(int)offsetof(struct lustre_mdt_attrs, lma_compat));
+	LASSERTF((int)sizeof(((struct lustre_mdt_attrs *)0)->lma_compat) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct lustre_mdt_attrs *)0)->lma_compat));
+	LASSERTF((int)offsetof(struct lustre_mdt_attrs, lma_incompat) == 4, "found %lld\n",
+		 (long long)(int)offsetof(struct lustre_mdt_attrs, lma_incompat));
+	LASSERTF((int)sizeof(((struct lustre_mdt_attrs *)0)->lma_incompat) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct lustre_mdt_attrs *)0)->lma_incompat));
+	LASSERTF((int)offsetof(struct lustre_mdt_attrs, lma_self_fid) == 8, "found %lld\n",
+		 (long long)(int)offsetof(struct lustre_mdt_attrs, lma_self_fid));
+	LASSERTF((int)sizeof(((struct lustre_mdt_attrs *)0)->lma_self_fid) == 16, "found %lld\n",
+		 (long long)(int)sizeof(((struct lustre_mdt_attrs *)0)->lma_self_fid));
+	LASSERTF(LMAI_RELEASED == 0x00000001UL, "found 0x%.8xUL\n",
+		(unsigned)LMAI_RELEASED);
+	LASSERTF(LMAC_HSM == 0x00000001UL, "found 0x%.8xUL\n",
+		(unsigned)LMAC_HSM);
+	LASSERTF(LMAC_SOM == 0x00000002UL, "found 0x%.8xUL\n",
+		(unsigned)LMAC_SOM);
+	LASSERTF(LMAC_NOT_IN_OI == 0x00000004UL, "found 0x%.8xUL\n",
+		(unsigned)LMAC_NOT_IN_OI);
+	LASSERTF(LMAC_FID_ON_OST == 0x00000008UL, "found 0x%.8xUL\n",
+		(unsigned)LMAC_FID_ON_OST);
+	LASSERTF(OBJ_CREATE == 1, "found %lld\n",
+		 (long long)OBJ_CREATE);
+	LASSERTF(OBJ_DESTROY == 2, "found %lld\n",
+		 (long long)OBJ_DESTROY);
+	LASSERTF(OBJ_REF_ADD == 3, "found %lld\n",
+		 (long long)OBJ_REF_ADD);
+	LASSERTF(OBJ_REF_DEL == 4, "found %lld\n",
+		 (long long)OBJ_REF_DEL);
+	LASSERTF(OBJ_ATTR_SET == 5, "found %lld\n",
+		 (long long)OBJ_ATTR_SET);
+	LASSERTF(OBJ_ATTR_GET == 6, "found %lld\n",
+		 (long long)OBJ_ATTR_GET);
+	LASSERTF(OBJ_XATTR_SET == 7, "found %lld\n",
+		 (long long)OBJ_XATTR_SET);
+	LASSERTF(OBJ_XATTR_GET == 8, "found %lld\n",
+		 (long long)OBJ_XATTR_GET);
+	LASSERTF(OBJ_INDEX_LOOKUP == 9, "found %lld\n",
+		 (long long)OBJ_INDEX_LOOKUP);
+	LASSERTF(OBJ_INDEX_LOOKUP == 9, "found %lld\n",
+		 (long long)OBJ_INDEX_LOOKUP);
+	LASSERTF(OBJ_INDEX_INSERT == 10, "found %lld\n",
+		 (long long)OBJ_INDEX_INSERT);
+	LASSERTF(OBJ_INDEX_DELETE == 11, "found %lld\n",
+		 (long long)OBJ_INDEX_DELETE);
+
+	/* Checks for struct ost_id */
+	LASSERTF((int)sizeof(struct ost_id) == 16, "found %lld\n",
+		 (long long)(int)sizeof(struct ost_id));
+	LASSERTF((int)offsetof(struct ost_id, oi) == 0, "found %lld\n",
+		 (long long)(int)offsetof(struct ost_id, oi));
+	LASSERTF((int)sizeof(((struct ost_id *)0)->oi) == 16, "found %lld\n",
+		 (long long)(int)sizeof(((struct ost_id *)0)->oi));
+	LASSERTF(LUSTRE_FID_INIT_OID == 1, "found %lld\n",
+		 (long long)LUSTRE_FID_INIT_OID);
+	LASSERTF(FID_SEQ_OST_MDT0 == 0, "found %lld\n",
+		 (long long)FID_SEQ_OST_MDT0);
+	LASSERTF(FID_SEQ_LLOG == 1, "found %lld\n",
+		 (long long)FID_SEQ_LLOG);
+	LASSERTF(FID_SEQ_ECHO == 2, "found %lld\n",
+		 (long long)FID_SEQ_ECHO);
+	LASSERTF(FID_SEQ_OST_MDT1 == 3, "found %lld\n",
+		 (long long)FID_SEQ_OST_MDT1);
+	LASSERTF(FID_SEQ_OST_MAX == 9, "found %lld\n",
+		 (long long)FID_SEQ_OST_MAX);
+	LASSERTF(FID_SEQ_RSVD == 11, "found %lld\n",
+		 (long long)FID_SEQ_RSVD);
+	LASSERTF(FID_SEQ_IGIF == 12, "found %lld\n",
+		 (long long)FID_SEQ_IGIF);
+	LASSERTF(FID_SEQ_IGIF_MAX == 0x00000000ffffffffULL, "found 0x%.16llxULL\n",
+			(long long)FID_SEQ_IGIF_MAX);
+	LASSERTF(FID_SEQ_IDIF == 0x0000000100000000ULL, "found 0x%.16llxULL\n",
+			(long long)FID_SEQ_IDIF);
+	LASSERTF(FID_SEQ_IDIF_MAX == 0x00000001ffffffffULL, "found 0x%.16llxULL\n",
+			(long long)FID_SEQ_IDIF_MAX);
+	LASSERTF(FID_SEQ_START == 0x0000000200000000ULL, "found 0x%.16llxULL\n",
+			(long long)FID_SEQ_START);
+	LASSERTF(FID_SEQ_LOCAL_FILE == 0x0000000200000001ULL, "found 0x%.16llxULL\n",
+			(long long)FID_SEQ_LOCAL_FILE);
+	LASSERTF(FID_SEQ_DOT_LUSTRE == 0x0000000200000002ULL, "found 0x%.16llxULL\n",
+			(long long)FID_SEQ_DOT_LUSTRE);
+	LASSERTF(FID_SEQ_SPECIAL == 0x0000000200000004ULL, "found 0x%.16llxULL\n",
+			(long long)FID_SEQ_SPECIAL);
+	LASSERTF(FID_SEQ_QUOTA == 0x0000000200000005ULL, "found 0x%.16llxULL\n",
+			(long long)FID_SEQ_QUOTA);
+	LASSERTF(FID_SEQ_QUOTA_GLB == 0x0000000200000006ULL, "found 0x%.16llxULL\n",
+			(long long)FID_SEQ_QUOTA_GLB);
+	LASSERTF(FID_SEQ_ROOT == 0x0000000200000007ULL, "found 0x%.16llxULL\n",
+			(long long)FID_SEQ_ROOT);
+	LASSERTF(FID_SEQ_NORMAL == 0x0000000200000400ULL, "found 0x%.16llxULL\n",
+			(long long)FID_SEQ_NORMAL);
+	LASSERTF(FID_SEQ_LOV_DEFAULT == 0xffffffffffffffffULL, "found 0x%.16llxULL\n",
+			(long long)FID_SEQ_LOV_DEFAULT);
+	LASSERTF(FID_OID_SPECIAL_BFL == 0x00000001UL, "found 0x%.8xUL\n",
+		(unsigned)FID_OID_SPECIAL_BFL);
+	LASSERTF(FID_OID_DOT_LUSTRE == 0x00000001UL, "found 0x%.8xUL\n",
+		(unsigned)FID_OID_DOT_LUSTRE);
+	LASSERTF(FID_OID_DOT_LUSTRE_OBF == 0x00000002UL, "found 0x%.8xUL\n",
+		(unsigned)FID_OID_DOT_LUSTRE_OBF);
+
+	/* Checks for struct lu_dirent */
+	LASSERTF((int)sizeof(struct lu_dirent) == 32, "found %lld\n",
+		 (long long)(int)sizeof(struct lu_dirent));
+	LASSERTF((int)offsetof(struct lu_dirent, lde_fid) == 0, "found %lld\n",
+		 (long long)(int)offsetof(struct lu_dirent, lde_fid));
+	LASSERTF((int)sizeof(((struct lu_dirent *)0)->lde_fid) == 16, "found %lld\n",
+		 (long long)(int)sizeof(((struct lu_dirent *)0)->lde_fid));
+	LASSERTF((int)offsetof(struct lu_dirent, lde_hash) == 16, "found %lld\n",
+		 (long long)(int)offsetof(struct lu_dirent, lde_hash));
+	LASSERTF((int)sizeof(((struct lu_dirent *)0)->lde_hash) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct lu_dirent *)0)->lde_hash));
+	LASSERTF((int)offsetof(struct lu_dirent, lde_reclen) == 24, "found %lld\n",
+		 (long long)(int)offsetof(struct lu_dirent, lde_reclen));
+	LASSERTF((int)sizeof(((struct lu_dirent *)0)->lde_reclen) == 2, "found %lld\n",
+		 (long long)(int)sizeof(((struct lu_dirent *)0)->lde_reclen));
+	LASSERTF((int)offsetof(struct lu_dirent, lde_namelen) == 26, "found %lld\n",
+		 (long long)(int)offsetof(struct lu_dirent, lde_namelen));
+	LASSERTF((int)sizeof(((struct lu_dirent *)0)->lde_namelen) == 2, "found %lld\n",
+		 (long long)(int)sizeof(((struct lu_dirent *)0)->lde_namelen));
+	LASSERTF((int)offsetof(struct lu_dirent, lde_attrs) == 28, "found %lld\n",
+		 (long long)(int)offsetof(struct lu_dirent, lde_attrs));
+	LASSERTF((int)sizeof(((struct lu_dirent *)0)->lde_attrs) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct lu_dirent *)0)->lde_attrs));
+	LASSERTF((int)offsetof(struct lu_dirent, lde_name[0]) == 32, "found %lld\n",
+		 (long long)(int)offsetof(struct lu_dirent, lde_name[0]));
+	LASSERTF((int)sizeof(((struct lu_dirent *)0)->lde_name[0]) == 1, "found %lld\n",
+		 (long long)(int)sizeof(((struct lu_dirent *)0)->lde_name[0]));
+	LASSERTF(LUDA_FID == 0x00000001UL, "found 0x%.8xUL\n",
+		(unsigned)LUDA_FID);
+	LASSERTF(LUDA_TYPE == 0x00000002UL, "found 0x%.8xUL\n",
+		(unsigned)LUDA_TYPE);
+	LASSERTF(LUDA_64BITHASH == 0x00000004UL, "found 0x%.8xUL\n",
+		(unsigned)LUDA_64BITHASH);
+
+	/* Checks for struct luda_type */
+	LASSERTF((int)sizeof(struct luda_type) == 2, "found %lld\n",
+		 (long long)(int)sizeof(struct luda_type));
+	LASSERTF((int)offsetof(struct luda_type, lt_type) == 0, "found %lld\n",
+		 (long long)(int)offsetof(struct luda_type, lt_type));
+	LASSERTF((int)sizeof(((struct luda_type *)0)->lt_type) == 2, "found %lld\n",
+		 (long long)(int)sizeof(((struct luda_type *)0)->lt_type));
+
+	/* Checks for struct lu_dirpage */
+	LASSERTF((int)sizeof(struct lu_dirpage) == 24, "found %lld\n",
+		 (long long)(int)sizeof(struct lu_dirpage));
+	LASSERTF((int)offsetof(struct lu_dirpage, ldp_hash_start) == 0, "found %lld\n",
+		 (long long)(int)offsetof(struct lu_dirpage, ldp_hash_start));
+	LASSERTF((int)sizeof(((struct lu_dirpage *)0)->ldp_hash_start) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct lu_dirpage *)0)->ldp_hash_start));
+	LASSERTF((int)offsetof(struct lu_dirpage, ldp_hash_end) == 8, "found %lld\n",
+		 (long long)(int)offsetof(struct lu_dirpage, ldp_hash_end));
+	LASSERTF((int)sizeof(((struct lu_dirpage *)0)->ldp_hash_end) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct lu_dirpage *)0)->ldp_hash_end));
+	LASSERTF((int)offsetof(struct lu_dirpage, ldp_flags) == 16, "found %lld\n",
+		 (long long)(int)offsetof(struct lu_dirpage, ldp_flags));
+	LASSERTF((int)sizeof(((struct lu_dirpage *)0)->ldp_flags) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct lu_dirpage *)0)->ldp_flags));
+	LASSERTF((int)offsetof(struct lu_dirpage, ldp_pad0) == 20, "found %lld\n",
+		 (long long)(int)offsetof(struct lu_dirpage, ldp_pad0));
+	LASSERTF((int)sizeof(((struct lu_dirpage *)0)->ldp_pad0) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct lu_dirpage *)0)->ldp_pad0));
+	LASSERTF((int)offsetof(struct lu_dirpage, ldp_entries[0]) == 24, "found %lld\n",
+		 (long long)(int)offsetof(struct lu_dirpage, ldp_entries[0]));
+	LASSERTF((int)sizeof(((struct lu_dirpage *)0)->ldp_entries[0]) == 32, "found %lld\n",
+		 (long long)(int)sizeof(((struct lu_dirpage *)0)->ldp_entries[0]));
+	LASSERTF(LDF_EMPTY == 1, "found %lld\n",
+		 (long long)LDF_EMPTY);
+	LASSERTF(LDF_COLLIDE == 2, "found %lld\n",
+		 (long long)LDF_COLLIDE);
+	LASSERTF(LU_PAGE_SIZE == 4096, "found %lld\n",
+		 (long long)LU_PAGE_SIZE);
+	/* Checks for union lu_page */
+	LASSERTF((int)sizeof(union lu_page) == 4096, "found %lld\n",
+		 (long long)(int)sizeof(union lu_page));
+
+	/* Checks for struct lustre_handle */
+	LASSERTF((int)sizeof(struct lustre_handle) == 8, "found %lld\n",
+		 (long long)(int)sizeof(struct lustre_handle));
+	LASSERTF((int)offsetof(struct lustre_handle, cookie) == 0, "found %lld\n",
+		 (long long)(int)offsetof(struct lustre_handle, cookie));
+	LASSERTF((int)sizeof(((struct lustre_handle *)0)->cookie) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct lustre_handle *)0)->cookie));
+
+	/* Checks for struct lustre_msg_v2 */
+	LASSERTF((int)sizeof(struct lustre_msg_v2) == 32, "found %lld\n",
+		 (long long)(int)sizeof(struct lustre_msg_v2));
+	LASSERTF((int)offsetof(struct lustre_msg_v2, lm_bufcount) == 0, "found %lld\n",
+		 (long long)(int)offsetof(struct lustre_msg_v2, lm_bufcount));
+	LASSERTF((int)sizeof(((struct lustre_msg_v2 *)0)->lm_bufcount) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct lustre_msg_v2 *)0)->lm_bufcount));
+	LASSERTF((int)offsetof(struct lustre_msg_v2, lm_secflvr) == 4, "found %lld\n",
+		 (long long)(int)offsetof(struct lustre_msg_v2, lm_secflvr));
+	LASSERTF((int)sizeof(((struct lustre_msg_v2 *)0)->lm_secflvr) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct lustre_msg_v2 *)0)->lm_secflvr));
+	LASSERTF((int)offsetof(struct lustre_msg_v2, lm_magic) == 8, "found %lld\n",
+		 (long long)(int)offsetof(struct lustre_msg_v2, lm_magic));
+	LASSERTF((int)sizeof(((struct lustre_msg_v2 *)0)->lm_magic) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct lustre_msg_v2 *)0)->lm_magic));
+	LASSERTF((int)offsetof(struct lustre_msg_v2, lm_repsize) == 12, "found %lld\n",
+		 (long long)(int)offsetof(struct lustre_msg_v2, lm_repsize));
+	LASSERTF((int)sizeof(((struct lustre_msg_v2 *)0)->lm_repsize) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct lustre_msg_v2 *)0)->lm_repsize));
+	LASSERTF((int)offsetof(struct lustre_msg_v2, lm_cksum) == 16, "found %lld\n",
+		 (long long)(int)offsetof(struct lustre_msg_v2, lm_cksum));
+	LASSERTF((int)sizeof(((struct lustre_msg_v2 *)0)->lm_cksum) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct lustre_msg_v2 *)0)->lm_cksum));
+	LASSERTF((int)offsetof(struct lustre_msg_v2, lm_flags) == 20, "found %lld\n",
+		 (long long)(int)offsetof(struct lustre_msg_v2, lm_flags));
+	LASSERTF((int)sizeof(((struct lustre_msg_v2 *)0)->lm_flags) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct lustre_msg_v2 *)0)->lm_flags));
+	LASSERTF((int)offsetof(struct lustre_msg_v2, lm_padding_2) == 24, "found %lld\n",
+		 (long long)(int)offsetof(struct lustre_msg_v2, lm_padding_2));
+	LASSERTF((int)sizeof(((struct lustre_msg_v2 *)0)->lm_padding_2) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct lustre_msg_v2 *)0)->lm_padding_2));
+	LASSERTF((int)offsetof(struct lustre_msg_v2, lm_padding_3) == 28, "found %lld\n",
+		 (long long)(int)offsetof(struct lustre_msg_v2, lm_padding_3));
+	LASSERTF((int)sizeof(((struct lustre_msg_v2 *)0)->lm_padding_3) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct lustre_msg_v2 *)0)->lm_padding_3));
+	LASSERTF((int)offsetof(struct lustre_msg_v2, lm_buflens[0]) == 32, "found %lld\n",
+		 (long long)(int)offsetof(struct lustre_msg_v2, lm_buflens[0]));
+	LASSERTF((int)sizeof(((struct lustre_msg_v2 *)0)->lm_buflens[0]) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct lustre_msg_v2 *)0)->lm_buflens[0]));
+	LASSERTF(LUSTRE_MSG_MAGIC_V1 == 0x0BD00BD0, "found 0x%.8x\n",
+		LUSTRE_MSG_MAGIC_V1);
+	LASSERTF(LUSTRE_MSG_MAGIC_V2 == 0x0BD00BD3, "found 0x%.8x\n",
+		LUSTRE_MSG_MAGIC_V2);
+	LASSERTF(LUSTRE_MSG_MAGIC_V1_SWABBED == 0xD00BD00B, "found 0x%.8x\n",
+		LUSTRE_MSG_MAGIC_V1_SWABBED);
+	LASSERTF(LUSTRE_MSG_MAGIC_V2_SWABBED == 0xD30BD00B, "found 0x%.8x\n",
+		LUSTRE_MSG_MAGIC_V2_SWABBED);
+
+	/* Checks for struct ptlrpc_body */
+	LASSERTF((int)sizeof(struct ptlrpc_body_v3) == 184, "found %lld\n",
+		 (long long)(int)sizeof(struct ptlrpc_body_v3));
+	LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_handle) == 0, "found %lld\n",
+		 (long long)(int)offsetof(struct ptlrpc_body_v3, pb_handle));
+	LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_handle) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_handle));
+	LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_type) == 8, "found %lld\n",
+		 (long long)(int)offsetof(struct ptlrpc_body_v3, pb_type));
+	LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_type) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_type));
+	LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_version) == 12, "found %lld\n",
+		 (long long)(int)offsetof(struct ptlrpc_body_v3, pb_version));
+	LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_version) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_version));
+	LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_opc) == 16, "found %lld\n",
+		 (long long)(int)offsetof(struct ptlrpc_body_v3, pb_opc));
+	LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_opc) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_opc));
+	LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_status) == 20, "found %lld\n",
+		 (long long)(int)offsetof(struct ptlrpc_body_v3, pb_status));
+	LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_status) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_status));
+	LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_last_xid) == 24, "found %lld\n",
+		 (long long)(int)offsetof(struct ptlrpc_body_v3, pb_last_xid));
+	LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_last_xid) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_last_xid));
+	LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_last_seen) == 32, "found %lld\n",
+		 (long long)(int)offsetof(struct ptlrpc_body_v3, pb_last_seen));
+	LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_last_seen) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_last_seen));
+	LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_last_committed) == 40, "found %lld\n",
+		 (long long)(int)offsetof(struct ptlrpc_body_v3, pb_last_committed));
+	LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_last_committed) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_last_committed));
+	LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_transno) == 48, "found %lld\n",
+		 (long long)(int)offsetof(struct ptlrpc_body_v3, pb_transno));
+	LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_transno) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_transno));
+	LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_flags) == 56, "found %lld\n",
+		 (long long)(int)offsetof(struct ptlrpc_body_v3, pb_flags));
+	LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_flags) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_flags));
+	LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_op_flags) == 60, "found %lld\n",
+		 (long long)(int)offsetof(struct ptlrpc_body_v3, pb_op_flags));
+	LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_op_flags) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_op_flags));
+	LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_conn_cnt) == 64, "found %lld\n",
+		 (long long)(int)offsetof(struct ptlrpc_body_v3, pb_conn_cnt));
+	LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_conn_cnt) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_conn_cnt));
+	LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_timeout) == 68, "found %lld\n",
+		 (long long)(int)offsetof(struct ptlrpc_body_v3, pb_timeout));
+	LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_timeout) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_timeout));
+	LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_service_time) == 72, "found %lld\n",
+		 (long long)(int)offsetof(struct ptlrpc_body_v3, pb_service_time));
+	LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_service_time) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_service_time));
+	LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_limit) == 76, "found %lld\n",
+		 (long long)(int)offsetof(struct ptlrpc_body_v3, pb_limit));
+	LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_limit) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_limit));
+	LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_slv) == 80, "found %lld\n",
+		 (long long)(int)offsetof(struct ptlrpc_body_v3, pb_slv));
+	LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_slv) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_slv));
+	CLASSERT(PTLRPC_NUM_VERSIONS == 4);
+	LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_pre_versions) == 88, "found %lld\n",
+		 (long long)(int)offsetof(struct ptlrpc_body_v3, pb_pre_versions));
+	LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_pre_versions) == 32, "found %lld\n",
+		 (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_pre_versions));
+	LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_padding) == 120, "found %lld\n",
+		 (long long)(int)offsetof(struct ptlrpc_body_v3, pb_padding));
+	LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_padding) == 32, "found %lld\n",
+		 (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_padding));
+	CLASSERT(JOBSTATS_JOBID_SIZE == 32);
+	LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_jobid) == 152, "found %lld\n",
+		 (long long)(int)offsetof(struct ptlrpc_body_v3, pb_jobid));
+	LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_jobid) == 32, "found %lld\n",
+		 (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_jobid));
+	LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_handle) == (int)offsetof(struct ptlrpc_body_v2, pb_handle), "%d != %d\n",
+		 (int)offsetof(struct ptlrpc_body_v3, pb_handle), (int)offsetof(struct ptlrpc_body_v2, pb_handle));
+	LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_handle) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_handle), "%d != %d\n",
+		 (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_handle), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_handle));
+	LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_type) == (int)offsetof(struct ptlrpc_body_v2, pb_type), "%d != %d\n",
+		 (int)offsetof(struct ptlrpc_body_v3, pb_type), (int)offsetof(struct ptlrpc_body_v2, pb_type));
+	LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_type) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_type), "%d != %d\n",
+		 (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_type), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_type));
+	LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_version) == (int)offsetof(struct ptlrpc_body_v2, pb_version), "%d != %d\n",
+		 (int)offsetof(struct ptlrpc_body_v3, pb_version), (int)offsetof(struct ptlrpc_body_v2, pb_version));
+	LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_version) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_version), "%d != %d\n",
+		 (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_version), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_version));
+	LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_opc) == (int)offsetof(struct ptlrpc_body_v2, pb_opc), "%d != %d\n",
+		 (int)offsetof(struct ptlrpc_body_v3, pb_opc), (int)offsetof(struct ptlrpc_body_v2, pb_opc));
+	LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_opc) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_opc), "%d != %d\n",
+		 (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_opc), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_opc));
+	LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_status) == (int)offsetof(struct ptlrpc_body_v2, pb_status), "%d != %d\n",
+		 (int)offsetof(struct ptlrpc_body_v3, pb_status), (int)offsetof(struct ptlrpc_body_v2, pb_status));
+	LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_status) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_status), "%d != %d\n",
+		 (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_status), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_status));
+	LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_last_xid) == (int)offsetof(struct ptlrpc_body_v2, pb_last_xid), "%d != %d\n",
+		 (int)offsetof(struct ptlrpc_body_v3, pb_last_xid), (int)offsetof(struct ptlrpc_body_v2, pb_last_xid));
+	LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_last_xid) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_last_xid), "%d != %d\n",
+		 (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_last_xid), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_last_xid));
+	LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_last_seen) == (int)offsetof(struct ptlrpc_body_v2, pb_last_seen), "%d != %d\n",
+		 (int)offsetof(struct ptlrpc_body_v3, pb_last_seen), (int)offsetof(struct ptlrpc_body_v2, pb_last_seen));
+	LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_last_seen) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_last_seen), "%d != %d\n",
+		 (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_last_seen), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_last_seen));
+	LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_last_committed) == (int)offsetof(struct ptlrpc_body_v2, pb_last_committed), "%d != %d\n",
+		 (int)offsetof(struct ptlrpc_body_v3, pb_last_committed), (int)offsetof(struct ptlrpc_body_v2, pb_last_committed));
+	LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_last_committed) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_last_committed), "%d != %d\n",
+		 (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_last_committed), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_last_committed));
+	LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_transno) == (int)offsetof(struct ptlrpc_body_v2, pb_transno), "%d != %d\n",
+		 (int)offsetof(struct ptlrpc_body_v3, pb_transno), (int)offsetof(struct ptlrpc_body_v2, pb_transno));
+	LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_transno) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_transno), "%d != %d\n",
+		 (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_transno), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_transno));
+	LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_flags) == (int)offsetof(struct ptlrpc_body_v2, pb_flags), "%d != %d\n",
+		 (int)offsetof(struct ptlrpc_body_v3, pb_flags), (int)offsetof(struct ptlrpc_body_v2, pb_flags));
+	LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_flags) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_flags), "%d != %d\n",
+		 (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_flags), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_flags));
+	LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_op_flags) == (int)offsetof(struct ptlrpc_body_v2, pb_op_flags), "%d != %d\n",
+		 (int)offsetof(struct ptlrpc_body_v3, pb_op_flags), (int)offsetof(struct ptlrpc_body_v2, pb_op_flags));
+	LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_op_flags) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_op_flags), "%d != %d\n",
+		 (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_op_flags), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_op_flags));
+	LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_conn_cnt) == (int)offsetof(struct ptlrpc_body_v2, pb_conn_cnt), "%d != %d\n",
+		 (int)offsetof(struct ptlrpc_body_v3, pb_conn_cnt), (int)offsetof(struct ptlrpc_body_v2, pb_conn_cnt));
+	LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_conn_cnt) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_conn_cnt), "%d != %d\n",
+		 (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_conn_cnt), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_conn_cnt));
+	LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_timeout) == (int)offsetof(struct ptlrpc_body_v2, pb_timeout), "%d != %d\n",
+		 (int)offsetof(struct ptlrpc_body_v3, pb_timeout), (int)offsetof(struct ptlrpc_body_v2, pb_timeout));
+	LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_timeout) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_timeout), "%d != %d\n",
+		 (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_timeout), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_timeout));
+	LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_service_time) == (int)offsetof(struct ptlrpc_body_v2, pb_service_time), "%d != %d\n",
+		 (int)offsetof(struct ptlrpc_body_v3, pb_service_time), (int)offsetof(struct ptlrpc_body_v2, pb_service_time));
+	LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_service_time) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_service_time), "%d != %d\n",
+		 (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_service_time), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_service_time));
+	LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_limit) == (int)offsetof(struct ptlrpc_body_v2, pb_limit), "%d != %d\n",
+		 (int)offsetof(struct ptlrpc_body_v3, pb_limit), (int)offsetof(struct ptlrpc_body_v2, pb_limit));
+	LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_limit) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_limit), "%d != %d\n",
+		 (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_limit), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_limit));
+	LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_slv) == (int)offsetof(struct ptlrpc_body_v2, pb_slv), "%d != %d\n",
+		 (int)offsetof(struct ptlrpc_body_v3, pb_slv), (int)offsetof(struct ptlrpc_body_v2, pb_slv));
+	LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_slv) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_slv), "%d != %d\n",
+		 (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_slv), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_slv));
+	LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_pre_versions) == (int)offsetof(struct ptlrpc_body_v2, pb_pre_versions), "%d != %d\n",
+		 (int)offsetof(struct ptlrpc_body_v3, pb_pre_versions), (int)offsetof(struct ptlrpc_body_v2, pb_pre_versions));
+	LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_pre_versions) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_pre_versions), "%d != %d\n",
+		 (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_pre_versions), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_pre_versions));
+	LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_padding) == (int)offsetof(struct ptlrpc_body_v2, pb_padding), "%d != %d\n",
+		 (int)offsetof(struct ptlrpc_body_v3, pb_padding), (int)offsetof(struct ptlrpc_body_v2, pb_padding));
+	LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_padding) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_padding), "%d != %d\n",
+		 (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_padding), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_padding));
+	LASSERTF(MSG_PTLRPC_BODY_OFF == 0, "found %lld\n",
+		 (long long)MSG_PTLRPC_BODY_OFF);
+	LASSERTF(REQ_REC_OFF == 1, "found %lld\n",
+		 (long long)REQ_REC_OFF);
+	LASSERTF(REPLY_REC_OFF == 1, "found %lld\n",
+		 (long long)REPLY_REC_OFF);
+	LASSERTF(DLM_LOCKREQ_OFF == 1, "found %lld\n",
+		 (long long)DLM_LOCKREQ_OFF);
+	LASSERTF(DLM_REQ_REC_OFF == 2, "found %lld\n",
+		 (long long)DLM_REQ_REC_OFF);
+	LASSERTF(DLM_INTENT_IT_OFF == 2, "found %lld\n",
+		 (long long)DLM_INTENT_IT_OFF);
+	LASSERTF(DLM_INTENT_REC_OFF == 3, "found %lld\n",
+		 (long long)DLM_INTENT_REC_OFF);
+	LASSERTF(DLM_LOCKREPLY_OFF == 1, "found %lld\n",
+		 (long long)DLM_LOCKREPLY_OFF);
+	LASSERTF(DLM_REPLY_REC_OFF == 2, "found %lld\n",
+		 (long long)DLM_REPLY_REC_OFF);
+	LASSERTF(MSG_PTLRPC_HEADER_OFF == 31, "found %lld\n",
+		 (long long)MSG_PTLRPC_HEADER_OFF);
+	LASSERTF(PTLRPC_MSG_VERSION == 0x00000003, "found 0x%.8x\n",
+		PTLRPC_MSG_VERSION);
+	LASSERTF(LUSTRE_VERSION_MASK == 0xffff0000, "found 0x%.8x\n",
+		LUSTRE_VERSION_MASK);
+	LASSERTF(LUSTRE_OBD_VERSION == 0x00010000, "found 0x%.8x\n",
+		LUSTRE_OBD_VERSION);
+	LASSERTF(LUSTRE_MDS_VERSION == 0x00020000, "found 0x%.8x\n",
+		LUSTRE_MDS_VERSION);
+	LASSERTF(LUSTRE_OST_VERSION == 0x00030000, "found 0x%.8x\n",
+		LUSTRE_OST_VERSION);
+	LASSERTF(LUSTRE_DLM_VERSION == 0x00040000, "found 0x%.8x\n",
+		LUSTRE_DLM_VERSION);
+	LASSERTF(LUSTRE_LOG_VERSION == 0x00050000, "found 0x%.8x\n",
+		LUSTRE_LOG_VERSION);
+	LASSERTF(LUSTRE_MGS_VERSION == 0x00060000, "found 0x%.8x\n",
+		LUSTRE_MGS_VERSION);
+	LASSERTF(MSGHDR_AT_SUPPORT == 1, "found %lld\n",
+		 (long long)MSGHDR_AT_SUPPORT);
+	LASSERTF(MSGHDR_CKSUM_INCOMPAT18 == 2, "found %lld\n",
+		 (long long)MSGHDR_CKSUM_INCOMPAT18);
+	LASSERTF(MSG_OP_FLAG_MASK == 0xffff0000UL, "found 0x%.8xUL\n",
+		(unsigned)MSG_OP_FLAG_MASK);
+	LASSERTF(MSG_OP_FLAG_SHIFT == 16, "found %lld\n",
+		 (long long)MSG_OP_FLAG_SHIFT);
+	LASSERTF(MSG_GEN_FLAG_MASK == 0x0000ffffUL, "found 0x%.8xUL\n",
+		(unsigned)MSG_GEN_FLAG_MASK);
+	LASSERTF(MSG_LAST_REPLAY == 0x00000001UL, "found 0x%.8xUL\n",
+		(unsigned)MSG_LAST_REPLAY);
+	LASSERTF(MSG_RESENT == 0x00000002UL, "found 0x%.8xUL\n",
+		(unsigned)MSG_RESENT);
+	LASSERTF(MSG_REPLAY == 0x00000004UL, "found 0x%.8xUL\n",
+		(unsigned)MSG_REPLAY);
+	LASSERTF(MSG_DELAY_REPLAY == 0x00000010UL, "found 0x%.8xUL\n",
+		(unsigned)MSG_DELAY_REPLAY);
+	LASSERTF(MSG_VERSION_REPLAY == 0x00000020UL, "found 0x%.8xUL\n",
+		(unsigned)MSG_VERSION_REPLAY);
+	LASSERTF(MSG_REQ_REPLAY_DONE == 0x00000040UL, "found 0x%.8xUL\n",
+		(unsigned)MSG_REQ_REPLAY_DONE);
+	LASSERTF(MSG_LOCK_REPLAY_DONE == 0x00000080UL, "found 0x%.8xUL\n",
+		(unsigned)MSG_LOCK_REPLAY_DONE);
+	LASSERTF(MSG_CONNECT_RECOVERING == 0x00000001UL, "found 0x%.8xUL\n",
+		(unsigned)MSG_CONNECT_RECOVERING);
+	LASSERTF(MSG_CONNECT_RECONNECT == 0x00000002UL, "found 0x%.8xUL\n",
+		(unsigned)MSG_CONNECT_RECONNECT);
+	LASSERTF(MSG_CONNECT_REPLAYABLE == 0x00000004UL, "found 0x%.8xUL\n",
+		(unsigned)MSG_CONNECT_REPLAYABLE);
+	LASSERTF(MSG_CONNECT_LIBCLIENT == 0x00000010UL, "found 0x%.8xUL\n",
+		(unsigned)MSG_CONNECT_LIBCLIENT);
+	LASSERTF(MSG_CONNECT_INITIAL == 0x00000020UL, "found 0x%.8xUL\n",
+		(unsigned)MSG_CONNECT_INITIAL);
+	LASSERTF(MSG_CONNECT_ASYNC == 0x00000040UL, "found 0x%.8xUL\n",
+		(unsigned)MSG_CONNECT_ASYNC);
+	LASSERTF(MSG_CONNECT_NEXT_VER == 0x00000080UL, "found 0x%.8xUL\n",
+		(unsigned)MSG_CONNECT_NEXT_VER);
+	LASSERTF(MSG_CONNECT_TRANSNO == 0x00000100UL, "found 0x%.8xUL\n",
+		(unsigned)MSG_CONNECT_TRANSNO);
+
+	/* Checks for struct obd_connect_data */
+	LASSERTF((int)sizeof(struct obd_connect_data) == 192, "found %lld\n",
+		 (long long)(int)sizeof(struct obd_connect_data));
+	LASSERTF((int)offsetof(struct obd_connect_data, ocd_connect_flags) == 0, "found %lld\n",
+		 (long long)(int)offsetof(struct obd_connect_data, ocd_connect_flags));
+	LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_connect_flags) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_connect_flags));
+	LASSERTF((int)offsetof(struct obd_connect_data, ocd_version) == 8, "found %lld\n",
+		 (long long)(int)offsetof(struct obd_connect_data, ocd_version));
+	LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_version) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_version));
+	LASSERTF((int)offsetof(struct obd_connect_data, ocd_grant) == 12, "found %lld\n",
+		 (long long)(int)offsetof(struct obd_connect_data, ocd_grant));
+	LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_grant) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_grant));
+	LASSERTF((int)offsetof(struct obd_connect_data, ocd_index) == 16, "found %lld\n",
+		 (long long)(int)offsetof(struct obd_connect_data, ocd_index));
+	LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_index) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_index));
+	LASSERTF((int)offsetof(struct obd_connect_data, ocd_brw_size) == 20, "found %lld\n",
+		 (long long)(int)offsetof(struct obd_connect_data, ocd_brw_size));
+	LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_brw_size) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_brw_size));
+	LASSERTF((int)offsetof(struct obd_connect_data, ocd_ibits_known) == 24, "found %lld\n",
+		 (long long)(int)offsetof(struct obd_connect_data, ocd_ibits_known));
+	LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_ibits_known) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_ibits_known));
+	LASSERTF((int)offsetof(struct obd_connect_data, ocd_blocksize) == 32, "found %lld\n",
+		 (long long)(int)offsetof(struct obd_connect_data, ocd_blocksize));
+	LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_blocksize) == 1, "found %lld\n",
+		 (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_blocksize));
+	LASSERTF((int)offsetof(struct obd_connect_data, ocd_inodespace) == 33, "found %lld\n",
+		 (long long)(int)offsetof(struct obd_connect_data, ocd_inodespace));
+	LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_inodespace) == 1, "found %lld\n",
+		 (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_inodespace));
+	LASSERTF((int)offsetof(struct obd_connect_data, ocd_grant_extent) == 34, "found %lld\n",
+		 (long long)(int)offsetof(struct obd_connect_data, ocd_grant_extent));
+	LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_grant_extent) == 2, "found %lld\n",
+		 (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_grant_extent));
+	LASSERTF((int)offsetof(struct obd_connect_data, ocd_unused) == 36, "found %lld\n",
+		 (long long)(int)offsetof(struct obd_connect_data, ocd_unused));
+	LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_unused) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_unused));
+	LASSERTF((int)offsetof(struct obd_connect_data, ocd_transno) == 40, "found %lld\n",
+		 (long long)(int)offsetof(struct obd_connect_data, ocd_transno));
+	LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_transno) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_transno));
+	LASSERTF((int)offsetof(struct obd_connect_data, ocd_group) == 48, "found %lld\n",
+		 (long long)(int)offsetof(struct obd_connect_data, ocd_group));
+	LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_group) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_group));
+	LASSERTF((int)offsetof(struct obd_connect_data, ocd_cksum_types) == 52, "found %lld\n",
+		 (long long)(int)offsetof(struct obd_connect_data, ocd_cksum_types));
+	LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_cksum_types) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_cksum_types));
+	LASSERTF((int)offsetof(struct obd_connect_data, ocd_max_easize) == 56, "found %lld\n",
+		 (long long)(int)offsetof(struct obd_connect_data, ocd_max_easize));
+	LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_max_easize) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_max_easize));
+	LASSERTF((int)offsetof(struct obd_connect_data, ocd_instance) == 60, "found %lld\n",
+		 (long long)(int)offsetof(struct obd_connect_data, ocd_instance));
+	LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_instance) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_instance));
+	LASSERTF((int)offsetof(struct obd_connect_data, ocd_maxbytes) == 64, "found %lld\n",
+		 (long long)(int)offsetof(struct obd_connect_data, ocd_maxbytes));
+	LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_maxbytes) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_maxbytes));
+	LASSERTF((int)offsetof(struct obd_connect_data, padding1) == 72, "found %lld\n",
+		 (long long)(int)offsetof(struct obd_connect_data, padding1));
+	LASSERTF((int)sizeof(((struct obd_connect_data *)0)->padding1) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct obd_connect_data *)0)->padding1));
+	LASSERTF((int)offsetof(struct obd_connect_data, padding2) == 80, "found %lld\n",
+		 (long long)(int)offsetof(struct obd_connect_data, padding2));
+	LASSERTF((int)sizeof(((struct obd_connect_data *)0)->padding2) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct obd_connect_data *)0)->padding2));
+	LASSERTF((int)offsetof(struct obd_connect_data, padding3) == 88, "found %lld\n",
+		 (long long)(int)offsetof(struct obd_connect_data, padding3));
+	LASSERTF((int)sizeof(((struct obd_connect_data *)0)->padding3) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct obd_connect_data *)0)->padding3));
+	LASSERTF((int)offsetof(struct obd_connect_data, padding4) == 96, "found %lld\n",
+		 (long long)(int)offsetof(struct obd_connect_data, padding4));
+	LASSERTF((int)sizeof(((struct obd_connect_data *)0)->padding4) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct obd_connect_data *)0)->padding4));
+	LASSERTF((int)offsetof(struct obd_connect_data, padding5) == 104, "found %lld\n",
+		 (long long)(int)offsetof(struct obd_connect_data, padding5));
+	LASSERTF((int)sizeof(((struct obd_connect_data *)0)->padding5) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct obd_connect_data *)0)->padding5));
+	LASSERTF((int)offsetof(struct obd_connect_data, padding6) == 112, "found %lld\n",
+		 (long long)(int)offsetof(struct obd_connect_data, padding6));
+	LASSERTF((int)sizeof(((struct obd_connect_data *)0)->padding6) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct obd_connect_data *)0)->padding6));
+	LASSERTF((int)offsetof(struct obd_connect_data, padding7) == 120, "found %lld\n",
+		 (long long)(int)offsetof(struct obd_connect_data, padding7));
+	LASSERTF((int)sizeof(((struct obd_connect_data *)0)->padding7) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct obd_connect_data *)0)->padding7));
+	LASSERTF((int)offsetof(struct obd_connect_data, padding8) == 128, "found %lld\n",
+		 (long long)(int)offsetof(struct obd_connect_data, padding8));
+	LASSERTF((int)sizeof(((struct obd_connect_data *)0)->padding8) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct obd_connect_data *)0)->padding8));
+	LASSERTF((int)offsetof(struct obd_connect_data, padding9) == 136, "found %lld\n",
+		 (long long)(int)offsetof(struct obd_connect_data, padding9));
+	LASSERTF((int)sizeof(((struct obd_connect_data *)0)->padding9) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct obd_connect_data *)0)->padding9));
+	LASSERTF((int)offsetof(struct obd_connect_data, paddingA) == 144, "found %lld\n",
+		 (long long)(int)offsetof(struct obd_connect_data, paddingA));
+	LASSERTF((int)sizeof(((struct obd_connect_data *)0)->paddingA) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct obd_connect_data *)0)->paddingA));
+	LASSERTF((int)offsetof(struct obd_connect_data, paddingB) == 152, "found %lld\n",
+		 (long long)(int)offsetof(struct obd_connect_data, paddingB));
+	LASSERTF((int)sizeof(((struct obd_connect_data *)0)->paddingB) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct obd_connect_data *)0)->paddingB));
+	LASSERTF((int)offsetof(struct obd_connect_data, paddingC) == 160, "found %lld\n",
+		 (long long)(int)offsetof(struct obd_connect_data, paddingC));
+	LASSERTF((int)sizeof(((struct obd_connect_data *)0)->paddingC) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct obd_connect_data *)0)->paddingC));
+	LASSERTF((int)offsetof(struct obd_connect_data, paddingD) == 168, "found %lld\n",
+		 (long long)(int)offsetof(struct obd_connect_data, paddingD));
+	LASSERTF((int)sizeof(((struct obd_connect_data *)0)->paddingD) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct obd_connect_data *)0)->paddingD));
+	LASSERTF((int)offsetof(struct obd_connect_data, paddingE) == 176, "found %lld\n",
+		 (long long)(int)offsetof(struct obd_connect_data, paddingE));
+	LASSERTF((int)sizeof(((struct obd_connect_data *)0)->paddingE) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct obd_connect_data *)0)->paddingE));
+	LASSERTF((int)offsetof(struct obd_connect_data, paddingF) == 184, "found %lld\n",
+		 (long long)(int)offsetof(struct obd_connect_data, paddingF));
+	LASSERTF((int)sizeof(((struct obd_connect_data *)0)->paddingF) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct obd_connect_data *)0)->paddingF));
+	LASSERTF(OBD_CONNECT_RDONLY == 0x1ULL, "found 0x%.16llxULL\n",
+		 OBD_CONNECT_RDONLY);
+	LASSERTF(OBD_CONNECT_INDEX == 0x2ULL, "found 0x%.16llxULL\n",
+		 OBD_CONNECT_INDEX);
+	LASSERTF(OBD_CONNECT_MDS == 0x4ULL, "found 0x%.16llxULL\n",
+		 OBD_CONNECT_MDS);
+	LASSERTF(OBD_CONNECT_GRANT == 0x8ULL, "found 0x%.16llxULL\n",
+		 OBD_CONNECT_GRANT);
+	LASSERTF(OBD_CONNECT_SRVLOCK == 0x10ULL, "found 0x%.16llxULL\n",
+		 OBD_CONNECT_SRVLOCK);
+	LASSERTF(OBD_CONNECT_VERSION == 0x20ULL, "found 0x%.16llxULL\n",
+		 OBD_CONNECT_VERSION);
+	LASSERTF(OBD_CONNECT_REQPORTAL == 0x40ULL, "found 0x%.16llxULL\n",
+		 OBD_CONNECT_REQPORTAL);
+	LASSERTF(OBD_CONNECT_ACL == 0x80ULL, "found 0x%.16llxULL\n",
+		 OBD_CONNECT_ACL);
+	LASSERTF(OBD_CONNECT_XATTR == 0x100ULL, "found 0x%.16llxULL\n",
+		 OBD_CONNECT_XATTR);
+	LASSERTF(OBD_CONNECT_CROW == 0x200ULL, "found 0x%.16llxULL\n",
+		 OBD_CONNECT_CROW);
+	LASSERTF(OBD_CONNECT_TRUNCLOCK == 0x400ULL, "found 0x%.16llxULL\n",
+		 OBD_CONNECT_TRUNCLOCK);
+	LASSERTF(OBD_CONNECT_TRANSNO == 0x800ULL, "found 0x%.16llxULL\n",
+		 OBD_CONNECT_TRANSNO);
+	LASSERTF(OBD_CONNECT_IBITS == 0x1000ULL, "found 0x%.16llxULL\n",
+		 OBD_CONNECT_IBITS);
+	LASSERTF(OBD_CONNECT_JOIN == 0x2000ULL, "found 0x%.16llxULL\n",
+		 OBD_CONNECT_JOIN);
+	LASSERTF(OBD_CONNECT_ATTRFID == 0x4000ULL, "found 0x%.16llxULL\n",
+		 OBD_CONNECT_ATTRFID);
+	LASSERTF(OBD_CONNECT_NODEVOH == 0x8000ULL, "found 0x%.16llxULL\n",
+		 OBD_CONNECT_NODEVOH);
+	LASSERTF(OBD_CONNECT_RMT_CLIENT == 0x10000ULL, "found 0x%.16llxULL\n",
+		 OBD_CONNECT_RMT_CLIENT);
+	LASSERTF(OBD_CONNECT_RMT_CLIENT_FORCE == 0x20000ULL, "found 0x%.16llxULL\n",
+		 OBD_CONNECT_RMT_CLIENT_FORCE);
+	LASSERTF(OBD_CONNECT_BRW_SIZE == 0x40000ULL, "found 0x%.16llxULL\n",
+		 OBD_CONNECT_BRW_SIZE);
+	LASSERTF(OBD_CONNECT_QUOTA64 == 0x80000ULL, "found 0x%.16llxULL\n",
+		 OBD_CONNECT_QUOTA64);
+	LASSERTF(OBD_CONNECT_MDS_CAPA == 0x100000ULL, "found 0x%.16llxULL\n",
+		 OBD_CONNECT_MDS_CAPA);
+	LASSERTF(OBD_CONNECT_OSS_CAPA == 0x200000ULL, "found 0x%.16llxULL\n",
+		 OBD_CONNECT_OSS_CAPA);
+	LASSERTF(OBD_CONNECT_CANCELSET == 0x400000ULL, "found 0x%.16llxULL\n",
+		 OBD_CONNECT_CANCELSET);
+	LASSERTF(OBD_CONNECT_SOM == 0x800000ULL, "found 0x%.16llxULL\n",
+		 OBD_CONNECT_SOM);
+	LASSERTF(OBD_CONNECT_AT == 0x1000000ULL, "found 0x%.16llxULL\n",
+		 OBD_CONNECT_AT);
+	LASSERTF(OBD_CONNECT_LRU_RESIZE == 0x2000000ULL, "found 0x%.16llxULL\n",
+		 OBD_CONNECT_LRU_RESIZE);
+	LASSERTF(OBD_CONNECT_MDS_MDS == 0x4000000ULL, "found 0x%.16llxULL\n",
+		 OBD_CONNECT_MDS_MDS);
+	LASSERTF(OBD_CONNECT_REAL == 0x8000000ULL, "found 0x%.16llxULL\n",
+		 OBD_CONNECT_REAL);
+	LASSERTF(OBD_CONNECT_CHANGE_QS == 0x10000000ULL, "found 0x%.16llxULL\n",
+		 OBD_CONNECT_CHANGE_QS);
+	LASSERTF(OBD_CONNECT_CKSUM == 0x20000000ULL, "found 0x%.16llxULL\n",
+		 OBD_CONNECT_CKSUM);
+	LASSERTF(OBD_CONNECT_FID == 0x40000000ULL, "found 0x%.16llxULL\n",
+		 OBD_CONNECT_FID);
+	LASSERTF(OBD_CONNECT_VBR == 0x80000000ULL, "found 0x%.16llxULL\n",
+		 OBD_CONNECT_VBR);
+	LASSERTF(OBD_CONNECT_LOV_V3 == 0x100000000ULL, "found 0x%.16llxULL\n",
+		 OBD_CONNECT_LOV_V3);
+	LASSERTF(OBD_CONNECT_GRANT_SHRINK == 0x200000000ULL, "found 0x%.16llxULL\n",
+		 OBD_CONNECT_GRANT_SHRINK);
+	LASSERTF(OBD_CONNECT_SKIP_ORPHAN == 0x400000000ULL, "found 0x%.16llxULL\n",
+		 OBD_CONNECT_SKIP_ORPHAN);
+	LASSERTF(OBD_CONNECT_MAX_EASIZE == 0x800000000ULL, "found 0x%.16llxULL\n",
+		 OBD_CONNECT_MAX_EASIZE);
+	LASSERTF(OBD_CONNECT_FULL20 == 0x1000000000ULL, "found 0x%.16llxULL\n",
+		 OBD_CONNECT_FULL20);
+	LASSERTF(OBD_CONNECT_LAYOUTLOCK == 0x2000000000ULL, "found 0x%.16llxULL\n",
+		 OBD_CONNECT_LAYOUTLOCK);
+	LASSERTF(OBD_CONNECT_64BITHASH == 0x4000000000ULL, "found 0x%.16llxULL\n",
+		 OBD_CONNECT_64BITHASH);
+	LASSERTF(OBD_CONNECT_MAXBYTES == 0x8000000000ULL, "found 0x%.16llxULL\n",
+		 OBD_CONNECT_MAXBYTES);
+	LASSERTF(OBD_CONNECT_IMP_RECOV == 0x10000000000ULL, "found 0x%.16llxULL\n",
+		 OBD_CONNECT_IMP_RECOV);
+	LASSERTF(OBD_CONNECT_JOBSTATS == 0x20000000000ULL, "found 0x%.16llxULL\n",
+		 OBD_CONNECT_JOBSTATS);
+	LASSERTF(OBD_CONNECT_UMASK == 0x40000000000ULL, "found 0x%.16llxULL\n",
+		 OBD_CONNECT_UMASK);
+	LASSERTF(OBD_CONNECT_EINPROGRESS == 0x80000000000ULL, "found 0x%.16llxULL\n",
+		 OBD_CONNECT_EINPROGRESS);
+	LASSERTF(OBD_CONNECT_GRANT_PARAM == 0x100000000000ULL, "found 0x%.16llxULL\n",
+		 OBD_CONNECT_GRANT_PARAM);
+	LASSERTF(OBD_CONNECT_FLOCK_OWNER == 0x200000000000ULL, "found 0x%.16llxULL\n",
+		 OBD_CONNECT_FLOCK_OWNER);
+	LASSERTF(OBD_CONNECT_LVB_TYPE == 0x400000000000ULL, "found 0x%.16llxULL\n",
+		 OBD_CONNECT_LVB_TYPE);
+	LASSERTF(OBD_CONNECT_NANOSEC_TIME == 0x800000000000ULL, "found 0x%.16llxULL\n",
+		 OBD_CONNECT_NANOSEC_TIME);
+	LASSERTF(OBD_CONNECT_LIGHTWEIGHT == 0x1000000000000ULL, "found 0x%.16llxULL\n",
+		 OBD_CONNECT_LIGHTWEIGHT);
+	LASSERTF(OBD_CONNECT_SHORTIO == 0x2000000000000ULL, "found 0x%.16llxULL\n",
+		 OBD_CONNECT_SHORTIO);
+	LASSERTF(OBD_CONNECT_PINGLESS == 0x4000000000000ULL, "found 0x%.16llxULL\n",
+		 OBD_CONNECT_PINGLESS);
+	LASSERTF(OBD_CONNECT_FLOCK_DEAD == 0x8000000000000ULL,
+		 "found 0x%.16llxULL\n", OBD_CONNECT_FLOCK_DEAD);
+	LASSERTF(OBD_CKSUM_CRC32 == 0x00000001UL, "found 0x%.8xUL\n",
+		(unsigned)OBD_CKSUM_CRC32);
+	LASSERTF(OBD_CKSUM_ADLER == 0x00000002UL, "found 0x%.8xUL\n",
+		(unsigned)OBD_CKSUM_ADLER);
+	LASSERTF(OBD_CKSUM_CRC32C == 0x00000004UL, "found 0x%.8xUL\n",
+		(unsigned)OBD_CKSUM_CRC32C);
+
+	/* Checks for struct obdo */
+	LASSERTF((int)sizeof(struct obdo) == 208, "found %lld\n",
+		 (long long)(int)sizeof(struct obdo));
+	LASSERTF((int)offsetof(struct obdo, o_valid) == 0, "found %lld\n",
+		 (long long)(int)offsetof(struct obdo, o_valid));
+	LASSERTF((int)sizeof(((struct obdo *)0)->o_valid) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct obdo *)0)->o_valid));
+	LASSERTF((int)offsetof(struct obdo, o_oi) == 8, "found %lld\n",
+		 (long long)(int)offsetof(struct obdo, o_oi));
+	LASSERTF((int)sizeof(((struct obdo *)0)->o_oi) == 16, "found %lld\n",
+		 (long long)(int)sizeof(((struct obdo *)0)->o_oi));
+	LASSERTF((int)offsetof(struct obdo, o_parent_seq) == 24, "found %lld\n",
+		 (long long)(int)offsetof(struct obdo, o_parent_seq));
+	LASSERTF((int)sizeof(((struct obdo *)0)->o_parent_seq) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct obdo *)0)->o_parent_seq));
+	LASSERTF((int)offsetof(struct obdo, o_size) == 32, "found %lld\n",
+		 (long long)(int)offsetof(struct obdo, o_size));
+	LASSERTF((int)sizeof(((struct obdo *)0)->o_size) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct obdo *)0)->o_size));
+	LASSERTF((int)offsetof(struct obdo, o_mtime) == 40, "found %lld\n",
+		 (long long)(int)offsetof(struct obdo, o_mtime));
+	LASSERTF((int)sizeof(((struct obdo *)0)->o_mtime) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct obdo *)0)->o_mtime));
+	LASSERTF((int)offsetof(struct obdo, o_atime) == 48, "found %lld\n",
+		 (long long)(int)offsetof(struct obdo, o_atime));
+	LASSERTF((int)sizeof(((struct obdo *)0)->o_atime) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct obdo *)0)->o_atime));
+	LASSERTF((int)offsetof(struct obdo, o_ctime) == 56, "found %lld\n",
+		 (long long)(int)offsetof(struct obdo, o_ctime));
+	LASSERTF((int)sizeof(((struct obdo *)0)->o_ctime) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct obdo *)0)->o_ctime));
+	LASSERTF((int)offsetof(struct obdo, o_blocks) == 64, "found %lld\n",
+		 (long long)(int)offsetof(struct obdo, o_blocks));
+	LASSERTF((int)sizeof(((struct obdo *)0)->o_blocks) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct obdo *)0)->o_blocks));
+	LASSERTF((int)offsetof(struct obdo, o_grant) == 72, "found %lld\n",
+		 (long long)(int)offsetof(struct obdo, o_grant));
+	LASSERTF((int)sizeof(((struct obdo *)0)->o_grant) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct obdo *)0)->o_grant));
+	LASSERTF((int)offsetof(struct obdo, o_blksize) == 80, "found %lld\n",
+		 (long long)(int)offsetof(struct obdo, o_blksize));
+	LASSERTF((int)sizeof(((struct obdo *)0)->o_blksize) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct obdo *)0)->o_blksize));
+	LASSERTF((int)offsetof(struct obdo, o_mode) == 84, "found %lld\n",
+		 (long long)(int)offsetof(struct obdo, o_mode));
+	LASSERTF((int)sizeof(((struct obdo *)0)->o_mode) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct obdo *)0)->o_mode));
+	LASSERTF((int)offsetof(struct obdo, o_uid) == 88, "found %lld\n",
+		 (long long)(int)offsetof(struct obdo, o_uid));
+	LASSERTF((int)sizeof(((struct obdo *)0)->o_uid) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct obdo *)0)->o_uid));
+	LASSERTF((int)offsetof(struct obdo, o_gid) == 92, "found %lld\n",
+		 (long long)(int)offsetof(struct obdo, o_gid));
+	LASSERTF((int)sizeof(((struct obdo *)0)->o_gid) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct obdo *)0)->o_gid));
+	LASSERTF((int)offsetof(struct obdo, o_flags) == 96, "found %lld\n",
+		 (long long)(int)offsetof(struct obdo, o_flags));
+	LASSERTF((int)sizeof(((struct obdo *)0)->o_flags) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct obdo *)0)->o_flags));
+	LASSERTF((int)offsetof(struct obdo, o_nlink) == 100, "found %lld\n",
+		 (long long)(int)offsetof(struct obdo, o_nlink));
+	LASSERTF((int)sizeof(((struct obdo *)0)->o_nlink) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct obdo *)0)->o_nlink));
+	LASSERTF((int)offsetof(struct obdo, o_parent_oid) == 104, "found %lld\n",
+		 (long long)(int)offsetof(struct obdo, o_parent_oid));
+	LASSERTF((int)sizeof(((struct obdo *)0)->o_parent_oid) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct obdo *)0)->o_parent_oid));
+	LASSERTF((int)offsetof(struct obdo, o_misc) == 108, "found %lld\n",
+		 (long long)(int)offsetof(struct obdo, o_misc));
+	LASSERTF((int)sizeof(((struct obdo *)0)->o_misc) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct obdo *)0)->o_misc));
+	LASSERTF((int)offsetof(struct obdo, o_ioepoch) == 112, "found %lld\n",
+		 (long long)(int)offsetof(struct obdo, o_ioepoch));
+	LASSERTF((int)sizeof(((struct obdo *)0)->o_ioepoch) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct obdo *)0)->o_ioepoch));
+	LASSERTF((int)offsetof(struct obdo, o_stripe_idx) == 120, "found %lld\n",
+		 (long long)(int)offsetof(struct obdo, o_stripe_idx));
+	LASSERTF((int)sizeof(((struct obdo *)0)->o_stripe_idx) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct obdo *)0)->o_stripe_idx));
+	LASSERTF((int)offsetof(struct obdo, o_parent_ver) == 124, "found %lld\n",
+		 (long long)(int)offsetof(struct obdo, o_parent_ver));
+	LASSERTF((int)sizeof(((struct obdo *)0)->o_parent_ver) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct obdo *)0)->o_parent_ver));
+	LASSERTF((int)offsetof(struct obdo, o_handle) == 128, "found %lld\n",
+		 (long long)(int)offsetof(struct obdo, o_handle));
+	LASSERTF((int)sizeof(((struct obdo *)0)->o_handle) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct obdo *)0)->o_handle));
+	LASSERTF((int)offsetof(struct obdo, o_lcookie) == 136, "found %lld\n",
+		 (long long)(int)offsetof(struct obdo, o_lcookie));
+	LASSERTF((int)sizeof(((struct obdo *)0)->o_lcookie) == 32, "found %lld\n",
+		 (long long)(int)sizeof(((struct obdo *)0)->o_lcookie));
+	LASSERTF((int)offsetof(struct obdo, o_uid_h) == 168, "found %lld\n",
+		 (long long)(int)offsetof(struct obdo, o_uid_h));
+	LASSERTF((int)sizeof(((struct obdo *)0)->o_uid_h) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct obdo *)0)->o_uid_h));
+	LASSERTF((int)offsetof(struct obdo, o_gid_h) == 172, "found %lld\n",
+		 (long long)(int)offsetof(struct obdo, o_gid_h));
+	LASSERTF((int)sizeof(((struct obdo *)0)->o_gid_h) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct obdo *)0)->o_gid_h));
+	LASSERTF((int)offsetof(struct obdo, o_data_version) == 176, "found %lld\n",
+		 (long long)(int)offsetof(struct obdo, o_data_version));
+	LASSERTF((int)sizeof(((struct obdo *)0)->o_data_version) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct obdo *)0)->o_data_version));
+	LASSERTF((int)offsetof(struct obdo, o_padding_4) == 184, "found %lld\n",
+		 (long long)(int)offsetof(struct obdo, o_padding_4));
+	LASSERTF((int)sizeof(((struct obdo *)0)->o_padding_4) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct obdo *)0)->o_padding_4));
+	LASSERTF((int)offsetof(struct obdo, o_padding_5) == 192, "found %lld\n",
+		 (long long)(int)offsetof(struct obdo, o_padding_5));
+	LASSERTF((int)sizeof(((struct obdo *)0)->o_padding_5) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct obdo *)0)->o_padding_5));
+	LASSERTF((int)offsetof(struct obdo, o_padding_6) == 200, "found %lld\n",
+		 (long long)(int)offsetof(struct obdo, o_padding_6));
+	LASSERTF((int)sizeof(((struct obdo *)0)->o_padding_6) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct obdo *)0)->o_padding_6));
+	LASSERTF(OBD_MD_FLID == (0x00000001ULL), "found 0x%.16llxULL\n",
+		 OBD_MD_FLID);
+	LASSERTF(OBD_MD_FLATIME == (0x00000002ULL), "found 0x%.16llxULL\n",
+		 OBD_MD_FLATIME);
+	LASSERTF(OBD_MD_FLMTIME == (0x00000004ULL), "found 0x%.16llxULL\n",
+		 OBD_MD_FLMTIME);
+	LASSERTF(OBD_MD_FLCTIME == (0x00000008ULL), "found 0x%.16llxULL\n",
+		 OBD_MD_FLCTIME);
+	LASSERTF(OBD_MD_FLSIZE == (0x00000010ULL), "found 0x%.16llxULL\n",
+		 OBD_MD_FLSIZE);
+	LASSERTF(OBD_MD_FLBLOCKS == (0x00000020ULL), "found 0x%.16llxULL\n",
+		 OBD_MD_FLBLOCKS);
+	LASSERTF(OBD_MD_FLBLKSZ == (0x00000040ULL), "found 0x%.16llxULL\n",
+		 OBD_MD_FLBLKSZ);
+	LASSERTF(OBD_MD_FLMODE == (0x00000080ULL), "found 0x%.16llxULL\n",
+		 OBD_MD_FLMODE);
+	LASSERTF(OBD_MD_FLTYPE == (0x00000100ULL), "found 0x%.16llxULL\n",
+		 OBD_MD_FLTYPE);
+	LASSERTF(OBD_MD_FLUID == (0x00000200ULL), "found 0x%.16llxULL\n",
+		 OBD_MD_FLUID);
+	LASSERTF(OBD_MD_FLGID == (0x00000400ULL), "found 0x%.16llxULL\n",
+		 OBD_MD_FLGID);
+	LASSERTF(OBD_MD_FLFLAGS == (0x00000800ULL), "found 0x%.16llxULL\n",
+		 OBD_MD_FLFLAGS);
+	LASSERTF(OBD_MD_FLNLINK == (0x00002000ULL), "found 0x%.16llxULL\n",
+		 OBD_MD_FLNLINK);
+	LASSERTF(OBD_MD_FLGENER == (0x00004000ULL), "found 0x%.16llxULL\n",
+		 OBD_MD_FLGENER);
+	LASSERTF(OBD_MD_FLRDEV == (0x00010000ULL), "found 0x%.16llxULL\n",
+		 OBD_MD_FLRDEV);
+	LASSERTF(OBD_MD_FLEASIZE == (0x00020000ULL), "found 0x%.16llxULL\n",
+		 OBD_MD_FLEASIZE);
+	LASSERTF(OBD_MD_LINKNAME == (0x00040000ULL), "found 0x%.16llxULL\n",
+		 OBD_MD_LINKNAME);
+	LASSERTF(OBD_MD_FLHANDLE == (0x00080000ULL), "found 0x%.16llxULL\n",
+		 OBD_MD_FLHANDLE);
+	LASSERTF(OBD_MD_FLCKSUM == (0x00100000ULL), "found 0x%.16llxULL\n",
+		 OBD_MD_FLCKSUM);
+	LASSERTF(OBD_MD_FLQOS == (0x00200000ULL), "found 0x%.16llxULL\n",
+		 OBD_MD_FLQOS);
+	LASSERTF(OBD_MD_FLCOOKIE == (0x00800000ULL), "found 0x%.16llxULL\n",
+		 OBD_MD_FLCOOKIE);
+	LASSERTF(OBD_MD_FLGROUP == (0x01000000ULL), "found 0x%.16llxULL\n",
+		 OBD_MD_FLGROUP);
+	LASSERTF(OBD_MD_FLFID == (0x02000000ULL), "found 0x%.16llxULL\n",
+		 OBD_MD_FLFID);
+	LASSERTF(OBD_MD_FLEPOCH == (0x04000000ULL), "found 0x%.16llxULL\n",
+		 OBD_MD_FLEPOCH);
+	LASSERTF(OBD_MD_FLGRANT == (0x08000000ULL), "found 0x%.16llxULL\n",
+		 OBD_MD_FLGRANT);
+	LASSERTF(OBD_MD_FLDIREA == (0x10000000ULL), "found 0x%.16llxULL\n",
+		 OBD_MD_FLDIREA);
+	LASSERTF(OBD_MD_FLUSRQUOTA == (0x20000000ULL), "found 0x%.16llxULL\n",
+		 OBD_MD_FLUSRQUOTA);
+	LASSERTF(OBD_MD_FLGRPQUOTA == (0x40000000ULL), "found 0x%.16llxULL\n",
+		 OBD_MD_FLGRPQUOTA);
+	LASSERTF(OBD_MD_FLMODEASIZE == (0x80000000ULL), "found 0x%.16llxULL\n",
+		 OBD_MD_FLMODEASIZE);
+	LASSERTF(OBD_MD_MDS == (0x0000000100000000ULL), "found 0x%.16llxULL\n",
+		 OBD_MD_MDS);
+	LASSERTF(OBD_MD_REINT == (0x0000000200000000ULL), "found 0x%.16llxULL\n",
+		 OBD_MD_REINT);
+	LASSERTF(OBD_MD_MEA == (0x0000000400000000ULL), "found 0x%.16llxULL\n",
+		 OBD_MD_MEA);
+	LASSERTF(OBD_MD_TSTATE == (0x0000000800000000ULL),
+		 "found 0x%.16llxULL\n", OBD_MD_TSTATE);
+	LASSERTF(OBD_MD_FLXATTR == (0x0000001000000000ULL), "found 0x%.16llxULL\n",
+		 OBD_MD_FLXATTR);
+	LASSERTF(OBD_MD_FLXATTRLS == (0x0000002000000000ULL), "found 0x%.16llxULL\n",
+		 OBD_MD_FLXATTRLS);
+	LASSERTF(OBD_MD_FLXATTRRM == (0x0000004000000000ULL), "found 0x%.16llxULL\n",
+		 OBD_MD_FLXATTRRM);
+	LASSERTF(OBD_MD_FLACL == (0x0000008000000000ULL), "found 0x%.16llxULL\n",
+		 OBD_MD_FLACL);
+	LASSERTF(OBD_MD_FLRMTPERM == (0x0000010000000000ULL), "found 0x%.16llxULL\n",
+		 OBD_MD_FLRMTPERM);
+	LASSERTF(OBD_MD_FLMDSCAPA == (0x0000020000000000ULL), "found 0x%.16llxULL\n",
+		 OBD_MD_FLMDSCAPA);
+	LASSERTF(OBD_MD_FLOSSCAPA == (0x0000040000000000ULL), "found 0x%.16llxULL\n",
+		 OBD_MD_FLOSSCAPA);
+	LASSERTF(OBD_MD_FLCKSPLIT == (0x0000080000000000ULL), "found 0x%.16llxULL\n",
+		 OBD_MD_FLCKSPLIT);
+	LASSERTF(OBD_MD_FLCROSSREF == (0x0000100000000000ULL), "found 0x%.16llxULL\n",
+		 OBD_MD_FLCROSSREF);
+	LASSERTF(OBD_MD_FLGETATTRLOCK == (0x0000200000000000ULL), "found 0x%.16llxULL\n",
+		 OBD_MD_FLGETATTRLOCK);
+	LASSERTF(OBD_MD_FLRMTLSETFACL == (0x0001000000000000ULL), "found 0x%.16llxULL\n",
+		 OBD_MD_FLRMTLSETFACL);
+	LASSERTF(OBD_MD_FLRMTLGETFACL == (0x0002000000000000ULL), "found 0x%.16llxULL\n",
+		 OBD_MD_FLRMTLGETFACL);
+	LASSERTF(OBD_MD_FLRMTRSETFACL == (0x0004000000000000ULL), "found 0x%.16llxULL\n",
+		 OBD_MD_FLRMTRSETFACL);
+	LASSERTF(OBD_MD_FLRMTRGETFACL == (0x0008000000000000ULL), "found 0x%.16llxULL\n",
+		 OBD_MD_FLRMTRGETFACL);
+	LASSERTF(OBD_MD_FLDATAVERSION == (0x0010000000000000ULL), "found 0x%.16llxULL\n",
+		 OBD_MD_FLDATAVERSION);
+	CLASSERT(OBD_FL_INLINEDATA == 0x00000001);
+	CLASSERT(OBD_FL_OBDMDEXISTS == 0x00000002);
+	CLASSERT(OBD_FL_DELORPHAN == 0x00000004);
+	CLASSERT(OBD_FL_NORPC == 0x00000008);
+	CLASSERT(OBD_FL_IDONLY == 0x00000010);
+	CLASSERT(OBD_FL_RECREATE_OBJS == 0x00000020);
+	CLASSERT(OBD_FL_DEBUG_CHECK == 0x00000040);
+	CLASSERT(OBD_FL_NO_USRQUOTA == 0x00000100);
+	CLASSERT(OBD_FL_NO_GRPQUOTA == 0x00000200);
+	CLASSERT(OBD_FL_CREATE_CROW == 0x00000400);
+	CLASSERT(OBD_FL_SRVLOCK == 0x00000800);
+	CLASSERT(OBD_FL_CKSUM_CRC32 == 0x00001000);
+	CLASSERT(OBD_FL_CKSUM_ADLER == 0x00002000);
+	CLASSERT(OBD_FL_CKSUM_CRC32C == 0x00004000);
+	CLASSERT(OBD_FL_CKSUM_RSVD2 == 0x00008000);
+	CLASSERT(OBD_FL_CKSUM_RSVD3 == 0x00010000);
+	CLASSERT(OBD_FL_SHRINK_GRANT == 0x00020000);
+	CLASSERT(OBD_FL_MMAP == 0x00040000);
+	CLASSERT(OBD_FL_RECOV_RESEND == 0x00080000);
+	CLASSERT(OBD_FL_NOSPC_BLK == 0x00100000);
+	CLASSERT(OBD_FL_LOCAL_MASK == 0xf0000000);
+
+	/* Checks for struct lov_ost_data_v1 */
+	LASSERTF((int)sizeof(struct lov_ost_data_v1) == 24, "found %lld\n",
+		 (long long)(int)sizeof(struct lov_ost_data_v1));
+	LASSERTF((int)offsetof(struct lov_ost_data_v1, l_ost_oi) == 0, "found %lld\n",
+		 (long long)(int)offsetof(struct lov_ost_data_v1, l_ost_oi));
+	LASSERTF((int)sizeof(((struct lov_ost_data_v1 *)0)->l_ost_oi) == 16, "found %lld\n",
+		 (long long)(int)sizeof(((struct lov_ost_data_v1 *)0)->l_ost_oi));
+	LASSERTF((int)offsetof(struct lov_ost_data_v1, l_ost_gen) == 16, "found %lld\n",
+		 (long long)(int)offsetof(struct lov_ost_data_v1, l_ost_gen));
+	LASSERTF((int)sizeof(((struct lov_ost_data_v1 *)0)->l_ost_gen) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct lov_ost_data_v1 *)0)->l_ost_gen));
+	LASSERTF((int)offsetof(struct lov_ost_data_v1, l_ost_idx) == 20, "found %lld\n",
+		 (long long)(int)offsetof(struct lov_ost_data_v1, l_ost_idx));
+	LASSERTF((int)sizeof(((struct lov_ost_data_v1 *)0)->l_ost_idx) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct lov_ost_data_v1 *)0)->l_ost_idx));
+
+	/* Checks for struct lov_mds_md_v1 */
+	LASSERTF((int)sizeof(struct lov_mds_md_v1) == 32, "found %lld\n",
+		 (long long)(int)sizeof(struct lov_mds_md_v1));
+	LASSERTF((int)offsetof(struct lov_mds_md_v1, lmm_magic) == 0, "found %lld\n",
+		 (long long)(int)offsetof(struct lov_mds_md_v1, lmm_magic));
+	LASSERTF((int)sizeof(((struct lov_mds_md_v1 *)0)->lmm_magic) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct lov_mds_md_v1 *)0)->lmm_magic));
+	LASSERTF((int)offsetof(struct lov_mds_md_v1, lmm_pattern) == 4, "found %lld\n",
+		 (long long)(int)offsetof(struct lov_mds_md_v1, lmm_pattern));
+	LASSERTF((int)sizeof(((struct lov_mds_md_v1 *)0)->lmm_pattern) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct lov_mds_md_v1 *)0)->lmm_pattern));
+	LASSERTF((int)offsetof(struct lov_mds_md_v1, lmm_oi) == 8, "found %lld\n",
+		 (long long)(int)offsetof(struct lov_mds_md_v1, lmm_oi));
+	LASSERTF((int)sizeof(((struct lov_mds_md_v1 *)0)->lmm_oi) == 16, "found %lld\n",
+		 (long long)(int)sizeof(((struct lov_mds_md_v1 *)0)->lmm_oi));
+	LASSERTF((int)offsetof(struct lov_mds_md_v1, lmm_stripe_size) == 24, "found %lld\n",
+		 (long long)(int)offsetof(struct lov_mds_md_v1, lmm_stripe_size));
+	LASSERTF((int)sizeof(((struct lov_mds_md_v1 *)0)->lmm_stripe_size) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct lov_mds_md_v1 *)0)->lmm_stripe_size));
+	LASSERTF((int)offsetof(struct lov_mds_md_v1, lmm_stripe_count) == 28, "found %lld\n",
+		 (long long)(int)offsetof(struct lov_mds_md_v1, lmm_stripe_count));
+	LASSERTF((int)sizeof(((struct lov_mds_md_v1 *)0)->lmm_stripe_count) == 2, "found %lld\n",
+		 (long long)(int)sizeof(((struct lov_mds_md_v1 *)0)->lmm_stripe_count));
+	LASSERTF((int)offsetof(struct lov_mds_md_v1, lmm_layout_gen) == 30, "found %lld\n",
+		 (long long)(int)offsetof(struct lov_mds_md_v1, lmm_layout_gen));
+	LASSERTF((int)sizeof(((struct lov_mds_md_v1 *)0)->lmm_layout_gen) == 2, "found %lld\n",
+		 (long long)(int)sizeof(((struct lov_mds_md_v1 *)0)->lmm_layout_gen));
+	LASSERTF((int)offsetof(struct lov_mds_md_v1, lmm_objects[0]) == 32, "found %lld\n",
+		 (long long)(int)offsetof(struct lov_mds_md_v1, lmm_objects[0]));
+	LASSERTF((int)sizeof(((struct lov_mds_md_v1 *)0)->lmm_objects[0]) == 24, "found %lld\n",
+		 (long long)(int)sizeof(((struct lov_mds_md_v1 *)0)->lmm_objects[0]));
+	CLASSERT(LOV_MAGIC_V1 == 0x0BD10BD0);
+
+	/* Checks for struct lov_mds_md_v3 */
+	LASSERTF((int)sizeof(struct lov_mds_md_v3) == 48, "found %lld\n",
+		 (long long)(int)sizeof(struct lov_mds_md_v3));
+	LASSERTF((int)offsetof(struct lov_mds_md_v3, lmm_magic) == 0, "found %lld\n",
+		 (long long)(int)offsetof(struct lov_mds_md_v3, lmm_magic));
+	LASSERTF((int)sizeof(((struct lov_mds_md_v3 *)0)->lmm_magic) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct lov_mds_md_v3 *)0)->lmm_magic));
+	LASSERTF((int)offsetof(struct lov_mds_md_v3, lmm_pattern) == 4, "found %lld\n",
+		 (long long)(int)offsetof(struct lov_mds_md_v3, lmm_pattern));
+	LASSERTF((int)sizeof(((struct lov_mds_md_v3 *)0)->lmm_pattern) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct lov_mds_md_v3 *)0)->lmm_pattern));
+	LASSERTF((int)offsetof(struct lov_mds_md_v3, lmm_oi) == 8, "found %lld\n",
+		 (long long)(int)offsetof(struct lov_mds_md_v3, lmm_oi));
+	LASSERTF((int)sizeof(((struct lov_mds_md_v3 *)0)->lmm_oi) == 16, "found %lld\n",
+		 (long long)(int)sizeof(((struct lov_mds_md_v3 *)0)->lmm_oi));
+	LASSERTF((int)offsetof(struct lov_mds_md_v3, lmm_stripe_size) == 24, "found %lld\n",
+		 (long long)(int)offsetof(struct lov_mds_md_v3, lmm_stripe_size));
+	LASSERTF((int)sizeof(((struct lov_mds_md_v3 *)0)->lmm_stripe_size) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct lov_mds_md_v3 *)0)->lmm_stripe_size));
+	LASSERTF((int)offsetof(struct lov_mds_md_v3, lmm_stripe_count) == 28, "found %lld\n",
+		 (long long)(int)offsetof(struct lov_mds_md_v3, lmm_stripe_count));
+	LASSERTF((int)sizeof(((struct lov_mds_md_v3 *)0)->lmm_stripe_count) == 2, "found %lld\n",
+		 (long long)(int)sizeof(((struct lov_mds_md_v3 *)0)->lmm_stripe_count));
+	LASSERTF((int)offsetof(struct lov_mds_md_v3, lmm_layout_gen) == 30, "found %lld\n",
+		 (long long)(int)offsetof(struct lov_mds_md_v3, lmm_layout_gen));
+	LASSERTF((int)sizeof(((struct lov_mds_md_v3 *)0)->lmm_layout_gen) == 2, "found %lld\n",
+		 (long long)(int)sizeof(((struct lov_mds_md_v3 *)0)->lmm_layout_gen));
+	CLASSERT(LOV_MAXPOOLNAME == 16);
+	LASSERTF((int)offsetof(struct lov_mds_md_v3, lmm_pool_name[16]) == 48, "found %lld\n",
+		 (long long)(int)offsetof(struct lov_mds_md_v3, lmm_pool_name[16]));
+	LASSERTF((int)sizeof(((struct lov_mds_md_v3 *)0)->lmm_pool_name[16]) == 1, "found %lld\n",
+		 (long long)(int)sizeof(((struct lov_mds_md_v3 *)0)->lmm_pool_name[16]));
+	LASSERTF((int)offsetof(struct lov_mds_md_v3, lmm_objects[0]) == 48, "found %lld\n",
+		 (long long)(int)offsetof(struct lov_mds_md_v3, lmm_objects[0]));
+	LASSERTF((int)sizeof(((struct lov_mds_md_v3 *)0)->lmm_objects[0]) == 24, "found %lld\n",
+		 (long long)(int)sizeof(((struct lov_mds_md_v3 *)0)->lmm_objects[0]));
+	CLASSERT(LOV_MAGIC_V3 == 0x0BD30BD0);
+	LASSERTF(LOV_PATTERN_RAID0 == 0x00000001UL, "found 0x%.8xUL\n",
+		(unsigned)LOV_PATTERN_RAID0);
+	LASSERTF(LOV_PATTERN_RAID1 == 0x00000002UL, "found 0x%.8xUL\n",
+		(unsigned)LOV_PATTERN_RAID1);
+	LASSERTF(LOV_PATTERN_FIRST == 0x00000100UL, "found 0x%.8xUL\n",
+		(unsigned)LOV_PATTERN_FIRST);
+	LASSERTF(LOV_PATTERN_CMOBD == 0x00000200UL, "found 0x%.8xUL\n",
+		(unsigned)LOV_PATTERN_CMOBD);
+
+	/* Checks for struct obd_statfs */
+	LASSERTF((int)sizeof(struct obd_statfs) == 144, "found %lld\n",
+		 (long long)(int)sizeof(struct obd_statfs));
+	LASSERTF((int)offsetof(struct obd_statfs, os_type) == 0, "found %lld\n",
+		 (long long)(int)offsetof(struct obd_statfs, os_type));
+	LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_type) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct obd_statfs *)0)->os_type));
+	LASSERTF((int)offsetof(struct obd_statfs, os_blocks) == 8, "found %lld\n",
+		 (long long)(int)offsetof(struct obd_statfs, os_blocks));
+	LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_blocks) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct obd_statfs *)0)->os_blocks));
+	LASSERTF((int)offsetof(struct obd_statfs, os_bfree) == 16, "found %lld\n",
+		 (long long)(int)offsetof(struct obd_statfs, os_bfree));
+	LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_bfree) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct obd_statfs *)0)->os_bfree));
+	LASSERTF((int)offsetof(struct obd_statfs, os_bavail) == 24, "found %lld\n",
+		 (long long)(int)offsetof(struct obd_statfs, os_bavail));
+	LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_bavail) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct obd_statfs *)0)->os_bavail));
+	LASSERTF((int)offsetof(struct obd_statfs, os_ffree) == 40, "found %lld\n",
+		 (long long)(int)offsetof(struct obd_statfs, os_ffree));
+	LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_ffree) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct obd_statfs *)0)->os_ffree));
+	LASSERTF((int)offsetof(struct obd_statfs, os_fsid) == 48, "found %lld\n",
+		 (long long)(int)offsetof(struct obd_statfs, os_fsid));
+	LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_fsid) == 40, "found %lld\n",
+		 (long long)(int)sizeof(((struct obd_statfs *)0)->os_fsid));
+	LASSERTF((int)offsetof(struct obd_statfs, os_bsize) == 88, "found %lld\n",
+		 (long long)(int)offsetof(struct obd_statfs, os_bsize));
+	LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_bsize) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct obd_statfs *)0)->os_bsize));
+	LASSERTF((int)offsetof(struct obd_statfs, os_namelen) == 92, "found %lld\n",
+		 (long long)(int)offsetof(struct obd_statfs, os_namelen));
+	LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_namelen) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct obd_statfs *)0)->os_namelen));
+	LASSERTF((int)offsetof(struct obd_statfs, os_state) == 104, "found %lld\n",
+		 (long long)(int)offsetof(struct obd_statfs, os_state));
+	LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_state) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct obd_statfs *)0)->os_state));
+	LASSERTF((int)offsetof(struct obd_statfs, os_fprecreated) == 108, "found %lld\n",
+		 (long long)(int)offsetof(struct obd_statfs, os_fprecreated));
+	LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_fprecreated) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct obd_statfs *)0)->os_fprecreated));
+	LASSERTF((int)offsetof(struct obd_statfs, os_spare2) == 112, "found %lld\n",
+		 (long long)(int)offsetof(struct obd_statfs, os_spare2));
+	LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_spare2) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct obd_statfs *)0)->os_spare2));
+	LASSERTF((int)offsetof(struct obd_statfs, os_spare3) == 116, "found %lld\n",
+		 (long long)(int)offsetof(struct obd_statfs, os_spare3));
+	LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_spare3) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct obd_statfs *)0)->os_spare3));
+	LASSERTF((int)offsetof(struct obd_statfs, os_spare4) == 120, "found %lld\n",
+		 (long long)(int)offsetof(struct obd_statfs, os_spare4));
+	LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_spare4) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct obd_statfs *)0)->os_spare4));
+	LASSERTF((int)offsetof(struct obd_statfs, os_spare5) == 124, "found %lld\n",
+		 (long long)(int)offsetof(struct obd_statfs, os_spare5));
+	LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_spare5) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct obd_statfs *)0)->os_spare5));
+	LASSERTF((int)offsetof(struct obd_statfs, os_spare6) == 128, "found %lld\n",
+		 (long long)(int)offsetof(struct obd_statfs, os_spare6));
+	LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_spare6) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct obd_statfs *)0)->os_spare6));
+	LASSERTF((int)offsetof(struct obd_statfs, os_spare7) == 132, "found %lld\n",
+		 (long long)(int)offsetof(struct obd_statfs, os_spare7));
+	LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_spare7) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct obd_statfs *)0)->os_spare7));
+	LASSERTF((int)offsetof(struct obd_statfs, os_spare8) == 136, "found %lld\n",
+		 (long long)(int)offsetof(struct obd_statfs, os_spare8));
+	LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_spare8) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct obd_statfs *)0)->os_spare8));
+	LASSERTF((int)offsetof(struct obd_statfs, os_spare9) == 140, "found %lld\n",
+		 (long long)(int)offsetof(struct obd_statfs, os_spare9));
+	LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_spare9) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct obd_statfs *)0)->os_spare9));
+
+	/* Checks for struct obd_ioobj */
+	LASSERTF((int)sizeof(struct obd_ioobj) == 24, "found %lld\n",
+		 (long long)(int)sizeof(struct obd_ioobj));
+	LASSERTF((int)offsetof(struct obd_ioobj, ioo_oid) == 0, "found %lld\n",
+		 (long long)(int)offsetof(struct obd_ioobj, ioo_oid));
+	LASSERTF((int)sizeof(((struct obd_ioobj *)0)->ioo_oid) == 16, "found %lld\n",
+		 (long long)(int)sizeof(((struct obd_ioobj *)0)->ioo_oid));
+	LASSERTF((int)offsetof(struct obd_ioobj, ioo_max_brw) == 16, "found %lld\n",
+		 (long long)(int)offsetof(struct obd_ioobj, ioo_max_brw));
+	LASSERTF((int)sizeof(((struct obd_ioobj *)0)->ioo_max_brw) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct obd_ioobj *)0)->ioo_max_brw));
+	LASSERTF((int)offsetof(struct obd_ioobj, ioo_bufcnt) == 20, "found %lld\n",
+		 (long long)(int)offsetof(struct obd_ioobj, ioo_bufcnt));
+	LASSERTF((int)sizeof(((struct obd_ioobj *)0)->ioo_bufcnt) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct obd_ioobj *)0)->ioo_bufcnt));
+
+	/* Checks for union lquota_id */
+	LASSERTF((int)sizeof(union lquota_id) == 16, "found %lld\n",
+		 (long long)(int)sizeof(union lquota_id));
+
+	LASSERTF(QUOTABLOCK_BITS == 10, "found %lld\n",
+		 (long long)QUOTABLOCK_BITS);
+	LASSERTF(QUOTABLOCK_SIZE == 1024, "found %lld\n",
+		 (long long)QUOTABLOCK_SIZE);
+
+	/* Checks for struct obd_quotactl */
+	LASSERTF((int)sizeof(struct obd_quotactl) == 112, "found %lld\n",
+		 (long long)(int)sizeof(struct obd_quotactl));
+	LASSERTF((int)offsetof(struct obd_quotactl, qc_cmd) == 0, "found %lld\n",
+		 (long long)(int)offsetof(struct obd_quotactl, qc_cmd));
+	LASSERTF((int)sizeof(((struct obd_quotactl *)0)->qc_cmd) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct obd_quotactl *)0)->qc_cmd));
+	LASSERTF((int)offsetof(struct obd_quotactl, qc_type) == 4, "found %lld\n",
+		 (long long)(int)offsetof(struct obd_quotactl, qc_type));
+	LASSERTF((int)sizeof(((struct obd_quotactl *)0)->qc_type) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct obd_quotactl *)0)->qc_type));
+	LASSERTF((int)offsetof(struct obd_quotactl, qc_id) == 8, "found %lld\n",
+		 (long long)(int)offsetof(struct obd_quotactl, qc_id));
+	LASSERTF((int)sizeof(((struct obd_quotactl *)0)->qc_id) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct obd_quotactl *)0)->qc_id));
+	LASSERTF((int)offsetof(struct obd_quotactl, qc_stat) == 12, "found %lld\n",
+		 (long long)(int)offsetof(struct obd_quotactl, qc_stat));
+	LASSERTF((int)sizeof(((struct obd_quotactl *)0)->qc_stat) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct obd_quotactl *)0)->qc_stat));
+	LASSERTF((int)offsetof(struct obd_quotactl, qc_dqinfo) == 16, "found %lld\n",
+		 (long long)(int)offsetof(struct obd_quotactl, qc_dqinfo));
+	LASSERTF((int)sizeof(((struct obd_quotactl *)0)->qc_dqinfo) == 24, "found %lld\n",
+		 (long long)(int)sizeof(((struct obd_quotactl *)0)->qc_dqinfo));
+	LASSERTF((int)offsetof(struct obd_quotactl, qc_dqblk) == 40, "found %lld\n",
+		 (long long)(int)offsetof(struct obd_quotactl, qc_dqblk));
+	LASSERTF((int)sizeof(((struct obd_quotactl *)0)->qc_dqblk) == 72, "found %lld\n",
+		 (long long)(int)sizeof(((struct obd_quotactl *)0)->qc_dqblk));
+
+	/* Checks for struct obd_dqinfo */
+	LASSERTF((int)sizeof(struct obd_dqinfo) == 24, "found %lld\n",
+		 (long long)(int)sizeof(struct obd_dqinfo));
+	LASSERTF((int)offsetof(struct obd_dqinfo, dqi_bgrace) == 0, "found %lld\n",
+		 (long long)(int)offsetof(struct obd_dqinfo, dqi_bgrace));
+	LASSERTF((int)sizeof(((struct obd_dqinfo *)0)->dqi_bgrace) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct obd_dqinfo *)0)->dqi_bgrace));
+	LASSERTF((int)offsetof(struct obd_dqinfo, dqi_igrace) == 8, "found %lld\n",
+		 (long long)(int)offsetof(struct obd_dqinfo, dqi_igrace));
+	LASSERTF((int)sizeof(((struct obd_dqinfo *)0)->dqi_igrace) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct obd_dqinfo *)0)->dqi_igrace));
+	LASSERTF((int)offsetof(struct obd_dqinfo, dqi_flags) == 16, "found %lld\n",
+		 (long long)(int)offsetof(struct obd_dqinfo, dqi_flags));
+	LASSERTF((int)sizeof(((struct obd_dqinfo *)0)->dqi_flags) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct obd_dqinfo *)0)->dqi_flags));
+	LASSERTF((int)offsetof(struct obd_dqinfo, dqi_valid) == 20, "found %lld\n",
+		 (long long)(int)offsetof(struct obd_dqinfo, dqi_valid));
+	LASSERTF((int)sizeof(((struct obd_dqinfo *)0)->dqi_valid) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct obd_dqinfo *)0)->dqi_valid));
+
+	/* Checks for struct obd_dqblk */
+	LASSERTF((int)sizeof(struct obd_dqblk) == 72, "found %lld\n",
+		 (long long)(int)sizeof(struct obd_dqblk));
+	LASSERTF((int)offsetof(struct obd_dqblk, dqb_bhardlimit) == 0, "found %lld\n",
+		 (long long)(int)offsetof(struct obd_dqblk, dqb_bhardlimit));
+	LASSERTF((int)sizeof(((struct obd_dqblk *)0)->dqb_bhardlimit) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct obd_dqblk *)0)->dqb_bhardlimit));
+	LASSERTF((int)offsetof(struct obd_dqblk, dqb_bsoftlimit) == 8, "found %lld\n",
+		 (long long)(int)offsetof(struct obd_dqblk, dqb_bsoftlimit));
+	LASSERTF((int)sizeof(((struct obd_dqblk *)0)->dqb_bsoftlimit) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct obd_dqblk *)0)->dqb_bsoftlimit));
+	LASSERTF((int)offsetof(struct obd_dqblk, dqb_curspace) == 16, "found %lld\n",
+		 (long long)(int)offsetof(struct obd_dqblk, dqb_curspace));
+	LASSERTF((int)sizeof(((struct obd_dqblk *)0)->dqb_curspace) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct obd_dqblk *)0)->dqb_curspace));
+	LASSERTF((int)offsetof(struct obd_dqblk, dqb_ihardlimit) == 24, "found %lld\n",
+		 (long long)(int)offsetof(struct obd_dqblk, dqb_ihardlimit));
+	LASSERTF((int)sizeof(((struct obd_dqblk *)0)->dqb_ihardlimit) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct obd_dqblk *)0)->dqb_ihardlimit));
+	LASSERTF((int)offsetof(struct obd_dqblk, dqb_isoftlimit) == 32, "found %lld\n",
+		 (long long)(int)offsetof(struct obd_dqblk, dqb_isoftlimit));
+	LASSERTF((int)sizeof(((struct obd_dqblk *)0)->dqb_isoftlimit) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct obd_dqblk *)0)->dqb_isoftlimit));
+	LASSERTF((int)offsetof(struct obd_dqblk, dqb_curinodes) == 40, "found %lld\n",
+		 (long long)(int)offsetof(struct obd_dqblk, dqb_curinodes));
+	LASSERTF((int)sizeof(((struct obd_dqblk *)0)->dqb_curinodes) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct obd_dqblk *)0)->dqb_curinodes));
+	LASSERTF((int)offsetof(struct obd_dqblk, dqb_btime) == 48, "found %lld\n",
+		 (long long)(int)offsetof(struct obd_dqblk, dqb_btime));
+	LASSERTF((int)sizeof(((struct obd_dqblk *)0)->dqb_btime) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct obd_dqblk *)0)->dqb_btime));
+	LASSERTF((int)offsetof(struct obd_dqblk, dqb_itime) == 56, "found %lld\n",
+		 (long long)(int)offsetof(struct obd_dqblk, dqb_itime));
+	LASSERTF((int)sizeof(((struct obd_dqblk *)0)->dqb_itime) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct obd_dqblk *)0)->dqb_itime));
+	LASSERTF((int)offsetof(struct obd_dqblk, dqb_valid) == 64, "found %lld\n",
+		 (long long)(int)offsetof(struct obd_dqblk, dqb_valid));
+	LASSERTF((int)sizeof(((struct obd_dqblk *)0)->dqb_valid) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct obd_dqblk *)0)->dqb_valid));
+	LASSERTF((int)offsetof(struct obd_dqblk, dqb_padding) == 68, "found %lld\n",
+		 (long long)(int)offsetof(struct obd_dqblk, dqb_padding));
+	LASSERTF((int)sizeof(((struct obd_dqblk *)0)->dqb_padding) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct obd_dqblk *)0)->dqb_padding));
+	LASSERTF(Q_QUOTACHECK == 0x800100, "found 0x%.8x\n",
+		Q_QUOTACHECK);
+	LASSERTF(Q_INITQUOTA == 0x800101, "found 0x%.8x\n",
+		Q_INITQUOTA);
+	LASSERTF(Q_GETOINFO == 0x800102, "found 0x%.8x\n",
+		Q_GETOINFO);
+	LASSERTF(Q_GETOQUOTA == 0x800103, "found 0x%.8x\n",
+		Q_GETOQUOTA);
+	LASSERTF(Q_FINVALIDATE == 0x800104, "found 0x%.8x\n",
+		Q_FINVALIDATE);
+
+	/* Checks for struct lquota_acct_rec */
+	LASSERTF((int)sizeof(struct lquota_acct_rec) == 16, "found %lld\n",
+		 (long long)(int)sizeof(struct lquota_acct_rec));
+	LASSERTF((int)offsetof(struct lquota_acct_rec, bspace) == 0, "found %lld\n",
+		 (long long)(int)offsetof(struct lquota_acct_rec, bspace));
+	LASSERTF((int)sizeof(((struct lquota_acct_rec *)0)->bspace) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct lquota_acct_rec *)0)->bspace));
+	LASSERTF((int)offsetof(struct lquota_acct_rec, ispace) == 8, "found %lld\n",
+		 (long long)(int)offsetof(struct lquota_acct_rec, ispace));
+	LASSERTF((int)sizeof(((struct lquota_acct_rec *)0)->ispace) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct lquota_acct_rec *)0)->ispace));
+
+	/* Checks for struct lquota_glb_rec */
+	LASSERTF((int)sizeof(struct lquota_glb_rec) == 32, "found %lld\n",
+		 (long long)(int)sizeof(struct lquota_glb_rec));
+	LASSERTF((int)offsetof(struct lquota_glb_rec, qbr_hardlimit) == 0, "found %lld\n",
+		 (long long)(int)offsetof(struct lquota_glb_rec, qbr_hardlimit));
+	LASSERTF((int)sizeof(((struct lquota_glb_rec *)0)->qbr_hardlimit) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct lquota_glb_rec *)0)->qbr_hardlimit));
+	LASSERTF((int)offsetof(struct lquota_glb_rec, qbr_softlimit) == 8, "found %lld\n",
+		 (long long)(int)offsetof(struct lquota_glb_rec, qbr_softlimit));
+	LASSERTF((int)sizeof(((struct lquota_glb_rec *)0)->qbr_softlimit) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct lquota_glb_rec *)0)->qbr_softlimit));
+	LASSERTF((int)offsetof(struct lquota_glb_rec, qbr_time) == 16, "found %lld\n",
+		 (long long)(int)offsetof(struct lquota_glb_rec, qbr_time));
+	LASSERTF((int)sizeof(((struct lquota_glb_rec *)0)->qbr_time) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct lquota_glb_rec *)0)->qbr_time));
+	LASSERTF((int)offsetof(struct lquota_glb_rec, qbr_granted) == 24, "found %lld\n",
+		 (long long)(int)offsetof(struct lquota_glb_rec, qbr_granted));
+	LASSERTF((int)sizeof(((struct lquota_glb_rec *)0)->qbr_granted) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct lquota_glb_rec *)0)->qbr_granted));
+
+	/* Checks for struct lquota_slv_rec */
+	LASSERTF((int)sizeof(struct lquota_slv_rec) == 8, "found %lld\n",
+		 (long long)(int)sizeof(struct lquota_slv_rec));
+	LASSERTF((int)offsetof(struct lquota_slv_rec, qsr_granted) == 0, "found %lld\n",
+		 (long long)(int)offsetof(struct lquota_slv_rec, qsr_granted));
+	LASSERTF((int)sizeof(((struct lquota_slv_rec *)0)->qsr_granted) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct lquota_slv_rec *)0)->qsr_granted));
+
+	/* Checks for struct idx_info */
+	LASSERTF((int)sizeof(struct idx_info) == 80, "found %lld\n",
+		 (long long)(int)sizeof(struct idx_info));
+	LASSERTF((int)offsetof(struct idx_info, ii_magic) == 0, "found %lld\n",
+		 (long long)(int)offsetof(struct idx_info, ii_magic));
+	LASSERTF((int)sizeof(((struct idx_info *)0)->ii_magic) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct idx_info *)0)->ii_magic));
+	LASSERTF((int)offsetof(struct idx_info, ii_flags) == 4, "found %lld\n",
+		 (long long)(int)offsetof(struct idx_info, ii_flags));
+	LASSERTF((int)sizeof(((struct idx_info *)0)->ii_flags) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct idx_info *)0)->ii_flags));
+	LASSERTF((int)offsetof(struct idx_info, ii_count) == 8, "found %lld\n",
+		 (long long)(int)offsetof(struct idx_info, ii_count));
+	LASSERTF((int)sizeof(((struct idx_info *)0)->ii_count) == 2, "found %lld\n",
+		 (long long)(int)sizeof(((struct idx_info *)0)->ii_count));
+	LASSERTF((int)offsetof(struct idx_info, ii_pad0) == 10, "found %lld\n",
+		 (long long)(int)offsetof(struct idx_info, ii_pad0));
+	LASSERTF((int)sizeof(((struct idx_info *)0)->ii_pad0) == 2, "found %lld\n",
+		 (long long)(int)sizeof(((struct idx_info *)0)->ii_pad0));
+	LASSERTF((int)offsetof(struct idx_info, ii_attrs) == 12, "found %lld\n",
+		 (long long)(int)offsetof(struct idx_info, ii_attrs));
+	LASSERTF((int)sizeof(((struct idx_info *)0)->ii_attrs) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct idx_info *)0)->ii_attrs));
+	LASSERTF((int)offsetof(struct idx_info, ii_fid) == 16, "found %lld\n",
+		 (long long)(int)offsetof(struct idx_info, ii_fid));
+	LASSERTF((int)sizeof(((struct idx_info *)0)->ii_fid) == 16, "found %lld\n",
+		 (long long)(int)sizeof(((struct idx_info *)0)->ii_fid));
+	LASSERTF((int)offsetof(struct idx_info, ii_version) == 32, "found %lld\n",
+		 (long long)(int)offsetof(struct idx_info, ii_version));
+	LASSERTF((int)sizeof(((struct idx_info *)0)->ii_version) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct idx_info *)0)->ii_version));
+	LASSERTF((int)offsetof(struct idx_info, ii_hash_start) == 40, "found %lld\n",
+		 (long long)(int)offsetof(struct idx_info, ii_hash_start));
+	LASSERTF((int)sizeof(((struct idx_info *)0)->ii_hash_start) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct idx_info *)0)->ii_hash_start));
+	LASSERTF((int)offsetof(struct idx_info, ii_hash_end) == 48, "found %lld\n",
+		 (long long)(int)offsetof(struct idx_info, ii_hash_end));
+	LASSERTF((int)sizeof(((struct idx_info *)0)->ii_hash_end) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct idx_info *)0)->ii_hash_end));
+	LASSERTF((int)offsetof(struct idx_info, ii_keysize) == 56, "found %lld\n",
+		 (long long)(int)offsetof(struct idx_info, ii_keysize));
+	LASSERTF((int)sizeof(((struct idx_info *)0)->ii_keysize) == 2, "found %lld\n",
+		 (long long)(int)sizeof(((struct idx_info *)0)->ii_keysize));
+	LASSERTF((int)offsetof(struct idx_info, ii_recsize) == 58, "found %lld\n",
+		 (long long)(int)offsetof(struct idx_info, ii_recsize));
+	LASSERTF((int)sizeof(((struct idx_info *)0)->ii_recsize) == 2, "found %lld\n",
+		 (long long)(int)sizeof(((struct idx_info *)0)->ii_recsize));
+	LASSERTF((int)offsetof(struct idx_info, ii_pad1) == 60, "found %lld\n",
+		 (long long)(int)offsetof(struct idx_info, ii_pad1));
+	LASSERTF((int)sizeof(((struct idx_info *)0)->ii_pad1) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct idx_info *)0)->ii_pad1));
+	LASSERTF((int)offsetof(struct idx_info, ii_pad2) == 64, "found %lld\n",
+		 (long long)(int)offsetof(struct idx_info, ii_pad2));
+	LASSERTF((int)sizeof(((struct idx_info *)0)->ii_pad2) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct idx_info *)0)->ii_pad2));
+	LASSERTF((int)offsetof(struct idx_info, ii_pad3) == 72, "found %lld\n",
+		 (long long)(int)offsetof(struct idx_info, ii_pad3));
+	LASSERTF((int)sizeof(((struct idx_info *)0)->ii_pad3) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct idx_info *)0)->ii_pad3));
+	CLASSERT(IDX_INFO_MAGIC == 0x3D37CC37);
+
+	/* Checks for struct lu_idxpage */
+	LASSERTF((int)sizeof(struct lu_idxpage) == 16, "found %lld\n",
+		 (long long)(int)sizeof(struct lu_idxpage));
+	LASSERTF((int)offsetof(struct lu_idxpage, lip_magic) == 0, "found %lld\n",
+		 (long long)(int)offsetof(struct lu_idxpage, lip_magic));
+	LASSERTF((int)sizeof(((struct lu_idxpage *)0)->lip_magic) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct lu_idxpage *)0)->lip_magic));
+	LASSERTF((int)offsetof(struct lu_idxpage, lip_flags) == 4, "found %lld\n",
+		 (long long)(int)offsetof(struct lu_idxpage, lip_flags));
+	LASSERTF((int)sizeof(((struct lu_idxpage *)0)->lip_flags) == 2, "found %lld\n",
+		 (long long)(int)sizeof(((struct lu_idxpage *)0)->lip_flags));
+	LASSERTF((int)offsetof(struct lu_idxpage, lip_nr) == 6, "found %lld\n",
+		 (long long)(int)offsetof(struct lu_idxpage, lip_nr));
+	LASSERTF((int)sizeof(((struct lu_idxpage *)0)->lip_nr) == 2, "found %lld\n",
+		 (long long)(int)sizeof(((struct lu_idxpage *)0)->lip_nr));
+	LASSERTF((int)offsetof(struct lu_idxpage, lip_pad0) == 8, "found %lld\n",
+		 (long long)(int)offsetof(struct lu_idxpage, lip_pad0));
+	LASSERTF((int)sizeof(((struct lu_idxpage *)0)->lip_pad0) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct lu_idxpage *)0)->lip_pad0));
+	CLASSERT(LIP_MAGIC == 0x8A6D6B6C);
+	LASSERTF(LIP_HDR_SIZE == 16, "found %lld\n",
+		 (long long)LIP_HDR_SIZE);
+	LASSERTF(II_FL_NOHASH == 1, "found %lld\n",
+		 (long long)II_FL_NOHASH);
+	LASSERTF(II_FL_VARKEY == 2, "found %lld\n",
+		 (long long)II_FL_VARKEY);
+	LASSERTF(II_FL_VARREC == 4, "found %lld\n",
+		 (long long)II_FL_VARREC);
+	LASSERTF(II_FL_NONUNQ == 8, "found %lld\n",
+		 (long long)II_FL_NONUNQ);
+
+	/* Checks for struct niobuf_remote */
+	LASSERTF((int)sizeof(struct niobuf_remote) == 16, "found %lld\n",
+		 (long long)(int)sizeof(struct niobuf_remote));
+	LASSERTF((int)offsetof(struct niobuf_remote, offset) == 0, "found %lld\n",
+		 (long long)(int)offsetof(struct niobuf_remote, offset));
+	LASSERTF((int)sizeof(((struct niobuf_remote *)0)->offset) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct niobuf_remote *)0)->offset));
+	LASSERTF((int)offsetof(struct niobuf_remote, len) == 8, "found %lld\n",
+		 (long long)(int)offsetof(struct niobuf_remote, len));
+	LASSERTF((int)sizeof(((struct niobuf_remote *)0)->len) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct niobuf_remote *)0)->len));
+	LASSERTF((int)offsetof(struct niobuf_remote, flags) == 12, "found %lld\n",
+		 (long long)(int)offsetof(struct niobuf_remote, flags));
+	LASSERTF((int)sizeof(((struct niobuf_remote *)0)->flags) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct niobuf_remote *)0)->flags));
+	LASSERTF(OBD_BRW_READ == 0x01, "found 0x%.8x\n",
+		OBD_BRW_READ);
+	LASSERTF(OBD_BRW_WRITE == 0x02, "found 0x%.8x\n",
+		OBD_BRW_WRITE);
+	LASSERTF(OBD_BRW_SYNC == 0x08, "found 0x%.8x\n",
+		OBD_BRW_SYNC);
+	LASSERTF(OBD_BRW_CHECK == 0x10, "found 0x%.8x\n",
+		OBD_BRW_CHECK);
+	LASSERTF(OBD_BRW_FROM_GRANT == 0x20, "found 0x%.8x\n",
+		OBD_BRW_FROM_GRANT);
+	LASSERTF(OBD_BRW_GRANTED == 0x40, "found 0x%.8x\n",
+		OBD_BRW_GRANTED);
+	LASSERTF(OBD_BRW_NOCACHE == 0x80, "found 0x%.8x\n",
+		OBD_BRW_NOCACHE);
+	LASSERTF(OBD_BRW_NOQUOTA == 0x100, "found 0x%.8x\n",
+		OBD_BRW_NOQUOTA);
+	LASSERTF(OBD_BRW_SRVLOCK == 0x200, "found 0x%.8x\n",
+		OBD_BRW_SRVLOCK);
+	LASSERTF(OBD_BRW_ASYNC == 0x400, "found 0x%.8x\n",
+		OBD_BRW_ASYNC);
+	LASSERTF(OBD_BRW_MEMALLOC == 0x800, "found 0x%.8x\n",
+		OBD_BRW_MEMALLOC);
+
+	/* Checks for struct ost_body */
+	LASSERTF((int)sizeof(struct ost_body) == 208, "found %lld\n",
+		 (long long)(int)sizeof(struct ost_body));
+	LASSERTF((int)offsetof(struct ost_body, oa) == 0, "found %lld\n",
+		 (long long)(int)offsetof(struct ost_body, oa));
+	LASSERTF((int)sizeof(((struct ost_body *)0)->oa) == 208, "found %lld\n",
+		 (long long)(int)sizeof(((struct ost_body *)0)->oa));
+
+	/* Checks for struct ll_fid */
+	LASSERTF((int)sizeof(struct ll_fid) == 16, "found %lld\n",
+		 (long long)(int)sizeof(struct ll_fid));
+	LASSERTF((int)offsetof(struct ll_fid, id) == 0, "found %lld\n",
+		 (long long)(int)offsetof(struct ll_fid, id));
+	LASSERTF((int)sizeof(((struct ll_fid *)0)->id) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct ll_fid *)0)->id));
+	LASSERTF((int)offsetof(struct ll_fid, generation) == 8, "found %lld\n",
+		 (long long)(int)offsetof(struct ll_fid, generation));
+	LASSERTF((int)sizeof(((struct ll_fid *)0)->generation) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct ll_fid *)0)->generation));
+	LASSERTF((int)offsetof(struct ll_fid, f_type) == 12, "found %lld\n",
+		 (long long)(int)offsetof(struct ll_fid, f_type));
+	LASSERTF((int)sizeof(((struct ll_fid *)0)->f_type) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct ll_fid *)0)->f_type));
+
+	/* Checks for struct mdt_body */
+	LASSERTF((int)sizeof(struct mdt_body) == 216, "found %lld\n",
+		 (long long)(int)sizeof(struct mdt_body));
+	LASSERTF((int)offsetof(struct mdt_body, fid1) == 0, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_body, fid1));
+	LASSERTF((int)sizeof(((struct mdt_body *)0)->fid1) == 16, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_body *)0)->fid1));
+	LASSERTF((int)offsetof(struct mdt_body, fid2) == 16, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_body, fid2));
+	LASSERTF((int)sizeof(((struct mdt_body *)0)->fid2) == 16, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_body *)0)->fid2));
+	LASSERTF((int)offsetof(struct mdt_body, handle) == 32, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_body, handle));
+	LASSERTF((int)sizeof(((struct mdt_body *)0)->handle) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_body *)0)->handle));
+	LASSERTF((int)offsetof(struct mdt_body, valid) == 40, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_body, valid));
+	LASSERTF((int)sizeof(((struct mdt_body *)0)->valid) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_body *)0)->valid));
+	LASSERTF((int)offsetof(struct mdt_body, size) == 48, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_body, size));
+	LASSERTF((int)sizeof(((struct mdt_body *)0)->size) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_body *)0)->size));
+	LASSERTF((int)offsetof(struct mdt_body, mtime) == 56, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_body, mtime));
+	LASSERTF((int)sizeof(((struct mdt_body *)0)->mtime) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_body *)0)->mtime));
+	LASSERTF((int)offsetof(struct mdt_body, atime) == 64, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_body, atime));
+	LASSERTF((int)sizeof(((struct mdt_body *)0)->atime) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_body *)0)->atime));
+	LASSERTF((int)offsetof(struct mdt_body, ctime) == 72, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_body, ctime));
+	LASSERTF((int)sizeof(((struct mdt_body *)0)->ctime) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_body *)0)->ctime));
+	LASSERTF((int)offsetof(struct mdt_body, blocks) == 80, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_body, blocks));
+	LASSERTF((int)sizeof(((struct mdt_body *)0)->blocks) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_body *)0)->blocks));
+	LASSERTF((int)offsetof(struct mdt_body, t_state) == 96, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_body, t_state));
+	LASSERTF((int)sizeof(((struct mdt_body *)0)->t_state) == 8,
+		 "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_body *)0)->t_state));
+	LASSERTF((int)offsetof(struct mdt_body, fsuid) == 104, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_body, fsuid));
+	LASSERTF((int)sizeof(((struct mdt_body *)0)->fsuid) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_body *)0)->fsuid));
+	LASSERTF((int)offsetof(struct mdt_body, fsgid) == 108, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_body, fsgid));
+	LASSERTF((int)sizeof(((struct mdt_body *)0)->fsgid) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_body *)0)->fsgid));
+	LASSERTF((int)offsetof(struct mdt_body, capability) == 112, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_body, capability));
+	LASSERTF((int)sizeof(((struct mdt_body *)0)->capability) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_body *)0)->capability));
+	LASSERTF((int)offsetof(struct mdt_body, mode) == 116, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_body, mode));
+	LASSERTF((int)sizeof(((struct mdt_body *)0)->mode) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_body *)0)->mode));
+	LASSERTF((int)offsetof(struct mdt_body, uid) == 120, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_body, uid));
+	LASSERTF((int)sizeof(((struct mdt_body *)0)->uid) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_body *)0)->uid));
+	LASSERTF((int)offsetof(struct mdt_body, gid) == 124, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_body, gid));
+	LASSERTF((int)sizeof(((struct mdt_body *)0)->gid) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_body *)0)->gid));
+	LASSERTF((int)offsetof(struct mdt_body, flags) == 128, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_body, flags));
+	LASSERTF((int)sizeof(((struct mdt_body *)0)->flags) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_body *)0)->flags));
+	LASSERTF((int)offsetof(struct mdt_body, rdev) == 132, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_body, rdev));
+	LASSERTF((int)sizeof(((struct mdt_body *)0)->rdev) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_body *)0)->rdev));
+	LASSERTF((int)offsetof(struct mdt_body, nlink) == 136, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_body, nlink));
+	LASSERTF((int)sizeof(((struct mdt_body *)0)->nlink) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_body *)0)->nlink));
+	LASSERTF((int)offsetof(struct mdt_body, unused2) == 140, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_body, unused2));
+	LASSERTF((int)sizeof(((struct mdt_body *)0)->unused2) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_body *)0)->unused2));
+	LASSERTF((int)offsetof(struct mdt_body, suppgid) == 144, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_body, suppgid));
+	LASSERTF((int)sizeof(((struct mdt_body *)0)->suppgid) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_body *)0)->suppgid));
+	LASSERTF((int)offsetof(struct mdt_body, eadatasize) == 148, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_body, eadatasize));
+	LASSERTF((int)sizeof(((struct mdt_body *)0)->eadatasize) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_body *)0)->eadatasize));
+	LASSERTF((int)offsetof(struct mdt_body, aclsize) == 152, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_body, aclsize));
+	LASSERTF((int)sizeof(((struct mdt_body *)0)->aclsize) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_body *)0)->aclsize));
+	LASSERTF((int)offsetof(struct mdt_body, max_mdsize) == 156, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_body, max_mdsize));
+	LASSERTF((int)sizeof(((struct mdt_body *)0)->max_mdsize) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_body *)0)->max_mdsize));
+	LASSERTF((int)offsetof(struct mdt_body, max_cookiesize) == 160, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_body, max_cookiesize));
+	LASSERTF((int)sizeof(((struct mdt_body *)0)->max_cookiesize) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_body *)0)->max_cookiesize));
+	LASSERTF((int)offsetof(struct mdt_body, uid_h) == 164, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_body, uid_h));
+	LASSERTF((int)sizeof(((struct mdt_body *)0)->uid_h) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_body *)0)->uid_h));
+	LASSERTF((int)offsetof(struct mdt_body, gid_h) == 168, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_body, gid_h));
+	LASSERTF((int)sizeof(((struct mdt_body *)0)->gid_h) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_body *)0)->gid_h));
+	LASSERTF((int)offsetof(struct mdt_body, padding_5) == 172, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_body, padding_5));
+	LASSERTF((int)sizeof(((struct mdt_body *)0)->padding_5) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_body *)0)->padding_5));
+	LASSERTF((int)offsetof(struct mdt_body, padding_6) == 176, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_body, padding_6));
+	LASSERTF((int)sizeof(((struct mdt_body *)0)->padding_6) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_body *)0)->padding_6));
+	LASSERTF((int)offsetof(struct mdt_body, padding_7) == 184, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_body, padding_7));
+	LASSERTF((int)sizeof(((struct mdt_body *)0)->padding_7) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_body *)0)->padding_7));
+	LASSERTF((int)offsetof(struct mdt_body, padding_8) == 192, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_body, padding_8));
+	LASSERTF((int)sizeof(((struct mdt_body *)0)->padding_8) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_body *)0)->padding_8));
+	LASSERTF((int)offsetof(struct mdt_body, padding_9) == 200, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_body, padding_9));
+	LASSERTF((int)sizeof(((struct mdt_body *)0)->padding_9) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_body *)0)->padding_9));
+	LASSERTF((int)offsetof(struct mdt_body, padding_10) == 208, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_body, padding_10));
+	LASSERTF((int)sizeof(((struct mdt_body *)0)->padding_10) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_body *)0)->padding_10));
+	LASSERTF(MDS_FMODE_CLOSED == 000000000000UL, "found 0%.11oUL\n",
+		MDS_FMODE_CLOSED);
+	LASSERTF(MDS_FMODE_EXEC == 000000000004UL, "found 0%.11oUL\n",
+		MDS_FMODE_EXEC);
+	LASSERTF(MDS_FMODE_EPOCH == 000001000000UL, "found 0%.11oUL\n",
+		MDS_FMODE_EPOCH);
+	LASSERTF(MDS_FMODE_TRUNC == 000002000000UL, "found 0%.11oUL\n",
+		MDS_FMODE_TRUNC);
+	LASSERTF(MDS_FMODE_SOM == 000004000000UL, "found 0%.11oUL\n",
+		MDS_FMODE_SOM);
+	LASSERTF(MDS_OPEN_CREATED == 000000000010UL, "found 0%.11oUL\n",
+		MDS_OPEN_CREATED);
+	LASSERTF(MDS_OPEN_CROSS == 000000000020UL, "found 0%.11oUL\n",
+		MDS_OPEN_CROSS);
+	LASSERTF(MDS_OPEN_CREAT == 000000000100UL, "found 0%.11oUL\n",
+		MDS_OPEN_CREAT);
+	LASSERTF(MDS_OPEN_EXCL == 000000000200UL, "found 0%.11oUL\n",
+		MDS_OPEN_EXCL);
+	LASSERTF(MDS_OPEN_TRUNC == 000000001000UL, "found 0%.11oUL\n",
+		MDS_OPEN_TRUNC);
+	LASSERTF(MDS_OPEN_APPEND == 000000002000UL, "found 0%.11oUL\n",
+		MDS_OPEN_APPEND);
+	LASSERTF(MDS_OPEN_SYNC == 000000010000UL, "found 0%.11oUL\n",
+		MDS_OPEN_SYNC);
+	LASSERTF(MDS_OPEN_DIRECTORY == 000000200000UL, "found 0%.11oUL\n",
+		MDS_OPEN_DIRECTORY);
+	LASSERTF(MDS_OPEN_BY_FID == 000040000000UL, "found 0%.11oUL\n",
+		MDS_OPEN_BY_FID);
+	LASSERTF(MDS_OPEN_DELAY_CREATE == 000100000000UL, "found 0%.11oUL\n",
+		MDS_OPEN_DELAY_CREATE);
+	LASSERTF(MDS_OPEN_OWNEROVERRIDE == 000200000000UL, "found 0%.11oUL\n",
+		MDS_OPEN_OWNEROVERRIDE);
+	LASSERTF(MDS_OPEN_JOIN_FILE == 000400000000UL, "found 0%.11oUL\n",
+		MDS_OPEN_JOIN_FILE);
+	LASSERTF(MDS_OPEN_LOCK == 004000000000UL, "found 0%.11oUL\n",
+		MDS_OPEN_LOCK);
+	LASSERTF(MDS_OPEN_HAS_EA == 010000000000UL, "found 0%.11oUL\n",
+		MDS_OPEN_HAS_EA);
+	LASSERTF(MDS_OPEN_HAS_OBJS == 020000000000UL, "found 0%.11oUL\n",
+		MDS_OPEN_HAS_OBJS);
+	LASSERTF(MDS_OPEN_NORESTORE == 00000000000100000000000ULL, "found 0%.22lloULL\n",
+			(long long)MDS_OPEN_NORESTORE);
+	LASSERTF(MDS_OPEN_NEWSTRIPE == 00000000000200000000000ULL, "found 0%.22lloULL\n",
+			(long long)MDS_OPEN_NEWSTRIPE);
+	LASSERTF(MDS_OPEN_VOLATILE == 00000000000400000000000ULL, "found 0%.22lloULL\n",
+			(long long)MDS_OPEN_VOLATILE);
+	LASSERTF(LUSTRE_SYNC_FL == 0x00000008, "found 0x%.8x\n",
+		LUSTRE_SYNC_FL);
+	LASSERTF(LUSTRE_IMMUTABLE_FL == 0x00000010, "found 0x%.8x\n",
+		LUSTRE_IMMUTABLE_FL);
+	LASSERTF(LUSTRE_APPEND_FL == 0x00000020, "found 0x%.8x\n",
+		LUSTRE_APPEND_FL);
+	LASSERTF(LUSTRE_NOATIME_FL == 0x00000080, "found 0x%.8x\n",
+		LUSTRE_NOATIME_FL);
+	LASSERTF(LUSTRE_DIRSYNC_FL == 0x00010000, "found 0x%.8x\n",
+		LUSTRE_DIRSYNC_FL);
+	LASSERTF(MDS_INODELOCK_LOOKUP == 0x000001, "found 0x%.8x\n",
+		MDS_INODELOCK_LOOKUP);
+	LASSERTF(MDS_INODELOCK_UPDATE == 0x000002, "found 0x%.8x\n",
+		MDS_INODELOCK_UPDATE);
+	LASSERTF(MDS_INODELOCK_OPEN == 0x000004, "found 0x%.8x\n",
+		MDS_INODELOCK_OPEN);
+	LASSERTF(MDS_INODELOCK_LAYOUT == 0x000008, "found 0x%.8x\n",
+		MDS_INODELOCK_LAYOUT);
+
+	/* Checks for struct mdt_ioepoch */
+	LASSERTF((int)sizeof(struct mdt_ioepoch) == 24, "found %lld\n",
+		 (long long)(int)sizeof(struct mdt_ioepoch));
+	LASSERTF((int)offsetof(struct mdt_ioepoch, handle) == 0, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_ioepoch, handle));
+	LASSERTF((int)sizeof(((struct mdt_ioepoch *)0)->handle) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_ioepoch *)0)->handle));
+	LASSERTF((int)offsetof(struct mdt_ioepoch, ioepoch) == 8, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_ioepoch, ioepoch));
+	LASSERTF((int)sizeof(((struct mdt_ioepoch *)0)->ioepoch) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_ioepoch *)0)->ioepoch));
+	LASSERTF((int)offsetof(struct mdt_ioepoch, flags) == 16, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_ioepoch, flags));
+	LASSERTF((int)sizeof(((struct mdt_ioepoch *)0)->flags) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_ioepoch *)0)->flags));
+	LASSERTF((int)offsetof(struct mdt_ioepoch, padding) == 20, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_ioepoch, padding));
+	LASSERTF((int)sizeof(((struct mdt_ioepoch *)0)->padding) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_ioepoch *)0)->padding));
+
+	/* Checks for struct mdt_remote_perm */
+	LASSERTF((int)sizeof(struct mdt_remote_perm) == 32, "found %lld\n",
+		 (long long)(int)sizeof(struct mdt_remote_perm));
+	LASSERTF((int)offsetof(struct mdt_remote_perm, rp_uid) == 0, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_remote_perm, rp_uid));
+	LASSERTF((int)sizeof(((struct mdt_remote_perm *)0)->rp_uid) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_remote_perm *)0)->rp_uid));
+	LASSERTF((int)offsetof(struct mdt_remote_perm, rp_gid) == 4, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_remote_perm, rp_gid));
+	LASSERTF((int)sizeof(((struct mdt_remote_perm *)0)->rp_gid) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_remote_perm *)0)->rp_gid));
+	LASSERTF((int)offsetof(struct mdt_remote_perm, rp_fsuid) == 8, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_remote_perm, rp_fsuid));
+	LASSERTF((int)sizeof(((struct mdt_remote_perm *)0)->rp_fsuid) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_remote_perm *)0)->rp_fsuid));
+	LASSERTF((int)offsetof(struct mdt_remote_perm, rp_fsgid) == 16, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_remote_perm, rp_fsgid));
+	LASSERTF((int)sizeof(((struct mdt_remote_perm *)0)->rp_fsgid) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_remote_perm *)0)->rp_fsgid));
+	LASSERTF((int)offsetof(struct mdt_remote_perm, rp_access_perm) == 24, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_remote_perm, rp_access_perm));
+	LASSERTF((int)sizeof(((struct mdt_remote_perm *)0)->rp_access_perm) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_remote_perm *)0)->rp_access_perm));
+	LASSERTF((int)offsetof(struct mdt_remote_perm, rp_padding) == 28, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_remote_perm, rp_padding));
+	LASSERTF((int)sizeof(((struct mdt_remote_perm *)0)->rp_padding) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_remote_perm *)0)->rp_padding));
+	LASSERTF(CFS_SETUID_PERM == 0x00000001UL, "found 0x%.8xUL\n",
+		(unsigned)CFS_SETUID_PERM);
+	LASSERTF(CFS_SETGID_PERM == 0x00000002UL, "found 0x%.8xUL\n",
+		(unsigned)CFS_SETGID_PERM);
+	LASSERTF(CFS_SETGRP_PERM == 0x00000004UL, "found 0x%.8xUL\n",
+		(unsigned)CFS_SETGRP_PERM);
+	LASSERTF(CFS_RMTACL_PERM == 0x00000008UL, "found 0x%.8xUL\n",
+		(unsigned)CFS_RMTACL_PERM);
+	LASSERTF(CFS_RMTOWN_PERM == 0x00000010UL, "found 0x%.8xUL\n",
+		(unsigned)CFS_RMTOWN_PERM);
+
+	/* Checks for struct mdt_rec_setattr */
+	LASSERTF((int)sizeof(struct mdt_rec_setattr) == 136, "found %lld\n",
+		 (long long)(int)sizeof(struct mdt_rec_setattr));
+	LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_opcode) == 0, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_setattr, sa_opcode));
+	LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_opcode) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_opcode));
+	LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_cap) == 4, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_setattr, sa_cap));
+	LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_cap) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_cap));
+	LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_fsuid) == 8, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_setattr, sa_fsuid));
+	LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_fsuid) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_fsuid));
+	LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_fsuid_h) == 12, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_setattr, sa_fsuid_h));
+	LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_fsuid_h) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_fsuid_h));
+	LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_fsgid) == 16, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_setattr, sa_fsgid));
+	LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_fsgid) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_fsgid));
+	LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_fsgid_h) == 20, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_setattr, sa_fsgid_h));
+	LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_fsgid_h) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_fsgid_h));
+	LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_suppgid) == 24, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_setattr, sa_suppgid));
+	LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_suppgid) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_suppgid));
+	LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_suppgid_h) == 28, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_setattr, sa_suppgid_h));
+	LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_suppgid_h) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_suppgid_h));
+	LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_padding_1) == 32, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_setattr, sa_padding_1));
+	LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_padding_1) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_padding_1));
+	LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_padding_1_h) == 36, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_setattr, sa_padding_1_h));
+	LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_padding_1_h) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_padding_1_h));
+	LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_fid) == 40, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_setattr, sa_fid));
+	LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_fid) == 16, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_fid));
+	LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_valid) == 56, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_setattr, sa_valid));
+	LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_valid) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_valid));
+	LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_uid) == 64, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_setattr, sa_uid));
+	LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_uid) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_uid));
+	LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_gid) == 68, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_setattr, sa_gid));
+	LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_gid) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_gid));
+	LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_size) == 72, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_setattr, sa_size));
+	LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_size) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_size));
+	LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_blocks) == 80, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_setattr, sa_blocks));
+	LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_blocks) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_blocks));
+	LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_mtime) == 88, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_setattr, sa_mtime));
+	LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_mtime) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_mtime));
+	LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_atime) == 96, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_setattr, sa_atime));
+	LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_atime) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_atime));
+	LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_ctime) == 104, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_setattr, sa_ctime));
+	LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_ctime) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_ctime));
+	LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_attr_flags) == 112, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_setattr, sa_attr_flags));
+	LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_attr_flags) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_attr_flags));
+	LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_mode) == 116, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_setattr, sa_mode));
+	LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_mode) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_mode));
+	LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_bias) == 120, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_setattr, sa_bias));
+	LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_bias) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_bias));
+	LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_padding_3) == 124, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_setattr, sa_padding_3));
+	LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_padding_3) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_padding_3));
+	LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_padding_4) == 128, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_setattr, sa_padding_4));
+	LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_padding_4) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_padding_4));
+	LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_padding_5) == 132, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_setattr, sa_padding_5));
+	LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_padding_5) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_padding_5));
+
+	/* Checks for struct mdt_rec_create */
+	LASSERTF((int)sizeof(struct mdt_rec_create) == 136, "found %lld\n",
+		 (long long)(int)sizeof(struct mdt_rec_create));
+	LASSERTF((int)offsetof(struct mdt_rec_create, cr_opcode) == 0, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_create, cr_opcode));
+	LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_opcode) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_opcode));
+	LASSERTF((int)offsetof(struct mdt_rec_create, cr_cap) == 4, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_create, cr_cap));
+	LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_cap) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_cap));
+	LASSERTF((int)offsetof(struct mdt_rec_create, cr_fsuid) == 8, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_create, cr_fsuid));
+	LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_fsuid) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_fsuid));
+	LASSERTF((int)offsetof(struct mdt_rec_create, cr_fsuid_h) == 12, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_create, cr_fsuid_h));
+	LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_fsuid_h) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_fsuid_h));
+	LASSERTF((int)offsetof(struct mdt_rec_create, cr_fsgid) == 16, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_create, cr_fsgid));
+	LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_fsgid) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_fsgid));
+	LASSERTF((int)offsetof(struct mdt_rec_create, cr_fsgid_h) == 20, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_create, cr_fsgid_h));
+	LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_fsgid_h) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_fsgid_h));
+	LASSERTF((int)offsetof(struct mdt_rec_create, cr_suppgid1) == 24, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_create, cr_suppgid1));
+	LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_suppgid1) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_suppgid1));
+	LASSERTF((int)offsetof(struct mdt_rec_create, cr_suppgid1_h) == 28, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_create, cr_suppgid1_h));
+	LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_suppgid1_h) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_suppgid1_h));
+	LASSERTF((int)offsetof(struct mdt_rec_create, cr_suppgid2) == 32, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_create, cr_suppgid2));
+	LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_suppgid2) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_suppgid2));
+	LASSERTF((int)offsetof(struct mdt_rec_create, cr_suppgid2_h) == 36, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_create, cr_suppgid2_h));
+	LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_suppgid2_h) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_suppgid2_h));
+	LASSERTF((int)offsetof(struct mdt_rec_create, cr_fid1) == 40, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_create, cr_fid1));
+	LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_fid1) == 16, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_fid1));
+	LASSERTF((int)offsetof(struct mdt_rec_create, cr_fid2) == 56, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_create, cr_fid2));
+	LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_fid2) == 16, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_fid2));
+	LASSERTF((int)offsetof(struct mdt_rec_create, cr_old_handle) == 72, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_create, cr_old_handle));
+	LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_old_handle) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_old_handle));
+	LASSERTF((int)offsetof(struct mdt_rec_create, cr_time) == 80, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_create, cr_time));
+	LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_time) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_time));
+	LASSERTF((int)offsetof(struct mdt_rec_create, cr_rdev) == 88, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_create, cr_rdev));
+	LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_rdev) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_rdev));
+	LASSERTF((int)offsetof(struct mdt_rec_create, cr_ioepoch) == 96, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_create, cr_ioepoch));
+	LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_ioepoch) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_ioepoch));
+	LASSERTF((int)offsetof(struct mdt_rec_create, cr_padding_1) == 104, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_create, cr_padding_1));
+	LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_padding_1) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_padding_1));
+	LASSERTF((int)offsetof(struct mdt_rec_create, cr_mode) == 112, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_create, cr_mode));
+	LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_mode) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_mode));
+	LASSERTF((int)offsetof(struct mdt_rec_create, cr_bias) == 116, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_create, cr_bias));
+	LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_bias) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_bias));
+	LASSERTF((int)offsetof(struct mdt_rec_create, cr_flags_l) == 120, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_create, cr_flags_l));
+	LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_flags_l) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_flags_l));
+	LASSERTF((int)offsetof(struct mdt_rec_create, cr_flags_h) == 124, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_create, cr_flags_h));
+	LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_flags_h) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_flags_h));
+	LASSERTF((int)offsetof(struct mdt_rec_create, cr_umask) == 128, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_create, cr_umask));
+	LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_umask) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_umask));
+	LASSERTF((int)offsetof(struct mdt_rec_create, cr_padding_4) == 132, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_create, cr_padding_4));
+	LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_padding_4) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_padding_4));
+
+	/* Checks for struct mdt_rec_link */
+	LASSERTF((int)sizeof(struct mdt_rec_link) == 136, "found %lld\n",
+		 (long long)(int)sizeof(struct mdt_rec_link));
+	LASSERTF((int)offsetof(struct mdt_rec_link, lk_opcode) == 0, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_link, lk_opcode));
+	LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_opcode) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_opcode));
+	LASSERTF((int)offsetof(struct mdt_rec_link, lk_cap) == 4, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_link, lk_cap));
+	LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_cap) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_cap));
+	LASSERTF((int)offsetof(struct mdt_rec_link, lk_fsuid) == 8, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_link, lk_fsuid));
+	LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_fsuid) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_fsuid));
+	LASSERTF((int)offsetof(struct mdt_rec_link, lk_fsuid_h) == 12, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_link, lk_fsuid_h));
+	LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_fsuid_h) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_fsuid_h));
+	LASSERTF((int)offsetof(struct mdt_rec_link, lk_fsgid) == 16, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_link, lk_fsgid));
+	LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_fsgid) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_fsgid));
+	LASSERTF((int)offsetof(struct mdt_rec_link, lk_fsgid_h) == 20, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_link, lk_fsgid_h));
+	LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_fsgid_h) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_fsgid_h));
+	LASSERTF((int)offsetof(struct mdt_rec_link, lk_suppgid1) == 24, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_link, lk_suppgid1));
+	LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_suppgid1) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_suppgid1));
+	LASSERTF((int)offsetof(struct mdt_rec_link, lk_suppgid1_h) == 28, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_link, lk_suppgid1_h));
+	LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_suppgid1_h) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_suppgid1_h));
+	LASSERTF((int)offsetof(struct mdt_rec_link, lk_suppgid2) == 32, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_link, lk_suppgid2));
+	LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_suppgid2) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_suppgid2));
+	LASSERTF((int)offsetof(struct mdt_rec_link, lk_suppgid2_h) == 36, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_link, lk_suppgid2_h));
+	LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_suppgid2_h) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_suppgid2_h));
+	LASSERTF((int)offsetof(struct mdt_rec_link, lk_fid1) == 40, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_link, lk_fid1));
+	LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_fid1) == 16, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_fid1));
+	LASSERTF((int)offsetof(struct mdt_rec_link, lk_fid2) == 56, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_link, lk_fid2));
+	LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_fid2) == 16, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_fid2));
+	LASSERTF((int)offsetof(struct mdt_rec_link, lk_time) == 72, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_link, lk_time));
+	LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_time) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_time));
+	LASSERTF((int)offsetof(struct mdt_rec_link, lk_padding_1) == 80, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_link, lk_padding_1));
+	LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_padding_1) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_padding_1));
+	LASSERTF((int)offsetof(struct mdt_rec_link, lk_padding_2) == 88, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_link, lk_padding_2));
+	LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_padding_2) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_padding_2));
+	LASSERTF((int)offsetof(struct mdt_rec_link, lk_padding_3) == 96, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_link, lk_padding_3));
+	LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_padding_3) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_padding_3));
+	LASSERTF((int)offsetof(struct mdt_rec_link, lk_padding_4) == 104, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_link, lk_padding_4));
+	LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_padding_4) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_padding_4));
+	LASSERTF((int)offsetof(struct mdt_rec_link, lk_bias) == 112, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_link, lk_bias));
+	LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_bias) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_bias));
+	LASSERTF((int)offsetof(struct mdt_rec_link, lk_padding_5) == 116, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_link, lk_padding_5));
+	LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_padding_5) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_padding_5));
+	LASSERTF((int)offsetof(struct mdt_rec_link, lk_padding_6) == 120, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_link, lk_padding_6));
+	LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_padding_6) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_padding_6));
+	LASSERTF((int)offsetof(struct mdt_rec_link, lk_padding_7) == 124, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_link, lk_padding_7));
+	LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_padding_7) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_padding_7));
+	LASSERTF((int)offsetof(struct mdt_rec_link, lk_padding_8) == 128, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_link, lk_padding_8));
+	LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_padding_8) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_padding_8));
+	LASSERTF((int)offsetof(struct mdt_rec_link, lk_padding_9) == 132, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_link, lk_padding_9));
+	LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_padding_9) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_padding_9));
+
+	/* Checks for struct mdt_rec_unlink */
+	LASSERTF((int)sizeof(struct mdt_rec_unlink) == 136, "found %lld\n",
+		 (long long)(int)sizeof(struct mdt_rec_unlink));
+	LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_opcode) == 0, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_unlink, ul_opcode));
+	LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_opcode) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_opcode));
+	LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_cap) == 4, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_unlink, ul_cap));
+	LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_cap) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_cap));
+	LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_fsuid) == 8, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_unlink, ul_fsuid));
+	LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_fsuid) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_fsuid));
+	LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_fsuid_h) == 12, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_unlink, ul_fsuid_h));
+	LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_fsuid_h) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_fsuid_h));
+	LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_fsgid) == 16, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_unlink, ul_fsgid));
+	LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_fsgid) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_fsgid));
+	LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_fsgid_h) == 20, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_unlink, ul_fsgid_h));
+	LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_fsgid_h) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_fsgid_h));
+	LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_suppgid1) == 24, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_unlink, ul_suppgid1));
+	LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_suppgid1) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_suppgid1));
+	LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_suppgid1_h) == 28, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_unlink, ul_suppgid1_h));
+	LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_suppgid1_h) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_suppgid1_h));
+	LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_suppgid2) == 32, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_unlink, ul_suppgid2));
+	LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_suppgid2) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_suppgid2));
+	LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_suppgid2_h) == 36, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_unlink, ul_suppgid2_h));
+	LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_suppgid2_h) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_suppgid2_h));
+	LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_fid1) == 40, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_unlink, ul_fid1));
+	LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_fid1) == 16, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_fid1));
+	LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_fid2) == 56, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_unlink, ul_fid2));
+	LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_fid2) == 16, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_fid2));
+	LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_time) == 72, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_unlink, ul_time));
+	LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_time) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_time));
+	LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_padding_2) == 80, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_unlink, ul_padding_2));
+	LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_padding_2) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_padding_2));
+	LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_padding_3) == 88, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_unlink, ul_padding_3));
+	LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_padding_3) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_padding_3));
+	LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_padding_4) == 96, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_unlink, ul_padding_4));
+	LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_padding_4) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_padding_4));
+	LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_padding_5) == 104, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_unlink, ul_padding_5));
+	LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_padding_5) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_padding_5));
+	LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_bias) == 112, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_unlink, ul_bias));
+	LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_bias) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_bias));
+	LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_mode) == 116, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_unlink, ul_mode));
+	LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_mode) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_mode));
+	LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_padding_6) == 120, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_unlink, ul_padding_6));
+	LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_padding_6) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_padding_6));
+	LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_padding_7) == 124, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_unlink, ul_padding_7));
+	LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_padding_7) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_padding_7));
+	LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_padding_8) == 128, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_unlink, ul_padding_8));
+	LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_padding_8) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_padding_8));
+	LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_padding_9) == 132, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_unlink, ul_padding_9));
+	LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_padding_9) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_padding_9));
+
+	/* Checks for struct mdt_rec_rename */
+	LASSERTF((int)sizeof(struct mdt_rec_rename) == 136, "found %lld\n",
+		 (long long)(int)sizeof(struct mdt_rec_rename));
+	LASSERTF((int)offsetof(struct mdt_rec_rename, rn_opcode) == 0, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_rename, rn_opcode));
+	LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_opcode) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_opcode));
+	LASSERTF((int)offsetof(struct mdt_rec_rename, rn_cap) == 4, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_rename, rn_cap));
+	LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_cap) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_cap));
+	LASSERTF((int)offsetof(struct mdt_rec_rename, rn_fsuid) == 8, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_rename, rn_fsuid));
+	LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_fsuid) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_fsuid));
+	LASSERTF((int)offsetof(struct mdt_rec_rename, rn_fsuid_h) == 12, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_rename, rn_fsuid_h));
+	LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_fsuid_h) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_fsuid_h));
+	LASSERTF((int)offsetof(struct mdt_rec_rename, rn_fsgid) == 16, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_rename, rn_fsgid));
+	LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_fsgid) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_fsgid));
+	LASSERTF((int)offsetof(struct mdt_rec_rename, rn_fsgid_h) == 20, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_rename, rn_fsgid_h));
+	LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_fsgid_h) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_fsgid_h));
+	LASSERTF((int)offsetof(struct mdt_rec_rename, rn_suppgid1) == 24, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_rename, rn_suppgid1));
+	LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_suppgid1) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_suppgid1));
+	LASSERTF((int)offsetof(struct mdt_rec_rename, rn_suppgid1_h) == 28, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_rename, rn_suppgid1_h));
+	LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_suppgid1_h) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_suppgid1_h));
+	LASSERTF((int)offsetof(struct mdt_rec_rename, rn_suppgid2) == 32, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_rename, rn_suppgid2));
+	LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_suppgid2) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_suppgid2));
+	LASSERTF((int)offsetof(struct mdt_rec_rename, rn_suppgid2_h) == 36, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_rename, rn_suppgid2_h));
+	LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_suppgid2_h) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_suppgid2_h));
+	LASSERTF((int)offsetof(struct mdt_rec_rename, rn_fid1) == 40, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_rename, rn_fid1));
+	LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_fid1) == 16, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_fid1));
+	LASSERTF((int)offsetof(struct mdt_rec_rename, rn_fid2) == 56, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_rename, rn_fid2));
+	LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_fid2) == 16, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_fid2));
+	LASSERTF((int)offsetof(struct mdt_rec_rename, rn_time) == 72, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_rename, rn_time));
+	LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_time) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_time));
+	LASSERTF((int)offsetof(struct mdt_rec_rename, rn_padding_1) == 80, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_rename, rn_padding_1));
+	LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_padding_1) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_padding_1));
+	LASSERTF((int)offsetof(struct mdt_rec_rename, rn_padding_2) == 88, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_rename, rn_padding_2));
+	LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_padding_2) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_padding_2));
+	LASSERTF((int)offsetof(struct mdt_rec_rename, rn_padding_3) == 96, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_rename, rn_padding_3));
+	LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_padding_3) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_padding_3));
+	LASSERTF((int)offsetof(struct mdt_rec_rename, rn_padding_4) == 104, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_rename, rn_padding_4));
+	LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_padding_4) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_padding_4));
+	LASSERTF((int)offsetof(struct mdt_rec_rename, rn_bias) == 112, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_rename, rn_bias));
+	LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_bias) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_bias));
+	LASSERTF((int)offsetof(struct mdt_rec_rename, rn_mode) == 116, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_rename, rn_mode));
+	LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_mode) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_mode));
+	LASSERTF((int)offsetof(struct mdt_rec_rename, rn_padding_5) == 120, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_rename, rn_padding_5));
+	LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_padding_5) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_padding_5));
+	LASSERTF((int)offsetof(struct mdt_rec_rename, rn_padding_6) == 124, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_rename, rn_padding_6));
+	LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_padding_6) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_padding_6));
+	LASSERTF((int)offsetof(struct mdt_rec_rename, rn_padding_7) == 128, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_rename, rn_padding_7));
+	LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_padding_7) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_padding_7));
+	LASSERTF((int)offsetof(struct mdt_rec_rename, rn_padding_8) == 132, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_rename, rn_padding_8));
+	LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_padding_8) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_padding_8));
+
+	/* Checks for struct mdt_rec_setxattr */
+	LASSERTF((int)sizeof(struct mdt_rec_setxattr) == 136, "found %lld\n",
+		 (long long)(int)sizeof(struct mdt_rec_setxattr));
+	LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_opcode) == 0, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_setxattr, sx_opcode));
+	LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_opcode) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_opcode));
+	LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_cap) == 4, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_setxattr, sx_cap));
+	LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_cap) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_cap));
+	LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_fsuid) == 8, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_setxattr, sx_fsuid));
+	LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_fsuid) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_fsuid));
+	LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_fsuid_h) == 12, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_setxattr, sx_fsuid_h));
+	LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_fsuid_h) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_fsuid_h));
+	LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_fsgid) == 16, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_setxattr, sx_fsgid));
+	LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_fsgid) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_fsgid));
+	LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_fsgid_h) == 20, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_setxattr, sx_fsgid_h));
+	LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_fsgid_h) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_fsgid_h));
+	LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_suppgid1) == 24, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_setxattr, sx_suppgid1));
+	LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_suppgid1) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_suppgid1));
+	LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_suppgid1_h) == 28, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_setxattr, sx_suppgid1_h));
+	LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_suppgid1_h) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_suppgid1_h));
+	LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_suppgid2) == 32, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_setxattr, sx_suppgid2));
+	LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_suppgid2) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_suppgid2));
+	LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_suppgid2_h) == 36, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_setxattr, sx_suppgid2_h));
+	LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_suppgid2_h) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_suppgid2_h));
+	LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_fid) == 40, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_setxattr, sx_fid));
+	LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_fid) == 16, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_fid));
+	LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_padding_1) == 56, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_setxattr, sx_padding_1));
+	LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_1) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_1));
+	LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_padding_2) == 64, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_setxattr, sx_padding_2));
+	LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_2) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_2));
+	LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_padding_3) == 68, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_setxattr, sx_padding_3));
+	LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_3) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_3));
+	LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_valid) == 72, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_setxattr, sx_valid));
+	LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_valid) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_valid));
+	LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_time) == 80, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_setxattr, sx_time));
+	LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_time) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_time));
+	LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_padding_5) == 88, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_setxattr, sx_padding_5));
+	LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_5) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_5));
+	LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_padding_6) == 96, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_setxattr, sx_padding_6));
+	LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_6) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_6));
+	LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_padding_7) == 104, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_setxattr, sx_padding_7));
+	LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_7) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_7));
+	LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_size) == 112, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_setxattr, sx_size));
+	LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_size) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_size));
+	LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_flags) == 116, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_setxattr, sx_flags));
+	LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_flags) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_flags));
+	LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_padding_8) == 120, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_setxattr, sx_padding_8));
+	LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_8) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_8));
+	LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_padding_9) == 124, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_setxattr, sx_padding_9));
+	LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_9) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_9));
+	LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_padding_10) == 128, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_setxattr, sx_padding_10));
+	LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_10) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_10));
+	LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_padding_11) == 132, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_setxattr, sx_padding_11));
+	LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_11) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_11));
+
+	/* Checks for struct mdt_rec_reint */
+	LASSERTF((int)sizeof(struct mdt_rec_reint) == 136, "found %lld\n",
+		 (long long)(int)sizeof(struct mdt_rec_reint));
+	LASSERTF((int)offsetof(struct mdt_rec_reint, rr_opcode) == 0, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_reint, rr_opcode));
+	LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_opcode) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_opcode));
+	LASSERTF((int)offsetof(struct mdt_rec_reint, rr_cap) == 4, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_reint, rr_cap));
+	LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_cap) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_cap));
+	LASSERTF((int)offsetof(struct mdt_rec_reint, rr_fsuid) == 8, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_reint, rr_fsuid));
+	LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_fsuid) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_fsuid));
+	LASSERTF((int)offsetof(struct mdt_rec_reint, rr_fsuid_h) == 12, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_reint, rr_fsuid_h));
+	LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_fsuid_h) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_fsuid_h));
+	LASSERTF((int)offsetof(struct mdt_rec_reint, rr_fsgid) == 16, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_reint, rr_fsgid));
+	LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_fsgid) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_fsgid));
+	LASSERTF((int)offsetof(struct mdt_rec_reint, rr_fsgid_h) == 20, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_reint, rr_fsgid_h));
+	LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_fsgid_h) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_fsgid_h));
+	LASSERTF((int)offsetof(struct mdt_rec_reint, rr_suppgid1) == 24, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_reint, rr_suppgid1));
+	LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_suppgid1) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_suppgid1));
+	LASSERTF((int)offsetof(struct mdt_rec_reint, rr_suppgid1_h) == 28, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_reint, rr_suppgid1_h));
+	LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_suppgid1_h) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_suppgid1_h));
+	LASSERTF((int)offsetof(struct mdt_rec_reint, rr_suppgid2) == 32, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_reint, rr_suppgid2));
+	LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_suppgid2) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_suppgid2));
+	LASSERTF((int)offsetof(struct mdt_rec_reint, rr_suppgid2_h) == 36, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_reint, rr_suppgid2_h));
+	LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_suppgid2_h) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_suppgid2_h));
+	LASSERTF((int)offsetof(struct mdt_rec_reint, rr_fid1) == 40, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_reint, rr_fid1));
+	LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_fid1) == 16, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_fid1));
+	LASSERTF((int)offsetof(struct mdt_rec_reint, rr_fid2) == 56, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_reint, rr_fid2));
+	LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_fid2) == 16, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_fid2));
+	LASSERTF((int)offsetof(struct mdt_rec_reint, rr_mtime) == 72, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_reint, rr_mtime));
+	LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_mtime) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_mtime));
+	LASSERTF((int)offsetof(struct mdt_rec_reint, rr_atime) == 80, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_reint, rr_atime));
+	LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_atime) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_atime));
+	LASSERTF((int)offsetof(struct mdt_rec_reint, rr_ctime) == 88, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_reint, rr_ctime));
+	LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_ctime) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_ctime));
+	LASSERTF((int)offsetof(struct mdt_rec_reint, rr_size) == 96, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_reint, rr_size));
+	LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_size) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_size));
+	LASSERTF((int)offsetof(struct mdt_rec_reint, rr_blocks) == 104, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_reint, rr_blocks));
+	LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_blocks) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_blocks));
+	LASSERTF((int)offsetof(struct mdt_rec_reint, rr_bias) == 112, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_reint, rr_bias));
+	LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_bias) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_bias));
+	LASSERTF((int)offsetof(struct mdt_rec_reint, rr_mode) == 116, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_reint, rr_mode));
+	LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_mode) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_mode));
+	LASSERTF((int)offsetof(struct mdt_rec_reint, rr_flags) == 120, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_reint, rr_flags));
+	LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_flags) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_flags));
+	LASSERTF((int)offsetof(struct mdt_rec_reint, rr_flags_h) == 124, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_reint, rr_flags_h));
+	LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_flags_h) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_flags_h));
+	LASSERTF((int)offsetof(struct mdt_rec_reint, rr_umask) == 128, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_reint, rr_umask));
+	LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_umask) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_umask));
+	LASSERTF((int)offsetof(struct mdt_rec_reint, rr_padding_4) == 132, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_rec_reint, rr_padding_4));
+	LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_padding_4) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_padding_4));
+
+	/* Checks for struct lmv_desc */
+	LASSERTF((int)sizeof(struct lmv_desc) == 88, "found %lld\n",
+		 (long long)(int)sizeof(struct lmv_desc));
+	LASSERTF((int)offsetof(struct lmv_desc, ld_tgt_count) == 0, "found %lld\n",
+		 (long long)(int)offsetof(struct lmv_desc, ld_tgt_count));
+	LASSERTF((int)sizeof(((struct lmv_desc *)0)->ld_tgt_count) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct lmv_desc *)0)->ld_tgt_count));
+	LASSERTF((int)offsetof(struct lmv_desc, ld_active_tgt_count) == 4, "found %lld\n",
+		 (long long)(int)offsetof(struct lmv_desc, ld_active_tgt_count));
+	LASSERTF((int)sizeof(((struct lmv_desc *)0)->ld_active_tgt_count) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct lmv_desc *)0)->ld_active_tgt_count));
+	LASSERTF((int)offsetof(struct lmv_desc, ld_default_stripe_count) == 8, "found %lld\n",
+		 (long long)(int)offsetof(struct lmv_desc, ld_default_stripe_count));
+	LASSERTF((int)sizeof(((struct lmv_desc *)0)->ld_default_stripe_count) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct lmv_desc *)0)->ld_default_stripe_count));
+	LASSERTF((int)offsetof(struct lmv_desc, ld_pattern) == 12, "found %lld\n",
+		 (long long)(int)offsetof(struct lmv_desc, ld_pattern));
+	LASSERTF((int)sizeof(((struct lmv_desc *)0)->ld_pattern) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct lmv_desc *)0)->ld_pattern));
+	LASSERTF((int)offsetof(struct lmv_desc, ld_default_hash_size) == 16, "found %lld\n",
+		 (long long)(int)offsetof(struct lmv_desc, ld_default_hash_size));
+	LASSERTF((int)sizeof(((struct lmv_desc *)0)->ld_default_hash_size) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct lmv_desc *)0)->ld_default_hash_size));
+	LASSERTF((int)offsetof(struct lmv_desc, ld_padding_1) == 24, "found %lld\n",
+		 (long long)(int)offsetof(struct lmv_desc, ld_padding_1));
+	LASSERTF((int)sizeof(((struct lmv_desc *)0)->ld_padding_1) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct lmv_desc *)0)->ld_padding_1));
+	LASSERTF((int)offsetof(struct lmv_desc, ld_padding_2) == 32, "found %lld\n",
+		 (long long)(int)offsetof(struct lmv_desc, ld_padding_2));
+	LASSERTF((int)sizeof(((struct lmv_desc *)0)->ld_padding_2) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct lmv_desc *)0)->ld_padding_2));
+	LASSERTF((int)offsetof(struct lmv_desc, ld_qos_maxage) == 36, "found %lld\n",
+		 (long long)(int)offsetof(struct lmv_desc, ld_qos_maxage));
+	LASSERTF((int)sizeof(((struct lmv_desc *)0)->ld_qos_maxage) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct lmv_desc *)0)->ld_qos_maxage));
+	LASSERTF((int)offsetof(struct lmv_desc, ld_padding_3) == 40, "found %lld\n",
+		 (long long)(int)offsetof(struct lmv_desc, ld_padding_3));
+	LASSERTF((int)sizeof(((struct lmv_desc *)0)->ld_padding_3) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct lmv_desc *)0)->ld_padding_3));
+	LASSERTF((int)offsetof(struct lmv_desc, ld_padding_4) == 44, "found %lld\n",
+		 (long long)(int)offsetof(struct lmv_desc, ld_padding_4));
+	LASSERTF((int)sizeof(((struct lmv_desc *)0)->ld_padding_4) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct lmv_desc *)0)->ld_padding_4));
+	LASSERTF((int)offsetof(struct lmv_desc, ld_uuid) == 48, "found %lld\n",
+		 (long long)(int)offsetof(struct lmv_desc, ld_uuid));
+	LASSERTF((int)sizeof(((struct lmv_desc *)0)->ld_uuid) == 40, "found %lld\n",
+		 (long long)(int)sizeof(((struct lmv_desc *)0)->ld_uuid));
+
+	/* Checks for struct lmv_stripe_md */
+	LASSERTF((int)sizeof(struct lmv_stripe_md) == 32, "found %lld\n",
+		 (long long)(int)sizeof(struct lmv_stripe_md));
+	LASSERTF((int)offsetof(struct lmv_stripe_md, mea_magic) == 0, "found %lld\n",
+		 (long long)(int)offsetof(struct lmv_stripe_md, mea_magic));
+	LASSERTF((int)sizeof(((struct lmv_stripe_md *)0)->mea_magic) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct lmv_stripe_md *)0)->mea_magic));
+	LASSERTF((int)offsetof(struct lmv_stripe_md, mea_count) == 4, "found %lld\n",
+		 (long long)(int)offsetof(struct lmv_stripe_md, mea_count));
+	LASSERTF((int)sizeof(((struct lmv_stripe_md *)0)->mea_count) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct lmv_stripe_md *)0)->mea_count));
+	LASSERTF((int)offsetof(struct lmv_stripe_md, mea_master) == 8, "found %lld\n",
+		 (long long)(int)offsetof(struct lmv_stripe_md, mea_master));
+	LASSERTF((int)sizeof(((struct lmv_stripe_md *)0)->mea_master) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct lmv_stripe_md *)0)->mea_master));
+	LASSERTF((int)offsetof(struct lmv_stripe_md, mea_padding) == 12, "found %lld\n",
+		 (long long)(int)offsetof(struct lmv_stripe_md, mea_padding));
+	LASSERTF((int)sizeof(((struct lmv_stripe_md *)0)->mea_padding) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct lmv_stripe_md *)0)->mea_padding));
+	CLASSERT(LOV_MAXPOOLNAME == 16);
+	LASSERTF((int)offsetof(struct lmv_stripe_md, mea_pool_name[16]) == 32, "found %lld\n",
+		 (long long)(int)offsetof(struct lmv_stripe_md, mea_pool_name[16]));
+	LASSERTF((int)sizeof(((struct lmv_stripe_md *)0)->mea_pool_name[16]) == 1, "found %lld\n",
+		 (long long)(int)sizeof(((struct lmv_stripe_md *)0)->mea_pool_name[16]));
+	LASSERTF((int)offsetof(struct lmv_stripe_md, mea_ids[0]) == 32, "found %lld\n",
+		 (long long)(int)offsetof(struct lmv_stripe_md, mea_ids[0]));
+	LASSERTF((int)sizeof(((struct lmv_stripe_md *)0)->mea_ids[0]) == 16, "found %lld\n",
+		 (long long)(int)sizeof(((struct lmv_stripe_md *)0)->mea_ids[0]));
+
+	/* Checks for struct lov_desc */
+	LASSERTF((int)sizeof(struct lov_desc) == 88, "found %lld\n",
+		 (long long)(int)sizeof(struct lov_desc));
+	LASSERTF((int)offsetof(struct lov_desc, ld_tgt_count) == 0, "found %lld\n",
+		 (long long)(int)offsetof(struct lov_desc, ld_tgt_count));
+	LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_tgt_count) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct lov_desc *)0)->ld_tgt_count));
+	LASSERTF((int)offsetof(struct lov_desc, ld_active_tgt_count) == 4, "found %lld\n",
+		 (long long)(int)offsetof(struct lov_desc, ld_active_tgt_count));
+	LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_active_tgt_count) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct lov_desc *)0)->ld_active_tgt_count));
+	LASSERTF((int)offsetof(struct lov_desc, ld_default_stripe_count) == 8, "found %lld\n",
+		 (long long)(int)offsetof(struct lov_desc, ld_default_stripe_count));
+	LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_default_stripe_count) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct lov_desc *)0)->ld_default_stripe_count));
+	LASSERTF((int)offsetof(struct lov_desc, ld_pattern) == 12, "found %lld\n",
+		 (long long)(int)offsetof(struct lov_desc, ld_pattern));
+	LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_pattern) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct lov_desc *)0)->ld_pattern));
+	LASSERTF((int)offsetof(struct lov_desc, ld_default_stripe_size) == 16, "found %lld\n",
+		 (long long)(int)offsetof(struct lov_desc, ld_default_stripe_size));
+	LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_default_stripe_size) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct lov_desc *)0)->ld_default_stripe_size));
+	LASSERTF((int)offsetof(struct lov_desc, ld_default_stripe_offset) == 24, "found %lld\n",
+		 (long long)(int)offsetof(struct lov_desc, ld_default_stripe_offset));
+	LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_default_stripe_offset) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct lov_desc *)0)->ld_default_stripe_offset));
+	LASSERTF((int)offsetof(struct lov_desc, ld_padding_0) == 32, "found %lld\n",
+		 (long long)(int)offsetof(struct lov_desc, ld_padding_0));
+	LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_padding_0) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct lov_desc *)0)->ld_padding_0));
+	LASSERTF((int)offsetof(struct lov_desc, ld_qos_maxage) == 36, "found %lld\n",
+		 (long long)(int)offsetof(struct lov_desc, ld_qos_maxage));
+	LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_qos_maxage) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct lov_desc *)0)->ld_qos_maxage));
+	LASSERTF((int)offsetof(struct lov_desc, ld_padding_1) == 40, "found %lld\n",
+		 (long long)(int)offsetof(struct lov_desc, ld_padding_1));
+	LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_padding_1) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct lov_desc *)0)->ld_padding_1));
+	LASSERTF((int)offsetof(struct lov_desc, ld_padding_2) == 44, "found %lld\n",
+		 (long long)(int)offsetof(struct lov_desc, ld_padding_2));
+	LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_padding_2) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct lov_desc *)0)->ld_padding_2));
+	LASSERTF((int)offsetof(struct lov_desc, ld_uuid) == 48, "found %lld\n",
+		 (long long)(int)offsetof(struct lov_desc, ld_uuid));
+	LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_uuid) == 40, "found %lld\n",
+		 (long long)(int)sizeof(((struct lov_desc *)0)->ld_uuid));
+	CLASSERT(LOV_DESC_MAGIC == 0xB0CCDE5C);
+
+	/* Checks for struct ldlm_res_id */
+	LASSERTF((int)sizeof(struct ldlm_res_id) == 32, "found %lld\n",
+		 (long long)(int)sizeof(struct ldlm_res_id));
+	CLASSERT(RES_NAME_SIZE == 4);
+	LASSERTF((int)offsetof(struct ldlm_res_id, name[4]) == 32, "found %lld\n",
+		 (long long)(int)offsetof(struct ldlm_res_id, name[4]));
+	LASSERTF((int)sizeof(((struct ldlm_res_id *)0)->name[4]) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct ldlm_res_id *)0)->name[4]));
+
+	/* Checks for struct ldlm_extent */
+	LASSERTF((int)sizeof(struct ldlm_extent) == 24, "found %lld\n",
+		 (long long)(int)sizeof(struct ldlm_extent));
+	LASSERTF((int)offsetof(struct ldlm_extent, start) == 0, "found %lld\n",
+		 (long long)(int)offsetof(struct ldlm_extent, start));
+	LASSERTF((int)sizeof(((struct ldlm_extent *)0)->start) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct ldlm_extent *)0)->start));
+	LASSERTF((int)offsetof(struct ldlm_extent, end) == 8, "found %lld\n",
+		 (long long)(int)offsetof(struct ldlm_extent, end));
+	LASSERTF((int)sizeof(((struct ldlm_extent *)0)->end) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct ldlm_extent *)0)->end));
+	LASSERTF((int)offsetof(struct ldlm_extent, gid) == 16, "found %lld\n",
+		 (long long)(int)offsetof(struct ldlm_extent, gid));
+	LASSERTF((int)sizeof(((struct ldlm_extent *)0)->gid) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct ldlm_extent *)0)->gid));
+
+	/* Checks for struct ldlm_inodebits */
+	LASSERTF((int)sizeof(struct ldlm_inodebits) == 8, "found %lld\n",
+		 (long long)(int)sizeof(struct ldlm_inodebits));
+	LASSERTF((int)offsetof(struct ldlm_inodebits, bits) == 0, "found %lld\n",
+		 (long long)(int)offsetof(struct ldlm_inodebits, bits));
+	LASSERTF((int)sizeof(((struct ldlm_inodebits *)0)->bits) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct ldlm_inodebits *)0)->bits));
+
+	/* Checks for struct ldlm_flock_wire */
+	LASSERTF((int)sizeof(struct ldlm_flock_wire) == 32, "found %lld\n",
+		 (long long)(int)sizeof(struct ldlm_flock_wire));
+	LASSERTF((int)offsetof(struct ldlm_flock_wire, lfw_start) == 0, "found %lld\n",
+		 (long long)(int)offsetof(struct ldlm_flock_wire, lfw_start));
+	LASSERTF((int)sizeof(((struct ldlm_flock_wire *)0)->lfw_start) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct ldlm_flock_wire *)0)->lfw_start));
+	LASSERTF((int)offsetof(struct ldlm_flock_wire, lfw_end) == 8, "found %lld\n",
+		 (long long)(int)offsetof(struct ldlm_flock_wire, lfw_end));
+	LASSERTF((int)sizeof(((struct ldlm_flock_wire *)0)->lfw_end) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct ldlm_flock_wire *)0)->lfw_end));
+	LASSERTF((int)offsetof(struct ldlm_flock_wire, lfw_owner) == 16, "found %lld\n",
+		 (long long)(int)offsetof(struct ldlm_flock_wire, lfw_owner));
+	LASSERTF((int)sizeof(((struct ldlm_flock_wire *)0)->lfw_owner) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct ldlm_flock_wire *)0)->lfw_owner));
+	LASSERTF((int)offsetof(struct ldlm_flock_wire, lfw_padding) == 24, "found %lld\n",
+		 (long long)(int)offsetof(struct ldlm_flock_wire, lfw_padding));
+	LASSERTF((int)sizeof(((struct ldlm_flock_wire *)0)->lfw_padding) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct ldlm_flock_wire *)0)->lfw_padding));
+	LASSERTF((int)offsetof(struct ldlm_flock_wire, lfw_pid) == 28, "found %lld\n",
+		 (long long)(int)offsetof(struct ldlm_flock_wire, lfw_pid));
+	LASSERTF((int)sizeof(((struct ldlm_flock_wire *)0)->lfw_pid) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct ldlm_flock_wire *)0)->lfw_pid));
+
+	/* Checks for struct ldlm_intent */
+	LASSERTF((int)sizeof(struct ldlm_intent) == 8, "found %lld\n",
+		 (long long)(int)sizeof(struct ldlm_intent));
+	LASSERTF((int)offsetof(struct ldlm_intent, opc) == 0, "found %lld\n",
+		 (long long)(int)offsetof(struct ldlm_intent, opc));
+	LASSERTF((int)sizeof(((struct ldlm_intent *)0)->opc) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct ldlm_intent *)0)->opc));
+
+	/* Checks for struct ldlm_resource_desc */
+	LASSERTF((int)sizeof(struct ldlm_resource_desc) == 40, "found %lld\n",
+		 (long long)(int)sizeof(struct ldlm_resource_desc));
+	LASSERTF((int)offsetof(struct ldlm_resource_desc, lr_type) == 0, "found %lld\n",
+		 (long long)(int)offsetof(struct ldlm_resource_desc, lr_type));
+	LASSERTF((int)sizeof(((struct ldlm_resource_desc *)0)->lr_type) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct ldlm_resource_desc *)0)->lr_type));
+	LASSERTF((int)offsetof(struct ldlm_resource_desc, lr_padding) == 4, "found %lld\n",
+		 (long long)(int)offsetof(struct ldlm_resource_desc, lr_padding));
+	LASSERTF((int)sizeof(((struct ldlm_resource_desc *)0)->lr_padding) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct ldlm_resource_desc *)0)->lr_padding));
+	LASSERTF((int)offsetof(struct ldlm_resource_desc, lr_name) == 8, "found %lld\n",
+		 (long long)(int)offsetof(struct ldlm_resource_desc, lr_name));
+	LASSERTF((int)sizeof(((struct ldlm_resource_desc *)0)->lr_name) == 32, "found %lld\n",
+		 (long long)(int)sizeof(((struct ldlm_resource_desc *)0)->lr_name));
+
+	/* Checks for struct ldlm_lock_desc */
+	LASSERTF((int)sizeof(struct ldlm_lock_desc) == 80, "found %lld\n",
+		 (long long)(int)sizeof(struct ldlm_lock_desc));
+	LASSERTF((int)offsetof(struct ldlm_lock_desc, l_resource) == 0, "found %lld\n",
+		 (long long)(int)offsetof(struct ldlm_lock_desc, l_resource));
+	LASSERTF((int)sizeof(((struct ldlm_lock_desc *)0)->l_resource) == 40, "found %lld\n",
+		 (long long)(int)sizeof(((struct ldlm_lock_desc *)0)->l_resource));
+	LASSERTF((int)offsetof(struct ldlm_lock_desc, l_req_mode) == 40, "found %lld\n",
+		 (long long)(int)offsetof(struct ldlm_lock_desc, l_req_mode));
+	LASSERTF((int)sizeof(((struct ldlm_lock_desc *)0)->l_req_mode) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct ldlm_lock_desc *)0)->l_req_mode));
+	LASSERTF((int)offsetof(struct ldlm_lock_desc, l_granted_mode) == 44, "found %lld\n",
+		 (long long)(int)offsetof(struct ldlm_lock_desc, l_granted_mode));
+	LASSERTF((int)sizeof(((struct ldlm_lock_desc *)0)->l_granted_mode) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct ldlm_lock_desc *)0)->l_granted_mode));
+	LASSERTF((int)offsetof(struct ldlm_lock_desc, l_policy_data) == 48, "found %lld\n",
+		 (long long)(int)offsetof(struct ldlm_lock_desc, l_policy_data));
+	LASSERTF((int)sizeof(((struct ldlm_lock_desc *)0)->l_policy_data) == 32, "found %lld\n",
+		 (long long)(int)sizeof(((struct ldlm_lock_desc *)0)->l_policy_data));
+
+	/* Checks for struct ldlm_request */
+	LASSERTF((int)sizeof(struct ldlm_request) == 104, "found %lld\n",
+		 (long long)(int)sizeof(struct ldlm_request));
+	LASSERTF((int)offsetof(struct ldlm_request, lock_flags) == 0, "found %lld\n",
+		 (long long)(int)offsetof(struct ldlm_request, lock_flags));
+	LASSERTF((int)sizeof(((struct ldlm_request *)0)->lock_flags) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct ldlm_request *)0)->lock_flags));
+	LASSERTF((int)offsetof(struct ldlm_request, lock_count) == 4, "found %lld\n",
+		 (long long)(int)offsetof(struct ldlm_request, lock_count));
+	LASSERTF((int)sizeof(((struct ldlm_request *)0)->lock_count) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct ldlm_request *)0)->lock_count));
+	LASSERTF((int)offsetof(struct ldlm_request, lock_desc) == 8, "found %lld\n",
+		 (long long)(int)offsetof(struct ldlm_request, lock_desc));
+	LASSERTF((int)sizeof(((struct ldlm_request *)0)->lock_desc) == 80, "found %lld\n",
+		 (long long)(int)sizeof(((struct ldlm_request *)0)->lock_desc));
+	LASSERTF((int)offsetof(struct ldlm_request, lock_handle) == 88, "found %lld\n",
+		 (long long)(int)offsetof(struct ldlm_request, lock_handle));
+	LASSERTF((int)sizeof(((struct ldlm_request *)0)->lock_handle) == 16, "found %lld\n",
+		 (long long)(int)sizeof(((struct ldlm_request *)0)->lock_handle));
+
+	/* Checks for struct ldlm_reply */
+	LASSERTF((int)sizeof(struct ldlm_reply) == 112, "found %lld\n",
+		 (long long)(int)sizeof(struct ldlm_reply));
+	LASSERTF((int)offsetof(struct ldlm_reply, lock_flags) == 0, "found %lld\n",
+		 (long long)(int)offsetof(struct ldlm_reply, lock_flags));
+	LASSERTF((int)sizeof(((struct ldlm_reply *)0)->lock_flags) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct ldlm_reply *)0)->lock_flags));
+	LASSERTF((int)offsetof(struct ldlm_reply, lock_padding) == 4, "found %lld\n",
+		 (long long)(int)offsetof(struct ldlm_reply, lock_padding));
+	LASSERTF((int)sizeof(((struct ldlm_reply *)0)->lock_padding) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct ldlm_reply *)0)->lock_padding));
+	LASSERTF((int)offsetof(struct ldlm_reply, lock_desc) == 8, "found %lld\n",
+		 (long long)(int)offsetof(struct ldlm_reply, lock_desc));
+	LASSERTF((int)sizeof(((struct ldlm_reply *)0)->lock_desc) == 80, "found %lld\n",
+		 (long long)(int)sizeof(((struct ldlm_reply *)0)->lock_desc));
+	LASSERTF((int)offsetof(struct ldlm_reply, lock_handle) == 88, "found %lld\n",
+		 (long long)(int)offsetof(struct ldlm_reply, lock_handle));
+	LASSERTF((int)sizeof(((struct ldlm_reply *)0)->lock_handle) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct ldlm_reply *)0)->lock_handle));
+	LASSERTF((int)offsetof(struct ldlm_reply, lock_policy_res1) == 96, "found %lld\n",
+		 (long long)(int)offsetof(struct ldlm_reply, lock_policy_res1));
+	LASSERTF((int)sizeof(((struct ldlm_reply *)0)->lock_policy_res1) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct ldlm_reply *)0)->lock_policy_res1));
+	LASSERTF((int)offsetof(struct ldlm_reply, lock_policy_res2) == 104, "found %lld\n",
+		 (long long)(int)offsetof(struct ldlm_reply, lock_policy_res2));
+	LASSERTF((int)sizeof(((struct ldlm_reply *)0)->lock_policy_res2) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct ldlm_reply *)0)->lock_policy_res2));
+
+	/* Checks for struct ost_lvb_v1 */
+	LASSERTF((int)sizeof(struct ost_lvb_v1) == 40, "found %lld\n",
+		 (long long)(int)sizeof(struct ost_lvb_v1));
+	LASSERTF((int)offsetof(struct ost_lvb_v1, lvb_size) == 0, "found %lld\n",
+		 (long long)(int)offsetof(struct ost_lvb_v1, lvb_size));
+	LASSERTF((int)sizeof(((struct ost_lvb_v1 *)0)->lvb_size) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct ost_lvb_v1 *)0)->lvb_size));
+	LASSERTF((int)offsetof(struct ost_lvb_v1, lvb_mtime) == 8, "found %lld\n",
+		 (long long)(int)offsetof(struct ost_lvb_v1, lvb_mtime));
+	LASSERTF((int)sizeof(((struct ost_lvb_v1 *)0)->lvb_mtime) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct ost_lvb_v1 *)0)->lvb_mtime));
+	LASSERTF((int)offsetof(struct ost_lvb_v1, lvb_atime) == 16, "found %lld\n",
+		 (long long)(int)offsetof(struct ost_lvb_v1, lvb_atime));
+	LASSERTF((int)sizeof(((struct ost_lvb_v1 *)0)->lvb_atime) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct ost_lvb_v1 *)0)->lvb_atime));
+	LASSERTF((int)offsetof(struct ost_lvb_v1, lvb_ctime) == 24, "found %lld\n",
+		 (long long)(int)offsetof(struct ost_lvb_v1, lvb_ctime));
+	LASSERTF((int)sizeof(((struct ost_lvb_v1 *)0)->lvb_ctime) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct ost_lvb_v1 *)0)->lvb_ctime));
+	LASSERTF((int)offsetof(struct ost_lvb_v1, lvb_blocks) == 32, "found %lld\n",
+		 (long long)(int)offsetof(struct ost_lvb_v1, lvb_blocks));
+	LASSERTF((int)sizeof(((struct ost_lvb_v1 *)0)->lvb_blocks) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct ost_lvb_v1 *)0)->lvb_blocks));
+
+	/* Checks for struct ost_lvb */
+	LASSERTF((int)sizeof(struct ost_lvb) == 56, "found %lld\n",
+		 (long long)(int)sizeof(struct ost_lvb));
+	LASSERTF((int)offsetof(struct ost_lvb, lvb_size) == 0, "found %lld\n",
+		 (long long)(int)offsetof(struct ost_lvb, lvb_size));
+	LASSERTF((int)sizeof(((struct ost_lvb *)0)->lvb_size) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct ost_lvb *)0)->lvb_size));
+	LASSERTF((int)offsetof(struct ost_lvb, lvb_mtime) == 8, "found %lld\n",
+		 (long long)(int)offsetof(struct ost_lvb, lvb_mtime));
+	LASSERTF((int)sizeof(((struct ost_lvb *)0)->lvb_mtime) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct ost_lvb *)0)->lvb_mtime));
+	LASSERTF((int)offsetof(struct ost_lvb, lvb_atime) == 16, "found %lld\n",
+		 (long long)(int)offsetof(struct ost_lvb, lvb_atime));
+	LASSERTF((int)sizeof(((struct ost_lvb *)0)->lvb_atime) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct ost_lvb *)0)->lvb_atime));
+	LASSERTF((int)offsetof(struct ost_lvb, lvb_ctime) == 24, "found %lld\n",
+		 (long long)(int)offsetof(struct ost_lvb, lvb_ctime));
+	LASSERTF((int)sizeof(((struct ost_lvb *)0)->lvb_ctime) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct ost_lvb *)0)->lvb_ctime));
+	LASSERTF((int)offsetof(struct ost_lvb, lvb_blocks) == 32, "found %lld\n",
+		 (long long)(int)offsetof(struct ost_lvb, lvb_blocks));
+	LASSERTF((int)sizeof(((struct ost_lvb *)0)->lvb_blocks) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct ost_lvb *)0)->lvb_blocks));
+	LASSERTF((int)offsetof(struct ost_lvb, lvb_mtime_ns) == 40, "found %lld\n",
+		 (long long)(int)offsetof(struct ost_lvb, lvb_mtime_ns));
+	LASSERTF((int)sizeof(((struct ost_lvb *)0)->lvb_mtime_ns) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct ost_lvb *)0)->lvb_mtime_ns));
+	LASSERTF((int)offsetof(struct ost_lvb, lvb_atime_ns) == 44, "found %lld\n",
+		 (long long)(int)offsetof(struct ost_lvb, lvb_atime_ns));
+	LASSERTF((int)sizeof(((struct ost_lvb *)0)->lvb_atime_ns) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct ost_lvb *)0)->lvb_atime_ns));
+	LASSERTF((int)offsetof(struct ost_lvb, lvb_ctime_ns) == 48, "found %lld\n",
+		 (long long)(int)offsetof(struct ost_lvb, lvb_ctime_ns));
+	LASSERTF((int)sizeof(((struct ost_lvb *)0)->lvb_ctime_ns) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct ost_lvb *)0)->lvb_ctime_ns));
+	LASSERTF((int)offsetof(struct ost_lvb, lvb_padding) == 52, "found %lld\n",
+		 (long long)(int)offsetof(struct ost_lvb, lvb_padding));
+	LASSERTF((int)sizeof(((struct ost_lvb *)0)->lvb_padding) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct ost_lvb *)0)->lvb_padding));
+
+	/* Checks for struct lquota_lvb */
+	LASSERTF((int)sizeof(struct lquota_lvb) == 40, "found %lld\n",
+		 (long long)(int)sizeof(struct lquota_lvb));
+	LASSERTF((int)offsetof(struct lquota_lvb, lvb_flags) == 0, "found %lld\n",
+		 (long long)(int)offsetof(struct lquota_lvb, lvb_flags));
+	LASSERTF((int)sizeof(((struct lquota_lvb *)0)->lvb_flags) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct lquota_lvb *)0)->lvb_flags));
+	LASSERTF((int)offsetof(struct lquota_lvb, lvb_id_may_rel) == 8, "found %lld\n",
+		 (long long)(int)offsetof(struct lquota_lvb, lvb_id_may_rel));
+	LASSERTF((int)sizeof(((struct lquota_lvb *)0)->lvb_id_may_rel) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct lquota_lvb *)0)->lvb_id_may_rel));
+	LASSERTF((int)offsetof(struct lquota_lvb, lvb_id_rel) == 16, "found %lld\n",
+		 (long long)(int)offsetof(struct lquota_lvb, lvb_id_rel));
+	LASSERTF((int)sizeof(((struct lquota_lvb *)0)->lvb_id_rel) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct lquota_lvb *)0)->lvb_id_rel));
+	LASSERTF((int)offsetof(struct lquota_lvb, lvb_id_qunit) == 24, "found %lld\n",
+		 (long long)(int)offsetof(struct lquota_lvb, lvb_id_qunit));
+	LASSERTF((int)sizeof(((struct lquota_lvb *)0)->lvb_id_qunit) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct lquota_lvb *)0)->lvb_id_qunit));
+	LASSERTF((int)offsetof(struct lquota_lvb, lvb_pad1) == 32, "found %lld\n",
+		 (long long)(int)offsetof(struct lquota_lvb, lvb_pad1));
+	LASSERTF((int)sizeof(((struct lquota_lvb *)0)->lvb_pad1) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct lquota_lvb *)0)->lvb_pad1));
+	LASSERTF(LQUOTA_FL_EDQUOT == 1, "found %lld\n",
+		 (long long)LQUOTA_FL_EDQUOT);
+
+	/* Checks for struct ldlm_gl_lquota_desc */
+	LASSERTF((int)sizeof(struct ldlm_gl_lquota_desc) == 64, "found %lld\n",
+		 (long long)(int)sizeof(struct ldlm_gl_lquota_desc));
+	LASSERTF((int)offsetof(struct ldlm_gl_lquota_desc, gl_id) == 0, "found %lld\n",
+		 (long long)(int)offsetof(struct ldlm_gl_lquota_desc, gl_id));
+	LASSERTF((int)sizeof(((struct ldlm_gl_lquota_desc *)0)->gl_id) == 16, "found %lld\n",
+		 (long long)(int)sizeof(((struct ldlm_gl_lquota_desc *)0)->gl_id));
+	LASSERTF((int)offsetof(struct ldlm_gl_lquota_desc, gl_flags) == 16, "found %lld\n",
+		 (long long)(int)offsetof(struct ldlm_gl_lquota_desc, gl_flags));
+	LASSERTF((int)sizeof(((struct ldlm_gl_lquota_desc *)0)->gl_flags) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct ldlm_gl_lquota_desc *)0)->gl_flags));
+	LASSERTF((int)offsetof(struct ldlm_gl_lquota_desc, gl_ver) == 24, "found %lld\n",
+		 (long long)(int)offsetof(struct ldlm_gl_lquota_desc, gl_ver));
+	LASSERTF((int)sizeof(((struct ldlm_gl_lquota_desc *)0)->gl_ver) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct ldlm_gl_lquota_desc *)0)->gl_ver));
+	LASSERTF((int)offsetof(struct ldlm_gl_lquota_desc, gl_hardlimit) == 32, "found %lld\n",
+		 (long long)(int)offsetof(struct ldlm_gl_lquota_desc, gl_hardlimit));
+	LASSERTF((int)sizeof(((struct ldlm_gl_lquota_desc *)0)->gl_hardlimit) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct ldlm_gl_lquota_desc *)0)->gl_hardlimit));
+	LASSERTF((int)offsetof(struct ldlm_gl_lquota_desc, gl_softlimit) == 40, "found %lld\n",
+		 (long long)(int)offsetof(struct ldlm_gl_lquota_desc, gl_softlimit));
+	LASSERTF((int)sizeof(((struct ldlm_gl_lquota_desc *)0)->gl_softlimit) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct ldlm_gl_lquota_desc *)0)->gl_softlimit));
+	LASSERTF((int)offsetof(struct ldlm_gl_lquota_desc, gl_time) == 48, "found %lld\n",
+		 (long long)(int)offsetof(struct ldlm_gl_lquota_desc, gl_time));
+	LASSERTF((int)sizeof(((struct ldlm_gl_lquota_desc *)0)->gl_time) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct ldlm_gl_lquota_desc *)0)->gl_time));
+	LASSERTF((int)offsetof(struct ldlm_gl_lquota_desc, gl_pad2) == 56, "found %lld\n",
+		 (long long)(int)offsetof(struct ldlm_gl_lquota_desc, gl_pad2));
+	LASSERTF((int)sizeof(((struct ldlm_gl_lquota_desc *)0)->gl_pad2) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct ldlm_gl_lquota_desc *)0)->gl_pad2));
+
+	/* Checks for struct mgs_send_param */
+	LASSERTF((int)sizeof(struct mgs_send_param) == 1024, "found %lld\n",
+		 (long long)(int)sizeof(struct mgs_send_param));
+	CLASSERT(MGS_PARAM_MAXLEN == 1024);
+	LASSERTF((int)offsetof(struct mgs_send_param, mgs_param[1024]) == 1024, "found %lld\n",
+		 (long long)(int)offsetof(struct mgs_send_param, mgs_param[1024]));
+	LASSERTF((int)sizeof(((struct mgs_send_param *)0)->mgs_param[1024]) == 1, "found %lld\n",
+		 (long long)(int)sizeof(((struct mgs_send_param *)0)->mgs_param[1024]));
+
+	/* Checks for struct cfg_marker */
+	LASSERTF((int)sizeof(struct cfg_marker) == 160, "found %lld\n",
+		 (long long)(int)sizeof(struct cfg_marker));
+	LASSERTF((int)offsetof(struct cfg_marker, cm_step) == 0, "found %lld\n",
+		 (long long)(int)offsetof(struct cfg_marker, cm_step));
+	LASSERTF((int)sizeof(((struct cfg_marker *)0)->cm_step) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct cfg_marker *)0)->cm_step));
+	LASSERTF((int)offsetof(struct cfg_marker, cm_flags) == 4, "found %lld\n",
+		 (long long)(int)offsetof(struct cfg_marker, cm_flags));
+	LASSERTF((int)sizeof(((struct cfg_marker *)0)->cm_flags) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct cfg_marker *)0)->cm_flags));
+	LASSERTF((int)offsetof(struct cfg_marker, cm_vers) == 8, "found %lld\n",
+		 (long long)(int)offsetof(struct cfg_marker, cm_vers));
+	LASSERTF((int)sizeof(((struct cfg_marker *)0)->cm_vers) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct cfg_marker *)0)->cm_vers));
+	LASSERTF((int)offsetof(struct cfg_marker, cm_padding) == 12, "found %lld\n",
+		 (long long)(int)offsetof(struct cfg_marker, cm_padding));
+	LASSERTF((int)sizeof(((struct cfg_marker *)0)->cm_padding) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct cfg_marker *)0)->cm_padding));
+	LASSERTF((int)offsetof(struct cfg_marker, cm_createtime) == 16, "found %lld\n",
+		 (long long)(int)offsetof(struct cfg_marker, cm_createtime));
+	LASSERTF((int)sizeof(((struct cfg_marker *)0)->cm_createtime) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct cfg_marker *)0)->cm_createtime));
+	LASSERTF((int)offsetof(struct cfg_marker, cm_canceltime) == 24, "found %lld\n",
+		 (long long)(int)offsetof(struct cfg_marker, cm_canceltime));
+	LASSERTF((int)sizeof(((struct cfg_marker *)0)->cm_canceltime) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct cfg_marker *)0)->cm_canceltime));
+	LASSERTF((int)offsetof(struct cfg_marker, cm_tgtname) == 32, "found %lld\n",
+		 (long long)(int)offsetof(struct cfg_marker, cm_tgtname));
+	LASSERTF((int)sizeof(((struct cfg_marker *)0)->cm_tgtname) == 64, "found %lld\n",
+		 (long long)(int)sizeof(((struct cfg_marker *)0)->cm_tgtname));
+	LASSERTF((int)offsetof(struct cfg_marker, cm_comment) == 96, "found %lld\n",
+		 (long long)(int)offsetof(struct cfg_marker, cm_comment));
+	LASSERTF((int)sizeof(((struct cfg_marker *)0)->cm_comment) == 64, "found %lld\n",
+		 (long long)(int)sizeof(((struct cfg_marker *)0)->cm_comment));
+
+	/* Checks for struct llog_logid */
+	LASSERTF((int)sizeof(struct llog_logid) == 20, "found %lld\n",
+		 (long long)(int)sizeof(struct llog_logid));
+	LASSERTF((int)offsetof(struct llog_logid, lgl_oi) == 0, "found %lld\n",
+		 (long long)(int)offsetof(struct llog_logid, lgl_oi));
+	LASSERTF((int)sizeof(((struct llog_logid *)0)->lgl_oi) == 16, "found %lld\n",
+		 (long long)(int)sizeof(((struct llog_logid *)0)->lgl_oi));
+	LASSERTF((int)offsetof(struct llog_logid, lgl_ogen) == 16, "found %lld\n",
+		 (long long)(int)offsetof(struct llog_logid, lgl_ogen));
+	LASSERTF((int)sizeof(((struct llog_logid *)0)->lgl_ogen) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct llog_logid *)0)->lgl_ogen));
+	CLASSERT(OST_SZ_REC == 274730752);
+	CLASSERT(MDS_UNLINK_REC == 274801668);
+	CLASSERT(MDS_UNLINK64_REC == 275325956);
+	CLASSERT(MDS_SETATTR64_REC == 275325953);
+	CLASSERT(OBD_CFG_REC == 274857984);
+	CLASSERT(LLOG_GEN_REC == 274989056);
+	CLASSERT(CHANGELOG_REC == 275120128);
+	CLASSERT(CHANGELOG_USER_REC == 275185664);
+	CLASSERT(LLOG_HDR_MAGIC == 275010873);
+	CLASSERT(LLOG_LOGID_MAGIC == 275010875);
+
+	/* Checks for struct llog_catid */
+	LASSERTF((int)sizeof(struct llog_catid) == 32, "found %lld\n",
+		 (long long)(int)sizeof(struct llog_catid));
+	LASSERTF((int)offsetof(struct llog_catid, lci_logid) == 0, "found %lld\n",
+		 (long long)(int)offsetof(struct llog_catid, lci_logid));
+	LASSERTF((int)sizeof(((struct llog_catid *)0)->lci_logid) == 20, "found %lld\n",
+		 (long long)(int)sizeof(((struct llog_catid *)0)->lci_logid));
+	LASSERTF((int)offsetof(struct llog_catid, lci_padding1) == 20, "found %lld\n",
+		 (long long)(int)offsetof(struct llog_catid, lci_padding1));
+	LASSERTF((int)sizeof(((struct llog_catid *)0)->lci_padding1) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct llog_catid *)0)->lci_padding1));
+	LASSERTF((int)offsetof(struct llog_catid, lci_padding2) == 24, "found %lld\n",
+		 (long long)(int)offsetof(struct llog_catid, lci_padding2));
+	LASSERTF((int)sizeof(((struct llog_catid *)0)->lci_padding2) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct llog_catid *)0)->lci_padding2));
+	LASSERTF((int)offsetof(struct llog_catid, lci_padding3) == 28, "found %lld\n",
+		 (long long)(int)offsetof(struct llog_catid, lci_padding3));
+	LASSERTF((int)sizeof(((struct llog_catid *)0)->lci_padding3) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct llog_catid *)0)->lci_padding3));
+
+	/* Checks for struct llog_rec_hdr */
+	LASSERTF((int)sizeof(struct llog_rec_hdr) == 16, "found %lld\n",
+		 (long long)(int)sizeof(struct llog_rec_hdr));
+	LASSERTF((int)offsetof(struct llog_rec_hdr, lrh_len) == 0, "found %lld\n",
+		 (long long)(int)offsetof(struct llog_rec_hdr, lrh_len));
+	LASSERTF((int)sizeof(((struct llog_rec_hdr *)0)->lrh_len) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct llog_rec_hdr *)0)->lrh_len));
+	LASSERTF((int)offsetof(struct llog_rec_hdr, lrh_index) == 4, "found %lld\n",
+		 (long long)(int)offsetof(struct llog_rec_hdr, lrh_index));
+	LASSERTF((int)sizeof(((struct llog_rec_hdr *)0)->lrh_index) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct llog_rec_hdr *)0)->lrh_index));
+	LASSERTF((int)offsetof(struct llog_rec_hdr, lrh_type) == 8, "found %lld\n",
+		 (long long)(int)offsetof(struct llog_rec_hdr, lrh_type));
+	LASSERTF((int)sizeof(((struct llog_rec_hdr *)0)->lrh_type) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct llog_rec_hdr *)0)->lrh_type));
+	LASSERTF((int)offsetof(struct llog_rec_hdr, lrh_id) == 12, "found %lld\n",
+		 (long long)(int)offsetof(struct llog_rec_hdr, lrh_id));
+	LASSERTF((int)sizeof(((struct llog_rec_hdr *)0)->lrh_id) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct llog_rec_hdr *)0)->lrh_id));
+
+	/* Checks for struct llog_rec_tail */
+	LASSERTF((int)sizeof(struct llog_rec_tail) == 8, "found %lld\n",
+		 (long long)(int)sizeof(struct llog_rec_tail));
+	LASSERTF((int)offsetof(struct llog_rec_tail, lrt_len) == 0, "found %lld\n",
+		 (long long)(int)offsetof(struct llog_rec_tail, lrt_len));
+	LASSERTF((int)sizeof(((struct llog_rec_tail *)0)->lrt_len) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct llog_rec_tail *)0)->lrt_len));
+	LASSERTF((int)offsetof(struct llog_rec_tail, lrt_index) == 4, "found %lld\n",
+		 (long long)(int)offsetof(struct llog_rec_tail, lrt_index));
+	LASSERTF((int)sizeof(((struct llog_rec_tail *)0)->lrt_index) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct llog_rec_tail *)0)->lrt_index));
+
+	/* Checks for struct llog_logid_rec */
+	LASSERTF((int)sizeof(struct llog_logid_rec) == 64, "found %lld\n",
+		 (long long)(int)sizeof(struct llog_logid_rec));
+	LASSERTF((int)offsetof(struct llog_logid_rec, lid_hdr) == 0, "found %lld\n",
+		 (long long)(int)offsetof(struct llog_logid_rec, lid_hdr));
+	LASSERTF((int)sizeof(((struct llog_logid_rec *)0)->lid_hdr) == 16, "found %lld\n",
+		 (long long)(int)sizeof(((struct llog_logid_rec *)0)->lid_hdr));
+	LASSERTF((int)offsetof(struct llog_logid_rec, lid_id) == 16, "found %lld\n",
+		 (long long)(int)offsetof(struct llog_logid_rec, lid_id));
+	LASSERTF((int)sizeof(((struct llog_logid_rec *)0)->lid_id) == 20, "found %lld\n",
+		 (long long)(int)sizeof(((struct llog_logid_rec *)0)->lid_id));
+	LASSERTF((int)offsetof(struct llog_logid_rec, lid_padding1) == 36, "found %lld\n",
+		 (long long)(int)offsetof(struct llog_logid_rec, lid_padding1));
+	LASSERTF((int)sizeof(((struct llog_logid_rec *)0)->lid_padding1) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct llog_logid_rec *)0)->lid_padding1));
+	LASSERTF((int)offsetof(struct llog_logid_rec, lid_padding2) == 40, "found %lld\n",
+		 (long long)(int)offsetof(struct llog_logid_rec, lid_padding2));
+	LASSERTF((int)sizeof(((struct llog_logid_rec *)0)->lid_padding2) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct llog_logid_rec *)0)->lid_padding2));
+	LASSERTF((int)offsetof(struct llog_logid_rec, lid_padding3) == 48, "found %lld\n",
+		 (long long)(int)offsetof(struct llog_logid_rec, lid_padding3));
+	LASSERTF((int)sizeof(((struct llog_logid_rec *)0)->lid_padding3) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct llog_logid_rec *)0)->lid_padding3));
+	LASSERTF((int)offsetof(struct llog_logid_rec, lid_tail) == 56, "found %lld\n",
+		 (long long)(int)offsetof(struct llog_logid_rec, lid_tail));
+	LASSERTF((int)sizeof(((struct llog_logid_rec *)0)->lid_tail) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct llog_logid_rec *)0)->lid_tail));
+
+	/* Checks for struct llog_unlink_rec */
+	LASSERTF((int)sizeof(struct llog_unlink_rec) == 40, "found %lld\n",
+		 (long long)(int)sizeof(struct llog_unlink_rec));
+	LASSERTF((int)offsetof(struct llog_unlink_rec, lur_hdr) == 0, "found %lld\n",
+		 (long long)(int)offsetof(struct llog_unlink_rec, lur_hdr));
+	LASSERTF((int)sizeof(((struct llog_unlink_rec *)0)->lur_hdr) == 16, "found %lld\n",
+		 (long long)(int)sizeof(((struct llog_unlink_rec *)0)->lur_hdr));
+	LASSERTF((int)offsetof(struct llog_unlink_rec, lur_oid) == 16, "found %lld\n",
+		 (long long)(int)offsetof(struct llog_unlink_rec, lur_oid));
+	LASSERTF((int)sizeof(((struct llog_unlink_rec *)0)->lur_oid) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct llog_unlink_rec *)0)->lur_oid));
+	LASSERTF((int)offsetof(struct llog_unlink_rec, lur_oseq) == 24, "found %lld\n",
+		 (long long)(int)offsetof(struct llog_unlink_rec, lur_oseq));
+	LASSERTF((int)sizeof(((struct llog_unlink_rec *)0)->lur_oseq) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct llog_unlink_rec *)0)->lur_oseq));
+	LASSERTF((int)offsetof(struct llog_unlink_rec, lur_count) == 28, "found %lld\n",
+		 (long long)(int)offsetof(struct llog_unlink_rec, lur_count));
+	LASSERTF((int)sizeof(((struct llog_unlink_rec *)0)->lur_count) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct llog_unlink_rec *)0)->lur_count));
+	LASSERTF((int)offsetof(struct llog_unlink_rec, lur_tail) == 32, "found %lld\n",
+		 (long long)(int)offsetof(struct llog_unlink_rec, lur_tail));
+	LASSERTF((int)sizeof(((struct llog_unlink_rec *)0)->lur_tail) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct llog_unlink_rec *)0)->lur_tail));
+	/* Checks for struct llog_unlink64_rec */
+	LASSERTF((int)sizeof(struct llog_unlink64_rec) == 64, "found %lld\n",
+		 (long long)(int)sizeof(struct llog_unlink64_rec));
+	LASSERTF((int)offsetof(struct llog_unlink64_rec, lur_hdr) == 0, "found %lld\n",
+		 (long long)(int)offsetof(struct llog_unlink64_rec, lur_hdr));
+	LASSERTF((int)sizeof(((struct llog_unlink64_rec *)0)->lur_hdr) == 16, "found %lld\n",
+		 (long long)(int)sizeof(((struct llog_unlink64_rec *)0)->lur_hdr));
+	LASSERTF((int)offsetof(struct llog_unlink64_rec, lur_fid) == 16, "found %lld\n",
+		 (long long)(int)offsetof(struct llog_unlink64_rec, lur_fid));
+	LASSERTF((int)sizeof(((struct llog_unlink64_rec *)0)->lur_fid) == 16, "found %lld\n",
+		 (long long)(int)sizeof(((struct llog_unlink64_rec *)0)->lur_fid));
+	LASSERTF((int)offsetof(struct llog_unlink64_rec, lur_count) == 32, "found %lld\n",
+		 (long long)(int)offsetof(struct llog_unlink64_rec, lur_count));
+	LASSERTF((int)sizeof(((struct llog_unlink64_rec *)0)->lur_count) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct llog_unlink64_rec *)0)->lur_count));
+	LASSERTF((int)offsetof(struct llog_unlink64_rec, lur_tail) == 56, "found %lld\n",
+		 (long long)(int)offsetof(struct llog_unlink64_rec, lur_tail));
+	LASSERTF((int)sizeof(((struct llog_unlink64_rec *)0)->lur_tail) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct llog_unlink64_rec *)0)->lur_tail));
+	LASSERTF((int)offsetof(struct llog_unlink64_rec, lur_padding1) == 36, "found %lld\n",
+		 (long long)(int)offsetof(struct llog_unlink64_rec, lur_padding1));
+	LASSERTF((int)sizeof(((struct llog_unlink64_rec *)0)->lur_padding1) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct llog_unlink64_rec *)0)->lur_padding1));
+	LASSERTF((int)offsetof(struct llog_unlink64_rec, lur_padding2) == 40, "found %lld\n",
+		 (long long)(int)offsetof(struct llog_unlink64_rec, lur_padding2));
+	LASSERTF((int)sizeof(((struct llog_unlink64_rec *)0)->lur_padding2) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct llog_unlink64_rec *)0)->lur_padding2));
+	LASSERTF((int)offsetof(struct llog_unlink64_rec, lur_padding3) == 48, "found %lld\n",
+		 (long long)(int)offsetof(struct llog_unlink64_rec, lur_padding3));
+	LASSERTF((int)sizeof(((struct llog_unlink64_rec *)0)->lur_padding3) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct llog_unlink64_rec *)0)->lur_padding3));
+
+	/* Checks for struct llog_setattr64_rec */
+	LASSERTF((int)sizeof(struct llog_setattr64_rec) == 64, "found %lld\n",
+		 (long long)(int)sizeof(struct llog_setattr64_rec));
+	LASSERTF((int)offsetof(struct llog_setattr64_rec, lsr_hdr) == 0, "found %lld\n",
+		 (long long)(int)offsetof(struct llog_setattr64_rec, lsr_hdr));
+	LASSERTF((int)sizeof(((struct llog_setattr64_rec *)0)->lsr_hdr) == 16, "found %lld\n",
+		 (long long)(int)sizeof(((struct llog_setattr64_rec *)0)->lsr_hdr));
+	LASSERTF((int)offsetof(struct llog_setattr64_rec, lsr_oi) == 16, "found %lld\n",
+		 (long long)(int)offsetof(struct llog_setattr64_rec, lsr_oi));
+	LASSERTF((int)sizeof(((struct llog_setattr64_rec *)0)->lsr_oi) == 16, "found %lld\n",
+		 (long long)(int)sizeof(((struct llog_setattr64_rec *)0)->lsr_oi));
+	LASSERTF((int)offsetof(struct llog_setattr64_rec, lsr_uid) == 32, "found %lld\n",
+		 (long long)(int)offsetof(struct llog_setattr64_rec, lsr_uid));
+	LASSERTF((int)sizeof(((struct llog_setattr64_rec *)0)->lsr_uid) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct llog_setattr64_rec *)0)->lsr_uid));
+	LASSERTF((int)offsetof(struct llog_setattr64_rec, lsr_uid_h) == 36, "found %lld\n",
+		 (long long)(int)offsetof(struct llog_setattr64_rec, lsr_uid_h));
+	LASSERTF((int)sizeof(((struct llog_setattr64_rec *)0)->lsr_uid_h) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct llog_setattr64_rec *)0)->lsr_uid_h));
+	LASSERTF((int)offsetof(struct llog_setattr64_rec, lsr_gid) == 40, "found %lld\n",
+		 (long long)(int)offsetof(struct llog_setattr64_rec, lsr_gid));
+	LASSERTF((int)sizeof(((struct llog_setattr64_rec *)0)->lsr_gid) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct llog_setattr64_rec *)0)->lsr_gid));
+	LASSERTF((int)offsetof(struct llog_setattr64_rec, lsr_gid_h) == 44, "found %lld\n",
+		 (long long)(int)offsetof(struct llog_setattr64_rec, lsr_gid_h));
+	LASSERTF((int)sizeof(((struct llog_setattr64_rec *)0)->lsr_gid_h) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct llog_setattr64_rec *)0)->lsr_gid_h));
+	LASSERTF((int)offsetof(struct llog_setattr64_rec, lsr_padding) == 48, "found %lld\n",
+		 (long long)(int)offsetof(struct llog_setattr64_rec, lsr_padding));
+	LASSERTF((int)sizeof(((struct llog_setattr64_rec *)0)->lsr_padding) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct llog_setattr64_rec *)0)->lsr_padding));
+	LASSERTF((int)offsetof(struct llog_setattr64_rec, lsr_tail) == 56, "found %lld\n",
+		 (long long)(int)offsetof(struct llog_setattr64_rec, lsr_tail));
+	LASSERTF((int)sizeof(((struct llog_setattr64_rec *)0)->lsr_tail) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct llog_setattr64_rec *)0)->lsr_tail));
+
+	/* Checks for struct llog_size_change_rec */
+	LASSERTF((int)sizeof(struct llog_size_change_rec) == 64, "found %lld\n",
+		 (long long)(int)sizeof(struct llog_size_change_rec));
+	LASSERTF((int)offsetof(struct llog_size_change_rec, lsc_hdr) == 0, "found %lld\n",
+		 (long long)(int)offsetof(struct llog_size_change_rec, lsc_hdr));
+	LASSERTF((int)sizeof(((struct llog_size_change_rec *)0)->lsc_hdr) == 16, "found %lld\n",
+		 (long long)(int)sizeof(((struct llog_size_change_rec *)0)->lsc_hdr));
+	LASSERTF((int)offsetof(struct llog_size_change_rec, lsc_fid) == 16, "found %lld\n",
+		 (long long)(int)offsetof(struct llog_size_change_rec, lsc_fid));
+	LASSERTF((int)sizeof(((struct llog_size_change_rec *)0)->lsc_fid) == 16, "found %lld\n",
+		 (long long)(int)sizeof(((struct llog_size_change_rec *)0)->lsc_fid));
+	LASSERTF((int)offsetof(struct llog_size_change_rec, lsc_ioepoch) == 32, "found %lld\n",
+		 (long long)(int)offsetof(struct llog_size_change_rec, lsc_ioepoch));
+	LASSERTF((int)sizeof(((struct llog_size_change_rec *)0)->lsc_ioepoch) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct llog_size_change_rec *)0)->lsc_ioepoch));
+	LASSERTF((int)offsetof(struct llog_size_change_rec, lsc_padding1) == 36, "found %lld\n",
+		 (long long)(int)offsetof(struct llog_size_change_rec, lsc_padding1));
+	LASSERTF((int)sizeof(((struct llog_size_change_rec *)0)->lsc_padding1) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct llog_size_change_rec *)0)->lsc_padding1));
+	LASSERTF((int)offsetof(struct llog_size_change_rec, lsc_padding2) == 40, "found %lld\n",
+		 (long long)(int)offsetof(struct llog_size_change_rec, lsc_padding2));
+	LASSERTF((int)sizeof(((struct llog_size_change_rec *)0)->lsc_padding2) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct llog_size_change_rec *)0)->lsc_padding2));
+	LASSERTF((int)offsetof(struct llog_size_change_rec, lsc_padding3) == 48, "found %lld\n",
+		 (long long)(int)offsetof(struct llog_size_change_rec, lsc_padding3));
+	LASSERTF((int)sizeof(((struct llog_size_change_rec *)0)->lsc_padding3) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct llog_size_change_rec *)0)->lsc_padding3));
+	LASSERTF((int)offsetof(struct llog_size_change_rec, lsc_tail) == 56, "found %lld\n",
+		 (long long)(int)offsetof(struct llog_size_change_rec, lsc_tail));
+	LASSERTF((int)sizeof(((struct llog_size_change_rec *)0)->lsc_tail) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct llog_size_change_rec *)0)->lsc_tail));
+
+	/* Checks for struct changelog_rec */
+	LASSERTF((int)sizeof(struct changelog_rec) == 64, "found %lld\n",
+		 (long long)(int)sizeof(struct changelog_rec));
+	LASSERTF((int)offsetof(struct changelog_rec, cr_namelen) == 0, "found %lld\n",
+		 (long long)(int)offsetof(struct changelog_rec, cr_namelen));
+	LASSERTF((int)sizeof(((struct changelog_rec *)0)->cr_namelen) == 2, "found %lld\n",
+		 (long long)(int)sizeof(((struct changelog_rec *)0)->cr_namelen));
+	LASSERTF((int)offsetof(struct changelog_rec, cr_flags) == 2, "found %lld\n",
+		 (long long)(int)offsetof(struct changelog_rec, cr_flags));
+	LASSERTF((int)sizeof(((struct changelog_rec *)0)->cr_flags) == 2, "found %lld\n",
+		 (long long)(int)sizeof(((struct changelog_rec *)0)->cr_flags));
+	LASSERTF((int)offsetof(struct changelog_rec, cr_type) == 4, "found %lld\n",
+		 (long long)(int)offsetof(struct changelog_rec, cr_type));
+	LASSERTF((int)sizeof(((struct changelog_rec *)0)->cr_type) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct changelog_rec *)0)->cr_type));
+	LASSERTF((int)offsetof(struct changelog_rec, cr_index) == 8, "found %lld\n",
+		 (long long)(int)offsetof(struct changelog_rec, cr_index));
+	LASSERTF((int)sizeof(((struct changelog_rec *)0)->cr_index) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct changelog_rec *)0)->cr_index));
+	LASSERTF((int)offsetof(struct changelog_rec, cr_prev) == 16, "found %lld\n",
+		 (long long)(int)offsetof(struct changelog_rec, cr_prev));
+	LASSERTF((int)sizeof(((struct changelog_rec *)0)->cr_prev) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct changelog_rec *)0)->cr_prev));
+	LASSERTF((int)offsetof(struct changelog_rec, cr_time) == 24, "found %lld\n",
+		 (long long)(int)offsetof(struct changelog_rec, cr_time));
+	LASSERTF((int)sizeof(((struct changelog_rec *)0)->cr_time) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct changelog_rec *)0)->cr_time));
+	LASSERTF((int)offsetof(struct changelog_rec, cr_tfid) == 32, "found %lld\n",
+		 (long long)(int)offsetof(struct changelog_rec, cr_tfid));
+	LASSERTF((int)sizeof(((struct changelog_rec *)0)->cr_tfid) == 16, "found %lld\n",
+		 (long long)(int)sizeof(((struct changelog_rec *)0)->cr_tfid));
+	LASSERTF((int)offsetof(struct changelog_rec, cr_pfid) == 48, "found %lld\n",
+		 (long long)(int)offsetof(struct changelog_rec, cr_pfid));
+	LASSERTF((int)sizeof(((struct changelog_rec *)0)->cr_pfid) == 16, "found %lld\n",
+		 (long long)(int)sizeof(((struct changelog_rec *)0)->cr_pfid));
+
+	/* Checks for struct changelog_ext_rec */
+	LASSERTF((int)sizeof(struct changelog_ext_rec) == 96, "found %lld\n",
+		 (long long)(int)sizeof(struct changelog_ext_rec));
+	LASSERTF((int)offsetof(struct changelog_ext_rec, cr_namelen) == 0, "found %lld\n",
+		 (long long)(int)offsetof(struct changelog_ext_rec, cr_namelen));
+	LASSERTF((int)sizeof(((struct changelog_ext_rec *)0)->cr_namelen) == 2, "found %lld\n",
+		 (long long)(int)sizeof(((struct changelog_ext_rec *)0)->cr_namelen));
+	LASSERTF((int)offsetof(struct changelog_ext_rec, cr_flags) == 2, "found %lld\n",
+		 (long long)(int)offsetof(struct changelog_ext_rec, cr_flags));
+	LASSERTF((int)sizeof(((struct changelog_ext_rec *)0)->cr_flags) == 2, "found %lld\n",
+		 (long long)(int)sizeof(((struct changelog_ext_rec *)0)->cr_flags));
+	LASSERTF((int)offsetof(struct changelog_ext_rec, cr_type) == 4, "found %lld\n",
+		 (long long)(int)offsetof(struct changelog_ext_rec, cr_type));
+	LASSERTF((int)sizeof(((struct changelog_ext_rec *)0)->cr_type) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct changelog_ext_rec *)0)->cr_type));
+	LASSERTF((int)offsetof(struct changelog_ext_rec, cr_index) == 8, "found %lld\n",
+		 (long long)(int)offsetof(struct changelog_ext_rec, cr_index));
+	LASSERTF((int)sizeof(((struct changelog_ext_rec *)0)->cr_index) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct changelog_ext_rec *)0)->cr_index));
+	LASSERTF((int)offsetof(struct changelog_ext_rec, cr_prev) == 16, "found %lld\n",
+		 (long long)(int)offsetof(struct changelog_ext_rec, cr_prev));
+	LASSERTF((int)sizeof(((struct changelog_ext_rec *)0)->cr_prev) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct changelog_ext_rec *)0)->cr_prev));
+	LASSERTF((int)offsetof(struct changelog_ext_rec, cr_time) == 24, "found %lld\n",
+		 (long long)(int)offsetof(struct changelog_ext_rec, cr_time));
+	LASSERTF((int)sizeof(((struct changelog_ext_rec *)0)->cr_time) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct changelog_ext_rec *)0)->cr_time));
+	LASSERTF((int)offsetof(struct changelog_ext_rec, cr_tfid) == 32, "found %lld\n",
+		 (long long)(int)offsetof(struct changelog_ext_rec, cr_tfid));
+	LASSERTF((int)sizeof(((struct changelog_ext_rec *)0)->cr_tfid) == 16, "found %lld\n",
+		 (long long)(int)sizeof(((struct changelog_ext_rec *)0)->cr_tfid));
+	LASSERTF((int)offsetof(struct changelog_ext_rec, cr_pfid) == 48, "found %lld\n",
+		 (long long)(int)offsetof(struct changelog_ext_rec, cr_pfid));
+	LASSERTF((int)sizeof(((struct changelog_ext_rec *)0)->cr_pfid) == 16, "found %lld\n",
+		 (long long)(int)sizeof(((struct changelog_ext_rec *)0)->cr_pfid));
+	LASSERTF((int)offsetof(struct changelog_ext_rec, cr_sfid) == 64, "found %lld\n",
+		 (long long)(int)offsetof(struct changelog_ext_rec, cr_sfid));
+	LASSERTF((int)sizeof(((struct changelog_ext_rec *)0)->cr_sfid) == 16, "found %lld\n",
+		 (long long)(int)sizeof(((struct changelog_ext_rec *)0)->cr_sfid));
+	LASSERTF((int)offsetof(struct changelog_ext_rec, cr_spfid) == 80, "found %lld\n",
+		 (long long)(int)offsetof(struct changelog_ext_rec, cr_spfid));
+	LASSERTF((int)sizeof(((struct changelog_ext_rec *)0)->cr_spfid) == 16, "found %lld\n",
+		 (long long)(int)sizeof(((struct changelog_ext_rec *)0)->cr_spfid));
+
+	/* Checks for struct changelog_setinfo */
+	LASSERTF((int)sizeof(struct changelog_setinfo) == 12, "found %lld\n",
+		 (long long)(int)sizeof(struct changelog_setinfo));
+	LASSERTF((int)offsetof(struct changelog_setinfo, cs_recno) == 0, "found %lld\n",
+		 (long long)(int)offsetof(struct changelog_setinfo, cs_recno));
+	LASSERTF((int)sizeof(((struct changelog_setinfo *)0)->cs_recno) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct changelog_setinfo *)0)->cs_recno));
+	LASSERTF((int)offsetof(struct changelog_setinfo, cs_id) == 8, "found %lld\n",
+		 (long long)(int)offsetof(struct changelog_setinfo, cs_id));
+	LASSERTF((int)sizeof(((struct changelog_setinfo *)0)->cs_id) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct changelog_setinfo *)0)->cs_id));
+
+	/* Checks for struct llog_changelog_rec */
+	LASSERTF((int)sizeof(struct llog_changelog_rec) == 88, "found %lld\n",
+		 (long long)(int)sizeof(struct llog_changelog_rec));
+	LASSERTF((int)offsetof(struct llog_changelog_rec, cr_hdr) == 0, "found %lld\n",
+		 (long long)(int)offsetof(struct llog_changelog_rec, cr_hdr));
+	LASSERTF((int)sizeof(((struct llog_changelog_rec *)0)->cr_hdr) == 16, "found %lld\n",
+		 (long long)(int)sizeof(((struct llog_changelog_rec *)0)->cr_hdr));
+	LASSERTF((int)offsetof(struct llog_changelog_rec, cr) == 16, "found %lld\n",
+		 (long long)(int)offsetof(struct llog_changelog_rec, cr));
+	LASSERTF((int)sizeof(((struct llog_changelog_rec *)0)->cr) == 64, "found %lld\n",
+		 (long long)(int)sizeof(((struct llog_changelog_rec *)0)->cr));
+	LASSERTF((int)offsetof(struct llog_changelog_rec, cr_tail) == 80, "found %lld\n",
+		 (long long)(int)offsetof(struct llog_changelog_rec, cr_tail));
+	LASSERTF((int)sizeof(((struct llog_changelog_rec *)0)->cr_tail) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct llog_changelog_rec *)0)->cr_tail));
+
+	/* Checks for struct llog_changelog_user_rec */
+	LASSERTF((int)sizeof(struct llog_changelog_user_rec) == 40, "found %lld\n",
+		 (long long)(int)sizeof(struct llog_changelog_user_rec));
+	LASSERTF((int)offsetof(struct llog_changelog_user_rec, cur_hdr) == 0, "found %lld\n",
+		 (long long)(int)offsetof(struct llog_changelog_user_rec, cur_hdr));
+	LASSERTF((int)sizeof(((struct llog_changelog_user_rec *)0)->cur_hdr) == 16, "found %lld\n",
+		 (long long)(int)sizeof(((struct llog_changelog_user_rec *)0)->cur_hdr));
+	LASSERTF((int)offsetof(struct llog_changelog_user_rec, cur_id) == 16, "found %lld\n",
+		 (long long)(int)offsetof(struct llog_changelog_user_rec, cur_id));
+	LASSERTF((int)sizeof(((struct llog_changelog_user_rec *)0)->cur_id) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct llog_changelog_user_rec *)0)->cur_id));
+	LASSERTF((int)offsetof(struct llog_changelog_user_rec, cur_padding) == 20, "found %lld\n",
+		 (long long)(int)offsetof(struct llog_changelog_user_rec, cur_padding));
+	LASSERTF((int)sizeof(((struct llog_changelog_user_rec *)0)->cur_padding) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct llog_changelog_user_rec *)0)->cur_padding));
+	LASSERTF((int)offsetof(struct llog_changelog_user_rec, cur_endrec) == 24, "found %lld\n",
+		 (long long)(int)offsetof(struct llog_changelog_user_rec, cur_endrec));
+	LASSERTF((int)sizeof(((struct llog_changelog_user_rec *)0)->cur_endrec) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct llog_changelog_user_rec *)0)->cur_endrec));
+	LASSERTF((int)offsetof(struct llog_changelog_user_rec, cur_tail) == 32, "found %lld\n",
+		 (long long)(int)offsetof(struct llog_changelog_user_rec, cur_tail));
+	LASSERTF((int)sizeof(((struct llog_changelog_user_rec *)0)->cur_tail) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct llog_changelog_user_rec *)0)->cur_tail));
+
+	/* Checks for struct llog_gen */
+	LASSERTF((int)sizeof(struct llog_gen) == 16, "found %lld\n",
+		 (long long)(int)sizeof(struct llog_gen));
+	LASSERTF((int)offsetof(struct llog_gen, mnt_cnt) == 0, "found %lld\n",
+		 (long long)(int)offsetof(struct llog_gen, mnt_cnt));
+	LASSERTF((int)sizeof(((struct llog_gen *)0)->mnt_cnt) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct llog_gen *)0)->mnt_cnt));
+	LASSERTF((int)offsetof(struct llog_gen, conn_cnt) == 8, "found %lld\n",
+		 (long long)(int)offsetof(struct llog_gen, conn_cnt));
+	LASSERTF((int)sizeof(((struct llog_gen *)0)->conn_cnt) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct llog_gen *)0)->conn_cnt));
+
+	/* Checks for struct llog_gen_rec */
+	LASSERTF((int)sizeof(struct llog_gen_rec) == 64, "found %lld\n",
+		 (long long)(int)sizeof(struct llog_gen_rec));
+	LASSERTF((int)offsetof(struct llog_gen_rec, lgr_hdr) == 0, "found %lld\n",
+		 (long long)(int)offsetof(struct llog_gen_rec, lgr_hdr));
+	LASSERTF((int)sizeof(((struct llog_gen_rec *)0)->lgr_hdr) == 16, "found %lld\n",
+		 (long long)(int)sizeof(((struct llog_gen_rec *)0)->lgr_hdr));
+	LASSERTF((int)offsetof(struct llog_gen_rec, lgr_gen) == 16, "found %lld\n",
+		 (long long)(int)offsetof(struct llog_gen_rec, lgr_gen));
+	LASSERTF((int)sizeof(((struct llog_gen_rec *)0)->lgr_gen) == 16, "found %lld\n",
+		 (long long)(int)sizeof(((struct llog_gen_rec *)0)->lgr_gen));
+	LASSERTF((int)offsetof(struct llog_gen_rec, lgr_tail) == 56, "found %lld\n",
+		 (long long)(int)offsetof(struct llog_gen_rec, lgr_tail));
+	LASSERTF((int)sizeof(((struct llog_gen_rec *)0)->lgr_tail) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct llog_gen_rec *)0)->lgr_tail));
+
+	/* Checks for struct llog_log_hdr */
+	LASSERTF((int)sizeof(struct llog_log_hdr) == 8192, "found %lld\n",
+		 (long long)(int)sizeof(struct llog_log_hdr));
+	LASSERTF((int)offsetof(struct llog_log_hdr, llh_hdr) == 0, "found %lld\n",
+		 (long long)(int)offsetof(struct llog_log_hdr, llh_hdr));
+	LASSERTF((int)sizeof(((struct llog_log_hdr *)0)->llh_hdr) == 16, "found %lld\n",
+		 (long long)(int)sizeof(((struct llog_log_hdr *)0)->llh_hdr));
+	LASSERTF((int)offsetof(struct llog_log_hdr, llh_timestamp) == 16, "found %lld\n",
+		 (long long)(int)offsetof(struct llog_log_hdr, llh_timestamp));
+	LASSERTF((int)sizeof(((struct llog_log_hdr *)0)->llh_timestamp) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct llog_log_hdr *)0)->llh_timestamp));
+	LASSERTF((int)offsetof(struct llog_log_hdr, llh_count) == 24, "found %lld\n",
+		 (long long)(int)offsetof(struct llog_log_hdr, llh_count));
+	LASSERTF((int)sizeof(((struct llog_log_hdr *)0)->llh_count) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct llog_log_hdr *)0)->llh_count));
+	LASSERTF((int)offsetof(struct llog_log_hdr, llh_bitmap_offset) == 28, "found %lld\n",
+		 (long long)(int)offsetof(struct llog_log_hdr, llh_bitmap_offset));
+	LASSERTF((int)sizeof(((struct llog_log_hdr *)0)->llh_bitmap_offset) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct llog_log_hdr *)0)->llh_bitmap_offset));
+	LASSERTF((int)offsetof(struct llog_log_hdr, llh_size) == 32, "found %lld\n",
+		 (long long)(int)offsetof(struct llog_log_hdr, llh_size));
+	LASSERTF((int)sizeof(((struct llog_log_hdr *)0)->llh_size) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct llog_log_hdr *)0)->llh_size));
+	LASSERTF((int)offsetof(struct llog_log_hdr, llh_flags) == 36, "found %lld\n",
+		 (long long)(int)offsetof(struct llog_log_hdr, llh_flags));
+	LASSERTF((int)sizeof(((struct llog_log_hdr *)0)->llh_flags) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct llog_log_hdr *)0)->llh_flags));
+	LASSERTF((int)offsetof(struct llog_log_hdr, llh_cat_idx) == 40, "found %lld\n",
+		 (long long)(int)offsetof(struct llog_log_hdr, llh_cat_idx));
+	LASSERTF((int)sizeof(((struct llog_log_hdr *)0)->llh_cat_idx) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct llog_log_hdr *)0)->llh_cat_idx));
+	LASSERTF((int)offsetof(struct llog_log_hdr, llh_tgtuuid) == 44, "found %lld\n",
+		 (long long)(int)offsetof(struct llog_log_hdr, llh_tgtuuid));
+	LASSERTF((int)sizeof(((struct llog_log_hdr *)0)->llh_tgtuuid) == 40, "found %lld\n",
+		 (long long)(int)sizeof(((struct llog_log_hdr *)0)->llh_tgtuuid));
+	LASSERTF((int)offsetof(struct llog_log_hdr, llh_reserved) == 84, "found %lld\n",
+		 (long long)(int)offsetof(struct llog_log_hdr, llh_reserved));
+	LASSERTF((int)sizeof(((struct llog_log_hdr *)0)->llh_reserved) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct llog_log_hdr *)0)->llh_reserved));
+	LASSERTF((int)offsetof(struct llog_log_hdr, llh_bitmap) == 88, "found %lld\n",
+		 (long long)(int)offsetof(struct llog_log_hdr, llh_bitmap));
+	LASSERTF((int)sizeof(((struct llog_log_hdr *)0)->llh_bitmap) == 8096, "found %lld\n",
+		 (long long)(int)sizeof(((struct llog_log_hdr *)0)->llh_bitmap));
+	LASSERTF((int)offsetof(struct llog_log_hdr, llh_tail) == 8184, "found %lld\n",
+		 (long long)(int)offsetof(struct llog_log_hdr, llh_tail));
+	LASSERTF((int)sizeof(((struct llog_log_hdr *)0)->llh_tail) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct llog_log_hdr *)0)->llh_tail));
+
+	/* Checks for struct llog_cookie */
+	LASSERTF((int)sizeof(struct llog_cookie) == 32, "found %lld\n",
+		 (long long)(int)sizeof(struct llog_cookie));
+	LASSERTF((int)offsetof(struct llog_cookie, lgc_lgl) == 0, "found %lld\n",
+		 (long long)(int)offsetof(struct llog_cookie, lgc_lgl));
+	LASSERTF((int)sizeof(((struct llog_cookie *)0)->lgc_lgl) == 20, "found %lld\n",
+		 (long long)(int)sizeof(((struct llog_cookie *)0)->lgc_lgl));
+	LASSERTF((int)offsetof(struct llog_cookie, lgc_subsys) == 20, "found %lld\n",
+		 (long long)(int)offsetof(struct llog_cookie, lgc_subsys));
+	LASSERTF((int)sizeof(((struct llog_cookie *)0)->lgc_subsys) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct llog_cookie *)0)->lgc_subsys));
+	LASSERTF((int)offsetof(struct llog_cookie, lgc_index) == 24, "found %lld\n",
+		 (long long)(int)offsetof(struct llog_cookie, lgc_index));
+	LASSERTF((int)sizeof(((struct llog_cookie *)0)->lgc_index) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct llog_cookie *)0)->lgc_index));
+	LASSERTF((int)offsetof(struct llog_cookie, lgc_padding) == 28, "found %lld\n",
+		 (long long)(int)offsetof(struct llog_cookie, lgc_padding));
+	LASSERTF((int)sizeof(((struct llog_cookie *)0)->lgc_padding) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct llog_cookie *)0)->lgc_padding));
+
+	/* Checks for struct llogd_body */
+	LASSERTF((int)sizeof(struct llogd_body) == 48, "found %lld\n",
+		 (long long)(int)sizeof(struct llogd_body));
+	LASSERTF((int)offsetof(struct llogd_body, lgd_logid) == 0, "found %lld\n",
+		 (long long)(int)offsetof(struct llogd_body, lgd_logid));
+	LASSERTF((int)sizeof(((struct llogd_body *)0)->lgd_logid) == 20, "found %lld\n",
+		 (long long)(int)sizeof(((struct llogd_body *)0)->lgd_logid));
+	LASSERTF((int)offsetof(struct llogd_body, lgd_ctxt_idx) == 20, "found %lld\n",
+		 (long long)(int)offsetof(struct llogd_body, lgd_ctxt_idx));
+	LASSERTF((int)sizeof(((struct llogd_body *)0)->lgd_ctxt_idx) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct llogd_body *)0)->lgd_ctxt_idx));
+	LASSERTF((int)offsetof(struct llogd_body, lgd_llh_flags) == 24, "found %lld\n",
+		 (long long)(int)offsetof(struct llogd_body, lgd_llh_flags));
+	LASSERTF((int)sizeof(((struct llogd_body *)0)->lgd_llh_flags) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct llogd_body *)0)->lgd_llh_flags));
+	LASSERTF((int)offsetof(struct llogd_body, lgd_index) == 28, "found %lld\n",
+		 (long long)(int)offsetof(struct llogd_body, lgd_index));
+	LASSERTF((int)sizeof(((struct llogd_body *)0)->lgd_index) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct llogd_body *)0)->lgd_index));
+	LASSERTF((int)offsetof(struct llogd_body, lgd_saved_index) == 32, "found %lld\n",
+		 (long long)(int)offsetof(struct llogd_body, lgd_saved_index));
+	LASSERTF((int)sizeof(((struct llogd_body *)0)->lgd_saved_index) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct llogd_body *)0)->lgd_saved_index));
+	LASSERTF((int)offsetof(struct llogd_body, lgd_len) == 36, "found %lld\n",
+		 (long long)(int)offsetof(struct llogd_body, lgd_len));
+	LASSERTF((int)sizeof(((struct llogd_body *)0)->lgd_len) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct llogd_body *)0)->lgd_len));
+	LASSERTF((int)offsetof(struct llogd_body, lgd_cur_offset) == 40, "found %lld\n",
+		 (long long)(int)offsetof(struct llogd_body, lgd_cur_offset));
+	LASSERTF((int)sizeof(((struct llogd_body *)0)->lgd_cur_offset) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct llogd_body *)0)->lgd_cur_offset));
+	CLASSERT(LLOG_ORIGIN_HANDLE_CREATE == 501);
+	CLASSERT(LLOG_ORIGIN_HANDLE_NEXT_BLOCK == 502);
+	CLASSERT(LLOG_ORIGIN_HANDLE_READ_HEADER == 503);
+	CLASSERT(LLOG_ORIGIN_HANDLE_WRITE_REC == 504);
+	CLASSERT(LLOG_ORIGIN_HANDLE_CLOSE == 505);
+	CLASSERT(LLOG_ORIGIN_CONNECT == 506);
+	CLASSERT(LLOG_CATINFO == 507);
+	CLASSERT(LLOG_ORIGIN_HANDLE_PREV_BLOCK == 508);
+	CLASSERT(LLOG_ORIGIN_HANDLE_DESTROY == 509);
+	CLASSERT(LLOG_FIRST_OPC == 501);
+	CLASSERT(LLOG_LAST_OPC == 510);
+
+	/* Checks for struct llogd_conn_body */
+	LASSERTF((int)sizeof(struct llogd_conn_body) == 40, "found %lld\n",
+		 (long long)(int)sizeof(struct llogd_conn_body));
+	LASSERTF((int)offsetof(struct llogd_conn_body, lgdc_gen) == 0, "found %lld\n",
+		 (long long)(int)offsetof(struct llogd_conn_body, lgdc_gen));
+	LASSERTF((int)sizeof(((struct llogd_conn_body *)0)->lgdc_gen) == 16, "found %lld\n",
+		 (long long)(int)sizeof(((struct llogd_conn_body *)0)->lgdc_gen));
+	LASSERTF((int)offsetof(struct llogd_conn_body, lgdc_logid) == 16, "found %lld\n",
+		 (long long)(int)offsetof(struct llogd_conn_body, lgdc_logid));
+	LASSERTF((int)sizeof(((struct llogd_conn_body *)0)->lgdc_logid) == 20, "found %lld\n",
+		 (long long)(int)sizeof(((struct llogd_conn_body *)0)->lgdc_logid));
+	LASSERTF((int)offsetof(struct llogd_conn_body, lgdc_ctxt_idx) == 36, "found %lld\n",
+		 (long long)(int)offsetof(struct llogd_conn_body, lgdc_ctxt_idx));
+	LASSERTF((int)sizeof(((struct llogd_conn_body *)0)->lgdc_ctxt_idx) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct llogd_conn_body *)0)->lgdc_ctxt_idx));
+
+	/* Checks for struct ll_fiemap_info_key */
+	LASSERTF((int)sizeof(struct ll_fiemap_info_key) == 248, "found %lld\n",
+		 (long long)(int)sizeof(struct ll_fiemap_info_key));
+	LASSERTF((int)offsetof(struct ll_fiemap_info_key, name[8]) == 8, "found %lld\n",
+		 (long long)(int)offsetof(struct ll_fiemap_info_key, name[8]));
+	LASSERTF((int)sizeof(((struct ll_fiemap_info_key *)0)->name[8]) == 1, "found %lld\n",
+		 (long long)(int)sizeof(((struct ll_fiemap_info_key *)0)->name[8]));
+	LASSERTF((int)offsetof(struct ll_fiemap_info_key, oa) == 8, "found %lld\n",
+		 (long long)(int)offsetof(struct ll_fiemap_info_key, oa));
+	LASSERTF((int)sizeof(((struct ll_fiemap_info_key *)0)->oa) == 208, "found %lld\n",
+		 (long long)(int)sizeof(((struct ll_fiemap_info_key *)0)->oa));
+	LASSERTF((int)offsetof(struct ll_fiemap_info_key, fiemap) == 216, "found %lld\n",
+		 (long long)(int)offsetof(struct ll_fiemap_info_key, fiemap));
+	LASSERTF((int)sizeof(((struct ll_fiemap_info_key *)0)->fiemap) == 32, "found %lld\n",
+		 (long long)(int)sizeof(((struct ll_fiemap_info_key *)0)->fiemap));
+
+	/* Checks for struct quota_body */
+	LASSERTF((int)sizeof(struct quota_body) == 112, "found %lld\n",
+		 (long long)(int)sizeof(struct quota_body));
+	LASSERTF((int)offsetof(struct quota_body, qb_fid) == 0, "found %lld\n",
+		 (long long)(int)offsetof(struct quota_body, qb_fid));
+	LASSERTF((int)sizeof(((struct quota_body *)0)->qb_fid) == 16, "found %lld\n",
+		 (long long)(int)sizeof(((struct quota_body *)0)->qb_fid));
+	LASSERTF((int)offsetof(struct quota_body, qb_id) == 16, "found %lld\n",
+		 (long long)(int)offsetof(struct quota_body, qb_id));
+	LASSERTF((int)sizeof(((struct quota_body *)0)->qb_id) == 16, "found %lld\n",
+		 (long long)(int)sizeof(((struct quota_body *)0)->qb_id));
+	LASSERTF((int)offsetof(struct quota_body, qb_flags) == 32, "found %lld\n",
+		 (long long)(int)offsetof(struct quota_body, qb_flags));
+	LASSERTF((int)sizeof(((struct quota_body *)0)->qb_flags) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct quota_body *)0)->qb_flags));
+	LASSERTF((int)offsetof(struct quota_body, qb_padding) == 36, "found %lld\n",
+		 (long long)(int)offsetof(struct quota_body, qb_padding));
+	LASSERTF((int)sizeof(((struct quota_body *)0)->qb_padding) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct quota_body *)0)->qb_padding));
+	LASSERTF((int)offsetof(struct quota_body, qb_count) == 40, "found %lld\n",
+		 (long long)(int)offsetof(struct quota_body, qb_count));
+	LASSERTF((int)sizeof(((struct quota_body *)0)->qb_count) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct quota_body *)0)->qb_count));
+	LASSERTF((int)offsetof(struct quota_body, qb_usage) == 48, "found %lld\n",
+		 (long long)(int)offsetof(struct quota_body, qb_usage));
+	LASSERTF((int)sizeof(((struct quota_body *)0)->qb_usage) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct quota_body *)0)->qb_usage));
+	LASSERTF((int)offsetof(struct quota_body, qb_slv_ver) == 56, "found %lld\n",
+		 (long long)(int)offsetof(struct quota_body, qb_slv_ver));
+	LASSERTF((int)sizeof(((struct quota_body *)0)->qb_slv_ver) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct quota_body *)0)->qb_slv_ver));
+	LASSERTF((int)offsetof(struct quota_body, qb_lockh) == 64, "found %lld\n",
+		 (long long)(int)offsetof(struct quota_body, qb_lockh));
+	LASSERTF((int)sizeof(((struct quota_body *)0)->qb_lockh) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct quota_body *)0)->qb_lockh));
+	LASSERTF((int)offsetof(struct quota_body, qb_glb_lockh) == 72, "found %lld\n",
+		 (long long)(int)offsetof(struct quota_body, qb_glb_lockh));
+	LASSERTF((int)sizeof(((struct quota_body *)0)->qb_glb_lockh) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct quota_body *)0)->qb_glb_lockh));
+	LASSERTF((int)offsetof(struct quota_body, qb_padding1[4]) == 112, "found %lld\n",
+		 (long long)(int)offsetof(struct quota_body, qb_padding1[4]));
+	LASSERTF((int)sizeof(((struct quota_body *)0)->qb_padding1[4]) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct quota_body *)0)->qb_padding1[4]));
+
+	/* Checks for struct mgs_target_info */
+	LASSERTF((int)sizeof(struct mgs_target_info) == 4544, "found %lld\n",
+		 (long long)(int)sizeof(struct mgs_target_info));
+	LASSERTF((int)offsetof(struct mgs_target_info, mti_lustre_ver) == 0, "found %lld\n",
+		 (long long)(int)offsetof(struct mgs_target_info, mti_lustre_ver));
+	LASSERTF((int)sizeof(((struct mgs_target_info *)0)->mti_lustre_ver) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mgs_target_info *)0)->mti_lustre_ver));
+	LASSERTF((int)offsetof(struct mgs_target_info, mti_stripe_index) == 4, "found %lld\n",
+		 (long long)(int)offsetof(struct mgs_target_info, mti_stripe_index));
+	LASSERTF((int)sizeof(((struct mgs_target_info *)0)->mti_stripe_index) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mgs_target_info *)0)->mti_stripe_index));
+	LASSERTF((int)offsetof(struct mgs_target_info, mti_config_ver) == 8, "found %lld\n",
+		 (long long)(int)offsetof(struct mgs_target_info, mti_config_ver));
+	LASSERTF((int)sizeof(((struct mgs_target_info *)0)->mti_config_ver) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mgs_target_info *)0)->mti_config_ver));
+	LASSERTF((int)offsetof(struct mgs_target_info, mti_flags) == 12, "found %lld\n",
+		 (long long)(int)offsetof(struct mgs_target_info, mti_flags));
+	LASSERTF((int)sizeof(((struct mgs_target_info *)0)->mti_flags) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mgs_target_info *)0)->mti_flags));
+	LASSERTF((int)offsetof(struct mgs_target_info, mti_nid_count) == 16, "found %lld\n",
+		 (long long)(int)offsetof(struct mgs_target_info, mti_nid_count));
+	LASSERTF((int)sizeof(((struct mgs_target_info *)0)->mti_nid_count) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mgs_target_info *)0)->mti_nid_count));
+	LASSERTF((int)offsetof(struct mgs_target_info, mti_instance) == 20, "found %lld\n",
+		 (long long)(int)offsetof(struct mgs_target_info, mti_instance));
+	LASSERTF((int)sizeof(((struct mgs_target_info *)0)->mti_instance) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mgs_target_info *)0)->mti_instance));
+	LASSERTF((int)offsetof(struct mgs_target_info, mti_fsname) == 24, "found %lld\n",
+		 (long long)(int)offsetof(struct mgs_target_info, mti_fsname));
+	LASSERTF((int)sizeof(((struct mgs_target_info *)0)->mti_fsname) == 64, "found %lld\n",
+		 (long long)(int)sizeof(((struct mgs_target_info *)0)->mti_fsname));
+	LASSERTF((int)offsetof(struct mgs_target_info, mti_svname) == 88, "found %lld\n",
+		 (long long)(int)offsetof(struct mgs_target_info, mti_svname));
+	LASSERTF((int)sizeof(((struct mgs_target_info *)0)->mti_svname) == 64, "found %lld\n",
+		 (long long)(int)sizeof(((struct mgs_target_info *)0)->mti_svname));
+	LASSERTF((int)offsetof(struct mgs_target_info, mti_uuid) == 152, "found %lld\n",
+		 (long long)(int)offsetof(struct mgs_target_info, mti_uuid));
+	LASSERTF((int)sizeof(((struct mgs_target_info *)0)->mti_uuid) == 40, "found %lld\n",
+		 (long long)(int)sizeof(((struct mgs_target_info *)0)->mti_uuid));
+	LASSERTF((int)offsetof(struct mgs_target_info, mti_nids) == 192, "found %lld\n",
+		 (long long)(int)offsetof(struct mgs_target_info, mti_nids));
+	LASSERTF((int)sizeof(((struct mgs_target_info *)0)->mti_nids) == 256, "found %lld\n",
+		 (long long)(int)sizeof(((struct mgs_target_info *)0)->mti_nids));
+	LASSERTF((int)offsetof(struct mgs_target_info, mti_params) == 448, "found %lld\n",
+		 (long long)(int)offsetof(struct mgs_target_info, mti_params));
+	LASSERTF((int)sizeof(((struct mgs_target_info *)0)->mti_params) == 4096, "found %lld\n",
+		 (long long)(int)sizeof(((struct mgs_target_info *)0)->mti_params));
+
+	/* Checks for struct lustre_capa */
+	LASSERTF((int)sizeof(struct lustre_capa) == 120, "found %lld\n",
+		 (long long)(int)sizeof(struct lustre_capa));
+	LASSERTF((int)offsetof(struct lustre_capa, lc_fid) == 0, "found %lld\n",
+		 (long long)(int)offsetof(struct lustre_capa, lc_fid));
+	LASSERTF((int)sizeof(((struct lustre_capa *)0)->lc_fid) == 16, "found %lld\n",
+		 (long long)(int)sizeof(((struct lustre_capa *)0)->lc_fid));
+	LASSERTF((int)offsetof(struct lustre_capa, lc_opc) == 16, "found %lld\n",
+		 (long long)(int)offsetof(struct lustre_capa, lc_opc));
+	LASSERTF((int)sizeof(((struct lustre_capa *)0)->lc_opc) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct lustre_capa *)0)->lc_opc));
+	LASSERTF((int)offsetof(struct lustre_capa, lc_uid) == 24, "found %lld\n",
+		 (long long)(int)offsetof(struct lustre_capa, lc_uid));
+	LASSERTF((int)sizeof(((struct lustre_capa *)0)->lc_uid) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct lustre_capa *)0)->lc_uid));
+	LASSERTF((int)offsetof(struct lustre_capa, lc_gid) == 32, "found %lld\n",
+		 (long long)(int)offsetof(struct lustre_capa, lc_gid));
+	LASSERTF((int)sizeof(((struct lustre_capa *)0)->lc_gid) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct lustre_capa *)0)->lc_gid));
+	LASSERTF((int)offsetof(struct lustre_capa, lc_flags) == 40, "found %lld\n",
+		 (long long)(int)offsetof(struct lustre_capa, lc_flags));
+	LASSERTF((int)sizeof(((struct lustre_capa *)0)->lc_flags) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct lustre_capa *)0)->lc_flags));
+	LASSERTF((int)offsetof(struct lustre_capa, lc_keyid) == 44, "found %lld\n",
+		 (long long)(int)offsetof(struct lustre_capa, lc_keyid));
+	LASSERTF((int)sizeof(((struct lustre_capa *)0)->lc_keyid) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct lustre_capa *)0)->lc_keyid));
+	LASSERTF((int)offsetof(struct lustre_capa, lc_timeout) == 48, "found %lld\n",
+		 (long long)(int)offsetof(struct lustre_capa, lc_timeout));
+	LASSERTF((int)sizeof(((struct lustre_capa *)0)->lc_timeout) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct lustre_capa *)0)->lc_timeout));
+	LASSERTF((int)offsetof(struct lustre_capa, lc_expiry) == 52, "found %lld\n",
+		 (long long)(int)offsetof(struct lustre_capa, lc_expiry));
+	LASSERTF((int)sizeof(((struct lustre_capa *)0)->lc_expiry) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct lustre_capa *)0)->lc_expiry));
+	CLASSERT(CAPA_HMAC_MAX_LEN == 64);
+	LASSERTF((int)offsetof(struct lustre_capa, lc_hmac[64]) == 120, "found %lld\n",
+		 (long long)(int)offsetof(struct lustre_capa, lc_hmac[64]));
+	LASSERTF((int)sizeof(((struct lustre_capa *)0)->lc_hmac[64]) == 1, "found %lld\n",
+		 (long long)(int)sizeof(((struct lustre_capa *)0)->lc_hmac[64]));
+
+	/* Checks for struct lustre_capa_key */
+	LASSERTF((int)sizeof(struct lustre_capa_key) == 72, "found %lld\n",
+		 (long long)(int)sizeof(struct lustre_capa_key));
+	LASSERTF((int)offsetof(struct lustre_capa_key, lk_seq) == 0, "found %lld\n",
+		 (long long)(int)offsetof(struct lustre_capa_key, lk_seq));
+	LASSERTF((int)sizeof(((struct lustre_capa_key *)0)->lk_seq) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct lustre_capa_key *)0)->lk_seq));
+	LASSERTF((int)offsetof(struct lustre_capa_key, lk_keyid) == 8, "found %lld\n",
+		 (long long)(int)offsetof(struct lustre_capa_key, lk_keyid));
+	LASSERTF((int)sizeof(((struct lustre_capa_key *)0)->lk_keyid) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct lustre_capa_key *)0)->lk_keyid));
+	LASSERTF((int)offsetof(struct lustre_capa_key, lk_padding) == 12, "found %lld\n",
+		 (long long)(int)offsetof(struct lustre_capa_key, lk_padding));
+	LASSERTF((int)sizeof(((struct lustre_capa_key *)0)->lk_padding) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct lustre_capa_key *)0)->lk_padding));
+	CLASSERT(CAPA_HMAC_KEY_MAX_LEN == 56);
+	LASSERTF((int)offsetof(struct lustre_capa_key, lk_key[56]) == 72, "found %lld\n",
+		 (long long)(int)offsetof(struct lustre_capa_key, lk_key[56]));
+	LASSERTF((int)sizeof(((struct lustre_capa_key *)0)->lk_key[56]) == 1, "found %lld\n",
+		 (long long)(int)sizeof(((struct lustre_capa_key *)0)->lk_key[56]));
+
+	/* Checks for struct getinfo_fid2path */
+	LASSERTF((int)sizeof(struct getinfo_fid2path) == 32, "found %lld\n",
+		 (long long)(int)sizeof(struct getinfo_fid2path));
+	LASSERTF((int)offsetof(struct getinfo_fid2path, gf_fid) == 0, "found %lld\n",
+		 (long long)(int)offsetof(struct getinfo_fid2path, gf_fid));
+	LASSERTF((int)sizeof(((struct getinfo_fid2path *)0)->gf_fid) == 16, "found %lld\n",
+		 (long long)(int)sizeof(((struct getinfo_fid2path *)0)->gf_fid));
+	LASSERTF((int)offsetof(struct getinfo_fid2path, gf_recno) == 16, "found %lld\n",
+		 (long long)(int)offsetof(struct getinfo_fid2path, gf_recno));
+	LASSERTF((int)sizeof(((struct getinfo_fid2path *)0)->gf_recno) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct getinfo_fid2path *)0)->gf_recno));
+	LASSERTF((int)offsetof(struct getinfo_fid2path, gf_linkno) == 24, "found %lld\n",
+		 (long long)(int)offsetof(struct getinfo_fid2path, gf_linkno));
+	LASSERTF((int)sizeof(((struct getinfo_fid2path *)0)->gf_linkno) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct getinfo_fid2path *)0)->gf_linkno));
+	LASSERTF((int)offsetof(struct getinfo_fid2path, gf_pathlen) == 28, "found %lld\n",
+		 (long long)(int)offsetof(struct getinfo_fid2path, gf_pathlen));
+	LASSERTF((int)sizeof(((struct getinfo_fid2path *)0)->gf_pathlen) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct getinfo_fid2path *)0)->gf_pathlen));
+	LASSERTF((int)offsetof(struct getinfo_fid2path, gf_path[0]) == 32, "found %lld\n",
+		 (long long)(int)offsetof(struct getinfo_fid2path, gf_path[0]));
+	LASSERTF((int)sizeof(((struct getinfo_fid2path *)0)->gf_path[0]) == 1, "found %lld\n",
+		 (long long)(int)sizeof(((struct getinfo_fid2path *)0)->gf_path[0]));
+
+	/* Checks for struct ll_user_fiemap */
+	LASSERTF((int)sizeof(struct ll_user_fiemap) == 32, "found %lld\n",
+		 (long long)(int)sizeof(struct ll_user_fiemap));
+	LASSERTF((int)offsetof(struct ll_user_fiemap, fm_start) == 0, "found %lld\n",
+		 (long long)(int)offsetof(struct ll_user_fiemap, fm_start));
+	LASSERTF((int)sizeof(((struct ll_user_fiemap *)0)->fm_start) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct ll_user_fiemap *)0)->fm_start));
+	LASSERTF((int)offsetof(struct ll_user_fiemap, fm_length) == 8, "found %lld\n",
+		 (long long)(int)offsetof(struct ll_user_fiemap, fm_length));
+	LASSERTF((int)sizeof(((struct ll_user_fiemap *)0)->fm_length) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct ll_user_fiemap *)0)->fm_length));
+	LASSERTF((int)offsetof(struct ll_user_fiemap, fm_flags) == 16, "found %lld\n",
+		 (long long)(int)offsetof(struct ll_user_fiemap, fm_flags));
+	LASSERTF((int)sizeof(((struct ll_user_fiemap *)0)->fm_flags) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct ll_user_fiemap *)0)->fm_flags));
+	LASSERTF((int)offsetof(struct ll_user_fiemap, fm_mapped_extents) == 20, "found %lld\n",
+		 (long long)(int)offsetof(struct ll_user_fiemap, fm_mapped_extents));
+	LASSERTF((int)sizeof(((struct ll_user_fiemap *)0)->fm_mapped_extents) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct ll_user_fiemap *)0)->fm_mapped_extents));
+	LASSERTF((int)offsetof(struct ll_user_fiemap, fm_extent_count) == 24, "found %lld\n",
+		 (long long)(int)offsetof(struct ll_user_fiemap, fm_extent_count));
+	LASSERTF((int)sizeof(((struct ll_user_fiemap *)0)->fm_extent_count) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct ll_user_fiemap *)0)->fm_extent_count));
+	LASSERTF((int)offsetof(struct ll_user_fiemap, fm_reserved) == 28, "found %lld\n",
+		 (long long)(int)offsetof(struct ll_user_fiemap, fm_reserved));
+	LASSERTF((int)sizeof(((struct ll_user_fiemap *)0)->fm_reserved) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct ll_user_fiemap *)0)->fm_reserved));
+	LASSERTF((int)offsetof(struct ll_user_fiemap, fm_extents) == 32, "found %lld\n",
+		 (long long)(int)offsetof(struct ll_user_fiemap, fm_extents));
+	LASSERTF((int)sizeof(((struct ll_user_fiemap *)0)->fm_extents) == 0, "found %lld\n",
+		 (long long)(int)sizeof(((struct ll_user_fiemap *)0)->fm_extents));
+	CLASSERT(FIEMAP_FLAG_SYNC == 0x00000001);
+	CLASSERT(FIEMAP_FLAG_XATTR == 0x00000002);
+	CLASSERT(FIEMAP_FLAG_DEVICE_ORDER == 0x40000000);
+
+	/* Checks for struct ll_fiemap_extent */
+	LASSERTF((int)sizeof(struct ll_fiemap_extent) == 56, "found %lld\n",
+		 (long long)(int)sizeof(struct ll_fiemap_extent));
+	LASSERTF((int)offsetof(struct ll_fiemap_extent, fe_logical) == 0, "found %lld\n",
+		 (long long)(int)offsetof(struct ll_fiemap_extent, fe_logical));
+	LASSERTF((int)sizeof(((struct ll_fiemap_extent *)0)->fe_logical) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct ll_fiemap_extent *)0)->fe_logical));
+	LASSERTF((int)offsetof(struct ll_fiemap_extent, fe_physical) == 8, "found %lld\n",
+		 (long long)(int)offsetof(struct ll_fiemap_extent, fe_physical));
+	LASSERTF((int)sizeof(((struct ll_fiemap_extent *)0)->fe_physical) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct ll_fiemap_extent *)0)->fe_physical));
+	LASSERTF((int)offsetof(struct ll_fiemap_extent, fe_length) == 16, "found %lld\n",
+		 (long long)(int)offsetof(struct ll_fiemap_extent, fe_length));
+	LASSERTF((int)sizeof(((struct ll_fiemap_extent *)0)->fe_length) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct ll_fiemap_extent *)0)->fe_length));
+	LASSERTF((int)offsetof(struct ll_fiemap_extent, fe_flags) == 40, "found %lld\n",
+		 (long long)(int)offsetof(struct ll_fiemap_extent, fe_flags));
+	LASSERTF((int)sizeof(((struct ll_fiemap_extent *)0)->fe_flags) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct ll_fiemap_extent *)0)->fe_flags));
+	LASSERTF((int)offsetof(struct ll_fiemap_extent, fe_device) == 44, "found %lld\n",
+		 (long long)(int)offsetof(struct ll_fiemap_extent, fe_device));
+	LASSERTF((int)sizeof(((struct ll_fiemap_extent *)0)->fe_device) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct ll_fiemap_extent *)0)->fe_device));
+	CLASSERT(FIEMAP_EXTENT_LAST == 0x00000001);
+	CLASSERT(FIEMAP_EXTENT_UNKNOWN == 0x00000002);
+	CLASSERT(FIEMAP_EXTENT_DELALLOC == 0x00000004);
+	CLASSERT(FIEMAP_EXTENT_ENCODED == 0x00000008);
+	CLASSERT(FIEMAP_EXTENT_DATA_ENCRYPTED == 0x00000080);
+	CLASSERT(FIEMAP_EXTENT_NOT_ALIGNED == 0x00000100);
+	CLASSERT(FIEMAP_EXTENT_DATA_INLINE == 0x00000200);
+	CLASSERT(FIEMAP_EXTENT_DATA_TAIL == 0x00000400);
+	CLASSERT(FIEMAP_EXTENT_UNWRITTEN == 0x00000800);
+	CLASSERT(FIEMAP_EXTENT_MERGED == 0x00001000);
+	CLASSERT(FIEMAP_EXTENT_NO_DIRECT == 0x40000000);
+	CLASSERT(FIEMAP_EXTENT_NET == 0x80000000);
+
+	/* Checks for type posix_acl_xattr_entry */
+	LASSERTF((int)sizeof(posix_acl_xattr_entry) == 8, "found %lld\n",
+		 (long long)(int)sizeof(posix_acl_xattr_entry));
+	LASSERTF((int)offsetof(posix_acl_xattr_entry, e_tag) == 0, "found %lld\n",
+		 (long long)(int)offsetof(posix_acl_xattr_entry, e_tag));
+	LASSERTF((int)sizeof(((posix_acl_xattr_entry *)0)->e_tag) == 2, "found %lld\n",
+		 (long long)(int)sizeof(((posix_acl_xattr_entry *)0)->e_tag));
+	LASSERTF((int)offsetof(posix_acl_xattr_entry, e_perm) == 2, "found %lld\n",
+		 (long long)(int)offsetof(posix_acl_xattr_entry, e_perm));
+	LASSERTF((int)sizeof(((posix_acl_xattr_entry *)0)->e_perm) == 2, "found %lld\n",
+		 (long long)(int)sizeof(((posix_acl_xattr_entry *)0)->e_perm));
+	LASSERTF((int)offsetof(posix_acl_xattr_entry, e_id) == 4, "found %lld\n",
+		 (long long)(int)offsetof(posix_acl_xattr_entry, e_id));
+	LASSERTF((int)sizeof(((posix_acl_xattr_entry *)0)->e_id) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((posix_acl_xattr_entry *)0)->e_id));
+
+	/* Checks for type posix_acl_xattr_header */
+	LASSERTF((int)sizeof(posix_acl_xattr_header) == 4, "found %lld\n",
+		 (long long)(int)sizeof(posix_acl_xattr_header));
+	LASSERTF((int)offsetof(posix_acl_xattr_header, a_version) == 0, "found %lld\n",
+		 (long long)(int)offsetof(posix_acl_xattr_header, a_version));
+	LASSERTF((int)sizeof(((posix_acl_xattr_header *)0)->a_version) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((posix_acl_xattr_header *)0)->a_version));
+	LASSERTF((int)offsetof(posix_acl_xattr_header, a_entries) == 4, "found %lld\n",
+		 (long long)(int)offsetof(posix_acl_xattr_header, a_entries));
+	LASSERTF((int)sizeof(((posix_acl_xattr_header *)0)->a_entries) == 0, "found %lld\n",
+		 (long long)(int)sizeof(((posix_acl_xattr_header *)0)->a_entries));
+
+	/* Checks for struct link_ea_header */
+	LASSERTF((int)sizeof(struct link_ea_header) == 24, "found %lld\n",
+		 (long long)(int)sizeof(struct link_ea_header));
+	LASSERTF((int)offsetof(struct link_ea_header, leh_magic) == 0, "found %lld\n",
+		 (long long)(int)offsetof(struct link_ea_header, leh_magic));
+	LASSERTF((int)sizeof(((struct link_ea_header *)0)->leh_magic) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct link_ea_header *)0)->leh_magic));
+	LASSERTF((int)offsetof(struct link_ea_header, leh_reccount) == 4, "found %lld\n",
+		 (long long)(int)offsetof(struct link_ea_header, leh_reccount));
+	LASSERTF((int)sizeof(((struct link_ea_header *)0)->leh_reccount) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct link_ea_header *)0)->leh_reccount));
+	LASSERTF((int)offsetof(struct link_ea_header, leh_len) == 8, "found %lld\n",
+		 (long long)(int)offsetof(struct link_ea_header, leh_len));
+	LASSERTF((int)sizeof(((struct link_ea_header *)0)->leh_len) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct link_ea_header *)0)->leh_len));
+	LASSERTF((int)offsetof(struct link_ea_header, padding1) == 16, "found %lld\n",
+		 (long long)(int)offsetof(struct link_ea_header, padding1));
+	LASSERTF((int)sizeof(((struct link_ea_header *)0)->padding1) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct link_ea_header *)0)->padding1));
+	LASSERTF((int)offsetof(struct link_ea_header, padding2) == 20, "found %lld\n",
+		 (long long)(int)offsetof(struct link_ea_header, padding2));
+	LASSERTF((int)sizeof(((struct link_ea_header *)0)->padding2) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct link_ea_header *)0)->padding2));
+	CLASSERT(LINK_EA_MAGIC == 0x11EAF1DFUL);
+
+	/* Checks for struct link_ea_entry */
+	LASSERTF((int)sizeof(struct link_ea_entry) == 18, "found %lld\n",
+		 (long long)(int)sizeof(struct link_ea_entry));
+	LASSERTF((int)offsetof(struct link_ea_entry, lee_reclen) == 0, "found %lld\n",
+		 (long long)(int)offsetof(struct link_ea_entry, lee_reclen));
+	LASSERTF((int)sizeof(((struct link_ea_entry *)0)->lee_reclen) == 2, "found %lld\n",
+		 (long long)(int)sizeof(((struct link_ea_entry *)0)->lee_reclen));
+	LASSERTF((int)offsetof(struct link_ea_entry, lee_parent_fid) == 2, "found %lld\n",
+		 (long long)(int)offsetof(struct link_ea_entry, lee_parent_fid));
+	LASSERTF((int)sizeof(((struct link_ea_entry *)0)->lee_parent_fid) == 16, "found %lld\n",
+		 (long long)(int)sizeof(((struct link_ea_entry *)0)->lee_parent_fid));
+	LASSERTF((int)offsetof(struct link_ea_entry, lee_name) == 18, "found %lld\n",
+		 (long long)(int)offsetof(struct link_ea_entry, lee_name));
+	LASSERTF((int)sizeof(((struct link_ea_entry *)0)->lee_name) == 0, "found %lld\n",
+		 (long long)(int)sizeof(((struct link_ea_entry *)0)->lee_name));
+
+	/* Checks for struct layout_intent */
+	LASSERTF((int)sizeof(struct layout_intent) == 24, "found %lld\n",
+		 (long long)(int)sizeof(struct layout_intent));
+	LASSERTF((int)offsetof(struct layout_intent, li_opc) == 0, "found %lld\n",
+		 (long long)(int)offsetof(struct layout_intent, li_opc));
+	LASSERTF((int)sizeof(((struct layout_intent *)0)->li_opc) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct layout_intent *)0)->li_opc));
+	LASSERTF((int)offsetof(struct layout_intent, li_flags) == 4, "found %lld\n",
+		 (long long)(int)offsetof(struct layout_intent, li_flags));
+	LASSERTF((int)sizeof(((struct layout_intent *)0)->li_flags) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct layout_intent *)0)->li_flags));
+	LASSERTF((int)offsetof(struct layout_intent, li_start) == 8, "found %lld\n",
+		 (long long)(int)offsetof(struct layout_intent, li_start));
+	LASSERTF((int)sizeof(((struct layout_intent *)0)->li_start) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct layout_intent *)0)->li_start));
+	LASSERTF((int)offsetof(struct layout_intent, li_end) == 16, "found %lld\n",
+		 (long long)(int)offsetof(struct layout_intent, li_end));
+	LASSERTF((int)sizeof(((struct layout_intent *)0)->li_end) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct layout_intent *)0)->li_end));
+	LASSERTF(LAYOUT_INTENT_ACCESS == 0, "found %lld\n",
+		 (long long)LAYOUT_INTENT_ACCESS);
+	LASSERTF(LAYOUT_INTENT_READ == 1, "found %lld\n",
+		 (long long)LAYOUT_INTENT_READ);
+	LASSERTF(LAYOUT_INTENT_WRITE == 2, "found %lld\n",
+		 (long long)LAYOUT_INTENT_WRITE);
+	LASSERTF(LAYOUT_INTENT_GLIMPSE == 3, "found %lld\n",
+		 (long long)LAYOUT_INTENT_GLIMPSE);
+	LASSERTF(LAYOUT_INTENT_TRUNC == 4, "found %lld\n",
+		 (long long)LAYOUT_INTENT_TRUNC);
+	LASSERTF(LAYOUT_INTENT_RELEASE == 5, "found %lld\n",
+		 (long long)LAYOUT_INTENT_RELEASE);
+	LASSERTF(LAYOUT_INTENT_RESTORE == 6, "found %lld\n",
+		 (long long)LAYOUT_INTENT_RESTORE);
+
+	/* Checks for struct hsm_action_item */
+	LASSERTF((int)sizeof(struct hsm_action_item) == 72, "found %lld\n",
+		 (long long)(int)sizeof(struct hsm_action_item));
+	LASSERTF((int)offsetof(struct hsm_action_item, hai_len) == 0, "found %lld\n",
+		 (long long)(int)offsetof(struct hsm_action_item, hai_len));
+	LASSERTF((int)sizeof(((struct hsm_action_item *)0)->hai_len) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct hsm_action_item *)0)->hai_len));
+	LASSERTF((int)offsetof(struct hsm_action_item, hai_action) == 4, "found %lld\n",
+		 (long long)(int)offsetof(struct hsm_action_item, hai_action));
+	LASSERTF((int)sizeof(((struct hsm_action_item *)0)->hai_action) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct hsm_action_item *)0)->hai_action));
+	LASSERTF((int)offsetof(struct hsm_action_item, hai_fid) == 8, "found %lld\n",
+		 (long long)(int)offsetof(struct hsm_action_item, hai_fid));
+	LASSERTF((int)sizeof(((struct hsm_action_item *)0)->hai_fid) == 16, "found %lld\n",
+		 (long long)(int)sizeof(((struct hsm_action_item *)0)->hai_fid));
+	LASSERTF((int)offsetof(struct hsm_action_item, hai_dfid) == 24, "found %lld\n",
+		 (long long)(int)offsetof(struct hsm_action_item, hai_dfid));
+	LASSERTF((int)sizeof(((struct hsm_action_item *)0)->hai_dfid) == 16, "found %lld\n",
+		 (long long)(int)sizeof(((struct hsm_action_item *)0)->hai_dfid));
+	LASSERTF((int)offsetof(struct hsm_action_item, hai_extent) == 40, "found %lld\n",
+		 (long long)(int)offsetof(struct hsm_action_item, hai_extent));
+	LASSERTF((int)sizeof(((struct hsm_action_item *)0)->hai_extent) == 16, "found %lld\n",
+		 (long long)(int)sizeof(((struct hsm_action_item *)0)->hai_extent));
+	LASSERTF((int)offsetof(struct hsm_action_item, hai_cookie) == 56, "found %lld\n",
+		 (long long)(int)offsetof(struct hsm_action_item, hai_cookie));
+	LASSERTF((int)sizeof(((struct hsm_action_item *)0)->hai_cookie) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct hsm_action_item *)0)->hai_cookie));
+	LASSERTF((int)offsetof(struct hsm_action_item, hai_gid) == 64, "found %lld\n",
+		 (long long)(int)offsetof(struct hsm_action_item, hai_gid));
+	LASSERTF((int)sizeof(((struct hsm_action_item *)0)->hai_gid) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct hsm_action_item *)0)->hai_gid));
+	LASSERTF((int)offsetof(struct hsm_action_item, hai_data) == 72, "found %lld\n",
+		 (long long)(int)offsetof(struct hsm_action_item, hai_data));
+	LASSERTF((int)sizeof(((struct hsm_action_item *)0)->hai_data) == 0, "found %lld\n",
+		 (long long)(int)sizeof(((struct hsm_action_item *)0)->hai_data));
+
+	/* Checks for struct hsm_action_list */
+	LASSERTF((int)sizeof(struct hsm_action_list) == 32, "found %lld\n",
+		 (long long)(int)sizeof(struct hsm_action_list));
+	LASSERTF((int)offsetof(struct hsm_action_list, hal_version) == 0, "found %lld\n",
+		 (long long)(int)offsetof(struct hsm_action_list, hal_version));
+	LASSERTF((int)sizeof(((struct hsm_action_list *)0)->hal_version) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct hsm_action_list *)0)->hal_version));
+	LASSERTF((int)offsetof(struct hsm_action_list, hal_count) == 4, "found %lld\n",
+		 (long long)(int)offsetof(struct hsm_action_list, hal_count));
+	LASSERTF((int)sizeof(((struct hsm_action_list *)0)->hal_count) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct hsm_action_list *)0)->hal_count));
+	LASSERTF((int)offsetof(struct hsm_action_list, hal_compound_id) == 8, "found %lld\n",
+		 (long long)(int)offsetof(struct hsm_action_list, hal_compound_id));
+	LASSERTF((int)sizeof(((struct hsm_action_list *)0)->hal_compound_id) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct hsm_action_list *)0)->hal_compound_id));
+	LASSERTF((int)offsetof(struct hsm_action_list, hal_flags) == 16, "found %lld\n",
+		 (long long)(int)offsetof(struct hsm_action_list, hal_flags));
+	LASSERTF((int)sizeof(((struct hsm_action_list *)0)->hal_flags) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct hsm_action_list *)0)->hal_flags));
+	LASSERTF((int)offsetof(struct hsm_action_list, hal_archive_id) == 24, "found %lld\n",
+		 (long long)(int)offsetof(struct hsm_action_list, hal_archive_id));
+	LASSERTF((int)sizeof(((struct hsm_action_list *)0)->hal_archive_id) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct hsm_action_list *)0)->hal_archive_id));
+	LASSERTF((int)offsetof(struct hsm_action_list, padding1) == 28, "found %lld\n",
+		 (long long)(int)offsetof(struct hsm_action_list, padding1));
+	LASSERTF((int)sizeof(((struct hsm_action_list *)0)->padding1) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct hsm_action_list *)0)->padding1));
+	LASSERTF((int)offsetof(struct hsm_action_list, hal_fsname) == 32, "found %lld\n",
+		 (long long)(int)offsetof(struct hsm_action_list, hal_fsname));
+	LASSERTF((int)sizeof(((struct hsm_action_list *)0)->hal_fsname) == 0, "found %lld\n",
+		 (long long)(int)sizeof(((struct hsm_action_list *)0)->hal_fsname));
+
+	/* Checks for struct hsm_progress */
+	LASSERTF((int)sizeof(struct hsm_progress) == 48, "found %lld\n",
+		 (long long)(int)sizeof(struct hsm_progress));
+	LASSERTF((int)offsetof(struct hsm_progress, hp_fid) == 0, "found %lld\n",
+		 (long long)(int)offsetof(struct hsm_progress, hp_fid));
+	LASSERTF((int)sizeof(((struct hsm_progress *)0)->hp_fid) == 16, "found %lld\n",
+		 (long long)(int)sizeof(((struct hsm_progress *)0)->hp_fid));
+	LASSERTF((int)offsetof(struct hsm_progress, hp_cookie) == 16, "found %lld\n",
+		 (long long)(int)offsetof(struct hsm_progress, hp_cookie));
+	LASSERTF((int)sizeof(((struct hsm_progress *)0)->hp_cookie) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct hsm_progress *)0)->hp_cookie));
+	LASSERTF((int)offsetof(struct hsm_progress, hp_extent) == 24, "found %lld\n",
+		 (long long)(int)offsetof(struct hsm_progress, hp_extent));
+	LASSERTF((int)sizeof(((struct hsm_progress *)0)->hp_extent) == 16, "found %lld\n",
+		 (long long)(int)sizeof(((struct hsm_progress *)0)->hp_extent));
+	LASSERTF((int)offsetof(struct hsm_progress, hp_flags) == 40, "found %lld\n",
+		 (long long)(int)offsetof(struct hsm_progress, hp_flags));
+	LASSERTF((int)sizeof(((struct hsm_progress *)0)->hp_flags) == 2, "found %lld\n",
+		 (long long)(int)sizeof(((struct hsm_progress *)0)->hp_flags));
+	LASSERTF((int)offsetof(struct hsm_progress, hp_errval) == 42, "found %lld\n",
+		 (long long)(int)offsetof(struct hsm_progress, hp_errval));
+	LASSERTF((int)sizeof(((struct hsm_progress *)0)->hp_errval) == 2, "found %lld\n",
+		 (long long)(int)sizeof(((struct hsm_progress *)0)->hp_errval));
+	LASSERTF((int)offsetof(struct hsm_progress, padding) == 44, "found %lld\n",
+		 (long long)(int)offsetof(struct hsm_progress, padding));
+	LASSERTF((int)sizeof(((struct hsm_progress *)0)->padding) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct hsm_progress *)0)->padding));
+	LASSERTF(HP_FLAG_COMPLETED == 0x01, "found 0x%.8x\n",
+		HP_FLAG_COMPLETED);
+	LASSERTF(HP_FLAG_RETRY == 0x02, "found 0x%.8x\n",
+		HP_FLAG_RETRY);
+
+	LASSERTF((int)offsetof(struct hsm_copy, hc_data_version) == 0, "found %lld\n",
+		 (long long)(int)offsetof(struct hsm_copy, hc_data_version));
+	LASSERTF((int)sizeof(((struct hsm_copy *)0)->hc_data_version) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct hsm_copy *)0)->hc_data_version));
+	LASSERTF((int)offsetof(struct hsm_copy, hc_flags) == 8, "found %lld\n",
+		 (long long)(int)offsetof(struct hsm_copy, hc_flags));
+	LASSERTF((int)sizeof(((struct hsm_copy *)0)->hc_flags) == 2, "found %lld\n",
+		 (long long)(int)sizeof(((struct hsm_copy *)0)->hc_flags));
+	LASSERTF((int)offsetof(struct hsm_copy, hc_errval) == 10, "found %lld\n",
+		 (long long)(int)offsetof(struct hsm_copy, hc_errval));
+	LASSERTF((int)sizeof(((struct hsm_copy *)0)->hc_errval) == 2, "found %lld\n",
+		 (long long)(int)sizeof(((struct hsm_copy *)0)->hc_errval));
+	LASSERTF((int)offsetof(struct hsm_copy, padding) == 12, "found %lld\n",
+		 (long long)(int)offsetof(struct hsm_copy, padding));
+	LASSERTF((int)sizeof(((struct hsm_copy *)0)->padding) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct hsm_copy *)0)->padding));
+	LASSERTF((int)offsetof(struct hsm_copy, hc_hai) == 16, "found %lld\n",
+		 (long long)(int)offsetof(struct hsm_copy, hc_hai));
+	LASSERTF((int)sizeof(((struct hsm_copy *)0)->hc_hai) == 72, "found %lld\n",
+		 (long long)(int)sizeof(((struct hsm_copy *)0)->hc_hai));
+
+	/* Checks for struct hsm_progress_kernel */
+	LASSERTF((int)sizeof(struct hsm_progress_kernel) == 64, "found %lld\n",
+		 (long long)(int)sizeof(struct hsm_progress_kernel));
+	LASSERTF((int)offsetof(struct hsm_progress_kernel, hpk_fid) == 0, "found %lld\n",
+		 (long long)(int)offsetof(struct hsm_progress_kernel, hpk_fid));
+	LASSERTF((int)sizeof(((struct hsm_progress_kernel *)0)->hpk_fid) == 16, "found %lld\n",
+		 (long long)(int)sizeof(((struct hsm_progress_kernel *)0)->hpk_fid));
+	LASSERTF((int)offsetof(struct hsm_progress_kernel, hpk_cookie) == 16, "found %lld\n",
+		 (long long)(int)offsetof(struct hsm_progress_kernel, hpk_cookie));
+	LASSERTF((int)sizeof(((struct hsm_progress_kernel *)0)->hpk_cookie) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct hsm_progress_kernel *)0)->hpk_cookie));
+	LASSERTF((int)offsetof(struct hsm_progress_kernel, hpk_extent) == 24, "found %lld\n",
+		 (long long)(int)offsetof(struct hsm_progress_kernel, hpk_extent));
+	LASSERTF((int)sizeof(((struct hsm_progress_kernel *)0)->hpk_extent) == 16, "found %lld\n",
+		 (long long)(int)sizeof(((struct hsm_progress_kernel *)0)->hpk_extent));
+	LASSERTF((int)offsetof(struct hsm_progress_kernel, hpk_flags) == 40, "found %lld\n",
+		 (long long)(int)offsetof(struct hsm_progress_kernel, hpk_flags));
+	LASSERTF((int)sizeof(((struct hsm_progress_kernel *)0)->hpk_flags) == 2, "found %lld\n",
+		 (long long)(int)sizeof(((struct hsm_progress_kernel *)0)->hpk_flags));
+	LASSERTF((int)offsetof(struct hsm_progress_kernel, hpk_errval) == 42, "found %lld\n",
+		 (long long)(int)offsetof(struct hsm_progress_kernel, hpk_errval));
+	LASSERTF((int)sizeof(((struct hsm_progress_kernel *)0)->hpk_errval) == 2, "found %lld\n",
+		 (long long)(int)sizeof(((struct hsm_progress_kernel *)0)->hpk_errval));
+	LASSERTF((int)offsetof(struct hsm_progress_kernel, hpk_padding1) == 44, "found %lld\n",
+		 (long long)(int)offsetof(struct hsm_progress_kernel, hpk_padding1));
+	LASSERTF((int)sizeof(((struct hsm_progress_kernel *)0)->hpk_padding1) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct hsm_progress_kernel *)0)->hpk_padding1));
+	LASSERTF((int)offsetof(struct hsm_progress_kernel, hpk_data_version) == 48, "found %lld\n",
+		 (long long)(int)offsetof(struct hsm_progress_kernel, hpk_data_version));
+	LASSERTF((int)sizeof(((struct hsm_progress_kernel *)0)->hpk_data_version) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct hsm_progress_kernel *)0)->hpk_data_version));
+	LASSERTF((int)offsetof(struct hsm_progress_kernel, hpk_padding2) == 56, "found %lld\n",
+		 (long long)(int)offsetof(struct hsm_progress_kernel, hpk_padding2));
+	LASSERTF((int)sizeof(((struct hsm_progress_kernel *)0)->hpk_padding2) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct hsm_progress_kernel *)0)->hpk_padding2));
+
+	/* Checks for struct hsm_user_item */
+	LASSERTF((int)sizeof(struct hsm_user_item) == 32, "found %lld\n",
+		 (long long)(int)sizeof(struct hsm_user_item));
+	LASSERTF((int)offsetof(struct hsm_user_item, hui_fid) == 0, "found %lld\n",
+		 (long long)(int)offsetof(struct hsm_user_item, hui_fid));
+	LASSERTF((int)sizeof(((struct hsm_user_item *)0)->hui_fid) == 16, "found %lld\n",
+		 (long long)(int)sizeof(((struct hsm_user_item *)0)->hui_fid));
+	LASSERTF((int)offsetof(struct hsm_user_item, hui_extent) == 16, "found %lld\n",
+		 (long long)(int)offsetof(struct hsm_user_item, hui_extent));
+	LASSERTF((int)sizeof(((struct hsm_user_item *)0)->hui_extent) == 16, "found %lld\n",
+		 (long long)(int)sizeof(((struct hsm_user_item *)0)->hui_extent));
+
+	/* Checks for struct hsm_user_state */
+	LASSERTF((int)sizeof(struct hsm_user_state) == 32, "found %lld\n",
+		 (long long)(int)sizeof(struct hsm_user_state));
+	LASSERTF((int)offsetof(struct hsm_user_state, hus_states) == 0, "found %lld\n",
+		 (long long)(int)offsetof(struct hsm_user_state, hus_states));
+	LASSERTF((int)sizeof(((struct hsm_user_state *)0)->hus_states) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct hsm_user_state *)0)->hus_states));
+	LASSERTF((int)offsetof(struct hsm_user_state, hus_archive_id) == 4, "found %lld\n",
+		 (long long)(int)offsetof(struct hsm_user_state, hus_archive_id));
+	LASSERTF((int)sizeof(((struct hsm_user_state *)0)->hus_archive_id) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct hsm_user_state *)0)->hus_archive_id));
+	LASSERTF((int)offsetof(struct hsm_user_state, hus_in_progress_state) == 8, "found %lld\n",
+		 (long long)(int)offsetof(struct hsm_user_state, hus_in_progress_state));
+	LASSERTF((int)sizeof(((struct hsm_user_state *)0)->hus_in_progress_state) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct hsm_user_state *)0)->hus_in_progress_state));
+	LASSERTF((int)offsetof(struct hsm_user_state, hus_in_progress_action) == 12, "found %lld\n",
+		 (long long)(int)offsetof(struct hsm_user_state, hus_in_progress_action));
+	LASSERTF((int)sizeof(((struct hsm_user_state *)0)->hus_in_progress_action) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct hsm_user_state *)0)->hus_in_progress_action));
+	LASSERTF((int)offsetof(struct hsm_user_state, hus_in_progress_location) == 16, "found %lld\n",
+		 (long long)(int)offsetof(struct hsm_user_state, hus_in_progress_location));
+	LASSERTF((int)sizeof(((struct hsm_user_state *)0)->hus_in_progress_location) == 16, "found %lld\n",
+		 (long long)(int)sizeof(((struct hsm_user_state *)0)->hus_in_progress_location));
+
+	/* Checks for struct hsm_state_set */
+	LASSERTF((int)sizeof(struct hsm_state_set) == 24, "found %lld\n",
+		 (long long)(int)sizeof(struct hsm_state_set));
+	LASSERTF((int)offsetof(struct hsm_state_set, hss_valid) == 0, "found %lld\n",
+		 (long long)(int)offsetof(struct hsm_state_set, hss_valid));
+	LASSERTF((int)sizeof(((struct hsm_state_set *)0)->hss_valid) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct hsm_state_set *)0)->hss_valid));
+	LASSERTF((int)offsetof(struct hsm_state_set, hss_archive_id) == 4, "found %lld\n",
+		 (long long)(int)offsetof(struct hsm_state_set, hss_archive_id));
+	LASSERTF((int)sizeof(((struct hsm_state_set *)0)->hss_archive_id) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct hsm_state_set *)0)->hss_archive_id));
+	LASSERTF((int)offsetof(struct hsm_state_set, hss_setmask) == 8, "found %lld\n",
+		 (long long)(int)offsetof(struct hsm_state_set, hss_setmask));
+	LASSERTF((int)sizeof(((struct hsm_state_set *)0)->hss_setmask) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct hsm_state_set *)0)->hss_setmask));
+	LASSERTF((int)offsetof(struct hsm_state_set, hss_clearmask) == 16, "found %lld\n",
+		 (long long)(int)offsetof(struct hsm_state_set, hss_clearmask));
+	LASSERTF((int)sizeof(((struct hsm_state_set *)0)->hss_clearmask) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct hsm_state_set *)0)->hss_clearmask));
+
+	/* Checks for struct hsm_current_action */
+	LASSERTF((int)sizeof(struct hsm_current_action) == 24, "found %lld\n",
+		 (long long)(int)sizeof(struct hsm_current_action));
+	LASSERTF((int)offsetof(struct hsm_current_action, hca_state) == 0, "found %lld\n",
+		 (long long)(int)offsetof(struct hsm_current_action, hca_state));
+	LASSERTF((int)sizeof(((struct hsm_current_action *)0)->hca_state) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct hsm_current_action *)0)->hca_state));
+	LASSERTF((int)offsetof(struct hsm_current_action, hca_action) == 4, "found %lld\n",
+		 (long long)(int)offsetof(struct hsm_current_action, hca_action));
+	LASSERTF((int)sizeof(((struct hsm_current_action *)0)->hca_action) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct hsm_current_action *)0)->hca_action));
+	LASSERTF((int)offsetof(struct hsm_current_action, hca_location) == 8, "found %lld\n",
+		 (long long)(int)offsetof(struct hsm_current_action, hca_location));
+	LASSERTF((int)sizeof(((struct hsm_current_action *)0)->hca_location) == 16, "found %lld\n",
+		 (long long)(int)sizeof(((struct hsm_current_action *)0)->hca_location));
+
+	/* Checks for struct hsm_request */
+	LASSERTF((int)sizeof(struct hsm_request) == 24, "found %lld\n",
+		 (long long)(int)sizeof(struct hsm_request));
+	LASSERTF((int)offsetof(struct hsm_request, hr_action) == 0, "found %lld\n",
+		 (long long)(int)offsetof(struct hsm_request, hr_action));
+	LASSERTF((int)sizeof(((struct hsm_request *)0)->hr_action) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct hsm_request *)0)->hr_action));
+	LASSERTF((int)offsetof(struct hsm_request, hr_archive_id) == 4, "found %lld\n",
+		 (long long)(int)offsetof(struct hsm_request, hr_archive_id));
+	LASSERTF((int)sizeof(((struct hsm_request *)0)->hr_archive_id) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct hsm_request *)0)->hr_archive_id));
+	LASSERTF((int)offsetof(struct hsm_request, hr_flags) == 8, "found %lld\n",
+		 (long long)(int)offsetof(struct hsm_request, hr_flags));
+	LASSERTF((int)sizeof(((struct hsm_request *)0)->hr_flags) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct hsm_request *)0)->hr_flags));
+	LASSERTF((int)offsetof(struct hsm_request, hr_itemcount) == 16, "found %lld\n",
+		 (long long)(int)offsetof(struct hsm_request, hr_itemcount));
+	LASSERTF((int)sizeof(((struct hsm_request *)0)->hr_itemcount) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct hsm_request *)0)->hr_itemcount));
+	LASSERTF((int)offsetof(struct hsm_request, hr_data_len) == 20, "found %lld\n",
+		 (long long)(int)offsetof(struct hsm_request, hr_data_len));
+	LASSERTF((int)sizeof(((struct hsm_request *)0)->hr_data_len) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct hsm_request *)0)->hr_data_len));
+	LASSERTF(HSM_FORCE_ACTION == 0x00000001UL, "found 0x%.8xUL\n",
+		(unsigned)HSM_FORCE_ACTION);
+	LASSERTF(HSM_GHOST_COPY == 0x00000002UL, "found 0x%.8xUL\n",
+		(unsigned)HSM_GHOST_COPY);
+
+	/* Checks for struct hsm_user_request */
+	LASSERTF((int)sizeof(struct hsm_user_request) == 24, "found %lld\n",
+		 (long long)(int)sizeof(struct hsm_user_request));
+	LASSERTF((int)offsetof(struct hsm_user_request, hur_request) == 0, "found %lld\n",
+		 (long long)(int)offsetof(struct hsm_user_request, hur_request));
+	LASSERTF((int)sizeof(((struct hsm_user_request *)0)->hur_request) == 24, "found %lld\n",
+		 (long long)(int)sizeof(((struct hsm_user_request *)0)->hur_request));
+	LASSERTF((int)offsetof(struct hsm_user_request, hur_user_item) == 24, "found %lld\n",
+		 (long long)(int)offsetof(struct hsm_user_request, hur_user_item));
+	LASSERTF((int)sizeof(((struct hsm_user_request *)0)->hur_user_item) == 0, "found %lld\n",
+		 (long long)(int)sizeof(((struct hsm_user_request *)0)->hur_user_item));
+
+	/* Checks for struct hsm_user_import */
+	LASSERTF(sizeof(struct hsm_user_import) == 48, "found %lld\n",
+		 (long long)sizeof(struct hsm_user_import));
+	LASSERTF(offsetof(struct hsm_user_import, hui_size) == 0,
+		 "found %lld\n",
+		 (long long)offsetof(struct hsm_user_import, hui_size));
+	LASSERTF(sizeof(((struct hsm_user_import *)0)->hui_size) == 8,
+		 "found %lld\n",
+		 (long long)sizeof(((struct hsm_user_import *)0)->hui_size));
+	LASSERTF(offsetof(struct hsm_user_import, hui_uid) == 32,
+		 "found %lld\n",
+		 (long long)offsetof(struct hsm_user_import, hui_uid));
+	LASSERTF(sizeof(((struct hsm_user_import *)0)->hui_uid) == 4,
+		 "found %lld\n",
+		 (long long)sizeof(((struct hsm_user_import *)0)->hui_uid));
+	LASSERTF(offsetof(struct hsm_user_import, hui_gid) == 36,
+		 "found %lld\n",
+		 (long long)offsetof(struct hsm_user_import, hui_gid));
+	LASSERTF(sizeof(((struct hsm_user_import *)0)->hui_gid) == 4,
+		 "found %lld\n",
+		 (long long)sizeof(((struct hsm_user_import *)0)->hui_gid));
+	LASSERTF(offsetof(struct hsm_user_import, hui_mode) == 40,
+		 "found %lld\n",
+		 (long long)offsetof(struct hsm_user_import, hui_mode));
+	LASSERTF(sizeof(((struct hsm_user_import *)0)->hui_mode) == 4,
+		 "found %lld\n",
+		 (long long)sizeof(((struct hsm_user_import *)0)->hui_mode));
+	LASSERTF(offsetof(struct hsm_user_import, hui_atime) == 8,
+		 "found %lld\n",
+		 (long long)offsetof(struct hsm_user_import, hui_atime));
+	LASSERTF(sizeof(((struct hsm_user_import *)0)->hui_atime) == 8,
+		 "found %lld\n",
+		 (long long)sizeof(((struct hsm_user_import *)0)->hui_atime));
+	LASSERTF(offsetof(struct hsm_user_import, hui_atime_ns) == 24,
+		 "found %lld\n",
+		(long long)(int)offsetof(struct hsm_user_import, hui_atime_ns));
+	LASSERTF(sizeof(((struct hsm_user_import *)0)->hui_atime_ns) == 4,
+		 "found %lld\n",
+		(long long)sizeof(((struct hsm_user_import *)0)->hui_atime_ns));
+	LASSERTF(offsetof(struct hsm_user_import, hui_mtime) == 16,
+		 "found %lld\n",
+		 (long long)offsetof(struct hsm_user_import, hui_mtime));
+	LASSERTF(sizeof(((struct hsm_user_import *)0)->hui_mtime) == 8,
+		 "found %lld\n",
+		 (long long)sizeof(((struct hsm_user_import *)0)->hui_mtime));
+	LASSERTF(offsetof(struct hsm_user_import, hui_mtime_ns) == 28,
+		 "found %lld\n",
+		(long long)offsetof(struct hsm_user_import, hui_mtime_ns));
+	LASSERTF(sizeof(((struct hsm_user_import *)0)->hui_mtime_ns) == 4,
+		 "found %lld\n",
+		(long long)sizeof(((struct hsm_user_import *)0)->hui_mtime_ns));
+	LASSERTF(offsetof(struct hsm_user_import, hui_archive_id) == 44,
+		 "found %lld\n",
+		 (long long)offsetof(struct hsm_user_import, hui_archive_id));
+	LASSERTF(sizeof(((struct hsm_user_import *)0)->hui_archive_id) == 4,
+		 "found %lld\n",
+	      (long long)sizeof(((struct hsm_user_import *)0)->hui_archive_id));
+
+	/* Checks for struct update_buf */
+	LASSERTF((int)sizeof(struct update_buf) == 8, "found %lld\n",
+		 (long long)(int)sizeof(struct update_buf));
+	LASSERTF((int)offsetof(struct update_buf, ub_magic) == 0, "found %lld\n",
+		 (long long)(int)offsetof(struct update_buf, ub_magic));
+	LASSERTF((int)sizeof(((struct update_buf *)0)->ub_magic) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct update_buf *)0)->ub_magic));
+	LASSERTF((int)offsetof(struct update_buf, ub_count) == 4, "found %lld\n",
+		 (long long)(int)offsetof(struct update_buf, ub_count));
+	LASSERTF((int)sizeof(((struct update_buf *)0)->ub_count) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct update_buf *)0)->ub_count));
+	LASSERTF((int)offsetof(struct update_buf, ub_bufs) == 8, "found %lld\n",
+		 (long long)(int)offsetof(struct update_buf, ub_bufs));
+	LASSERTF((int)sizeof(((struct update_buf *)0)->ub_bufs) == 0, "found %lld\n",
+		 (long long)(int)sizeof(((struct update_buf *)0)->ub_bufs));
+
+	/* Checks for struct update_reply */
+	LASSERTF((int)sizeof(struct update_reply) == 8, "found %lld\n",
+		 (long long)(int)sizeof(struct update_reply));
+	LASSERTF((int)offsetof(struct update_reply, ur_version) == 0, "found %lld\n",
+		 (long long)(int)offsetof(struct update_reply, ur_version));
+	LASSERTF((int)sizeof(((struct update_reply *)0)->ur_version) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct update_reply *)0)->ur_version));
+	LASSERTF((int)offsetof(struct update_reply, ur_count) == 4, "found %lld\n",
+		 (long long)(int)offsetof(struct update_reply, ur_count));
+	LASSERTF((int)sizeof(((struct update_reply *)0)->ur_count) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct update_reply *)0)->ur_count));
+	LASSERTF((int)offsetof(struct update_reply, ur_lens) == 8, "found %lld\n",
+		 (long long)(int)offsetof(struct update_reply, ur_lens));
+	LASSERTF((int)sizeof(((struct update_reply *)0)->ur_lens) == 0, "found %lld\n",
+		 (long long)(int)sizeof(((struct update_reply *)0)->ur_lens));
+
+	/* Checks for struct update */
+	LASSERTF((int)sizeof(struct update) == 56, "found %lld\n",
+		 (long long)(int)sizeof(struct update));
+	LASSERTF((int)offsetof(struct update, u_type) == 0, "found %lld\n",
+		 (long long)(int)offsetof(struct update, u_type));
+	LASSERTF((int)sizeof(((struct update *)0)->u_type) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct update *)0)->u_type));
+	LASSERTF((int)offsetof(struct update, u_batchid) == 4, "found %lld\n",
+		 (long long)(int)offsetof(struct update, u_batchid));
+	LASSERTF((int)sizeof(((struct update *)0)->u_batchid) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct update *)0)->u_batchid));
+	LASSERTF((int)offsetof(struct update, u_fid) == 8, "found %lld\n",
+		 (long long)(int)offsetof(struct update, u_fid));
+	LASSERTF((int)sizeof(((struct update *)0)->u_fid) == 16, "found %lld\n",
+		 (long long)(int)sizeof(((struct update *)0)->u_fid));
+	LASSERTF((int)offsetof(struct update, u_lens) == 24, "found %lld\n",
+		 (long long)(int)offsetof(struct update, u_lens));
+	LASSERTF((int)sizeof(((struct update *)0)->u_lens) == 32, "found %lld\n",
+		 (long long)(int)sizeof(((struct update *)0)->u_lens));
+	LASSERTF((int)offsetof(struct update, u_bufs) == 56, "found %lld\n",
+		 (long long)(int)offsetof(struct update, u_bufs));
+	LASSERTF((int)sizeof(((struct update *)0)->u_bufs) == 0, "found %lld\n",
+		 (long long)(int)sizeof(((struct update *)0)->u_bufs));
+}
author	André Fabian Silva Delgado <emulatorman@parabola.nu>	2015-08-05 17:04:01 -0300
committer	André Fabian Silva Delgado <emulatorman@parabola.nu>	2015-08-05 17:04:01 -0300
commit	57f0f512b273f60d52568b8c6b77e17f5636edc0 (patch)
tree	5e910f0e82173f4ef4f51111366a3f1299037a7b /drivers/staging/lustre/lustre/ptlrpc