summaryrefslogtreecommitdiff
path: root/drivers/staging/lustre/lustre/ptlrpc
diff options
context:
space:
mode:
authorAndré Fabian Silva Delgado <emulatorman@parabola.nu>2015-08-05 17:04:01 -0300
committerAndré Fabian Silva Delgado <emulatorman@parabola.nu>2015-08-05 17:04:01 -0300
commit57f0f512b273f60d52568b8c6b77e17f5636edc0 (patch)
tree5e910f0e82173f4ef4f51111366a3f1299037a7b /drivers/staging/lustre/lustre/ptlrpc
Initial import
Diffstat (limited to 'drivers/staging/lustre/lustre/ptlrpc')
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/Makefile20
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/client.c3149
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/connection.c241
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/errno.c380
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/events.c585
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/import.c1642
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/layout.c2442
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/llog_client.c366
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/llog_net.c72
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/lproc_ptlrpc.c1366
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/niobuf.c731
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/nrs.c1754
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/nrs_fifo.c270
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/pack_generic.c2536
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/pers.c75
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/pinger.c678
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/ptlrpc_internal.h312
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/ptlrpc_module.c171
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/ptlrpcd.c811
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/recover.c379
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/sec.c2459
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c884
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/sec_config.c901
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/sec_gc.c252
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/sec_lproc.c199
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/sec_null.c458
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/sec_plain.c1013
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/service.c3105
-rw-r--r--drivers/staging/lustre/lustre/ptlrpc/wiretest.c4492
29 files changed, 31743 insertions, 0 deletions
diff --git a/drivers/staging/lustre/lustre/ptlrpc/Makefile b/drivers/staging/lustre/lustre/ptlrpc/Makefile
new file mode 100644
index 000000000..fb50cd4c6
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ptlrpc/Makefile
@@ -0,0 +1,20 @@
+obj-$(CONFIG_LUSTRE_FS) += ptlrpc.o
+LDLM := ../../lustre/ldlm/
+
+ldlm_objs := $(LDLM)l_lock.o $(LDLM)ldlm_lock.o
+ldlm_objs += $(LDLM)ldlm_resource.o $(LDLM)ldlm_lib.o
+ldlm_objs += $(LDLM)ldlm_plain.o $(LDLM)ldlm_extent.o
+ldlm_objs += $(LDLM)ldlm_request.o $(LDLM)ldlm_lockd.o
+ldlm_objs += $(LDLM)ldlm_flock.o $(LDLM)ldlm_inodebits.o
+ldlm_objs += $(LDLM)ldlm_pool.o
+ldlm_objs += $(LDLM)interval_tree.o
+ptlrpc_objs := client.o recover.o connection.o niobuf.o pack_generic.o
+ptlrpc_objs += events.o ptlrpc_module.o service.o pinger.o
+ptlrpc_objs += llog_net.o llog_client.o import.o ptlrpcd.o
+ptlrpc_objs += pers.o lproc_ptlrpc.o wiretest.o layout.o
+ptlrpc_objs += sec.o sec_bulk.o sec_gc.o sec_config.o
+ptlrpc_objs += sec_null.o sec_plain.o nrs.o nrs_fifo.o
+
+ptlrpc-y := $(ldlm_objs) $(ptlrpc_objs)
+ptlrpc-$(CONFIG_PROC_FS) += sec_lproc.o
+ptlrpc-$(CONFIG_LUSTRE_TRANSLATE_ERRNOS) += errno.o
diff --git a/drivers/staging/lustre/lustre/ptlrpc/client.c b/drivers/staging/lustre/lustre/ptlrpc/client.c
new file mode 100644
index 000000000..0357f1d45
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ptlrpc/client.c
@@ -0,0 +1,3149 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+/** Implementation of client-side PortalRPC interfaces */
+
+#define DEBUG_SUBSYSTEM S_RPC
+
+#include "../include/obd_support.h"
+#include "../include/obd_class.h"
+#include "../include/lustre_lib.h"
+#include "../include/lustre_ha.h"
+#include "../include/lustre_import.h"
+#include "../include/lustre_req_layout.h"
+
+#include "ptlrpc_internal.h"
+
+static int ptlrpc_send_new_req(struct ptlrpc_request *req);
+static int ptlrpcd_check_work(struct ptlrpc_request *req);
+
+/**
+ * Initialize passed in client structure \a cl.
+ */
+void ptlrpc_init_client(int req_portal, int rep_portal, char *name,
+ struct ptlrpc_client *cl)
+{
+ cl->cli_request_portal = req_portal;
+ cl->cli_reply_portal = rep_portal;
+ cl->cli_name = name;
+}
+EXPORT_SYMBOL(ptlrpc_init_client);
+
+/**
+ * Return PortalRPC connection for remote uud \a uuid
+ */
+struct ptlrpc_connection *ptlrpc_uuid_to_connection(struct obd_uuid *uuid)
+{
+ struct ptlrpc_connection *c;
+ lnet_nid_t self;
+ lnet_process_id_t peer;
+ int err;
+
+ /* ptlrpc_uuid_to_peer() initializes its 2nd parameter
+ * before accessing its values. */
+ /* coverity[uninit_use_in_call] */
+ err = ptlrpc_uuid_to_peer(uuid, &peer, &self);
+ if (err != 0) {
+ CNETERR("cannot find peer %s!\n", uuid->uuid);
+ return NULL;
+ }
+
+ c = ptlrpc_connection_get(peer, self, uuid);
+ if (c) {
+ memcpy(c->c_remote_uuid.uuid,
+ uuid->uuid, sizeof(c->c_remote_uuid.uuid));
+ }
+
+ CDEBUG(D_INFO, "%s -> %p\n", uuid->uuid, c);
+
+ return c;
+}
+EXPORT_SYMBOL(ptlrpc_uuid_to_connection);
+
+/**
+ * Allocate and initialize new bulk descriptor on the sender.
+ * Returns pointer to the descriptor or NULL on error.
+ */
+struct ptlrpc_bulk_desc *ptlrpc_new_bulk(unsigned npages, unsigned max_brw,
+ unsigned type, unsigned portal)
+{
+ struct ptlrpc_bulk_desc *desc;
+ int i;
+
+ OBD_ALLOC(desc, offsetof(struct ptlrpc_bulk_desc, bd_iov[npages]));
+ if (!desc)
+ return NULL;
+
+ spin_lock_init(&desc->bd_lock);
+ init_waitqueue_head(&desc->bd_waitq);
+ desc->bd_max_iov = npages;
+ desc->bd_iov_count = 0;
+ desc->bd_portal = portal;
+ desc->bd_type = type;
+ desc->bd_md_count = 0;
+ LASSERT(max_brw > 0);
+ desc->bd_md_max_brw = min(max_brw, PTLRPC_BULK_OPS_COUNT);
+ /* PTLRPC_BULK_OPS_COUNT is the compile-time transfer limit for this
+ * node. Negotiated ocd_brw_size will always be <= this number. */
+ for (i = 0; i < PTLRPC_BULK_OPS_COUNT; i++)
+ LNetInvalidateHandle(&desc->bd_mds[i]);
+
+ return desc;
+}
+
+/**
+ * Prepare bulk descriptor for specified outgoing request \a req that
+ * can fit \a npages * pages. \a type is bulk type. \a portal is where
+ * the bulk to be sent. Used on client-side.
+ * Returns pointer to newly allocated initialized bulk descriptor or NULL on
+ * error.
+ */
+struct ptlrpc_bulk_desc *ptlrpc_prep_bulk_imp(struct ptlrpc_request *req,
+ unsigned npages, unsigned max_brw,
+ unsigned type, unsigned portal)
+{
+ struct obd_import *imp = req->rq_import;
+ struct ptlrpc_bulk_desc *desc;
+
+ LASSERT(type == BULK_PUT_SINK || type == BULK_GET_SOURCE);
+ desc = ptlrpc_new_bulk(npages, max_brw, type, portal);
+ if (desc == NULL)
+ return NULL;
+
+ desc->bd_import_generation = req->rq_import_generation;
+ desc->bd_import = class_import_get(imp);
+ desc->bd_req = req;
+
+ desc->bd_cbid.cbid_fn = client_bulk_callback;
+ desc->bd_cbid.cbid_arg = desc;
+
+ /* This makes req own desc, and free it when she frees herself */
+ req->rq_bulk = desc;
+
+ return desc;
+}
+EXPORT_SYMBOL(ptlrpc_prep_bulk_imp);
+
+/**
+ * Add a page \a page to the bulk descriptor \a desc.
+ * Data to transfer in the page starts at offset \a pageoffset and
+ * amount of data to transfer from the page is \a len
+ */
+void __ptlrpc_prep_bulk_page(struct ptlrpc_bulk_desc *desc,
+ struct page *page, int pageoffset, int len, int pin)
+{
+ LASSERT(desc->bd_iov_count < desc->bd_max_iov);
+ LASSERT(page != NULL);
+ LASSERT(pageoffset >= 0);
+ LASSERT(len > 0);
+ LASSERT(pageoffset + len <= PAGE_CACHE_SIZE);
+
+ desc->bd_nob += len;
+
+ if (pin)
+ page_cache_get(page);
+
+ ptlrpc_add_bulk_page(desc, page, pageoffset, len);
+}
+EXPORT_SYMBOL(__ptlrpc_prep_bulk_page);
+
+/**
+ * Uninitialize and free bulk descriptor \a desc.
+ * Works on bulk descriptors both from server and client side.
+ */
+void __ptlrpc_free_bulk(struct ptlrpc_bulk_desc *desc, int unpin)
+{
+ int i;
+
+ LASSERT(desc != NULL);
+ LASSERT(desc->bd_iov_count != LI_POISON); /* not freed already */
+ LASSERT(desc->bd_md_count == 0); /* network hands off */
+ LASSERT((desc->bd_export != NULL) ^ (desc->bd_import != NULL));
+
+ sptlrpc_enc_pool_put_pages(desc);
+
+ if (desc->bd_export)
+ class_export_put(desc->bd_export);
+ else
+ class_import_put(desc->bd_import);
+
+ if (unpin) {
+ for (i = 0; i < desc->bd_iov_count; i++)
+ page_cache_release(desc->bd_iov[i].kiov_page);
+ }
+
+ OBD_FREE(desc, offsetof(struct ptlrpc_bulk_desc,
+ bd_iov[desc->bd_max_iov]));
+}
+EXPORT_SYMBOL(__ptlrpc_free_bulk);
+
+/**
+ * Set server timelimit for this req, i.e. how long are we willing to wait
+ * for reply before timing out this request.
+ */
+void ptlrpc_at_set_req_timeout(struct ptlrpc_request *req)
+{
+ __u32 serv_est;
+ int idx;
+ struct imp_at *at;
+
+ LASSERT(req->rq_import);
+
+ if (AT_OFF) {
+ /* non-AT settings */
+ /**
+ * \a imp_server_timeout means this is reverse import and
+ * we send (currently only) ASTs to the client and cannot afford
+ * to wait too long for the reply, otherwise the other client
+ * (because of which we are sending this request) would
+ * timeout waiting for us
+ */
+ req->rq_timeout = req->rq_import->imp_server_timeout ?
+ obd_timeout / 2 : obd_timeout;
+ } else {
+ at = &req->rq_import->imp_at;
+ idx = import_at_get_index(req->rq_import,
+ req->rq_request_portal);
+ serv_est = at_get(&at->iat_service_estimate[idx]);
+ req->rq_timeout = at_est2timeout(serv_est);
+ }
+ /* We could get even fancier here, using history to predict increased
+ loading... */
+
+ /* Let the server know what this RPC timeout is by putting it in the
+ reqmsg*/
+ lustre_msg_set_timeout(req->rq_reqmsg, req->rq_timeout);
+}
+EXPORT_SYMBOL(ptlrpc_at_set_req_timeout);
+
+/* Adjust max service estimate based on server value */
+static void ptlrpc_at_adj_service(struct ptlrpc_request *req,
+ unsigned int serv_est)
+{
+ int idx;
+ unsigned int oldse;
+ struct imp_at *at;
+
+ LASSERT(req->rq_import);
+ at = &req->rq_import->imp_at;
+
+ idx = import_at_get_index(req->rq_import, req->rq_request_portal);
+ /* max service estimates are tracked on the server side,
+ so just keep minimal history here */
+ oldse = at_measured(&at->iat_service_estimate[idx], serv_est);
+ if (oldse != 0)
+ CDEBUG(D_ADAPTTO, "The RPC service estimate for %s ptl %d has changed from %d to %d\n",
+ req->rq_import->imp_obd->obd_name, req->rq_request_portal,
+ oldse, at_get(&at->iat_service_estimate[idx]));
+}
+
+/* Expected network latency per remote node (secs) */
+int ptlrpc_at_get_net_latency(struct ptlrpc_request *req)
+{
+ return AT_OFF ? 0 : at_get(&req->rq_import->imp_at.iat_net_latency);
+}
+
+/* Adjust expected network latency */
+static void ptlrpc_at_adj_net_latency(struct ptlrpc_request *req,
+ unsigned int service_time)
+{
+ unsigned int nl, oldnl;
+ struct imp_at *at;
+ time_t now = get_seconds();
+
+ LASSERT(req->rq_import);
+
+ if (service_time > now - req->rq_sent + 3) {
+ /* bz16408, however, this can also happen if early reply
+ * is lost and client RPC is expired and resent, early reply
+ * or reply of original RPC can still be fit in reply buffer
+ * of resent RPC, now client is measuring time from the
+ * resent time, but server sent back service time of original
+ * RPC.
+ */
+ CDEBUG((lustre_msg_get_flags(req->rq_reqmsg) & MSG_RESENT) ?
+ D_ADAPTTO : D_WARNING,
+ "Reported service time %u > total measured time "
+ CFS_DURATION_T"\n", service_time,
+ cfs_time_sub(now, req->rq_sent));
+ return;
+ }
+
+ /* Network latency is total time less server processing time */
+ nl = max_t(int, now - req->rq_sent -
+ service_time, 0) + 1; /* st rounding */
+ at = &req->rq_import->imp_at;
+
+ oldnl = at_measured(&at->iat_net_latency, nl);
+ if (oldnl != 0)
+ CDEBUG(D_ADAPTTO, "The network latency for %s (nid %s) has changed from %d to %d\n",
+ req->rq_import->imp_obd->obd_name,
+ obd_uuid2str(
+ &req->rq_import->imp_connection->c_remote_uuid),
+ oldnl, at_get(&at->iat_net_latency));
+}
+
+static int unpack_reply(struct ptlrpc_request *req)
+{
+ int rc;
+
+ if (SPTLRPC_FLVR_POLICY(req->rq_flvr.sf_rpc) != SPTLRPC_POLICY_NULL) {
+ rc = ptlrpc_unpack_rep_msg(req, req->rq_replen);
+ if (rc) {
+ DEBUG_REQ(D_ERROR, req, "unpack_rep failed: %d", rc);
+ return -EPROTO;
+ }
+ }
+
+ rc = lustre_unpack_rep_ptlrpc_body(req, MSG_PTLRPC_BODY_OFF);
+ if (rc) {
+ DEBUG_REQ(D_ERROR, req, "unpack ptlrpc body failed: %d", rc);
+ return -EPROTO;
+ }
+ return 0;
+}
+
+/**
+ * Handle an early reply message, called with the rq_lock held.
+ * If anything goes wrong just ignore it - same as if it never happened
+ */
+static int ptlrpc_at_recv_early_reply(struct ptlrpc_request *req)
+{
+ struct ptlrpc_request *early_req;
+ time_t olddl;
+ int rc;
+
+ req->rq_early = 0;
+ spin_unlock(&req->rq_lock);
+
+ rc = sptlrpc_cli_unwrap_early_reply(req, &early_req);
+ if (rc) {
+ spin_lock(&req->rq_lock);
+ return rc;
+ }
+
+ rc = unpack_reply(early_req);
+ if (rc == 0) {
+ /* Expecting to increase the service time estimate here */
+ ptlrpc_at_adj_service(req,
+ lustre_msg_get_timeout(early_req->rq_repmsg));
+ ptlrpc_at_adj_net_latency(req,
+ lustre_msg_get_service_time(early_req->rq_repmsg));
+ }
+
+ sptlrpc_cli_finish_early_reply(early_req);
+
+ if (rc != 0) {
+ spin_lock(&req->rq_lock);
+ return rc;
+ }
+
+ /* Adjust the local timeout for this req */
+ ptlrpc_at_set_req_timeout(req);
+
+ spin_lock(&req->rq_lock);
+ olddl = req->rq_deadline;
+ /* server assumes it now has rq_timeout from when it sent the
+ * early reply, so client should give it at least that long. */
+ req->rq_deadline = get_seconds() + req->rq_timeout +
+ ptlrpc_at_get_net_latency(req);
+
+ DEBUG_REQ(D_ADAPTTO, req,
+ "Early reply #%d, new deadline in " CFS_DURATION_T "s (" CFS_DURATION_T "s)",
+ req->rq_early_count,
+ cfs_time_sub(req->rq_deadline, get_seconds()),
+ cfs_time_sub(req->rq_deadline, olddl));
+
+ return rc;
+}
+
+struct kmem_cache *request_cache;
+
+int ptlrpc_request_cache_init(void)
+{
+ request_cache = kmem_cache_create("ptlrpc_cache",
+ sizeof(struct ptlrpc_request),
+ 0, SLAB_HWCACHE_ALIGN, NULL);
+ return request_cache == NULL ? -ENOMEM : 0;
+}
+
+void ptlrpc_request_cache_fini(void)
+{
+ kmem_cache_destroy(request_cache);
+}
+
+struct ptlrpc_request *ptlrpc_request_cache_alloc(gfp_t flags)
+{
+ struct ptlrpc_request *req;
+
+ OBD_SLAB_ALLOC_PTR_GFP(req, request_cache, flags);
+ return req;
+}
+
+void ptlrpc_request_cache_free(struct ptlrpc_request *req)
+{
+ OBD_SLAB_FREE_PTR(req, request_cache);
+}
+
+/**
+ * Wind down request pool \a pool.
+ * Frees all requests from the pool too
+ */
+void ptlrpc_free_rq_pool(struct ptlrpc_request_pool *pool)
+{
+ struct list_head *l, *tmp;
+ struct ptlrpc_request *req;
+
+ LASSERT(pool != NULL);
+
+ spin_lock(&pool->prp_lock);
+ list_for_each_safe(l, tmp, &pool->prp_req_list) {
+ req = list_entry(l, struct ptlrpc_request, rq_list);
+ list_del(&req->rq_list);
+ LASSERT(req->rq_reqbuf);
+ LASSERT(req->rq_reqbuf_len == pool->prp_rq_size);
+ OBD_FREE_LARGE(req->rq_reqbuf, pool->prp_rq_size);
+ ptlrpc_request_cache_free(req);
+ }
+ spin_unlock(&pool->prp_lock);
+ OBD_FREE(pool, sizeof(*pool));
+}
+EXPORT_SYMBOL(ptlrpc_free_rq_pool);
+
+/**
+ * Allocates, initializes and adds \a num_rq requests to the pool \a pool
+ */
+void ptlrpc_add_rqs_to_pool(struct ptlrpc_request_pool *pool, int num_rq)
+{
+ int i;
+ int size = 1;
+
+ while (size < pool->prp_rq_size)
+ size <<= 1;
+
+ LASSERTF(list_empty(&pool->prp_req_list) ||
+ size == pool->prp_rq_size,
+ "Trying to change pool size with nonempty pool from %d to %d bytes\n",
+ pool->prp_rq_size, size);
+
+ spin_lock(&pool->prp_lock);
+ pool->prp_rq_size = size;
+ for (i = 0; i < num_rq; i++) {
+ struct ptlrpc_request *req;
+ struct lustre_msg *msg;
+
+ spin_unlock(&pool->prp_lock);
+ req = ptlrpc_request_cache_alloc(GFP_NOFS);
+ if (!req)
+ return;
+ OBD_ALLOC_LARGE(msg, size);
+ if (!msg) {
+ ptlrpc_request_cache_free(req);
+ return;
+ }
+ req->rq_reqbuf = msg;
+ req->rq_reqbuf_len = size;
+ req->rq_pool = pool;
+ spin_lock(&pool->prp_lock);
+ list_add_tail(&req->rq_list, &pool->prp_req_list);
+ }
+ spin_unlock(&pool->prp_lock);
+}
+EXPORT_SYMBOL(ptlrpc_add_rqs_to_pool);
+
+/**
+ * Create and initialize new request pool with given attributes:
+ * \a num_rq - initial number of requests to create for the pool
+ * \a msgsize - maximum message size possible for requests in thid pool
+ * \a populate_pool - function to be called when more requests need to be added
+ * to the pool
+ * Returns pointer to newly created pool or NULL on error.
+ */
+struct ptlrpc_request_pool *
+ptlrpc_init_rq_pool(int num_rq, int msgsize,
+ void (*populate_pool)(struct ptlrpc_request_pool *, int))
+{
+ struct ptlrpc_request_pool *pool;
+
+ OBD_ALLOC(pool, sizeof(struct ptlrpc_request_pool));
+ if (!pool)
+ return NULL;
+
+ /* Request next power of two for the allocation, because internally
+ kernel would do exactly this */
+
+ spin_lock_init(&pool->prp_lock);
+ INIT_LIST_HEAD(&pool->prp_req_list);
+ pool->prp_rq_size = msgsize + SPTLRPC_MAX_PAYLOAD;
+ pool->prp_populate = populate_pool;
+
+ populate_pool(pool, num_rq);
+
+ if (list_empty(&pool->prp_req_list)) {
+ /* have not allocated a single request for the pool */
+ OBD_FREE(pool, sizeof(struct ptlrpc_request_pool));
+ pool = NULL;
+ }
+ return pool;
+}
+EXPORT_SYMBOL(ptlrpc_init_rq_pool);
+
+/**
+ * Fetches one request from pool \a pool
+ */
+static struct ptlrpc_request *
+ptlrpc_prep_req_from_pool(struct ptlrpc_request_pool *pool)
+{
+ struct ptlrpc_request *request;
+ struct lustre_msg *reqbuf;
+
+ if (!pool)
+ return NULL;
+
+ spin_lock(&pool->prp_lock);
+
+ /* See if we have anything in a pool, and bail out if nothing,
+ * in writeout path, where this matters, this is safe to do, because
+ * nothing is lost in this case, and when some in-flight requests
+ * complete, this code will be called again. */
+ if (unlikely(list_empty(&pool->prp_req_list))) {
+ spin_unlock(&pool->prp_lock);
+ return NULL;
+ }
+
+ request = list_entry(pool->prp_req_list.next, struct ptlrpc_request,
+ rq_list);
+ list_del_init(&request->rq_list);
+ spin_unlock(&pool->prp_lock);
+
+ LASSERT(request->rq_reqbuf);
+ LASSERT(request->rq_pool);
+
+ reqbuf = request->rq_reqbuf;
+ memset(request, 0, sizeof(*request));
+ request->rq_reqbuf = reqbuf;
+ request->rq_reqbuf_len = pool->prp_rq_size;
+ request->rq_pool = pool;
+
+ return request;
+}
+
+/**
+ * Returns freed \a request to pool.
+ */
+static void __ptlrpc_free_req_to_pool(struct ptlrpc_request *request)
+{
+ struct ptlrpc_request_pool *pool = request->rq_pool;
+
+ spin_lock(&pool->prp_lock);
+ LASSERT(list_empty(&request->rq_list));
+ LASSERT(!request->rq_receiving_reply);
+ list_add_tail(&request->rq_list, &pool->prp_req_list);
+ spin_unlock(&pool->prp_lock);
+}
+
+static int __ptlrpc_request_bufs_pack(struct ptlrpc_request *request,
+ __u32 version, int opcode,
+ int count, __u32 *lengths, char **bufs,
+ struct ptlrpc_cli_ctx *ctx)
+{
+ struct obd_import *imp = request->rq_import;
+ int rc;
+
+ if (unlikely(ctx))
+ request->rq_cli_ctx = sptlrpc_cli_ctx_get(ctx);
+ else {
+ rc = sptlrpc_req_get_ctx(request);
+ if (rc)
+ goto out_free;
+ }
+
+ sptlrpc_req_set_flavor(request, opcode);
+
+ rc = lustre_pack_request(request, imp->imp_msg_magic, count,
+ lengths, bufs);
+ if (rc) {
+ LASSERT(!request->rq_pool);
+ goto out_ctx;
+ }
+
+ lustre_msg_add_version(request->rq_reqmsg, version);
+ request->rq_send_state = LUSTRE_IMP_FULL;
+ request->rq_type = PTL_RPC_MSG_REQUEST;
+ request->rq_export = NULL;
+
+ request->rq_req_cbid.cbid_fn = request_out_callback;
+ request->rq_req_cbid.cbid_arg = request;
+
+ request->rq_reply_cbid.cbid_fn = reply_in_callback;
+ request->rq_reply_cbid.cbid_arg = request;
+
+ request->rq_reply_deadline = 0;
+ request->rq_phase = RQ_PHASE_NEW;
+ request->rq_next_phase = RQ_PHASE_UNDEFINED;
+
+ request->rq_request_portal = imp->imp_client->cli_request_portal;
+ request->rq_reply_portal = imp->imp_client->cli_reply_portal;
+
+ ptlrpc_at_set_req_timeout(request);
+
+ spin_lock_init(&request->rq_lock);
+ INIT_LIST_HEAD(&request->rq_list);
+ INIT_LIST_HEAD(&request->rq_timed_list);
+ INIT_LIST_HEAD(&request->rq_replay_list);
+ INIT_LIST_HEAD(&request->rq_ctx_chain);
+ INIT_LIST_HEAD(&request->rq_set_chain);
+ INIT_LIST_HEAD(&request->rq_history_list);
+ INIT_LIST_HEAD(&request->rq_exp_list);
+ init_waitqueue_head(&request->rq_reply_waitq);
+ init_waitqueue_head(&request->rq_set_waitq);
+ request->rq_xid = ptlrpc_next_xid();
+ atomic_set(&request->rq_refcount, 1);
+
+ lustre_msg_set_opc(request->rq_reqmsg, opcode);
+
+ return 0;
+out_ctx:
+ sptlrpc_cli_ctx_put(request->rq_cli_ctx, 1);
+out_free:
+ class_import_put(imp);
+ return rc;
+}
+
+int ptlrpc_request_bufs_pack(struct ptlrpc_request *request,
+ __u32 version, int opcode, char **bufs,
+ struct ptlrpc_cli_ctx *ctx)
+{
+ int count;
+
+ count = req_capsule_filled_sizes(&request->rq_pill, RCL_CLIENT);
+ return __ptlrpc_request_bufs_pack(request, version, opcode, count,
+ request->rq_pill.rc_area[RCL_CLIENT],
+ bufs, ctx);
+}
+EXPORT_SYMBOL(ptlrpc_request_bufs_pack);
+
+/**
+ * Pack request buffers for network transfer, performing necessary encryption
+ * steps if necessary.
+ */
+int ptlrpc_request_pack(struct ptlrpc_request *request,
+ __u32 version, int opcode)
+{
+ int rc;
+ rc = ptlrpc_request_bufs_pack(request, version, opcode, NULL, NULL);
+ if (rc)
+ return rc;
+
+ /* For some old 1.8 clients (< 1.8.7), they will LASSERT the size of
+ * ptlrpc_body sent from server equal to local ptlrpc_body size, so we
+ * have to send old ptlrpc_body to keep interoperability with these
+ * clients.
+ *
+ * Only three kinds of server->client RPCs so far:
+ * - LDLM_BL_CALLBACK
+ * - LDLM_CP_CALLBACK
+ * - LDLM_GL_CALLBACK
+ *
+ * XXX This should be removed whenever we drop the interoperability with
+ * the these old clients.
+ */
+ if (opcode == LDLM_BL_CALLBACK || opcode == LDLM_CP_CALLBACK ||
+ opcode == LDLM_GL_CALLBACK)
+ req_capsule_shrink(&request->rq_pill, &RMF_PTLRPC_BODY,
+ sizeof(struct ptlrpc_body_v2), RCL_CLIENT);
+
+ return rc;
+}
+EXPORT_SYMBOL(ptlrpc_request_pack);
+
+/**
+ * Helper function to allocate new request on import \a imp
+ * and possibly using existing request from pool \a pool if provided.
+ * Returns allocated request structure with import field filled or
+ * NULL on error.
+ */
+static inline
+struct ptlrpc_request *__ptlrpc_request_alloc(struct obd_import *imp,
+ struct ptlrpc_request_pool *pool)
+{
+ struct ptlrpc_request *request = NULL;
+
+ if (pool)
+ request = ptlrpc_prep_req_from_pool(pool);
+
+ if (!request)
+ request = ptlrpc_request_cache_alloc(GFP_NOFS);
+
+ if (request) {
+ LASSERTF((unsigned long)imp > 0x1000, "%p", imp);
+ LASSERT(imp != LP_POISON);
+ LASSERTF((unsigned long)imp->imp_client > 0x1000, "%p",
+ imp->imp_client);
+ LASSERT(imp->imp_client != LP_POISON);
+
+ request->rq_import = class_import_get(imp);
+ } else {
+ CERROR("request allocation out of memory\n");
+ }
+
+ return request;
+}
+
+/**
+ * Helper function for creating a request.
+ * Calls __ptlrpc_request_alloc to allocate new request structure and inits
+ * buffer structures according to capsule template \a format.
+ * Returns allocated request structure pointer or NULL on error.
+ */
+static struct ptlrpc_request *
+ptlrpc_request_alloc_internal(struct obd_import *imp,
+ struct ptlrpc_request_pool *pool,
+ const struct req_format *format)
+{
+ struct ptlrpc_request *request;
+
+ request = __ptlrpc_request_alloc(imp, pool);
+ if (request == NULL)
+ return NULL;
+
+ req_capsule_init(&request->rq_pill, request, RCL_CLIENT);
+ req_capsule_set(&request->rq_pill, format);
+ return request;
+}
+
+/**
+ * Allocate new request structure for import \a imp and initialize its
+ * buffer structure according to capsule template \a format.
+ */
+struct ptlrpc_request *ptlrpc_request_alloc(struct obd_import *imp,
+ const struct req_format *format)
+{
+ return ptlrpc_request_alloc_internal(imp, NULL, format);
+}
+EXPORT_SYMBOL(ptlrpc_request_alloc);
+
+/**
+ * Allocate new request structure for import \a imp from pool \a pool and
+ * initialize its buffer structure according to capsule template \a format.
+ */
+struct ptlrpc_request *ptlrpc_request_alloc_pool(struct obd_import *imp,
+ struct ptlrpc_request_pool *pool,
+ const struct req_format *format)
+{
+ return ptlrpc_request_alloc_internal(imp, pool, format);
+}
+EXPORT_SYMBOL(ptlrpc_request_alloc_pool);
+
+/**
+ * For requests not from pool, free memory of the request structure.
+ * For requests obtained from a pool earlier, return request back to pool.
+ */
+void ptlrpc_request_free(struct ptlrpc_request *request)
+{
+ if (request->rq_pool)
+ __ptlrpc_free_req_to_pool(request);
+ else
+ ptlrpc_request_cache_free(request);
+}
+EXPORT_SYMBOL(ptlrpc_request_free);
+
+/**
+ * Allocate new request for operation \a opcode and immediately pack it for
+ * network transfer.
+ * Only used for simple requests like OBD_PING where the only important
+ * part of the request is operation itself.
+ * Returns allocated request or NULL on error.
+ */
+struct ptlrpc_request *ptlrpc_request_alloc_pack(struct obd_import *imp,
+ const struct req_format *format,
+ __u32 version, int opcode)
+{
+ struct ptlrpc_request *req = ptlrpc_request_alloc(imp, format);
+ int rc;
+
+ if (req) {
+ rc = ptlrpc_request_pack(req, version, opcode);
+ if (rc) {
+ ptlrpc_request_free(req);
+ req = NULL;
+ }
+ }
+ return req;
+}
+EXPORT_SYMBOL(ptlrpc_request_alloc_pack);
+
+/**
+ * Prepare request (fetched from pool \a pool if not NULL) on import \a imp
+ * for operation \a opcode. Request would contain \a count buffers.
+ * Sizes of buffers are described in array \a lengths and buffers themselves
+ * are provided by a pointer \a bufs.
+ * Returns prepared request structure pointer or NULL on error.
+ */
+struct ptlrpc_request *
+ptlrpc_prep_req_pool(struct obd_import *imp,
+ __u32 version, int opcode,
+ int count, __u32 *lengths, char **bufs,
+ struct ptlrpc_request_pool *pool)
+{
+ struct ptlrpc_request *request;
+ int rc;
+
+ request = __ptlrpc_request_alloc(imp, pool);
+ if (!request)
+ return NULL;
+
+ rc = __ptlrpc_request_bufs_pack(request, version, opcode, count,
+ lengths, bufs, NULL);
+ if (rc) {
+ ptlrpc_request_free(request);
+ request = NULL;
+ }
+ return request;
+}
+EXPORT_SYMBOL(ptlrpc_prep_req_pool);
+
+/**
+ * Same as ptlrpc_prep_req_pool, but without pool
+ */
+struct ptlrpc_request *
+ptlrpc_prep_req(struct obd_import *imp, __u32 version, int opcode, int count,
+ __u32 *lengths, char **bufs)
+{
+ return ptlrpc_prep_req_pool(imp, version, opcode, count, lengths, bufs,
+ NULL);
+}
+EXPORT_SYMBOL(ptlrpc_prep_req);
+
+/**
+ * Allocate and initialize new request set structure.
+ * Returns a pointer to the newly allocated set structure or NULL on error.
+ */
+struct ptlrpc_request_set *ptlrpc_prep_set(void)
+{
+ struct ptlrpc_request_set *set;
+
+ OBD_ALLOC(set, sizeof(*set));
+ if (!set)
+ return NULL;
+ atomic_set(&set->set_refcount, 1);
+ INIT_LIST_HEAD(&set->set_requests);
+ init_waitqueue_head(&set->set_waitq);
+ atomic_set(&set->set_new_count, 0);
+ atomic_set(&set->set_remaining, 0);
+ spin_lock_init(&set->set_new_req_lock);
+ INIT_LIST_HEAD(&set->set_new_requests);
+ INIT_LIST_HEAD(&set->set_cblist);
+ set->set_max_inflight = UINT_MAX;
+ set->set_producer = NULL;
+ set->set_producer_arg = NULL;
+ set->set_rc = 0;
+
+ return set;
+}
+EXPORT_SYMBOL(ptlrpc_prep_set);
+
+/**
+ * Allocate and initialize new request set structure with flow control
+ * extension. This extension allows to control the number of requests in-flight
+ * for the whole set. A callback function to generate requests must be provided
+ * and the request set will keep the number of requests sent over the wire to
+ * @max_inflight.
+ * Returns a pointer to the newly allocated set structure or NULL on error.
+ */
+struct ptlrpc_request_set *ptlrpc_prep_fcset(int max, set_producer_func func,
+ void *arg)
+
+{
+ struct ptlrpc_request_set *set;
+
+ set = ptlrpc_prep_set();
+ if (!set)
+ return NULL;
+
+ set->set_max_inflight = max;
+ set->set_producer = func;
+ set->set_producer_arg = arg;
+
+ return set;
+}
+EXPORT_SYMBOL(ptlrpc_prep_fcset);
+
+/**
+ * Wind down and free request set structure previously allocated with
+ * ptlrpc_prep_set.
+ * Ensures that all requests on the set have completed and removes
+ * all requests from the request list in a set.
+ * If any unsent request happen to be on the list, pretends that they got
+ * an error in flight and calls their completion handler.
+ */
+void ptlrpc_set_destroy(struct ptlrpc_request_set *set)
+{
+ struct list_head *tmp;
+ struct list_head *next;
+ int expected_phase;
+ int n = 0;
+
+ /* Requests on the set should either all be completed, or all be new */
+ expected_phase = (atomic_read(&set->set_remaining) == 0) ?
+ RQ_PHASE_COMPLETE : RQ_PHASE_NEW;
+ list_for_each(tmp, &set->set_requests) {
+ struct ptlrpc_request *req =
+ list_entry(tmp, struct ptlrpc_request,
+ rq_set_chain);
+
+ LASSERT(req->rq_phase == expected_phase);
+ n++;
+ }
+
+ LASSERTF(atomic_read(&set->set_remaining) == 0 ||
+ atomic_read(&set->set_remaining) == n, "%d / %d\n",
+ atomic_read(&set->set_remaining), n);
+
+ list_for_each_safe(tmp, next, &set->set_requests) {
+ struct ptlrpc_request *req =
+ list_entry(tmp, struct ptlrpc_request,
+ rq_set_chain);
+ list_del_init(&req->rq_set_chain);
+
+ LASSERT(req->rq_phase == expected_phase);
+
+ if (req->rq_phase == RQ_PHASE_NEW) {
+ ptlrpc_req_interpret(NULL, req, -EBADR);
+ atomic_dec(&set->set_remaining);
+ }
+
+ spin_lock(&req->rq_lock);
+ req->rq_set = NULL;
+ req->rq_invalid_rqset = 0;
+ spin_unlock(&req->rq_lock);
+
+ ptlrpc_req_finished(req);
+ }
+
+ LASSERT(atomic_read(&set->set_remaining) == 0);
+
+ ptlrpc_reqset_put(set);
+}
+EXPORT_SYMBOL(ptlrpc_set_destroy);
+
+/**
+ * Add a callback function \a fn to the set.
+ * This function would be called when all requests on this set are completed.
+ * The function will be passed \a data argument.
+ */
+int ptlrpc_set_add_cb(struct ptlrpc_request_set *set,
+ set_interpreter_func fn, void *data)
+{
+ struct ptlrpc_set_cbdata *cbdata;
+
+ OBD_ALLOC_PTR(cbdata);
+ if (cbdata == NULL)
+ return -ENOMEM;
+
+ cbdata->psc_interpret = fn;
+ cbdata->psc_data = data;
+ list_add_tail(&cbdata->psc_item, &set->set_cblist);
+
+ return 0;
+}
+EXPORT_SYMBOL(ptlrpc_set_add_cb);
+
+/**
+ * Add a new request to the general purpose request set.
+ * Assumes request reference from the caller.
+ */
+void ptlrpc_set_add_req(struct ptlrpc_request_set *set,
+ struct ptlrpc_request *req)
+{
+ LASSERT(list_empty(&req->rq_set_chain));
+
+ /* The set takes over the caller's request reference */
+ list_add_tail(&req->rq_set_chain, &set->set_requests);
+ req->rq_set = set;
+ atomic_inc(&set->set_remaining);
+ req->rq_queued_time = cfs_time_current();
+
+ if (req->rq_reqmsg != NULL)
+ lustre_msg_set_jobid(req->rq_reqmsg, NULL);
+
+ if (set->set_producer != NULL)
+ /* If the request set has a producer callback, the RPC must be
+ * sent straight away */
+ ptlrpc_send_new_req(req);
+}
+EXPORT_SYMBOL(ptlrpc_set_add_req);
+
+/**
+ * Add a request to a request with dedicated server thread
+ * and wake the thread to make any necessary processing.
+ * Currently only used for ptlrpcd.
+ */
+void ptlrpc_set_add_new_req(struct ptlrpcd_ctl *pc,
+ struct ptlrpc_request *req)
+{
+ struct ptlrpc_request_set *set = pc->pc_set;
+ int count, i;
+
+ LASSERT(req->rq_set == NULL);
+ LASSERT(test_bit(LIOD_STOP, &pc->pc_flags) == 0);
+
+ spin_lock(&set->set_new_req_lock);
+ /*
+ * The set takes over the caller's request reference.
+ */
+ req->rq_set = set;
+ req->rq_queued_time = cfs_time_current();
+ list_add_tail(&req->rq_set_chain, &set->set_new_requests);
+ count = atomic_inc_return(&set->set_new_count);
+ spin_unlock(&set->set_new_req_lock);
+
+ /* Only need to call wakeup once for the first entry. */
+ if (count == 1) {
+ wake_up(&set->set_waitq);
+
+ /* XXX: It maybe unnecessary to wakeup all the partners. But to
+ * guarantee the async RPC can be processed ASAP, we have
+ * no other better choice. It maybe fixed in future. */
+ for (i = 0; i < pc->pc_npartners; i++)
+ wake_up(&pc->pc_partners[i]->pc_set->set_waitq);
+ }
+}
+EXPORT_SYMBOL(ptlrpc_set_add_new_req);
+
+/**
+ * Based on the current state of the import, determine if the request
+ * can be sent, is an error, or should be delayed.
+ *
+ * Returns true if this request should be delayed. If false, and
+ * *status is set, then the request can not be sent and *status is the
+ * error code. If false and status is 0, then request can be sent.
+ *
+ * The imp->imp_lock must be held.
+ */
+static int ptlrpc_import_delay_req(struct obd_import *imp,
+ struct ptlrpc_request *req, int *status)
+{
+ int delay = 0;
+
+ LASSERT(status != NULL);
+ *status = 0;
+
+ if (req->rq_ctx_init || req->rq_ctx_fini) {
+ /* always allow ctx init/fini rpc go through */
+ } else if (imp->imp_state == LUSTRE_IMP_NEW) {
+ DEBUG_REQ(D_ERROR, req, "Uninitialized import.");
+ *status = -EIO;
+ } else if (imp->imp_state == LUSTRE_IMP_CLOSED) {
+ /* pings may safely race with umount */
+ DEBUG_REQ(lustre_msg_get_opc(req->rq_reqmsg) == OBD_PING ?
+ D_HA : D_ERROR, req, "IMP_CLOSED ");
+ *status = -EIO;
+ } else if (ptlrpc_send_limit_expired(req)) {
+ /* probably doesn't need to be a D_ERROR after initial testing */
+ DEBUG_REQ(D_ERROR, req, "send limit expired ");
+ *status = -EIO;
+ } else if (req->rq_send_state == LUSTRE_IMP_CONNECTING &&
+ imp->imp_state == LUSTRE_IMP_CONNECTING) {
+ /* allow CONNECT even if import is invalid */
+ if (atomic_read(&imp->imp_inval_count) != 0) {
+ DEBUG_REQ(D_ERROR, req, "invalidate in flight");
+ *status = -EIO;
+ }
+ } else if (imp->imp_invalid || imp->imp_obd->obd_no_recov) {
+ if (!imp->imp_deactive)
+ DEBUG_REQ(D_NET, req, "IMP_INVALID");
+ *status = -ESHUTDOWN; /* bz 12940 */
+ } else if (req->rq_import_generation != imp->imp_generation) {
+ DEBUG_REQ(D_ERROR, req, "req wrong generation:");
+ *status = -EIO;
+ } else if (req->rq_send_state != imp->imp_state) {
+ /* invalidate in progress - any requests should be drop */
+ if (atomic_read(&imp->imp_inval_count) != 0) {
+ DEBUG_REQ(D_ERROR, req, "invalidate in flight");
+ *status = -EIO;
+ } else if (imp->imp_dlm_fake || req->rq_no_delay) {
+ *status = -EWOULDBLOCK;
+ } else if (req->rq_allow_replay &&
+ (imp->imp_state == LUSTRE_IMP_REPLAY ||
+ imp->imp_state == LUSTRE_IMP_REPLAY_LOCKS ||
+ imp->imp_state == LUSTRE_IMP_REPLAY_WAIT ||
+ imp->imp_state == LUSTRE_IMP_RECOVER)) {
+ DEBUG_REQ(D_HA, req, "allow during recovery.\n");
+ } else {
+ delay = 1;
+ }
+ }
+
+ return delay;
+}
+
+/**
+ * Decide if the error message regarding provided request \a req
+ * should be printed to the console or not.
+ * Makes it's decision on request status and other properties.
+ * Returns 1 to print error on the system console or 0 if not.
+ */
+static int ptlrpc_console_allow(struct ptlrpc_request *req)
+{
+ __u32 opc;
+ int err;
+
+ LASSERT(req->rq_reqmsg != NULL);
+ opc = lustre_msg_get_opc(req->rq_reqmsg);
+
+ /* Suppress particular reconnect errors which are to be expected. No
+ * errors are suppressed for the initial connection on an import */
+ if ((lustre_handle_is_used(&req->rq_import->imp_remote_handle)) &&
+ (opc == OST_CONNECT || opc == MDS_CONNECT || opc == MGS_CONNECT)) {
+
+ /* Suppress timed out reconnect requests */
+ if (req->rq_timedout)
+ return 0;
+
+ /* Suppress unavailable/again reconnect requests */
+ err = lustre_msg_get_status(req->rq_repmsg);
+ if (err == -ENODEV || err == -EAGAIN)
+ return 0;
+ }
+
+ return 1;
+}
+
+/**
+ * Check request processing status.
+ * Returns the status.
+ */
+static int ptlrpc_check_status(struct ptlrpc_request *req)
+{
+ int err;
+
+ err = lustre_msg_get_status(req->rq_repmsg);
+ if (lustre_msg_get_type(req->rq_repmsg) == PTL_RPC_MSG_ERR) {
+ struct obd_import *imp = req->rq_import;
+ __u32 opc = lustre_msg_get_opc(req->rq_reqmsg);
+ if (ptlrpc_console_allow(req))
+ LCONSOLE_ERROR_MSG(0x011, "%s: Communicating with %s, operation %s failed with %d.\n",
+ imp->imp_obd->obd_name,
+ libcfs_nid2str(
+ imp->imp_connection->c_peer.nid),
+ ll_opcode2str(opc), err);
+ return err < 0 ? err : -EINVAL;
+ }
+
+ if (err < 0) {
+ DEBUG_REQ(D_INFO, req, "status is %d", err);
+ } else if (err > 0) {
+ /* XXX: translate this error from net to host */
+ DEBUG_REQ(D_INFO, req, "status is %d", err);
+ }
+
+ return err;
+}
+
+/**
+ * save pre-versions of objects into request for replay.
+ * Versions are obtained from server reply.
+ * used for VBR.
+ */
+static void ptlrpc_save_versions(struct ptlrpc_request *req)
+{
+ struct lustre_msg *repmsg = req->rq_repmsg;
+ struct lustre_msg *reqmsg = req->rq_reqmsg;
+ __u64 *versions = lustre_msg_get_versions(repmsg);
+
+ if (lustre_msg_get_flags(req->rq_reqmsg) & MSG_REPLAY)
+ return;
+
+ LASSERT(versions);
+ lustre_msg_set_versions(reqmsg, versions);
+ CDEBUG(D_INFO, "Client save versions [%#llx/%#llx]\n",
+ versions[0], versions[1]);
+}
+
+/**
+ * Callback function called when client receives RPC reply for \a req.
+ * Returns 0 on success or error code.
+ * The return value would be assigned to req->rq_status by the caller
+ * as request processing status.
+ * This function also decides if the request needs to be saved for later replay.
+ */
+static int after_reply(struct ptlrpc_request *req)
+{
+ struct obd_import *imp = req->rq_import;
+ struct obd_device *obd = req->rq_import->imp_obd;
+ int rc;
+ struct timeval work_start;
+ long timediff;
+
+ LASSERT(obd != NULL);
+ /* repbuf must be unlinked */
+ LASSERT(!req->rq_receiving_reply && !req->rq_reply_unlink);
+
+ if (req->rq_reply_truncate) {
+ if (ptlrpc_no_resend(req)) {
+ DEBUG_REQ(D_ERROR, req, "reply buffer overflow, expected: %d, actual size: %d",
+ req->rq_nob_received, req->rq_repbuf_len);
+ return -EOVERFLOW;
+ }
+
+ sptlrpc_cli_free_repbuf(req);
+ /* Pass the required reply buffer size (include
+ * space for early reply).
+ * NB: no need to roundup because alloc_repbuf
+ * will roundup it */
+ req->rq_replen = req->rq_nob_received;
+ req->rq_nob_received = 0;
+ spin_lock(&req->rq_lock);
+ req->rq_resend = 1;
+ spin_unlock(&req->rq_lock);
+ return 0;
+ }
+
+ /*
+ * NB Until this point, the whole of the incoming message,
+ * including buflens, status etc is in the sender's byte order.
+ */
+ rc = sptlrpc_cli_unwrap_reply(req);
+ if (rc) {
+ DEBUG_REQ(D_ERROR, req, "unwrap reply failed (%d):", rc);
+ return rc;
+ }
+
+ /*
+ * Security layer unwrap might ask resend this request.
+ */
+ if (req->rq_resend)
+ return 0;
+
+ rc = unpack_reply(req);
+ if (rc)
+ return rc;
+
+ /* retry indefinitely on EINPROGRESS */
+ if (lustre_msg_get_status(req->rq_repmsg) == -EINPROGRESS &&
+ ptlrpc_no_resend(req) == 0 && !req->rq_no_retry_einprogress) {
+ time_t now = get_seconds();
+
+ DEBUG_REQ(D_RPCTRACE, req, "Resending request on EINPROGRESS");
+ spin_lock(&req->rq_lock);
+ req->rq_resend = 1;
+ spin_unlock(&req->rq_lock);
+ req->rq_nr_resend++;
+
+ /* allocate new xid to avoid reply reconstruction */
+ if (!req->rq_bulk) {
+ /* new xid is already allocated for bulk in
+ * ptlrpc_check_set() */
+ req->rq_xid = ptlrpc_next_xid();
+ DEBUG_REQ(D_RPCTRACE, req, "Allocating new xid for resend on EINPROGRESS");
+ }
+
+ /* Readjust the timeout for current conditions */
+ ptlrpc_at_set_req_timeout(req);
+ /* delay resend to give a chance to the server to get ready.
+ * The delay is increased by 1s on every resend and is capped to
+ * the current request timeout (i.e. obd_timeout if AT is off,
+ * or AT service time x 125% + 5s, see at_est2timeout) */
+ if (req->rq_nr_resend > req->rq_timeout)
+ req->rq_sent = now + req->rq_timeout;
+ else
+ req->rq_sent = now + req->rq_nr_resend;
+
+ return 0;
+ }
+
+ do_gettimeofday(&work_start);
+ timediff = cfs_timeval_sub(&work_start, &req->rq_arrival_time, NULL);
+ if (obd->obd_svc_stats != NULL) {
+ lprocfs_counter_add(obd->obd_svc_stats, PTLRPC_REQWAIT_CNTR,
+ timediff);
+ ptlrpc_lprocfs_rpc_sent(req, timediff);
+ }
+
+ if (lustre_msg_get_type(req->rq_repmsg) != PTL_RPC_MSG_REPLY &&
+ lustre_msg_get_type(req->rq_repmsg) != PTL_RPC_MSG_ERR) {
+ DEBUG_REQ(D_ERROR, req, "invalid packet received (type=%u)",
+ lustre_msg_get_type(req->rq_repmsg));
+ return -EPROTO;
+ }
+
+ if (lustre_msg_get_opc(req->rq_reqmsg) != OBD_PING)
+ CFS_FAIL_TIMEOUT(OBD_FAIL_PTLRPC_PAUSE_REP, cfs_fail_val);
+ ptlrpc_at_adj_service(req, lustre_msg_get_timeout(req->rq_repmsg));
+ ptlrpc_at_adj_net_latency(req,
+ lustre_msg_get_service_time(req->rq_repmsg));
+
+ rc = ptlrpc_check_status(req);
+ imp->imp_connect_error = rc;
+
+ if (rc) {
+ /*
+ * Either we've been evicted, or the server has failed for
+ * some reason. Try to reconnect, and if that fails, punt to
+ * the upcall.
+ */
+ if (ll_rpc_recoverable_error(rc)) {
+ if (req->rq_send_state != LUSTRE_IMP_FULL ||
+ imp->imp_obd->obd_no_recov || imp->imp_dlm_fake) {
+ return rc;
+ }
+ ptlrpc_request_handle_notconn(req);
+ return rc;
+ }
+ } else {
+ /*
+ * Let's look if server sent slv. Do it only for RPC with
+ * rc == 0.
+ */
+ ldlm_cli_update_pool(req);
+ }
+
+ /*
+ * Store transno in reqmsg for replay.
+ */
+ if (!(lustre_msg_get_flags(req->rq_reqmsg) & MSG_REPLAY)) {
+ req->rq_transno = lustre_msg_get_transno(req->rq_repmsg);
+ lustre_msg_set_transno(req->rq_reqmsg, req->rq_transno);
+ }
+
+ if (imp->imp_replayable) {
+ spin_lock(&imp->imp_lock);
+ /*
+ * No point in adding already-committed requests to the replay
+ * list, we will just remove them immediately. b=9829
+ */
+ if (req->rq_transno != 0 &&
+ (req->rq_transno >
+ lustre_msg_get_last_committed(req->rq_repmsg) ||
+ req->rq_replay)) {
+ /** version recovery */
+ ptlrpc_save_versions(req);
+ ptlrpc_retain_replayable_request(req, imp);
+ } else if (req->rq_commit_cb != NULL &&
+ list_empty(&req->rq_replay_list)) {
+ /* NB: don't call rq_commit_cb if it's already on
+ * rq_replay_list, ptlrpc_free_committed() will call
+ * it later, see LU-3618 for details */
+ spin_unlock(&imp->imp_lock);
+ req->rq_commit_cb(req);
+ spin_lock(&imp->imp_lock);
+ }
+
+ /*
+ * Replay-enabled imports return commit-status information.
+ */
+ if (lustre_msg_get_last_committed(req->rq_repmsg)) {
+ imp->imp_peer_committed_transno =
+ lustre_msg_get_last_committed(req->rq_repmsg);
+ }
+
+ ptlrpc_free_committed(imp);
+
+ if (!list_empty(&imp->imp_replay_list)) {
+ struct ptlrpc_request *last;
+
+ last = list_entry(imp->imp_replay_list.prev,
+ struct ptlrpc_request,
+ rq_replay_list);
+ /*
+ * Requests with rq_replay stay on the list even if no
+ * commit is expected.
+ */
+ if (last->rq_transno > imp->imp_peer_committed_transno)
+ ptlrpc_pinger_commit_expected(imp);
+ }
+
+ spin_unlock(&imp->imp_lock);
+ }
+
+ return rc;
+}
+
+/**
+ * Helper function to send request \a req over the network for the first time
+ * Also adjusts request phase.
+ * Returns 0 on success or error code.
+ */
+static int ptlrpc_send_new_req(struct ptlrpc_request *req)
+{
+ struct obd_import *imp = req->rq_import;
+ int rc;
+
+ LASSERT(req->rq_phase == RQ_PHASE_NEW);
+ if (req->rq_sent && (req->rq_sent > get_seconds()) &&
+ (!req->rq_generation_set ||
+ req->rq_import_generation == imp->imp_generation))
+ return 0;
+
+ ptlrpc_rqphase_move(req, RQ_PHASE_RPC);
+
+ spin_lock(&imp->imp_lock);
+
+ if (!req->rq_generation_set)
+ req->rq_import_generation = imp->imp_generation;
+
+ if (ptlrpc_import_delay_req(imp, req, &rc)) {
+ spin_lock(&req->rq_lock);
+ req->rq_waiting = 1;
+ spin_unlock(&req->rq_lock);
+
+ DEBUG_REQ(D_HA, req, "req from PID %d waiting for recovery: (%s != %s)",
+ lustre_msg_get_status(req->rq_reqmsg),
+ ptlrpc_import_state_name(req->rq_send_state),
+ ptlrpc_import_state_name(imp->imp_state));
+ LASSERT(list_empty(&req->rq_list));
+ list_add_tail(&req->rq_list, &imp->imp_delayed_list);
+ atomic_inc(&req->rq_import->imp_inflight);
+ spin_unlock(&imp->imp_lock);
+ return 0;
+ }
+
+ if (rc != 0) {
+ spin_unlock(&imp->imp_lock);
+ req->rq_status = rc;
+ ptlrpc_rqphase_move(req, RQ_PHASE_INTERPRET);
+ return rc;
+ }
+
+ LASSERT(list_empty(&req->rq_list));
+ list_add_tail(&req->rq_list, &imp->imp_sending_list);
+ atomic_inc(&req->rq_import->imp_inflight);
+ spin_unlock(&imp->imp_lock);
+
+ lustre_msg_set_status(req->rq_reqmsg, current_pid());
+
+ rc = sptlrpc_req_refresh_ctx(req, -1);
+ if (rc) {
+ if (req->rq_err) {
+ req->rq_status = rc;
+ return 1;
+ }
+ spin_lock(&req->rq_lock);
+ req->rq_wait_ctx = 1;
+ spin_unlock(&req->rq_lock);
+ return 0;
+ }
+
+ CDEBUG(D_RPCTRACE, "Sending RPC pname:cluuid:pid:xid:nid:opc %s:%s:%d:%llu:%s:%d\n",
+ current_comm(),
+ imp->imp_obd->obd_uuid.uuid,
+ lustre_msg_get_status(req->rq_reqmsg), req->rq_xid,
+ libcfs_nid2str(imp->imp_connection->c_peer.nid),
+ lustre_msg_get_opc(req->rq_reqmsg));
+
+ rc = ptl_send_rpc(req, 0);
+ if (rc) {
+ DEBUG_REQ(D_HA, req, "send failed (%d); expect timeout", rc);
+ spin_lock(&req->rq_lock);
+ req->rq_net_err = 1;
+ spin_unlock(&req->rq_lock);
+ return rc;
+ }
+ return 0;
+}
+
+static inline int ptlrpc_set_producer(struct ptlrpc_request_set *set)
+{
+ int remaining, rc;
+
+ LASSERT(set->set_producer != NULL);
+
+ remaining = atomic_read(&set->set_remaining);
+
+ /* populate the ->set_requests list with requests until we
+ * reach the maximum number of RPCs in flight for this set */
+ while (atomic_read(&set->set_remaining) < set->set_max_inflight) {
+ rc = set->set_producer(set, set->set_producer_arg);
+ if (rc == -ENOENT) {
+ /* no more RPC to produce */
+ set->set_producer = NULL;
+ set->set_producer_arg = NULL;
+ return 0;
+ }
+ }
+
+ return (atomic_read(&set->set_remaining) - remaining);
+}
+
+/**
+ * this sends any unsent RPCs in \a set and returns 1 if all are sent
+ * and no more replies are expected.
+ * (it is possible to get less replies than requests sent e.g. due to timed out
+ * requests or requests that we had trouble to send out)
+ *
+ * NOTE: This function contains a potential schedule point (cond_resched()).
+ */
+int ptlrpc_check_set(const struct lu_env *env, struct ptlrpc_request_set *set)
+{
+ struct list_head *tmp, *next;
+ struct list_head comp_reqs;
+ int force_timer_recalc = 0;
+
+ if (atomic_read(&set->set_remaining) == 0)
+ return 1;
+
+ INIT_LIST_HEAD(&comp_reqs);
+ list_for_each_safe(tmp, next, &set->set_requests) {
+ struct ptlrpc_request *req =
+ list_entry(tmp, struct ptlrpc_request,
+ rq_set_chain);
+ struct obd_import *imp = req->rq_import;
+ int unregistered = 0;
+ int rc = 0;
+
+ /* This schedule point is mainly for the ptlrpcd caller of this
+ * function. Most ptlrpc sets are not long-lived and unbounded
+ * in length, but at the least the set used by the ptlrpcd is.
+ * Since the processing time is unbounded, we need to insert an
+ * explicit schedule point to make the thread well-behaved.
+ */
+ cond_resched();
+
+ if (req->rq_phase == RQ_PHASE_NEW &&
+ ptlrpc_send_new_req(req)) {
+ force_timer_recalc = 1;
+ }
+
+ /* delayed send - skip */
+ if (req->rq_phase == RQ_PHASE_NEW && req->rq_sent)
+ continue;
+
+ /* delayed resend - skip */
+ if (req->rq_phase == RQ_PHASE_RPC && req->rq_resend &&
+ req->rq_sent > get_seconds())
+ continue;
+
+ if (!(req->rq_phase == RQ_PHASE_RPC ||
+ req->rq_phase == RQ_PHASE_BULK ||
+ req->rq_phase == RQ_PHASE_INTERPRET ||
+ req->rq_phase == RQ_PHASE_UNREGISTERING ||
+ req->rq_phase == RQ_PHASE_COMPLETE)) {
+ DEBUG_REQ(D_ERROR, req, "bad phase %x", req->rq_phase);
+ LBUG();
+ }
+
+ if (req->rq_phase == RQ_PHASE_UNREGISTERING) {
+ LASSERT(req->rq_next_phase != req->rq_phase);
+ LASSERT(req->rq_next_phase != RQ_PHASE_UNDEFINED);
+
+ /*
+ * Skip processing until reply is unlinked. We
+ * can't return to pool before that and we can't
+ * call interpret before that. We need to make
+ * sure that all rdma transfers finished and will
+ * not corrupt any data.
+ */
+ if (ptlrpc_client_recv_or_unlink(req) ||
+ ptlrpc_client_bulk_active(req))
+ continue;
+
+ /*
+ * Turn fail_loc off to prevent it from looping
+ * forever.
+ */
+ if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REPL_UNLINK)) {
+ OBD_FAIL_CHECK_ORSET(OBD_FAIL_PTLRPC_LONG_REPL_UNLINK,
+ OBD_FAIL_ONCE);
+ }
+ if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_BULK_UNLINK)) {
+ OBD_FAIL_CHECK_ORSET(OBD_FAIL_PTLRPC_LONG_BULK_UNLINK,
+ OBD_FAIL_ONCE);
+ }
+
+ /*
+ * Move to next phase if reply was successfully
+ * unlinked.
+ */
+ ptlrpc_rqphase_move(req, req->rq_next_phase);
+ }
+
+ if (req->rq_phase == RQ_PHASE_COMPLETE) {
+ list_move_tail(&req->rq_set_chain, &comp_reqs);
+ continue;
+ }
+
+ if (req->rq_phase == RQ_PHASE_INTERPRET)
+ goto interpret;
+
+ /*
+ * Note that this also will start async reply unlink.
+ */
+ if (req->rq_net_err && !req->rq_timedout) {
+ ptlrpc_expire_one_request(req, 1);
+
+ /*
+ * Check if we still need to wait for unlink.
+ */
+ if (ptlrpc_client_recv_or_unlink(req) ||
+ ptlrpc_client_bulk_active(req))
+ continue;
+ /* If there is no need to resend, fail it now. */
+ if (req->rq_no_resend) {
+ if (req->rq_status == 0)
+ req->rq_status = -EIO;
+ ptlrpc_rqphase_move(req, RQ_PHASE_INTERPRET);
+ goto interpret;
+ } else {
+ continue;
+ }
+ }
+
+ if (req->rq_err) {
+ spin_lock(&req->rq_lock);
+ req->rq_replied = 0;
+ spin_unlock(&req->rq_lock);
+ if (req->rq_status == 0)
+ req->rq_status = -EIO;
+ ptlrpc_rqphase_move(req, RQ_PHASE_INTERPRET);
+ goto interpret;
+ }
+
+ /* ptlrpc_set_wait->l_wait_event sets lwi_allow_intr
+ * so it sets rq_intr regardless of individual rpc
+ * timeouts. The synchronous IO waiting path sets
+ * rq_intr irrespective of whether ptlrpcd
+ * has seen a timeout. Our policy is to only interpret
+ * interrupted rpcs after they have timed out, so we
+ * need to enforce that here.
+ */
+
+ if (req->rq_intr && (req->rq_timedout || req->rq_waiting ||
+ req->rq_wait_ctx)) {
+ req->rq_status = -EINTR;
+ ptlrpc_rqphase_move(req, RQ_PHASE_INTERPRET);
+ goto interpret;
+ }
+
+ if (req->rq_phase == RQ_PHASE_RPC) {
+ if (req->rq_timedout || req->rq_resend ||
+ req->rq_waiting || req->rq_wait_ctx) {
+ int status;
+
+ if (!ptlrpc_unregister_reply(req, 1))
+ continue;
+
+ spin_lock(&imp->imp_lock);
+ if (ptlrpc_import_delay_req(imp, req,
+ &status)) {
+ /* put on delay list - only if we wait
+ * recovery finished - before send */
+ list_del_init(&req->rq_list);
+ list_add_tail(&req->rq_list,
+ &imp->
+ imp_delayed_list);
+ spin_unlock(&imp->imp_lock);
+ continue;
+ }
+
+ if (status != 0) {
+ req->rq_status = status;
+ ptlrpc_rqphase_move(req,
+ RQ_PHASE_INTERPRET);
+ spin_unlock(&imp->imp_lock);
+ goto interpret;
+ }
+ if (ptlrpc_no_resend(req) &&
+ !req->rq_wait_ctx) {
+ req->rq_status = -ENOTCONN;
+ ptlrpc_rqphase_move(req,
+ RQ_PHASE_INTERPRET);
+ spin_unlock(&imp->imp_lock);
+ goto interpret;
+ }
+
+ list_del_init(&req->rq_list);
+ list_add_tail(&req->rq_list,
+ &imp->imp_sending_list);
+
+ spin_unlock(&imp->imp_lock);
+
+ spin_lock(&req->rq_lock);
+ req->rq_waiting = 0;
+ spin_unlock(&req->rq_lock);
+
+ if (req->rq_timedout || req->rq_resend) {
+ /* This is re-sending anyways,
+ * let's mark req as resend. */
+ spin_lock(&req->rq_lock);
+ req->rq_resend = 1;
+ spin_unlock(&req->rq_lock);
+ if (req->rq_bulk) {
+ __u64 old_xid;
+
+ if (!ptlrpc_unregister_bulk(req, 1))
+ continue;
+
+ /* ensure previous bulk fails */
+ old_xid = req->rq_xid;
+ req->rq_xid = ptlrpc_next_xid();
+ CDEBUG(D_HA, "resend bulk old x%llu new x%llu\n",
+ old_xid, req->rq_xid);
+ }
+ }
+ /*
+ * rq_wait_ctx is only touched by ptlrpcd,
+ * so no lock is needed here.
+ */
+ status = sptlrpc_req_refresh_ctx(req, -1);
+ if (status) {
+ if (req->rq_err) {
+ req->rq_status = status;
+ spin_lock(&req->rq_lock);
+ req->rq_wait_ctx = 0;
+ spin_unlock(&req->rq_lock);
+ force_timer_recalc = 1;
+ } else {
+ spin_lock(&req->rq_lock);
+ req->rq_wait_ctx = 1;
+ spin_unlock(&req->rq_lock);
+ }
+
+ continue;
+ } else {
+ spin_lock(&req->rq_lock);
+ req->rq_wait_ctx = 0;
+ spin_unlock(&req->rq_lock);
+ }
+
+ rc = ptl_send_rpc(req, 0);
+ if (rc) {
+ DEBUG_REQ(D_HA, req,
+ "send failed: rc = %d", rc);
+ force_timer_recalc = 1;
+ spin_lock(&req->rq_lock);
+ req->rq_net_err = 1;
+ spin_unlock(&req->rq_lock);
+ continue;
+ }
+ /* need to reset the timeout */
+ force_timer_recalc = 1;
+ }
+
+ spin_lock(&req->rq_lock);
+
+ if (ptlrpc_client_early(req)) {
+ ptlrpc_at_recv_early_reply(req);
+ spin_unlock(&req->rq_lock);
+ continue;
+ }
+
+ /* Still waiting for a reply? */
+ if (ptlrpc_client_recv(req)) {
+ spin_unlock(&req->rq_lock);
+ continue;
+ }
+
+ /* Did we actually receive a reply? */
+ if (!ptlrpc_client_replied(req)) {
+ spin_unlock(&req->rq_lock);
+ continue;
+ }
+
+ spin_unlock(&req->rq_lock);
+
+ /* unlink from net because we are going to
+ * swab in-place of reply buffer */
+ unregistered = ptlrpc_unregister_reply(req, 1);
+ if (!unregistered)
+ continue;
+
+ req->rq_status = after_reply(req);
+ if (req->rq_resend)
+ continue;
+
+ /* If there is no bulk associated with this request,
+ * then we're done and should let the interpreter
+ * process the reply. Similarly if the RPC returned
+ * an error, and therefore the bulk will never arrive.
+ */
+ if (req->rq_bulk == NULL || req->rq_status < 0) {
+ ptlrpc_rqphase_move(req, RQ_PHASE_INTERPRET);
+ goto interpret;
+ }
+
+ ptlrpc_rqphase_move(req, RQ_PHASE_BULK);
+ }
+
+ LASSERT(req->rq_phase == RQ_PHASE_BULK);
+ if (ptlrpc_client_bulk_active(req))
+ continue;
+
+ if (req->rq_bulk->bd_failure) {
+ /* The RPC reply arrived OK, but the bulk screwed
+ * up! Dead weird since the server told us the RPC
+ * was good after getting the REPLY for her GET or
+ * the ACK for her PUT. */
+ DEBUG_REQ(D_ERROR, req, "bulk transfer failed");
+ req->rq_status = -EIO;
+ }
+
+ ptlrpc_rqphase_move(req, RQ_PHASE_INTERPRET);
+
+interpret:
+ LASSERT(req->rq_phase == RQ_PHASE_INTERPRET);
+
+ /* This moves to "unregistering" phase we need to wait for
+ * reply unlink. */
+ if (!unregistered && !ptlrpc_unregister_reply(req, 1)) {
+ /* start async bulk unlink too */
+ ptlrpc_unregister_bulk(req, 1);
+ continue;
+ }
+
+ if (!ptlrpc_unregister_bulk(req, 1))
+ continue;
+
+ /* When calling interpret receiving already should be
+ * finished. */
+ LASSERT(!req->rq_receiving_reply);
+
+ ptlrpc_req_interpret(env, req, req->rq_status);
+
+ if (ptlrpcd_check_work(req)) {
+ atomic_dec(&set->set_remaining);
+ continue;
+ }
+ ptlrpc_rqphase_move(req, RQ_PHASE_COMPLETE);
+
+ CDEBUG(req->rq_reqmsg != NULL ? D_RPCTRACE : 0,
+ "Completed RPC pname:cluuid:pid:xid:nid:opc %s:%s:%d:%llu:%s:%d\n",
+ current_comm(), imp->imp_obd->obd_uuid.uuid,
+ lustre_msg_get_status(req->rq_reqmsg), req->rq_xid,
+ libcfs_nid2str(imp->imp_connection->c_peer.nid),
+ lustre_msg_get_opc(req->rq_reqmsg));
+
+ spin_lock(&imp->imp_lock);
+ /* Request already may be not on sending or delaying list. This
+ * may happen in the case of marking it erroneous for the case
+ * ptlrpc_import_delay_req(req, status) find it impossible to
+ * allow sending this rpc and returns *status != 0. */
+ if (!list_empty(&req->rq_list)) {
+ list_del_init(&req->rq_list);
+ atomic_dec(&imp->imp_inflight);
+ }
+ spin_unlock(&imp->imp_lock);
+
+ atomic_dec(&set->set_remaining);
+ wake_up_all(&imp->imp_recovery_waitq);
+
+ if (set->set_producer) {
+ /* produce a new request if possible */
+ if (ptlrpc_set_producer(set) > 0)
+ force_timer_recalc = 1;
+
+ /* free the request that has just been completed
+ * in order not to pollute set->set_requests */
+ list_del_init(&req->rq_set_chain);
+ spin_lock(&req->rq_lock);
+ req->rq_set = NULL;
+ req->rq_invalid_rqset = 0;
+ spin_unlock(&req->rq_lock);
+
+ /* record rq_status to compute the final status later */
+ if (req->rq_status != 0)
+ set->set_rc = req->rq_status;
+ ptlrpc_req_finished(req);
+ } else {
+ list_move_tail(&req->rq_set_chain, &comp_reqs);
+ }
+ }
+
+ /* move completed request at the head of list so it's easier for
+ * caller to find them */
+ list_splice(&comp_reqs, &set->set_requests);
+
+ /* If we hit an error, we want to recover promptly. */
+ return atomic_read(&set->set_remaining) == 0 || force_timer_recalc;
+}
+EXPORT_SYMBOL(ptlrpc_check_set);
+
+/**
+ * Time out request \a req. is \a async_unlink is set, that means do not wait
+ * until LNet actually confirms network buffer unlinking.
+ * Return 1 if we should give up further retrying attempts or 0 otherwise.
+ */
+int ptlrpc_expire_one_request(struct ptlrpc_request *req, int async_unlink)
+{
+ struct obd_import *imp = req->rq_import;
+ int rc = 0;
+
+ spin_lock(&req->rq_lock);
+ req->rq_timedout = 1;
+ spin_unlock(&req->rq_lock);
+
+ DEBUG_REQ(D_WARNING, req, "Request sent has %s: [sent "CFS_DURATION_T
+ "/real "CFS_DURATION_T"]",
+ req->rq_net_err ? "failed due to network error" :
+ ((req->rq_real_sent == 0 ||
+ time_before((unsigned long)req->rq_real_sent, (unsigned long)req->rq_sent) ||
+ cfs_time_aftereq(req->rq_real_sent, req->rq_deadline)) ?
+ "timed out for sent delay" : "timed out for slow reply"),
+ req->rq_sent, req->rq_real_sent);
+
+ if (imp != NULL && obd_debug_peer_on_timeout)
+ LNetCtl(IOC_LIBCFS_DEBUG_PEER, &imp->imp_connection->c_peer);
+
+ ptlrpc_unregister_reply(req, async_unlink);
+ ptlrpc_unregister_bulk(req, async_unlink);
+
+ if (obd_dump_on_timeout)
+ libcfs_debug_dumplog();
+
+ if (imp == NULL) {
+ DEBUG_REQ(D_HA, req, "NULL import: already cleaned up?");
+ return 1;
+ }
+
+ atomic_inc(&imp->imp_timeouts);
+
+ /* The DLM server doesn't want recovery run on its imports. */
+ if (imp->imp_dlm_fake)
+ return 1;
+
+ /* If this request is for recovery or other primordial tasks,
+ * then error it out here. */
+ if (req->rq_ctx_init || req->rq_ctx_fini ||
+ req->rq_send_state != LUSTRE_IMP_FULL ||
+ imp->imp_obd->obd_no_recov) {
+ DEBUG_REQ(D_RPCTRACE, req, "err -110, sent_state=%s (now=%s)",
+ ptlrpc_import_state_name(req->rq_send_state),
+ ptlrpc_import_state_name(imp->imp_state));
+ spin_lock(&req->rq_lock);
+ req->rq_status = -ETIMEDOUT;
+ req->rq_err = 1;
+ spin_unlock(&req->rq_lock);
+ return 1;
+ }
+
+ /* if a request can't be resent we can't wait for an answer after
+ the timeout */
+ if (ptlrpc_no_resend(req)) {
+ DEBUG_REQ(D_RPCTRACE, req, "TIMEOUT-NORESEND:");
+ rc = 1;
+ }
+
+ ptlrpc_fail_import(imp, lustre_msg_get_conn_cnt(req->rq_reqmsg));
+
+ return rc;
+}
+
+/**
+ * Time out all uncompleted requests in request set pointed by \a data
+ * Callback used when waiting on sets with l_wait_event.
+ * Always returns 1.
+ */
+int ptlrpc_expired_set(void *data)
+{
+ struct ptlrpc_request_set *set = data;
+ struct list_head *tmp;
+ time_t now = get_seconds();
+
+ LASSERT(set != NULL);
+
+ /*
+ * A timeout expired. See which reqs it applies to...
+ */
+ list_for_each(tmp, &set->set_requests) {
+ struct ptlrpc_request *req =
+ list_entry(tmp, struct ptlrpc_request,
+ rq_set_chain);
+
+ /* don't expire request waiting for context */
+ if (req->rq_wait_ctx)
+ continue;
+
+ /* Request in-flight? */
+ if (!((req->rq_phase == RQ_PHASE_RPC &&
+ !req->rq_waiting && !req->rq_resend) ||
+ (req->rq_phase == RQ_PHASE_BULK)))
+ continue;
+
+ if (req->rq_timedout || /* already dealt with */
+ req->rq_deadline > now) /* not expired */
+ continue;
+
+ /* Deal with this guy. Do it asynchronously to not block
+ * ptlrpcd thread. */
+ ptlrpc_expire_one_request(req, 1);
+ }
+
+ /*
+ * When waiting for a whole set, we always break out of the
+ * sleep so we can recalculate the timeout, or enable interrupts
+ * if everyone's timed out.
+ */
+ return 1;
+}
+EXPORT_SYMBOL(ptlrpc_expired_set);
+
+/**
+ * Sets rq_intr flag in \a req under spinlock.
+ */
+void ptlrpc_mark_interrupted(struct ptlrpc_request *req)
+{
+ spin_lock(&req->rq_lock);
+ req->rq_intr = 1;
+ spin_unlock(&req->rq_lock);
+}
+EXPORT_SYMBOL(ptlrpc_mark_interrupted);
+
+/**
+ * Interrupts (sets interrupted flag) all uncompleted requests in
+ * a set \a data. Callback for l_wait_event for interruptible waits.
+ */
+void ptlrpc_interrupted_set(void *data)
+{
+ struct ptlrpc_request_set *set = data;
+ struct list_head *tmp;
+
+ LASSERT(set != NULL);
+ CDEBUG(D_RPCTRACE, "INTERRUPTED SET %p\n", set);
+
+ list_for_each(tmp, &set->set_requests) {
+ struct ptlrpc_request *req =
+ list_entry(tmp, struct ptlrpc_request,
+ rq_set_chain);
+
+ if (req->rq_phase != RQ_PHASE_RPC &&
+ req->rq_phase != RQ_PHASE_UNREGISTERING)
+ continue;
+
+ ptlrpc_mark_interrupted(req);
+ }
+}
+EXPORT_SYMBOL(ptlrpc_interrupted_set);
+
+/**
+ * Get the smallest timeout in the set; this does NOT set a timeout.
+ */
+int ptlrpc_set_next_timeout(struct ptlrpc_request_set *set)
+{
+ struct list_head *tmp;
+ time_t now = get_seconds();
+ int timeout = 0;
+ struct ptlrpc_request *req;
+ int deadline;
+
+ list_for_each(tmp, &set->set_requests) {
+ req = list_entry(tmp, struct ptlrpc_request, rq_set_chain);
+
+ /*
+ * Request in-flight?
+ */
+ if (!(((req->rq_phase == RQ_PHASE_RPC) && !req->rq_waiting) ||
+ (req->rq_phase == RQ_PHASE_BULK) ||
+ (req->rq_phase == RQ_PHASE_NEW)))
+ continue;
+
+ /*
+ * Already timed out.
+ */
+ if (req->rq_timedout)
+ continue;
+
+ /*
+ * Waiting for ctx.
+ */
+ if (req->rq_wait_ctx)
+ continue;
+
+ if (req->rq_phase == RQ_PHASE_NEW)
+ deadline = req->rq_sent;
+ else if (req->rq_phase == RQ_PHASE_RPC && req->rq_resend)
+ deadline = req->rq_sent;
+ else
+ deadline = req->rq_sent + req->rq_timeout;
+
+ if (deadline <= now) /* actually expired already */
+ timeout = 1; /* ASAP */
+ else if (timeout == 0 || timeout > deadline - now)
+ timeout = deadline - now;
+ }
+ return timeout;
+}
+EXPORT_SYMBOL(ptlrpc_set_next_timeout);
+
+/**
+ * Send all unset request from the set and then wait until all
+ * requests in the set complete (either get a reply, timeout, get an
+ * error or otherwise be interrupted).
+ * Returns 0 on success or error code otherwise.
+ */
+int ptlrpc_set_wait(struct ptlrpc_request_set *set)
+{
+ struct list_head *tmp;
+ struct ptlrpc_request *req;
+ struct l_wait_info lwi;
+ int rc, timeout;
+
+ if (set->set_producer)
+ (void)ptlrpc_set_producer(set);
+ else
+ list_for_each(tmp, &set->set_requests) {
+ req = list_entry(tmp, struct ptlrpc_request,
+ rq_set_chain);
+ if (req->rq_phase == RQ_PHASE_NEW)
+ (void)ptlrpc_send_new_req(req);
+ }
+
+ if (list_empty(&set->set_requests))
+ return 0;
+
+ do {
+ timeout = ptlrpc_set_next_timeout(set);
+
+ /* wait until all complete, interrupted, or an in-flight
+ * req times out */
+ CDEBUG(D_RPCTRACE, "set %p going to sleep for %d seconds\n",
+ set, timeout);
+
+ if (timeout == 0 && !cfs_signal_pending())
+ /*
+ * No requests are in-flight (ether timed out
+ * or delayed), so we can allow interrupts.
+ * We still want to block for a limited time,
+ * so we allow interrupts during the timeout.
+ */
+ lwi = LWI_TIMEOUT_INTR_ALL(cfs_time_seconds(1),
+ ptlrpc_expired_set,
+ ptlrpc_interrupted_set, set);
+ else
+ /*
+ * At least one request is in flight, so no
+ * interrupts are allowed. Wait until all
+ * complete, or an in-flight req times out.
+ */
+ lwi = LWI_TIMEOUT(cfs_time_seconds(timeout ? timeout : 1),
+ ptlrpc_expired_set, set);
+
+ rc = l_wait_event(set->set_waitq, ptlrpc_check_set(NULL, set), &lwi);
+
+ /* LU-769 - if we ignored the signal because it was already
+ * pending when we started, we need to handle it now or we risk
+ * it being ignored forever */
+ if (rc == -ETIMEDOUT && !lwi.lwi_allow_intr &&
+ cfs_signal_pending()) {
+ sigset_t blocked_sigs =
+ cfs_block_sigsinv(LUSTRE_FATAL_SIGS);
+
+ /* In fact we only interrupt for the "fatal" signals
+ * like SIGINT or SIGKILL. We still ignore less
+ * important signals since ptlrpc set is not easily
+ * reentrant from userspace again */
+ if (cfs_signal_pending())
+ ptlrpc_interrupted_set(set);
+ cfs_restore_sigs(blocked_sigs);
+ }
+
+ LASSERT(rc == 0 || rc == -EINTR || rc == -ETIMEDOUT);
+
+ /* -EINTR => all requests have been flagged rq_intr so next
+ * check completes.
+ * -ETIMEDOUT => someone timed out. When all reqs have
+ * timed out, signals are enabled allowing completion with
+ * EINTR.
+ * I don't really care if we go once more round the loop in
+ * the error cases -eeb. */
+ if (rc == 0 && atomic_read(&set->set_remaining) == 0) {
+ list_for_each(tmp, &set->set_requests) {
+ req = list_entry(tmp, struct ptlrpc_request,
+ rq_set_chain);
+ spin_lock(&req->rq_lock);
+ req->rq_invalid_rqset = 1;
+ spin_unlock(&req->rq_lock);
+ }
+ }
+ } while (rc != 0 || atomic_read(&set->set_remaining) != 0);
+
+ LASSERT(atomic_read(&set->set_remaining) == 0);
+
+ rc = set->set_rc; /* rq_status of already freed requests if any */
+ list_for_each(tmp, &set->set_requests) {
+ req = list_entry(tmp, struct ptlrpc_request, rq_set_chain);
+
+ LASSERT(req->rq_phase == RQ_PHASE_COMPLETE);
+ if (req->rq_status != 0)
+ rc = req->rq_status;
+ }
+
+ if (set->set_interpret != NULL) {
+ int (*interpreter)(struct ptlrpc_request_set *set, void *, int) =
+ set->set_interpret;
+ rc = interpreter(set, set->set_arg, rc);
+ } else {
+ struct ptlrpc_set_cbdata *cbdata, *n;
+ int err;
+
+ list_for_each_entry_safe(cbdata, n,
+ &set->set_cblist, psc_item) {
+ list_del_init(&cbdata->psc_item);
+ err = cbdata->psc_interpret(set, cbdata->psc_data, rc);
+ if (err && !rc)
+ rc = err;
+ OBD_FREE_PTR(cbdata);
+ }
+ }
+
+ return rc;
+}
+EXPORT_SYMBOL(ptlrpc_set_wait);
+
+/**
+ * Helper function for request freeing.
+ * Called when request count reached zero and request needs to be freed.
+ * Removes request from all sorts of sending/replay lists it might be on,
+ * frees network buffers if any are present.
+ * If \a locked is set, that means caller is already holding import imp_lock
+ * and so we no longer need to reobtain it (for certain lists manipulations)
+ */
+static void __ptlrpc_free_req(struct ptlrpc_request *request, int locked)
+{
+ if (request == NULL)
+ return;
+ LASSERTF(!request->rq_receiving_reply, "req %p\n", request);
+ LASSERTF(request->rq_rqbd == NULL, "req %p\n", request);/* client-side */
+ LASSERTF(list_empty(&request->rq_list), "req %p\n", request);
+ LASSERTF(list_empty(&request->rq_set_chain), "req %p\n", request);
+ LASSERTF(list_empty(&request->rq_exp_list), "req %p\n", request);
+ LASSERTF(!request->rq_replay, "req %p\n", request);
+
+ req_capsule_fini(&request->rq_pill);
+
+ /* We must take it off the imp_replay_list first. Otherwise, we'll set
+ * request->rq_reqmsg to NULL while osc_close is dereferencing it. */
+ if (request->rq_import != NULL) {
+ if (!locked)
+ spin_lock(&request->rq_import->imp_lock);
+ list_del_init(&request->rq_replay_list);
+ if (!locked)
+ spin_unlock(&request->rq_import->imp_lock);
+ }
+ LASSERTF(list_empty(&request->rq_replay_list), "req %p\n", request);
+
+ if (atomic_read(&request->rq_refcount) != 0) {
+ DEBUG_REQ(D_ERROR, request,
+ "freeing request with nonzero refcount");
+ LBUG();
+ }
+
+ if (request->rq_repbuf != NULL)
+ sptlrpc_cli_free_repbuf(request);
+ if (request->rq_export != NULL) {
+ class_export_put(request->rq_export);
+ request->rq_export = NULL;
+ }
+ if (request->rq_import != NULL) {
+ class_import_put(request->rq_import);
+ request->rq_import = NULL;
+ }
+ if (request->rq_bulk != NULL)
+ ptlrpc_free_bulk_pin(request->rq_bulk);
+
+ if (request->rq_reqbuf != NULL || request->rq_clrbuf != NULL)
+ sptlrpc_cli_free_reqbuf(request);
+
+ if (request->rq_cli_ctx)
+ sptlrpc_req_put_ctx(request, !locked);
+
+ if (request->rq_pool)
+ __ptlrpc_free_req_to_pool(request);
+ else
+ ptlrpc_request_cache_free(request);
+}
+
+static int __ptlrpc_req_finished(struct ptlrpc_request *request, int locked);
+/**
+ * Drop one request reference. Must be called with import imp_lock held.
+ * When reference count drops to zero, request is freed.
+ */
+void ptlrpc_req_finished_with_imp_lock(struct ptlrpc_request *request)
+{
+ assert_spin_locked(&request->rq_import->imp_lock);
+ (void)__ptlrpc_req_finished(request, 1);
+}
+EXPORT_SYMBOL(ptlrpc_req_finished_with_imp_lock);
+
+/**
+ * Helper function
+ * Drops one reference count for request \a request.
+ * \a locked set indicates that caller holds import imp_lock.
+ * Frees the request when reference count reaches zero.
+ */
+static int __ptlrpc_req_finished(struct ptlrpc_request *request, int locked)
+{
+ if (request == NULL)
+ return 1;
+
+ if (request == LP_POISON ||
+ request->rq_reqmsg == LP_POISON) {
+ CERROR("dereferencing freed request (bug 575)\n");
+ LBUG();
+ return 1;
+ }
+
+ DEBUG_REQ(D_INFO, request, "refcount now %u",
+ atomic_read(&request->rq_refcount) - 1);
+
+ if (atomic_dec_and_test(&request->rq_refcount)) {
+ __ptlrpc_free_req(request, locked);
+ return 1;
+ }
+
+ return 0;
+}
+
+/**
+ * Drops one reference count for a request.
+ */
+void ptlrpc_req_finished(struct ptlrpc_request *request)
+{
+ __ptlrpc_req_finished(request, 0);
+}
+EXPORT_SYMBOL(ptlrpc_req_finished);
+
+/**
+ * Returns xid of a \a request
+ */
+__u64 ptlrpc_req_xid(struct ptlrpc_request *request)
+{
+ return request->rq_xid;
+}
+EXPORT_SYMBOL(ptlrpc_req_xid);
+
+/**
+ * Disengage the client's reply buffer from the network
+ * NB does _NOT_ unregister any client-side bulk.
+ * IDEMPOTENT, but _not_ safe against concurrent callers.
+ * The request owner (i.e. the thread doing the I/O) must call...
+ * Returns 0 on success or 1 if unregistering cannot be made.
+ */
+int ptlrpc_unregister_reply(struct ptlrpc_request *request, int async)
+{
+ int rc;
+ wait_queue_head_t *wq;
+ struct l_wait_info lwi;
+
+ /*
+ * Might sleep.
+ */
+ LASSERT(!in_interrupt());
+
+ /*
+ * Let's setup deadline for reply unlink.
+ */
+ if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REPL_UNLINK) &&
+ async && request->rq_reply_deadline == 0)
+ request->rq_reply_deadline = get_seconds()+LONG_UNLINK;
+
+ /*
+ * Nothing left to do.
+ */
+ if (!ptlrpc_client_recv_or_unlink(request))
+ return 1;
+
+ LNetMDUnlink(request->rq_reply_md_h);
+
+ /*
+ * Let's check it once again.
+ */
+ if (!ptlrpc_client_recv_or_unlink(request))
+ return 1;
+
+ /*
+ * Move to "Unregistering" phase as reply was not unlinked yet.
+ */
+ ptlrpc_rqphase_move(request, RQ_PHASE_UNREGISTERING);
+
+ /*
+ * Do not wait for unlink to finish.
+ */
+ if (async)
+ return 0;
+
+ /*
+ * We have to l_wait_event() whatever the result, to give liblustre
+ * a chance to run reply_in_callback(), and to make sure we've
+ * unlinked before returning a req to the pool.
+ */
+ if (request->rq_set != NULL)
+ wq = &request->rq_set->set_waitq;
+ else
+ wq = &request->rq_reply_waitq;
+
+ for (;;) {
+ /* Network access will complete in finite time but the HUGE
+ * timeout lets us CWARN for visibility of sluggish NALs */
+ lwi = LWI_TIMEOUT_INTERVAL(cfs_time_seconds(LONG_UNLINK),
+ cfs_time_seconds(1), NULL, NULL);
+ rc = l_wait_event(*wq, !ptlrpc_client_recv_or_unlink(request),
+ &lwi);
+ if (rc == 0) {
+ ptlrpc_rqphase_move(request, request->rq_next_phase);
+ return 1;
+ }
+
+ LASSERT(rc == -ETIMEDOUT);
+ DEBUG_REQ(D_WARNING, request,
+ "Unexpectedly long timeout rvcng=%d unlnk=%d/%d",
+ request->rq_receiving_reply,
+ request->rq_req_unlink, request->rq_reply_unlink);
+ }
+ return 0;
+}
+EXPORT_SYMBOL(ptlrpc_unregister_reply);
+
+static void ptlrpc_free_request(struct ptlrpc_request *req)
+{
+ spin_lock(&req->rq_lock);
+ req->rq_replay = 0;
+ spin_unlock(&req->rq_lock);
+
+ if (req->rq_commit_cb != NULL)
+ req->rq_commit_cb(req);
+ list_del_init(&req->rq_replay_list);
+
+ __ptlrpc_req_finished(req, 1);
+}
+
+/**
+ * the request is committed and dropped from the replay list of its import
+ */
+void ptlrpc_request_committed(struct ptlrpc_request *req, int force)
+{
+ struct obd_import *imp = req->rq_import;
+
+ spin_lock(&imp->imp_lock);
+ if (list_empty(&req->rq_replay_list)) {
+ spin_unlock(&imp->imp_lock);
+ return;
+ }
+
+ if (force || req->rq_transno <= imp->imp_peer_committed_transno)
+ ptlrpc_free_request(req);
+
+ spin_unlock(&imp->imp_lock);
+}
+EXPORT_SYMBOL(ptlrpc_request_committed);
+
+/**
+ * Iterates through replay_list on import and prunes
+ * all requests have transno smaller than last_committed for the
+ * import and don't have rq_replay set.
+ * Since requests are sorted in transno order, stops when meeting first
+ * transno bigger than last_committed.
+ * caller must hold imp->imp_lock
+ */
+void ptlrpc_free_committed(struct obd_import *imp)
+{
+ struct ptlrpc_request *req, *saved;
+ struct ptlrpc_request *last_req = NULL; /* temporary fire escape */
+ bool skip_committed_list = true;
+
+ LASSERT(imp != NULL);
+ assert_spin_locked(&imp->imp_lock);
+
+ if (imp->imp_peer_committed_transno == imp->imp_last_transno_checked &&
+ imp->imp_generation == imp->imp_last_generation_checked) {
+ CDEBUG(D_INFO, "%s: skip recheck: last_committed %llu\n",
+ imp->imp_obd->obd_name, imp->imp_peer_committed_transno);
+ return;
+ }
+ CDEBUG(D_RPCTRACE, "%s: committing for last_committed %llu gen %d\n",
+ imp->imp_obd->obd_name, imp->imp_peer_committed_transno,
+ imp->imp_generation);
+
+ if (imp->imp_generation != imp->imp_last_generation_checked)
+ skip_committed_list = false;
+
+ imp->imp_last_transno_checked = imp->imp_peer_committed_transno;
+ imp->imp_last_generation_checked = imp->imp_generation;
+
+ list_for_each_entry_safe(req, saved, &imp->imp_replay_list,
+ rq_replay_list) {
+ /* XXX ok to remove when 1357 resolved - rread 05/29/03 */
+ LASSERT(req != last_req);
+ last_req = req;
+
+ if (req->rq_transno == 0) {
+ DEBUG_REQ(D_EMERG, req, "zero transno during replay");
+ LBUG();
+ }
+ if (req->rq_import_generation < imp->imp_generation) {
+ DEBUG_REQ(D_RPCTRACE, req, "free request with old gen");
+ goto free_req;
+ }
+
+ /* not yet committed */
+ if (req->rq_transno > imp->imp_peer_committed_transno) {
+ DEBUG_REQ(D_RPCTRACE, req, "stopping search");
+ break;
+ }
+
+ if (req->rq_replay) {
+ DEBUG_REQ(D_RPCTRACE, req, "keeping (FL_REPLAY)");
+ list_move_tail(&req->rq_replay_list,
+ &imp->imp_committed_list);
+ continue;
+ }
+
+ DEBUG_REQ(D_INFO, req, "commit (last_committed %llu)",
+ imp->imp_peer_committed_transno);
+free_req:
+ ptlrpc_free_request(req);
+ }
+ if (skip_committed_list)
+ return;
+
+ list_for_each_entry_safe(req, saved, &imp->imp_committed_list,
+ rq_replay_list) {
+ LASSERT(req->rq_transno != 0);
+ if (req->rq_import_generation < imp->imp_generation) {
+ DEBUG_REQ(D_RPCTRACE, req, "free stale open request");
+ ptlrpc_free_request(req);
+ }
+ }
+}
+
+void ptlrpc_cleanup_client(struct obd_import *imp)
+{
+}
+EXPORT_SYMBOL(ptlrpc_cleanup_client);
+
+/**
+ * Schedule previously sent request for resend.
+ * For bulk requests we assign new xid (to avoid problems with
+ * lost replies and therefore several transfers landing into same buffer
+ * from different sending attempts).
+ */
+void ptlrpc_resend_req(struct ptlrpc_request *req)
+{
+ DEBUG_REQ(D_HA, req, "going to resend");
+ spin_lock(&req->rq_lock);
+
+ /* Request got reply but linked to the import list still.
+ Let ptlrpc_check_set() to process it. */
+ if (ptlrpc_client_replied(req)) {
+ spin_unlock(&req->rq_lock);
+ DEBUG_REQ(D_HA, req, "it has reply, so skip it");
+ return;
+ }
+
+ lustre_msg_set_handle(req->rq_reqmsg, &(struct lustre_handle){ 0 });
+ req->rq_status = -EAGAIN;
+
+ req->rq_resend = 1;
+ req->rq_net_err = 0;
+ req->rq_timedout = 0;
+ if (req->rq_bulk) {
+ __u64 old_xid = req->rq_xid;
+
+ /* ensure previous bulk fails */
+ req->rq_xid = ptlrpc_next_xid();
+ CDEBUG(D_HA, "resend bulk old x%llu new x%llu\n",
+ old_xid, req->rq_xid);
+ }
+ ptlrpc_client_wake_req(req);
+ spin_unlock(&req->rq_lock);
+}
+EXPORT_SYMBOL(ptlrpc_resend_req);
+
+/* XXX: this function and rq_status are currently unused */
+void ptlrpc_restart_req(struct ptlrpc_request *req)
+{
+ DEBUG_REQ(D_HA, req, "restarting (possibly-)completed request");
+ req->rq_status = -ERESTARTSYS;
+
+ spin_lock(&req->rq_lock);
+ req->rq_restart = 1;
+ req->rq_timedout = 0;
+ ptlrpc_client_wake_req(req);
+ spin_unlock(&req->rq_lock);
+}
+EXPORT_SYMBOL(ptlrpc_restart_req);
+
+/**
+ * Grab additional reference on a request \a req
+ */
+struct ptlrpc_request *ptlrpc_request_addref(struct ptlrpc_request *req)
+{
+ atomic_inc(&req->rq_refcount);
+ return req;
+}
+EXPORT_SYMBOL(ptlrpc_request_addref);
+
+/**
+ * Add a request to import replay_list.
+ * Must be called under imp_lock
+ */
+void ptlrpc_retain_replayable_request(struct ptlrpc_request *req,
+ struct obd_import *imp)
+{
+ struct list_head *tmp;
+
+ assert_spin_locked(&imp->imp_lock);
+
+ if (req->rq_transno == 0) {
+ DEBUG_REQ(D_EMERG, req, "saving request with zero transno");
+ LBUG();
+ }
+
+ /* clear this for new requests that were resent as well
+ as resent replayed requests. */
+ lustre_msg_clear_flags(req->rq_reqmsg, MSG_RESENT);
+
+ /* don't re-add requests that have been replayed */
+ if (!list_empty(&req->rq_replay_list))
+ return;
+
+ lustre_msg_add_flags(req->rq_reqmsg, MSG_REPLAY);
+
+ LASSERT(imp->imp_replayable);
+ /* Balanced in ptlrpc_free_committed, usually. */
+ ptlrpc_request_addref(req);
+ list_for_each_prev(tmp, &imp->imp_replay_list) {
+ struct ptlrpc_request *iter =
+ list_entry(tmp, struct ptlrpc_request,
+ rq_replay_list);
+
+ /* We may have duplicate transnos if we create and then
+ * open a file, or for closes retained if to match creating
+ * opens, so use req->rq_xid as a secondary key.
+ * (See bugs 684, 685, and 428.)
+ * XXX no longer needed, but all opens need transnos!
+ */
+ if (iter->rq_transno > req->rq_transno)
+ continue;
+
+ if (iter->rq_transno == req->rq_transno) {
+ LASSERT(iter->rq_xid != req->rq_xid);
+ if (iter->rq_xid > req->rq_xid)
+ continue;
+ }
+
+ list_add(&req->rq_replay_list, &iter->rq_replay_list);
+ return;
+ }
+
+ list_add(&req->rq_replay_list, &imp->imp_replay_list);
+}
+EXPORT_SYMBOL(ptlrpc_retain_replayable_request);
+
+/**
+ * Send request and wait until it completes.
+ * Returns request processing status.
+ */
+int ptlrpc_queue_wait(struct ptlrpc_request *req)
+{
+ struct ptlrpc_request_set *set;
+ int rc;
+
+ LASSERT(req->rq_set == NULL);
+ LASSERT(!req->rq_receiving_reply);
+
+ set = ptlrpc_prep_set();
+ if (set == NULL) {
+ CERROR("Unable to allocate ptlrpc set.");
+ return -ENOMEM;
+ }
+
+ /* for distributed debugging */
+ lustre_msg_set_status(req->rq_reqmsg, current_pid());
+
+ /* add a ref for the set (see comment in ptlrpc_set_add_req) */
+ ptlrpc_request_addref(req);
+ ptlrpc_set_add_req(set, req);
+ rc = ptlrpc_set_wait(set);
+ ptlrpc_set_destroy(set);
+
+ return rc;
+}
+EXPORT_SYMBOL(ptlrpc_queue_wait);
+
+struct ptlrpc_replay_async_args {
+ int praa_old_state;
+ int praa_old_status;
+};
+
+/**
+ * Callback used for replayed requests reply processing.
+ * In case of successful reply calls registered request replay callback.
+ * In case of error restart replay process.
+ */
+static int ptlrpc_replay_interpret(const struct lu_env *env,
+ struct ptlrpc_request *req,
+ void *data, int rc)
+{
+ struct ptlrpc_replay_async_args *aa = data;
+ struct obd_import *imp = req->rq_import;
+
+ atomic_dec(&imp->imp_replay_inflight);
+
+ if (!ptlrpc_client_replied(req)) {
+ CERROR("request replay timed out, restarting recovery\n");
+ rc = -ETIMEDOUT;
+ goto out;
+ }
+
+ if (lustre_msg_get_type(req->rq_repmsg) == PTL_RPC_MSG_ERR &&
+ (lustre_msg_get_status(req->rq_repmsg) == -ENOTCONN ||
+ lustre_msg_get_status(req->rq_repmsg) == -ENODEV)) {
+ rc = lustre_msg_get_status(req->rq_repmsg);
+ goto out;
+ }
+
+ /** VBR: check version failure */
+ if (lustre_msg_get_status(req->rq_repmsg) == -EOVERFLOW) {
+ /** replay was failed due to version mismatch */
+ DEBUG_REQ(D_WARNING, req, "Version mismatch during replay\n");
+ spin_lock(&imp->imp_lock);
+ imp->imp_vbr_failed = 1;
+ imp->imp_no_lock_replay = 1;
+ spin_unlock(&imp->imp_lock);
+ lustre_msg_set_status(req->rq_repmsg, aa->praa_old_status);
+ } else {
+ /** The transno had better not change over replay. */
+ LASSERTF(lustre_msg_get_transno(req->rq_reqmsg) ==
+ lustre_msg_get_transno(req->rq_repmsg) ||
+ lustre_msg_get_transno(req->rq_repmsg) == 0,
+ "%#llx/%#llx\n",
+ lustre_msg_get_transno(req->rq_reqmsg),
+ lustre_msg_get_transno(req->rq_repmsg));
+ }
+
+ spin_lock(&imp->imp_lock);
+ /** if replays by version then gap occur on server, no trust to locks */
+ if (lustre_msg_get_flags(req->rq_repmsg) & MSG_VERSION_REPLAY)
+ imp->imp_no_lock_replay = 1;
+ imp->imp_last_replay_transno = lustre_msg_get_transno(req->rq_reqmsg);
+ spin_unlock(&imp->imp_lock);
+ LASSERT(imp->imp_last_replay_transno);
+
+ /* transaction number shouldn't be bigger than the latest replayed */
+ if (req->rq_transno > lustre_msg_get_transno(req->rq_reqmsg)) {
+ DEBUG_REQ(D_ERROR, req,
+ "Reported transno %llu is bigger than the replayed one: %llu",
+ req->rq_transno,
+ lustre_msg_get_transno(req->rq_reqmsg));
+ rc = -EINVAL;
+ goto out;
+ }
+
+ DEBUG_REQ(D_HA, req, "got rep");
+
+ /* let the callback do fixups, possibly including in the request */
+ if (req->rq_replay_cb)
+ req->rq_replay_cb(req);
+
+ if (ptlrpc_client_replied(req) &&
+ lustre_msg_get_status(req->rq_repmsg) != aa->praa_old_status) {
+ DEBUG_REQ(D_ERROR, req, "status %d, old was %d",
+ lustre_msg_get_status(req->rq_repmsg),
+ aa->praa_old_status);
+ } else {
+ /* Put it back for re-replay. */
+ lustre_msg_set_status(req->rq_repmsg, aa->praa_old_status);
+ }
+
+ /*
+ * Errors while replay can set transno to 0, but
+ * imp_last_replay_transno shouldn't be set to 0 anyway
+ */
+ if (req->rq_transno == 0)
+ CERROR("Transno is 0 during replay!\n");
+
+ /* continue with recovery */
+ rc = ptlrpc_import_recovery_state_machine(imp);
+ out:
+ req->rq_send_state = aa->praa_old_state;
+
+ if (rc != 0)
+ /* this replay failed, so restart recovery */
+ ptlrpc_connect_import(imp);
+
+ return rc;
+}
+
+/**
+ * Prepares and queues request for replay.
+ * Adds it to ptlrpcd queue for actual sending.
+ * Returns 0 on success.
+ */
+int ptlrpc_replay_req(struct ptlrpc_request *req)
+{
+ struct ptlrpc_replay_async_args *aa;
+
+ LASSERT(req->rq_import->imp_state == LUSTRE_IMP_REPLAY);
+
+ LASSERT(sizeof(*aa) <= sizeof(req->rq_async_args));
+ aa = ptlrpc_req_async_args(req);
+ memset(aa, 0, sizeof(*aa));
+
+ /* Prepare request to be resent with ptlrpcd */
+ aa->praa_old_state = req->rq_send_state;
+ req->rq_send_state = LUSTRE_IMP_REPLAY;
+ req->rq_phase = RQ_PHASE_NEW;
+ req->rq_next_phase = RQ_PHASE_UNDEFINED;
+ if (req->rq_repmsg)
+ aa->praa_old_status = lustre_msg_get_status(req->rq_repmsg);
+ req->rq_status = 0;
+ req->rq_interpret_reply = ptlrpc_replay_interpret;
+ /* Readjust the timeout for current conditions */
+ ptlrpc_at_set_req_timeout(req);
+
+ /* Tell server the net_latency, so the server can calculate how long
+ * it should wait for next replay */
+ lustre_msg_set_service_time(req->rq_reqmsg,
+ ptlrpc_at_get_net_latency(req));
+ DEBUG_REQ(D_HA, req, "REPLAY");
+
+ atomic_inc(&req->rq_import->imp_replay_inflight);
+ ptlrpc_request_addref(req); /* ptlrpcd needs a ref */
+
+ ptlrpcd_add_req(req, PDL_POLICY_LOCAL, -1);
+ return 0;
+}
+EXPORT_SYMBOL(ptlrpc_replay_req);
+
+/**
+ * Aborts all in-flight request on import \a imp sending and delayed lists
+ */
+void ptlrpc_abort_inflight(struct obd_import *imp)
+{
+ struct list_head *tmp, *n;
+
+ /* Make sure that no new requests get processed for this import.
+ * ptlrpc_{queue,set}_wait must (and does) hold imp_lock while testing
+ * this flag and then putting requests on sending_list or delayed_list.
+ */
+ spin_lock(&imp->imp_lock);
+
+ /* XXX locking? Maybe we should remove each request with the list
+ * locked? Also, how do we know if the requests on the list are
+ * being freed at this time?
+ */
+ list_for_each_safe(tmp, n, &imp->imp_sending_list) {
+ struct ptlrpc_request *req =
+ list_entry(tmp, struct ptlrpc_request, rq_list);
+
+ DEBUG_REQ(D_RPCTRACE, req, "inflight");
+
+ spin_lock(&req->rq_lock);
+ if (req->rq_import_generation < imp->imp_generation) {
+ req->rq_err = 1;
+ req->rq_status = -EIO;
+ ptlrpc_client_wake_req(req);
+ }
+ spin_unlock(&req->rq_lock);
+ }
+
+ list_for_each_safe(tmp, n, &imp->imp_delayed_list) {
+ struct ptlrpc_request *req =
+ list_entry(tmp, struct ptlrpc_request, rq_list);
+
+ DEBUG_REQ(D_RPCTRACE, req, "aborting waiting req");
+
+ spin_lock(&req->rq_lock);
+ if (req->rq_import_generation < imp->imp_generation) {
+ req->rq_err = 1;
+ req->rq_status = -EIO;
+ ptlrpc_client_wake_req(req);
+ }
+ spin_unlock(&req->rq_lock);
+ }
+
+ /* Last chance to free reqs left on the replay list, but we
+ * will still leak reqs that haven't committed. */
+ if (imp->imp_replayable)
+ ptlrpc_free_committed(imp);
+
+ spin_unlock(&imp->imp_lock);
+}
+EXPORT_SYMBOL(ptlrpc_abort_inflight);
+
+/**
+ * Abort all uncompleted requests in request set \a set
+ */
+void ptlrpc_abort_set(struct ptlrpc_request_set *set)
+{
+ struct list_head *tmp, *pos;
+
+ LASSERT(set != NULL);
+
+ list_for_each_safe(pos, tmp, &set->set_requests) {
+ struct ptlrpc_request *req =
+ list_entry(pos, struct ptlrpc_request,
+ rq_set_chain);
+
+ spin_lock(&req->rq_lock);
+ if (req->rq_phase != RQ_PHASE_RPC) {
+ spin_unlock(&req->rq_lock);
+ continue;
+ }
+
+ req->rq_err = 1;
+ req->rq_status = -EINTR;
+ ptlrpc_client_wake_req(req);
+ spin_unlock(&req->rq_lock);
+ }
+}
+
+static __u64 ptlrpc_last_xid;
+static spinlock_t ptlrpc_last_xid_lock;
+
+/**
+ * Initialize the XID for the node. This is common among all requests on
+ * this node, and only requires the property that it is monotonically
+ * increasing. It does not need to be sequential. Since this is also used
+ * as the RDMA match bits, it is important that a single client NOT have
+ * the same match bits for two different in-flight requests, hence we do
+ * NOT want to have an XID per target or similar.
+ *
+ * To avoid an unlikely collision between match bits after a client reboot
+ * (which would deliver old data into the wrong RDMA buffer) initialize
+ * the XID based on the current time, assuming a maximum RPC rate of 1M RPC/s.
+ * If the time is clearly incorrect, we instead use a 62-bit random number.
+ * In the worst case the random number will overflow 1M RPCs per second in
+ * 9133 years, or permutations thereof.
+ */
+#define YEAR_2004 (1ULL << 30)
+void ptlrpc_init_xid(void)
+{
+ time_t now = get_seconds();
+
+ spin_lock_init(&ptlrpc_last_xid_lock);
+ if (now < YEAR_2004) {
+ cfs_get_random_bytes(&ptlrpc_last_xid, sizeof(ptlrpc_last_xid));
+ ptlrpc_last_xid >>= 2;
+ ptlrpc_last_xid |= (1ULL << 61);
+ } else {
+ ptlrpc_last_xid = (__u64)now << 20;
+ }
+
+ /* Always need to be aligned to a power-of-two for multi-bulk BRW */
+ CLASSERT((PTLRPC_BULK_OPS_COUNT & (PTLRPC_BULK_OPS_COUNT - 1)) == 0);
+ ptlrpc_last_xid &= PTLRPC_BULK_OPS_MASK;
+}
+
+/**
+ * Increase xid and returns resulting new value to the caller.
+ *
+ * Multi-bulk BRW RPCs consume multiple XIDs for each bulk transfer, starting
+ * at the returned xid, up to xid + PTLRPC_BULK_OPS_COUNT - 1. The BRW RPC
+ * itself uses the last bulk xid needed, so the server can determine the
+ * the number of bulk transfers from the RPC XID and a bitmask. The starting
+ * xid must align to a power-of-two value.
+ *
+ * This is assumed to be true due to the initial ptlrpc_last_xid
+ * value also being initialized to a power-of-two value. LU-1431
+ */
+__u64 ptlrpc_next_xid(void)
+{
+ __u64 next;
+
+ spin_lock(&ptlrpc_last_xid_lock);
+ next = ptlrpc_last_xid + PTLRPC_BULK_OPS_COUNT;
+ ptlrpc_last_xid = next;
+ spin_unlock(&ptlrpc_last_xid_lock);
+
+ return next;
+}
+EXPORT_SYMBOL(ptlrpc_next_xid);
+
+/**
+ * Get a glimpse at what next xid value might have been.
+ * Returns possible next xid.
+ */
+__u64 ptlrpc_sample_next_xid(void)
+{
+#if BITS_PER_LONG == 32
+ /* need to avoid possible word tearing on 32-bit systems */
+ __u64 next;
+
+ spin_lock(&ptlrpc_last_xid_lock);
+ next = ptlrpc_last_xid + PTLRPC_BULK_OPS_COUNT;
+ spin_unlock(&ptlrpc_last_xid_lock);
+
+ return next;
+#else
+ /* No need to lock, since returned value is racy anyways */
+ return ptlrpc_last_xid + PTLRPC_BULK_OPS_COUNT;
+#endif
+}
+EXPORT_SYMBOL(ptlrpc_sample_next_xid);
+
+/**
+ * Functions for operating ptlrpc workers.
+ *
+ * A ptlrpc work is a function which will be running inside ptlrpc context.
+ * The callback shouldn't sleep otherwise it will block that ptlrpcd thread.
+ *
+ * 1. after a work is created, it can be used many times, that is:
+ * handler = ptlrpcd_alloc_work();
+ * ptlrpcd_queue_work();
+ *
+ * queue it again when necessary:
+ * ptlrpcd_queue_work();
+ * ptlrpcd_destroy_work();
+ * 2. ptlrpcd_queue_work() can be called by multiple processes meanwhile, but
+ * it will only be queued once in any time. Also as its name implies, it may
+ * have delay before it really runs by ptlrpcd thread.
+ */
+struct ptlrpc_work_async_args {
+ int (*cb)(const struct lu_env *, void *);
+ void *cbdata;
+};
+
+static void ptlrpcd_add_work_req(struct ptlrpc_request *req)
+{
+ /* re-initialize the req */
+ req->rq_timeout = obd_timeout;
+ req->rq_sent = get_seconds();
+ req->rq_deadline = req->rq_sent + req->rq_timeout;
+ req->rq_reply_deadline = req->rq_deadline;
+ req->rq_phase = RQ_PHASE_INTERPRET;
+ req->rq_next_phase = RQ_PHASE_COMPLETE;
+ req->rq_xid = ptlrpc_next_xid();
+ req->rq_import_generation = req->rq_import->imp_generation;
+
+ ptlrpcd_add_req(req, PDL_POLICY_ROUND, -1);
+}
+
+static int work_interpreter(const struct lu_env *env,
+ struct ptlrpc_request *req, void *data, int rc)
+{
+ struct ptlrpc_work_async_args *arg = data;
+
+ LASSERT(ptlrpcd_check_work(req));
+ LASSERT(arg->cb != NULL);
+
+ rc = arg->cb(env, arg->cbdata);
+
+ list_del_init(&req->rq_set_chain);
+ req->rq_set = NULL;
+
+ if (atomic_dec_return(&req->rq_refcount) > 1) {
+ atomic_set(&req->rq_refcount, 2);
+ ptlrpcd_add_work_req(req);
+ }
+ return rc;
+}
+
+static int worker_format;
+
+static int ptlrpcd_check_work(struct ptlrpc_request *req)
+{
+ return req->rq_pill.rc_fmt == (void *)&worker_format;
+}
+
+/**
+ * Create a work for ptlrpc.
+ */
+void *ptlrpcd_alloc_work(struct obd_import *imp,
+ int (*cb)(const struct lu_env *, void *), void *cbdata)
+{
+ struct ptlrpc_request *req = NULL;
+ struct ptlrpc_work_async_args *args;
+
+ might_sleep();
+
+ if (cb == NULL)
+ return ERR_PTR(-EINVAL);
+
+ /* copy some code from deprecated fakereq. */
+ req = ptlrpc_request_cache_alloc(GFP_NOFS);
+ if (req == NULL) {
+ CERROR("ptlrpc: run out of memory!\n");
+ return ERR_PTR(-ENOMEM);
+ }
+
+ req->rq_send_state = LUSTRE_IMP_FULL;
+ req->rq_type = PTL_RPC_MSG_REQUEST;
+ req->rq_import = class_import_get(imp);
+ req->rq_export = NULL;
+ req->rq_interpret_reply = work_interpreter;
+ /* don't want reply */
+ req->rq_receiving_reply = 0;
+ req->rq_req_unlink = req->rq_reply_unlink = 0;
+ req->rq_no_delay = req->rq_no_resend = 1;
+ req->rq_pill.rc_fmt = (void *)&worker_format;
+
+ spin_lock_init(&req->rq_lock);
+ INIT_LIST_HEAD(&req->rq_list);
+ INIT_LIST_HEAD(&req->rq_replay_list);
+ INIT_LIST_HEAD(&req->rq_set_chain);
+ INIT_LIST_HEAD(&req->rq_history_list);
+ INIT_LIST_HEAD(&req->rq_exp_list);
+ init_waitqueue_head(&req->rq_reply_waitq);
+ init_waitqueue_head(&req->rq_set_waitq);
+ atomic_set(&req->rq_refcount, 1);
+
+ CLASSERT(sizeof(*args) <= sizeof(req->rq_async_args));
+ args = ptlrpc_req_async_args(req);
+ args->cb = cb;
+ args->cbdata = cbdata;
+
+ return req;
+}
+EXPORT_SYMBOL(ptlrpcd_alloc_work);
+
+void ptlrpcd_destroy_work(void *handler)
+{
+ struct ptlrpc_request *req = handler;
+
+ if (req)
+ ptlrpc_req_finished(req);
+}
+EXPORT_SYMBOL(ptlrpcd_destroy_work);
+
+int ptlrpcd_queue_work(void *handler)
+{
+ struct ptlrpc_request *req = handler;
+
+ /*
+ * Check if the req is already being queued.
+ *
+ * Here comes a trick: it lacks a way of checking if a req is being
+ * processed reliably in ptlrpc. Here I have to use refcount of req
+ * for this purpose. This is okay because the caller should use this
+ * req as opaque data. - Jinshan
+ */
+ LASSERT(atomic_read(&req->rq_refcount) > 0);
+ if (atomic_inc_return(&req->rq_refcount) == 2)
+ ptlrpcd_add_work_req(req);
+ return 0;
+}
+EXPORT_SYMBOL(ptlrpcd_queue_work);
diff --git a/drivers/staging/lustre/lustre/ptlrpc/connection.c b/drivers/staging/lustre/lustre/ptlrpc/connection.c
new file mode 100644
index 000000000..7e27397ce
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ptlrpc/connection.c
@@ -0,0 +1,241 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#define DEBUG_SUBSYSTEM S_RPC
+#include "../include/obd_support.h"
+#include "../include/obd_class.h"
+#include "../include/lustre_net.h"
+
+#include "ptlrpc_internal.h"
+
+static struct cfs_hash *conn_hash;
+static cfs_hash_ops_t conn_hash_ops;
+
+struct ptlrpc_connection *
+ptlrpc_connection_get(lnet_process_id_t peer, lnet_nid_t self,
+ struct obd_uuid *uuid)
+{
+ struct ptlrpc_connection *conn, *conn2;
+
+ conn = cfs_hash_lookup(conn_hash, &peer);
+ if (conn)
+ goto out;
+
+ OBD_ALLOC_PTR(conn);
+ if (!conn)
+ return NULL;
+
+ conn->c_peer = peer;
+ conn->c_self = self;
+ INIT_HLIST_NODE(&conn->c_hash);
+ atomic_set(&conn->c_refcount, 1);
+ if (uuid)
+ obd_str2uuid(&conn->c_remote_uuid, uuid->uuid);
+
+ /*
+ * Add the newly created conn to the hash, on key collision we
+ * lost a racing addition and must destroy our newly allocated
+ * connection. The object which exists in the has will be
+ * returned and may be compared against out object.
+ */
+ /* In the function below, .hs_keycmp resolves to
+ * conn_keycmp() */
+ /* coverity[overrun-buffer-val] */
+ conn2 = cfs_hash_findadd_unique(conn_hash, &peer, &conn->c_hash);
+ if (conn != conn2) {
+ OBD_FREE_PTR(conn);
+ conn = conn2;
+ }
+out:
+ CDEBUG(D_INFO, "conn=%p refcount %d to %s\n",
+ conn, atomic_read(&conn->c_refcount),
+ libcfs_nid2str(conn->c_peer.nid));
+ return conn;
+}
+EXPORT_SYMBOL(ptlrpc_connection_get);
+
+int ptlrpc_connection_put(struct ptlrpc_connection *conn)
+{
+ int rc = 0;
+
+ if (!conn)
+ return rc;
+
+ LASSERT(atomic_read(&conn->c_refcount) > 1);
+
+ /*
+ * We do not remove connection from hashtable and
+ * do not free it even if last caller released ref,
+ * as we want to have it cached for the case it is
+ * needed again.
+ *
+ * Deallocating it and later creating new connection
+ * again would be wastful. This way we also avoid
+ * expensive locking to protect things from get/put
+ * race when found cached connection is freed by
+ * ptlrpc_connection_put().
+ *
+ * It will be freed later in module unload time,
+ * when ptlrpc_connection_fini()->lh_exit->conn_exit()
+ * path is called.
+ */
+ if (atomic_dec_return(&conn->c_refcount) == 1)
+ rc = 1;
+
+ CDEBUG(D_INFO, "PUT conn=%p refcount %d to %s\n",
+ conn, atomic_read(&conn->c_refcount),
+ libcfs_nid2str(conn->c_peer.nid));
+
+ return rc;
+}
+EXPORT_SYMBOL(ptlrpc_connection_put);
+
+struct ptlrpc_connection *
+ptlrpc_connection_addref(struct ptlrpc_connection *conn)
+{
+ atomic_inc(&conn->c_refcount);
+ CDEBUG(D_INFO, "conn=%p refcount %d to %s\n",
+ conn, atomic_read(&conn->c_refcount),
+ libcfs_nid2str(conn->c_peer.nid));
+
+ return conn;
+}
+EXPORT_SYMBOL(ptlrpc_connection_addref);
+
+int ptlrpc_connection_init(void)
+{
+ conn_hash = cfs_hash_create("CONN_HASH",
+ HASH_CONN_CUR_BITS,
+ HASH_CONN_MAX_BITS,
+ HASH_CONN_BKT_BITS, 0,
+ CFS_HASH_MIN_THETA,
+ CFS_HASH_MAX_THETA,
+ &conn_hash_ops, CFS_HASH_DEFAULT);
+ if (!conn_hash)
+ return -ENOMEM;
+
+ return 0;
+}
+EXPORT_SYMBOL(ptlrpc_connection_init);
+
+void ptlrpc_connection_fini(void)
+{
+ cfs_hash_putref(conn_hash);
+}
+EXPORT_SYMBOL(ptlrpc_connection_fini);
+
+/*
+ * Hash operations for net_peer<->connection
+ */
+static unsigned
+conn_hashfn(struct cfs_hash *hs, const void *key, unsigned mask)
+{
+ return cfs_hash_djb2_hash(key, sizeof(lnet_process_id_t), mask);
+}
+
+static int
+conn_keycmp(const void *key, struct hlist_node *hnode)
+{
+ struct ptlrpc_connection *conn;
+ const lnet_process_id_t *conn_key;
+
+ LASSERT(key != NULL);
+ conn_key = (lnet_process_id_t *)key;
+ conn = hlist_entry(hnode, struct ptlrpc_connection, c_hash);
+
+ return conn_key->nid == conn->c_peer.nid &&
+ conn_key->pid == conn->c_peer.pid;
+}
+
+static void *
+conn_key(struct hlist_node *hnode)
+{
+ struct ptlrpc_connection *conn;
+
+ conn = hlist_entry(hnode, struct ptlrpc_connection, c_hash);
+ return &conn->c_peer;
+}
+
+static void *
+conn_object(struct hlist_node *hnode)
+{
+ return hlist_entry(hnode, struct ptlrpc_connection, c_hash);
+}
+
+static void
+conn_get(struct cfs_hash *hs, struct hlist_node *hnode)
+{
+ struct ptlrpc_connection *conn;
+
+ conn = hlist_entry(hnode, struct ptlrpc_connection, c_hash);
+ atomic_inc(&conn->c_refcount);
+}
+
+static void
+conn_put_locked(struct cfs_hash *hs, struct hlist_node *hnode)
+{
+ struct ptlrpc_connection *conn;
+
+ conn = hlist_entry(hnode, struct ptlrpc_connection, c_hash);
+ atomic_dec(&conn->c_refcount);
+}
+
+static void
+conn_exit(struct cfs_hash *hs, struct hlist_node *hnode)
+{
+ struct ptlrpc_connection *conn;
+
+ conn = hlist_entry(hnode, struct ptlrpc_connection, c_hash);
+ /*
+ * Nothing should be left. Connection user put it and
+ * connection also was deleted from table by this time
+ * so we should have 0 refs.
+ */
+ LASSERTF(atomic_read(&conn->c_refcount) == 0,
+ "Busy connection with %d refs\n",
+ atomic_read(&conn->c_refcount));
+ OBD_FREE_PTR(conn);
+}
+
+static cfs_hash_ops_t conn_hash_ops = {
+ .hs_hash = conn_hashfn,
+ .hs_keycmp = conn_keycmp,
+ .hs_key = conn_key,
+ .hs_object = conn_object,
+ .hs_get = conn_get,
+ .hs_put_locked = conn_put_locked,
+ .hs_exit = conn_exit,
+};
diff --git a/drivers/staging/lustre/lustre/ptlrpc/errno.c b/drivers/staging/lustre/lustre/ptlrpc/errno.c
new file mode 100644
index 000000000..73f8374f1
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ptlrpc/errno.c
@@ -0,0 +1,380 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.gnu.org/licenses/gpl-2.0.txt
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (C) 2011 FUJITSU LIMITED. All rights reserved.
+ *
+ * Copyright (c) 2013, Intel Corporation.
+ */
+
+#include "../../include/linux/libcfs/libcfs.h"
+#include "../include/lustre/lustre_errno.h"
+
+/*
+ * The two translation tables below must define a one-to-one mapping between
+ * host and network errnos.
+ *
+ * EWOULDBLOCK is equal to EAGAIN on all architectures except for parisc, which
+ * appears irrelevant. Thus, existing references to EWOULDBLOCK are fine.
+ *
+ * EDEADLOCK is equal to EDEADLK on x86 but not on sparc, at least. A sparc
+ * host has no context-free way to determine if a LUSTRE_EDEADLK represents an
+ * EDEADLK or an EDEADLOCK. Therefore, all existing references to EDEADLOCK
+ * that need to be transferred on wire have been replaced with EDEADLK.
+ */
+static int lustre_errno_hton_mapping[] = {
+ [EPERM] = LUSTRE_EPERM,
+ [ENOENT] = LUSTRE_ENOENT,
+ [ESRCH] = LUSTRE_ESRCH,
+ [EINTR] = LUSTRE_EINTR,
+ [EIO] = LUSTRE_EIO,
+ [ENXIO] = LUSTRE_ENXIO,
+ [E2BIG] = LUSTRE_E2BIG,
+ [ENOEXEC] = LUSTRE_ENOEXEC,
+ [EBADF] = LUSTRE_EBADF,
+ [ECHILD] = LUSTRE_ECHILD,
+ [EAGAIN] = LUSTRE_EAGAIN,
+ [ENOMEM] = LUSTRE_ENOMEM,
+ [EACCES] = LUSTRE_EACCES,
+ [EFAULT] = LUSTRE_EFAULT,
+ [ENOTBLK] = LUSTRE_ENOTBLK,
+ [EBUSY] = LUSTRE_EBUSY,
+ [EEXIST] = LUSTRE_EEXIST,
+ [EXDEV] = LUSTRE_EXDEV,
+ [ENODEV] = LUSTRE_ENODEV,
+ [ENOTDIR] = LUSTRE_ENOTDIR,
+ [EISDIR] = LUSTRE_EISDIR,
+ [EINVAL] = LUSTRE_EINVAL,
+ [ENFILE] = LUSTRE_ENFILE,
+ [EMFILE] = LUSTRE_EMFILE,
+ [ENOTTY] = LUSTRE_ENOTTY,
+ [ETXTBSY] = LUSTRE_ETXTBSY,
+ [EFBIG] = LUSTRE_EFBIG,
+ [ENOSPC] = LUSTRE_ENOSPC,
+ [ESPIPE] = LUSTRE_ESPIPE,
+ [EROFS] = LUSTRE_EROFS,
+ [EMLINK] = LUSTRE_EMLINK,
+ [EPIPE] = LUSTRE_EPIPE,
+ [EDOM] = LUSTRE_EDOM,
+ [ERANGE] = LUSTRE_ERANGE,
+ [EDEADLK] = LUSTRE_EDEADLK,
+ [ENAMETOOLONG] = LUSTRE_ENAMETOOLONG,
+ [ENOLCK] = LUSTRE_ENOLCK,
+ [ENOSYS] = LUSTRE_ENOSYS,
+ [ENOTEMPTY] = LUSTRE_ENOTEMPTY,
+ [ELOOP] = LUSTRE_ELOOP,
+ [ENOMSG] = LUSTRE_ENOMSG,
+ [EIDRM] = LUSTRE_EIDRM,
+ [ECHRNG] = LUSTRE_ECHRNG,
+ [EL2NSYNC] = LUSTRE_EL2NSYNC,
+ [EL3HLT] = LUSTRE_EL3HLT,
+ [EL3RST] = LUSTRE_EL3RST,
+ [ELNRNG] = LUSTRE_ELNRNG,
+ [EUNATCH] = LUSTRE_EUNATCH,
+ [ENOCSI] = LUSTRE_ENOCSI,
+ [EL2HLT] = LUSTRE_EL2HLT,
+ [EBADE] = LUSTRE_EBADE,
+ [EBADR] = LUSTRE_EBADR,
+ [EXFULL] = LUSTRE_EXFULL,
+ [ENOANO] = LUSTRE_ENOANO,
+ [EBADRQC] = LUSTRE_EBADRQC,
+ [EBADSLT] = LUSTRE_EBADSLT,
+ [EBFONT] = LUSTRE_EBFONT,
+ [ENOSTR] = LUSTRE_ENOSTR,
+ [ENODATA] = LUSTRE_ENODATA,
+ [ETIME] = LUSTRE_ETIME,
+ [ENOSR] = LUSTRE_ENOSR,
+ [ENONET] = LUSTRE_ENONET,
+ [ENOPKG] = LUSTRE_ENOPKG,
+ [EREMOTE] = LUSTRE_EREMOTE,
+ [ENOLINK] = LUSTRE_ENOLINK,
+ [EADV] = LUSTRE_EADV,
+ [ESRMNT] = LUSTRE_ESRMNT,
+ [ECOMM] = LUSTRE_ECOMM,
+ [EPROTO] = LUSTRE_EPROTO,
+ [EMULTIHOP] = LUSTRE_EMULTIHOP,
+ [EDOTDOT] = LUSTRE_EDOTDOT,
+ [EBADMSG] = LUSTRE_EBADMSG,
+ [EOVERFLOW] = LUSTRE_EOVERFLOW,
+ [ENOTUNIQ] = LUSTRE_ENOTUNIQ,
+ [EBADFD] = LUSTRE_EBADFD,
+ [EREMCHG] = LUSTRE_EREMCHG,
+ [ELIBACC] = LUSTRE_ELIBACC,
+ [ELIBBAD] = LUSTRE_ELIBBAD,
+ [ELIBSCN] = LUSTRE_ELIBSCN,
+ [ELIBMAX] = LUSTRE_ELIBMAX,
+ [ELIBEXEC] = LUSTRE_ELIBEXEC,
+ [EILSEQ] = LUSTRE_EILSEQ,
+ [ERESTART] = LUSTRE_ERESTART,
+ [ESTRPIPE] = LUSTRE_ESTRPIPE,
+ [EUSERS] = LUSTRE_EUSERS,
+ [ENOTSOCK] = LUSTRE_ENOTSOCK,
+ [EDESTADDRREQ] = LUSTRE_EDESTADDRREQ,
+ [EMSGSIZE] = LUSTRE_EMSGSIZE,
+ [EPROTOTYPE] = LUSTRE_EPROTOTYPE,
+ [ENOPROTOOPT] = LUSTRE_ENOPROTOOPT,
+ [EPROTONOSUPPORT] = LUSTRE_EPROTONOSUPPORT,
+ [ESOCKTNOSUPPORT] = LUSTRE_ESOCKTNOSUPPORT,
+ [EOPNOTSUPP] = LUSTRE_EOPNOTSUPP,
+ [EPFNOSUPPORT] = LUSTRE_EPFNOSUPPORT,
+ [EAFNOSUPPORT] = LUSTRE_EAFNOSUPPORT,
+ [EADDRINUSE] = LUSTRE_EADDRINUSE,
+ [EADDRNOTAVAIL] = LUSTRE_EADDRNOTAVAIL,
+ [ENETDOWN] = LUSTRE_ENETDOWN,
+ [ENETUNREACH] = LUSTRE_ENETUNREACH,
+ [ENETRESET] = LUSTRE_ENETRESET,
+ [ECONNABORTED] = LUSTRE_ECONNABORTED,
+ [ECONNRESET] = LUSTRE_ECONNRESET,
+ [ENOBUFS] = LUSTRE_ENOBUFS,
+ [EISCONN] = LUSTRE_EISCONN,
+ [ENOTCONN] = LUSTRE_ENOTCONN,
+ [ESHUTDOWN] = LUSTRE_ESHUTDOWN,
+ [ETOOMANYREFS] = LUSTRE_ETOOMANYREFS,
+ [ETIMEDOUT] = LUSTRE_ETIMEDOUT,
+ [ECONNREFUSED] = LUSTRE_ECONNREFUSED,
+ [EHOSTDOWN] = LUSTRE_EHOSTDOWN,
+ [EHOSTUNREACH] = LUSTRE_EHOSTUNREACH,
+ [EALREADY] = LUSTRE_EALREADY,
+ [EINPROGRESS] = LUSTRE_EINPROGRESS,
+ [ESTALE] = LUSTRE_ESTALE,
+ [EUCLEAN] = LUSTRE_EUCLEAN,
+ [ENOTNAM] = LUSTRE_ENOTNAM,
+ [ENAVAIL] = LUSTRE_ENAVAIL,
+ [EISNAM] = LUSTRE_EISNAM,
+ [EREMOTEIO] = LUSTRE_EREMOTEIO,
+ [EDQUOT] = LUSTRE_EDQUOT,
+ [ENOMEDIUM] = LUSTRE_ENOMEDIUM,
+ [EMEDIUMTYPE] = LUSTRE_EMEDIUMTYPE,
+ [ECANCELED] = LUSTRE_ECANCELED,
+ [ENOKEY] = LUSTRE_ENOKEY,
+ [EKEYEXPIRED] = LUSTRE_EKEYEXPIRED,
+ [EKEYREVOKED] = LUSTRE_EKEYREVOKED,
+ [EKEYREJECTED] = LUSTRE_EKEYREJECTED,
+ [EOWNERDEAD] = LUSTRE_EOWNERDEAD,
+ [ENOTRECOVERABLE] = LUSTRE_ENOTRECOVERABLE,
+ [ERESTARTSYS] = LUSTRE_ERESTARTSYS,
+ [ERESTARTNOINTR] = LUSTRE_ERESTARTNOINTR,
+ [ERESTARTNOHAND] = LUSTRE_ERESTARTNOHAND,
+ [ENOIOCTLCMD] = LUSTRE_ENOIOCTLCMD,
+ [ERESTART_RESTARTBLOCK] = LUSTRE_ERESTART_RESTARTBLOCK,
+ [EBADHANDLE] = LUSTRE_EBADHANDLE,
+ [ENOTSYNC] = LUSTRE_ENOTSYNC,
+ [EBADCOOKIE] = LUSTRE_EBADCOOKIE,
+ [ENOTSUPP] = LUSTRE_ENOTSUPP,
+ [ETOOSMALL] = LUSTRE_ETOOSMALL,
+ [ESERVERFAULT] = LUSTRE_ESERVERFAULT,
+ [EBADTYPE] = LUSTRE_EBADTYPE,
+ [EJUKEBOX] = LUSTRE_EJUKEBOX,
+ [EIOCBQUEUED] = LUSTRE_EIOCBQUEUED,
+};
+
+static int lustre_errno_ntoh_mapping[] = {
+ [LUSTRE_EPERM] = EPERM,
+ [LUSTRE_ENOENT] = ENOENT,
+ [LUSTRE_ESRCH] = ESRCH,
+ [LUSTRE_EINTR] = EINTR,
+ [LUSTRE_EIO] = EIO,
+ [LUSTRE_ENXIO] = ENXIO,
+ [LUSTRE_E2BIG] = E2BIG,
+ [LUSTRE_ENOEXEC] = ENOEXEC,
+ [LUSTRE_EBADF] = EBADF,
+ [LUSTRE_ECHILD] = ECHILD,
+ [LUSTRE_EAGAIN] = EAGAIN,
+ [LUSTRE_ENOMEM] = ENOMEM,
+ [LUSTRE_EACCES] = EACCES,
+ [LUSTRE_EFAULT] = EFAULT,
+ [LUSTRE_ENOTBLK] = ENOTBLK,
+ [LUSTRE_EBUSY] = EBUSY,
+ [LUSTRE_EEXIST] = EEXIST,
+ [LUSTRE_EXDEV] = EXDEV,
+ [LUSTRE_ENODEV] = ENODEV,
+ [LUSTRE_ENOTDIR] = ENOTDIR,
+ [LUSTRE_EISDIR] = EISDIR,
+ [LUSTRE_EINVAL] = EINVAL,
+ [LUSTRE_ENFILE] = ENFILE,
+ [LUSTRE_EMFILE] = EMFILE,
+ [LUSTRE_ENOTTY] = ENOTTY,
+ [LUSTRE_ETXTBSY] = ETXTBSY,
+ [LUSTRE_EFBIG] = EFBIG,
+ [LUSTRE_ENOSPC] = ENOSPC,
+ [LUSTRE_ESPIPE] = ESPIPE,
+ [LUSTRE_EROFS] = EROFS,
+ [LUSTRE_EMLINK] = EMLINK,
+ [LUSTRE_EPIPE] = EPIPE,
+ [LUSTRE_EDOM] = EDOM,
+ [LUSTRE_ERANGE] = ERANGE,
+ [LUSTRE_EDEADLK] = EDEADLK,
+ [LUSTRE_ENAMETOOLONG] = ENAMETOOLONG,
+ [LUSTRE_ENOLCK] = ENOLCK,
+ [LUSTRE_ENOSYS] = ENOSYS,
+ [LUSTRE_ENOTEMPTY] = ENOTEMPTY,
+ [LUSTRE_ELOOP] = ELOOP,
+ [LUSTRE_ENOMSG] = ENOMSG,
+ [LUSTRE_EIDRM] = EIDRM,
+ [LUSTRE_ECHRNG] = ECHRNG,
+ [LUSTRE_EL2NSYNC] = EL2NSYNC,
+ [LUSTRE_EL3HLT] = EL3HLT,
+ [LUSTRE_EL3RST] = EL3RST,
+ [LUSTRE_ELNRNG] = ELNRNG,
+ [LUSTRE_EUNATCH] = EUNATCH,
+ [LUSTRE_ENOCSI] = ENOCSI,
+ [LUSTRE_EL2HLT] = EL2HLT,
+ [LUSTRE_EBADE] = EBADE,
+ [LUSTRE_EBADR] = EBADR,
+ [LUSTRE_EXFULL] = EXFULL,
+ [LUSTRE_ENOANO] = ENOANO,
+ [LUSTRE_EBADRQC] = EBADRQC,
+ [LUSTRE_EBADSLT] = EBADSLT,
+ [LUSTRE_EBFONT] = EBFONT,
+ [LUSTRE_ENOSTR] = ENOSTR,
+ [LUSTRE_ENODATA] = ENODATA,
+ [LUSTRE_ETIME] = ETIME,
+ [LUSTRE_ENOSR] = ENOSR,
+ [LUSTRE_ENONET] = ENONET,
+ [LUSTRE_ENOPKG] = ENOPKG,
+ [LUSTRE_EREMOTE] = EREMOTE,
+ [LUSTRE_ENOLINK] = ENOLINK,
+ [LUSTRE_EADV] = EADV,
+ [LUSTRE_ESRMNT] = ESRMNT,
+ [LUSTRE_ECOMM] = ECOMM,
+ [LUSTRE_EPROTO] = EPROTO,
+ [LUSTRE_EMULTIHOP] = EMULTIHOP,
+ [LUSTRE_EDOTDOT] = EDOTDOT,
+ [LUSTRE_EBADMSG] = EBADMSG,
+ [LUSTRE_EOVERFLOW] = EOVERFLOW,
+ [LUSTRE_ENOTUNIQ] = ENOTUNIQ,
+ [LUSTRE_EBADFD] = EBADFD,
+ [LUSTRE_EREMCHG] = EREMCHG,
+ [LUSTRE_ELIBACC] = ELIBACC,
+ [LUSTRE_ELIBBAD] = ELIBBAD,
+ [LUSTRE_ELIBSCN] = ELIBSCN,
+ [LUSTRE_ELIBMAX] = ELIBMAX,
+ [LUSTRE_ELIBEXEC] = ELIBEXEC,
+ [LUSTRE_EILSEQ] = EILSEQ,
+ [LUSTRE_ERESTART] = ERESTART,
+ [LUSTRE_ESTRPIPE] = ESTRPIPE,
+ [LUSTRE_EUSERS] = EUSERS,
+ [LUSTRE_ENOTSOCK] = ENOTSOCK,
+ [LUSTRE_EDESTADDRREQ] = EDESTADDRREQ,
+ [LUSTRE_EMSGSIZE] = EMSGSIZE,
+ [LUSTRE_EPROTOTYPE] = EPROTOTYPE,
+ [LUSTRE_ENOPROTOOPT] = ENOPROTOOPT,
+ [LUSTRE_EPROTONOSUPPORT] = EPROTONOSUPPORT,
+ [LUSTRE_ESOCKTNOSUPPORT] = ESOCKTNOSUPPORT,
+ [LUSTRE_EOPNOTSUPP] = EOPNOTSUPP,
+ [LUSTRE_EPFNOSUPPORT] = EPFNOSUPPORT,
+ [LUSTRE_EAFNOSUPPORT] = EAFNOSUPPORT,
+ [LUSTRE_EADDRINUSE] = EADDRINUSE,
+ [LUSTRE_EADDRNOTAVAIL] = EADDRNOTAVAIL,
+ [LUSTRE_ENETDOWN] = ENETDOWN,
+ [LUSTRE_ENETUNREACH] = ENETUNREACH,
+ [LUSTRE_ENETRESET] = ENETRESET,
+ [LUSTRE_ECONNABORTED] = ECONNABORTED,
+ [LUSTRE_ECONNRESET] = ECONNRESET,
+ [LUSTRE_ENOBUFS] = ENOBUFS,
+ [LUSTRE_EISCONN] = EISCONN,
+ [LUSTRE_ENOTCONN] = ENOTCONN,
+ [LUSTRE_ESHUTDOWN] = ESHUTDOWN,
+ [LUSTRE_ETOOMANYREFS] = ETOOMANYREFS,
+ [LUSTRE_ETIMEDOUT] = ETIMEDOUT,
+ [LUSTRE_ECONNREFUSED] = ECONNREFUSED,
+ [LUSTRE_EHOSTDOWN] = EHOSTDOWN,
+ [LUSTRE_EHOSTUNREACH] = EHOSTUNREACH,
+ [LUSTRE_EALREADY] = EALREADY,
+ [LUSTRE_EINPROGRESS] = EINPROGRESS,
+ [LUSTRE_ESTALE] = ESTALE,
+ [LUSTRE_EUCLEAN] = EUCLEAN,
+ [LUSTRE_ENOTNAM] = ENOTNAM,
+ [LUSTRE_ENAVAIL] = ENAVAIL,
+ [LUSTRE_EISNAM] = EISNAM,
+ [LUSTRE_EREMOTEIO] = EREMOTEIO,
+ [LUSTRE_EDQUOT] = EDQUOT,
+ [LUSTRE_ENOMEDIUM] = ENOMEDIUM,
+ [LUSTRE_EMEDIUMTYPE] = EMEDIUMTYPE,
+ [LUSTRE_ECANCELED] = ECANCELED,
+ [LUSTRE_ENOKEY] = ENOKEY,
+ [LUSTRE_EKEYEXPIRED] = EKEYEXPIRED,
+ [LUSTRE_EKEYREVOKED] = EKEYREVOKED,
+ [LUSTRE_EKEYREJECTED] = EKEYREJECTED,
+ [LUSTRE_EOWNERDEAD] = EOWNERDEAD,
+ [LUSTRE_ENOTRECOVERABLE] = ENOTRECOVERABLE,
+ [LUSTRE_ERESTARTSYS] = ERESTARTSYS,
+ [LUSTRE_ERESTARTNOINTR] = ERESTARTNOINTR,
+ [LUSTRE_ERESTARTNOHAND] = ERESTARTNOHAND,
+ [LUSTRE_ENOIOCTLCMD] = ENOIOCTLCMD,
+ [LUSTRE_ERESTART_RESTARTBLOCK] = ERESTART_RESTARTBLOCK,
+ [LUSTRE_EBADHANDLE] = EBADHANDLE,
+ [LUSTRE_ENOTSYNC] = ENOTSYNC,
+ [LUSTRE_EBADCOOKIE] = EBADCOOKIE,
+ [LUSTRE_ENOTSUPP] = ENOTSUPP,
+ [LUSTRE_ETOOSMALL] = ETOOSMALL,
+ [LUSTRE_ESERVERFAULT] = ESERVERFAULT,
+ [LUSTRE_EBADTYPE] = EBADTYPE,
+ [LUSTRE_EJUKEBOX] = EJUKEBOX,
+ [LUSTRE_EIOCBQUEUED] = EIOCBQUEUED,
+};
+
+unsigned int lustre_errno_hton(unsigned int h)
+{
+ unsigned int n;
+
+ if (h == 0) {
+ n = 0;
+ } else if (h < ARRAY_SIZE(lustre_errno_hton_mapping)) {
+ n = lustre_errno_hton_mapping[h];
+ if (n == 0)
+ goto generic;
+ } else {
+generic:
+ /*
+ * A generic errno is better than the unknown one that could
+ * mean anything to a different host.
+ */
+ n = LUSTRE_EIO;
+ }
+
+ return n;
+}
+EXPORT_SYMBOL(lustre_errno_hton);
+
+unsigned int lustre_errno_ntoh(unsigned int n)
+{
+ unsigned int h;
+
+ if (n == 0) {
+ h = 0;
+ } else if (n < ARRAY_SIZE(lustre_errno_ntoh_mapping)) {
+ h = lustre_errno_ntoh_mapping[n];
+ if (h == 0)
+ goto generic;
+ } else {
+generic:
+ /*
+ * Similar to the situation in lustre_errno_hton(), an unknown
+ * network errno could coincide with anything. Hence, it is
+ * better to return a generic errno.
+ */
+ h = EIO;
+ }
+
+ return h;
+}
+EXPORT_SYMBOL(lustre_errno_ntoh);
diff --git a/drivers/staging/lustre/lustre/ptlrpc/events.c b/drivers/staging/lustre/lustre/ptlrpc/events.c
new file mode 100644
index 000000000..7f8644e01
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ptlrpc/events.c
@@ -0,0 +1,585 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#define DEBUG_SUBSYSTEM S_RPC
+
+#include "../../include/linux/libcfs/libcfs.h"
+# ifdef __mips64__
+# include <linux/kernel.h>
+# endif
+
+#include "../include/obd_class.h"
+#include "../include/lustre_net.h"
+#include "../include/lustre_sec.h"
+#include "ptlrpc_internal.h"
+
+lnet_handle_eq_t ptlrpc_eq_h;
+
+/*
+ * Client's outgoing request callback
+ */
+void request_out_callback(lnet_event_t *ev)
+{
+ struct ptlrpc_cb_id *cbid = ev->md.user_ptr;
+ struct ptlrpc_request *req = cbid->cbid_arg;
+
+ LASSERT(ev->type == LNET_EVENT_SEND ||
+ ev->type == LNET_EVENT_UNLINK);
+ LASSERT(ev->unlinked);
+
+ DEBUG_REQ(D_NET, req, "type %d, status %d", ev->type, ev->status);
+
+ sptlrpc_request_out_callback(req);
+ spin_lock(&req->rq_lock);
+ req->rq_real_sent = get_seconds();
+ if (ev->unlinked)
+ req->rq_req_unlink = 0;
+
+ if (ev->type == LNET_EVENT_UNLINK || ev->status != 0) {
+
+ /* Failed send: make it seem like the reply timed out, just
+ * like failing sends in client.c does currently... */
+
+ req->rq_net_err = 1;
+ ptlrpc_client_wake_req(req);
+ }
+ spin_unlock(&req->rq_lock);
+
+ ptlrpc_req_finished(req);
+}
+
+/*
+ * Client's incoming reply callback
+ */
+void reply_in_callback(lnet_event_t *ev)
+{
+ struct ptlrpc_cb_id *cbid = ev->md.user_ptr;
+ struct ptlrpc_request *req = cbid->cbid_arg;
+
+ DEBUG_REQ(D_NET, req, "type %d, status %d", ev->type, ev->status);
+
+ LASSERT(ev->type == LNET_EVENT_PUT || ev->type == LNET_EVENT_UNLINK);
+ LASSERT(ev->md.start == req->rq_repbuf);
+ LASSERT(ev->offset + ev->mlength <= req->rq_repbuf_len);
+ /* We've set LNET_MD_MANAGE_REMOTE for all outgoing requests
+ for adaptive timeouts' early reply. */
+ LASSERT((ev->md.options & LNET_MD_MANAGE_REMOTE) != 0);
+
+ spin_lock(&req->rq_lock);
+
+ req->rq_receiving_reply = 0;
+ req->rq_early = 0;
+ if (ev->unlinked)
+ req->rq_reply_unlink = 0;
+
+ if (ev->status)
+ goto out_wake;
+
+ if (ev->type == LNET_EVENT_UNLINK) {
+ LASSERT(ev->unlinked);
+ DEBUG_REQ(D_NET, req, "unlink");
+ goto out_wake;
+ }
+
+ if (ev->mlength < ev->rlength) {
+ CDEBUG(D_RPCTRACE, "truncate req %p rpc %d - %d+%d\n", req,
+ req->rq_replen, ev->rlength, ev->offset);
+ req->rq_reply_truncate = 1;
+ req->rq_replied = 1;
+ req->rq_status = -EOVERFLOW;
+ req->rq_nob_received = ev->rlength + ev->offset;
+ goto out_wake;
+ }
+
+ if ((ev->offset == 0) &&
+ ((lustre_msghdr_get_flags(req->rq_reqmsg) & MSGHDR_AT_SUPPORT))) {
+ /* Early reply */
+ DEBUG_REQ(D_ADAPTTO, req,
+ "Early reply received: mlen=%u offset=%d replen=%d replied=%d unlinked=%d",
+ ev->mlength, ev->offset,
+ req->rq_replen, req->rq_replied, ev->unlinked);
+
+ req->rq_early_count++; /* number received, client side */
+
+ if (req->rq_replied) /* already got the real reply */
+ goto out_wake;
+
+ req->rq_early = 1;
+ req->rq_reply_off = ev->offset;
+ req->rq_nob_received = ev->mlength;
+ /* And we're still receiving */
+ req->rq_receiving_reply = 1;
+ } else {
+ /* Real reply */
+ req->rq_rep_swab_mask = 0;
+ req->rq_replied = 1;
+ /* Got reply, no resend required */
+ req->rq_resend = 0;
+ req->rq_reply_off = ev->offset;
+ req->rq_nob_received = ev->mlength;
+ /* LNetMDUnlink can't be called under the LNET_LOCK,
+ so we must unlink in ptlrpc_unregister_reply */
+ DEBUG_REQ(D_INFO, req,
+ "reply in flags=%x mlen=%u offset=%d replen=%d",
+ lustre_msg_get_flags(req->rq_reqmsg),
+ ev->mlength, ev->offset, req->rq_replen);
+ }
+
+ req->rq_import->imp_last_reply_time = get_seconds();
+
+out_wake:
+ /* NB don't unlock till after wakeup; req can disappear under us
+ * since we don't have our own ref */
+ ptlrpc_client_wake_req(req);
+ spin_unlock(&req->rq_lock);
+}
+
+/*
+ * Client's bulk has been written/read
+ */
+void client_bulk_callback(lnet_event_t *ev)
+{
+ struct ptlrpc_cb_id *cbid = ev->md.user_ptr;
+ struct ptlrpc_bulk_desc *desc = cbid->cbid_arg;
+ struct ptlrpc_request *req;
+
+ LASSERT((desc->bd_type == BULK_PUT_SINK &&
+ ev->type == LNET_EVENT_PUT) ||
+ (desc->bd_type == BULK_GET_SOURCE &&
+ ev->type == LNET_EVENT_GET) ||
+ ev->type == LNET_EVENT_UNLINK);
+ LASSERT(ev->unlinked);
+
+ if (CFS_FAIL_CHECK_ORSET(OBD_FAIL_PTLRPC_CLIENT_BULK_CB, CFS_FAIL_ONCE))
+ ev->status = -EIO;
+
+ if (CFS_FAIL_CHECK_ORSET(OBD_FAIL_PTLRPC_CLIENT_BULK_CB2,
+ CFS_FAIL_ONCE))
+ ev->status = -EIO;
+
+ CDEBUG((ev->status == 0) ? D_NET : D_ERROR,
+ "event type %d, status %d, desc %p\n",
+ ev->type, ev->status, desc);
+
+ spin_lock(&desc->bd_lock);
+ req = desc->bd_req;
+ LASSERT(desc->bd_md_count > 0);
+ desc->bd_md_count--;
+
+ if (ev->type != LNET_EVENT_UNLINK && ev->status == 0) {
+ desc->bd_nob_transferred += ev->mlength;
+ desc->bd_sender = ev->sender;
+ } else {
+ /* start reconnect and resend if network error hit */
+ spin_lock(&req->rq_lock);
+ req->rq_net_err = 1;
+ spin_unlock(&req->rq_lock);
+ }
+
+ if (ev->status != 0)
+ desc->bd_failure = 1;
+
+ /* NB don't unlock till after wakeup; desc can disappear under us
+ * otherwise */
+ if (desc->bd_md_count == 0)
+ ptlrpc_client_wake_req(desc->bd_req);
+
+ spin_unlock(&desc->bd_lock);
+}
+
+/*
+ * We will have percpt request history list for ptlrpc service in upcoming
+ * patches because we don't want to be serialized by current per-service
+ * history operations. So we require history ID can (somehow) show arriving
+ * order w/o grabbing global lock, and user can sort them in userspace.
+ *
+ * This is how we generate history ID for ptlrpc_request:
+ * ----------------------------------------------------
+ * | 32 bits | 16 bits | (16 - X)bits | X bits |
+ * ----------------------------------------------------
+ * | seconds | usec / 16 | sequence | CPT id |
+ * ----------------------------------------------------
+ *
+ * it might not be precise but should be good enough.
+ */
+
+#define REQS_CPT_BITS(svcpt) ((svcpt)->scp_service->srv_cpt_bits)
+
+#define REQS_SEC_SHIFT 32
+#define REQS_USEC_SHIFT 16
+#define REQS_SEQ_SHIFT(svcpt) REQS_CPT_BITS(svcpt)
+
+static void ptlrpc_req_add_history(struct ptlrpc_service_part *svcpt,
+ struct ptlrpc_request *req)
+{
+ __u64 sec = req->rq_arrival_time.tv_sec;
+ __u32 usec = req->rq_arrival_time.tv_usec >> 4; /* usec / 16 */
+ __u64 new_seq;
+
+ /* set sequence ID for request and add it to history list,
+ * it must be called with hold svcpt::scp_lock */
+
+ new_seq = (sec << REQS_SEC_SHIFT) |
+ (usec << REQS_USEC_SHIFT) |
+ (svcpt->scp_cpt < 0 ? 0 : svcpt->scp_cpt);
+
+ if (new_seq > svcpt->scp_hist_seq) {
+ /* This handles the initial case of scp_hist_seq == 0 or
+ * we just jumped into a new time window */
+ svcpt->scp_hist_seq = new_seq;
+ } else {
+ LASSERT(REQS_SEQ_SHIFT(svcpt) < REQS_USEC_SHIFT);
+ /* NB: increase sequence number in current usec bucket,
+ * however, it's possible that we used up all bits for
+ * sequence and jumped into the next usec bucket (future time),
+ * then we hope there will be less RPCs per bucket at some
+ * point, and sequence will catch up again */
+ svcpt->scp_hist_seq += (1U << REQS_SEQ_SHIFT(svcpt));
+ new_seq = svcpt->scp_hist_seq;
+ }
+
+ req->rq_history_seq = new_seq;
+
+ list_add_tail(&req->rq_history_list, &svcpt->scp_hist_reqs);
+}
+
+/*
+ * Server's incoming request callback
+ */
+void request_in_callback(lnet_event_t *ev)
+{
+ struct ptlrpc_cb_id *cbid = ev->md.user_ptr;
+ struct ptlrpc_request_buffer_desc *rqbd = cbid->cbid_arg;
+ struct ptlrpc_service_part *svcpt = rqbd->rqbd_svcpt;
+ struct ptlrpc_service *service = svcpt->scp_service;
+ struct ptlrpc_request *req;
+
+ LASSERT(ev->type == LNET_EVENT_PUT ||
+ ev->type == LNET_EVENT_UNLINK);
+ LASSERT((char *)ev->md.start >= rqbd->rqbd_buffer);
+ LASSERT((char *)ev->md.start + ev->offset + ev->mlength <=
+ rqbd->rqbd_buffer + service->srv_buf_size);
+
+ CDEBUG((ev->status == 0) ? D_NET : D_ERROR,
+ "event type %d, status %d, service %s\n",
+ ev->type, ev->status, service->srv_name);
+
+ if (ev->unlinked) {
+ /* If this is the last request message to fit in the
+ * request buffer we can use the request object embedded in
+ * rqbd. Note that if we failed to allocate a request,
+ * we'd have to re-post the rqbd, which we can't do in this
+ * context. */
+ req = &rqbd->rqbd_req;
+ memset(req, 0, sizeof(*req));
+ } else {
+ LASSERT(ev->type == LNET_EVENT_PUT);
+ if (ev->status != 0) {
+ /* We moaned above already... */
+ return;
+ }
+ req = ptlrpc_request_cache_alloc(GFP_ATOMIC);
+ if (req == NULL) {
+ CERROR("Can't allocate incoming request descriptor: Dropping %s RPC from %s\n",
+ service->srv_name,
+ libcfs_id2str(ev->initiator));
+ return;
+ }
+ }
+
+ /* NB we ABSOLUTELY RELY on req being zeroed, so pointers are NULL,
+ * flags are reset and scalars are zero. We only set the message
+ * size to non-zero if this was a successful receive. */
+ req->rq_xid = ev->match_bits;
+ req->rq_reqbuf = ev->md.start + ev->offset;
+ if (ev->type == LNET_EVENT_PUT && ev->status == 0)
+ req->rq_reqdata_len = ev->mlength;
+ do_gettimeofday(&req->rq_arrival_time);
+ req->rq_peer = ev->initiator;
+ req->rq_self = ev->target.nid;
+ req->rq_rqbd = rqbd;
+ req->rq_phase = RQ_PHASE_NEW;
+ spin_lock_init(&req->rq_lock);
+ INIT_LIST_HEAD(&req->rq_timed_list);
+ INIT_LIST_HEAD(&req->rq_exp_list);
+ atomic_set(&req->rq_refcount, 1);
+ if (ev->type == LNET_EVENT_PUT)
+ CDEBUG(D_INFO, "incoming req@%p x%llu msgsize %u\n",
+ req, req->rq_xid, ev->mlength);
+
+ CDEBUG(D_RPCTRACE, "peer: %s\n", libcfs_id2str(req->rq_peer));
+
+ spin_lock(&svcpt->scp_lock);
+
+ ptlrpc_req_add_history(svcpt, req);
+
+ if (ev->unlinked) {
+ svcpt->scp_nrqbds_posted--;
+ CDEBUG(D_INFO, "Buffer complete: %d buffers still posted\n",
+ svcpt->scp_nrqbds_posted);
+
+ /* Normally, don't complain about 0 buffers posted; LNET won't
+ * drop incoming reqs since we set the portal lazy */
+ if (test_req_buffer_pressure &&
+ ev->type != LNET_EVENT_UNLINK &&
+ svcpt->scp_nrqbds_posted == 0)
+ CWARN("All %s request buffers busy\n",
+ service->srv_name);
+
+ /* req takes over the network's ref on rqbd */
+ } else {
+ /* req takes a ref on rqbd */
+ rqbd->rqbd_refcount++;
+ }
+
+ list_add_tail(&req->rq_list, &svcpt->scp_req_incoming);
+ svcpt->scp_nreqs_incoming++;
+
+ /* NB everything can disappear under us once the request
+ * has been queued and we unlock, so do the wake now... */
+ wake_up(&svcpt->scp_waitq);
+
+ spin_unlock(&svcpt->scp_lock);
+}
+
+/*
+ * Server's outgoing reply callback
+ */
+void reply_out_callback(lnet_event_t *ev)
+{
+ struct ptlrpc_cb_id *cbid = ev->md.user_ptr;
+ struct ptlrpc_reply_state *rs = cbid->cbid_arg;
+ struct ptlrpc_service_part *svcpt = rs->rs_svcpt;
+
+ LASSERT(ev->type == LNET_EVENT_SEND ||
+ ev->type == LNET_EVENT_ACK ||
+ ev->type == LNET_EVENT_UNLINK);
+
+ if (!rs->rs_difficult) {
+ /* 'Easy' replies have no further processing so I drop the
+ * net's ref on 'rs' */
+ LASSERT(ev->unlinked);
+ ptlrpc_rs_decref(rs);
+ return;
+ }
+
+ LASSERT(rs->rs_on_net);
+
+ if (ev->unlinked) {
+ /* Last network callback. The net's ref on 'rs' stays put
+ * until ptlrpc_handle_rs() is done with it */
+ spin_lock(&svcpt->scp_rep_lock);
+ spin_lock(&rs->rs_lock);
+
+ rs->rs_on_net = 0;
+ if (!rs->rs_no_ack ||
+ rs->rs_transno <=
+ rs->rs_export->exp_obd->obd_last_committed)
+ ptlrpc_schedule_difficult_reply(rs);
+
+ spin_unlock(&rs->rs_lock);
+ spin_unlock(&svcpt->scp_rep_lock);
+ }
+}
+
+
+static void ptlrpc_master_callback(lnet_event_t *ev)
+{
+ struct ptlrpc_cb_id *cbid = ev->md.user_ptr;
+ void (*callback)(lnet_event_t *ev) = cbid->cbid_fn;
+
+ /* Honestly, it's best to find out early. */
+ LASSERT(cbid->cbid_arg != LP_POISON);
+ LASSERT(callback == request_out_callback ||
+ callback == reply_in_callback ||
+ callback == client_bulk_callback ||
+ callback == request_in_callback ||
+ callback == reply_out_callback);
+
+ callback(ev);
+}
+
+int ptlrpc_uuid_to_peer(struct obd_uuid *uuid,
+ lnet_process_id_t *peer, lnet_nid_t *self)
+{
+ int best_dist = 0;
+ __u32 best_order = 0;
+ int count = 0;
+ int rc = -ENOENT;
+ int portals_compatibility;
+ int dist;
+ __u32 order;
+ lnet_nid_t dst_nid;
+ lnet_nid_t src_nid;
+
+ portals_compatibility = LNetCtl(IOC_LIBCFS_PORTALS_COMPATIBILITY, NULL);
+
+ peer->pid = LUSTRE_SRV_LNET_PID;
+
+ /* Choose the matching UUID that's closest */
+ while (lustre_uuid_to_peer(uuid->uuid, &dst_nid, count++) == 0) {
+ dist = LNetDist(dst_nid, &src_nid, &order);
+ if (dist < 0)
+ continue;
+
+ if (dist == 0) { /* local! use loopback LND */
+ peer->nid = *self = LNET_MKNID(LNET_MKNET(LOLND, 0), 0);
+ rc = 0;
+ break;
+ }
+
+ if (rc < 0 ||
+ dist < best_dist ||
+ (dist == best_dist && order < best_order)) {
+ best_dist = dist;
+ best_order = order;
+
+ if (portals_compatibility > 1) {
+ /* Strong portals compatibility: Zero the nid's
+ * NET, so if I'm reading new config logs, or
+ * getting configured by (new) lconf I can
+ * still talk to old servers. */
+ dst_nid = LNET_MKNID(0, LNET_NIDADDR(dst_nid));
+ src_nid = LNET_MKNID(0, LNET_NIDADDR(src_nid));
+ }
+ peer->nid = dst_nid;
+ *self = src_nid;
+ rc = 0;
+ }
+ }
+
+ CDEBUG(D_NET, "%s->%s\n", uuid->uuid, libcfs_id2str(*peer));
+ return rc;
+}
+
+void ptlrpc_ni_fini(void)
+{
+ wait_queue_head_t waitq;
+ struct l_wait_info lwi;
+ int rc;
+ int retries;
+
+ /* Wait for the event queue to become idle since there may still be
+ * messages in flight with pending events (i.e. the fire-and-forget
+ * messages == client requests and "non-difficult" server
+ * replies */
+
+ for (retries = 0;; retries++) {
+ rc = LNetEQFree(ptlrpc_eq_h);
+ switch (rc) {
+ default:
+ LBUG();
+
+ case 0:
+ LNetNIFini();
+ return;
+
+ case -EBUSY:
+ if (retries != 0)
+ CWARN("Event queue still busy\n");
+
+ /* Wait for a bit */
+ init_waitqueue_head(&waitq);
+ lwi = LWI_TIMEOUT(cfs_time_seconds(2), NULL, NULL);
+ l_wait_event(waitq, 0, &lwi);
+ break;
+ }
+ }
+ /* notreached */
+}
+
+lnet_pid_t ptl_get_pid(void)
+{
+ lnet_pid_t pid;
+
+ pid = LUSTRE_SRV_LNET_PID;
+ return pid;
+}
+
+int ptlrpc_ni_init(void)
+{
+ int rc;
+ lnet_pid_t pid;
+
+ pid = ptl_get_pid();
+ CDEBUG(D_NET, "My pid is: %x\n", pid);
+
+ /* We're not passing any limits yet... */
+ rc = LNetNIInit(pid);
+ if (rc < 0) {
+ CDEBUG(D_NET, "Can't init network interface: %d\n", rc);
+ return -ENOENT;
+ }
+
+ /* CAVEAT EMPTOR: how we process portals events is _radically_
+ * different depending on... */
+ /* kernel LNet calls our master callback when there are new event,
+ * because we are guaranteed to get every event via callback,
+ * so we just set EQ size to 0 to avoid overhead of serializing
+ * enqueue/dequeue operations in LNet. */
+ rc = LNetEQAlloc(0, ptlrpc_master_callback, &ptlrpc_eq_h);
+ if (rc == 0)
+ return 0;
+
+ CERROR("Failed to allocate event queue: %d\n", rc);
+ LNetNIFini();
+
+ return -ENOMEM;
+}
+
+
+int ptlrpc_init_portals(void)
+{
+ int rc = ptlrpc_ni_init();
+
+ if (rc != 0) {
+ CERROR("network initialisation failed\n");
+ return -EIO;
+ }
+ rc = ptlrpcd_addref();
+ if (rc == 0)
+ return 0;
+
+ CERROR("rpcd initialisation failed\n");
+ ptlrpc_ni_fini();
+ return rc;
+}
+
+void ptlrpc_exit_portals(void)
+{
+ ptlrpcd_decref();
+ ptlrpc_ni_fini();
+}
diff --git a/drivers/staging/lustre/lustre/ptlrpc/import.c b/drivers/staging/lustre/lustre/ptlrpc/import.c
new file mode 100644
index 000000000..d5fc689c0
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ptlrpc/import.c
@@ -0,0 +1,1642 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/ptlrpc/import.c
+ *
+ * Author: Mike Shaver <shaver@clusterfs.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_RPC
+
+#include "../include/obd_support.h"
+#include "../include/lustre_ha.h"
+#include "../include/lustre_net.h"
+#include "../include/lustre_import.h"
+#include "../include/lustre_export.h"
+#include "../include/obd.h"
+#include "../include/obd_cksum.h"
+#include "../include/obd_class.h"
+
+#include "ptlrpc_internal.h"
+
+struct ptlrpc_connect_async_args {
+ __u64 pcaa_peer_committed;
+ int pcaa_initial_connect;
+};
+
+/**
+ * Updates import \a imp current state to provided \a state value
+ * Helper function. Must be called under imp_lock.
+ */
+static void __import_set_state(struct obd_import *imp,
+ enum lustre_imp_state state)
+{
+ switch (state) {
+ case LUSTRE_IMP_CLOSED:
+ case LUSTRE_IMP_NEW:
+ case LUSTRE_IMP_DISCON:
+ case LUSTRE_IMP_CONNECTING:
+ break;
+ case LUSTRE_IMP_REPLAY_WAIT:
+ imp->imp_replay_state = LUSTRE_IMP_REPLAY_LOCKS;
+ break;
+ default:
+ imp->imp_replay_state = LUSTRE_IMP_REPLAY;
+ }
+
+ imp->imp_state = state;
+ imp->imp_state_hist[imp->imp_state_hist_idx].ish_state = state;
+ imp->imp_state_hist[imp->imp_state_hist_idx].ish_time =
+ get_seconds();
+ imp->imp_state_hist_idx = (imp->imp_state_hist_idx + 1) %
+ IMP_STATE_HIST_LEN;
+}
+
+/* A CLOSED import should remain so. */
+#define IMPORT_SET_STATE_NOLOCK(imp, state) \
+do { \
+ if (imp->imp_state != LUSTRE_IMP_CLOSED) { \
+ CDEBUG(D_HA, "%p %s: changing import state from %s to %s\n", \
+ imp, obd2cli_tgt(imp->imp_obd), \
+ ptlrpc_import_state_name(imp->imp_state), \
+ ptlrpc_import_state_name(state)); \
+ __import_set_state(imp, state); \
+ } \
+} while (0)
+
+#define IMPORT_SET_STATE(imp, state) \
+do { \
+ spin_lock(&imp->imp_lock); \
+ IMPORT_SET_STATE_NOLOCK(imp, state); \
+ spin_unlock(&imp->imp_lock); \
+} while (0)
+
+
+static int ptlrpc_connect_interpret(const struct lu_env *env,
+ struct ptlrpc_request *request,
+ void *data, int rc);
+int ptlrpc_import_recovery_state_machine(struct obd_import *imp);
+
+/* Only this function is allowed to change the import state when it is
+ * CLOSED. I would rather refcount the import and free it after
+ * disconnection like we do with exports. To do that, the client_obd
+ * will need to save the peer info somewhere other than in the import,
+ * though. */
+int ptlrpc_init_import(struct obd_import *imp)
+{
+ spin_lock(&imp->imp_lock);
+
+ imp->imp_generation++;
+ imp->imp_state = LUSTRE_IMP_NEW;
+
+ spin_unlock(&imp->imp_lock);
+
+ return 0;
+}
+EXPORT_SYMBOL(ptlrpc_init_import);
+
+#define UUID_STR "_UUID"
+void deuuidify(char *uuid, const char *prefix, char **uuid_start, int *uuid_len)
+{
+ *uuid_start = !prefix || strncmp(uuid, prefix, strlen(prefix))
+ ? uuid : uuid + strlen(prefix);
+
+ *uuid_len = strlen(*uuid_start);
+
+ if (*uuid_len < strlen(UUID_STR))
+ return;
+
+ if (!strncmp(*uuid_start + *uuid_len - strlen(UUID_STR),
+ UUID_STR, strlen(UUID_STR)))
+ *uuid_len -= strlen(UUID_STR);
+}
+EXPORT_SYMBOL(deuuidify);
+
+/**
+ * Returns true if import was FULL, false if import was already not
+ * connected.
+ * @imp - import to be disconnected
+ * @conn_cnt - connection count (epoch) of the request that timed out
+ * and caused the disconnection. In some cases, multiple
+ * inflight requests can fail to a single target (e.g. OST
+ * bulk requests) and if one has already caused a reconnection
+ * (increasing the import->conn_cnt) the older failure should
+ * not also cause a reconnection. If zero it forces a reconnect.
+ */
+int ptlrpc_set_import_discon(struct obd_import *imp, __u32 conn_cnt)
+{
+ int rc = 0;
+
+ spin_lock(&imp->imp_lock);
+
+ if (imp->imp_state == LUSTRE_IMP_FULL &&
+ (conn_cnt == 0 || conn_cnt == imp->imp_conn_cnt)) {
+ char *target_start;
+ int target_len;
+
+ deuuidify(obd2cli_tgt(imp->imp_obd), NULL,
+ &target_start, &target_len);
+
+ if (imp->imp_replayable) {
+ LCONSOLE_WARN("%s: Connection to %.*s (at %s) was lost; in progress operations using this service will wait for recovery to complete\n",
+ imp->imp_obd->obd_name, target_len, target_start,
+ libcfs_nid2str(imp->imp_connection->c_peer.nid));
+ } else {
+ LCONSOLE_ERROR_MSG(0x166, "%s: Connection to %.*s (at %s) was lost; in progress operations using this service will fail\n",
+ imp->imp_obd->obd_name,
+ target_len, target_start,
+ libcfs_nid2str(imp->imp_connection->c_peer.nid));
+ }
+ IMPORT_SET_STATE_NOLOCK(imp, LUSTRE_IMP_DISCON);
+ spin_unlock(&imp->imp_lock);
+
+ if (obd_dump_on_timeout)
+ libcfs_debug_dumplog();
+
+ obd_import_event(imp->imp_obd, imp, IMP_EVENT_DISCON);
+ rc = 1;
+ } else {
+ spin_unlock(&imp->imp_lock);
+ CDEBUG(D_HA, "%s: import %p already %s (conn %u, was %u): %s\n",
+ imp->imp_client->cli_name, imp,
+ (imp->imp_state == LUSTRE_IMP_FULL &&
+ imp->imp_conn_cnt > conn_cnt) ?
+ "reconnected" : "not connected", imp->imp_conn_cnt,
+ conn_cnt, ptlrpc_import_state_name(imp->imp_state));
+ }
+
+ return rc;
+}
+
+/* Must be called with imp_lock held! */
+static void ptlrpc_deactivate_and_unlock_import(struct obd_import *imp)
+{
+ assert_spin_locked(&imp->imp_lock);
+
+ CDEBUG(D_HA, "setting import %s INVALID\n", obd2cli_tgt(imp->imp_obd));
+ imp->imp_invalid = 1;
+ imp->imp_generation++;
+ spin_unlock(&imp->imp_lock);
+
+ ptlrpc_abort_inflight(imp);
+ obd_import_event(imp->imp_obd, imp, IMP_EVENT_INACTIVE);
+}
+
+/*
+ * This acts as a barrier; all existing requests are rejected, and
+ * no new requests will be accepted until the import is valid again.
+ */
+void ptlrpc_deactivate_import(struct obd_import *imp)
+{
+ spin_lock(&imp->imp_lock);
+ ptlrpc_deactivate_and_unlock_import(imp);
+}
+EXPORT_SYMBOL(ptlrpc_deactivate_import);
+
+static unsigned int
+ptlrpc_inflight_deadline(struct ptlrpc_request *req, time_t now)
+{
+ long dl;
+
+ if (!(((req->rq_phase == RQ_PHASE_RPC) && !req->rq_waiting) ||
+ (req->rq_phase == RQ_PHASE_BULK) ||
+ (req->rq_phase == RQ_PHASE_NEW)))
+ return 0;
+
+ if (req->rq_timedout)
+ return 0;
+
+ if (req->rq_phase == RQ_PHASE_NEW)
+ dl = req->rq_sent;
+ else
+ dl = req->rq_deadline;
+
+ if (dl <= now)
+ return 0;
+
+ return dl - now;
+}
+
+static unsigned int ptlrpc_inflight_timeout(struct obd_import *imp)
+{
+ time_t now = get_seconds();
+ struct list_head *tmp, *n;
+ struct ptlrpc_request *req;
+ unsigned int timeout = 0;
+
+ spin_lock(&imp->imp_lock);
+ list_for_each_safe(tmp, n, &imp->imp_sending_list) {
+ req = list_entry(tmp, struct ptlrpc_request, rq_list);
+ timeout = max(ptlrpc_inflight_deadline(req, now), timeout);
+ }
+ spin_unlock(&imp->imp_lock);
+ return timeout;
+}
+
+/**
+ * This function will invalidate the import, if necessary, then block
+ * for all the RPC completions, and finally notify the obd to
+ * invalidate its state (ie cancel locks, clear pending requests,
+ * etc).
+ */
+void ptlrpc_invalidate_import(struct obd_import *imp)
+{
+ struct list_head *tmp, *n;
+ struct ptlrpc_request *req;
+ struct l_wait_info lwi;
+ unsigned int timeout;
+ int rc;
+
+ atomic_inc(&imp->imp_inval_count);
+
+ if (!imp->imp_invalid || imp->imp_obd->obd_no_recov)
+ ptlrpc_deactivate_import(imp);
+
+ CFS_FAIL_TIMEOUT(OBD_FAIL_MGS_CONNECT_NET, 3 * cfs_fail_val / 2);
+ LASSERT(imp->imp_invalid);
+
+ /* Wait forever until inflight == 0. We really can't do it another
+ * way because in some cases we need to wait for very long reply
+ * unlink. We can't do anything before that because there is really
+ * no guarantee that some rdma transfer is not in progress right now. */
+ do {
+ /* Calculate max timeout for waiting on rpcs to error
+ * out. Use obd_timeout if calculated value is smaller
+ * than it. */
+ if (!OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REPL_UNLINK)) {
+ timeout = ptlrpc_inflight_timeout(imp);
+ timeout += timeout / 3;
+
+ if (timeout == 0)
+ timeout = obd_timeout;
+ } else {
+ /* decrease the interval to increase race condition */
+ timeout = 1;
+ }
+
+ CDEBUG(D_RPCTRACE,
+ "Sleeping %d sec for inflight to error out\n",
+ timeout);
+
+ /* Wait for all requests to error out and call completion
+ * callbacks. Cap it at obd_timeout -- these should all
+ * have been locally cancelled by ptlrpc_abort_inflight. */
+ lwi = LWI_TIMEOUT_INTERVAL(
+ cfs_timeout_cap(cfs_time_seconds(timeout)),
+ (timeout > 1)?cfs_time_seconds(1):cfs_time_seconds(1)/2,
+ NULL, NULL);
+ rc = l_wait_event(imp->imp_recovery_waitq,
+ (atomic_read(&imp->imp_inflight) == 0),
+ &lwi);
+ if (rc) {
+ const char *cli_tgt = obd2cli_tgt(imp->imp_obd);
+
+ CERROR("%s: rc = %d waiting for callback (%d != 0)\n",
+ cli_tgt, rc,
+ atomic_read(&imp->imp_inflight));
+
+ spin_lock(&imp->imp_lock);
+ if (atomic_read(&imp->imp_inflight) == 0) {
+ int count = atomic_read(&imp->imp_unregistering);
+
+ /* We know that "unregistering" rpcs only can
+ * survive in sending or delaying lists (they
+ * maybe waiting for long reply unlink in
+ * sluggish nets). Let's check this. If there
+ * is no inflight and unregistering != 0, this
+ * is bug. */
+ LASSERTF(count == 0, "Some RPCs are still unregistering: %d\n",
+ count);
+
+ /* Let's save one loop as soon as inflight have
+ * dropped to zero. No new inflights possible at
+ * this point. */
+ rc = 0;
+ } else {
+ list_for_each_safe(tmp, n,
+ &imp->imp_sending_list) {
+ req = list_entry(tmp,
+ struct ptlrpc_request,
+ rq_list);
+ DEBUG_REQ(D_ERROR, req,
+ "still on sending list");
+ }
+ list_for_each_safe(tmp, n,
+ &imp->imp_delayed_list) {
+ req = list_entry(tmp,
+ struct ptlrpc_request,
+ rq_list);
+ DEBUG_REQ(D_ERROR, req,
+ "still on delayed list");
+ }
+
+ CERROR("%s: RPCs in \"%s\" phase found (%d). Network is sluggish? Waiting them to error out.\n",
+ cli_tgt,
+ ptlrpc_phase2str(RQ_PHASE_UNREGISTERING),
+ atomic_read(&imp->
+ imp_unregistering));
+ }
+ spin_unlock(&imp->imp_lock);
+ }
+ } while (rc != 0);
+
+ /*
+ * Let's additionally check that no new rpcs added to import in
+ * "invalidate" state.
+ */
+ LASSERT(atomic_read(&imp->imp_inflight) == 0);
+ obd_import_event(imp->imp_obd, imp, IMP_EVENT_INVALIDATE);
+ sptlrpc_import_flush_all_ctx(imp);
+
+ atomic_dec(&imp->imp_inval_count);
+ wake_up_all(&imp->imp_recovery_waitq);
+}
+EXPORT_SYMBOL(ptlrpc_invalidate_import);
+
+/* unset imp_invalid */
+void ptlrpc_activate_import(struct obd_import *imp)
+{
+ struct obd_device *obd = imp->imp_obd;
+
+ spin_lock(&imp->imp_lock);
+ if (imp->imp_deactive != 0) {
+ spin_unlock(&imp->imp_lock);
+ return;
+ }
+
+ imp->imp_invalid = 0;
+ spin_unlock(&imp->imp_lock);
+ obd_import_event(obd, imp, IMP_EVENT_ACTIVE);
+}
+EXPORT_SYMBOL(ptlrpc_activate_import);
+
+static void ptlrpc_pinger_force(struct obd_import *imp)
+{
+ CDEBUG(D_HA, "%s: waking up pinger s:%s\n", obd2cli_tgt(imp->imp_obd),
+ ptlrpc_import_state_name(imp->imp_state));
+
+ spin_lock(&imp->imp_lock);
+ imp->imp_force_verify = 1;
+ spin_unlock(&imp->imp_lock);
+
+ if (imp->imp_state != LUSTRE_IMP_CONNECTING)
+ ptlrpc_pinger_wake_up();
+}
+
+void ptlrpc_fail_import(struct obd_import *imp, __u32 conn_cnt)
+{
+ LASSERT(!imp->imp_dlm_fake);
+
+ if (ptlrpc_set_import_discon(imp, conn_cnt)) {
+ if (!imp->imp_replayable) {
+ CDEBUG(D_HA, "import %s@%s for %s not replayable, auto-deactivating\n",
+ obd2cli_tgt(imp->imp_obd),
+ imp->imp_connection->c_remote_uuid.uuid,
+ imp->imp_obd->obd_name);
+ ptlrpc_deactivate_import(imp);
+ }
+
+ ptlrpc_pinger_force(imp);
+ }
+}
+EXPORT_SYMBOL(ptlrpc_fail_import);
+
+int ptlrpc_reconnect_import(struct obd_import *imp)
+{
+#ifdef ENABLE_PINGER
+ struct l_wait_info lwi;
+ int secs = cfs_time_seconds(obd_timeout);
+ int rc;
+
+ ptlrpc_pinger_force(imp);
+
+ CDEBUG(D_HA, "%s: recovery started, waiting %u seconds\n",
+ obd2cli_tgt(imp->imp_obd), secs);
+
+ lwi = LWI_TIMEOUT(secs, NULL, NULL);
+ rc = l_wait_event(imp->imp_recovery_waitq,
+ !ptlrpc_import_in_recovery(imp), &lwi);
+ CDEBUG(D_HA, "%s: recovery finished s:%s\n", obd2cli_tgt(imp->imp_obd),
+ ptlrpc_import_state_name(imp->imp_state));
+ return rc;
+#else
+ ptlrpc_set_import_discon(imp, 0);
+ /* Force a new connect attempt */
+ ptlrpc_invalidate_import(imp);
+ /* Do a fresh connect next time by zeroing the handle */
+ ptlrpc_disconnect_import(imp, 1);
+ /* Wait for all invalidate calls to finish */
+ if (atomic_read(&imp->imp_inval_count) > 0) {
+ int rc;
+ struct l_wait_info lwi = LWI_INTR(LWI_ON_SIGNAL_NOOP, NULL);
+ rc = l_wait_event(imp->imp_recovery_waitq,
+ (atomic_read(&imp->imp_inval_count) == 0),
+ &lwi);
+ if (rc)
+ CERROR("Interrupted, inval=%d\n",
+ atomic_read(&imp->imp_inval_count));
+ }
+
+ /* Allow reconnect attempts */
+ imp->imp_obd->obd_no_recov = 0;
+ /* Remove 'invalid' flag */
+ ptlrpc_activate_import(imp);
+ /* Attempt a new connect */
+ ptlrpc_recover_import(imp, NULL, 0);
+ return 0;
+#endif
+}
+EXPORT_SYMBOL(ptlrpc_reconnect_import);
+
+/**
+ * Connection on import \a imp is changed to another one (if more than one is
+ * present). We typically chose connection that we have not tried to connect to
+ * the longest
+ */
+static int import_select_connection(struct obd_import *imp)
+{
+ struct obd_import_conn *imp_conn = NULL, *conn;
+ struct obd_export *dlmexp;
+ char *target_start;
+ int target_len, tried_all = 1;
+
+ spin_lock(&imp->imp_lock);
+
+ if (list_empty(&imp->imp_conn_list)) {
+ CERROR("%s: no connections available\n",
+ imp->imp_obd->obd_name);
+ spin_unlock(&imp->imp_lock);
+ return -EINVAL;
+ }
+
+ list_for_each_entry(conn, &imp->imp_conn_list, oic_item) {
+ CDEBUG(D_HA, "%s: connect to NID %s last attempt %llu\n",
+ imp->imp_obd->obd_name,
+ libcfs_nid2str(conn->oic_conn->c_peer.nid),
+ conn->oic_last_attempt);
+
+ /* If we have not tried this connection since
+ the last successful attempt, go with this one */
+ if ((conn->oic_last_attempt == 0) ||
+ cfs_time_beforeq_64(conn->oic_last_attempt,
+ imp->imp_last_success_conn)) {
+ imp_conn = conn;
+ tried_all = 0;
+ break;
+ }
+
+ /* If all of the connections have already been tried
+ since the last successful connection; just choose the
+ least recently used */
+ if (!imp_conn)
+ imp_conn = conn;
+ else if (cfs_time_before_64(conn->oic_last_attempt,
+ imp_conn->oic_last_attempt))
+ imp_conn = conn;
+ }
+
+ /* if not found, simply choose the current one */
+ if (!imp_conn || imp->imp_force_reconnect) {
+ LASSERT(imp->imp_conn_current);
+ imp_conn = imp->imp_conn_current;
+ tried_all = 0;
+ }
+ LASSERT(imp_conn->oic_conn);
+
+ /* If we've tried everything, and we're back to the beginning of the
+ list, increase our timeout and try again. It will be reset when
+ we do finally connect. (FIXME: really we should wait for all network
+ state associated with the last connection attempt to drain before
+ trying to reconnect on it.) */
+ if (tried_all && (imp->imp_conn_list.next == &imp_conn->oic_item)) {
+ struct adaptive_timeout *at = &imp->imp_at.iat_net_latency;
+ if (at_get(at) < CONNECTION_SWITCH_MAX) {
+ at_measured(at, at_get(at) + CONNECTION_SWITCH_INC);
+ if (at_get(at) > CONNECTION_SWITCH_MAX)
+ at_reset(at, CONNECTION_SWITCH_MAX);
+ }
+ LASSERT(imp_conn->oic_last_attempt);
+ CDEBUG(D_HA, "%s: tried all connections, increasing latency to %ds\n",
+ imp->imp_obd->obd_name, at_get(at));
+ }
+
+ imp_conn->oic_last_attempt = cfs_time_current_64();
+
+ /* switch connection, don't mind if it's same as the current one */
+ if (imp->imp_connection)
+ ptlrpc_connection_put(imp->imp_connection);
+ imp->imp_connection = ptlrpc_connection_addref(imp_conn->oic_conn);
+
+ dlmexp = class_conn2export(&imp->imp_dlm_handle);
+ LASSERT(dlmexp != NULL);
+ if (dlmexp->exp_connection)
+ ptlrpc_connection_put(dlmexp->exp_connection);
+ dlmexp->exp_connection = ptlrpc_connection_addref(imp_conn->oic_conn);
+ class_export_put(dlmexp);
+
+ if (imp->imp_conn_current != imp_conn) {
+ if (imp->imp_conn_current) {
+ deuuidify(obd2cli_tgt(imp->imp_obd), NULL,
+ &target_start, &target_len);
+
+ CDEBUG(D_HA, "%s: Connection changing to %.*s (at %s)\n",
+ imp->imp_obd->obd_name,
+ target_len, target_start,
+ libcfs_nid2str(imp_conn->oic_conn->c_peer.nid));
+ }
+
+ imp->imp_conn_current = imp_conn;
+ }
+
+ CDEBUG(D_HA, "%s: import %p using connection %s/%s\n",
+ imp->imp_obd->obd_name, imp, imp_conn->oic_uuid.uuid,
+ libcfs_nid2str(imp_conn->oic_conn->c_peer.nid));
+
+ spin_unlock(&imp->imp_lock);
+
+ return 0;
+}
+
+/*
+ * must be called under imp_lock
+ */
+static int ptlrpc_first_transno(struct obd_import *imp, __u64 *transno)
+{
+ struct ptlrpc_request *req;
+ struct list_head *tmp;
+
+ /* The requests in committed_list always have smaller transnos than
+ * the requests in replay_list */
+ if (!list_empty(&imp->imp_committed_list)) {
+ tmp = imp->imp_committed_list.next;
+ req = list_entry(tmp, struct ptlrpc_request, rq_replay_list);
+ *transno = req->rq_transno;
+ if (req->rq_transno == 0) {
+ DEBUG_REQ(D_ERROR, req,
+ "zero transno in committed_list");
+ LBUG();
+ }
+ return 1;
+ }
+ if (!list_empty(&imp->imp_replay_list)) {
+ tmp = imp->imp_replay_list.next;
+ req = list_entry(tmp, struct ptlrpc_request, rq_replay_list);
+ *transno = req->rq_transno;
+ if (req->rq_transno == 0) {
+ DEBUG_REQ(D_ERROR, req, "zero transno in replay_list");
+ LBUG();
+ }
+ return 1;
+ }
+ return 0;
+}
+
+/**
+ * Attempt to (re)connect import \a imp. This includes all preparations,
+ * initializing CONNECT RPC request and passing it to ptlrpcd for
+ * actual sending.
+ * Returns 0 on success or error code.
+ */
+int ptlrpc_connect_import(struct obd_import *imp)
+{
+ struct obd_device *obd = imp->imp_obd;
+ int initial_connect = 0;
+ int set_transno = 0;
+ __u64 committed_before_reconnect = 0;
+ struct ptlrpc_request *request;
+ char *bufs[] = { NULL,
+ obd2cli_tgt(imp->imp_obd),
+ obd->obd_uuid.uuid,
+ (char *)&imp->imp_dlm_handle,
+ (char *)&imp->imp_connect_data };
+ struct ptlrpc_connect_async_args *aa;
+ int rc;
+
+ spin_lock(&imp->imp_lock);
+ if (imp->imp_state == LUSTRE_IMP_CLOSED) {
+ spin_unlock(&imp->imp_lock);
+ CERROR("can't connect to a closed import\n");
+ return -EINVAL;
+ } else if (imp->imp_state == LUSTRE_IMP_FULL) {
+ spin_unlock(&imp->imp_lock);
+ CERROR("already connected\n");
+ return 0;
+ } else if (imp->imp_state == LUSTRE_IMP_CONNECTING) {
+ spin_unlock(&imp->imp_lock);
+ CERROR("already connecting\n");
+ return -EALREADY;
+ }
+
+ IMPORT_SET_STATE_NOLOCK(imp, LUSTRE_IMP_CONNECTING);
+
+ imp->imp_conn_cnt++;
+ imp->imp_resend_replay = 0;
+
+ if (!lustre_handle_is_used(&imp->imp_remote_handle))
+ initial_connect = 1;
+ else
+ committed_before_reconnect = imp->imp_peer_committed_transno;
+
+ set_transno = ptlrpc_first_transno(imp,
+ &imp->imp_connect_data.ocd_transno);
+ spin_unlock(&imp->imp_lock);
+
+ rc = import_select_connection(imp);
+ if (rc)
+ goto out;
+
+ rc = sptlrpc_import_sec_adapt(imp, NULL, NULL);
+ if (rc)
+ goto out;
+
+ /* Reset connect flags to the originally requested flags, in case
+ * the server is updated on-the-fly we will get the new features. */
+ imp->imp_connect_data.ocd_connect_flags = imp->imp_connect_flags_orig;
+ /* Reset ocd_version each time so the server knows the exact versions */
+ imp->imp_connect_data.ocd_version = LUSTRE_VERSION_CODE;
+ imp->imp_msghdr_flags &= ~MSGHDR_AT_SUPPORT;
+ imp->imp_msghdr_flags &= ~MSGHDR_CKSUM_INCOMPAT18;
+
+ rc = obd_reconnect(NULL, imp->imp_obd->obd_self_export, obd,
+ &obd->obd_uuid, &imp->imp_connect_data, NULL);
+ if (rc)
+ goto out;
+
+ request = ptlrpc_request_alloc(imp, &RQF_MDS_CONNECT);
+ if (request == NULL) {
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ rc = ptlrpc_request_bufs_pack(request, LUSTRE_OBD_VERSION,
+ imp->imp_connect_op, bufs, NULL);
+ if (rc) {
+ ptlrpc_request_free(request);
+ goto out;
+ }
+
+ /* Report the rpc service time to the server so that it knows how long
+ * to wait for clients to join recovery */
+ lustre_msg_set_service_time(request->rq_reqmsg,
+ at_timeout2est(request->rq_timeout));
+
+ /* The amount of time we give the server to process the connect req.
+ * import_select_connection will increase the net latency on
+ * repeated reconnect attempts to cover slow networks.
+ * We override/ignore the server rpc completion estimate here,
+ * which may be large if this is a reconnect attempt */
+ request->rq_timeout = INITIAL_CONNECT_TIMEOUT;
+ lustre_msg_set_timeout(request->rq_reqmsg, request->rq_timeout);
+
+ lustre_msg_add_op_flags(request->rq_reqmsg, MSG_CONNECT_NEXT_VER);
+
+ request->rq_no_resend = request->rq_no_delay = 1;
+ request->rq_send_state = LUSTRE_IMP_CONNECTING;
+ /* Allow a slightly larger reply for future growth compatibility */
+ req_capsule_set_size(&request->rq_pill, &RMF_CONNECT_DATA, RCL_SERVER,
+ sizeof(struct obd_connect_data)+16*sizeof(__u64));
+ ptlrpc_request_set_replen(request);
+ request->rq_interpret_reply = ptlrpc_connect_interpret;
+
+ CLASSERT(sizeof(*aa) <= sizeof(request->rq_async_args));
+ aa = ptlrpc_req_async_args(request);
+ memset(aa, 0, sizeof(*aa));
+
+ aa->pcaa_peer_committed = committed_before_reconnect;
+ aa->pcaa_initial_connect = initial_connect;
+
+ if (aa->pcaa_initial_connect) {
+ spin_lock(&imp->imp_lock);
+ imp->imp_replayable = 1;
+ spin_unlock(&imp->imp_lock);
+ lustre_msg_add_op_flags(request->rq_reqmsg,
+ MSG_CONNECT_INITIAL);
+ }
+
+ if (set_transno)
+ lustre_msg_add_op_flags(request->rq_reqmsg,
+ MSG_CONNECT_TRANSNO);
+
+ DEBUG_REQ(D_RPCTRACE, request, "(re)connect request (timeout %d)",
+ request->rq_timeout);
+ ptlrpcd_add_req(request, PDL_POLICY_ROUND, -1);
+ rc = 0;
+out:
+ if (rc != 0) {
+ IMPORT_SET_STATE(imp, LUSTRE_IMP_DISCON);
+ }
+
+ return rc;
+}
+EXPORT_SYMBOL(ptlrpc_connect_import);
+
+static void ptlrpc_maybe_ping_import_soon(struct obd_import *imp)
+{
+ int force_verify;
+
+ spin_lock(&imp->imp_lock);
+ force_verify = imp->imp_force_verify != 0;
+ spin_unlock(&imp->imp_lock);
+
+ if (force_verify)
+ ptlrpc_pinger_wake_up();
+}
+
+static int ptlrpc_busy_reconnect(int rc)
+{
+ return (rc == -EBUSY) || (rc == -EAGAIN);
+}
+
+/**
+ * interpret_reply callback for connect RPCs.
+ * Looks into returned status of connect operation and decides
+ * what to do with the import - i.e enter recovery, promote it to
+ * full state for normal operations of disconnect it due to an error.
+ */
+static int ptlrpc_connect_interpret(const struct lu_env *env,
+ struct ptlrpc_request *request,
+ void *data, int rc)
+{
+ struct ptlrpc_connect_async_args *aa = data;
+ struct obd_import *imp = request->rq_import;
+ struct client_obd *cli = &imp->imp_obd->u.cli;
+ struct lustre_handle old_hdl;
+ __u64 old_connect_flags;
+ int msg_flags;
+ struct obd_connect_data *ocd;
+ struct obd_export *exp;
+ int ret;
+
+ spin_lock(&imp->imp_lock);
+ if (imp->imp_state == LUSTRE_IMP_CLOSED) {
+ imp->imp_connect_tried = 1;
+ spin_unlock(&imp->imp_lock);
+ return 0;
+ }
+
+ if (rc) {
+ /* if this reconnect to busy export - not need select new target
+ * for connecting*/
+ imp->imp_force_reconnect = ptlrpc_busy_reconnect(rc);
+ spin_unlock(&imp->imp_lock);
+ ptlrpc_maybe_ping_import_soon(imp);
+ goto out;
+ }
+ spin_unlock(&imp->imp_lock);
+
+ LASSERT(imp->imp_conn_current);
+
+ msg_flags = lustre_msg_get_op_flags(request->rq_repmsg);
+
+ ret = req_capsule_get_size(&request->rq_pill, &RMF_CONNECT_DATA,
+ RCL_SERVER);
+ /* server replied obd_connect_data is always bigger */
+ ocd = req_capsule_server_sized_get(&request->rq_pill,
+ &RMF_CONNECT_DATA, ret);
+
+ if (ocd == NULL) {
+ CERROR("%s: no connect data from server\n",
+ imp->imp_obd->obd_name);
+ rc = -EPROTO;
+ goto out;
+ }
+
+ spin_lock(&imp->imp_lock);
+
+ /* All imports are pingable */
+ imp->imp_pingable = 1;
+ imp->imp_force_reconnect = 0;
+ imp->imp_force_verify = 0;
+
+ imp->imp_connect_data = *ocd;
+
+ CDEBUG(D_HA, "%s: connect to target with instance %u\n",
+ imp->imp_obd->obd_name, ocd->ocd_instance);
+ exp = class_conn2export(&imp->imp_dlm_handle);
+
+ spin_unlock(&imp->imp_lock);
+
+ /* check that server granted subset of flags we asked for. */
+ if ((ocd->ocd_connect_flags & imp->imp_connect_flags_orig) !=
+ ocd->ocd_connect_flags) {
+ CERROR("%s: Server didn't granted asked subset of flags: asked=%#llx grranted=%#llx\n",
+ imp->imp_obd->obd_name, imp->imp_connect_flags_orig,
+ ocd->ocd_connect_flags);
+ rc = -EPROTO;
+ goto out;
+ }
+
+ if (!exp) {
+ /* This could happen if export is cleaned during the
+ connect attempt */
+ CERROR("%s: missing export after connect\n",
+ imp->imp_obd->obd_name);
+ rc = -ENODEV;
+ goto out;
+ }
+ old_connect_flags = exp_connect_flags(exp);
+ exp->exp_connect_data = *ocd;
+ imp->imp_obd->obd_self_export->exp_connect_data = *ocd;
+ class_export_put(exp);
+
+ obd_import_event(imp->imp_obd, imp, IMP_EVENT_OCD);
+
+ if (aa->pcaa_initial_connect) {
+ spin_lock(&imp->imp_lock);
+ if (msg_flags & MSG_CONNECT_REPLAYABLE) {
+ imp->imp_replayable = 1;
+ spin_unlock(&imp->imp_lock);
+ CDEBUG(D_HA, "connected to replayable target: %s\n",
+ obd2cli_tgt(imp->imp_obd));
+ } else {
+ imp->imp_replayable = 0;
+ spin_unlock(&imp->imp_lock);
+ }
+
+ /* if applies, adjust the imp->imp_msg_magic here
+ * according to reply flags */
+
+ imp->imp_remote_handle =
+ *lustre_msg_get_handle(request->rq_repmsg);
+
+ /* Initial connects are allowed for clients with non-random
+ * uuids when servers are in recovery. Simply signal the
+ * servers replay is complete and wait in REPLAY_WAIT. */
+ if (msg_flags & MSG_CONNECT_RECOVERING) {
+ CDEBUG(D_HA, "connect to %s during recovery\n",
+ obd2cli_tgt(imp->imp_obd));
+ IMPORT_SET_STATE(imp, LUSTRE_IMP_REPLAY_LOCKS);
+ } else {
+ IMPORT_SET_STATE(imp, LUSTRE_IMP_FULL);
+ ptlrpc_activate_import(imp);
+ }
+
+ rc = 0;
+ goto finish;
+ }
+
+ /* Determine what recovery state to move the import to. */
+ if (MSG_CONNECT_RECONNECT & msg_flags) {
+ memset(&old_hdl, 0, sizeof(old_hdl));
+ if (!memcmp(&old_hdl, lustre_msg_get_handle(request->rq_repmsg),
+ sizeof(old_hdl))) {
+ LCONSOLE_WARN("Reconnect to %s (at @%s) failed due bad handle %#llx\n",
+ obd2cli_tgt(imp->imp_obd),
+ imp->imp_connection->c_remote_uuid.uuid,
+ imp->imp_dlm_handle.cookie);
+ rc = -ENOTCONN;
+ goto out;
+ }
+
+ if (memcmp(&imp->imp_remote_handle,
+ lustre_msg_get_handle(request->rq_repmsg),
+ sizeof(imp->imp_remote_handle))) {
+ int level = msg_flags & MSG_CONNECT_RECOVERING ?
+ D_HA : D_WARNING;
+
+ /* Bug 16611/14775: if server handle have changed,
+ * that means some sort of disconnection happened.
+ * If the server is not in recovery, that also means it
+ * already erased all of our state because of previous
+ * eviction. If it is in recovery - we are safe to
+ * participate since we can reestablish all of our state
+ * with server again */
+ if ((MSG_CONNECT_RECOVERING & msg_flags)) {
+ CDEBUG(level, "%s@%s changed server handle from %#llx to %#llx but is still in recovery\n",
+ obd2cli_tgt(imp->imp_obd),
+ imp->imp_connection->c_remote_uuid.uuid,
+ imp->imp_remote_handle.cookie,
+ lustre_msg_get_handle(
+ request->rq_repmsg)->cookie);
+ } else {
+ LCONSOLE_WARN("Evicted from %s (at %s) after server handle changed from %#llx to %#llx\n",
+ obd2cli_tgt(imp->imp_obd),
+ imp->imp_connection-> \
+ c_remote_uuid.uuid,
+ imp->imp_remote_handle.cookie,
+ lustre_msg_get_handle(
+ request->rq_repmsg)->cookie);
+ }
+
+
+ imp->imp_remote_handle =
+ *lustre_msg_get_handle(request->rq_repmsg);
+
+ if (!(MSG_CONNECT_RECOVERING & msg_flags)) {
+ IMPORT_SET_STATE(imp, LUSTRE_IMP_EVICTED);
+ rc = 0;
+ goto finish;
+ }
+
+ } else {
+ CDEBUG(D_HA, "reconnected to %s@%s after partition\n",
+ obd2cli_tgt(imp->imp_obd),
+ imp->imp_connection->c_remote_uuid.uuid);
+ }
+
+ if (imp->imp_invalid) {
+ CDEBUG(D_HA, "%s: reconnected but import is invalid; marking evicted\n",
+ imp->imp_obd->obd_name);
+ IMPORT_SET_STATE(imp, LUSTRE_IMP_EVICTED);
+ } else if (MSG_CONNECT_RECOVERING & msg_flags) {
+ CDEBUG(D_HA, "%s: reconnected to %s during replay\n",
+ imp->imp_obd->obd_name,
+ obd2cli_tgt(imp->imp_obd));
+
+ spin_lock(&imp->imp_lock);
+ imp->imp_resend_replay = 1;
+ spin_unlock(&imp->imp_lock);
+
+ IMPORT_SET_STATE(imp, imp->imp_replay_state);
+ } else {
+ IMPORT_SET_STATE(imp, LUSTRE_IMP_RECOVER);
+ }
+ } else if ((MSG_CONNECT_RECOVERING & msg_flags) && !imp->imp_invalid) {
+ LASSERT(imp->imp_replayable);
+ imp->imp_remote_handle =
+ *lustre_msg_get_handle(request->rq_repmsg);
+ imp->imp_last_replay_transno = 0;
+ IMPORT_SET_STATE(imp, LUSTRE_IMP_REPLAY);
+ } else {
+ DEBUG_REQ(D_HA, request, "%s: evicting (reconnect/recover flags not set: %x)",
+ imp->imp_obd->obd_name, msg_flags);
+ imp->imp_remote_handle =
+ *lustre_msg_get_handle(request->rq_repmsg);
+ IMPORT_SET_STATE(imp, LUSTRE_IMP_EVICTED);
+ }
+
+ /* Sanity checks for a reconnected import. */
+ if (!(imp->imp_replayable) != !(msg_flags & MSG_CONNECT_REPLAYABLE)) {
+ CERROR("imp_replayable flag does not match server after reconnect. We should LBUG right here.\n");
+ }
+
+ if (lustre_msg_get_last_committed(request->rq_repmsg) > 0 &&
+ lustre_msg_get_last_committed(request->rq_repmsg) <
+ aa->pcaa_peer_committed) {
+ CERROR("%s went back in time (transno %lld was previously committed, server now claims %lld)! See https://bugzilla.lustre.org/show_bug.cgi?id=9646\n",
+ obd2cli_tgt(imp->imp_obd), aa->pcaa_peer_committed,
+ lustre_msg_get_last_committed(request->rq_repmsg));
+ }
+
+finish:
+ rc = ptlrpc_import_recovery_state_machine(imp);
+ if (rc != 0) {
+ if (rc == -ENOTCONN) {
+ CDEBUG(D_HA, "evicted/aborted by %s@%s during recovery; invalidating and reconnecting\n",
+ obd2cli_tgt(imp->imp_obd),
+ imp->imp_connection->c_remote_uuid.uuid);
+ ptlrpc_connect_import(imp);
+ imp->imp_connect_tried = 1;
+ return 0;
+ }
+ } else {
+
+ spin_lock(&imp->imp_lock);
+ list_del(&imp->imp_conn_current->oic_item);
+ list_add(&imp->imp_conn_current->oic_item,
+ &imp->imp_conn_list);
+ imp->imp_last_success_conn =
+ imp->imp_conn_current->oic_last_attempt;
+
+ spin_unlock(&imp->imp_lock);
+
+ if ((imp->imp_connect_flags_orig & OBD_CONNECT_IBITS) &&
+ !(ocd->ocd_connect_flags & OBD_CONNECT_IBITS)) {
+ LCONSOLE_WARN("%s: MDS %s does not support ibits lock, either very old or invalid: requested %llx, replied %llx\n",
+ imp->imp_obd->obd_name,
+ imp->imp_connection->c_remote_uuid.uuid,
+ imp->imp_connect_flags_orig,
+ ocd->ocd_connect_flags);
+ rc = -EPROTO;
+ goto out;
+ }
+
+ if ((ocd->ocd_connect_flags & OBD_CONNECT_VERSION) &&
+ (ocd->ocd_version > LUSTRE_VERSION_CODE +
+ LUSTRE_VERSION_OFFSET_WARN ||
+ ocd->ocd_version < LUSTRE_VERSION_CODE -
+ LUSTRE_VERSION_OFFSET_WARN)) {
+ /* Sigh, some compilers do not like #ifdef in the middle
+ of macro arguments */
+ const char *older = "older. Consider upgrading server or downgrading client"
+ ;
+ const char *newer = "newer than client version. Consider upgrading client"
+ ;
+
+ LCONSOLE_WARN("Server %s version (%d.%d.%d.%d) is much %s (%s)\n",
+ obd2cli_tgt(imp->imp_obd),
+ OBD_OCD_VERSION_MAJOR(ocd->ocd_version),
+ OBD_OCD_VERSION_MINOR(ocd->ocd_version),
+ OBD_OCD_VERSION_PATCH(ocd->ocd_version),
+ OBD_OCD_VERSION_FIX(ocd->ocd_version),
+ ocd->ocd_version > LUSTRE_VERSION_CODE ?
+ newer : older, LUSTRE_VERSION_STRING);
+ }
+
+#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(3, 2, 50, 0)
+ /* Check if server has LU-1252 fix applied to not always swab
+ * the IR MNE entries. Do this only once per connection. This
+ * fixup is version-limited, because we don't want to carry the
+ * OBD_CONNECT_MNE_SWAB flag around forever, just so long as we
+ * need interop with unpatched 2.2 servers. For newer servers,
+ * the client will do MNE swabbing only as needed. LU-1644 */
+ if (unlikely((ocd->ocd_connect_flags & OBD_CONNECT_VERSION) &&
+ !(ocd->ocd_connect_flags & OBD_CONNECT_MNE_SWAB) &&
+ OBD_OCD_VERSION_MAJOR(ocd->ocd_version) == 2 &&
+ OBD_OCD_VERSION_MINOR(ocd->ocd_version) == 2 &&
+ OBD_OCD_VERSION_PATCH(ocd->ocd_version) < 55 &&
+ strcmp(imp->imp_obd->obd_type->typ_name,
+ LUSTRE_MGC_NAME) == 0))
+ imp->imp_need_mne_swab = 1;
+ else /* clear if server was upgraded since last connect */
+ imp->imp_need_mne_swab = 0;
+#else
+#warning "LU-1644: Remove old OBD_CONNECT_MNE_SWAB fixup and imp_need_mne_swab"
+#endif
+
+ if (ocd->ocd_connect_flags & OBD_CONNECT_CKSUM) {
+ /* We sent to the server ocd_cksum_types with bits set
+ * for algorithms we understand. The server masked off
+ * the checksum types it doesn't support */
+ if ((ocd->ocd_cksum_types &
+ cksum_types_supported_client()) == 0) {
+ LCONSOLE_WARN("The negotiation of the checksum algorithm to use with server %s failed (%x/%x), disabling checksums\n",
+ obd2cli_tgt(imp->imp_obd),
+ ocd->ocd_cksum_types,
+ cksum_types_supported_client());
+ cli->cl_checksum = 0;
+ cli->cl_supp_cksum_types = OBD_CKSUM_ADLER;
+ } else {
+ cli->cl_supp_cksum_types = ocd->ocd_cksum_types;
+ }
+ } else {
+ /* The server does not support OBD_CONNECT_CKSUM.
+ * Enforce ADLER for backward compatibility*/
+ cli->cl_supp_cksum_types = OBD_CKSUM_ADLER;
+ }
+ cli->cl_cksum_type = cksum_type_select(cli->cl_supp_cksum_types);
+
+ if (ocd->ocd_connect_flags & OBD_CONNECT_BRW_SIZE)
+ cli->cl_max_pages_per_rpc =
+ min(ocd->ocd_brw_size >> PAGE_CACHE_SHIFT,
+ cli->cl_max_pages_per_rpc);
+ else if (imp->imp_connect_op == MDS_CONNECT ||
+ imp->imp_connect_op == MGS_CONNECT)
+ cli->cl_max_pages_per_rpc = 1;
+
+ /* Reset ns_connect_flags only for initial connect. It might be
+ * changed in while using FS and if we reset it in reconnect
+ * this leads to losing user settings done before such as
+ * disable lru_resize, etc. */
+ if (old_connect_flags != exp_connect_flags(exp) ||
+ aa->pcaa_initial_connect) {
+ CDEBUG(D_HA, "%s: Resetting ns_connect_flags to server flags: %#llx\n",
+ imp->imp_obd->obd_name, ocd->ocd_connect_flags);
+ imp->imp_obd->obd_namespace->ns_connect_flags =
+ ocd->ocd_connect_flags;
+ imp->imp_obd->obd_namespace->ns_orig_connect_flags =
+ ocd->ocd_connect_flags;
+ }
+
+ if ((ocd->ocd_connect_flags & OBD_CONNECT_AT) &&
+ (imp->imp_msg_magic == LUSTRE_MSG_MAGIC_V2))
+ /* We need a per-message support flag, because
+ a. we don't know if the incoming connect reply
+ supports AT or not (in reply_in_callback)
+ until we unpack it.
+ b. failovered server means export and flags are gone
+ (in ptlrpc_send_reply).
+ Can only be set when we know AT is supported at
+ both ends */
+ imp->imp_msghdr_flags |= MSGHDR_AT_SUPPORT;
+ else
+ imp->imp_msghdr_flags &= ~MSGHDR_AT_SUPPORT;
+
+ if ((ocd->ocd_connect_flags & OBD_CONNECT_FULL20) &&
+ (imp->imp_msg_magic == LUSTRE_MSG_MAGIC_V2))
+ imp->imp_msghdr_flags |= MSGHDR_CKSUM_INCOMPAT18;
+ else
+ imp->imp_msghdr_flags &= ~MSGHDR_CKSUM_INCOMPAT18;
+
+ LASSERT((cli->cl_max_pages_per_rpc <= PTLRPC_MAX_BRW_PAGES) &&
+ (cli->cl_max_pages_per_rpc > 0));
+ }
+
+out:
+ imp->imp_connect_tried = 1;
+
+ if (rc != 0) {
+ IMPORT_SET_STATE(imp, LUSTRE_IMP_DISCON);
+ if (rc == -EACCES) {
+ /*
+ * Give up trying to reconnect
+ * EACCES means client has no permission for connection
+ */
+ imp->imp_obd->obd_no_recov = 1;
+ ptlrpc_deactivate_import(imp);
+ }
+
+ if (rc == -EPROTO) {
+ struct obd_connect_data *ocd;
+
+ /* reply message might not be ready */
+ if (request->rq_repmsg == NULL)
+ return -EPROTO;
+
+ ocd = req_capsule_server_get(&request->rq_pill,
+ &RMF_CONNECT_DATA);
+ if (ocd &&
+ (ocd->ocd_connect_flags & OBD_CONNECT_VERSION) &&
+ (ocd->ocd_version != LUSTRE_VERSION_CODE)) {
+ /*
+ * Actually servers are only supposed to refuse
+ * connection from liblustre clients, so we
+ * should never see this from VFS context
+ */
+ LCONSOLE_ERROR_MSG(0x16a, "Server %s version (%d.%d.%d.%d) refused connection from this client with an incompatible version (%s). Client must be recompiled\n",
+ obd2cli_tgt(imp->imp_obd),
+ OBD_OCD_VERSION_MAJOR(ocd->ocd_version),
+ OBD_OCD_VERSION_MINOR(ocd->ocd_version),
+ OBD_OCD_VERSION_PATCH(ocd->ocd_version),
+ OBD_OCD_VERSION_FIX(ocd->ocd_version),
+ LUSTRE_VERSION_STRING);
+ ptlrpc_deactivate_import(imp);
+ IMPORT_SET_STATE(imp, LUSTRE_IMP_CLOSED);
+ }
+ return -EPROTO;
+ }
+
+ ptlrpc_maybe_ping_import_soon(imp);
+
+ CDEBUG(D_HA, "recovery of %s on %s failed (%d)\n",
+ obd2cli_tgt(imp->imp_obd),
+ (char *)imp->imp_connection->c_remote_uuid.uuid, rc);
+ }
+
+ wake_up_all(&imp->imp_recovery_waitq);
+ return rc;
+}
+
+/**
+ * interpret callback for "completed replay" RPCs.
+ * \see signal_completed_replay
+ */
+static int completed_replay_interpret(const struct lu_env *env,
+ struct ptlrpc_request *req,
+ void *data, int rc)
+{
+ atomic_dec(&req->rq_import->imp_replay_inflight);
+ if (req->rq_status == 0 &&
+ !req->rq_import->imp_vbr_failed) {
+ ptlrpc_import_recovery_state_machine(req->rq_import);
+ } else {
+ if (req->rq_import->imp_vbr_failed) {
+ CDEBUG(D_WARNING,
+ "%s: version recovery fails, reconnecting\n",
+ req->rq_import->imp_obd->obd_name);
+ } else {
+ CDEBUG(D_HA, "%s: LAST_REPLAY message error: %d, reconnecting\n",
+ req->rq_import->imp_obd->obd_name,
+ req->rq_status);
+ }
+ ptlrpc_connect_import(req->rq_import);
+ }
+
+ return 0;
+}
+
+/**
+ * Let server know that we have no requests to replay anymore.
+ * Achieved by just sending a PING request
+ */
+static int signal_completed_replay(struct obd_import *imp)
+{
+ struct ptlrpc_request *req;
+
+ if (unlikely(OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_FINISH_REPLAY)))
+ return 0;
+
+ LASSERT(atomic_read(&imp->imp_replay_inflight) == 0);
+ atomic_inc(&imp->imp_replay_inflight);
+
+ req = ptlrpc_request_alloc_pack(imp, &RQF_OBD_PING, LUSTRE_OBD_VERSION,
+ OBD_PING);
+ if (req == NULL) {
+ atomic_dec(&imp->imp_replay_inflight);
+ return -ENOMEM;
+ }
+
+ ptlrpc_request_set_replen(req);
+ req->rq_send_state = LUSTRE_IMP_REPLAY_WAIT;
+ lustre_msg_add_flags(req->rq_reqmsg,
+ MSG_LOCK_REPLAY_DONE | MSG_REQ_REPLAY_DONE);
+ if (AT_OFF)
+ req->rq_timeout *= 3;
+ req->rq_interpret_reply = completed_replay_interpret;
+
+ ptlrpcd_add_req(req, PDL_POLICY_ROUND, -1);
+ return 0;
+}
+
+/**
+ * In kernel code all import invalidation happens in its own
+ * separate thread, so that whatever application happened to encounter
+ * a problem could still be killed or otherwise continue
+ */
+static int ptlrpc_invalidate_import_thread(void *data)
+{
+ struct obd_import *imp = data;
+
+ unshare_fs_struct();
+
+ CDEBUG(D_HA, "thread invalidate import %s to %s@%s\n",
+ imp->imp_obd->obd_name, obd2cli_tgt(imp->imp_obd),
+ imp->imp_connection->c_remote_uuid.uuid);
+
+ ptlrpc_invalidate_import(imp);
+
+ if (obd_dump_on_eviction) {
+ CERROR("dump the log upon eviction\n");
+ libcfs_debug_dumplog();
+ }
+
+ IMPORT_SET_STATE(imp, LUSTRE_IMP_RECOVER);
+ ptlrpc_import_recovery_state_machine(imp);
+
+ class_import_put(imp);
+ return 0;
+}
+
+/**
+ * This is the state machine for client-side recovery on import.
+ *
+ * Typically we have two possibly paths. If we came to server and it is not
+ * in recovery, we just enter IMP_EVICTED state, invalidate our import
+ * state and reconnect from scratch.
+ * If we came to server that is in recovery, we enter IMP_REPLAY import state.
+ * We go through our list of requests to replay and send them to server one by
+ * one.
+ * After sending all request from the list we change import state to
+ * IMP_REPLAY_LOCKS and re-request all the locks we believe we have from server
+ * and also all the locks we don't yet have and wait for server to grant us.
+ * After that we send a special "replay completed" request and change import
+ * state to IMP_REPLAY_WAIT.
+ * Upon receiving reply to that "replay completed" RPC we enter IMP_RECOVER
+ * state and resend all requests from sending list.
+ * After that we promote import to FULL state and send all delayed requests
+ * and import is fully operational after that.
+ *
+ */
+int ptlrpc_import_recovery_state_machine(struct obd_import *imp)
+{
+ int rc = 0;
+ int inflight;
+ char *target_start;
+ int target_len;
+
+ if (imp->imp_state == LUSTRE_IMP_EVICTED) {
+ deuuidify(obd2cli_tgt(imp->imp_obd), NULL,
+ &target_start, &target_len);
+ /* Don't care about MGC eviction */
+ if (strcmp(imp->imp_obd->obd_type->typ_name,
+ LUSTRE_MGC_NAME) != 0) {
+ LCONSOLE_ERROR_MSG(0x167, "%s: This client was evicted by %.*s; in progress operations using this service will fail.\n",
+ imp->imp_obd->obd_name, target_len,
+ target_start);
+ }
+ CDEBUG(D_HA, "evicted from %s@%s; invalidating\n",
+ obd2cli_tgt(imp->imp_obd),
+ imp->imp_connection->c_remote_uuid.uuid);
+ /* reset vbr_failed flag upon eviction */
+ spin_lock(&imp->imp_lock);
+ imp->imp_vbr_failed = 0;
+ spin_unlock(&imp->imp_lock);
+
+ {
+ struct task_struct *task;
+ /* bug 17802: XXX client_disconnect_export vs connect request
+ * race. if client will evicted at this time, we start
+ * invalidate thread without reference to import and import can
+ * be freed at same time. */
+ class_import_get(imp);
+ task = kthread_run(ptlrpc_invalidate_import_thread, imp,
+ "ll_imp_inval");
+ if (IS_ERR(task)) {
+ class_import_put(imp);
+ CERROR("error starting invalidate thread: %d\n", rc);
+ rc = PTR_ERR(task);
+ } else {
+ rc = 0;
+ }
+ return rc;
+ }
+ }
+
+ if (imp->imp_state == LUSTRE_IMP_REPLAY) {
+ CDEBUG(D_HA, "replay requested by %s\n",
+ obd2cli_tgt(imp->imp_obd));
+ rc = ptlrpc_replay_next(imp, &inflight);
+ if (inflight == 0 &&
+ atomic_read(&imp->imp_replay_inflight) == 0) {
+ IMPORT_SET_STATE(imp, LUSTRE_IMP_REPLAY_LOCKS);
+ rc = ldlm_replay_locks(imp);
+ if (rc)
+ goto out;
+ }
+ rc = 0;
+ }
+
+ if (imp->imp_state == LUSTRE_IMP_REPLAY_LOCKS) {
+ if (atomic_read(&imp->imp_replay_inflight) == 0) {
+ IMPORT_SET_STATE(imp, LUSTRE_IMP_REPLAY_WAIT);
+ rc = signal_completed_replay(imp);
+ if (rc)
+ goto out;
+ }
+
+ }
+
+ if (imp->imp_state == LUSTRE_IMP_REPLAY_WAIT) {
+ if (atomic_read(&imp->imp_replay_inflight) == 0) {
+ IMPORT_SET_STATE(imp, LUSTRE_IMP_RECOVER);
+ }
+ }
+
+ if (imp->imp_state == LUSTRE_IMP_RECOVER) {
+ CDEBUG(D_HA, "reconnected to %s@%s\n",
+ obd2cli_tgt(imp->imp_obd),
+ imp->imp_connection->c_remote_uuid.uuid);
+
+ rc = ptlrpc_resend(imp);
+ if (rc)
+ goto out;
+ IMPORT_SET_STATE(imp, LUSTRE_IMP_FULL);
+ ptlrpc_activate_import(imp);
+
+ deuuidify(obd2cli_tgt(imp->imp_obd), NULL,
+ &target_start, &target_len);
+ LCONSOLE_INFO("%s: Connection restored to %.*s (at %s)\n",
+ imp->imp_obd->obd_name,
+ target_len, target_start,
+ libcfs_nid2str(imp->imp_connection->c_peer.nid));
+ }
+
+ if (imp->imp_state == LUSTRE_IMP_FULL) {
+ wake_up_all(&imp->imp_recovery_waitq);
+ ptlrpc_wake_delayed(imp);
+ }
+
+out:
+ return rc;
+}
+
+int ptlrpc_disconnect_import(struct obd_import *imp, int noclose)
+{
+ struct ptlrpc_request *req;
+ int rq_opc, rc = 0;
+
+ if (imp->imp_obd->obd_force)
+ goto set_state;
+
+ switch (imp->imp_connect_op) {
+ case OST_CONNECT:
+ rq_opc = OST_DISCONNECT;
+ break;
+ case MDS_CONNECT:
+ rq_opc = MDS_DISCONNECT;
+ break;
+ case MGS_CONNECT:
+ rq_opc = MGS_DISCONNECT;
+ break;
+ default:
+ rc = -EINVAL;
+ CERROR("%s: don't know how to disconnect from %s (connect_op %d): rc = %d\n",
+ imp->imp_obd->obd_name, obd2cli_tgt(imp->imp_obd),
+ imp->imp_connect_op, rc);
+ return rc;
+ }
+
+ if (ptlrpc_import_in_recovery(imp)) {
+ struct l_wait_info lwi;
+ long timeout;
+
+ if (AT_OFF) {
+ if (imp->imp_server_timeout)
+ timeout = cfs_time_seconds(obd_timeout / 2);
+ else
+ timeout = cfs_time_seconds(obd_timeout);
+ } else {
+ int idx = import_at_get_index(imp,
+ imp->imp_client->cli_request_portal);
+ timeout = cfs_time_seconds(
+ at_get(&imp->imp_at.iat_service_estimate[idx]));
+ }
+
+ lwi = LWI_TIMEOUT_INTR(cfs_timeout_cap(timeout),
+ back_to_sleep, LWI_ON_SIGNAL_NOOP, NULL);
+ rc = l_wait_event(imp->imp_recovery_waitq,
+ !ptlrpc_import_in_recovery(imp), &lwi);
+
+ }
+
+ spin_lock(&imp->imp_lock);
+ if (imp->imp_state != LUSTRE_IMP_FULL)
+ goto out;
+ spin_unlock(&imp->imp_lock);
+
+ req = ptlrpc_request_alloc_pack(imp, &RQF_MDS_DISCONNECT,
+ LUSTRE_OBD_VERSION, rq_opc);
+ if (req) {
+ /* We are disconnecting, do not retry a failed DISCONNECT rpc if
+ * it fails. We can get through the above with a down server
+ * if the client doesn't know the server is gone yet. */
+ req->rq_no_resend = 1;
+
+ /* We want client umounts to happen quickly, no matter the
+ server state... */
+ req->rq_timeout = min_t(int, req->rq_timeout,
+ INITIAL_CONNECT_TIMEOUT);
+
+ IMPORT_SET_STATE(imp, LUSTRE_IMP_CONNECTING);
+ req->rq_send_state = LUSTRE_IMP_CONNECTING;
+ ptlrpc_request_set_replen(req);
+ rc = ptlrpc_queue_wait(req);
+ ptlrpc_req_finished(req);
+ }
+
+set_state:
+ spin_lock(&imp->imp_lock);
+out:
+ if (noclose)
+ IMPORT_SET_STATE_NOLOCK(imp, LUSTRE_IMP_DISCON);
+ else
+ IMPORT_SET_STATE_NOLOCK(imp, LUSTRE_IMP_CLOSED);
+ memset(&imp->imp_remote_handle, 0, sizeof(imp->imp_remote_handle));
+ spin_unlock(&imp->imp_lock);
+
+ if (rc == -ETIMEDOUT || rc == -ENOTCONN || rc == -ESHUTDOWN)
+ rc = 0;
+
+ return rc;
+}
+EXPORT_SYMBOL(ptlrpc_disconnect_import);
+
+void ptlrpc_cleanup_imp(struct obd_import *imp)
+{
+ spin_lock(&imp->imp_lock);
+ IMPORT_SET_STATE_NOLOCK(imp, LUSTRE_IMP_CLOSED);
+ imp->imp_generation++;
+ spin_unlock(&imp->imp_lock);
+ ptlrpc_abort_inflight(imp);
+}
+EXPORT_SYMBOL(ptlrpc_cleanup_imp);
+
+/* Adaptive Timeout utils */
+extern unsigned int at_min, at_max, at_history;
+
+/* Bin into timeslices using AT_BINS bins.
+ This gives us a max of the last binlimit*AT_BINS secs without the storage,
+ but still smoothing out a return to normalcy from a slow response.
+ (E.g. remember the maximum latency in each minute of the last 4 minutes.) */
+int at_measured(struct adaptive_timeout *at, unsigned int val)
+{
+ unsigned int old = at->at_current;
+ time_t now = get_seconds();
+ time_t binlimit = max_t(time_t, at_history / AT_BINS, 1);
+
+ LASSERT(at);
+ CDEBUG(D_OTHER, "add %u to %p time=%lu v=%u (%u %u %u %u)\n",
+ val, at, now - at->at_binstart, at->at_current,
+ at->at_hist[0], at->at_hist[1], at->at_hist[2], at->at_hist[3]);
+
+ if (val == 0)
+ /* 0's don't count, because we never want our timeout to
+ drop to 0, and because 0 could mean an error */
+ return 0;
+
+ spin_lock(&at->at_lock);
+
+ if (unlikely(at->at_binstart == 0)) {
+ /* Special case to remove default from history */
+ at->at_current = val;
+ at->at_worst_ever = val;
+ at->at_worst_time = now;
+ at->at_hist[0] = val;
+ at->at_binstart = now;
+ } else if (now - at->at_binstart < binlimit) {
+ /* in bin 0 */
+ at->at_hist[0] = max(val, at->at_hist[0]);
+ at->at_current = max(val, at->at_current);
+ } else {
+ int i, shift;
+ unsigned int maxv = val;
+ /* move bins over */
+ shift = (now - at->at_binstart) / binlimit;
+ LASSERT(shift > 0);
+ for (i = AT_BINS - 1; i >= 0; i--) {
+ if (i >= shift) {
+ at->at_hist[i] = at->at_hist[i - shift];
+ maxv = max(maxv, at->at_hist[i]);
+ } else {
+ at->at_hist[i] = 0;
+ }
+ }
+ at->at_hist[0] = val;
+ at->at_current = maxv;
+ at->at_binstart += shift * binlimit;
+ }
+
+ if (at->at_current > at->at_worst_ever) {
+ at->at_worst_ever = at->at_current;
+ at->at_worst_time = now;
+ }
+
+ if (at->at_flags & AT_FLG_NOHIST)
+ /* Only keep last reported val; keeping the rest of the history
+ for proc only */
+ at->at_current = val;
+
+ if (at_max > 0)
+ at->at_current = min(at->at_current, at_max);
+ at->at_current = max(at->at_current, at_min);
+
+ if (at->at_current != old)
+ CDEBUG(D_OTHER, "AT %p change: old=%u new=%u delta=%d (val=%u) hist %u %u %u %u\n",
+ at,
+ old, at->at_current, at->at_current - old, val,
+ at->at_hist[0], at->at_hist[1], at->at_hist[2],
+ at->at_hist[3]);
+
+ /* if we changed, report the old value */
+ old = (at->at_current != old) ? old : 0;
+
+ spin_unlock(&at->at_lock);
+ return old;
+}
+
+/* Find the imp_at index for a given portal; assign if space available */
+int import_at_get_index(struct obd_import *imp, int portal)
+{
+ struct imp_at *at = &imp->imp_at;
+ int i;
+
+ for (i = 0; i < IMP_AT_MAX_PORTALS; i++) {
+ if (at->iat_portal[i] == portal)
+ return i;
+ if (at->iat_portal[i] == 0)
+ /* unused */
+ break;
+ }
+
+ /* Not found in list, add it under a lock */
+ spin_lock(&imp->imp_lock);
+
+ /* Check unused under lock */
+ for (; i < IMP_AT_MAX_PORTALS; i++) {
+ if (at->iat_portal[i] == portal)
+ goto out;
+ if (at->iat_portal[i] == 0)
+ /* unused */
+ break;
+ }
+
+ /* Not enough portals? */
+ LASSERT(i < IMP_AT_MAX_PORTALS);
+
+ at->iat_portal[i] = portal;
+out:
+ spin_unlock(&imp->imp_lock);
+ return i;
+}
diff --git a/drivers/staging/lustre/lustre/ptlrpc/layout.c b/drivers/staging/lustre/lustre/ptlrpc/layout.c
new file mode 100644
index 000000000..a42335e26
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ptlrpc/layout.c
@@ -0,0 +1,2442 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/ptlrpc/layout.c
+ *
+ * Lustre Metadata Target (mdt) request handler
+ *
+ * Author: Nikita Danilov <nikita@clusterfs.com>
+ */
+/*
+ * This file contains the "capsule/pill" abstraction layered above PTLRPC.
+ *
+ * Every struct ptlrpc_request contains a "pill", which points to a description
+ * of the format that the request conforms to.
+ */
+
+#if !defined(__REQ_LAYOUT_USER__)
+
+#define DEBUG_SUBSYSTEM S_RPC
+
+#include <linux/module.h>
+
+/* LUSTRE_VERSION_CODE */
+#include "../include/lustre_ver.h"
+
+#include "../include/obd_support.h"
+/* lustre_swab_mdt_body */
+#include "../include/lustre/lustre_idl.h"
+/* obd2cli_tgt() (required by DEBUG_REQ()) */
+#include "../include/obd.h"
+
+/* __REQ_LAYOUT_USER__ */
+#endif
+/* struct ptlrpc_request, lustre_msg* */
+#include "../include/lustre_req_layout.h"
+#include "../include/lustre_acl.h"
+#include "../include/lustre_debug.h"
+
+/*
+ * RQFs (see below) refer to two struct req_msg_field arrays describing the
+ * client request and server reply, respectively.
+ */
+/* empty set of fields... for suitable definition of emptiness. */
+static const struct req_msg_field *empty[] = {
+ &RMF_PTLRPC_BODY
+};
+
+static const struct req_msg_field *mgs_target_info_only[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_MGS_TARGET_INFO
+};
+
+static const struct req_msg_field *mgs_set_info[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_MGS_SEND_PARAM
+};
+
+static const struct req_msg_field *mgs_config_read_client[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_MGS_CONFIG_BODY
+};
+
+static const struct req_msg_field *mgs_config_read_server[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_MGS_CONFIG_RES
+};
+
+static const struct req_msg_field *log_cancel_client[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_LOGCOOKIES
+};
+
+static const struct req_msg_field *mdt_body_only[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_MDT_BODY
+};
+
+static const struct req_msg_field *mdt_body_capa[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_MDT_BODY,
+ &RMF_CAPA1
+};
+
+static const struct req_msg_field *quotactl_only[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_OBD_QUOTACTL
+};
+
+static const struct req_msg_field *quota_body_only[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_QUOTA_BODY
+};
+
+static const struct req_msg_field *ldlm_intent_quota_client[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_DLM_REQ,
+ &RMF_LDLM_INTENT,
+ &RMF_QUOTA_BODY
+};
+
+static const struct req_msg_field *ldlm_intent_quota_server[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_DLM_REP,
+ &RMF_DLM_LVB,
+ &RMF_QUOTA_BODY
+};
+
+static const struct req_msg_field *mdt_close_client[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_MDT_EPOCH,
+ &RMF_REC_REINT,
+ &RMF_CAPA1
+};
+
+static const struct req_msg_field *mdt_release_close_client[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_MDT_EPOCH,
+ &RMF_REC_REINT,
+ &RMF_CAPA1,
+ &RMF_CLOSE_DATA
+};
+
+static const struct req_msg_field *obd_statfs_server[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_OBD_STATFS
+};
+
+static const struct req_msg_field *seq_query_client[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_SEQ_OPC,
+ &RMF_SEQ_RANGE
+};
+
+static const struct req_msg_field *seq_query_server[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_SEQ_RANGE
+};
+
+static const struct req_msg_field *fld_query_client[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_FLD_OPC,
+ &RMF_FLD_MDFLD
+};
+
+static const struct req_msg_field *fld_query_server[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_FLD_MDFLD
+};
+
+static const struct req_msg_field *mds_getattr_name_client[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_MDT_BODY,
+ &RMF_CAPA1,
+ &RMF_NAME
+};
+
+static const struct req_msg_field *mds_reint_client[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_REC_REINT
+};
+
+static const struct req_msg_field *mds_reint_create_client[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_REC_REINT,
+ &RMF_CAPA1,
+ &RMF_NAME
+};
+
+static const struct req_msg_field *mds_reint_create_slave_client[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_REC_REINT,
+ &RMF_CAPA1,
+ &RMF_NAME,
+ &RMF_EADATA,
+ &RMF_DLM_REQ
+};
+
+static const struct req_msg_field *mds_reint_create_rmt_acl_client[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_REC_REINT,
+ &RMF_CAPA1,
+ &RMF_NAME,
+ &RMF_EADATA,
+ &RMF_DLM_REQ
+};
+
+static const struct req_msg_field *mds_reint_create_sym_client[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_REC_REINT,
+ &RMF_CAPA1,
+ &RMF_NAME,
+ &RMF_SYMTGT,
+ &RMF_DLM_REQ
+};
+
+static const struct req_msg_field *mds_reint_open_client[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_REC_REINT,
+ &RMF_CAPA1,
+ &RMF_CAPA2,
+ &RMF_NAME,
+ &RMF_EADATA
+};
+
+static const struct req_msg_field *mds_reint_open_server[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_MDT_BODY,
+ &RMF_MDT_MD,
+ &RMF_ACL,
+ &RMF_CAPA1,
+ &RMF_CAPA2
+};
+
+static const struct req_msg_field *mds_reint_unlink_client[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_REC_REINT,
+ &RMF_CAPA1,
+ &RMF_NAME,
+ &RMF_DLM_REQ
+};
+
+static const struct req_msg_field *mds_reint_link_client[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_REC_REINT,
+ &RMF_CAPA1,
+ &RMF_CAPA2,
+ &RMF_NAME,
+ &RMF_DLM_REQ
+};
+
+static const struct req_msg_field *mds_reint_rename_client[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_REC_REINT,
+ &RMF_CAPA1,
+ &RMF_CAPA2,
+ &RMF_NAME,
+ &RMF_SYMTGT,
+ &RMF_DLM_REQ
+};
+
+static const struct req_msg_field *mds_last_unlink_server[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_MDT_BODY,
+ &RMF_MDT_MD,
+ &RMF_LOGCOOKIES,
+ &RMF_CAPA1,
+ &RMF_CAPA2
+};
+
+static const struct req_msg_field *mds_reint_setattr_client[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_REC_REINT,
+ &RMF_CAPA1,
+ &RMF_MDT_EPOCH,
+ &RMF_EADATA,
+ &RMF_LOGCOOKIES,
+ &RMF_DLM_REQ
+};
+
+static const struct req_msg_field *mds_reint_setxattr_client[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_REC_REINT,
+ &RMF_CAPA1,
+ &RMF_NAME,
+ &RMF_EADATA,
+ &RMF_DLM_REQ
+};
+
+static const struct req_msg_field *mdt_swap_layouts[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_MDT_BODY,
+ &RMF_SWAP_LAYOUTS,
+ &RMF_CAPA1,
+ &RMF_CAPA2,
+ &RMF_DLM_REQ
+};
+
+static const struct req_msg_field *obd_connect_client[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_TGTUUID,
+ &RMF_CLUUID,
+ &RMF_CONN,
+ &RMF_CONNECT_DATA
+};
+
+static const struct req_msg_field *obd_connect_server[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_CONNECT_DATA
+};
+
+static const struct req_msg_field *obd_set_info_client[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_SETINFO_KEY,
+ &RMF_SETINFO_VAL
+};
+
+static const struct req_msg_field *ost_grant_shrink_client[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_SETINFO_KEY,
+ &RMF_OST_BODY
+};
+
+static const struct req_msg_field *mds_getinfo_client[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_GETINFO_KEY,
+ &RMF_GETINFO_VALLEN
+};
+
+static const struct req_msg_field *mds_getinfo_server[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_GETINFO_VAL,
+};
+
+static const struct req_msg_field *ldlm_enqueue_client[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_DLM_REQ
+};
+
+static const struct req_msg_field *ldlm_enqueue_server[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_DLM_REP
+};
+
+static const struct req_msg_field *ldlm_enqueue_lvb_server[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_DLM_REP,
+ &RMF_DLM_LVB
+};
+
+static const struct req_msg_field *ldlm_cp_callback_client[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_DLM_REQ,
+ &RMF_DLM_LVB
+};
+
+static const struct req_msg_field *ldlm_gl_callback_desc_client[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_DLM_REQ,
+ &RMF_DLM_GL_DESC
+};
+
+static const struct req_msg_field *ldlm_gl_callback_server[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_DLM_LVB
+};
+
+static const struct req_msg_field *ldlm_intent_basic_client[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_DLM_REQ,
+ &RMF_LDLM_INTENT,
+};
+
+static const struct req_msg_field *ldlm_intent_client[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_DLM_REQ,
+ &RMF_LDLM_INTENT,
+ &RMF_REC_REINT
+};
+
+static const struct req_msg_field *ldlm_intent_server[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_DLM_REP,
+ &RMF_MDT_BODY,
+ &RMF_MDT_MD,
+ &RMF_ACL
+};
+
+static const struct req_msg_field *ldlm_intent_layout_client[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_DLM_REQ,
+ &RMF_LDLM_INTENT,
+ &RMF_LAYOUT_INTENT,
+ &RMF_EADATA /* for new layout to be set up */
+};
+static const struct req_msg_field *ldlm_intent_open_server[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_DLM_REP,
+ &RMF_MDT_BODY,
+ &RMF_MDT_MD,
+ &RMF_ACL,
+ &RMF_CAPA1,
+ &RMF_CAPA2
+};
+
+static const struct req_msg_field *ldlm_intent_getattr_client[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_DLM_REQ,
+ &RMF_LDLM_INTENT,
+ &RMF_MDT_BODY, /* coincides with mds_getattr_name_client[] */
+ &RMF_CAPA1,
+ &RMF_NAME
+};
+
+static const struct req_msg_field *ldlm_intent_getattr_server[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_DLM_REP,
+ &RMF_MDT_BODY,
+ &RMF_MDT_MD,
+ &RMF_ACL,
+ &RMF_CAPA1
+};
+
+static const struct req_msg_field *ldlm_intent_create_client[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_DLM_REQ,
+ &RMF_LDLM_INTENT,
+ &RMF_REC_REINT, /* coincides with mds_reint_create_client[] */
+ &RMF_CAPA1,
+ &RMF_NAME,
+ &RMF_EADATA
+};
+
+static const struct req_msg_field *ldlm_intent_open_client[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_DLM_REQ,
+ &RMF_LDLM_INTENT,
+ &RMF_REC_REINT, /* coincides with mds_reint_open_client[] */
+ &RMF_CAPA1,
+ &RMF_CAPA2,
+ &RMF_NAME,
+ &RMF_EADATA
+};
+
+static const struct req_msg_field *ldlm_intent_unlink_client[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_DLM_REQ,
+ &RMF_LDLM_INTENT,
+ &RMF_REC_REINT, /* coincides with mds_reint_unlink_client[] */
+ &RMF_CAPA1,
+ &RMF_NAME
+};
+
+static const struct req_msg_field *ldlm_intent_getxattr_client[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_DLM_REQ,
+ &RMF_LDLM_INTENT,
+ &RMF_MDT_BODY,
+ &RMF_CAPA1,
+};
+
+static const struct req_msg_field *ldlm_intent_getxattr_server[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_DLM_REP,
+ &RMF_MDT_BODY,
+ &RMF_MDT_MD,
+ &RMF_ACL, /* for req_capsule_extend/mdt_intent_policy */
+ &RMF_EADATA,
+ &RMF_EAVALS,
+ &RMF_EAVALS_LENS
+};
+
+static const struct req_msg_field *mds_getxattr_client[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_MDT_BODY,
+ &RMF_CAPA1,
+ &RMF_NAME,
+ &RMF_EADATA
+};
+
+static const struct req_msg_field *mds_getxattr_server[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_MDT_BODY,
+ &RMF_EADATA
+};
+
+static const struct req_msg_field *mds_getattr_server[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_MDT_BODY,
+ &RMF_MDT_MD,
+ &RMF_ACL,
+ &RMF_CAPA1,
+ &RMF_CAPA2
+};
+
+static const struct req_msg_field *mds_setattr_server[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_MDT_BODY,
+ &RMF_MDT_MD,
+ &RMF_ACL,
+ &RMF_CAPA1,
+ &RMF_CAPA2
+};
+
+static const struct req_msg_field *mds_update_client[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_UPDATE,
+};
+
+static const struct req_msg_field *mds_update_server[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_UPDATE_REPLY,
+};
+
+static const struct req_msg_field *llog_origin_handle_create_client[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_LLOGD_BODY,
+ &RMF_NAME
+};
+
+static const struct req_msg_field *llogd_body_only[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_LLOGD_BODY
+};
+
+static const struct req_msg_field *llog_log_hdr_only[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_LLOG_LOG_HDR
+};
+
+static const struct req_msg_field *llogd_conn_body_only[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_LLOGD_CONN_BODY
+};
+
+static const struct req_msg_field *llog_origin_handle_next_block_server[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_LLOGD_BODY,
+ &RMF_EADATA
+};
+
+static const struct req_msg_field *obd_idx_read_client[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_IDX_INFO
+};
+
+static const struct req_msg_field *obd_idx_read_server[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_IDX_INFO
+};
+
+static const struct req_msg_field *ost_body_only[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_OST_BODY
+};
+
+static const struct req_msg_field *ost_body_capa[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_OST_BODY,
+ &RMF_CAPA1
+};
+
+static const struct req_msg_field *ost_destroy_client[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_OST_BODY,
+ &RMF_DLM_REQ,
+ &RMF_CAPA1
+};
+
+
+static const struct req_msg_field *ost_brw_client[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_OST_BODY,
+ &RMF_OBD_IOOBJ,
+ &RMF_NIOBUF_REMOTE,
+ &RMF_CAPA1
+};
+
+static const struct req_msg_field *ost_brw_read_server[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_OST_BODY
+};
+
+static const struct req_msg_field *ost_brw_write_server[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_OST_BODY,
+ &RMF_RCS
+};
+
+static const struct req_msg_field *ost_get_info_generic_server[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_GENERIC_DATA,
+};
+
+static const struct req_msg_field *ost_get_info_generic_client[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_SETINFO_KEY
+};
+
+static const struct req_msg_field *ost_get_last_id_server[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_OBD_ID
+};
+
+static const struct req_msg_field *ost_get_last_fid_server[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_FID,
+};
+
+static const struct req_msg_field *ost_get_fiemap_client[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_FIEMAP_KEY,
+ &RMF_FIEMAP_VAL
+};
+
+static const struct req_msg_field *ost_get_fiemap_server[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_FIEMAP_VAL
+};
+
+static const struct req_msg_field *mdt_hsm_progress[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_MDT_BODY,
+ &RMF_MDS_HSM_PROGRESS,
+};
+
+static const struct req_msg_field *mdt_hsm_ct_register[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_MDT_BODY,
+ &RMF_MDS_HSM_ARCHIVE,
+};
+
+static const struct req_msg_field *mdt_hsm_ct_unregister[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_MDT_BODY,
+};
+
+static const struct req_msg_field *mdt_hsm_action_server[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_MDT_BODY,
+ &RMF_MDS_HSM_CURRENT_ACTION,
+};
+
+static const struct req_msg_field *mdt_hsm_state_get_server[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_MDT_BODY,
+ &RMF_HSM_USER_STATE,
+};
+
+static const struct req_msg_field *mdt_hsm_state_set[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_MDT_BODY,
+ &RMF_CAPA1,
+ &RMF_HSM_STATE_SET,
+};
+
+static const struct req_msg_field *mdt_hsm_request[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_MDT_BODY,
+ &RMF_MDS_HSM_REQUEST,
+ &RMF_MDS_HSM_USER_ITEM,
+ &RMF_GENERIC_DATA,
+};
+
+static struct req_format *req_formats[] = {
+ &RQF_OBD_PING,
+ &RQF_OBD_SET_INFO,
+ &RQF_OBD_IDX_READ,
+ &RQF_SEC_CTX,
+ &RQF_MGS_TARGET_REG,
+ &RQF_MGS_SET_INFO,
+ &RQF_MGS_CONFIG_READ,
+ &RQF_SEQ_QUERY,
+ &RQF_FLD_QUERY,
+ &RQF_MDS_CONNECT,
+ &RQF_MDS_DISCONNECT,
+ &RQF_MDS_GET_INFO,
+ &RQF_MDS_GETSTATUS,
+ &RQF_MDS_STATFS,
+ &RQF_MDS_GETATTR,
+ &RQF_MDS_GETATTR_NAME,
+ &RQF_MDS_GETXATTR,
+ &RQF_MDS_SYNC,
+ &RQF_MDS_CLOSE,
+ &RQF_MDS_RELEASE_CLOSE,
+ &RQF_MDS_PIN,
+ &RQF_MDS_UNPIN,
+ &RQF_MDS_READPAGE,
+ &RQF_MDS_WRITEPAGE,
+ &RQF_MDS_IS_SUBDIR,
+ &RQF_MDS_DONE_WRITING,
+ &RQF_MDS_REINT,
+ &RQF_MDS_REINT_CREATE,
+ &RQF_MDS_REINT_CREATE_RMT_ACL,
+ &RQF_MDS_REINT_CREATE_SLAVE,
+ &RQF_MDS_REINT_CREATE_SYM,
+ &RQF_MDS_REINT_OPEN,
+ &RQF_MDS_REINT_UNLINK,
+ &RQF_MDS_REINT_LINK,
+ &RQF_MDS_REINT_RENAME,
+ &RQF_MDS_REINT_SETATTR,
+ &RQF_MDS_REINT_SETXATTR,
+ &RQF_MDS_QUOTACHECK,
+ &RQF_MDS_QUOTACTL,
+ &RQF_MDS_HSM_PROGRESS,
+ &RQF_MDS_HSM_CT_REGISTER,
+ &RQF_MDS_HSM_CT_UNREGISTER,
+ &RQF_MDS_HSM_STATE_GET,
+ &RQF_MDS_HSM_STATE_SET,
+ &RQF_MDS_HSM_ACTION,
+ &RQF_MDS_HSM_REQUEST,
+ &RQF_MDS_SWAP_LAYOUTS,
+ &RQF_UPDATE_OBJ,
+ &RQF_QC_CALLBACK,
+ &RQF_OST_CONNECT,
+ &RQF_OST_DISCONNECT,
+ &RQF_OST_QUOTACHECK,
+ &RQF_OST_QUOTACTL,
+ &RQF_OST_GETATTR,
+ &RQF_OST_SETATTR,
+ &RQF_OST_CREATE,
+ &RQF_OST_PUNCH,
+ &RQF_OST_SYNC,
+ &RQF_OST_DESTROY,
+ &RQF_OST_BRW_READ,
+ &RQF_OST_BRW_WRITE,
+ &RQF_OST_STATFS,
+ &RQF_OST_SET_GRANT_INFO,
+ &RQF_OST_GET_INFO_GENERIC,
+ &RQF_OST_GET_INFO_LAST_ID,
+ &RQF_OST_GET_INFO_LAST_FID,
+ &RQF_OST_SET_INFO_LAST_FID,
+ &RQF_OST_GET_INFO_FIEMAP,
+ &RQF_LDLM_ENQUEUE,
+ &RQF_LDLM_ENQUEUE_LVB,
+ &RQF_LDLM_CONVERT,
+ &RQF_LDLM_CANCEL,
+ &RQF_LDLM_CALLBACK,
+ &RQF_LDLM_CP_CALLBACK,
+ &RQF_LDLM_BL_CALLBACK,
+ &RQF_LDLM_GL_CALLBACK,
+ &RQF_LDLM_GL_DESC_CALLBACK,
+ &RQF_LDLM_INTENT,
+ &RQF_LDLM_INTENT_BASIC,
+ &RQF_LDLM_INTENT_LAYOUT,
+ &RQF_LDLM_INTENT_GETATTR,
+ &RQF_LDLM_INTENT_OPEN,
+ &RQF_LDLM_INTENT_CREATE,
+ &RQF_LDLM_INTENT_UNLINK,
+ &RQF_LDLM_INTENT_GETXATTR,
+ &RQF_LDLM_INTENT_QUOTA,
+ &RQF_QUOTA_DQACQ,
+ &RQF_LOG_CANCEL,
+ &RQF_LLOG_ORIGIN_HANDLE_CREATE,
+ &RQF_LLOG_ORIGIN_HANDLE_DESTROY,
+ &RQF_LLOG_ORIGIN_HANDLE_NEXT_BLOCK,
+ &RQF_LLOG_ORIGIN_HANDLE_PREV_BLOCK,
+ &RQF_LLOG_ORIGIN_HANDLE_READ_HEADER,
+ &RQF_LLOG_ORIGIN_CONNECT,
+ &RQF_CONNECT,
+};
+
+struct req_msg_field {
+ const __u32 rmf_flags;
+ const char *rmf_name;
+ /**
+ * Field length. (-1) means "variable length". If the
+ * \a RMF_F_STRUCT_ARRAY flag is set the field is also variable-length,
+ * but the actual size must be a whole multiple of \a rmf_size.
+ */
+ const int rmf_size;
+ void (*rmf_swabber)(void *);
+ void (*rmf_dumper)(void *);
+ int rmf_offset[ARRAY_SIZE(req_formats)][RCL_NR];
+};
+
+enum rmf_flags {
+ /**
+ * The field is a string, must be NUL-terminated.
+ */
+ RMF_F_STRING = 1 << 0,
+ /**
+ * The field's buffer size need not match the declared \a rmf_size.
+ */
+ RMF_F_NO_SIZE_CHECK = 1 << 1,
+ /**
+ * The field's buffer size must be a whole multiple of the declared \a
+ * rmf_size and the \a rmf_swabber function must work on the declared \a
+ * rmf_size worth of bytes.
+ */
+ RMF_F_STRUCT_ARRAY = 1 << 2
+};
+
+struct req_capsule;
+
+/*
+ * Request fields.
+ */
+#define DEFINE_MSGF(name, flags, size, swabber, dumper) { \
+ .rmf_name = (name), \
+ .rmf_flags = (flags), \
+ .rmf_size = (size), \
+ .rmf_swabber = (void (*)(void *))(swabber), \
+ .rmf_dumper = (void (*)(void *))(dumper) \
+}
+
+struct req_msg_field RMF_GENERIC_DATA =
+ DEFINE_MSGF("generic_data", 0,
+ -1, NULL, NULL);
+EXPORT_SYMBOL(RMF_GENERIC_DATA);
+
+struct req_msg_field RMF_MGS_TARGET_INFO =
+ DEFINE_MSGF("mgs_target_info", 0,
+ sizeof(struct mgs_target_info),
+ lustre_swab_mgs_target_info, NULL);
+EXPORT_SYMBOL(RMF_MGS_TARGET_INFO);
+
+struct req_msg_field RMF_MGS_SEND_PARAM =
+ DEFINE_MSGF("mgs_send_param", 0,
+ sizeof(struct mgs_send_param),
+ NULL, NULL);
+EXPORT_SYMBOL(RMF_MGS_SEND_PARAM);
+
+struct req_msg_field RMF_MGS_CONFIG_BODY =
+ DEFINE_MSGF("mgs_config_read request", 0,
+ sizeof(struct mgs_config_body),
+ lustre_swab_mgs_config_body, NULL);
+EXPORT_SYMBOL(RMF_MGS_CONFIG_BODY);
+
+struct req_msg_field RMF_MGS_CONFIG_RES =
+ DEFINE_MSGF("mgs_config_read reply ", 0,
+ sizeof(struct mgs_config_res),
+ lustre_swab_mgs_config_res, NULL);
+EXPORT_SYMBOL(RMF_MGS_CONFIG_RES);
+
+struct req_msg_field RMF_U32 =
+ DEFINE_MSGF("generic u32", 0,
+ sizeof(__u32), lustre_swab_generic_32s, NULL);
+EXPORT_SYMBOL(RMF_U32);
+
+struct req_msg_field RMF_SETINFO_VAL =
+ DEFINE_MSGF("setinfo_val", 0, -1, NULL, NULL);
+EXPORT_SYMBOL(RMF_SETINFO_VAL);
+
+struct req_msg_field RMF_GETINFO_KEY =
+ DEFINE_MSGF("getinfo_key", 0, -1, NULL, NULL);
+EXPORT_SYMBOL(RMF_GETINFO_KEY);
+
+struct req_msg_field RMF_GETINFO_VALLEN =
+ DEFINE_MSGF("getinfo_vallen", 0,
+ sizeof(__u32), lustre_swab_generic_32s, NULL);
+EXPORT_SYMBOL(RMF_GETINFO_VALLEN);
+
+struct req_msg_field RMF_GETINFO_VAL =
+ DEFINE_MSGF("getinfo_val", 0, -1, NULL, NULL);
+EXPORT_SYMBOL(RMF_GETINFO_VAL);
+
+struct req_msg_field RMF_SEQ_OPC =
+ DEFINE_MSGF("seq_query_opc", 0,
+ sizeof(__u32), lustre_swab_generic_32s, NULL);
+EXPORT_SYMBOL(RMF_SEQ_OPC);
+
+struct req_msg_field RMF_SEQ_RANGE =
+ DEFINE_MSGF("seq_query_range", 0,
+ sizeof(struct lu_seq_range),
+ lustre_swab_lu_seq_range, NULL);
+EXPORT_SYMBOL(RMF_SEQ_RANGE);
+
+struct req_msg_field RMF_FLD_OPC =
+ DEFINE_MSGF("fld_query_opc", 0,
+ sizeof(__u32), lustre_swab_generic_32s, NULL);
+EXPORT_SYMBOL(RMF_FLD_OPC);
+
+struct req_msg_field RMF_FLD_MDFLD =
+ DEFINE_MSGF("fld_query_mdfld", 0,
+ sizeof(struct lu_seq_range),
+ lustre_swab_lu_seq_range, NULL);
+EXPORT_SYMBOL(RMF_FLD_MDFLD);
+
+struct req_msg_field RMF_MDT_BODY =
+ DEFINE_MSGF("mdt_body", 0,
+ sizeof(struct mdt_body), lustre_swab_mdt_body, NULL);
+EXPORT_SYMBOL(RMF_MDT_BODY);
+
+struct req_msg_field RMF_OBD_QUOTACTL =
+ DEFINE_MSGF("obd_quotactl", 0,
+ sizeof(struct obd_quotactl),
+ lustre_swab_obd_quotactl, NULL);
+EXPORT_SYMBOL(RMF_OBD_QUOTACTL);
+
+struct req_msg_field RMF_QUOTA_BODY =
+ DEFINE_MSGF("quota_body", 0,
+ sizeof(struct quota_body), lustre_swab_quota_body, NULL);
+EXPORT_SYMBOL(RMF_QUOTA_BODY);
+
+struct req_msg_field RMF_MDT_EPOCH =
+ DEFINE_MSGF("mdt_ioepoch", 0,
+ sizeof(struct mdt_ioepoch), lustre_swab_mdt_ioepoch, NULL);
+EXPORT_SYMBOL(RMF_MDT_EPOCH);
+
+struct req_msg_field RMF_PTLRPC_BODY =
+ DEFINE_MSGF("ptlrpc_body", 0,
+ sizeof(struct ptlrpc_body), lustre_swab_ptlrpc_body, NULL);
+EXPORT_SYMBOL(RMF_PTLRPC_BODY);
+
+struct req_msg_field RMF_CLOSE_DATA =
+ DEFINE_MSGF("data_version", 0,
+ sizeof(struct close_data), lustre_swab_close_data, NULL);
+EXPORT_SYMBOL(RMF_CLOSE_DATA);
+
+struct req_msg_field RMF_OBD_STATFS =
+ DEFINE_MSGF("obd_statfs", 0,
+ sizeof(struct obd_statfs), lustre_swab_obd_statfs, NULL);
+EXPORT_SYMBOL(RMF_OBD_STATFS);
+
+struct req_msg_field RMF_SETINFO_KEY =
+ DEFINE_MSGF("setinfo_key", 0, -1, NULL, NULL);
+EXPORT_SYMBOL(RMF_SETINFO_KEY);
+
+struct req_msg_field RMF_NAME =
+ DEFINE_MSGF("name", RMF_F_STRING, -1, NULL, NULL);
+EXPORT_SYMBOL(RMF_NAME);
+
+struct req_msg_field RMF_SYMTGT =
+ DEFINE_MSGF("symtgt", RMF_F_STRING, -1, NULL, NULL);
+EXPORT_SYMBOL(RMF_SYMTGT);
+
+struct req_msg_field RMF_TGTUUID =
+ DEFINE_MSGF("tgtuuid", RMF_F_STRING, sizeof(struct obd_uuid) - 1, NULL,
+ NULL);
+EXPORT_SYMBOL(RMF_TGTUUID);
+
+struct req_msg_field RMF_CLUUID =
+ DEFINE_MSGF("cluuid", RMF_F_STRING, sizeof(struct obd_uuid) - 1, NULL,
+ NULL);
+EXPORT_SYMBOL(RMF_CLUUID);
+
+struct req_msg_field RMF_STRING =
+ DEFINE_MSGF("string", RMF_F_STRING, -1, NULL, NULL);
+EXPORT_SYMBOL(RMF_STRING);
+
+struct req_msg_field RMF_LLOGD_BODY =
+ DEFINE_MSGF("llogd_body", 0,
+ sizeof(struct llogd_body), lustre_swab_llogd_body, NULL);
+EXPORT_SYMBOL(RMF_LLOGD_BODY);
+
+struct req_msg_field RMF_LLOG_LOG_HDR =
+ DEFINE_MSGF("llog_log_hdr", 0,
+ sizeof(struct llog_log_hdr), lustre_swab_llog_hdr, NULL);
+EXPORT_SYMBOL(RMF_LLOG_LOG_HDR);
+
+struct req_msg_field RMF_LLOGD_CONN_BODY =
+ DEFINE_MSGF("llogd_conn_body", 0,
+ sizeof(struct llogd_conn_body),
+ lustre_swab_llogd_conn_body, NULL);
+EXPORT_SYMBOL(RMF_LLOGD_CONN_BODY);
+
+/*
+ * connection handle received in MDS_CONNECT request.
+ *
+ * No swabbing needed because struct lustre_handle contains only a 64-bit cookie
+ * that the client does not interpret at all.
+ */
+struct req_msg_field RMF_CONN =
+ DEFINE_MSGF("conn", 0, sizeof(struct lustre_handle), NULL, NULL);
+EXPORT_SYMBOL(RMF_CONN);
+
+struct req_msg_field RMF_CONNECT_DATA =
+ DEFINE_MSGF("cdata",
+ RMF_F_NO_SIZE_CHECK /* we allow extra space for interop */,
+ sizeof(struct obd_connect_data),
+ lustre_swab_connect, NULL);
+EXPORT_SYMBOL(RMF_CONNECT_DATA);
+
+struct req_msg_field RMF_DLM_REQ =
+ DEFINE_MSGF("dlm_req", RMF_F_NO_SIZE_CHECK /* ldlm_request_bufsize */,
+ sizeof(struct ldlm_request),
+ lustre_swab_ldlm_request, NULL);
+EXPORT_SYMBOL(RMF_DLM_REQ);
+
+struct req_msg_field RMF_DLM_REP =
+ DEFINE_MSGF("dlm_rep", 0,
+ sizeof(struct ldlm_reply), lustre_swab_ldlm_reply, NULL);
+EXPORT_SYMBOL(RMF_DLM_REP);
+
+struct req_msg_field RMF_LDLM_INTENT =
+ DEFINE_MSGF("ldlm_intent", 0,
+ sizeof(struct ldlm_intent), lustre_swab_ldlm_intent, NULL);
+EXPORT_SYMBOL(RMF_LDLM_INTENT);
+
+struct req_msg_field RMF_DLM_LVB =
+ DEFINE_MSGF("dlm_lvb", 0, -1, NULL, NULL);
+EXPORT_SYMBOL(RMF_DLM_LVB);
+
+struct req_msg_field RMF_DLM_GL_DESC =
+ DEFINE_MSGF("dlm_gl_desc", 0, sizeof(union ldlm_gl_desc),
+ lustre_swab_gl_desc, NULL);
+EXPORT_SYMBOL(RMF_DLM_GL_DESC);
+
+struct req_msg_field RMF_MDT_MD =
+ DEFINE_MSGF("mdt_md", RMF_F_NO_SIZE_CHECK, MIN_MD_SIZE, NULL, NULL);
+EXPORT_SYMBOL(RMF_MDT_MD);
+
+struct req_msg_field RMF_REC_REINT =
+ DEFINE_MSGF("rec_reint", 0, sizeof(struct mdt_rec_reint),
+ lustre_swab_mdt_rec_reint, NULL);
+EXPORT_SYMBOL(RMF_REC_REINT);
+
+/* FIXME: this length should be defined as a macro */
+struct req_msg_field RMF_EADATA = DEFINE_MSGF("eadata", 0, -1,
+ NULL, NULL);
+EXPORT_SYMBOL(RMF_EADATA);
+
+struct req_msg_field RMF_EAVALS = DEFINE_MSGF("eavals", 0, -1, NULL, NULL);
+EXPORT_SYMBOL(RMF_EAVALS);
+
+struct req_msg_field RMF_ACL =
+ DEFINE_MSGF("acl", RMF_F_NO_SIZE_CHECK,
+ LUSTRE_POSIX_ACL_MAX_SIZE, NULL, NULL);
+EXPORT_SYMBOL(RMF_ACL);
+
+/* FIXME: this should be made to use RMF_F_STRUCT_ARRAY */
+struct req_msg_field RMF_LOGCOOKIES =
+ DEFINE_MSGF("logcookies", RMF_F_NO_SIZE_CHECK /* multiple cookies */,
+ sizeof(struct llog_cookie), NULL, NULL);
+EXPORT_SYMBOL(RMF_LOGCOOKIES);
+
+struct req_msg_field RMF_CAPA1 =
+ DEFINE_MSGF("capa", 0, sizeof(struct lustre_capa),
+ lustre_swab_lustre_capa, NULL);
+EXPORT_SYMBOL(RMF_CAPA1);
+
+struct req_msg_field RMF_CAPA2 =
+ DEFINE_MSGF("capa", 0, sizeof(struct lustre_capa),
+ lustre_swab_lustre_capa, NULL);
+EXPORT_SYMBOL(RMF_CAPA2);
+
+struct req_msg_field RMF_LAYOUT_INTENT =
+ DEFINE_MSGF("layout_intent", 0,
+ sizeof(struct layout_intent), lustre_swab_layout_intent,
+ NULL);
+EXPORT_SYMBOL(RMF_LAYOUT_INTENT);
+
+/*
+ * OST request field.
+ */
+struct req_msg_field RMF_OST_BODY =
+ DEFINE_MSGF("ost_body", 0,
+ sizeof(struct ost_body), lustre_swab_ost_body, dump_ost_body);
+EXPORT_SYMBOL(RMF_OST_BODY);
+
+struct req_msg_field RMF_OBD_IOOBJ =
+ DEFINE_MSGF("obd_ioobj", RMF_F_STRUCT_ARRAY,
+ sizeof(struct obd_ioobj), lustre_swab_obd_ioobj, dump_ioo);
+EXPORT_SYMBOL(RMF_OBD_IOOBJ);
+
+struct req_msg_field RMF_NIOBUF_REMOTE =
+ DEFINE_MSGF("niobuf_remote", RMF_F_STRUCT_ARRAY,
+ sizeof(struct niobuf_remote), lustre_swab_niobuf_remote,
+ dump_rniobuf);
+EXPORT_SYMBOL(RMF_NIOBUF_REMOTE);
+
+struct req_msg_field RMF_RCS =
+ DEFINE_MSGF("niobuf_remote", RMF_F_STRUCT_ARRAY, sizeof(__u32),
+ lustre_swab_generic_32s, dump_rcs);
+EXPORT_SYMBOL(RMF_RCS);
+
+struct req_msg_field RMF_EAVALS_LENS =
+ DEFINE_MSGF("eavals_lens", RMF_F_STRUCT_ARRAY, sizeof(__u32),
+ lustre_swab_generic_32s, NULL);
+EXPORT_SYMBOL(RMF_EAVALS_LENS);
+
+struct req_msg_field RMF_OBD_ID =
+ DEFINE_MSGF("u64", 0,
+ sizeof(u64), lustre_swab_ost_last_id, NULL);
+EXPORT_SYMBOL(RMF_OBD_ID);
+
+struct req_msg_field RMF_FID =
+ DEFINE_MSGF("fid", 0,
+ sizeof(struct lu_fid), lustre_swab_lu_fid, NULL);
+EXPORT_SYMBOL(RMF_FID);
+
+struct req_msg_field RMF_OST_ID =
+ DEFINE_MSGF("ost_id", 0,
+ sizeof(struct ost_id), lustre_swab_ost_id, NULL);
+EXPORT_SYMBOL(RMF_OST_ID);
+
+struct req_msg_field RMF_FIEMAP_KEY =
+ DEFINE_MSGF("fiemap", 0, sizeof(struct ll_fiemap_info_key),
+ lustre_swab_fiemap, NULL);
+EXPORT_SYMBOL(RMF_FIEMAP_KEY);
+
+struct req_msg_field RMF_FIEMAP_VAL =
+ DEFINE_MSGF("fiemap", 0, -1, lustre_swab_fiemap, NULL);
+EXPORT_SYMBOL(RMF_FIEMAP_VAL);
+
+struct req_msg_field RMF_IDX_INFO =
+ DEFINE_MSGF("idx_info", 0, sizeof(struct idx_info),
+ lustre_swab_idx_info, NULL);
+EXPORT_SYMBOL(RMF_IDX_INFO);
+struct req_msg_field RMF_HSM_USER_STATE =
+ DEFINE_MSGF("hsm_user_state", 0, sizeof(struct hsm_user_state),
+ lustre_swab_hsm_user_state, NULL);
+EXPORT_SYMBOL(RMF_HSM_USER_STATE);
+
+struct req_msg_field RMF_HSM_STATE_SET =
+ DEFINE_MSGF("hsm_state_set", 0, sizeof(struct hsm_state_set),
+ lustre_swab_hsm_state_set, NULL);
+EXPORT_SYMBOL(RMF_HSM_STATE_SET);
+
+struct req_msg_field RMF_MDS_HSM_PROGRESS =
+ DEFINE_MSGF("hsm_progress", 0, sizeof(struct hsm_progress_kernel),
+ lustre_swab_hsm_progress_kernel, NULL);
+EXPORT_SYMBOL(RMF_MDS_HSM_PROGRESS);
+
+struct req_msg_field RMF_MDS_HSM_CURRENT_ACTION =
+ DEFINE_MSGF("hsm_current_action", 0, sizeof(struct hsm_current_action),
+ lustre_swab_hsm_current_action, NULL);
+EXPORT_SYMBOL(RMF_MDS_HSM_CURRENT_ACTION);
+
+struct req_msg_field RMF_MDS_HSM_USER_ITEM =
+ DEFINE_MSGF("hsm_user_item", RMF_F_STRUCT_ARRAY,
+ sizeof(struct hsm_user_item), lustre_swab_hsm_user_item,
+ NULL);
+EXPORT_SYMBOL(RMF_MDS_HSM_USER_ITEM);
+
+struct req_msg_field RMF_MDS_HSM_ARCHIVE =
+ DEFINE_MSGF("hsm_archive", 0,
+ sizeof(__u32), lustre_swab_generic_32s, NULL);
+EXPORT_SYMBOL(RMF_MDS_HSM_ARCHIVE);
+
+struct req_msg_field RMF_MDS_HSM_REQUEST =
+ DEFINE_MSGF("hsm_request", 0, sizeof(struct hsm_request),
+ lustre_swab_hsm_request, NULL);
+EXPORT_SYMBOL(RMF_MDS_HSM_REQUEST);
+
+struct req_msg_field RMF_UPDATE = DEFINE_MSGF("update", 0, -1,
+ lustre_swab_update_buf, NULL);
+EXPORT_SYMBOL(RMF_UPDATE);
+
+struct req_msg_field RMF_UPDATE_REPLY = DEFINE_MSGF("update_reply", 0, -1,
+ lustre_swab_update_reply_buf,
+ NULL);
+EXPORT_SYMBOL(RMF_UPDATE_REPLY);
+
+struct req_msg_field RMF_SWAP_LAYOUTS =
+ DEFINE_MSGF("swap_layouts", 0, sizeof(struct mdc_swap_layouts),
+ lustre_swab_swap_layouts, NULL);
+EXPORT_SYMBOL(RMF_SWAP_LAYOUTS);
+/*
+ * Request formats.
+ */
+
+struct req_format {
+ const char *rf_name;
+ int rf_idx;
+ struct {
+ int nr;
+ const struct req_msg_field **d;
+ } rf_fields[RCL_NR];
+};
+
+#define DEFINE_REQ_FMT(name, client, client_nr, server, server_nr) { \
+ .rf_name = name, \
+ .rf_fields = { \
+ [RCL_CLIENT] = { \
+ .nr = client_nr, \
+ .d = client \
+ }, \
+ [RCL_SERVER] = { \
+ .nr = server_nr, \
+ .d = server \
+ } \
+ } \
+}
+
+#define DEFINE_REQ_FMT0(name, client, server) \
+DEFINE_REQ_FMT(name, client, ARRAY_SIZE(client), server, ARRAY_SIZE(server))
+
+struct req_format RQF_OBD_PING =
+ DEFINE_REQ_FMT0("OBD_PING", empty, empty);
+EXPORT_SYMBOL(RQF_OBD_PING);
+
+struct req_format RQF_OBD_SET_INFO =
+ DEFINE_REQ_FMT0("OBD_SET_INFO", obd_set_info_client, empty);
+EXPORT_SYMBOL(RQF_OBD_SET_INFO);
+
+/* Read index file through the network */
+struct req_format RQF_OBD_IDX_READ =
+ DEFINE_REQ_FMT0("OBD_IDX_READ",
+ obd_idx_read_client, obd_idx_read_server);
+EXPORT_SYMBOL(RQF_OBD_IDX_READ);
+
+struct req_format RQF_SEC_CTX =
+ DEFINE_REQ_FMT0("SEC_CTX", empty, empty);
+EXPORT_SYMBOL(RQF_SEC_CTX);
+
+struct req_format RQF_MGS_TARGET_REG =
+ DEFINE_REQ_FMT0("MGS_TARGET_REG", mgs_target_info_only,
+ mgs_target_info_only);
+EXPORT_SYMBOL(RQF_MGS_TARGET_REG);
+
+struct req_format RQF_MGS_SET_INFO =
+ DEFINE_REQ_FMT0("MGS_SET_INFO", mgs_set_info,
+ mgs_set_info);
+EXPORT_SYMBOL(RQF_MGS_SET_INFO);
+
+struct req_format RQF_MGS_CONFIG_READ =
+ DEFINE_REQ_FMT0("MGS_CONFIG_READ", mgs_config_read_client,
+ mgs_config_read_server);
+EXPORT_SYMBOL(RQF_MGS_CONFIG_READ);
+
+struct req_format RQF_SEQ_QUERY =
+ DEFINE_REQ_FMT0("SEQ_QUERY", seq_query_client, seq_query_server);
+EXPORT_SYMBOL(RQF_SEQ_QUERY);
+
+struct req_format RQF_FLD_QUERY =
+ DEFINE_REQ_FMT0("FLD_QUERY", fld_query_client, fld_query_server);
+EXPORT_SYMBOL(RQF_FLD_QUERY);
+
+struct req_format RQF_LOG_CANCEL =
+ DEFINE_REQ_FMT0("OBD_LOG_CANCEL", log_cancel_client, empty);
+EXPORT_SYMBOL(RQF_LOG_CANCEL);
+
+struct req_format RQF_MDS_QUOTACHECK =
+ DEFINE_REQ_FMT0("MDS_QUOTACHECK", quotactl_only, empty);
+EXPORT_SYMBOL(RQF_MDS_QUOTACHECK);
+
+struct req_format RQF_OST_QUOTACHECK =
+ DEFINE_REQ_FMT0("OST_QUOTACHECK", quotactl_only, empty);
+EXPORT_SYMBOL(RQF_OST_QUOTACHECK);
+
+struct req_format RQF_MDS_QUOTACTL =
+ DEFINE_REQ_FMT0("MDS_QUOTACTL", quotactl_only, quotactl_only);
+EXPORT_SYMBOL(RQF_MDS_QUOTACTL);
+
+struct req_format RQF_OST_QUOTACTL =
+ DEFINE_REQ_FMT0("OST_QUOTACTL", quotactl_only, quotactl_only);
+EXPORT_SYMBOL(RQF_OST_QUOTACTL);
+
+struct req_format RQF_QC_CALLBACK =
+ DEFINE_REQ_FMT0("QC_CALLBACK", quotactl_only, empty);
+EXPORT_SYMBOL(RQF_QC_CALLBACK);
+
+struct req_format RQF_QUOTA_DQACQ =
+ DEFINE_REQ_FMT0("QUOTA_DQACQ", quota_body_only, quota_body_only);
+EXPORT_SYMBOL(RQF_QUOTA_DQACQ);
+
+struct req_format RQF_LDLM_INTENT_QUOTA =
+ DEFINE_REQ_FMT0("LDLM_INTENT_QUOTA",
+ ldlm_intent_quota_client,
+ ldlm_intent_quota_server);
+EXPORT_SYMBOL(RQF_LDLM_INTENT_QUOTA);
+
+struct req_format RQF_MDS_GETSTATUS =
+ DEFINE_REQ_FMT0("MDS_GETSTATUS", mdt_body_only, mdt_body_capa);
+EXPORT_SYMBOL(RQF_MDS_GETSTATUS);
+
+struct req_format RQF_MDS_STATFS =
+ DEFINE_REQ_FMT0("MDS_STATFS", empty, obd_statfs_server);
+EXPORT_SYMBOL(RQF_MDS_STATFS);
+
+struct req_format RQF_MDS_SYNC =
+ DEFINE_REQ_FMT0("MDS_SYNC", mdt_body_capa, mdt_body_only);
+EXPORT_SYMBOL(RQF_MDS_SYNC);
+
+struct req_format RQF_MDS_GETATTR =
+ DEFINE_REQ_FMT0("MDS_GETATTR", mdt_body_capa, mds_getattr_server);
+EXPORT_SYMBOL(RQF_MDS_GETATTR);
+
+struct req_format RQF_MDS_GETXATTR =
+ DEFINE_REQ_FMT0("MDS_GETXATTR",
+ mds_getxattr_client, mds_getxattr_server);
+EXPORT_SYMBOL(RQF_MDS_GETXATTR);
+
+struct req_format RQF_MDS_GETATTR_NAME =
+ DEFINE_REQ_FMT0("MDS_GETATTR_NAME",
+ mds_getattr_name_client, mds_getattr_server);
+EXPORT_SYMBOL(RQF_MDS_GETATTR_NAME);
+
+struct req_format RQF_MDS_REINT =
+ DEFINE_REQ_FMT0("MDS_REINT", mds_reint_client, mdt_body_only);
+EXPORT_SYMBOL(RQF_MDS_REINT);
+
+struct req_format RQF_MDS_REINT_CREATE =
+ DEFINE_REQ_FMT0("MDS_REINT_CREATE",
+ mds_reint_create_client, mdt_body_capa);
+EXPORT_SYMBOL(RQF_MDS_REINT_CREATE);
+
+struct req_format RQF_MDS_REINT_CREATE_RMT_ACL =
+ DEFINE_REQ_FMT0("MDS_REINT_CREATE_RMT_ACL",
+ mds_reint_create_rmt_acl_client, mdt_body_capa);
+EXPORT_SYMBOL(RQF_MDS_REINT_CREATE_RMT_ACL);
+
+struct req_format RQF_MDS_REINT_CREATE_SLAVE =
+ DEFINE_REQ_FMT0("MDS_REINT_CREATE_EA",
+ mds_reint_create_slave_client, mdt_body_capa);
+EXPORT_SYMBOL(RQF_MDS_REINT_CREATE_SLAVE);
+
+struct req_format RQF_MDS_REINT_CREATE_SYM =
+ DEFINE_REQ_FMT0("MDS_REINT_CREATE_SYM",
+ mds_reint_create_sym_client, mdt_body_capa);
+EXPORT_SYMBOL(RQF_MDS_REINT_CREATE_SYM);
+
+struct req_format RQF_MDS_REINT_OPEN =
+ DEFINE_REQ_FMT0("MDS_REINT_OPEN",
+ mds_reint_open_client, mds_reint_open_server);
+EXPORT_SYMBOL(RQF_MDS_REINT_OPEN);
+
+struct req_format RQF_MDS_REINT_UNLINK =
+ DEFINE_REQ_FMT0("MDS_REINT_UNLINK", mds_reint_unlink_client,
+ mds_last_unlink_server);
+EXPORT_SYMBOL(RQF_MDS_REINT_UNLINK);
+
+struct req_format RQF_MDS_REINT_LINK =
+ DEFINE_REQ_FMT0("MDS_REINT_LINK",
+ mds_reint_link_client, mdt_body_only);
+EXPORT_SYMBOL(RQF_MDS_REINT_LINK);
+
+struct req_format RQF_MDS_REINT_RENAME =
+ DEFINE_REQ_FMT0("MDS_REINT_RENAME", mds_reint_rename_client,
+ mds_last_unlink_server);
+EXPORT_SYMBOL(RQF_MDS_REINT_RENAME);
+
+struct req_format RQF_MDS_REINT_SETATTR =
+ DEFINE_REQ_FMT0("MDS_REINT_SETATTR",
+ mds_reint_setattr_client, mds_setattr_server);
+EXPORT_SYMBOL(RQF_MDS_REINT_SETATTR);
+
+struct req_format RQF_MDS_REINT_SETXATTR =
+ DEFINE_REQ_FMT0("MDS_REINT_SETXATTR",
+ mds_reint_setxattr_client, mdt_body_only);
+EXPORT_SYMBOL(RQF_MDS_REINT_SETXATTR);
+
+struct req_format RQF_MDS_CONNECT =
+ DEFINE_REQ_FMT0("MDS_CONNECT",
+ obd_connect_client, obd_connect_server);
+EXPORT_SYMBOL(RQF_MDS_CONNECT);
+
+struct req_format RQF_MDS_DISCONNECT =
+ DEFINE_REQ_FMT0("MDS_DISCONNECT", empty, empty);
+EXPORT_SYMBOL(RQF_MDS_DISCONNECT);
+
+struct req_format RQF_MDS_GET_INFO =
+ DEFINE_REQ_FMT0("MDS_GET_INFO", mds_getinfo_client,
+ mds_getinfo_server);
+EXPORT_SYMBOL(RQF_MDS_GET_INFO);
+
+struct req_format RQF_UPDATE_OBJ =
+ DEFINE_REQ_FMT0("OBJECT_UPDATE_OBJ", mds_update_client,
+ mds_update_server);
+EXPORT_SYMBOL(RQF_UPDATE_OBJ);
+
+struct req_format RQF_LDLM_ENQUEUE =
+ DEFINE_REQ_FMT0("LDLM_ENQUEUE",
+ ldlm_enqueue_client, ldlm_enqueue_lvb_server);
+EXPORT_SYMBOL(RQF_LDLM_ENQUEUE);
+
+struct req_format RQF_LDLM_ENQUEUE_LVB =
+ DEFINE_REQ_FMT0("LDLM_ENQUEUE_LVB",
+ ldlm_enqueue_client, ldlm_enqueue_lvb_server);
+EXPORT_SYMBOL(RQF_LDLM_ENQUEUE_LVB);
+
+struct req_format RQF_LDLM_CONVERT =
+ DEFINE_REQ_FMT0("LDLM_CONVERT",
+ ldlm_enqueue_client, ldlm_enqueue_server);
+EXPORT_SYMBOL(RQF_LDLM_CONVERT);
+
+struct req_format RQF_LDLM_CANCEL =
+ DEFINE_REQ_FMT0("LDLM_CANCEL", ldlm_enqueue_client, empty);
+EXPORT_SYMBOL(RQF_LDLM_CANCEL);
+
+struct req_format RQF_LDLM_CALLBACK =
+ DEFINE_REQ_FMT0("LDLM_CALLBACK", ldlm_enqueue_client, empty);
+EXPORT_SYMBOL(RQF_LDLM_CALLBACK);
+
+struct req_format RQF_LDLM_CP_CALLBACK =
+ DEFINE_REQ_FMT0("LDLM_CP_CALLBACK", ldlm_cp_callback_client, empty);
+EXPORT_SYMBOL(RQF_LDLM_CP_CALLBACK);
+
+struct req_format RQF_LDLM_BL_CALLBACK =
+ DEFINE_REQ_FMT0("LDLM_BL_CALLBACK", ldlm_enqueue_client, empty);
+EXPORT_SYMBOL(RQF_LDLM_BL_CALLBACK);
+
+struct req_format RQF_LDLM_GL_CALLBACK =
+ DEFINE_REQ_FMT0("LDLM_GL_CALLBACK", ldlm_enqueue_client,
+ ldlm_gl_callback_server);
+EXPORT_SYMBOL(RQF_LDLM_GL_CALLBACK);
+
+struct req_format RQF_LDLM_GL_DESC_CALLBACK =
+ DEFINE_REQ_FMT0("LDLM_GL_CALLBACK", ldlm_gl_callback_desc_client,
+ ldlm_gl_callback_server);
+EXPORT_SYMBOL(RQF_LDLM_GL_DESC_CALLBACK);
+
+struct req_format RQF_LDLM_INTENT_BASIC =
+ DEFINE_REQ_FMT0("LDLM_INTENT_BASIC",
+ ldlm_intent_basic_client, ldlm_enqueue_lvb_server);
+EXPORT_SYMBOL(RQF_LDLM_INTENT_BASIC);
+
+struct req_format RQF_LDLM_INTENT =
+ DEFINE_REQ_FMT0("LDLM_INTENT",
+ ldlm_intent_client, ldlm_intent_server);
+EXPORT_SYMBOL(RQF_LDLM_INTENT);
+
+struct req_format RQF_LDLM_INTENT_LAYOUT =
+ DEFINE_REQ_FMT0("LDLM_INTENT_LAYOUT ",
+ ldlm_intent_layout_client, ldlm_enqueue_lvb_server);
+EXPORT_SYMBOL(RQF_LDLM_INTENT_LAYOUT);
+
+struct req_format RQF_LDLM_INTENT_GETATTR =
+ DEFINE_REQ_FMT0("LDLM_INTENT_GETATTR",
+ ldlm_intent_getattr_client, ldlm_intent_getattr_server);
+EXPORT_SYMBOL(RQF_LDLM_INTENT_GETATTR);
+
+struct req_format RQF_LDLM_INTENT_OPEN =
+ DEFINE_REQ_FMT0("LDLM_INTENT_OPEN",
+ ldlm_intent_open_client, ldlm_intent_open_server);
+EXPORT_SYMBOL(RQF_LDLM_INTENT_OPEN);
+
+struct req_format RQF_LDLM_INTENT_CREATE =
+ DEFINE_REQ_FMT0("LDLM_INTENT_CREATE",
+ ldlm_intent_create_client, ldlm_intent_getattr_server);
+EXPORT_SYMBOL(RQF_LDLM_INTENT_CREATE);
+
+struct req_format RQF_LDLM_INTENT_UNLINK =
+ DEFINE_REQ_FMT0("LDLM_INTENT_UNLINK",
+ ldlm_intent_unlink_client, ldlm_intent_server);
+EXPORT_SYMBOL(RQF_LDLM_INTENT_UNLINK);
+
+struct req_format RQF_LDLM_INTENT_GETXATTR =
+ DEFINE_REQ_FMT0("LDLM_INTENT_GETXATTR",
+ ldlm_intent_getxattr_client,
+ ldlm_intent_getxattr_server);
+EXPORT_SYMBOL(RQF_LDLM_INTENT_GETXATTR);
+
+struct req_format RQF_MDS_CLOSE =
+ DEFINE_REQ_FMT0("MDS_CLOSE",
+ mdt_close_client, mds_last_unlink_server);
+EXPORT_SYMBOL(RQF_MDS_CLOSE);
+
+struct req_format RQF_MDS_RELEASE_CLOSE =
+ DEFINE_REQ_FMT0("MDS_CLOSE",
+ mdt_release_close_client, mds_last_unlink_server);
+EXPORT_SYMBOL(RQF_MDS_RELEASE_CLOSE);
+
+struct req_format RQF_MDS_PIN =
+ DEFINE_REQ_FMT0("MDS_PIN",
+ mdt_body_capa, mdt_body_only);
+EXPORT_SYMBOL(RQF_MDS_PIN);
+
+struct req_format RQF_MDS_UNPIN =
+ DEFINE_REQ_FMT0("MDS_UNPIN", mdt_body_only, empty);
+EXPORT_SYMBOL(RQF_MDS_UNPIN);
+
+struct req_format RQF_MDS_DONE_WRITING =
+ DEFINE_REQ_FMT0("MDS_DONE_WRITING",
+ mdt_close_client, mdt_body_only);
+EXPORT_SYMBOL(RQF_MDS_DONE_WRITING);
+
+struct req_format RQF_MDS_READPAGE =
+ DEFINE_REQ_FMT0("MDS_READPAGE",
+ mdt_body_capa, mdt_body_only);
+EXPORT_SYMBOL(RQF_MDS_READPAGE);
+
+struct req_format RQF_MDS_HSM_ACTION =
+ DEFINE_REQ_FMT0("MDS_HSM_ACTION", mdt_body_capa, mdt_hsm_action_server);
+EXPORT_SYMBOL(RQF_MDS_HSM_ACTION);
+
+struct req_format RQF_MDS_HSM_PROGRESS =
+ DEFINE_REQ_FMT0("MDS_HSM_PROGRESS", mdt_hsm_progress, empty);
+EXPORT_SYMBOL(RQF_MDS_HSM_PROGRESS);
+
+struct req_format RQF_MDS_HSM_CT_REGISTER =
+ DEFINE_REQ_FMT0("MDS_HSM_CT_REGISTER", mdt_hsm_ct_register, empty);
+EXPORT_SYMBOL(RQF_MDS_HSM_CT_REGISTER);
+
+struct req_format RQF_MDS_HSM_CT_UNREGISTER =
+ DEFINE_REQ_FMT0("MDS_HSM_CT_UNREGISTER", mdt_hsm_ct_unregister, empty);
+EXPORT_SYMBOL(RQF_MDS_HSM_CT_UNREGISTER);
+
+struct req_format RQF_MDS_HSM_STATE_GET =
+ DEFINE_REQ_FMT0("MDS_HSM_STATE_GET",
+ mdt_body_capa, mdt_hsm_state_get_server);
+EXPORT_SYMBOL(RQF_MDS_HSM_STATE_GET);
+
+struct req_format RQF_MDS_HSM_STATE_SET =
+ DEFINE_REQ_FMT0("MDS_HSM_STATE_SET", mdt_hsm_state_set, empty);
+EXPORT_SYMBOL(RQF_MDS_HSM_STATE_SET);
+
+struct req_format RQF_MDS_HSM_REQUEST =
+ DEFINE_REQ_FMT0("MDS_HSM_REQUEST", mdt_hsm_request, empty);
+EXPORT_SYMBOL(RQF_MDS_HSM_REQUEST);
+
+struct req_format RQF_MDS_SWAP_LAYOUTS =
+ DEFINE_REQ_FMT0("MDS_SWAP_LAYOUTS",
+ mdt_swap_layouts, empty);
+EXPORT_SYMBOL(RQF_MDS_SWAP_LAYOUTS);
+
+/* This is for split */
+struct req_format RQF_MDS_WRITEPAGE =
+ DEFINE_REQ_FMT0("MDS_WRITEPAGE",
+ mdt_body_capa, mdt_body_only);
+EXPORT_SYMBOL(RQF_MDS_WRITEPAGE);
+
+struct req_format RQF_MDS_IS_SUBDIR =
+ DEFINE_REQ_FMT0("MDS_IS_SUBDIR",
+ mdt_body_only, mdt_body_only);
+EXPORT_SYMBOL(RQF_MDS_IS_SUBDIR);
+
+struct req_format RQF_LLOG_ORIGIN_HANDLE_CREATE =
+ DEFINE_REQ_FMT0("LLOG_ORIGIN_HANDLE_CREATE",
+ llog_origin_handle_create_client, llogd_body_only);
+EXPORT_SYMBOL(RQF_LLOG_ORIGIN_HANDLE_CREATE);
+
+struct req_format RQF_LLOG_ORIGIN_HANDLE_DESTROY =
+ DEFINE_REQ_FMT0("LLOG_ORIGIN_HANDLE_DESTROY",
+ llogd_body_only, llogd_body_only);
+EXPORT_SYMBOL(RQF_LLOG_ORIGIN_HANDLE_DESTROY);
+
+struct req_format RQF_LLOG_ORIGIN_HANDLE_NEXT_BLOCK =
+ DEFINE_REQ_FMT0("LLOG_ORIGIN_HANDLE_NEXT_BLOCK",
+ llogd_body_only, llog_origin_handle_next_block_server);
+EXPORT_SYMBOL(RQF_LLOG_ORIGIN_HANDLE_NEXT_BLOCK);
+
+struct req_format RQF_LLOG_ORIGIN_HANDLE_PREV_BLOCK =
+ DEFINE_REQ_FMT0("LLOG_ORIGIN_HANDLE_PREV_BLOCK",
+ llogd_body_only, llog_origin_handle_next_block_server);
+EXPORT_SYMBOL(RQF_LLOG_ORIGIN_HANDLE_PREV_BLOCK);
+
+struct req_format RQF_LLOG_ORIGIN_HANDLE_READ_HEADER =
+ DEFINE_REQ_FMT0("LLOG_ORIGIN_HANDLE_READ_HEADER",
+ llogd_body_only, llog_log_hdr_only);
+EXPORT_SYMBOL(RQF_LLOG_ORIGIN_HANDLE_READ_HEADER);
+
+struct req_format RQF_LLOG_ORIGIN_CONNECT =
+ DEFINE_REQ_FMT0("LLOG_ORIGIN_CONNECT", llogd_conn_body_only, empty);
+EXPORT_SYMBOL(RQF_LLOG_ORIGIN_CONNECT);
+
+struct req_format RQF_CONNECT =
+ DEFINE_REQ_FMT0("CONNECT", obd_connect_client, obd_connect_server);
+EXPORT_SYMBOL(RQF_CONNECT);
+
+struct req_format RQF_OST_CONNECT =
+ DEFINE_REQ_FMT0("OST_CONNECT",
+ obd_connect_client, obd_connect_server);
+EXPORT_SYMBOL(RQF_OST_CONNECT);
+
+struct req_format RQF_OST_DISCONNECT =
+ DEFINE_REQ_FMT0("OST_DISCONNECT", empty, empty);
+EXPORT_SYMBOL(RQF_OST_DISCONNECT);
+
+struct req_format RQF_OST_GETATTR =
+ DEFINE_REQ_FMT0("OST_GETATTR", ost_body_capa, ost_body_only);
+EXPORT_SYMBOL(RQF_OST_GETATTR);
+
+struct req_format RQF_OST_SETATTR =
+ DEFINE_REQ_FMT0("OST_SETATTR", ost_body_capa, ost_body_only);
+EXPORT_SYMBOL(RQF_OST_SETATTR);
+
+struct req_format RQF_OST_CREATE =
+ DEFINE_REQ_FMT0("OST_CREATE", ost_body_only, ost_body_only);
+EXPORT_SYMBOL(RQF_OST_CREATE);
+
+struct req_format RQF_OST_PUNCH =
+ DEFINE_REQ_FMT0("OST_PUNCH", ost_body_capa, ost_body_only);
+EXPORT_SYMBOL(RQF_OST_PUNCH);
+
+struct req_format RQF_OST_SYNC =
+ DEFINE_REQ_FMT0("OST_SYNC", ost_body_capa, ost_body_only);
+EXPORT_SYMBOL(RQF_OST_SYNC);
+
+struct req_format RQF_OST_DESTROY =
+ DEFINE_REQ_FMT0("OST_DESTROY", ost_destroy_client, ost_body_only);
+EXPORT_SYMBOL(RQF_OST_DESTROY);
+
+struct req_format RQF_OST_BRW_READ =
+ DEFINE_REQ_FMT0("OST_BRW_READ", ost_brw_client, ost_brw_read_server);
+EXPORT_SYMBOL(RQF_OST_BRW_READ);
+
+struct req_format RQF_OST_BRW_WRITE =
+ DEFINE_REQ_FMT0("OST_BRW_WRITE", ost_brw_client, ost_brw_write_server);
+EXPORT_SYMBOL(RQF_OST_BRW_WRITE);
+
+struct req_format RQF_OST_STATFS =
+ DEFINE_REQ_FMT0("OST_STATFS", empty, obd_statfs_server);
+EXPORT_SYMBOL(RQF_OST_STATFS);
+
+struct req_format RQF_OST_SET_GRANT_INFO =
+ DEFINE_REQ_FMT0("OST_SET_GRANT_INFO", ost_grant_shrink_client,
+ ost_body_only);
+EXPORT_SYMBOL(RQF_OST_SET_GRANT_INFO);
+
+struct req_format RQF_OST_GET_INFO_GENERIC =
+ DEFINE_REQ_FMT0("OST_GET_INFO", ost_get_info_generic_client,
+ ost_get_info_generic_server);
+EXPORT_SYMBOL(RQF_OST_GET_INFO_GENERIC);
+
+struct req_format RQF_OST_GET_INFO_LAST_ID =
+ DEFINE_REQ_FMT0("OST_GET_INFO_LAST_ID", ost_get_info_generic_client,
+ ost_get_last_id_server);
+EXPORT_SYMBOL(RQF_OST_GET_INFO_LAST_ID);
+
+struct req_format RQF_OST_GET_INFO_LAST_FID =
+ DEFINE_REQ_FMT0("OST_GET_INFO_LAST_FID", obd_set_info_client,
+ ost_get_last_fid_server);
+EXPORT_SYMBOL(RQF_OST_GET_INFO_LAST_FID);
+
+struct req_format RQF_OST_SET_INFO_LAST_FID =
+ DEFINE_REQ_FMT0("OST_SET_INFO_LAST_FID", obd_set_info_client,
+ empty);
+EXPORT_SYMBOL(RQF_OST_SET_INFO_LAST_FID);
+
+struct req_format RQF_OST_GET_INFO_FIEMAP =
+ DEFINE_REQ_FMT0("OST_GET_INFO_FIEMAP", ost_get_fiemap_client,
+ ost_get_fiemap_server);
+EXPORT_SYMBOL(RQF_OST_GET_INFO_FIEMAP);
+
+#if !defined(__REQ_LAYOUT_USER__)
+
+/* Convenience macro */
+#define FMT_FIELD(fmt, i, j) (fmt)->rf_fields[(i)].d[(j)]
+
+/**
+ * Initializes the capsule abstraction by computing and setting the \a rf_idx
+ * field of RQFs and the \a rmf_offset field of RMFs.
+ */
+int req_layout_init(void)
+{
+ int i;
+ int j;
+ int k;
+ struct req_format *rf = NULL;
+
+ for (i = 0; i < ARRAY_SIZE(req_formats); ++i) {
+ rf = req_formats[i];
+ rf->rf_idx = i;
+ for (j = 0; j < RCL_NR; ++j) {
+ LASSERT(rf->rf_fields[j].nr <= REQ_MAX_FIELD_NR);
+ for (k = 0; k < rf->rf_fields[j].nr; ++k) {
+ struct req_msg_field *field;
+
+ field = (typeof(field))rf->rf_fields[j].d[k];
+ LASSERT(!(field->rmf_flags & RMF_F_STRUCT_ARRAY)
+ || field->rmf_size > 0);
+ LASSERT(field->rmf_offset[i][j] == 0);
+ /*
+ * k + 1 to detect unused format/field
+ * combinations.
+ */
+ field->rmf_offset[i][j] = k + 1;
+ }
+ }
+ }
+ return 0;
+}
+EXPORT_SYMBOL(req_layout_init);
+
+void req_layout_fini(void)
+{
+}
+EXPORT_SYMBOL(req_layout_fini);
+
+/**
+ * Initializes the expected sizes of each RMF in a \a pill (\a rc_area) to -1.
+ *
+ * Actual/expected field sizes are set elsewhere in functions in this file:
+ * req_capsule_init(), req_capsule_server_pack(), req_capsule_set_size() and
+ * req_capsule_msg_size(). The \a rc_area information is used by.
+ * ptlrpc_request_set_replen().
+ */
+void req_capsule_init_area(struct req_capsule *pill)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(pill->rc_area[RCL_CLIENT]); i++) {
+ pill->rc_area[RCL_CLIENT][i] = -1;
+ pill->rc_area[RCL_SERVER][i] = -1;
+ }
+}
+EXPORT_SYMBOL(req_capsule_init_area);
+
+/**
+ * Initialize a pill.
+ *
+ * The \a location indicates whether the caller is executing on the client side
+ * (RCL_CLIENT) or server side (RCL_SERVER)..
+ */
+void req_capsule_init(struct req_capsule *pill,
+ struct ptlrpc_request *req,
+ enum req_location location)
+{
+ LASSERT(location == RCL_SERVER || location == RCL_CLIENT);
+
+ /*
+ * Today all capsules are embedded in ptlrpc_request structs,
+ * but just in case that ever isn't the case, we don't reach
+ * into req unless req != NULL and pill is the one embedded in
+ * the req.
+ *
+ * The req->rq_pill_init flag makes it safe to initialize a pill
+ * twice, which might happen in the OST paths as a result of the
+ * high-priority RPC queue getting peeked at before ost_handle()
+ * handles an OST RPC.
+ */
+ if (req != NULL && pill == &req->rq_pill && req->rq_pill_init)
+ return;
+
+ memset(pill, 0, sizeof(*pill));
+ pill->rc_req = req;
+ pill->rc_loc = location;
+ req_capsule_init_area(pill);
+
+ if (req != NULL && pill == &req->rq_pill)
+ req->rq_pill_init = 1;
+}
+EXPORT_SYMBOL(req_capsule_init);
+
+void req_capsule_fini(struct req_capsule *pill)
+{
+}
+EXPORT_SYMBOL(req_capsule_fini);
+
+static int __req_format_is_sane(const struct req_format *fmt)
+{
+ return
+ 0 <= fmt->rf_idx && fmt->rf_idx < ARRAY_SIZE(req_formats) &&
+ req_formats[fmt->rf_idx] == fmt;
+}
+
+static struct lustre_msg *__req_msg(const struct req_capsule *pill,
+ enum req_location loc)
+{
+ struct ptlrpc_request *req;
+
+ req = pill->rc_req;
+ return loc == RCL_CLIENT ? req->rq_reqmsg : req->rq_repmsg;
+}
+
+/**
+ * Set the format (\a fmt) of a \a pill; format changes are not allowed here
+ * (see req_capsule_extend()).
+ */
+void req_capsule_set(struct req_capsule *pill, const struct req_format *fmt)
+{
+ LASSERT(pill->rc_fmt == NULL || pill->rc_fmt == fmt);
+ LASSERT(__req_format_is_sane(fmt));
+
+ pill->rc_fmt = fmt;
+}
+EXPORT_SYMBOL(req_capsule_set);
+
+/**
+ * Fills in any parts of the \a rc_area of a \a pill that haven't been filled in
+ * yet.
+
+ * \a rc_area is an array of REQ_MAX_FIELD_NR elements, used to store sizes of
+ * variable-sized fields. The field sizes come from the declared \a rmf_size
+ * field of a \a pill's \a rc_fmt's RMF's.
+ */
+int req_capsule_filled_sizes(struct req_capsule *pill,
+ enum req_location loc)
+{
+ const struct req_format *fmt = pill->rc_fmt;
+ int i;
+
+ LASSERT(fmt != NULL);
+
+ for (i = 0; i < fmt->rf_fields[loc].nr; ++i) {
+ if (pill->rc_area[loc][i] == -1) {
+ pill->rc_area[loc][i] =
+ fmt->rf_fields[loc].d[i]->rmf_size;
+ if (pill->rc_area[loc][i] == -1) {
+ /*
+ * Skip the following fields.
+ *
+ * If this LASSERT() trips then you're missing a
+ * call to req_capsule_set_size().
+ */
+ LASSERT(loc != RCL_SERVER);
+ break;
+ }
+ }
+ }
+ return i;
+}
+EXPORT_SYMBOL(req_capsule_filled_sizes);
+
+/**
+ * Capsule equivalent of lustre_pack_request() and lustre_pack_reply().
+ *
+ * This function uses the \a pill's \a rc_area as filled in by
+ * req_capsule_set_size() or req_capsule_filled_sizes() (the latter is called by
+ * this function).
+ */
+int req_capsule_server_pack(struct req_capsule *pill)
+{
+ const struct req_format *fmt;
+ int count;
+ int rc;
+
+ LASSERT(pill->rc_loc == RCL_SERVER);
+ fmt = pill->rc_fmt;
+ LASSERT(fmt != NULL);
+
+ count = req_capsule_filled_sizes(pill, RCL_SERVER);
+ rc = lustre_pack_reply(pill->rc_req, count,
+ pill->rc_area[RCL_SERVER], NULL);
+ if (rc != 0) {
+ DEBUG_REQ(D_ERROR, pill->rc_req,
+ "Cannot pack %d fields in format `%s': ",
+ count, fmt->rf_name);
+ }
+ return rc;
+}
+EXPORT_SYMBOL(req_capsule_server_pack);
+
+/**
+ * Returns the PTLRPC request or reply (\a loc) buffer offset of a \a pill
+ * corresponding to the given RMF (\a field).
+ */
+static int __req_capsule_offset(const struct req_capsule *pill,
+ const struct req_msg_field *field,
+ enum req_location loc)
+{
+ int offset;
+
+ offset = field->rmf_offset[pill->rc_fmt->rf_idx][loc];
+ LASSERTF(offset > 0, "%s:%s, off=%d, loc=%d\n",
+ pill->rc_fmt->rf_name,
+ field->rmf_name, offset, loc);
+ offset--;
+
+ LASSERT(0 <= offset && offset < REQ_MAX_FIELD_NR);
+ return offset;
+}
+
+/**
+ * Helper for __req_capsule_get(); swabs value / array of values and/or dumps
+ * them if desired.
+ */
+static
+void
+swabber_dumper_helper(struct req_capsule *pill,
+ const struct req_msg_field *field,
+ enum req_location loc,
+ int offset,
+ void *value, int len, int dump, void (*swabber)(void *))
+{
+ void *p;
+ int i;
+ int n;
+ int do_swab;
+ int inout = loc == RCL_CLIENT;
+
+ swabber = swabber ?: field->rmf_swabber;
+
+ if (ptlrpc_buf_need_swab(pill->rc_req, inout, offset) &&
+ swabber != NULL && value != NULL)
+ do_swab = 1;
+ else
+ do_swab = 0;
+
+ if (!field->rmf_dumper)
+ dump = 0;
+
+ if (!(field->rmf_flags & RMF_F_STRUCT_ARRAY)) {
+ if (dump) {
+ CDEBUG(D_RPCTRACE, "Dump of %sfield %s follows\n",
+ do_swab ? "unswabbed " : "", field->rmf_name);
+ field->rmf_dumper(value);
+ }
+ if (!do_swab)
+ return;
+ swabber(value);
+ ptlrpc_buf_set_swabbed(pill->rc_req, inout, offset);
+ if (dump) {
+ CDEBUG(D_RPCTRACE, "Dump of swabbed field %s follows\n",
+ field->rmf_name);
+ field->rmf_dumper(value);
+ }
+
+ return;
+ }
+
+ /*
+ * We're swabbing an array; swabber() swabs a single array element, so
+ * swab every element.
+ */
+ LASSERT((len % field->rmf_size) == 0);
+ for (p = value, i = 0, n = len / field->rmf_size;
+ i < n;
+ i++, p += field->rmf_size) {
+ if (dump) {
+ CDEBUG(D_RPCTRACE, "Dump of %sarray field %s, element %d follows\n",
+ do_swab ? "unswabbed " : "", field->rmf_name, i);
+ field->rmf_dumper(p);
+ }
+ if (!do_swab)
+ continue;
+ swabber(p);
+ if (dump) {
+ CDEBUG(D_RPCTRACE, "Dump of swabbed array field %s, element %d follows\n",
+ field->rmf_name, i);
+ field->rmf_dumper(value);
+ }
+ }
+ if (do_swab)
+ ptlrpc_buf_set_swabbed(pill->rc_req, inout, offset);
+}
+
+/**
+ * Returns the pointer to a PTLRPC request or reply (\a loc) buffer of a \a pill
+ * corresponding to the given RMF (\a field).
+ *
+ * The buffer will be swabbed using the given \a swabber. If \a swabber == NULL
+ * then the \a rmf_swabber from the RMF will be used. Soon there will be no
+ * calls to __req_capsule_get() with a non-NULL \a swabber; \a swabber will then
+ * be removed. Fields with the \a RMF_F_STRUCT_ARRAY flag set will have each
+ * element of the array swabbed.
+ */
+static void *__req_capsule_get(struct req_capsule *pill,
+ const struct req_msg_field *field,
+ enum req_location loc,
+ void (*swabber)(void *),
+ int dump)
+{
+ const struct req_format *fmt;
+ struct lustre_msg *msg;
+ void *value;
+ int len;
+ int offset;
+
+ void *(*getter)(struct lustre_msg *m, int n, int minlen);
+
+ static const char *rcl_names[RCL_NR] = {
+ [RCL_CLIENT] = "client",
+ [RCL_SERVER] = "server"
+ };
+
+ LASSERT(pill != NULL);
+ LASSERT(pill != LP_POISON);
+ fmt = pill->rc_fmt;
+ LASSERT(fmt != NULL);
+ LASSERT(fmt != LP_POISON);
+ LASSERT(__req_format_is_sane(fmt));
+
+ offset = __req_capsule_offset(pill, field, loc);
+
+ msg = __req_msg(pill, loc);
+ LASSERT(msg != NULL);
+
+ getter = (field->rmf_flags & RMF_F_STRING) ?
+ (typeof(getter))lustre_msg_string : lustre_msg_buf;
+
+ if (field->rmf_flags & RMF_F_STRUCT_ARRAY) {
+ /*
+ * We've already asserted that field->rmf_size > 0 in
+ * req_layout_init().
+ */
+ len = lustre_msg_buflen(msg, offset);
+ if ((len % field->rmf_size) != 0) {
+ CERROR("%s: array field size mismatch %d modulo %d != 0 (%d)\n",
+ field->rmf_name, len, field->rmf_size, loc);
+ return NULL;
+ }
+ } else if (pill->rc_area[loc][offset] != -1) {
+ len = pill->rc_area[loc][offset];
+ } else {
+ len = max(field->rmf_size, 0);
+ }
+ value = getter(msg, offset, len);
+
+ if (value == NULL) {
+ DEBUG_REQ(D_ERROR, pill->rc_req,
+ "Wrong buffer for field `%s' (%d of %d) in format `%s': %d vs. %d (%s)\n",
+ field->rmf_name, offset, lustre_msg_bufcount(msg),
+ fmt->rf_name, lustre_msg_buflen(msg, offset), len,
+ rcl_names[loc]);
+ } else {
+ swabber_dumper_helper(pill, field, loc, offset, value, len,
+ dump, swabber);
+ }
+
+ return value;
+}
+
+/**
+ * Dump a request and/or reply
+ */
+static void __req_capsule_dump(struct req_capsule *pill, enum req_location loc)
+{
+ const struct req_format *fmt;
+ const struct req_msg_field *field;
+ int len;
+ int i;
+
+ fmt = pill->rc_fmt;
+
+ DEBUG_REQ(D_RPCTRACE, pill->rc_req, "BEGIN REQ CAPSULE DUMP\n");
+ for (i = 0; i < fmt->rf_fields[loc].nr; ++i) {
+ field = FMT_FIELD(fmt, loc, i);
+ if (field->rmf_dumper == NULL) {
+ /*
+ * FIXME Add a default hex dumper for fields that don't
+ * have a specific dumper
+ */
+ len = req_capsule_get_size(pill, field, loc);
+ CDEBUG(D_RPCTRACE, "Field %s has no dumper function; field size is %d\n",
+ field->rmf_name, len);
+ } else {
+ /* It's the dumping side-effect that we're interested in */
+ (void) __req_capsule_get(pill, field, loc, NULL, 1);
+ }
+ }
+ CDEBUG(D_RPCTRACE, "END REQ CAPSULE DUMP\n");
+}
+
+/**
+ * Dump a request.
+ */
+void req_capsule_client_dump(struct req_capsule *pill)
+{
+ __req_capsule_dump(pill, RCL_CLIENT);
+}
+EXPORT_SYMBOL(req_capsule_client_dump);
+
+/**
+ * Dump a reply
+ */
+void req_capsule_server_dump(struct req_capsule *pill)
+{
+ __req_capsule_dump(pill, RCL_SERVER);
+}
+EXPORT_SYMBOL(req_capsule_server_dump);
+
+/**
+ * Trivial wrapper around __req_capsule_get(), that returns the PTLRPC request
+ * buffer corresponding to the given RMF (\a field) of a \a pill.
+ */
+void *req_capsule_client_get(struct req_capsule *pill,
+ const struct req_msg_field *field)
+{
+ return __req_capsule_get(pill, field, RCL_CLIENT, NULL, 0);
+}
+EXPORT_SYMBOL(req_capsule_client_get);
+
+/**
+ * Same as req_capsule_client_get(), but with a \a swabber argument.
+ *
+ * Currently unused; will be removed when req_capsule_server_swab_get() is
+ * unused too.
+ */
+void *req_capsule_client_swab_get(struct req_capsule *pill,
+ const struct req_msg_field *field,
+ void *swabber)
+{
+ return __req_capsule_get(pill, field, RCL_CLIENT, swabber, 0);
+}
+EXPORT_SYMBOL(req_capsule_client_swab_get);
+
+/**
+ * Utility that combines req_capsule_set_size() and req_capsule_client_get().
+ *
+ * First the \a pill's request \a field's size is set (\a rc_area) using
+ * req_capsule_set_size() with the given \a len. Then the actual buffer is
+ * returned.
+ */
+void *req_capsule_client_sized_get(struct req_capsule *pill,
+ const struct req_msg_field *field,
+ int len)
+{
+ req_capsule_set_size(pill, field, RCL_CLIENT, len);
+ return __req_capsule_get(pill, field, RCL_CLIENT, NULL, 0);
+}
+EXPORT_SYMBOL(req_capsule_client_sized_get);
+
+/**
+ * Trivial wrapper around __req_capsule_get(), that returns the PTLRPC reply
+ * buffer corresponding to the given RMF (\a field) of a \a pill.
+ */
+void *req_capsule_server_get(struct req_capsule *pill,
+ const struct req_msg_field *field)
+{
+ return __req_capsule_get(pill, field, RCL_SERVER, NULL, 0);
+}
+EXPORT_SYMBOL(req_capsule_server_get);
+
+/**
+ * Same as req_capsule_server_get(), but with a \a swabber argument.
+ *
+ * Ideally all swabbing should be done pursuant to RMF definitions, with no
+ * swabbing done outside this capsule abstraction.
+ */
+void *req_capsule_server_swab_get(struct req_capsule *pill,
+ const struct req_msg_field *field,
+ void *swabber)
+{
+ return __req_capsule_get(pill, field, RCL_SERVER, swabber, 0);
+}
+EXPORT_SYMBOL(req_capsule_server_swab_get);
+
+/**
+ * Utility that combines req_capsule_set_size() and req_capsule_server_get().
+ *
+ * First the \a pill's request \a field's size is set (\a rc_area) using
+ * req_capsule_set_size() with the given \a len. Then the actual buffer is
+ * returned.
+ */
+void *req_capsule_server_sized_get(struct req_capsule *pill,
+ const struct req_msg_field *field,
+ int len)
+{
+ req_capsule_set_size(pill, field, RCL_SERVER, len);
+ return __req_capsule_get(pill, field, RCL_SERVER, NULL, 0);
+}
+EXPORT_SYMBOL(req_capsule_server_sized_get);
+
+void *req_capsule_server_sized_swab_get(struct req_capsule *pill,
+ const struct req_msg_field *field,
+ int len, void *swabber)
+{
+ req_capsule_set_size(pill, field, RCL_SERVER, len);
+ return __req_capsule_get(pill, field, RCL_SERVER, swabber, 0);
+}
+EXPORT_SYMBOL(req_capsule_server_sized_swab_get);
+
+/**
+ * Returns the buffer of a \a pill corresponding to the given \a field from the
+ * request (if the caller is executing on the server-side) or reply (if the
+ * caller is executing on the client-side).
+ *
+ * This function convenient for use is code that could be executed on the
+ * client and server alike.
+ */
+const void *req_capsule_other_get(struct req_capsule *pill,
+ const struct req_msg_field *field)
+{
+ return __req_capsule_get(pill, field, pill->rc_loc ^ 1, NULL, 0);
+}
+EXPORT_SYMBOL(req_capsule_other_get);
+
+/**
+ * Set the size of the PTLRPC request/reply (\a loc) buffer for the given \a
+ * field of the given \a pill.
+ *
+ * This function must be used when constructing variable sized fields of a
+ * request or reply.
+ */
+void req_capsule_set_size(struct req_capsule *pill,
+ const struct req_msg_field *field,
+ enum req_location loc, int size)
+{
+ LASSERT(loc == RCL_SERVER || loc == RCL_CLIENT);
+
+ if ((size != field->rmf_size) &&
+ (field->rmf_size != -1) &&
+ !(field->rmf_flags & RMF_F_NO_SIZE_CHECK) &&
+ (size > 0)) {
+ if ((field->rmf_flags & RMF_F_STRUCT_ARRAY) &&
+ (size % field->rmf_size != 0)) {
+ CERROR("%s: array field size mismatch %d %% %d != 0 (%d)\n",
+ field->rmf_name, size, field->rmf_size, loc);
+ LBUG();
+ } else if (!(field->rmf_flags & RMF_F_STRUCT_ARRAY) &&
+ size < field->rmf_size) {
+ CERROR("%s: field size mismatch %d != %d (%d)\n",
+ field->rmf_name, size, field->rmf_size, loc);
+ LBUG();
+ }
+ }
+
+ pill->rc_area[loc][__req_capsule_offset(pill, field, loc)] = size;
+}
+EXPORT_SYMBOL(req_capsule_set_size);
+
+/**
+ * Return the actual PTLRPC buffer length of a request or reply (\a loc)
+ * for the given \a pill's given \a field.
+ *
+ * NB: this function doesn't correspond with req_capsule_set_size(), which
+ * actually sets the size in pill.rc_area[loc][offset], but this function
+ * returns the message buflen[offset], maybe we should use another name.
+ */
+int req_capsule_get_size(const struct req_capsule *pill,
+ const struct req_msg_field *field,
+ enum req_location loc)
+{
+ LASSERT(loc == RCL_SERVER || loc == RCL_CLIENT);
+
+ return lustre_msg_buflen(__req_msg(pill, loc),
+ __req_capsule_offset(pill, field, loc));
+}
+EXPORT_SYMBOL(req_capsule_get_size);
+
+/**
+ * Wrapper around lustre_msg_size() that returns the PTLRPC size needed for the
+ * given \a pill's request or reply (\a loc) given the field size recorded in
+ * the \a pill's rc_area.
+ *
+ * See also req_capsule_set_size().
+ */
+int req_capsule_msg_size(struct req_capsule *pill, enum req_location loc)
+{
+ return lustre_msg_size(pill->rc_req->rq_import->imp_msg_magic,
+ pill->rc_fmt->rf_fields[loc].nr,
+ pill->rc_area[loc]);
+}
+
+/**
+ * While req_capsule_msg_size() computes the size of a PTLRPC request or reply
+ * (\a loc) given a \a pill's \a rc_area, this function computes the size of a
+ * PTLRPC request or reply given only an RQF (\a fmt).
+ *
+ * This function should not be used for formats which contain variable size
+ * fields.
+ */
+int req_capsule_fmt_size(__u32 magic, const struct req_format *fmt,
+ enum req_location loc)
+{
+ int size, i = 0;
+
+ /*
+ * This function should probably LASSERT() that fmt has no fields with
+ * RMF_F_STRUCT_ARRAY in rmf_flags, since we can't know here how many
+ * elements in the array there will ultimately be, but then, we could
+ * assume that there will be at least one element, and that's just what
+ * we do.
+ */
+ size = lustre_msg_hdr_size(magic, fmt->rf_fields[loc].nr);
+ if (size < 0)
+ return size;
+
+ for (; i < fmt->rf_fields[loc].nr; ++i)
+ if (fmt->rf_fields[loc].d[i]->rmf_size != -1)
+ size += cfs_size_round(fmt->rf_fields[loc].d[i]->
+ rmf_size);
+ return size;
+}
+
+/**
+ * Changes the format of an RPC.
+ *
+ * The pill must already have been initialized, which means that it already has
+ * a request format. The new format \a fmt must be an extension of the pill's
+ * old format. Specifically: the new format must have as many request and reply
+ * fields as the old one, and all fields shared by the old and new format must
+ * be at least as large in the new format.
+ *
+ * The new format's fields may be of different "type" than the old format, but
+ * only for fields that are "opaque" blobs: fields which have a) have no
+ * \a rmf_swabber, b) \a rmf_flags == 0 or RMF_F_NO_SIZE_CHECK, and c) \a
+ * rmf_size == -1 or \a rmf_flags == RMF_F_NO_SIZE_CHECK. For example,
+ * OBD_SET_INFO has a key field and an opaque value field that gets interpreted
+ * according to the key field. When the value, according to the key, contains a
+ * structure (or array thereof) to be swabbed, the format should be changed to
+ * one where the value field has \a rmf_size/rmf_flags/rmf_swabber set
+ * accordingly.
+ */
+void req_capsule_extend(struct req_capsule *pill, const struct req_format *fmt)
+{
+ int i;
+ int j;
+
+ const struct req_format *old;
+
+ LASSERT(pill->rc_fmt != NULL);
+ LASSERT(__req_format_is_sane(fmt));
+
+ old = pill->rc_fmt;
+ /*
+ * Sanity checking...
+ */
+ for (i = 0; i < RCL_NR; ++i) {
+ LASSERT(fmt->rf_fields[i].nr >= old->rf_fields[i].nr);
+ for (j = 0; j < old->rf_fields[i].nr - 1; ++j) {
+ const struct req_msg_field *ofield = FMT_FIELD(old, i, j);
+
+ /* "opaque" fields can be transmogrified */
+ if (ofield->rmf_swabber == NULL &&
+ (ofield->rmf_flags & ~RMF_F_NO_SIZE_CHECK) == 0 &&
+ (ofield->rmf_size == -1 ||
+ ofield->rmf_flags == RMF_F_NO_SIZE_CHECK))
+ continue;
+ LASSERT(FMT_FIELD(fmt, i, j) == FMT_FIELD(old, i, j));
+ }
+ /*
+ * Last field in old format can be shorter than in new.
+ */
+ LASSERT(FMT_FIELD(fmt, i, j)->rmf_size >=
+ FMT_FIELD(old, i, j)->rmf_size);
+ }
+
+ pill->rc_fmt = fmt;
+}
+EXPORT_SYMBOL(req_capsule_extend);
+
+/**
+ * This function returns a non-zero value if the given \a field is present in
+ * the format (\a rc_fmt) of \a pill's PTLRPC request or reply (\a loc), else it
+ * returns 0.
+ */
+int req_capsule_has_field(const struct req_capsule *pill,
+ const struct req_msg_field *field,
+ enum req_location loc)
+{
+ LASSERT(loc == RCL_SERVER || loc == RCL_CLIENT);
+
+ return field->rmf_offset[pill->rc_fmt->rf_idx][loc];
+}
+EXPORT_SYMBOL(req_capsule_has_field);
+
+/**
+ * Returns a non-zero value if the given \a field is present in the given \a
+ * pill's PTLRPC request or reply (\a loc), else it returns 0.
+ */
+int req_capsule_field_present(const struct req_capsule *pill,
+ const struct req_msg_field *field,
+ enum req_location loc)
+{
+ int offset;
+
+ LASSERT(loc == RCL_SERVER || loc == RCL_CLIENT);
+ LASSERT(req_capsule_has_field(pill, field, loc));
+
+ offset = __req_capsule_offset(pill, field, loc);
+ return lustre_msg_bufcount(__req_msg(pill, loc)) > offset;
+}
+EXPORT_SYMBOL(req_capsule_field_present);
+
+/**
+ * This function shrinks the size of the _buffer_ of the \a pill's PTLRPC
+ * request or reply (\a loc).
+ *
+ * This is not the opposite of req_capsule_extend().
+ */
+void req_capsule_shrink(struct req_capsule *pill,
+ const struct req_msg_field *field,
+ unsigned int newlen,
+ enum req_location loc)
+{
+ const struct req_format *fmt;
+ struct lustre_msg *msg;
+ int len;
+ int offset;
+
+ fmt = pill->rc_fmt;
+ LASSERT(fmt != NULL);
+ LASSERT(__req_format_is_sane(fmt));
+ LASSERT(req_capsule_has_field(pill, field, loc));
+ LASSERT(req_capsule_field_present(pill, field, loc));
+
+ offset = __req_capsule_offset(pill, field, loc);
+
+ msg = __req_msg(pill, loc);
+ len = lustre_msg_buflen(msg, offset);
+ LASSERTF(newlen <= len, "%s:%s, oldlen=%d, newlen=%d\n",
+ fmt->rf_name, field->rmf_name, len, newlen);
+
+ if (loc == RCL_CLIENT)
+ pill->rc_req->rq_reqlen = lustre_shrink_msg(msg, offset, newlen,
+ 1);
+ else
+ pill->rc_req->rq_replen = lustre_shrink_msg(msg, offset, newlen,
+ 1);
+}
+EXPORT_SYMBOL(req_capsule_shrink);
+
+int req_capsule_server_grow(struct req_capsule *pill,
+ const struct req_msg_field *field,
+ unsigned int newlen)
+{
+ struct ptlrpc_reply_state *rs = pill->rc_req->rq_reply_state, *nrs;
+ char *from, *to;
+ int offset, len, rc;
+
+ LASSERT(pill->rc_fmt != NULL);
+ LASSERT(__req_format_is_sane(pill->rc_fmt));
+ LASSERT(req_capsule_has_field(pill, field, RCL_SERVER));
+ LASSERT(req_capsule_field_present(pill, field, RCL_SERVER));
+
+ len = req_capsule_get_size(pill, field, RCL_SERVER);
+ offset = __req_capsule_offset(pill, field, RCL_SERVER);
+ if (pill->rc_req->rq_repbuf_len >=
+ lustre_packed_msg_size(pill->rc_req->rq_repmsg) - len + newlen)
+ CERROR("Inplace repack might be done\n");
+
+ pill->rc_req->rq_reply_state = NULL;
+ req_capsule_set_size(pill, field, RCL_SERVER, newlen);
+ rc = req_capsule_server_pack(pill);
+ if (rc) {
+ /* put old rs back, the caller will decide what to do */
+ pill->rc_req->rq_reply_state = rs;
+ return rc;
+ }
+ nrs = pill->rc_req->rq_reply_state;
+ /* Now we need only buffers, copy first chunk */
+ to = lustre_msg_buf(nrs->rs_msg, 0, 0);
+ from = lustre_msg_buf(rs->rs_msg, 0, 0);
+ len = (char *)lustre_msg_buf(rs->rs_msg, offset, 0) - from;
+ memcpy(to, from, len);
+ /* check if we have tail and copy it too */
+ if (rs->rs_msg->lm_bufcount > offset + 1) {
+ to = lustre_msg_buf(nrs->rs_msg, offset + 1, 0);
+ from = lustre_msg_buf(rs->rs_msg, offset + 1, 0);
+ offset = rs->rs_msg->lm_bufcount - 1;
+ len = (char *)lustre_msg_buf(rs->rs_msg, offset, 0) +
+ cfs_size_round(rs->rs_msg->lm_buflens[offset]) - from;
+ memcpy(to, from, len);
+ }
+ /* drop old reply if everything is fine */
+ if (rs->rs_difficult) {
+ /* copy rs data */
+ int i;
+
+ nrs->rs_difficult = 1;
+ nrs->rs_no_ack = rs->rs_no_ack;
+ for (i = 0; i < rs->rs_nlocks; i++) {
+ nrs->rs_locks[i] = rs->rs_locks[i];
+ nrs->rs_modes[i] = rs->rs_modes[i];
+ nrs->rs_nlocks++;
+ }
+ rs->rs_nlocks = 0;
+ rs->rs_difficult = 0;
+ rs->rs_no_ack = 0;
+ }
+ ptlrpc_rs_decref(rs);
+ return 0;
+}
+EXPORT_SYMBOL(req_capsule_server_grow);
+/* __REQ_LAYOUT_USER__ */
+#endif
diff --git a/drivers/staging/lustre/lustre/ptlrpc/llog_client.c b/drivers/staging/lustre/lustre/ptlrpc/llog_client.c
new file mode 100644
index 000000000..e9baf5bbe
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ptlrpc/llog_client.c
@@ -0,0 +1,366 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/ptlrpc/llog_client.c
+ *
+ * remote api for llog - client side
+ *
+ * Author: Andreas Dilger <adilger@clusterfs.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_LOG
+
+#include "../../include/linux/libcfs/libcfs.h"
+
+#include "../include/obd_class.h"
+#include "../include/lustre_log.h"
+#include "../include/lustre_net.h"
+#include <linux/list.h>
+
+#define LLOG_CLIENT_ENTRY(ctxt, imp) do { \
+ mutex_lock(&ctxt->loc_mutex); \
+ if (ctxt->loc_imp) { \
+ imp = class_import_get(ctxt->loc_imp); \
+ } else { \
+ CERROR("ctxt->loc_imp == NULL for context idx %d." \
+ "Unable to complete MDS/OSS recovery," \
+ "but I'll try again next time. Not fatal.\n", \
+ ctxt->loc_idx); \
+ imp = NULL; \
+ mutex_unlock(&ctxt->loc_mutex); \
+ return (-EINVAL); \
+ } \
+ mutex_unlock(&ctxt->loc_mutex); \
+} while (0)
+
+#define LLOG_CLIENT_EXIT(ctxt, imp) do { \
+ mutex_lock(&ctxt->loc_mutex); \
+ if (ctxt->loc_imp != imp) \
+ CWARN("loc_imp has changed from %p to %p\n", \
+ ctxt->loc_imp, imp); \
+ class_import_put(imp); \
+ mutex_unlock(&ctxt->loc_mutex); \
+} while (0)
+
+/* This is a callback from the llog_* functions.
+ * Assumes caller has already pushed us into the kernel context. */
+static int llog_client_open(const struct lu_env *env,
+ struct llog_handle *lgh, struct llog_logid *logid,
+ char *name, enum llog_open_param open_param)
+{
+ struct obd_import *imp;
+ struct llogd_body *body;
+ struct llog_ctxt *ctxt = lgh->lgh_ctxt;
+ struct ptlrpc_request *req = NULL;
+ int rc;
+
+ LLOG_CLIENT_ENTRY(ctxt, imp);
+
+ /* client cannot create llog */
+ LASSERTF(open_param != LLOG_OPEN_NEW, "%#x\n", open_param);
+ LASSERT(lgh);
+
+ req = ptlrpc_request_alloc(imp, &RQF_LLOG_ORIGIN_HANDLE_CREATE);
+ if (req == NULL) {
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ if (name)
+ req_capsule_set_size(&req->rq_pill, &RMF_NAME, RCL_CLIENT,
+ strlen(name) + 1);
+
+ rc = ptlrpc_request_pack(req, LUSTRE_LOG_VERSION,
+ LLOG_ORIGIN_HANDLE_CREATE);
+ if (rc) {
+ ptlrpc_request_free(req);
+ req = NULL;
+ goto out;
+ }
+ ptlrpc_request_set_replen(req);
+
+ body = req_capsule_client_get(&req->rq_pill, &RMF_LLOGD_BODY);
+ if (logid)
+ body->lgd_logid = *logid;
+ body->lgd_ctxt_idx = ctxt->loc_idx - 1;
+
+ if (name) {
+ char *tmp;
+ tmp = req_capsule_client_sized_get(&req->rq_pill, &RMF_NAME,
+ strlen(name) + 1);
+ LASSERT(tmp);
+ strcpy(tmp, name);
+ }
+
+ rc = ptlrpc_queue_wait(req);
+ if (rc)
+ goto out;
+
+ body = req_capsule_server_get(&req->rq_pill, &RMF_LLOGD_BODY);
+ if (body == NULL) {
+ rc = -EFAULT;
+ goto out;
+ }
+
+ lgh->lgh_id = body->lgd_logid;
+ lgh->lgh_ctxt = ctxt;
+out:
+ LLOG_CLIENT_EXIT(ctxt, imp);
+ ptlrpc_req_finished(req);
+ return rc;
+}
+
+static int llog_client_destroy(const struct lu_env *env,
+ struct llog_handle *loghandle)
+{
+ struct obd_import *imp;
+ struct ptlrpc_request *req = NULL;
+ struct llogd_body *body;
+ int rc;
+
+ LLOG_CLIENT_ENTRY(loghandle->lgh_ctxt, imp);
+ req = ptlrpc_request_alloc_pack(imp, &RQF_LLOG_ORIGIN_HANDLE_DESTROY,
+ LUSTRE_LOG_VERSION,
+ LLOG_ORIGIN_HANDLE_DESTROY);
+ if (req == NULL) {
+ rc = -ENOMEM;
+ goto err_exit;
+ }
+
+ body = req_capsule_client_get(&req->rq_pill, &RMF_LLOGD_BODY);
+ body->lgd_logid = loghandle->lgh_id;
+ body->lgd_llh_flags = loghandle->lgh_hdr->llh_flags;
+
+ if (!(body->lgd_llh_flags & LLOG_F_IS_PLAIN))
+ CERROR("%s: wrong llog flags %x\n", imp->imp_obd->obd_name,
+ body->lgd_llh_flags);
+
+ ptlrpc_request_set_replen(req);
+ rc = ptlrpc_queue_wait(req);
+
+ ptlrpc_req_finished(req);
+err_exit:
+ LLOG_CLIENT_EXIT(loghandle->lgh_ctxt, imp);
+ return rc;
+}
+
+
+static int llog_client_next_block(const struct lu_env *env,
+ struct llog_handle *loghandle,
+ int *cur_idx, int next_idx,
+ __u64 *cur_offset, void *buf, int len)
+{
+ struct obd_import *imp;
+ struct ptlrpc_request *req = NULL;
+ struct llogd_body *body;
+ void *ptr;
+ int rc;
+
+ LLOG_CLIENT_ENTRY(loghandle->lgh_ctxt, imp);
+ req = ptlrpc_request_alloc_pack(imp, &RQF_LLOG_ORIGIN_HANDLE_NEXT_BLOCK,
+ LUSTRE_LOG_VERSION,
+ LLOG_ORIGIN_HANDLE_NEXT_BLOCK);
+ if (req == NULL) {
+ rc = -ENOMEM;
+ goto err_exit;
+ }
+
+ body = req_capsule_client_get(&req->rq_pill, &RMF_LLOGD_BODY);
+ body->lgd_logid = loghandle->lgh_id;
+ body->lgd_ctxt_idx = loghandle->lgh_ctxt->loc_idx - 1;
+ body->lgd_llh_flags = loghandle->lgh_hdr->llh_flags;
+ body->lgd_index = next_idx;
+ body->lgd_saved_index = *cur_idx;
+ body->lgd_len = len;
+ body->lgd_cur_offset = *cur_offset;
+
+ req_capsule_set_size(&req->rq_pill, &RMF_EADATA, RCL_SERVER, len);
+ ptlrpc_request_set_replen(req);
+ rc = ptlrpc_queue_wait(req);
+ if (rc)
+ goto out;
+
+ body = req_capsule_server_get(&req->rq_pill, &RMF_LLOGD_BODY);
+ if (body == NULL) {
+ rc = -EFAULT;
+ goto out;
+ }
+
+ /* The log records are swabbed as they are processed */
+ ptr = req_capsule_server_get(&req->rq_pill, &RMF_EADATA);
+ if (ptr == NULL) {
+ rc = -EFAULT;
+ goto out;
+ }
+
+ *cur_idx = body->lgd_saved_index;
+ *cur_offset = body->lgd_cur_offset;
+
+ memcpy(buf, ptr, len);
+out:
+ ptlrpc_req_finished(req);
+err_exit:
+ LLOG_CLIENT_EXIT(loghandle->lgh_ctxt, imp);
+ return rc;
+}
+
+static int llog_client_prev_block(const struct lu_env *env,
+ struct llog_handle *loghandle,
+ int prev_idx, void *buf, int len)
+{
+ struct obd_import *imp;
+ struct ptlrpc_request *req = NULL;
+ struct llogd_body *body;
+ void *ptr;
+ int rc;
+
+ LLOG_CLIENT_ENTRY(loghandle->lgh_ctxt, imp);
+ req = ptlrpc_request_alloc_pack(imp, &RQF_LLOG_ORIGIN_HANDLE_PREV_BLOCK,
+ LUSTRE_LOG_VERSION,
+ LLOG_ORIGIN_HANDLE_PREV_BLOCK);
+ if (req == NULL) {
+ rc = -ENOMEM;
+ goto err_exit;
+ }
+
+ body = req_capsule_client_get(&req->rq_pill, &RMF_LLOGD_BODY);
+ body->lgd_logid = loghandle->lgh_id;
+ body->lgd_ctxt_idx = loghandle->lgh_ctxt->loc_idx - 1;
+ body->lgd_llh_flags = loghandle->lgh_hdr->llh_flags;
+ body->lgd_index = prev_idx;
+ body->lgd_len = len;
+
+ req_capsule_set_size(&req->rq_pill, &RMF_EADATA, RCL_SERVER, len);
+ ptlrpc_request_set_replen(req);
+
+ rc = ptlrpc_queue_wait(req);
+ if (rc)
+ goto out;
+
+ body = req_capsule_server_get(&req->rq_pill, &RMF_LLOGD_BODY);
+ if (body == NULL) {
+ rc = -EFAULT;
+ goto out;
+ }
+
+ ptr = req_capsule_server_get(&req->rq_pill, &RMF_EADATA);
+ if (ptr == NULL) {
+ rc = -EFAULT;
+ goto out;
+ }
+
+ memcpy(buf, ptr, len);
+out:
+ ptlrpc_req_finished(req);
+err_exit:
+ LLOG_CLIENT_EXIT(loghandle->lgh_ctxt, imp);
+ return rc;
+}
+
+static int llog_client_read_header(const struct lu_env *env,
+ struct llog_handle *handle)
+{
+ struct obd_import *imp;
+ struct ptlrpc_request *req = NULL;
+ struct llogd_body *body;
+ struct llog_log_hdr *hdr;
+ struct llog_rec_hdr *llh_hdr;
+ int rc;
+
+ LLOG_CLIENT_ENTRY(handle->lgh_ctxt, imp);
+ req = ptlrpc_request_alloc_pack(imp, &RQF_LLOG_ORIGIN_HANDLE_READ_HEADER,
+ LUSTRE_LOG_VERSION,
+ LLOG_ORIGIN_HANDLE_READ_HEADER);
+ if (req == NULL) {
+ rc = -ENOMEM;
+ goto err_exit;
+ }
+
+ body = req_capsule_client_get(&req->rq_pill, &RMF_LLOGD_BODY);
+ body->lgd_logid = handle->lgh_id;
+ body->lgd_ctxt_idx = handle->lgh_ctxt->loc_idx - 1;
+ body->lgd_llh_flags = handle->lgh_hdr->llh_flags;
+
+ ptlrpc_request_set_replen(req);
+ rc = ptlrpc_queue_wait(req);
+ if (rc)
+ goto out;
+
+ hdr = req_capsule_server_get(&req->rq_pill, &RMF_LLOG_LOG_HDR);
+ if (hdr == NULL) {
+ rc = -EFAULT;
+ goto out;
+ }
+
+ memcpy(handle->lgh_hdr, hdr, sizeof(*hdr));
+ handle->lgh_last_idx = handle->lgh_hdr->llh_tail.lrt_index;
+
+ /* sanity checks */
+ llh_hdr = &handle->lgh_hdr->llh_hdr;
+ if (llh_hdr->lrh_type != LLOG_HDR_MAGIC) {
+ CERROR("bad log header magic: %#x (expecting %#x)\n",
+ llh_hdr->lrh_type, LLOG_HDR_MAGIC);
+ rc = -EIO;
+ } else if (llh_hdr->lrh_len != LLOG_CHUNK_SIZE) {
+ CERROR("incorrectly sized log header: %#x (expecting %#x)\n",
+ llh_hdr->lrh_len, LLOG_CHUNK_SIZE);
+ CERROR("you may need to re-run lconf --write_conf.\n");
+ rc = -EIO;
+ }
+out:
+ ptlrpc_req_finished(req);
+err_exit:
+ LLOG_CLIENT_EXIT(handle->lgh_ctxt, imp);
+ return rc;
+}
+
+static int llog_client_close(const struct lu_env *env,
+ struct llog_handle *handle)
+{
+ /* this doesn't call LLOG_ORIGIN_HANDLE_CLOSE because
+ the servers all close the file at the end of every
+ other LLOG_ RPC. */
+ return 0;
+}
+
+struct llog_operations llog_client_ops = {
+ .lop_next_block = llog_client_next_block,
+ .lop_prev_block = llog_client_prev_block,
+ .lop_read_header = llog_client_read_header,
+ .lop_open = llog_client_open,
+ .lop_destroy = llog_client_destroy,
+ .lop_close = llog_client_close,
+};
+EXPORT_SYMBOL(llog_client_ops);
diff --git a/drivers/staging/lustre/lustre/ptlrpc/llog_net.c b/drivers/staging/lustre/lustre/ptlrpc/llog_net.c
new file mode 100644
index 000000000..dac66f5b3
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ptlrpc/llog_net.c
@@ -0,0 +1,72 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/ptlrpc/llog_net.c
+ *
+ * OST<->MDS recovery logging infrastructure.
+ *
+ * Invariants in implementation:
+ * - we do not share logs among different OST<->MDS connections, so that
+ * if an OST or MDS fails it need only look at log(s) relevant to itself
+ *
+ * Author: Andreas Dilger <adilger@clusterfs.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_LOG
+
+#include "../../include/linux/libcfs/libcfs.h"
+
+#include "../include/obd_class.h"
+#include "../include/lustre_log.h"
+#include <linux/list.h>
+
+int llog_initiator_connect(struct llog_ctxt *ctxt)
+{
+ struct obd_import *new_imp;
+
+ LASSERT(ctxt);
+ new_imp = ctxt->loc_obd->u.cli.cl_import;
+ LASSERTF(ctxt->loc_imp == NULL || ctxt->loc_imp == new_imp,
+ "%p - %p\n", ctxt->loc_imp, new_imp);
+ mutex_lock(&ctxt->loc_mutex);
+ if (ctxt->loc_imp != new_imp) {
+ if (ctxt->loc_imp)
+ class_import_put(ctxt->loc_imp);
+ ctxt->loc_imp = class_import_get(new_imp);
+ }
+ mutex_unlock(&ctxt->loc_mutex);
+ return 0;
+}
+EXPORT_SYMBOL(llog_initiator_connect);
diff --git a/drivers/staging/lustre/lustre/ptlrpc/lproc_ptlrpc.c b/drivers/staging/lustre/lustre/ptlrpc/lproc_ptlrpc.c
new file mode 100644
index 000000000..9533ab976
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ptlrpc/lproc_ptlrpc.c
@@ -0,0 +1,1366 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+#define DEBUG_SUBSYSTEM S_CLASS
+
+
+#include "../include/obd_support.h"
+#include "../include/obd.h"
+#include "../include/lprocfs_status.h"
+#include "../include/lustre/lustre_idl.h"
+#include "../include/lustre_net.h"
+#include "../include/obd_class.h"
+#include "ptlrpc_internal.h"
+
+
+static struct ll_rpc_opcode {
+ __u32 opcode;
+ const char *opname;
+} ll_rpc_opcode_table[LUSTRE_MAX_OPCODES] = {
+ { OST_REPLY, "ost_reply" },
+ { OST_GETATTR, "ost_getattr" },
+ { OST_SETATTR, "ost_setattr" },
+ { OST_READ, "ost_read" },
+ { OST_WRITE, "ost_write" },
+ { OST_CREATE , "ost_create" },
+ { OST_DESTROY, "ost_destroy" },
+ { OST_GET_INFO, "ost_get_info" },
+ { OST_CONNECT, "ost_connect" },
+ { OST_DISCONNECT, "ost_disconnect" },
+ { OST_PUNCH, "ost_punch" },
+ { OST_OPEN, "ost_open" },
+ { OST_CLOSE, "ost_close" },
+ { OST_STATFS, "ost_statfs" },
+ { 14, NULL }, /* formerly OST_SAN_READ */
+ { 15, NULL }, /* formerly OST_SAN_WRITE */
+ { OST_SYNC, "ost_sync" },
+ { OST_SET_INFO, "ost_set_info" },
+ { OST_QUOTACHECK, "ost_quotacheck" },
+ { OST_QUOTACTL, "ost_quotactl" },
+ { OST_QUOTA_ADJUST_QUNIT, "ost_quota_adjust_qunit" },
+ { MDS_GETATTR, "mds_getattr" },
+ { MDS_GETATTR_NAME, "mds_getattr_lock" },
+ { MDS_CLOSE, "mds_close" },
+ { MDS_REINT, "mds_reint" },
+ { MDS_READPAGE, "mds_readpage" },
+ { MDS_CONNECT, "mds_connect" },
+ { MDS_DISCONNECT, "mds_disconnect" },
+ { MDS_GETSTATUS, "mds_getstatus" },
+ { MDS_STATFS, "mds_statfs" },
+ { MDS_PIN, "mds_pin" },
+ { MDS_UNPIN, "mds_unpin" },
+ { MDS_SYNC, "mds_sync" },
+ { MDS_DONE_WRITING, "mds_done_writing" },
+ { MDS_SET_INFO, "mds_set_info" },
+ { MDS_QUOTACHECK, "mds_quotacheck" },
+ { MDS_QUOTACTL, "mds_quotactl" },
+ { MDS_GETXATTR, "mds_getxattr" },
+ { MDS_SETXATTR, "mds_setxattr" },
+ { MDS_WRITEPAGE, "mds_writepage" },
+ { MDS_IS_SUBDIR, "mds_is_subdir" },
+ { MDS_GET_INFO, "mds_get_info" },
+ { MDS_HSM_STATE_GET, "mds_hsm_state_get" },
+ { MDS_HSM_STATE_SET, "mds_hsm_state_set" },
+ { MDS_HSM_ACTION, "mds_hsm_action" },
+ { MDS_HSM_PROGRESS, "mds_hsm_progress" },
+ { MDS_HSM_REQUEST, "mds_hsm_request" },
+ { MDS_HSM_CT_REGISTER, "mds_hsm_ct_register" },
+ { MDS_HSM_CT_UNREGISTER, "mds_hsm_ct_unregister" },
+ { MDS_SWAP_LAYOUTS, "mds_swap_layouts" },
+ { LDLM_ENQUEUE, "ldlm_enqueue" },
+ { LDLM_CONVERT, "ldlm_convert" },
+ { LDLM_CANCEL, "ldlm_cancel" },
+ { LDLM_BL_CALLBACK, "ldlm_bl_callback" },
+ { LDLM_CP_CALLBACK, "ldlm_cp_callback" },
+ { LDLM_GL_CALLBACK, "ldlm_gl_callback" },
+ { LDLM_SET_INFO, "ldlm_set_info" },
+ { MGS_CONNECT, "mgs_connect" },
+ { MGS_DISCONNECT, "mgs_disconnect" },
+ { MGS_EXCEPTION, "mgs_exception" },
+ { MGS_TARGET_REG, "mgs_target_reg" },
+ { MGS_TARGET_DEL, "mgs_target_del" },
+ { MGS_SET_INFO, "mgs_set_info" },
+ { MGS_CONFIG_READ, "mgs_config_read" },
+ { OBD_PING, "obd_ping" },
+ { OBD_LOG_CANCEL, "llog_cancel" },
+ { OBD_QC_CALLBACK, "obd_quota_callback" },
+ { OBD_IDX_READ, "dt_index_read" },
+ { LLOG_ORIGIN_HANDLE_CREATE, "llog_origin_handle_open" },
+ { LLOG_ORIGIN_HANDLE_NEXT_BLOCK, "llog_origin_handle_next_block" },
+ { LLOG_ORIGIN_HANDLE_READ_HEADER, "llog_origin_handle_read_header" },
+ { LLOG_ORIGIN_HANDLE_WRITE_REC, "llog_origin_handle_write_rec" },
+ { LLOG_ORIGIN_HANDLE_CLOSE, "llog_origin_handle_close" },
+ { LLOG_ORIGIN_CONNECT, "llog_origin_connect" },
+ { LLOG_CATINFO, "llog_catinfo" },
+ { LLOG_ORIGIN_HANDLE_PREV_BLOCK, "llog_origin_handle_prev_block" },
+ { LLOG_ORIGIN_HANDLE_DESTROY, "llog_origin_handle_destroy" },
+ { QUOTA_DQACQ, "quota_acquire" },
+ { QUOTA_DQREL, "quota_release" },
+ { SEQ_QUERY, "seq_query" },
+ { SEC_CTX_INIT, "sec_ctx_init" },
+ { SEC_CTX_INIT_CONT, "sec_ctx_init_cont" },
+ { SEC_CTX_FINI, "sec_ctx_fini" },
+ { FLD_QUERY, "fld_query" },
+ { UPDATE_OBJ, "update_obj" },
+};
+
+static struct ll_eopcode {
+ __u32 opcode;
+ const char *opname;
+} ll_eopcode_table[EXTRA_LAST_OPC] = {
+ { LDLM_GLIMPSE_ENQUEUE, "ldlm_glimpse_enqueue" },
+ { LDLM_PLAIN_ENQUEUE, "ldlm_plain_enqueue" },
+ { LDLM_EXTENT_ENQUEUE, "ldlm_extent_enqueue" },
+ { LDLM_FLOCK_ENQUEUE, "ldlm_flock_enqueue" },
+ { LDLM_IBITS_ENQUEUE, "ldlm_ibits_enqueue" },
+ { MDS_REINT_SETATTR, "mds_reint_setattr" },
+ { MDS_REINT_CREATE, "mds_reint_create" },
+ { MDS_REINT_LINK, "mds_reint_link" },
+ { MDS_REINT_UNLINK, "mds_reint_unlink" },
+ { MDS_REINT_RENAME, "mds_reint_rename" },
+ { MDS_REINT_OPEN, "mds_reint_open" },
+ { MDS_REINT_SETXATTR, "mds_reint_setxattr" },
+ { BRW_READ_BYTES, "read_bytes" },
+ { BRW_WRITE_BYTES, "write_bytes" },
+};
+
+const char *ll_opcode2str(__u32 opcode)
+{
+ /* When one of the assertions below fail, chances are that:
+ * 1) A new opcode was added in include/lustre/lustre_idl.h,
+ * but is missing from the table above.
+ * or 2) The opcode space was renumbered or rearranged,
+ * and the opcode_offset() function in
+ * ptlrpc_internal.h needs to be modified.
+ */
+ __u32 offset = opcode_offset(opcode);
+ LASSERTF(offset < LUSTRE_MAX_OPCODES,
+ "offset %u >= LUSTRE_MAX_OPCODES %u\n",
+ offset, LUSTRE_MAX_OPCODES);
+ LASSERTF(ll_rpc_opcode_table[offset].opcode == opcode,
+ "ll_rpc_opcode_table[%u].opcode %u != opcode %u\n",
+ offset, ll_rpc_opcode_table[offset].opcode, opcode);
+ return ll_rpc_opcode_table[offset].opname;
+}
+
+static const char *ll_eopcode2str(__u32 opcode)
+{
+ LASSERT(ll_eopcode_table[opcode].opcode == opcode);
+ return ll_eopcode_table[opcode].opname;
+}
+
+#if defined(CONFIG_PROC_FS)
+static void ptlrpc_lprocfs_register(struct proc_dir_entry *root, char *dir,
+ char *name,
+ struct proc_dir_entry **procroot_ret,
+ struct lprocfs_stats **stats_ret)
+{
+ struct proc_dir_entry *svc_procroot;
+ struct lprocfs_stats *svc_stats;
+ int i, rc;
+ unsigned int svc_counter_config = LPROCFS_CNTR_AVGMINMAX |
+ LPROCFS_CNTR_STDDEV;
+
+ LASSERT(*procroot_ret == NULL);
+ LASSERT(*stats_ret == NULL);
+
+ svc_stats = lprocfs_alloc_stats(EXTRA_MAX_OPCODES+LUSTRE_MAX_OPCODES,
+ 0);
+ if (svc_stats == NULL)
+ return;
+
+ if (dir) {
+ svc_procroot = lprocfs_register(dir, root, NULL, NULL);
+ if (IS_ERR(svc_procroot)) {
+ lprocfs_free_stats(&svc_stats);
+ return;
+ }
+ } else {
+ svc_procroot = root;
+ }
+
+ lprocfs_counter_init(svc_stats, PTLRPC_REQWAIT_CNTR,
+ svc_counter_config, "req_waittime", "usec");
+ lprocfs_counter_init(svc_stats, PTLRPC_REQQDEPTH_CNTR,
+ svc_counter_config, "req_qdepth", "reqs");
+ lprocfs_counter_init(svc_stats, PTLRPC_REQACTIVE_CNTR,
+ svc_counter_config, "req_active", "reqs");
+ lprocfs_counter_init(svc_stats, PTLRPC_TIMEOUT,
+ svc_counter_config, "req_timeout", "sec");
+ lprocfs_counter_init(svc_stats, PTLRPC_REQBUF_AVAIL_CNTR,
+ svc_counter_config, "reqbuf_avail", "bufs");
+ for (i = 0; i < EXTRA_LAST_OPC; i++) {
+ char *units;
+
+ switch (i) {
+ case BRW_WRITE_BYTES:
+ case BRW_READ_BYTES:
+ units = "bytes";
+ break;
+ default:
+ units = "reqs";
+ break;
+ }
+ lprocfs_counter_init(svc_stats, PTLRPC_LAST_CNTR + i,
+ svc_counter_config,
+ ll_eopcode2str(i), units);
+ }
+ for (i = 0; i < LUSTRE_MAX_OPCODES; i++) {
+ __u32 opcode = ll_rpc_opcode_table[i].opcode;
+ lprocfs_counter_init(svc_stats,
+ EXTRA_MAX_OPCODES + i, svc_counter_config,
+ ll_opcode2str(opcode), "usec");
+ }
+
+ rc = lprocfs_register_stats(svc_procroot, name, svc_stats);
+ if (rc < 0) {
+ if (dir)
+ lprocfs_remove(&svc_procroot);
+ lprocfs_free_stats(&svc_stats);
+ } else {
+ if (dir)
+ *procroot_ret = svc_procroot;
+ *stats_ret = svc_stats;
+ }
+}
+
+static int
+ptlrpc_lprocfs_req_history_len_seq_show(struct seq_file *m, void *v)
+{
+ struct ptlrpc_service *svc = m->private;
+ struct ptlrpc_service_part *svcpt;
+ int total = 0;
+ int i;
+
+ ptlrpc_service_for_each_part(svcpt, i, svc)
+ total += svcpt->scp_hist_nrqbds;
+
+ seq_printf(m, "%d\n", total);
+ return 0;
+}
+LPROC_SEQ_FOPS_RO(ptlrpc_lprocfs_req_history_len);
+
+static int
+ptlrpc_lprocfs_req_history_max_seq_show(struct seq_file *m, void *n)
+{
+ struct ptlrpc_service *svc = m->private;
+ struct ptlrpc_service_part *svcpt;
+ int total = 0;
+ int i;
+
+ ptlrpc_service_for_each_part(svcpt, i, svc)
+ total += svc->srv_hist_nrqbds_cpt_max;
+
+ seq_printf(m, "%d\n", total);
+ return 0;
+}
+
+static ssize_t
+ptlrpc_lprocfs_req_history_max_seq_write(struct file *file,
+ const char __user *buffer,
+ size_t count, loff_t *off)
+{
+ struct ptlrpc_service *svc = ((struct seq_file *)file->private_data)->private;
+ int bufpages;
+ int val;
+ int rc;
+
+ rc = lprocfs_write_helper(buffer, count, &val);
+ if (rc < 0)
+ return rc;
+
+ if (val < 0)
+ return -ERANGE;
+
+ /* This sanity check is more of an insanity check; we can still
+ * hose a kernel by allowing the request history to grow too
+ * far. */
+ bufpages = (svc->srv_buf_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+ if (val > totalram_pages / (2 * bufpages))
+ return -ERANGE;
+
+ spin_lock(&svc->srv_lock);
+
+ if (val == 0)
+ svc->srv_hist_nrqbds_cpt_max = 0;
+ else
+ svc->srv_hist_nrqbds_cpt_max = max(1, (val / svc->srv_ncpts));
+
+ spin_unlock(&svc->srv_lock);
+
+ return count;
+}
+LPROC_SEQ_FOPS(ptlrpc_lprocfs_req_history_max);
+
+static int
+ptlrpc_lprocfs_threads_min_seq_show(struct seq_file *m, void *n)
+{
+ struct ptlrpc_service *svc = m->private;
+
+ seq_printf(m, "%d\n", svc->srv_nthrs_cpt_init * svc->srv_ncpts);
+ return 0;
+}
+
+static ssize_t
+ptlrpc_lprocfs_threads_min_seq_write(struct file *file,
+ const char __user *buffer,
+ size_t count, loff_t *off)
+{
+ struct ptlrpc_service *svc = ((struct seq_file *)file->private_data)->private;
+ int val;
+ int rc = lprocfs_write_helper(buffer, count, &val);
+
+ if (rc < 0)
+ return rc;
+
+ if (val / svc->srv_ncpts < PTLRPC_NTHRS_INIT)
+ return -ERANGE;
+
+ spin_lock(&svc->srv_lock);
+ if (val > svc->srv_nthrs_cpt_limit * svc->srv_ncpts) {
+ spin_unlock(&svc->srv_lock);
+ return -ERANGE;
+ }
+
+ svc->srv_nthrs_cpt_init = val / svc->srv_ncpts;
+
+ spin_unlock(&svc->srv_lock);
+
+ return count;
+}
+LPROC_SEQ_FOPS(ptlrpc_lprocfs_threads_min);
+
+static int
+ptlrpc_lprocfs_threads_started_seq_show(struct seq_file *m, void *n)
+{
+ struct ptlrpc_service *svc = m->private;
+ struct ptlrpc_service_part *svcpt;
+ int total = 0;
+ int i;
+
+ ptlrpc_service_for_each_part(svcpt, i, svc)
+ total += svcpt->scp_nthrs_running;
+
+ seq_printf(m, "%d\n", total);
+ return 0;
+}
+LPROC_SEQ_FOPS_RO(ptlrpc_lprocfs_threads_started);
+
+static int
+ptlrpc_lprocfs_threads_max_seq_show(struct seq_file *m, void *n)
+{
+ struct ptlrpc_service *svc = m->private;
+
+ seq_printf(m, "%d\n", svc->srv_nthrs_cpt_limit * svc->srv_ncpts);
+ return 0;
+}
+
+static ssize_t
+ptlrpc_lprocfs_threads_max_seq_write(struct file *file,
+ const char __user *buffer,
+ size_t count, loff_t *off)
+{
+ struct ptlrpc_service *svc = ((struct seq_file *)file->private_data)->private;
+ int val;
+ int rc = lprocfs_write_helper(buffer, count, &val);
+
+ if (rc < 0)
+ return rc;
+
+ if (val / svc->srv_ncpts < PTLRPC_NTHRS_INIT)
+ return -ERANGE;
+
+ spin_lock(&svc->srv_lock);
+ if (val < svc->srv_nthrs_cpt_init * svc->srv_ncpts) {
+ spin_unlock(&svc->srv_lock);
+ return -ERANGE;
+ }
+
+ svc->srv_nthrs_cpt_limit = val / svc->srv_ncpts;
+
+ spin_unlock(&svc->srv_lock);
+
+ return count;
+}
+LPROC_SEQ_FOPS(ptlrpc_lprocfs_threads_max);
+
+/**
+ * \addtogoup nrs
+ * @{
+ */
+extern struct nrs_core nrs_core;
+
+/**
+ * Translates \e ptlrpc_nrs_pol_state values to human-readable strings.
+ *
+ * \param[in] state The policy state
+ */
+static const char *nrs_state2str(enum ptlrpc_nrs_pol_state state)
+{
+ switch (state) {
+ default:
+ LBUG();
+ case NRS_POL_STATE_INVALID:
+ return "invalid";
+ case NRS_POL_STATE_STOPPED:
+ return "stopped";
+ case NRS_POL_STATE_STOPPING:
+ return "stopping";
+ case NRS_POL_STATE_STARTING:
+ return "starting";
+ case NRS_POL_STATE_STARTED:
+ return "started";
+ }
+}
+
+/**
+ * Obtains status information for \a policy.
+ *
+ * Information is copied in \a info.
+ *
+ * \param[in] policy The policy
+ * \param[out] info Holds returned status information
+ */
+void nrs_policy_get_info_locked(struct ptlrpc_nrs_policy *policy,
+ struct ptlrpc_nrs_pol_info *info)
+{
+ LASSERT(policy != NULL);
+ LASSERT(info != NULL);
+ assert_spin_locked(&policy->pol_nrs->nrs_lock);
+
+ memcpy(info->pi_name, policy->pol_desc->pd_name, NRS_POL_NAME_MAX);
+
+ info->pi_fallback = !!(policy->pol_flags & PTLRPC_NRS_FL_FALLBACK);
+ info->pi_state = policy->pol_state;
+ /**
+ * XXX: These are accessed without holding
+ * ptlrpc_service_part::scp_req_lock.
+ */
+ info->pi_req_queued = policy->pol_req_queued;
+ info->pi_req_started = policy->pol_req_started;
+}
+
+/**
+ * Reads and prints policy status information for all policies of a PTLRPC
+ * service.
+ */
+static int ptlrpc_lprocfs_nrs_seq_show(struct seq_file *m, void *n)
+{
+ struct ptlrpc_service *svc = m->private;
+ struct ptlrpc_service_part *svcpt;
+ struct ptlrpc_nrs *nrs;
+ struct ptlrpc_nrs_policy *policy;
+ struct ptlrpc_nrs_pol_info *infos;
+ struct ptlrpc_nrs_pol_info tmp;
+ unsigned num_pols;
+ unsigned pol_idx = 0;
+ bool hp = false;
+ int i;
+ int rc = 0;
+
+ /**
+ * Serialize NRS core lprocfs operations with policy registration/
+ * unregistration.
+ */
+ mutex_lock(&nrs_core.nrs_mutex);
+
+ /**
+ * Use the first service partition's regular NRS head in order to obtain
+ * the number of policies registered with NRS heads of this service. All
+ * service partitions will have the same number of policies.
+ */
+ nrs = nrs_svcpt2nrs(svc->srv_parts[0], false);
+
+ spin_lock(&nrs->nrs_lock);
+ num_pols = svc->srv_parts[0]->scp_nrs_reg.nrs_num_pols;
+ spin_unlock(&nrs->nrs_lock);
+
+ OBD_ALLOC(infos, num_pols * sizeof(*infos));
+ if (infos == NULL) {
+ rc = -ENOMEM;
+ goto out;
+ }
+again:
+
+ ptlrpc_service_for_each_part(svcpt, i, svc) {
+ nrs = nrs_svcpt2nrs(svcpt, hp);
+ spin_lock(&nrs->nrs_lock);
+
+ pol_idx = 0;
+
+ list_for_each_entry(policy, &nrs->nrs_policy_list,
+ pol_list) {
+ LASSERT(pol_idx < num_pols);
+
+ nrs_policy_get_info_locked(policy, &tmp);
+ /**
+ * Copy values when handling the first service
+ * partition.
+ */
+ if (i == 0) {
+ memcpy(infos[pol_idx].pi_name, tmp.pi_name,
+ NRS_POL_NAME_MAX);
+ memcpy(&infos[pol_idx].pi_state, &tmp.pi_state,
+ sizeof(tmp.pi_state));
+ infos[pol_idx].pi_fallback = tmp.pi_fallback;
+ /**
+ * For the rest of the service partitions
+ * sanity-check the values we get.
+ */
+ } else {
+ LASSERT(strncmp(infos[pol_idx].pi_name,
+ tmp.pi_name,
+ NRS_POL_NAME_MAX) == 0);
+ /**
+ * Not asserting ptlrpc_nrs_pol_info::pi_state,
+ * because it may be different between
+ * instances of the same policy in different
+ * service partitions.
+ */
+ LASSERT(infos[pol_idx].pi_fallback ==
+ tmp.pi_fallback);
+ }
+
+ infos[pol_idx].pi_req_queued += tmp.pi_req_queued;
+ infos[pol_idx].pi_req_started += tmp.pi_req_started;
+
+ pol_idx++;
+ }
+ spin_unlock(&nrs->nrs_lock);
+ }
+
+ /**
+ * Policy status information output is in YAML format.
+ * For example:
+ *
+ * regular_requests:
+ * - name: fifo
+ * state: started
+ * fallback: yes
+ * queued: 0
+ * active: 0
+ *
+ * - name: crrn
+ * state: started
+ * fallback: no
+ * queued: 2015
+ * active: 384
+ *
+ * high_priority_requests:
+ * - name: fifo
+ * state: started
+ * fallback: yes
+ * queued: 0
+ * active: 2
+ *
+ * - name: crrn
+ * state: stopped
+ * fallback: no
+ * queued: 0
+ * active: 0
+ */
+ seq_printf(m, "%s\n",
+ !hp ? "\nregular_requests:" : "high_priority_requests:");
+
+ for (pol_idx = 0; pol_idx < num_pols; pol_idx++) {
+ seq_printf(m, " - name: %s\n"
+ " state: %s\n"
+ " fallback: %s\n"
+ " queued: %-20d\n"
+ " active: %-20d\n\n",
+ infos[pol_idx].pi_name,
+ nrs_state2str(infos[pol_idx].pi_state),
+ infos[pol_idx].pi_fallback ? "yes" : "no",
+ (int)infos[pol_idx].pi_req_queued,
+ (int)infos[pol_idx].pi_req_started);
+ }
+
+ if (!hp && nrs_svc_has_hp(svc)) {
+ memset(infos, 0, num_pols * sizeof(*infos));
+
+ /**
+ * Redo the processing for the service's HP NRS heads' policies.
+ */
+ hp = true;
+ goto again;
+ }
+
+out:
+ if (infos)
+ OBD_FREE(infos, num_pols * sizeof(*infos));
+
+ mutex_unlock(&nrs_core.nrs_mutex);
+
+ return rc;
+}
+
+/**
+ * The longest valid command string is the maximum policy name size, plus the
+ * length of the " reg" substring
+ */
+#define LPROCFS_NRS_WR_MAX_CMD (NRS_POL_NAME_MAX + sizeof(" reg") - 1)
+
+/**
+ * Starts and stops a given policy on a PTLRPC service.
+ *
+ * Commands consist of the policy name, followed by an optional [reg|hp] token;
+ * if the optional token is omitted, the operation is performed on both the
+ * regular and high-priority (if the service has one) NRS head.
+ */
+static ssize_t ptlrpc_lprocfs_nrs_seq_write(struct file *file,
+ const char __user *buffer,
+ size_t count, loff_t *off)
+{
+ struct ptlrpc_service *svc = ((struct seq_file *)file->private_data)->private;
+ enum ptlrpc_nrs_queue_type queue = PTLRPC_NRS_QUEUE_BOTH;
+ char *cmd;
+ char *cmd_copy = NULL;
+ char *token;
+ int rc = 0;
+
+ if (count >= LPROCFS_NRS_WR_MAX_CMD) {
+ rc = -EINVAL;
+ goto out;
+ }
+
+ OBD_ALLOC(cmd, LPROCFS_NRS_WR_MAX_CMD);
+ if (cmd == NULL) {
+ rc = -ENOMEM;
+ goto out;
+ }
+ /**
+ * strsep() modifies its argument, so keep a copy
+ */
+ cmd_copy = cmd;
+
+ if (copy_from_user(cmd, buffer, count)) {
+ rc = -EFAULT;
+ goto out;
+ }
+
+ cmd[count] = '\0';
+
+ token = strsep(&cmd, " ");
+
+ if (strlen(token) > NRS_POL_NAME_MAX - 1) {
+ rc = -EINVAL;
+ goto out;
+ }
+
+ /**
+ * No [reg|hp] token has been specified
+ */
+ if (cmd == NULL)
+ goto default_queue;
+
+ /**
+ * The second token is either NULL, or an optional [reg|hp] string
+ */
+ if (strcmp(cmd, "reg") == 0)
+ queue = PTLRPC_NRS_QUEUE_REG;
+ else if (strcmp(cmd, "hp") == 0)
+ queue = PTLRPC_NRS_QUEUE_HP;
+ else {
+ rc = -EINVAL;
+ goto out;
+ }
+
+default_queue:
+
+ if (queue == PTLRPC_NRS_QUEUE_HP && !nrs_svc_has_hp(svc)) {
+ rc = -ENODEV;
+ goto out;
+ } else if (queue == PTLRPC_NRS_QUEUE_BOTH && !nrs_svc_has_hp(svc))
+ queue = PTLRPC_NRS_QUEUE_REG;
+
+ /**
+ * Serialize NRS core lprocfs operations with policy registration/
+ * unregistration.
+ */
+ mutex_lock(&nrs_core.nrs_mutex);
+
+ rc = ptlrpc_nrs_policy_control(svc, queue, token, PTLRPC_NRS_CTL_START,
+ false, NULL);
+
+ mutex_unlock(&nrs_core.nrs_mutex);
+out:
+ if (cmd_copy)
+ OBD_FREE(cmd_copy, LPROCFS_NRS_WR_MAX_CMD);
+
+ return rc < 0 ? rc : count;
+}
+LPROC_SEQ_FOPS(ptlrpc_lprocfs_nrs);
+
+/** @} nrs */
+
+struct ptlrpc_srh_iterator {
+ int srhi_idx;
+ __u64 srhi_seq;
+ struct ptlrpc_request *srhi_req;
+};
+
+static int
+ptlrpc_lprocfs_svc_req_history_seek(struct ptlrpc_service_part *svcpt,
+ struct ptlrpc_srh_iterator *srhi,
+ __u64 seq)
+{
+ struct list_head *e;
+ struct ptlrpc_request *req;
+
+ if (srhi->srhi_req != NULL &&
+ srhi->srhi_seq > svcpt->scp_hist_seq_culled &&
+ srhi->srhi_seq <= seq) {
+ /* If srhi_req was set previously, hasn't been culled and
+ * we're searching for a seq on or after it (i.e. more
+ * recent), search from it onwards.
+ * Since the service history is LRU (i.e. culled reqs will
+ * be near the head), we shouldn't have to do long
+ * re-scans */
+ LASSERTF(srhi->srhi_seq == srhi->srhi_req->rq_history_seq,
+ "%s:%d: seek seq %llu, request seq %llu\n",
+ svcpt->scp_service->srv_name, svcpt->scp_cpt,
+ srhi->srhi_seq, srhi->srhi_req->rq_history_seq);
+ LASSERTF(!list_empty(&svcpt->scp_hist_reqs),
+ "%s:%d: seek offset %llu, request seq %llu, last culled %llu\n",
+ svcpt->scp_service->srv_name, svcpt->scp_cpt,
+ seq, srhi->srhi_seq, svcpt->scp_hist_seq_culled);
+ e = &srhi->srhi_req->rq_history_list;
+ } else {
+ /* search from start */
+ e = svcpt->scp_hist_reqs.next;
+ }
+
+ while (e != &svcpt->scp_hist_reqs) {
+ req = list_entry(e, struct ptlrpc_request, rq_history_list);
+
+ if (req->rq_history_seq >= seq) {
+ srhi->srhi_seq = req->rq_history_seq;
+ srhi->srhi_req = req;
+ return 0;
+ }
+ e = e->next;
+ }
+
+ return -ENOENT;
+}
+
+/*
+ * ptlrpc history sequence is used as "position" of seq_file, in some case,
+ * seq_read() will increase "position" to indicate reading the next
+ * element, however, low bits of history sequence are reserved for CPT id
+ * (check the details from comments before ptlrpc_req_add_history), which
+ * means seq_read() might change CPT id of history sequence and never
+ * finish reading of requests on a CPT. To make it work, we have to shift
+ * CPT id to high bits and timestamp to low bits, so seq_read() will only
+ * increase timestamp which can correctly indicate the next position.
+ */
+
+/* convert seq_file pos to cpt */
+#define PTLRPC_REQ_POS2CPT(svc, pos) \
+ ((svc)->srv_cpt_bits == 0 ? 0 : \
+ (__u64)(pos) >> (64 - (svc)->srv_cpt_bits))
+
+/* make up seq_file pos from cpt */
+#define PTLRPC_REQ_CPT2POS(svc, cpt) \
+ ((svc)->srv_cpt_bits == 0 ? 0 : \
+ (cpt) << (64 - (svc)->srv_cpt_bits))
+
+/* convert sequence to position */
+#define PTLRPC_REQ_SEQ2POS(svc, seq) \
+ ((svc)->srv_cpt_bits == 0 ? (seq) : \
+ ((seq) >> (svc)->srv_cpt_bits) | \
+ ((seq) << (64 - (svc)->srv_cpt_bits)))
+
+/* convert position to sequence */
+#define PTLRPC_REQ_POS2SEQ(svc, pos) \
+ ((svc)->srv_cpt_bits == 0 ? (pos) : \
+ ((__u64)(pos) << (svc)->srv_cpt_bits) | \
+ ((__u64)(pos) >> (64 - (svc)->srv_cpt_bits)))
+
+static void *
+ptlrpc_lprocfs_svc_req_history_start(struct seq_file *s, loff_t *pos)
+{
+ struct ptlrpc_service *svc = s->private;
+ struct ptlrpc_service_part *svcpt;
+ struct ptlrpc_srh_iterator *srhi;
+ unsigned int cpt;
+ int rc;
+ int i;
+
+ if (sizeof(loff_t) != sizeof(__u64)) { /* can't support */
+ CWARN("Failed to read request history because size of loff_t %d can't match size of u64\n",
+ (int)sizeof(loff_t));
+ return NULL;
+ }
+
+ OBD_ALLOC(srhi, sizeof(*srhi));
+ if (srhi == NULL)
+ return NULL;
+
+ srhi->srhi_seq = 0;
+ srhi->srhi_req = NULL;
+
+ cpt = PTLRPC_REQ_POS2CPT(svc, *pos);
+
+ ptlrpc_service_for_each_part(svcpt, i, svc) {
+ if (i < cpt) /* skip */
+ continue;
+ if (i > cpt) /* make up the lowest position for this CPT */
+ *pos = PTLRPC_REQ_CPT2POS(svc, i);
+
+ spin_lock(&svcpt->scp_lock);
+ rc = ptlrpc_lprocfs_svc_req_history_seek(svcpt, srhi,
+ PTLRPC_REQ_POS2SEQ(svc, *pos));
+ spin_unlock(&svcpt->scp_lock);
+ if (rc == 0) {
+ *pos = PTLRPC_REQ_SEQ2POS(svc, srhi->srhi_seq);
+ srhi->srhi_idx = i;
+ return srhi;
+ }
+ }
+
+ OBD_FREE(srhi, sizeof(*srhi));
+ return NULL;
+}
+
+static void
+ptlrpc_lprocfs_svc_req_history_stop(struct seq_file *s, void *iter)
+{
+ struct ptlrpc_srh_iterator *srhi = iter;
+
+ if (srhi != NULL)
+ OBD_FREE(srhi, sizeof(*srhi));
+}
+
+static void *
+ptlrpc_lprocfs_svc_req_history_next(struct seq_file *s,
+ void *iter, loff_t *pos)
+{
+ struct ptlrpc_service *svc = s->private;
+ struct ptlrpc_srh_iterator *srhi = iter;
+ struct ptlrpc_service_part *svcpt;
+ __u64 seq;
+ int rc;
+ int i;
+
+ for (i = srhi->srhi_idx; i < svc->srv_ncpts; i++) {
+ svcpt = svc->srv_parts[i];
+
+ if (i > srhi->srhi_idx) { /* reset iterator for a new CPT */
+ srhi->srhi_req = NULL;
+ seq = srhi->srhi_seq = 0;
+ } else { /* the next sequence */
+ seq = srhi->srhi_seq + (1 << svc->srv_cpt_bits);
+ }
+
+ spin_lock(&svcpt->scp_lock);
+ rc = ptlrpc_lprocfs_svc_req_history_seek(svcpt, srhi, seq);
+ spin_unlock(&svcpt->scp_lock);
+ if (rc == 0) {
+ *pos = PTLRPC_REQ_SEQ2POS(svc, srhi->srhi_seq);
+ srhi->srhi_idx = i;
+ return srhi;
+ }
+ }
+
+ OBD_FREE(srhi, sizeof(*srhi));
+ return NULL;
+}
+
+/* common ost/mdt so_req_printer */
+void target_print_req(void *seq_file, struct ptlrpc_request *req)
+{
+ /* Called holding srv_lock with irqs disabled.
+ * Print specific req contents and a newline.
+ * CAVEAT EMPTOR: check request message length before printing!!!
+ * You might have received any old crap so you must be just as
+ * careful here as the service's request parser!!! */
+ struct seq_file *sf = seq_file;
+
+ switch (req->rq_phase) {
+ case RQ_PHASE_NEW:
+ /* still awaiting a service thread's attention, or rejected
+ * because the generic request message didn't unpack */
+ seq_printf(sf, "<not swabbed>\n");
+ break;
+ case RQ_PHASE_INTERPRET:
+ /* being handled, so basic msg swabbed, and opc is valid
+ * but racing with mds_handle() */
+ case RQ_PHASE_COMPLETE:
+ /* been handled by mds_handle() reply state possibly still
+ * volatile */
+ seq_printf(sf, "opc %d\n", lustre_msg_get_opc(req->rq_reqmsg));
+ break;
+ default:
+ DEBUG_REQ(D_ERROR, req, "bad phase %d", req->rq_phase);
+ }
+}
+EXPORT_SYMBOL(target_print_req);
+
+static int ptlrpc_lprocfs_svc_req_history_show(struct seq_file *s, void *iter)
+{
+ struct ptlrpc_service *svc = s->private;
+ struct ptlrpc_srh_iterator *srhi = iter;
+ struct ptlrpc_service_part *svcpt;
+ struct ptlrpc_request *req;
+ int rc;
+
+ LASSERT(srhi->srhi_idx < svc->srv_ncpts);
+
+ svcpt = svc->srv_parts[srhi->srhi_idx];
+
+ spin_lock(&svcpt->scp_lock);
+
+ rc = ptlrpc_lprocfs_svc_req_history_seek(svcpt, srhi, srhi->srhi_seq);
+
+ if (rc == 0) {
+ req = srhi->srhi_req;
+
+ /* Print common req fields.
+ * CAVEAT EMPTOR: we're racing with the service handler
+ * here. The request could contain any old crap, so you
+ * must be just as careful as the service's request
+ * parser. Currently I only print stuff here I know is OK
+ * to look at coz it was set up in request_in_callback()!!! */
+ seq_printf(s, "%lld:%s:%s:x%llu:%d:%s:%ld:%lds(%+lds) ",
+ req->rq_history_seq, libcfs_nid2str(req->rq_self),
+ libcfs_id2str(req->rq_peer), req->rq_xid,
+ req->rq_reqlen, ptlrpc_rqphase2str(req),
+ req->rq_arrival_time.tv_sec,
+ req->rq_sent - req->rq_arrival_time.tv_sec,
+ req->rq_sent - req->rq_deadline);
+ if (svc->srv_ops.so_req_printer == NULL)
+ seq_printf(s, "\n");
+ else
+ svc->srv_ops.so_req_printer(s, srhi->srhi_req);
+ }
+
+ spin_unlock(&svcpt->scp_lock);
+ return rc;
+}
+
+static int
+ptlrpc_lprocfs_svc_req_history_open(struct inode *inode, struct file *file)
+{
+ static struct seq_operations sops = {
+ .start = ptlrpc_lprocfs_svc_req_history_start,
+ .stop = ptlrpc_lprocfs_svc_req_history_stop,
+ .next = ptlrpc_lprocfs_svc_req_history_next,
+ .show = ptlrpc_lprocfs_svc_req_history_show,
+ };
+ struct seq_file *seqf;
+ int rc;
+
+ rc = seq_open(file, &sops);
+ if (rc)
+ return rc;
+
+ seqf = file->private_data;
+ seqf->private = PDE_DATA(inode);
+ return 0;
+}
+
+/* See also lprocfs_rd_timeouts */
+static int ptlrpc_lprocfs_timeouts_seq_show(struct seq_file *m, void *n)
+{
+ struct ptlrpc_service *svc = m->private;
+ struct ptlrpc_service_part *svcpt;
+ struct dhms ts;
+ time_t worstt;
+ unsigned int cur;
+ unsigned int worst;
+ int i;
+
+ if (AT_OFF) {
+ seq_printf(m, "adaptive timeouts off, using obd_timeout %u\n",
+ obd_timeout);
+ return 0;
+ }
+
+ ptlrpc_service_for_each_part(svcpt, i, svc) {
+ cur = at_get(&svcpt->scp_at_estimate);
+ worst = svcpt->scp_at_estimate.at_worst_ever;
+ worstt = svcpt->scp_at_estimate.at_worst_time;
+ s2dhms(&ts, get_seconds() - worstt);
+
+ seq_printf(m, "%10s : cur %3u worst %3u (at %ld, "
+ DHMS_FMT" ago) ", "service",
+ cur, worst, worstt, DHMS_VARS(&ts));
+
+ lprocfs_at_hist_helper(m, &svcpt->scp_at_estimate);
+ }
+
+ return 0;
+}
+LPROC_SEQ_FOPS_RO(ptlrpc_lprocfs_timeouts);
+
+static int ptlrpc_lprocfs_hp_ratio_seq_show(struct seq_file *m, void *v)
+{
+ struct ptlrpc_service *svc = m->private;
+ seq_printf(m, "%d", svc->srv_hpreq_ratio);
+ return 0;
+}
+
+static ssize_t ptlrpc_lprocfs_hp_ratio_seq_write(struct file *file,
+ const char __user *buffer,
+ size_t count,
+ loff_t *off)
+{
+ struct ptlrpc_service *svc = ((struct seq_file *)file->private_data)->private;
+ int rc;
+ int val;
+
+ rc = lprocfs_write_helper(buffer, count, &val);
+ if (rc < 0)
+ return rc;
+
+ if (val < 0)
+ return -ERANGE;
+
+ spin_lock(&svc->srv_lock);
+ svc->srv_hpreq_ratio = val;
+ spin_unlock(&svc->srv_lock);
+
+ return count;
+}
+LPROC_SEQ_FOPS(ptlrpc_lprocfs_hp_ratio);
+
+void ptlrpc_lprocfs_register_service(struct proc_dir_entry *entry,
+ struct ptlrpc_service *svc)
+{
+ struct lprocfs_vars lproc_vars[] = {
+ {.name = "high_priority_ratio",
+ .fops = &ptlrpc_lprocfs_hp_ratio_fops,
+ .data = svc},
+ {.name = "req_buffer_history_len",
+ .fops = &ptlrpc_lprocfs_req_history_len_fops,
+ .data = svc},
+ {.name = "req_buffer_history_max",
+ .fops = &ptlrpc_lprocfs_req_history_max_fops,
+ .data = svc},
+ {.name = "threads_min",
+ .fops = &ptlrpc_lprocfs_threads_min_fops,
+ .data = svc},
+ {.name = "threads_max",
+ .fops = &ptlrpc_lprocfs_threads_max_fops,
+ .data = svc},
+ {.name = "threads_started",
+ .fops = &ptlrpc_lprocfs_threads_started_fops,
+ .data = svc},
+ {.name = "timeouts",
+ .fops = &ptlrpc_lprocfs_timeouts_fops,
+ .data = svc},
+ {.name = "nrs_policies",
+ .fops = &ptlrpc_lprocfs_nrs_fops,
+ .data = svc},
+ {NULL}
+ };
+ static const struct file_operations req_history_fops = {
+ .owner = THIS_MODULE,
+ .open = ptlrpc_lprocfs_svc_req_history_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = lprocfs_seq_release,
+ };
+
+ int rc;
+
+ ptlrpc_lprocfs_register(entry, svc->srv_name,
+ "stats", &svc->srv_procroot,
+ &svc->srv_stats);
+
+ if (svc->srv_procroot == NULL)
+ return;
+
+ lprocfs_add_vars(svc->srv_procroot, lproc_vars, NULL);
+
+ rc = lprocfs_seq_create(svc->srv_procroot, "req_history",
+ 0400, &req_history_fops, svc);
+ if (rc)
+ CWARN("Error adding the req_history file\n");
+}
+
+void ptlrpc_lprocfs_register_obd(struct obd_device *obddev)
+{
+ ptlrpc_lprocfs_register(obddev->obd_proc_entry, NULL, "stats",
+ &obddev->obd_svc_procroot,
+ &obddev->obd_svc_stats);
+}
+EXPORT_SYMBOL(ptlrpc_lprocfs_register_obd);
+
+void ptlrpc_lprocfs_rpc_sent(struct ptlrpc_request *req, long amount)
+{
+ struct lprocfs_stats *svc_stats;
+ __u32 op = lustre_msg_get_opc(req->rq_reqmsg);
+ int opc = opcode_offset(op);
+
+ svc_stats = req->rq_import->imp_obd->obd_svc_stats;
+ if (svc_stats == NULL || opc <= 0)
+ return;
+ LASSERT(opc < LUSTRE_MAX_OPCODES);
+ if (!(op == LDLM_ENQUEUE || op == MDS_REINT))
+ lprocfs_counter_add(svc_stats, opc + EXTRA_MAX_OPCODES, amount);
+}
+
+void ptlrpc_lprocfs_brw(struct ptlrpc_request *req, int bytes)
+{
+ struct lprocfs_stats *svc_stats;
+ int idx;
+
+ if (!req->rq_import)
+ return;
+ svc_stats = req->rq_import->imp_obd->obd_svc_stats;
+ if (!svc_stats)
+ return;
+ idx = lustre_msg_get_opc(req->rq_reqmsg);
+ switch (idx) {
+ case OST_READ:
+ idx = BRW_READ_BYTES + PTLRPC_LAST_CNTR;
+ break;
+ case OST_WRITE:
+ idx = BRW_WRITE_BYTES + PTLRPC_LAST_CNTR;
+ break;
+ default:
+ LASSERTF(0, "unsupported opcode %u\n", idx);
+ break;
+ }
+
+ lprocfs_counter_add(svc_stats, idx, bytes);
+}
+
+EXPORT_SYMBOL(ptlrpc_lprocfs_brw);
+
+void ptlrpc_lprocfs_unregister_service(struct ptlrpc_service *svc)
+{
+ if (svc->srv_procroot != NULL)
+ lprocfs_remove(&svc->srv_procroot);
+
+ if (svc->srv_stats)
+ lprocfs_free_stats(&svc->srv_stats);
+}
+
+void ptlrpc_lprocfs_unregister_obd(struct obd_device *obd)
+{
+ if (obd->obd_svc_procroot)
+ lprocfs_remove(&obd->obd_svc_procroot);
+
+ if (obd->obd_svc_stats)
+ lprocfs_free_stats(&obd->obd_svc_stats);
+}
+EXPORT_SYMBOL(ptlrpc_lprocfs_unregister_obd);
+
+
+#define BUFLEN (UUID_MAX + 5)
+
+int lprocfs_wr_evict_client(struct file *file, const char __user *buffer,
+ size_t count, loff_t *off)
+{
+ struct obd_device *obd = ((struct seq_file *)file->private_data)->private;
+ char *kbuf;
+ char *tmpbuf;
+
+ OBD_ALLOC(kbuf, BUFLEN);
+ if (kbuf == NULL)
+ return -ENOMEM;
+
+ /*
+ * OBD_ALLOC() will zero kbuf, but we only copy BUFLEN - 1
+ * bytes into kbuf, to ensure that the string is NUL-terminated.
+ * UUID_MAX should include a trailing NUL already.
+ */
+ if (copy_from_user(kbuf, buffer,
+ min_t(unsigned long, BUFLEN - 1, count))) {
+ count = -EFAULT;
+ goto out;
+ }
+ tmpbuf = cfs_firststr(kbuf, min_t(unsigned long, BUFLEN - 1, count));
+ /* Kludge code(deadlock situation): the lprocfs lock has been held
+ * since the client is evicted by writing client's
+ * uuid/nid to procfs "evict_client" entry. However,
+ * obd_export_evict_by_uuid() will call lprocfs_remove() to destroy
+ * the proc entries under the being destroyed export{}, so I have
+ * to drop the lock at first here.
+ * - jay, jxiong@clusterfs.com */
+ class_incref(obd, __func__, current);
+
+ if (strncmp(tmpbuf, "nid:", 4) == 0)
+ obd_export_evict_by_nid(obd, tmpbuf + 4);
+ else if (strncmp(tmpbuf, "uuid:", 5) == 0)
+ obd_export_evict_by_uuid(obd, tmpbuf + 5);
+ else
+ obd_export_evict_by_uuid(obd, tmpbuf);
+
+ class_decref(obd, __func__, current);
+
+out:
+ OBD_FREE(kbuf, BUFLEN);
+ return count;
+}
+EXPORT_SYMBOL(lprocfs_wr_evict_client);
+
+#undef BUFLEN
+
+int lprocfs_wr_ping(struct file *file, const char __user *buffer,
+ size_t count, loff_t *off)
+{
+ struct obd_device *obd = ((struct seq_file *)file->private_data)->private;
+ struct ptlrpc_request *req;
+ int rc;
+
+ LPROCFS_CLIMP_CHECK(obd);
+ req = ptlrpc_prep_ping(obd->u.cli.cl_import);
+ LPROCFS_CLIMP_EXIT(obd);
+ if (req == NULL)
+ return -ENOMEM;
+
+ req->rq_send_state = LUSTRE_IMP_FULL;
+
+ rc = ptlrpc_queue_wait(req);
+
+ ptlrpc_req_finished(req);
+ if (rc >= 0)
+ return count;
+ return rc;
+}
+EXPORT_SYMBOL(lprocfs_wr_ping);
+
+/* Write the connection UUID to this file to attempt to connect to that node.
+ * The connection UUID is a node's primary NID. For example,
+ * "echo connection=192.168.0.1@tcp0::instance > .../import".
+ */
+int lprocfs_wr_import(struct file *file, const char __user *buffer,
+ size_t count, loff_t *off)
+{
+ struct obd_device *obd = ((struct seq_file *)file->private_data)->private;
+ struct obd_import *imp = obd->u.cli.cl_import;
+ char *kbuf = NULL;
+ char *uuid;
+ char *ptr;
+ int do_reconn = 1;
+ const char prefix[] = "connection=";
+ const int prefix_len = sizeof(prefix) - 1;
+
+ if (count > PAGE_CACHE_SIZE - 1 || count <= prefix_len)
+ return -EINVAL;
+
+ OBD_ALLOC(kbuf, count + 1);
+ if (kbuf == NULL)
+ return -ENOMEM;
+
+ if (copy_from_user(kbuf, buffer, count)) {
+ count = -EFAULT;
+ goto out;
+ }
+
+ kbuf[count] = 0;
+
+ /* only support connection=uuid::instance now */
+ if (strncmp(prefix, kbuf, prefix_len) != 0) {
+ count = -EINVAL;
+ goto out;
+ }
+
+ uuid = kbuf + prefix_len;
+ ptr = strstr(uuid, "::");
+ if (ptr) {
+ __u32 inst;
+ char *endptr;
+
+ *ptr = 0;
+ do_reconn = 0;
+ ptr += strlen("::");
+ inst = simple_strtol(ptr, &endptr, 10);
+ if (*endptr) {
+ CERROR("config: wrong instance # %s\n", ptr);
+ } else if (inst != imp->imp_connect_data.ocd_instance) {
+ CDEBUG(D_INFO, "IR: %s is connecting to an obsoleted target(%u/%u), reconnecting...\n",
+ imp->imp_obd->obd_name,
+ imp->imp_connect_data.ocd_instance, inst);
+ do_reconn = 1;
+ } else {
+ CDEBUG(D_INFO, "IR: %s has already been connecting to new target(%u)\n",
+ imp->imp_obd->obd_name, inst);
+ }
+ }
+
+ if (do_reconn)
+ ptlrpc_recover_import(imp, uuid, 1);
+
+out:
+ OBD_FREE(kbuf, count + 1);
+ return count;
+}
+EXPORT_SYMBOL(lprocfs_wr_import);
+
+int lprocfs_rd_pinger_recov(struct seq_file *m, void *n)
+{
+ struct obd_device *obd = m->private;
+ struct obd_import *imp = obd->u.cli.cl_import;
+
+ LPROCFS_CLIMP_CHECK(obd);
+ seq_printf(m, "%d\n", !imp->imp_no_pinger_recover);
+ LPROCFS_CLIMP_EXIT(obd);
+
+ return 0;
+}
+EXPORT_SYMBOL(lprocfs_rd_pinger_recov);
+
+int lprocfs_wr_pinger_recov(struct file *file, const char __user *buffer,
+ size_t count, loff_t *off)
+{
+ struct obd_device *obd = ((struct seq_file *)file->private_data)->private;
+ struct client_obd *cli = &obd->u.cli;
+ struct obd_import *imp = cli->cl_import;
+ int rc, val;
+
+ rc = lprocfs_write_helper(buffer, count, &val);
+ if (rc < 0)
+ return rc;
+
+ if (val != 0 && val != 1)
+ return -ERANGE;
+
+ LPROCFS_CLIMP_CHECK(obd);
+ spin_lock(&imp->imp_lock);
+ imp->imp_no_pinger_recover = !val;
+ spin_unlock(&imp->imp_lock);
+ LPROCFS_CLIMP_EXIT(obd);
+
+ return count;
+
+}
+EXPORT_SYMBOL(lprocfs_wr_pinger_recov);
+
+#endif /* CONFIG_PROC_FS */
diff --git a/drivers/staging/lustre/lustre/ptlrpc/niobuf.c b/drivers/staging/lustre/lustre/ptlrpc/niobuf.c
new file mode 100644
index 000000000..2fa258558
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ptlrpc/niobuf.c
@@ -0,0 +1,731 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#define DEBUG_SUBSYSTEM S_RPC
+#include "../include/obd_support.h"
+#include "../include/lustre_net.h"
+#include "../include/lustre_lib.h"
+#include "../include/obd.h"
+#include "../include/obd_class.h"
+#include "ptlrpc_internal.h"
+
+/**
+ * Helper function. Sends \a len bytes from \a base at offset \a offset
+ * over \a conn connection to portal \a portal.
+ * Returns 0 on success or error code.
+ */
+static int ptl_send_buf(lnet_handle_md_t *mdh, void *base, int len,
+ lnet_ack_req_t ack, struct ptlrpc_cb_id *cbid,
+ struct ptlrpc_connection *conn, int portal, __u64 xid,
+ unsigned int offset)
+{
+ int rc;
+ lnet_md_t md;
+
+ LASSERT(portal != 0);
+ LASSERT(conn != NULL);
+ CDEBUG(D_INFO, "conn=%p id %s\n", conn, libcfs_id2str(conn->c_peer));
+ md.start = base;
+ md.length = len;
+ md.threshold = (ack == LNET_ACK_REQ) ? 2 : 1;
+ md.options = PTLRPC_MD_OPTIONS;
+ md.user_ptr = cbid;
+ md.eq_handle = ptlrpc_eq_h;
+
+ if (unlikely(ack == LNET_ACK_REQ &&
+ OBD_FAIL_CHECK_ORSET(OBD_FAIL_PTLRPC_ACK,
+ OBD_FAIL_ONCE))) {
+ /* don't ask for the ack to simulate failing client */
+ ack = LNET_NOACK_REQ;
+ }
+
+ rc = LNetMDBind(md, LNET_UNLINK, mdh);
+ if (unlikely(rc != 0)) {
+ CERROR("LNetMDBind failed: %d\n", rc);
+ LASSERT(rc == -ENOMEM);
+ return -ENOMEM;
+ }
+
+ CDEBUG(D_NET, "Sending %d bytes to portal %d, xid %lld, offset %u\n",
+ len, portal, xid, offset);
+
+ rc = LNetPut(conn->c_self, *mdh, ack,
+ conn->c_peer, portal, xid, offset, 0);
+ if (unlikely(rc != 0)) {
+ int rc2;
+ /* We're going to get an UNLINK event when I unlink below,
+ * which will complete just like any other failed send, so
+ * I fall through and return success here! */
+ CERROR("LNetPut(%s, %d, %lld) failed: %d\n",
+ libcfs_id2str(conn->c_peer), portal, xid, rc);
+ rc2 = LNetMDUnlink(*mdh);
+ LASSERTF(rc2 == 0, "rc2 = %d\n", rc2);
+ }
+
+ return 0;
+}
+
+static void mdunlink_iterate_helper(lnet_handle_md_t *bd_mds, int count)
+{
+ int i;
+
+ for (i = 0; i < count; i++)
+ LNetMDUnlink(bd_mds[i]);
+}
+
+
+/**
+ * Register bulk at the sender for later transfer.
+ * Returns 0 on success or error code.
+ */
+int ptlrpc_register_bulk(struct ptlrpc_request *req)
+{
+ struct ptlrpc_bulk_desc *desc = req->rq_bulk;
+ lnet_process_id_t peer;
+ int rc = 0;
+ int rc2;
+ int posted_md;
+ int total_md;
+ __u64 xid;
+ lnet_handle_me_t me_h;
+ lnet_md_t md;
+
+ if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_BULK_GET_NET))
+ return 0;
+
+ /* NB no locking required until desc is on the network */
+ LASSERT(desc->bd_nob > 0);
+ LASSERT(desc->bd_md_count == 0);
+ LASSERT(desc->bd_md_max_brw <= PTLRPC_BULK_OPS_COUNT);
+ LASSERT(desc->bd_iov_count <= PTLRPC_MAX_BRW_PAGES);
+ LASSERT(desc->bd_req != NULL);
+ LASSERT(desc->bd_type == BULK_PUT_SINK ||
+ desc->bd_type == BULK_GET_SOURCE);
+
+ /* cleanup the state of the bulk for it will be reused */
+ if (req->rq_resend || req->rq_send_state == LUSTRE_IMP_REPLAY)
+ desc->bd_nob_transferred = 0;
+ else
+ LASSERT(desc->bd_nob_transferred == 0);
+
+ desc->bd_failure = 0;
+
+ peer = desc->bd_import->imp_connection->c_peer;
+
+ LASSERT(desc->bd_cbid.cbid_fn == client_bulk_callback);
+ LASSERT(desc->bd_cbid.cbid_arg == desc);
+
+ /* An XID is only used for a single request from the client.
+ * For retried bulk transfers, a new XID will be allocated in
+ * in ptlrpc_check_set() if it needs to be resent, so it is not
+ * using the same RDMA match bits after an error.
+ *
+ * For multi-bulk RPCs, rq_xid is the last XID needed for bulks. The
+ * first bulk XID is power-of-two aligned before rq_xid. LU-1431 */
+ xid = req->rq_xid & ~((__u64)desc->bd_md_max_brw - 1);
+ LASSERTF(!(desc->bd_registered &&
+ req->rq_send_state != LUSTRE_IMP_REPLAY) ||
+ xid != desc->bd_last_xid,
+ "registered: %d rq_xid: %llu bd_last_xid: %llu\n",
+ desc->bd_registered, xid, desc->bd_last_xid);
+
+ total_md = (desc->bd_iov_count + LNET_MAX_IOV - 1) / LNET_MAX_IOV;
+ desc->bd_registered = 1;
+ desc->bd_last_xid = xid;
+ desc->bd_md_count = total_md;
+ md.user_ptr = &desc->bd_cbid;
+ md.eq_handle = ptlrpc_eq_h;
+ md.threshold = 1; /* PUT or GET */
+
+ for (posted_md = 0; posted_md < total_md; posted_md++, xid++) {
+ md.options = PTLRPC_MD_OPTIONS |
+ ((desc->bd_type == BULK_GET_SOURCE) ?
+ LNET_MD_OP_GET : LNET_MD_OP_PUT);
+ ptlrpc_fill_bulk_md(&md, desc, posted_md);
+
+ rc = LNetMEAttach(desc->bd_portal, peer, xid, 0,
+ LNET_UNLINK, LNET_INS_AFTER, &me_h);
+ if (rc != 0) {
+ CERROR("%s: LNetMEAttach failed x%llu/%d: rc = %d\n",
+ desc->bd_import->imp_obd->obd_name, xid,
+ posted_md, rc);
+ break;
+ }
+
+ /* About to let the network at it... */
+ rc = LNetMDAttach(me_h, md, LNET_UNLINK,
+ &desc->bd_mds[posted_md]);
+ if (rc != 0) {
+ CERROR("%s: LNetMDAttach failed x%llu/%d: rc = %d\n",
+ desc->bd_import->imp_obd->obd_name, xid,
+ posted_md, rc);
+ rc2 = LNetMEUnlink(me_h);
+ LASSERT(rc2 == 0);
+ break;
+ }
+ }
+
+ if (rc != 0) {
+ LASSERT(rc == -ENOMEM);
+ spin_lock(&desc->bd_lock);
+ desc->bd_md_count -= total_md - posted_md;
+ spin_unlock(&desc->bd_lock);
+ LASSERT(desc->bd_md_count >= 0);
+ mdunlink_iterate_helper(desc->bd_mds, desc->bd_md_max_brw);
+ req->rq_status = -ENOMEM;
+ return -ENOMEM;
+ }
+
+ /* Set rq_xid to matchbits of the final bulk so that server can
+ * infer the number of bulks that were prepared */
+ req->rq_xid = --xid;
+ LASSERTF(desc->bd_last_xid == (req->rq_xid & PTLRPC_BULK_OPS_MASK),
+ "bd_last_xid = x%llu, rq_xid = x%llu\n",
+ desc->bd_last_xid, req->rq_xid);
+
+ spin_lock(&desc->bd_lock);
+ /* Holler if peer manages to touch buffers before he knows the xid */
+ if (desc->bd_md_count != total_md)
+ CWARN("%s: Peer %s touched %d buffers while I registered\n",
+ desc->bd_import->imp_obd->obd_name, libcfs_id2str(peer),
+ total_md - desc->bd_md_count);
+ spin_unlock(&desc->bd_lock);
+
+ CDEBUG(D_NET, "Setup %u bulk %s buffers: %u pages %u bytes, xid x%#llx-%#llx, portal %u\n",
+ desc->bd_md_count,
+ desc->bd_type == BULK_GET_SOURCE ? "get-source" : "put-sink",
+ desc->bd_iov_count, desc->bd_nob,
+ desc->bd_last_xid, req->rq_xid, desc->bd_portal);
+
+ return 0;
+}
+EXPORT_SYMBOL(ptlrpc_register_bulk);
+
+/**
+ * Disconnect a bulk desc from the network. Idempotent. Not
+ * thread-safe (i.e. only interlocks with completion callback).
+ * Returns 1 on success or 0 if network unregistration failed for whatever
+ * reason.
+ */
+int ptlrpc_unregister_bulk(struct ptlrpc_request *req, int async)
+{
+ struct ptlrpc_bulk_desc *desc = req->rq_bulk;
+ wait_queue_head_t *wq;
+ struct l_wait_info lwi;
+ int rc;
+
+ LASSERT(!in_interrupt()); /* might sleep */
+
+ /* Let's setup deadline for reply unlink. */
+ if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_BULK_UNLINK) &&
+ async && req->rq_bulk_deadline == 0)
+ req->rq_bulk_deadline = get_seconds() + LONG_UNLINK;
+
+ if (ptlrpc_client_bulk_active(req) == 0) /* completed or */
+ return 1; /* never registered */
+
+ LASSERT(desc->bd_req == req); /* bd_req NULL until registered */
+
+ /* the unlink ensures the callback happens ASAP and is the last
+ * one. If it fails, it must be because completion just happened,
+ * but we must still l_wait_event() in this case to give liblustre
+ * a chance to run client_bulk_callback() */
+ mdunlink_iterate_helper(desc->bd_mds, desc->bd_md_max_brw);
+
+ if (ptlrpc_client_bulk_active(req) == 0) /* completed or */
+ return 1; /* never registered */
+
+ /* Move to "Unregistering" phase as bulk was not unlinked yet. */
+ ptlrpc_rqphase_move(req, RQ_PHASE_UNREGISTERING);
+
+ /* Do not wait for unlink to finish. */
+ if (async)
+ return 0;
+
+ if (req->rq_set != NULL)
+ wq = &req->rq_set->set_waitq;
+ else
+ wq = &req->rq_reply_waitq;
+
+ for (;;) {
+ /* Network access will complete in finite time but the HUGE
+ * timeout lets us CWARN for visibility of sluggish NALs */
+ lwi = LWI_TIMEOUT_INTERVAL(cfs_time_seconds(LONG_UNLINK),
+ cfs_time_seconds(1), NULL, NULL);
+ rc = l_wait_event(*wq, !ptlrpc_client_bulk_active(req), &lwi);
+ if (rc == 0) {
+ ptlrpc_rqphase_move(req, req->rq_next_phase);
+ return 1;
+ }
+
+ LASSERT(rc == -ETIMEDOUT);
+ DEBUG_REQ(D_WARNING, req, "Unexpectedly long timeout: desc %p",
+ desc);
+ }
+ return 0;
+}
+EXPORT_SYMBOL(ptlrpc_unregister_bulk);
+
+static void ptlrpc_at_set_reply(struct ptlrpc_request *req, int flags)
+{
+ struct ptlrpc_service_part *svcpt = req->rq_rqbd->rqbd_svcpt;
+ struct ptlrpc_service *svc = svcpt->scp_service;
+ int service_time = max_t(int, get_seconds() -
+ req->rq_arrival_time.tv_sec, 1);
+
+ if (!(flags & PTLRPC_REPLY_EARLY) &&
+ (req->rq_type != PTL_RPC_MSG_ERR) &&
+ (req->rq_reqmsg != NULL) &&
+ !(lustre_msg_get_flags(req->rq_reqmsg) &
+ (MSG_RESENT | MSG_REPLAY |
+ MSG_REQ_REPLAY_DONE | MSG_LOCK_REPLAY_DONE))) {
+ /* early replies, errors and recovery requests don't count
+ * toward our service time estimate */
+ int oldse = at_measured(&svcpt->scp_at_estimate, service_time);
+
+ if (oldse != 0) {
+ DEBUG_REQ(D_ADAPTTO, req,
+ "svc %s changed estimate from %d to %d",
+ svc->srv_name, oldse,
+ at_get(&svcpt->scp_at_estimate));
+ }
+ }
+ /* Report actual service time for client latency calc */
+ lustre_msg_set_service_time(req->rq_repmsg, service_time);
+ /* Report service time estimate for future client reqs, but report 0
+ * (to be ignored by client) if it's a error reply during recovery.
+ * (bz15815) */
+ if (req->rq_type == PTL_RPC_MSG_ERR &&
+ (req->rq_export == NULL || req->rq_export->exp_obd->obd_recovering))
+ lustre_msg_set_timeout(req->rq_repmsg, 0);
+ else
+ lustre_msg_set_timeout(req->rq_repmsg,
+ at_get(&svcpt->scp_at_estimate));
+
+ if (req->rq_reqmsg &&
+ !(lustre_msghdr_get_flags(req->rq_reqmsg) & MSGHDR_AT_SUPPORT)) {
+ CDEBUG(D_ADAPTTO, "No early reply support: flags=%#x req_flags=%#x magic=%d:%x/%x len=%d\n",
+ flags, lustre_msg_get_flags(req->rq_reqmsg),
+ lustre_msg_is_v1(req->rq_reqmsg),
+ lustre_msg_get_magic(req->rq_reqmsg),
+ lustre_msg_get_magic(req->rq_repmsg), req->rq_replen);
+ }
+}
+
+/**
+ * Send request reply from request \a req reply buffer.
+ * \a flags defines reply types
+ * Returns 0 on success or error code
+ */
+int ptlrpc_send_reply(struct ptlrpc_request *req, int flags)
+{
+ struct ptlrpc_reply_state *rs = req->rq_reply_state;
+ struct ptlrpc_connection *conn;
+ int rc;
+
+ /* We must already have a reply buffer (only ptlrpc_error() may be
+ * called without one). The reply generated by sptlrpc layer (e.g.
+ * error notify, etc.) might have NULL rq->reqmsg; Otherwise we must
+ * have a request buffer which is either the actual (swabbed) incoming
+ * request, or a saved copy if this is a req saved in
+ * target_queue_final_reply().
+ */
+ LASSERT(req->rq_no_reply == 0);
+ LASSERT(req->rq_reqbuf != NULL);
+ LASSERT(rs != NULL);
+ LASSERT((flags & PTLRPC_REPLY_MAYBE_DIFFICULT) || !rs->rs_difficult);
+ LASSERT(req->rq_repmsg != NULL);
+ LASSERT(req->rq_repmsg == rs->rs_msg);
+ LASSERT(rs->rs_cb_id.cbid_fn == reply_out_callback);
+ LASSERT(rs->rs_cb_id.cbid_arg == rs);
+
+ /* There may be no rq_export during failover */
+
+ if (unlikely(req->rq_export && req->rq_export->exp_obd &&
+ req->rq_export->exp_obd->obd_fail)) {
+ /* Failed obd's only send ENODEV */
+ req->rq_type = PTL_RPC_MSG_ERR;
+ req->rq_status = -ENODEV;
+ CDEBUG(D_HA, "sending ENODEV from failed obd %d\n",
+ req->rq_export->exp_obd->obd_minor);
+ }
+
+ /* In order to keep interoperability with the client (< 2.3) which
+ * doesn't have pb_jobid in ptlrpc_body, We have to shrink the
+ * ptlrpc_body in reply buffer to ptlrpc_body_v2, otherwise, the
+ * reply buffer on client will be overflow.
+ *
+ * XXX Remove this whenever we drop the interoperability with
+ * such client.
+ */
+ req->rq_replen = lustre_shrink_msg(req->rq_repmsg, 0,
+ sizeof(struct ptlrpc_body_v2), 1);
+
+ if (req->rq_type != PTL_RPC_MSG_ERR)
+ req->rq_type = PTL_RPC_MSG_REPLY;
+
+ lustre_msg_set_type(req->rq_repmsg, req->rq_type);
+ lustre_msg_set_status(req->rq_repmsg,
+ ptlrpc_status_hton(req->rq_status));
+ lustre_msg_set_opc(req->rq_repmsg,
+ req->rq_reqmsg ? lustre_msg_get_opc(req->rq_reqmsg) : 0);
+
+ target_pack_pool_reply(req);
+
+ ptlrpc_at_set_reply(req, flags);
+
+ if (req->rq_export == NULL || req->rq_export->exp_connection == NULL)
+ conn = ptlrpc_connection_get(req->rq_peer, req->rq_self, NULL);
+ else
+ conn = ptlrpc_connection_addref(req->rq_export->exp_connection);
+
+ if (unlikely(conn == NULL)) {
+ CERROR("not replying on NULL connection\n"); /* bug 9635 */
+ return -ENOTCONN;
+ }
+ ptlrpc_rs_addref(rs); /* +1 ref for the network */
+
+ rc = sptlrpc_svc_wrap_reply(req);
+ if (unlikely(rc))
+ goto out;
+
+ req->rq_sent = get_seconds();
+
+ rc = ptl_send_buf(&rs->rs_md_h, rs->rs_repbuf, rs->rs_repdata_len,
+ (rs->rs_difficult && !rs->rs_no_ack) ?
+ LNET_ACK_REQ : LNET_NOACK_REQ,
+ &rs->rs_cb_id, conn,
+ ptlrpc_req2svc(req)->srv_rep_portal,
+ req->rq_xid, req->rq_reply_off);
+out:
+ if (unlikely(rc != 0))
+ ptlrpc_req_drop_rs(req);
+ ptlrpc_connection_put(conn);
+ return rc;
+}
+EXPORT_SYMBOL(ptlrpc_send_reply);
+
+int ptlrpc_reply(struct ptlrpc_request *req)
+{
+ if (req->rq_no_reply)
+ return 0;
+ return ptlrpc_send_reply(req, 0);
+}
+EXPORT_SYMBOL(ptlrpc_reply);
+
+/**
+ * For request \a req send an error reply back. Create empty
+ * reply buffers if necessary.
+ */
+int ptlrpc_send_error(struct ptlrpc_request *req, int may_be_difficult)
+{
+ int rc;
+
+ if (req->rq_no_reply)
+ return 0;
+
+ if (!req->rq_repmsg) {
+ rc = lustre_pack_reply(req, 1, NULL, NULL);
+ if (rc)
+ return rc;
+ }
+
+ if (req->rq_status != -ENOSPC && req->rq_status != -EACCES &&
+ req->rq_status != -EPERM && req->rq_status != -ENOENT &&
+ req->rq_status != -EINPROGRESS && req->rq_status != -EDQUOT)
+ req->rq_type = PTL_RPC_MSG_ERR;
+
+ rc = ptlrpc_send_reply(req, may_be_difficult);
+ return rc;
+}
+EXPORT_SYMBOL(ptlrpc_send_error);
+
+int ptlrpc_error(struct ptlrpc_request *req)
+{
+ return ptlrpc_send_error(req, 0);
+}
+EXPORT_SYMBOL(ptlrpc_error);
+
+/**
+ * Send request \a request.
+ * if \a noreply is set, don't expect any reply back and don't set up
+ * reply buffers.
+ * Returns 0 on success or error code.
+ */
+int ptl_send_rpc(struct ptlrpc_request *request, int noreply)
+{
+ int rc;
+ int rc2;
+ int mpflag = 0;
+ struct ptlrpc_connection *connection;
+ lnet_handle_me_t reply_me_h;
+ lnet_md_t reply_md;
+ struct obd_device *obd = request->rq_import->imp_obd;
+
+ if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_DROP_RPC))
+ return 0;
+
+ LASSERT(request->rq_type == PTL_RPC_MSG_REQUEST);
+ LASSERT(request->rq_wait_ctx == 0);
+
+ /* If this is a re-transmit, we're required to have disengaged
+ * cleanly from the previous attempt */
+ LASSERT(!request->rq_receiving_reply);
+ LASSERT(!((lustre_msg_get_flags(request->rq_reqmsg) & MSG_REPLAY) &&
+ (request->rq_import->imp_state == LUSTRE_IMP_FULL)));
+
+ if (unlikely(obd != NULL && obd->obd_fail)) {
+ CDEBUG(D_HA, "muting rpc for failed imp obd %s\n",
+ obd->obd_name);
+ /* this prevents us from waiting in ptlrpc_queue_wait */
+ spin_lock(&request->rq_lock);
+ request->rq_err = 1;
+ spin_unlock(&request->rq_lock);
+ request->rq_status = -ENODEV;
+ return -ENODEV;
+ }
+
+ connection = request->rq_import->imp_connection;
+
+ lustre_msg_set_handle(request->rq_reqmsg,
+ &request->rq_import->imp_remote_handle);
+ lustre_msg_set_type(request->rq_reqmsg, PTL_RPC_MSG_REQUEST);
+ lustre_msg_set_conn_cnt(request->rq_reqmsg,
+ request->rq_import->imp_conn_cnt);
+ lustre_msghdr_set_flags(request->rq_reqmsg,
+ request->rq_import->imp_msghdr_flags);
+
+ if (request->rq_resend)
+ lustre_msg_add_flags(request->rq_reqmsg, MSG_RESENT);
+
+ if (request->rq_memalloc)
+ mpflag = cfs_memory_pressure_get_and_set();
+
+ rc = sptlrpc_cli_wrap_request(request);
+ if (rc)
+ goto out;
+
+ /* bulk register should be done after wrap_request() */
+ if (request->rq_bulk != NULL) {
+ rc = ptlrpc_register_bulk(request);
+ if (rc != 0)
+ goto out;
+ }
+
+ if (!noreply) {
+ LASSERT(request->rq_replen != 0);
+ if (request->rq_repbuf == NULL) {
+ LASSERT(request->rq_repdata == NULL);
+ LASSERT(request->rq_repmsg == NULL);
+ rc = sptlrpc_cli_alloc_repbuf(request,
+ request->rq_replen);
+ if (rc) {
+ /* this prevents us from looping in
+ * ptlrpc_queue_wait */
+ spin_lock(&request->rq_lock);
+ request->rq_err = 1;
+ spin_unlock(&request->rq_lock);
+ request->rq_status = rc;
+ goto cleanup_bulk;
+ }
+ } else {
+ request->rq_repdata = NULL;
+ request->rq_repmsg = NULL;
+ }
+
+ rc = LNetMEAttach(request->rq_reply_portal,/*XXX FIXME bug 249*/
+ connection->c_peer, request->rq_xid, 0,
+ LNET_UNLINK, LNET_INS_AFTER, &reply_me_h);
+ if (rc != 0) {
+ CERROR("LNetMEAttach failed: %d\n", rc);
+ LASSERT(rc == -ENOMEM);
+ rc = -ENOMEM;
+ goto cleanup_bulk;
+ }
+ }
+
+ spin_lock(&request->rq_lock);
+ /* If the MD attach succeeds, there _will_ be a reply_in callback */
+ request->rq_receiving_reply = !noreply;
+ request->rq_req_unlink = 1;
+ /* We are responsible for unlinking the reply buffer */
+ request->rq_reply_unlink = !noreply;
+ /* Clear any flags that may be present from previous sends. */
+ request->rq_replied = 0;
+ request->rq_err = 0;
+ request->rq_timedout = 0;
+ request->rq_net_err = 0;
+ request->rq_resend = 0;
+ request->rq_restart = 0;
+ request->rq_reply_truncate = 0;
+ spin_unlock(&request->rq_lock);
+
+ if (!noreply) {
+ reply_md.start = request->rq_repbuf;
+ reply_md.length = request->rq_repbuf_len;
+ /* Allow multiple early replies */
+ reply_md.threshold = LNET_MD_THRESH_INF;
+ /* Manage remote for early replies */
+ reply_md.options = PTLRPC_MD_OPTIONS | LNET_MD_OP_PUT |
+ LNET_MD_MANAGE_REMOTE |
+ LNET_MD_TRUNCATE; /* allow to make EOVERFLOW error */;
+ reply_md.user_ptr = &request->rq_reply_cbid;
+ reply_md.eq_handle = ptlrpc_eq_h;
+
+ /* We must see the unlink callback to unset rq_reply_unlink,
+ so we can't auto-unlink */
+ rc = LNetMDAttach(reply_me_h, reply_md, LNET_RETAIN,
+ &request->rq_reply_md_h);
+ if (rc != 0) {
+ CERROR("LNetMDAttach failed: %d\n", rc);
+ LASSERT(rc == -ENOMEM);
+ spin_lock(&request->rq_lock);
+ /* ...but the MD attach didn't succeed... */
+ request->rq_receiving_reply = 0;
+ spin_unlock(&request->rq_lock);
+ rc = -ENOMEM;
+ goto cleanup_me;
+ }
+
+ CDEBUG(D_NET, "Setup reply buffer: %u bytes, xid %llu, portal %u\n",
+ request->rq_repbuf_len, request->rq_xid,
+ request->rq_reply_portal);
+ }
+
+ /* add references on request for request_out_callback */
+ ptlrpc_request_addref(request);
+ if (obd != NULL && obd->obd_svc_stats != NULL)
+ lprocfs_counter_add(obd->obd_svc_stats, PTLRPC_REQACTIVE_CNTR,
+ atomic_read(&request->rq_import->imp_inflight));
+
+ OBD_FAIL_TIMEOUT(OBD_FAIL_PTLRPC_DELAY_SEND, request->rq_timeout + 5);
+
+ do_gettimeofday(&request->rq_arrival_time);
+ request->rq_sent = get_seconds();
+ /* We give the server rq_timeout secs to process the req, and
+ add the network latency for our local timeout. */
+ request->rq_deadline = request->rq_sent + request->rq_timeout +
+ ptlrpc_at_get_net_latency(request);
+
+ ptlrpc_pinger_sending_on_import(request->rq_import);
+
+ DEBUG_REQ(D_INFO, request, "send flg=%x",
+ lustre_msg_get_flags(request->rq_reqmsg));
+ rc = ptl_send_buf(&request->rq_req_md_h,
+ request->rq_reqbuf, request->rq_reqdata_len,
+ LNET_NOACK_REQ, &request->rq_req_cbid,
+ connection,
+ request->rq_request_portal,
+ request->rq_xid, 0);
+ if (rc == 0)
+ goto out;
+
+ ptlrpc_req_finished(request);
+ if (noreply)
+ goto out;
+
+ cleanup_me:
+ /* MEUnlink is safe; the PUT didn't even get off the ground, and
+ * nobody apart from the PUT's target has the right nid+XID to
+ * access the reply buffer. */
+ rc2 = LNetMEUnlink(reply_me_h);
+ LASSERT(rc2 == 0);
+ /* UNLINKED callback called synchronously */
+ LASSERT(!request->rq_receiving_reply);
+
+ cleanup_bulk:
+ /* We do sync unlink here as there was no real transfer here so
+ * the chance to have long unlink to sluggish net is smaller here. */
+ ptlrpc_unregister_bulk(request, 0);
+ out:
+ if (request->rq_memalloc)
+ cfs_memory_pressure_restore(mpflag);
+ return rc;
+}
+EXPORT_SYMBOL(ptl_send_rpc);
+
+/**
+ * Register request buffer descriptor for request receiving.
+ */
+int ptlrpc_register_rqbd(struct ptlrpc_request_buffer_desc *rqbd)
+{
+ struct ptlrpc_service *service = rqbd->rqbd_svcpt->scp_service;
+ static lnet_process_id_t match_id = {LNET_NID_ANY, LNET_PID_ANY};
+ int rc;
+ lnet_md_t md;
+ lnet_handle_me_t me_h;
+
+ CDEBUG(D_NET, "LNetMEAttach: portal %d\n",
+ service->srv_req_portal);
+
+ if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_RQBD))
+ return -ENOMEM;
+
+ /* NB: CPT affinity service should use new LNet flag LNET_INS_LOCAL,
+ * which means buffer can only be attached on local CPT, and LND
+ * threads can find it by grabbing a local lock */
+ rc = LNetMEAttach(service->srv_req_portal,
+ match_id, 0, ~0, LNET_UNLINK,
+ rqbd->rqbd_svcpt->scp_cpt >= 0 ?
+ LNET_INS_LOCAL : LNET_INS_AFTER, &me_h);
+ if (rc != 0) {
+ CERROR("LNetMEAttach failed: %d\n", rc);
+ return -ENOMEM;
+ }
+
+ LASSERT(rqbd->rqbd_refcount == 0);
+ rqbd->rqbd_refcount = 1;
+
+ md.start = rqbd->rqbd_buffer;
+ md.length = service->srv_buf_size;
+ md.max_size = service->srv_max_req_size;
+ md.threshold = LNET_MD_THRESH_INF;
+ md.options = PTLRPC_MD_OPTIONS | LNET_MD_OP_PUT | LNET_MD_MAX_SIZE;
+ md.user_ptr = &rqbd->rqbd_cbid;
+ md.eq_handle = ptlrpc_eq_h;
+
+ rc = LNetMDAttach(me_h, md, LNET_UNLINK, &rqbd->rqbd_md_h);
+ if (rc == 0)
+ return 0;
+
+ CERROR("LNetMDAttach failed: %d;\n", rc);
+ LASSERT(rc == -ENOMEM);
+ rc = LNetMEUnlink(me_h);
+ LASSERT(rc == 0);
+ rqbd->rqbd_refcount = 0;
+
+ return -ENOMEM;
+}
diff --git a/drivers/staging/lustre/lustre/ptlrpc/nrs.c b/drivers/staging/lustre/lustre/ptlrpc/nrs.c
new file mode 100644
index 000000000..81ad74732
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ptlrpc/nrs.c
@@ -0,0 +1,1754 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License version 2 for more details. A copy is
+ * included in the COPYING file that accompanied this code.
+
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2011 Intel Corporation
+ *
+ * Copyright 2012 Xyratex Technology Limited
+ */
+/*
+ * lustre/ptlrpc/nrs.c
+ *
+ * Network Request Scheduler (NRS)
+ *
+ * Allows to reorder the handling of RPCs at servers.
+ *
+ * Author: Liang Zhen <liang@whamcloud.com>
+ * Author: Nikitas Angelinas <nikitas_angelinas@xyratex.com>
+ */
+/**
+ * \addtogoup nrs
+ * @{
+ */
+
+#define DEBUG_SUBSYSTEM S_RPC
+#include "../include/obd_support.h"
+#include "../include/obd_class.h"
+#include "../include/lustre_net.h"
+#include "../include/lprocfs_status.h"
+#include "../../include/linux/libcfs/libcfs.h"
+#include "ptlrpc_internal.h"
+
+/* XXX: This is just for liblustre. Remove the #if defined directive when the
+ * "cfs_" prefix is dropped from cfs_list_head. */
+extern struct list_head ptlrpc_all_services;
+
+/**
+ * NRS core object.
+ */
+struct nrs_core nrs_core;
+
+static int nrs_policy_init(struct ptlrpc_nrs_policy *policy)
+{
+ return policy->pol_desc->pd_ops->op_policy_init != NULL ?
+ policy->pol_desc->pd_ops->op_policy_init(policy) : 0;
+}
+
+static void nrs_policy_fini(struct ptlrpc_nrs_policy *policy)
+{
+ LASSERT(policy->pol_ref == 0);
+ LASSERT(policy->pol_req_queued == 0);
+
+ if (policy->pol_desc->pd_ops->op_policy_fini != NULL)
+ policy->pol_desc->pd_ops->op_policy_fini(policy);
+}
+
+static int nrs_policy_ctl_locked(struct ptlrpc_nrs_policy *policy,
+ enum ptlrpc_nrs_ctl opc, void *arg)
+{
+ /**
+ * The policy may be stopped, but the lprocfs files and
+ * ptlrpc_nrs_policy instances remain present until unregistration time.
+ * Do not perform the ctl operation if the policy is stopped, as
+ * policy->pol_private will be NULL in such a case.
+ */
+ if (policy->pol_state == NRS_POL_STATE_STOPPED)
+ return -ENODEV;
+
+ return policy->pol_desc->pd_ops->op_policy_ctl != NULL ?
+ policy->pol_desc->pd_ops->op_policy_ctl(policy, opc, arg) :
+ -ENOSYS;
+}
+
+static void nrs_policy_stop0(struct ptlrpc_nrs_policy *policy)
+{
+ struct ptlrpc_nrs *nrs = policy->pol_nrs;
+
+ if (policy->pol_desc->pd_ops->op_policy_stop != NULL) {
+ spin_unlock(&nrs->nrs_lock);
+
+ policy->pol_desc->pd_ops->op_policy_stop(policy);
+
+ spin_lock(&nrs->nrs_lock);
+ }
+
+ LASSERT(list_empty(&policy->pol_list_queued));
+ LASSERT(policy->pol_req_queued == 0 &&
+ policy->pol_req_started == 0);
+
+ policy->pol_private = NULL;
+
+ policy->pol_state = NRS_POL_STATE_STOPPED;
+
+ if (atomic_dec_and_test(&policy->pol_desc->pd_refs))
+ module_put(policy->pol_desc->pd_owner);
+}
+
+static int nrs_policy_stop_locked(struct ptlrpc_nrs_policy *policy)
+{
+ struct ptlrpc_nrs *nrs = policy->pol_nrs;
+
+ if (nrs->nrs_policy_fallback == policy && !nrs->nrs_stopping)
+ return -EPERM;
+
+ if (policy->pol_state == NRS_POL_STATE_STARTING)
+ return -EAGAIN;
+
+ /* In progress or already stopped */
+ if (policy->pol_state != NRS_POL_STATE_STARTED)
+ return 0;
+
+ policy->pol_state = NRS_POL_STATE_STOPPING;
+
+ /* Immediately make it invisible */
+ if (nrs->nrs_policy_primary == policy) {
+ nrs->nrs_policy_primary = NULL;
+
+ } else {
+ LASSERT(nrs->nrs_policy_fallback == policy);
+ nrs->nrs_policy_fallback = NULL;
+ }
+
+ /* I have the only refcount */
+ if (policy->pol_ref == 1)
+ nrs_policy_stop0(policy);
+
+ return 0;
+}
+
+/**
+ * Transitions the \a nrs NRS head's primary policy to
+ * ptlrpc_nrs_pol_state::NRS_POL_STATE_STOPPING and if the policy has no
+ * pending usage references, to ptlrpc_nrs_pol_state::NRS_POL_STATE_STOPPED.
+ *
+ * \param[in] nrs the NRS head to carry out this operation on
+ */
+static void nrs_policy_stop_primary(struct ptlrpc_nrs *nrs)
+{
+ struct ptlrpc_nrs_policy *tmp = nrs->nrs_policy_primary;
+
+ if (tmp == NULL)
+ return;
+
+ nrs->nrs_policy_primary = NULL;
+
+ LASSERT(tmp->pol_state == NRS_POL_STATE_STARTED);
+ tmp->pol_state = NRS_POL_STATE_STOPPING;
+
+ if (tmp->pol_ref == 0)
+ nrs_policy_stop0(tmp);
+}
+
+/**
+ * Transitions a policy across the ptlrpc_nrs_pol_state range of values, in
+ * response to an lprocfs command to start a policy.
+ *
+ * If a primary policy different to the current one is specified, this function
+ * will transition the new policy to the
+ * ptlrpc_nrs_pol_state::NRS_POL_STATE_STARTING and then to
+ * ptlrpc_nrs_pol_state::NRS_POL_STATE_STARTED, and will then transition
+ * the old primary policy (if there is one) to
+ * ptlrpc_nrs_pol_state::NRS_POL_STATE_STOPPING, and if there are no outstanding
+ * references on the policy to ptlrpc_nrs_pol_stae::NRS_POL_STATE_STOPPED.
+ *
+ * If the fallback policy is specified, this is taken to indicate an instruction
+ * to stop the current primary policy, without substituting it with another
+ * primary policy, so the primary policy (if any) is transitioned to
+ * ptlrpc_nrs_pol_state::NRS_POL_STATE_STOPPING, and if there are no outstanding
+ * references on the policy to ptlrpc_nrs_pol_stae::NRS_POL_STATE_STOPPED. In
+ * this case, the fallback policy is only left active in the NRS head.
+ */
+static int nrs_policy_start_locked(struct ptlrpc_nrs_policy *policy)
+{
+ struct ptlrpc_nrs *nrs = policy->pol_nrs;
+ int rc = 0;
+
+ /**
+ * Don't allow multiple starting which is too complex, and has no real
+ * benefit.
+ */
+ if (nrs->nrs_policy_starting)
+ return -EAGAIN;
+
+ LASSERT(policy->pol_state != NRS_POL_STATE_STARTING);
+
+ if (policy->pol_state == NRS_POL_STATE_STOPPING)
+ return -EAGAIN;
+
+ if (policy->pol_flags & PTLRPC_NRS_FL_FALLBACK) {
+ /**
+ * This is for cases in which the user sets the policy to the
+ * fallback policy (currently fifo for all services); i.e. the
+ * user is resetting the policy to the default; so we stop the
+ * primary policy, if any.
+ */
+ if (policy == nrs->nrs_policy_fallback) {
+ nrs_policy_stop_primary(nrs);
+ return 0;
+ }
+
+ /**
+ * If we reach here, we must be setting up the fallback policy
+ * at service startup time, and only a single policy with the
+ * nrs_policy_flags::PTLRPC_NRS_FL_FALLBACK flag set can
+ * register with NRS core.
+ */
+ LASSERT(nrs->nrs_policy_fallback == NULL);
+ } else {
+ /**
+ * Shouldn't start primary policy if w/o fallback policy.
+ */
+ if (nrs->nrs_policy_fallback == NULL)
+ return -EPERM;
+
+ if (policy->pol_state == NRS_POL_STATE_STARTED)
+ return 0;
+ }
+
+ /**
+ * Increase the module usage count for policies registering from other
+ * modules.
+ */
+ if (atomic_inc_return(&policy->pol_desc->pd_refs) == 1 &&
+ !try_module_get(policy->pol_desc->pd_owner)) {
+ atomic_dec(&policy->pol_desc->pd_refs);
+ CERROR("NRS: cannot get module for policy %s; is it alive?\n",
+ policy->pol_desc->pd_name);
+ return -ENODEV;
+ }
+
+ /**
+ * Serialize policy starting across the NRS head
+ */
+ nrs->nrs_policy_starting = 1;
+
+ policy->pol_state = NRS_POL_STATE_STARTING;
+
+ if (policy->pol_desc->pd_ops->op_policy_start) {
+ spin_unlock(&nrs->nrs_lock);
+
+ rc = policy->pol_desc->pd_ops->op_policy_start(policy);
+
+ spin_lock(&nrs->nrs_lock);
+ if (rc != 0) {
+ if (atomic_dec_and_test(&policy->pol_desc->pd_refs))
+ module_put(policy->pol_desc->pd_owner);
+
+ policy->pol_state = NRS_POL_STATE_STOPPED;
+ goto out;
+ }
+ }
+
+ policy->pol_state = NRS_POL_STATE_STARTED;
+
+ if (policy->pol_flags & PTLRPC_NRS_FL_FALLBACK) {
+ /**
+ * This path is only used at PTLRPC service setup time.
+ */
+ nrs->nrs_policy_fallback = policy;
+ } else {
+ /*
+ * Try to stop the current primary policy if there is one.
+ */
+ nrs_policy_stop_primary(nrs);
+
+ /**
+ * And set the newly-started policy as the primary one.
+ */
+ nrs->nrs_policy_primary = policy;
+ }
+
+out:
+ nrs->nrs_policy_starting = 0;
+
+ return rc;
+}
+
+/**
+ * Increases the policy's usage reference count.
+ */
+static inline void nrs_policy_get_locked(struct ptlrpc_nrs_policy *policy)
+{
+ policy->pol_ref++;
+}
+
+/**
+ * Decreases the policy's usage reference count, and stops the policy in case it
+ * was already stopping and have no more outstanding usage references (which
+ * indicates it has no more queued or started requests, and can be safely
+ * stopped).
+ */
+static void nrs_policy_put_locked(struct ptlrpc_nrs_policy *policy)
+{
+ LASSERT(policy->pol_ref > 0);
+
+ policy->pol_ref--;
+ if (unlikely(policy->pol_ref == 0 &&
+ policy->pol_state == NRS_POL_STATE_STOPPING))
+ nrs_policy_stop0(policy);
+}
+
+static void nrs_policy_put(struct ptlrpc_nrs_policy *policy)
+{
+ spin_lock(&policy->pol_nrs->nrs_lock);
+ nrs_policy_put_locked(policy);
+ spin_unlock(&policy->pol_nrs->nrs_lock);
+}
+
+/**
+ * Find and return a policy by name.
+ */
+static struct ptlrpc_nrs_policy *nrs_policy_find_locked(struct ptlrpc_nrs *nrs,
+ char *name)
+{
+ struct ptlrpc_nrs_policy *tmp;
+
+ list_for_each_entry(tmp, &nrs->nrs_policy_list, pol_list) {
+ if (strncmp(tmp->pol_desc->pd_name, name,
+ NRS_POL_NAME_MAX) == 0) {
+ nrs_policy_get_locked(tmp);
+ return tmp;
+ }
+ }
+ return NULL;
+}
+
+/**
+ * Release references for the resource hierarchy moving upwards towards the
+ * policy instance resource.
+ */
+static void nrs_resource_put(struct ptlrpc_nrs_resource *res)
+{
+ struct ptlrpc_nrs_policy *policy = res->res_policy;
+
+ if (policy->pol_desc->pd_ops->op_res_put != NULL) {
+ struct ptlrpc_nrs_resource *parent;
+
+ for (; res != NULL; res = parent) {
+ parent = res->res_parent;
+ policy->pol_desc->pd_ops->op_res_put(policy, res);
+ }
+ }
+}
+
+/**
+ * Obtains references for each resource in the resource hierarchy for request
+ * \a nrq if it is to be handled by \a policy.
+ *
+ * \param[in] policy the policy
+ * \param[in] nrq the request
+ * \param[in] moving_req denotes whether this is a call to the function by
+ * ldlm_lock_reorder_req(), in order to move \a nrq to
+ * the high-priority NRS head; we should not sleep when
+ * set.
+ *
+ * \retval NULL resource hierarchy references not obtained
+ * \retval valid-pointer the bottom level of the resource hierarchy
+ *
+ * \see ptlrpc_nrs_pol_ops::op_res_get()
+ */
+static
+struct ptlrpc_nrs_resource *nrs_resource_get(struct ptlrpc_nrs_policy *policy,
+ struct ptlrpc_nrs_request *nrq,
+ bool moving_req)
+{
+ /**
+ * Set to NULL to traverse the resource hierarchy from the top.
+ */
+ struct ptlrpc_nrs_resource *res = NULL;
+ struct ptlrpc_nrs_resource *tmp = NULL;
+ int rc;
+
+ while (1) {
+ rc = policy->pol_desc->pd_ops->op_res_get(policy, nrq, res,
+ &tmp, moving_req);
+ if (rc < 0) {
+ if (res != NULL)
+ nrs_resource_put(res);
+ return NULL;
+ }
+
+ LASSERT(tmp != NULL);
+ tmp->res_parent = res;
+ tmp->res_policy = policy;
+ res = tmp;
+ tmp = NULL;
+ /**
+ * Return once we have obtained a reference to the bottom level
+ * of the resource hierarchy.
+ */
+ if (rc > 0)
+ return res;
+ }
+}
+
+/**
+ * Obtains resources for the resource hierarchies and policy references for
+ * the fallback and current primary policy (if any), that will later be used
+ * to handle request \a nrq.
+ *
+ * \param[in] nrs the NRS head instance that will be handling request \a nrq.
+ * \param[in] nrq the request that is being handled.
+ * \param[out] resp the array where references to the resource hierarchy are
+ * stored.
+ * \param[in] moving_req is set when obtaining resources while moving a
+ * request from a policy on the regular NRS head to a
+ * policy on the HP NRS head (via
+ * ldlm_lock_reorder_req()). It signifies that
+ * allocations to get resources should be atomic; for
+ * a full explanation, see comment in
+ * ptlrpc_nrs_pol_ops::op_res_get().
+ */
+static void nrs_resource_get_safe(struct ptlrpc_nrs *nrs,
+ struct ptlrpc_nrs_request *nrq,
+ struct ptlrpc_nrs_resource **resp,
+ bool moving_req)
+{
+ struct ptlrpc_nrs_policy *primary = NULL;
+ struct ptlrpc_nrs_policy *fallback = NULL;
+
+ memset(resp, 0, sizeof(resp[0]) * NRS_RES_MAX);
+
+ /**
+ * Obtain policy references.
+ */
+ spin_lock(&nrs->nrs_lock);
+
+ fallback = nrs->nrs_policy_fallback;
+ nrs_policy_get_locked(fallback);
+
+ primary = nrs->nrs_policy_primary;
+ if (primary != NULL)
+ nrs_policy_get_locked(primary);
+
+ spin_unlock(&nrs->nrs_lock);
+
+ /**
+ * Obtain resource hierarchy references.
+ */
+ resp[NRS_RES_FALLBACK] = nrs_resource_get(fallback, nrq, moving_req);
+ LASSERT(resp[NRS_RES_FALLBACK] != NULL);
+
+ if (primary != NULL) {
+ resp[NRS_RES_PRIMARY] = nrs_resource_get(primary, nrq,
+ moving_req);
+ /**
+ * A primary policy may exist which may not wish to serve a
+ * particular request for different reasons; release the
+ * reference on the policy as it will not be used for this
+ * request.
+ */
+ if (resp[NRS_RES_PRIMARY] == NULL)
+ nrs_policy_put(primary);
+ }
+}
+
+/**
+ * Releases references to resource hierarchies and policies, because they are no
+ * longer required; used when request handling has been completed, or the
+ * request is moving to the high priority NRS head.
+ *
+ * \param resp the resource hierarchy that is being released
+ *
+ * \see ptlrpcnrs_req_hp_move()
+ * \see ptlrpc_nrs_req_finalize()
+ */
+static void nrs_resource_put_safe(struct ptlrpc_nrs_resource **resp)
+{
+ struct ptlrpc_nrs_policy *pols[NRS_RES_MAX];
+ struct ptlrpc_nrs *nrs = NULL;
+ int i;
+
+ for (i = 0; i < NRS_RES_MAX; i++) {
+ if (resp[i] != NULL) {
+ pols[i] = resp[i]->res_policy;
+ nrs_resource_put(resp[i]);
+ resp[i] = NULL;
+ } else {
+ pols[i] = NULL;
+ }
+ }
+
+ for (i = 0; i < NRS_RES_MAX; i++) {
+ if (pols[i] == NULL)
+ continue;
+
+ if (nrs == NULL) {
+ nrs = pols[i]->pol_nrs;
+ spin_lock(&nrs->nrs_lock);
+ }
+ nrs_policy_put_locked(pols[i]);
+ }
+
+ if (nrs != NULL)
+ spin_unlock(&nrs->nrs_lock);
+}
+
+/**
+ * Obtains an NRS request from \a policy for handling or examination; the
+ * request should be removed in the 'handling' case.
+ *
+ * Calling into this function implies we already know the policy has a request
+ * waiting to be handled.
+ *
+ * \param[in] policy the policy from which a request
+ * \param[in] peek when set, signifies that we just want to examine the
+ * request, and not handle it, so the request is not removed
+ * from the policy.
+ * \param[in] force when set, it will force a policy to return a request if it
+ * has one pending
+ *
+ * \retval the NRS request to be handled
+ */
+static inline
+struct ptlrpc_nrs_request *nrs_request_get(struct ptlrpc_nrs_policy *policy,
+ bool peek, bool force)
+{
+ struct ptlrpc_nrs_request *nrq;
+
+ LASSERT(policy->pol_req_queued > 0);
+
+ nrq = policy->pol_desc->pd_ops->op_req_get(policy, peek, force);
+
+ LASSERT(ergo(nrq != NULL, nrs_request_policy(nrq) == policy));
+
+ return nrq;
+}
+
+/**
+ * Enqueues request \a nrq for later handling, via one one the policies for
+ * which resources where earlier obtained via nrs_resource_get_safe(). The
+ * function attempts to enqueue the request first on the primary policy
+ * (if any), since this is the preferred choice.
+ *
+ * \param nrq the request being enqueued
+ *
+ * \see nrs_resource_get_safe()
+ */
+static inline void nrs_request_enqueue(struct ptlrpc_nrs_request *nrq)
+{
+ struct ptlrpc_nrs_policy *policy;
+ int rc;
+ int i;
+
+ /**
+ * Try in descending order, because the primary policy (if any) is
+ * the preferred choice.
+ */
+ for (i = NRS_RES_MAX - 1; i >= 0; i--) {
+ if (nrq->nr_res_ptrs[i] == NULL)
+ continue;
+
+ nrq->nr_res_idx = i;
+ policy = nrq->nr_res_ptrs[i]->res_policy;
+
+ rc = policy->pol_desc->pd_ops->op_req_enqueue(policy, nrq);
+ if (rc == 0) {
+ policy->pol_nrs->nrs_req_queued++;
+ policy->pol_req_queued++;
+ return;
+ }
+ }
+ /**
+ * Should never get here, as at least the primary policy's
+ * ptlrpc_nrs_pol_ops::op_req_enqueue() implementation should always
+ * succeed.
+ */
+ LBUG();
+}
+
+/**
+ * Called when a request has been handled
+ *
+ * \param[in] nrs the request that has been handled; can be used for
+ * job/resource control.
+ *
+ * \see ptlrpc_nrs_req_stop_nolock()
+ */
+static inline void nrs_request_stop(struct ptlrpc_nrs_request *nrq)
+{
+ struct ptlrpc_nrs_policy *policy = nrs_request_policy(nrq);
+
+ if (policy->pol_desc->pd_ops->op_req_stop)
+ policy->pol_desc->pd_ops->op_req_stop(policy, nrq);
+
+ LASSERT(policy->pol_nrs->nrs_req_started > 0);
+ LASSERT(policy->pol_req_started > 0);
+
+ policy->pol_nrs->nrs_req_started--;
+ policy->pol_req_started--;
+}
+
+/**
+ * Handler for operations that can be carried out on policies.
+ *
+ * Handles opcodes that are common to all policy types within NRS core, and
+ * passes any unknown opcodes to the policy-specific control function.
+ *
+ * \param[in] nrs the NRS head this policy belongs to.
+ * \param[in] name the human-readable policy name; should be the same as
+ * ptlrpc_nrs_pol_desc::pd_name.
+ * \param[in] opc the opcode of the operation being carried out.
+ * \param[in,out] arg can be used to pass information in and out between when
+ * carrying an operation; usually data that is private to
+ * the policy at some level, or generic policy status
+ * information.
+ *
+ * \retval -ve error condition
+ * \retval 0 operation was carried out successfully
+ */
+static int nrs_policy_ctl(struct ptlrpc_nrs *nrs, char *name,
+ enum ptlrpc_nrs_ctl opc, void *arg)
+{
+ struct ptlrpc_nrs_policy *policy;
+ int rc = 0;
+
+ spin_lock(&nrs->nrs_lock);
+
+ policy = nrs_policy_find_locked(nrs, name);
+ if (policy == NULL) {
+ rc = -ENOENT;
+ goto out;
+ }
+
+ switch (opc) {
+ /**
+ * Unknown opcode, pass it down to the policy-specific control
+ * function for handling.
+ */
+ default:
+ rc = nrs_policy_ctl_locked(policy, opc, arg);
+ break;
+
+ /**
+ * Start \e policy
+ */
+ case PTLRPC_NRS_CTL_START:
+ rc = nrs_policy_start_locked(policy);
+ break;
+ }
+out:
+ if (policy != NULL)
+ nrs_policy_put_locked(policy);
+
+ spin_unlock(&nrs->nrs_lock);
+
+ return rc;
+}
+
+/**
+ * Unregisters a policy by name.
+ *
+ * \param[in] nrs the NRS head this policy belongs to.
+ * \param[in] name the human-readable policy name; should be the same as
+ * ptlrpc_nrs_pol_desc::pd_name
+ *
+ * \retval -ve error
+ * \retval 0 success
+ */
+static int nrs_policy_unregister(struct ptlrpc_nrs *nrs, char *name)
+{
+ struct ptlrpc_nrs_policy *policy = NULL;
+
+ spin_lock(&nrs->nrs_lock);
+
+ policy = nrs_policy_find_locked(nrs, name);
+ if (policy == NULL) {
+ spin_unlock(&nrs->nrs_lock);
+
+ CERROR("Can't find NRS policy %s\n", name);
+ return -ENOENT;
+ }
+
+ if (policy->pol_ref > 1) {
+ CERROR("Policy %s is busy with %d references\n", name,
+ (int)policy->pol_ref);
+ nrs_policy_put_locked(policy);
+
+ spin_unlock(&nrs->nrs_lock);
+ return -EBUSY;
+ }
+
+ LASSERT(policy->pol_req_queued == 0);
+ LASSERT(policy->pol_req_started == 0);
+
+ if (policy->pol_state != NRS_POL_STATE_STOPPED) {
+ nrs_policy_stop_locked(policy);
+ LASSERT(policy->pol_state == NRS_POL_STATE_STOPPED);
+ }
+
+ list_del(&policy->pol_list);
+ nrs->nrs_num_pols--;
+
+ nrs_policy_put_locked(policy);
+
+ spin_unlock(&nrs->nrs_lock);
+
+ nrs_policy_fini(policy);
+
+ LASSERT(policy->pol_private == NULL);
+ OBD_FREE_PTR(policy);
+
+ return 0;
+}
+
+/**
+ * Register a policy from \policy descriptor \a desc with NRS head \a nrs.
+ *
+ * \param[in] nrs the NRS head on which the policy will be registered.
+ * \param[in] desc the policy descriptor from which the information will be
+ * obtained to register the policy.
+ *
+ * \retval -ve error
+ * \retval 0 success
+ */
+static int nrs_policy_register(struct ptlrpc_nrs *nrs,
+ struct ptlrpc_nrs_pol_desc *desc)
+{
+ struct ptlrpc_nrs_policy *policy;
+ struct ptlrpc_nrs_policy *tmp;
+ struct ptlrpc_service_part *svcpt = nrs->nrs_svcpt;
+ int rc;
+
+ LASSERT(svcpt != NULL);
+ LASSERT(desc->pd_ops != NULL);
+ LASSERT(desc->pd_ops->op_res_get != NULL);
+ LASSERT(desc->pd_ops->op_req_get != NULL);
+ LASSERT(desc->pd_ops->op_req_enqueue != NULL);
+ LASSERT(desc->pd_ops->op_req_dequeue != NULL);
+ LASSERT(desc->pd_compat != NULL);
+
+ OBD_CPT_ALLOC_GFP(policy, svcpt->scp_service->srv_cptable,
+ svcpt->scp_cpt, sizeof(*policy), GFP_NOFS);
+ if (policy == NULL)
+ return -ENOMEM;
+
+ policy->pol_nrs = nrs;
+ policy->pol_desc = desc;
+ policy->pol_state = NRS_POL_STATE_STOPPED;
+ policy->pol_flags = desc->pd_flags;
+
+ INIT_LIST_HEAD(&policy->pol_list);
+ INIT_LIST_HEAD(&policy->pol_list_queued);
+
+ rc = nrs_policy_init(policy);
+ if (rc != 0) {
+ OBD_FREE_PTR(policy);
+ return rc;
+ }
+
+ spin_lock(&nrs->nrs_lock);
+
+ tmp = nrs_policy_find_locked(nrs, policy->pol_desc->pd_name);
+ if (tmp != NULL) {
+ CERROR("NRS policy %s has been registered, can't register it for %s\n",
+ policy->pol_desc->pd_name,
+ svcpt->scp_service->srv_name);
+ nrs_policy_put_locked(tmp);
+
+ spin_unlock(&nrs->nrs_lock);
+ nrs_policy_fini(policy);
+ OBD_FREE_PTR(policy);
+
+ return -EEXIST;
+ }
+
+ list_add_tail(&policy->pol_list, &nrs->nrs_policy_list);
+ nrs->nrs_num_pols++;
+
+ if (policy->pol_flags & PTLRPC_NRS_FL_REG_START)
+ rc = nrs_policy_start_locked(policy);
+
+ spin_unlock(&nrs->nrs_lock);
+
+ if (rc != 0)
+ (void) nrs_policy_unregister(nrs, policy->pol_desc->pd_name);
+
+ return rc;
+}
+
+/**
+ * Enqueue request \a req using one of the policies its resources are referring
+ * to.
+ *
+ * \param[in] req the request to enqueue.
+ */
+static void ptlrpc_nrs_req_add_nolock(struct ptlrpc_request *req)
+{
+ struct ptlrpc_nrs_policy *policy;
+
+ LASSERT(req->rq_nrq.nr_initialized);
+ LASSERT(!req->rq_nrq.nr_enqueued);
+
+ nrs_request_enqueue(&req->rq_nrq);
+ req->rq_nrq.nr_enqueued = 1;
+
+ policy = nrs_request_policy(&req->rq_nrq);
+ /**
+ * Add the policy to the NRS head's list of policies with enqueued
+ * requests, if it has not been added there.
+ */
+ if (unlikely(list_empty(&policy->pol_list_queued)))
+ list_add_tail(&policy->pol_list_queued,
+ &policy->pol_nrs->nrs_policy_queued);
+}
+
+/**
+ * Enqueue a request on the high priority NRS head.
+ *
+ * \param req the request to enqueue.
+ */
+static void ptlrpc_nrs_hpreq_add_nolock(struct ptlrpc_request *req)
+{
+ int opc = lustre_msg_get_opc(req->rq_reqmsg);
+
+ spin_lock(&req->rq_lock);
+ req->rq_hp = 1;
+ ptlrpc_nrs_req_add_nolock(req);
+ if (opc != OBD_PING)
+ DEBUG_REQ(D_NET, req, "high priority req");
+ spin_unlock(&req->rq_lock);
+}
+
+/**
+ * Returns a boolean predicate indicating whether the policy described by
+ * \a desc is adequate for use with service \a svc.
+ *
+ * \param[in] svc the service
+ * \param[in] desc the policy descriptor
+ *
+ * \retval false the policy is not compatible with the service
+ * \retval true the policy is compatible with the service
+ */
+static inline bool nrs_policy_compatible(const struct ptlrpc_service *svc,
+ const struct ptlrpc_nrs_pol_desc *desc)
+{
+ return desc->pd_compat(svc, desc);
+}
+
+/**
+ * Registers all compatible policies in nrs_core.nrs_policies, for NRS head
+ * \a nrs.
+ *
+ * \param[in] nrs the NRS head
+ *
+ * \retval -ve error
+ * \retval 0 success
+ *
+ * \pre mutex_is_locked(&nrs_core.nrs_mutex)
+ *
+ * \see ptlrpc_service_nrs_setup()
+ */
+static int nrs_register_policies_locked(struct ptlrpc_nrs *nrs)
+{
+ struct ptlrpc_nrs_pol_desc *desc;
+ /* for convenience */
+ struct ptlrpc_service_part *svcpt = nrs->nrs_svcpt;
+ struct ptlrpc_service *svc = svcpt->scp_service;
+ int rc = -EINVAL;
+
+ LASSERT(mutex_is_locked(&nrs_core.nrs_mutex));
+
+ list_for_each_entry(desc, &nrs_core.nrs_policies, pd_list) {
+ if (nrs_policy_compatible(svc, desc)) {
+ rc = nrs_policy_register(nrs, desc);
+ if (rc != 0) {
+ CERROR("Failed to register NRS policy %s for partition %d of service %s: %d\n",
+ desc->pd_name, svcpt->scp_cpt,
+ svc->srv_name, rc);
+ /**
+ * Fail registration if any of the policies'
+ * registration fails.
+ */
+ break;
+ }
+ }
+ }
+
+ return rc;
+}
+
+/**
+ * Initializes NRS head \a nrs of service partition \a svcpt, and registers all
+ * compatible policies in NRS core, with the NRS head.
+ *
+ * \param[in] nrs the NRS head
+ * \param[in] svcpt the PTLRPC service partition to setup
+ *
+ * \retval -ve error
+ * \retval 0 success
+ *
+ * \pre mutex_is_locked(&nrs_core.nrs_mutex)
+ */
+static int nrs_svcpt_setup_locked0(struct ptlrpc_nrs *nrs,
+ struct ptlrpc_service_part *svcpt)
+{
+ enum ptlrpc_nrs_queue_type queue;
+
+ LASSERT(mutex_is_locked(&nrs_core.nrs_mutex));
+
+ if (nrs == &svcpt->scp_nrs_reg)
+ queue = PTLRPC_NRS_QUEUE_REG;
+ else if (nrs == svcpt->scp_nrs_hp)
+ queue = PTLRPC_NRS_QUEUE_HP;
+ else
+ LBUG();
+
+ nrs->nrs_svcpt = svcpt;
+ nrs->nrs_queue_type = queue;
+ spin_lock_init(&nrs->nrs_lock);
+ INIT_LIST_HEAD(&nrs->nrs_policy_list);
+ INIT_LIST_HEAD(&nrs->nrs_policy_queued);
+
+ return nrs_register_policies_locked(nrs);
+}
+
+/**
+ * Allocates a regular and optionally a high-priority NRS head (if the service
+ * handles high-priority RPCs), and then registers all available compatible
+ * policies on those NRS heads.
+ *
+ * \param[in,out] svcpt the PTLRPC service partition to setup
+ *
+ * \pre mutex_is_locked(&nrs_core.nrs_mutex)
+ */
+static int nrs_svcpt_setup_locked(struct ptlrpc_service_part *svcpt)
+{
+ struct ptlrpc_nrs *nrs;
+ int rc;
+
+ LASSERT(mutex_is_locked(&nrs_core.nrs_mutex));
+
+ /**
+ * Initialize the regular NRS head.
+ */
+ nrs = nrs_svcpt2nrs(svcpt, false);
+ rc = nrs_svcpt_setup_locked0(nrs, svcpt);
+ if (rc < 0)
+ goto out;
+
+ /**
+ * Optionally allocate a high-priority NRS head.
+ */
+ if (svcpt->scp_service->srv_ops.so_hpreq_handler == NULL)
+ goto out;
+
+ OBD_CPT_ALLOC_PTR(svcpt->scp_nrs_hp,
+ svcpt->scp_service->srv_cptable,
+ svcpt->scp_cpt);
+ if (svcpt->scp_nrs_hp == NULL) {
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ nrs = nrs_svcpt2nrs(svcpt, true);
+ rc = nrs_svcpt_setup_locked0(nrs, svcpt);
+
+out:
+ return rc;
+}
+
+/**
+ * Unregisters all policies on all available NRS heads in a service partition;
+ * called at PTLRPC service unregistration time.
+ *
+ * \param[in] svcpt the PTLRPC service partition
+ *
+ * \pre mutex_is_locked(&nrs_core.nrs_mutex)
+ */
+static void nrs_svcpt_cleanup_locked(struct ptlrpc_service_part *svcpt)
+{
+ struct ptlrpc_nrs *nrs;
+ struct ptlrpc_nrs_policy *policy;
+ struct ptlrpc_nrs_policy *tmp;
+ int rc;
+ bool hp = false;
+
+ LASSERT(mutex_is_locked(&nrs_core.nrs_mutex));
+
+again:
+ nrs = nrs_svcpt2nrs(svcpt, hp);
+ nrs->nrs_stopping = 1;
+
+ list_for_each_entry_safe(policy, tmp, &nrs->nrs_policy_list,
+ pol_list) {
+ rc = nrs_policy_unregister(nrs, policy->pol_desc->pd_name);
+ LASSERT(rc == 0);
+ }
+
+ /**
+ * If the service partition has an HP NRS head, clean that up as well.
+ */
+ if (!hp && nrs_svcpt_has_hp(svcpt)) {
+ hp = true;
+ goto again;
+ }
+
+ if (hp)
+ OBD_FREE_PTR(nrs);
+}
+
+/**
+ * Returns the descriptor for a policy as identified by by \a name.
+ *
+ * \param[in] name the policy name
+ *
+ * \retval the policy descriptor
+ * \retval NULL
+ */
+static struct ptlrpc_nrs_pol_desc *nrs_policy_find_desc_locked(const char *name)
+{
+ struct ptlrpc_nrs_pol_desc *tmp;
+
+ list_for_each_entry(tmp, &nrs_core.nrs_policies, pd_list) {
+ if (strncmp(tmp->pd_name, name, NRS_POL_NAME_MAX) == 0)
+ return tmp;
+ }
+ return NULL;
+}
+
+/**
+ * Removes the policy from all supported NRS heads of all partitions of all
+ * PTLRPC services.
+ *
+ * \param[in] desc the policy descriptor to unregister
+ *
+ * \retval -ve error
+ * \retval 0 successfully unregistered policy on all supported NRS heads
+ *
+ * \pre mutex_is_locked(&nrs_core.nrs_mutex)
+ * \pre mutex_is_locked(&ptlrpc_all_services_mutex)
+ */
+static int nrs_policy_unregister_locked(struct ptlrpc_nrs_pol_desc *desc)
+{
+ struct ptlrpc_nrs *nrs;
+ struct ptlrpc_service *svc;
+ struct ptlrpc_service_part *svcpt;
+ int i;
+ int rc = 0;
+
+ LASSERT(mutex_is_locked(&nrs_core.nrs_mutex));
+ LASSERT(mutex_is_locked(&ptlrpc_all_services_mutex));
+
+ list_for_each_entry(svc, &ptlrpc_all_services, srv_list) {
+
+ if (!nrs_policy_compatible(svc, desc) ||
+ unlikely(svc->srv_is_stopping))
+ continue;
+
+ ptlrpc_service_for_each_part(svcpt, i, svc) {
+ bool hp = false;
+
+again:
+ nrs = nrs_svcpt2nrs(svcpt, hp);
+ rc = nrs_policy_unregister(nrs, desc->pd_name);
+ /**
+ * Ignore -ENOENT as the policy may not have registered
+ * successfully on all service partitions.
+ */
+ if (rc == -ENOENT) {
+ rc = 0;
+ } else if (rc != 0) {
+ CERROR("Failed to unregister NRS policy %s for partition %d of service %s: %d\n",
+ desc->pd_name, svcpt->scp_cpt,
+ svcpt->scp_service->srv_name, rc);
+ return rc;
+ }
+
+ if (!hp && nrs_svc_has_hp(svc)) {
+ hp = true;
+ goto again;
+ }
+ }
+
+ if (desc->pd_ops->op_lprocfs_fini != NULL)
+ desc->pd_ops->op_lprocfs_fini(svc);
+ }
+
+ return rc;
+}
+
+/**
+ * Registers a new policy with NRS core.
+ *
+ * The function will only succeed if policy registration with all compatible
+ * service partitions (if any) is successful.
+ *
+ * N.B. This function should be called either at ptlrpc module initialization
+ * time when registering a policy that ships with NRS core, or in a
+ * module's init() function for policies registering from other modules.
+ *
+ * \param[in] conf configuration information for the new policy to register
+ *
+ * \retval -ve error
+ * \retval 0 success
+ */
+int ptlrpc_nrs_policy_register(struct ptlrpc_nrs_pol_conf *conf)
+{
+ struct ptlrpc_service *svc;
+ struct ptlrpc_nrs_pol_desc *desc;
+ int rc = 0;
+
+ LASSERT(conf != NULL);
+ LASSERT(conf->nc_ops != NULL);
+ LASSERT(conf->nc_compat != NULL);
+ LASSERT(ergo(conf->nc_compat == nrs_policy_compat_one,
+ conf->nc_compat_svc_name != NULL));
+ LASSERT(ergo((conf->nc_flags & PTLRPC_NRS_FL_REG_EXTERN) != 0,
+ conf->nc_owner != NULL));
+
+ conf->nc_name[NRS_POL_NAME_MAX - 1] = '\0';
+
+ /**
+ * External policies are not allowed to start immediately upon
+ * registration, as there is a relatively higher chance that their
+ * registration might fail. In such a case, some policy instances may
+ * already have requests queued wen unregistration needs to happen as
+ * part o cleanup; since there is currently no way to drain requests
+ * from a policy unless the service is unregistering, we just disallow
+ * this.
+ */
+ if ((conf->nc_flags & PTLRPC_NRS_FL_REG_EXTERN) &&
+ (conf->nc_flags & (PTLRPC_NRS_FL_FALLBACK |
+ PTLRPC_NRS_FL_REG_START))) {
+ CERROR("NRS: failing to register policy %s. Please check policy flags; external policies cannot act as fallback policies, or be started immediately upon registration without interaction with lprocfs\n",
+ conf->nc_name);
+ return -EINVAL;
+ }
+
+ mutex_lock(&nrs_core.nrs_mutex);
+
+ if (nrs_policy_find_desc_locked(conf->nc_name) != NULL) {
+ CERROR("NRS: failing to register policy %s which has already been registered with NRS core!\n",
+ conf->nc_name);
+ rc = -EEXIST;
+ goto fail;
+ }
+
+ OBD_ALLOC_PTR(desc);
+ if (desc == NULL) {
+ rc = -ENOMEM;
+ goto fail;
+ }
+
+ strncpy(desc->pd_name, conf->nc_name, NRS_POL_NAME_MAX);
+ desc->pd_ops = conf->nc_ops;
+ desc->pd_compat = conf->nc_compat;
+ desc->pd_compat_svc_name = conf->nc_compat_svc_name;
+ if ((conf->nc_flags & PTLRPC_NRS_FL_REG_EXTERN) != 0)
+ desc->pd_owner = conf->nc_owner;
+ desc->pd_flags = conf->nc_flags;
+ atomic_set(&desc->pd_refs, 0);
+
+ /**
+ * For policies that are held in the same module as NRS (currently
+ * ptlrpc), do not register the policy with all compatible services,
+ * as the services will not have started at this point, since we are
+ * calling from ptlrpc module initialization code. In such cases each
+ * service will register all compatible policies later, via
+ * ptlrpc_service_nrs_setup().
+ */
+ if ((conf->nc_flags & PTLRPC_NRS_FL_REG_EXTERN) == 0)
+ goto internal;
+
+ /**
+ * Register the new policy on all compatible services
+ */
+ mutex_lock(&ptlrpc_all_services_mutex);
+
+ list_for_each_entry(svc, &ptlrpc_all_services, srv_list) {
+ struct ptlrpc_service_part *svcpt;
+ int i;
+ int rc2;
+
+ if (!nrs_policy_compatible(svc, desc) ||
+ unlikely(svc->srv_is_stopping))
+ continue;
+
+ ptlrpc_service_for_each_part(svcpt, i, svc) {
+ struct ptlrpc_nrs *nrs;
+ bool hp = false;
+again:
+ nrs = nrs_svcpt2nrs(svcpt, hp);
+ rc = nrs_policy_register(nrs, desc);
+ if (rc != 0) {
+ CERROR("Failed to register NRS policy %s for partition %d of service %s: %d\n",
+ desc->pd_name, svcpt->scp_cpt,
+ svcpt->scp_service->srv_name, rc);
+
+ rc2 = nrs_policy_unregister_locked(desc);
+ /**
+ * Should not fail at this point
+ */
+ LASSERT(rc2 == 0);
+ mutex_unlock(&ptlrpc_all_services_mutex);
+ OBD_FREE_PTR(desc);
+ goto fail;
+ }
+
+ if (!hp && nrs_svc_has_hp(svc)) {
+ hp = true;
+ goto again;
+ }
+ }
+
+ /**
+ * No need to take a reference to other modules here, as we
+ * will be calling from the module's init() function.
+ */
+ if (desc->pd_ops->op_lprocfs_init != NULL) {
+ rc = desc->pd_ops->op_lprocfs_init(svc);
+ if (rc != 0) {
+ rc2 = nrs_policy_unregister_locked(desc);
+ /**
+ * Should not fail at this point
+ */
+ LASSERT(rc2 == 0);
+ mutex_unlock(&ptlrpc_all_services_mutex);
+ OBD_FREE_PTR(desc);
+ goto fail;
+ }
+ }
+ }
+
+ mutex_unlock(&ptlrpc_all_services_mutex);
+internal:
+ list_add_tail(&desc->pd_list, &nrs_core.nrs_policies);
+fail:
+ mutex_unlock(&nrs_core.nrs_mutex);
+
+ return rc;
+}
+EXPORT_SYMBOL(ptlrpc_nrs_policy_register);
+
+/**
+ * Unregisters a previously registered policy with NRS core. All instances of
+ * the policy on all NRS heads of all supported services are removed.
+ *
+ * N.B. This function should only be called from a module's exit() function.
+ * Although it can be used for policies that ship alongside NRS core, the
+ * function is primarily intended for policies that register externally,
+ * from other modules.
+ *
+ * \param[in] conf configuration information for the policy to unregister
+ *
+ * \retval -ve error
+ * \retval 0 success
+ */
+int ptlrpc_nrs_policy_unregister(struct ptlrpc_nrs_pol_conf *conf)
+{
+ struct ptlrpc_nrs_pol_desc *desc;
+ int rc;
+
+ LASSERT(conf != NULL);
+
+ if (conf->nc_flags & PTLRPC_NRS_FL_FALLBACK) {
+ CERROR("Unable to unregister a fallback policy, unless the PTLRPC service is stopping.\n");
+ return -EPERM;
+ }
+
+ conf->nc_name[NRS_POL_NAME_MAX - 1] = '\0';
+
+ mutex_lock(&nrs_core.nrs_mutex);
+
+ desc = nrs_policy_find_desc_locked(conf->nc_name);
+ if (desc == NULL) {
+ CERROR("Failing to unregister NRS policy %s which has not been registered with NRS core!\n",
+ conf->nc_name);
+ rc = -ENOENT;
+ goto not_exist;
+ }
+
+ mutex_lock(&ptlrpc_all_services_mutex);
+
+ rc = nrs_policy_unregister_locked(desc);
+ if (rc < 0) {
+ if (rc == -EBUSY)
+ CERROR("Please first stop policy %s on all service partitions and then retry to unregister the policy.\n",
+ conf->nc_name);
+ goto fail;
+ }
+
+ CDEBUG(D_INFO, "Unregistering policy %s from NRS core.\n",
+ conf->nc_name);
+
+ list_del(&desc->pd_list);
+ OBD_FREE_PTR(desc);
+
+fail:
+ mutex_unlock(&ptlrpc_all_services_mutex);
+
+not_exist:
+ mutex_unlock(&nrs_core.nrs_mutex);
+
+ return rc;
+}
+EXPORT_SYMBOL(ptlrpc_nrs_policy_unregister);
+
+/**
+ * Setup NRS heads on all service partitions of service \a svc, and register
+ * all compatible policies on those NRS heads.
+ *
+ * To be called from within ptl
+ * \param[in] svc the service to setup
+ *
+ * \retval -ve error, the calling logic should eventually call
+ * ptlrpc_service_nrs_cleanup() to undo any work performed
+ * by this function.
+ *
+ * \see ptlrpc_register_service()
+ * \see ptlrpc_service_nrs_cleanup()
+ */
+int ptlrpc_service_nrs_setup(struct ptlrpc_service *svc)
+{
+ struct ptlrpc_service_part *svcpt;
+ const struct ptlrpc_nrs_pol_desc *desc;
+ int i;
+ int rc = 0;
+
+ mutex_lock(&nrs_core.nrs_mutex);
+
+ /**
+ * Initialize NRS heads on all service CPTs.
+ */
+ ptlrpc_service_for_each_part(svcpt, i, svc) {
+ rc = nrs_svcpt_setup_locked(svcpt);
+ if (rc != 0)
+ goto failed;
+ }
+
+ /**
+ * Set up lprocfs interfaces for all supported policies for the
+ * service.
+ */
+ list_for_each_entry(desc, &nrs_core.nrs_policies, pd_list) {
+ if (!nrs_policy_compatible(svc, desc))
+ continue;
+
+ if (desc->pd_ops->op_lprocfs_init != NULL) {
+ rc = desc->pd_ops->op_lprocfs_init(svc);
+ if (rc != 0)
+ goto failed;
+ }
+ }
+
+failed:
+
+ mutex_unlock(&nrs_core.nrs_mutex);
+
+ return rc;
+}
+
+/**
+ * Unregisters all policies on all service partitions of service \a svc.
+ *
+ * \param[in] svc the PTLRPC service to unregister
+ */
+void ptlrpc_service_nrs_cleanup(struct ptlrpc_service *svc)
+{
+ struct ptlrpc_service_part *svcpt;
+ const struct ptlrpc_nrs_pol_desc *desc;
+ int i;
+
+ mutex_lock(&nrs_core.nrs_mutex);
+
+ /**
+ * Clean up NRS heads on all service partitions
+ */
+ ptlrpc_service_for_each_part(svcpt, i, svc)
+ nrs_svcpt_cleanup_locked(svcpt);
+
+ /**
+ * Clean up lprocfs interfaces for all supported policies for the
+ * service.
+ */
+ list_for_each_entry(desc, &nrs_core.nrs_policies, pd_list) {
+ if (!nrs_policy_compatible(svc, desc))
+ continue;
+
+ if (desc->pd_ops->op_lprocfs_fini != NULL)
+ desc->pd_ops->op_lprocfs_fini(svc);
+ }
+
+ mutex_unlock(&nrs_core.nrs_mutex);
+}
+
+/**
+ * Obtains NRS head resources for request \a req.
+ *
+ * These could be either on the regular or HP NRS head of \a svcpt; resources
+ * taken on the regular head can later be swapped for HP head resources by
+ * ldlm_lock_reorder_req().
+ *
+ * \param[in] svcpt the service partition
+ * \param[in] req the request
+ * \param[in] hp which NRS head of \a svcpt to use
+ */
+void ptlrpc_nrs_req_initialize(struct ptlrpc_service_part *svcpt,
+ struct ptlrpc_request *req, bool hp)
+{
+ struct ptlrpc_nrs *nrs = nrs_svcpt2nrs(svcpt, hp);
+
+ memset(&req->rq_nrq, 0, sizeof(req->rq_nrq));
+ nrs_resource_get_safe(nrs, &req->rq_nrq, req->rq_nrq.nr_res_ptrs,
+ false);
+
+ /**
+ * It is fine to access \e nr_initialized without locking as there is
+ * no contention at this early stage.
+ */
+ req->rq_nrq.nr_initialized = 1;
+}
+
+/**
+ * Releases resources for a request; is called after the request has been
+ * handled.
+ *
+ * \param[in] req the request
+ *
+ * \see ptlrpc_server_finish_request()
+ */
+void ptlrpc_nrs_req_finalize(struct ptlrpc_request *req)
+{
+ if (req->rq_nrq.nr_initialized) {
+ nrs_resource_put_safe(req->rq_nrq.nr_res_ptrs);
+ /* no protection on bit nr_initialized because no
+ * contention at this late stage */
+ req->rq_nrq.nr_finalized = 1;
+ }
+}
+
+void ptlrpc_nrs_req_stop_nolock(struct ptlrpc_request *req)
+{
+ if (req->rq_nrq.nr_started)
+ nrs_request_stop(&req->rq_nrq);
+}
+
+/**
+ * Enqueues request \a req on either the regular or high-priority NRS head
+ * of service partition \a svcpt.
+ *
+ * \param[in] svcpt the service partition
+ * \param[in] req the request to be enqueued
+ * \param[in] hp whether to enqueue the request on the regular or
+ * high-priority NRS head.
+ */
+void ptlrpc_nrs_req_add(struct ptlrpc_service_part *svcpt,
+ struct ptlrpc_request *req, bool hp)
+{
+ spin_lock(&svcpt->scp_req_lock);
+
+ if (hp)
+ ptlrpc_nrs_hpreq_add_nolock(req);
+ else
+ ptlrpc_nrs_req_add_nolock(req);
+
+ spin_unlock(&svcpt->scp_req_lock);
+}
+
+static void nrs_request_removed(struct ptlrpc_nrs_policy *policy)
+{
+ LASSERT(policy->pol_nrs->nrs_req_queued > 0);
+ LASSERT(policy->pol_req_queued > 0);
+
+ policy->pol_nrs->nrs_req_queued--;
+ policy->pol_req_queued--;
+
+ /**
+ * If the policy has no more requests queued, remove it from
+ * ptlrpc_nrs::nrs_policy_queued.
+ */
+ if (unlikely(policy->pol_req_queued == 0)) {
+ list_del_init(&policy->pol_list_queued);
+
+ /**
+ * If there are other policies with queued requests, move the
+ * current policy to the end so that we can round robin over
+ * all policies and drain the requests.
+ */
+ } else if (policy->pol_req_queued != policy->pol_nrs->nrs_req_queued) {
+ LASSERT(policy->pol_req_queued <
+ policy->pol_nrs->nrs_req_queued);
+
+ list_move_tail(&policy->pol_list_queued,
+ &policy->pol_nrs->nrs_policy_queued);
+ }
+}
+
+/**
+ * Obtains a request for handling from an NRS head of service partition
+ * \a svcpt.
+ *
+ * \param[in] svcpt the service partition
+ * \param[in] hp whether to obtain a request from the regular or
+ * high-priority NRS head.
+ * \param[in] peek when set, signifies that we just want to examine the
+ * request, and not handle it, so the request is not removed
+ * from the policy.
+ * \param[in] force when set, it will force a policy to return a request if it
+ * has one pending
+ *
+ * \retval the request to be handled
+ * \retval NULL the head has no requests to serve
+ */
+struct ptlrpc_request *
+ptlrpc_nrs_req_get_nolock0(struct ptlrpc_service_part *svcpt, bool hp,
+ bool peek, bool force)
+{
+ struct ptlrpc_nrs *nrs = nrs_svcpt2nrs(svcpt, hp);
+ struct ptlrpc_nrs_policy *policy;
+ struct ptlrpc_nrs_request *nrq;
+
+ /**
+ * Always try to drain requests from all NRS polices even if they are
+ * inactive, because the user can change policy status at runtime.
+ */
+ list_for_each_entry(policy, &nrs->nrs_policy_queued,
+ pol_list_queued) {
+ nrq = nrs_request_get(policy, peek, force);
+ if (nrq != NULL) {
+ if (likely(!peek)) {
+ nrq->nr_started = 1;
+
+ policy->pol_req_started++;
+ policy->pol_nrs->nrs_req_started++;
+
+ nrs_request_removed(policy);
+ }
+
+ return container_of(nrq, struct ptlrpc_request, rq_nrq);
+ }
+ }
+
+ return NULL;
+}
+
+/**
+ * Dequeues request \a req from the policy it has been enqueued on.
+ *
+ * \param[in] req the request
+ */
+void ptlrpc_nrs_req_del_nolock(struct ptlrpc_request *req)
+{
+ struct ptlrpc_nrs_policy *policy = nrs_request_policy(&req->rq_nrq);
+
+ policy->pol_desc->pd_ops->op_req_dequeue(policy, &req->rq_nrq);
+
+ req->rq_nrq.nr_enqueued = 0;
+
+ nrs_request_removed(policy);
+}
+
+/**
+ * Returns whether there are any requests currently enqueued on any of the
+ * policies of service partition's \a svcpt NRS head specified by \a hp. Should
+ * be called while holding ptlrpc_service_part::scp_req_lock to get a reliable
+ * result.
+ *
+ * \param[in] svcpt the service partition to enquire.
+ * \param[in] hp whether the regular or high-priority NRS head is to be
+ * enquired.
+ *
+ * \retval false the indicated NRS head has no enqueued requests.
+ * \retval true the indicated NRS head has some enqueued requests.
+ */
+bool ptlrpc_nrs_req_pending_nolock(struct ptlrpc_service_part *svcpt, bool hp)
+{
+ struct ptlrpc_nrs *nrs = nrs_svcpt2nrs(svcpt, hp);
+
+ return nrs->nrs_req_queued > 0;
+};
+
+/**
+ * Moves request \a req from the regular to the high-priority NRS head.
+ *
+ * \param[in] req the request to move
+ */
+void ptlrpc_nrs_req_hp_move(struct ptlrpc_request *req)
+{
+ struct ptlrpc_service_part *svcpt = req->rq_rqbd->rqbd_svcpt;
+ struct ptlrpc_nrs_request *nrq = &req->rq_nrq;
+ struct ptlrpc_nrs_resource *res1[NRS_RES_MAX];
+ struct ptlrpc_nrs_resource *res2[NRS_RES_MAX];
+
+ /**
+ * Obtain the high-priority NRS head resources.
+ */
+ nrs_resource_get_safe(nrs_svcpt2nrs(svcpt, true), nrq, res1, true);
+
+ spin_lock(&svcpt->scp_req_lock);
+
+ if (!ptlrpc_nrs_req_can_move(req))
+ goto out;
+
+ ptlrpc_nrs_req_del_nolock(req);
+
+ memcpy(res2, nrq->nr_res_ptrs, NRS_RES_MAX * sizeof(res2[0]));
+ memcpy(nrq->nr_res_ptrs, res1, NRS_RES_MAX * sizeof(res1[0]));
+
+ ptlrpc_nrs_hpreq_add_nolock(req);
+
+ memcpy(res1, res2, NRS_RES_MAX * sizeof(res1[0]));
+out:
+ spin_unlock(&svcpt->scp_req_lock);
+
+ /**
+ * Release either the regular NRS head resources if we moved the
+ * request, or the high-priority NRS head resources if we took a
+ * reference earlier in this function and ptlrpc_nrs_req_can_move()
+ * returned false.
+ */
+ nrs_resource_put_safe(res1);
+}
+
+/**
+ * Carries out a control operation \a opc on the policy identified by the
+ * human-readable \a name, on either all partitions, or only on the first
+ * partition of service \a svc.
+ *
+ * \param[in] svc the service the policy belongs to.
+ * \param[in] queue whether to carry out the command on the policy which
+ * belongs to the regular, high-priority, or both NRS
+ * heads of service partitions of \a svc.
+ * \param[in] name the policy to act upon, by human-readable name
+ * \param[in] opc the opcode of the operation to carry out
+ * \param[in] single when set, the operation will only be carried out on the
+ * NRS heads of the first service partition of \a svc.
+ * This is useful for some policies which e.g. share
+ * identical values on the same parameters of different
+ * service partitions; when reading these parameters via
+ * lprocfs, these policies may just want to obtain and
+ * print out the values from the first service partition.
+ * Storing these values centrally elsewhere then could be
+ * another solution for this.
+ * \param[in,out] arg can be used as a generic in/out buffer between control
+ * operations and the user environment.
+ *
+ *\retval -ve error condition
+ *\retval 0 operation was carried out successfully
+ */
+int ptlrpc_nrs_policy_control(const struct ptlrpc_service *svc,
+ enum ptlrpc_nrs_queue_type queue, char *name,
+ enum ptlrpc_nrs_ctl opc, bool single, void *arg)
+{
+ struct ptlrpc_service_part *svcpt;
+ int i;
+ int rc = 0;
+
+ LASSERT(opc != PTLRPC_NRS_CTL_INVALID);
+
+ if ((queue & PTLRPC_NRS_QUEUE_BOTH) == 0)
+ return -EINVAL;
+
+ ptlrpc_service_for_each_part(svcpt, i, svc) {
+ if ((queue & PTLRPC_NRS_QUEUE_REG) != 0) {
+ rc = nrs_policy_ctl(nrs_svcpt2nrs(svcpt, false), name,
+ opc, arg);
+ if (rc != 0 || (queue == PTLRPC_NRS_QUEUE_REG &&
+ single))
+ goto out;
+ }
+
+ if ((queue & PTLRPC_NRS_QUEUE_HP) != 0) {
+ /**
+ * XXX: We could optionally check for
+ * nrs_svc_has_hp(svc) here, and return an error if it
+ * is false. Right now we rely on the policies' lprocfs
+ * handlers that call the present function to make this
+ * check; if they fail to do so, they might hit the
+ * assertion inside nrs_svcpt2nrs() below.
+ */
+ rc = nrs_policy_ctl(nrs_svcpt2nrs(svcpt, true), name,
+ opc, arg);
+ if (rc != 0 || single)
+ goto out;
+ }
+ }
+out:
+ return rc;
+}
+
+
+/* ptlrpc/nrs_fifo.c */
+extern struct ptlrpc_nrs_pol_conf nrs_conf_fifo;
+
+/**
+ * Adds all policies that ship with the ptlrpc module, to NRS core's list of
+ * policies \e nrs_core.nrs_policies.
+ *
+ * \retval 0 all policies have been registered successfully
+ * \retval -ve error
+ */
+int ptlrpc_nrs_init(void)
+{
+ int rc;
+
+ mutex_init(&nrs_core.nrs_mutex);
+ INIT_LIST_HEAD(&nrs_core.nrs_policies);
+
+ rc = ptlrpc_nrs_policy_register(&nrs_conf_fifo);
+ if (rc != 0)
+ goto fail;
+
+
+ return rc;
+fail:
+ /**
+ * Since no PTLRPC services have been started at this point, all we need
+ * to do for cleanup is to free the descriptors.
+ */
+ ptlrpc_nrs_fini();
+
+ return rc;
+}
+
+/**
+ * Removes all policy descriptors from nrs_core::nrs_policies, and frees the
+ * policy descriptors.
+ *
+ * Since all PTLRPC services are stopped at this point, there are no more
+ * instances of any policies, because each service will have stopped its policy
+ * instances in ptlrpc_service_nrs_cleanup(), so we just need to free the
+ * descriptors here.
+ */
+void ptlrpc_nrs_fini(void)
+{
+ struct ptlrpc_nrs_pol_desc *desc;
+ struct ptlrpc_nrs_pol_desc *tmp;
+
+ list_for_each_entry_safe(desc, tmp, &nrs_core.nrs_policies,
+ pd_list) {
+ list_del_init(&desc->pd_list);
+ OBD_FREE_PTR(desc);
+ }
+}
+
+/** @} nrs */
diff --git a/drivers/staging/lustre/lustre/ptlrpc/nrs_fifo.c b/drivers/staging/lustre/lustre/ptlrpc/nrs_fifo.c
new file mode 100644
index 000000000..eb40c01db
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ptlrpc/nrs_fifo.c
@@ -0,0 +1,270 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License version 2 for more details. A copy is
+ * included in the COPYING file that accompanied this code.
+
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2011 Intel Corporation
+ *
+ * Copyright 2012 Xyratex Technology Limited
+ */
+/*
+ * lustre/ptlrpc/nrs_fifo.c
+ *
+ * Network Request Scheduler (NRS) FIFO policy
+ *
+ * Handles RPCs in a FIFO manner, as received from the network. This policy is
+ * a logical wrapper around previous, non-NRS functionality. It is used as the
+ * default and fallback policy for all types of RPCs on all PTLRPC service
+ * partitions, for both regular and high-priority NRS heads. Default here means
+ * the policy is the one enabled at PTLRPC service partition startup time, and
+ * fallback means the policy is used to handle RPCs that are not handled
+ * successfully or are not handled at all by any primary policy that may be
+ * enabled on a given NRS head.
+ *
+ * Author: Liang Zhen <liang@whamcloud.com>
+ * Author: Nikitas Angelinas <nikitas_angelinas@xyratex.com>
+ */
+/**
+ * \addtogoup nrs
+ * @{
+ */
+
+#define DEBUG_SUBSYSTEM S_RPC
+#include "../include/obd_support.h"
+#include "../include/obd_class.h"
+#include "../../include/linux/libcfs/libcfs.h"
+#include "ptlrpc_internal.h"
+
+/**
+ * \name fifo
+ *
+ * The FIFO policy is a logical wrapper around previous, non-NRS functionality.
+ * It schedules RPCs in the same order as they are queued from LNet.
+ *
+ * @{
+ */
+
+#define NRS_POL_NAME_FIFO "fifo"
+
+/**
+ * Is called before the policy transitions into
+ * ptlrpc_nrs_pol_state::NRS_POL_STATE_STARTED; allocates and initializes a
+ * policy-specific private data structure.
+ *
+ * \param[in] policy The policy to start
+ *
+ * \retval -ENOMEM OOM error
+ * \retval 0 success
+ *
+ * \see nrs_policy_register()
+ * \see nrs_policy_ctl()
+ */
+static int nrs_fifo_start(struct ptlrpc_nrs_policy *policy)
+{
+ struct nrs_fifo_head *head;
+
+ OBD_CPT_ALLOC_PTR(head, nrs_pol2cptab(policy), nrs_pol2cptid(policy));
+ if (head == NULL)
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(&head->fh_list);
+ policy->pol_private = head;
+ return 0;
+}
+
+/**
+ * Is called before the policy transitions into
+ * ptlrpc_nrs_pol_state::NRS_POL_STATE_STOPPED; deallocates the policy-specific
+ * private data structure.
+ *
+ * \param[in] policy The policy to stop
+ *
+ * \see nrs_policy_stop0()
+ */
+static void nrs_fifo_stop(struct ptlrpc_nrs_policy *policy)
+{
+ struct nrs_fifo_head *head = policy->pol_private;
+
+ LASSERT(head != NULL);
+ LASSERT(list_empty(&head->fh_list));
+
+ OBD_FREE_PTR(head);
+}
+
+/**
+ * Is called for obtaining a FIFO policy resource.
+ *
+ * \param[in] policy The policy on which the request is being asked for
+ * \param[in] nrq The request for which resources are being taken
+ * \param[in] parent Parent resource, unused in this policy
+ * \param[out] resp Resources references are placed in this array
+ * \param[in] moving_req Signifies limited caller context; unused in this
+ * policy
+ *
+ * \retval 1 The FIFO policy only has a one-level resource hierarchy, as since
+ * it implements a simple scheduling algorithm in which request
+ * priority is determined on the request arrival order, it does not
+ * need to maintain a set of resources that would otherwise be used
+ * to calculate a request's priority.
+ *
+ * \see nrs_resource_get_safe()
+ */
+static int nrs_fifo_res_get(struct ptlrpc_nrs_policy *policy,
+ struct ptlrpc_nrs_request *nrq,
+ const struct ptlrpc_nrs_resource *parent,
+ struct ptlrpc_nrs_resource **resp, bool moving_req)
+{
+ /**
+ * Just return the resource embedded inside nrs_fifo_head, and end this
+ * resource hierarchy reference request.
+ */
+ *resp = &((struct nrs_fifo_head *)policy->pol_private)->fh_res;
+ return 1;
+}
+
+/**
+ * Called when getting a request from the FIFO policy for handling, or just
+ * peeking; removes the request from the policy when it is to be handled.
+ *
+ * \param[in] policy The policy
+ * \param[in] peek When set, signifies that we just want to examine the
+ * request, and not handle it, so the request is not removed
+ * from the policy.
+ * \param[in] force Force the policy to return a request; unused in this
+ * policy
+ *
+ * \retval The request to be handled; this is the next request in the FIFO
+ * queue
+ *
+ * \see ptlrpc_nrs_req_get_nolock()
+ * \see nrs_request_get()
+ */
+static
+struct ptlrpc_nrs_request *nrs_fifo_req_get(struct ptlrpc_nrs_policy *policy,
+ bool peek, bool force)
+{
+ struct nrs_fifo_head *head = policy->pol_private;
+ struct ptlrpc_nrs_request *nrq;
+
+ nrq = unlikely(list_empty(&head->fh_list)) ? NULL :
+ list_entry(head->fh_list.next, struct ptlrpc_nrs_request,
+ nr_u.fifo.fr_list);
+
+ if (likely(!peek && nrq != NULL)) {
+ struct ptlrpc_request *req = container_of(nrq,
+ struct ptlrpc_request,
+ rq_nrq);
+
+ list_del_init(&nrq->nr_u.fifo.fr_list);
+
+ CDEBUG(D_RPCTRACE, "NRS start %s request from %s, seq: %llu\n",
+ policy->pol_desc->pd_name, libcfs_id2str(req->rq_peer),
+ nrq->nr_u.fifo.fr_sequence);
+ }
+
+ return nrq;
+}
+
+/**
+ * Adds request \a nrq to \a policy's list of queued requests
+ *
+ * \param[in] policy The policy
+ * \param[in] nrq The request to add
+ *
+ * \retval 0 success; nrs_request_enqueue() assumes this function will always
+ * succeed
+ */
+static int nrs_fifo_req_add(struct ptlrpc_nrs_policy *policy,
+ struct ptlrpc_nrs_request *nrq)
+{
+ struct nrs_fifo_head *head;
+
+ head = container_of(nrs_request_resource(nrq), struct nrs_fifo_head,
+ fh_res);
+ /**
+ * Only used for debugging
+ */
+ nrq->nr_u.fifo.fr_sequence = head->fh_sequence++;
+ list_add_tail(&nrq->nr_u.fifo.fr_list, &head->fh_list);
+
+ return 0;
+}
+
+/**
+ * Removes request \a nrq from \a policy's list of queued requests.
+ *
+ * \param[in] policy The policy
+ * \param[in] nrq The request to remove
+ */
+static void nrs_fifo_req_del(struct ptlrpc_nrs_policy *policy,
+ struct ptlrpc_nrs_request *nrq)
+{
+ LASSERT(!list_empty(&nrq->nr_u.fifo.fr_list));
+ list_del_init(&nrq->nr_u.fifo.fr_list);
+}
+
+/**
+ * Prints a debug statement right before the request \a nrq stops being
+ * handled.
+ *
+ * \param[in] policy The policy handling the request
+ * \param[in] nrq The request being handled
+ *
+ * \see ptlrpc_server_finish_request()
+ * \see ptlrpc_nrs_req_stop_nolock()
+ */
+static void nrs_fifo_req_stop(struct ptlrpc_nrs_policy *policy,
+ struct ptlrpc_nrs_request *nrq)
+{
+ struct ptlrpc_request *req = container_of(nrq, struct ptlrpc_request,
+ rq_nrq);
+
+ CDEBUG(D_RPCTRACE, "NRS stop %s request from %s, seq: %llu\n",
+ policy->pol_desc->pd_name, libcfs_id2str(req->rq_peer),
+ nrq->nr_u.fifo.fr_sequence);
+}
+
+/**
+ * FIFO policy operations
+ */
+static const struct ptlrpc_nrs_pol_ops nrs_fifo_ops = {
+ .op_policy_start = nrs_fifo_start,
+ .op_policy_stop = nrs_fifo_stop,
+ .op_res_get = nrs_fifo_res_get,
+ .op_req_get = nrs_fifo_req_get,
+ .op_req_enqueue = nrs_fifo_req_add,
+ .op_req_dequeue = nrs_fifo_req_del,
+ .op_req_stop = nrs_fifo_req_stop,
+};
+
+/**
+ * FIFO policy configuration
+ */
+struct ptlrpc_nrs_pol_conf nrs_conf_fifo = {
+ .nc_name = NRS_POL_NAME_FIFO,
+ .nc_ops = &nrs_fifo_ops,
+ .nc_compat = nrs_policy_compat_all,
+ .nc_flags = PTLRPC_NRS_FL_FALLBACK |
+ PTLRPC_NRS_FL_REG_START
+};
+
+/** @} fifo */
+
+/** @} nrs */
diff --git a/drivers/staging/lustre/lustre/ptlrpc/pack_generic.c b/drivers/staging/lustre/lustre/ptlrpc/pack_generic.c
new file mode 100644
index 000000000..b51af9bf3
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ptlrpc/pack_generic.c
@@ -0,0 +1,2536 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/ptlrpc/pack_generic.c
+ *
+ * (Un)packing of OST requests
+ *
+ * Author: Peter J. Braam <braam@clusterfs.com>
+ * Author: Phil Schwan <phil@clusterfs.com>
+ * Author: Eric Barton <eeb@clusterfs.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_RPC
+
+#include "../../include/linux/libcfs/libcfs.h"
+
+#include "../include/obd_support.h"
+#include "../include/obd_class.h"
+#include "../include/lustre_net.h"
+#include "../include/obd_cksum.h"
+#include "../include/lustre/ll_fiemap.h"
+
+static inline int lustre_msg_hdr_size_v2(int count)
+{
+ return cfs_size_round(offsetof(struct lustre_msg_v2,
+ lm_buflens[count]));
+}
+
+int lustre_msg_hdr_size(__u32 magic, int count)
+{
+ switch (magic) {
+ case LUSTRE_MSG_MAGIC_V2:
+ return lustre_msg_hdr_size_v2(count);
+ default:
+ LASSERTF(0, "incorrect message magic: %08x\n", magic);
+ return -EINVAL;
+ }
+}
+EXPORT_SYMBOL(lustre_msg_hdr_size);
+
+void ptlrpc_buf_set_swabbed(struct ptlrpc_request *req, const int inout,
+ int index)
+{
+ if (inout)
+ lustre_set_req_swabbed(req, index);
+ else
+ lustre_set_rep_swabbed(req, index);
+}
+EXPORT_SYMBOL(ptlrpc_buf_set_swabbed);
+
+int ptlrpc_buf_need_swab(struct ptlrpc_request *req, const int inout,
+ int index)
+{
+ if (inout)
+ return (ptlrpc_req_need_swab(req) &&
+ !lustre_req_swabbed(req, index));
+ else
+ return (ptlrpc_rep_need_swab(req) &&
+ !lustre_rep_swabbed(req, index));
+}
+EXPORT_SYMBOL(ptlrpc_buf_need_swab);
+
+static inline int lustre_msg_check_version_v2(struct lustre_msg_v2 *msg,
+ __u32 version)
+{
+ __u32 ver = lustre_msg_get_version(msg);
+ return (ver & LUSTRE_VERSION_MASK) != version;
+}
+
+int lustre_msg_check_version(struct lustre_msg *msg, __u32 version)
+{
+ switch (msg->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V1:
+ CERROR("msg v1 not supported - please upgrade you system\n");
+ return -EINVAL;
+ case LUSTRE_MSG_MAGIC_V2:
+ return lustre_msg_check_version_v2(msg, version);
+ default:
+ CERROR("incorrect message magic: %08x\n", msg->lm_magic);
+ return 0;
+ }
+}
+EXPORT_SYMBOL(lustre_msg_check_version);
+
+/* early reply size */
+int lustre_msg_early_size(void)
+{
+ static int size;
+ if (!size) {
+ /* Always reply old ptlrpc_body_v2 to keep interoperability
+ * with the old client (< 2.3) which doesn't have pb_jobid
+ * in the ptlrpc_body.
+ *
+ * XXX Remove this whenever we drop interoperability with such
+ * client.
+ */
+ __u32 pblen = sizeof(struct ptlrpc_body_v2);
+ size = lustre_msg_size(LUSTRE_MSG_MAGIC_V2, 1, &pblen);
+ }
+ return size;
+}
+EXPORT_SYMBOL(lustre_msg_early_size);
+
+int lustre_msg_size_v2(int count, __u32 *lengths)
+{
+ int size;
+ int i;
+
+ size = lustre_msg_hdr_size_v2(count);
+ for (i = 0; i < count; i++)
+ size += cfs_size_round(lengths[i]);
+
+ return size;
+}
+EXPORT_SYMBOL(lustre_msg_size_v2);
+
+/* This returns the size of the buffer that is required to hold a lustre_msg
+ * with the given sub-buffer lengths.
+ * NOTE: this should only be used for NEW requests, and should always be
+ * in the form of a v2 request. If this is a connection to a v1
+ * target then the first buffer will be stripped because the ptlrpc
+ * data is part of the lustre_msg_v1 header. b=14043 */
+int lustre_msg_size(__u32 magic, int count, __u32 *lens)
+{
+ __u32 size[] = { sizeof(struct ptlrpc_body) };
+
+ if (!lens) {
+ LASSERT(count == 1);
+ lens = size;
+ }
+
+ LASSERT(count > 0);
+ LASSERT(lens[MSG_PTLRPC_BODY_OFF] >= sizeof(struct ptlrpc_body_v2));
+
+ switch (magic) {
+ case LUSTRE_MSG_MAGIC_V2:
+ return lustre_msg_size_v2(count, lens);
+ default:
+ LASSERTF(0, "incorrect message magic: %08x\n", magic);
+ return -EINVAL;
+ }
+}
+EXPORT_SYMBOL(lustre_msg_size);
+
+/* This is used to determine the size of a buffer that was already packed
+ * and will correctly handle the different message formats. */
+int lustre_packed_msg_size(struct lustre_msg *msg)
+{
+ switch (msg->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V2:
+ return lustre_msg_size_v2(msg->lm_bufcount, msg->lm_buflens);
+ default:
+ CERROR("incorrect message magic: %08x\n", msg->lm_magic);
+ return 0;
+ }
+}
+EXPORT_SYMBOL(lustre_packed_msg_size);
+
+void lustre_init_msg_v2(struct lustre_msg_v2 *msg, int count, __u32 *lens,
+ char **bufs)
+{
+ char *ptr;
+ int i;
+
+ msg->lm_bufcount = count;
+ /* XXX: lm_secflvr uninitialized here */
+ msg->lm_magic = LUSTRE_MSG_MAGIC_V2;
+
+ for (i = 0; i < count; i++)
+ msg->lm_buflens[i] = lens[i];
+
+ if (bufs == NULL)
+ return;
+
+ ptr = (char *)msg + lustre_msg_hdr_size_v2(count);
+ for (i = 0; i < count; i++) {
+ char *tmp = bufs[i];
+ LOGL(tmp, lens[i], ptr);
+ }
+}
+EXPORT_SYMBOL(lustre_init_msg_v2);
+
+static int lustre_pack_request_v2(struct ptlrpc_request *req,
+ int count, __u32 *lens, char **bufs)
+{
+ int reqlen, rc;
+
+ reqlen = lustre_msg_size_v2(count, lens);
+
+ rc = sptlrpc_cli_alloc_reqbuf(req, reqlen);
+ if (rc)
+ return rc;
+
+ req->rq_reqlen = reqlen;
+
+ lustre_init_msg_v2(req->rq_reqmsg, count, lens, bufs);
+ lustre_msg_add_version(req->rq_reqmsg, PTLRPC_MSG_VERSION);
+ return 0;
+}
+
+int lustre_pack_request(struct ptlrpc_request *req, __u32 magic, int count,
+ __u32 *lens, char **bufs)
+{
+ __u32 size[] = { sizeof(struct ptlrpc_body) };
+
+ if (!lens) {
+ LASSERT(count == 1);
+ lens = size;
+ }
+
+ LASSERT(count > 0);
+ LASSERT(lens[MSG_PTLRPC_BODY_OFF] == sizeof(struct ptlrpc_body));
+
+ /* only use new format, we don't need to be compatible with 1.4 */
+ return lustre_pack_request_v2(req, count, lens, bufs);
+}
+EXPORT_SYMBOL(lustre_pack_request);
+
+#if RS_DEBUG
+LIST_HEAD(ptlrpc_rs_debug_lru);
+spinlock_t ptlrpc_rs_debug_lock;
+
+#define PTLRPC_RS_DEBUG_LRU_ADD(rs) \
+do { \
+ spin_lock(&ptlrpc_rs_debug_lock); \
+ list_add_tail(&(rs)->rs_debug_list, &ptlrpc_rs_debug_lru); \
+ spin_unlock(&ptlrpc_rs_debug_lock); \
+} while (0)
+
+#define PTLRPC_RS_DEBUG_LRU_DEL(rs) \
+do { \
+ spin_lock(&ptlrpc_rs_debug_lock); \
+ list_del(&(rs)->rs_debug_list); \
+ spin_unlock(&ptlrpc_rs_debug_lock); \
+} while (0)
+#else
+# define PTLRPC_RS_DEBUG_LRU_ADD(rs) do {} while (0)
+# define PTLRPC_RS_DEBUG_LRU_DEL(rs) do {} while (0)
+#endif
+
+struct ptlrpc_reply_state *
+lustre_get_emerg_rs(struct ptlrpc_service_part *svcpt)
+{
+ struct ptlrpc_reply_state *rs = NULL;
+
+ spin_lock(&svcpt->scp_rep_lock);
+
+ /* See if we have anything in a pool, and wait if nothing */
+ while (list_empty(&svcpt->scp_rep_idle)) {
+ struct l_wait_info lwi;
+ int rc;
+
+ spin_unlock(&svcpt->scp_rep_lock);
+ /* If we cannot get anything for some long time, we better
+ * bail out instead of waiting infinitely */
+ lwi = LWI_TIMEOUT(cfs_time_seconds(10), NULL, NULL);
+ rc = l_wait_event(svcpt->scp_rep_waitq,
+ !list_empty(&svcpt->scp_rep_idle), &lwi);
+ if (rc != 0)
+ goto out;
+ spin_lock(&svcpt->scp_rep_lock);
+ }
+
+ rs = list_entry(svcpt->scp_rep_idle.next,
+ struct ptlrpc_reply_state, rs_list);
+ list_del(&rs->rs_list);
+
+ spin_unlock(&svcpt->scp_rep_lock);
+
+ memset(rs, 0, svcpt->scp_service->srv_max_reply_size);
+ rs->rs_size = svcpt->scp_service->srv_max_reply_size;
+ rs->rs_svcpt = svcpt;
+ rs->rs_prealloc = 1;
+out:
+ return rs;
+}
+
+void lustre_put_emerg_rs(struct ptlrpc_reply_state *rs)
+{
+ struct ptlrpc_service_part *svcpt = rs->rs_svcpt;
+
+ spin_lock(&svcpt->scp_rep_lock);
+ list_add(&rs->rs_list, &svcpt->scp_rep_idle);
+ spin_unlock(&svcpt->scp_rep_lock);
+ wake_up(&svcpt->scp_rep_waitq);
+}
+
+int lustre_pack_reply_v2(struct ptlrpc_request *req, int count,
+ __u32 *lens, char **bufs, int flags)
+{
+ struct ptlrpc_reply_state *rs;
+ int msg_len, rc;
+
+ LASSERT(req->rq_reply_state == NULL);
+
+ if ((flags & LPRFL_EARLY_REPLY) == 0) {
+ spin_lock(&req->rq_lock);
+ req->rq_packed_final = 1;
+ spin_unlock(&req->rq_lock);
+ }
+
+ msg_len = lustre_msg_size_v2(count, lens);
+ rc = sptlrpc_svc_alloc_rs(req, msg_len);
+ if (rc)
+ return rc;
+
+ rs = req->rq_reply_state;
+ atomic_set(&rs->rs_refcount, 1); /* 1 ref for rq_reply_state */
+ rs->rs_cb_id.cbid_fn = reply_out_callback;
+ rs->rs_cb_id.cbid_arg = rs;
+ rs->rs_svcpt = req->rq_rqbd->rqbd_svcpt;
+ INIT_LIST_HEAD(&rs->rs_exp_list);
+ INIT_LIST_HEAD(&rs->rs_obd_list);
+ INIT_LIST_HEAD(&rs->rs_list);
+ spin_lock_init(&rs->rs_lock);
+
+ req->rq_replen = msg_len;
+ req->rq_reply_state = rs;
+ req->rq_repmsg = rs->rs_msg;
+
+ lustre_init_msg_v2(rs->rs_msg, count, lens, bufs);
+ lustre_msg_add_version(rs->rs_msg, PTLRPC_MSG_VERSION);
+
+ PTLRPC_RS_DEBUG_LRU_ADD(rs);
+
+ return 0;
+}
+EXPORT_SYMBOL(lustre_pack_reply_v2);
+
+int lustre_pack_reply_flags(struct ptlrpc_request *req, int count, __u32 *lens,
+ char **bufs, int flags)
+{
+ int rc = 0;
+ __u32 size[] = { sizeof(struct ptlrpc_body) };
+
+ if (!lens) {
+ LASSERT(count == 1);
+ lens = size;
+ }
+
+ LASSERT(count > 0);
+ LASSERT(lens[MSG_PTLRPC_BODY_OFF] == sizeof(struct ptlrpc_body));
+
+ switch (req->rq_reqmsg->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V2:
+ rc = lustre_pack_reply_v2(req, count, lens, bufs, flags);
+ break;
+ default:
+ LASSERTF(0, "incorrect message magic: %08x\n",
+ req->rq_reqmsg->lm_magic);
+ rc = -EINVAL;
+ }
+ if (rc != 0)
+ CERROR("lustre_pack_reply failed: rc=%d size=%d\n", rc,
+ lustre_msg_size(req->rq_reqmsg->lm_magic, count, lens));
+ return rc;
+}
+EXPORT_SYMBOL(lustre_pack_reply_flags);
+
+int lustre_pack_reply(struct ptlrpc_request *req, int count, __u32 *lens,
+ char **bufs)
+{
+ return lustre_pack_reply_flags(req, count, lens, bufs, 0);
+}
+EXPORT_SYMBOL(lustre_pack_reply);
+
+void *lustre_msg_buf_v2(struct lustre_msg_v2 *m, int n, int min_size)
+{
+ int i, offset, buflen, bufcount;
+
+ LASSERT(m != NULL);
+ LASSERT(n >= 0);
+
+ bufcount = m->lm_bufcount;
+ if (unlikely(n >= bufcount)) {
+ CDEBUG(D_INFO, "msg %p buffer[%d] not present (count %d)\n",
+ m, n, bufcount);
+ return NULL;
+ }
+
+ buflen = m->lm_buflens[n];
+ if (unlikely(buflen < min_size)) {
+ CERROR("msg %p buffer[%d] size %d too small (required %d, opc=%d)\n",
+ m, n, buflen, min_size,
+ n == MSG_PTLRPC_BODY_OFF ? -1 : lustre_msg_get_opc(m));
+ return NULL;
+ }
+
+ offset = lustre_msg_hdr_size_v2(bufcount);
+ for (i = 0; i < n; i++)
+ offset += cfs_size_round(m->lm_buflens[i]);
+
+ return (char *)m + offset;
+}
+
+void *lustre_msg_buf(struct lustre_msg *m, int n, int min_size)
+{
+ switch (m->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V2:
+ return lustre_msg_buf_v2(m, n, min_size);
+ default:
+ LASSERTF(0, "incorrect message magic: %08x(msg:%p)\n", m->lm_magic, m);
+ return NULL;
+ }
+}
+EXPORT_SYMBOL(lustre_msg_buf);
+
+int lustre_shrink_msg_v2(struct lustre_msg_v2 *msg, int segment,
+ unsigned int newlen, int move_data)
+{
+ char *tail = NULL, *newpos;
+ int tail_len = 0, n;
+
+ LASSERT(msg);
+ LASSERT(msg->lm_bufcount > segment);
+ LASSERT(msg->lm_buflens[segment] >= newlen);
+
+ if (msg->lm_buflens[segment] == newlen)
+ goto out;
+
+ if (move_data && msg->lm_bufcount > segment + 1) {
+ tail = lustre_msg_buf_v2(msg, segment + 1, 0);
+ for (n = segment + 1; n < msg->lm_bufcount; n++)
+ tail_len += cfs_size_round(msg->lm_buflens[n]);
+ }
+
+ msg->lm_buflens[segment] = newlen;
+
+ if (tail && tail_len) {
+ newpos = lustre_msg_buf_v2(msg, segment + 1, 0);
+ LASSERT(newpos <= tail);
+ if (newpos != tail)
+ memmove(newpos, tail, tail_len);
+ }
+out:
+ return lustre_msg_size_v2(msg->lm_bufcount, msg->lm_buflens);
+}
+
+/*
+ * for @msg, shrink @segment to size @newlen. if @move_data is non-zero,
+ * we also move data forward from @segment + 1.
+ *
+ * if @newlen == 0, we remove the segment completely, but we still keep the
+ * totally bufcount the same to save possible data moving. this will leave a
+ * unused segment with size 0 at the tail, but that's ok.
+ *
+ * return new msg size after shrinking.
+ *
+ * CAUTION:
+ * + if any buffers higher than @segment has been filled in, must call shrink
+ * with non-zero @move_data.
+ * + caller should NOT keep pointers to msg buffers which higher than @segment
+ * after call shrink.
+ */
+int lustre_shrink_msg(struct lustre_msg *msg, int segment,
+ unsigned int newlen, int move_data)
+{
+ switch (msg->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V2:
+ return lustre_shrink_msg_v2(msg, segment, newlen, move_data);
+ default:
+ LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic);
+ }
+}
+EXPORT_SYMBOL(lustre_shrink_msg);
+
+void lustre_free_reply_state(struct ptlrpc_reply_state *rs)
+{
+ PTLRPC_RS_DEBUG_LRU_DEL(rs);
+
+ LASSERT(atomic_read(&rs->rs_refcount) == 0);
+ LASSERT(!rs->rs_difficult || rs->rs_handled);
+ LASSERT(!rs->rs_on_net);
+ LASSERT(!rs->rs_scheduled);
+ LASSERT(rs->rs_export == NULL);
+ LASSERT(rs->rs_nlocks == 0);
+ LASSERT(list_empty(&rs->rs_exp_list));
+ LASSERT(list_empty(&rs->rs_obd_list));
+
+ sptlrpc_svc_free_rs(rs);
+}
+EXPORT_SYMBOL(lustre_free_reply_state);
+
+static int lustre_unpack_msg_v2(struct lustre_msg_v2 *m, int len)
+{
+ int swabbed, required_len, i;
+
+ /* Now we know the sender speaks my language. */
+ required_len = lustre_msg_hdr_size_v2(0);
+ if (len < required_len) {
+ /* can't even look inside the message */
+ CERROR("message length %d too small for lustre_msg\n", len);
+ return -EINVAL;
+ }
+
+ swabbed = (m->lm_magic == LUSTRE_MSG_MAGIC_V2_SWABBED);
+
+ if (swabbed) {
+ __swab32s(&m->lm_magic);
+ __swab32s(&m->lm_bufcount);
+ __swab32s(&m->lm_secflvr);
+ __swab32s(&m->lm_repsize);
+ __swab32s(&m->lm_cksum);
+ __swab32s(&m->lm_flags);
+ CLASSERT(offsetof(typeof(*m), lm_padding_2) != 0);
+ CLASSERT(offsetof(typeof(*m), lm_padding_3) != 0);
+ }
+
+ required_len = lustre_msg_hdr_size_v2(m->lm_bufcount);
+ if (len < required_len) {
+ /* didn't receive all the buffer lengths */
+ CERROR("message length %d too small for %d buflens\n",
+ len, m->lm_bufcount);
+ return -EINVAL;
+ }
+
+ for (i = 0; i < m->lm_bufcount; i++) {
+ if (swabbed)
+ __swab32s(&m->lm_buflens[i]);
+ required_len += cfs_size_round(m->lm_buflens[i]);
+ }
+
+ if (len < required_len) {
+ CERROR("len: %d, required_len %d\n", len, required_len);
+ CERROR("bufcount: %d\n", m->lm_bufcount);
+ for (i = 0; i < m->lm_bufcount; i++)
+ CERROR("buffer %d length %d\n", i, m->lm_buflens[i]);
+ return -EINVAL;
+ }
+
+ return swabbed;
+}
+
+int __lustre_unpack_msg(struct lustre_msg *m, int len)
+{
+ int required_len, rc;
+
+ /* We can provide a slightly better error log, if we check the
+ * message magic and version first. In the future, struct
+ * lustre_msg may grow, and we'd like to log a version mismatch,
+ * rather than a short message.
+ *
+ */
+ required_len = offsetof(struct lustre_msg, lm_magic) +
+ sizeof(m->lm_magic);
+ if (len < required_len) {
+ /* can't even look inside the message */
+ CERROR("message length %d too small for magic/version check\n",
+ len);
+ return -EINVAL;
+ }
+
+ rc = lustre_unpack_msg_v2(m, len);
+
+ return rc;
+}
+EXPORT_SYMBOL(__lustre_unpack_msg);
+
+int ptlrpc_unpack_req_msg(struct ptlrpc_request *req, int len)
+{
+ int rc;
+ rc = __lustre_unpack_msg(req->rq_reqmsg, len);
+ if (rc == 1) {
+ lustre_set_req_swabbed(req, MSG_PTLRPC_HEADER_OFF);
+ rc = 0;
+ }
+ return rc;
+}
+EXPORT_SYMBOL(ptlrpc_unpack_req_msg);
+
+int ptlrpc_unpack_rep_msg(struct ptlrpc_request *req, int len)
+{
+ int rc;
+ rc = __lustre_unpack_msg(req->rq_repmsg, len);
+ if (rc == 1) {
+ lustre_set_rep_swabbed(req, MSG_PTLRPC_HEADER_OFF);
+ rc = 0;
+ }
+ return rc;
+}
+EXPORT_SYMBOL(ptlrpc_unpack_rep_msg);
+
+static inline int lustre_unpack_ptlrpc_body_v2(struct ptlrpc_request *req,
+ const int inout, int offset)
+{
+ struct ptlrpc_body *pb;
+ struct lustre_msg_v2 *m = inout ? req->rq_reqmsg : req->rq_repmsg;
+
+ pb = lustre_msg_buf_v2(m, offset, sizeof(struct ptlrpc_body_v2));
+ if (!pb) {
+ CERROR("error unpacking ptlrpc body\n");
+ return -EFAULT;
+ }
+ if (ptlrpc_buf_need_swab(req, inout, offset)) {
+ lustre_swab_ptlrpc_body(pb);
+ ptlrpc_buf_set_swabbed(req, inout, offset);
+ }
+
+ if ((pb->pb_version & ~LUSTRE_VERSION_MASK) != PTLRPC_MSG_VERSION) {
+ CERROR("wrong lustre_msg version %08x\n", pb->pb_version);
+ return -EINVAL;
+ }
+
+ if (!inout)
+ pb->pb_status = ptlrpc_status_ntoh(pb->pb_status);
+
+ return 0;
+}
+
+int lustre_unpack_req_ptlrpc_body(struct ptlrpc_request *req, int offset)
+{
+ switch (req->rq_reqmsg->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V2:
+ return lustre_unpack_ptlrpc_body_v2(req, 1, offset);
+ default:
+ CERROR("bad lustre msg magic: %08x\n",
+ req->rq_reqmsg->lm_magic);
+ return -EINVAL;
+ }
+}
+
+int lustre_unpack_rep_ptlrpc_body(struct ptlrpc_request *req, int offset)
+{
+ switch (req->rq_repmsg->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V2:
+ return lustre_unpack_ptlrpc_body_v2(req, 0, offset);
+ default:
+ CERROR("bad lustre msg magic: %08x\n",
+ req->rq_repmsg->lm_magic);
+ return -EINVAL;
+ }
+}
+
+static inline int lustre_msg_buflen_v2(struct lustre_msg_v2 *m, int n)
+{
+ if (n >= m->lm_bufcount)
+ return 0;
+
+ return m->lm_buflens[n];
+}
+
+/**
+ * lustre_msg_buflen - return the length of buffer \a n in message \a m
+ * \param m lustre_msg (request or reply) to look at
+ * \param n message index (base 0)
+ *
+ * returns zero for non-existent message indices
+ */
+int lustre_msg_buflen(struct lustre_msg *m, int n)
+{
+ switch (m->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V2:
+ return lustre_msg_buflen_v2(m, n);
+ default:
+ CERROR("incorrect message magic: %08x\n", m->lm_magic);
+ return -EINVAL;
+ }
+}
+EXPORT_SYMBOL(lustre_msg_buflen);
+
+static inline void
+lustre_msg_set_buflen_v2(struct lustre_msg_v2 *m, int n, int len)
+{
+ if (n >= m->lm_bufcount)
+ LBUG();
+
+ m->lm_buflens[n] = len;
+}
+
+void lustre_msg_set_buflen(struct lustre_msg *m, int n, int len)
+{
+ switch (m->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V2:
+ lustre_msg_set_buflen_v2(m, n, len);
+ return;
+ default:
+ LASSERTF(0, "incorrect message magic: %08x\n", m->lm_magic);
+ }
+}
+
+EXPORT_SYMBOL(lustre_msg_set_buflen);
+
+/* NB return the bufcount for lustre_msg_v2 format, so if message is packed
+ * in V1 format, the result is one bigger. (add struct ptlrpc_body). */
+int lustre_msg_bufcount(struct lustre_msg *m)
+{
+ switch (m->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V2:
+ return m->lm_bufcount;
+ default:
+ CERROR("incorrect message magic: %08x\n", m->lm_magic);
+ return -EINVAL;
+ }
+}
+EXPORT_SYMBOL(lustre_msg_bufcount);
+
+char *lustre_msg_string(struct lustre_msg *m, int index, int max_len)
+{
+ /* max_len == 0 means the string should fill the buffer */
+ char *str;
+ int slen, blen;
+
+ switch (m->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V2:
+ str = lustre_msg_buf_v2(m, index, 0);
+ blen = lustre_msg_buflen_v2(m, index);
+ break;
+ default:
+ LASSERTF(0, "incorrect message magic: %08x\n", m->lm_magic);
+ }
+
+ if (str == NULL) {
+ CERROR("can't unpack string in msg %p buffer[%d]\n", m, index);
+ return NULL;
+ }
+
+ slen = strnlen(str, blen);
+
+ if (slen == blen) { /* not NULL terminated */
+ CERROR("can't unpack non-NULL terminated string in msg %p buffer[%d] len %d\n",
+ m, index, blen);
+ return NULL;
+ }
+
+ if (max_len == 0) {
+ if (slen != blen - 1) {
+ CERROR("can't unpack short string in msg %p buffer[%d] len %d: strlen %d\n",
+ m, index, blen, slen);
+ return NULL;
+ }
+ } else if (slen > max_len) {
+ CERROR("can't unpack oversized string in msg %p buffer[%d] len %d strlen %d: max %d expected\n",
+ m, index, blen, slen, max_len);
+ return NULL;
+ }
+
+ return str;
+}
+EXPORT_SYMBOL(lustre_msg_string);
+
+/* Wrap up the normal fixed length cases */
+static inline void *__lustre_swab_buf(struct lustre_msg *msg, int index,
+ int min_size, void *swabber)
+{
+ void *ptr = NULL;
+
+ LASSERT(msg != NULL);
+ switch (msg->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V2:
+ ptr = lustre_msg_buf_v2(msg, index, min_size);
+ break;
+ default:
+ CERROR("incorrect message magic: %08x\n", msg->lm_magic);
+ }
+
+ if (ptr && swabber)
+ ((void (*)(void *))swabber)(ptr);
+
+ return ptr;
+}
+
+static inline struct ptlrpc_body *lustre_msg_ptlrpc_body(struct lustre_msg *msg)
+{
+ return lustre_msg_buf_v2(msg, MSG_PTLRPC_BODY_OFF,
+ sizeof(struct ptlrpc_body_v2));
+}
+
+__u32 lustre_msghdr_get_flags(struct lustre_msg *msg)
+{
+ switch (msg->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V1:
+ case LUSTRE_MSG_MAGIC_V1_SWABBED:
+ return 0;
+ case LUSTRE_MSG_MAGIC_V2:
+ /* already in host endian */
+ return msg->lm_flags;
+ default:
+ LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic);
+ return 0;
+ }
+}
+EXPORT_SYMBOL(lustre_msghdr_get_flags);
+
+void lustre_msghdr_set_flags(struct lustre_msg *msg, __u32 flags)
+{
+ switch (msg->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V1:
+ return;
+ case LUSTRE_MSG_MAGIC_V2:
+ msg->lm_flags = flags;
+ return;
+ default:
+ LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic);
+ }
+}
+
+__u32 lustre_msg_get_flags(struct lustre_msg *msg)
+{
+ switch (msg->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V2: {
+ struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+ if (!pb) {
+ CERROR("invalid msg %p: no ptlrpc body!\n", msg);
+ return 0;
+ }
+ return pb->pb_flags;
+ }
+ default:
+ /* flags might be printed in debug code while message
+ * uninitialized */
+ return 0;
+ }
+}
+EXPORT_SYMBOL(lustre_msg_get_flags);
+
+void lustre_msg_add_flags(struct lustre_msg *msg, int flags)
+{
+ switch (msg->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V2: {
+ struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+ LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg);
+ pb->pb_flags |= flags;
+ return;
+ }
+ default:
+ LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic);
+ }
+}
+EXPORT_SYMBOL(lustre_msg_add_flags);
+
+void lustre_msg_set_flags(struct lustre_msg *msg, int flags)
+{
+ switch (msg->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V2: {
+ struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+ LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg);
+ pb->pb_flags = flags;
+ return;
+ }
+ default:
+ LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic);
+ }
+}
+EXPORT_SYMBOL(lustre_msg_set_flags);
+
+void lustre_msg_clear_flags(struct lustre_msg *msg, int flags)
+{
+ switch (msg->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V2: {
+ struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+ LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg);
+ pb->pb_flags &= ~(MSG_GEN_FLAG_MASK & flags);
+ return;
+ }
+ default:
+ LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic);
+ }
+}
+EXPORT_SYMBOL(lustre_msg_clear_flags);
+
+__u32 lustre_msg_get_op_flags(struct lustre_msg *msg)
+{
+ switch (msg->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V2: {
+ struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+ if (!pb) {
+ CERROR("invalid msg %p: no ptlrpc body!\n", msg);
+ return 0;
+ }
+ return pb->pb_op_flags;
+ }
+ default:
+ return 0;
+ }
+}
+EXPORT_SYMBOL(lustre_msg_get_op_flags);
+
+void lustre_msg_add_op_flags(struct lustre_msg *msg, int flags)
+{
+ switch (msg->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V2: {
+ struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+ LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg);
+ pb->pb_op_flags |= flags;
+ return;
+ }
+ default:
+ LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic);
+ }
+}
+EXPORT_SYMBOL(lustre_msg_add_op_flags);
+
+void lustre_msg_set_op_flags(struct lustre_msg *msg, int flags)
+{
+ switch (msg->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V2: {
+ struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+ LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg);
+ pb->pb_op_flags |= flags;
+ return;
+ }
+ default:
+ LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic);
+ }
+}
+EXPORT_SYMBOL(lustre_msg_set_op_flags);
+
+struct lustre_handle *lustre_msg_get_handle(struct lustre_msg *msg)
+{
+ switch (msg->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V2: {
+ struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+ if (!pb) {
+ CERROR("invalid msg %p: no ptlrpc body!\n", msg);
+ return NULL;
+ }
+ return &pb->pb_handle;
+ }
+ default:
+ CERROR("incorrect message magic: %08x\n", msg->lm_magic);
+ return NULL;
+ }
+}
+EXPORT_SYMBOL(lustre_msg_get_handle);
+
+__u32 lustre_msg_get_type(struct lustre_msg *msg)
+{
+ switch (msg->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V2: {
+ struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+ if (!pb) {
+ CERROR("invalid msg %p: no ptlrpc body!\n", msg);
+ return PTL_RPC_MSG_ERR;
+ }
+ return pb->pb_type;
+ }
+ default:
+ CERROR("incorrect message magic: %08x\n", msg->lm_magic);
+ return PTL_RPC_MSG_ERR;
+ }
+}
+EXPORT_SYMBOL(lustre_msg_get_type);
+
+__u32 lustre_msg_get_version(struct lustre_msg *msg)
+{
+ switch (msg->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V2: {
+ struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+ if (!pb) {
+ CERROR("invalid msg %p: no ptlrpc body!\n", msg);
+ return 0;
+ }
+ return pb->pb_version;
+ }
+ default:
+ CERROR("incorrect message magic: %08x\n", msg->lm_magic);
+ return 0;
+ }
+}
+EXPORT_SYMBOL(lustre_msg_get_version);
+
+void lustre_msg_add_version(struct lustre_msg *msg, int version)
+{
+ switch (msg->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V2: {
+ struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+ LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg);
+ pb->pb_version |= version;
+ return;
+ }
+ default:
+ LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic);
+ }
+}
+EXPORT_SYMBOL(lustre_msg_add_version);
+
+__u32 lustre_msg_get_opc(struct lustre_msg *msg)
+{
+ switch (msg->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V2: {
+ struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+ if (!pb) {
+ CERROR("invalid msg %p: no ptlrpc body!\n", msg);
+ return 0;
+ }
+ return pb->pb_opc;
+ }
+ default:
+ CERROR("incorrect message magic: %08x(msg:%p)\n", msg->lm_magic, msg);
+ LBUG();
+ return 0;
+ }
+}
+EXPORT_SYMBOL(lustre_msg_get_opc);
+
+__u64 lustre_msg_get_last_xid(struct lustre_msg *msg)
+{
+ switch (msg->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V2: {
+ struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+ if (!pb) {
+ CERROR("invalid msg %p: no ptlrpc body!\n", msg);
+ return 0;
+ }
+ return pb->pb_last_xid;
+ }
+ default:
+ CERROR("incorrect message magic: %08x\n", msg->lm_magic);
+ return 0;
+ }
+}
+EXPORT_SYMBOL(lustre_msg_get_last_xid);
+
+__u64 lustre_msg_get_last_committed(struct lustre_msg *msg)
+{
+ switch (msg->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V2: {
+ struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+ if (!pb) {
+ CERROR("invalid msg %p: no ptlrpc body!\n", msg);
+ return 0;
+ }
+ return pb->pb_last_committed;
+ }
+ default:
+ CERROR("incorrect message magic: %08x\n", msg->lm_magic);
+ return 0;
+ }
+}
+EXPORT_SYMBOL(lustre_msg_get_last_committed);
+
+__u64 *lustre_msg_get_versions(struct lustre_msg *msg)
+{
+ switch (msg->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V1:
+ return NULL;
+ case LUSTRE_MSG_MAGIC_V2: {
+ struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+ if (!pb) {
+ CERROR("invalid msg %p: no ptlrpc body!\n", msg);
+ return NULL;
+ }
+ return pb->pb_pre_versions;
+ }
+ default:
+ CERROR("incorrect message magic: %08x\n", msg->lm_magic);
+ return NULL;
+ }
+}
+EXPORT_SYMBOL(lustre_msg_get_versions);
+
+__u64 lustre_msg_get_transno(struct lustre_msg *msg)
+{
+ switch (msg->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V2: {
+ struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+ if (!pb) {
+ CERROR("invalid msg %p: no ptlrpc body!\n", msg);
+ return 0;
+ }
+ return pb->pb_transno;
+ }
+ default:
+ CERROR("incorrect message magic: %08x\n", msg->lm_magic);
+ return 0;
+ }
+}
+EXPORT_SYMBOL(lustre_msg_get_transno);
+
+int lustre_msg_get_status(struct lustre_msg *msg)
+{
+ switch (msg->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V2: {
+ struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+ if (!pb) {
+ CERROR("invalid msg %p: no ptlrpc body!\n", msg);
+ return -EINVAL;
+ }
+ return pb->pb_status;
+ }
+ default:
+ /* status might be printed in debug code while message
+ * uninitialized */
+ return -EINVAL;
+ }
+}
+EXPORT_SYMBOL(lustre_msg_get_status);
+
+__u64 lustre_msg_get_slv(struct lustre_msg *msg)
+{
+ switch (msg->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V2: {
+ struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+ if (!pb) {
+ CERROR("invalid msg %p: no ptlrpc body!\n", msg);
+ return -EINVAL;
+ }
+ return pb->pb_slv;
+ }
+ default:
+ CERROR("invalid msg magic %08x\n", msg->lm_magic);
+ return -EINVAL;
+ }
+}
+EXPORT_SYMBOL(lustre_msg_get_slv);
+
+
+void lustre_msg_set_slv(struct lustre_msg *msg, __u64 slv)
+{
+ switch (msg->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V2: {
+ struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+ if (!pb) {
+ CERROR("invalid msg %p: no ptlrpc body!\n", msg);
+ return;
+ }
+ pb->pb_slv = slv;
+ return;
+ }
+ default:
+ CERROR("invalid msg magic %x\n", msg->lm_magic);
+ return;
+ }
+}
+EXPORT_SYMBOL(lustre_msg_set_slv);
+
+__u32 lustre_msg_get_limit(struct lustre_msg *msg)
+{
+ switch (msg->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V2: {
+ struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+ if (!pb) {
+ CERROR("invalid msg %p: no ptlrpc body!\n", msg);
+ return -EINVAL;
+ }
+ return pb->pb_limit;
+ }
+ default:
+ CERROR("invalid msg magic %x\n", msg->lm_magic);
+ return -EINVAL;
+ }
+}
+EXPORT_SYMBOL(lustre_msg_get_limit);
+
+
+void lustre_msg_set_limit(struct lustre_msg *msg, __u64 limit)
+{
+ switch (msg->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V2: {
+ struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+ if (!pb) {
+ CERROR("invalid msg %p: no ptlrpc body!\n", msg);
+ return;
+ }
+ pb->pb_limit = limit;
+ return;
+ }
+ default:
+ CERROR("invalid msg magic %08x\n", msg->lm_magic);
+ return;
+ }
+}
+EXPORT_SYMBOL(lustre_msg_set_limit);
+
+__u32 lustre_msg_get_conn_cnt(struct lustre_msg *msg)
+{
+ switch (msg->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V2: {
+ struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+ if (!pb) {
+ CERROR("invalid msg %p: no ptlrpc body!\n", msg);
+ return 0;
+ }
+ return pb->pb_conn_cnt;
+ }
+ default:
+ CERROR("incorrect message magic: %08x\n", msg->lm_magic);
+ return 0;
+ }
+}
+EXPORT_SYMBOL(lustre_msg_get_conn_cnt);
+
+int lustre_msg_is_v1(struct lustre_msg *msg)
+{
+ switch (msg->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V1:
+ case LUSTRE_MSG_MAGIC_V1_SWABBED:
+ return 1;
+ default:
+ return 0;
+ }
+}
+EXPORT_SYMBOL(lustre_msg_is_v1);
+
+__u32 lustre_msg_get_magic(struct lustre_msg *msg)
+{
+ switch (msg->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V2:
+ return msg->lm_magic;
+ default:
+ CERROR("incorrect message magic: %08x\n", msg->lm_magic);
+ return 0;
+ }
+}
+EXPORT_SYMBOL(lustre_msg_get_magic);
+
+__u32 lustre_msg_get_timeout(struct lustre_msg *msg)
+{
+ switch (msg->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V1:
+ case LUSTRE_MSG_MAGIC_V1_SWABBED:
+ return 0;
+ case LUSTRE_MSG_MAGIC_V2: {
+ struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+ if (!pb) {
+ CERROR("invalid msg %p: no ptlrpc body!\n", msg);
+ return 0;
+
+ }
+ return pb->pb_timeout;
+ }
+ default:
+ CERROR("incorrect message magic: %08x\n", msg->lm_magic);
+ return 0;
+ }
+}
+
+__u32 lustre_msg_get_service_time(struct lustre_msg *msg)
+{
+ switch (msg->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V1:
+ case LUSTRE_MSG_MAGIC_V1_SWABBED:
+ return 0;
+ case LUSTRE_MSG_MAGIC_V2: {
+ struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+ if (!pb) {
+ CERROR("invalid msg %p: no ptlrpc body!\n", msg);
+ return 0;
+
+ }
+ return pb->pb_service_time;
+ }
+ default:
+ CERROR("incorrect message magic: %08x\n", msg->lm_magic);
+ return 0;
+ }
+}
+
+char *lustre_msg_get_jobid(struct lustre_msg *msg)
+{
+ switch (msg->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V1:
+ case LUSTRE_MSG_MAGIC_V1_SWABBED:
+ return NULL;
+ case LUSTRE_MSG_MAGIC_V2: {
+ struct ptlrpc_body *pb =
+ lustre_msg_buf_v2(msg, MSG_PTLRPC_BODY_OFF,
+ sizeof(struct ptlrpc_body));
+ if (!pb)
+ return NULL;
+
+ return pb->pb_jobid;
+ }
+ default:
+ CERROR("incorrect message magic: %08x\n", msg->lm_magic);
+ return NULL;
+ }
+}
+EXPORT_SYMBOL(lustre_msg_get_jobid);
+
+__u32 lustre_msg_get_cksum(struct lustre_msg *msg)
+{
+ switch (msg->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V2:
+ return msg->lm_cksum;
+ default:
+ CERROR("incorrect message magic: %08x\n", msg->lm_magic);
+ return 0;
+ }
+}
+
+__u32 lustre_msg_calc_cksum(struct lustre_msg *msg)
+{
+ switch (msg->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V2: {
+ struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+ __u32 crc;
+ unsigned int hsize = 4;
+ cfs_crypto_hash_digest(CFS_HASH_ALG_CRC32, (unsigned char *)pb,
+ lustre_msg_buflen(msg, MSG_PTLRPC_BODY_OFF),
+ NULL, 0, (unsigned char *)&crc, &hsize);
+ return crc;
+ }
+ default:
+ CERROR("incorrect message magic: %08x\n", msg->lm_magic);
+ return 0;
+ }
+}
+
+void lustre_msg_set_handle(struct lustre_msg *msg, struct lustre_handle *handle)
+{
+ switch (msg->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V2: {
+ struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+ LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg);
+ pb->pb_handle = *handle;
+ return;
+ }
+ default:
+ LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic);
+ }
+}
+EXPORT_SYMBOL(lustre_msg_set_handle);
+
+void lustre_msg_set_type(struct lustre_msg *msg, __u32 type)
+{
+ switch (msg->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V2: {
+ struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+ LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg);
+ pb->pb_type = type;
+ return;
+ }
+ default:
+ LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic);
+ }
+}
+EXPORT_SYMBOL(lustre_msg_set_type);
+
+void lustre_msg_set_opc(struct lustre_msg *msg, __u32 opc)
+{
+ switch (msg->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V2: {
+ struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+ LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg);
+ pb->pb_opc = opc;
+ return;
+ }
+ default:
+ LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic);
+ }
+}
+EXPORT_SYMBOL(lustre_msg_set_opc);
+
+void lustre_msg_set_last_xid(struct lustre_msg *msg, __u64 last_xid)
+{
+ switch (msg->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V2: {
+ struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+ LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg);
+ pb->pb_last_xid = last_xid;
+ return;
+ }
+ default:
+ LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic);
+ }
+}
+EXPORT_SYMBOL(lustre_msg_set_last_xid);
+
+void lustre_msg_set_last_committed(struct lustre_msg *msg, __u64 last_committed)
+{
+ switch (msg->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V2: {
+ struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+ LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg);
+ pb->pb_last_committed = last_committed;
+ return;
+ }
+ default:
+ LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic);
+ }
+}
+EXPORT_SYMBOL(lustre_msg_set_last_committed);
+
+void lustre_msg_set_versions(struct lustre_msg *msg, __u64 *versions)
+{
+ switch (msg->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V1:
+ return;
+ case LUSTRE_MSG_MAGIC_V2: {
+ struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+ LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg);
+ pb->pb_pre_versions[0] = versions[0];
+ pb->pb_pre_versions[1] = versions[1];
+ pb->pb_pre_versions[2] = versions[2];
+ pb->pb_pre_versions[3] = versions[3];
+ return;
+ }
+ default:
+ LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic);
+ }
+}
+EXPORT_SYMBOL(lustre_msg_set_versions);
+
+void lustre_msg_set_transno(struct lustre_msg *msg, __u64 transno)
+{
+ switch (msg->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V2: {
+ struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+ LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg);
+ pb->pb_transno = transno;
+ return;
+ }
+ default:
+ LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic);
+ }
+}
+EXPORT_SYMBOL(lustre_msg_set_transno);
+
+void lustre_msg_set_status(struct lustre_msg *msg, __u32 status)
+{
+ switch (msg->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V2: {
+ struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+ LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg);
+ pb->pb_status = status;
+ return;
+ }
+ default:
+ LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic);
+ }
+}
+EXPORT_SYMBOL(lustre_msg_set_status);
+
+void lustre_msg_set_conn_cnt(struct lustre_msg *msg, __u32 conn_cnt)
+{
+ switch (msg->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V2: {
+ struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+ LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg);
+ pb->pb_conn_cnt = conn_cnt;
+ return;
+ }
+ default:
+ LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic);
+ }
+}
+EXPORT_SYMBOL(lustre_msg_set_conn_cnt);
+
+void lustre_msg_set_timeout(struct lustre_msg *msg, __u32 timeout)
+{
+ switch (msg->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V1:
+ return;
+ case LUSTRE_MSG_MAGIC_V2: {
+ struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+ LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg);
+ pb->pb_timeout = timeout;
+ return;
+ }
+ default:
+ LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic);
+ }
+}
+
+void lustre_msg_set_service_time(struct lustre_msg *msg, __u32 service_time)
+{
+ switch (msg->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V1:
+ return;
+ case LUSTRE_MSG_MAGIC_V2: {
+ struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg);
+ LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg);
+ pb->pb_service_time = service_time;
+ return;
+ }
+ default:
+ LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic);
+ }
+}
+
+void lustre_msg_set_jobid(struct lustre_msg *msg, char *jobid)
+{
+ switch (msg->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V1:
+ return;
+ case LUSTRE_MSG_MAGIC_V2: {
+ __u32 opc = lustre_msg_get_opc(msg);
+ struct ptlrpc_body *pb;
+
+ /* Don't set jobid for ldlm ast RPCs, they've been shrunk.
+ * See the comment in ptlrpc_request_pack(). */
+ if (!opc || opc == LDLM_BL_CALLBACK ||
+ opc == LDLM_CP_CALLBACK || opc == LDLM_GL_CALLBACK)
+ return;
+
+ pb = lustre_msg_buf_v2(msg, MSG_PTLRPC_BODY_OFF,
+ sizeof(struct ptlrpc_body));
+ LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg);
+
+ if (jobid != NULL)
+ memcpy(pb->pb_jobid, jobid, JOBSTATS_JOBID_SIZE);
+ else if (pb->pb_jobid[0] == '\0')
+ lustre_get_jobid(pb->pb_jobid);
+ return;
+ }
+ default:
+ LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic);
+ }
+}
+EXPORT_SYMBOL(lustre_msg_set_jobid);
+
+void lustre_msg_set_cksum(struct lustre_msg *msg, __u32 cksum)
+{
+ switch (msg->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V1:
+ return;
+ case LUSTRE_MSG_MAGIC_V2:
+ msg->lm_cksum = cksum;
+ return;
+ default:
+ LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic);
+ }
+}
+
+
+void ptlrpc_request_set_replen(struct ptlrpc_request *req)
+{
+ int count = req_capsule_filled_sizes(&req->rq_pill, RCL_SERVER);
+
+ req->rq_replen = lustre_msg_size(req->rq_reqmsg->lm_magic, count,
+ req->rq_pill.rc_area[RCL_SERVER]);
+ if (req->rq_reqmsg->lm_magic == LUSTRE_MSG_MAGIC_V2)
+ req->rq_reqmsg->lm_repsize = req->rq_replen;
+}
+EXPORT_SYMBOL(ptlrpc_request_set_replen);
+
+void ptlrpc_req_set_repsize(struct ptlrpc_request *req, int count, __u32 *lens)
+{
+ req->rq_replen = lustre_msg_size(req->rq_reqmsg->lm_magic, count, lens);
+ if (req->rq_reqmsg->lm_magic == LUSTRE_MSG_MAGIC_V2)
+ req->rq_reqmsg->lm_repsize = req->rq_replen;
+}
+EXPORT_SYMBOL(ptlrpc_req_set_repsize);
+
+/**
+ * Send a remote set_info_async.
+ *
+ * This may go from client to server or server to client.
+ */
+int do_set_info_async(struct obd_import *imp,
+ int opcode, int version,
+ u32 keylen, void *key,
+ u32 vallen, void *val,
+ struct ptlrpc_request_set *set)
+{
+ struct ptlrpc_request *req;
+ char *tmp;
+ int rc;
+
+ req = ptlrpc_request_alloc(imp, &RQF_OBD_SET_INFO);
+ if (req == NULL)
+ return -ENOMEM;
+
+ req_capsule_set_size(&req->rq_pill, &RMF_SETINFO_KEY,
+ RCL_CLIENT, keylen);
+ req_capsule_set_size(&req->rq_pill, &RMF_SETINFO_VAL,
+ RCL_CLIENT, vallen);
+ rc = ptlrpc_request_pack(req, version, opcode);
+ if (rc) {
+ ptlrpc_request_free(req);
+ return rc;
+ }
+
+ tmp = req_capsule_client_get(&req->rq_pill, &RMF_SETINFO_KEY);
+ memcpy(tmp, key, keylen);
+ tmp = req_capsule_client_get(&req->rq_pill, &RMF_SETINFO_VAL);
+ memcpy(tmp, val, vallen);
+
+ ptlrpc_request_set_replen(req);
+
+ if (set) {
+ ptlrpc_set_add_req(set, req);
+ ptlrpc_check_set(NULL, set);
+ } else {
+ rc = ptlrpc_queue_wait(req);
+ ptlrpc_req_finished(req);
+ }
+
+ return rc;
+}
+EXPORT_SYMBOL(do_set_info_async);
+
+/* byte flipping routines for all wire types declared in
+ * lustre_idl.h implemented here.
+ */
+void lustre_swab_ptlrpc_body(struct ptlrpc_body *b)
+{
+ __swab32s(&b->pb_type);
+ __swab32s(&b->pb_version);
+ __swab32s(&b->pb_opc);
+ __swab32s(&b->pb_status);
+ __swab64s(&b->pb_last_xid);
+ __swab64s(&b->pb_last_seen);
+ __swab64s(&b->pb_last_committed);
+ __swab64s(&b->pb_transno);
+ __swab32s(&b->pb_flags);
+ __swab32s(&b->pb_op_flags);
+ __swab32s(&b->pb_conn_cnt);
+ __swab32s(&b->pb_timeout);
+ __swab32s(&b->pb_service_time);
+ __swab32s(&b->pb_limit);
+ __swab64s(&b->pb_slv);
+ __swab64s(&b->pb_pre_versions[0]);
+ __swab64s(&b->pb_pre_versions[1]);
+ __swab64s(&b->pb_pre_versions[2]);
+ __swab64s(&b->pb_pre_versions[3]);
+ CLASSERT(offsetof(typeof(*b), pb_padding) != 0);
+ /* While we need to maintain compatibility between
+ * clients and servers without ptlrpc_body_v2 (< 2.3)
+ * do not swab any fields beyond pb_jobid, as we are
+ * using this swab function for both ptlrpc_body
+ * and ptlrpc_body_v2. */
+ CLASSERT(offsetof(typeof(*b), pb_jobid) != 0);
+}
+EXPORT_SYMBOL(lustre_swab_ptlrpc_body);
+
+void lustre_swab_connect(struct obd_connect_data *ocd)
+{
+ __swab64s(&ocd->ocd_connect_flags);
+ __swab32s(&ocd->ocd_version);
+ __swab32s(&ocd->ocd_grant);
+ __swab64s(&ocd->ocd_ibits_known);
+ __swab32s(&ocd->ocd_index);
+ __swab32s(&ocd->ocd_brw_size);
+ /* ocd_blocksize and ocd_inodespace don't need to be swabbed because
+ * they are 8-byte values */
+ __swab16s(&ocd->ocd_grant_extent);
+ __swab32s(&ocd->ocd_unused);
+ __swab64s(&ocd->ocd_transno);
+ __swab32s(&ocd->ocd_group);
+ __swab32s(&ocd->ocd_cksum_types);
+ __swab32s(&ocd->ocd_instance);
+ /* Fields after ocd_cksum_types are only accessible by the receiver
+ * if the corresponding flag in ocd_connect_flags is set. Accessing
+ * any field after ocd_maxbytes on the receiver without a valid flag
+ * may result in out-of-bound memory access and kernel oops. */
+ if (ocd->ocd_connect_flags & OBD_CONNECT_MAX_EASIZE)
+ __swab32s(&ocd->ocd_max_easize);
+ if (ocd->ocd_connect_flags & OBD_CONNECT_MAXBYTES)
+ __swab64s(&ocd->ocd_maxbytes);
+ CLASSERT(offsetof(typeof(*ocd), padding1) != 0);
+ CLASSERT(offsetof(typeof(*ocd), padding2) != 0);
+ CLASSERT(offsetof(typeof(*ocd), padding3) != 0);
+ CLASSERT(offsetof(typeof(*ocd), padding4) != 0);
+ CLASSERT(offsetof(typeof(*ocd), padding5) != 0);
+ CLASSERT(offsetof(typeof(*ocd), padding6) != 0);
+ CLASSERT(offsetof(typeof(*ocd), padding7) != 0);
+ CLASSERT(offsetof(typeof(*ocd), padding8) != 0);
+ CLASSERT(offsetof(typeof(*ocd), padding9) != 0);
+ CLASSERT(offsetof(typeof(*ocd), paddingA) != 0);
+ CLASSERT(offsetof(typeof(*ocd), paddingB) != 0);
+ CLASSERT(offsetof(typeof(*ocd), paddingC) != 0);
+ CLASSERT(offsetof(typeof(*ocd), paddingD) != 0);
+ CLASSERT(offsetof(typeof(*ocd), paddingE) != 0);
+ CLASSERT(offsetof(typeof(*ocd), paddingF) != 0);
+}
+
+void lustre_swab_obdo(struct obdo *o)
+{
+ __swab64s(&o->o_valid);
+ lustre_swab_ost_id(&o->o_oi);
+ __swab64s(&o->o_parent_seq);
+ __swab64s(&o->o_size);
+ __swab64s(&o->o_mtime);
+ __swab64s(&o->o_atime);
+ __swab64s(&o->o_ctime);
+ __swab64s(&o->o_blocks);
+ __swab64s(&o->o_grant);
+ __swab32s(&o->o_blksize);
+ __swab32s(&o->o_mode);
+ __swab32s(&o->o_uid);
+ __swab32s(&o->o_gid);
+ __swab32s(&o->o_flags);
+ __swab32s(&o->o_nlink);
+ __swab32s(&o->o_parent_oid);
+ __swab32s(&o->o_misc);
+ __swab64s(&o->o_ioepoch);
+ __swab32s(&o->o_stripe_idx);
+ __swab32s(&o->o_parent_ver);
+ /* o_handle is opaque */
+ /* o_lcookie is swabbed elsewhere */
+ __swab32s(&o->o_uid_h);
+ __swab32s(&o->o_gid_h);
+ __swab64s(&o->o_data_version);
+ CLASSERT(offsetof(typeof(*o), o_padding_4) != 0);
+ CLASSERT(offsetof(typeof(*o), o_padding_5) != 0);
+ CLASSERT(offsetof(typeof(*o), o_padding_6) != 0);
+
+}
+EXPORT_SYMBOL(lustre_swab_obdo);
+
+void lustre_swab_obd_statfs(struct obd_statfs *os)
+{
+ __swab64s(&os->os_type);
+ __swab64s(&os->os_blocks);
+ __swab64s(&os->os_bfree);
+ __swab64s(&os->os_bavail);
+ __swab64s(&os->os_files);
+ __swab64s(&os->os_ffree);
+ /* no need to swab os_fsid */
+ __swab32s(&os->os_bsize);
+ __swab32s(&os->os_namelen);
+ __swab64s(&os->os_maxbytes);
+ __swab32s(&os->os_state);
+ CLASSERT(offsetof(typeof(*os), os_fprecreated) != 0);
+ CLASSERT(offsetof(typeof(*os), os_spare2) != 0);
+ CLASSERT(offsetof(typeof(*os), os_spare3) != 0);
+ CLASSERT(offsetof(typeof(*os), os_spare4) != 0);
+ CLASSERT(offsetof(typeof(*os), os_spare5) != 0);
+ CLASSERT(offsetof(typeof(*os), os_spare6) != 0);
+ CLASSERT(offsetof(typeof(*os), os_spare7) != 0);
+ CLASSERT(offsetof(typeof(*os), os_spare8) != 0);
+ CLASSERT(offsetof(typeof(*os), os_spare9) != 0);
+}
+EXPORT_SYMBOL(lustre_swab_obd_statfs);
+
+void lustre_swab_obd_ioobj(struct obd_ioobj *ioo)
+{
+ lustre_swab_ost_id(&ioo->ioo_oid);
+ __swab32s(&ioo->ioo_max_brw);
+ __swab32s(&ioo->ioo_bufcnt);
+}
+EXPORT_SYMBOL(lustre_swab_obd_ioobj);
+
+void lustre_swab_niobuf_remote(struct niobuf_remote *nbr)
+{
+ __swab64s(&nbr->offset);
+ __swab32s(&nbr->len);
+ __swab32s(&nbr->flags);
+}
+EXPORT_SYMBOL(lustre_swab_niobuf_remote);
+
+void lustre_swab_ost_body(struct ost_body *b)
+{
+ lustre_swab_obdo(&b->oa);
+}
+EXPORT_SYMBOL(lustre_swab_ost_body);
+
+void lustre_swab_ost_last_id(u64 *id)
+{
+ __swab64s(id);
+}
+EXPORT_SYMBOL(lustre_swab_ost_last_id);
+
+void lustre_swab_generic_32s(__u32 *val)
+{
+ __swab32s(val);
+}
+EXPORT_SYMBOL(lustre_swab_generic_32s);
+
+void lustre_swab_gl_desc(union ldlm_gl_desc *desc)
+{
+ lustre_swab_lu_fid(&desc->lquota_desc.gl_id.qid_fid);
+ __swab64s(&desc->lquota_desc.gl_flags);
+ __swab64s(&desc->lquota_desc.gl_ver);
+ __swab64s(&desc->lquota_desc.gl_hardlimit);
+ __swab64s(&desc->lquota_desc.gl_softlimit);
+ __swab64s(&desc->lquota_desc.gl_time);
+ CLASSERT(offsetof(typeof(desc->lquota_desc), gl_pad2) != 0);
+}
+
+void lustre_swab_ost_lvb_v1(struct ost_lvb_v1 *lvb)
+{
+ __swab64s(&lvb->lvb_size);
+ __swab64s(&lvb->lvb_mtime);
+ __swab64s(&lvb->lvb_atime);
+ __swab64s(&lvb->lvb_ctime);
+ __swab64s(&lvb->lvb_blocks);
+}
+EXPORT_SYMBOL(lustre_swab_ost_lvb_v1);
+
+void lustre_swab_ost_lvb(struct ost_lvb *lvb)
+{
+ __swab64s(&lvb->lvb_size);
+ __swab64s(&lvb->lvb_mtime);
+ __swab64s(&lvb->lvb_atime);
+ __swab64s(&lvb->lvb_ctime);
+ __swab64s(&lvb->lvb_blocks);
+ __swab32s(&lvb->lvb_mtime_ns);
+ __swab32s(&lvb->lvb_atime_ns);
+ __swab32s(&lvb->lvb_ctime_ns);
+ __swab32s(&lvb->lvb_padding);
+}
+EXPORT_SYMBOL(lustre_swab_ost_lvb);
+
+void lustre_swab_lquota_lvb(struct lquota_lvb *lvb)
+{
+ __swab64s(&lvb->lvb_flags);
+ __swab64s(&lvb->lvb_id_may_rel);
+ __swab64s(&lvb->lvb_id_rel);
+ __swab64s(&lvb->lvb_id_qunit);
+ __swab64s(&lvb->lvb_pad1);
+}
+EXPORT_SYMBOL(lustre_swab_lquota_lvb);
+
+void lustre_swab_mdt_body(struct mdt_body *b)
+{
+ lustre_swab_lu_fid(&b->fid1);
+ lustre_swab_lu_fid(&b->fid2);
+ /* handle is opaque */
+ __swab64s(&b->valid);
+ __swab64s(&b->size);
+ __swab64s(&b->mtime);
+ __swab64s(&b->atime);
+ __swab64s(&b->ctime);
+ __swab64s(&b->blocks);
+ __swab64s(&b->ioepoch);
+ __swab64s(&b->t_state);
+ __swab32s(&b->fsuid);
+ __swab32s(&b->fsgid);
+ __swab32s(&b->capability);
+ __swab32s(&b->mode);
+ __swab32s(&b->uid);
+ __swab32s(&b->gid);
+ __swab32s(&b->flags);
+ __swab32s(&b->rdev);
+ __swab32s(&b->nlink);
+ CLASSERT(offsetof(typeof(*b), unused2) != 0);
+ __swab32s(&b->suppgid);
+ __swab32s(&b->eadatasize);
+ __swab32s(&b->aclsize);
+ __swab32s(&b->max_mdsize);
+ __swab32s(&b->max_cookiesize);
+ __swab32s(&b->uid_h);
+ __swab32s(&b->gid_h);
+ CLASSERT(offsetof(typeof(*b), padding_5) != 0);
+}
+EXPORT_SYMBOL(lustre_swab_mdt_body);
+
+void lustre_swab_mdt_ioepoch(struct mdt_ioepoch *b)
+{
+ /* handle is opaque */
+ __swab64s(&b->ioepoch);
+ __swab32s(&b->flags);
+ CLASSERT(offsetof(typeof(*b), padding) != 0);
+}
+EXPORT_SYMBOL(lustre_swab_mdt_ioepoch);
+
+void lustre_swab_mgs_target_info(struct mgs_target_info *mti)
+{
+ int i;
+ __swab32s(&mti->mti_lustre_ver);
+ __swab32s(&mti->mti_stripe_index);
+ __swab32s(&mti->mti_config_ver);
+ __swab32s(&mti->mti_flags);
+ __swab32s(&mti->mti_instance);
+ __swab32s(&mti->mti_nid_count);
+ CLASSERT(sizeof(lnet_nid_t) == sizeof(__u64));
+ for (i = 0; i < MTI_NIDS_MAX; i++)
+ __swab64s(&mti->mti_nids[i]);
+}
+EXPORT_SYMBOL(lustre_swab_mgs_target_info);
+
+void lustre_swab_mgs_nidtbl_entry(struct mgs_nidtbl_entry *entry)
+{
+ int i;
+
+ __swab64s(&entry->mne_version);
+ __swab32s(&entry->mne_instance);
+ __swab32s(&entry->mne_index);
+ __swab32s(&entry->mne_length);
+
+ /* mne_nid_(count|type) must be one byte size because we're gonna
+ * access it w/o swapping. */
+ CLASSERT(sizeof(entry->mne_nid_count) == sizeof(__u8));
+ CLASSERT(sizeof(entry->mne_nid_type) == sizeof(__u8));
+
+ /* remove this assertion if ipv6 is supported. */
+ LASSERT(entry->mne_nid_type == 0);
+ for (i = 0; i < entry->mne_nid_count; i++) {
+ CLASSERT(sizeof(lnet_nid_t) == sizeof(__u64));
+ __swab64s(&entry->u.nids[i]);
+ }
+}
+EXPORT_SYMBOL(lustre_swab_mgs_nidtbl_entry);
+
+void lustre_swab_mgs_config_body(struct mgs_config_body *body)
+{
+ __swab64s(&body->mcb_offset);
+ __swab32s(&body->mcb_units);
+ __swab16s(&body->mcb_type);
+}
+EXPORT_SYMBOL(lustre_swab_mgs_config_body);
+
+void lustre_swab_mgs_config_res(struct mgs_config_res *body)
+{
+ __swab64s(&body->mcr_offset);
+ __swab64s(&body->mcr_size);
+}
+EXPORT_SYMBOL(lustre_swab_mgs_config_res);
+
+static void lustre_swab_obd_dqinfo(struct obd_dqinfo *i)
+{
+ __swab64s(&i->dqi_bgrace);
+ __swab64s(&i->dqi_igrace);
+ __swab32s(&i->dqi_flags);
+ __swab32s(&i->dqi_valid);
+}
+
+static void lustre_swab_obd_dqblk(struct obd_dqblk *b)
+{
+ __swab64s(&b->dqb_ihardlimit);
+ __swab64s(&b->dqb_isoftlimit);
+ __swab64s(&b->dqb_curinodes);
+ __swab64s(&b->dqb_bhardlimit);
+ __swab64s(&b->dqb_bsoftlimit);
+ __swab64s(&b->dqb_curspace);
+ __swab64s(&b->dqb_btime);
+ __swab64s(&b->dqb_itime);
+ __swab32s(&b->dqb_valid);
+ CLASSERT(offsetof(typeof(*b), dqb_padding) != 0);
+}
+
+void lustre_swab_obd_quotactl(struct obd_quotactl *q)
+{
+ __swab32s(&q->qc_cmd);
+ __swab32s(&q->qc_type);
+ __swab32s(&q->qc_id);
+ __swab32s(&q->qc_stat);
+ lustre_swab_obd_dqinfo(&q->qc_dqinfo);
+ lustre_swab_obd_dqblk(&q->qc_dqblk);
+}
+EXPORT_SYMBOL(lustre_swab_obd_quotactl);
+
+void lustre_swab_mdt_remote_perm(struct mdt_remote_perm *p)
+{
+ __swab32s(&p->rp_uid);
+ __swab32s(&p->rp_gid);
+ __swab32s(&p->rp_fsuid);
+ __swab32s(&p->rp_fsuid_h);
+ __swab32s(&p->rp_fsgid);
+ __swab32s(&p->rp_fsgid_h);
+ __swab32s(&p->rp_access_perm);
+ __swab32s(&p->rp_padding);
+};
+EXPORT_SYMBOL(lustre_swab_mdt_remote_perm);
+
+void lustre_swab_fid2path(struct getinfo_fid2path *gf)
+{
+ lustre_swab_lu_fid(&gf->gf_fid);
+ __swab64s(&gf->gf_recno);
+ __swab32s(&gf->gf_linkno);
+ __swab32s(&gf->gf_pathlen);
+}
+EXPORT_SYMBOL(lustre_swab_fid2path);
+
+void lustre_swab_fiemap_extent(struct ll_fiemap_extent *fm_extent)
+{
+ __swab64s(&fm_extent->fe_logical);
+ __swab64s(&fm_extent->fe_physical);
+ __swab64s(&fm_extent->fe_length);
+ __swab32s(&fm_extent->fe_flags);
+ __swab32s(&fm_extent->fe_device);
+}
+
+void lustre_swab_fiemap(struct ll_user_fiemap *fiemap)
+{
+ int i;
+
+ __swab64s(&fiemap->fm_start);
+ __swab64s(&fiemap->fm_length);
+ __swab32s(&fiemap->fm_flags);
+ __swab32s(&fiemap->fm_mapped_extents);
+ __swab32s(&fiemap->fm_extent_count);
+ __swab32s(&fiemap->fm_reserved);
+
+ for (i = 0; i < fiemap->fm_mapped_extents; i++)
+ lustre_swab_fiemap_extent(&fiemap->fm_extents[i]);
+}
+EXPORT_SYMBOL(lustre_swab_fiemap);
+
+void lustre_swab_idx_info(struct idx_info *ii)
+{
+ __swab32s(&ii->ii_magic);
+ __swab32s(&ii->ii_flags);
+ __swab16s(&ii->ii_count);
+ __swab32s(&ii->ii_attrs);
+ lustre_swab_lu_fid(&ii->ii_fid);
+ __swab64s(&ii->ii_version);
+ __swab64s(&ii->ii_hash_start);
+ __swab64s(&ii->ii_hash_end);
+ __swab16s(&ii->ii_keysize);
+ __swab16s(&ii->ii_recsize);
+}
+
+void lustre_swab_lip_header(struct lu_idxpage *lip)
+{
+ /* swab header */
+ __swab32s(&lip->lip_magic);
+ __swab16s(&lip->lip_flags);
+ __swab16s(&lip->lip_nr);
+}
+EXPORT_SYMBOL(lustre_swab_lip_header);
+
+void lustre_swab_mdt_rec_reint (struct mdt_rec_reint *rr)
+{
+ __swab32s(&rr->rr_opcode);
+ __swab32s(&rr->rr_cap);
+ __swab32s(&rr->rr_fsuid);
+ /* rr_fsuid_h is unused */
+ __swab32s(&rr->rr_fsgid);
+ /* rr_fsgid_h is unused */
+ __swab32s(&rr->rr_suppgid1);
+ /* rr_suppgid1_h is unused */
+ __swab32s(&rr->rr_suppgid2);
+ /* rr_suppgid2_h is unused */
+ lustre_swab_lu_fid(&rr->rr_fid1);
+ lustre_swab_lu_fid(&rr->rr_fid2);
+ __swab64s(&rr->rr_mtime);
+ __swab64s(&rr->rr_atime);
+ __swab64s(&rr->rr_ctime);
+ __swab64s(&rr->rr_size);
+ __swab64s(&rr->rr_blocks);
+ __swab32s(&rr->rr_bias);
+ __swab32s(&rr->rr_mode);
+ __swab32s(&rr->rr_flags);
+ __swab32s(&rr->rr_flags_h);
+ __swab32s(&rr->rr_umask);
+
+ CLASSERT(offsetof(typeof(*rr), rr_padding_4) != 0);
+};
+EXPORT_SYMBOL(lustre_swab_mdt_rec_reint);
+
+void lustre_swab_lov_desc(struct lov_desc *ld)
+{
+ __swab32s(&ld->ld_tgt_count);
+ __swab32s(&ld->ld_active_tgt_count);
+ __swab32s(&ld->ld_default_stripe_count);
+ __swab32s(&ld->ld_pattern);
+ __swab64s(&ld->ld_default_stripe_size);
+ __swab64s(&ld->ld_default_stripe_offset);
+ __swab32s(&ld->ld_qos_maxage);
+ /* uuid endian insensitive */
+}
+EXPORT_SYMBOL(lustre_swab_lov_desc);
+
+void lustre_swab_lmv_desc(struct lmv_desc *ld)
+{
+ __swab32s(&ld->ld_tgt_count);
+ __swab32s(&ld->ld_active_tgt_count);
+ __swab32s(&ld->ld_default_stripe_count);
+ __swab32s(&ld->ld_pattern);
+ __swab64s(&ld->ld_default_hash_size);
+ __swab32s(&ld->ld_qos_maxage);
+ /* uuid endian insensitive */
+}
+
+void lustre_swab_lmv_stripe_md(struct lmv_stripe_md *mea)
+{
+ __swab32s(&mea->mea_magic);
+ __swab32s(&mea->mea_count);
+ __swab32s(&mea->mea_master);
+ CLASSERT(offsetof(typeof(*mea), mea_padding) != 0);
+}
+
+void lustre_swab_lmv_user_md(struct lmv_user_md *lum)
+{
+ int i;
+
+ __swab32s(&lum->lum_magic);
+ __swab32s(&lum->lum_stripe_count);
+ __swab32s(&lum->lum_stripe_offset);
+ __swab32s(&lum->lum_hash_type);
+ __swab32s(&lum->lum_type);
+ CLASSERT(offsetof(typeof(*lum), lum_padding1) != 0);
+ CLASSERT(offsetof(typeof(*lum), lum_padding2) != 0);
+ CLASSERT(offsetof(typeof(*lum), lum_padding3) != 0);
+
+ for (i = 0; i < lum->lum_stripe_count; i++) {
+ __swab32s(&lum->lum_objects[i].lum_mds);
+ lustre_swab_lu_fid(&lum->lum_objects[i].lum_fid);
+ }
+
+}
+EXPORT_SYMBOL(lustre_swab_lmv_user_md);
+
+static void print_lum(struct lov_user_md *lum)
+{
+ CDEBUG(D_OTHER, "lov_user_md %p:\n", lum);
+ CDEBUG(D_OTHER, "\tlmm_magic: %#x\n", lum->lmm_magic);
+ CDEBUG(D_OTHER, "\tlmm_pattern: %#x\n", lum->lmm_pattern);
+ CDEBUG(D_OTHER, "\tlmm_object_id: %llu\n", lmm_oi_id(&lum->lmm_oi));
+ CDEBUG(D_OTHER, "\tlmm_object_gr: %llu\n", lmm_oi_seq(&lum->lmm_oi));
+ CDEBUG(D_OTHER, "\tlmm_stripe_size: %#x\n", lum->lmm_stripe_size);
+ CDEBUG(D_OTHER, "\tlmm_stripe_count: %#x\n", lum->lmm_stripe_count);
+ CDEBUG(D_OTHER, "\tlmm_stripe_offset/lmm_layout_gen: %#x\n",
+ lum->lmm_stripe_offset);
+}
+
+static void lustre_swab_lmm_oi(struct ost_id *oi)
+{
+ __swab64s(&oi->oi.oi_id);
+ __swab64s(&oi->oi.oi_seq);
+}
+
+static void lustre_swab_lov_user_md_common(struct lov_user_md_v1 *lum)
+{
+ __swab32s(&lum->lmm_magic);
+ __swab32s(&lum->lmm_pattern);
+ lustre_swab_lmm_oi(&lum->lmm_oi);
+ __swab32s(&lum->lmm_stripe_size);
+ __swab16s(&lum->lmm_stripe_count);
+ __swab16s(&lum->lmm_stripe_offset);
+ print_lum(lum);
+}
+
+void lustre_swab_lov_user_md_v1(struct lov_user_md_v1 *lum)
+{
+ CDEBUG(D_IOCTL, "swabbing lov_user_md v1\n");
+ lustre_swab_lov_user_md_common(lum);
+}
+EXPORT_SYMBOL(lustre_swab_lov_user_md_v1);
+
+void lustre_swab_lov_user_md_v3(struct lov_user_md_v3 *lum)
+{
+ CDEBUG(D_IOCTL, "swabbing lov_user_md v3\n");
+ lustre_swab_lov_user_md_common((struct lov_user_md_v1 *)lum);
+ /* lmm_pool_name nothing to do with char */
+}
+EXPORT_SYMBOL(lustre_swab_lov_user_md_v3);
+
+void lustre_swab_lov_mds_md(struct lov_mds_md *lmm)
+{
+ CDEBUG(D_IOCTL, "swabbing lov_mds_md\n");
+ __swab32s(&lmm->lmm_magic);
+ __swab32s(&lmm->lmm_pattern);
+ lustre_swab_lmm_oi(&lmm->lmm_oi);
+ __swab32s(&lmm->lmm_stripe_size);
+ __swab16s(&lmm->lmm_stripe_count);
+ __swab16s(&lmm->lmm_layout_gen);
+}
+EXPORT_SYMBOL(lustre_swab_lov_mds_md);
+
+void lustre_swab_lov_user_md_objects(struct lov_user_ost_data *lod,
+ int stripe_count)
+{
+ int i;
+
+ for (i = 0; i < stripe_count; i++) {
+ lustre_swab_ost_id(&(lod[i].l_ost_oi));
+ __swab32s(&(lod[i].l_ost_gen));
+ __swab32s(&(lod[i].l_ost_idx));
+ }
+}
+EXPORT_SYMBOL(lustre_swab_lov_user_md_objects);
+
+void lustre_swab_ldlm_res_id(struct ldlm_res_id *id)
+{
+ int i;
+
+ for (i = 0; i < RES_NAME_SIZE; i++)
+ __swab64s(&id->name[i]);
+}
+EXPORT_SYMBOL(lustre_swab_ldlm_res_id);
+
+void lustre_swab_ldlm_policy_data(ldlm_wire_policy_data_t *d)
+{
+ /* the lock data is a union and the first two fields are always an
+ * extent so it's ok to process an LDLM_EXTENT and LDLM_FLOCK lock
+ * data the same way. */
+ __swab64s(&d->l_extent.start);
+ __swab64s(&d->l_extent.end);
+ __swab64s(&d->l_extent.gid);
+ __swab64s(&d->l_flock.lfw_owner);
+ __swab32s(&d->l_flock.lfw_pid);
+}
+EXPORT_SYMBOL(lustre_swab_ldlm_policy_data);
+
+void lustre_swab_ldlm_intent(struct ldlm_intent *i)
+{
+ __swab64s(&i->opc);
+}
+EXPORT_SYMBOL(lustre_swab_ldlm_intent);
+
+void lustre_swab_ldlm_resource_desc(struct ldlm_resource_desc *r)
+{
+ __swab32s(&r->lr_type);
+ CLASSERT(offsetof(typeof(*r), lr_padding) != 0);
+ lustre_swab_ldlm_res_id(&r->lr_name);
+}
+EXPORT_SYMBOL(lustre_swab_ldlm_resource_desc);
+
+void lustre_swab_ldlm_lock_desc(struct ldlm_lock_desc *l)
+{
+ lustre_swab_ldlm_resource_desc(&l->l_resource);
+ __swab32s(&l->l_req_mode);
+ __swab32s(&l->l_granted_mode);
+ lustre_swab_ldlm_policy_data(&l->l_policy_data);
+}
+EXPORT_SYMBOL(lustre_swab_ldlm_lock_desc);
+
+void lustre_swab_ldlm_request(struct ldlm_request *rq)
+{
+ __swab32s(&rq->lock_flags);
+ lustre_swab_ldlm_lock_desc(&rq->lock_desc);
+ __swab32s(&rq->lock_count);
+ /* lock_handle[] opaque */
+}
+EXPORT_SYMBOL(lustre_swab_ldlm_request);
+
+void lustre_swab_ldlm_reply(struct ldlm_reply *r)
+{
+ __swab32s(&r->lock_flags);
+ CLASSERT(offsetof(typeof(*r), lock_padding) != 0);
+ lustre_swab_ldlm_lock_desc(&r->lock_desc);
+ /* lock_handle opaque */
+ __swab64s(&r->lock_policy_res1);
+ __swab64s(&r->lock_policy_res2);
+}
+EXPORT_SYMBOL(lustre_swab_ldlm_reply);
+
+void lustre_swab_quota_body(struct quota_body *b)
+{
+ lustre_swab_lu_fid(&b->qb_fid);
+ lustre_swab_lu_fid((struct lu_fid *)&b->qb_id);
+ __swab32s(&b->qb_flags);
+ __swab64s(&b->qb_count);
+ __swab64s(&b->qb_usage);
+ __swab64s(&b->qb_slv_ver);
+}
+
+/* Dump functions */
+void dump_ioo(struct obd_ioobj *ioo)
+{
+ CDEBUG(D_RPCTRACE,
+ "obd_ioobj: ioo_oid=" DOSTID ", ioo_max_brw=%#x, ioo_bufct=%d\n",
+ POSTID(&ioo->ioo_oid), ioo->ioo_max_brw,
+ ioo->ioo_bufcnt);
+}
+EXPORT_SYMBOL(dump_ioo);
+
+void dump_rniobuf(struct niobuf_remote *nb)
+{
+ CDEBUG(D_RPCTRACE, "niobuf_remote: offset=%llu, len=%d, flags=%x\n",
+ nb->offset, nb->len, nb->flags);
+}
+EXPORT_SYMBOL(dump_rniobuf);
+
+void dump_obdo(struct obdo *oa)
+{
+ __u32 valid = oa->o_valid;
+
+ CDEBUG(D_RPCTRACE, "obdo: o_valid = %08x\n", valid);
+ if (valid & OBD_MD_FLID)
+ CDEBUG(D_RPCTRACE, "obdo: id = "DOSTID"\n", POSTID(&oa->o_oi));
+ if (valid & OBD_MD_FLFID)
+ CDEBUG(D_RPCTRACE, "obdo: o_parent_seq = %#llx\n",
+ oa->o_parent_seq);
+ if (valid & OBD_MD_FLSIZE)
+ CDEBUG(D_RPCTRACE, "obdo: o_size = %lld\n", oa->o_size);
+ if (valid & OBD_MD_FLMTIME)
+ CDEBUG(D_RPCTRACE, "obdo: o_mtime = %lld\n", oa->o_mtime);
+ if (valid & OBD_MD_FLATIME)
+ CDEBUG(D_RPCTRACE, "obdo: o_atime = %lld\n", oa->o_atime);
+ if (valid & OBD_MD_FLCTIME)
+ CDEBUG(D_RPCTRACE, "obdo: o_ctime = %lld\n", oa->o_ctime);
+ if (valid & OBD_MD_FLBLOCKS) /* allocation of space */
+ CDEBUG(D_RPCTRACE, "obdo: o_blocks = %lld\n", oa->o_blocks);
+ if (valid & OBD_MD_FLGRANT)
+ CDEBUG(D_RPCTRACE, "obdo: o_grant = %lld\n", oa->o_grant);
+ if (valid & OBD_MD_FLBLKSZ)
+ CDEBUG(D_RPCTRACE, "obdo: o_blksize = %d\n", oa->o_blksize);
+ if (valid & (OBD_MD_FLTYPE | OBD_MD_FLMODE))
+ CDEBUG(D_RPCTRACE, "obdo: o_mode = %o\n",
+ oa->o_mode & ((valid & OBD_MD_FLTYPE ? S_IFMT : 0) |
+ (valid & OBD_MD_FLMODE ? ~S_IFMT : 0)));
+ if (valid & OBD_MD_FLUID)
+ CDEBUG(D_RPCTRACE, "obdo: o_uid = %u\n", oa->o_uid);
+ if (valid & OBD_MD_FLUID)
+ CDEBUG(D_RPCTRACE, "obdo: o_uid_h = %u\n", oa->o_uid_h);
+ if (valid & OBD_MD_FLGID)
+ CDEBUG(D_RPCTRACE, "obdo: o_gid = %u\n", oa->o_gid);
+ if (valid & OBD_MD_FLGID)
+ CDEBUG(D_RPCTRACE, "obdo: o_gid_h = %u\n", oa->o_gid_h);
+ if (valid & OBD_MD_FLFLAGS)
+ CDEBUG(D_RPCTRACE, "obdo: o_flags = %x\n", oa->o_flags);
+ if (valid & OBD_MD_FLNLINK)
+ CDEBUG(D_RPCTRACE, "obdo: o_nlink = %u\n", oa->o_nlink);
+ else if (valid & OBD_MD_FLCKSUM)
+ CDEBUG(D_RPCTRACE, "obdo: o_checksum (o_nlink) = %u\n",
+ oa->o_nlink);
+ if (valid & OBD_MD_FLGENER)
+ CDEBUG(D_RPCTRACE, "obdo: o_parent_oid = %x\n",
+ oa->o_parent_oid);
+ if (valid & OBD_MD_FLEPOCH)
+ CDEBUG(D_RPCTRACE, "obdo: o_ioepoch = %lld\n",
+ oa->o_ioepoch);
+ if (valid & OBD_MD_FLFID) {
+ CDEBUG(D_RPCTRACE, "obdo: o_stripe_idx = %u\n",
+ oa->o_stripe_idx);
+ CDEBUG(D_RPCTRACE, "obdo: o_parent_ver = %x\n",
+ oa->o_parent_ver);
+ }
+ if (valid & OBD_MD_FLHANDLE)
+ CDEBUG(D_RPCTRACE, "obdo: o_handle = %lld\n",
+ oa->o_handle.cookie);
+ if (valid & OBD_MD_FLCOOKIE)
+ CDEBUG(D_RPCTRACE, "obdo: o_lcookie = (llog_cookie dumping not yet implemented)\n");
+}
+EXPORT_SYMBOL(dump_obdo);
+
+void dump_ost_body(struct ost_body *ob)
+{
+ dump_obdo(&ob->oa);
+}
+EXPORT_SYMBOL(dump_ost_body);
+
+void dump_rcs(__u32 *rc)
+{
+ CDEBUG(D_RPCTRACE, "rmf_rcs: %d\n", *rc);
+}
+EXPORT_SYMBOL(dump_rcs);
+
+static inline int req_ptlrpc_body_swabbed(struct ptlrpc_request *req)
+{
+ LASSERT(req->rq_reqmsg);
+
+ switch (req->rq_reqmsg->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V2:
+ return lustre_req_swabbed(req, MSG_PTLRPC_BODY_OFF);
+ default:
+ CERROR("bad lustre msg magic: %#08X\n",
+ req->rq_reqmsg->lm_magic);
+ }
+ return 0;
+}
+
+static inline int rep_ptlrpc_body_swabbed(struct ptlrpc_request *req)
+{
+ LASSERT(req->rq_repmsg);
+
+ switch (req->rq_repmsg->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V2:
+ return lustre_rep_swabbed(req, MSG_PTLRPC_BODY_OFF);
+ default:
+ /* uninitialized yet */
+ return 0;
+ }
+}
+
+void _debug_req(struct ptlrpc_request *req,
+ struct libcfs_debug_msg_data *msgdata,
+ const char *fmt, ...)
+{
+ int req_ok = req->rq_reqmsg != NULL;
+ int rep_ok = req->rq_repmsg != NULL;
+ lnet_nid_t nid = LNET_NID_ANY;
+ va_list args;
+
+ if (ptlrpc_req_need_swab(req)) {
+ req_ok = req_ok && req_ptlrpc_body_swabbed(req);
+ rep_ok = rep_ok && rep_ptlrpc_body_swabbed(req);
+ }
+
+ if (req->rq_import && req->rq_import->imp_connection)
+ nid = req->rq_import->imp_connection->c_peer.nid;
+ else if (req->rq_export && req->rq_export->exp_connection)
+ nid = req->rq_export->exp_connection->c_peer.nid;
+
+ va_start(args, fmt);
+ libcfs_debug_vmsg2(msgdata, fmt, args,
+ " req@%p x%llu/t%lld(%lld) o%d->%s@%s:%d/%d lens %d/%d e %d to %d dl " CFS_TIME_T " ref %d fl " REQ_FLAGS_FMT "/%x/%x rc %d/%d\n",
+ req, req->rq_xid, req->rq_transno,
+ req_ok ? lustre_msg_get_transno(req->rq_reqmsg) : 0,
+ req_ok ? lustre_msg_get_opc(req->rq_reqmsg) : -1,
+ req->rq_import ?
+ req->rq_import->imp_obd->obd_name :
+ req->rq_export ?
+ req->rq_export->exp_client_uuid.uuid :
+ "<?>",
+ libcfs_nid2str(nid),
+ req->rq_request_portal, req->rq_reply_portal,
+ req->rq_reqlen, req->rq_replen,
+ req->rq_early_count, req->rq_timedout,
+ req->rq_deadline,
+ atomic_read(&req->rq_refcount),
+ DEBUG_REQ_FLAGS(req),
+ req_ok ? lustre_msg_get_flags(req->rq_reqmsg) : -1,
+ rep_ok ? lustre_msg_get_flags(req->rq_repmsg) : -1,
+ req->rq_status,
+ rep_ok ? lustre_msg_get_status(req->rq_repmsg) : -1);
+ va_end(args);
+}
+EXPORT_SYMBOL(_debug_req);
+
+void lustre_swab_lustre_capa(struct lustre_capa *c)
+{
+ lustre_swab_lu_fid(&c->lc_fid);
+ __swab64s(&c->lc_opc);
+ __swab64s(&c->lc_uid);
+ __swab64s(&c->lc_gid);
+ __swab32s(&c->lc_flags);
+ __swab32s(&c->lc_keyid);
+ __swab32s(&c->lc_timeout);
+ __swab32s(&c->lc_expiry);
+}
+EXPORT_SYMBOL(lustre_swab_lustre_capa);
+
+void lustre_swab_lustre_capa_key(struct lustre_capa_key *k)
+{
+ __swab64s(&k->lk_seq);
+ __swab32s(&k->lk_keyid);
+ CLASSERT(offsetof(typeof(*k), lk_padding) != 0);
+}
+EXPORT_SYMBOL(lustre_swab_lustre_capa_key);
+
+void lustre_swab_hsm_user_state(struct hsm_user_state *state)
+{
+ __swab32s(&state->hus_states);
+ __swab32s(&state->hus_archive_id);
+}
+EXPORT_SYMBOL(lustre_swab_hsm_user_state);
+
+void lustre_swab_hsm_state_set(struct hsm_state_set *hss)
+{
+ __swab32s(&hss->hss_valid);
+ __swab64s(&hss->hss_setmask);
+ __swab64s(&hss->hss_clearmask);
+ __swab32s(&hss->hss_archive_id);
+}
+EXPORT_SYMBOL(lustre_swab_hsm_state_set);
+
+void lustre_swab_hsm_extent(struct hsm_extent *extent)
+{
+ __swab64s(&extent->offset);
+ __swab64s(&extent->length);
+}
+
+void lustre_swab_hsm_current_action(struct hsm_current_action *action)
+{
+ __swab32s(&action->hca_state);
+ __swab32s(&action->hca_action);
+ lustre_swab_hsm_extent(&action->hca_location);
+}
+EXPORT_SYMBOL(lustre_swab_hsm_current_action);
+
+void lustre_swab_hsm_user_item(struct hsm_user_item *hui)
+{
+ lustre_swab_lu_fid(&hui->hui_fid);
+ lustre_swab_hsm_extent(&hui->hui_extent);
+}
+EXPORT_SYMBOL(lustre_swab_hsm_user_item);
+
+void lustre_swab_layout_intent(struct layout_intent *li)
+{
+ __swab32s(&li->li_opc);
+ __swab32s(&li->li_flags);
+ __swab64s(&li->li_start);
+ __swab64s(&li->li_end);
+}
+EXPORT_SYMBOL(lustre_swab_layout_intent);
+
+void lustre_swab_hsm_progress_kernel(struct hsm_progress_kernel *hpk)
+{
+ lustre_swab_lu_fid(&hpk->hpk_fid);
+ __swab64s(&hpk->hpk_cookie);
+ __swab64s(&hpk->hpk_extent.offset);
+ __swab64s(&hpk->hpk_extent.length);
+ __swab16s(&hpk->hpk_flags);
+ __swab16s(&hpk->hpk_errval);
+}
+EXPORT_SYMBOL(lustre_swab_hsm_progress_kernel);
+
+void lustre_swab_hsm_request(struct hsm_request *hr)
+{
+ __swab32s(&hr->hr_action);
+ __swab32s(&hr->hr_archive_id);
+ __swab64s(&hr->hr_flags);
+ __swab32s(&hr->hr_itemcount);
+ __swab32s(&hr->hr_data_len);
+}
+EXPORT_SYMBOL(lustre_swab_hsm_request);
+
+void lustre_swab_update_buf(struct update_buf *ub)
+{
+ __swab32s(&ub->ub_magic);
+ __swab32s(&ub->ub_count);
+}
+EXPORT_SYMBOL(lustre_swab_update_buf);
+
+void lustre_swab_update_reply_buf(struct update_reply *ur)
+{
+ int i;
+
+ __swab32s(&ur->ur_version);
+ __swab32s(&ur->ur_count);
+ for (i = 0; i < ur->ur_count; i++)
+ __swab32s(&ur->ur_lens[i]);
+}
+EXPORT_SYMBOL(lustre_swab_update_reply_buf);
+
+void lustre_swab_swap_layouts(struct mdc_swap_layouts *msl)
+{
+ __swab64s(&msl->msl_flags);
+}
+EXPORT_SYMBOL(lustre_swab_swap_layouts);
+
+void lustre_swab_close_data(struct close_data *cd)
+{
+ lustre_swab_lu_fid(&cd->cd_fid);
+ __swab64s(&cd->cd_data_version);
+}
+EXPORT_SYMBOL(lustre_swab_close_data);
diff --git a/drivers/staging/lustre/lustre/ptlrpc/pers.c b/drivers/staging/lustre/lustre/ptlrpc/pers.c
new file mode 100644
index 000000000..e1334c24e
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ptlrpc/pers.c
@@ -0,0 +1,75 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#define DEBUG_SUBSYSTEM S_RPC
+
+#include "../include/obd_support.h"
+#include "../include/obd_class.h"
+#include "../include/lustre_lib.h"
+#include "../include/lustre_ha.h"
+#include "../include/lustre_import.h"
+
+#include "ptlrpc_internal.h"
+
+
+void ptlrpc_fill_bulk_md(lnet_md_t *md, struct ptlrpc_bulk_desc *desc,
+ int mdidx)
+{
+ CLASSERT(PTLRPC_MAX_BRW_PAGES < LI_POISON);
+
+ LASSERT(mdidx < desc->bd_md_max_brw);
+ LASSERT(desc->bd_iov_count <= PTLRPC_MAX_BRW_PAGES);
+ LASSERT(!(md->options & (LNET_MD_IOVEC | LNET_MD_KIOV |
+ LNET_MD_PHYS)));
+
+ md->options |= LNET_MD_KIOV;
+ md->length = max(0, desc->bd_iov_count - mdidx * LNET_MAX_IOV);
+ md->length = min_t(unsigned int, LNET_MAX_IOV, md->length);
+ if (desc->bd_enc_iov)
+ md->start = &desc->bd_enc_iov[mdidx * LNET_MAX_IOV];
+ else
+ md->start = &desc->bd_iov[mdidx * LNET_MAX_IOV];
+}
+
+void ptlrpc_add_bulk_page(struct ptlrpc_bulk_desc *desc, struct page *page,
+ int pageoffset, int len)
+{
+ lnet_kiov_t *kiov = &desc->bd_iov[desc->bd_iov_count];
+
+ kiov->kiov_page = page;
+ kiov->kiov_offset = pageoffset;
+ kiov->kiov_len = len;
+
+ desc->bd_iov_count++;
+}
diff --git a/drivers/staging/lustre/lustre/ptlrpc/pinger.c b/drivers/staging/lustre/lustre/ptlrpc/pinger.c
new file mode 100644
index 000000000..9dbda9332
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ptlrpc/pinger.c
@@ -0,0 +1,678 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/ptlrpc/pinger.c
+ *
+ * Portal-RPC reconnection and replay operations, for use in recovery.
+ */
+
+#define DEBUG_SUBSYSTEM S_RPC
+
+#include "../include/obd_support.h"
+#include "../include/obd_class.h"
+#include "ptlrpc_internal.h"
+
+static int suppress_pings;
+module_param(suppress_pings, int, 0644);
+MODULE_PARM_DESC(suppress_pings, "Suppress pings");
+
+struct mutex pinger_mutex;
+static LIST_HEAD(pinger_imports);
+static struct list_head timeout_list = LIST_HEAD_INIT(timeout_list);
+
+int ptlrpc_pinger_suppress_pings(void)
+{
+ return suppress_pings;
+}
+EXPORT_SYMBOL(ptlrpc_pinger_suppress_pings);
+
+struct ptlrpc_request *
+ptlrpc_prep_ping(struct obd_import *imp)
+{
+ struct ptlrpc_request *req;
+
+ req = ptlrpc_request_alloc_pack(imp, &RQF_OBD_PING,
+ LUSTRE_OBD_VERSION, OBD_PING);
+ if (req) {
+ ptlrpc_request_set_replen(req);
+ req->rq_no_resend = req->rq_no_delay = 1;
+ }
+ return req;
+}
+
+int ptlrpc_obd_ping(struct obd_device *obd)
+{
+ int rc;
+ struct ptlrpc_request *req;
+
+ req = ptlrpc_prep_ping(obd->u.cli.cl_import);
+ if (req == NULL)
+ return -ENOMEM;
+
+ req->rq_send_state = LUSTRE_IMP_FULL;
+
+ rc = ptlrpc_queue_wait(req);
+
+ ptlrpc_req_finished(req);
+
+ return rc;
+}
+EXPORT_SYMBOL(ptlrpc_obd_ping);
+
+int ptlrpc_ping(struct obd_import *imp)
+{
+ struct ptlrpc_request *req;
+
+ req = ptlrpc_prep_ping(imp);
+ if (req == NULL) {
+ CERROR("OOM trying to ping %s->%s\n",
+ imp->imp_obd->obd_uuid.uuid,
+ obd2cli_tgt(imp->imp_obd));
+ return -ENOMEM;
+ }
+
+ DEBUG_REQ(D_INFO, req, "pinging %s->%s",
+ imp->imp_obd->obd_uuid.uuid, obd2cli_tgt(imp->imp_obd));
+ ptlrpcd_add_req(req, PDL_POLICY_ROUND, -1);
+
+ return 0;
+}
+
+void ptlrpc_update_next_ping(struct obd_import *imp, int soon)
+{
+ int time = soon ? PING_INTERVAL_SHORT : PING_INTERVAL;
+ if (imp->imp_state == LUSTRE_IMP_DISCON) {
+ int dtime = max_t(int, CONNECTION_SWITCH_MIN,
+ AT_OFF ? 0 :
+ at_get(&imp->imp_at.iat_net_latency));
+ time = min(time, dtime);
+ }
+ imp->imp_next_ping = cfs_time_shift(time);
+}
+
+void ptlrpc_ping_import_soon(struct obd_import *imp)
+{
+ imp->imp_next_ping = cfs_time_current();
+}
+
+static inline int imp_is_deactive(struct obd_import *imp)
+{
+ return (imp->imp_deactive ||
+ OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_IMP_DEACTIVE));
+}
+
+static inline int ptlrpc_next_reconnect(struct obd_import *imp)
+{
+ if (imp->imp_server_timeout)
+ return cfs_time_shift(obd_timeout / 2);
+ else
+ return cfs_time_shift(obd_timeout);
+}
+
+long pinger_check_timeout(unsigned long time)
+{
+ struct timeout_item *item;
+ unsigned long timeout = PING_INTERVAL;
+
+ /* The timeout list is a increase order sorted list */
+ mutex_lock(&pinger_mutex);
+ list_for_each_entry(item, &timeout_list, ti_chain) {
+ int ti_timeout = item->ti_timeout;
+ if (timeout > ti_timeout)
+ timeout = ti_timeout;
+ break;
+ }
+ mutex_unlock(&pinger_mutex);
+
+ return cfs_time_sub(cfs_time_add(time, cfs_time_seconds(timeout)),
+ cfs_time_current());
+}
+
+static bool ir_up;
+
+void ptlrpc_pinger_ir_up(void)
+{
+ CDEBUG(D_HA, "IR up\n");
+ ir_up = true;
+}
+EXPORT_SYMBOL(ptlrpc_pinger_ir_up);
+
+void ptlrpc_pinger_ir_down(void)
+{
+ CDEBUG(D_HA, "IR down\n");
+ ir_up = false;
+}
+EXPORT_SYMBOL(ptlrpc_pinger_ir_down);
+
+static void ptlrpc_pinger_process_import(struct obd_import *imp,
+ unsigned long this_ping)
+{
+ int level;
+ int force;
+ int force_next;
+ int suppress;
+
+ spin_lock(&imp->imp_lock);
+
+ level = imp->imp_state;
+ force = imp->imp_force_verify;
+ force_next = imp->imp_force_next_verify;
+ /*
+ * This will be used below only if the import is "FULL".
+ */
+ suppress = ir_up && OCD_HAS_FLAG(&imp->imp_connect_data, PINGLESS);
+
+ imp->imp_force_verify = 0;
+
+ if (cfs_time_aftereq(imp->imp_next_ping - 5 * CFS_TICK, this_ping) &&
+ !force) {
+ spin_unlock(&imp->imp_lock);
+ return;
+ }
+
+ imp->imp_force_next_verify = 0;
+
+ spin_unlock(&imp->imp_lock);
+
+ CDEBUG(level == LUSTRE_IMP_FULL ? D_INFO : D_HA, "%s->%s: level %s/%u force %u force_next %u deactive %u pingable %u suppress %u\n",
+ imp->imp_obd->obd_uuid.uuid, obd2cli_tgt(imp->imp_obd),
+ ptlrpc_import_state_name(level), level, force, force_next,
+ imp->imp_deactive, imp->imp_pingable, suppress);
+
+ if (level == LUSTRE_IMP_DISCON && !imp_is_deactive(imp)) {
+ /* wait for a while before trying recovery again */
+ imp->imp_next_ping = ptlrpc_next_reconnect(imp);
+ if (!imp->imp_no_pinger_recover)
+ ptlrpc_initiate_recovery(imp);
+ } else if (level != LUSTRE_IMP_FULL ||
+ imp->imp_obd->obd_no_recov ||
+ imp_is_deactive(imp)) {
+ CDEBUG(D_HA, "%s->%s: not pinging (in recovery or recovery disabled: %s)\n",
+ imp->imp_obd->obd_uuid.uuid, obd2cli_tgt(imp->imp_obd),
+ ptlrpc_import_state_name(level));
+ if (force) {
+ spin_lock(&imp->imp_lock);
+ imp->imp_force_verify = 1;
+ spin_unlock(&imp->imp_lock);
+ }
+ } else if ((imp->imp_pingable && !suppress) || force_next || force) {
+ ptlrpc_ping(imp);
+ }
+}
+
+static int ptlrpc_pinger_main(void *arg)
+{
+ struct ptlrpc_thread *thread = (struct ptlrpc_thread *)arg;
+
+ /* Record that the thread is running */
+ thread_set_flags(thread, SVC_RUNNING);
+ wake_up(&thread->t_ctl_waitq);
+
+ /* And now, loop forever, pinging as needed. */
+ while (1) {
+ unsigned long this_ping = cfs_time_current();
+ struct l_wait_info lwi;
+ long time_to_next_wake;
+ struct timeout_item *item;
+ struct list_head *iter;
+
+ mutex_lock(&pinger_mutex);
+ list_for_each_entry(item, &timeout_list, ti_chain) {
+ item->ti_cb(item, item->ti_cb_data);
+ }
+ list_for_each(iter, &pinger_imports) {
+ struct obd_import *imp =
+ list_entry(iter, struct obd_import,
+ imp_pinger_chain);
+
+ ptlrpc_pinger_process_import(imp, this_ping);
+ /* obd_timeout might have changed */
+ if (imp->imp_pingable && imp->imp_next_ping &&
+ cfs_time_after(imp->imp_next_ping,
+ cfs_time_add(this_ping,
+ cfs_time_seconds(PING_INTERVAL))))
+ ptlrpc_update_next_ping(imp, 0);
+ }
+ mutex_unlock(&pinger_mutex);
+ /* update memory usage info */
+ obd_update_maxusage();
+
+ /* Wait until the next ping time, or until we're stopped. */
+ time_to_next_wake = pinger_check_timeout(this_ping);
+ /* The ping sent by ptlrpc_send_rpc may get sent out
+ say .01 second after this.
+ ptlrpc_pinger_sending_on_import will then set the
+ next ping time to next_ping + .01 sec, which means
+ we will SKIP the next ping at next_ping, and the
+ ping will get sent 2 timeouts from now! Beware. */
+ CDEBUG(D_INFO, "next wakeup in "CFS_DURATION_T" ("
+ CFS_TIME_T")\n", time_to_next_wake,
+ cfs_time_add(this_ping,
+ cfs_time_seconds(PING_INTERVAL)));
+ if (time_to_next_wake > 0) {
+ lwi = LWI_TIMEOUT(max_t(long, time_to_next_wake,
+ cfs_time_seconds(1)),
+ NULL, NULL);
+ l_wait_event(thread->t_ctl_waitq,
+ thread_is_stopping(thread) ||
+ thread_is_event(thread),
+ &lwi);
+ if (thread_test_and_clear_flags(thread, SVC_STOPPING)) {
+ break;
+ } else {
+ /* woken after adding import to reset timer */
+ thread_test_and_clear_flags(thread, SVC_EVENT);
+ }
+ }
+ }
+
+ thread_set_flags(thread, SVC_STOPPED);
+ wake_up(&thread->t_ctl_waitq);
+
+ CDEBUG(D_NET, "pinger thread exiting, process %d\n", current_pid());
+ return 0;
+}
+
+static struct ptlrpc_thread pinger_thread;
+
+int ptlrpc_start_pinger(void)
+{
+ struct l_wait_info lwi = { 0 };
+ int rc;
+
+ if (!thread_is_init(&pinger_thread) &&
+ !thread_is_stopped(&pinger_thread))
+ return -EALREADY;
+
+ init_waitqueue_head(&pinger_thread.t_ctl_waitq);
+
+ strcpy(pinger_thread.t_name, "ll_ping");
+
+ /* CLONE_VM and CLONE_FILES just avoid a needless copy, because we
+ * just drop the VM and FILES in cfs_daemonize_ctxt() right away. */
+ rc = PTR_ERR(kthread_run(ptlrpc_pinger_main, &pinger_thread,
+ "%s", pinger_thread.t_name));
+ if (IS_ERR_VALUE(rc)) {
+ CERROR("cannot start thread: %d\n", rc);
+ return rc;
+ }
+ l_wait_event(pinger_thread.t_ctl_waitq,
+ thread_is_running(&pinger_thread), &lwi);
+
+ if (suppress_pings)
+ CWARN("Pings will be suppressed at the request of the administrator. The configuration shall meet the additional requirements described in the manual. (Search for the \"suppress_pings\" kernel module parameter.)\n");
+
+ return 0;
+}
+
+int ptlrpc_pinger_remove_timeouts(void);
+
+int ptlrpc_stop_pinger(void)
+{
+ struct l_wait_info lwi = { 0 };
+ int rc = 0;
+
+ if (thread_is_init(&pinger_thread) ||
+ thread_is_stopped(&pinger_thread))
+ return -EALREADY;
+
+ ptlrpc_pinger_remove_timeouts();
+ thread_set_flags(&pinger_thread, SVC_STOPPING);
+ wake_up(&pinger_thread.t_ctl_waitq);
+
+ l_wait_event(pinger_thread.t_ctl_waitq,
+ thread_is_stopped(&pinger_thread), &lwi);
+
+ return rc;
+}
+
+void ptlrpc_pinger_sending_on_import(struct obd_import *imp)
+{
+ ptlrpc_update_next_ping(imp, 0);
+}
+EXPORT_SYMBOL(ptlrpc_pinger_sending_on_import);
+
+void ptlrpc_pinger_commit_expected(struct obd_import *imp)
+{
+ ptlrpc_update_next_ping(imp, 1);
+ assert_spin_locked(&imp->imp_lock);
+ /*
+ * Avoid reading stale imp_connect_data. When not sure if pings are
+ * expected or not on next connection, we assume they are not and force
+ * one anyway to guarantee the chance of updating
+ * imp_peer_committed_transno.
+ */
+ if (imp->imp_state != LUSTRE_IMP_FULL ||
+ OCD_HAS_FLAG(&imp->imp_connect_data, PINGLESS))
+ imp->imp_force_next_verify = 1;
+}
+
+int ptlrpc_pinger_add_import(struct obd_import *imp)
+{
+ if (!list_empty(&imp->imp_pinger_chain))
+ return -EALREADY;
+
+ mutex_lock(&pinger_mutex);
+ CDEBUG(D_HA, "adding pingable import %s->%s\n",
+ imp->imp_obd->obd_uuid.uuid, obd2cli_tgt(imp->imp_obd));
+ /* if we add to pinger we want recovery on this import */
+ imp->imp_obd->obd_no_recov = 0;
+ ptlrpc_update_next_ping(imp, 0);
+ /* XXX sort, blah blah */
+ list_add_tail(&imp->imp_pinger_chain, &pinger_imports);
+ class_import_get(imp);
+
+ ptlrpc_pinger_wake_up();
+ mutex_unlock(&pinger_mutex);
+
+ return 0;
+}
+EXPORT_SYMBOL(ptlrpc_pinger_add_import);
+
+int ptlrpc_pinger_del_import(struct obd_import *imp)
+{
+ if (list_empty(&imp->imp_pinger_chain))
+ return -ENOENT;
+
+ mutex_lock(&pinger_mutex);
+ list_del_init(&imp->imp_pinger_chain);
+ CDEBUG(D_HA, "removing pingable import %s->%s\n",
+ imp->imp_obd->obd_uuid.uuid, obd2cli_tgt(imp->imp_obd));
+ /* if we remove from pinger we don't want recovery on this import */
+ imp->imp_obd->obd_no_recov = 1;
+ class_import_put(imp);
+ mutex_unlock(&pinger_mutex);
+ return 0;
+}
+EXPORT_SYMBOL(ptlrpc_pinger_del_import);
+
+/**
+ * Register a timeout callback to the pinger list, and the callback will
+ * be called when timeout happens.
+ */
+struct timeout_item *ptlrpc_new_timeout(int time, enum timeout_event event,
+ timeout_cb_t cb, void *data)
+{
+ struct timeout_item *ti;
+
+ OBD_ALLOC_PTR(ti);
+ if (!ti)
+ return NULL;
+
+ INIT_LIST_HEAD(&ti->ti_obd_list);
+ INIT_LIST_HEAD(&ti->ti_chain);
+ ti->ti_timeout = time;
+ ti->ti_event = event;
+ ti->ti_cb = cb;
+ ti->ti_cb_data = data;
+
+ return ti;
+}
+
+/**
+ * Register timeout event on the pinger thread.
+ * Note: the timeout list is an sorted list with increased timeout value.
+ */
+static struct timeout_item*
+ptlrpc_pinger_register_timeout(int time, enum timeout_event event,
+ timeout_cb_t cb, void *data)
+{
+ struct timeout_item *item, *tmp;
+
+ LASSERT(mutex_is_locked(&pinger_mutex));
+
+ list_for_each_entry(item, &timeout_list, ti_chain)
+ if (item->ti_event == event)
+ goto out;
+
+ item = ptlrpc_new_timeout(time, event, cb, data);
+ if (item) {
+ list_for_each_entry_reverse(tmp, &timeout_list, ti_chain) {
+ if (tmp->ti_timeout < time) {
+ list_add(&item->ti_chain, &tmp->ti_chain);
+ goto out;
+ }
+ }
+ list_add(&item->ti_chain, &timeout_list);
+ }
+out:
+ return item;
+}
+
+/* Add a client_obd to the timeout event list, when timeout(@time)
+ * happens, the callback(@cb) will be called.
+ */
+int ptlrpc_add_timeout_client(int time, enum timeout_event event,
+ timeout_cb_t cb, void *data,
+ struct list_head *obd_list)
+{
+ struct timeout_item *ti;
+
+ mutex_lock(&pinger_mutex);
+ ti = ptlrpc_pinger_register_timeout(time, event, cb, data);
+ if (!ti) {
+ mutex_unlock(&pinger_mutex);
+ return -EINVAL;
+ }
+ list_add(obd_list, &ti->ti_obd_list);
+ mutex_unlock(&pinger_mutex);
+ return 0;
+}
+EXPORT_SYMBOL(ptlrpc_add_timeout_client);
+
+int ptlrpc_del_timeout_client(struct list_head *obd_list,
+ enum timeout_event event)
+{
+ struct timeout_item *ti = NULL, *item;
+
+ if (list_empty(obd_list))
+ return 0;
+ mutex_lock(&pinger_mutex);
+ list_del_init(obd_list);
+ /**
+ * If there are no obd attached to the timeout event
+ * list, remove this timeout event from the pinger
+ */
+ list_for_each_entry(item, &timeout_list, ti_chain) {
+ if (item->ti_event == event) {
+ ti = item;
+ break;
+ }
+ }
+ LASSERTF(ti != NULL, "ti is NULL !\n");
+ if (list_empty(&ti->ti_obd_list)) {
+ list_del(&ti->ti_chain);
+ OBD_FREE_PTR(ti);
+ }
+ mutex_unlock(&pinger_mutex);
+ return 0;
+}
+EXPORT_SYMBOL(ptlrpc_del_timeout_client);
+
+int ptlrpc_pinger_remove_timeouts(void)
+{
+ struct timeout_item *item, *tmp;
+
+ mutex_lock(&pinger_mutex);
+ list_for_each_entry_safe(item, tmp, &timeout_list, ti_chain) {
+ LASSERT(list_empty(&item->ti_obd_list));
+ list_del(&item->ti_chain);
+ OBD_FREE_PTR(item);
+ }
+ mutex_unlock(&pinger_mutex);
+ return 0;
+}
+
+void ptlrpc_pinger_wake_up(void)
+{
+ thread_add_flags(&pinger_thread, SVC_EVENT);
+ wake_up(&pinger_thread.t_ctl_waitq);
+}
+
+/* Ping evictor thread */
+#define PET_READY 1
+#define PET_TERMINATE 2
+
+static int pet_refcount;
+static int pet_state;
+static wait_queue_head_t pet_waitq;
+LIST_HEAD(pet_list);
+static DEFINE_SPINLOCK(pet_lock);
+
+int ping_evictor_wake(struct obd_export *exp)
+{
+ struct obd_device *obd;
+
+ spin_lock(&pet_lock);
+ if (pet_state != PET_READY) {
+ /* eventually the new obd will call here again. */
+ spin_unlock(&pet_lock);
+ return 1;
+ }
+
+ obd = class_exp2obd(exp);
+ if (list_empty(&obd->obd_evict_list)) {
+ class_incref(obd, "evictor", obd);
+ list_add(&obd->obd_evict_list, &pet_list);
+ }
+ spin_unlock(&pet_lock);
+
+ wake_up(&pet_waitq);
+ return 0;
+}
+
+static int ping_evictor_main(void *arg)
+{
+ struct obd_device *obd;
+ struct obd_export *exp;
+ struct l_wait_info lwi = { 0 };
+ time_t expire_time;
+
+ unshare_fs_struct();
+
+ CDEBUG(D_HA, "Starting Ping Evictor\n");
+ pet_state = PET_READY;
+ while (1) {
+ l_wait_event(pet_waitq, (!list_empty(&pet_list)) ||
+ (pet_state == PET_TERMINATE), &lwi);
+
+ /* loop until all obd's will be removed */
+ if ((pet_state == PET_TERMINATE) && list_empty(&pet_list))
+ break;
+
+ /* we only get here if pet_exp != NULL, and the end of this
+ * loop is the only place which sets it NULL again, so lock
+ * is not strictly necessary. */
+ spin_lock(&pet_lock);
+ obd = list_entry(pet_list.next, struct obd_device,
+ obd_evict_list);
+ spin_unlock(&pet_lock);
+
+ expire_time = get_seconds() - PING_EVICT_TIMEOUT;
+
+ CDEBUG(D_HA, "evicting all exports of obd %s older than %ld\n",
+ obd->obd_name, expire_time);
+
+ /* Exports can't be deleted out of the list while we hold
+ * the obd lock (class_unlink_export), which means we can't
+ * lose the last ref on the export. If they've already been
+ * removed from the list, we won't find them here. */
+ spin_lock(&obd->obd_dev_lock);
+ while (!list_empty(&obd->obd_exports_timed)) {
+ exp = list_entry(obd->obd_exports_timed.next,
+ struct obd_export,
+ exp_obd_chain_timed);
+ if (expire_time > exp->exp_last_request_time) {
+ class_export_get(exp);
+ spin_unlock(&obd->obd_dev_lock);
+ LCONSOLE_WARN("%s: haven't heard from client %s (at %s) in %ld seconds. I think it's dead, and I am evicting it. exp %p, cur %ld expire %ld last %ld\n",
+ obd->obd_name,
+ obd_uuid2str(&exp->exp_client_uuid),
+ obd_export_nid2str(exp),
+ (long)(get_seconds() -
+ exp->exp_last_request_time),
+ exp, (long)get_seconds(),
+ (long)expire_time,
+ (long)exp->exp_last_request_time);
+ CDEBUG(D_HA, "Last request was at %ld\n",
+ exp->exp_last_request_time);
+ class_fail_export(exp);
+ class_export_put(exp);
+ spin_lock(&obd->obd_dev_lock);
+ } else {
+ /* List is sorted, so everyone below is ok */
+ break;
+ }
+ }
+ spin_unlock(&obd->obd_dev_lock);
+
+ spin_lock(&pet_lock);
+ list_del_init(&obd->obd_evict_list);
+ spin_unlock(&pet_lock);
+
+ class_decref(obd, "evictor", obd);
+ }
+ CDEBUG(D_HA, "Exiting Ping Evictor\n");
+
+ return 0;
+}
+
+void ping_evictor_start(void)
+{
+ struct task_struct *task;
+
+ if (++pet_refcount > 1)
+ return;
+
+ init_waitqueue_head(&pet_waitq);
+
+ task = kthread_run(ping_evictor_main, NULL, "ll_evictor");
+ if (IS_ERR(task)) {
+ pet_refcount--;
+ CERROR("Cannot start ping evictor thread: %ld\n",
+ PTR_ERR(task));
+ }
+}
+EXPORT_SYMBOL(ping_evictor_start);
+
+void ping_evictor_stop(void)
+{
+ if (--pet_refcount > 0)
+ return;
+
+ pet_state = PET_TERMINATE;
+ wake_up(&pet_waitq);
+}
+EXPORT_SYMBOL(ping_evictor_stop);
diff --git a/drivers/staging/lustre/lustre/ptlrpc/ptlrpc_internal.h b/drivers/staging/lustre/lustre/ptlrpc/ptlrpc_internal.h
new file mode 100644
index 000000000..a66dc3c6d
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ptlrpc/ptlrpc_internal.h
@@ -0,0 +1,312 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+/* Intramodule declarations for ptlrpc. */
+
+#ifndef PTLRPC_INTERNAL_H
+#define PTLRPC_INTERNAL_H
+
+#include "../ldlm/ldlm_internal.h"
+
+struct ldlm_namespace;
+struct obd_import;
+struct ldlm_res_id;
+struct ptlrpc_request_set;
+extern int test_req_buffer_pressure;
+extern struct mutex ptlrpc_all_services_mutex;
+
+int ptlrpc_start_thread(struct ptlrpc_service_part *svcpt, int wait);
+/* ptlrpcd.c */
+int ptlrpcd_start(int index, int max, const char *name, struct ptlrpcd_ctl *pc);
+
+/* client.c */
+struct ptlrpc_bulk_desc *ptlrpc_new_bulk(unsigned npages, unsigned max_brw,
+ unsigned type, unsigned portal);
+int ptlrpc_request_cache_init(void);
+void ptlrpc_request_cache_fini(void);
+struct ptlrpc_request *ptlrpc_request_cache_alloc(gfp_t flags);
+void ptlrpc_request_cache_free(struct ptlrpc_request *req);
+void ptlrpc_init_xid(void);
+
+/* events.c */
+int ptlrpc_init_portals(void);
+void ptlrpc_exit_portals(void);
+
+void ptlrpc_request_handle_notconn(struct ptlrpc_request *);
+void lustre_assert_wire_constants(void);
+int ptlrpc_import_in_recovery(struct obd_import *imp);
+int ptlrpc_set_import_discon(struct obd_import *imp, __u32 conn_cnt);
+void ptlrpc_handle_failed_import(struct obd_import *imp);
+int ptlrpc_replay_next(struct obd_import *imp, int *inflight);
+void ptlrpc_initiate_recovery(struct obd_import *imp);
+
+int lustre_unpack_req_ptlrpc_body(struct ptlrpc_request *req, int offset);
+int lustre_unpack_rep_ptlrpc_body(struct ptlrpc_request *req, int offset);
+
+#if defined(CONFIG_PROC_FS)
+void ptlrpc_lprocfs_register_service(struct proc_dir_entry *proc_entry,
+ struct ptlrpc_service *svc);
+void ptlrpc_lprocfs_unregister_service(struct ptlrpc_service *svc);
+void ptlrpc_lprocfs_rpc_sent(struct ptlrpc_request *req, long amount);
+void ptlrpc_lprocfs_do_request_stat(struct ptlrpc_request *req,
+ long q_usec, long work_usec);
+#else
+#define ptlrpc_lprocfs_register_service(params...) do {} while (0)
+#define ptlrpc_lprocfs_unregister_service(params...) do {} while (0)
+#define ptlrpc_lprocfs_rpc_sent(params...) do {} while (0)
+#define ptlrpc_lprocfs_do_request_stat(params...) do {} while (0)
+#endif /* CONFIG_PROC_FS */
+
+/* NRS */
+
+/**
+ * NRS core object.
+ *
+ * Holds NRS core fields.
+ */
+struct nrs_core {
+ /**
+ * Protects nrs_core::nrs_policies, serializes external policy
+ * registration/unregistration, and NRS core lprocfs operations.
+ */
+ struct mutex nrs_mutex;
+ /* XXX: This is just for liblustre. Remove the #if defined directive
+ * when the * "cfs_" prefix is dropped from cfs_list_head. */
+ /**
+ * List of all policy descriptors registered with NRS core; protected
+ * by nrs_core::nrs_mutex.
+ */
+ struct list_head nrs_policies;
+
+};
+
+int ptlrpc_service_nrs_setup(struct ptlrpc_service *svc);
+void ptlrpc_service_nrs_cleanup(struct ptlrpc_service *svc);
+
+void ptlrpc_nrs_req_initialize(struct ptlrpc_service_part *svcpt,
+ struct ptlrpc_request *req, bool hp);
+void ptlrpc_nrs_req_finalize(struct ptlrpc_request *req);
+void ptlrpc_nrs_req_stop_nolock(struct ptlrpc_request *req);
+void ptlrpc_nrs_req_add(struct ptlrpc_service_part *svcpt,
+ struct ptlrpc_request *req, bool hp);
+
+struct ptlrpc_request *
+ptlrpc_nrs_req_get_nolock0(struct ptlrpc_service_part *svcpt, bool hp,
+ bool peek, bool force);
+
+static inline struct ptlrpc_request *
+ptlrpc_nrs_req_get_nolock(struct ptlrpc_service_part *svcpt, bool hp,
+ bool force)
+{
+ return ptlrpc_nrs_req_get_nolock0(svcpt, hp, false, force);
+}
+
+static inline struct ptlrpc_request *
+ptlrpc_nrs_req_peek_nolock(struct ptlrpc_service_part *svcpt, bool hp)
+{
+ return ptlrpc_nrs_req_get_nolock0(svcpt, hp, true, false);
+}
+
+void ptlrpc_nrs_req_del_nolock(struct ptlrpc_request *req);
+bool ptlrpc_nrs_req_pending_nolock(struct ptlrpc_service_part *svcpt, bool hp);
+
+int ptlrpc_nrs_policy_control(const struct ptlrpc_service *svc,
+ enum ptlrpc_nrs_queue_type queue, char *name,
+ enum ptlrpc_nrs_ctl opc, bool single, void *arg);
+
+int ptlrpc_nrs_init(void);
+void ptlrpc_nrs_fini(void);
+
+static inline bool nrs_svcpt_has_hp(const struct ptlrpc_service_part *svcpt)
+{
+ return svcpt->scp_nrs_hp != NULL;
+}
+
+static inline bool nrs_svc_has_hp(const struct ptlrpc_service *svc)
+{
+ /**
+ * If the first service partition has an HP NRS head, all service
+ * partitions will.
+ */
+ return nrs_svcpt_has_hp(svc->srv_parts[0]);
+}
+
+static inline
+struct ptlrpc_nrs *nrs_svcpt2nrs(struct ptlrpc_service_part *svcpt, bool hp)
+{
+ LASSERT(ergo(hp, nrs_svcpt_has_hp(svcpt)));
+ return hp ? svcpt->scp_nrs_hp : &svcpt->scp_nrs_reg;
+}
+
+static inline int nrs_pol2cptid(const struct ptlrpc_nrs_policy *policy)
+{
+ return policy->pol_nrs->nrs_svcpt->scp_cpt;
+}
+
+static inline
+struct ptlrpc_service *nrs_pol2svc(struct ptlrpc_nrs_policy *policy)
+{
+ return policy->pol_nrs->nrs_svcpt->scp_service;
+}
+
+static inline
+struct ptlrpc_service_part *nrs_pol2svcpt(struct ptlrpc_nrs_policy *policy)
+{
+ return policy->pol_nrs->nrs_svcpt;
+}
+
+static inline
+struct cfs_cpt_table *nrs_pol2cptab(struct ptlrpc_nrs_policy *policy)
+{
+ return nrs_pol2svc(policy)->srv_cptable;
+}
+
+static inline struct ptlrpc_nrs_resource *
+nrs_request_resource(struct ptlrpc_nrs_request *nrq)
+{
+ LASSERT(nrq->nr_initialized);
+ LASSERT(!nrq->nr_finalized);
+
+ return nrq->nr_res_ptrs[nrq->nr_res_idx];
+}
+
+static inline
+struct ptlrpc_nrs_policy *nrs_request_policy(struct ptlrpc_nrs_request *nrq)
+{
+ return nrs_request_resource(nrq)->res_policy;
+}
+
+#define NRS_LPROCFS_QUANTUM_NAME_REG "reg_quantum:"
+#define NRS_LPROCFS_QUANTUM_NAME_HP "hp_quantum:"
+
+/**
+ * the maximum size of nrs_crrn_client::cc_quantum and nrs_orr_data::od_quantum.
+ */
+#define LPROCFS_NRS_QUANTUM_MAX 65535
+
+/**
+ * Max valid command string is the size of the labels, plus "65535" twice, plus
+ * a separating space character.
+ */
+#define LPROCFS_NRS_WR_QUANTUM_MAX_CMD \
+ sizeof(NRS_LPROCFS_QUANTUM_NAME_REG __stringify(LPROCFS_NRS_QUANTUM_MAX) " " \
+ NRS_LPROCFS_QUANTUM_NAME_HP __stringify(LPROCFS_NRS_QUANTUM_MAX))
+
+/* recovd_thread.c */
+
+int ptlrpc_expire_one_request(struct ptlrpc_request *req, int async_unlink);
+
+/* pers.c */
+void ptlrpc_fill_bulk_md(lnet_md_t *md, struct ptlrpc_bulk_desc *desc,
+ int mdcnt);
+void ptlrpc_add_bulk_page(struct ptlrpc_bulk_desc *desc, struct page *page,
+ int pageoffset, int len);
+
+/* pack_generic.c */
+struct ptlrpc_reply_state *
+lustre_get_emerg_rs(struct ptlrpc_service_part *svcpt);
+void lustre_put_emerg_rs(struct ptlrpc_reply_state *rs);
+
+/* pinger.c */
+int ptlrpc_start_pinger(void);
+int ptlrpc_stop_pinger(void);
+void ptlrpc_pinger_sending_on_import(struct obd_import *imp);
+void ptlrpc_pinger_commit_expected(struct obd_import *imp);
+void ptlrpc_pinger_wake_up(void);
+void ptlrpc_ping_import_soon(struct obd_import *imp);
+int ping_evictor_wake(struct obd_export *exp);
+
+/* sec_null.c */
+int sptlrpc_null_init(void);
+void sptlrpc_null_fini(void);
+
+/* sec_plain.c */
+int sptlrpc_plain_init(void);
+void sptlrpc_plain_fini(void);
+
+/* sec_bulk.c */
+int sptlrpc_enc_pool_init(void);
+void sptlrpc_enc_pool_fini(void);
+int sptlrpc_proc_enc_pool_seq_show(struct seq_file *m, void *v);
+
+/* sec_lproc.c */
+#if defined(CONFIG_PROC_FS)
+int sptlrpc_lproc_init(void);
+void sptlrpc_lproc_fini(void);
+#else
+static inline int sptlrpc_lproc_init(void)
+{ return 0; }
+static inline void sptlrpc_lproc_fini(void) {}
+#endif
+
+/* sec_gc.c */
+int sptlrpc_gc_init(void);
+void sptlrpc_gc_fini(void);
+
+/* sec_config.c */
+void sptlrpc_conf_choose_flavor(enum lustre_sec_part from,
+ enum lustre_sec_part to,
+ struct obd_uuid *target,
+ lnet_nid_t nid,
+ struct sptlrpc_flavor *sf);
+int sptlrpc_conf_init(void);
+void sptlrpc_conf_fini(void);
+
+/* sec.c */
+int sptlrpc_init(void);
+void sptlrpc_fini(void);
+
+static inline int ll_rpc_recoverable_error(int rc)
+{
+ return (rc == -ENOTCONN || rc == -ENODEV);
+}
+
+static inline int tgt_mod_init(void)
+{
+ return 0;
+}
+
+static inline void tgt_mod_exit(void)
+{
+ return;
+}
+
+static inline void ptlrpc_reqset_put(struct ptlrpc_request_set *set)
+{
+ if (atomic_dec_and_test(&set->set_refcount))
+ OBD_FREE_PTR(set);
+}
+#endif /* PTLRPC_INTERNAL_H */
diff --git a/drivers/staging/lustre/lustre/ptlrpc/ptlrpc_module.c b/drivers/staging/lustre/lustre/ptlrpc/ptlrpc_module.c
new file mode 100644
index 000000000..5268887ca
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ptlrpc/ptlrpc_module.c
@@ -0,0 +1,171 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#define DEBUG_SUBSYSTEM S_RPC
+
+
+#include "../include/obd_support.h"
+#include "../include/obd_class.h"
+#include "../include/lustre_net.h"
+#include "../include/lustre_req_layout.h"
+
+#include "ptlrpc_internal.h"
+
+extern spinlock_t ptlrpc_last_xid_lock;
+#if RS_DEBUG
+extern spinlock_t ptlrpc_rs_debug_lock;
+#endif
+extern struct mutex pinger_mutex;
+extern struct mutex ptlrpcd_mutex;
+
+__init int ptlrpc_init(void)
+{
+ int rc, cleanup_phase = 0;
+
+ lustre_assert_wire_constants();
+#if RS_DEBUG
+ spin_lock_init(&ptlrpc_rs_debug_lock);
+#endif
+ mutex_init(&ptlrpc_all_services_mutex);
+ mutex_init(&pinger_mutex);
+ mutex_init(&ptlrpcd_mutex);
+ ptlrpc_init_xid();
+
+ rc = req_layout_init();
+ if (rc)
+ return rc;
+
+ rc = ptlrpc_hr_init();
+ if (rc)
+ return rc;
+
+ cleanup_phase = 1;
+ rc = ptlrpc_request_cache_init();
+ if (rc)
+ goto cleanup;
+
+ cleanup_phase = 2;
+ rc = ptlrpc_init_portals();
+ if (rc)
+ goto cleanup;
+
+ cleanup_phase = 3;
+
+ rc = ptlrpc_connection_init();
+ if (rc)
+ goto cleanup;
+
+ cleanup_phase = 4;
+ ptlrpc_put_connection_superhack = ptlrpc_connection_put;
+
+ rc = ptlrpc_start_pinger();
+ if (rc)
+ goto cleanup;
+
+ cleanup_phase = 5;
+ rc = ldlm_init();
+ if (rc)
+ goto cleanup;
+
+ cleanup_phase = 6;
+ rc = sptlrpc_init();
+ if (rc)
+ goto cleanup;
+
+ cleanup_phase = 7;
+ rc = ptlrpc_nrs_init();
+ if (rc)
+ goto cleanup;
+
+ cleanup_phase = 8;
+ rc = tgt_mod_init();
+ if (rc)
+ goto cleanup;
+ return 0;
+
+cleanup:
+ switch (cleanup_phase) {
+ case 8:
+ ptlrpc_nrs_fini();
+ /* Fall through */
+ case 7:
+ sptlrpc_fini();
+ /* Fall through */
+ case 6:
+ ldlm_exit();
+ /* Fall through */
+ case 5:
+ ptlrpc_stop_pinger();
+ /* Fall through */
+ case 4:
+ ptlrpc_connection_fini();
+ /* Fall through */
+ case 3:
+ ptlrpc_exit_portals();
+ /* Fall through */
+ case 2:
+ ptlrpc_request_cache_fini();
+ /* Fall through */
+ case 1:
+ ptlrpc_hr_fini();
+ req_layout_fini();
+ /* Fall through */
+ default: ;
+ }
+
+ return rc;
+}
+
+static void __exit ptlrpc_exit(void)
+{
+ tgt_mod_exit();
+ ptlrpc_nrs_fini();
+ sptlrpc_fini();
+ ldlm_exit();
+ ptlrpc_stop_pinger();
+ ptlrpc_exit_portals();
+ ptlrpc_request_cache_fini();
+ ptlrpc_hr_fini();
+ ptlrpc_connection_fini();
+}
+
+MODULE_AUTHOR("Sun Microsystems, Inc. <http://www.lustre.org/>");
+MODULE_DESCRIPTION("Lustre Request Processor and Lock Management");
+MODULE_LICENSE("GPL");
+MODULE_VERSION("1.0.0");
+
+module_init(ptlrpc_init);
+module_exit(ptlrpc_exit);
diff --git a/drivers/staging/lustre/lustre/ptlrpc/ptlrpcd.c b/drivers/staging/lustre/lustre/ptlrpc/ptlrpcd.c
new file mode 100644
index 000000000..0c178ec0e
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ptlrpc/ptlrpcd.c
@@ -0,0 +1,811 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/ptlrpc/ptlrpcd.c
+ */
+
+/** \defgroup ptlrpcd PortalRPC daemon
+ *
+ * ptlrpcd is a special thread with its own set where other user might add
+ * requests when they don't want to wait for their completion.
+ * PtlRPCD will take care of sending such requests and then processing their
+ * replies and calling completion callbacks as necessary.
+ * The callbacks are called directly from ptlrpcd context.
+ * It is important to never significantly block (esp. on RPCs!) within such
+ * completion handler or a deadlock might occur where ptlrpcd enters some
+ * callback that attempts to send another RPC and wait for it to return,
+ * during which time ptlrpcd is completely blocked, so e.g. if import
+ * fails, recovery cannot progress because connection requests are also
+ * sent by ptlrpcd.
+ *
+ * @{
+ */
+
+#define DEBUG_SUBSYSTEM S_RPC
+
+#include "../../include/linux/libcfs/libcfs.h"
+
+#include "../include/lustre_net.h"
+#include "../include/lustre_lib.h"
+#include "../include/lustre_ha.h"
+#include "../include/obd_class.h" /* for obd_zombie */
+#include "../include/obd_support.h" /* for OBD_FAIL_CHECK */
+#include "../include/cl_object.h" /* cl_env_{get,put}() */
+#include "../include/lprocfs_status.h"
+
+#include "ptlrpc_internal.h"
+
+struct ptlrpcd {
+ int pd_size;
+ int pd_index;
+ int pd_nthreads;
+ struct ptlrpcd_ctl pd_thread_rcv;
+ struct ptlrpcd_ctl pd_threads[0];
+};
+
+static int max_ptlrpcds;
+module_param(max_ptlrpcds, int, 0644);
+MODULE_PARM_DESC(max_ptlrpcds, "Max ptlrpcd thread count to be started.");
+
+static int ptlrpcd_bind_policy = PDB_POLICY_PAIR;
+module_param(ptlrpcd_bind_policy, int, 0644);
+MODULE_PARM_DESC(ptlrpcd_bind_policy, "Ptlrpcd threads binding mode.");
+static struct ptlrpcd *ptlrpcds;
+
+struct mutex ptlrpcd_mutex;
+static int ptlrpcd_users;
+
+void ptlrpcd_wake(struct ptlrpc_request *req)
+{
+ struct ptlrpc_request_set *rq_set = req->rq_set;
+
+ LASSERT(rq_set != NULL);
+
+ wake_up(&rq_set->set_waitq);
+}
+EXPORT_SYMBOL(ptlrpcd_wake);
+
+static struct ptlrpcd_ctl *
+ptlrpcd_select_pc(struct ptlrpc_request *req, pdl_policy_t policy, int index)
+{
+ int idx = 0;
+
+ if (req != NULL && req->rq_send_state != LUSTRE_IMP_FULL)
+ return &ptlrpcds->pd_thread_rcv;
+
+ switch (policy) {
+ case PDL_POLICY_SAME:
+ idx = smp_processor_id() % ptlrpcds->pd_nthreads;
+ break;
+ case PDL_POLICY_LOCAL:
+ /* Before CPU partition patches available, process it the same
+ * as "PDL_POLICY_ROUND". */
+# ifdef CFS_CPU_MODE_NUMA
+# warning "fix this code to use new CPU partition APIs"
+# endif
+ /* Fall through to PDL_POLICY_ROUND until the CPU
+ * CPU partition patches are available. */
+ index = -1;
+ case PDL_POLICY_PREFERRED:
+ if (index >= 0 && index < num_online_cpus()) {
+ idx = index % ptlrpcds->pd_nthreads;
+ break;
+ }
+ /* Fall through to PDL_POLICY_ROUND for bad index. */
+ default:
+ /* Fall through to PDL_POLICY_ROUND for unknown policy. */
+ case PDL_POLICY_ROUND:
+ /* We do not care whether it is strict load balance. */
+ idx = ptlrpcds->pd_index + 1;
+ if (idx == smp_processor_id())
+ idx++;
+ idx %= ptlrpcds->pd_nthreads;
+ ptlrpcds->pd_index = idx;
+ break;
+ }
+
+ return &ptlrpcds->pd_threads[idx];
+}
+
+/**
+ * Move all request from an existing request set to the ptlrpcd queue.
+ * All requests from the set must be in phase RQ_PHASE_NEW.
+ */
+void ptlrpcd_add_rqset(struct ptlrpc_request_set *set)
+{
+ struct list_head *tmp, *pos;
+ struct ptlrpcd_ctl *pc;
+ struct ptlrpc_request_set *new;
+ int count, i;
+
+ pc = ptlrpcd_select_pc(NULL, PDL_POLICY_LOCAL, -1);
+ new = pc->pc_set;
+
+ list_for_each_safe(pos, tmp, &set->set_requests) {
+ struct ptlrpc_request *req =
+ list_entry(pos, struct ptlrpc_request,
+ rq_set_chain);
+
+ LASSERT(req->rq_phase == RQ_PHASE_NEW);
+ req->rq_set = new;
+ req->rq_queued_time = cfs_time_current();
+ }
+
+ spin_lock(&new->set_new_req_lock);
+ list_splice_init(&set->set_requests, &new->set_new_requests);
+ i = atomic_read(&set->set_remaining);
+ count = atomic_add_return(i, &new->set_new_count);
+ atomic_set(&set->set_remaining, 0);
+ spin_unlock(&new->set_new_req_lock);
+ if (count == i) {
+ wake_up(&new->set_waitq);
+
+ /* XXX: It maybe unnecessary to wakeup all the partners. But to
+ * guarantee the async RPC can be processed ASAP, we have
+ * no other better choice. It maybe fixed in future. */
+ for (i = 0; i < pc->pc_npartners; i++)
+ wake_up(&pc->pc_partners[i]->pc_set->set_waitq);
+ }
+}
+EXPORT_SYMBOL(ptlrpcd_add_rqset);
+
+/**
+ * Return transferred RPCs count.
+ */
+static int ptlrpcd_steal_rqset(struct ptlrpc_request_set *des,
+ struct ptlrpc_request_set *src)
+{
+ struct list_head *tmp, *pos;
+ struct ptlrpc_request *req;
+ int rc = 0;
+
+ spin_lock(&src->set_new_req_lock);
+ if (likely(!list_empty(&src->set_new_requests))) {
+ list_for_each_safe(pos, tmp, &src->set_new_requests) {
+ req = list_entry(pos, struct ptlrpc_request,
+ rq_set_chain);
+ req->rq_set = des;
+ }
+ list_splice_init(&src->set_new_requests,
+ &des->set_requests);
+ rc = atomic_read(&src->set_new_count);
+ atomic_add(rc, &des->set_remaining);
+ atomic_set(&src->set_new_count, 0);
+ }
+ spin_unlock(&src->set_new_req_lock);
+ return rc;
+}
+
+/**
+ * Requests that are added to the ptlrpcd queue are sent via
+ * ptlrpcd_check->ptlrpc_check_set().
+ */
+void ptlrpcd_add_req(struct ptlrpc_request *req, pdl_policy_t policy, int idx)
+{
+ struct ptlrpcd_ctl *pc;
+
+ if (req->rq_reqmsg)
+ lustre_msg_set_jobid(req->rq_reqmsg, NULL);
+
+ spin_lock(&req->rq_lock);
+ if (req->rq_invalid_rqset) {
+ struct l_wait_info lwi = LWI_TIMEOUT(cfs_time_seconds(5),
+ back_to_sleep, NULL);
+
+ req->rq_invalid_rqset = 0;
+ spin_unlock(&req->rq_lock);
+ l_wait_event(req->rq_set_waitq, (req->rq_set == NULL), &lwi);
+ } else if (req->rq_set) {
+ /* If we have a valid "rq_set", just reuse it to avoid double
+ * linked. */
+ LASSERT(req->rq_phase == RQ_PHASE_NEW);
+ LASSERT(req->rq_send_state == LUSTRE_IMP_REPLAY);
+
+ /* ptlrpc_check_set will decrease the count */
+ atomic_inc(&req->rq_set->set_remaining);
+ spin_unlock(&req->rq_lock);
+ wake_up(&req->rq_set->set_waitq);
+ return;
+ } else {
+ spin_unlock(&req->rq_lock);
+ }
+
+ pc = ptlrpcd_select_pc(req, policy, idx);
+
+ DEBUG_REQ(D_INFO, req, "add req [%p] to pc [%s:%d]",
+ req, pc->pc_name, pc->pc_index);
+
+ ptlrpc_set_add_new_req(pc, req);
+}
+EXPORT_SYMBOL(ptlrpcd_add_req);
+
+static inline void ptlrpc_reqset_get(struct ptlrpc_request_set *set)
+{
+ atomic_inc(&set->set_refcount);
+}
+
+/**
+ * Check if there is more work to do on ptlrpcd set.
+ * Returns 1 if yes.
+ */
+static int ptlrpcd_check(struct lu_env *env, struct ptlrpcd_ctl *pc)
+{
+ struct list_head *tmp, *pos;
+ struct ptlrpc_request *req;
+ struct ptlrpc_request_set *set = pc->pc_set;
+ int rc = 0;
+ int rc2;
+
+ if (atomic_read(&set->set_new_count)) {
+ spin_lock(&set->set_new_req_lock);
+ if (likely(!list_empty(&set->set_new_requests))) {
+ list_splice_init(&set->set_new_requests,
+ &set->set_requests);
+ atomic_add(atomic_read(&set->set_new_count),
+ &set->set_remaining);
+ atomic_set(&set->set_new_count, 0);
+ /*
+ * Need to calculate its timeout.
+ */
+ rc = 1;
+ }
+ spin_unlock(&set->set_new_req_lock);
+ }
+
+ /* We should call lu_env_refill() before handling new requests to make
+ * sure that env key the requests depending on really exists.
+ */
+ rc2 = lu_env_refill(env);
+ if (rc2 != 0) {
+ /*
+ * XXX This is very awkward situation, because
+ * execution can neither continue (request
+ * interpreters assume that env is set up), nor repeat
+ * the loop (as this potentially results in a tight
+ * loop of -ENOMEM's).
+ *
+ * Fortunately, refill only ever does something when
+ * new modules are loaded, i.e., early during boot up.
+ */
+ CERROR("Failure to refill session: %d\n", rc2);
+ return rc;
+ }
+
+ if (atomic_read(&set->set_remaining))
+ rc |= ptlrpc_check_set(env, set);
+
+ /* NB: ptlrpc_check_set has already moved completed request at the
+ * head of seq::set_requests */
+ list_for_each_safe(pos, tmp, &set->set_requests) {
+ req = list_entry(pos, struct ptlrpc_request, rq_set_chain);
+ if (req->rq_phase != RQ_PHASE_COMPLETE)
+ break;
+
+ list_del_init(&req->rq_set_chain);
+ req->rq_set = NULL;
+ ptlrpc_req_finished(req);
+ }
+
+ if (rc == 0) {
+ /*
+ * If new requests have been added, make sure to wake up.
+ */
+ rc = atomic_read(&set->set_new_count);
+
+ /* If we have nothing to do, check whether we can take some
+ * work from our partner threads. */
+ if (rc == 0 && pc->pc_npartners > 0) {
+ struct ptlrpcd_ctl *partner;
+ struct ptlrpc_request_set *ps;
+ int first = pc->pc_cursor;
+
+ do {
+ partner = pc->pc_partners[pc->pc_cursor++];
+ if (pc->pc_cursor >= pc->pc_npartners)
+ pc->pc_cursor = 0;
+ if (partner == NULL)
+ continue;
+
+ spin_lock(&partner->pc_lock);
+ ps = partner->pc_set;
+ if (ps == NULL) {
+ spin_unlock(&partner->pc_lock);
+ continue;
+ }
+
+ ptlrpc_reqset_get(ps);
+ spin_unlock(&partner->pc_lock);
+
+ if (atomic_read(&ps->set_new_count)) {
+ rc = ptlrpcd_steal_rqset(set, ps);
+ if (rc > 0)
+ CDEBUG(D_RPCTRACE, "transfer %d async RPCs [%d->%d]\n",
+ rc, partner->pc_index,
+ pc->pc_index);
+ }
+ ptlrpc_reqset_put(ps);
+ } while (rc == 0 && pc->pc_cursor != first);
+ }
+ }
+
+ return rc;
+}
+
+/**
+ * Main ptlrpcd thread.
+ * ptlrpc's code paths like to execute in process context, so we have this
+ * thread which spins on a set which contains the rpcs and sends them.
+ *
+ */
+static int ptlrpcd(void *arg)
+{
+ struct ptlrpcd_ctl *pc = arg;
+ struct ptlrpc_request_set *set = pc->pc_set;
+ struct lu_env env = { .le_ses = NULL };
+ int rc, exit = 0;
+
+ unshare_fs_struct();
+#if defined(CONFIG_SMP)
+ if (test_bit(LIOD_BIND, &pc->pc_flags)) {
+ int index = pc->pc_index;
+
+ if (index >= 0 && index < num_possible_cpus()) {
+ while (!cpu_online(index)) {
+ if (++index >= num_possible_cpus())
+ index = 0;
+ }
+ set_cpus_allowed_ptr(current,
+ cpumask_of_node(cpu_to_node(index)));
+ }
+ }
+#endif
+ /*
+ * XXX So far only "client" ptlrpcd uses an environment. In
+ * the future, ptlrpcd thread (or a thread-set) has to given
+ * an argument, describing its "scope".
+ */
+ rc = lu_context_init(&env.le_ctx,
+ LCT_CL_THREAD|LCT_REMEMBER|LCT_NOREF);
+ complete(&pc->pc_starting);
+
+ if (rc != 0)
+ return rc;
+
+ /*
+ * This mainloop strongly resembles ptlrpc_set_wait() except that our
+ * set never completes. ptlrpcd_check() calls ptlrpc_check_set() when
+ * there are requests in the set. New requests come in on the set's
+ * new_req_list and ptlrpcd_check() moves them into the set.
+ */
+ do {
+ struct l_wait_info lwi;
+ int timeout;
+
+ timeout = ptlrpc_set_next_timeout(set);
+ lwi = LWI_TIMEOUT(cfs_time_seconds(timeout ? timeout : 1),
+ ptlrpc_expired_set, set);
+
+ lu_context_enter(&env.le_ctx);
+ l_wait_event(set->set_waitq,
+ ptlrpcd_check(&env, pc), &lwi);
+ lu_context_exit(&env.le_ctx);
+
+ /*
+ * Abort inflight rpcs for forced stop case.
+ */
+ if (test_bit(LIOD_STOP, &pc->pc_flags)) {
+ if (test_bit(LIOD_FORCE, &pc->pc_flags))
+ ptlrpc_abort_set(set);
+ exit++;
+ }
+
+ /*
+ * Let's make one more loop to make sure that ptlrpcd_check()
+ * copied all raced new rpcs into the set so we can kill them.
+ */
+ } while (exit < 2);
+
+ /*
+ * Wait for inflight requests to drain.
+ */
+ if (!list_empty(&set->set_requests))
+ ptlrpc_set_wait(set);
+ lu_context_fini(&env.le_ctx);
+
+ complete(&pc->pc_finishing);
+
+ return 0;
+}
+
+/* XXX: We want multiple CPU cores to share the async RPC load. So we start many
+ * ptlrpcd threads. We also want to reduce the ptlrpcd overhead caused by
+ * data transfer cross-CPU cores. So we bind ptlrpcd thread to specified
+ * CPU core. But binding all ptlrpcd threads maybe cause response delay
+ * because of some CPU core(s) busy with other loads.
+ *
+ * For example: "ls -l", some async RPCs for statahead are assigned to
+ * ptlrpcd_0, and ptlrpcd_0 is bound to CPU_0, but CPU_0 may be quite busy
+ * with other non-ptlrpcd, like "ls -l" itself (we want to the "ls -l"
+ * thread, statahead thread, and ptlrpcd thread can run in parallel), under
+ * such case, the statahead async RPCs can not be processed in time, it is
+ * unexpected. If ptlrpcd_0 can be re-scheduled on other CPU core, it may
+ * be better. But it breaks former data transfer policy.
+ *
+ * So we shouldn't be blind for avoiding the data transfer. We make some
+ * compromise: divide the ptlrpcd threads pool into two parts. One part is
+ * for bound mode, each ptlrpcd thread in this part is bound to some CPU
+ * core. The other part is for free mode, all the ptlrpcd threads in the
+ * part can be scheduled on any CPU core. We specify some partnership
+ * between bound mode ptlrpcd thread(s) and free mode ptlrpcd thread(s),
+ * and the async RPC load within the partners are shared.
+ *
+ * It can partly avoid data transfer cross-CPU (if the bound mode ptlrpcd
+ * thread can be scheduled in time), and try to guarantee the async RPC
+ * processed ASAP (as long as the free mode ptlrpcd thread can be scheduled
+ * on any CPU core).
+ *
+ * As for how to specify the partnership between bound mode ptlrpcd
+ * thread(s) and free mode ptlrpcd thread(s), the simplest way is to use
+ * <free bound> pair. In future, we can specify some more complex
+ * partnership based on the patches for CPU partition. But before such
+ * patches are available, we prefer to use the simplest one.
+ */
+# ifdef CFS_CPU_MODE_NUMA
+# warning "fix ptlrpcd_bind() to use new CPU partition APIs"
+# endif
+static int ptlrpcd_bind(int index, int max)
+{
+ struct ptlrpcd_ctl *pc;
+ int rc = 0;
+#if defined(CONFIG_NUMA)
+ cpumask_t mask;
+#endif
+
+ LASSERT(index <= max - 1);
+ pc = &ptlrpcds->pd_threads[index];
+ switch (ptlrpcd_bind_policy) {
+ case PDB_POLICY_NONE:
+ pc->pc_npartners = -1;
+ break;
+ case PDB_POLICY_FULL:
+ pc->pc_npartners = 0;
+ set_bit(LIOD_BIND, &pc->pc_flags);
+ break;
+ case PDB_POLICY_PAIR:
+ LASSERT(max % 2 == 0);
+ pc->pc_npartners = 1;
+ break;
+ case PDB_POLICY_NEIGHBOR:
+#if defined(CONFIG_NUMA)
+ {
+ int i;
+ cpumask_copy(&mask, cpumask_of_node(cpu_to_node(index)));
+ for (i = max; i < num_online_cpus(); i++)
+ cpumask_clear_cpu(i, &mask);
+ pc->pc_npartners = cpumask_weight(&mask) - 1;
+ set_bit(LIOD_BIND, &pc->pc_flags);
+ }
+#else
+ LASSERT(max >= 3);
+ pc->pc_npartners = 2;
+#endif
+ break;
+ default:
+ CERROR("unknown ptlrpcd bind policy %d\n", ptlrpcd_bind_policy);
+ rc = -EINVAL;
+ }
+
+ if (rc == 0 && pc->pc_npartners > 0) {
+ OBD_ALLOC(pc->pc_partners,
+ sizeof(struct ptlrpcd_ctl *) * pc->pc_npartners);
+ if (pc->pc_partners == NULL) {
+ pc->pc_npartners = 0;
+ rc = -ENOMEM;
+ } else {
+ switch (ptlrpcd_bind_policy) {
+ case PDB_POLICY_PAIR:
+ if (index & 0x1) {
+ set_bit(LIOD_BIND, &pc->pc_flags);
+ pc->pc_partners[0] = &ptlrpcds->
+ pd_threads[index - 1];
+ ptlrpcds->pd_threads[index - 1].
+ pc_partners[0] = pc;
+ }
+ break;
+ case PDB_POLICY_NEIGHBOR:
+#if defined(CONFIG_NUMA)
+ {
+ struct ptlrpcd_ctl *ppc;
+ int i, pidx;
+ /* partners are cores in the same NUMA node.
+ * setup partnership only with ptlrpcd threads
+ * that are already initialized
+ */
+ for (pidx = 0, i = 0; i < index; i++) {
+ if (cpumask_test_cpu(i, &mask)) {
+ ppc = &ptlrpcds->pd_threads[i];
+ pc->pc_partners[pidx++] = ppc;
+ ppc->pc_partners[ppc->
+ pc_npartners++] = pc;
+ }
+ }
+ /* adjust number of partners to the number
+ * of partnership really setup */
+ pc->pc_npartners = pidx;
+ }
+#else
+ if (index & 0x1)
+ set_bit(LIOD_BIND, &pc->pc_flags);
+ if (index > 0) {
+ pc->pc_partners[0] = &ptlrpcds->
+ pd_threads[index - 1];
+ ptlrpcds->pd_threads[index - 1].
+ pc_partners[1] = pc;
+ if (index == max - 1) {
+ pc->pc_partners[1] =
+ &ptlrpcds->pd_threads[0];
+ ptlrpcds->pd_threads[0].
+ pc_partners[0] = pc;
+ }
+ }
+#endif
+ break;
+ }
+ }
+ }
+
+ return rc;
+}
+
+
+int ptlrpcd_start(int index, int max, const char *name, struct ptlrpcd_ctl *pc)
+{
+ int rc;
+
+ /*
+ * Do not allow start second thread for one pc.
+ */
+ if (test_and_set_bit(LIOD_START, &pc->pc_flags)) {
+ CWARN("Starting second thread (%s) for same pc %p\n",
+ name, pc);
+ return 0;
+ }
+
+ pc->pc_index = index;
+ init_completion(&pc->pc_starting);
+ init_completion(&pc->pc_finishing);
+ spin_lock_init(&pc->pc_lock);
+ strlcpy(pc->pc_name, name, sizeof(pc->pc_name));
+ pc->pc_set = ptlrpc_prep_set();
+ if (pc->pc_set == NULL) {
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ /*
+ * So far only "client" ptlrpcd uses an environment. In the future,
+ * ptlrpcd thread (or a thread-set) has to be given an argument,
+ * describing its "scope".
+ */
+ rc = lu_context_init(&pc->pc_env.le_ctx, LCT_CL_THREAD|LCT_REMEMBER);
+ if (rc != 0)
+ goto out_set;
+
+ {
+ struct task_struct *task;
+ if (index >= 0) {
+ rc = ptlrpcd_bind(index, max);
+ if (rc < 0)
+ goto out_env;
+ }
+
+ task = kthread_run(ptlrpcd, pc, "%s", pc->pc_name);
+ if (IS_ERR(task)) {
+ rc = PTR_ERR(task);
+ goto out_env;
+ }
+
+ wait_for_completion(&pc->pc_starting);
+ }
+ return 0;
+
+out_env:
+ lu_context_fini(&pc->pc_env.le_ctx);
+
+out_set:
+ if (pc->pc_set != NULL) {
+ struct ptlrpc_request_set *set = pc->pc_set;
+
+ spin_lock(&pc->pc_lock);
+ pc->pc_set = NULL;
+ spin_unlock(&pc->pc_lock);
+ ptlrpc_set_destroy(set);
+ }
+ clear_bit(LIOD_BIND, &pc->pc_flags);
+
+out:
+ clear_bit(LIOD_START, &pc->pc_flags);
+ return rc;
+}
+
+void ptlrpcd_stop(struct ptlrpcd_ctl *pc, int force)
+{
+ if (!test_bit(LIOD_START, &pc->pc_flags)) {
+ CWARN("Thread for pc %p was not started\n", pc);
+ return;
+ }
+
+ set_bit(LIOD_STOP, &pc->pc_flags);
+ if (force)
+ set_bit(LIOD_FORCE, &pc->pc_flags);
+ wake_up(&pc->pc_set->set_waitq);
+}
+
+void ptlrpcd_free(struct ptlrpcd_ctl *pc)
+{
+ struct ptlrpc_request_set *set = pc->pc_set;
+
+ if (!test_bit(LIOD_START, &pc->pc_flags)) {
+ CWARN("Thread for pc %p was not started\n", pc);
+ goto out;
+ }
+
+ wait_for_completion(&pc->pc_finishing);
+ lu_context_fini(&pc->pc_env.le_ctx);
+
+ spin_lock(&pc->pc_lock);
+ pc->pc_set = NULL;
+ spin_unlock(&pc->pc_lock);
+ ptlrpc_set_destroy(set);
+
+ clear_bit(LIOD_START, &pc->pc_flags);
+ clear_bit(LIOD_STOP, &pc->pc_flags);
+ clear_bit(LIOD_FORCE, &pc->pc_flags);
+ clear_bit(LIOD_BIND, &pc->pc_flags);
+
+out:
+ if (pc->pc_npartners > 0) {
+ LASSERT(pc->pc_partners != NULL);
+
+ OBD_FREE(pc->pc_partners,
+ sizeof(struct ptlrpcd_ctl *) * pc->pc_npartners);
+ pc->pc_partners = NULL;
+ }
+ pc->pc_npartners = 0;
+}
+
+static void ptlrpcd_fini(void)
+{
+ int i;
+
+ if (ptlrpcds != NULL) {
+ for (i = 0; i < ptlrpcds->pd_nthreads; i++)
+ ptlrpcd_stop(&ptlrpcds->pd_threads[i], 0);
+ for (i = 0; i < ptlrpcds->pd_nthreads; i++)
+ ptlrpcd_free(&ptlrpcds->pd_threads[i]);
+ ptlrpcd_stop(&ptlrpcds->pd_thread_rcv, 0);
+ ptlrpcd_free(&ptlrpcds->pd_thread_rcv);
+ OBD_FREE(ptlrpcds, ptlrpcds->pd_size);
+ ptlrpcds = NULL;
+ }
+}
+
+static int ptlrpcd_init(void)
+{
+ int nthreads = num_online_cpus();
+ char name[16];
+ int size, i = -1, j, rc = 0;
+
+ if (max_ptlrpcds > 0 && max_ptlrpcds < nthreads)
+ nthreads = max_ptlrpcds;
+ if (nthreads < 2)
+ nthreads = 2;
+ if (nthreads < 3 && ptlrpcd_bind_policy == PDB_POLICY_NEIGHBOR)
+ ptlrpcd_bind_policy = PDB_POLICY_PAIR;
+ else if (nthreads % 2 != 0 && ptlrpcd_bind_policy == PDB_POLICY_PAIR)
+ nthreads &= ~1; /* make sure it is even */
+
+ size = offsetof(struct ptlrpcd, pd_threads[nthreads]);
+ OBD_ALLOC(ptlrpcds, size);
+ if (ptlrpcds == NULL) {
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ snprintf(name, sizeof(name), "ptlrpcd_rcv");
+ set_bit(LIOD_RECOVERY, &ptlrpcds->pd_thread_rcv.pc_flags);
+ rc = ptlrpcd_start(-1, nthreads, name, &ptlrpcds->pd_thread_rcv);
+ if (rc < 0)
+ goto out;
+
+ /* XXX: We start nthreads ptlrpc daemons. Each of them can process any
+ * non-recovery async RPC to improve overall async RPC efficiency.
+ *
+ * But there are some issues with async I/O RPCs and async non-I/O
+ * RPCs processed in the same set under some cases. The ptlrpcd may
+ * be blocked by some async I/O RPC(s), then will cause other async
+ * non-I/O RPC(s) can not be processed in time.
+ *
+ * Maybe we should distinguish blocked async RPCs from non-blocked
+ * async RPCs, and process them in different ptlrpcd sets to avoid
+ * unnecessary dependency. But how to distribute async RPCs load
+ * among all the ptlrpc daemons becomes another trouble. */
+ for (i = 0; i < nthreads; i++) {
+ snprintf(name, sizeof(name), "ptlrpcd_%d", i);
+ rc = ptlrpcd_start(i, nthreads, name, &ptlrpcds->pd_threads[i]);
+ if (rc < 0)
+ goto out;
+ }
+
+ ptlrpcds->pd_size = size;
+ ptlrpcds->pd_index = 0;
+ ptlrpcds->pd_nthreads = nthreads;
+
+out:
+ if (rc != 0 && ptlrpcds != NULL) {
+ for (j = 0; j <= i; j++)
+ ptlrpcd_stop(&ptlrpcds->pd_threads[j], 0);
+ for (j = 0; j <= i; j++)
+ ptlrpcd_free(&ptlrpcds->pd_threads[j]);
+ ptlrpcd_stop(&ptlrpcds->pd_thread_rcv, 0);
+ ptlrpcd_free(&ptlrpcds->pd_thread_rcv);
+ OBD_FREE(ptlrpcds, size);
+ ptlrpcds = NULL;
+ }
+
+ return 0;
+}
+
+int ptlrpcd_addref(void)
+{
+ int rc = 0;
+
+ mutex_lock(&ptlrpcd_mutex);
+ if (++ptlrpcd_users == 1)
+ rc = ptlrpcd_init();
+ mutex_unlock(&ptlrpcd_mutex);
+ return rc;
+}
+EXPORT_SYMBOL(ptlrpcd_addref);
+
+void ptlrpcd_decref(void)
+{
+ mutex_lock(&ptlrpcd_mutex);
+ if (--ptlrpcd_users == 0)
+ ptlrpcd_fini();
+ mutex_unlock(&ptlrpcd_mutex);
+}
+EXPORT_SYMBOL(ptlrpcd_decref);
+/** @} ptlrpcd */
diff --git a/drivers/staging/lustre/lustre/ptlrpc/recover.c b/drivers/staging/lustre/lustre/ptlrpc/recover.c
new file mode 100644
index 000000000..7b1d72947
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ptlrpc/recover.c
@@ -0,0 +1,379 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/ptlrpc/recover.c
+ *
+ * Author: Mike Shaver <shaver@clusterfs.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_RPC
+#include "../../include/linux/libcfs/libcfs.h"
+
+#include "../include/obd_support.h"
+#include "../include/lustre_ha.h"
+#include "../include/lustre_net.h"
+#include "../include/lustre_import.h"
+#include "../include/lustre_export.h"
+#include "../include/obd.h"
+#include "../include/obd_class.h"
+#include <linux/list.h>
+
+#include "ptlrpc_internal.h"
+
+/**
+ * Start recovery on disconnected import.
+ * This is done by just attempting a connect
+ */
+void ptlrpc_initiate_recovery(struct obd_import *imp)
+{
+ CDEBUG(D_HA, "%s: starting recovery\n", obd2cli_tgt(imp->imp_obd));
+ ptlrpc_connect_import(imp);
+}
+
+/**
+ * Identify what request from replay list needs to be replayed next
+ * (based on what we have already replayed) and send it to server.
+ */
+int ptlrpc_replay_next(struct obd_import *imp, int *inflight)
+{
+ int rc = 0;
+ struct list_head *tmp, *pos;
+ struct ptlrpc_request *req = NULL;
+ __u64 last_transno;
+
+ *inflight = 0;
+
+ /* It might have committed some after we last spoke, so make sure we
+ * get rid of them now.
+ */
+ spin_lock(&imp->imp_lock);
+ imp->imp_last_transno_checked = 0;
+ ptlrpc_free_committed(imp);
+ last_transno = imp->imp_last_replay_transno;
+ spin_unlock(&imp->imp_lock);
+
+ CDEBUG(D_HA, "import %p from %s committed %llu last %llu\n",
+ imp, obd2cli_tgt(imp->imp_obd),
+ imp->imp_peer_committed_transno, last_transno);
+
+ /* Do I need to hold a lock across this iteration? We shouldn't be
+ * racing with any additions to the list, because we're in recovery
+ * and are therefore not processing additional requests to add. Calls
+ * to ptlrpc_free_committed might commit requests, but nothing "newer"
+ * than the one we're replaying (it can't be committed until it's
+ * replayed, and we're doing that here). l_f_e_safe protects against
+ * problems with the current request being committed, in the unlikely
+ * event of that race. So, in conclusion, I think that it's safe to
+ * perform this list-walk without the imp_lock held.
+ *
+ * But, the {mdc,osc}_replay_open callbacks both iterate
+ * request lists, and have comments saying they assume the
+ * imp_lock is being held by ptlrpc_replay, but it's not. it's
+ * just a little race...
+ */
+
+ /* Replay all the committed open requests on committed_list first */
+ if (!list_empty(&imp->imp_committed_list)) {
+ tmp = imp->imp_committed_list.prev;
+ req = list_entry(tmp, struct ptlrpc_request,
+ rq_replay_list);
+
+ /* The last request on committed_list hasn't been replayed */
+ if (req->rq_transno > last_transno) {
+ /* Since the imp_committed_list is immutable before
+ * all of it's requests being replayed, it's safe to
+ * use a cursor to accelerate the search */
+ imp->imp_replay_cursor = imp->imp_replay_cursor->next;
+
+ while (imp->imp_replay_cursor !=
+ &imp->imp_committed_list) {
+ req = list_entry(imp->imp_replay_cursor,
+ struct ptlrpc_request,
+ rq_replay_list);
+ if (req->rq_transno > last_transno)
+ break;
+
+ req = NULL;
+ imp->imp_replay_cursor =
+ imp->imp_replay_cursor->next;
+ }
+ } else {
+ /* All requests on committed_list have been replayed */
+ imp->imp_replay_cursor = &imp->imp_committed_list;
+ req = NULL;
+ }
+ }
+
+ /* All the requests in committed list have been replayed, let's replay
+ * the imp_replay_list */
+ if (req == NULL) {
+ list_for_each_safe(tmp, pos, &imp->imp_replay_list) {
+ req = list_entry(tmp, struct ptlrpc_request,
+ rq_replay_list);
+
+ if (req->rq_transno > last_transno)
+ break;
+ req = NULL;
+ }
+ }
+
+ /* If need to resend the last sent transno (because a reconnect
+ * has occurred), then stop on the matching req and send it again.
+ * If, however, the last sent transno has been committed then we
+ * continue replay from the next request. */
+ if (req != NULL && imp->imp_resend_replay)
+ lustre_msg_add_flags(req->rq_reqmsg, MSG_RESENT);
+
+ spin_lock(&imp->imp_lock);
+ imp->imp_resend_replay = 0;
+ spin_unlock(&imp->imp_lock);
+
+ if (req != NULL) {
+ rc = ptlrpc_replay_req(req);
+ if (rc) {
+ CERROR("recovery replay error %d for req %llu\n",
+ rc, req->rq_xid);
+ return rc;
+ }
+ *inflight = 1;
+ }
+ return rc;
+}
+
+/**
+ * Schedule resending of request on sending_list. This is done after
+ * we completed replaying of requests and locks.
+ */
+int ptlrpc_resend(struct obd_import *imp)
+{
+ struct ptlrpc_request *req, *next;
+
+ /* As long as we're in recovery, nothing should be added to the sending
+ * list, so we don't need to hold the lock during this iteration and
+ * resend process.
+ */
+ /* Well... what if lctl recover is called twice at the same time?
+ */
+ spin_lock(&imp->imp_lock);
+ if (imp->imp_state != LUSTRE_IMP_RECOVER) {
+ spin_unlock(&imp->imp_lock);
+ return -1;
+ }
+
+ list_for_each_entry_safe(req, next, &imp->imp_sending_list,
+ rq_list) {
+ LASSERTF((long)req > PAGE_CACHE_SIZE && req != LP_POISON,
+ "req %p bad\n", req);
+ LASSERTF(req->rq_type != LI_POISON, "req %p freed\n", req);
+ if (!ptlrpc_no_resend(req))
+ ptlrpc_resend_req(req);
+ }
+ spin_unlock(&imp->imp_lock);
+
+ return 0;
+}
+EXPORT_SYMBOL(ptlrpc_resend);
+
+/**
+ * Go through all requests in delayed list and wake their threads
+ * for resending
+ */
+void ptlrpc_wake_delayed(struct obd_import *imp)
+{
+ struct list_head *tmp, *pos;
+ struct ptlrpc_request *req;
+
+ spin_lock(&imp->imp_lock);
+ list_for_each_safe(tmp, pos, &imp->imp_delayed_list) {
+ req = list_entry(tmp, struct ptlrpc_request, rq_list);
+
+ DEBUG_REQ(D_HA, req, "waking (set %p):", req->rq_set);
+ ptlrpc_client_wake_req(req);
+ }
+ spin_unlock(&imp->imp_lock);
+}
+EXPORT_SYMBOL(ptlrpc_wake_delayed);
+
+void ptlrpc_request_handle_notconn(struct ptlrpc_request *failed_req)
+{
+ struct obd_import *imp = failed_req->rq_import;
+
+ CDEBUG(D_HA, "import %s of %s@%s abruptly disconnected: reconnecting\n",
+ imp->imp_obd->obd_name, obd2cli_tgt(imp->imp_obd),
+ imp->imp_connection->c_remote_uuid.uuid);
+
+ if (ptlrpc_set_import_discon(imp,
+ lustre_msg_get_conn_cnt(failed_req->rq_reqmsg))) {
+ if (!imp->imp_replayable) {
+ CDEBUG(D_HA, "import %s@%s for %s not replayable, auto-deactivating\n",
+ obd2cli_tgt(imp->imp_obd),
+ imp->imp_connection->c_remote_uuid.uuid,
+ imp->imp_obd->obd_name);
+ ptlrpc_deactivate_import(imp);
+ }
+ /* to control recovery via lctl {disable|enable}_recovery */
+ if (imp->imp_deactive == 0)
+ ptlrpc_connect_import(imp);
+ }
+
+ /* Wait for recovery to complete and resend. If evicted, then
+ this request will be errored out later.*/
+ spin_lock(&failed_req->rq_lock);
+ if (!failed_req->rq_no_resend)
+ failed_req->rq_resend = 1;
+ spin_unlock(&failed_req->rq_lock);
+}
+
+/**
+ * Administratively active/deactive a client.
+ * This should only be called by the ioctl interface, currently
+ * - the lctl deactivate and activate commands
+ * - echo 0/1 >> /proc/osc/XXX/active
+ * - client umount -f (ll_umount_begin)
+ */
+int ptlrpc_set_import_active(struct obd_import *imp, int active)
+{
+ struct obd_device *obd = imp->imp_obd;
+ int rc = 0;
+
+ LASSERT(obd);
+
+ /* When deactivating, mark import invalid, and abort in-flight
+ * requests. */
+ if (!active) {
+ LCONSOLE_WARN("setting import %s INACTIVE by administrator request\n",
+ obd2cli_tgt(imp->imp_obd));
+
+ /* set before invalidate to avoid messages about imp_inval
+ * set without imp_deactive in ptlrpc_import_delay_req */
+ spin_lock(&imp->imp_lock);
+ imp->imp_deactive = 1;
+ spin_unlock(&imp->imp_lock);
+
+ obd_import_event(imp->imp_obd, imp, IMP_EVENT_DEACTIVATE);
+
+ ptlrpc_invalidate_import(imp);
+ }
+
+ /* When activating, mark import valid, and attempt recovery */
+ if (active) {
+ CDEBUG(D_HA, "setting import %s VALID\n",
+ obd2cli_tgt(imp->imp_obd));
+
+ spin_lock(&imp->imp_lock);
+ imp->imp_deactive = 0;
+ spin_unlock(&imp->imp_lock);
+ obd_import_event(imp->imp_obd, imp, IMP_EVENT_ACTIVATE);
+
+ rc = ptlrpc_recover_import(imp, NULL, 0);
+ }
+
+ return rc;
+}
+EXPORT_SYMBOL(ptlrpc_set_import_active);
+
+/* Attempt to reconnect an import */
+int ptlrpc_recover_import(struct obd_import *imp, char *new_uuid, int async)
+{
+ int rc = 0;
+
+ spin_lock(&imp->imp_lock);
+ if (imp->imp_state == LUSTRE_IMP_NEW || imp->imp_deactive ||
+ atomic_read(&imp->imp_inval_count))
+ rc = -EINVAL;
+ spin_unlock(&imp->imp_lock);
+ if (rc)
+ goto out;
+
+ /* force import to be disconnected. */
+ ptlrpc_set_import_discon(imp, 0);
+
+ if (new_uuid) {
+ struct obd_uuid uuid;
+
+ /* intruct import to use new uuid */
+ obd_str2uuid(&uuid, new_uuid);
+ rc = import_set_conn_priority(imp, &uuid);
+ if (rc)
+ goto out;
+ }
+
+ /* Check if reconnect is already in progress */
+ spin_lock(&imp->imp_lock);
+ if (imp->imp_state != LUSTRE_IMP_DISCON) {
+ imp->imp_force_verify = 1;
+ rc = -EALREADY;
+ }
+ spin_unlock(&imp->imp_lock);
+ if (rc)
+ goto out;
+
+ rc = ptlrpc_connect_import(imp);
+ if (rc)
+ goto out;
+
+ if (!async) {
+ struct l_wait_info lwi;
+ int secs = cfs_time_seconds(obd_timeout);
+
+ CDEBUG(D_HA, "%s: recovery started, waiting %u seconds\n",
+ obd2cli_tgt(imp->imp_obd), secs);
+
+ lwi = LWI_TIMEOUT(secs, NULL, NULL);
+ rc = l_wait_event(imp->imp_recovery_waitq,
+ !ptlrpc_import_in_recovery(imp), &lwi);
+ CDEBUG(D_HA, "%s: recovery finished\n",
+ obd2cli_tgt(imp->imp_obd));
+ }
+
+out:
+ return rc;
+}
+EXPORT_SYMBOL(ptlrpc_recover_import);
+
+int ptlrpc_import_in_recovery(struct obd_import *imp)
+{
+ int in_recovery = 1;
+
+ spin_lock(&imp->imp_lock);
+ if (imp->imp_state == LUSTRE_IMP_FULL ||
+ imp->imp_state == LUSTRE_IMP_CLOSED ||
+ imp->imp_state == LUSTRE_IMP_DISCON ||
+ imp->imp_obd->obd_no_recov)
+ in_recovery = 0;
+ spin_unlock(&imp->imp_lock);
+
+ return in_recovery;
+}
diff --git a/drivers/staging/lustre/lustre/ptlrpc/sec.c b/drivers/staging/lustre/lustre/ptlrpc/sec.c
new file mode 100644
index 000000000..21e9dc9d5
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ptlrpc/sec.c
@@ -0,0 +1,2459 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/ptlrpc/sec.c
+ *
+ * Author: Eric Mei <ericm@clusterfs.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_SEC
+
+#include "../../include/linux/libcfs/libcfs.h"
+#include <linux/crypto.h>
+#include <linux/key.h>
+
+#include "../include/obd.h"
+#include "../include/obd_class.h"
+#include "../include/obd_support.h"
+#include "../include/lustre_net.h"
+#include "../include/lustre_import.h"
+#include "../include/lustre_dlm.h"
+#include "../include/lustre_sec.h"
+
+#include "ptlrpc_internal.h"
+
+/***********************************************
+ * policy registers *
+ ***********************************************/
+
+static rwlock_t policy_lock;
+static struct ptlrpc_sec_policy *policies[SPTLRPC_POLICY_MAX] = {
+ NULL,
+};
+
+int sptlrpc_register_policy(struct ptlrpc_sec_policy *policy)
+{
+ __u16 number = policy->sp_policy;
+
+ LASSERT(policy->sp_name);
+ LASSERT(policy->sp_cops);
+ LASSERT(policy->sp_sops);
+
+ if (number >= SPTLRPC_POLICY_MAX)
+ return -EINVAL;
+
+ write_lock(&policy_lock);
+ if (unlikely(policies[number])) {
+ write_unlock(&policy_lock);
+ return -EALREADY;
+ }
+ policies[number] = policy;
+ write_unlock(&policy_lock);
+
+ CDEBUG(D_SEC, "%s: registered\n", policy->sp_name);
+ return 0;
+}
+EXPORT_SYMBOL(sptlrpc_register_policy);
+
+int sptlrpc_unregister_policy(struct ptlrpc_sec_policy *policy)
+{
+ __u16 number = policy->sp_policy;
+
+ LASSERT(number < SPTLRPC_POLICY_MAX);
+
+ write_lock(&policy_lock);
+ if (unlikely(policies[number] == NULL)) {
+ write_unlock(&policy_lock);
+ CERROR("%s: already unregistered\n", policy->sp_name);
+ return -EINVAL;
+ }
+
+ LASSERT(policies[number] == policy);
+ policies[number] = NULL;
+ write_unlock(&policy_lock);
+
+ CDEBUG(D_SEC, "%s: unregistered\n", policy->sp_name);
+ return 0;
+}
+EXPORT_SYMBOL(sptlrpc_unregister_policy);
+
+static
+struct ptlrpc_sec_policy *sptlrpc_wireflavor2policy(__u32 flavor)
+{
+ static DEFINE_MUTEX(load_mutex);
+ static atomic_t loaded = ATOMIC_INIT(0);
+ struct ptlrpc_sec_policy *policy;
+ __u16 number = SPTLRPC_FLVR_POLICY(flavor);
+ __u16 flag = 0;
+
+ if (number >= SPTLRPC_POLICY_MAX)
+ return NULL;
+
+ while (1) {
+ read_lock(&policy_lock);
+ policy = policies[number];
+ if (policy && !try_module_get(policy->sp_owner))
+ policy = NULL;
+ if (policy == NULL)
+ flag = atomic_read(&loaded);
+ read_unlock(&policy_lock);
+
+ if (policy != NULL || flag != 0 ||
+ number != SPTLRPC_POLICY_GSS)
+ break;
+
+ /* try to load gss module, once */
+ mutex_lock(&load_mutex);
+ if (atomic_read(&loaded) == 0) {
+ if (request_module("ptlrpc_gss") == 0)
+ CDEBUG(D_SEC,
+ "module ptlrpc_gss loaded on demand\n");
+ else
+ CERROR("Unable to load module ptlrpc_gss\n");
+
+ atomic_set(&loaded, 1);
+ }
+ mutex_unlock(&load_mutex);
+ }
+
+ return policy;
+}
+
+__u32 sptlrpc_name2flavor_base(const char *name)
+{
+ if (!strcmp(name, "null"))
+ return SPTLRPC_FLVR_NULL;
+ if (!strcmp(name, "plain"))
+ return SPTLRPC_FLVR_PLAIN;
+ if (!strcmp(name, "krb5n"))
+ return SPTLRPC_FLVR_KRB5N;
+ if (!strcmp(name, "krb5a"))
+ return SPTLRPC_FLVR_KRB5A;
+ if (!strcmp(name, "krb5i"))
+ return SPTLRPC_FLVR_KRB5I;
+ if (!strcmp(name, "krb5p"))
+ return SPTLRPC_FLVR_KRB5P;
+
+ return SPTLRPC_FLVR_INVALID;
+}
+EXPORT_SYMBOL(sptlrpc_name2flavor_base);
+
+const char *sptlrpc_flavor2name_base(__u32 flvr)
+{
+ __u32 base = SPTLRPC_FLVR_BASE(flvr);
+
+ if (base == SPTLRPC_FLVR_BASE(SPTLRPC_FLVR_NULL))
+ return "null";
+ else if (base == SPTLRPC_FLVR_BASE(SPTLRPC_FLVR_PLAIN))
+ return "plain";
+ else if (base == SPTLRPC_FLVR_BASE(SPTLRPC_FLVR_KRB5N))
+ return "krb5n";
+ else if (base == SPTLRPC_FLVR_BASE(SPTLRPC_FLVR_KRB5A))
+ return "krb5a";
+ else if (base == SPTLRPC_FLVR_BASE(SPTLRPC_FLVR_KRB5I))
+ return "krb5i";
+ else if (base == SPTLRPC_FLVR_BASE(SPTLRPC_FLVR_KRB5P))
+ return "krb5p";
+
+ CERROR("invalid wire flavor 0x%x\n", flvr);
+ return "invalid";
+}
+EXPORT_SYMBOL(sptlrpc_flavor2name_base);
+
+char *sptlrpc_flavor2name_bulk(struct sptlrpc_flavor *sf,
+ char *buf, int bufsize)
+{
+ if (SPTLRPC_FLVR_POLICY(sf->sf_rpc) == SPTLRPC_POLICY_PLAIN)
+ snprintf(buf, bufsize, "hash:%s",
+ sptlrpc_get_hash_name(sf->u_bulk.hash.hash_alg));
+ else
+ snprintf(buf, bufsize, "%s",
+ sptlrpc_flavor2name_base(sf->sf_rpc));
+
+ buf[bufsize - 1] = '\0';
+ return buf;
+}
+EXPORT_SYMBOL(sptlrpc_flavor2name_bulk);
+
+char *sptlrpc_flavor2name(struct sptlrpc_flavor *sf, char *buf, int bufsize)
+{
+ strlcpy(buf, sptlrpc_flavor2name_base(sf->sf_rpc), bufsize);
+
+ /*
+ * currently we don't support customized bulk specification for
+ * flavors other than plain
+ */
+ if (SPTLRPC_FLVR_POLICY(sf->sf_rpc) == SPTLRPC_POLICY_PLAIN) {
+ char bspec[16];
+
+ bspec[0] = '-';
+ sptlrpc_flavor2name_bulk(sf, &bspec[1], sizeof(bspec) - 1);
+ strlcat(buf, bspec, bufsize);
+ }
+
+ return buf;
+}
+EXPORT_SYMBOL(sptlrpc_flavor2name);
+
+char *sptlrpc_secflags2str(__u32 flags, char *buf, int bufsize)
+{
+ buf[0] = '\0';
+
+ if (flags & PTLRPC_SEC_FL_REVERSE)
+ strlcat(buf, "reverse,", bufsize);
+ if (flags & PTLRPC_SEC_FL_ROOTONLY)
+ strlcat(buf, "rootonly,", bufsize);
+ if (flags & PTLRPC_SEC_FL_UDESC)
+ strlcat(buf, "udesc,", bufsize);
+ if (flags & PTLRPC_SEC_FL_BULK)
+ strlcat(buf, "bulk,", bufsize);
+ if (buf[0] == '\0')
+ strlcat(buf, "-,", bufsize);
+
+ return buf;
+}
+EXPORT_SYMBOL(sptlrpc_secflags2str);
+
+/**************************************************
+ * client context APIs *
+ **************************************************/
+
+static
+struct ptlrpc_cli_ctx *get_my_ctx(struct ptlrpc_sec *sec)
+{
+ struct vfs_cred vcred;
+ int create = 1, remove_dead = 1;
+
+ LASSERT(sec);
+ LASSERT(sec->ps_policy->sp_cops->lookup_ctx);
+
+ if (sec->ps_flvr.sf_flags & (PTLRPC_SEC_FL_REVERSE |
+ PTLRPC_SEC_FL_ROOTONLY)) {
+ vcred.vc_uid = 0;
+ vcred.vc_gid = 0;
+ if (sec->ps_flvr.sf_flags & PTLRPC_SEC_FL_REVERSE) {
+ create = 0;
+ remove_dead = 0;
+ }
+ } else {
+ vcred.vc_uid = from_kuid(&init_user_ns, current_uid());
+ vcred.vc_gid = from_kgid(&init_user_ns, current_gid());
+ }
+
+ return sec->ps_policy->sp_cops->lookup_ctx(sec, &vcred,
+ create, remove_dead);
+}
+
+struct ptlrpc_cli_ctx *sptlrpc_cli_ctx_get(struct ptlrpc_cli_ctx *ctx)
+{
+ atomic_inc(&ctx->cc_refcount);
+ return ctx;
+}
+EXPORT_SYMBOL(sptlrpc_cli_ctx_get);
+
+void sptlrpc_cli_ctx_put(struct ptlrpc_cli_ctx *ctx, int sync)
+{
+ struct ptlrpc_sec *sec = ctx->cc_sec;
+
+ LASSERT(sec);
+ LASSERT_ATOMIC_POS(&ctx->cc_refcount);
+
+ if (!atomic_dec_and_test(&ctx->cc_refcount))
+ return;
+
+ sec->ps_policy->sp_cops->release_ctx(sec, ctx, sync);
+}
+EXPORT_SYMBOL(sptlrpc_cli_ctx_put);
+
+/**
+ * Expire the client context immediately.
+ *
+ * \pre Caller must hold at least 1 reference on the \a ctx.
+ */
+void sptlrpc_cli_ctx_expire(struct ptlrpc_cli_ctx *ctx)
+{
+ LASSERT(ctx->cc_ops->force_die);
+ ctx->cc_ops->force_die(ctx, 0);
+}
+EXPORT_SYMBOL(sptlrpc_cli_ctx_expire);
+
+/**
+ * To wake up the threads who are waiting for this client context. Called
+ * after some status change happened on \a ctx.
+ */
+void sptlrpc_cli_ctx_wakeup(struct ptlrpc_cli_ctx *ctx)
+{
+ struct ptlrpc_request *req, *next;
+
+ spin_lock(&ctx->cc_lock);
+ list_for_each_entry_safe(req, next, &ctx->cc_req_list,
+ rq_ctx_chain) {
+ list_del_init(&req->rq_ctx_chain);
+ ptlrpc_client_wake_req(req);
+ }
+ spin_unlock(&ctx->cc_lock);
+}
+EXPORT_SYMBOL(sptlrpc_cli_ctx_wakeup);
+
+int sptlrpc_cli_ctx_display(struct ptlrpc_cli_ctx *ctx, char *buf, int bufsize)
+{
+ LASSERT(ctx->cc_ops);
+
+ if (ctx->cc_ops->display == NULL)
+ return 0;
+
+ return ctx->cc_ops->display(ctx, buf, bufsize);
+}
+
+static int import_sec_check_expire(struct obd_import *imp)
+{
+ int adapt = 0;
+
+ spin_lock(&imp->imp_lock);
+ if (imp->imp_sec_expire &&
+ imp->imp_sec_expire < get_seconds()) {
+ adapt = 1;
+ imp->imp_sec_expire = 0;
+ }
+ spin_unlock(&imp->imp_lock);
+
+ if (!adapt)
+ return 0;
+
+ CDEBUG(D_SEC, "found delayed sec adapt expired, do it now\n");
+ return sptlrpc_import_sec_adapt(imp, NULL, NULL);
+}
+
+static int import_sec_validate_get(struct obd_import *imp,
+ struct ptlrpc_sec **sec)
+{
+ int rc;
+
+ if (unlikely(imp->imp_sec_expire)) {
+ rc = import_sec_check_expire(imp);
+ if (rc)
+ return rc;
+ }
+
+ *sec = sptlrpc_import_sec_ref(imp);
+ if (*sec == NULL) {
+ CERROR("import %p (%s) with no sec\n",
+ imp, ptlrpc_import_state_name(imp->imp_state));
+ return -EACCES;
+ }
+
+ if (unlikely((*sec)->ps_dying)) {
+ CERROR("attempt to use dying sec %p\n", sec);
+ sptlrpc_sec_put(*sec);
+ return -EACCES;
+ }
+
+ return 0;
+}
+
+/**
+ * Given a \a req, find or allocate a appropriate context for it.
+ * \pre req->rq_cli_ctx == NULL.
+ *
+ * \retval 0 succeed, and req->rq_cli_ctx is set.
+ * \retval -ev error number, and req->rq_cli_ctx == NULL.
+ */
+int sptlrpc_req_get_ctx(struct ptlrpc_request *req)
+{
+ struct obd_import *imp = req->rq_import;
+ struct ptlrpc_sec *sec;
+ int rc;
+
+ LASSERT(!req->rq_cli_ctx);
+ LASSERT(imp);
+
+ rc = import_sec_validate_get(imp, &sec);
+ if (rc)
+ return rc;
+
+ req->rq_cli_ctx = get_my_ctx(sec);
+
+ sptlrpc_sec_put(sec);
+
+ if (!req->rq_cli_ctx) {
+ CERROR("req %p: fail to get context\n", req);
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+/**
+ * Drop the context for \a req.
+ * \pre req->rq_cli_ctx != NULL.
+ * \post req->rq_cli_ctx == NULL.
+ *
+ * If \a sync == 0, this function should return quickly without sleep;
+ * otherwise it might trigger and wait for the whole process of sending
+ * an context-destroying rpc to server.
+ */
+void sptlrpc_req_put_ctx(struct ptlrpc_request *req, int sync)
+{
+ LASSERT(req);
+ LASSERT(req->rq_cli_ctx);
+
+ /* request might be asked to release earlier while still
+ * in the context waiting list.
+ */
+ if (!list_empty(&req->rq_ctx_chain)) {
+ spin_lock(&req->rq_cli_ctx->cc_lock);
+ list_del_init(&req->rq_ctx_chain);
+ spin_unlock(&req->rq_cli_ctx->cc_lock);
+ }
+
+ sptlrpc_cli_ctx_put(req->rq_cli_ctx, sync);
+ req->rq_cli_ctx = NULL;
+}
+
+static
+int sptlrpc_req_ctx_switch(struct ptlrpc_request *req,
+ struct ptlrpc_cli_ctx *oldctx,
+ struct ptlrpc_cli_ctx *newctx)
+{
+ struct sptlrpc_flavor old_flvr;
+ char *reqmsg = NULL; /* to workaround old gcc */
+ int reqmsg_size;
+ int rc = 0;
+
+ LASSERT(req->rq_reqmsg);
+ LASSERT(req->rq_reqlen);
+ LASSERT(req->rq_replen);
+
+ CDEBUG(D_SEC, "req %p: switch ctx %p(%u->%s) -> %p(%u->%s), switch sec %p(%s) -> %p(%s)\n",
+ req,
+ oldctx, oldctx->cc_vcred.vc_uid, sec2target_str(oldctx->cc_sec),
+ newctx, newctx->cc_vcred.vc_uid, sec2target_str(newctx->cc_sec),
+ oldctx->cc_sec, oldctx->cc_sec->ps_policy->sp_name,
+ newctx->cc_sec, newctx->cc_sec->ps_policy->sp_name);
+
+ /* save flavor */
+ old_flvr = req->rq_flvr;
+
+ /* save request message */
+ reqmsg_size = req->rq_reqlen;
+ if (reqmsg_size != 0) {
+ OBD_ALLOC_LARGE(reqmsg, reqmsg_size);
+ if (reqmsg == NULL)
+ return -ENOMEM;
+ memcpy(reqmsg, req->rq_reqmsg, reqmsg_size);
+ }
+
+ /* release old req/rep buf */
+ req->rq_cli_ctx = oldctx;
+ sptlrpc_cli_free_reqbuf(req);
+ sptlrpc_cli_free_repbuf(req);
+ req->rq_cli_ctx = newctx;
+
+ /* recalculate the flavor */
+ sptlrpc_req_set_flavor(req, 0);
+
+ /* alloc new request buffer
+ * we don't need to alloc reply buffer here, leave it to the
+ * rest procedure of ptlrpc */
+ if (reqmsg_size != 0) {
+ rc = sptlrpc_cli_alloc_reqbuf(req, reqmsg_size);
+ if (!rc) {
+ LASSERT(req->rq_reqmsg);
+ memcpy(req->rq_reqmsg, reqmsg, reqmsg_size);
+ } else {
+ CWARN("failed to alloc reqbuf: %d\n", rc);
+ req->rq_flvr = old_flvr;
+ }
+
+ OBD_FREE_LARGE(reqmsg, reqmsg_size);
+ }
+ return rc;
+}
+
+/**
+ * If current context of \a req is dead somehow, e.g. we just switched flavor
+ * thus marked original contexts dead, we'll find a new context for it. if
+ * no switch is needed, \a req will end up with the same context.
+ *
+ * \note a request must have a context, to keep other parts of code happy.
+ * In any case of failure during the switching, we must restore the old one.
+ */
+int sptlrpc_req_replace_dead_ctx(struct ptlrpc_request *req)
+{
+ struct ptlrpc_cli_ctx *oldctx = req->rq_cli_ctx;
+ struct ptlrpc_cli_ctx *newctx;
+ int rc;
+
+ LASSERT(oldctx);
+
+ sptlrpc_cli_ctx_get(oldctx);
+ sptlrpc_req_put_ctx(req, 0);
+
+ rc = sptlrpc_req_get_ctx(req);
+ if (unlikely(rc)) {
+ LASSERT(!req->rq_cli_ctx);
+
+ /* restore old ctx */
+ req->rq_cli_ctx = oldctx;
+ return rc;
+ }
+
+ newctx = req->rq_cli_ctx;
+ LASSERT(newctx);
+
+ if (unlikely(newctx == oldctx &&
+ test_bit(PTLRPC_CTX_DEAD_BIT, &oldctx->cc_flags))) {
+ /*
+ * still get the old dead ctx, usually means system too busy
+ */
+ CDEBUG(D_SEC,
+ "ctx (%p, fl %lx) doesn't switch, relax a little bit\n",
+ newctx, newctx->cc_flags);
+
+ set_current_state(TASK_INTERRUPTIBLE);
+ schedule_timeout(HZ);
+ } else {
+ /*
+ * it's possible newctx == oldctx if we're switching
+ * subflavor with the same sec.
+ */
+ rc = sptlrpc_req_ctx_switch(req, oldctx, newctx);
+ if (rc) {
+ /* restore old ctx */
+ sptlrpc_req_put_ctx(req, 0);
+ req->rq_cli_ctx = oldctx;
+ return rc;
+ }
+
+ LASSERT(req->rq_cli_ctx == newctx);
+ }
+
+ sptlrpc_cli_ctx_put(oldctx, 1);
+ return 0;
+}
+EXPORT_SYMBOL(sptlrpc_req_replace_dead_ctx);
+
+static
+int ctx_check_refresh(struct ptlrpc_cli_ctx *ctx)
+{
+ if (cli_ctx_is_refreshed(ctx))
+ return 1;
+ return 0;
+}
+
+static
+int ctx_refresh_timeout(void *data)
+{
+ struct ptlrpc_request *req = data;
+ int rc;
+
+ /* conn_cnt is needed in expire_one_request */
+ lustre_msg_set_conn_cnt(req->rq_reqmsg, req->rq_import->imp_conn_cnt);
+
+ rc = ptlrpc_expire_one_request(req, 1);
+ /* if we started recovery, we should mark this ctx dead; otherwise
+ * in case of lgssd died nobody would retire this ctx, following
+ * connecting will still find the same ctx thus cause deadlock.
+ * there's an assumption that expire time of the request should be
+ * later than the context refresh expire time.
+ */
+ if (rc == 0)
+ req->rq_cli_ctx->cc_ops->force_die(req->rq_cli_ctx, 0);
+ return rc;
+}
+
+static
+void ctx_refresh_interrupt(void *data)
+{
+ struct ptlrpc_request *req = data;
+
+ spin_lock(&req->rq_lock);
+ req->rq_intr = 1;
+ spin_unlock(&req->rq_lock);
+}
+
+static
+void req_off_ctx_list(struct ptlrpc_request *req, struct ptlrpc_cli_ctx *ctx)
+{
+ spin_lock(&ctx->cc_lock);
+ if (!list_empty(&req->rq_ctx_chain))
+ list_del_init(&req->rq_ctx_chain);
+ spin_unlock(&ctx->cc_lock);
+}
+
+/**
+ * To refresh the context of \req, if it's not up-to-date.
+ * \param timeout
+ * - < 0: don't wait
+ * - = 0: wait until success or fatal error occur
+ * - > 0: timeout value (in seconds)
+ *
+ * The status of the context could be subject to be changed by other threads
+ * at any time. We allow this race, but once we return with 0, the caller will
+ * suppose it's uptodated and keep using it until the owning rpc is done.
+ *
+ * \retval 0 only if the context is uptodated.
+ * \retval -ev error number.
+ */
+int sptlrpc_req_refresh_ctx(struct ptlrpc_request *req, long timeout)
+{
+ struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx;
+ struct ptlrpc_sec *sec;
+ struct l_wait_info lwi;
+ int rc;
+
+ LASSERT(ctx);
+
+ if (req->rq_ctx_init || req->rq_ctx_fini)
+ return 0;
+
+ /*
+ * during the process a request's context might change type even
+ * (e.g. from gss ctx to null ctx), so each loop we need to re-check
+ * everything
+ */
+again:
+ rc = import_sec_validate_get(req->rq_import, &sec);
+ if (rc)
+ return rc;
+
+ if (sec->ps_flvr.sf_rpc != req->rq_flvr.sf_rpc) {
+ CDEBUG(D_SEC, "req %p: flavor has changed %x -> %x\n",
+ req, req->rq_flvr.sf_rpc, sec->ps_flvr.sf_rpc);
+ req_off_ctx_list(req, ctx);
+ sptlrpc_req_replace_dead_ctx(req);
+ ctx = req->rq_cli_ctx;
+ }
+ sptlrpc_sec_put(sec);
+
+ if (cli_ctx_is_eternal(ctx))
+ return 0;
+
+ if (unlikely(test_bit(PTLRPC_CTX_NEW_BIT, &ctx->cc_flags))) {
+ LASSERT(ctx->cc_ops->refresh);
+ ctx->cc_ops->refresh(ctx);
+ }
+ LASSERT(test_bit(PTLRPC_CTX_NEW_BIT, &ctx->cc_flags) == 0);
+
+ LASSERT(ctx->cc_ops->validate);
+ if (ctx->cc_ops->validate(ctx) == 0) {
+ req_off_ctx_list(req, ctx);
+ return 0;
+ }
+
+ if (unlikely(test_bit(PTLRPC_CTX_ERROR_BIT, &ctx->cc_flags))) {
+ spin_lock(&req->rq_lock);
+ req->rq_err = 1;
+ spin_unlock(&req->rq_lock);
+ req_off_ctx_list(req, ctx);
+ return -EPERM;
+ }
+
+ /*
+ * There's a subtle issue for resending RPCs, suppose following
+ * situation:
+ * 1. the request was sent to server.
+ * 2. recovery was kicked start, after finished the request was
+ * marked as resent.
+ * 3. resend the request.
+ * 4. old reply from server received, we accept and verify the reply.
+ * this has to be success, otherwise the error will be aware
+ * by application.
+ * 5. new reply from server received, dropped by LNet.
+ *
+ * Note the xid of old & new request is the same. We can't simply
+ * change xid for the resent request because the server replies on
+ * it for reply reconstruction.
+ *
+ * Commonly the original context should be uptodate because we
+ * have a expiry nice time; server will keep its context because
+ * we at least hold a ref of old context which prevent context
+ * destroying RPC being sent. So server still can accept the request
+ * and finish the RPC. But if that's not the case:
+ * 1. If server side context has been trimmed, a NO_CONTEXT will
+ * be returned, gss_cli_ctx_verify/unseal will switch to new
+ * context by force.
+ * 2. Current context never be refreshed, then we are fine: we
+ * never really send request with old context before.
+ */
+ if (test_bit(PTLRPC_CTX_UPTODATE_BIT, &ctx->cc_flags) &&
+ unlikely(req->rq_reqmsg) &&
+ lustre_msg_get_flags(req->rq_reqmsg) & MSG_RESENT) {
+ req_off_ctx_list(req, ctx);
+ return 0;
+ }
+
+ if (unlikely(test_bit(PTLRPC_CTX_DEAD_BIT, &ctx->cc_flags))) {
+ req_off_ctx_list(req, ctx);
+ /*
+ * don't switch ctx if import was deactivated
+ */
+ if (req->rq_import->imp_deactive) {
+ spin_lock(&req->rq_lock);
+ req->rq_err = 1;
+ spin_unlock(&req->rq_lock);
+ return -EINTR;
+ }
+
+ rc = sptlrpc_req_replace_dead_ctx(req);
+ if (rc) {
+ LASSERT(ctx == req->rq_cli_ctx);
+ CERROR("req %p: failed to replace dead ctx %p: %d\n",
+ req, ctx, rc);
+ spin_lock(&req->rq_lock);
+ req->rq_err = 1;
+ spin_unlock(&req->rq_lock);
+ return rc;
+ }
+
+ ctx = req->rq_cli_ctx;
+ goto again;
+ }
+
+ /*
+ * Now we're sure this context is during upcall, add myself into
+ * waiting list
+ */
+ spin_lock(&ctx->cc_lock);
+ if (list_empty(&req->rq_ctx_chain))
+ list_add(&req->rq_ctx_chain, &ctx->cc_req_list);
+ spin_unlock(&ctx->cc_lock);
+
+ if (timeout < 0)
+ return -EWOULDBLOCK;
+
+ /* Clear any flags that may be present from previous sends */
+ LASSERT(req->rq_receiving_reply == 0);
+ spin_lock(&req->rq_lock);
+ req->rq_err = 0;
+ req->rq_timedout = 0;
+ req->rq_resend = 0;
+ req->rq_restart = 0;
+ spin_unlock(&req->rq_lock);
+
+ lwi = LWI_TIMEOUT_INTR(timeout * HZ, ctx_refresh_timeout,
+ ctx_refresh_interrupt, req);
+ rc = l_wait_event(req->rq_reply_waitq, ctx_check_refresh(ctx), &lwi);
+
+ /*
+ * following cases could lead us here:
+ * - successfully refreshed;
+ * - interrupted;
+ * - timedout, and we don't want recover from the failure;
+ * - timedout, and waked up upon recovery finished;
+ * - someone else mark this ctx dead by force;
+ * - someone invalidate the req and call ptlrpc_client_wake_req(),
+ * e.g. ptlrpc_abort_inflight();
+ */
+ if (!cli_ctx_is_refreshed(ctx)) {
+ /* timed out or interrupted */
+ req_off_ctx_list(req, ctx);
+
+ LASSERT(rc != 0);
+ return rc;
+ }
+
+ goto again;
+}
+
+/**
+ * Initialize flavor settings for \a req, according to \a opcode.
+ *
+ * \note this could be called in two situations:
+ * - new request from ptlrpc_pre_req(), with proper @opcode
+ * - old request which changed ctx in the middle, with @opcode == 0
+ */
+void sptlrpc_req_set_flavor(struct ptlrpc_request *req, int opcode)
+{
+ struct ptlrpc_sec *sec;
+
+ LASSERT(req->rq_import);
+ LASSERT(req->rq_cli_ctx);
+ LASSERT(req->rq_cli_ctx->cc_sec);
+ LASSERT(req->rq_bulk_read == 0 || req->rq_bulk_write == 0);
+
+ /* special security flags according to opcode */
+ switch (opcode) {
+ case OST_READ:
+ case MDS_READPAGE:
+ case MGS_CONFIG_READ:
+ case OBD_IDX_READ:
+ req->rq_bulk_read = 1;
+ break;
+ case OST_WRITE:
+ case MDS_WRITEPAGE:
+ req->rq_bulk_write = 1;
+ break;
+ case SEC_CTX_INIT:
+ req->rq_ctx_init = 1;
+ break;
+ case SEC_CTX_FINI:
+ req->rq_ctx_fini = 1;
+ break;
+ case 0:
+ /* init/fini rpc won't be resend, so can't be here */
+ LASSERT(req->rq_ctx_init == 0);
+ LASSERT(req->rq_ctx_fini == 0);
+
+ /* cleanup flags, which should be recalculated */
+ req->rq_pack_udesc = 0;
+ req->rq_pack_bulk = 0;
+ break;
+ }
+
+ sec = req->rq_cli_ctx->cc_sec;
+
+ spin_lock(&sec->ps_lock);
+ req->rq_flvr = sec->ps_flvr;
+ spin_unlock(&sec->ps_lock);
+
+ /* force SVC_NULL for context initiation rpc, SVC_INTG for context
+ * destruction rpc */
+ if (unlikely(req->rq_ctx_init))
+ flvr_set_svc(&req->rq_flvr.sf_rpc, SPTLRPC_SVC_NULL);
+ else if (unlikely(req->rq_ctx_fini))
+ flvr_set_svc(&req->rq_flvr.sf_rpc, SPTLRPC_SVC_INTG);
+
+ /* user descriptor flag, null security can't do it anyway */
+ if ((sec->ps_flvr.sf_flags & PTLRPC_SEC_FL_UDESC) &&
+ (req->rq_flvr.sf_rpc != SPTLRPC_FLVR_NULL))
+ req->rq_pack_udesc = 1;
+
+ /* bulk security flag */
+ if ((req->rq_bulk_read || req->rq_bulk_write) &&
+ sptlrpc_flavor_has_bulk(&req->rq_flvr))
+ req->rq_pack_bulk = 1;
+}
+
+void sptlrpc_request_out_callback(struct ptlrpc_request *req)
+{
+ if (SPTLRPC_FLVR_SVC(req->rq_flvr.sf_rpc) != SPTLRPC_SVC_PRIV)
+ return;
+
+ LASSERT(req->rq_clrbuf);
+ if (req->rq_pool || !req->rq_reqbuf)
+ return;
+
+ OBD_FREE(req->rq_reqbuf, req->rq_reqbuf_len);
+ req->rq_reqbuf = NULL;
+ req->rq_reqbuf_len = 0;
+}
+
+/**
+ * Given an import \a imp, check whether current user has a valid context
+ * or not. We may create a new context and try to refresh it, and try
+ * repeatedly try in case of non-fatal errors. Return 0 means success.
+ */
+int sptlrpc_import_check_ctx(struct obd_import *imp)
+{
+ struct ptlrpc_sec *sec;
+ struct ptlrpc_cli_ctx *ctx;
+ struct ptlrpc_request *req = NULL;
+ int rc;
+
+ might_sleep();
+
+ sec = sptlrpc_import_sec_ref(imp);
+ ctx = get_my_ctx(sec);
+ sptlrpc_sec_put(sec);
+
+ if (!ctx)
+ return -ENOMEM;
+
+ if (cli_ctx_is_eternal(ctx) ||
+ ctx->cc_ops->validate(ctx) == 0) {
+ sptlrpc_cli_ctx_put(ctx, 1);
+ return 0;
+ }
+
+ if (cli_ctx_is_error(ctx)) {
+ sptlrpc_cli_ctx_put(ctx, 1);
+ return -EACCES;
+ }
+
+ req = ptlrpc_request_cache_alloc(GFP_NOFS);
+ if (!req)
+ return -ENOMEM;
+
+ spin_lock_init(&req->rq_lock);
+ atomic_set(&req->rq_refcount, 10000);
+ INIT_LIST_HEAD(&req->rq_ctx_chain);
+ init_waitqueue_head(&req->rq_reply_waitq);
+ init_waitqueue_head(&req->rq_set_waitq);
+ req->rq_import = imp;
+ req->rq_flvr = sec->ps_flvr;
+ req->rq_cli_ctx = ctx;
+
+ rc = sptlrpc_req_refresh_ctx(req, 0);
+ LASSERT(list_empty(&req->rq_ctx_chain));
+ sptlrpc_cli_ctx_put(req->rq_cli_ctx, 1);
+ ptlrpc_request_cache_free(req);
+
+ return rc;
+}
+
+/**
+ * Used by ptlrpc client, to perform the pre-defined security transformation
+ * upon the request message of \a req. After this function called,
+ * req->rq_reqmsg is still accessible as clear text.
+ */
+int sptlrpc_cli_wrap_request(struct ptlrpc_request *req)
+{
+ struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx;
+ int rc = 0;
+
+ LASSERT(ctx);
+ LASSERT(ctx->cc_sec);
+ LASSERT(req->rq_reqbuf || req->rq_clrbuf);
+
+ /* we wrap bulk request here because now we can be sure
+ * the context is uptodate.
+ */
+ if (req->rq_bulk) {
+ rc = sptlrpc_cli_wrap_bulk(req, req->rq_bulk);
+ if (rc)
+ return rc;
+ }
+
+ switch (SPTLRPC_FLVR_SVC(req->rq_flvr.sf_rpc)) {
+ case SPTLRPC_SVC_NULL:
+ case SPTLRPC_SVC_AUTH:
+ case SPTLRPC_SVC_INTG:
+ LASSERT(ctx->cc_ops->sign);
+ rc = ctx->cc_ops->sign(ctx, req);
+ break;
+ case SPTLRPC_SVC_PRIV:
+ LASSERT(ctx->cc_ops->seal);
+ rc = ctx->cc_ops->seal(ctx, req);
+ break;
+ default:
+ LBUG();
+ }
+
+ if (rc == 0) {
+ LASSERT(req->rq_reqdata_len);
+ LASSERT(req->rq_reqdata_len % 8 == 0);
+ LASSERT(req->rq_reqdata_len <= req->rq_reqbuf_len);
+ }
+
+ return rc;
+}
+
+static int do_cli_unwrap_reply(struct ptlrpc_request *req)
+{
+ struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx;
+ int rc;
+
+ LASSERT(ctx);
+ LASSERT(ctx->cc_sec);
+ LASSERT(req->rq_repbuf);
+ LASSERT(req->rq_repdata);
+ LASSERT(req->rq_repmsg == NULL);
+
+ req->rq_rep_swab_mask = 0;
+
+ rc = __lustre_unpack_msg(req->rq_repdata, req->rq_repdata_len);
+ switch (rc) {
+ case 1:
+ lustre_set_rep_swabbed(req, MSG_PTLRPC_HEADER_OFF);
+ case 0:
+ break;
+ default:
+ CERROR("failed unpack reply: x%llu\n", req->rq_xid);
+ return -EPROTO;
+ }
+
+ if (req->rq_repdata_len < sizeof(struct lustre_msg)) {
+ CERROR("replied data length %d too small\n",
+ req->rq_repdata_len);
+ return -EPROTO;
+ }
+
+ if (SPTLRPC_FLVR_POLICY(req->rq_repdata->lm_secflvr) !=
+ SPTLRPC_FLVR_POLICY(req->rq_flvr.sf_rpc)) {
+ CERROR("reply policy %u doesn't match request policy %u\n",
+ SPTLRPC_FLVR_POLICY(req->rq_repdata->lm_secflvr),
+ SPTLRPC_FLVR_POLICY(req->rq_flvr.sf_rpc));
+ return -EPROTO;
+ }
+
+ switch (SPTLRPC_FLVR_SVC(req->rq_flvr.sf_rpc)) {
+ case SPTLRPC_SVC_NULL:
+ case SPTLRPC_SVC_AUTH:
+ case SPTLRPC_SVC_INTG:
+ LASSERT(ctx->cc_ops->verify);
+ rc = ctx->cc_ops->verify(ctx, req);
+ break;
+ case SPTLRPC_SVC_PRIV:
+ LASSERT(ctx->cc_ops->unseal);
+ rc = ctx->cc_ops->unseal(ctx, req);
+ break;
+ default:
+ LBUG();
+ }
+ LASSERT(rc || req->rq_repmsg || req->rq_resend);
+
+ if (SPTLRPC_FLVR_POLICY(req->rq_flvr.sf_rpc) != SPTLRPC_POLICY_NULL &&
+ !req->rq_ctx_init)
+ req->rq_rep_swab_mask = 0;
+ return rc;
+}
+
+/**
+ * Used by ptlrpc client, to perform security transformation upon the reply
+ * message of \a req. After return successfully, req->rq_repmsg points to
+ * the reply message in clear text.
+ *
+ * \pre the reply buffer should have been un-posted from LNet, so nothing is
+ * going to change.
+ */
+int sptlrpc_cli_unwrap_reply(struct ptlrpc_request *req)
+{
+ LASSERT(req->rq_repbuf);
+ LASSERT(req->rq_repdata == NULL);
+ LASSERT(req->rq_repmsg == NULL);
+ LASSERT(req->rq_reply_off + req->rq_nob_received <= req->rq_repbuf_len);
+
+ if (req->rq_reply_off == 0 &&
+ (lustre_msghdr_get_flags(req->rq_reqmsg) & MSGHDR_AT_SUPPORT)) {
+ CERROR("real reply with offset 0\n");
+ return -EPROTO;
+ }
+
+ if (req->rq_reply_off % 8 != 0) {
+ CERROR("reply at odd offset %u\n", req->rq_reply_off);
+ return -EPROTO;
+ }
+
+ req->rq_repdata = (struct lustre_msg *)
+ (req->rq_repbuf + req->rq_reply_off);
+ req->rq_repdata_len = req->rq_nob_received;
+
+ return do_cli_unwrap_reply(req);
+}
+
+/**
+ * Used by ptlrpc client, to perform security transformation upon the early
+ * reply message of \a req. We expect the rq_reply_off is 0, and
+ * rq_nob_received is the early reply size.
+ *
+ * Because the receive buffer might be still posted, the reply data might be
+ * changed at any time, no matter we're holding rq_lock or not. For this reason
+ * we allocate a separate ptlrpc_request and reply buffer for early reply
+ * processing.
+ *
+ * \retval 0 success, \a req_ret is filled with a duplicated ptlrpc_request.
+ * Later the caller must call sptlrpc_cli_finish_early_reply() on the returned
+ * \a *req_ret to release it.
+ * \retval -ev error number, and \a req_ret will not be set.
+ */
+int sptlrpc_cli_unwrap_early_reply(struct ptlrpc_request *req,
+ struct ptlrpc_request **req_ret)
+{
+ struct ptlrpc_request *early_req;
+ char *early_buf;
+ int early_bufsz, early_size;
+ int rc;
+
+ early_req = ptlrpc_request_cache_alloc(GFP_NOFS);
+ if (early_req == NULL)
+ return -ENOMEM;
+
+ early_size = req->rq_nob_received;
+ early_bufsz = size_roundup_power2(early_size);
+ OBD_ALLOC_LARGE(early_buf, early_bufsz);
+ if (early_buf == NULL) {
+ rc = -ENOMEM;
+ goto err_req;
+ }
+
+ /* sanity checkings and copy data out, do it inside spinlock */
+ spin_lock(&req->rq_lock);
+
+ if (req->rq_replied) {
+ spin_unlock(&req->rq_lock);
+ rc = -EALREADY;
+ goto err_buf;
+ }
+
+ LASSERT(req->rq_repbuf);
+ LASSERT(req->rq_repdata == NULL);
+ LASSERT(req->rq_repmsg == NULL);
+
+ if (req->rq_reply_off != 0) {
+ CERROR("early reply with offset %u\n", req->rq_reply_off);
+ spin_unlock(&req->rq_lock);
+ rc = -EPROTO;
+ goto err_buf;
+ }
+
+ if (req->rq_nob_received != early_size) {
+ /* even another early arrived the size should be the same */
+ CERROR("data size has changed from %u to %u\n",
+ early_size, req->rq_nob_received);
+ spin_unlock(&req->rq_lock);
+ rc = -EINVAL;
+ goto err_buf;
+ }
+
+ if (req->rq_nob_received < sizeof(struct lustre_msg)) {
+ CERROR("early reply length %d too small\n",
+ req->rq_nob_received);
+ spin_unlock(&req->rq_lock);
+ rc = -EALREADY;
+ goto err_buf;
+ }
+
+ memcpy(early_buf, req->rq_repbuf, early_size);
+ spin_unlock(&req->rq_lock);
+
+ spin_lock_init(&early_req->rq_lock);
+ early_req->rq_cli_ctx = sptlrpc_cli_ctx_get(req->rq_cli_ctx);
+ early_req->rq_flvr = req->rq_flvr;
+ early_req->rq_repbuf = early_buf;
+ early_req->rq_repbuf_len = early_bufsz;
+ early_req->rq_repdata = (struct lustre_msg *) early_buf;
+ early_req->rq_repdata_len = early_size;
+ early_req->rq_early = 1;
+ early_req->rq_reqmsg = req->rq_reqmsg;
+
+ rc = do_cli_unwrap_reply(early_req);
+ if (rc) {
+ DEBUG_REQ(D_ADAPTTO, early_req,
+ "error %d unwrap early reply", rc);
+ goto err_ctx;
+ }
+
+ LASSERT(early_req->rq_repmsg);
+ *req_ret = early_req;
+ return 0;
+
+err_ctx:
+ sptlrpc_cli_ctx_put(early_req->rq_cli_ctx, 1);
+err_buf:
+ OBD_FREE_LARGE(early_buf, early_bufsz);
+err_req:
+ ptlrpc_request_cache_free(early_req);
+ return rc;
+}
+
+/**
+ * Used by ptlrpc client, to release a processed early reply \a early_req.
+ *
+ * \pre \a early_req was obtained from calling sptlrpc_cli_unwrap_early_reply().
+ */
+void sptlrpc_cli_finish_early_reply(struct ptlrpc_request *early_req)
+{
+ LASSERT(early_req->rq_repbuf);
+ LASSERT(early_req->rq_repdata);
+ LASSERT(early_req->rq_repmsg);
+
+ sptlrpc_cli_ctx_put(early_req->rq_cli_ctx, 1);
+ OBD_FREE_LARGE(early_req->rq_repbuf, early_req->rq_repbuf_len);
+ ptlrpc_request_cache_free(early_req);
+}
+
+/**************************************************
+ * sec ID *
+ **************************************************/
+
+/*
+ * "fixed" sec (e.g. null) use sec_id < 0
+ */
+static atomic_t sptlrpc_sec_id = ATOMIC_INIT(1);
+
+int sptlrpc_get_next_secid(void)
+{
+ return atomic_inc_return(&sptlrpc_sec_id);
+}
+EXPORT_SYMBOL(sptlrpc_get_next_secid);
+
+/**************************************************
+ * client side high-level security APIs *
+ **************************************************/
+
+static int sec_cop_flush_ctx_cache(struct ptlrpc_sec *sec, uid_t uid,
+ int grace, int force)
+{
+ struct ptlrpc_sec_policy *policy = sec->ps_policy;
+
+ LASSERT(policy->sp_cops);
+ LASSERT(policy->sp_cops->flush_ctx_cache);
+
+ return policy->sp_cops->flush_ctx_cache(sec, uid, grace, force);
+}
+
+static void sec_cop_destroy_sec(struct ptlrpc_sec *sec)
+{
+ struct ptlrpc_sec_policy *policy = sec->ps_policy;
+
+ LASSERT_ATOMIC_ZERO(&sec->ps_refcount);
+ LASSERT_ATOMIC_ZERO(&sec->ps_nctx);
+ LASSERT(policy->sp_cops->destroy_sec);
+
+ CDEBUG(D_SEC, "%s@%p: being destroyed\n", sec->ps_policy->sp_name, sec);
+
+ policy->sp_cops->destroy_sec(sec);
+ sptlrpc_policy_put(policy);
+}
+
+void sptlrpc_sec_destroy(struct ptlrpc_sec *sec)
+{
+ sec_cop_destroy_sec(sec);
+}
+EXPORT_SYMBOL(sptlrpc_sec_destroy);
+
+static void sptlrpc_sec_kill(struct ptlrpc_sec *sec)
+{
+ LASSERT_ATOMIC_POS(&sec->ps_refcount);
+
+ if (sec->ps_policy->sp_cops->kill_sec) {
+ sec->ps_policy->sp_cops->kill_sec(sec);
+
+ sec_cop_flush_ctx_cache(sec, -1, 1, 1);
+ }
+}
+
+struct ptlrpc_sec *sptlrpc_sec_get(struct ptlrpc_sec *sec)
+{
+ if (sec)
+ atomic_inc(&sec->ps_refcount);
+
+ return sec;
+}
+EXPORT_SYMBOL(sptlrpc_sec_get);
+
+void sptlrpc_sec_put(struct ptlrpc_sec *sec)
+{
+ if (sec) {
+ LASSERT_ATOMIC_POS(&sec->ps_refcount);
+
+ if (atomic_dec_and_test(&sec->ps_refcount)) {
+ sptlrpc_gc_del_sec(sec);
+ sec_cop_destroy_sec(sec);
+ }
+ }
+}
+EXPORT_SYMBOL(sptlrpc_sec_put);
+
+/*
+ * policy module is responsible for taking reference of import
+ */
+static
+struct ptlrpc_sec *sptlrpc_sec_create(struct obd_import *imp,
+ struct ptlrpc_svc_ctx *svc_ctx,
+ struct sptlrpc_flavor *sf,
+ enum lustre_sec_part sp)
+{
+ struct ptlrpc_sec_policy *policy;
+ struct ptlrpc_sec *sec;
+ char str[32];
+
+ if (svc_ctx) {
+ LASSERT(imp->imp_dlm_fake == 1);
+
+ CDEBUG(D_SEC, "%s %s: reverse sec using flavor %s\n",
+ imp->imp_obd->obd_type->typ_name,
+ imp->imp_obd->obd_name,
+ sptlrpc_flavor2name(sf, str, sizeof(str)));
+
+ policy = sptlrpc_policy_get(svc_ctx->sc_policy);
+ sf->sf_flags |= PTLRPC_SEC_FL_REVERSE | PTLRPC_SEC_FL_ROOTONLY;
+ } else {
+ LASSERT(imp->imp_dlm_fake == 0);
+
+ CDEBUG(D_SEC, "%s %s: select security flavor %s\n",
+ imp->imp_obd->obd_type->typ_name,
+ imp->imp_obd->obd_name,
+ sptlrpc_flavor2name(sf, str, sizeof(str)));
+
+ policy = sptlrpc_wireflavor2policy(sf->sf_rpc);
+ if (!policy) {
+ CERROR("invalid flavor 0x%x\n", sf->sf_rpc);
+ return NULL;
+ }
+ }
+
+ sec = policy->sp_cops->create_sec(imp, svc_ctx, sf);
+ if (sec) {
+ atomic_inc(&sec->ps_refcount);
+
+ sec->ps_part = sp;
+
+ if (sec->ps_gc_interval && policy->sp_cops->gc_ctx)
+ sptlrpc_gc_add_sec(sec);
+ } else {
+ sptlrpc_policy_put(policy);
+ }
+
+ return sec;
+}
+
+struct ptlrpc_sec *sptlrpc_import_sec_ref(struct obd_import *imp)
+{
+ struct ptlrpc_sec *sec;
+
+ spin_lock(&imp->imp_lock);
+ sec = sptlrpc_sec_get(imp->imp_sec);
+ spin_unlock(&imp->imp_lock);
+
+ return sec;
+}
+EXPORT_SYMBOL(sptlrpc_import_sec_ref);
+
+static void sptlrpc_import_sec_install(struct obd_import *imp,
+ struct ptlrpc_sec *sec)
+{
+ struct ptlrpc_sec *old_sec;
+
+ LASSERT_ATOMIC_POS(&sec->ps_refcount);
+
+ spin_lock(&imp->imp_lock);
+ old_sec = imp->imp_sec;
+ imp->imp_sec = sec;
+ spin_unlock(&imp->imp_lock);
+
+ if (old_sec) {
+ sptlrpc_sec_kill(old_sec);
+
+ /* balance the ref taken by this import */
+ sptlrpc_sec_put(old_sec);
+ }
+}
+
+static inline
+int flavor_equal(struct sptlrpc_flavor *sf1, struct sptlrpc_flavor *sf2)
+{
+ return (memcmp(sf1, sf2, sizeof(*sf1)) == 0);
+}
+
+static inline
+void flavor_copy(struct sptlrpc_flavor *dst, struct sptlrpc_flavor *src)
+{
+ *dst = *src;
+}
+
+static void sptlrpc_import_sec_adapt_inplace(struct obd_import *imp,
+ struct ptlrpc_sec *sec,
+ struct sptlrpc_flavor *sf)
+{
+ char str1[32], str2[32];
+
+ if (sec->ps_flvr.sf_flags != sf->sf_flags)
+ CDEBUG(D_SEC, "changing sec flags: %s -> %s\n",
+ sptlrpc_secflags2str(sec->ps_flvr.sf_flags,
+ str1, sizeof(str1)),
+ sptlrpc_secflags2str(sf->sf_flags,
+ str2, sizeof(str2)));
+
+ spin_lock(&sec->ps_lock);
+ flavor_copy(&sec->ps_flvr, sf);
+ spin_unlock(&sec->ps_lock);
+}
+
+/**
+ * To get an appropriate ptlrpc_sec for the \a imp, according to the current
+ * configuration. Upon called, imp->imp_sec may or may not be NULL.
+ *
+ * - regular import: \a svc_ctx should be NULL and \a flvr is ignored;
+ * - reverse import: \a svc_ctx and \a flvr are obtained from incoming request.
+ */
+int sptlrpc_import_sec_adapt(struct obd_import *imp,
+ struct ptlrpc_svc_ctx *svc_ctx,
+ struct sptlrpc_flavor *flvr)
+{
+ struct ptlrpc_connection *conn;
+ struct sptlrpc_flavor sf;
+ struct ptlrpc_sec *sec, *newsec;
+ enum lustre_sec_part sp;
+ char str[24];
+ int rc = 0;
+
+ might_sleep();
+
+ if (imp == NULL)
+ return 0;
+
+ conn = imp->imp_connection;
+
+ if (svc_ctx == NULL) {
+ struct client_obd *cliobd = &imp->imp_obd->u.cli;
+ /*
+ * normal import, determine flavor from rule set, except
+ * for mgc the flavor is predetermined.
+ */
+ if (cliobd->cl_sp_me == LUSTRE_SP_MGC)
+ sf = cliobd->cl_flvr_mgc;
+ else
+ sptlrpc_conf_choose_flavor(cliobd->cl_sp_me,
+ cliobd->cl_sp_to,
+ &cliobd->cl_target_uuid,
+ conn->c_self, &sf);
+
+ sp = imp->imp_obd->u.cli.cl_sp_me;
+ } else {
+ /* reverse import, determine flavor from incoming request */
+ sf = *flvr;
+
+ if (sf.sf_rpc != SPTLRPC_FLVR_NULL)
+ sf.sf_flags = PTLRPC_SEC_FL_REVERSE |
+ PTLRPC_SEC_FL_ROOTONLY;
+
+ sp = sptlrpc_target_sec_part(imp->imp_obd);
+ }
+
+ sec = sptlrpc_import_sec_ref(imp);
+ if (sec) {
+ char str2[24];
+
+ if (flavor_equal(&sf, &sec->ps_flvr))
+ goto out;
+
+ CDEBUG(D_SEC, "import %s->%s: changing flavor %s -> %s\n",
+ imp->imp_obd->obd_name,
+ obd_uuid2str(&conn->c_remote_uuid),
+ sptlrpc_flavor2name(&sec->ps_flvr, str, sizeof(str)),
+ sptlrpc_flavor2name(&sf, str2, sizeof(str2)));
+
+ if (SPTLRPC_FLVR_POLICY(sf.sf_rpc) ==
+ SPTLRPC_FLVR_POLICY(sec->ps_flvr.sf_rpc) &&
+ SPTLRPC_FLVR_MECH(sf.sf_rpc) ==
+ SPTLRPC_FLVR_MECH(sec->ps_flvr.sf_rpc)) {
+ sptlrpc_import_sec_adapt_inplace(imp, sec, &sf);
+ goto out;
+ }
+ } else if (SPTLRPC_FLVR_BASE(sf.sf_rpc) !=
+ SPTLRPC_FLVR_BASE(SPTLRPC_FLVR_NULL)) {
+ CDEBUG(D_SEC, "import %s->%s netid %x: select flavor %s\n",
+ imp->imp_obd->obd_name,
+ obd_uuid2str(&conn->c_remote_uuid),
+ LNET_NIDNET(conn->c_self),
+ sptlrpc_flavor2name(&sf, str, sizeof(str)));
+ }
+
+ mutex_lock(&imp->imp_sec_mutex);
+
+ newsec = sptlrpc_sec_create(imp, svc_ctx, &sf, sp);
+ if (newsec) {
+ sptlrpc_import_sec_install(imp, newsec);
+ } else {
+ CERROR("import %s->%s: failed to create new sec\n",
+ imp->imp_obd->obd_name,
+ obd_uuid2str(&conn->c_remote_uuid));
+ rc = -EPERM;
+ }
+
+ mutex_unlock(&imp->imp_sec_mutex);
+out:
+ sptlrpc_sec_put(sec);
+ return rc;
+}
+
+void sptlrpc_import_sec_put(struct obd_import *imp)
+{
+ if (imp->imp_sec) {
+ sptlrpc_sec_kill(imp->imp_sec);
+
+ sptlrpc_sec_put(imp->imp_sec);
+ imp->imp_sec = NULL;
+ }
+}
+
+static void import_flush_ctx_common(struct obd_import *imp,
+ uid_t uid, int grace, int force)
+{
+ struct ptlrpc_sec *sec;
+
+ if (imp == NULL)
+ return;
+
+ sec = sptlrpc_import_sec_ref(imp);
+ if (sec == NULL)
+ return;
+
+ sec_cop_flush_ctx_cache(sec, uid, grace, force);
+ sptlrpc_sec_put(sec);
+}
+
+void sptlrpc_import_flush_root_ctx(struct obd_import *imp)
+{
+ /* it's important to use grace mode, see explain in
+ * sptlrpc_req_refresh_ctx() */
+ import_flush_ctx_common(imp, 0, 1, 1);
+}
+
+void sptlrpc_import_flush_my_ctx(struct obd_import *imp)
+{
+ import_flush_ctx_common(imp, from_kuid(&init_user_ns, current_uid()),
+ 1, 1);
+}
+EXPORT_SYMBOL(sptlrpc_import_flush_my_ctx);
+
+void sptlrpc_import_flush_all_ctx(struct obd_import *imp)
+{
+ import_flush_ctx_common(imp, -1, 1, 1);
+}
+EXPORT_SYMBOL(sptlrpc_import_flush_all_ctx);
+
+/**
+ * Used by ptlrpc client to allocate request buffer of \a req. Upon return
+ * successfully, req->rq_reqmsg points to a buffer with size \a msgsize.
+ */
+int sptlrpc_cli_alloc_reqbuf(struct ptlrpc_request *req, int msgsize)
+{
+ struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx;
+ struct ptlrpc_sec_policy *policy;
+ int rc;
+
+ LASSERT(ctx);
+ LASSERT(ctx->cc_sec);
+ LASSERT(ctx->cc_sec->ps_policy);
+ LASSERT(req->rq_reqmsg == NULL);
+ LASSERT_ATOMIC_POS(&ctx->cc_refcount);
+
+ policy = ctx->cc_sec->ps_policy;
+ rc = policy->sp_cops->alloc_reqbuf(ctx->cc_sec, req, msgsize);
+ if (!rc) {
+ LASSERT(req->rq_reqmsg);
+ LASSERT(req->rq_reqbuf || req->rq_clrbuf);
+
+ /* zeroing preallocated buffer */
+ if (req->rq_pool)
+ memset(req->rq_reqmsg, 0, msgsize);
+ }
+
+ return rc;
+}
+
+/**
+ * Used by ptlrpc client to free request buffer of \a req. After this
+ * req->rq_reqmsg is set to NULL and should not be accessed anymore.
+ */
+void sptlrpc_cli_free_reqbuf(struct ptlrpc_request *req)
+{
+ struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx;
+ struct ptlrpc_sec_policy *policy;
+
+ LASSERT(ctx);
+ LASSERT(ctx->cc_sec);
+ LASSERT(ctx->cc_sec->ps_policy);
+ LASSERT_ATOMIC_POS(&ctx->cc_refcount);
+
+ if (req->rq_reqbuf == NULL && req->rq_clrbuf == NULL)
+ return;
+
+ policy = ctx->cc_sec->ps_policy;
+ policy->sp_cops->free_reqbuf(ctx->cc_sec, req);
+ req->rq_reqmsg = NULL;
+}
+
+/*
+ * NOTE caller must guarantee the buffer size is enough for the enlargement
+ */
+void _sptlrpc_enlarge_msg_inplace(struct lustre_msg *msg,
+ int segment, int newsize)
+{
+ void *src, *dst;
+ int oldsize, oldmsg_size, movesize;
+
+ LASSERT(segment < msg->lm_bufcount);
+ LASSERT(msg->lm_buflens[segment] <= newsize);
+
+ if (msg->lm_buflens[segment] == newsize)
+ return;
+
+ /* nothing to do if we are enlarging the last segment */
+ if (segment == msg->lm_bufcount - 1) {
+ msg->lm_buflens[segment] = newsize;
+ return;
+ }
+
+ oldsize = msg->lm_buflens[segment];
+
+ src = lustre_msg_buf(msg, segment + 1, 0);
+ msg->lm_buflens[segment] = newsize;
+ dst = lustre_msg_buf(msg, segment + 1, 0);
+ msg->lm_buflens[segment] = oldsize;
+
+ /* move from segment + 1 to end segment */
+ LASSERT(msg->lm_magic == LUSTRE_MSG_MAGIC_V2);
+ oldmsg_size = lustre_msg_size_v2(msg->lm_bufcount, msg->lm_buflens);
+ movesize = oldmsg_size - ((unsigned long) src - (unsigned long) msg);
+ LASSERT(movesize >= 0);
+
+ if (movesize)
+ memmove(dst, src, movesize);
+
+ /* note we don't clear the ares where old data live, not secret */
+
+ /* finally set new segment size */
+ msg->lm_buflens[segment] = newsize;
+}
+EXPORT_SYMBOL(_sptlrpc_enlarge_msg_inplace);
+
+/**
+ * Used by ptlrpc client to enlarge the \a segment of request message pointed
+ * by req->rq_reqmsg to size \a newsize, all previously filled-in data will be
+ * preserved after the enlargement. this must be called after original request
+ * buffer being allocated.
+ *
+ * \note after this be called, rq_reqmsg and rq_reqlen might have been changed,
+ * so caller should refresh its local pointers if needed.
+ */
+int sptlrpc_cli_enlarge_reqbuf(struct ptlrpc_request *req,
+ int segment, int newsize)
+{
+ struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx;
+ struct ptlrpc_sec_cops *cops;
+ struct lustre_msg *msg = req->rq_reqmsg;
+
+ LASSERT(ctx);
+ LASSERT(msg);
+ LASSERT(msg->lm_bufcount > segment);
+ LASSERT(msg->lm_buflens[segment] <= newsize);
+
+ if (msg->lm_buflens[segment] == newsize)
+ return 0;
+
+ cops = ctx->cc_sec->ps_policy->sp_cops;
+ LASSERT(cops->enlarge_reqbuf);
+ return cops->enlarge_reqbuf(ctx->cc_sec, req, segment, newsize);
+}
+EXPORT_SYMBOL(sptlrpc_cli_enlarge_reqbuf);
+
+/**
+ * Used by ptlrpc client to allocate reply buffer of \a req.
+ *
+ * \note After this, req->rq_repmsg is still not accessible.
+ */
+int sptlrpc_cli_alloc_repbuf(struct ptlrpc_request *req, int msgsize)
+{
+ struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx;
+ struct ptlrpc_sec_policy *policy;
+
+ LASSERT(ctx);
+ LASSERT(ctx->cc_sec);
+ LASSERT(ctx->cc_sec->ps_policy);
+
+ if (req->rq_repbuf)
+ return 0;
+
+ policy = ctx->cc_sec->ps_policy;
+ return policy->sp_cops->alloc_repbuf(ctx->cc_sec, req, msgsize);
+}
+
+/**
+ * Used by ptlrpc client to free reply buffer of \a req. After this
+ * req->rq_repmsg is set to NULL and should not be accessed anymore.
+ */
+void sptlrpc_cli_free_repbuf(struct ptlrpc_request *req)
+{
+ struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx;
+ struct ptlrpc_sec_policy *policy;
+
+ LASSERT(ctx);
+ LASSERT(ctx->cc_sec);
+ LASSERT(ctx->cc_sec->ps_policy);
+ LASSERT_ATOMIC_POS(&ctx->cc_refcount);
+
+ if (req->rq_repbuf == NULL)
+ return;
+ LASSERT(req->rq_repbuf_len);
+
+ policy = ctx->cc_sec->ps_policy;
+ policy->sp_cops->free_repbuf(ctx->cc_sec, req);
+ req->rq_repmsg = NULL;
+}
+
+int sptlrpc_cli_install_rvs_ctx(struct obd_import *imp,
+ struct ptlrpc_cli_ctx *ctx)
+{
+ struct ptlrpc_sec_policy *policy = ctx->cc_sec->ps_policy;
+
+ if (!policy->sp_cops->install_rctx)
+ return 0;
+ return policy->sp_cops->install_rctx(imp, ctx->cc_sec, ctx);
+}
+
+int sptlrpc_svc_install_rvs_ctx(struct obd_import *imp,
+ struct ptlrpc_svc_ctx *ctx)
+{
+ struct ptlrpc_sec_policy *policy = ctx->sc_policy;
+
+ if (!policy->sp_sops->install_rctx)
+ return 0;
+ return policy->sp_sops->install_rctx(imp, ctx);
+}
+
+/****************************************
+ * server side security *
+ ****************************************/
+
+static int flavor_allowed(struct sptlrpc_flavor *exp,
+ struct ptlrpc_request *req)
+{
+ struct sptlrpc_flavor *flvr = &req->rq_flvr;
+
+ if (exp->sf_rpc == SPTLRPC_FLVR_ANY || exp->sf_rpc == flvr->sf_rpc)
+ return 1;
+
+ if ((req->rq_ctx_init || req->rq_ctx_fini) &&
+ SPTLRPC_FLVR_POLICY(exp->sf_rpc) ==
+ SPTLRPC_FLVR_POLICY(flvr->sf_rpc) &&
+ SPTLRPC_FLVR_MECH(exp->sf_rpc) == SPTLRPC_FLVR_MECH(flvr->sf_rpc))
+ return 1;
+
+ return 0;
+}
+
+#define EXP_FLVR_UPDATE_EXPIRE (OBD_TIMEOUT_DEFAULT + 10)
+
+/**
+ * Given an export \a exp, check whether the flavor of incoming \a req
+ * is allowed by the export \a exp. Main logic is about taking care of
+ * changing configurations. Return 0 means success.
+ */
+int sptlrpc_target_export_check(struct obd_export *exp,
+ struct ptlrpc_request *req)
+{
+ struct sptlrpc_flavor flavor;
+
+ if (exp == NULL)
+ return 0;
+
+ /* client side export has no imp_reverse, skip
+ * FIXME maybe we should check flavor this as well??? */
+ if (exp->exp_imp_reverse == NULL)
+ return 0;
+
+ /* don't care about ctx fini rpc */
+ if (req->rq_ctx_fini)
+ return 0;
+
+ spin_lock(&exp->exp_lock);
+
+ /* if flavor just changed (exp->exp_flvr_changed != 0), we wait for
+ * the first req with the new flavor, then treat it as current flavor,
+ * adapt reverse sec according to it.
+ * note the first rpc with new flavor might not be with root ctx, in
+ * which case delay the sec_adapt by leaving exp_flvr_adapt == 1. */
+ if (unlikely(exp->exp_flvr_changed) &&
+ flavor_allowed(&exp->exp_flvr_old[1], req)) {
+ /* make the new flavor as "current", and old ones as
+ * about-to-expire */
+ CDEBUG(D_SEC, "exp %p: just changed: %x->%x\n", exp,
+ exp->exp_flvr.sf_rpc, exp->exp_flvr_old[1].sf_rpc);
+ flavor = exp->exp_flvr_old[1];
+ exp->exp_flvr_old[1] = exp->exp_flvr_old[0];
+ exp->exp_flvr_expire[1] = exp->exp_flvr_expire[0];
+ exp->exp_flvr_old[0] = exp->exp_flvr;
+ exp->exp_flvr_expire[0] = get_seconds() +
+ EXP_FLVR_UPDATE_EXPIRE;
+ exp->exp_flvr = flavor;
+
+ /* flavor change finished */
+ exp->exp_flvr_changed = 0;
+ LASSERT(exp->exp_flvr_adapt == 1);
+
+ /* if it's gss, we only interested in root ctx init */
+ if (req->rq_auth_gss &&
+ !(req->rq_ctx_init &&
+ (req->rq_auth_usr_root || req->rq_auth_usr_mdt ||
+ req->rq_auth_usr_ost))) {
+ spin_unlock(&exp->exp_lock);
+ CDEBUG(D_SEC, "is good but not root(%d:%d:%d:%d:%d)\n",
+ req->rq_auth_gss, req->rq_ctx_init,
+ req->rq_auth_usr_root, req->rq_auth_usr_mdt,
+ req->rq_auth_usr_ost);
+ return 0;
+ }
+
+ exp->exp_flvr_adapt = 0;
+ spin_unlock(&exp->exp_lock);
+
+ return sptlrpc_import_sec_adapt(exp->exp_imp_reverse,
+ req->rq_svc_ctx, &flavor);
+ }
+
+ /* if it equals to the current flavor, we accept it, but need to
+ * dealing with reverse sec/ctx */
+ if (likely(flavor_allowed(&exp->exp_flvr, req))) {
+ /* most cases should return here, we only interested in
+ * gss root ctx init */
+ if (!req->rq_auth_gss || !req->rq_ctx_init ||
+ (!req->rq_auth_usr_root && !req->rq_auth_usr_mdt &&
+ !req->rq_auth_usr_ost)) {
+ spin_unlock(&exp->exp_lock);
+ return 0;
+ }
+
+ /* if flavor just changed, we should not proceed, just leave
+ * it and current flavor will be discovered and replaced
+ * shortly, and let _this_ rpc pass through */
+ if (exp->exp_flvr_changed) {
+ LASSERT(exp->exp_flvr_adapt);
+ spin_unlock(&exp->exp_lock);
+ return 0;
+ }
+
+ if (exp->exp_flvr_adapt) {
+ exp->exp_flvr_adapt = 0;
+ CDEBUG(D_SEC, "exp %p (%x|%x|%x): do delayed adapt\n",
+ exp, exp->exp_flvr.sf_rpc,
+ exp->exp_flvr_old[0].sf_rpc,
+ exp->exp_flvr_old[1].sf_rpc);
+ flavor = exp->exp_flvr;
+ spin_unlock(&exp->exp_lock);
+
+ return sptlrpc_import_sec_adapt(exp->exp_imp_reverse,
+ req->rq_svc_ctx,
+ &flavor);
+ } else {
+ CDEBUG(D_SEC, "exp %p (%x|%x|%x): is current flavor, install rvs ctx\n",
+ exp, exp->exp_flvr.sf_rpc,
+ exp->exp_flvr_old[0].sf_rpc,
+ exp->exp_flvr_old[1].sf_rpc);
+ spin_unlock(&exp->exp_lock);
+
+ return sptlrpc_svc_install_rvs_ctx(exp->exp_imp_reverse,
+ req->rq_svc_ctx);
+ }
+ }
+
+ if (exp->exp_flvr_expire[0]) {
+ if (exp->exp_flvr_expire[0] >= get_seconds()) {
+ if (flavor_allowed(&exp->exp_flvr_old[0], req)) {
+ CDEBUG(D_SEC, "exp %p (%x|%x|%x): match the middle one (" CFS_DURATION_T ")\n", exp,
+ exp->exp_flvr.sf_rpc,
+ exp->exp_flvr_old[0].sf_rpc,
+ exp->exp_flvr_old[1].sf_rpc,
+ exp->exp_flvr_expire[0] -
+ get_seconds());
+ spin_unlock(&exp->exp_lock);
+ return 0;
+ }
+ } else {
+ CDEBUG(D_SEC, "mark middle expired\n");
+ exp->exp_flvr_expire[0] = 0;
+ }
+ CDEBUG(D_SEC, "exp %p (%x|%x|%x): %x not match middle\n", exp,
+ exp->exp_flvr.sf_rpc,
+ exp->exp_flvr_old[0].sf_rpc, exp->exp_flvr_old[1].sf_rpc,
+ req->rq_flvr.sf_rpc);
+ }
+
+ /* now it doesn't match the current flavor, the only chance we can
+ * accept it is match the old flavors which is not expired. */
+ if (exp->exp_flvr_changed == 0 && exp->exp_flvr_expire[1]) {
+ if (exp->exp_flvr_expire[1] >= get_seconds()) {
+ if (flavor_allowed(&exp->exp_flvr_old[1], req)) {
+ CDEBUG(D_SEC, "exp %p (%x|%x|%x): match the oldest one (" CFS_DURATION_T ")\n",
+ exp,
+ exp->exp_flvr.sf_rpc,
+ exp->exp_flvr_old[0].sf_rpc,
+ exp->exp_flvr_old[1].sf_rpc,
+ exp->exp_flvr_expire[1] -
+ get_seconds());
+ spin_unlock(&exp->exp_lock);
+ return 0;
+ }
+ } else {
+ CDEBUG(D_SEC, "mark oldest expired\n");
+ exp->exp_flvr_expire[1] = 0;
+ }
+ CDEBUG(D_SEC, "exp %p (%x|%x|%x): %x not match found\n",
+ exp, exp->exp_flvr.sf_rpc,
+ exp->exp_flvr_old[0].sf_rpc, exp->exp_flvr_old[1].sf_rpc,
+ req->rq_flvr.sf_rpc);
+ } else {
+ CDEBUG(D_SEC, "exp %p (%x|%x|%x): skip the last one\n",
+ exp, exp->exp_flvr.sf_rpc, exp->exp_flvr_old[0].sf_rpc,
+ exp->exp_flvr_old[1].sf_rpc);
+ }
+
+ spin_unlock(&exp->exp_lock);
+
+ CWARN("exp %p(%s): req %p (%u|%u|%u|%u|%u|%u) with unauthorized flavor %x, expect %x|%x(%+ld)|%x(%+ld)\n",
+ exp, exp->exp_obd->obd_name,
+ req, req->rq_auth_gss, req->rq_ctx_init, req->rq_ctx_fini,
+ req->rq_auth_usr_root, req->rq_auth_usr_mdt, req->rq_auth_usr_ost,
+ req->rq_flvr.sf_rpc,
+ exp->exp_flvr.sf_rpc,
+ exp->exp_flvr_old[0].sf_rpc,
+ exp->exp_flvr_expire[0] ?
+ (unsigned long) (exp->exp_flvr_expire[0] -
+ get_seconds()) : 0,
+ exp->exp_flvr_old[1].sf_rpc,
+ exp->exp_flvr_expire[1] ?
+ (unsigned long) (exp->exp_flvr_expire[1] -
+ get_seconds()) : 0);
+ return -EACCES;
+}
+EXPORT_SYMBOL(sptlrpc_target_export_check);
+
+void sptlrpc_target_update_exp_flavor(struct obd_device *obd,
+ struct sptlrpc_rule_set *rset)
+{
+ struct obd_export *exp;
+ struct sptlrpc_flavor new_flvr;
+
+ LASSERT(obd);
+
+ spin_lock(&obd->obd_dev_lock);
+
+ list_for_each_entry(exp, &obd->obd_exports, exp_obd_chain) {
+ if (exp->exp_connection == NULL)
+ continue;
+
+ /* note if this export had just been updated flavor
+ * (exp_flvr_changed == 1), this will override the
+ * previous one. */
+ spin_lock(&exp->exp_lock);
+ sptlrpc_target_choose_flavor(rset, exp->exp_sp_peer,
+ exp->exp_connection->c_peer.nid,
+ &new_flvr);
+ if (exp->exp_flvr_changed ||
+ !flavor_equal(&new_flvr, &exp->exp_flvr)) {
+ exp->exp_flvr_old[1] = new_flvr;
+ exp->exp_flvr_expire[1] = 0;
+ exp->exp_flvr_changed = 1;
+ exp->exp_flvr_adapt = 1;
+
+ CDEBUG(D_SEC, "exp %p (%s): updated flavor %x->%x\n",
+ exp, sptlrpc_part2name(exp->exp_sp_peer),
+ exp->exp_flvr.sf_rpc,
+ exp->exp_flvr_old[1].sf_rpc);
+ }
+ spin_unlock(&exp->exp_lock);
+ }
+
+ spin_unlock(&obd->obd_dev_lock);
+}
+EXPORT_SYMBOL(sptlrpc_target_update_exp_flavor);
+
+static int sptlrpc_svc_check_from(struct ptlrpc_request *req, int svc_rc)
+{
+ /* peer's claim is unreliable unless gss is being used */
+ if (!req->rq_auth_gss || svc_rc == SECSVC_DROP)
+ return svc_rc;
+
+ switch (req->rq_sp_from) {
+ case LUSTRE_SP_CLI:
+ if (req->rq_auth_usr_mdt || req->rq_auth_usr_ost) {
+ DEBUG_REQ(D_ERROR, req, "faked source CLI");
+ svc_rc = SECSVC_DROP;
+ }
+ break;
+ case LUSTRE_SP_MDT:
+ if (!req->rq_auth_usr_mdt) {
+ DEBUG_REQ(D_ERROR, req, "faked source MDT");
+ svc_rc = SECSVC_DROP;
+ }
+ break;
+ case LUSTRE_SP_OST:
+ if (!req->rq_auth_usr_ost) {
+ DEBUG_REQ(D_ERROR, req, "faked source OST");
+ svc_rc = SECSVC_DROP;
+ }
+ break;
+ case LUSTRE_SP_MGS:
+ case LUSTRE_SP_MGC:
+ if (!req->rq_auth_usr_root && !req->rq_auth_usr_mdt &&
+ !req->rq_auth_usr_ost) {
+ DEBUG_REQ(D_ERROR, req, "faked source MGC/MGS");
+ svc_rc = SECSVC_DROP;
+ }
+ break;
+ case LUSTRE_SP_ANY:
+ default:
+ DEBUG_REQ(D_ERROR, req, "invalid source %u", req->rq_sp_from);
+ svc_rc = SECSVC_DROP;
+ }
+
+ return svc_rc;
+}
+
+/**
+ * Used by ptlrpc server, to perform transformation upon request message of
+ * incoming \a req. This must be the first thing to do with a incoming
+ * request in ptlrpc layer.
+ *
+ * \retval SECSVC_OK success, and req->rq_reqmsg point to request message in
+ * clear text, size is req->rq_reqlen; also req->rq_svc_ctx is set.
+ * \retval SECSVC_COMPLETE success, the request has been fully processed, and
+ * reply message has been prepared.
+ * \retval SECSVC_DROP failed, this request should be dropped.
+ */
+int sptlrpc_svc_unwrap_request(struct ptlrpc_request *req)
+{
+ struct ptlrpc_sec_policy *policy;
+ struct lustre_msg *msg = req->rq_reqbuf;
+ int rc;
+
+ LASSERT(msg);
+ LASSERT(req->rq_reqmsg == NULL);
+ LASSERT(req->rq_repmsg == NULL);
+ LASSERT(req->rq_svc_ctx == NULL);
+
+ req->rq_req_swab_mask = 0;
+
+ rc = __lustre_unpack_msg(msg, req->rq_reqdata_len);
+ switch (rc) {
+ case 1:
+ lustre_set_req_swabbed(req, MSG_PTLRPC_HEADER_OFF);
+ case 0:
+ break;
+ default:
+ CERROR("error unpacking request from %s x%llu\n",
+ libcfs_id2str(req->rq_peer), req->rq_xid);
+ return SECSVC_DROP;
+ }
+
+ req->rq_flvr.sf_rpc = WIRE_FLVR(msg->lm_secflvr);
+ req->rq_sp_from = LUSTRE_SP_ANY;
+ req->rq_auth_uid = -1;
+ req->rq_auth_mapped_uid = -1;
+
+ policy = sptlrpc_wireflavor2policy(req->rq_flvr.sf_rpc);
+ if (!policy) {
+ CERROR("unsupported rpc flavor %x\n", req->rq_flvr.sf_rpc);
+ return SECSVC_DROP;
+ }
+
+ LASSERT(policy->sp_sops->accept);
+ rc = policy->sp_sops->accept(req);
+ sptlrpc_policy_put(policy);
+ LASSERT(req->rq_reqmsg || rc != SECSVC_OK);
+ LASSERT(req->rq_svc_ctx || rc == SECSVC_DROP);
+
+ /*
+ * if it's not null flavor (which means embedded packing msg),
+ * reset the swab mask for the coming inner msg unpacking.
+ */
+ if (SPTLRPC_FLVR_POLICY(req->rq_flvr.sf_rpc) != SPTLRPC_POLICY_NULL)
+ req->rq_req_swab_mask = 0;
+
+ /* sanity check for the request source */
+ rc = sptlrpc_svc_check_from(req, rc);
+ return rc;
+}
+
+/**
+ * Used by ptlrpc server, to allocate reply buffer for \a req. If succeed,
+ * req->rq_reply_state is set, and req->rq_reply_state->rs_msg point to
+ * a buffer of \a msglen size.
+ */
+int sptlrpc_svc_alloc_rs(struct ptlrpc_request *req, int msglen)
+{
+ struct ptlrpc_sec_policy *policy;
+ struct ptlrpc_reply_state *rs;
+ int rc;
+
+ LASSERT(req->rq_svc_ctx);
+ LASSERT(req->rq_svc_ctx->sc_policy);
+
+ policy = req->rq_svc_ctx->sc_policy;
+ LASSERT(policy->sp_sops->alloc_rs);
+
+ rc = policy->sp_sops->alloc_rs(req, msglen);
+ if (unlikely(rc == -ENOMEM)) {
+ struct ptlrpc_service_part *svcpt = req->rq_rqbd->rqbd_svcpt;
+ if (svcpt->scp_service->srv_max_reply_size <
+ msglen + sizeof(struct ptlrpc_reply_state)) {
+ /* Just return failure if the size is too big */
+ CERROR("size of message is too big (%zd), %d allowed",
+ msglen + sizeof(struct ptlrpc_reply_state),
+ svcpt->scp_service->srv_max_reply_size);
+ return -ENOMEM;
+ }
+
+ /* failed alloc, try emergency pool */
+ rs = lustre_get_emerg_rs(svcpt);
+ if (rs == NULL)
+ return -ENOMEM;
+
+ req->rq_reply_state = rs;
+ rc = policy->sp_sops->alloc_rs(req, msglen);
+ if (rc) {
+ lustre_put_emerg_rs(rs);
+ req->rq_reply_state = NULL;
+ }
+ }
+
+ LASSERT(rc != 0 ||
+ (req->rq_reply_state && req->rq_reply_state->rs_msg));
+
+ return rc;
+}
+
+/**
+ * Used by ptlrpc server, to perform transformation upon reply message.
+ *
+ * \post req->rq_reply_off is set to appropriate server-controlled reply offset.
+ * \post req->rq_repmsg and req->rq_reply_state->rs_msg becomes inaccessible.
+ */
+int sptlrpc_svc_wrap_reply(struct ptlrpc_request *req)
+{
+ struct ptlrpc_sec_policy *policy;
+ int rc;
+
+ LASSERT(req->rq_svc_ctx);
+ LASSERT(req->rq_svc_ctx->sc_policy);
+
+ policy = req->rq_svc_ctx->sc_policy;
+ LASSERT(policy->sp_sops->authorize);
+
+ rc = policy->sp_sops->authorize(req);
+ LASSERT(rc || req->rq_reply_state->rs_repdata_len);
+
+ return rc;
+}
+
+/**
+ * Used by ptlrpc server, to free reply_state.
+ */
+void sptlrpc_svc_free_rs(struct ptlrpc_reply_state *rs)
+{
+ struct ptlrpc_sec_policy *policy;
+ unsigned int prealloc;
+
+ LASSERT(rs->rs_svc_ctx);
+ LASSERT(rs->rs_svc_ctx->sc_policy);
+
+ policy = rs->rs_svc_ctx->sc_policy;
+ LASSERT(policy->sp_sops->free_rs);
+
+ prealloc = rs->rs_prealloc;
+ policy->sp_sops->free_rs(rs);
+
+ if (prealloc)
+ lustre_put_emerg_rs(rs);
+}
+
+void sptlrpc_svc_ctx_addref(struct ptlrpc_request *req)
+{
+ struct ptlrpc_svc_ctx *ctx = req->rq_svc_ctx;
+
+ if (ctx != NULL)
+ atomic_inc(&ctx->sc_refcount);
+}
+
+void sptlrpc_svc_ctx_decref(struct ptlrpc_request *req)
+{
+ struct ptlrpc_svc_ctx *ctx = req->rq_svc_ctx;
+
+ if (ctx == NULL)
+ return;
+
+ LASSERT_ATOMIC_POS(&ctx->sc_refcount);
+ if (atomic_dec_and_test(&ctx->sc_refcount)) {
+ if (ctx->sc_policy->sp_sops->free_ctx)
+ ctx->sc_policy->sp_sops->free_ctx(ctx);
+ }
+ req->rq_svc_ctx = NULL;
+}
+
+void sptlrpc_svc_ctx_invalidate(struct ptlrpc_request *req)
+{
+ struct ptlrpc_svc_ctx *ctx = req->rq_svc_ctx;
+
+ if (ctx == NULL)
+ return;
+
+ LASSERT_ATOMIC_POS(&ctx->sc_refcount);
+ if (ctx->sc_policy->sp_sops->invalidate_ctx)
+ ctx->sc_policy->sp_sops->invalidate_ctx(ctx);
+}
+EXPORT_SYMBOL(sptlrpc_svc_ctx_invalidate);
+
+/****************************************
+ * bulk security *
+ ****************************************/
+
+/**
+ * Perform transformation upon bulk data pointed by \a desc. This is called
+ * before transforming the request message.
+ */
+int sptlrpc_cli_wrap_bulk(struct ptlrpc_request *req,
+ struct ptlrpc_bulk_desc *desc)
+{
+ struct ptlrpc_cli_ctx *ctx;
+
+ LASSERT(req->rq_bulk_read || req->rq_bulk_write);
+
+ if (!req->rq_pack_bulk)
+ return 0;
+
+ ctx = req->rq_cli_ctx;
+ if (ctx->cc_ops->wrap_bulk)
+ return ctx->cc_ops->wrap_bulk(ctx, req, desc);
+ return 0;
+}
+EXPORT_SYMBOL(sptlrpc_cli_wrap_bulk);
+
+/**
+ * This is called after unwrap the reply message.
+ * return nob of actual plain text size received, or error code.
+ */
+int sptlrpc_cli_unwrap_bulk_read(struct ptlrpc_request *req,
+ struct ptlrpc_bulk_desc *desc,
+ int nob)
+{
+ struct ptlrpc_cli_ctx *ctx;
+ int rc;
+
+ LASSERT(req->rq_bulk_read && !req->rq_bulk_write);
+
+ if (!req->rq_pack_bulk)
+ return desc->bd_nob_transferred;
+
+ ctx = req->rq_cli_ctx;
+ if (ctx->cc_ops->unwrap_bulk) {
+ rc = ctx->cc_ops->unwrap_bulk(ctx, req, desc);
+ if (rc < 0)
+ return rc;
+ }
+ return desc->bd_nob_transferred;
+}
+EXPORT_SYMBOL(sptlrpc_cli_unwrap_bulk_read);
+
+/**
+ * This is called after unwrap the reply message.
+ * return 0 for success or error code.
+ */
+int sptlrpc_cli_unwrap_bulk_write(struct ptlrpc_request *req,
+ struct ptlrpc_bulk_desc *desc)
+{
+ struct ptlrpc_cli_ctx *ctx;
+ int rc;
+
+ LASSERT(!req->rq_bulk_read && req->rq_bulk_write);
+
+ if (!req->rq_pack_bulk)
+ return 0;
+
+ ctx = req->rq_cli_ctx;
+ if (ctx->cc_ops->unwrap_bulk) {
+ rc = ctx->cc_ops->unwrap_bulk(ctx, req, desc);
+ if (rc < 0)
+ return rc;
+ }
+
+ /*
+ * if everything is going right, nob should equals to nob_transferred.
+ * in case of privacy mode, nob_transferred needs to be adjusted.
+ */
+ if (desc->bd_nob != desc->bd_nob_transferred) {
+ CERROR("nob %d doesn't match transferred nob %d",
+ desc->bd_nob, desc->bd_nob_transferred);
+ return -EPROTO;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL(sptlrpc_cli_unwrap_bulk_write);
+
+
+/****************************************
+ * user descriptor helpers *
+ ****************************************/
+
+int sptlrpc_current_user_desc_size(void)
+{
+ int ngroups;
+
+ ngroups = current_ngroups;
+
+ if (ngroups > LUSTRE_MAX_GROUPS)
+ ngroups = LUSTRE_MAX_GROUPS;
+ return sptlrpc_user_desc_size(ngroups);
+}
+EXPORT_SYMBOL(sptlrpc_current_user_desc_size);
+
+int sptlrpc_pack_user_desc(struct lustre_msg *msg, int offset)
+{
+ struct ptlrpc_user_desc *pud;
+
+ pud = lustre_msg_buf(msg, offset, 0);
+
+ pud->pud_uid = from_kuid(&init_user_ns, current_uid());
+ pud->pud_gid = from_kgid(&init_user_ns, current_gid());
+ pud->pud_fsuid = from_kuid(&init_user_ns, current_fsuid());
+ pud->pud_fsgid = from_kgid(&init_user_ns, current_fsgid());
+ pud->pud_cap = cfs_curproc_cap_pack();
+ pud->pud_ngroups = (msg->lm_buflens[offset] - sizeof(*pud)) / 4;
+
+ task_lock(current);
+ if (pud->pud_ngroups > current_ngroups)
+ pud->pud_ngroups = current_ngroups;
+ memcpy(pud->pud_groups, current_cred()->group_info->blocks[0],
+ pud->pud_ngroups * sizeof(__u32));
+ task_unlock(current);
+
+ return 0;
+}
+EXPORT_SYMBOL(sptlrpc_pack_user_desc);
+
+int sptlrpc_unpack_user_desc(struct lustre_msg *msg, int offset, int swabbed)
+{
+ struct ptlrpc_user_desc *pud;
+ int i;
+
+ pud = lustre_msg_buf(msg, offset, sizeof(*pud));
+ if (!pud)
+ return -EINVAL;
+
+ if (swabbed) {
+ __swab32s(&pud->pud_uid);
+ __swab32s(&pud->pud_gid);
+ __swab32s(&pud->pud_fsuid);
+ __swab32s(&pud->pud_fsgid);
+ __swab32s(&pud->pud_cap);
+ __swab32s(&pud->pud_ngroups);
+ }
+
+ if (pud->pud_ngroups > LUSTRE_MAX_GROUPS) {
+ CERROR("%u groups is too large\n", pud->pud_ngroups);
+ return -EINVAL;
+ }
+
+ if (sizeof(*pud) + pud->pud_ngroups * sizeof(__u32) >
+ msg->lm_buflens[offset]) {
+ CERROR("%u groups are claimed but bufsize only %u\n",
+ pud->pud_ngroups, msg->lm_buflens[offset]);
+ return -EINVAL;
+ }
+
+ if (swabbed) {
+ for (i = 0; i < pud->pud_ngroups; i++)
+ __swab32s(&pud->pud_groups[i]);
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL(sptlrpc_unpack_user_desc);
+
+/****************************************
+ * misc helpers *
+ ****************************************/
+
+const char *sec2target_str(struct ptlrpc_sec *sec)
+{
+ if (!sec || !sec->ps_import || !sec->ps_import->imp_obd)
+ return "*";
+ if (sec_is_reverse(sec))
+ return "c";
+ return obd_uuid2str(&sec->ps_import->imp_obd->u.cli.cl_target_uuid);
+}
+EXPORT_SYMBOL(sec2target_str);
+
+/*
+ * return true if the bulk data is protected
+ */
+int sptlrpc_flavor_has_bulk(struct sptlrpc_flavor *flvr)
+{
+ switch (SPTLRPC_FLVR_BULK_SVC(flvr->sf_rpc)) {
+ case SPTLRPC_BULK_SVC_INTG:
+ case SPTLRPC_BULK_SVC_PRIV:
+ return 1;
+ default:
+ return 0;
+ }
+}
+EXPORT_SYMBOL(sptlrpc_flavor_has_bulk);
+
+/****************************************
+ * crypto API helper/alloc blkciper *
+ ****************************************/
+
+/****************************************
+ * initialize/finalize *
+ ****************************************/
+
+int sptlrpc_init(void)
+{
+ int rc;
+
+ rwlock_init(&policy_lock);
+
+ rc = sptlrpc_gc_init();
+ if (rc)
+ goto out;
+
+ rc = sptlrpc_conf_init();
+ if (rc)
+ goto out_gc;
+
+ rc = sptlrpc_enc_pool_init();
+ if (rc)
+ goto out_conf;
+
+ rc = sptlrpc_null_init();
+ if (rc)
+ goto out_pool;
+
+ rc = sptlrpc_plain_init();
+ if (rc)
+ goto out_null;
+
+ rc = sptlrpc_lproc_init();
+ if (rc)
+ goto out_plain;
+
+ return 0;
+
+out_plain:
+ sptlrpc_plain_fini();
+out_null:
+ sptlrpc_null_fini();
+out_pool:
+ sptlrpc_enc_pool_fini();
+out_conf:
+ sptlrpc_conf_fini();
+out_gc:
+ sptlrpc_gc_fini();
+out:
+ return rc;
+}
+
+void sptlrpc_fini(void)
+{
+ sptlrpc_lproc_fini();
+ sptlrpc_plain_fini();
+ sptlrpc_null_fini();
+ sptlrpc_enc_pool_fini();
+ sptlrpc_conf_fini();
+ sptlrpc_gc_fini();
+}
diff --git a/drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c b/drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c
new file mode 100644
index 000000000..c05a8554d
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c
@@ -0,0 +1,884 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/ptlrpc/sec_bulk.c
+ *
+ * Author: Eric Mei <ericm@clusterfs.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_SEC
+
+#include "../../include/linux/libcfs/libcfs.h"
+#include <linux/crypto.h>
+
+#include "../include/obd.h"
+#include "../include/obd_cksum.h"
+#include "../include/obd_class.h"
+#include "../include/obd_support.h"
+#include "../include/lustre_net.h"
+#include "../include/lustre_import.h"
+#include "../include/lustre_dlm.h"
+#include "../include/lustre_sec.h"
+
+#include "ptlrpc_internal.h"
+
+/****************************************
+ * bulk encryption page pools *
+ ****************************************/
+
+
+#define POINTERS_PER_PAGE (PAGE_CACHE_SIZE / sizeof(void *))
+#define PAGES_PER_POOL (POINTERS_PER_PAGE)
+
+#define IDLE_IDX_MAX (100)
+#define IDLE_IDX_WEIGHT (3)
+
+#define CACHE_QUIESCENT_PERIOD (20)
+
+static struct ptlrpc_enc_page_pool {
+ /*
+ * constants
+ */
+ unsigned long epp_max_pages; /* maximum pages can hold, const */
+ unsigned int epp_max_pools; /* number of pools, const */
+
+ /*
+ * wait queue in case of not enough free pages.
+ */
+ wait_queue_head_t epp_waitq; /* waiting threads */
+ unsigned int epp_waitqlen; /* wait queue length */
+ unsigned long epp_pages_short; /* # of pages wanted of in-q users */
+ unsigned int epp_growing:1; /* during adding pages */
+
+ /*
+ * indicating how idle the pools are, from 0 to MAX_IDLE_IDX
+ * this is counted based on each time when getting pages from
+ * the pools, not based on time. which means in case that system
+ * is idled for a while but the idle_idx might still be low if no
+ * activities happened in the pools.
+ */
+ unsigned long epp_idle_idx;
+
+ /* last shrink time due to mem tight */
+ long epp_last_shrink;
+ long epp_last_access;
+
+ /*
+ * in-pool pages bookkeeping
+ */
+ spinlock_t epp_lock; /* protect following fields */
+ unsigned long epp_total_pages; /* total pages in pools */
+ unsigned long epp_free_pages; /* current pages available */
+
+ /*
+ * statistics
+ */
+ unsigned long epp_st_max_pages; /* # of pages ever reached */
+ unsigned int epp_st_grows; /* # of grows */
+ unsigned int epp_st_grow_fails; /* # of add pages failures */
+ unsigned int epp_st_shrinks; /* # of shrinks */
+ unsigned long epp_st_access; /* # of access */
+ unsigned long epp_st_missings; /* # of cache missing */
+ unsigned long epp_st_lowfree; /* lowest free pages reached */
+ unsigned int epp_st_max_wqlen; /* highest waitqueue length */
+ unsigned long epp_st_max_wait; /* in jiffies */
+ /*
+ * pointers to pools
+ */
+ struct page ***epp_pools;
+} page_pools;
+
+/*
+ * /proc/fs/lustre/sptlrpc/encrypt_page_pools
+ */
+int sptlrpc_proc_enc_pool_seq_show(struct seq_file *m, void *v)
+{
+ spin_lock(&page_pools.epp_lock);
+
+ seq_printf(m,
+ "physical pages: %lu\n"
+ "pages per pool: %lu\n"
+ "max pages: %lu\n"
+ "max pools: %u\n"
+ "total pages: %lu\n"
+ "total free: %lu\n"
+ "idle index: %lu/100\n"
+ "last shrink: %lds\n"
+ "last access: %lds\n"
+ "max pages reached: %lu\n"
+ "grows: %u\n"
+ "grows failure: %u\n"
+ "shrinks: %u\n"
+ "cache access: %lu\n"
+ "cache missing: %lu\n"
+ "low free mark: %lu\n"
+ "max waitqueue depth: %u\n"
+ "max wait time: " CFS_TIME_T "/%u\n",
+ totalram_pages,
+ PAGES_PER_POOL,
+ page_pools.epp_max_pages,
+ page_pools.epp_max_pools,
+ page_pools.epp_total_pages,
+ page_pools.epp_free_pages,
+ page_pools.epp_idle_idx,
+ get_seconds() - page_pools.epp_last_shrink,
+ get_seconds() - page_pools.epp_last_access,
+ page_pools.epp_st_max_pages,
+ page_pools.epp_st_grows,
+ page_pools.epp_st_grow_fails,
+ page_pools.epp_st_shrinks,
+ page_pools.epp_st_access,
+ page_pools.epp_st_missings,
+ page_pools.epp_st_lowfree,
+ page_pools.epp_st_max_wqlen,
+ page_pools.epp_st_max_wait,
+ HZ);
+
+ spin_unlock(&page_pools.epp_lock);
+
+ return 0;
+}
+
+static void enc_pools_release_free_pages(long npages)
+{
+ int p_idx, g_idx;
+ int p_idx_max1, p_idx_max2;
+
+ LASSERT(npages > 0);
+ LASSERT(npages <= page_pools.epp_free_pages);
+ LASSERT(page_pools.epp_free_pages <= page_pools.epp_total_pages);
+
+ /* max pool index before the release */
+ p_idx_max2 = (page_pools.epp_total_pages - 1) / PAGES_PER_POOL;
+
+ page_pools.epp_free_pages -= npages;
+ page_pools.epp_total_pages -= npages;
+
+ /* max pool index after the release */
+ p_idx_max1 = page_pools.epp_total_pages == 0 ? -1 :
+ ((page_pools.epp_total_pages - 1) / PAGES_PER_POOL);
+
+ p_idx = page_pools.epp_free_pages / PAGES_PER_POOL;
+ g_idx = page_pools.epp_free_pages % PAGES_PER_POOL;
+ LASSERT(page_pools.epp_pools[p_idx]);
+
+ while (npages--) {
+ LASSERT(page_pools.epp_pools[p_idx]);
+ LASSERT(page_pools.epp_pools[p_idx][g_idx] != NULL);
+
+ __free_page(page_pools.epp_pools[p_idx][g_idx]);
+ page_pools.epp_pools[p_idx][g_idx] = NULL;
+
+ if (++g_idx == PAGES_PER_POOL) {
+ p_idx++;
+ g_idx = 0;
+ }
+ }
+
+ /* free unused pools */
+ while (p_idx_max1 < p_idx_max2) {
+ LASSERT(page_pools.epp_pools[p_idx_max2]);
+ OBD_FREE(page_pools.epp_pools[p_idx_max2], PAGE_CACHE_SIZE);
+ page_pools.epp_pools[p_idx_max2] = NULL;
+ p_idx_max2--;
+ }
+}
+
+/*
+ * we try to keep at least PTLRPC_MAX_BRW_PAGES pages in the pool.
+ */
+static unsigned long enc_pools_shrink_count(struct shrinker *s,
+ struct shrink_control *sc)
+{
+ /*
+ * if no pool access for a long time, we consider it's fully idle.
+ * a little race here is fine.
+ */
+ if (unlikely(get_seconds() - page_pools.epp_last_access >
+ CACHE_QUIESCENT_PERIOD)) {
+ spin_lock(&page_pools.epp_lock);
+ page_pools.epp_idle_idx = IDLE_IDX_MAX;
+ spin_unlock(&page_pools.epp_lock);
+ }
+
+ LASSERT(page_pools.epp_idle_idx <= IDLE_IDX_MAX);
+ return max((int)page_pools.epp_free_pages - PTLRPC_MAX_BRW_PAGES, 0) *
+ (IDLE_IDX_MAX - page_pools.epp_idle_idx) / IDLE_IDX_MAX;
+}
+
+/*
+ * we try to keep at least PTLRPC_MAX_BRW_PAGES pages in the pool.
+ */
+static unsigned long enc_pools_shrink_scan(struct shrinker *s,
+ struct shrink_control *sc)
+{
+ spin_lock(&page_pools.epp_lock);
+ sc->nr_to_scan = min_t(unsigned long, sc->nr_to_scan,
+ page_pools.epp_free_pages - PTLRPC_MAX_BRW_PAGES);
+ if (sc->nr_to_scan > 0) {
+ enc_pools_release_free_pages(sc->nr_to_scan);
+ CDEBUG(D_SEC, "released %ld pages, %ld left\n",
+ (long)sc->nr_to_scan, page_pools.epp_free_pages);
+
+ page_pools.epp_st_shrinks++;
+ page_pools.epp_last_shrink = get_seconds();
+ }
+ spin_unlock(&page_pools.epp_lock);
+
+ /*
+ * if no pool access for a long time, we consider it's fully idle.
+ * a little race here is fine.
+ */
+ if (unlikely(get_seconds() - page_pools.epp_last_access >
+ CACHE_QUIESCENT_PERIOD)) {
+ spin_lock(&page_pools.epp_lock);
+ page_pools.epp_idle_idx = IDLE_IDX_MAX;
+ spin_unlock(&page_pools.epp_lock);
+ }
+
+ LASSERT(page_pools.epp_idle_idx <= IDLE_IDX_MAX);
+ return sc->nr_to_scan;
+}
+
+static inline
+int npages_to_npools(unsigned long npages)
+{
+ return (int) ((npages + PAGES_PER_POOL - 1) / PAGES_PER_POOL);
+}
+
+/*
+ * return how many pages cleaned up.
+ */
+static unsigned long enc_pools_cleanup(struct page ***pools, int npools)
+{
+ unsigned long cleaned = 0;
+ int i, j;
+
+ for (i = 0; i < npools; i++) {
+ if (pools[i]) {
+ for (j = 0; j < PAGES_PER_POOL; j++) {
+ if (pools[i][j]) {
+ __free_page(pools[i][j]);
+ cleaned++;
+ }
+ }
+ OBD_FREE(pools[i], PAGE_CACHE_SIZE);
+ pools[i] = NULL;
+ }
+ }
+
+ return cleaned;
+}
+
+/*
+ * merge @npools pointed by @pools which contains @npages new pages
+ * into current pools.
+ *
+ * we have options to avoid most memory copy with some tricks. but we choose
+ * the simplest way to avoid complexity. It's not frequently called.
+ */
+static void enc_pools_insert(struct page ***pools, int npools, int npages)
+{
+ int freeslot;
+ int op_idx, np_idx, og_idx, ng_idx;
+ int cur_npools, end_npools;
+
+ LASSERT(npages > 0);
+ LASSERT(page_pools.epp_total_pages+npages <= page_pools.epp_max_pages);
+ LASSERT(npages_to_npools(npages) == npools);
+ LASSERT(page_pools.epp_growing);
+
+ spin_lock(&page_pools.epp_lock);
+
+ /*
+ * (1) fill all the free slots of current pools.
+ */
+ /* free slots are those left by rent pages, and the extra ones with
+ * index >= total_pages, locate at the tail of last pool. */
+ freeslot = page_pools.epp_total_pages % PAGES_PER_POOL;
+ if (freeslot != 0)
+ freeslot = PAGES_PER_POOL - freeslot;
+ freeslot += page_pools.epp_total_pages - page_pools.epp_free_pages;
+
+ op_idx = page_pools.epp_free_pages / PAGES_PER_POOL;
+ og_idx = page_pools.epp_free_pages % PAGES_PER_POOL;
+ np_idx = npools - 1;
+ ng_idx = (npages - 1) % PAGES_PER_POOL;
+
+ while (freeslot) {
+ LASSERT(page_pools.epp_pools[op_idx][og_idx] == NULL);
+ LASSERT(pools[np_idx][ng_idx] != NULL);
+
+ page_pools.epp_pools[op_idx][og_idx] = pools[np_idx][ng_idx];
+ pools[np_idx][ng_idx] = NULL;
+
+ freeslot--;
+
+ if (++og_idx == PAGES_PER_POOL) {
+ op_idx++;
+ og_idx = 0;
+ }
+ if (--ng_idx < 0) {
+ if (np_idx == 0)
+ break;
+ np_idx--;
+ ng_idx = PAGES_PER_POOL - 1;
+ }
+ }
+
+ /*
+ * (2) add pools if needed.
+ */
+ cur_npools = (page_pools.epp_total_pages + PAGES_PER_POOL - 1) /
+ PAGES_PER_POOL;
+ end_npools = (page_pools.epp_total_pages + npages + PAGES_PER_POOL - 1)
+ / PAGES_PER_POOL;
+ LASSERT(end_npools <= page_pools.epp_max_pools);
+
+ np_idx = 0;
+ while (cur_npools < end_npools) {
+ LASSERT(page_pools.epp_pools[cur_npools] == NULL);
+ LASSERT(np_idx < npools);
+ LASSERT(pools[np_idx] != NULL);
+
+ page_pools.epp_pools[cur_npools++] = pools[np_idx];
+ pools[np_idx++] = NULL;
+ }
+
+ page_pools.epp_total_pages += npages;
+ page_pools.epp_free_pages += npages;
+ page_pools.epp_st_lowfree = page_pools.epp_free_pages;
+
+ if (page_pools.epp_total_pages > page_pools.epp_st_max_pages)
+ page_pools.epp_st_max_pages = page_pools.epp_total_pages;
+
+ CDEBUG(D_SEC, "add %d pages to total %lu\n", npages,
+ page_pools.epp_total_pages);
+
+ spin_unlock(&page_pools.epp_lock);
+}
+
+static int enc_pools_add_pages(int npages)
+{
+ static DEFINE_MUTEX(add_pages_mutex);
+ struct page ***pools;
+ int npools, alloced = 0;
+ int i, j, rc = -ENOMEM;
+
+ if (npages < PTLRPC_MAX_BRW_PAGES)
+ npages = PTLRPC_MAX_BRW_PAGES;
+
+ mutex_lock(&add_pages_mutex);
+
+ if (npages + page_pools.epp_total_pages > page_pools.epp_max_pages)
+ npages = page_pools.epp_max_pages - page_pools.epp_total_pages;
+ LASSERT(npages > 0);
+
+ page_pools.epp_st_grows++;
+
+ npools = npages_to_npools(npages);
+ OBD_ALLOC(pools, npools * sizeof(*pools));
+ if (pools == NULL)
+ goto out;
+
+ for (i = 0; i < npools; i++) {
+ OBD_ALLOC(pools[i], PAGE_CACHE_SIZE);
+ if (pools[i] == NULL)
+ goto out_pools;
+
+ for (j = 0; j < PAGES_PER_POOL && alloced < npages; j++) {
+ pools[i][j] = alloc_page(GFP_NOFS |
+ __GFP_HIGHMEM);
+ if (pools[i][j] == NULL)
+ goto out_pools;
+
+ alloced++;
+ }
+ }
+ LASSERT(alloced == npages);
+
+ enc_pools_insert(pools, npools, npages);
+ CDEBUG(D_SEC, "added %d pages into pools\n", npages);
+ rc = 0;
+
+out_pools:
+ enc_pools_cleanup(pools, npools);
+ OBD_FREE(pools, npools * sizeof(*pools));
+out:
+ if (rc) {
+ page_pools.epp_st_grow_fails++;
+ CERROR("Failed to allocate %d enc pages\n", npages);
+ }
+
+ mutex_unlock(&add_pages_mutex);
+ return rc;
+}
+
+static inline void enc_pools_wakeup(void)
+{
+ assert_spin_locked(&page_pools.epp_lock);
+ LASSERT(page_pools.epp_waitqlen >= 0);
+
+ if (unlikely(page_pools.epp_waitqlen)) {
+ LASSERT(waitqueue_active(&page_pools.epp_waitq));
+ wake_up_all(&page_pools.epp_waitq);
+ }
+}
+
+static int enc_pools_should_grow(int page_needed, long now)
+{
+ /* don't grow if someone else is growing the pools right now,
+ * or the pools has reached its full capacity
+ */
+ if (page_pools.epp_growing ||
+ page_pools.epp_total_pages == page_pools.epp_max_pages)
+ return 0;
+
+ /* if total pages is not enough, we need to grow */
+ if (page_pools.epp_total_pages < page_needed)
+ return 1;
+
+ /*
+ * we wanted to return 0 here if there was a shrink just happened
+ * moment ago, but this may cause deadlock if both client and ost
+ * live on single node.
+ */
+#if 0
+ if (now - page_pools.epp_last_shrink < 2)
+ return 0;
+#endif
+
+ /*
+ * here we perhaps need consider other factors like wait queue
+ * length, idle index, etc. ?
+ */
+
+ /* grow the pools in any other cases */
+ return 1;
+}
+
+/*
+ * we allocate the requested pages atomically.
+ */
+int sptlrpc_enc_pool_get_pages(struct ptlrpc_bulk_desc *desc)
+{
+ wait_queue_t waitlink;
+ unsigned long this_idle = -1;
+ unsigned long tick = 0;
+ long now;
+ int p_idx, g_idx;
+ int i;
+
+ LASSERT(desc->bd_iov_count > 0);
+ LASSERT(desc->bd_iov_count <= page_pools.epp_max_pages);
+
+ /* resent bulk, enc iov might have been allocated previously */
+ if (desc->bd_enc_iov != NULL)
+ return 0;
+
+ OBD_ALLOC(desc->bd_enc_iov,
+ desc->bd_iov_count * sizeof(*desc->bd_enc_iov));
+ if (desc->bd_enc_iov == NULL)
+ return -ENOMEM;
+
+ spin_lock(&page_pools.epp_lock);
+
+ page_pools.epp_st_access++;
+again:
+ if (unlikely(page_pools.epp_free_pages < desc->bd_iov_count)) {
+ if (tick == 0)
+ tick = cfs_time_current();
+
+ now = get_seconds();
+
+ page_pools.epp_st_missings++;
+ page_pools.epp_pages_short += desc->bd_iov_count;
+
+ if (enc_pools_should_grow(desc->bd_iov_count, now)) {
+ page_pools.epp_growing = 1;
+
+ spin_unlock(&page_pools.epp_lock);
+ enc_pools_add_pages(page_pools.epp_pages_short / 2);
+ spin_lock(&page_pools.epp_lock);
+
+ page_pools.epp_growing = 0;
+
+ enc_pools_wakeup();
+ } else {
+ if (++page_pools.epp_waitqlen >
+ page_pools.epp_st_max_wqlen)
+ page_pools.epp_st_max_wqlen =
+ page_pools.epp_waitqlen;
+
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ init_waitqueue_entry(&waitlink, current);
+ add_wait_queue(&page_pools.epp_waitq, &waitlink);
+
+ spin_unlock(&page_pools.epp_lock);
+ schedule();
+ remove_wait_queue(&page_pools.epp_waitq, &waitlink);
+ LASSERT(page_pools.epp_waitqlen > 0);
+ spin_lock(&page_pools.epp_lock);
+ page_pools.epp_waitqlen--;
+ }
+
+ LASSERT(page_pools.epp_pages_short >= desc->bd_iov_count);
+ page_pools.epp_pages_short -= desc->bd_iov_count;
+
+ this_idle = 0;
+ goto again;
+ }
+
+ /* record max wait time */
+ if (unlikely(tick != 0)) {
+ tick = cfs_time_current() - tick;
+ if (tick > page_pools.epp_st_max_wait)
+ page_pools.epp_st_max_wait = tick;
+ }
+
+ /* proceed with rest of allocation */
+ page_pools.epp_free_pages -= desc->bd_iov_count;
+
+ p_idx = page_pools.epp_free_pages / PAGES_PER_POOL;
+ g_idx = page_pools.epp_free_pages % PAGES_PER_POOL;
+
+ for (i = 0; i < desc->bd_iov_count; i++) {
+ LASSERT(page_pools.epp_pools[p_idx][g_idx] != NULL);
+ desc->bd_enc_iov[i].kiov_page =
+ page_pools.epp_pools[p_idx][g_idx];
+ page_pools.epp_pools[p_idx][g_idx] = NULL;
+
+ if (++g_idx == PAGES_PER_POOL) {
+ p_idx++;
+ g_idx = 0;
+ }
+ }
+
+ if (page_pools.epp_free_pages < page_pools.epp_st_lowfree)
+ page_pools.epp_st_lowfree = page_pools.epp_free_pages;
+
+ /*
+ * new idle index = (old * weight + new) / (weight + 1)
+ */
+ if (this_idle == -1) {
+ this_idle = page_pools.epp_free_pages * IDLE_IDX_MAX /
+ page_pools.epp_total_pages;
+ }
+ page_pools.epp_idle_idx = (page_pools.epp_idle_idx * IDLE_IDX_WEIGHT +
+ this_idle) /
+ (IDLE_IDX_WEIGHT + 1);
+
+ page_pools.epp_last_access = get_seconds();
+
+ spin_unlock(&page_pools.epp_lock);
+ return 0;
+}
+EXPORT_SYMBOL(sptlrpc_enc_pool_get_pages);
+
+void sptlrpc_enc_pool_put_pages(struct ptlrpc_bulk_desc *desc)
+{
+ int p_idx, g_idx;
+ int i;
+
+ if (desc->bd_enc_iov == NULL)
+ return;
+
+ LASSERT(desc->bd_iov_count > 0);
+
+ spin_lock(&page_pools.epp_lock);
+
+ p_idx = page_pools.epp_free_pages / PAGES_PER_POOL;
+ g_idx = page_pools.epp_free_pages % PAGES_PER_POOL;
+
+ LASSERT(page_pools.epp_free_pages + desc->bd_iov_count <=
+ page_pools.epp_total_pages);
+ LASSERT(page_pools.epp_pools[p_idx]);
+
+ for (i = 0; i < desc->bd_iov_count; i++) {
+ LASSERT(desc->bd_enc_iov[i].kiov_page != NULL);
+ LASSERT(g_idx != 0 || page_pools.epp_pools[p_idx]);
+ LASSERT(page_pools.epp_pools[p_idx][g_idx] == NULL);
+
+ page_pools.epp_pools[p_idx][g_idx] =
+ desc->bd_enc_iov[i].kiov_page;
+
+ if (++g_idx == PAGES_PER_POOL) {
+ p_idx++;
+ g_idx = 0;
+ }
+ }
+
+ page_pools.epp_free_pages += desc->bd_iov_count;
+
+ enc_pools_wakeup();
+
+ spin_unlock(&page_pools.epp_lock);
+
+ OBD_FREE(desc->bd_enc_iov,
+ desc->bd_iov_count * sizeof(*desc->bd_enc_iov));
+ desc->bd_enc_iov = NULL;
+}
+EXPORT_SYMBOL(sptlrpc_enc_pool_put_pages);
+
+/*
+ * we don't do much stuff for add_user/del_user anymore, except adding some
+ * initial pages in add_user() if current pools are empty, rest would be
+ * handled by the pools's self-adaption.
+ */
+int sptlrpc_enc_pool_add_user(void)
+{
+ int need_grow = 0;
+
+ spin_lock(&page_pools.epp_lock);
+ if (page_pools.epp_growing == 0 && page_pools.epp_total_pages == 0) {
+ page_pools.epp_growing = 1;
+ need_grow = 1;
+ }
+ spin_unlock(&page_pools.epp_lock);
+
+ if (need_grow) {
+ enc_pools_add_pages(PTLRPC_MAX_BRW_PAGES +
+ PTLRPC_MAX_BRW_PAGES);
+
+ spin_lock(&page_pools.epp_lock);
+ page_pools.epp_growing = 0;
+ enc_pools_wakeup();
+ spin_unlock(&page_pools.epp_lock);
+ }
+ return 0;
+}
+EXPORT_SYMBOL(sptlrpc_enc_pool_add_user);
+
+int sptlrpc_enc_pool_del_user(void)
+{
+ return 0;
+}
+EXPORT_SYMBOL(sptlrpc_enc_pool_del_user);
+
+static inline void enc_pools_alloc(void)
+{
+ LASSERT(page_pools.epp_max_pools);
+ OBD_ALLOC_LARGE(page_pools.epp_pools,
+ page_pools.epp_max_pools *
+ sizeof(*page_pools.epp_pools));
+}
+
+static inline void enc_pools_free(void)
+{
+ LASSERT(page_pools.epp_max_pools);
+ LASSERT(page_pools.epp_pools);
+
+ OBD_FREE_LARGE(page_pools.epp_pools,
+ page_pools.epp_max_pools *
+ sizeof(*page_pools.epp_pools));
+}
+
+static struct shrinker pools_shrinker = {
+ .count_objects = enc_pools_shrink_count,
+ .scan_objects = enc_pools_shrink_scan,
+ .seeks = DEFAULT_SEEKS,
+};
+
+int sptlrpc_enc_pool_init(void)
+{
+ /*
+ * maximum capacity is 1/8 of total physical memory.
+ * is the 1/8 a good number?
+ */
+ page_pools.epp_max_pages = totalram_pages / 8;
+ page_pools.epp_max_pools = npages_to_npools(page_pools.epp_max_pages);
+
+ init_waitqueue_head(&page_pools.epp_waitq);
+ page_pools.epp_waitqlen = 0;
+ page_pools.epp_pages_short = 0;
+
+ page_pools.epp_growing = 0;
+
+ page_pools.epp_idle_idx = 0;
+ page_pools.epp_last_shrink = get_seconds();
+ page_pools.epp_last_access = get_seconds();
+
+ spin_lock_init(&page_pools.epp_lock);
+ page_pools.epp_total_pages = 0;
+ page_pools.epp_free_pages = 0;
+
+ page_pools.epp_st_max_pages = 0;
+ page_pools.epp_st_grows = 0;
+ page_pools.epp_st_grow_fails = 0;
+ page_pools.epp_st_shrinks = 0;
+ page_pools.epp_st_access = 0;
+ page_pools.epp_st_missings = 0;
+ page_pools.epp_st_lowfree = 0;
+ page_pools.epp_st_max_wqlen = 0;
+ page_pools.epp_st_max_wait = 0;
+
+ enc_pools_alloc();
+ if (page_pools.epp_pools == NULL)
+ return -ENOMEM;
+
+ register_shrinker(&pools_shrinker);
+
+ return 0;
+}
+
+void sptlrpc_enc_pool_fini(void)
+{
+ unsigned long cleaned, npools;
+
+ LASSERT(page_pools.epp_pools);
+ LASSERT(page_pools.epp_total_pages == page_pools.epp_free_pages);
+
+ unregister_shrinker(&pools_shrinker);
+
+ npools = npages_to_npools(page_pools.epp_total_pages);
+ cleaned = enc_pools_cleanup(page_pools.epp_pools, npools);
+ LASSERT(cleaned == page_pools.epp_total_pages);
+
+ enc_pools_free();
+
+ if (page_pools.epp_st_access > 0) {
+ CDEBUG(D_SEC,
+ "max pages %lu, grows %u, grow fails %u, shrinks %u, access %lu, missing %lu, max qlen %u, max wait "
+ CFS_TIME_T"/%d\n",
+ page_pools.epp_st_max_pages, page_pools.epp_st_grows,
+ page_pools.epp_st_grow_fails,
+ page_pools.epp_st_shrinks, page_pools.epp_st_access,
+ page_pools.epp_st_missings, page_pools.epp_st_max_wqlen,
+ page_pools.epp_st_max_wait, HZ);
+ }
+}
+
+
+static int cfs_hash_alg_id[] = {
+ [BULK_HASH_ALG_NULL] = CFS_HASH_ALG_NULL,
+ [BULK_HASH_ALG_ADLER32] = CFS_HASH_ALG_ADLER32,
+ [BULK_HASH_ALG_CRC32] = CFS_HASH_ALG_CRC32,
+ [BULK_HASH_ALG_MD5] = CFS_HASH_ALG_MD5,
+ [BULK_HASH_ALG_SHA1] = CFS_HASH_ALG_SHA1,
+ [BULK_HASH_ALG_SHA256] = CFS_HASH_ALG_SHA256,
+ [BULK_HASH_ALG_SHA384] = CFS_HASH_ALG_SHA384,
+ [BULK_HASH_ALG_SHA512] = CFS_HASH_ALG_SHA512,
+};
+const char *sptlrpc_get_hash_name(__u8 hash_alg)
+{
+ return cfs_crypto_hash_name(cfs_hash_alg_id[hash_alg]);
+}
+EXPORT_SYMBOL(sptlrpc_get_hash_name);
+
+__u8 sptlrpc_get_hash_alg(const char *algname)
+{
+ return cfs_crypto_hash_alg(algname);
+}
+EXPORT_SYMBOL(sptlrpc_get_hash_alg);
+
+int bulk_sec_desc_unpack(struct lustre_msg *msg, int offset, int swabbed)
+{
+ struct ptlrpc_bulk_sec_desc *bsd;
+ int size = msg->lm_buflens[offset];
+
+ bsd = lustre_msg_buf(msg, offset, sizeof(*bsd));
+ if (bsd == NULL) {
+ CERROR("Invalid bulk sec desc: size %d\n", size);
+ return -EINVAL;
+ }
+
+ if (swabbed)
+ __swab32s(&bsd->bsd_nob);
+
+ if (unlikely(bsd->bsd_version != 0)) {
+ CERROR("Unexpected version %u\n", bsd->bsd_version);
+ return -EPROTO;
+ }
+
+ if (unlikely(bsd->bsd_type >= SPTLRPC_BULK_MAX)) {
+ CERROR("Invalid type %u\n", bsd->bsd_type);
+ return -EPROTO;
+ }
+
+ /* FIXME more sanity check here */
+
+ if (unlikely(bsd->bsd_svc != SPTLRPC_BULK_SVC_NULL &&
+ bsd->bsd_svc != SPTLRPC_BULK_SVC_INTG &&
+ bsd->bsd_svc != SPTLRPC_BULK_SVC_PRIV)) {
+ CERROR("Invalid svc %u\n", bsd->bsd_svc);
+ return -EPROTO;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL(bulk_sec_desc_unpack);
+
+int sptlrpc_get_bulk_checksum(struct ptlrpc_bulk_desc *desc, __u8 alg,
+ void *buf, int buflen)
+{
+ struct cfs_crypto_hash_desc *hdesc;
+ int hashsize;
+ char hashbuf[64];
+ unsigned int bufsize;
+ int i, err;
+
+ LASSERT(alg > BULK_HASH_ALG_NULL && alg < BULK_HASH_ALG_MAX);
+ LASSERT(buflen >= 4);
+
+ hdesc = cfs_crypto_hash_init(cfs_hash_alg_id[alg], NULL, 0);
+ if (IS_ERR(hdesc)) {
+ CERROR("Unable to initialize checksum hash %s\n",
+ cfs_crypto_hash_name(cfs_hash_alg_id[alg]));
+ return PTR_ERR(hdesc);
+ }
+
+ hashsize = cfs_crypto_hash_digestsize(cfs_hash_alg_id[alg]);
+
+ for (i = 0; i < desc->bd_iov_count; i++) {
+ cfs_crypto_hash_update_page(hdesc, desc->bd_iov[i].kiov_page,
+ desc->bd_iov[i].kiov_offset & ~CFS_PAGE_MASK,
+ desc->bd_iov[i].kiov_len);
+ }
+ if (hashsize > buflen) {
+ bufsize = sizeof(hashbuf);
+ err = cfs_crypto_hash_final(hdesc, (unsigned char *)hashbuf,
+ &bufsize);
+ memcpy(buf, hashbuf, buflen);
+ } else {
+ bufsize = buflen;
+ err = cfs_crypto_hash_final(hdesc, (unsigned char *)buf,
+ &bufsize);
+ }
+
+ if (err)
+ cfs_crypto_hash_final(hdesc, NULL, NULL);
+ return err;
+}
+EXPORT_SYMBOL(sptlrpc_get_bulk_checksum);
diff --git a/drivers/staging/lustre/lustre/ptlrpc/sec_config.c b/drivers/staging/lustre/lustre/ptlrpc/sec_config.c
new file mode 100644
index 000000000..56ba9e4e5
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ptlrpc/sec_config.c
@@ -0,0 +1,901 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#define DEBUG_SUBSYSTEM S_SEC
+
+#include "../../include/linux/libcfs/libcfs.h"
+#include <linux/crypto.h>
+#include <linux/key.h>
+
+#include "../include/obd.h"
+#include "../include/obd_support.h"
+#include "../include/lustre_import.h"
+#include "../include/lustre_param.h"
+#include "../include/lustre_sec.h"
+
+#include "ptlrpc_internal.h"
+
+const char *sptlrpc_part2name(enum lustre_sec_part part)
+{
+ switch (part) {
+ case LUSTRE_SP_CLI:
+ return "cli";
+ case LUSTRE_SP_MDT:
+ return "mdt";
+ case LUSTRE_SP_OST:
+ return "ost";
+ case LUSTRE_SP_MGC:
+ return "mgc";
+ case LUSTRE_SP_MGS:
+ return "mgs";
+ case LUSTRE_SP_ANY:
+ return "any";
+ default:
+ return "err";
+ }
+}
+EXPORT_SYMBOL(sptlrpc_part2name);
+
+enum lustre_sec_part sptlrpc_target_sec_part(struct obd_device *obd)
+{
+ const char *type = obd->obd_type->typ_name;
+
+ if (!strcmp(type, LUSTRE_MDT_NAME))
+ return LUSTRE_SP_MDT;
+ if (!strcmp(type, LUSTRE_OST_NAME))
+ return LUSTRE_SP_OST;
+ if (!strcmp(type, LUSTRE_MGS_NAME))
+ return LUSTRE_SP_MGS;
+
+ CERROR("unknown target %p(%s)\n", obd, type);
+ return LUSTRE_SP_ANY;
+}
+EXPORT_SYMBOL(sptlrpc_target_sec_part);
+
+/****************************************
+ * user supplied flavor string parsing *
+ ****************************************/
+
+/*
+ * format: <base_flavor>[-<bulk_type:alg_spec>]
+ */
+int sptlrpc_parse_flavor(const char *str, struct sptlrpc_flavor *flvr)
+{
+ char buf[32];
+ char *bulk, *alg;
+
+ memset(flvr, 0, sizeof(*flvr));
+
+ if (str == NULL || str[0] == '\0') {
+ flvr->sf_rpc = SPTLRPC_FLVR_INVALID;
+ return 0;
+ }
+
+ strncpy(buf, str, sizeof(buf));
+ buf[sizeof(buf) - 1] = '\0';
+
+ bulk = strchr(buf, '-');
+ if (bulk)
+ *bulk++ = '\0';
+
+ flvr->sf_rpc = sptlrpc_name2flavor_base(buf);
+ if (flvr->sf_rpc == SPTLRPC_FLVR_INVALID)
+ goto err_out;
+
+ /*
+ * currently only base flavor "plain" can have bulk specification.
+ */
+ if (flvr->sf_rpc == SPTLRPC_FLVR_PLAIN) {
+ flvr->u_bulk.hash.hash_alg = BULK_HASH_ALG_ADLER32;
+ if (bulk) {
+ /*
+ * format: plain-hash:<hash_alg>
+ */
+ alg = strchr(bulk, ':');
+ if (alg == NULL)
+ goto err_out;
+ *alg++ = '\0';
+
+ if (strcmp(bulk, "hash"))
+ goto err_out;
+
+ flvr->u_bulk.hash.hash_alg = sptlrpc_get_hash_alg(alg);
+ if (flvr->u_bulk.hash.hash_alg >= BULK_HASH_ALG_MAX)
+ goto err_out;
+ }
+
+ if (flvr->u_bulk.hash.hash_alg == BULK_HASH_ALG_NULL)
+ flvr_set_bulk_svc(&flvr->sf_rpc, SPTLRPC_BULK_SVC_NULL);
+ else
+ flvr_set_bulk_svc(&flvr->sf_rpc, SPTLRPC_BULK_SVC_INTG);
+ } else {
+ if (bulk)
+ goto err_out;
+ }
+
+ flvr->sf_flags = 0;
+ return 0;
+
+err_out:
+ CERROR("invalid flavor string: %s\n", str);
+ return -EINVAL;
+}
+EXPORT_SYMBOL(sptlrpc_parse_flavor);
+
+/****************************************
+ * configure rules *
+ ****************************************/
+
+static void get_default_flavor(struct sptlrpc_flavor *sf)
+{
+ memset(sf, 0, sizeof(*sf));
+
+ sf->sf_rpc = SPTLRPC_FLVR_NULL;
+ sf->sf_flags = 0;
+}
+
+static void sptlrpc_rule_init(struct sptlrpc_rule *rule)
+{
+ rule->sr_netid = LNET_NIDNET(LNET_NID_ANY);
+ rule->sr_from = LUSTRE_SP_ANY;
+ rule->sr_to = LUSTRE_SP_ANY;
+ rule->sr_padding = 0;
+
+ get_default_flavor(&rule->sr_flvr);
+}
+
+/*
+ * format: network[.direction]=flavor
+ */
+int sptlrpc_parse_rule(char *param, struct sptlrpc_rule *rule)
+{
+ char *flavor, *dir;
+ int rc;
+
+ sptlrpc_rule_init(rule);
+
+ flavor = strchr(param, '=');
+ if (flavor == NULL) {
+ CERROR("invalid param, no '='\n");
+ return -EINVAL;
+ }
+ *flavor++ = '\0';
+
+ dir = strchr(param, '.');
+ if (dir)
+ *dir++ = '\0';
+
+ /* 1.1 network */
+ if (strcmp(param, "default")) {
+ rule->sr_netid = libcfs_str2net(param);
+ if (rule->sr_netid == LNET_NIDNET(LNET_NID_ANY)) {
+ CERROR("invalid network name: %s\n", param);
+ return -EINVAL;
+ }
+ }
+
+ /* 1.2 direction */
+ if (dir) {
+ if (!strcmp(dir, "mdt2ost")) {
+ rule->sr_from = LUSTRE_SP_MDT;
+ rule->sr_to = LUSTRE_SP_OST;
+ } else if (!strcmp(dir, "mdt2mdt")) {
+ rule->sr_from = LUSTRE_SP_MDT;
+ rule->sr_to = LUSTRE_SP_MDT;
+ } else if (!strcmp(dir, "cli2ost")) {
+ rule->sr_from = LUSTRE_SP_CLI;
+ rule->sr_to = LUSTRE_SP_OST;
+ } else if (!strcmp(dir, "cli2mdt")) {
+ rule->sr_from = LUSTRE_SP_CLI;
+ rule->sr_to = LUSTRE_SP_MDT;
+ } else {
+ CERROR("invalid rule dir segment: %s\n", dir);
+ return -EINVAL;
+ }
+ }
+
+ /* 2.1 flavor */
+ rc = sptlrpc_parse_flavor(flavor, &rule->sr_flvr);
+ if (rc)
+ return -EINVAL;
+
+ return 0;
+}
+EXPORT_SYMBOL(sptlrpc_parse_rule);
+
+void sptlrpc_rule_set_free(struct sptlrpc_rule_set *rset)
+{
+ LASSERT(rset->srs_nslot ||
+ (rset->srs_nrule == 0 && rset->srs_rules == NULL));
+
+ if (rset->srs_nslot) {
+ OBD_FREE(rset->srs_rules,
+ rset->srs_nslot * sizeof(*rset->srs_rules));
+ sptlrpc_rule_set_init(rset);
+ }
+}
+EXPORT_SYMBOL(sptlrpc_rule_set_free);
+
+/*
+ * return 0 if the rule set could accommodate one more rule.
+ */
+int sptlrpc_rule_set_expand(struct sptlrpc_rule_set *rset)
+{
+ struct sptlrpc_rule *rules;
+ int nslot;
+
+ might_sleep();
+
+ if (rset->srs_nrule < rset->srs_nslot)
+ return 0;
+
+ nslot = rset->srs_nslot + 8;
+
+ /* better use realloc() if available */
+ OBD_ALLOC(rules, nslot * sizeof(*rset->srs_rules));
+ if (rules == NULL)
+ return -ENOMEM;
+
+ if (rset->srs_nrule) {
+ LASSERT(rset->srs_nslot && rset->srs_rules);
+ memcpy(rules, rset->srs_rules,
+ rset->srs_nrule * sizeof(*rset->srs_rules));
+
+ OBD_FREE(rset->srs_rules,
+ rset->srs_nslot * sizeof(*rset->srs_rules));
+ }
+
+ rset->srs_rules = rules;
+ rset->srs_nslot = nslot;
+ return 0;
+}
+EXPORT_SYMBOL(sptlrpc_rule_set_expand);
+
+static inline int rule_spec_dir(struct sptlrpc_rule *rule)
+{
+ return (rule->sr_from != LUSTRE_SP_ANY ||
+ rule->sr_to != LUSTRE_SP_ANY);
+}
+static inline int rule_spec_net(struct sptlrpc_rule *rule)
+{
+ return (rule->sr_netid != LNET_NIDNET(LNET_NID_ANY));
+}
+static inline int rule_match_dir(struct sptlrpc_rule *r1,
+ struct sptlrpc_rule *r2)
+{
+ return (r1->sr_from == r2->sr_from && r1->sr_to == r2->sr_to);
+}
+static inline int rule_match_net(struct sptlrpc_rule *r1,
+ struct sptlrpc_rule *r2)
+{
+ return (r1->sr_netid == r2->sr_netid);
+}
+
+/*
+ * merge @rule into @rset.
+ * the @rset slots might be expanded.
+ */
+int sptlrpc_rule_set_merge(struct sptlrpc_rule_set *rset,
+ struct sptlrpc_rule *rule)
+{
+ struct sptlrpc_rule *p = rset->srs_rules;
+ int spec_dir, spec_net;
+ int rc, n, match = 0;
+
+ might_sleep();
+
+ spec_net = rule_spec_net(rule);
+ spec_dir = rule_spec_dir(rule);
+
+ for (n = 0; n < rset->srs_nrule; n++) {
+ p = &rset->srs_rules[n];
+
+ /* test network match, if failed:
+ * - spec rule: skip rules which is also spec rule match, until
+ * we hit a wild rule, which means no more chance
+ * - wild rule: skip until reach the one which is also wild
+ * and matches
+ */
+ if (!rule_match_net(p, rule)) {
+ if (spec_net) {
+ if (rule_spec_net(p))
+ continue;
+ else
+ break;
+ } else {
+ continue;
+ }
+ }
+
+ /* test dir match, same logic as net matching */
+ if (!rule_match_dir(p, rule)) {
+ if (spec_dir) {
+ if (rule_spec_dir(p))
+ continue;
+ else
+ break;
+ } else {
+ continue;
+ }
+ }
+
+ /* find a match */
+ match = 1;
+ break;
+ }
+
+ if (match) {
+ LASSERT(n >= 0 && n < rset->srs_nrule);
+
+ if (rule->sr_flvr.sf_rpc == SPTLRPC_FLVR_INVALID) {
+ /* remove this rule */
+ if (n < rset->srs_nrule - 1)
+ memmove(&rset->srs_rules[n],
+ &rset->srs_rules[n + 1],
+ (rset->srs_nrule - n - 1) *
+ sizeof(*rule));
+ rset->srs_nrule--;
+ } else {
+ /* override the rule */
+ memcpy(&rset->srs_rules[n], rule, sizeof(*rule));
+ }
+ } else {
+ LASSERT(n >= 0 && n <= rset->srs_nrule);
+
+ if (rule->sr_flvr.sf_rpc != SPTLRPC_FLVR_INVALID) {
+ rc = sptlrpc_rule_set_expand(rset);
+ if (rc)
+ return rc;
+
+ if (n < rset->srs_nrule)
+ memmove(&rset->srs_rules[n + 1],
+ &rset->srs_rules[n],
+ (rset->srs_nrule - n) * sizeof(*rule));
+ memcpy(&rset->srs_rules[n], rule, sizeof(*rule));
+ rset->srs_nrule++;
+ } else {
+ CDEBUG(D_CONFIG, "ignore the unmatched deletion\n");
+ }
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL(sptlrpc_rule_set_merge);
+
+/**
+ * given from/to/nid, determine a matching flavor in ruleset.
+ * return 1 if a match found, otherwise return 0.
+ */
+int sptlrpc_rule_set_choose(struct sptlrpc_rule_set *rset,
+ enum lustre_sec_part from,
+ enum lustre_sec_part to,
+ lnet_nid_t nid,
+ struct sptlrpc_flavor *sf)
+{
+ struct sptlrpc_rule *r;
+ int n;
+
+ for (n = 0; n < rset->srs_nrule; n++) {
+ r = &rset->srs_rules[n];
+
+ if (LNET_NIDNET(nid) != LNET_NIDNET(LNET_NID_ANY) &&
+ r->sr_netid != LNET_NIDNET(LNET_NID_ANY) &&
+ LNET_NIDNET(nid) != r->sr_netid)
+ continue;
+
+ if (from != LUSTRE_SP_ANY && r->sr_from != LUSTRE_SP_ANY &&
+ from != r->sr_from)
+ continue;
+
+ if (to != LUSTRE_SP_ANY && r->sr_to != LUSTRE_SP_ANY &&
+ to != r->sr_to)
+ continue;
+
+ *sf = r->sr_flvr;
+ return 1;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL(sptlrpc_rule_set_choose);
+
+void sptlrpc_rule_set_dump(struct sptlrpc_rule_set *rset)
+{
+ struct sptlrpc_rule *r;
+ int n;
+
+ for (n = 0; n < rset->srs_nrule; n++) {
+ r = &rset->srs_rules[n];
+ CDEBUG(D_SEC, "<%02d> from %x to %x, net %x, rpc %x\n", n,
+ r->sr_from, r->sr_to, r->sr_netid, r->sr_flvr.sf_rpc);
+ }
+}
+EXPORT_SYMBOL(sptlrpc_rule_set_dump);
+
+/**********************************
+ * sptlrpc configuration support *
+ **********************************/
+
+struct sptlrpc_conf_tgt {
+ struct list_head sct_list;
+ char sct_name[MAX_OBD_NAME];
+ struct sptlrpc_rule_set sct_rset;
+};
+
+struct sptlrpc_conf {
+ struct list_head sc_list;
+ char sc_fsname[MTI_NAME_MAXLEN];
+ unsigned int sc_modified; /* modified during updating */
+ unsigned int sc_updated:1, /* updated copy from MGS */
+ sc_local:1; /* local copy from target */
+ struct sptlrpc_rule_set sc_rset; /* fs general rules */
+ struct list_head sc_tgts; /* target-specific rules */
+};
+
+static struct mutex sptlrpc_conf_lock;
+static LIST_HEAD(sptlrpc_confs);
+
+static inline int is_hex(char c)
+{
+ return ((c >= '0' && c <= '9') ||
+ (c >= 'a' && c <= 'f'));
+}
+
+static void target2fsname(const char *tgt, char *fsname, int buflen)
+{
+ const char *ptr;
+ int len;
+
+ ptr = strrchr(tgt, '-');
+ if (ptr) {
+ if ((strncmp(ptr, "-MDT", 4) != 0 &&
+ strncmp(ptr, "-OST", 4) != 0) ||
+ !is_hex(ptr[4]) || !is_hex(ptr[5]) ||
+ !is_hex(ptr[6]) || !is_hex(ptr[7]))
+ ptr = NULL;
+ }
+
+ /* if we didn't find the pattern, treat the whole string as fsname */
+ if (ptr == NULL)
+ len = strlen(tgt);
+ else
+ len = ptr - tgt;
+
+ len = min(len, buflen - 1);
+ memcpy(fsname, tgt, len);
+ fsname[len] = '\0';
+}
+
+static void sptlrpc_conf_free_rsets(struct sptlrpc_conf *conf)
+{
+ struct sptlrpc_conf_tgt *conf_tgt, *conf_tgt_next;
+
+ sptlrpc_rule_set_free(&conf->sc_rset);
+
+ list_for_each_entry_safe(conf_tgt, conf_tgt_next,
+ &conf->sc_tgts, sct_list) {
+ sptlrpc_rule_set_free(&conf_tgt->sct_rset);
+ list_del(&conf_tgt->sct_list);
+ OBD_FREE_PTR(conf_tgt);
+ }
+ LASSERT(list_empty(&conf->sc_tgts));
+
+ conf->sc_updated = 0;
+ conf->sc_local = 0;
+}
+
+static void sptlrpc_conf_free(struct sptlrpc_conf *conf)
+{
+ CDEBUG(D_SEC, "free sptlrpc conf %s\n", conf->sc_fsname);
+
+ sptlrpc_conf_free_rsets(conf);
+ list_del(&conf->sc_list);
+ OBD_FREE_PTR(conf);
+}
+
+static
+struct sptlrpc_conf_tgt *sptlrpc_conf_get_tgt(struct sptlrpc_conf *conf,
+ const char *name,
+ int create)
+{
+ struct sptlrpc_conf_tgt *conf_tgt;
+
+ list_for_each_entry(conf_tgt, &conf->sc_tgts, sct_list) {
+ if (strcmp(conf_tgt->sct_name, name) == 0)
+ return conf_tgt;
+ }
+
+ if (!create)
+ return NULL;
+
+ OBD_ALLOC_PTR(conf_tgt);
+ if (conf_tgt) {
+ strlcpy(conf_tgt->sct_name, name, sizeof(conf_tgt->sct_name));
+ sptlrpc_rule_set_init(&conf_tgt->sct_rset);
+ list_add(&conf_tgt->sct_list, &conf->sc_tgts);
+ }
+
+ return conf_tgt;
+}
+
+static
+struct sptlrpc_conf *sptlrpc_conf_get(const char *fsname,
+ int create)
+{
+ struct sptlrpc_conf *conf;
+
+ list_for_each_entry(conf, &sptlrpc_confs, sc_list) {
+ if (strcmp(conf->sc_fsname, fsname) == 0)
+ return conf;
+ }
+
+ if (!create)
+ return NULL;
+
+ OBD_ALLOC_PTR(conf);
+ if (conf == NULL)
+ return NULL;
+
+ strcpy(conf->sc_fsname, fsname);
+ sptlrpc_rule_set_init(&conf->sc_rset);
+ INIT_LIST_HEAD(&conf->sc_tgts);
+ list_add(&conf->sc_list, &sptlrpc_confs);
+
+ CDEBUG(D_SEC, "create sptlrpc conf %s\n", conf->sc_fsname);
+ return conf;
+}
+
+/**
+ * caller must hold conf_lock already.
+ */
+static int sptlrpc_conf_merge_rule(struct sptlrpc_conf *conf,
+ const char *target,
+ struct sptlrpc_rule *rule)
+{
+ struct sptlrpc_conf_tgt *conf_tgt;
+ struct sptlrpc_rule_set *rule_set;
+
+ /* fsname == target means general rules for the whole fs */
+ if (strcmp(conf->sc_fsname, target) == 0) {
+ rule_set = &conf->sc_rset;
+ } else {
+ conf_tgt = sptlrpc_conf_get_tgt(conf, target, 1);
+ if (conf_tgt) {
+ rule_set = &conf_tgt->sct_rset;
+ } else {
+ CERROR("out of memory, can't merge rule!\n");
+ return -ENOMEM;
+ }
+ }
+
+ return sptlrpc_rule_set_merge(rule_set, rule);
+}
+
+/**
+ * process one LCFG_SPTLRPC_CONF record. if \a conf is NULL, we
+ * find one through the target name in the record inside conf_lock;
+ * otherwise means caller already hold conf_lock.
+ */
+static int __sptlrpc_process_config(struct lustre_cfg *lcfg,
+ struct sptlrpc_conf *conf)
+{
+ char *target, *param;
+ char fsname[MTI_NAME_MAXLEN];
+ struct sptlrpc_rule rule;
+ int rc;
+
+ target = lustre_cfg_string(lcfg, 1);
+ if (target == NULL) {
+ CERROR("missing target name\n");
+ return -EINVAL;
+ }
+
+ param = lustre_cfg_string(lcfg, 2);
+ if (param == NULL) {
+ CERROR("missing parameter\n");
+ return -EINVAL;
+ }
+
+ CDEBUG(D_SEC, "processing rule: %s.%s\n", target, param);
+
+ /* parse rule to make sure the format is correct */
+ if (strncmp(param, PARAM_SRPC_FLVR, sizeof(PARAM_SRPC_FLVR) - 1) != 0) {
+ CERROR("Invalid sptlrpc parameter: %s\n", param);
+ return -EINVAL;
+ }
+ param += sizeof(PARAM_SRPC_FLVR) - 1;
+
+ rc = sptlrpc_parse_rule(param, &rule);
+ if (rc)
+ return -EINVAL;
+
+ if (conf == NULL) {
+ target2fsname(target, fsname, sizeof(fsname));
+
+ mutex_lock(&sptlrpc_conf_lock);
+ conf = sptlrpc_conf_get(fsname, 0);
+ if (conf == NULL) {
+ CERROR("can't find conf\n");
+ rc = -ENOMEM;
+ } else {
+ rc = sptlrpc_conf_merge_rule(conf, target, &rule);
+ }
+ mutex_unlock(&sptlrpc_conf_lock);
+ } else {
+ LASSERT(mutex_is_locked(&sptlrpc_conf_lock));
+ rc = sptlrpc_conf_merge_rule(conf, target, &rule);
+ }
+
+ if (rc == 0)
+ conf->sc_modified++;
+
+ return rc;
+}
+
+int sptlrpc_process_config(struct lustre_cfg *lcfg)
+{
+ return __sptlrpc_process_config(lcfg, NULL);
+}
+EXPORT_SYMBOL(sptlrpc_process_config);
+
+static int logname2fsname(const char *logname, char *buf, int buflen)
+{
+ char *ptr;
+ int len;
+
+ ptr = strrchr(logname, '-');
+ if (ptr == NULL || strcmp(ptr, "-sptlrpc")) {
+ CERROR("%s is not a sptlrpc config log\n", logname);
+ return -EINVAL;
+ }
+
+ len = min((int) (ptr - logname), buflen - 1);
+
+ memcpy(buf, logname, len);
+ buf[len] = '\0';
+ return 0;
+}
+
+void sptlrpc_conf_log_update_begin(const char *logname)
+{
+ struct sptlrpc_conf *conf;
+ char fsname[16];
+
+ if (logname2fsname(logname, fsname, sizeof(fsname)))
+ return;
+
+ mutex_lock(&sptlrpc_conf_lock);
+
+ conf = sptlrpc_conf_get(fsname, 0);
+ if (conf) {
+ if (conf->sc_local) {
+ LASSERT(conf->sc_updated == 0);
+ sptlrpc_conf_free_rsets(conf);
+ }
+ conf->sc_modified = 0;
+ }
+
+ mutex_unlock(&sptlrpc_conf_lock);
+}
+EXPORT_SYMBOL(sptlrpc_conf_log_update_begin);
+
+/**
+ * mark a config log has been updated
+ */
+void sptlrpc_conf_log_update_end(const char *logname)
+{
+ struct sptlrpc_conf *conf;
+ char fsname[16];
+
+ if (logname2fsname(logname, fsname, sizeof(fsname)))
+ return;
+
+ mutex_lock(&sptlrpc_conf_lock);
+
+ conf = sptlrpc_conf_get(fsname, 0);
+ if (conf) {
+ /*
+ * if original state is not updated, make sure the
+ * modified counter > 0 to enforce updating local copy.
+ */
+ if (conf->sc_updated == 0)
+ conf->sc_modified++;
+
+ conf->sc_updated = 1;
+ }
+
+ mutex_unlock(&sptlrpc_conf_lock);
+}
+EXPORT_SYMBOL(sptlrpc_conf_log_update_end);
+
+void sptlrpc_conf_log_start(const char *logname)
+{
+ char fsname[16];
+
+ if (logname2fsname(logname, fsname, sizeof(fsname)))
+ return;
+
+ mutex_lock(&sptlrpc_conf_lock);
+ sptlrpc_conf_get(fsname, 1);
+ mutex_unlock(&sptlrpc_conf_lock);
+}
+EXPORT_SYMBOL(sptlrpc_conf_log_start);
+
+void sptlrpc_conf_log_stop(const char *logname)
+{
+ struct sptlrpc_conf *conf;
+ char fsname[16];
+
+ if (logname2fsname(logname, fsname, sizeof(fsname)))
+ return;
+
+ mutex_lock(&sptlrpc_conf_lock);
+ conf = sptlrpc_conf_get(fsname, 0);
+ if (conf)
+ sptlrpc_conf_free(conf);
+ mutex_unlock(&sptlrpc_conf_lock);
+}
+EXPORT_SYMBOL(sptlrpc_conf_log_stop);
+
+static inline void flavor_set_flags(struct sptlrpc_flavor *sf,
+ enum lustre_sec_part from,
+ enum lustre_sec_part to,
+ unsigned int fl_udesc)
+{
+ /*
+ * null flavor doesn't need to set any flavor, and in fact
+ * we'd better not do that because everybody share a single sec.
+ */
+ if (sf->sf_rpc == SPTLRPC_FLVR_NULL)
+ return;
+
+ if (from == LUSTRE_SP_MDT) {
+ /* MDT->MDT; MDT->OST */
+ sf->sf_flags |= PTLRPC_SEC_FL_ROOTONLY;
+ } else if (from == LUSTRE_SP_CLI && to == LUSTRE_SP_OST) {
+ /* CLI->OST */
+ sf->sf_flags |= PTLRPC_SEC_FL_ROOTONLY | PTLRPC_SEC_FL_BULK;
+ } else if (from == LUSTRE_SP_CLI && to == LUSTRE_SP_MDT) {
+ /* CLI->MDT */
+ if (fl_udesc && sf->sf_rpc != SPTLRPC_FLVR_NULL)
+ sf->sf_flags |= PTLRPC_SEC_FL_UDESC;
+ }
+}
+
+void sptlrpc_conf_choose_flavor(enum lustre_sec_part from,
+ enum lustre_sec_part to,
+ struct obd_uuid *target,
+ lnet_nid_t nid,
+ struct sptlrpc_flavor *sf)
+{
+ struct sptlrpc_conf *conf;
+ struct sptlrpc_conf_tgt *conf_tgt;
+ char name[MTI_NAME_MAXLEN];
+ int len, rc = 0;
+
+ target2fsname(target->uuid, name, sizeof(name));
+
+ mutex_lock(&sptlrpc_conf_lock);
+
+ conf = sptlrpc_conf_get(name, 0);
+ if (conf == NULL)
+ goto out;
+
+ /* convert uuid name (supposed end with _UUID) to target name */
+ len = strlen(target->uuid);
+ LASSERT(len > 5);
+ memcpy(name, target->uuid, len - 5);
+ name[len - 5] = '\0';
+
+ conf_tgt = sptlrpc_conf_get_tgt(conf, name, 0);
+ if (conf_tgt) {
+ rc = sptlrpc_rule_set_choose(&conf_tgt->sct_rset,
+ from, to, nid, sf);
+ if (rc)
+ goto out;
+ }
+
+ rc = sptlrpc_rule_set_choose(&conf->sc_rset, from, to, nid, sf);
+out:
+ mutex_unlock(&sptlrpc_conf_lock);
+
+ if (rc == 0)
+ get_default_flavor(sf);
+
+ flavor_set_flags(sf, from, to, 1);
+}
+
+/**
+ * called by target devices, determine the expected flavor from
+ * certain peer (from, nid).
+ */
+void sptlrpc_target_choose_flavor(struct sptlrpc_rule_set *rset,
+ enum lustre_sec_part from,
+ lnet_nid_t nid,
+ struct sptlrpc_flavor *sf)
+{
+ if (sptlrpc_rule_set_choose(rset, from, LUSTRE_SP_ANY, nid, sf) == 0)
+ get_default_flavor(sf);
+}
+EXPORT_SYMBOL(sptlrpc_target_choose_flavor);
+
+#define SEC_ADAPT_DELAY (10)
+
+/**
+ * called by client devices, notify the sptlrpc config has changed and
+ * do import_sec_adapt later.
+ */
+void sptlrpc_conf_client_adapt(struct obd_device *obd)
+{
+ struct obd_import *imp;
+
+ LASSERT(strcmp(obd->obd_type->typ_name, LUSTRE_MDC_NAME) == 0 ||
+ strcmp(obd->obd_type->typ_name, LUSTRE_OSC_NAME) == 0);
+ CDEBUG(D_SEC, "obd %s\n", obd->u.cli.cl_target_uuid.uuid);
+
+ /* serialize with connect/disconnect import */
+ down_read(&obd->u.cli.cl_sem);
+
+ imp = obd->u.cli.cl_import;
+ if (imp) {
+ spin_lock(&imp->imp_lock);
+ if (imp->imp_sec)
+ imp->imp_sec_expire = get_seconds() +
+ SEC_ADAPT_DELAY;
+ spin_unlock(&imp->imp_lock);
+ }
+
+ up_read(&obd->u.cli.cl_sem);
+}
+EXPORT_SYMBOL(sptlrpc_conf_client_adapt);
+
+int sptlrpc_conf_init(void)
+{
+ mutex_init(&sptlrpc_conf_lock);
+ return 0;
+}
+
+void sptlrpc_conf_fini(void)
+{
+ struct sptlrpc_conf *conf, *conf_next;
+
+ mutex_lock(&sptlrpc_conf_lock);
+ list_for_each_entry_safe(conf, conf_next, &sptlrpc_confs, sc_list) {
+ sptlrpc_conf_free(conf);
+ }
+ LASSERT(list_empty(&sptlrpc_confs));
+ mutex_unlock(&sptlrpc_conf_lock);
+}
diff --git a/drivers/staging/lustre/lustre/ptlrpc/sec_gc.c b/drivers/staging/lustre/lustre/ptlrpc/sec_gc.c
new file mode 100644
index 000000000..81de68edb
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ptlrpc/sec_gc.c
@@ -0,0 +1,252 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/ptlrpc/sec_gc.c
+ *
+ * Author: Eric Mei <ericm@clusterfs.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_SEC
+
+#include "../../include/linux/libcfs/libcfs.h"
+
+#include "../include/obd_support.h"
+#include "../include/obd_class.h"
+#include "../include/lustre_net.h"
+#include "../include/lustre_sec.h"
+
+#include "ptlrpc_internal.h"
+
+#define SEC_GC_INTERVAL (30 * 60)
+
+
+static struct mutex sec_gc_mutex;
+static LIST_HEAD(sec_gc_list);
+static spinlock_t sec_gc_list_lock;
+
+static LIST_HEAD(sec_gc_ctx_list);
+static spinlock_t sec_gc_ctx_list_lock;
+
+static struct ptlrpc_thread sec_gc_thread;
+static atomic_t sec_gc_wait_del = ATOMIC_INIT(0);
+
+
+void sptlrpc_gc_add_sec(struct ptlrpc_sec *sec)
+{
+ LASSERT(sec->ps_policy->sp_cops->gc_ctx);
+ LASSERT(sec->ps_gc_interval > 0);
+ LASSERT(list_empty(&sec->ps_gc_list));
+
+ sec->ps_gc_next = get_seconds() + sec->ps_gc_interval;
+
+ spin_lock(&sec_gc_list_lock);
+ list_add_tail(&sec_gc_list, &sec->ps_gc_list);
+ spin_unlock(&sec_gc_list_lock);
+
+ CDEBUG(D_SEC, "added sec %p(%s)\n", sec, sec->ps_policy->sp_name);
+}
+EXPORT_SYMBOL(sptlrpc_gc_add_sec);
+
+void sptlrpc_gc_del_sec(struct ptlrpc_sec *sec)
+{
+ if (list_empty(&sec->ps_gc_list))
+ return;
+
+ might_sleep();
+
+ /* signal before list_del to make iteration in gc thread safe */
+ atomic_inc(&sec_gc_wait_del);
+
+ spin_lock(&sec_gc_list_lock);
+ list_del_init(&sec->ps_gc_list);
+ spin_unlock(&sec_gc_list_lock);
+
+ /* barrier */
+ mutex_lock(&sec_gc_mutex);
+ mutex_unlock(&sec_gc_mutex);
+
+ atomic_dec(&sec_gc_wait_del);
+
+ CDEBUG(D_SEC, "del sec %p(%s)\n", sec, sec->ps_policy->sp_name);
+}
+EXPORT_SYMBOL(sptlrpc_gc_del_sec);
+
+void sptlrpc_gc_add_ctx(struct ptlrpc_cli_ctx *ctx)
+{
+ LASSERT(list_empty(&ctx->cc_gc_chain));
+
+ CDEBUG(D_SEC, "hand over ctx %p(%u->%s)\n",
+ ctx, ctx->cc_vcred.vc_uid, sec2target_str(ctx->cc_sec));
+ spin_lock(&sec_gc_ctx_list_lock);
+ list_add(&ctx->cc_gc_chain, &sec_gc_ctx_list);
+ spin_unlock(&sec_gc_ctx_list_lock);
+
+ thread_add_flags(&sec_gc_thread, SVC_SIGNAL);
+ wake_up(&sec_gc_thread.t_ctl_waitq);
+}
+EXPORT_SYMBOL(sptlrpc_gc_add_ctx);
+
+static void sec_process_ctx_list(void)
+{
+ struct ptlrpc_cli_ctx *ctx;
+
+ spin_lock(&sec_gc_ctx_list_lock);
+
+ while (!list_empty(&sec_gc_ctx_list)) {
+ ctx = list_entry(sec_gc_ctx_list.next,
+ struct ptlrpc_cli_ctx, cc_gc_chain);
+ list_del_init(&ctx->cc_gc_chain);
+ spin_unlock(&sec_gc_ctx_list_lock);
+
+ LASSERT(ctx->cc_sec);
+ LASSERT(atomic_read(&ctx->cc_refcount) == 1);
+ CDEBUG(D_SEC, "gc pick up ctx %p(%u->%s)\n",
+ ctx, ctx->cc_vcred.vc_uid, sec2target_str(ctx->cc_sec));
+ sptlrpc_cli_ctx_put(ctx, 1);
+
+ spin_lock(&sec_gc_ctx_list_lock);
+ }
+
+ spin_unlock(&sec_gc_ctx_list_lock);
+}
+
+static void sec_do_gc(struct ptlrpc_sec *sec)
+{
+ LASSERT(sec->ps_policy->sp_cops->gc_ctx);
+
+ if (unlikely(sec->ps_gc_next == 0)) {
+ CDEBUG(D_SEC, "sec %p(%s) has 0 gc time\n",
+ sec, sec->ps_policy->sp_name);
+ return;
+ }
+
+ CDEBUG(D_SEC, "check on sec %p(%s)\n", sec, sec->ps_policy->sp_name);
+
+ if (cfs_time_after(sec->ps_gc_next, get_seconds()))
+ return;
+
+ sec->ps_policy->sp_cops->gc_ctx(sec);
+ sec->ps_gc_next = get_seconds() + sec->ps_gc_interval;
+}
+
+static int sec_gc_main(void *arg)
+{
+ struct ptlrpc_thread *thread = (struct ptlrpc_thread *) arg;
+ struct l_wait_info lwi;
+
+ unshare_fs_struct();
+
+ /* Record that the thread is running */
+ thread_set_flags(thread, SVC_RUNNING);
+ wake_up(&thread->t_ctl_waitq);
+
+ while (1) {
+ struct ptlrpc_sec *sec;
+
+ thread_clear_flags(thread, SVC_SIGNAL);
+ sec_process_ctx_list();
+again:
+ /* go through sec list do gc.
+ * FIXME here we iterate through the whole list each time which
+ * is not optimal. we perhaps want to use balanced binary tree
+ * to trace each sec as order of expiry time.
+ * another issue here is we wakeup as fixed interval instead of
+ * according to each sec's expiry time */
+ mutex_lock(&sec_gc_mutex);
+ list_for_each_entry(sec, &sec_gc_list, ps_gc_list) {
+ /* if someone is waiting to be deleted, let it
+ * proceed as soon as possible. */
+ if (atomic_read(&sec_gc_wait_del)) {
+ CDEBUG(D_SEC, "deletion pending, start over\n");
+ mutex_unlock(&sec_gc_mutex);
+ goto again;
+ }
+
+ sec_do_gc(sec);
+ }
+ mutex_unlock(&sec_gc_mutex);
+
+ /* check ctx list again before sleep */
+ sec_process_ctx_list();
+
+ lwi = LWI_TIMEOUT(SEC_GC_INTERVAL * HZ, NULL, NULL);
+ l_wait_event(thread->t_ctl_waitq,
+ thread_is_stopping(thread) ||
+ thread_is_signal(thread),
+ &lwi);
+
+ if (thread_test_and_clear_flags(thread, SVC_STOPPING))
+ break;
+ }
+
+ thread_set_flags(thread, SVC_STOPPED);
+ wake_up(&thread->t_ctl_waitq);
+ return 0;
+}
+
+int sptlrpc_gc_init(void)
+{
+ struct l_wait_info lwi = { 0 };
+ struct task_struct *task;
+
+ mutex_init(&sec_gc_mutex);
+ spin_lock_init(&sec_gc_list_lock);
+ spin_lock_init(&sec_gc_ctx_list_lock);
+
+ /* initialize thread control */
+ memset(&sec_gc_thread, 0, sizeof(sec_gc_thread));
+ init_waitqueue_head(&sec_gc_thread.t_ctl_waitq);
+
+ task = kthread_run(sec_gc_main, &sec_gc_thread, "sptlrpc_gc");
+ if (IS_ERR(task)) {
+ CERROR("can't start gc thread: %ld\n", PTR_ERR(task));
+ return PTR_ERR(task);
+ }
+
+ l_wait_event(sec_gc_thread.t_ctl_waitq,
+ thread_is_running(&sec_gc_thread), &lwi);
+ return 0;
+}
+
+void sptlrpc_gc_fini(void)
+{
+ struct l_wait_info lwi = { 0 };
+
+ thread_set_flags(&sec_gc_thread, SVC_STOPPING);
+ wake_up(&sec_gc_thread.t_ctl_waitq);
+
+ l_wait_event(sec_gc_thread.t_ctl_waitq,
+ thread_is_stopped(&sec_gc_thread), &lwi);
+}
diff --git a/drivers/staging/lustre/lustre/ptlrpc/sec_lproc.c b/drivers/staging/lustre/lustre/ptlrpc/sec_lproc.c
new file mode 100644
index 000000000..0d08145a6
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ptlrpc/sec_lproc.c
@@ -0,0 +1,199 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/ptlrpc/sec_lproc.c
+ *
+ * Author: Eric Mei <ericm@clusterfs.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_SEC
+
+#include "../../include/linux/libcfs/libcfs.h"
+#include <linux/crypto.h>
+
+#include "../include/obd.h"
+#include "../include/obd_class.h"
+#include "../include/obd_support.h"
+#include "../include/lustre_net.h"
+#include "../include/lustre_import.h"
+#include "../include/lustre_dlm.h"
+#include "../include/lustre_sec.h"
+
+#include "ptlrpc_internal.h"
+
+
+struct proc_dir_entry *sptlrpc_proc_root = NULL;
+EXPORT_SYMBOL(sptlrpc_proc_root);
+
+static char *sec_flags2str(unsigned long flags, char *buf, int bufsize)
+{
+ buf[0] = '\0';
+
+ if (flags & PTLRPC_SEC_FL_REVERSE)
+ strlcat(buf, "reverse,", bufsize);
+ if (flags & PTLRPC_SEC_FL_ROOTONLY)
+ strlcat(buf, "rootonly,", bufsize);
+ if (flags & PTLRPC_SEC_FL_UDESC)
+ strlcat(buf, "udesc,", bufsize);
+ if (flags & PTLRPC_SEC_FL_BULK)
+ strlcat(buf, "bulk,", bufsize);
+ if (buf[0] == '\0')
+ strlcat(buf, "-,", bufsize);
+
+ return buf;
+}
+
+static int sptlrpc_info_lprocfs_seq_show(struct seq_file *seq, void *v)
+{
+ struct obd_device *dev = seq->private;
+ struct client_obd *cli = &dev->u.cli;
+ struct ptlrpc_sec *sec = NULL;
+ char str[32];
+
+ LASSERT(strcmp(dev->obd_type->typ_name, LUSTRE_OSC_NAME) == 0 ||
+ strcmp(dev->obd_type->typ_name, LUSTRE_MDC_NAME) == 0 ||
+ strcmp(dev->obd_type->typ_name, LUSTRE_MGC_NAME) == 0);
+
+ if (cli->cl_import)
+ sec = sptlrpc_import_sec_ref(cli->cl_import);
+ if (sec == NULL)
+ goto out;
+
+ sec_flags2str(sec->ps_flvr.sf_flags, str, sizeof(str));
+
+ seq_printf(seq, "rpc flavor: %s\n",
+ sptlrpc_flavor2name_base(sec->ps_flvr.sf_rpc));
+ seq_printf(seq, "bulk flavor: %s\n",
+ sptlrpc_flavor2name_bulk(&sec->ps_flvr, str, sizeof(str)));
+ seq_printf(seq, "flags: %s\n",
+ sec_flags2str(sec->ps_flvr.sf_flags, str, sizeof(str)));
+ seq_printf(seq, "id: %d\n", sec->ps_id);
+ seq_printf(seq, "refcount: %d\n",
+ atomic_read(&sec->ps_refcount));
+ seq_printf(seq, "nctx: %d\n", atomic_read(&sec->ps_nctx));
+ seq_printf(seq, "gc internal %ld\n", sec->ps_gc_interval);
+ seq_printf(seq, "gc next %ld\n",
+ sec->ps_gc_interval ?
+ sec->ps_gc_next - get_seconds() : 0);
+
+ sptlrpc_sec_put(sec);
+out:
+ return 0;
+}
+LPROC_SEQ_FOPS_RO(sptlrpc_info_lprocfs);
+
+static int sptlrpc_ctxs_lprocfs_seq_show(struct seq_file *seq, void *v)
+{
+ struct obd_device *dev = seq->private;
+ struct client_obd *cli = &dev->u.cli;
+ struct ptlrpc_sec *sec = NULL;
+
+ LASSERT(strcmp(dev->obd_type->typ_name, LUSTRE_OSC_NAME) == 0 ||
+ strcmp(dev->obd_type->typ_name, LUSTRE_MDC_NAME) == 0 ||
+ strcmp(dev->obd_type->typ_name, LUSTRE_MGC_NAME) == 0);
+
+ if (cli->cl_import)
+ sec = sptlrpc_import_sec_ref(cli->cl_import);
+ if (sec == NULL)
+ goto out;
+
+ if (sec->ps_policy->sp_cops->display)
+ sec->ps_policy->sp_cops->display(sec, seq);
+
+ sptlrpc_sec_put(sec);
+out:
+ return 0;
+}
+LPROC_SEQ_FOPS_RO(sptlrpc_ctxs_lprocfs);
+
+int sptlrpc_lprocfs_cliobd_attach(struct obd_device *dev)
+{
+ int rc;
+
+ if (strcmp(dev->obd_type->typ_name, LUSTRE_OSC_NAME) != 0 &&
+ strcmp(dev->obd_type->typ_name, LUSTRE_MDC_NAME) != 0 &&
+ strcmp(dev->obd_type->typ_name, LUSTRE_MGC_NAME) != 0) {
+ CERROR("can't register lproc for obd type %s\n",
+ dev->obd_type->typ_name);
+ return -EINVAL;
+ }
+
+ rc = lprocfs_obd_seq_create(dev, "srpc_info", 0444,
+ &sptlrpc_info_lprocfs_fops, dev);
+ if (rc) {
+ CERROR("create proc entry srpc_info for %s: %d\n",
+ dev->obd_name, rc);
+ return rc;
+ }
+
+ rc = lprocfs_obd_seq_create(dev, "srpc_contexts", 0444,
+ &sptlrpc_ctxs_lprocfs_fops, dev);
+ if (rc) {
+ CERROR("create proc entry srpc_contexts for %s: %d\n",
+ dev->obd_name, rc);
+ return rc;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL(sptlrpc_lprocfs_cliobd_attach);
+
+LPROC_SEQ_FOPS_RO(sptlrpc_proc_enc_pool);
+static struct lprocfs_vars sptlrpc_lprocfs_vars[] = {
+ { "encrypt_page_pools", &sptlrpc_proc_enc_pool_fops },
+ { NULL }
+};
+
+int sptlrpc_lproc_init(void)
+{
+ int rc;
+
+ LASSERT(sptlrpc_proc_root == NULL);
+
+ sptlrpc_proc_root = lprocfs_register("sptlrpc", proc_lustre_root,
+ sptlrpc_lprocfs_vars, NULL);
+ if (IS_ERR(sptlrpc_proc_root)) {
+ rc = PTR_ERR(sptlrpc_proc_root);
+ sptlrpc_proc_root = NULL;
+ return rc;
+ }
+ return 0;
+}
+
+void sptlrpc_lproc_fini(void)
+{
+ if (sptlrpc_proc_root) {
+ lprocfs_remove(&sptlrpc_proc_root);
+ sptlrpc_proc_root = NULL;
+ }
+}
diff --git a/drivers/staging/lustre/lustre/ptlrpc/sec_null.c b/drivers/staging/lustre/lustre/ptlrpc/sec_null.c
new file mode 100644
index 000000000..4e132435b
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ptlrpc/sec_null.c
@@ -0,0 +1,458 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/ptlrpc/sec_null.c
+ *
+ * Author: Eric Mei <ericm@clusterfs.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_SEC
+
+
+#include "../include/obd_support.h"
+#include "../include/obd_cksum.h"
+#include "../include/obd_class.h"
+#include "../include/lustre_net.h"
+#include "../include/lustre_sec.h"
+
+static struct ptlrpc_sec_policy null_policy;
+static struct ptlrpc_sec null_sec;
+static struct ptlrpc_cli_ctx null_cli_ctx;
+static struct ptlrpc_svc_ctx null_svc_ctx;
+
+/*
+ * we can temporarily use the topmost 8-bits of lm_secflvr to identify
+ * the source sec part.
+ */
+static inline
+void null_encode_sec_part(struct lustre_msg *msg, enum lustre_sec_part sp)
+{
+ msg->lm_secflvr |= (((__u32) sp) & 0xFF) << 24;
+}
+
+static inline
+enum lustre_sec_part null_decode_sec_part(struct lustre_msg *msg)
+{
+ return (msg->lm_secflvr >> 24) & 0xFF;
+}
+
+static int null_ctx_refresh(struct ptlrpc_cli_ctx *ctx)
+{
+ /* should never reach here */
+ LBUG();
+ return 0;
+}
+
+static
+int null_ctx_sign(struct ptlrpc_cli_ctx *ctx, struct ptlrpc_request *req)
+{
+ req->rq_reqbuf->lm_secflvr = SPTLRPC_FLVR_NULL;
+
+ if (!req->rq_import->imp_dlm_fake) {
+ struct obd_device *obd = req->rq_import->imp_obd;
+ null_encode_sec_part(req->rq_reqbuf,
+ obd->u.cli.cl_sp_me);
+ }
+ req->rq_reqdata_len = req->rq_reqlen;
+ return 0;
+}
+
+static
+int null_ctx_verify(struct ptlrpc_cli_ctx *ctx, struct ptlrpc_request *req)
+{
+ __u32 cksums, cksumc;
+
+ LASSERT(req->rq_repdata);
+
+ req->rq_repmsg = req->rq_repdata;
+ req->rq_replen = req->rq_repdata_len;
+
+ if (req->rq_early) {
+ cksums = lustre_msg_get_cksum(req->rq_repdata);
+ cksumc = lustre_msg_calc_cksum(req->rq_repmsg);
+ if (cksumc != cksums) {
+ CDEBUG(D_SEC,
+ "early reply checksum mismatch: %08x != %08x\n",
+ cksumc, cksums);
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
+static
+struct ptlrpc_sec *null_create_sec(struct obd_import *imp,
+ struct ptlrpc_svc_ctx *svc_ctx,
+ struct sptlrpc_flavor *sf)
+{
+ LASSERT(SPTLRPC_FLVR_POLICY(sf->sf_rpc) == SPTLRPC_POLICY_NULL);
+
+ /* general layer has take a module reference for us, because we never
+ * really destroy the sec, simply release the reference here.
+ */
+ sptlrpc_policy_put(&null_policy);
+ return &null_sec;
+}
+
+static
+void null_destroy_sec(struct ptlrpc_sec *sec)
+{
+ LASSERT(sec == &null_sec);
+}
+
+static
+struct ptlrpc_cli_ctx *null_lookup_ctx(struct ptlrpc_sec *sec,
+ struct vfs_cred *vcred,
+ int create, int remove_dead)
+{
+ atomic_inc(&null_cli_ctx.cc_refcount);
+ return &null_cli_ctx;
+}
+
+static
+int null_flush_ctx_cache(struct ptlrpc_sec *sec,
+ uid_t uid,
+ int grace, int force)
+{
+ return 0;
+}
+
+static
+int null_alloc_reqbuf(struct ptlrpc_sec *sec,
+ struct ptlrpc_request *req,
+ int msgsize)
+{
+ if (!req->rq_reqbuf) {
+ int alloc_size = size_roundup_power2(msgsize);
+
+ LASSERT(!req->rq_pool);
+ OBD_ALLOC_LARGE(req->rq_reqbuf, alloc_size);
+ if (!req->rq_reqbuf)
+ return -ENOMEM;
+
+ req->rq_reqbuf_len = alloc_size;
+ } else {
+ LASSERT(req->rq_pool);
+ LASSERT(req->rq_reqbuf_len >= msgsize);
+ memset(req->rq_reqbuf, 0, msgsize);
+ }
+
+ req->rq_reqmsg = req->rq_reqbuf;
+ return 0;
+}
+
+static
+void null_free_reqbuf(struct ptlrpc_sec *sec,
+ struct ptlrpc_request *req)
+{
+ if (!req->rq_pool) {
+ LASSERTF(req->rq_reqmsg == req->rq_reqbuf,
+ "req %p: reqmsg %p is not reqbuf %p in null sec\n",
+ req, req->rq_reqmsg, req->rq_reqbuf);
+ LASSERTF(req->rq_reqbuf_len >= req->rq_reqlen,
+ "req %p: reqlen %d should smaller than buflen %d\n",
+ req, req->rq_reqlen, req->rq_reqbuf_len);
+
+ OBD_FREE_LARGE(req->rq_reqbuf, req->rq_reqbuf_len);
+ req->rq_reqbuf = NULL;
+ req->rq_reqbuf_len = 0;
+ }
+}
+
+static
+int null_alloc_repbuf(struct ptlrpc_sec *sec,
+ struct ptlrpc_request *req,
+ int msgsize)
+{
+ /* add space for early replied */
+ msgsize += lustre_msg_early_size();
+
+ msgsize = size_roundup_power2(msgsize);
+
+ OBD_ALLOC_LARGE(req->rq_repbuf, msgsize);
+ if (!req->rq_repbuf)
+ return -ENOMEM;
+
+ req->rq_repbuf_len = msgsize;
+ return 0;
+}
+
+static
+void null_free_repbuf(struct ptlrpc_sec *sec,
+ struct ptlrpc_request *req)
+{
+ LASSERT(req->rq_repbuf);
+
+ OBD_FREE_LARGE(req->rq_repbuf, req->rq_repbuf_len);
+ req->rq_repbuf = NULL;
+ req->rq_repbuf_len = 0;
+}
+
+static
+int null_enlarge_reqbuf(struct ptlrpc_sec *sec,
+ struct ptlrpc_request *req,
+ int segment, int newsize)
+{
+ struct lustre_msg *newbuf;
+ struct lustre_msg *oldbuf = req->rq_reqmsg;
+ int oldsize, newmsg_size, alloc_size;
+
+ LASSERT(req->rq_reqbuf);
+ LASSERT(req->rq_reqbuf == req->rq_reqmsg);
+ LASSERT(req->rq_reqbuf_len >= req->rq_reqlen);
+ LASSERT(req->rq_reqlen == lustre_packed_msg_size(oldbuf));
+
+ /* compute new message size */
+ oldsize = req->rq_reqbuf->lm_buflens[segment];
+ req->rq_reqbuf->lm_buflens[segment] = newsize;
+ newmsg_size = lustre_packed_msg_size(oldbuf);
+ req->rq_reqbuf->lm_buflens[segment] = oldsize;
+
+ /* request from pool should always have enough buffer */
+ LASSERT(!req->rq_pool || req->rq_reqbuf_len >= newmsg_size);
+
+ if (req->rq_reqbuf_len < newmsg_size) {
+ alloc_size = size_roundup_power2(newmsg_size);
+
+ OBD_ALLOC_LARGE(newbuf, alloc_size);
+ if (newbuf == NULL)
+ return -ENOMEM;
+
+ /* Must lock this, so that otherwise unprotected change of
+ * rq_reqmsg is not racing with parallel processing of
+ * imp_replay_list traversing threads. See LU-3333
+ * This is a bandaid at best, we really need to deal with this
+ * in request enlarging code before unpacking that's already
+ * there */
+ if (req->rq_import)
+ spin_lock(&req->rq_import->imp_lock);
+ memcpy(newbuf, req->rq_reqbuf, req->rq_reqlen);
+
+ OBD_FREE_LARGE(req->rq_reqbuf, req->rq_reqbuf_len);
+ req->rq_reqbuf = req->rq_reqmsg = newbuf;
+ req->rq_reqbuf_len = alloc_size;
+
+ if (req->rq_import)
+ spin_unlock(&req->rq_import->imp_lock);
+ }
+
+ _sptlrpc_enlarge_msg_inplace(req->rq_reqmsg, segment, newsize);
+ req->rq_reqlen = newmsg_size;
+
+ return 0;
+}
+
+static struct ptlrpc_svc_ctx null_svc_ctx = {
+ .sc_refcount = ATOMIC_INIT(1),
+ .sc_policy = &null_policy,
+};
+
+static
+int null_accept(struct ptlrpc_request *req)
+{
+ LASSERT(SPTLRPC_FLVR_POLICY(req->rq_flvr.sf_rpc) ==
+ SPTLRPC_POLICY_NULL);
+
+ if (req->rq_flvr.sf_rpc != SPTLRPC_FLVR_NULL) {
+ CERROR("Invalid rpc flavor 0x%x\n", req->rq_flvr.sf_rpc);
+ return SECSVC_DROP;
+ }
+
+ req->rq_sp_from = null_decode_sec_part(req->rq_reqbuf);
+
+ req->rq_reqmsg = req->rq_reqbuf;
+ req->rq_reqlen = req->rq_reqdata_len;
+
+ req->rq_svc_ctx = &null_svc_ctx;
+ atomic_inc(&req->rq_svc_ctx->sc_refcount);
+
+ return SECSVC_OK;
+}
+
+static
+int null_alloc_rs(struct ptlrpc_request *req, int msgsize)
+{
+ struct ptlrpc_reply_state *rs;
+ int rs_size = sizeof(*rs) + msgsize;
+
+ LASSERT(msgsize % 8 == 0);
+
+ rs = req->rq_reply_state;
+
+ if (rs) {
+ /* pre-allocated */
+ LASSERT(rs->rs_size >= rs_size);
+ } else {
+ OBD_ALLOC_LARGE(rs, rs_size);
+ if (rs == NULL)
+ return -ENOMEM;
+
+ rs->rs_size = rs_size;
+ }
+
+ rs->rs_svc_ctx = req->rq_svc_ctx;
+ atomic_inc(&req->rq_svc_ctx->sc_refcount);
+
+ rs->rs_repbuf = (struct lustre_msg *) (rs + 1);
+ rs->rs_repbuf_len = rs_size - sizeof(*rs);
+ rs->rs_msg = rs->rs_repbuf;
+
+ req->rq_reply_state = rs;
+ return 0;
+}
+
+static
+void null_free_rs(struct ptlrpc_reply_state *rs)
+{
+ LASSERT_ATOMIC_GT(&rs->rs_svc_ctx->sc_refcount, 1);
+ atomic_dec(&rs->rs_svc_ctx->sc_refcount);
+
+ if (!rs->rs_prealloc)
+ OBD_FREE_LARGE(rs, rs->rs_size);
+}
+
+static
+int null_authorize(struct ptlrpc_request *req)
+{
+ struct ptlrpc_reply_state *rs = req->rq_reply_state;
+
+ LASSERT(rs);
+
+ rs->rs_repbuf->lm_secflvr = SPTLRPC_FLVR_NULL;
+ rs->rs_repdata_len = req->rq_replen;
+
+ if (likely(req->rq_packed_final)) {
+ if (lustre_msghdr_get_flags(req->rq_reqmsg) & MSGHDR_AT_SUPPORT)
+ req->rq_reply_off = lustre_msg_early_size();
+ else
+ req->rq_reply_off = 0;
+ } else {
+ __u32 cksum;
+
+ cksum = lustre_msg_calc_cksum(rs->rs_repbuf);
+ lustre_msg_set_cksum(rs->rs_repbuf, cksum);
+ req->rq_reply_off = 0;
+ }
+
+ return 0;
+}
+
+static struct ptlrpc_ctx_ops null_ctx_ops = {
+ .refresh = null_ctx_refresh,
+ .sign = null_ctx_sign,
+ .verify = null_ctx_verify,
+};
+
+static struct ptlrpc_sec_cops null_sec_cops = {
+ .create_sec = null_create_sec,
+ .destroy_sec = null_destroy_sec,
+ .lookup_ctx = null_lookup_ctx,
+ .flush_ctx_cache = null_flush_ctx_cache,
+ .alloc_reqbuf = null_alloc_reqbuf,
+ .alloc_repbuf = null_alloc_repbuf,
+ .free_reqbuf = null_free_reqbuf,
+ .free_repbuf = null_free_repbuf,
+ .enlarge_reqbuf = null_enlarge_reqbuf,
+};
+
+static struct ptlrpc_sec_sops null_sec_sops = {
+ .accept = null_accept,
+ .alloc_rs = null_alloc_rs,
+ .authorize = null_authorize,
+ .free_rs = null_free_rs,
+};
+
+static struct ptlrpc_sec_policy null_policy = {
+ .sp_owner = THIS_MODULE,
+ .sp_name = "sec.null",
+ .sp_policy = SPTLRPC_POLICY_NULL,
+ .sp_cops = &null_sec_cops,
+ .sp_sops = &null_sec_sops,
+};
+
+static void null_init_internal(void)
+{
+ static HLIST_HEAD(__list);
+
+ null_sec.ps_policy = &null_policy;
+ atomic_set(&null_sec.ps_refcount, 1); /* always busy */
+ null_sec.ps_id = -1;
+ null_sec.ps_import = NULL;
+ null_sec.ps_flvr.sf_rpc = SPTLRPC_FLVR_NULL;
+ null_sec.ps_flvr.sf_flags = 0;
+ null_sec.ps_part = LUSTRE_SP_ANY;
+ null_sec.ps_dying = 0;
+ spin_lock_init(&null_sec.ps_lock);
+ atomic_set(&null_sec.ps_nctx, 1); /* for "null_cli_ctx" */
+ INIT_LIST_HEAD(&null_sec.ps_gc_list);
+ null_sec.ps_gc_interval = 0;
+ null_sec.ps_gc_next = 0;
+
+ hlist_add_head(&null_cli_ctx.cc_cache, &__list);
+ atomic_set(&null_cli_ctx.cc_refcount, 1); /* for hash */
+ null_cli_ctx.cc_sec = &null_sec;
+ null_cli_ctx.cc_ops = &null_ctx_ops;
+ null_cli_ctx.cc_expire = 0;
+ null_cli_ctx.cc_flags = PTLRPC_CTX_CACHED | PTLRPC_CTX_ETERNAL |
+ PTLRPC_CTX_UPTODATE;
+ null_cli_ctx.cc_vcred.vc_uid = 0;
+ spin_lock_init(&null_cli_ctx.cc_lock);
+ INIT_LIST_HEAD(&null_cli_ctx.cc_req_list);
+ INIT_LIST_HEAD(&null_cli_ctx.cc_gc_chain);
+}
+
+int sptlrpc_null_init(void)
+{
+ int rc;
+
+ null_init_internal();
+
+ rc = sptlrpc_register_policy(&null_policy);
+ if (rc)
+ CERROR("failed to register %s: %d\n", null_policy.sp_name, rc);
+
+ return rc;
+}
+
+void sptlrpc_null_fini(void)
+{
+ int rc;
+
+ rc = sptlrpc_unregister_policy(&null_policy);
+ if (rc)
+ CERROR("failed to unregister %s: %d\n",
+ null_policy.sp_name, rc);
+}
diff --git a/drivers/staging/lustre/lustre/ptlrpc/sec_plain.c b/drivers/staging/lustre/lustre/ptlrpc/sec_plain.c
new file mode 100644
index 000000000..a79cd5301
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ptlrpc/sec_plain.c
@@ -0,0 +1,1013 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/ptlrpc/sec_plain.c
+ *
+ * Author: Eric Mei <ericm@clusterfs.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_SEC
+
+
+#include "../include/obd_support.h"
+#include "../include/obd_cksum.h"
+#include "../include/obd_class.h"
+#include "../include/lustre_net.h"
+#include "../include/lustre_sec.h"
+
+struct plain_sec {
+ struct ptlrpc_sec pls_base;
+ rwlock_t pls_lock;
+ struct ptlrpc_cli_ctx *pls_ctx;
+};
+
+static inline struct plain_sec *sec2plsec(struct ptlrpc_sec *sec)
+{
+ return container_of(sec, struct plain_sec, pls_base);
+}
+
+static struct ptlrpc_sec_policy plain_policy;
+static struct ptlrpc_ctx_ops plain_ctx_ops;
+static struct ptlrpc_svc_ctx plain_svc_ctx;
+
+static unsigned int plain_at_offset;
+
+/*
+ * for simplicity, plain policy rpc use fixed layout.
+ */
+#define PLAIN_PACK_SEGMENTS (4)
+
+#define PLAIN_PACK_HDR_OFF (0)
+#define PLAIN_PACK_MSG_OFF (1)
+#define PLAIN_PACK_USER_OFF (2)
+#define PLAIN_PACK_BULK_OFF (3)
+
+#define PLAIN_FL_USER (0x01)
+#define PLAIN_FL_BULK (0x02)
+
+struct plain_header {
+ __u8 ph_ver; /* 0 */
+ __u8 ph_flags;
+ __u8 ph_sp; /* source */
+ __u8 ph_bulk_hash_alg; /* complete flavor desc */
+ __u8 ph_pad[4];
+};
+
+struct plain_bulk_token {
+ __u8 pbt_hash[8];
+};
+
+#define PLAIN_BSD_SIZE \
+ (sizeof(struct ptlrpc_bulk_sec_desc) + sizeof(struct plain_bulk_token))
+
+/****************************************
+ * bulk checksum helpers *
+ ****************************************/
+
+static int plain_unpack_bsd(struct lustre_msg *msg, int swabbed)
+{
+ struct ptlrpc_bulk_sec_desc *bsd;
+
+ if (bulk_sec_desc_unpack(msg, PLAIN_PACK_BULK_OFF, swabbed))
+ return -EPROTO;
+
+ bsd = lustre_msg_buf(msg, PLAIN_PACK_BULK_OFF, PLAIN_BSD_SIZE);
+ if (bsd == NULL) {
+ CERROR("bulk sec desc has short size %d\n",
+ lustre_msg_buflen(msg, PLAIN_PACK_BULK_OFF));
+ return -EPROTO;
+ }
+
+ if (bsd->bsd_svc != SPTLRPC_BULK_SVC_NULL &&
+ bsd->bsd_svc != SPTLRPC_BULK_SVC_INTG) {
+ CERROR("invalid bulk svc %u\n", bsd->bsd_svc);
+ return -EPROTO;
+ }
+
+ return 0;
+}
+
+static int plain_generate_bulk_csum(struct ptlrpc_bulk_desc *desc,
+ __u8 hash_alg,
+ struct plain_bulk_token *token)
+{
+ if (hash_alg == BULK_HASH_ALG_NULL)
+ return 0;
+
+ memset(token->pbt_hash, 0, sizeof(token->pbt_hash));
+ return sptlrpc_get_bulk_checksum(desc, hash_alg, token->pbt_hash,
+ sizeof(token->pbt_hash));
+}
+
+static int plain_verify_bulk_csum(struct ptlrpc_bulk_desc *desc,
+ __u8 hash_alg,
+ struct plain_bulk_token *tokenr)
+{
+ struct plain_bulk_token tokenv;
+ int rc;
+
+ if (hash_alg == BULK_HASH_ALG_NULL)
+ return 0;
+
+ memset(&tokenv.pbt_hash, 0, sizeof(tokenv.pbt_hash));
+ rc = sptlrpc_get_bulk_checksum(desc, hash_alg, tokenv.pbt_hash,
+ sizeof(tokenv.pbt_hash));
+ if (rc)
+ return rc;
+
+ if (memcmp(tokenr->pbt_hash, tokenv.pbt_hash, sizeof(tokenr->pbt_hash)))
+ return -EACCES;
+ return 0;
+}
+
+static void corrupt_bulk_data(struct ptlrpc_bulk_desc *desc)
+{
+ char *ptr;
+ unsigned int off, i;
+
+ for (i = 0; i < desc->bd_iov_count; i++) {
+ if (desc->bd_iov[i].kiov_len == 0)
+ continue;
+
+ ptr = kmap(desc->bd_iov[i].kiov_page);
+ off = desc->bd_iov[i].kiov_offset & ~CFS_PAGE_MASK;
+ ptr[off] ^= 0x1;
+ kunmap(desc->bd_iov[i].kiov_page);
+ return;
+ }
+}
+
+/****************************************
+ * cli_ctx apis *
+ ****************************************/
+
+static
+int plain_ctx_refresh(struct ptlrpc_cli_ctx *ctx)
+{
+ /* should never reach here */
+ LBUG();
+ return 0;
+}
+
+static
+int plain_ctx_validate(struct ptlrpc_cli_ctx *ctx)
+{
+ return 0;
+}
+
+static
+int plain_ctx_sign(struct ptlrpc_cli_ctx *ctx, struct ptlrpc_request *req)
+{
+ struct lustre_msg *msg = req->rq_reqbuf;
+ struct plain_header *phdr;
+
+ msg->lm_secflvr = req->rq_flvr.sf_rpc;
+
+ phdr = lustre_msg_buf(msg, PLAIN_PACK_HDR_OFF, 0);
+ phdr->ph_ver = 0;
+ phdr->ph_flags = 0;
+ phdr->ph_sp = ctx->cc_sec->ps_part;
+ phdr->ph_bulk_hash_alg = req->rq_flvr.u_bulk.hash.hash_alg;
+
+ if (req->rq_pack_udesc)
+ phdr->ph_flags |= PLAIN_FL_USER;
+ if (req->rq_pack_bulk)
+ phdr->ph_flags |= PLAIN_FL_BULK;
+
+ req->rq_reqdata_len = lustre_msg_size_v2(msg->lm_bufcount,
+ msg->lm_buflens);
+ return 0;
+}
+
+static
+int plain_ctx_verify(struct ptlrpc_cli_ctx *ctx, struct ptlrpc_request *req)
+{
+ struct lustre_msg *msg = req->rq_repdata;
+ struct plain_header *phdr;
+ __u32 cksum;
+ int swabbed;
+
+ if (msg->lm_bufcount != PLAIN_PACK_SEGMENTS) {
+ CERROR("unexpected reply buf count %u\n", msg->lm_bufcount);
+ return -EPROTO;
+ }
+
+ swabbed = ptlrpc_rep_need_swab(req);
+
+ phdr = lustre_msg_buf(msg, PLAIN_PACK_HDR_OFF, sizeof(*phdr));
+ if (phdr == NULL) {
+ CERROR("missing plain header\n");
+ return -EPROTO;
+ }
+
+ if (phdr->ph_ver != 0) {
+ CERROR("Invalid header version\n");
+ return -EPROTO;
+ }
+
+ /* expect no user desc in reply */
+ if (phdr->ph_flags & PLAIN_FL_USER) {
+ CERROR("Unexpected udesc flag in reply\n");
+ return -EPROTO;
+ }
+
+ if (phdr->ph_bulk_hash_alg != req->rq_flvr.u_bulk.hash.hash_alg) {
+ CERROR("reply bulk flavor %u != %u\n", phdr->ph_bulk_hash_alg,
+ req->rq_flvr.u_bulk.hash.hash_alg);
+ return -EPROTO;
+ }
+
+ if (unlikely(req->rq_early)) {
+ unsigned int hsize = 4;
+
+ cfs_crypto_hash_digest(CFS_HASH_ALG_CRC32,
+ lustre_msg_buf(msg, PLAIN_PACK_MSG_OFF, 0),
+ lustre_msg_buflen(msg, PLAIN_PACK_MSG_OFF),
+ NULL, 0, (unsigned char *)&cksum, &hsize);
+ if (cksum != msg->lm_cksum) {
+ CDEBUG(D_SEC,
+ "early reply checksum mismatch: %08x != %08x\n",
+ cpu_to_le32(cksum), msg->lm_cksum);
+ return -EINVAL;
+ }
+ } else {
+ /* whether we sent with bulk or not, we expect the same
+ * in reply, except for early reply */
+ if (!req->rq_early &&
+ !equi(req->rq_pack_bulk == 1,
+ phdr->ph_flags & PLAIN_FL_BULK)) {
+ CERROR("%s bulk checksum in reply\n",
+ req->rq_pack_bulk ? "Missing" : "Unexpected");
+ return -EPROTO;
+ }
+
+ if (phdr->ph_flags & PLAIN_FL_BULK) {
+ if (plain_unpack_bsd(msg, swabbed))
+ return -EPROTO;
+ }
+ }
+
+ req->rq_repmsg = lustre_msg_buf(msg, PLAIN_PACK_MSG_OFF, 0);
+ req->rq_replen = lustre_msg_buflen(msg, PLAIN_PACK_MSG_OFF);
+ return 0;
+}
+
+static
+int plain_cli_wrap_bulk(struct ptlrpc_cli_ctx *ctx,
+ struct ptlrpc_request *req,
+ struct ptlrpc_bulk_desc *desc)
+{
+ struct ptlrpc_bulk_sec_desc *bsd;
+ struct plain_bulk_token *token;
+ int rc;
+
+ LASSERT(req->rq_pack_bulk);
+ LASSERT(req->rq_reqbuf->lm_bufcount == PLAIN_PACK_SEGMENTS);
+
+ bsd = lustre_msg_buf(req->rq_reqbuf, PLAIN_PACK_BULK_OFF, 0);
+ token = (struct plain_bulk_token *) bsd->bsd_data;
+
+ bsd->bsd_version = 0;
+ bsd->bsd_flags = 0;
+ bsd->bsd_type = SPTLRPC_BULK_DEFAULT;
+ bsd->bsd_svc = SPTLRPC_FLVR_BULK_SVC(req->rq_flvr.sf_rpc);
+
+ if (bsd->bsd_svc == SPTLRPC_BULK_SVC_NULL)
+ return 0;
+
+ if (req->rq_bulk_read)
+ return 0;
+
+ rc = plain_generate_bulk_csum(desc, req->rq_flvr.u_bulk.hash.hash_alg,
+ token);
+ if (rc) {
+ CERROR("bulk write: failed to compute checksum: %d\n", rc);
+ } else {
+ /*
+ * for sending we only compute the wrong checksum instead
+ * of corrupting the data so it is still correct on a redo
+ */
+ if (OBD_FAIL_CHECK(OBD_FAIL_OSC_CHECKSUM_SEND) &&
+ req->rq_flvr.u_bulk.hash.hash_alg != BULK_HASH_ALG_NULL)
+ token->pbt_hash[0] ^= 0x1;
+ }
+
+ return rc;
+}
+
+static
+int plain_cli_unwrap_bulk(struct ptlrpc_cli_ctx *ctx,
+ struct ptlrpc_request *req,
+ struct ptlrpc_bulk_desc *desc)
+{
+ struct ptlrpc_bulk_sec_desc *bsdv;
+ struct plain_bulk_token *tokenv;
+ int rc;
+ int i, nob;
+
+ LASSERT(req->rq_pack_bulk);
+ LASSERT(req->rq_reqbuf->lm_bufcount == PLAIN_PACK_SEGMENTS);
+ LASSERT(req->rq_repdata->lm_bufcount == PLAIN_PACK_SEGMENTS);
+
+ bsdv = lustre_msg_buf(req->rq_repdata, PLAIN_PACK_BULK_OFF, 0);
+ tokenv = (struct plain_bulk_token *) bsdv->bsd_data;
+
+ if (req->rq_bulk_write) {
+ if (bsdv->bsd_flags & BSD_FL_ERR)
+ return -EIO;
+ return 0;
+ }
+
+ /* fix the actual data size */
+ for (i = 0, nob = 0; i < desc->bd_iov_count; i++) {
+ if (desc->bd_iov[i].kiov_len + nob > desc->bd_nob_transferred) {
+ desc->bd_iov[i].kiov_len =
+ desc->bd_nob_transferred - nob;
+ }
+ nob += desc->bd_iov[i].kiov_len;
+ }
+
+ rc = plain_verify_bulk_csum(desc, req->rq_flvr.u_bulk.hash.hash_alg,
+ tokenv);
+ if (rc)
+ CERROR("bulk read: client verify failed: %d\n", rc);
+
+ return rc;
+}
+
+/****************************************
+ * sec apis *
+ ****************************************/
+
+static
+struct ptlrpc_cli_ctx *plain_sec_install_ctx(struct plain_sec *plsec)
+{
+ struct ptlrpc_cli_ctx *ctx, *ctx_new;
+
+ OBD_ALLOC_PTR(ctx_new);
+
+ write_lock(&plsec->pls_lock);
+
+ ctx = plsec->pls_ctx;
+ if (ctx) {
+ atomic_inc(&ctx->cc_refcount);
+
+ if (ctx_new)
+ OBD_FREE_PTR(ctx_new);
+ } else if (ctx_new) {
+ ctx = ctx_new;
+
+ atomic_set(&ctx->cc_refcount, 1); /* for cache */
+ ctx->cc_sec = &plsec->pls_base;
+ ctx->cc_ops = &plain_ctx_ops;
+ ctx->cc_expire = 0;
+ ctx->cc_flags = PTLRPC_CTX_CACHED | PTLRPC_CTX_UPTODATE;
+ ctx->cc_vcred.vc_uid = 0;
+ spin_lock_init(&ctx->cc_lock);
+ INIT_LIST_HEAD(&ctx->cc_req_list);
+ INIT_LIST_HEAD(&ctx->cc_gc_chain);
+
+ plsec->pls_ctx = ctx;
+ atomic_inc(&plsec->pls_base.ps_nctx);
+ atomic_inc(&plsec->pls_base.ps_refcount);
+
+ atomic_inc(&ctx->cc_refcount); /* for caller */
+ }
+
+ write_unlock(&plsec->pls_lock);
+
+ return ctx;
+}
+
+static
+void plain_destroy_sec(struct ptlrpc_sec *sec)
+{
+ struct plain_sec *plsec = sec2plsec(sec);
+
+ LASSERT(sec->ps_policy == &plain_policy);
+ LASSERT(sec->ps_import);
+ LASSERT(atomic_read(&sec->ps_refcount) == 0);
+ LASSERT(atomic_read(&sec->ps_nctx) == 0);
+ LASSERT(plsec->pls_ctx == NULL);
+
+ class_import_put(sec->ps_import);
+
+ OBD_FREE_PTR(plsec);
+}
+
+static
+void plain_kill_sec(struct ptlrpc_sec *sec)
+{
+ sec->ps_dying = 1;
+}
+
+static
+struct ptlrpc_sec *plain_create_sec(struct obd_import *imp,
+ struct ptlrpc_svc_ctx *svc_ctx,
+ struct sptlrpc_flavor *sf)
+{
+ struct plain_sec *plsec;
+ struct ptlrpc_sec *sec;
+ struct ptlrpc_cli_ctx *ctx;
+
+ LASSERT(SPTLRPC_FLVR_POLICY(sf->sf_rpc) == SPTLRPC_POLICY_PLAIN);
+
+ OBD_ALLOC_PTR(plsec);
+ if (plsec == NULL)
+ return NULL;
+
+ /*
+ * initialize plain_sec
+ */
+ rwlock_init(&plsec->pls_lock);
+ plsec->pls_ctx = NULL;
+
+ sec = &plsec->pls_base;
+ sec->ps_policy = &plain_policy;
+ atomic_set(&sec->ps_refcount, 0);
+ atomic_set(&sec->ps_nctx, 0);
+ sec->ps_id = sptlrpc_get_next_secid();
+ sec->ps_import = class_import_get(imp);
+ sec->ps_flvr = *sf;
+ spin_lock_init(&sec->ps_lock);
+ INIT_LIST_HEAD(&sec->ps_gc_list);
+ sec->ps_gc_interval = 0;
+ sec->ps_gc_next = 0;
+
+ /* install ctx immediately if this is a reverse sec */
+ if (svc_ctx) {
+ ctx = plain_sec_install_ctx(plsec);
+ if (ctx == NULL) {
+ plain_destroy_sec(sec);
+ return NULL;
+ }
+ sptlrpc_cli_ctx_put(ctx, 1);
+ }
+
+ return sec;
+}
+
+static
+struct ptlrpc_cli_ctx *plain_lookup_ctx(struct ptlrpc_sec *sec,
+ struct vfs_cred *vcred,
+ int create, int remove_dead)
+{
+ struct plain_sec *plsec = sec2plsec(sec);
+ struct ptlrpc_cli_ctx *ctx;
+
+ read_lock(&plsec->pls_lock);
+ ctx = plsec->pls_ctx;
+ if (ctx)
+ atomic_inc(&ctx->cc_refcount);
+ read_unlock(&plsec->pls_lock);
+
+ if (unlikely(ctx == NULL))
+ ctx = plain_sec_install_ctx(plsec);
+
+ return ctx;
+}
+
+static
+void plain_release_ctx(struct ptlrpc_sec *sec,
+ struct ptlrpc_cli_ctx *ctx, int sync)
+{
+ LASSERT(atomic_read(&sec->ps_refcount) > 0);
+ LASSERT(atomic_read(&sec->ps_nctx) > 0);
+ LASSERT(atomic_read(&ctx->cc_refcount) == 0);
+ LASSERT(ctx->cc_sec == sec);
+
+ OBD_FREE_PTR(ctx);
+
+ atomic_dec(&sec->ps_nctx);
+ sptlrpc_sec_put(sec);
+}
+
+static
+int plain_flush_ctx_cache(struct ptlrpc_sec *sec,
+ uid_t uid, int grace, int force)
+{
+ struct plain_sec *plsec = sec2plsec(sec);
+ struct ptlrpc_cli_ctx *ctx;
+
+ /* do nothing unless caller want to flush for 'all' */
+ if (uid != -1)
+ return 0;
+
+ write_lock(&plsec->pls_lock);
+ ctx = plsec->pls_ctx;
+ plsec->pls_ctx = NULL;
+ write_unlock(&plsec->pls_lock);
+
+ if (ctx)
+ sptlrpc_cli_ctx_put(ctx, 1);
+ return 0;
+}
+
+static
+int plain_alloc_reqbuf(struct ptlrpc_sec *sec,
+ struct ptlrpc_request *req,
+ int msgsize)
+{
+ __u32 buflens[PLAIN_PACK_SEGMENTS] = { 0, };
+ int alloc_len;
+
+ buflens[PLAIN_PACK_HDR_OFF] = sizeof(struct plain_header);
+ buflens[PLAIN_PACK_MSG_OFF] = msgsize;
+
+ if (req->rq_pack_udesc)
+ buflens[PLAIN_PACK_USER_OFF] = sptlrpc_current_user_desc_size();
+
+ if (req->rq_pack_bulk) {
+ LASSERT(req->rq_bulk_read || req->rq_bulk_write);
+ buflens[PLAIN_PACK_BULK_OFF] = PLAIN_BSD_SIZE;
+ }
+
+ alloc_len = lustre_msg_size_v2(PLAIN_PACK_SEGMENTS, buflens);
+
+ if (!req->rq_reqbuf) {
+ LASSERT(!req->rq_pool);
+
+ alloc_len = size_roundup_power2(alloc_len);
+ OBD_ALLOC_LARGE(req->rq_reqbuf, alloc_len);
+ if (!req->rq_reqbuf)
+ return -ENOMEM;
+
+ req->rq_reqbuf_len = alloc_len;
+ } else {
+ LASSERT(req->rq_pool);
+ LASSERT(req->rq_reqbuf_len >= alloc_len);
+ memset(req->rq_reqbuf, 0, alloc_len);
+ }
+
+ lustre_init_msg_v2(req->rq_reqbuf, PLAIN_PACK_SEGMENTS, buflens, NULL);
+ req->rq_reqmsg = lustre_msg_buf(req->rq_reqbuf, PLAIN_PACK_MSG_OFF, 0);
+
+ if (req->rq_pack_udesc)
+ sptlrpc_pack_user_desc(req->rq_reqbuf, PLAIN_PACK_USER_OFF);
+
+ return 0;
+}
+
+static
+void plain_free_reqbuf(struct ptlrpc_sec *sec,
+ struct ptlrpc_request *req)
+{
+ if (!req->rq_pool) {
+ OBD_FREE_LARGE(req->rq_reqbuf, req->rq_reqbuf_len);
+ req->rq_reqbuf = NULL;
+ req->rq_reqbuf_len = 0;
+ }
+}
+
+static
+int plain_alloc_repbuf(struct ptlrpc_sec *sec,
+ struct ptlrpc_request *req,
+ int msgsize)
+{
+ __u32 buflens[PLAIN_PACK_SEGMENTS] = { 0, };
+ int alloc_len;
+
+ buflens[PLAIN_PACK_HDR_OFF] = sizeof(struct plain_header);
+ buflens[PLAIN_PACK_MSG_OFF] = msgsize;
+
+ if (req->rq_pack_bulk) {
+ LASSERT(req->rq_bulk_read || req->rq_bulk_write);
+ buflens[PLAIN_PACK_BULK_OFF] = PLAIN_BSD_SIZE;
+ }
+
+ alloc_len = lustre_msg_size_v2(PLAIN_PACK_SEGMENTS, buflens);
+
+ /* add space for early reply */
+ alloc_len += plain_at_offset;
+
+ alloc_len = size_roundup_power2(alloc_len);
+
+ OBD_ALLOC_LARGE(req->rq_repbuf, alloc_len);
+ if (!req->rq_repbuf)
+ return -ENOMEM;
+
+ req->rq_repbuf_len = alloc_len;
+ return 0;
+}
+
+static
+void plain_free_repbuf(struct ptlrpc_sec *sec,
+ struct ptlrpc_request *req)
+{
+ OBD_FREE_LARGE(req->rq_repbuf, req->rq_repbuf_len);
+ req->rq_repbuf = NULL;
+ req->rq_repbuf_len = 0;
+}
+
+static
+int plain_enlarge_reqbuf(struct ptlrpc_sec *sec,
+ struct ptlrpc_request *req,
+ int segment, int newsize)
+{
+ struct lustre_msg *newbuf;
+ int oldsize;
+ int newmsg_size, newbuf_size;
+
+ LASSERT(req->rq_reqbuf);
+ LASSERT(req->rq_reqbuf_len >= req->rq_reqlen);
+ LASSERT(lustre_msg_buf(req->rq_reqbuf, PLAIN_PACK_MSG_OFF, 0) ==
+ req->rq_reqmsg);
+
+ /* compute new embedded msg size. */
+ oldsize = req->rq_reqmsg->lm_buflens[segment];
+ req->rq_reqmsg->lm_buflens[segment] = newsize;
+ newmsg_size = lustre_msg_size_v2(req->rq_reqmsg->lm_bufcount,
+ req->rq_reqmsg->lm_buflens);
+ req->rq_reqmsg->lm_buflens[segment] = oldsize;
+
+ /* compute new wrapper msg size. */
+ oldsize = req->rq_reqbuf->lm_buflens[PLAIN_PACK_MSG_OFF];
+ req->rq_reqbuf->lm_buflens[PLAIN_PACK_MSG_OFF] = newmsg_size;
+ newbuf_size = lustre_msg_size_v2(req->rq_reqbuf->lm_bufcount,
+ req->rq_reqbuf->lm_buflens);
+ req->rq_reqbuf->lm_buflens[PLAIN_PACK_MSG_OFF] = oldsize;
+
+ /* request from pool should always have enough buffer */
+ LASSERT(!req->rq_pool || req->rq_reqbuf_len >= newbuf_size);
+
+ if (req->rq_reqbuf_len < newbuf_size) {
+ newbuf_size = size_roundup_power2(newbuf_size);
+
+ OBD_ALLOC_LARGE(newbuf, newbuf_size);
+ if (newbuf == NULL)
+ return -ENOMEM;
+
+ /* Must lock this, so that otherwise unprotected change of
+ * rq_reqmsg is not racing with parallel processing of
+ * imp_replay_list traversing threads. See LU-3333
+ * This is a bandaid at best, we really need to deal with this
+ * in request enlarging code before unpacking that's already
+ * there */
+ if (req->rq_import)
+ spin_lock(&req->rq_import->imp_lock);
+
+ memcpy(newbuf, req->rq_reqbuf, req->rq_reqbuf_len);
+
+ OBD_FREE_LARGE(req->rq_reqbuf, req->rq_reqbuf_len);
+ req->rq_reqbuf = newbuf;
+ req->rq_reqbuf_len = newbuf_size;
+ req->rq_reqmsg = lustre_msg_buf(req->rq_reqbuf,
+ PLAIN_PACK_MSG_OFF, 0);
+
+ if (req->rq_import)
+ spin_unlock(&req->rq_import->imp_lock);
+ }
+
+ _sptlrpc_enlarge_msg_inplace(req->rq_reqbuf, PLAIN_PACK_MSG_OFF,
+ newmsg_size);
+ _sptlrpc_enlarge_msg_inplace(req->rq_reqmsg, segment, newsize);
+
+ req->rq_reqlen = newmsg_size;
+ return 0;
+}
+
+/****************************************
+ * service apis *
+ ****************************************/
+
+static struct ptlrpc_svc_ctx plain_svc_ctx = {
+ .sc_refcount = ATOMIC_INIT(1),
+ .sc_policy = &plain_policy,
+};
+
+static
+int plain_accept(struct ptlrpc_request *req)
+{
+ struct lustre_msg *msg = req->rq_reqbuf;
+ struct plain_header *phdr;
+ int swabbed;
+
+ LASSERT(SPTLRPC_FLVR_POLICY(req->rq_flvr.sf_rpc) ==
+ SPTLRPC_POLICY_PLAIN);
+
+ if (SPTLRPC_FLVR_BASE(req->rq_flvr.sf_rpc) !=
+ SPTLRPC_FLVR_BASE(SPTLRPC_FLVR_PLAIN) ||
+ SPTLRPC_FLVR_BULK_TYPE(req->rq_flvr.sf_rpc) !=
+ SPTLRPC_FLVR_BULK_TYPE(SPTLRPC_FLVR_PLAIN)) {
+ CERROR("Invalid rpc flavor %x\n", req->rq_flvr.sf_rpc);
+ return SECSVC_DROP;
+ }
+
+ if (msg->lm_bufcount < PLAIN_PACK_SEGMENTS) {
+ CERROR("unexpected request buf count %u\n", msg->lm_bufcount);
+ return SECSVC_DROP;
+ }
+
+ swabbed = ptlrpc_req_need_swab(req);
+
+ phdr = lustre_msg_buf(msg, PLAIN_PACK_HDR_OFF, sizeof(*phdr));
+ if (phdr == NULL) {
+ CERROR("missing plain header\n");
+ return -EPROTO;
+ }
+
+ if (phdr->ph_ver != 0) {
+ CERROR("Invalid header version\n");
+ return -EPROTO;
+ }
+
+ if (phdr->ph_bulk_hash_alg >= BULK_HASH_ALG_MAX) {
+ CERROR("invalid hash algorithm: %u\n", phdr->ph_bulk_hash_alg);
+ return -EPROTO;
+ }
+
+ req->rq_sp_from = phdr->ph_sp;
+ req->rq_flvr.u_bulk.hash.hash_alg = phdr->ph_bulk_hash_alg;
+
+ if (phdr->ph_flags & PLAIN_FL_USER) {
+ if (sptlrpc_unpack_user_desc(msg, PLAIN_PACK_USER_OFF,
+ swabbed)) {
+ CERROR("Mal-formed user descriptor\n");
+ return SECSVC_DROP;
+ }
+
+ req->rq_pack_udesc = 1;
+ req->rq_user_desc = lustre_msg_buf(msg, PLAIN_PACK_USER_OFF, 0);
+ }
+
+ if (phdr->ph_flags & PLAIN_FL_BULK) {
+ if (plain_unpack_bsd(msg, swabbed))
+ return SECSVC_DROP;
+
+ req->rq_pack_bulk = 1;
+ }
+
+ req->rq_reqmsg = lustre_msg_buf(msg, PLAIN_PACK_MSG_OFF, 0);
+ req->rq_reqlen = msg->lm_buflens[PLAIN_PACK_MSG_OFF];
+
+ req->rq_svc_ctx = &plain_svc_ctx;
+ atomic_inc(&req->rq_svc_ctx->sc_refcount);
+
+ return SECSVC_OK;
+}
+
+static
+int plain_alloc_rs(struct ptlrpc_request *req, int msgsize)
+{
+ struct ptlrpc_reply_state *rs;
+ __u32 buflens[PLAIN_PACK_SEGMENTS] = { 0, };
+ int rs_size = sizeof(*rs);
+
+ LASSERT(msgsize % 8 == 0);
+
+ buflens[PLAIN_PACK_HDR_OFF] = sizeof(struct plain_header);
+ buflens[PLAIN_PACK_MSG_OFF] = msgsize;
+
+ if (req->rq_pack_bulk && (req->rq_bulk_read || req->rq_bulk_write))
+ buflens[PLAIN_PACK_BULK_OFF] = PLAIN_BSD_SIZE;
+
+ rs_size += lustre_msg_size_v2(PLAIN_PACK_SEGMENTS, buflens);
+
+ rs = req->rq_reply_state;
+
+ if (rs) {
+ /* pre-allocated */
+ LASSERT(rs->rs_size >= rs_size);
+ } else {
+ OBD_ALLOC_LARGE(rs, rs_size);
+ if (rs == NULL)
+ return -ENOMEM;
+
+ rs->rs_size = rs_size;
+ }
+
+ rs->rs_svc_ctx = req->rq_svc_ctx;
+ atomic_inc(&req->rq_svc_ctx->sc_refcount);
+ rs->rs_repbuf = (struct lustre_msg *) (rs + 1);
+ rs->rs_repbuf_len = rs_size - sizeof(*rs);
+
+ lustre_init_msg_v2(rs->rs_repbuf, PLAIN_PACK_SEGMENTS, buflens, NULL);
+ rs->rs_msg = lustre_msg_buf_v2(rs->rs_repbuf, PLAIN_PACK_MSG_OFF, 0);
+
+ req->rq_reply_state = rs;
+ return 0;
+}
+
+static
+void plain_free_rs(struct ptlrpc_reply_state *rs)
+{
+ LASSERT(atomic_read(&rs->rs_svc_ctx->sc_refcount) > 1);
+ atomic_dec(&rs->rs_svc_ctx->sc_refcount);
+
+ if (!rs->rs_prealloc)
+ OBD_FREE_LARGE(rs, rs->rs_size);
+}
+
+static
+int plain_authorize(struct ptlrpc_request *req)
+{
+ struct ptlrpc_reply_state *rs = req->rq_reply_state;
+ struct lustre_msg_v2 *msg = rs->rs_repbuf;
+ struct plain_header *phdr;
+ int len;
+
+ LASSERT(rs);
+ LASSERT(msg);
+
+ if (req->rq_replen != msg->lm_buflens[PLAIN_PACK_MSG_OFF])
+ len = lustre_shrink_msg(msg, PLAIN_PACK_MSG_OFF,
+ req->rq_replen, 1);
+ else
+ len = lustre_msg_size_v2(msg->lm_bufcount, msg->lm_buflens);
+
+ msg->lm_secflvr = req->rq_flvr.sf_rpc;
+
+ phdr = lustre_msg_buf(msg, PLAIN_PACK_HDR_OFF, 0);
+ phdr->ph_ver = 0;
+ phdr->ph_flags = 0;
+ phdr->ph_bulk_hash_alg = req->rq_flvr.u_bulk.hash.hash_alg;
+
+ if (req->rq_pack_bulk)
+ phdr->ph_flags |= PLAIN_FL_BULK;
+
+ rs->rs_repdata_len = len;
+
+ if (likely(req->rq_packed_final)) {
+ if (lustre_msghdr_get_flags(req->rq_reqmsg) & MSGHDR_AT_SUPPORT)
+ req->rq_reply_off = plain_at_offset;
+ else
+ req->rq_reply_off = 0;
+ } else {
+ unsigned int hsize = 4;
+
+ cfs_crypto_hash_digest(CFS_HASH_ALG_CRC32,
+ lustre_msg_buf(msg, PLAIN_PACK_MSG_OFF, 0),
+ lustre_msg_buflen(msg, PLAIN_PACK_MSG_OFF),
+ NULL, 0, (unsigned char *)&msg->lm_cksum, &hsize);
+ req->rq_reply_off = 0;
+ }
+
+ return 0;
+}
+
+static
+int plain_svc_unwrap_bulk(struct ptlrpc_request *req,
+ struct ptlrpc_bulk_desc *desc)
+{
+ struct ptlrpc_reply_state *rs = req->rq_reply_state;
+ struct ptlrpc_bulk_sec_desc *bsdr, *bsdv;
+ struct plain_bulk_token *tokenr;
+ int rc;
+
+ LASSERT(req->rq_bulk_write);
+ LASSERT(req->rq_pack_bulk);
+
+ bsdr = lustre_msg_buf(req->rq_reqbuf, PLAIN_PACK_BULK_OFF, 0);
+ tokenr = (struct plain_bulk_token *) bsdr->bsd_data;
+ bsdv = lustre_msg_buf(rs->rs_repbuf, PLAIN_PACK_BULK_OFF, 0);
+
+ bsdv->bsd_version = 0;
+ bsdv->bsd_type = SPTLRPC_BULK_DEFAULT;
+ bsdv->bsd_svc = bsdr->bsd_svc;
+ bsdv->bsd_flags = 0;
+
+ if (bsdr->bsd_svc == SPTLRPC_BULK_SVC_NULL)
+ return 0;
+
+ rc = plain_verify_bulk_csum(desc, req->rq_flvr.u_bulk.hash.hash_alg,
+ tokenr);
+ if (rc) {
+ bsdv->bsd_flags |= BSD_FL_ERR;
+ CERROR("bulk write: server verify failed: %d\n", rc);
+ }
+
+ return rc;
+}
+
+static
+int plain_svc_wrap_bulk(struct ptlrpc_request *req,
+ struct ptlrpc_bulk_desc *desc)
+{
+ struct ptlrpc_reply_state *rs = req->rq_reply_state;
+ struct ptlrpc_bulk_sec_desc *bsdr, *bsdv;
+ struct plain_bulk_token *tokenv;
+ int rc;
+
+ LASSERT(req->rq_bulk_read);
+ LASSERT(req->rq_pack_bulk);
+
+ bsdr = lustre_msg_buf(req->rq_reqbuf, PLAIN_PACK_BULK_OFF, 0);
+ bsdv = lustre_msg_buf(rs->rs_repbuf, PLAIN_PACK_BULK_OFF, 0);
+ tokenv = (struct plain_bulk_token *) bsdv->bsd_data;
+
+ bsdv->bsd_version = 0;
+ bsdv->bsd_type = SPTLRPC_BULK_DEFAULT;
+ bsdv->bsd_svc = bsdr->bsd_svc;
+ bsdv->bsd_flags = 0;
+
+ if (bsdr->bsd_svc == SPTLRPC_BULK_SVC_NULL)
+ return 0;
+
+ rc = plain_generate_bulk_csum(desc, req->rq_flvr.u_bulk.hash.hash_alg,
+ tokenv);
+ if (rc) {
+ CERROR("bulk read: server failed to compute checksum: %d\n",
+ rc);
+ } else {
+ if (OBD_FAIL_CHECK(OBD_FAIL_OSC_CHECKSUM_RECEIVE))
+ corrupt_bulk_data(desc);
+ }
+
+ return rc;
+}
+
+static struct ptlrpc_ctx_ops plain_ctx_ops = {
+ .refresh = plain_ctx_refresh,
+ .validate = plain_ctx_validate,
+ .sign = plain_ctx_sign,
+ .verify = plain_ctx_verify,
+ .wrap_bulk = plain_cli_wrap_bulk,
+ .unwrap_bulk = plain_cli_unwrap_bulk,
+};
+
+static struct ptlrpc_sec_cops plain_sec_cops = {
+ .create_sec = plain_create_sec,
+ .destroy_sec = plain_destroy_sec,
+ .kill_sec = plain_kill_sec,
+ .lookup_ctx = plain_lookup_ctx,
+ .release_ctx = plain_release_ctx,
+ .flush_ctx_cache = plain_flush_ctx_cache,
+ .alloc_reqbuf = plain_alloc_reqbuf,
+ .free_reqbuf = plain_free_reqbuf,
+ .alloc_repbuf = plain_alloc_repbuf,
+ .free_repbuf = plain_free_repbuf,
+ .enlarge_reqbuf = plain_enlarge_reqbuf,
+};
+
+static struct ptlrpc_sec_sops plain_sec_sops = {
+ .accept = plain_accept,
+ .alloc_rs = plain_alloc_rs,
+ .authorize = plain_authorize,
+ .free_rs = plain_free_rs,
+ .unwrap_bulk = plain_svc_unwrap_bulk,
+ .wrap_bulk = plain_svc_wrap_bulk,
+};
+
+static struct ptlrpc_sec_policy plain_policy = {
+ .sp_owner = THIS_MODULE,
+ .sp_name = "plain",
+ .sp_policy = SPTLRPC_POLICY_PLAIN,
+ .sp_cops = &plain_sec_cops,
+ .sp_sops = &plain_sec_sops,
+};
+
+int sptlrpc_plain_init(void)
+{
+ __u32 buflens[PLAIN_PACK_SEGMENTS] = { 0, };
+ int rc;
+
+ buflens[PLAIN_PACK_MSG_OFF] = lustre_msg_early_size();
+ plain_at_offset = lustre_msg_size_v2(PLAIN_PACK_SEGMENTS, buflens);
+
+ rc = sptlrpc_register_policy(&plain_policy);
+ if (rc)
+ CERROR("failed to register: %d\n", rc);
+
+ return rc;
+}
+
+void sptlrpc_plain_fini(void)
+{
+ int rc;
+
+ rc = sptlrpc_unregister_policy(&plain_policy);
+ if (rc)
+ CERROR("cannot unregister: %d\n", rc);
+}
diff --git a/drivers/staging/lustre/lustre/ptlrpc/service.c b/drivers/staging/lustre/lustre/ptlrpc/service.c
new file mode 100644
index 000000000..8e6142151
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ptlrpc/service.c
@@ -0,0 +1,3105 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2010, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#define DEBUG_SUBSYSTEM S_RPC
+#include "../include/obd_support.h"
+#include "../include/obd_class.h"
+#include "../include/lustre_net.h"
+#include "../include/lu_object.h"
+#include "../../include/linux/lnet/types.h"
+#include "ptlrpc_internal.h"
+
+/* The following are visible and mutable through /sys/module/ptlrpc */
+int test_req_buffer_pressure = 0;
+module_param(test_req_buffer_pressure, int, 0444);
+MODULE_PARM_DESC(test_req_buffer_pressure, "set non-zero to put pressure on request buffer pools");
+module_param(at_min, int, 0644);
+MODULE_PARM_DESC(at_min, "Adaptive timeout minimum (sec)");
+module_param(at_max, int, 0644);
+MODULE_PARM_DESC(at_max, "Adaptive timeout maximum (sec)");
+module_param(at_history, int, 0644);
+MODULE_PARM_DESC(at_history,
+ "Adaptive timeouts remember the slowest event that took place within this period (sec)");
+module_param(at_early_margin, int, 0644);
+MODULE_PARM_DESC(at_early_margin, "How soon before an RPC deadline to send an early reply");
+module_param(at_extra, int, 0644);
+MODULE_PARM_DESC(at_extra, "How much extra time to give with each early reply");
+
+
+/* forward ref */
+static int ptlrpc_server_post_idle_rqbds(struct ptlrpc_service_part *svcpt);
+static void ptlrpc_server_hpreq_fini(struct ptlrpc_request *req);
+static void ptlrpc_at_remove_timed(struct ptlrpc_request *req);
+
+/** Holds a list of all PTLRPC services */
+LIST_HEAD(ptlrpc_all_services);
+/** Used to protect the \e ptlrpc_all_services list */
+struct mutex ptlrpc_all_services_mutex;
+
+struct ptlrpc_request_buffer_desc *
+ptlrpc_alloc_rqbd(struct ptlrpc_service_part *svcpt)
+{
+ struct ptlrpc_service *svc = svcpt->scp_service;
+ struct ptlrpc_request_buffer_desc *rqbd;
+
+ OBD_CPT_ALLOC_PTR(rqbd, svc->srv_cptable, svcpt->scp_cpt);
+ if (rqbd == NULL)
+ return NULL;
+
+ rqbd->rqbd_svcpt = svcpt;
+ rqbd->rqbd_refcount = 0;
+ rqbd->rqbd_cbid.cbid_fn = request_in_callback;
+ rqbd->rqbd_cbid.cbid_arg = rqbd;
+ INIT_LIST_HEAD(&rqbd->rqbd_reqs);
+ OBD_CPT_ALLOC_LARGE(rqbd->rqbd_buffer, svc->srv_cptable,
+ svcpt->scp_cpt, svc->srv_buf_size);
+ if (rqbd->rqbd_buffer == NULL) {
+ OBD_FREE_PTR(rqbd);
+ return NULL;
+ }
+
+ spin_lock(&svcpt->scp_lock);
+ list_add(&rqbd->rqbd_list, &svcpt->scp_rqbd_idle);
+ svcpt->scp_nrqbds_total++;
+ spin_unlock(&svcpt->scp_lock);
+
+ return rqbd;
+}
+
+void
+ptlrpc_free_rqbd(struct ptlrpc_request_buffer_desc *rqbd)
+{
+ struct ptlrpc_service_part *svcpt = rqbd->rqbd_svcpt;
+
+ LASSERT(rqbd->rqbd_refcount == 0);
+ LASSERT(list_empty(&rqbd->rqbd_reqs));
+
+ spin_lock(&svcpt->scp_lock);
+ list_del(&rqbd->rqbd_list);
+ svcpt->scp_nrqbds_total--;
+ spin_unlock(&svcpt->scp_lock);
+
+ OBD_FREE_LARGE(rqbd->rqbd_buffer, svcpt->scp_service->srv_buf_size);
+ OBD_FREE_PTR(rqbd);
+}
+
+int
+ptlrpc_grow_req_bufs(struct ptlrpc_service_part *svcpt, int post)
+{
+ struct ptlrpc_service *svc = svcpt->scp_service;
+ struct ptlrpc_request_buffer_desc *rqbd;
+ int rc = 0;
+ int i;
+
+ if (svcpt->scp_rqbd_allocating)
+ goto try_post;
+
+ spin_lock(&svcpt->scp_lock);
+ /* check again with lock */
+ if (svcpt->scp_rqbd_allocating) {
+ /* NB: we might allow more than one thread in the future */
+ LASSERT(svcpt->scp_rqbd_allocating == 1);
+ spin_unlock(&svcpt->scp_lock);
+ goto try_post;
+ }
+
+ svcpt->scp_rqbd_allocating++;
+ spin_unlock(&svcpt->scp_lock);
+
+
+ for (i = 0; i < svc->srv_nbuf_per_group; i++) {
+ /* NB: another thread might have recycled enough rqbds, we
+ * need to make sure it wouldn't over-allocate, see LU-1212. */
+ if (svcpt->scp_nrqbds_posted >= svc->srv_nbuf_per_group)
+ break;
+
+ rqbd = ptlrpc_alloc_rqbd(svcpt);
+
+ if (rqbd == NULL) {
+ CERROR("%s: Can't allocate request buffer\n",
+ svc->srv_name);
+ rc = -ENOMEM;
+ break;
+ }
+ }
+
+ spin_lock(&svcpt->scp_lock);
+
+ LASSERT(svcpt->scp_rqbd_allocating == 1);
+ svcpt->scp_rqbd_allocating--;
+
+ spin_unlock(&svcpt->scp_lock);
+
+ CDEBUG(D_RPCTRACE,
+ "%s: allocate %d new %d-byte reqbufs (%d/%d left), rc = %d\n",
+ svc->srv_name, i, svc->srv_buf_size, svcpt->scp_nrqbds_posted,
+ svcpt->scp_nrqbds_total, rc);
+
+ try_post:
+ if (post && rc == 0)
+ rc = ptlrpc_server_post_idle_rqbds(svcpt);
+
+ return rc;
+}
+
+/**
+ * Part of Rep-Ack logic.
+ * Puts a lock and its mode into reply state associated to request reply.
+ */
+void
+ptlrpc_save_lock(struct ptlrpc_request *req,
+ struct lustre_handle *lock, int mode, int no_ack)
+{
+ struct ptlrpc_reply_state *rs = req->rq_reply_state;
+ int idx;
+
+ LASSERT(rs != NULL);
+ LASSERT(rs->rs_nlocks < RS_MAX_LOCKS);
+
+ if (req->rq_export->exp_disconnected) {
+ ldlm_lock_decref(lock, mode);
+ } else {
+ idx = rs->rs_nlocks++;
+ rs->rs_locks[idx] = *lock;
+ rs->rs_modes[idx] = mode;
+ rs->rs_difficult = 1;
+ rs->rs_no_ack = !!no_ack;
+ }
+}
+EXPORT_SYMBOL(ptlrpc_save_lock);
+
+
+struct ptlrpc_hr_partition;
+
+struct ptlrpc_hr_thread {
+ int hrt_id; /* thread ID */
+ spinlock_t hrt_lock;
+ wait_queue_head_t hrt_waitq;
+ struct list_head hrt_queue; /* RS queue */
+ struct ptlrpc_hr_partition *hrt_partition;
+};
+
+struct ptlrpc_hr_partition {
+ /* # of started threads */
+ atomic_t hrp_nstarted;
+ /* # of stopped threads */
+ atomic_t hrp_nstopped;
+ /* cpu partition id */
+ int hrp_cpt;
+ /* round-robin rotor for choosing thread */
+ int hrp_rotor;
+ /* total number of threads on this partition */
+ int hrp_nthrs;
+ /* threads table */
+ struct ptlrpc_hr_thread *hrp_thrs;
+};
+
+#define HRT_RUNNING 0
+#define HRT_STOPPING 1
+
+struct ptlrpc_hr_service {
+ /* CPU partition table, it's just cfs_cpt_table for now */
+ struct cfs_cpt_table *hr_cpt_table;
+ /** controller sleep waitq */
+ wait_queue_head_t hr_waitq;
+ unsigned int hr_stopping;
+ /** roundrobin rotor for non-affinity service */
+ unsigned int hr_rotor;
+ /* partition data */
+ struct ptlrpc_hr_partition **hr_partitions;
+};
+
+struct rs_batch {
+ struct list_head rsb_replies;
+ unsigned int rsb_n_replies;
+ struct ptlrpc_service_part *rsb_svcpt;
+};
+
+/** reply handling service. */
+static struct ptlrpc_hr_service ptlrpc_hr;
+
+/**
+ * maximum number of replies scheduled in one batch
+ */
+#define MAX_SCHEDULED 256
+
+/**
+ * Initialize a reply batch.
+ *
+ * \param b batch
+ */
+static void rs_batch_init(struct rs_batch *b)
+{
+ memset(b, 0, sizeof(*b));
+ INIT_LIST_HEAD(&b->rsb_replies);
+}
+
+/**
+ * Choose an hr thread to dispatch requests to.
+ */
+static struct ptlrpc_hr_thread *
+ptlrpc_hr_select(struct ptlrpc_service_part *svcpt)
+{
+ struct ptlrpc_hr_partition *hrp;
+ unsigned int rotor;
+
+ if (svcpt->scp_cpt >= 0 &&
+ svcpt->scp_service->srv_cptable == ptlrpc_hr.hr_cpt_table) {
+ /* directly match partition */
+ hrp = ptlrpc_hr.hr_partitions[svcpt->scp_cpt];
+
+ } else {
+ rotor = ptlrpc_hr.hr_rotor++;
+ rotor %= cfs_cpt_number(ptlrpc_hr.hr_cpt_table);
+
+ hrp = ptlrpc_hr.hr_partitions[rotor];
+ }
+
+ rotor = hrp->hrp_rotor++;
+ return &hrp->hrp_thrs[rotor % hrp->hrp_nthrs];
+}
+
+/**
+ * Dispatch all replies accumulated in the batch to one from
+ * dedicated reply handling threads.
+ *
+ * \param b batch
+ */
+static void rs_batch_dispatch(struct rs_batch *b)
+{
+ if (b->rsb_n_replies != 0) {
+ struct ptlrpc_hr_thread *hrt;
+
+ hrt = ptlrpc_hr_select(b->rsb_svcpt);
+
+ spin_lock(&hrt->hrt_lock);
+ list_splice_init(&b->rsb_replies, &hrt->hrt_queue);
+ spin_unlock(&hrt->hrt_lock);
+
+ wake_up(&hrt->hrt_waitq);
+ b->rsb_n_replies = 0;
+ }
+}
+
+/**
+ * Add a reply to a batch.
+ * Add one reply object to a batch, schedule batched replies if overload.
+ *
+ * \param b batch
+ * \param rs reply
+ */
+static void rs_batch_add(struct rs_batch *b, struct ptlrpc_reply_state *rs)
+{
+ struct ptlrpc_service_part *svcpt = rs->rs_svcpt;
+
+ if (svcpt != b->rsb_svcpt || b->rsb_n_replies >= MAX_SCHEDULED) {
+ if (b->rsb_svcpt != NULL) {
+ rs_batch_dispatch(b);
+ spin_unlock(&b->rsb_svcpt->scp_rep_lock);
+ }
+ spin_lock(&svcpt->scp_rep_lock);
+ b->rsb_svcpt = svcpt;
+ }
+ spin_lock(&rs->rs_lock);
+ rs->rs_scheduled_ever = 1;
+ if (rs->rs_scheduled == 0) {
+ list_move(&rs->rs_list, &b->rsb_replies);
+ rs->rs_scheduled = 1;
+ b->rsb_n_replies++;
+ }
+ rs->rs_committed = 1;
+ spin_unlock(&rs->rs_lock);
+}
+
+/**
+ * Reply batch finalization.
+ * Dispatch remaining replies from the batch
+ * and release remaining spinlock.
+ *
+ * \param b batch
+ */
+static void rs_batch_fini(struct rs_batch *b)
+{
+ if (b->rsb_svcpt != NULL) {
+ rs_batch_dispatch(b);
+ spin_unlock(&b->rsb_svcpt->scp_rep_lock);
+ }
+}
+
+#define DECLARE_RS_BATCH(b) struct rs_batch b
+
+
+/**
+ * Put reply state into a queue for processing because we received
+ * ACK from the client
+ */
+void ptlrpc_dispatch_difficult_reply(struct ptlrpc_reply_state *rs)
+{
+ struct ptlrpc_hr_thread *hrt;
+
+ LASSERT(list_empty(&rs->rs_list));
+
+ hrt = ptlrpc_hr_select(rs->rs_svcpt);
+
+ spin_lock(&hrt->hrt_lock);
+ list_add_tail(&rs->rs_list, &hrt->hrt_queue);
+ spin_unlock(&hrt->hrt_lock);
+
+ wake_up(&hrt->hrt_waitq);
+}
+
+void
+ptlrpc_schedule_difficult_reply(struct ptlrpc_reply_state *rs)
+{
+ assert_spin_locked(&rs->rs_svcpt->scp_rep_lock);
+ assert_spin_locked(&rs->rs_lock);
+ LASSERT(rs->rs_difficult);
+ rs->rs_scheduled_ever = 1; /* flag any notification attempt */
+
+ if (rs->rs_scheduled) { /* being set up or already notified */
+ return;
+ }
+
+ rs->rs_scheduled = 1;
+ list_del_init(&rs->rs_list);
+ ptlrpc_dispatch_difficult_reply(rs);
+}
+EXPORT_SYMBOL(ptlrpc_schedule_difficult_reply);
+
+void ptlrpc_commit_replies(struct obd_export *exp)
+{
+ struct ptlrpc_reply_state *rs, *nxt;
+ DECLARE_RS_BATCH(batch);
+
+ rs_batch_init(&batch);
+ /* Find any replies that have been committed and get their service
+ * to attend to complete them. */
+
+ /* CAVEAT EMPTOR: spinlock ordering!!! */
+ spin_lock(&exp->exp_uncommitted_replies_lock);
+ list_for_each_entry_safe(rs, nxt, &exp->exp_uncommitted_replies,
+ rs_obd_list) {
+ LASSERT(rs->rs_difficult);
+ /* VBR: per-export last_committed */
+ LASSERT(rs->rs_export);
+ if (rs->rs_transno <= exp->exp_last_committed) {
+ list_del_init(&rs->rs_obd_list);
+ rs_batch_add(&batch, rs);
+ }
+ }
+ spin_unlock(&exp->exp_uncommitted_replies_lock);
+ rs_batch_fini(&batch);
+}
+EXPORT_SYMBOL(ptlrpc_commit_replies);
+
+static int
+ptlrpc_server_post_idle_rqbds(struct ptlrpc_service_part *svcpt)
+{
+ struct ptlrpc_request_buffer_desc *rqbd;
+ int rc;
+ int posted = 0;
+
+ for (;;) {
+ spin_lock(&svcpt->scp_lock);
+
+ if (list_empty(&svcpt->scp_rqbd_idle)) {
+ spin_unlock(&svcpt->scp_lock);
+ return posted;
+ }
+
+ rqbd = list_entry(svcpt->scp_rqbd_idle.next,
+ struct ptlrpc_request_buffer_desc,
+ rqbd_list);
+ list_del(&rqbd->rqbd_list);
+
+ /* assume we will post successfully */
+ svcpt->scp_nrqbds_posted++;
+ list_add(&rqbd->rqbd_list, &svcpt->scp_rqbd_posted);
+
+ spin_unlock(&svcpt->scp_lock);
+
+ rc = ptlrpc_register_rqbd(rqbd);
+ if (rc != 0)
+ break;
+
+ posted = 1;
+ }
+
+ spin_lock(&svcpt->scp_lock);
+
+ svcpt->scp_nrqbds_posted--;
+ list_del(&rqbd->rqbd_list);
+ list_add_tail(&rqbd->rqbd_list, &svcpt->scp_rqbd_idle);
+
+ /* Don't complain if no request buffers are posted right now; LNET
+ * won't drop requests because we set the portal lazy! */
+
+ spin_unlock(&svcpt->scp_lock);
+
+ return -1;
+}
+
+static void ptlrpc_at_timer(unsigned long castmeharder)
+{
+ struct ptlrpc_service_part *svcpt;
+
+ svcpt = (struct ptlrpc_service_part *)castmeharder;
+
+ svcpt->scp_at_check = 1;
+ svcpt->scp_at_checktime = cfs_time_current();
+ wake_up(&svcpt->scp_waitq);
+}
+
+static void
+ptlrpc_server_nthreads_check(struct ptlrpc_service *svc,
+ struct ptlrpc_service_conf *conf)
+{
+ struct ptlrpc_service_thr_conf *tc = &conf->psc_thr;
+ unsigned init;
+ unsigned total;
+ unsigned nthrs;
+ int weight;
+
+ /*
+ * Common code for estimating & validating threads number.
+ * CPT affinity service could have percpt thread-pool instead
+ * of a global thread-pool, which means user might not always
+ * get the threads number they give it in conf::tc_nthrs_user
+ * even they did set. It's because we need to validate threads
+ * number for each CPT to guarantee each pool will have enough
+ * threads to keep the service healthy.
+ */
+ init = PTLRPC_NTHRS_INIT + (svc->srv_ops.so_hpreq_handler != NULL);
+ init = max_t(int, init, tc->tc_nthrs_init);
+
+ /* NB: please see comments in lustre_lnet.h for definition
+ * details of these members */
+ LASSERT(tc->tc_nthrs_max != 0);
+
+ if (tc->tc_nthrs_user != 0) {
+ /* In case there is a reason to test a service with many
+ * threads, we give a less strict check here, it can
+ * be up to 8 * nthrs_max */
+ total = min(tc->tc_nthrs_max * 8, tc->tc_nthrs_user);
+ nthrs = total / svc->srv_ncpts;
+ init = max(init, nthrs);
+ goto out;
+ }
+
+ total = tc->tc_nthrs_max;
+ if (tc->tc_nthrs_base == 0) {
+ /* don't care about base threads number per partition,
+ * this is most for non-affinity service */
+ nthrs = total / svc->srv_ncpts;
+ goto out;
+ }
+
+ nthrs = tc->tc_nthrs_base;
+ if (svc->srv_ncpts == 1) {
+ int i;
+
+ /* NB: Increase the base number if it's single partition
+ * and total number of cores/HTs is larger or equal to 4.
+ * result will always < 2 * nthrs_base */
+ weight = cfs_cpt_weight(svc->srv_cptable, CFS_CPT_ANY);
+ for (i = 1; (weight >> (i + 1)) != 0 && /* >= 4 cores/HTs */
+ (tc->tc_nthrs_base >> i) != 0; i++)
+ nthrs += tc->tc_nthrs_base >> i;
+ }
+
+ if (tc->tc_thr_factor != 0) {
+ int factor = tc->tc_thr_factor;
+ const int fade = 4;
+
+ /*
+ * User wants to increase number of threads with for
+ * each CPU core/HT, most likely the factor is larger then
+ * one thread/core because service threads are supposed to
+ * be blocked by lock or wait for IO.
+ */
+ /*
+ * Amdahl's law says that adding processors wouldn't give
+ * a linear increasing of parallelism, so it's nonsense to
+ * have too many threads no matter how many cores/HTs
+ * there are.
+ */
+ /* weight is # of HTs */
+ if (cpumask_weight(topology_thread_cpumask(0)) > 1) {
+ /* depress thread factor for hyper-thread */
+ factor = factor - (factor >> 1) + (factor >> 3);
+ }
+
+ weight = cfs_cpt_weight(svc->srv_cptable, 0);
+ LASSERT(weight > 0);
+
+ for (; factor > 0 && weight > 0; factor--, weight -= fade)
+ nthrs += min(weight, fade) * factor;
+ }
+
+ if (nthrs * svc->srv_ncpts > tc->tc_nthrs_max) {
+ nthrs = max(tc->tc_nthrs_base,
+ tc->tc_nthrs_max / svc->srv_ncpts);
+ }
+ out:
+ nthrs = max(nthrs, tc->tc_nthrs_init);
+ svc->srv_nthrs_cpt_limit = nthrs;
+ svc->srv_nthrs_cpt_init = init;
+
+ if (nthrs * svc->srv_ncpts > tc->tc_nthrs_max) {
+ CDEBUG(D_OTHER, "%s: This service may have more threads (%d) than the given soft limit (%d)\n",
+ svc->srv_name, nthrs * svc->srv_ncpts,
+ tc->tc_nthrs_max);
+ }
+}
+
+/**
+ * Initialize percpt data for a service
+ */
+static int
+ptlrpc_service_part_init(struct ptlrpc_service *svc,
+ struct ptlrpc_service_part *svcpt, int cpt)
+{
+ struct ptlrpc_at_array *array;
+ int size;
+ int index;
+ int rc;
+
+ svcpt->scp_cpt = cpt;
+ INIT_LIST_HEAD(&svcpt->scp_threads);
+
+ /* rqbd and incoming request queue */
+ spin_lock_init(&svcpt->scp_lock);
+ INIT_LIST_HEAD(&svcpt->scp_rqbd_idle);
+ INIT_LIST_HEAD(&svcpt->scp_rqbd_posted);
+ INIT_LIST_HEAD(&svcpt->scp_req_incoming);
+ init_waitqueue_head(&svcpt->scp_waitq);
+ /* history request & rqbd list */
+ INIT_LIST_HEAD(&svcpt->scp_hist_reqs);
+ INIT_LIST_HEAD(&svcpt->scp_hist_rqbds);
+
+ /* active requests and hp requests */
+ spin_lock_init(&svcpt->scp_req_lock);
+
+ /* reply states */
+ spin_lock_init(&svcpt->scp_rep_lock);
+ INIT_LIST_HEAD(&svcpt->scp_rep_active);
+ INIT_LIST_HEAD(&svcpt->scp_rep_idle);
+ init_waitqueue_head(&svcpt->scp_rep_waitq);
+ atomic_set(&svcpt->scp_nreps_difficult, 0);
+
+ /* adaptive timeout */
+ spin_lock_init(&svcpt->scp_at_lock);
+ array = &svcpt->scp_at_array;
+
+ size = at_est2timeout(at_max);
+ array->paa_size = size;
+ array->paa_count = 0;
+ array->paa_deadline = -1;
+
+ /* allocate memory for scp_at_array (ptlrpc_at_array) */
+ OBD_CPT_ALLOC(array->paa_reqs_array,
+ svc->srv_cptable, cpt, sizeof(struct list_head) * size);
+ if (array->paa_reqs_array == NULL)
+ return -ENOMEM;
+
+ for (index = 0; index < size; index++)
+ INIT_LIST_HEAD(&array->paa_reqs_array[index]);
+
+ OBD_CPT_ALLOC(array->paa_reqs_count,
+ svc->srv_cptable, cpt, sizeof(__u32) * size);
+ if (array->paa_reqs_count == NULL)
+ goto failed;
+
+ cfs_timer_init(&svcpt->scp_at_timer, ptlrpc_at_timer, svcpt);
+ /* At SOW, service time should be quick; 10s seems generous. If client
+ * timeout is less than this, we'll be sending an early reply. */
+ at_init(&svcpt->scp_at_estimate, 10, 0);
+
+ /* assign this before call ptlrpc_grow_req_bufs */
+ svcpt->scp_service = svc;
+ /* Now allocate the request buffers, but don't post them now */
+ rc = ptlrpc_grow_req_bufs(svcpt, 0);
+ /* We shouldn't be under memory pressure at startup, so
+ * fail if we can't allocate all our buffers at this time. */
+ if (rc != 0)
+ goto failed;
+
+ return 0;
+
+ failed:
+ if (array->paa_reqs_count != NULL) {
+ OBD_FREE(array->paa_reqs_count, sizeof(__u32) * size);
+ array->paa_reqs_count = NULL;
+ }
+
+ if (array->paa_reqs_array != NULL) {
+ OBD_FREE(array->paa_reqs_array,
+ sizeof(struct list_head) * array->paa_size);
+ array->paa_reqs_array = NULL;
+ }
+
+ return -ENOMEM;
+}
+
+/**
+ * Initialize service on a given portal.
+ * This includes starting serving threads , allocating and posting rqbds and
+ * so on.
+ */
+struct ptlrpc_service *
+ptlrpc_register_service(struct ptlrpc_service_conf *conf,
+ struct proc_dir_entry *proc_entry)
+{
+ struct ptlrpc_service_cpt_conf *cconf = &conf->psc_cpt;
+ struct ptlrpc_service *service;
+ struct ptlrpc_service_part *svcpt;
+ struct cfs_cpt_table *cptable;
+ __u32 *cpts = NULL;
+ int ncpts;
+ int cpt;
+ int rc;
+ int i;
+
+ LASSERT(conf->psc_buf.bc_nbufs > 0);
+ LASSERT(conf->psc_buf.bc_buf_size >=
+ conf->psc_buf.bc_req_max_size + SPTLRPC_MAX_PAYLOAD);
+ LASSERT(conf->psc_thr.tc_ctx_tags != 0);
+
+ cptable = cconf->cc_cptable;
+ if (cptable == NULL)
+ cptable = cfs_cpt_table;
+
+ if (!conf->psc_thr.tc_cpu_affinity) {
+ ncpts = 1;
+ } else {
+ ncpts = cfs_cpt_number(cptable);
+ if (cconf->cc_pattern != NULL) {
+ struct cfs_expr_list *el;
+
+ rc = cfs_expr_list_parse(cconf->cc_pattern,
+ strlen(cconf->cc_pattern),
+ 0, ncpts - 1, &el);
+ if (rc != 0) {
+ CERROR("%s: invalid CPT pattern string: %s",
+ conf->psc_name, cconf->cc_pattern);
+ return ERR_PTR(-EINVAL);
+ }
+
+ rc = cfs_expr_list_values(el, ncpts, &cpts);
+ cfs_expr_list_free(el);
+ if (rc <= 0) {
+ CERROR("%s: failed to parse CPT array %s: %d\n",
+ conf->psc_name, cconf->cc_pattern, rc);
+ if (cpts != NULL)
+ OBD_FREE(cpts, sizeof(*cpts) * ncpts);
+ return ERR_PTR(rc < 0 ? rc : -EINVAL);
+ }
+ ncpts = rc;
+ }
+ }
+
+ OBD_ALLOC(service, offsetof(struct ptlrpc_service, srv_parts[ncpts]));
+ if (service == NULL) {
+ if (cpts != NULL)
+ OBD_FREE(cpts, sizeof(*cpts) * ncpts);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ service->srv_cptable = cptable;
+ service->srv_cpts = cpts;
+ service->srv_ncpts = ncpts;
+
+ service->srv_cpt_bits = 0; /* it's zero already, easy to read... */
+ while ((1 << service->srv_cpt_bits) < cfs_cpt_number(cptable))
+ service->srv_cpt_bits++;
+
+ /* public members */
+ spin_lock_init(&service->srv_lock);
+ service->srv_name = conf->psc_name;
+ service->srv_watchdog_factor = conf->psc_watchdog_factor;
+ INIT_LIST_HEAD(&service->srv_list); /* for safety of cleanup */
+
+ /* buffer configuration */
+ service->srv_nbuf_per_group = test_req_buffer_pressure ?
+ 1 : conf->psc_buf.bc_nbufs;
+ service->srv_max_req_size = conf->psc_buf.bc_req_max_size +
+ SPTLRPC_MAX_PAYLOAD;
+ service->srv_buf_size = conf->psc_buf.bc_buf_size;
+ service->srv_rep_portal = conf->psc_buf.bc_rep_portal;
+ service->srv_req_portal = conf->psc_buf.bc_req_portal;
+
+ /* Increase max reply size to next power of two */
+ service->srv_max_reply_size = 1;
+ while (service->srv_max_reply_size <
+ conf->psc_buf.bc_rep_max_size + SPTLRPC_MAX_PAYLOAD)
+ service->srv_max_reply_size <<= 1;
+
+ service->srv_thread_name = conf->psc_thr.tc_thr_name;
+ service->srv_ctx_tags = conf->psc_thr.tc_ctx_tags;
+ service->srv_hpreq_ratio = PTLRPC_SVC_HP_RATIO;
+ service->srv_ops = conf->psc_ops;
+
+ for (i = 0; i < ncpts; i++) {
+ if (!conf->psc_thr.tc_cpu_affinity)
+ cpt = CFS_CPT_ANY;
+ else
+ cpt = cpts != NULL ? cpts[i] : i;
+
+ OBD_CPT_ALLOC(svcpt, cptable, cpt, sizeof(*svcpt));
+ if (svcpt == NULL) {
+ rc = -ENOMEM;
+ goto failed;
+ }
+
+ service->srv_parts[i] = svcpt;
+ rc = ptlrpc_service_part_init(service, svcpt, cpt);
+ if (rc != 0)
+ goto failed;
+ }
+
+ ptlrpc_server_nthreads_check(service, conf);
+
+ rc = LNetSetLazyPortal(service->srv_req_portal);
+ LASSERT(rc == 0);
+
+ mutex_lock(&ptlrpc_all_services_mutex);
+ list_add(&service->srv_list, &ptlrpc_all_services);
+ mutex_unlock(&ptlrpc_all_services_mutex);
+
+ if (proc_entry != NULL)
+ ptlrpc_lprocfs_register_service(proc_entry, service);
+
+ rc = ptlrpc_service_nrs_setup(service);
+ if (rc != 0)
+ goto failed;
+
+ CDEBUG(D_NET, "%s: Started, listening on portal %d\n",
+ service->srv_name, service->srv_req_portal);
+
+ rc = ptlrpc_start_threads(service);
+ if (rc != 0) {
+ CERROR("Failed to start threads for service %s: %d\n",
+ service->srv_name, rc);
+ goto failed;
+ }
+
+ return service;
+failed:
+ ptlrpc_unregister_service(service);
+ return ERR_PTR(rc);
+}
+EXPORT_SYMBOL(ptlrpc_register_service);
+
+/**
+ * to actually free the request, must be called without holding svc_lock.
+ * note it's caller's responsibility to unlink req->rq_list.
+ */
+static void ptlrpc_server_free_request(struct ptlrpc_request *req)
+{
+ LASSERT(atomic_read(&req->rq_refcount) == 0);
+ LASSERT(list_empty(&req->rq_timed_list));
+
+ /* DEBUG_REQ() assumes the reply state of a request with a valid
+ * ref will not be destroyed until that reference is dropped. */
+ ptlrpc_req_drop_rs(req);
+
+ sptlrpc_svc_ctx_decref(req);
+
+ if (req != &req->rq_rqbd->rqbd_req) {
+ /* NB request buffers use an embedded
+ * req if the incoming req unlinked the
+ * MD; this isn't one of them! */
+ ptlrpc_request_cache_free(req);
+ }
+}
+
+/**
+ * drop a reference count of the request. if it reaches 0, we either
+ * put it into history list, or free it immediately.
+ */
+void ptlrpc_server_drop_request(struct ptlrpc_request *req)
+{
+ struct ptlrpc_request_buffer_desc *rqbd = req->rq_rqbd;
+ struct ptlrpc_service_part *svcpt = rqbd->rqbd_svcpt;
+ struct ptlrpc_service *svc = svcpt->scp_service;
+ int refcount;
+ struct list_head *tmp;
+ struct list_head *nxt;
+
+ if (!atomic_dec_and_test(&req->rq_refcount))
+ return;
+
+ if (req->rq_at_linked) {
+ spin_lock(&svcpt->scp_at_lock);
+ /* recheck with lock, in case it's unlinked by
+ * ptlrpc_at_check_timed() */
+ if (likely(req->rq_at_linked))
+ ptlrpc_at_remove_timed(req);
+ spin_unlock(&svcpt->scp_at_lock);
+ }
+
+ LASSERT(list_empty(&req->rq_timed_list));
+
+ /* finalize request */
+ if (req->rq_export) {
+ class_export_put(req->rq_export);
+ req->rq_export = NULL;
+ }
+
+ spin_lock(&svcpt->scp_lock);
+
+ list_add(&req->rq_list, &rqbd->rqbd_reqs);
+
+ refcount = --(rqbd->rqbd_refcount);
+ if (refcount == 0) {
+ /* request buffer is now idle: add to history */
+ list_del(&rqbd->rqbd_list);
+
+ list_add_tail(&rqbd->rqbd_list, &svcpt->scp_hist_rqbds);
+ svcpt->scp_hist_nrqbds++;
+
+ /* cull some history?
+ * I expect only about 1 or 2 rqbds need to be recycled here */
+ while (svcpt->scp_hist_nrqbds > svc->srv_hist_nrqbds_cpt_max) {
+ rqbd = list_entry(svcpt->scp_hist_rqbds.next,
+ struct ptlrpc_request_buffer_desc,
+ rqbd_list);
+
+ list_del(&rqbd->rqbd_list);
+ svcpt->scp_hist_nrqbds--;
+
+ /* remove rqbd's reqs from svc's req history while
+ * I've got the service lock */
+ list_for_each(tmp, &rqbd->rqbd_reqs) {
+ req = list_entry(tmp, struct ptlrpc_request,
+ rq_list);
+ /* Track the highest culled req seq */
+ if (req->rq_history_seq >
+ svcpt->scp_hist_seq_culled) {
+ svcpt->scp_hist_seq_culled =
+ req->rq_history_seq;
+ }
+ list_del(&req->rq_history_list);
+ }
+
+ spin_unlock(&svcpt->scp_lock);
+
+ list_for_each_safe(tmp, nxt, &rqbd->rqbd_reqs) {
+ req = list_entry(rqbd->rqbd_reqs.next,
+ struct ptlrpc_request,
+ rq_list);
+ list_del(&req->rq_list);
+ ptlrpc_server_free_request(req);
+ }
+
+ spin_lock(&svcpt->scp_lock);
+ /*
+ * now all reqs including the embedded req has been
+ * disposed, schedule request buffer for re-use.
+ */
+ LASSERT(atomic_read(&rqbd->rqbd_req.rq_refcount) ==
+ 0);
+ list_add_tail(&rqbd->rqbd_list,
+ &svcpt->scp_rqbd_idle);
+ }
+
+ spin_unlock(&svcpt->scp_lock);
+ } else if (req->rq_reply_state && req->rq_reply_state->rs_prealloc) {
+ /* If we are low on memory, we are not interested in history */
+ list_del(&req->rq_list);
+ list_del_init(&req->rq_history_list);
+
+ /* Track the highest culled req seq */
+ if (req->rq_history_seq > svcpt->scp_hist_seq_culled)
+ svcpt->scp_hist_seq_culled = req->rq_history_seq;
+
+ spin_unlock(&svcpt->scp_lock);
+
+ ptlrpc_server_free_request(req);
+ } else {
+ spin_unlock(&svcpt->scp_lock);
+ }
+}
+
+/** Change request export and move hp request from old export to new */
+void ptlrpc_request_change_export(struct ptlrpc_request *req,
+ struct obd_export *export)
+{
+ if (req->rq_export != NULL) {
+ if (!list_empty(&req->rq_exp_list)) {
+ /* remove rq_exp_list from last export */
+ spin_lock_bh(&req->rq_export->exp_rpc_lock);
+ list_del_init(&req->rq_exp_list);
+ spin_unlock_bh(&req->rq_export->exp_rpc_lock);
+
+ /* export has one reference already, so it`s safe to
+ * add req to export queue here and get another
+ * reference for request later */
+ spin_lock_bh(&export->exp_rpc_lock);
+ list_add(&req->rq_exp_list, &export->exp_hp_rpcs);
+ spin_unlock_bh(&export->exp_rpc_lock);
+ }
+ class_export_rpc_dec(req->rq_export);
+ class_export_put(req->rq_export);
+ }
+
+ /* request takes one export refcount */
+ req->rq_export = class_export_get(export);
+ class_export_rpc_inc(export);
+
+ return;
+}
+
+/**
+ * to finish a request: stop sending more early replies, and release
+ * the request.
+ */
+static void ptlrpc_server_finish_request(struct ptlrpc_service_part *svcpt,
+ struct ptlrpc_request *req)
+{
+ ptlrpc_server_hpreq_fini(req);
+
+ ptlrpc_server_drop_request(req);
+}
+
+/**
+ * to finish a active request: stop sending more early replies, and release
+ * the request. should be called after we finished handling the request.
+ */
+static void ptlrpc_server_finish_active_request(
+ struct ptlrpc_service_part *svcpt,
+ struct ptlrpc_request *req)
+{
+ spin_lock(&svcpt->scp_req_lock);
+ ptlrpc_nrs_req_stop_nolock(req);
+ svcpt->scp_nreqs_active--;
+ if (req->rq_hp)
+ svcpt->scp_nhreqs_active--;
+ spin_unlock(&svcpt->scp_req_lock);
+
+ ptlrpc_nrs_req_finalize(req);
+
+ if (req->rq_export != NULL)
+ class_export_rpc_dec(req->rq_export);
+
+ ptlrpc_server_finish_request(svcpt, req);
+}
+
+/**
+ * This function makes sure dead exports are evicted in a timely manner.
+ * This function is only called when some export receives a message (i.e.,
+ * the network is up.)
+ */
+static void ptlrpc_update_export_timer(struct obd_export *exp, long extra_delay)
+{
+ struct obd_export *oldest_exp;
+ time_t oldest_time, new_time;
+
+ LASSERT(exp);
+
+ /* Compensate for slow machines, etc, by faking our request time
+ into the future. Although this can break the strict time-ordering
+ of the list, we can be really lazy here - we don't have to evict
+ at the exact right moment. Eventually, all silent exports
+ will make it to the top of the list. */
+
+ /* Do not pay attention on 1sec or smaller renewals. */
+ new_time = get_seconds() + extra_delay;
+ if (exp->exp_last_request_time + 1 /*second */ >= new_time)
+ return;
+
+ exp->exp_last_request_time = new_time;
+
+ /* exports may get disconnected from the chain even though the
+ export has references, so we must keep the spin lock while
+ manipulating the lists */
+ spin_lock(&exp->exp_obd->obd_dev_lock);
+
+ if (list_empty(&exp->exp_obd_chain_timed)) {
+ /* this one is not timed */
+ spin_unlock(&exp->exp_obd->obd_dev_lock);
+ return;
+ }
+
+ list_move_tail(&exp->exp_obd_chain_timed,
+ &exp->exp_obd->obd_exports_timed);
+
+ oldest_exp = list_entry(exp->exp_obd->obd_exports_timed.next,
+ struct obd_export, exp_obd_chain_timed);
+ oldest_time = oldest_exp->exp_last_request_time;
+ spin_unlock(&exp->exp_obd->obd_dev_lock);
+
+ if (exp->exp_obd->obd_recovering) {
+ /* be nice to everyone during recovery */
+ return;
+ }
+
+ /* Note - racing to start/reset the obd_eviction timer is safe */
+ if (exp->exp_obd->obd_eviction_timer == 0) {
+ /* Check if the oldest entry is expired. */
+ if (get_seconds() > (oldest_time + PING_EVICT_TIMEOUT +
+ extra_delay)) {
+ /* We need a second timer, in case the net was down and
+ * it just came back. Since the pinger may skip every
+ * other PING_INTERVAL (see note in ptlrpc_pinger_main),
+ * we better wait for 3. */
+ exp->exp_obd->obd_eviction_timer =
+ get_seconds() + 3 * PING_INTERVAL;
+ CDEBUG(D_HA, "%s: Think about evicting %s from "CFS_TIME_T"\n",
+ exp->exp_obd->obd_name,
+ obd_export_nid2str(oldest_exp), oldest_time);
+ }
+ } else {
+ if (get_seconds() >
+ (exp->exp_obd->obd_eviction_timer + extra_delay)) {
+ /* The evictor won't evict anyone who we've heard from
+ * recently, so we don't have to check before we start
+ * it. */
+ if (!ping_evictor_wake(exp))
+ exp->exp_obd->obd_eviction_timer = 0;
+ }
+ }
+}
+
+/**
+ * Sanity check request \a req.
+ * Return 0 if all is ok, error code otherwise.
+ */
+static int ptlrpc_check_req(struct ptlrpc_request *req)
+{
+ struct obd_device *obd = req->rq_export->exp_obd;
+ int rc = 0;
+
+ if (unlikely(lustre_msg_get_conn_cnt(req->rq_reqmsg) <
+ req->rq_export->exp_conn_cnt)) {
+ DEBUG_REQ(D_RPCTRACE, req,
+ "DROPPING req from old connection %d < %d",
+ lustre_msg_get_conn_cnt(req->rq_reqmsg),
+ req->rq_export->exp_conn_cnt);
+ return -EEXIST;
+ }
+ if (unlikely(obd == NULL || obd->obd_fail)) {
+ /*
+ * Failing over, don't handle any more reqs, send
+ * error response instead.
+ */
+ CDEBUG(D_RPCTRACE, "Dropping req %p for failed obd %s\n",
+ req, (obd != NULL) ? obd->obd_name : "unknown");
+ rc = -ENODEV;
+ } else if (lustre_msg_get_flags(req->rq_reqmsg) &
+ (MSG_REPLAY | MSG_REQ_REPLAY_DONE) &&
+ !obd->obd_recovering) {
+ DEBUG_REQ(D_ERROR, req,
+ "Invalid replay without recovery");
+ class_fail_export(req->rq_export);
+ rc = -ENODEV;
+ } else if (lustre_msg_get_transno(req->rq_reqmsg) != 0 &&
+ !obd->obd_recovering) {
+ DEBUG_REQ(D_ERROR, req, "Invalid req with transno %llu without recovery",
+ lustre_msg_get_transno(req->rq_reqmsg));
+ class_fail_export(req->rq_export);
+ rc = -ENODEV;
+ }
+
+ if (unlikely(rc < 0)) {
+ req->rq_status = rc;
+ ptlrpc_error(req);
+ }
+ return rc;
+}
+
+static void ptlrpc_at_set_timer(struct ptlrpc_service_part *svcpt)
+{
+ struct ptlrpc_at_array *array = &svcpt->scp_at_array;
+ __s32 next;
+
+ if (array->paa_count == 0) {
+ cfs_timer_disarm(&svcpt->scp_at_timer);
+ return;
+ }
+
+ /* Set timer for closest deadline */
+ next = (__s32)(array->paa_deadline - get_seconds() -
+ at_early_margin);
+ if (next <= 0) {
+ ptlrpc_at_timer((unsigned long)svcpt);
+ } else {
+ cfs_timer_arm(&svcpt->scp_at_timer, cfs_time_shift(next));
+ CDEBUG(D_INFO, "armed %s at %+ds\n",
+ svcpt->scp_service->srv_name, next);
+ }
+}
+
+/* Add rpc to early reply check list */
+static int ptlrpc_at_add_timed(struct ptlrpc_request *req)
+{
+ struct ptlrpc_service_part *svcpt = req->rq_rqbd->rqbd_svcpt;
+ struct ptlrpc_at_array *array = &svcpt->scp_at_array;
+ struct ptlrpc_request *rq = NULL;
+ __u32 index;
+
+ if (AT_OFF)
+ return 0;
+
+ if (req->rq_no_reply)
+ return 0;
+
+ if ((lustre_msghdr_get_flags(req->rq_reqmsg) & MSGHDR_AT_SUPPORT) == 0)
+ return -ENOSYS;
+
+ spin_lock(&svcpt->scp_at_lock);
+ LASSERT(list_empty(&req->rq_timed_list));
+
+ index = (unsigned long)req->rq_deadline % array->paa_size;
+ if (array->paa_reqs_count[index] > 0) {
+ /* latest rpcs will have the latest deadlines in the list,
+ * so search backward. */
+ list_for_each_entry_reverse(rq,
+ &array->paa_reqs_array[index],
+ rq_timed_list) {
+ if (req->rq_deadline >= rq->rq_deadline) {
+ list_add(&req->rq_timed_list,
+ &rq->rq_timed_list);
+ break;
+ }
+ }
+ }
+
+ /* Add the request at the head of the list */
+ if (list_empty(&req->rq_timed_list))
+ list_add(&req->rq_timed_list,
+ &array->paa_reqs_array[index]);
+
+ spin_lock(&req->rq_lock);
+ req->rq_at_linked = 1;
+ spin_unlock(&req->rq_lock);
+ req->rq_at_index = index;
+ array->paa_reqs_count[index]++;
+ array->paa_count++;
+ if (array->paa_count == 1 || array->paa_deadline > req->rq_deadline) {
+ array->paa_deadline = req->rq_deadline;
+ ptlrpc_at_set_timer(svcpt);
+ }
+ spin_unlock(&svcpt->scp_at_lock);
+
+ return 0;
+}
+
+static void
+ptlrpc_at_remove_timed(struct ptlrpc_request *req)
+{
+ struct ptlrpc_at_array *array;
+
+ array = &req->rq_rqbd->rqbd_svcpt->scp_at_array;
+
+ /* NB: must call with hold svcpt::scp_at_lock */
+ LASSERT(!list_empty(&req->rq_timed_list));
+ list_del_init(&req->rq_timed_list);
+
+ spin_lock(&req->rq_lock);
+ req->rq_at_linked = 0;
+ spin_unlock(&req->rq_lock);
+
+ array->paa_reqs_count[req->rq_at_index]--;
+ array->paa_count--;
+}
+
+static int ptlrpc_at_send_early_reply(struct ptlrpc_request *req)
+{
+ struct ptlrpc_service_part *svcpt = req->rq_rqbd->rqbd_svcpt;
+ struct ptlrpc_request *reqcopy;
+ struct lustre_msg *reqmsg;
+ long olddl = req->rq_deadline - get_seconds();
+ time_t newdl;
+ int rc;
+
+ /* deadline is when the client expects us to reply, margin is the
+ difference between clients' and servers' expectations */
+ DEBUG_REQ(D_ADAPTTO, req,
+ "%ssending early reply (deadline %+lds, margin %+lds) for %d+%d",
+ AT_OFF ? "AT off - not " : "",
+ olddl, olddl - at_get(&svcpt->scp_at_estimate),
+ at_get(&svcpt->scp_at_estimate), at_extra);
+
+ if (AT_OFF)
+ return 0;
+
+ if (olddl < 0) {
+ DEBUG_REQ(D_WARNING, req, "Already past deadline (%+lds), not sending early reply. Consider increasing at_early_margin (%d)?",
+ olddl, at_early_margin);
+
+ /* Return an error so we're not re-added to the timed list. */
+ return -ETIMEDOUT;
+ }
+
+ if (!(lustre_msghdr_get_flags(req->rq_reqmsg) & MSGHDR_AT_SUPPORT)) {
+ DEBUG_REQ(D_INFO, req, "Wanted to ask client for more time, but no AT support");
+ return -ENOSYS;
+ }
+
+ if (req->rq_export &&
+ lustre_msg_get_flags(req->rq_reqmsg) &
+ (MSG_REPLAY | MSG_REQ_REPLAY_DONE | MSG_LOCK_REPLAY_DONE)) {
+ /* During recovery, we don't want to send too many early
+ * replies, but on the other hand we want to make sure the
+ * client has enough time to resend if the rpc is lost. So
+ * during the recovery period send at least 4 early replies,
+ * spacing them every at_extra if we can. at_estimate should
+ * always equal this fixed value during recovery. */
+ at_measured(&svcpt->scp_at_estimate, min(at_extra,
+ req->rq_export->exp_obd->obd_recovery_timeout / 4));
+ } else {
+ /* Fake our processing time into the future to ask the clients
+ * for some extra amount of time */
+ at_measured(&svcpt->scp_at_estimate, at_extra +
+ get_seconds() -
+ req->rq_arrival_time.tv_sec);
+
+ /* Check to see if we've actually increased the deadline -
+ * we may be past adaptive_max */
+ if (req->rq_deadline >= req->rq_arrival_time.tv_sec +
+ at_get(&svcpt->scp_at_estimate)) {
+ DEBUG_REQ(D_WARNING, req, "Couldn't add any time (%ld/%ld), not sending early reply\n",
+ olddl, req->rq_arrival_time.tv_sec +
+ at_get(&svcpt->scp_at_estimate) -
+ get_seconds());
+ return -ETIMEDOUT;
+ }
+ }
+ newdl = get_seconds() + at_get(&svcpt->scp_at_estimate);
+
+ reqcopy = ptlrpc_request_cache_alloc(GFP_NOFS);
+ if (reqcopy == NULL)
+ return -ENOMEM;
+ OBD_ALLOC_LARGE(reqmsg, req->rq_reqlen);
+ if (!reqmsg) {
+ rc = -ENOMEM;
+ goto out_free;
+ }
+
+ *reqcopy = *req;
+ reqcopy->rq_reply_state = NULL;
+ reqcopy->rq_rep_swab_mask = 0;
+ reqcopy->rq_pack_bulk = 0;
+ reqcopy->rq_pack_udesc = 0;
+ reqcopy->rq_packed_final = 0;
+ sptlrpc_svc_ctx_addref(reqcopy);
+ /* We only need the reqmsg for the magic */
+ reqcopy->rq_reqmsg = reqmsg;
+ memcpy(reqmsg, req->rq_reqmsg, req->rq_reqlen);
+
+ LASSERT(atomic_read(&req->rq_refcount));
+ /** if it is last refcount then early reply isn't needed */
+ if (atomic_read(&req->rq_refcount) == 1) {
+ DEBUG_REQ(D_ADAPTTO, reqcopy, "Normal reply already sent out, abort sending early reply\n");
+ rc = -EINVAL;
+ goto out;
+ }
+
+ /* Connection ref */
+ reqcopy->rq_export = class_conn2export(
+ lustre_msg_get_handle(reqcopy->rq_reqmsg));
+ if (reqcopy->rq_export == NULL) {
+ rc = -ENODEV;
+ goto out;
+ }
+
+ /* RPC ref */
+ class_export_rpc_inc(reqcopy->rq_export);
+ if (reqcopy->rq_export->exp_obd &&
+ reqcopy->rq_export->exp_obd->obd_fail) {
+ rc = -ENODEV;
+ goto out_put;
+ }
+
+ rc = lustre_pack_reply_flags(reqcopy, 1, NULL, NULL, LPRFL_EARLY_REPLY);
+ if (rc)
+ goto out_put;
+
+ rc = ptlrpc_send_reply(reqcopy, PTLRPC_REPLY_EARLY);
+
+ if (!rc) {
+ /* Adjust our own deadline to what we told the client */
+ req->rq_deadline = newdl;
+ req->rq_early_count++; /* number sent, server side */
+ } else {
+ DEBUG_REQ(D_ERROR, req, "Early reply send failed %d", rc);
+ }
+
+ /* Free the (early) reply state from lustre_pack_reply.
+ (ptlrpc_send_reply takes it's own rs ref, so this is safe here) */
+ ptlrpc_req_drop_rs(reqcopy);
+
+out_put:
+ class_export_rpc_dec(reqcopy->rq_export);
+ class_export_put(reqcopy->rq_export);
+out:
+ sptlrpc_svc_ctx_decref(reqcopy);
+ OBD_FREE_LARGE(reqmsg, req->rq_reqlen);
+out_free:
+ ptlrpc_request_cache_free(reqcopy);
+ return rc;
+}
+
+/* Send early replies to everybody expiring within at_early_margin
+ asking for at_extra time */
+static int ptlrpc_at_check_timed(struct ptlrpc_service_part *svcpt)
+{
+ struct ptlrpc_at_array *array = &svcpt->scp_at_array;
+ struct ptlrpc_request *rq, *n;
+ struct list_head work_list;
+ __u32 index, count;
+ time_t deadline;
+ time_t now = get_seconds();
+ long delay;
+ int first, counter = 0;
+
+ spin_lock(&svcpt->scp_at_lock);
+ if (svcpt->scp_at_check == 0) {
+ spin_unlock(&svcpt->scp_at_lock);
+ return 0;
+ }
+ delay = cfs_time_sub(cfs_time_current(), svcpt->scp_at_checktime);
+ svcpt->scp_at_check = 0;
+
+ if (array->paa_count == 0) {
+ spin_unlock(&svcpt->scp_at_lock);
+ return 0;
+ }
+
+ /* The timer went off, but maybe the nearest rpc already completed. */
+ first = array->paa_deadline - now;
+ if (first > at_early_margin) {
+ /* We've still got plenty of time. Reset the timer. */
+ ptlrpc_at_set_timer(svcpt);
+ spin_unlock(&svcpt->scp_at_lock);
+ return 0;
+ }
+
+ /* We're close to a timeout, and we don't know how much longer the
+ server will take. Send early replies to everyone expiring soon. */
+ INIT_LIST_HEAD(&work_list);
+ deadline = -1;
+ index = (unsigned long)array->paa_deadline % array->paa_size;
+ count = array->paa_count;
+ while (count > 0) {
+ count -= array->paa_reqs_count[index];
+ list_for_each_entry_safe(rq, n,
+ &array->paa_reqs_array[index],
+ rq_timed_list) {
+ if (rq->rq_deadline > now + at_early_margin) {
+ /* update the earliest deadline */
+ if (deadline == -1 ||
+ rq->rq_deadline < deadline)
+ deadline = rq->rq_deadline;
+ break;
+ }
+
+ ptlrpc_at_remove_timed(rq);
+ /**
+ * ptlrpc_server_drop_request() may drop
+ * refcount to 0 already. Let's check this and
+ * don't add entry to work_list
+ */
+ if (likely(atomic_inc_not_zero(&rq->rq_refcount)))
+ list_add(&rq->rq_timed_list, &work_list);
+ counter++;
+ }
+
+ if (++index >= array->paa_size)
+ index = 0;
+ }
+ array->paa_deadline = deadline;
+ /* we have a new earliest deadline, restart the timer */
+ ptlrpc_at_set_timer(svcpt);
+
+ spin_unlock(&svcpt->scp_at_lock);
+
+ CDEBUG(D_ADAPTTO, "timeout in %+ds, asking for %d secs on %d early replies\n",
+ first, at_extra, counter);
+ if (first < 0) {
+ /* We're already past request deadlines before we even get a
+ chance to send early replies */
+ LCONSOLE_WARN("%s: This server is not able to keep up with request traffic (cpu-bound).\n",
+ svcpt->scp_service->srv_name);
+ CWARN("earlyQ=%d reqQ=%d recA=%d, svcEst=%d, delay=" CFS_DURATION_T "(jiff)\n",
+ counter, svcpt->scp_nreqs_incoming,
+ svcpt->scp_nreqs_active,
+ at_get(&svcpt->scp_at_estimate), delay);
+ }
+
+ /* we took additional refcount so entries can't be deleted from list, no
+ * locking is needed */
+ while (!list_empty(&work_list)) {
+ rq = list_entry(work_list.next, struct ptlrpc_request,
+ rq_timed_list);
+ list_del_init(&rq->rq_timed_list);
+
+ if (ptlrpc_at_send_early_reply(rq) == 0)
+ ptlrpc_at_add_timed(rq);
+
+ ptlrpc_server_drop_request(rq);
+ }
+
+ return 1; /* return "did_something" for liblustre */
+}
+
+/**
+ * Put the request to the export list if the request may become
+ * a high priority one.
+ */
+static int ptlrpc_server_hpreq_init(struct ptlrpc_service_part *svcpt,
+ struct ptlrpc_request *req)
+{
+ int rc = 0;
+
+ if (svcpt->scp_service->srv_ops.so_hpreq_handler) {
+ rc = svcpt->scp_service->srv_ops.so_hpreq_handler(req);
+ if (rc < 0)
+ return rc;
+ LASSERT(rc == 0);
+ }
+ if (req->rq_export && req->rq_ops) {
+ /* Perform request specific check. We should do this check
+ * before the request is added into exp_hp_rpcs list otherwise
+ * it may hit swab race at LU-1044. */
+ if (req->rq_ops->hpreq_check) {
+ rc = req->rq_ops->hpreq_check(req);
+ /**
+ * XXX: Out of all current
+ * ptlrpc_hpreq_ops::hpreq_check(), only
+ * ldlm_cancel_hpreq_check() can return an error code;
+ * other functions assert in similar places, which seems
+ * odd. What also does not seem right is that handlers
+ * for those RPCs do not assert on the same checks, but
+ * rather handle the error cases. e.g. see
+ * ost_rw_hpreq_check(), and ost_brw_read(),
+ * ost_brw_write().
+ */
+ if (rc < 0)
+ return rc;
+ LASSERT(rc == 0 || rc == 1);
+ }
+
+ spin_lock_bh(&req->rq_export->exp_rpc_lock);
+ list_add(&req->rq_exp_list,
+ &req->rq_export->exp_hp_rpcs);
+ spin_unlock_bh(&req->rq_export->exp_rpc_lock);
+ }
+
+ ptlrpc_nrs_req_initialize(svcpt, req, rc);
+
+ return rc;
+}
+
+/** Remove the request from the export list. */
+static void ptlrpc_server_hpreq_fini(struct ptlrpc_request *req)
+{
+ if (req->rq_export && req->rq_ops) {
+ /* refresh lock timeout again so that client has more
+ * room to send lock cancel RPC. */
+ if (req->rq_ops->hpreq_fini)
+ req->rq_ops->hpreq_fini(req);
+
+ spin_lock_bh(&req->rq_export->exp_rpc_lock);
+ list_del_init(&req->rq_exp_list);
+ spin_unlock_bh(&req->rq_export->exp_rpc_lock);
+ }
+}
+
+static int ptlrpc_hpreq_check(struct ptlrpc_request *req)
+{
+ return 1;
+}
+
+static struct ptlrpc_hpreq_ops ptlrpc_hpreq_common = {
+ .hpreq_check = ptlrpc_hpreq_check,
+};
+
+/* Hi-Priority RPC check by RPC operation code. */
+int ptlrpc_hpreq_handler(struct ptlrpc_request *req)
+{
+ int opc = lustre_msg_get_opc(req->rq_reqmsg);
+
+ /* Check for export to let only reconnects for not yet evicted
+ * export to become a HP rpc. */
+ if ((req->rq_export != NULL) &&
+ (opc == OBD_PING || opc == MDS_CONNECT || opc == OST_CONNECT))
+ req->rq_ops = &ptlrpc_hpreq_common;
+
+ return 0;
+}
+EXPORT_SYMBOL(ptlrpc_hpreq_handler);
+
+static int ptlrpc_server_request_add(struct ptlrpc_service_part *svcpt,
+ struct ptlrpc_request *req)
+{
+ int rc;
+
+ rc = ptlrpc_server_hpreq_init(svcpt, req);
+ if (rc < 0)
+ return rc;
+
+ ptlrpc_nrs_req_add(svcpt, req, !!rc);
+
+ return 0;
+}
+
+/**
+ * Allow to handle high priority request
+ * User can call it w/o any lock but need to hold
+ * ptlrpc_service_part::scp_req_lock to get reliable result
+ */
+static bool ptlrpc_server_allow_high(struct ptlrpc_service_part *svcpt,
+ bool force)
+{
+ int running = svcpt->scp_nthrs_running;
+
+ if (!nrs_svcpt_has_hp(svcpt))
+ return false;
+
+ if (force)
+ return true;
+
+ if (unlikely(svcpt->scp_service->srv_req_portal == MDS_REQUEST_PORTAL &&
+ CFS_FAIL_PRECHECK(OBD_FAIL_PTLRPC_CANCEL_RESEND))) {
+ /* leave just 1 thread for normal RPCs */
+ running = PTLRPC_NTHRS_INIT;
+ if (svcpt->scp_service->srv_ops.so_hpreq_handler != NULL)
+ running += 1;
+ }
+
+ if (svcpt->scp_nreqs_active >= running - 1)
+ return false;
+
+ if (svcpt->scp_nhreqs_active == 0)
+ return true;
+
+ return !ptlrpc_nrs_req_pending_nolock(svcpt, false) ||
+ svcpt->scp_hreq_count < svcpt->scp_service->srv_hpreq_ratio;
+}
+
+static bool ptlrpc_server_high_pending(struct ptlrpc_service_part *svcpt,
+ bool force)
+{
+ return ptlrpc_server_allow_high(svcpt, force) &&
+ ptlrpc_nrs_req_pending_nolock(svcpt, true);
+}
+
+/**
+ * Only allow normal priority requests on a service that has a high-priority
+ * queue if forced (i.e. cleanup), if there are other high priority requests
+ * already being processed (i.e. those threads can service more high-priority
+ * requests), or if there are enough idle threads that a later thread can do
+ * a high priority request.
+ * User can call it w/o any lock but need to hold
+ * ptlrpc_service_part::scp_req_lock to get reliable result
+ */
+static bool ptlrpc_server_allow_normal(struct ptlrpc_service_part *svcpt,
+ bool force)
+{
+ int running = svcpt->scp_nthrs_running;
+ if (unlikely(svcpt->scp_service->srv_req_portal == MDS_REQUEST_PORTAL &&
+ CFS_FAIL_PRECHECK(OBD_FAIL_PTLRPC_CANCEL_RESEND))) {
+ /* leave just 1 thread for normal RPCs */
+ running = PTLRPC_NTHRS_INIT;
+ if (svcpt->scp_service->srv_ops.so_hpreq_handler != NULL)
+ running += 1;
+ }
+
+ if (force ||
+ svcpt->scp_nreqs_active < running - 2)
+ return true;
+
+ if (svcpt->scp_nreqs_active >= running - 1)
+ return false;
+
+ return svcpt->scp_nhreqs_active > 0 || !nrs_svcpt_has_hp(svcpt);
+}
+
+static bool ptlrpc_server_normal_pending(struct ptlrpc_service_part *svcpt,
+ bool force)
+{
+ return ptlrpc_server_allow_normal(svcpt, force) &&
+ ptlrpc_nrs_req_pending_nolock(svcpt, false);
+}
+
+/**
+ * Returns true if there are requests available in incoming
+ * request queue for processing and it is allowed to fetch them.
+ * User can call it w/o any lock but need to hold ptlrpc_service::scp_req_lock
+ * to get reliable result
+ * \see ptlrpc_server_allow_normal
+ * \see ptlrpc_server_allow high
+ */
+static inline bool
+ptlrpc_server_request_pending(struct ptlrpc_service_part *svcpt, bool force)
+{
+ return ptlrpc_server_high_pending(svcpt, force) ||
+ ptlrpc_server_normal_pending(svcpt, force);
+}
+
+/**
+ * Fetch a request for processing from queue of unprocessed requests.
+ * Favors high-priority requests.
+ * Returns a pointer to fetched request.
+ */
+static struct ptlrpc_request *
+ptlrpc_server_request_get(struct ptlrpc_service_part *svcpt, bool force)
+{
+ struct ptlrpc_request *req = NULL;
+
+ spin_lock(&svcpt->scp_req_lock);
+
+ if (ptlrpc_server_high_pending(svcpt, force)) {
+ req = ptlrpc_nrs_req_get_nolock(svcpt, true, force);
+ if (req != NULL) {
+ svcpt->scp_hreq_count++;
+ goto got_request;
+ }
+ }
+
+ if (ptlrpc_server_normal_pending(svcpt, force)) {
+ req = ptlrpc_nrs_req_get_nolock(svcpt, false, force);
+ if (req != NULL) {
+ svcpt->scp_hreq_count = 0;
+ goto got_request;
+ }
+ }
+
+ spin_unlock(&svcpt->scp_req_lock);
+ return NULL;
+
+got_request:
+ svcpt->scp_nreqs_active++;
+ if (req->rq_hp)
+ svcpt->scp_nhreqs_active++;
+
+ spin_unlock(&svcpt->scp_req_lock);
+
+ if (likely(req->rq_export))
+ class_export_rpc_inc(req->rq_export);
+
+ return req;
+}
+
+/**
+ * Handle freshly incoming reqs, add to timed early reply list,
+ * pass on to regular request queue.
+ * All incoming requests pass through here before getting into
+ * ptlrpc_server_handle_req later on.
+ */
+static int
+ptlrpc_server_handle_req_in(struct ptlrpc_service_part *svcpt,
+ struct ptlrpc_thread *thread)
+{
+ struct ptlrpc_service *svc = svcpt->scp_service;
+ struct ptlrpc_request *req;
+ __u32 deadline;
+ int rc;
+
+ spin_lock(&svcpt->scp_lock);
+ if (list_empty(&svcpt->scp_req_incoming)) {
+ spin_unlock(&svcpt->scp_lock);
+ return 0;
+ }
+
+ req = list_entry(svcpt->scp_req_incoming.next,
+ struct ptlrpc_request, rq_list);
+ list_del_init(&req->rq_list);
+ svcpt->scp_nreqs_incoming--;
+ /* Consider this still a "queued" request as far as stats are
+ * concerned */
+ spin_unlock(&svcpt->scp_lock);
+
+ /* go through security check/transform */
+ rc = sptlrpc_svc_unwrap_request(req);
+ switch (rc) {
+ case SECSVC_OK:
+ break;
+ case SECSVC_COMPLETE:
+ target_send_reply(req, 0, OBD_FAIL_MDS_ALL_REPLY_NET);
+ goto err_req;
+ case SECSVC_DROP:
+ goto err_req;
+ default:
+ LBUG();
+ }
+
+ /*
+ * for null-flavored rpc, msg has been unpacked by sptlrpc, although
+ * redo it wouldn't be harmful.
+ */
+ if (SPTLRPC_FLVR_POLICY(req->rq_flvr.sf_rpc) != SPTLRPC_POLICY_NULL) {
+ rc = ptlrpc_unpack_req_msg(req, req->rq_reqlen);
+ if (rc != 0) {
+ CERROR("error unpacking request: ptl %d from %s x%llu\n",
+ svc->srv_req_portal, libcfs_id2str(req->rq_peer),
+ req->rq_xid);
+ goto err_req;
+ }
+ }
+
+ rc = lustre_unpack_req_ptlrpc_body(req, MSG_PTLRPC_BODY_OFF);
+ if (rc) {
+ CERROR("error unpacking ptlrpc body: ptl %d from %s x%llu\n",
+ svc->srv_req_portal, libcfs_id2str(req->rq_peer),
+ req->rq_xid);
+ goto err_req;
+ }
+
+ if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_DROP_REQ_OPC) &&
+ lustre_msg_get_opc(req->rq_reqmsg) == cfs_fail_val) {
+ CERROR("drop incoming rpc opc %u, x%llu\n",
+ cfs_fail_val, req->rq_xid);
+ goto err_req;
+ }
+
+ rc = -EINVAL;
+ if (lustre_msg_get_type(req->rq_reqmsg) != PTL_RPC_MSG_REQUEST) {
+ CERROR("wrong packet type received (type=%u) from %s\n",
+ lustre_msg_get_type(req->rq_reqmsg),
+ libcfs_id2str(req->rq_peer));
+ goto err_req;
+ }
+
+ switch (lustre_msg_get_opc(req->rq_reqmsg)) {
+ case MDS_WRITEPAGE:
+ case OST_WRITE:
+ req->rq_bulk_write = 1;
+ break;
+ case MDS_READPAGE:
+ case OST_READ:
+ case MGS_CONFIG_READ:
+ req->rq_bulk_read = 1;
+ break;
+ }
+
+ CDEBUG(D_RPCTRACE, "got req x%llu\n", req->rq_xid);
+
+ req->rq_export = class_conn2export(
+ lustre_msg_get_handle(req->rq_reqmsg));
+ if (req->rq_export) {
+ rc = ptlrpc_check_req(req);
+ if (rc == 0) {
+ rc = sptlrpc_target_export_check(req->rq_export, req);
+ if (rc)
+ DEBUG_REQ(D_ERROR, req, "DROPPING req with illegal security flavor,");
+ }
+
+ if (rc)
+ goto err_req;
+ ptlrpc_update_export_timer(req->rq_export, 0);
+ }
+
+ /* req_in handling should/must be fast */
+ if (get_seconds() - req->rq_arrival_time.tv_sec > 5)
+ DEBUG_REQ(D_WARNING, req, "Slow req_in handling "CFS_DURATION_T"s",
+ cfs_time_sub(get_seconds(),
+ req->rq_arrival_time.tv_sec));
+
+ /* Set rpc server deadline and add it to the timed list */
+ deadline = (lustre_msghdr_get_flags(req->rq_reqmsg) &
+ MSGHDR_AT_SUPPORT) ?
+ /* The max time the client expects us to take */
+ lustre_msg_get_timeout(req->rq_reqmsg) : obd_timeout;
+ req->rq_deadline = req->rq_arrival_time.tv_sec + deadline;
+ if (unlikely(deadline == 0)) {
+ DEBUG_REQ(D_ERROR, req, "Dropping request with 0 timeout");
+ goto err_req;
+ }
+
+ req->rq_svc_thread = thread;
+
+ ptlrpc_at_add_timed(req);
+
+ /* Move it over to the request processing queue */
+ rc = ptlrpc_server_request_add(svcpt, req);
+ if (rc)
+ goto err_req;
+
+ wake_up(&svcpt->scp_waitq);
+ return 1;
+
+err_req:
+ ptlrpc_server_finish_request(svcpt, req);
+
+ return 1;
+}
+
+/**
+ * Main incoming request handling logic.
+ * Calls handler function from service to do actual processing.
+ */
+static int
+ptlrpc_server_handle_request(struct ptlrpc_service_part *svcpt,
+ struct ptlrpc_thread *thread)
+{
+ struct ptlrpc_service *svc = svcpt->scp_service;
+ struct ptlrpc_request *request;
+ struct timeval work_start;
+ struct timeval work_end;
+ long timediff;
+ int rc;
+ int fail_opc = 0;
+
+ request = ptlrpc_server_request_get(svcpt, false);
+ if (request == NULL)
+ return 0;
+
+ if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_HPREQ_NOTIMEOUT))
+ fail_opc = OBD_FAIL_PTLRPC_HPREQ_NOTIMEOUT;
+ else if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_HPREQ_TIMEOUT))
+ fail_opc = OBD_FAIL_PTLRPC_HPREQ_TIMEOUT;
+
+ if (unlikely(fail_opc)) {
+ if (request->rq_export && request->rq_ops)
+ OBD_FAIL_TIMEOUT(fail_opc, 4);
+ }
+
+ ptlrpc_rqphase_move(request, RQ_PHASE_INTERPRET);
+
+ if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_DUMP_LOG))
+ libcfs_debug_dumplog();
+
+ do_gettimeofday(&work_start);
+ timediff = cfs_timeval_sub(&work_start, &request->rq_arrival_time,
+ NULL);
+ if (likely(svc->srv_stats != NULL)) {
+ lprocfs_counter_add(svc->srv_stats, PTLRPC_REQWAIT_CNTR,
+ timediff);
+ lprocfs_counter_add(svc->srv_stats, PTLRPC_REQQDEPTH_CNTR,
+ svcpt->scp_nreqs_incoming);
+ lprocfs_counter_add(svc->srv_stats, PTLRPC_REQACTIVE_CNTR,
+ svcpt->scp_nreqs_active);
+ lprocfs_counter_add(svc->srv_stats, PTLRPC_TIMEOUT,
+ at_get(&svcpt->scp_at_estimate));
+ }
+
+ rc = lu_context_init(&request->rq_session, LCT_SESSION | LCT_NOREF);
+ if (rc) {
+ CERROR("Failure to initialize session: %d\n", rc);
+ goto out_req;
+ }
+ request->rq_session.lc_thread = thread;
+ request->rq_session.lc_cookie = 0x5;
+ lu_context_enter(&request->rq_session);
+
+ CDEBUG(D_NET, "got req %llu\n", request->rq_xid);
+
+ request->rq_svc_thread = thread;
+ if (thread)
+ request->rq_svc_thread->t_env->le_ses = &request->rq_session;
+
+ if (likely(request->rq_export)) {
+ if (unlikely(ptlrpc_check_req(request)))
+ goto put_conn;
+ ptlrpc_update_export_timer(request->rq_export, timediff >> 19);
+ }
+
+ /* Discard requests queued for longer than the deadline.
+ The deadline is increased if we send an early reply. */
+ if (get_seconds() > request->rq_deadline) {
+ DEBUG_REQ(D_ERROR, request, "Dropping timed-out request from %s: deadline " CFS_DURATION_T ":" CFS_DURATION_T "s ago\n",
+ libcfs_id2str(request->rq_peer),
+ cfs_time_sub(request->rq_deadline,
+ request->rq_arrival_time.tv_sec),
+ cfs_time_sub(get_seconds(),
+ request->rq_deadline));
+ goto put_conn;
+ }
+
+ CDEBUG(D_RPCTRACE, "Handling RPC pname:cluuid+ref:pid:xid:nid:opc %s:%s+%d:%d:x%llu:%s:%d\n",
+ current_comm(),
+ (request->rq_export ?
+ (char *)request->rq_export->exp_client_uuid.uuid : "0"),
+ (request->rq_export ?
+ atomic_read(&request->rq_export->exp_refcount) : -99),
+ lustre_msg_get_status(request->rq_reqmsg), request->rq_xid,
+ libcfs_id2str(request->rq_peer),
+ lustre_msg_get_opc(request->rq_reqmsg));
+
+ if (lustre_msg_get_opc(request->rq_reqmsg) != OBD_PING)
+ CFS_FAIL_TIMEOUT_MS(OBD_FAIL_PTLRPC_PAUSE_REQ, cfs_fail_val);
+
+ rc = svc->srv_ops.so_req_handler(request);
+
+ ptlrpc_rqphase_move(request, RQ_PHASE_COMPLETE);
+
+put_conn:
+ lu_context_exit(&request->rq_session);
+ lu_context_fini(&request->rq_session);
+
+ if (unlikely(get_seconds() > request->rq_deadline)) {
+ DEBUG_REQ(D_WARNING, request,
+ "Request took longer than estimated ("
+ CFS_DURATION_T":"CFS_DURATION_T
+ "s); client may timeout.",
+ cfs_time_sub(request->rq_deadline,
+ request->rq_arrival_time.tv_sec),
+ cfs_time_sub(get_seconds(),
+ request->rq_deadline));
+ }
+
+ do_gettimeofday(&work_end);
+ timediff = cfs_timeval_sub(&work_end, &work_start, NULL);
+ CDEBUG(D_RPCTRACE, "Handled RPC pname:cluuid+ref:pid:xid:nid:opc %s:%s+%d:%d:x%llu:%s:%d Request processed in %ldus (%ldus total) trans %llu rc %d/%d\n",
+ current_comm(),
+ (request->rq_export ?
+ (char *)request->rq_export->exp_client_uuid.uuid : "0"),
+ (request->rq_export ?
+ atomic_read(&request->rq_export->exp_refcount) : -99),
+ lustre_msg_get_status(request->rq_reqmsg),
+ request->rq_xid,
+ libcfs_id2str(request->rq_peer),
+ lustre_msg_get_opc(request->rq_reqmsg),
+ timediff,
+ cfs_timeval_sub(&work_end, &request->rq_arrival_time, NULL),
+ (request->rq_repmsg ?
+ lustre_msg_get_transno(request->rq_repmsg) :
+ request->rq_transno),
+ request->rq_status,
+ (request->rq_repmsg ?
+ lustre_msg_get_status(request->rq_repmsg) : -999));
+ if (likely(svc->srv_stats != NULL && request->rq_reqmsg != NULL)) {
+ __u32 op = lustre_msg_get_opc(request->rq_reqmsg);
+ int opc = opcode_offset(op);
+ if (opc > 0 && !(op == LDLM_ENQUEUE || op == MDS_REINT)) {
+ LASSERT(opc < LUSTRE_MAX_OPCODES);
+ lprocfs_counter_add(svc->srv_stats,
+ opc + EXTRA_MAX_OPCODES,
+ timediff);
+ }
+ }
+ if (unlikely(request->rq_early_count)) {
+ DEBUG_REQ(D_ADAPTTO, request,
+ "sent %d early replies before finishing in "
+ CFS_DURATION_T"s",
+ request->rq_early_count,
+ cfs_time_sub(work_end.tv_sec,
+ request->rq_arrival_time.tv_sec));
+ }
+
+out_req:
+ ptlrpc_server_finish_active_request(svcpt, request);
+
+ return 1;
+}
+
+/**
+ * An internal function to process a single reply state object.
+ */
+static int
+ptlrpc_handle_rs(struct ptlrpc_reply_state *rs)
+{
+ struct ptlrpc_service_part *svcpt = rs->rs_svcpt;
+ struct ptlrpc_service *svc = svcpt->scp_service;
+ struct obd_export *exp;
+ int nlocks;
+ int been_handled;
+
+ exp = rs->rs_export;
+
+ LASSERT(rs->rs_difficult);
+ LASSERT(rs->rs_scheduled);
+ LASSERT(list_empty(&rs->rs_list));
+
+ spin_lock(&exp->exp_lock);
+ /* Noop if removed already */
+ list_del_init(&rs->rs_exp_list);
+ spin_unlock(&exp->exp_lock);
+
+ /* The disk commit callback holds exp_uncommitted_replies_lock while it
+ * iterates over newly committed replies, removing them from
+ * exp_uncommitted_replies. It then drops this lock and schedules the
+ * replies it found for handling here.
+ *
+ * We can avoid contention for exp_uncommitted_replies_lock between the
+ * HRT threads and further commit callbacks by checking rs_committed
+ * which is set in the commit callback while it holds both
+ * rs_lock and exp_uncommitted_reples.
+ *
+ * If we see rs_committed clear, the commit callback _may_ not have
+ * handled this reply yet and we race with it to grab
+ * exp_uncommitted_replies_lock before removing the reply from
+ * exp_uncommitted_replies. Note that if we lose the race and the
+ * reply has already been removed, list_del_init() is a noop.
+ *
+ * If we see rs_committed set, we know the commit callback is handling,
+ * or has handled this reply since store reordering might allow us to
+ * see rs_committed set out of sequence. But since this is done
+ * holding rs_lock, we can be sure it has all completed once we hold
+ * rs_lock, which we do right next.
+ */
+ if (!rs->rs_committed) {
+ spin_lock(&exp->exp_uncommitted_replies_lock);
+ list_del_init(&rs->rs_obd_list);
+ spin_unlock(&exp->exp_uncommitted_replies_lock);
+ }
+
+ spin_lock(&rs->rs_lock);
+
+ been_handled = rs->rs_handled;
+ rs->rs_handled = 1;
+
+ nlocks = rs->rs_nlocks; /* atomic "steal", but */
+ rs->rs_nlocks = 0; /* locks still on rs_locks! */
+
+ if (nlocks == 0 && !been_handled) {
+ /* If we see this, we should already have seen the warning
+ * in mds_steal_ack_locks() */
+ CDEBUG(D_HA, "All locks stolen from rs %p x%lld.t%lld o%d NID %s\n",
+ rs,
+ rs->rs_xid, rs->rs_transno, rs->rs_opc,
+ libcfs_nid2str(exp->exp_connection->c_peer.nid));
+ }
+
+ if ((!been_handled && rs->rs_on_net) || nlocks > 0) {
+ spin_unlock(&rs->rs_lock);
+
+ if (!been_handled && rs->rs_on_net) {
+ LNetMDUnlink(rs->rs_md_h);
+ /* Ignore return code; we're racing with completion */
+ }
+
+ while (nlocks-- > 0)
+ ldlm_lock_decref(&rs->rs_locks[nlocks],
+ rs->rs_modes[nlocks]);
+
+ spin_lock(&rs->rs_lock);
+ }
+
+ rs->rs_scheduled = 0;
+
+ if (!rs->rs_on_net) {
+ /* Off the net */
+ spin_unlock(&rs->rs_lock);
+
+ class_export_put(exp);
+ rs->rs_export = NULL;
+ ptlrpc_rs_decref(rs);
+ if (atomic_dec_and_test(&svcpt->scp_nreps_difficult) &&
+ svc->srv_is_stopping)
+ wake_up_all(&svcpt->scp_waitq);
+ return 1;
+ }
+
+ /* still on the net; callback will schedule */
+ spin_unlock(&rs->rs_lock);
+ return 1;
+}
+
+
+static void
+ptlrpc_check_rqbd_pool(struct ptlrpc_service_part *svcpt)
+{
+ int avail = svcpt->scp_nrqbds_posted;
+ int low_water = test_req_buffer_pressure ? 0 :
+ svcpt->scp_service->srv_nbuf_per_group / 2;
+
+ /* NB I'm not locking; just looking. */
+
+ /* CAVEAT EMPTOR: We might be allocating buffers here because we've
+ * allowed the request history to grow out of control. We could put a
+ * sanity check on that here and cull some history if we need the
+ * space. */
+
+ if (avail <= low_water)
+ ptlrpc_grow_req_bufs(svcpt, 1);
+
+ if (svcpt->scp_service->srv_stats) {
+ lprocfs_counter_add(svcpt->scp_service->srv_stats,
+ PTLRPC_REQBUF_AVAIL_CNTR, avail);
+ }
+}
+
+static int
+ptlrpc_retry_rqbds(void *arg)
+{
+ struct ptlrpc_service_part *svcpt = (struct ptlrpc_service_part *)arg;
+
+ svcpt->scp_rqbd_timeout = 0;
+ return -ETIMEDOUT;
+}
+
+static inline int
+ptlrpc_threads_enough(struct ptlrpc_service_part *svcpt)
+{
+ return svcpt->scp_nreqs_active <
+ svcpt->scp_nthrs_running - 1 -
+ (svcpt->scp_service->srv_ops.so_hpreq_handler != NULL);
+}
+
+/**
+ * allowed to create more threads
+ * user can call it w/o any lock but need to hold
+ * ptlrpc_service_part::scp_lock to get reliable result
+ */
+static inline int
+ptlrpc_threads_increasable(struct ptlrpc_service_part *svcpt)
+{
+ return svcpt->scp_nthrs_running +
+ svcpt->scp_nthrs_starting <
+ svcpt->scp_service->srv_nthrs_cpt_limit;
+}
+
+/**
+ * too many requests and allowed to create more threads
+ */
+static inline int
+ptlrpc_threads_need_create(struct ptlrpc_service_part *svcpt)
+{
+ return !ptlrpc_threads_enough(svcpt) &&
+ ptlrpc_threads_increasable(svcpt);
+}
+
+static inline int
+ptlrpc_thread_stopping(struct ptlrpc_thread *thread)
+{
+ return thread_is_stopping(thread) ||
+ thread->t_svcpt->scp_service->srv_is_stopping;
+}
+
+static inline int
+ptlrpc_rqbd_pending(struct ptlrpc_service_part *svcpt)
+{
+ return !list_empty(&svcpt->scp_rqbd_idle) &&
+ svcpt->scp_rqbd_timeout == 0;
+}
+
+static inline int
+ptlrpc_at_check(struct ptlrpc_service_part *svcpt)
+{
+ return svcpt->scp_at_check;
+}
+
+/**
+ * requests wait on preprocessing
+ * user can call it w/o any lock but need to hold
+ * ptlrpc_service_part::scp_lock to get reliable result
+ */
+static inline int
+ptlrpc_server_request_incoming(struct ptlrpc_service_part *svcpt)
+{
+ return !list_empty(&svcpt->scp_req_incoming);
+}
+
+static __attribute__((__noinline__)) int
+ptlrpc_wait_event(struct ptlrpc_service_part *svcpt,
+ struct ptlrpc_thread *thread)
+{
+ /* Don't exit while there are replies to be handled */
+ struct l_wait_info lwi = LWI_TIMEOUT(svcpt->scp_rqbd_timeout,
+ ptlrpc_retry_rqbds, svcpt);
+
+ /* XXX: Add this back when libcfs watchdog is merged upstream
+ lc_watchdog_disable(thread->t_watchdog);
+ */
+
+ cond_resched();
+
+ l_wait_event_exclusive_head(svcpt->scp_waitq,
+ ptlrpc_thread_stopping(thread) ||
+ ptlrpc_server_request_incoming(svcpt) ||
+ ptlrpc_server_request_pending(svcpt, false) ||
+ ptlrpc_rqbd_pending(svcpt) ||
+ ptlrpc_at_check(svcpt), &lwi);
+
+ if (ptlrpc_thread_stopping(thread))
+ return -EINTR;
+
+ /*
+ lc_watchdog_touch(thread->t_watchdog,
+ ptlrpc_server_get_timeout(svcpt));
+ */
+ return 0;
+}
+
+/**
+ * Main thread body for service threads.
+ * Waits in a loop waiting for new requests to process to appear.
+ * Every time an incoming requests is added to its queue, a waitq
+ * is woken up and one of the threads will handle it.
+ */
+static int ptlrpc_main(void *arg)
+{
+ struct ptlrpc_thread *thread = (struct ptlrpc_thread *)arg;
+ struct ptlrpc_service_part *svcpt = thread->t_svcpt;
+ struct ptlrpc_service *svc = svcpt->scp_service;
+ struct ptlrpc_reply_state *rs;
+ struct group_info *ginfo = NULL;
+ struct lu_env *env;
+ int counter = 0, rc = 0;
+
+ thread->t_pid = current_pid();
+ unshare_fs_struct();
+
+ /* NB: we will call cfs_cpt_bind() for all threads, because we
+ * might want to run lustre server only on a subset of system CPUs,
+ * in that case ->scp_cpt is CFS_CPT_ANY */
+ rc = cfs_cpt_bind(svc->srv_cptable, svcpt->scp_cpt);
+ if (rc != 0) {
+ CWARN("%s: failed to bind %s on CPT %d\n",
+ svc->srv_name, thread->t_name, svcpt->scp_cpt);
+ }
+
+ ginfo = groups_alloc(0);
+ if (!ginfo) {
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ set_current_groups(ginfo);
+ put_group_info(ginfo);
+
+ if (svc->srv_ops.so_thr_init != NULL) {
+ rc = svc->srv_ops.so_thr_init(thread);
+ if (rc)
+ goto out;
+ }
+
+ OBD_ALLOC_PTR(env);
+ if (env == NULL) {
+ rc = -ENOMEM;
+ goto out_srv_fini;
+ }
+
+ rc = lu_context_init(&env->le_ctx,
+ svc->srv_ctx_tags|LCT_REMEMBER|LCT_NOREF);
+ if (rc)
+ goto out_srv_fini;
+
+ thread->t_env = env;
+ env->le_ctx.lc_thread = thread;
+ env->le_ctx.lc_cookie = 0x6;
+
+ while (!list_empty(&svcpt->scp_rqbd_idle)) {
+ rc = ptlrpc_server_post_idle_rqbds(svcpt);
+ if (rc >= 0)
+ continue;
+
+ CERROR("Failed to post rqbd for %s on CPT %d: %d\n",
+ svc->srv_name, svcpt->scp_cpt, rc);
+ goto out_srv_fini;
+ }
+
+ /* Alloc reply state structure for this one */
+ OBD_ALLOC_LARGE(rs, svc->srv_max_reply_size);
+ if (!rs) {
+ rc = -ENOMEM;
+ goto out_srv_fini;
+ }
+
+ spin_lock(&svcpt->scp_lock);
+
+ LASSERT(thread_is_starting(thread));
+ thread_clear_flags(thread, SVC_STARTING);
+
+ LASSERT(svcpt->scp_nthrs_starting == 1);
+ svcpt->scp_nthrs_starting--;
+
+ /* SVC_STOPPING may already be set here if someone else is trying
+ * to stop the service while this new thread has been dynamically
+ * forked. We still set SVC_RUNNING to let our creator know that
+ * we are now running, however we will exit as soon as possible */
+ thread_add_flags(thread, SVC_RUNNING);
+ svcpt->scp_nthrs_running++;
+ spin_unlock(&svcpt->scp_lock);
+
+ /* wake up our creator in case he's still waiting. */
+ wake_up(&thread->t_ctl_waitq);
+
+ /*
+ thread->t_watchdog = lc_watchdog_add(ptlrpc_server_get_timeout(svcpt),
+ NULL, NULL);
+ */
+
+ spin_lock(&svcpt->scp_rep_lock);
+ list_add(&rs->rs_list, &svcpt->scp_rep_idle);
+ wake_up(&svcpt->scp_rep_waitq);
+ spin_unlock(&svcpt->scp_rep_lock);
+
+ CDEBUG(D_NET, "service thread %d (#%d) started\n", thread->t_id,
+ svcpt->scp_nthrs_running);
+
+ /* XXX maintain a list of all managed devices: insert here */
+ while (!ptlrpc_thread_stopping(thread)) {
+ if (ptlrpc_wait_event(svcpt, thread))
+ break;
+
+ ptlrpc_check_rqbd_pool(svcpt);
+
+ if (ptlrpc_threads_need_create(svcpt)) {
+ /* Ignore return code - we tried... */
+ ptlrpc_start_thread(svcpt, 0);
+ }
+
+ /* Process all incoming reqs before handling any */
+ if (ptlrpc_server_request_incoming(svcpt)) {
+ lu_context_enter(&env->le_ctx);
+ env->le_ses = NULL;
+ ptlrpc_server_handle_req_in(svcpt, thread);
+ lu_context_exit(&env->le_ctx);
+
+ /* but limit ourselves in case of flood */
+ if (counter++ < 100)
+ continue;
+ counter = 0;
+ }
+
+ if (ptlrpc_at_check(svcpt))
+ ptlrpc_at_check_timed(svcpt);
+
+ if (ptlrpc_server_request_pending(svcpt, false)) {
+ lu_context_enter(&env->le_ctx);
+ ptlrpc_server_handle_request(svcpt, thread);
+ lu_context_exit(&env->le_ctx);
+ }
+
+ if (ptlrpc_rqbd_pending(svcpt) &&
+ ptlrpc_server_post_idle_rqbds(svcpt) < 0) {
+ /* I just failed to repost request buffers.
+ * Wait for a timeout (unless something else
+ * happens) before I try again */
+ svcpt->scp_rqbd_timeout = cfs_time_seconds(1) / 10;
+ CDEBUG(D_RPCTRACE, "Posted buffers: %d\n",
+ svcpt->scp_nrqbds_posted);
+ }
+ }
+
+ /*
+ lc_watchdog_delete(thread->t_watchdog);
+ thread->t_watchdog = NULL;
+ */
+
+out_srv_fini:
+ /*
+ * deconstruct service specific state created by ptlrpc_start_thread()
+ */
+ if (svc->srv_ops.so_thr_done != NULL)
+ svc->srv_ops.so_thr_done(thread);
+
+ if (env != NULL) {
+ lu_context_fini(&env->le_ctx);
+ OBD_FREE_PTR(env);
+ }
+out:
+ CDEBUG(D_RPCTRACE, "service thread [ %p : %u ] %d exiting: rc %d\n",
+ thread, thread->t_pid, thread->t_id, rc);
+
+ spin_lock(&svcpt->scp_lock);
+ if (thread_test_and_clear_flags(thread, SVC_STARTING))
+ svcpt->scp_nthrs_starting--;
+
+ if (thread_test_and_clear_flags(thread, SVC_RUNNING)) {
+ /* must know immediately */
+ svcpt->scp_nthrs_running--;
+ }
+
+ thread->t_id = rc;
+ thread_add_flags(thread, SVC_STOPPED);
+
+ wake_up(&thread->t_ctl_waitq);
+ spin_unlock(&svcpt->scp_lock);
+
+ return rc;
+}
+
+static int hrt_dont_sleep(struct ptlrpc_hr_thread *hrt,
+ struct list_head *replies)
+{
+ int result;
+
+ spin_lock(&hrt->hrt_lock);
+
+ list_splice_init(&hrt->hrt_queue, replies);
+ result = ptlrpc_hr.hr_stopping || !list_empty(replies);
+
+ spin_unlock(&hrt->hrt_lock);
+ return result;
+}
+
+/**
+ * Main body of "handle reply" function.
+ * It processes acked reply states
+ */
+static int ptlrpc_hr_main(void *arg)
+{
+ struct ptlrpc_hr_thread *hrt = (struct ptlrpc_hr_thread *)arg;
+ struct ptlrpc_hr_partition *hrp = hrt->hrt_partition;
+ LIST_HEAD (replies);
+ char threadname[20];
+ int rc;
+
+ snprintf(threadname, sizeof(threadname), "ptlrpc_hr%02d_%03d",
+ hrp->hrp_cpt, hrt->hrt_id);
+ unshare_fs_struct();
+
+ rc = cfs_cpt_bind(ptlrpc_hr.hr_cpt_table, hrp->hrp_cpt);
+ if (rc != 0) {
+ CWARN("Failed to bind %s on CPT %d of CPT table %p: rc = %d\n",
+ threadname, hrp->hrp_cpt, ptlrpc_hr.hr_cpt_table, rc);
+ }
+
+ atomic_inc(&hrp->hrp_nstarted);
+ wake_up(&ptlrpc_hr.hr_waitq);
+
+ while (!ptlrpc_hr.hr_stopping) {
+ l_wait_condition(hrt->hrt_waitq, hrt_dont_sleep(hrt, &replies));
+
+ while (!list_empty(&replies)) {
+ struct ptlrpc_reply_state *rs;
+
+ rs = list_entry(replies.prev,
+ struct ptlrpc_reply_state,
+ rs_list);
+ list_del_init(&rs->rs_list);
+ ptlrpc_handle_rs(rs);
+ }
+ }
+
+ atomic_inc(&hrp->hrp_nstopped);
+ wake_up(&ptlrpc_hr.hr_waitq);
+
+ return 0;
+}
+
+static void ptlrpc_stop_hr_threads(void)
+{
+ struct ptlrpc_hr_partition *hrp;
+ int i;
+ int j;
+
+ ptlrpc_hr.hr_stopping = 1;
+
+ cfs_percpt_for_each(hrp, i, ptlrpc_hr.hr_partitions) {
+ if (hrp->hrp_thrs == NULL)
+ continue; /* uninitialized */
+ for (j = 0; j < hrp->hrp_nthrs; j++)
+ wake_up_all(&hrp->hrp_thrs[j].hrt_waitq);
+ }
+
+ cfs_percpt_for_each(hrp, i, ptlrpc_hr.hr_partitions) {
+ if (hrp->hrp_thrs == NULL)
+ continue; /* uninitialized */
+ wait_event(ptlrpc_hr.hr_waitq,
+ atomic_read(&hrp->hrp_nstopped) ==
+ atomic_read(&hrp->hrp_nstarted));
+ }
+}
+
+static int ptlrpc_start_hr_threads(void)
+{
+ struct ptlrpc_hr_partition *hrp;
+ int i;
+ int j;
+
+ cfs_percpt_for_each(hrp, i, ptlrpc_hr.hr_partitions) {
+ int rc = 0;
+
+ for (j = 0; j < hrp->hrp_nthrs; j++) {
+ struct ptlrpc_hr_thread *hrt = &hrp->hrp_thrs[j];
+ rc = PTR_ERR(kthread_run(ptlrpc_hr_main,
+ &hrp->hrp_thrs[j],
+ "ptlrpc_hr%02d_%03d",
+ hrp->hrp_cpt,
+ hrt->hrt_id));
+ if (IS_ERR_VALUE(rc))
+ break;
+ }
+ wait_event(ptlrpc_hr.hr_waitq,
+ atomic_read(&hrp->hrp_nstarted) == j);
+ if (!IS_ERR_VALUE(rc))
+ continue;
+
+ CERROR("Reply handling thread %d:%d Failed on starting: rc = %d\n",
+ i, j, rc);
+ ptlrpc_stop_hr_threads();
+ return rc;
+ }
+ return 0;
+}
+
+static void ptlrpc_svcpt_stop_threads(struct ptlrpc_service_part *svcpt)
+{
+ struct l_wait_info lwi = { 0 };
+ struct ptlrpc_thread *thread;
+ LIST_HEAD (zombie);
+
+ CDEBUG(D_INFO, "Stopping threads for service %s\n",
+ svcpt->scp_service->srv_name);
+
+ spin_lock(&svcpt->scp_lock);
+ /* let the thread know that we would like it to stop asap */
+ list_for_each_entry(thread, &svcpt->scp_threads, t_link) {
+ CDEBUG(D_INFO, "Stopping thread %s #%u\n",
+ svcpt->scp_service->srv_thread_name, thread->t_id);
+ thread_add_flags(thread, SVC_STOPPING);
+ }
+
+ wake_up_all(&svcpt->scp_waitq);
+
+ while (!list_empty(&svcpt->scp_threads)) {
+ thread = list_entry(svcpt->scp_threads.next,
+ struct ptlrpc_thread, t_link);
+ if (thread_is_stopped(thread)) {
+ list_del(&thread->t_link);
+ list_add(&thread->t_link, &zombie);
+ continue;
+ }
+ spin_unlock(&svcpt->scp_lock);
+
+ CDEBUG(D_INFO, "waiting for stopping-thread %s #%u\n",
+ svcpt->scp_service->srv_thread_name, thread->t_id);
+ l_wait_event(thread->t_ctl_waitq,
+ thread_is_stopped(thread), &lwi);
+
+ spin_lock(&svcpt->scp_lock);
+ }
+
+ spin_unlock(&svcpt->scp_lock);
+
+ while (!list_empty(&zombie)) {
+ thread = list_entry(zombie.next,
+ struct ptlrpc_thread, t_link);
+ list_del(&thread->t_link);
+ OBD_FREE_PTR(thread);
+ }
+}
+
+/**
+ * Stops all threads of a particular service \a svc
+ */
+void ptlrpc_stop_all_threads(struct ptlrpc_service *svc)
+{
+ struct ptlrpc_service_part *svcpt;
+ int i;
+
+ ptlrpc_service_for_each_part(svcpt, i, svc) {
+ if (svcpt->scp_service != NULL)
+ ptlrpc_svcpt_stop_threads(svcpt);
+ }
+}
+EXPORT_SYMBOL(ptlrpc_stop_all_threads);
+
+int ptlrpc_start_threads(struct ptlrpc_service *svc)
+{
+ int rc = 0;
+ int i;
+ int j;
+
+ /* We require 2 threads min, see note in ptlrpc_server_handle_request */
+ LASSERT(svc->srv_nthrs_cpt_init >= PTLRPC_NTHRS_INIT);
+
+ for (i = 0; i < svc->srv_ncpts; i++) {
+ for (j = 0; j < svc->srv_nthrs_cpt_init; j++) {
+ rc = ptlrpc_start_thread(svc->srv_parts[i], 1);
+ if (rc == 0)
+ continue;
+
+ if (rc != -EMFILE)
+ goto failed;
+ /* We have enough threads, don't start more. b=15759 */
+ break;
+ }
+ }
+
+ return 0;
+ failed:
+ CERROR("cannot start %s thread #%d_%d: rc %d\n",
+ svc->srv_thread_name, i, j, rc);
+ ptlrpc_stop_all_threads(svc);
+ return rc;
+}
+EXPORT_SYMBOL(ptlrpc_start_threads);
+
+int ptlrpc_start_thread(struct ptlrpc_service_part *svcpt, int wait)
+{
+ struct l_wait_info lwi = { 0 };
+ struct ptlrpc_thread *thread;
+ struct ptlrpc_service *svc;
+ int rc;
+
+ LASSERT(svcpt != NULL);
+
+ svc = svcpt->scp_service;
+
+ CDEBUG(D_RPCTRACE, "%s[%d] started %d min %d max %d\n",
+ svc->srv_name, svcpt->scp_cpt, svcpt->scp_nthrs_running,
+ svc->srv_nthrs_cpt_init, svc->srv_nthrs_cpt_limit);
+
+ again:
+ if (unlikely(svc->srv_is_stopping))
+ return -ESRCH;
+
+ if (!ptlrpc_threads_increasable(svcpt) ||
+ (OBD_FAIL_CHECK(OBD_FAIL_TGT_TOOMANY_THREADS) &&
+ svcpt->scp_nthrs_running == svc->srv_nthrs_cpt_init - 1))
+ return -EMFILE;
+
+ OBD_CPT_ALLOC_PTR(thread, svc->srv_cptable, svcpt->scp_cpt);
+ if (thread == NULL)
+ return -ENOMEM;
+ init_waitqueue_head(&thread->t_ctl_waitq);
+
+ spin_lock(&svcpt->scp_lock);
+ if (!ptlrpc_threads_increasable(svcpt)) {
+ spin_unlock(&svcpt->scp_lock);
+ OBD_FREE_PTR(thread);
+ return -EMFILE;
+ }
+
+ if (svcpt->scp_nthrs_starting != 0) {
+ /* serialize starting because some modules (obdfilter)
+ * might require unique and contiguous t_id */
+ LASSERT(svcpt->scp_nthrs_starting == 1);
+ spin_unlock(&svcpt->scp_lock);
+ OBD_FREE_PTR(thread);
+ if (wait) {
+ CDEBUG(D_INFO, "Waiting for creating thread %s #%d\n",
+ svc->srv_thread_name, svcpt->scp_thr_nextid);
+ schedule();
+ goto again;
+ }
+
+ CDEBUG(D_INFO, "Creating thread %s #%d race, retry later\n",
+ svc->srv_thread_name, svcpt->scp_thr_nextid);
+ return -EAGAIN;
+ }
+
+ svcpt->scp_nthrs_starting++;
+ thread->t_id = svcpt->scp_thr_nextid++;
+ thread_add_flags(thread, SVC_STARTING);
+ thread->t_svcpt = svcpt;
+
+ list_add(&thread->t_link, &svcpt->scp_threads);
+ spin_unlock(&svcpt->scp_lock);
+
+ if (svcpt->scp_cpt >= 0) {
+ snprintf(thread->t_name, sizeof(thread->t_name), "%s%02d_%03d",
+ svc->srv_thread_name, svcpt->scp_cpt, thread->t_id);
+ } else {
+ snprintf(thread->t_name, sizeof(thread->t_name), "%s_%04d",
+ svc->srv_thread_name, thread->t_id);
+ }
+
+ CDEBUG(D_RPCTRACE, "starting thread '%s'\n", thread->t_name);
+ rc = PTR_ERR(kthread_run(ptlrpc_main, thread, "%s", thread->t_name));
+ if (IS_ERR_VALUE(rc)) {
+ CERROR("cannot start thread '%s': rc %d\n",
+ thread->t_name, rc);
+ spin_lock(&svcpt->scp_lock);
+ --svcpt->scp_nthrs_starting;
+ if (thread_is_stopping(thread)) {
+ /* this ptlrpc_thread is being handled
+ * by ptlrpc_svcpt_stop_threads now
+ */
+ thread_add_flags(thread, SVC_STOPPED);
+ wake_up(&thread->t_ctl_waitq);
+ spin_unlock(&svcpt->scp_lock);
+ } else {
+ list_del(&thread->t_link);
+ spin_unlock(&svcpt->scp_lock);
+ OBD_FREE_PTR(thread);
+ }
+ return rc;
+ }
+
+ if (!wait)
+ return 0;
+
+ l_wait_event(thread->t_ctl_waitq,
+ thread_is_running(thread) || thread_is_stopped(thread),
+ &lwi);
+
+ rc = thread_is_stopped(thread) ? thread->t_id : 0;
+ return rc;
+}
+
+int ptlrpc_hr_init(void)
+{
+ struct ptlrpc_hr_partition *hrp;
+ struct ptlrpc_hr_thread *hrt;
+ int rc;
+ int i;
+ int j;
+ int weight;
+
+ memset(&ptlrpc_hr, 0, sizeof(ptlrpc_hr));
+ ptlrpc_hr.hr_cpt_table = cfs_cpt_table;
+
+ ptlrpc_hr.hr_partitions = cfs_percpt_alloc(ptlrpc_hr.hr_cpt_table,
+ sizeof(*hrp));
+ if (ptlrpc_hr.hr_partitions == NULL)
+ return -ENOMEM;
+
+ init_waitqueue_head(&ptlrpc_hr.hr_waitq);
+
+ weight = cpumask_weight(topology_thread_cpumask(0));
+
+ cfs_percpt_for_each(hrp, i, ptlrpc_hr.hr_partitions) {
+ hrp->hrp_cpt = i;
+
+ atomic_set(&hrp->hrp_nstarted, 0);
+ atomic_set(&hrp->hrp_nstopped, 0);
+
+ hrp->hrp_nthrs = cfs_cpt_weight(ptlrpc_hr.hr_cpt_table, i);
+ hrp->hrp_nthrs /= weight;
+
+ LASSERT(hrp->hrp_nthrs > 0);
+ OBD_CPT_ALLOC(hrp->hrp_thrs, ptlrpc_hr.hr_cpt_table, i,
+ hrp->hrp_nthrs * sizeof(*hrt));
+ if (hrp->hrp_thrs == NULL) {
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ for (j = 0; j < hrp->hrp_nthrs; j++) {
+ hrt = &hrp->hrp_thrs[j];
+
+ hrt->hrt_id = j;
+ hrt->hrt_partition = hrp;
+ init_waitqueue_head(&hrt->hrt_waitq);
+ spin_lock_init(&hrt->hrt_lock);
+ INIT_LIST_HEAD(&hrt->hrt_queue);
+ }
+ }
+
+ rc = ptlrpc_start_hr_threads();
+out:
+ if (rc != 0)
+ ptlrpc_hr_fini();
+ return rc;
+}
+
+void ptlrpc_hr_fini(void)
+{
+ struct ptlrpc_hr_partition *hrp;
+ int i;
+
+ if (ptlrpc_hr.hr_partitions == NULL)
+ return;
+
+ ptlrpc_stop_hr_threads();
+
+ cfs_percpt_for_each(hrp, i, ptlrpc_hr.hr_partitions) {
+ if (hrp->hrp_thrs != NULL) {
+ OBD_FREE(hrp->hrp_thrs,
+ hrp->hrp_nthrs * sizeof(hrp->hrp_thrs[0]));
+ }
+ }
+
+ cfs_percpt_free(ptlrpc_hr.hr_partitions);
+ ptlrpc_hr.hr_partitions = NULL;
+}
+
+
+/**
+ * Wait until all already scheduled replies are processed.
+ */
+static void ptlrpc_wait_replies(struct ptlrpc_service_part *svcpt)
+{
+ while (1) {
+ int rc;
+ struct l_wait_info lwi = LWI_TIMEOUT(cfs_time_seconds(10),
+ NULL, NULL);
+
+ rc = l_wait_event(svcpt->scp_waitq,
+ atomic_read(&svcpt->scp_nreps_difficult) == 0, &lwi);
+ if (rc == 0)
+ break;
+ CWARN("Unexpectedly long timeout %s %p\n",
+ svcpt->scp_service->srv_name, svcpt->scp_service);
+ }
+}
+
+static void
+ptlrpc_service_del_atimer(struct ptlrpc_service *svc)
+{
+ struct ptlrpc_service_part *svcpt;
+ int i;
+
+ /* early disarm AT timer... */
+ ptlrpc_service_for_each_part(svcpt, i, svc) {
+ if (svcpt->scp_service != NULL)
+ cfs_timer_disarm(&svcpt->scp_at_timer);
+ }
+}
+
+static void
+ptlrpc_service_unlink_rqbd(struct ptlrpc_service *svc)
+{
+ struct ptlrpc_service_part *svcpt;
+ struct ptlrpc_request_buffer_desc *rqbd;
+ struct l_wait_info lwi;
+ int rc;
+ int i;
+
+ /* All history will be culled when the next request buffer is
+ * freed in ptlrpc_service_purge_all() */
+ svc->srv_hist_nrqbds_cpt_max = 0;
+
+ rc = LNetClearLazyPortal(svc->srv_req_portal);
+ LASSERT(rc == 0);
+
+ ptlrpc_service_for_each_part(svcpt, i, svc) {
+ if (svcpt->scp_service == NULL)
+ break;
+
+ /* Unlink all the request buffers. This forces a 'final'
+ * event with its 'unlink' flag set for each posted rqbd */
+ list_for_each_entry(rqbd, &svcpt->scp_rqbd_posted,
+ rqbd_list) {
+ rc = LNetMDUnlink(rqbd->rqbd_md_h);
+ LASSERT(rc == 0 || rc == -ENOENT);
+ }
+ }
+
+ ptlrpc_service_for_each_part(svcpt, i, svc) {
+ if (svcpt->scp_service == NULL)
+ break;
+
+ /* Wait for the network to release any buffers
+ * it's currently filling */
+ spin_lock(&svcpt->scp_lock);
+ while (svcpt->scp_nrqbds_posted != 0) {
+ spin_unlock(&svcpt->scp_lock);
+ /* Network access will complete in finite time but
+ * the HUGE timeout lets us CWARN for visibility
+ * of sluggish NALs */
+ lwi = LWI_TIMEOUT_INTERVAL(
+ cfs_time_seconds(LONG_UNLINK),
+ cfs_time_seconds(1), NULL, NULL);
+ rc = l_wait_event(svcpt->scp_waitq,
+ svcpt->scp_nrqbds_posted == 0, &lwi);
+ if (rc == -ETIMEDOUT) {
+ CWARN("Service %s waiting for request buffers\n",
+ svcpt->scp_service->srv_name);
+ }
+ spin_lock(&svcpt->scp_lock);
+ }
+ spin_unlock(&svcpt->scp_lock);
+ }
+}
+
+static void
+ptlrpc_service_purge_all(struct ptlrpc_service *svc)
+{
+ struct ptlrpc_service_part *svcpt;
+ struct ptlrpc_request_buffer_desc *rqbd;
+ struct ptlrpc_request *req;
+ struct ptlrpc_reply_state *rs;
+ int i;
+
+ ptlrpc_service_for_each_part(svcpt, i, svc) {
+ if (svcpt->scp_service == NULL)
+ break;
+
+ spin_lock(&svcpt->scp_rep_lock);
+ while (!list_empty(&svcpt->scp_rep_active)) {
+ rs = list_entry(svcpt->scp_rep_active.next,
+ struct ptlrpc_reply_state, rs_list);
+ spin_lock(&rs->rs_lock);
+ ptlrpc_schedule_difficult_reply(rs);
+ spin_unlock(&rs->rs_lock);
+ }
+ spin_unlock(&svcpt->scp_rep_lock);
+
+ /* purge the request queue. NB No new replies (rqbds
+ * all unlinked) and no service threads, so I'm the only
+ * thread noodling the request queue now */
+ while (!list_empty(&svcpt->scp_req_incoming)) {
+ req = list_entry(svcpt->scp_req_incoming.next,
+ struct ptlrpc_request, rq_list);
+
+ list_del(&req->rq_list);
+ svcpt->scp_nreqs_incoming--;
+ ptlrpc_server_finish_request(svcpt, req);
+ }
+
+ while (ptlrpc_server_request_pending(svcpt, true)) {
+ req = ptlrpc_server_request_get(svcpt, true);
+ ptlrpc_server_finish_active_request(svcpt, req);
+ }
+
+ LASSERT(list_empty(&svcpt->scp_rqbd_posted));
+ LASSERT(svcpt->scp_nreqs_incoming == 0);
+ LASSERT(svcpt->scp_nreqs_active == 0);
+ /* history should have been culled by
+ * ptlrpc_server_finish_request */
+ LASSERT(svcpt->scp_hist_nrqbds == 0);
+
+ /* Now free all the request buffers since nothing
+ * references them any more... */
+
+ while (!list_empty(&svcpt->scp_rqbd_idle)) {
+ rqbd = list_entry(svcpt->scp_rqbd_idle.next,
+ struct ptlrpc_request_buffer_desc,
+ rqbd_list);
+ ptlrpc_free_rqbd(rqbd);
+ }
+ ptlrpc_wait_replies(svcpt);
+
+ while (!list_empty(&svcpt->scp_rep_idle)) {
+ rs = list_entry(svcpt->scp_rep_idle.next,
+ struct ptlrpc_reply_state,
+ rs_list);
+ list_del(&rs->rs_list);
+ OBD_FREE_LARGE(rs, svc->srv_max_reply_size);
+ }
+ }
+}
+
+static void
+ptlrpc_service_free(struct ptlrpc_service *svc)
+{
+ struct ptlrpc_service_part *svcpt;
+ struct ptlrpc_at_array *array;
+ int i;
+
+ ptlrpc_service_for_each_part(svcpt, i, svc) {
+ if (svcpt->scp_service == NULL)
+ break;
+
+ /* In case somebody rearmed this in the meantime */
+ cfs_timer_disarm(&svcpt->scp_at_timer);
+ array = &svcpt->scp_at_array;
+
+ if (array->paa_reqs_array != NULL) {
+ OBD_FREE(array->paa_reqs_array,
+ sizeof(struct list_head) * array->paa_size);
+ array->paa_reqs_array = NULL;
+ }
+
+ if (array->paa_reqs_count != NULL) {
+ OBD_FREE(array->paa_reqs_count,
+ sizeof(__u32) * array->paa_size);
+ array->paa_reqs_count = NULL;
+ }
+ }
+
+ ptlrpc_service_for_each_part(svcpt, i, svc)
+ OBD_FREE_PTR(svcpt);
+
+ if (svc->srv_cpts != NULL)
+ cfs_expr_list_values_free(svc->srv_cpts, svc->srv_ncpts);
+
+ OBD_FREE(svc, offsetof(struct ptlrpc_service,
+ srv_parts[svc->srv_ncpts]));
+}
+
+int ptlrpc_unregister_service(struct ptlrpc_service *service)
+{
+ CDEBUG(D_NET, "%s: tearing down\n", service->srv_name);
+
+ service->srv_is_stopping = 1;
+
+ mutex_lock(&ptlrpc_all_services_mutex);
+ list_del_init(&service->srv_list);
+ mutex_unlock(&ptlrpc_all_services_mutex);
+
+ ptlrpc_service_del_atimer(service);
+ ptlrpc_stop_all_threads(service);
+
+ ptlrpc_service_unlink_rqbd(service);
+ ptlrpc_service_purge_all(service);
+ ptlrpc_service_nrs_cleanup(service);
+
+ ptlrpc_lprocfs_unregister_service(service);
+
+ ptlrpc_service_free(service);
+
+ return 0;
+}
+EXPORT_SYMBOL(ptlrpc_unregister_service);
+
+/**
+ * Returns 0 if the service is healthy.
+ *
+ * Right now, it just checks to make sure that requests aren't languishing
+ * in the queue. We'll use this health check to govern whether a node needs
+ * to be shot, so it's intentionally non-aggressive. */
+int ptlrpc_svcpt_health_check(struct ptlrpc_service_part *svcpt)
+{
+ struct ptlrpc_request *request = NULL;
+ struct timeval right_now;
+ long timediff;
+
+ do_gettimeofday(&right_now);
+
+ spin_lock(&svcpt->scp_req_lock);
+ /* How long has the next entry been waiting? */
+ if (ptlrpc_server_high_pending(svcpt, true))
+ request = ptlrpc_nrs_req_peek_nolock(svcpt, true);
+ else if (ptlrpc_server_normal_pending(svcpt, true))
+ request = ptlrpc_nrs_req_peek_nolock(svcpt, false);
+
+ if (request == NULL) {
+ spin_unlock(&svcpt->scp_req_lock);
+ return 0;
+ }
+
+ timediff = cfs_timeval_sub(&right_now, &request->rq_arrival_time, NULL);
+ spin_unlock(&svcpt->scp_req_lock);
+
+ if ((timediff / ONE_MILLION) >
+ (AT_OFF ? obd_timeout * 3 / 2 : at_max)) {
+ CERROR("%s: unhealthy - request has been waiting %lds\n",
+ svcpt->scp_service->srv_name, timediff / ONE_MILLION);
+ return -1;
+ }
+
+ return 0;
+}
+
+int
+ptlrpc_service_health_check(struct ptlrpc_service *svc)
+{
+ struct ptlrpc_service_part *svcpt;
+ int i;
+
+ if (svc == NULL)
+ return 0;
+
+ ptlrpc_service_for_each_part(svcpt, i, svc) {
+ int rc = ptlrpc_svcpt_health_check(svcpt);
+
+ if (rc != 0)
+ return rc;
+ }
+ return 0;
+}
+EXPORT_SYMBOL(ptlrpc_service_health_check);
diff --git a/drivers/staging/lustre/lustre/ptlrpc/wiretest.c b/drivers/staging/lustre/lustre/ptlrpc/wiretest.c
new file mode 100644
index 000000000..d6d92046c
--- /dev/null
+++ b/drivers/staging/lustre/lustre/ptlrpc/wiretest.c
@@ -0,0 +1,4492 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#define DEBUG_SUBSYSTEM S_RPC
+
+#include <linux/fs.h>
+#include <linux/posix_acl_xattr.h>
+
+#include "../include/obd_support.h"
+#include "../include/obd_class.h"
+#include "../include/lustre_net.h"
+#include "../include/lustre_disk.h"
+void lustre_assert_wire_constants(void)
+{
+ /* Wire protocol assertions generated by 'wirecheck'
+ * (make -C lustre/utils newwiretest)
+ * running on Linux centos6-bis 2.6.32-358.0.1.el6-head
+ * #3 SMP Wed Apr 17 17:37:43 CEST 2013
+ * with gcc version 4.4.6 20110731 (Red Hat 4.4.6-3) (GCC)
+ */
+
+ /* Constants... */
+ LASSERTF(PTL_RPC_MSG_REQUEST == 4711, "found %lld\n",
+ (long long)PTL_RPC_MSG_REQUEST);
+ LASSERTF(PTL_RPC_MSG_ERR == 4712, "found %lld\n",
+ (long long)PTL_RPC_MSG_ERR);
+ LASSERTF(PTL_RPC_MSG_REPLY == 4713, "found %lld\n",
+ (long long)PTL_RPC_MSG_REPLY);
+ LASSERTF(MDS_DIR_END_OFF == 0xfffffffffffffffeULL, "found 0x%.16llxULL\n",
+ MDS_DIR_END_OFF);
+ LASSERTF(DEAD_HANDLE_MAGIC == 0xdeadbeefcafebabeULL, "found 0x%.16llxULL\n",
+ DEAD_HANDLE_MAGIC);
+ CLASSERT(MTI_NAME_MAXLEN == 64);
+ LASSERTF(OST_REPLY == 0, "found %lld\n",
+ (long long)OST_REPLY);
+ LASSERTF(OST_GETATTR == 1, "found %lld\n",
+ (long long)OST_GETATTR);
+ LASSERTF(OST_SETATTR == 2, "found %lld\n",
+ (long long)OST_SETATTR);
+ LASSERTF(OST_READ == 3, "found %lld\n",
+ (long long)OST_READ);
+ LASSERTF(OST_WRITE == 4, "found %lld\n",
+ (long long)OST_WRITE);
+ LASSERTF(OST_CREATE == 5, "found %lld\n",
+ (long long)OST_CREATE);
+ LASSERTF(OST_DESTROY == 6, "found %lld\n",
+ (long long)OST_DESTROY);
+ LASSERTF(OST_GET_INFO == 7, "found %lld\n",
+ (long long)OST_GET_INFO);
+ LASSERTF(OST_CONNECT == 8, "found %lld\n",
+ (long long)OST_CONNECT);
+ LASSERTF(OST_DISCONNECT == 9, "found %lld\n",
+ (long long)OST_DISCONNECT);
+ LASSERTF(OST_PUNCH == 10, "found %lld\n",
+ (long long)OST_PUNCH);
+ LASSERTF(OST_OPEN == 11, "found %lld\n",
+ (long long)OST_OPEN);
+ LASSERTF(OST_CLOSE == 12, "found %lld\n",
+ (long long)OST_CLOSE);
+ LASSERTF(OST_STATFS == 13, "found %lld\n",
+ (long long)OST_STATFS);
+ LASSERTF(OST_SYNC == 16, "found %lld\n",
+ (long long)OST_SYNC);
+ LASSERTF(OST_SET_INFO == 17, "found %lld\n",
+ (long long)OST_SET_INFO);
+ LASSERTF(OST_QUOTACHECK == 18, "found %lld\n",
+ (long long)OST_QUOTACHECK);
+ LASSERTF(OST_QUOTACTL == 19, "found %lld\n",
+ (long long)OST_QUOTACTL);
+ LASSERTF(OST_QUOTA_ADJUST_QUNIT == 20, "found %lld\n",
+ (long long)OST_QUOTA_ADJUST_QUNIT);
+ LASSERTF(OST_LAST_OPC == 21, "found %lld\n",
+ (long long)OST_LAST_OPC);
+ LASSERTF(OBD_OBJECT_EOF == 0xffffffffffffffffULL, "found 0x%.16llxULL\n",
+ OBD_OBJECT_EOF);
+ LASSERTF(OST_MIN_PRECREATE == 32, "found %lld\n",
+ (long long)OST_MIN_PRECREATE);
+ LASSERTF(OST_MAX_PRECREATE == 20000, "found %lld\n",
+ (long long)OST_MAX_PRECREATE);
+ LASSERTF(OST_LVB_ERR_INIT == 0xffbadbad80000000ULL, "found 0x%.16llxULL\n",
+ OST_LVB_ERR_INIT);
+ LASSERTF(OST_LVB_ERR_MASK == 0xffbadbad00000000ULL, "found 0x%.16llxULL\n",
+ OST_LVB_ERR_MASK);
+ LASSERTF(MDS_FIRST_OPC == 33, "found %lld\n",
+ (long long)MDS_FIRST_OPC);
+ LASSERTF(MDS_GETATTR == 33, "found %lld\n",
+ (long long)MDS_GETATTR);
+ LASSERTF(MDS_GETATTR_NAME == 34, "found %lld\n",
+ (long long)MDS_GETATTR_NAME);
+ LASSERTF(MDS_CLOSE == 35, "found %lld\n",
+ (long long)MDS_CLOSE);
+ LASSERTF(MDS_REINT == 36, "found %lld\n",
+ (long long)MDS_REINT);
+ LASSERTF(MDS_READPAGE == 37, "found %lld\n",
+ (long long)MDS_READPAGE);
+ LASSERTF(MDS_CONNECT == 38, "found %lld\n",
+ (long long)MDS_CONNECT);
+ LASSERTF(MDS_DISCONNECT == 39, "found %lld\n",
+ (long long)MDS_DISCONNECT);
+ LASSERTF(MDS_GETSTATUS == 40, "found %lld\n",
+ (long long)MDS_GETSTATUS);
+ LASSERTF(MDS_STATFS == 41, "found %lld\n",
+ (long long)MDS_STATFS);
+ LASSERTF(MDS_PIN == 42, "found %lld\n",
+ (long long)MDS_PIN);
+ LASSERTF(MDS_UNPIN == 43, "found %lld\n",
+ (long long)MDS_UNPIN);
+ LASSERTF(MDS_SYNC == 44, "found %lld\n",
+ (long long)MDS_SYNC);
+ LASSERTF(MDS_DONE_WRITING == 45, "found %lld\n",
+ (long long)MDS_DONE_WRITING);
+ LASSERTF(MDS_SET_INFO == 46, "found %lld\n",
+ (long long)MDS_SET_INFO);
+ LASSERTF(MDS_QUOTACHECK == 47, "found %lld\n",
+ (long long)MDS_QUOTACHECK);
+ LASSERTF(MDS_QUOTACTL == 48, "found %lld\n",
+ (long long)MDS_QUOTACTL);
+ LASSERTF(MDS_GETXATTR == 49, "found %lld\n",
+ (long long)MDS_GETXATTR);
+ LASSERTF(MDS_SETXATTR == 50, "found %lld\n",
+ (long long)MDS_SETXATTR);
+ LASSERTF(MDS_WRITEPAGE == 51, "found %lld\n",
+ (long long)MDS_WRITEPAGE);
+ LASSERTF(MDS_IS_SUBDIR == 52, "found %lld\n",
+ (long long)MDS_IS_SUBDIR);
+ LASSERTF(MDS_GET_INFO == 53, "found %lld\n",
+ (long long)MDS_GET_INFO);
+ LASSERTF(MDS_HSM_STATE_GET == 54, "found %lld\n",
+ (long long)MDS_HSM_STATE_GET);
+ LASSERTF(MDS_HSM_STATE_SET == 55, "found %lld\n",
+ (long long)MDS_HSM_STATE_SET);
+ LASSERTF(MDS_HSM_ACTION == 56, "found %lld\n",
+ (long long)MDS_HSM_ACTION);
+ LASSERTF(MDS_HSM_PROGRESS == 57, "found %lld\n",
+ (long long)MDS_HSM_PROGRESS);
+ LASSERTF(MDS_HSM_REQUEST == 58, "found %lld\n",
+ (long long)MDS_HSM_REQUEST);
+ LASSERTF(MDS_HSM_CT_REGISTER == 59, "found %lld\n",
+ (long long)MDS_HSM_CT_REGISTER);
+ LASSERTF(MDS_HSM_CT_UNREGISTER == 60, "found %lld\n",
+ (long long)MDS_HSM_CT_UNREGISTER);
+ LASSERTF(MDS_SWAP_LAYOUTS == 61, "found %lld\n",
+ (long long)MDS_SWAP_LAYOUTS);
+ LASSERTF(MDS_LAST_OPC == 62, "found %lld\n",
+ (long long)MDS_LAST_OPC);
+ LASSERTF(REINT_SETATTR == 1, "found %lld\n",
+ (long long)REINT_SETATTR);
+ LASSERTF(REINT_CREATE == 2, "found %lld\n",
+ (long long)REINT_CREATE);
+ LASSERTF(REINT_LINK == 3, "found %lld\n",
+ (long long)REINT_LINK);
+ LASSERTF(REINT_UNLINK == 4, "found %lld\n",
+ (long long)REINT_UNLINK);
+ LASSERTF(REINT_RENAME == 5, "found %lld\n",
+ (long long)REINT_RENAME);
+ LASSERTF(REINT_OPEN == 6, "found %lld\n",
+ (long long)REINT_OPEN);
+ LASSERTF(REINT_SETXATTR == 7, "found %lld\n",
+ (long long)REINT_SETXATTR);
+ LASSERTF(REINT_RMENTRY == 8, "found %lld\n",
+ (long long)REINT_RMENTRY);
+ LASSERTF(REINT_MAX == 9, "found %lld\n",
+ (long long)REINT_MAX);
+ LASSERTF(DISP_IT_EXECD == 0x00000001UL, "found 0x%.8xUL\n",
+ (unsigned)DISP_IT_EXECD);
+ LASSERTF(DISP_LOOKUP_EXECD == 0x00000002UL, "found 0x%.8xUL\n",
+ (unsigned)DISP_LOOKUP_EXECD);
+ LASSERTF(DISP_LOOKUP_NEG == 0x00000004UL, "found 0x%.8xUL\n",
+ (unsigned)DISP_LOOKUP_NEG);
+ LASSERTF(DISP_LOOKUP_POS == 0x00000008UL, "found 0x%.8xUL\n",
+ (unsigned)DISP_LOOKUP_POS);
+ LASSERTF(DISP_OPEN_CREATE == 0x00000010UL, "found 0x%.8xUL\n",
+ (unsigned)DISP_OPEN_CREATE);
+ LASSERTF(DISP_OPEN_OPEN == 0x00000020UL, "found 0x%.8xUL\n",
+ (unsigned)DISP_OPEN_OPEN);
+ LASSERTF(DISP_ENQ_COMPLETE == 0x00400000UL, "found 0x%.8xUL\n",
+ (unsigned)DISP_ENQ_COMPLETE);
+ LASSERTF(DISP_ENQ_OPEN_REF == 0x00800000UL, "found 0x%.8xUL\n",
+ (unsigned)DISP_ENQ_OPEN_REF);
+ LASSERTF(DISP_ENQ_CREATE_REF == 0x01000000UL, "found 0x%.8xUL\n",
+ (unsigned)DISP_ENQ_CREATE_REF);
+ LASSERTF(DISP_OPEN_LOCK == 0x02000000UL, "found 0x%.8xUL\n",
+ (unsigned)DISP_OPEN_LOCK);
+ LASSERTF(MDS_STATUS_CONN == 1, "found %lld\n",
+ (long long)MDS_STATUS_CONN);
+ LASSERTF(MDS_STATUS_LOV == 2, "found %lld\n",
+ (long long)MDS_STATUS_LOV);
+ LASSERTF(LUSTRE_BFLAG_UNCOMMITTED_WRITES == 1, "found %lld\n",
+ (long long)LUSTRE_BFLAG_UNCOMMITTED_WRITES);
+ LASSERTF(MF_SOM_CHANGE == 0x00000001UL, "found 0x%.8xUL\n",
+ (unsigned)MF_SOM_CHANGE);
+ LASSERTF(MF_EPOCH_OPEN == 0x00000002UL, "found 0x%.8xUL\n",
+ (unsigned)MF_EPOCH_OPEN);
+ LASSERTF(MF_EPOCH_CLOSE == 0x00000004UL, "found 0x%.8xUL\n",
+ (unsigned)MF_EPOCH_CLOSE);
+ LASSERTF(MF_MDC_CANCEL_FID1 == 0x00000008UL, "found 0x%.8xUL\n",
+ (unsigned)MF_MDC_CANCEL_FID1);
+ LASSERTF(MF_MDC_CANCEL_FID2 == 0x00000010UL, "found 0x%.8xUL\n",
+ (unsigned)MF_MDC_CANCEL_FID2);
+ LASSERTF(MF_MDC_CANCEL_FID3 == 0x00000020UL, "found 0x%.8xUL\n",
+ (unsigned)MF_MDC_CANCEL_FID3);
+ LASSERTF(MF_MDC_CANCEL_FID4 == 0x00000040UL, "found 0x%.8xUL\n",
+ (unsigned)MF_MDC_CANCEL_FID4);
+ LASSERTF(MF_SOM_AU == 0x00000080UL, "found 0x%.8xUL\n",
+ (unsigned)MF_SOM_AU);
+ LASSERTF(MF_GETATTR_LOCK == 0x00000100UL, "found 0x%.8xUL\n",
+ (unsigned)MF_GETATTR_LOCK);
+ LASSERTF(MDS_ATTR_MODE == 0x0000000000000001ULL, "found 0x%.16llxULL\n",
+ (long long)MDS_ATTR_MODE);
+ LASSERTF(MDS_ATTR_UID == 0x0000000000000002ULL, "found 0x%.16llxULL\n",
+ (long long)MDS_ATTR_UID);
+ LASSERTF(MDS_ATTR_GID == 0x0000000000000004ULL, "found 0x%.16llxULL\n",
+ (long long)MDS_ATTR_GID);
+ LASSERTF(MDS_ATTR_SIZE == 0x0000000000000008ULL, "found 0x%.16llxULL\n",
+ (long long)MDS_ATTR_SIZE);
+ LASSERTF(MDS_ATTR_ATIME == 0x0000000000000010ULL, "found 0x%.16llxULL\n",
+ (long long)MDS_ATTR_ATIME);
+ LASSERTF(MDS_ATTR_MTIME == 0x0000000000000020ULL, "found 0x%.16llxULL\n",
+ (long long)MDS_ATTR_MTIME);
+ LASSERTF(MDS_ATTR_CTIME == 0x0000000000000040ULL, "found 0x%.16llxULL\n",
+ (long long)MDS_ATTR_CTIME);
+ LASSERTF(MDS_ATTR_ATIME_SET == 0x0000000000000080ULL, "found 0x%.16llxULL\n",
+ (long long)MDS_ATTR_ATIME_SET);
+ LASSERTF(MDS_ATTR_MTIME_SET == 0x0000000000000100ULL, "found 0x%.16llxULL\n",
+ (long long)MDS_ATTR_MTIME_SET);
+ LASSERTF(MDS_ATTR_FORCE == 0x0000000000000200ULL, "found 0x%.16llxULL\n",
+ (long long)MDS_ATTR_FORCE);
+ LASSERTF(MDS_ATTR_ATTR_FLAG == 0x0000000000000400ULL, "found 0x%.16llxULL\n",
+ (long long)MDS_ATTR_ATTR_FLAG);
+ LASSERTF(MDS_ATTR_KILL_SUID == 0x0000000000000800ULL, "found 0x%.16llxULL\n",
+ (long long)MDS_ATTR_KILL_SUID);
+ LASSERTF(MDS_ATTR_KILL_SGID == 0x0000000000001000ULL, "found 0x%.16llxULL\n",
+ (long long)MDS_ATTR_KILL_SGID);
+ LASSERTF(MDS_ATTR_CTIME_SET == 0x0000000000002000ULL, "found 0x%.16llxULL\n",
+ (long long)MDS_ATTR_CTIME_SET);
+ LASSERTF(MDS_ATTR_FROM_OPEN == 0x0000000000004000ULL, "found 0x%.16llxULL\n",
+ (long long)MDS_ATTR_FROM_OPEN);
+ LASSERTF(MDS_ATTR_BLOCKS == 0x0000000000008000ULL, "found 0x%.16llxULL\n",
+ (long long)MDS_ATTR_BLOCKS);
+ LASSERTF(FLD_QUERY == 900, "found %lld\n",
+ (long long)FLD_QUERY);
+ LASSERTF(FLD_FIRST_OPC == 900, "found %lld\n",
+ (long long)FLD_FIRST_OPC);
+ LASSERTF(FLD_LAST_OPC == 901, "found %lld\n",
+ (long long)FLD_LAST_OPC);
+ LASSERTF(SEQ_QUERY == 700, "found %lld\n",
+ (long long)SEQ_QUERY);
+ LASSERTF(SEQ_FIRST_OPC == 700, "found %lld\n",
+ (long long)SEQ_FIRST_OPC);
+ LASSERTF(SEQ_LAST_OPC == 701, "found %lld\n",
+ (long long)SEQ_LAST_OPC);
+ LASSERTF(SEQ_ALLOC_SUPER == 0, "found %lld\n",
+ (long long)SEQ_ALLOC_SUPER);
+ LASSERTF(SEQ_ALLOC_META == 1, "found %lld\n",
+ (long long)SEQ_ALLOC_META);
+ LASSERTF(LDLM_ENQUEUE == 101, "found %lld\n",
+ (long long)LDLM_ENQUEUE);
+ LASSERTF(LDLM_CONVERT == 102, "found %lld\n",
+ (long long)LDLM_CONVERT);
+ LASSERTF(LDLM_CANCEL == 103, "found %lld\n",
+ (long long)LDLM_CANCEL);
+ LASSERTF(LDLM_BL_CALLBACK == 104, "found %lld\n",
+ (long long)LDLM_BL_CALLBACK);
+ LASSERTF(LDLM_CP_CALLBACK == 105, "found %lld\n",
+ (long long)LDLM_CP_CALLBACK);
+ LASSERTF(LDLM_GL_CALLBACK == 106, "found %lld\n",
+ (long long)LDLM_GL_CALLBACK);
+ LASSERTF(LDLM_SET_INFO == 107, "found %lld\n",
+ (long long)LDLM_SET_INFO);
+ LASSERTF(LDLM_LAST_OPC == 108, "found %lld\n",
+ (long long)LDLM_LAST_OPC);
+ LASSERTF(LCK_MINMODE == 0, "found %lld\n",
+ (long long)LCK_MINMODE);
+ LASSERTF(LCK_EX == 1, "found %lld\n",
+ (long long)LCK_EX);
+ LASSERTF(LCK_PW == 2, "found %lld\n",
+ (long long)LCK_PW);
+ LASSERTF(LCK_PR == 4, "found %lld\n",
+ (long long)LCK_PR);
+ LASSERTF(LCK_CW == 8, "found %lld\n",
+ (long long)LCK_CW);
+ LASSERTF(LCK_CR == 16, "found %lld\n",
+ (long long)LCK_CR);
+ LASSERTF(LCK_NL == 32, "found %lld\n",
+ (long long)LCK_NL);
+ LASSERTF(LCK_GROUP == 64, "found %lld\n",
+ (long long)LCK_GROUP);
+ LASSERTF(LCK_COS == 128, "found %lld\n",
+ (long long)LCK_COS);
+ LASSERTF(LCK_MAXMODE == 129, "found %lld\n",
+ (long long)LCK_MAXMODE);
+ LASSERTF(LCK_MODE_NUM == 8, "found %lld\n",
+ (long long)LCK_MODE_NUM);
+ CLASSERT(LDLM_PLAIN == 10);
+ CLASSERT(LDLM_EXTENT == 11);
+ CLASSERT(LDLM_FLOCK == 12);
+ CLASSERT(LDLM_IBITS == 13);
+ CLASSERT(LDLM_MAX_TYPE == 14);
+ CLASSERT(LUSTRE_RES_ID_SEQ_OFF == 0);
+ CLASSERT(LUSTRE_RES_ID_VER_OID_OFF == 1);
+ LASSERTF(UPDATE_OBJ == 1000, "found %lld\n",
+ (long long)UPDATE_OBJ);
+ LASSERTF(UPDATE_LAST_OPC == 1001, "found %lld\n",
+ (long long)UPDATE_LAST_OPC);
+ CLASSERT(LUSTRE_RES_ID_QUOTA_SEQ_OFF == 2);
+ CLASSERT(LUSTRE_RES_ID_QUOTA_VER_OID_OFF == 3);
+ CLASSERT(LUSTRE_RES_ID_HSH_OFF == 3);
+ CLASSERT(LQUOTA_TYPE_USR == 0);
+ CLASSERT(LQUOTA_TYPE_GRP == 1);
+ CLASSERT(LQUOTA_RES_MD == 1);
+ CLASSERT(LQUOTA_RES_DT == 2);
+ LASSERTF(OBD_PING == 400, "found %lld\n",
+ (long long)OBD_PING);
+ LASSERTF(OBD_LOG_CANCEL == 401, "found %lld\n",
+ (long long)OBD_LOG_CANCEL);
+ LASSERTF(OBD_QC_CALLBACK == 402, "found %lld\n",
+ (long long)OBD_QC_CALLBACK);
+ LASSERTF(OBD_IDX_READ == 403, "found %lld\n",
+ (long long)OBD_IDX_READ);
+ LASSERTF(OBD_LAST_OPC == 404, "found %lld\n",
+ (long long)OBD_LAST_OPC);
+ LASSERTF(QUOTA_DQACQ == 601, "found %lld\n",
+ (long long)QUOTA_DQACQ);
+ LASSERTF(QUOTA_DQREL == 602, "found %lld\n",
+ (long long)QUOTA_DQREL);
+ LASSERTF(QUOTA_LAST_OPC == 603, "found %lld\n",
+ (long long)QUOTA_LAST_OPC);
+ LASSERTF(MGS_CONNECT == 250, "found %lld\n",
+ (long long)MGS_CONNECT);
+ LASSERTF(MGS_DISCONNECT == 251, "found %lld\n",
+ (long long)MGS_DISCONNECT);
+ LASSERTF(MGS_EXCEPTION == 252, "found %lld\n",
+ (long long)MGS_EXCEPTION);
+ LASSERTF(MGS_TARGET_REG == 253, "found %lld\n",
+ (long long)MGS_TARGET_REG);
+ LASSERTF(MGS_TARGET_DEL == 254, "found %lld\n",
+ (long long)MGS_TARGET_DEL);
+ LASSERTF(MGS_SET_INFO == 255, "found %lld\n",
+ (long long)MGS_SET_INFO);
+ LASSERTF(MGS_LAST_OPC == 257, "found %lld\n",
+ (long long)MGS_LAST_OPC);
+ LASSERTF(SEC_CTX_INIT == 801, "found %lld\n",
+ (long long)SEC_CTX_INIT);
+ LASSERTF(SEC_CTX_INIT_CONT == 802, "found %lld\n",
+ (long long)SEC_CTX_INIT_CONT);
+ LASSERTF(SEC_CTX_FINI == 803, "found %lld\n",
+ (long long)SEC_CTX_FINI);
+ LASSERTF(SEC_LAST_OPC == 804, "found %lld\n",
+ (long long)SEC_LAST_OPC);
+ /* Sizes and Offsets */
+
+ /* Checks for struct obd_uuid */
+ LASSERTF((int)sizeof(struct obd_uuid) == 40, "found %lld\n",
+ (long long)(int)sizeof(struct obd_uuid));
+
+ /* Checks for struct lu_seq_range */
+ LASSERTF((int)sizeof(struct lu_seq_range) == 24, "found %lld\n",
+ (long long)(int)sizeof(struct lu_seq_range));
+ LASSERTF((int)offsetof(struct lu_seq_range, lsr_start) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct lu_seq_range, lsr_start));
+ LASSERTF((int)sizeof(((struct lu_seq_range *)0)->lsr_start) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct lu_seq_range *)0)->lsr_start));
+ LASSERTF((int)offsetof(struct lu_seq_range, lsr_end) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct lu_seq_range, lsr_end));
+ LASSERTF((int)sizeof(((struct lu_seq_range *)0)->lsr_end) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct lu_seq_range *)0)->lsr_end));
+ LASSERTF((int)offsetof(struct lu_seq_range, lsr_index) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct lu_seq_range, lsr_index));
+ LASSERTF((int)sizeof(((struct lu_seq_range *)0)->lsr_index) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct lu_seq_range *)0)->lsr_index));
+ LASSERTF((int)offsetof(struct lu_seq_range, lsr_flags) == 20, "found %lld\n",
+ (long long)(int)offsetof(struct lu_seq_range, lsr_flags));
+ LASSERTF((int)sizeof(((struct lu_seq_range *)0)->lsr_flags) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct lu_seq_range *)0)->lsr_flags));
+ LASSERTF(LU_SEQ_RANGE_MDT == 0, "found %lld\n",
+ (long long)LU_SEQ_RANGE_MDT);
+ LASSERTF(LU_SEQ_RANGE_OST == 1, "found %lld\n",
+ (long long)LU_SEQ_RANGE_OST);
+
+ /* Checks for struct lustre_mdt_attrs */
+ LASSERTF((int)sizeof(struct lustre_mdt_attrs) == 24, "found %lld\n",
+ (long long)(int)sizeof(struct lustre_mdt_attrs));
+ LASSERTF((int)offsetof(struct lustre_mdt_attrs, lma_compat) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct lustre_mdt_attrs, lma_compat));
+ LASSERTF((int)sizeof(((struct lustre_mdt_attrs *)0)->lma_compat) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct lustre_mdt_attrs *)0)->lma_compat));
+ LASSERTF((int)offsetof(struct lustre_mdt_attrs, lma_incompat) == 4, "found %lld\n",
+ (long long)(int)offsetof(struct lustre_mdt_attrs, lma_incompat));
+ LASSERTF((int)sizeof(((struct lustre_mdt_attrs *)0)->lma_incompat) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct lustre_mdt_attrs *)0)->lma_incompat));
+ LASSERTF((int)offsetof(struct lustre_mdt_attrs, lma_self_fid) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct lustre_mdt_attrs, lma_self_fid));
+ LASSERTF((int)sizeof(((struct lustre_mdt_attrs *)0)->lma_self_fid) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct lustre_mdt_attrs *)0)->lma_self_fid));
+ LASSERTF(LMAI_RELEASED == 0x00000001UL, "found 0x%.8xUL\n",
+ (unsigned)LMAI_RELEASED);
+ LASSERTF(LMAC_HSM == 0x00000001UL, "found 0x%.8xUL\n",
+ (unsigned)LMAC_HSM);
+ LASSERTF(LMAC_SOM == 0x00000002UL, "found 0x%.8xUL\n",
+ (unsigned)LMAC_SOM);
+ LASSERTF(LMAC_NOT_IN_OI == 0x00000004UL, "found 0x%.8xUL\n",
+ (unsigned)LMAC_NOT_IN_OI);
+ LASSERTF(LMAC_FID_ON_OST == 0x00000008UL, "found 0x%.8xUL\n",
+ (unsigned)LMAC_FID_ON_OST);
+ LASSERTF(OBJ_CREATE == 1, "found %lld\n",
+ (long long)OBJ_CREATE);
+ LASSERTF(OBJ_DESTROY == 2, "found %lld\n",
+ (long long)OBJ_DESTROY);
+ LASSERTF(OBJ_REF_ADD == 3, "found %lld\n",
+ (long long)OBJ_REF_ADD);
+ LASSERTF(OBJ_REF_DEL == 4, "found %lld\n",
+ (long long)OBJ_REF_DEL);
+ LASSERTF(OBJ_ATTR_SET == 5, "found %lld\n",
+ (long long)OBJ_ATTR_SET);
+ LASSERTF(OBJ_ATTR_GET == 6, "found %lld\n",
+ (long long)OBJ_ATTR_GET);
+ LASSERTF(OBJ_XATTR_SET == 7, "found %lld\n",
+ (long long)OBJ_XATTR_SET);
+ LASSERTF(OBJ_XATTR_GET == 8, "found %lld\n",
+ (long long)OBJ_XATTR_GET);
+ LASSERTF(OBJ_INDEX_LOOKUP == 9, "found %lld\n",
+ (long long)OBJ_INDEX_LOOKUP);
+ LASSERTF(OBJ_INDEX_LOOKUP == 9, "found %lld\n",
+ (long long)OBJ_INDEX_LOOKUP);
+ LASSERTF(OBJ_INDEX_INSERT == 10, "found %lld\n",
+ (long long)OBJ_INDEX_INSERT);
+ LASSERTF(OBJ_INDEX_DELETE == 11, "found %lld\n",
+ (long long)OBJ_INDEX_DELETE);
+
+ /* Checks for struct ost_id */
+ LASSERTF((int)sizeof(struct ost_id) == 16, "found %lld\n",
+ (long long)(int)sizeof(struct ost_id));
+ LASSERTF((int)offsetof(struct ost_id, oi) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct ost_id, oi));
+ LASSERTF((int)sizeof(((struct ost_id *)0)->oi) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct ost_id *)0)->oi));
+ LASSERTF(LUSTRE_FID_INIT_OID == 1, "found %lld\n",
+ (long long)LUSTRE_FID_INIT_OID);
+ LASSERTF(FID_SEQ_OST_MDT0 == 0, "found %lld\n",
+ (long long)FID_SEQ_OST_MDT0);
+ LASSERTF(FID_SEQ_LLOG == 1, "found %lld\n",
+ (long long)FID_SEQ_LLOG);
+ LASSERTF(FID_SEQ_ECHO == 2, "found %lld\n",
+ (long long)FID_SEQ_ECHO);
+ LASSERTF(FID_SEQ_OST_MDT1 == 3, "found %lld\n",
+ (long long)FID_SEQ_OST_MDT1);
+ LASSERTF(FID_SEQ_OST_MAX == 9, "found %lld\n",
+ (long long)FID_SEQ_OST_MAX);
+ LASSERTF(FID_SEQ_RSVD == 11, "found %lld\n",
+ (long long)FID_SEQ_RSVD);
+ LASSERTF(FID_SEQ_IGIF == 12, "found %lld\n",
+ (long long)FID_SEQ_IGIF);
+ LASSERTF(FID_SEQ_IGIF_MAX == 0x00000000ffffffffULL, "found 0x%.16llxULL\n",
+ (long long)FID_SEQ_IGIF_MAX);
+ LASSERTF(FID_SEQ_IDIF == 0x0000000100000000ULL, "found 0x%.16llxULL\n",
+ (long long)FID_SEQ_IDIF);
+ LASSERTF(FID_SEQ_IDIF_MAX == 0x00000001ffffffffULL, "found 0x%.16llxULL\n",
+ (long long)FID_SEQ_IDIF_MAX);
+ LASSERTF(FID_SEQ_START == 0x0000000200000000ULL, "found 0x%.16llxULL\n",
+ (long long)FID_SEQ_START);
+ LASSERTF(FID_SEQ_LOCAL_FILE == 0x0000000200000001ULL, "found 0x%.16llxULL\n",
+ (long long)FID_SEQ_LOCAL_FILE);
+ LASSERTF(FID_SEQ_DOT_LUSTRE == 0x0000000200000002ULL, "found 0x%.16llxULL\n",
+ (long long)FID_SEQ_DOT_LUSTRE);
+ LASSERTF(FID_SEQ_SPECIAL == 0x0000000200000004ULL, "found 0x%.16llxULL\n",
+ (long long)FID_SEQ_SPECIAL);
+ LASSERTF(FID_SEQ_QUOTA == 0x0000000200000005ULL, "found 0x%.16llxULL\n",
+ (long long)FID_SEQ_QUOTA);
+ LASSERTF(FID_SEQ_QUOTA_GLB == 0x0000000200000006ULL, "found 0x%.16llxULL\n",
+ (long long)FID_SEQ_QUOTA_GLB);
+ LASSERTF(FID_SEQ_ROOT == 0x0000000200000007ULL, "found 0x%.16llxULL\n",
+ (long long)FID_SEQ_ROOT);
+ LASSERTF(FID_SEQ_NORMAL == 0x0000000200000400ULL, "found 0x%.16llxULL\n",
+ (long long)FID_SEQ_NORMAL);
+ LASSERTF(FID_SEQ_LOV_DEFAULT == 0xffffffffffffffffULL, "found 0x%.16llxULL\n",
+ (long long)FID_SEQ_LOV_DEFAULT);
+ LASSERTF(FID_OID_SPECIAL_BFL == 0x00000001UL, "found 0x%.8xUL\n",
+ (unsigned)FID_OID_SPECIAL_BFL);
+ LASSERTF(FID_OID_DOT_LUSTRE == 0x00000001UL, "found 0x%.8xUL\n",
+ (unsigned)FID_OID_DOT_LUSTRE);
+ LASSERTF(FID_OID_DOT_LUSTRE_OBF == 0x00000002UL, "found 0x%.8xUL\n",
+ (unsigned)FID_OID_DOT_LUSTRE_OBF);
+
+ /* Checks for struct lu_dirent */
+ LASSERTF((int)sizeof(struct lu_dirent) == 32, "found %lld\n",
+ (long long)(int)sizeof(struct lu_dirent));
+ LASSERTF((int)offsetof(struct lu_dirent, lde_fid) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct lu_dirent, lde_fid));
+ LASSERTF((int)sizeof(((struct lu_dirent *)0)->lde_fid) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct lu_dirent *)0)->lde_fid));
+ LASSERTF((int)offsetof(struct lu_dirent, lde_hash) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct lu_dirent, lde_hash));
+ LASSERTF((int)sizeof(((struct lu_dirent *)0)->lde_hash) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct lu_dirent *)0)->lde_hash));
+ LASSERTF((int)offsetof(struct lu_dirent, lde_reclen) == 24, "found %lld\n",
+ (long long)(int)offsetof(struct lu_dirent, lde_reclen));
+ LASSERTF((int)sizeof(((struct lu_dirent *)0)->lde_reclen) == 2, "found %lld\n",
+ (long long)(int)sizeof(((struct lu_dirent *)0)->lde_reclen));
+ LASSERTF((int)offsetof(struct lu_dirent, lde_namelen) == 26, "found %lld\n",
+ (long long)(int)offsetof(struct lu_dirent, lde_namelen));
+ LASSERTF((int)sizeof(((struct lu_dirent *)0)->lde_namelen) == 2, "found %lld\n",
+ (long long)(int)sizeof(((struct lu_dirent *)0)->lde_namelen));
+ LASSERTF((int)offsetof(struct lu_dirent, lde_attrs) == 28, "found %lld\n",
+ (long long)(int)offsetof(struct lu_dirent, lde_attrs));
+ LASSERTF((int)sizeof(((struct lu_dirent *)0)->lde_attrs) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct lu_dirent *)0)->lde_attrs));
+ LASSERTF((int)offsetof(struct lu_dirent, lde_name[0]) == 32, "found %lld\n",
+ (long long)(int)offsetof(struct lu_dirent, lde_name[0]));
+ LASSERTF((int)sizeof(((struct lu_dirent *)0)->lde_name[0]) == 1, "found %lld\n",
+ (long long)(int)sizeof(((struct lu_dirent *)0)->lde_name[0]));
+ LASSERTF(LUDA_FID == 0x00000001UL, "found 0x%.8xUL\n",
+ (unsigned)LUDA_FID);
+ LASSERTF(LUDA_TYPE == 0x00000002UL, "found 0x%.8xUL\n",
+ (unsigned)LUDA_TYPE);
+ LASSERTF(LUDA_64BITHASH == 0x00000004UL, "found 0x%.8xUL\n",
+ (unsigned)LUDA_64BITHASH);
+
+ /* Checks for struct luda_type */
+ LASSERTF((int)sizeof(struct luda_type) == 2, "found %lld\n",
+ (long long)(int)sizeof(struct luda_type));
+ LASSERTF((int)offsetof(struct luda_type, lt_type) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct luda_type, lt_type));
+ LASSERTF((int)sizeof(((struct luda_type *)0)->lt_type) == 2, "found %lld\n",
+ (long long)(int)sizeof(((struct luda_type *)0)->lt_type));
+
+ /* Checks for struct lu_dirpage */
+ LASSERTF((int)sizeof(struct lu_dirpage) == 24, "found %lld\n",
+ (long long)(int)sizeof(struct lu_dirpage));
+ LASSERTF((int)offsetof(struct lu_dirpage, ldp_hash_start) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct lu_dirpage, ldp_hash_start));
+ LASSERTF((int)sizeof(((struct lu_dirpage *)0)->ldp_hash_start) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct lu_dirpage *)0)->ldp_hash_start));
+ LASSERTF((int)offsetof(struct lu_dirpage, ldp_hash_end) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct lu_dirpage, ldp_hash_end));
+ LASSERTF((int)sizeof(((struct lu_dirpage *)0)->ldp_hash_end) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct lu_dirpage *)0)->ldp_hash_end));
+ LASSERTF((int)offsetof(struct lu_dirpage, ldp_flags) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct lu_dirpage, ldp_flags));
+ LASSERTF((int)sizeof(((struct lu_dirpage *)0)->ldp_flags) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct lu_dirpage *)0)->ldp_flags));
+ LASSERTF((int)offsetof(struct lu_dirpage, ldp_pad0) == 20, "found %lld\n",
+ (long long)(int)offsetof(struct lu_dirpage, ldp_pad0));
+ LASSERTF((int)sizeof(((struct lu_dirpage *)0)->ldp_pad0) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct lu_dirpage *)0)->ldp_pad0));
+ LASSERTF((int)offsetof(struct lu_dirpage, ldp_entries[0]) == 24, "found %lld\n",
+ (long long)(int)offsetof(struct lu_dirpage, ldp_entries[0]));
+ LASSERTF((int)sizeof(((struct lu_dirpage *)0)->ldp_entries[0]) == 32, "found %lld\n",
+ (long long)(int)sizeof(((struct lu_dirpage *)0)->ldp_entries[0]));
+ LASSERTF(LDF_EMPTY == 1, "found %lld\n",
+ (long long)LDF_EMPTY);
+ LASSERTF(LDF_COLLIDE == 2, "found %lld\n",
+ (long long)LDF_COLLIDE);
+ LASSERTF(LU_PAGE_SIZE == 4096, "found %lld\n",
+ (long long)LU_PAGE_SIZE);
+ /* Checks for union lu_page */
+ LASSERTF((int)sizeof(union lu_page) == 4096, "found %lld\n",
+ (long long)(int)sizeof(union lu_page));
+
+ /* Checks for struct lustre_handle */
+ LASSERTF((int)sizeof(struct lustre_handle) == 8, "found %lld\n",
+ (long long)(int)sizeof(struct lustre_handle));
+ LASSERTF((int)offsetof(struct lustre_handle, cookie) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct lustre_handle, cookie));
+ LASSERTF((int)sizeof(((struct lustre_handle *)0)->cookie) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct lustre_handle *)0)->cookie));
+
+ /* Checks for struct lustre_msg_v2 */
+ LASSERTF((int)sizeof(struct lustre_msg_v2) == 32, "found %lld\n",
+ (long long)(int)sizeof(struct lustre_msg_v2));
+ LASSERTF((int)offsetof(struct lustre_msg_v2, lm_bufcount) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct lustre_msg_v2, lm_bufcount));
+ LASSERTF((int)sizeof(((struct lustre_msg_v2 *)0)->lm_bufcount) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct lustre_msg_v2 *)0)->lm_bufcount));
+ LASSERTF((int)offsetof(struct lustre_msg_v2, lm_secflvr) == 4, "found %lld\n",
+ (long long)(int)offsetof(struct lustre_msg_v2, lm_secflvr));
+ LASSERTF((int)sizeof(((struct lustre_msg_v2 *)0)->lm_secflvr) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct lustre_msg_v2 *)0)->lm_secflvr));
+ LASSERTF((int)offsetof(struct lustre_msg_v2, lm_magic) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct lustre_msg_v2, lm_magic));
+ LASSERTF((int)sizeof(((struct lustre_msg_v2 *)0)->lm_magic) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct lustre_msg_v2 *)0)->lm_magic));
+ LASSERTF((int)offsetof(struct lustre_msg_v2, lm_repsize) == 12, "found %lld\n",
+ (long long)(int)offsetof(struct lustre_msg_v2, lm_repsize));
+ LASSERTF((int)sizeof(((struct lustre_msg_v2 *)0)->lm_repsize) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct lustre_msg_v2 *)0)->lm_repsize));
+ LASSERTF((int)offsetof(struct lustre_msg_v2, lm_cksum) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct lustre_msg_v2, lm_cksum));
+ LASSERTF((int)sizeof(((struct lustre_msg_v2 *)0)->lm_cksum) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct lustre_msg_v2 *)0)->lm_cksum));
+ LASSERTF((int)offsetof(struct lustre_msg_v2, lm_flags) == 20, "found %lld\n",
+ (long long)(int)offsetof(struct lustre_msg_v2, lm_flags));
+ LASSERTF((int)sizeof(((struct lustre_msg_v2 *)0)->lm_flags) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct lustre_msg_v2 *)0)->lm_flags));
+ LASSERTF((int)offsetof(struct lustre_msg_v2, lm_padding_2) == 24, "found %lld\n",
+ (long long)(int)offsetof(struct lustre_msg_v2, lm_padding_2));
+ LASSERTF((int)sizeof(((struct lustre_msg_v2 *)0)->lm_padding_2) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct lustre_msg_v2 *)0)->lm_padding_2));
+ LASSERTF((int)offsetof(struct lustre_msg_v2, lm_padding_3) == 28, "found %lld\n",
+ (long long)(int)offsetof(struct lustre_msg_v2, lm_padding_3));
+ LASSERTF((int)sizeof(((struct lustre_msg_v2 *)0)->lm_padding_3) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct lustre_msg_v2 *)0)->lm_padding_3));
+ LASSERTF((int)offsetof(struct lustre_msg_v2, lm_buflens[0]) == 32, "found %lld\n",
+ (long long)(int)offsetof(struct lustre_msg_v2, lm_buflens[0]));
+ LASSERTF((int)sizeof(((struct lustre_msg_v2 *)0)->lm_buflens[0]) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct lustre_msg_v2 *)0)->lm_buflens[0]));
+ LASSERTF(LUSTRE_MSG_MAGIC_V1 == 0x0BD00BD0, "found 0x%.8x\n",
+ LUSTRE_MSG_MAGIC_V1);
+ LASSERTF(LUSTRE_MSG_MAGIC_V2 == 0x0BD00BD3, "found 0x%.8x\n",
+ LUSTRE_MSG_MAGIC_V2);
+ LASSERTF(LUSTRE_MSG_MAGIC_V1_SWABBED == 0xD00BD00B, "found 0x%.8x\n",
+ LUSTRE_MSG_MAGIC_V1_SWABBED);
+ LASSERTF(LUSTRE_MSG_MAGIC_V2_SWABBED == 0xD30BD00B, "found 0x%.8x\n",
+ LUSTRE_MSG_MAGIC_V2_SWABBED);
+
+ /* Checks for struct ptlrpc_body */
+ LASSERTF((int)sizeof(struct ptlrpc_body_v3) == 184, "found %lld\n",
+ (long long)(int)sizeof(struct ptlrpc_body_v3));
+ LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_handle) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct ptlrpc_body_v3, pb_handle));
+ LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_handle) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_handle));
+ LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_type) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct ptlrpc_body_v3, pb_type));
+ LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_type) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_type));
+ LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_version) == 12, "found %lld\n",
+ (long long)(int)offsetof(struct ptlrpc_body_v3, pb_version));
+ LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_version) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_version));
+ LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_opc) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct ptlrpc_body_v3, pb_opc));
+ LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_opc) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_opc));
+ LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_status) == 20, "found %lld\n",
+ (long long)(int)offsetof(struct ptlrpc_body_v3, pb_status));
+ LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_status) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_status));
+ LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_last_xid) == 24, "found %lld\n",
+ (long long)(int)offsetof(struct ptlrpc_body_v3, pb_last_xid));
+ LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_last_xid) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_last_xid));
+ LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_last_seen) == 32, "found %lld\n",
+ (long long)(int)offsetof(struct ptlrpc_body_v3, pb_last_seen));
+ LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_last_seen) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_last_seen));
+ LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_last_committed) == 40, "found %lld\n",
+ (long long)(int)offsetof(struct ptlrpc_body_v3, pb_last_committed));
+ LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_last_committed) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_last_committed));
+ LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_transno) == 48, "found %lld\n",
+ (long long)(int)offsetof(struct ptlrpc_body_v3, pb_transno));
+ LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_transno) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_transno));
+ LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_flags) == 56, "found %lld\n",
+ (long long)(int)offsetof(struct ptlrpc_body_v3, pb_flags));
+ LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_flags) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_flags));
+ LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_op_flags) == 60, "found %lld\n",
+ (long long)(int)offsetof(struct ptlrpc_body_v3, pb_op_flags));
+ LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_op_flags) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_op_flags));
+ LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_conn_cnt) == 64, "found %lld\n",
+ (long long)(int)offsetof(struct ptlrpc_body_v3, pb_conn_cnt));
+ LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_conn_cnt) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_conn_cnt));
+ LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_timeout) == 68, "found %lld\n",
+ (long long)(int)offsetof(struct ptlrpc_body_v3, pb_timeout));
+ LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_timeout) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_timeout));
+ LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_service_time) == 72, "found %lld\n",
+ (long long)(int)offsetof(struct ptlrpc_body_v3, pb_service_time));
+ LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_service_time) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_service_time));
+ LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_limit) == 76, "found %lld\n",
+ (long long)(int)offsetof(struct ptlrpc_body_v3, pb_limit));
+ LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_limit) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_limit));
+ LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_slv) == 80, "found %lld\n",
+ (long long)(int)offsetof(struct ptlrpc_body_v3, pb_slv));
+ LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_slv) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_slv));
+ CLASSERT(PTLRPC_NUM_VERSIONS == 4);
+ LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_pre_versions) == 88, "found %lld\n",
+ (long long)(int)offsetof(struct ptlrpc_body_v3, pb_pre_versions));
+ LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_pre_versions) == 32, "found %lld\n",
+ (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_pre_versions));
+ LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_padding) == 120, "found %lld\n",
+ (long long)(int)offsetof(struct ptlrpc_body_v3, pb_padding));
+ LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_padding) == 32, "found %lld\n",
+ (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_padding));
+ CLASSERT(JOBSTATS_JOBID_SIZE == 32);
+ LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_jobid) == 152, "found %lld\n",
+ (long long)(int)offsetof(struct ptlrpc_body_v3, pb_jobid));
+ LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_jobid) == 32, "found %lld\n",
+ (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_jobid));
+ LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_handle) == (int)offsetof(struct ptlrpc_body_v2, pb_handle), "%d != %d\n",
+ (int)offsetof(struct ptlrpc_body_v3, pb_handle), (int)offsetof(struct ptlrpc_body_v2, pb_handle));
+ LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_handle) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_handle), "%d != %d\n",
+ (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_handle), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_handle));
+ LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_type) == (int)offsetof(struct ptlrpc_body_v2, pb_type), "%d != %d\n",
+ (int)offsetof(struct ptlrpc_body_v3, pb_type), (int)offsetof(struct ptlrpc_body_v2, pb_type));
+ LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_type) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_type), "%d != %d\n",
+ (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_type), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_type));
+ LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_version) == (int)offsetof(struct ptlrpc_body_v2, pb_version), "%d != %d\n",
+ (int)offsetof(struct ptlrpc_body_v3, pb_version), (int)offsetof(struct ptlrpc_body_v2, pb_version));
+ LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_version) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_version), "%d != %d\n",
+ (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_version), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_version));
+ LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_opc) == (int)offsetof(struct ptlrpc_body_v2, pb_opc), "%d != %d\n",
+ (int)offsetof(struct ptlrpc_body_v3, pb_opc), (int)offsetof(struct ptlrpc_body_v2, pb_opc));
+ LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_opc) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_opc), "%d != %d\n",
+ (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_opc), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_opc));
+ LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_status) == (int)offsetof(struct ptlrpc_body_v2, pb_status), "%d != %d\n",
+ (int)offsetof(struct ptlrpc_body_v3, pb_status), (int)offsetof(struct ptlrpc_body_v2, pb_status));
+ LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_status) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_status), "%d != %d\n",
+ (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_status), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_status));
+ LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_last_xid) == (int)offsetof(struct ptlrpc_body_v2, pb_last_xid), "%d != %d\n",
+ (int)offsetof(struct ptlrpc_body_v3, pb_last_xid), (int)offsetof(struct ptlrpc_body_v2, pb_last_xid));
+ LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_last_xid) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_last_xid), "%d != %d\n",
+ (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_last_xid), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_last_xid));
+ LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_last_seen) == (int)offsetof(struct ptlrpc_body_v2, pb_last_seen), "%d != %d\n",
+ (int)offsetof(struct ptlrpc_body_v3, pb_last_seen), (int)offsetof(struct ptlrpc_body_v2, pb_last_seen));
+ LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_last_seen) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_last_seen), "%d != %d\n",
+ (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_last_seen), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_last_seen));
+ LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_last_committed) == (int)offsetof(struct ptlrpc_body_v2, pb_last_committed), "%d != %d\n",
+ (int)offsetof(struct ptlrpc_body_v3, pb_last_committed), (int)offsetof(struct ptlrpc_body_v2, pb_last_committed));
+ LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_last_committed) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_last_committed), "%d != %d\n",
+ (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_last_committed), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_last_committed));
+ LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_transno) == (int)offsetof(struct ptlrpc_body_v2, pb_transno), "%d != %d\n",
+ (int)offsetof(struct ptlrpc_body_v3, pb_transno), (int)offsetof(struct ptlrpc_body_v2, pb_transno));
+ LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_transno) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_transno), "%d != %d\n",
+ (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_transno), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_transno));
+ LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_flags) == (int)offsetof(struct ptlrpc_body_v2, pb_flags), "%d != %d\n",
+ (int)offsetof(struct ptlrpc_body_v3, pb_flags), (int)offsetof(struct ptlrpc_body_v2, pb_flags));
+ LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_flags) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_flags), "%d != %d\n",
+ (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_flags), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_flags));
+ LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_op_flags) == (int)offsetof(struct ptlrpc_body_v2, pb_op_flags), "%d != %d\n",
+ (int)offsetof(struct ptlrpc_body_v3, pb_op_flags), (int)offsetof(struct ptlrpc_body_v2, pb_op_flags));
+ LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_op_flags) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_op_flags), "%d != %d\n",
+ (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_op_flags), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_op_flags));
+ LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_conn_cnt) == (int)offsetof(struct ptlrpc_body_v2, pb_conn_cnt), "%d != %d\n",
+ (int)offsetof(struct ptlrpc_body_v3, pb_conn_cnt), (int)offsetof(struct ptlrpc_body_v2, pb_conn_cnt));
+ LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_conn_cnt) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_conn_cnt), "%d != %d\n",
+ (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_conn_cnt), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_conn_cnt));
+ LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_timeout) == (int)offsetof(struct ptlrpc_body_v2, pb_timeout), "%d != %d\n",
+ (int)offsetof(struct ptlrpc_body_v3, pb_timeout), (int)offsetof(struct ptlrpc_body_v2, pb_timeout));
+ LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_timeout) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_timeout), "%d != %d\n",
+ (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_timeout), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_timeout));
+ LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_service_time) == (int)offsetof(struct ptlrpc_body_v2, pb_service_time), "%d != %d\n",
+ (int)offsetof(struct ptlrpc_body_v3, pb_service_time), (int)offsetof(struct ptlrpc_body_v2, pb_service_time));
+ LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_service_time) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_service_time), "%d != %d\n",
+ (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_service_time), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_service_time));
+ LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_limit) == (int)offsetof(struct ptlrpc_body_v2, pb_limit), "%d != %d\n",
+ (int)offsetof(struct ptlrpc_body_v3, pb_limit), (int)offsetof(struct ptlrpc_body_v2, pb_limit));
+ LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_limit) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_limit), "%d != %d\n",
+ (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_limit), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_limit));
+ LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_slv) == (int)offsetof(struct ptlrpc_body_v2, pb_slv), "%d != %d\n",
+ (int)offsetof(struct ptlrpc_body_v3, pb_slv), (int)offsetof(struct ptlrpc_body_v2, pb_slv));
+ LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_slv) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_slv), "%d != %d\n",
+ (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_slv), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_slv));
+ LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_pre_versions) == (int)offsetof(struct ptlrpc_body_v2, pb_pre_versions), "%d != %d\n",
+ (int)offsetof(struct ptlrpc_body_v3, pb_pre_versions), (int)offsetof(struct ptlrpc_body_v2, pb_pre_versions));
+ LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_pre_versions) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_pre_versions), "%d != %d\n",
+ (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_pre_versions), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_pre_versions));
+ LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_padding) == (int)offsetof(struct ptlrpc_body_v2, pb_padding), "%d != %d\n",
+ (int)offsetof(struct ptlrpc_body_v3, pb_padding), (int)offsetof(struct ptlrpc_body_v2, pb_padding));
+ LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_padding) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_padding), "%d != %d\n",
+ (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_padding), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_padding));
+ LASSERTF(MSG_PTLRPC_BODY_OFF == 0, "found %lld\n",
+ (long long)MSG_PTLRPC_BODY_OFF);
+ LASSERTF(REQ_REC_OFF == 1, "found %lld\n",
+ (long long)REQ_REC_OFF);
+ LASSERTF(REPLY_REC_OFF == 1, "found %lld\n",
+ (long long)REPLY_REC_OFF);
+ LASSERTF(DLM_LOCKREQ_OFF == 1, "found %lld\n",
+ (long long)DLM_LOCKREQ_OFF);
+ LASSERTF(DLM_REQ_REC_OFF == 2, "found %lld\n",
+ (long long)DLM_REQ_REC_OFF);
+ LASSERTF(DLM_INTENT_IT_OFF == 2, "found %lld\n",
+ (long long)DLM_INTENT_IT_OFF);
+ LASSERTF(DLM_INTENT_REC_OFF == 3, "found %lld\n",
+ (long long)DLM_INTENT_REC_OFF);
+ LASSERTF(DLM_LOCKREPLY_OFF == 1, "found %lld\n",
+ (long long)DLM_LOCKREPLY_OFF);
+ LASSERTF(DLM_REPLY_REC_OFF == 2, "found %lld\n",
+ (long long)DLM_REPLY_REC_OFF);
+ LASSERTF(MSG_PTLRPC_HEADER_OFF == 31, "found %lld\n",
+ (long long)MSG_PTLRPC_HEADER_OFF);
+ LASSERTF(PTLRPC_MSG_VERSION == 0x00000003, "found 0x%.8x\n",
+ PTLRPC_MSG_VERSION);
+ LASSERTF(LUSTRE_VERSION_MASK == 0xffff0000, "found 0x%.8x\n",
+ LUSTRE_VERSION_MASK);
+ LASSERTF(LUSTRE_OBD_VERSION == 0x00010000, "found 0x%.8x\n",
+ LUSTRE_OBD_VERSION);
+ LASSERTF(LUSTRE_MDS_VERSION == 0x00020000, "found 0x%.8x\n",
+ LUSTRE_MDS_VERSION);
+ LASSERTF(LUSTRE_OST_VERSION == 0x00030000, "found 0x%.8x\n",
+ LUSTRE_OST_VERSION);
+ LASSERTF(LUSTRE_DLM_VERSION == 0x00040000, "found 0x%.8x\n",
+ LUSTRE_DLM_VERSION);
+ LASSERTF(LUSTRE_LOG_VERSION == 0x00050000, "found 0x%.8x\n",
+ LUSTRE_LOG_VERSION);
+ LASSERTF(LUSTRE_MGS_VERSION == 0x00060000, "found 0x%.8x\n",
+ LUSTRE_MGS_VERSION);
+ LASSERTF(MSGHDR_AT_SUPPORT == 1, "found %lld\n",
+ (long long)MSGHDR_AT_SUPPORT);
+ LASSERTF(MSGHDR_CKSUM_INCOMPAT18 == 2, "found %lld\n",
+ (long long)MSGHDR_CKSUM_INCOMPAT18);
+ LASSERTF(MSG_OP_FLAG_MASK == 0xffff0000UL, "found 0x%.8xUL\n",
+ (unsigned)MSG_OP_FLAG_MASK);
+ LASSERTF(MSG_OP_FLAG_SHIFT == 16, "found %lld\n",
+ (long long)MSG_OP_FLAG_SHIFT);
+ LASSERTF(MSG_GEN_FLAG_MASK == 0x0000ffffUL, "found 0x%.8xUL\n",
+ (unsigned)MSG_GEN_FLAG_MASK);
+ LASSERTF(MSG_LAST_REPLAY == 0x00000001UL, "found 0x%.8xUL\n",
+ (unsigned)MSG_LAST_REPLAY);
+ LASSERTF(MSG_RESENT == 0x00000002UL, "found 0x%.8xUL\n",
+ (unsigned)MSG_RESENT);
+ LASSERTF(MSG_REPLAY == 0x00000004UL, "found 0x%.8xUL\n",
+ (unsigned)MSG_REPLAY);
+ LASSERTF(MSG_DELAY_REPLAY == 0x00000010UL, "found 0x%.8xUL\n",
+ (unsigned)MSG_DELAY_REPLAY);
+ LASSERTF(MSG_VERSION_REPLAY == 0x00000020UL, "found 0x%.8xUL\n",
+ (unsigned)MSG_VERSION_REPLAY);
+ LASSERTF(MSG_REQ_REPLAY_DONE == 0x00000040UL, "found 0x%.8xUL\n",
+ (unsigned)MSG_REQ_REPLAY_DONE);
+ LASSERTF(MSG_LOCK_REPLAY_DONE == 0x00000080UL, "found 0x%.8xUL\n",
+ (unsigned)MSG_LOCK_REPLAY_DONE);
+ LASSERTF(MSG_CONNECT_RECOVERING == 0x00000001UL, "found 0x%.8xUL\n",
+ (unsigned)MSG_CONNECT_RECOVERING);
+ LASSERTF(MSG_CONNECT_RECONNECT == 0x00000002UL, "found 0x%.8xUL\n",
+ (unsigned)MSG_CONNECT_RECONNECT);
+ LASSERTF(MSG_CONNECT_REPLAYABLE == 0x00000004UL, "found 0x%.8xUL\n",
+ (unsigned)MSG_CONNECT_REPLAYABLE);
+ LASSERTF(MSG_CONNECT_LIBCLIENT == 0x00000010UL, "found 0x%.8xUL\n",
+ (unsigned)MSG_CONNECT_LIBCLIENT);
+ LASSERTF(MSG_CONNECT_INITIAL == 0x00000020UL, "found 0x%.8xUL\n",
+ (unsigned)MSG_CONNECT_INITIAL);
+ LASSERTF(MSG_CONNECT_ASYNC == 0x00000040UL, "found 0x%.8xUL\n",
+ (unsigned)MSG_CONNECT_ASYNC);
+ LASSERTF(MSG_CONNECT_NEXT_VER == 0x00000080UL, "found 0x%.8xUL\n",
+ (unsigned)MSG_CONNECT_NEXT_VER);
+ LASSERTF(MSG_CONNECT_TRANSNO == 0x00000100UL, "found 0x%.8xUL\n",
+ (unsigned)MSG_CONNECT_TRANSNO);
+
+ /* Checks for struct obd_connect_data */
+ LASSERTF((int)sizeof(struct obd_connect_data) == 192, "found %lld\n",
+ (long long)(int)sizeof(struct obd_connect_data));
+ LASSERTF((int)offsetof(struct obd_connect_data, ocd_connect_flags) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct obd_connect_data, ocd_connect_flags));
+ LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_connect_flags) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_connect_flags));
+ LASSERTF((int)offsetof(struct obd_connect_data, ocd_version) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct obd_connect_data, ocd_version));
+ LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_version) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_version));
+ LASSERTF((int)offsetof(struct obd_connect_data, ocd_grant) == 12, "found %lld\n",
+ (long long)(int)offsetof(struct obd_connect_data, ocd_grant));
+ LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_grant) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_grant));
+ LASSERTF((int)offsetof(struct obd_connect_data, ocd_index) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct obd_connect_data, ocd_index));
+ LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_index) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_index));
+ LASSERTF((int)offsetof(struct obd_connect_data, ocd_brw_size) == 20, "found %lld\n",
+ (long long)(int)offsetof(struct obd_connect_data, ocd_brw_size));
+ LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_brw_size) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_brw_size));
+ LASSERTF((int)offsetof(struct obd_connect_data, ocd_ibits_known) == 24, "found %lld\n",
+ (long long)(int)offsetof(struct obd_connect_data, ocd_ibits_known));
+ LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_ibits_known) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_ibits_known));
+ LASSERTF((int)offsetof(struct obd_connect_data, ocd_blocksize) == 32, "found %lld\n",
+ (long long)(int)offsetof(struct obd_connect_data, ocd_blocksize));
+ LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_blocksize) == 1, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_blocksize));
+ LASSERTF((int)offsetof(struct obd_connect_data, ocd_inodespace) == 33, "found %lld\n",
+ (long long)(int)offsetof(struct obd_connect_data, ocd_inodespace));
+ LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_inodespace) == 1, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_inodespace));
+ LASSERTF((int)offsetof(struct obd_connect_data, ocd_grant_extent) == 34, "found %lld\n",
+ (long long)(int)offsetof(struct obd_connect_data, ocd_grant_extent));
+ LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_grant_extent) == 2, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_grant_extent));
+ LASSERTF((int)offsetof(struct obd_connect_data, ocd_unused) == 36, "found %lld\n",
+ (long long)(int)offsetof(struct obd_connect_data, ocd_unused));
+ LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_unused) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_unused));
+ LASSERTF((int)offsetof(struct obd_connect_data, ocd_transno) == 40, "found %lld\n",
+ (long long)(int)offsetof(struct obd_connect_data, ocd_transno));
+ LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_transno) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_transno));
+ LASSERTF((int)offsetof(struct obd_connect_data, ocd_group) == 48, "found %lld\n",
+ (long long)(int)offsetof(struct obd_connect_data, ocd_group));
+ LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_group) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_group));
+ LASSERTF((int)offsetof(struct obd_connect_data, ocd_cksum_types) == 52, "found %lld\n",
+ (long long)(int)offsetof(struct obd_connect_data, ocd_cksum_types));
+ LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_cksum_types) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_cksum_types));
+ LASSERTF((int)offsetof(struct obd_connect_data, ocd_max_easize) == 56, "found %lld\n",
+ (long long)(int)offsetof(struct obd_connect_data, ocd_max_easize));
+ LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_max_easize) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_max_easize));
+ LASSERTF((int)offsetof(struct obd_connect_data, ocd_instance) == 60, "found %lld\n",
+ (long long)(int)offsetof(struct obd_connect_data, ocd_instance));
+ LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_instance) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_instance));
+ LASSERTF((int)offsetof(struct obd_connect_data, ocd_maxbytes) == 64, "found %lld\n",
+ (long long)(int)offsetof(struct obd_connect_data, ocd_maxbytes));
+ LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_maxbytes) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_maxbytes));
+ LASSERTF((int)offsetof(struct obd_connect_data, padding1) == 72, "found %lld\n",
+ (long long)(int)offsetof(struct obd_connect_data, padding1));
+ LASSERTF((int)sizeof(((struct obd_connect_data *)0)->padding1) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_connect_data *)0)->padding1));
+ LASSERTF((int)offsetof(struct obd_connect_data, padding2) == 80, "found %lld\n",
+ (long long)(int)offsetof(struct obd_connect_data, padding2));
+ LASSERTF((int)sizeof(((struct obd_connect_data *)0)->padding2) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_connect_data *)0)->padding2));
+ LASSERTF((int)offsetof(struct obd_connect_data, padding3) == 88, "found %lld\n",
+ (long long)(int)offsetof(struct obd_connect_data, padding3));
+ LASSERTF((int)sizeof(((struct obd_connect_data *)0)->padding3) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_connect_data *)0)->padding3));
+ LASSERTF((int)offsetof(struct obd_connect_data, padding4) == 96, "found %lld\n",
+ (long long)(int)offsetof(struct obd_connect_data, padding4));
+ LASSERTF((int)sizeof(((struct obd_connect_data *)0)->padding4) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_connect_data *)0)->padding4));
+ LASSERTF((int)offsetof(struct obd_connect_data, padding5) == 104, "found %lld\n",
+ (long long)(int)offsetof(struct obd_connect_data, padding5));
+ LASSERTF((int)sizeof(((struct obd_connect_data *)0)->padding5) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_connect_data *)0)->padding5));
+ LASSERTF((int)offsetof(struct obd_connect_data, padding6) == 112, "found %lld\n",
+ (long long)(int)offsetof(struct obd_connect_data, padding6));
+ LASSERTF((int)sizeof(((struct obd_connect_data *)0)->padding6) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_connect_data *)0)->padding6));
+ LASSERTF((int)offsetof(struct obd_connect_data, padding7) == 120, "found %lld\n",
+ (long long)(int)offsetof(struct obd_connect_data, padding7));
+ LASSERTF((int)sizeof(((struct obd_connect_data *)0)->padding7) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_connect_data *)0)->padding7));
+ LASSERTF((int)offsetof(struct obd_connect_data, padding8) == 128, "found %lld\n",
+ (long long)(int)offsetof(struct obd_connect_data, padding8));
+ LASSERTF((int)sizeof(((struct obd_connect_data *)0)->padding8) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_connect_data *)0)->padding8));
+ LASSERTF((int)offsetof(struct obd_connect_data, padding9) == 136, "found %lld\n",
+ (long long)(int)offsetof(struct obd_connect_data, padding9));
+ LASSERTF((int)sizeof(((struct obd_connect_data *)0)->padding9) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_connect_data *)0)->padding9));
+ LASSERTF((int)offsetof(struct obd_connect_data, paddingA) == 144, "found %lld\n",
+ (long long)(int)offsetof(struct obd_connect_data, paddingA));
+ LASSERTF((int)sizeof(((struct obd_connect_data *)0)->paddingA) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_connect_data *)0)->paddingA));
+ LASSERTF((int)offsetof(struct obd_connect_data, paddingB) == 152, "found %lld\n",
+ (long long)(int)offsetof(struct obd_connect_data, paddingB));
+ LASSERTF((int)sizeof(((struct obd_connect_data *)0)->paddingB) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_connect_data *)0)->paddingB));
+ LASSERTF((int)offsetof(struct obd_connect_data, paddingC) == 160, "found %lld\n",
+ (long long)(int)offsetof(struct obd_connect_data, paddingC));
+ LASSERTF((int)sizeof(((struct obd_connect_data *)0)->paddingC) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_connect_data *)0)->paddingC));
+ LASSERTF((int)offsetof(struct obd_connect_data, paddingD) == 168, "found %lld\n",
+ (long long)(int)offsetof(struct obd_connect_data, paddingD));
+ LASSERTF((int)sizeof(((struct obd_connect_data *)0)->paddingD) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_connect_data *)0)->paddingD));
+ LASSERTF((int)offsetof(struct obd_connect_data, paddingE) == 176, "found %lld\n",
+ (long long)(int)offsetof(struct obd_connect_data, paddingE));
+ LASSERTF((int)sizeof(((struct obd_connect_data *)0)->paddingE) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_connect_data *)0)->paddingE));
+ LASSERTF((int)offsetof(struct obd_connect_data, paddingF) == 184, "found %lld\n",
+ (long long)(int)offsetof(struct obd_connect_data, paddingF));
+ LASSERTF((int)sizeof(((struct obd_connect_data *)0)->paddingF) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_connect_data *)0)->paddingF));
+ LASSERTF(OBD_CONNECT_RDONLY == 0x1ULL, "found 0x%.16llxULL\n",
+ OBD_CONNECT_RDONLY);
+ LASSERTF(OBD_CONNECT_INDEX == 0x2ULL, "found 0x%.16llxULL\n",
+ OBD_CONNECT_INDEX);
+ LASSERTF(OBD_CONNECT_MDS == 0x4ULL, "found 0x%.16llxULL\n",
+ OBD_CONNECT_MDS);
+ LASSERTF(OBD_CONNECT_GRANT == 0x8ULL, "found 0x%.16llxULL\n",
+ OBD_CONNECT_GRANT);
+ LASSERTF(OBD_CONNECT_SRVLOCK == 0x10ULL, "found 0x%.16llxULL\n",
+ OBD_CONNECT_SRVLOCK);
+ LASSERTF(OBD_CONNECT_VERSION == 0x20ULL, "found 0x%.16llxULL\n",
+ OBD_CONNECT_VERSION);
+ LASSERTF(OBD_CONNECT_REQPORTAL == 0x40ULL, "found 0x%.16llxULL\n",
+ OBD_CONNECT_REQPORTAL);
+ LASSERTF(OBD_CONNECT_ACL == 0x80ULL, "found 0x%.16llxULL\n",
+ OBD_CONNECT_ACL);
+ LASSERTF(OBD_CONNECT_XATTR == 0x100ULL, "found 0x%.16llxULL\n",
+ OBD_CONNECT_XATTR);
+ LASSERTF(OBD_CONNECT_CROW == 0x200ULL, "found 0x%.16llxULL\n",
+ OBD_CONNECT_CROW);
+ LASSERTF(OBD_CONNECT_TRUNCLOCK == 0x400ULL, "found 0x%.16llxULL\n",
+ OBD_CONNECT_TRUNCLOCK);
+ LASSERTF(OBD_CONNECT_TRANSNO == 0x800ULL, "found 0x%.16llxULL\n",
+ OBD_CONNECT_TRANSNO);
+ LASSERTF(OBD_CONNECT_IBITS == 0x1000ULL, "found 0x%.16llxULL\n",
+ OBD_CONNECT_IBITS);
+ LASSERTF(OBD_CONNECT_JOIN == 0x2000ULL, "found 0x%.16llxULL\n",
+ OBD_CONNECT_JOIN);
+ LASSERTF(OBD_CONNECT_ATTRFID == 0x4000ULL, "found 0x%.16llxULL\n",
+ OBD_CONNECT_ATTRFID);
+ LASSERTF(OBD_CONNECT_NODEVOH == 0x8000ULL, "found 0x%.16llxULL\n",
+ OBD_CONNECT_NODEVOH);
+ LASSERTF(OBD_CONNECT_RMT_CLIENT == 0x10000ULL, "found 0x%.16llxULL\n",
+ OBD_CONNECT_RMT_CLIENT);
+ LASSERTF(OBD_CONNECT_RMT_CLIENT_FORCE == 0x20000ULL, "found 0x%.16llxULL\n",
+ OBD_CONNECT_RMT_CLIENT_FORCE);
+ LASSERTF(OBD_CONNECT_BRW_SIZE == 0x40000ULL, "found 0x%.16llxULL\n",
+ OBD_CONNECT_BRW_SIZE);
+ LASSERTF(OBD_CONNECT_QUOTA64 == 0x80000ULL, "found 0x%.16llxULL\n",
+ OBD_CONNECT_QUOTA64);
+ LASSERTF(OBD_CONNECT_MDS_CAPA == 0x100000ULL, "found 0x%.16llxULL\n",
+ OBD_CONNECT_MDS_CAPA);
+ LASSERTF(OBD_CONNECT_OSS_CAPA == 0x200000ULL, "found 0x%.16llxULL\n",
+ OBD_CONNECT_OSS_CAPA);
+ LASSERTF(OBD_CONNECT_CANCELSET == 0x400000ULL, "found 0x%.16llxULL\n",
+ OBD_CONNECT_CANCELSET);
+ LASSERTF(OBD_CONNECT_SOM == 0x800000ULL, "found 0x%.16llxULL\n",
+ OBD_CONNECT_SOM);
+ LASSERTF(OBD_CONNECT_AT == 0x1000000ULL, "found 0x%.16llxULL\n",
+ OBD_CONNECT_AT);
+ LASSERTF(OBD_CONNECT_LRU_RESIZE == 0x2000000ULL, "found 0x%.16llxULL\n",
+ OBD_CONNECT_LRU_RESIZE);
+ LASSERTF(OBD_CONNECT_MDS_MDS == 0x4000000ULL, "found 0x%.16llxULL\n",
+ OBD_CONNECT_MDS_MDS);
+ LASSERTF(OBD_CONNECT_REAL == 0x8000000ULL, "found 0x%.16llxULL\n",
+ OBD_CONNECT_REAL);
+ LASSERTF(OBD_CONNECT_CHANGE_QS == 0x10000000ULL, "found 0x%.16llxULL\n",
+ OBD_CONNECT_CHANGE_QS);
+ LASSERTF(OBD_CONNECT_CKSUM == 0x20000000ULL, "found 0x%.16llxULL\n",
+ OBD_CONNECT_CKSUM);
+ LASSERTF(OBD_CONNECT_FID == 0x40000000ULL, "found 0x%.16llxULL\n",
+ OBD_CONNECT_FID);
+ LASSERTF(OBD_CONNECT_VBR == 0x80000000ULL, "found 0x%.16llxULL\n",
+ OBD_CONNECT_VBR);
+ LASSERTF(OBD_CONNECT_LOV_V3 == 0x100000000ULL, "found 0x%.16llxULL\n",
+ OBD_CONNECT_LOV_V3);
+ LASSERTF(OBD_CONNECT_GRANT_SHRINK == 0x200000000ULL, "found 0x%.16llxULL\n",
+ OBD_CONNECT_GRANT_SHRINK);
+ LASSERTF(OBD_CONNECT_SKIP_ORPHAN == 0x400000000ULL, "found 0x%.16llxULL\n",
+ OBD_CONNECT_SKIP_ORPHAN);
+ LASSERTF(OBD_CONNECT_MAX_EASIZE == 0x800000000ULL, "found 0x%.16llxULL\n",
+ OBD_CONNECT_MAX_EASIZE);
+ LASSERTF(OBD_CONNECT_FULL20 == 0x1000000000ULL, "found 0x%.16llxULL\n",
+ OBD_CONNECT_FULL20);
+ LASSERTF(OBD_CONNECT_LAYOUTLOCK == 0x2000000000ULL, "found 0x%.16llxULL\n",
+ OBD_CONNECT_LAYOUTLOCK);
+ LASSERTF(OBD_CONNECT_64BITHASH == 0x4000000000ULL, "found 0x%.16llxULL\n",
+ OBD_CONNECT_64BITHASH);
+ LASSERTF(OBD_CONNECT_MAXBYTES == 0x8000000000ULL, "found 0x%.16llxULL\n",
+ OBD_CONNECT_MAXBYTES);
+ LASSERTF(OBD_CONNECT_IMP_RECOV == 0x10000000000ULL, "found 0x%.16llxULL\n",
+ OBD_CONNECT_IMP_RECOV);
+ LASSERTF(OBD_CONNECT_JOBSTATS == 0x20000000000ULL, "found 0x%.16llxULL\n",
+ OBD_CONNECT_JOBSTATS);
+ LASSERTF(OBD_CONNECT_UMASK == 0x40000000000ULL, "found 0x%.16llxULL\n",
+ OBD_CONNECT_UMASK);
+ LASSERTF(OBD_CONNECT_EINPROGRESS == 0x80000000000ULL, "found 0x%.16llxULL\n",
+ OBD_CONNECT_EINPROGRESS);
+ LASSERTF(OBD_CONNECT_GRANT_PARAM == 0x100000000000ULL, "found 0x%.16llxULL\n",
+ OBD_CONNECT_GRANT_PARAM);
+ LASSERTF(OBD_CONNECT_FLOCK_OWNER == 0x200000000000ULL, "found 0x%.16llxULL\n",
+ OBD_CONNECT_FLOCK_OWNER);
+ LASSERTF(OBD_CONNECT_LVB_TYPE == 0x400000000000ULL, "found 0x%.16llxULL\n",
+ OBD_CONNECT_LVB_TYPE);
+ LASSERTF(OBD_CONNECT_NANOSEC_TIME == 0x800000000000ULL, "found 0x%.16llxULL\n",
+ OBD_CONNECT_NANOSEC_TIME);
+ LASSERTF(OBD_CONNECT_LIGHTWEIGHT == 0x1000000000000ULL, "found 0x%.16llxULL\n",
+ OBD_CONNECT_LIGHTWEIGHT);
+ LASSERTF(OBD_CONNECT_SHORTIO == 0x2000000000000ULL, "found 0x%.16llxULL\n",
+ OBD_CONNECT_SHORTIO);
+ LASSERTF(OBD_CONNECT_PINGLESS == 0x4000000000000ULL, "found 0x%.16llxULL\n",
+ OBD_CONNECT_PINGLESS);
+ LASSERTF(OBD_CONNECT_FLOCK_DEAD == 0x8000000000000ULL,
+ "found 0x%.16llxULL\n", OBD_CONNECT_FLOCK_DEAD);
+ LASSERTF(OBD_CKSUM_CRC32 == 0x00000001UL, "found 0x%.8xUL\n",
+ (unsigned)OBD_CKSUM_CRC32);
+ LASSERTF(OBD_CKSUM_ADLER == 0x00000002UL, "found 0x%.8xUL\n",
+ (unsigned)OBD_CKSUM_ADLER);
+ LASSERTF(OBD_CKSUM_CRC32C == 0x00000004UL, "found 0x%.8xUL\n",
+ (unsigned)OBD_CKSUM_CRC32C);
+
+ /* Checks for struct obdo */
+ LASSERTF((int)sizeof(struct obdo) == 208, "found %lld\n",
+ (long long)(int)sizeof(struct obdo));
+ LASSERTF((int)offsetof(struct obdo, o_valid) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct obdo, o_valid));
+ LASSERTF((int)sizeof(((struct obdo *)0)->o_valid) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct obdo *)0)->o_valid));
+ LASSERTF((int)offsetof(struct obdo, o_oi) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct obdo, o_oi));
+ LASSERTF((int)sizeof(((struct obdo *)0)->o_oi) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct obdo *)0)->o_oi));
+ LASSERTF((int)offsetof(struct obdo, o_parent_seq) == 24, "found %lld\n",
+ (long long)(int)offsetof(struct obdo, o_parent_seq));
+ LASSERTF((int)sizeof(((struct obdo *)0)->o_parent_seq) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct obdo *)0)->o_parent_seq));
+ LASSERTF((int)offsetof(struct obdo, o_size) == 32, "found %lld\n",
+ (long long)(int)offsetof(struct obdo, o_size));
+ LASSERTF((int)sizeof(((struct obdo *)0)->o_size) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct obdo *)0)->o_size));
+ LASSERTF((int)offsetof(struct obdo, o_mtime) == 40, "found %lld\n",
+ (long long)(int)offsetof(struct obdo, o_mtime));
+ LASSERTF((int)sizeof(((struct obdo *)0)->o_mtime) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct obdo *)0)->o_mtime));
+ LASSERTF((int)offsetof(struct obdo, o_atime) == 48, "found %lld\n",
+ (long long)(int)offsetof(struct obdo, o_atime));
+ LASSERTF((int)sizeof(((struct obdo *)0)->o_atime) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct obdo *)0)->o_atime));
+ LASSERTF((int)offsetof(struct obdo, o_ctime) == 56, "found %lld\n",
+ (long long)(int)offsetof(struct obdo, o_ctime));
+ LASSERTF((int)sizeof(((struct obdo *)0)->o_ctime) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct obdo *)0)->o_ctime));
+ LASSERTF((int)offsetof(struct obdo, o_blocks) == 64, "found %lld\n",
+ (long long)(int)offsetof(struct obdo, o_blocks));
+ LASSERTF((int)sizeof(((struct obdo *)0)->o_blocks) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct obdo *)0)->o_blocks));
+ LASSERTF((int)offsetof(struct obdo, o_grant) == 72, "found %lld\n",
+ (long long)(int)offsetof(struct obdo, o_grant));
+ LASSERTF((int)sizeof(((struct obdo *)0)->o_grant) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct obdo *)0)->o_grant));
+ LASSERTF((int)offsetof(struct obdo, o_blksize) == 80, "found %lld\n",
+ (long long)(int)offsetof(struct obdo, o_blksize));
+ LASSERTF((int)sizeof(((struct obdo *)0)->o_blksize) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct obdo *)0)->o_blksize));
+ LASSERTF((int)offsetof(struct obdo, o_mode) == 84, "found %lld\n",
+ (long long)(int)offsetof(struct obdo, o_mode));
+ LASSERTF((int)sizeof(((struct obdo *)0)->o_mode) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct obdo *)0)->o_mode));
+ LASSERTF((int)offsetof(struct obdo, o_uid) == 88, "found %lld\n",
+ (long long)(int)offsetof(struct obdo, o_uid));
+ LASSERTF((int)sizeof(((struct obdo *)0)->o_uid) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct obdo *)0)->o_uid));
+ LASSERTF((int)offsetof(struct obdo, o_gid) == 92, "found %lld\n",
+ (long long)(int)offsetof(struct obdo, o_gid));
+ LASSERTF((int)sizeof(((struct obdo *)0)->o_gid) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct obdo *)0)->o_gid));
+ LASSERTF((int)offsetof(struct obdo, o_flags) == 96, "found %lld\n",
+ (long long)(int)offsetof(struct obdo, o_flags));
+ LASSERTF((int)sizeof(((struct obdo *)0)->o_flags) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct obdo *)0)->o_flags));
+ LASSERTF((int)offsetof(struct obdo, o_nlink) == 100, "found %lld\n",
+ (long long)(int)offsetof(struct obdo, o_nlink));
+ LASSERTF((int)sizeof(((struct obdo *)0)->o_nlink) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct obdo *)0)->o_nlink));
+ LASSERTF((int)offsetof(struct obdo, o_parent_oid) == 104, "found %lld\n",
+ (long long)(int)offsetof(struct obdo, o_parent_oid));
+ LASSERTF((int)sizeof(((struct obdo *)0)->o_parent_oid) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct obdo *)0)->o_parent_oid));
+ LASSERTF((int)offsetof(struct obdo, o_misc) == 108, "found %lld\n",
+ (long long)(int)offsetof(struct obdo, o_misc));
+ LASSERTF((int)sizeof(((struct obdo *)0)->o_misc) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct obdo *)0)->o_misc));
+ LASSERTF((int)offsetof(struct obdo, o_ioepoch) == 112, "found %lld\n",
+ (long long)(int)offsetof(struct obdo, o_ioepoch));
+ LASSERTF((int)sizeof(((struct obdo *)0)->o_ioepoch) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct obdo *)0)->o_ioepoch));
+ LASSERTF((int)offsetof(struct obdo, o_stripe_idx) == 120, "found %lld\n",
+ (long long)(int)offsetof(struct obdo, o_stripe_idx));
+ LASSERTF((int)sizeof(((struct obdo *)0)->o_stripe_idx) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct obdo *)0)->o_stripe_idx));
+ LASSERTF((int)offsetof(struct obdo, o_parent_ver) == 124, "found %lld\n",
+ (long long)(int)offsetof(struct obdo, o_parent_ver));
+ LASSERTF((int)sizeof(((struct obdo *)0)->o_parent_ver) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct obdo *)0)->o_parent_ver));
+ LASSERTF((int)offsetof(struct obdo, o_handle) == 128, "found %lld\n",
+ (long long)(int)offsetof(struct obdo, o_handle));
+ LASSERTF((int)sizeof(((struct obdo *)0)->o_handle) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct obdo *)0)->o_handle));
+ LASSERTF((int)offsetof(struct obdo, o_lcookie) == 136, "found %lld\n",
+ (long long)(int)offsetof(struct obdo, o_lcookie));
+ LASSERTF((int)sizeof(((struct obdo *)0)->o_lcookie) == 32, "found %lld\n",
+ (long long)(int)sizeof(((struct obdo *)0)->o_lcookie));
+ LASSERTF((int)offsetof(struct obdo, o_uid_h) == 168, "found %lld\n",
+ (long long)(int)offsetof(struct obdo, o_uid_h));
+ LASSERTF((int)sizeof(((struct obdo *)0)->o_uid_h) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct obdo *)0)->o_uid_h));
+ LASSERTF((int)offsetof(struct obdo, o_gid_h) == 172, "found %lld\n",
+ (long long)(int)offsetof(struct obdo, o_gid_h));
+ LASSERTF((int)sizeof(((struct obdo *)0)->o_gid_h) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct obdo *)0)->o_gid_h));
+ LASSERTF((int)offsetof(struct obdo, o_data_version) == 176, "found %lld\n",
+ (long long)(int)offsetof(struct obdo, o_data_version));
+ LASSERTF((int)sizeof(((struct obdo *)0)->o_data_version) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct obdo *)0)->o_data_version));
+ LASSERTF((int)offsetof(struct obdo, o_padding_4) == 184, "found %lld\n",
+ (long long)(int)offsetof(struct obdo, o_padding_4));
+ LASSERTF((int)sizeof(((struct obdo *)0)->o_padding_4) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct obdo *)0)->o_padding_4));
+ LASSERTF((int)offsetof(struct obdo, o_padding_5) == 192, "found %lld\n",
+ (long long)(int)offsetof(struct obdo, o_padding_5));
+ LASSERTF((int)sizeof(((struct obdo *)0)->o_padding_5) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct obdo *)0)->o_padding_5));
+ LASSERTF((int)offsetof(struct obdo, o_padding_6) == 200, "found %lld\n",
+ (long long)(int)offsetof(struct obdo, o_padding_6));
+ LASSERTF((int)sizeof(((struct obdo *)0)->o_padding_6) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct obdo *)0)->o_padding_6));
+ LASSERTF(OBD_MD_FLID == (0x00000001ULL), "found 0x%.16llxULL\n",
+ OBD_MD_FLID);
+ LASSERTF(OBD_MD_FLATIME == (0x00000002ULL), "found 0x%.16llxULL\n",
+ OBD_MD_FLATIME);
+ LASSERTF(OBD_MD_FLMTIME == (0x00000004ULL), "found 0x%.16llxULL\n",
+ OBD_MD_FLMTIME);
+ LASSERTF(OBD_MD_FLCTIME == (0x00000008ULL), "found 0x%.16llxULL\n",
+ OBD_MD_FLCTIME);
+ LASSERTF(OBD_MD_FLSIZE == (0x00000010ULL), "found 0x%.16llxULL\n",
+ OBD_MD_FLSIZE);
+ LASSERTF(OBD_MD_FLBLOCKS == (0x00000020ULL), "found 0x%.16llxULL\n",
+ OBD_MD_FLBLOCKS);
+ LASSERTF(OBD_MD_FLBLKSZ == (0x00000040ULL), "found 0x%.16llxULL\n",
+ OBD_MD_FLBLKSZ);
+ LASSERTF(OBD_MD_FLMODE == (0x00000080ULL), "found 0x%.16llxULL\n",
+ OBD_MD_FLMODE);
+ LASSERTF(OBD_MD_FLTYPE == (0x00000100ULL), "found 0x%.16llxULL\n",
+ OBD_MD_FLTYPE);
+ LASSERTF(OBD_MD_FLUID == (0x00000200ULL), "found 0x%.16llxULL\n",
+ OBD_MD_FLUID);
+ LASSERTF(OBD_MD_FLGID == (0x00000400ULL), "found 0x%.16llxULL\n",
+ OBD_MD_FLGID);
+ LASSERTF(OBD_MD_FLFLAGS == (0x00000800ULL), "found 0x%.16llxULL\n",
+ OBD_MD_FLFLAGS);
+ LASSERTF(OBD_MD_FLNLINK == (0x00002000ULL), "found 0x%.16llxULL\n",
+ OBD_MD_FLNLINK);
+ LASSERTF(OBD_MD_FLGENER == (0x00004000ULL), "found 0x%.16llxULL\n",
+ OBD_MD_FLGENER);
+ LASSERTF(OBD_MD_FLRDEV == (0x00010000ULL), "found 0x%.16llxULL\n",
+ OBD_MD_FLRDEV);
+ LASSERTF(OBD_MD_FLEASIZE == (0x00020000ULL), "found 0x%.16llxULL\n",
+ OBD_MD_FLEASIZE);
+ LASSERTF(OBD_MD_LINKNAME == (0x00040000ULL), "found 0x%.16llxULL\n",
+ OBD_MD_LINKNAME);
+ LASSERTF(OBD_MD_FLHANDLE == (0x00080000ULL), "found 0x%.16llxULL\n",
+ OBD_MD_FLHANDLE);
+ LASSERTF(OBD_MD_FLCKSUM == (0x00100000ULL), "found 0x%.16llxULL\n",
+ OBD_MD_FLCKSUM);
+ LASSERTF(OBD_MD_FLQOS == (0x00200000ULL), "found 0x%.16llxULL\n",
+ OBD_MD_FLQOS);
+ LASSERTF(OBD_MD_FLCOOKIE == (0x00800000ULL), "found 0x%.16llxULL\n",
+ OBD_MD_FLCOOKIE);
+ LASSERTF(OBD_MD_FLGROUP == (0x01000000ULL), "found 0x%.16llxULL\n",
+ OBD_MD_FLGROUP);
+ LASSERTF(OBD_MD_FLFID == (0x02000000ULL), "found 0x%.16llxULL\n",
+ OBD_MD_FLFID);
+ LASSERTF(OBD_MD_FLEPOCH == (0x04000000ULL), "found 0x%.16llxULL\n",
+ OBD_MD_FLEPOCH);
+ LASSERTF(OBD_MD_FLGRANT == (0x08000000ULL), "found 0x%.16llxULL\n",
+ OBD_MD_FLGRANT);
+ LASSERTF(OBD_MD_FLDIREA == (0x10000000ULL), "found 0x%.16llxULL\n",
+ OBD_MD_FLDIREA);
+ LASSERTF(OBD_MD_FLUSRQUOTA == (0x20000000ULL), "found 0x%.16llxULL\n",
+ OBD_MD_FLUSRQUOTA);
+ LASSERTF(OBD_MD_FLGRPQUOTA == (0x40000000ULL), "found 0x%.16llxULL\n",
+ OBD_MD_FLGRPQUOTA);
+ LASSERTF(OBD_MD_FLMODEASIZE == (0x80000000ULL), "found 0x%.16llxULL\n",
+ OBD_MD_FLMODEASIZE);
+ LASSERTF(OBD_MD_MDS == (0x0000000100000000ULL), "found 0x%.16llxULL\n",
+ OBD_MD_MDS);
+ LASSERTF(OBD_MD_REINT == (0x0000000200000000ULL), "found 0x%.16llxULL\n",
+ OBD_MD_REINT);
+ LASSERTF(OBD_MD_MEA == (0x0000000400000000ULL), "found 0x%.16llxULL\n",
+ OBD_MD_MEA);
+ LASSERTF(OBD_MD_TSTATE == (0x0000000800000000ULL),
+ "found 0x%.16llxULL\n", OBD_MD_TSTATE);
+ LASSERTF(OBD_MD_FLXATTR == (0x0000001000000000ULL), "found 0x%.16llxULL\n",
+ OBD_MD_FLXATTR);
+ LASSERTF(OBD_MD_FLXATTRLS == (0x0000002000000000ULL), "found 0x%.16llxULL\n",
+ OBD_MD_FLXATTRLS);
+ LASSERTF(OBD_MD_FLXATTRRM == (0x0000004000000000ULL), "found 0x%.16llxULL\n",
+ OBD_MD_FLXATTRRM);
+ LASSERTF(OBD_MD_FLACL == (0x0000008000000000ULL), "found 0x%.16llxULL\n",
+ OBD_MD_FLACL);
+ LASSERTF(OBD_MD_FLRMTPERM == (0x0000010000000000ULL), "found 0x%.16llxULL\n",
+ OBD_MD_FLRMTPERM);
+ LASSERTF(OBD_MD_FLMDSCAPA == (0x0000020000000000ULL), "found 0x%.16llxULL\n",
+ OBD_MD_FLMDSCAPA);
+ LASSERTF(OBD_MD_FLOSSCAPA == (0x0000040000000000ULL), "found 0x%.16llxULL\n",
+ OBD_MD_FLOSSCAPA);
+ LASSERTF(OBD_MD_FLCKSPLIT == (0x0000080000000000ULL), "found 0x%.16llxULL\n",
+ OBD_MD_FLCKSPLIT);
+ LASSERTF(OBD_MD_FLCROSSREF == (0x0000100000000000ULL), "found 0x%.16llxULL\n",
+ OBD_MD_FLCROSSREF);
+ LASSERTF(OBD_MD_FLGETATTRLOCK == (0x0000200000000000ULL), "found 0x%.16llxULL\n",
+ OBD_MD_FLGETATTRLOCK);
+ LASSERTF(OBD_MD_FLRMTLSETFACL == (0x0001000000000000ULL), "found 0x%.16llxULL\n",
+ OBD_MD_FLRMTLSETFACL);
+ LASSERTF(OBD_MD_FLRMTLGETFACL == (0x0002000000000000ULL), "found 0x%.16llxULL\n",
+ OBD_MD_FLRMTLGETFACL);
+ LASSERTF(OBD_MD_FLRMTRSETFACL == (0x0004000000000000ULL), "found 0x%.16llxULL\n",
+ OBD_MD_FLRMTRSETFACL);
+ LASSERTF(OBD_MD_FLRMTRGETFACL == (0x0008000000000000ULL), "found 0x%.16llxULL\n",
+ OBD_MD_FLRMTRGETFACL);
+ LASSERTF(OBD_MD_FLDATAVERSION == (0x0010000000000000ULL), "found 0x%.16llxULL\n",
+ OBD_MD_FLDATAVERSION);
+ CLASSERT(OBD_FL_INLINEDATA == 0x00000001);
+ CLASSERT(OBD_FL_OBDMDEXISTS == 0x00000002);
+ CLASSERT(OBD_FL_DELORPHAN == 0x00000004);
+ CLASSERT(OBD_FL_NORPC == 0x00000008);
+ CLASSERT(OBD_FL_IDONLY == 0x00000010);
+ CLASSERT(OBD_FL_RECREATE_OBJS == 0x00000020);
+ CLASSERT(OBD_FL_DEBUG_CHECK == 0x00000040);
+ CLASSERT(OBD_FL_NO_USRQUOTA == 0x00000100);
+ CLASSERT(OBD_FL_NO_GRPQUOTA == 0x00000200);
+ CLASSERT(OBD_FL_CREATE_CROW == 0x00000400);
+ CLASSERT(OBD_FL_SRVLOCK == 0x00000800);
+ CLASSERT(OBD_FL_CKSUM_CRC32 == 0x00001000);
+ CLASSERT(OBD_FL_CKSUM_ADLER == 0x00002000);
+ CLASSERT(OBD_FL_CKSUM_CRC32C == 0x00004000);
+ CLASSERT(OBD_FL_CKSUM_RSVD2 == 0x00008000);
+ CLASSERT(OBD_FL_CKSUM_RSVD3 == 0x00010000);
+ CLASSERT(OBD_FL_SHRINK_GRANT == 0x00020000);
+ CLASSERT(OBD_FL_MMAP == 0x00040000);
+ CLASSERT(OBD_FL_RECOV_RESEND == 0x00080000);
+ CLASSERT(OBD_FL_NOSPC_BLK == 0x00100000);
+ CLASSERT(OBD_FL_LOCAL_MASK == 0xf0000000);
+
+ /* Checks for struct lov_ost_data_v1 */
+ LASSERTF((int)sizeof(struct lov_ost_data_v1) == 24, "found %lld\n",
+ (long long)(int)sizeof(struct lov_ost_data_v1));
+ LASSERTF((int)offsetof(struct lov_ost_data_v1, l_ost_oi) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct lov_ost_data_v1, l_ost_oi));
+ LASSERTF((int)sizeof(((struct lov_ost_data_v1 *)0)->l_ost_oi) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct lov_ost_data_v1 *)0)->l_ost_oi));
+ LASSERTF((int)offsetof(struct lov_ost_data_v1, l_ost_gen) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct lov_ost_data_v1, l_ost_gen));
+ LASSERTF((int)sizeof(((struct lov_ost_data_v1 *)0)->l_ost_gen) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct lov_ost_data_v1 *)0)->l_ost_gen));
+ LASSERTF((int)offsetof(struct lov_ost_data_v1, l_ost_idx) == 20, "found %lld\n",
+ (long long)(int)offsetof(struct lov_ost_data_v1, l_ost_idx));
+ LASSERTF((int)sizeof(((struct lov_ost_data_v1 *)0)->l_ost_idx) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct lov_ost_data_v1 *)0)->l_ost_idx));
+
+ /* Checks for struct lov_mds_md_v1 */
+ LASSERTF((int)sizeof(struct lov_mds_md_v1) == 32, "found %lld\n",
+ (long long)(int)sizeof(struct lov_mds_md_v1));
+ LASSERTF((int)offsetof(struct lov_mds_md_v1, lmm_magic) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct lov_mds_md_v1, lmm_magic));
+ LASSERTF((int)sizeof(((struct lov_mds_md_v1 *)0)->lmm_magic) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct lov_mds_md_v1 *)0)->lmm_magic));
+ LASSERTF((int)offsetof(struct lov_mds_md_v1, lmm_pattern) == 4, "found %lld\n",
+ (long long)(int)offsetof(struct lov_mds_md_v1, lmm_pattern));
+ LASSERTF((int)sizeof(((struct lov_mds_md_v1 *)0)->lmm_pattern) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct lov_mds_md_v1 *)0)->lmm_pattern));
+ LASSERTF((int)offsetof(struct lov_mds_md_v1, lmm_oi) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct lov_mds_md_v1, lmm_oi));
+ LASSERTF((int)sizeof(((struct lov_mds_md_v1 *)0)->lmm_oi) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct lov_mds_md_v1 *)0)->lmm_oi));
+ LASSERTF((int)offsetof(struct lov_mds_md_v1, lmm_stripe_size) == 24, "found %lld\n",
+ (long long)(int)offsetof(struct lov_mds_md_v1, lmm_stripe_size));
+ LASSERTF((int)sizeof(((struct lov_mds_md_v1 *)0)->lmm_stripe_size) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct lov_mds_md_v1 *)0)->lmm_stripe_size));
+ LASSERTF((int)offsetof(struct lov_mds_md_v1, lmm_stripe_count) == 28, "found %lld\n",
+ (long long)(int)offsetof(struct lov_mds_md_v1, lmm_stripe_count));
+ LASSERTF((int)sizeof(((struct lov_mds_md_v1 *)0)->lmm_stripe_count) == 2, "found %lld\n",
+ (long long)(int)sizeof(((struct lov_mds_md_v1 *)0)->lmm_stripe_count));
+ LASSERTF((int)offsetof(struct lov_mds_md_v1, lmm_layout_gen) == 30, "found %lld\n",
+ (long long)(int)offsetof(struct lov_mds_md_v1, lmm_layout_gen));
+ LASSERTF((int)sizeof(((struct lov_mds_md_v1 *)0)->lmm_layout_gen) == 2, "found %lld\n",
+ (long long)(int)sizeof(((struct lov_mds_md_v1 *)0)->lmm_layout_gen));
+ LASSERTF((int)offsetof(struct lov_mds_md_v1, lmm_objects[0]) == 32, "found %lld\n",
+ (long long)(int)offsetof(struct lov_mds_md_v1, lmm_objects[0]));
+ LASSERTF((int)sizeof(((struct lov_mds_md_v1 *)0)->lmm_objects[0]) == 24, "found %lld\n",
+ (long long)(int)sizeof(((struct lov_mds_md_v1 *)0)->lmm_objects[0]));
+ CLASSERT(LOV_MAGIC_V1 == 0x0BD10BD0);
+
+ /* Checks for struct lov_mds_md_v3 */
+ LASSERTF((int)sizeof(struct lov_mds_md_v3) == 48, "found %lld\n",
+ (long long)(int)sizeof(struct lov_mds_md_v3));
+ LASSERTF((int)offsetof(struct lov_mds_md_v3, lmm_magic) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct lov_mds_md_v3, lmm_magic));
+ LASSERTF((int)sizeof(((struct lov_mds_md_v3 *)0)->lmm_magic) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct lov_mds_md_v3 *)0)->lmm_magic));
+ LASSERTF((int)offsetof(struct lov_mds_md_v3, lmm_pattern) == 4, "found %lld\n",
+ (long long)(int)offsetof(struct lov_mds_md_v3, lmm_pattern));
+ LASSERTF((int)sizeof(((struct lov_mds_md_v3 *)0)->lmm_pattern) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct lov_mds_md_v3 *)0)->lmm_pattern));
+ LASSERTF((int)offsetof(struct lov_mds_md_v3, lmm_oi) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct lov_mds_md_v3, lmm_oi));
+ LASSERTF((int)sizeof(((struct lov_mds_md_v3 *)0)->lmm_oi) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct lov_mds_md_v3 *)0)->lmm_oi));
+ LASSERTF((int)offsetof(struct lov_mds_md_v3, lmm_stripe_size) == 24, "found %lld\n",
+ (long long)(int)offsetof(struct lov_mds_md_v3, lmm_stripe_size));
+ LASSERTF((int)sizeof(((struct lov_mds_md_v3 *)0)->lmm_stripe_size) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct lov_mds_md_v3 *)0)->lmm_stripe_size));
+ LASSERTF((int)offsetof(struct lov_mds_md_v3, lmm_stripe_count) == 28, "found %lld\n",
+ (long long)(int)offsetof(struct lov_mds_md_v3, lmm_stripe_count));
+ LASSERTF((int)sizeof(((struct lov_mds_md_v3 *)0)->lmm_stripe_count) == 2, "found %lld\n",
+ (long long)(int)sizeof(((struct lov_mds_md_v3 *)0)->lmm_stripe_count));
+ LASSERTF((int)offsetof(struct lov_mds_md_v3, lmm_layout_gen) == 30, "found %lld\n",
+ (long long)(int)offsetof(struct lov_mds_md_v3, lmm_layout_gen));
+ LASSERTF((int)sizeof(((struct lov_mds_md_v3 *)0)->lmm_layout_gen) == 2, "found %lld\n",
+ (long long)(int)sizeof(((struct lov_mds_md_v3 *)0)->lmm_layout_gen));
+ CLASSERT(LOV_MAXPOOLNAME == 16);
+ LASSERTF((int)offsetof(struct lov_mds_md_v3, lmm_pool_name[16]) == 48, "found %lld\n",
+ (long long)(int)offsetof(struct lov_mds_md_v3, lmm_pool_name[16]));
+ LASSERTF((int)sizeof(((struct lov_mds_md_v3 *)0)->lmm_pool_name[16]) == 1, "found %lld\n",
+ (long long)(int)sizeof(((struct lov_mds_md_v3 *)0)->lmm_pool_name[16]));
+ LASSERTF((int)offsetof(struct lov_mds_md_v3, lmm_objects[0]) == 48, "found %lld\n",
+ (long long)(int)offsetof(struct lov_mds_md_v3, lmm_objects[0]));
+ LASSERTF((int)sizeof(((struct lov_mds_md_v3 *)0)->lmm_objects[0]) == 24, "found %lld\n",
+ (long long)(int)sizeof(((struct lov_mds_md_v3 *)0)->lmm_objects[0]));
+ CLASSERT(LOV_MAGIC_V3 == 0x0BD30BD0);
+ LASSERTF(LOV_PATTERN_RAID0 == 0x00000001UL, "found 0x%.8xUL\n",
+ (unsigned)LOV_PATTERN_RAID0);
+ LASSERTF(LOV_PATTERN_RAID1 == 0x00000002UL, "found 0x%.8xUL\n",
+ (unsigned)LOV_PATTERN_RAID1);
+ LASSERTF(LOV_PATTERN_FIRST == 0x00000100UL, "found 0x%.8xUL\n",
+ (unsigned)LOV_PATTERN_FIRST);
+ LASSERTF(LOV_PATTERN_CMOBD == 0x00000200UL, "found 0x%.8xUL\n",
+ (unsigned)LOV_PATTERN_CMOBD);
+
+ /* Checks for struct obd_statfs */
+ LASSERTF((int)sizeof(struct obd_statfs) == 144, "found %lld\n",
+ (long long)(int)sizeof(struct obd_statfs));
+ LASSERTF((int)offsetof(struct obd_statfs, os_type) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct obd_statfs, os_type));
+ LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_type) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_statfs *)0)->os_type));
+ LASSERTF((int)offsetof(struct obd_statfs, os_blocks) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct obd_statfs, os_blocks));
+ LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_blocks) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_statfs *)0)->os_blocks));
+ LASSERTF((int)offsetof(struct obd_statfs, os_bfree) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct obd_statfs, os_bfree));
+ LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_bfree) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_statfs *)0)->os_bfree));
+ LASSERTF((int)offsetof(struct obd_statfs, os_bavail) == 24, "found %lld\n",
+ (long long)(int)offsetof(struct obd_statfs, os_bavail));
+ LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_bavail) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_statfs *)0)->os_bavail));
+ LASSERTF((int)offsetof(struct obd_statfs, os_ffree) == 40, "found %lld\n",
+ (long long)(int)offsetof(struct obd_statfs, os_ffree));
+ LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_ffree) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_statfs *)0)->os_ffree));
+ LASSERTF((int)offsetof(struct obd_statfs, os_fsid) == 48, "found %lld\n",
+ (long long)(int)offsetof(struct obd_statfs, os_fsid));
+ LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_fsid) == 40, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_statfs *)0)->os_fsid));
+ LASSERTF((int)offsetof(struct obd_statfs, os_bsize) == 88, "found %lld\n",
+ (long long)(int)offsetof(struct obd_statfs, os_bsize));
+ LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_bsize) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_statfs *)0)->os_bsize));
+ LASSERTF((int)offsetof(struct obd_statfs, os_namelen) == 92, "found %lld\n",
+ (long long)(int)offsetof(struct obd_statfs, os_namelen));
+ LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_namelen) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_statfs *)0)->os_namelen));
+ LASSERTF((int)offsetof(struct obd_statfs, os_state) == 104, "found %lld\n",
+ (long long)(int)offsetof(struct obd_statfs, os_state));
+ LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_state) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_statfs *)0)->os_state));
+ LASSERTF((int)offsetof(struct obd_statfs, os_fprecreated) == 108, "found %lld\n",
+ (long long)(int)offsetof(struct obd_statfs, os_fprecreated));
+ LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_fprecreated) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_statfs *)0)->os_fprecreated));
+ LASSERTF((int)offsetof(struct obd_statfs, os_spare2) == 112, "found %lld\n",
+ (long long)(int)offsetof(struct obd_statfs, os_spare2));
+ LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_spare2) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_statfs *)0)->os_spare2));
+ LASSERTF((int)offsetof(struct obd_statfs, os_spare3) == 116, "found %lld\n",
+ (long long)(int)offsetof(struct obd_statfs, os_spare3));
+ LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_spare3) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_statfs *)0)->os_spare3));
+ LASSERTF((int)offsetof(struct obd_statfs, os_spare4) == 120, "found %lld\n",
+ (long long)(int)offsetof(struct obd_statfs, os_spare4));
+ LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_spare4) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_statfs *)0)->os_spare4));
+ LASSERTF((int)offsetof(struct obd_statfs, os_spare5) == 124, "found %lld\n",
+ (long long)(int)offsetof(struct obd_statfs, os_spare5));
+ LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_spare5) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_statfs *)0)->os_spare5));
+ LASSERTF((int)offsetof(struct obd_statfs, os_spare6) == 128, "found %lld\n",
+ (long long)(int)offsetof(struct obd_statfs, os_spare6));
+ LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_spare6) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_statfs *)0)->os_spare6));
+ LASSERTF((int)offsetof(struct obd_statfs, os_spare7) == 132, "found %lld\n",
+ (long long)(int)offsetof(struct obd_statfs, os_spare7));
+ LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_spare7) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_statfs *)0)->os_spare7));
+ LASSERTF((int)offsetof(struct obd_statfs, os_spare8) == 136, "found %lld\n",
+ (long long)(int)offsetof(struct obd_statfs, os_spare8));
+ LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_spare8) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_statfs *)0)->os_spare8));
+ LASSERTF((int)offsetof(struct obd_statfs, os_spare9) == 140, "found %lld\n",
+ (long long)(int)offsetof(struct obd_statfs, os_spare9));
+ LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_spare9) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_statfs *)0)->os_spare9));
+
+ /* Checks for struct obd_ioobj */
+ LASSERTF((int)sizeof(struct obd_ioobj) == 24, "found %lld\n",
+ (long long)(int)sizeof(struct obd_ioobj));
+ LASSERTF((int)offsetof(struct obd_ioobj, ioo_oid) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct obd_ioobj, ioo_oid));
+ LASSERTF((int)sizeof(((struct obd_ioobj *)0)->ioo_oid) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_ioobj *)0)->ioo_oid));
+ LASSERTF((int)offsetof(struct obd_ioobj, ioo_max_brw) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct obd_ioobj, ioo_max_brw));
+ LASSERTF((int)sizeof(((struct obd_ioobj *)0)->ioo_max_brw) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_ioobj *)0)->ioo_max_brw));
+ LASSERTF((int)offsetof(struct obd_ioobj, ioo_bufcnt) == 20, "found %lld\n",
+ (long long)(int)offsetof(struct obd_ioobj, ioo_bufcnt));
+ LASSERTF((int)sizeof(((struct obd_ioobj *)0)->ioo_bufcnt) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_ioobj *)0)->ioo_bufcnt));
+
+ /* Checks for union lquota_id */
+ LASSERTF((int)sizeof(union lquota_id) == 16, "found %lld\n",
+ (long long)(int)sizeof(union lquota_id));
+
+ LASSERTF(QUOTABLOCK_BITS == 10, "found %lld\n",
+ (long long)QUOTABLOCK_BITS);
+ LASSERTF(QUOTABLOCK_SIZE == 1024, "found %lld\n",
+ (long long)QUOTABLOCK_SIZE);
+
+ /* Checks for struct obd_quotactl */
+ LASSERTF((int)sizeof(struct obd_quotactl) == 112, "found %lld\n",
+ (long long)(int)sizeof(struct obd_quotactl));
+ LASSERTF((int)offsetof(struct obd_quotactl, qc_cmd) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct obd_quotactl, qc_cmd));
+ LASSERTF((int)sizeof(((struct obd_quotactl *)0)->qc_cmd) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_quotactl *)0)->qc_cmd));
+ LASSERTF((int)offsetof(struct obd_quotactl, qc_type) == 4, "found %lld\n",
+ (long long)(int)offsetof(struct obd_quotactl, qc_type));
+ LASSERTF((int)sizeof(((struct obd_quotactl *)0)->qc_type) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_quotactl *)0)->qc_type));
+ LASSERTF((int)offsetof(struct obd_quotactl, qc_id) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct obd_quotactl, qc_id));
+ LASSERTF((int)sizeof(((struct obd_quotactl *)0)->qc_id) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_quotactl *)0)->qc_id));
+ LASSERTF((int)offsetof(struct obd_quotactl, qc_stat) == 12, "found %lld\n",
+ (long long)(int)offsetof(struct obd_quotactl, qc_stat));
+ LASSERTF((int)sizeof(((struct obd_quotactl *)0)->qc_stat) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_quotactl *)0)->qc_stat));
+ LASSERTF((int)offsetof(struct obd_quotactl, qc_dqinfo) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct obd_quotactl, qc_dqinfo));
+ LASSERTF((int)sizeof(((struct obd_quotactl *)0)->qc_dqinfo) == 24, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_quotactl *)0)->qc_dqinfo));
+ LASSERTF((int)offsetof(struct obd_quotactl, qc_dqblk) == 40, "found %lld\n",
+ (long long)(int)offsetof(struct obd_quotactl, qc_dqblk));
+ LASSERTF((int)sizeof(((struct obd_quotactl *)0)->qc_dqblk) == 72, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_quotactl *)0)->qc_dqblk));
+
+ /* Checks for struct obd_dqinfo */
+ LASSERTF((int)sizeof(struct obd_dqinfo) == 24, "found %lld\n",
+ (long long)(int)sizeof(struct obd_dqinfo));
+ LASSERTF((int)offsetof(struct obd_dqinfo, dqi_bgrace) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct obd_dqinfo, dqi_bgrace));
+ LASSERTF((int)sizeof(((struct obd_dqinfo *)0)->dqi_bgrace) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_dqinfo *)0)->dqi_bgrace));
+ LASSERTF((int)offsetof(struct obd_dqinfo, dqi_igrace) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct obd_dqinfo, dqi_igrace));
+ LASSERTF((int)sizeof(((struct obd_dqinfo *)0)->dqi_igrace) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_dqinfo *)0)->dqi_igrace));
+ LASSERTF((int)offsetof(struct obd_dqinfo, dqi_flags) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct obd_dqinfo, dqi_flags));
+ LASSERTF((int)sizeof(((struct obd_dqinfo *)0)->dqi_flags) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_dqinfo *)0)->dqi_flags));
+ LASSERTF((int)offsetof(struct obd_dqinfo, dqi_valid) == 20, "found %lld\n",
+ (long long)(int)offsetof(struct obd_dqinfo, dqi_valid));
+ LASSERTF((int)sizeof(((struct obd_dqinfo *)0)->dqi_valid) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_dqinfo *)0)->dqi_valid));
+
+ /* Checks for struct obd_dqblk */
+ LASSERTF((int)sizeof(struct obd_dqblk) == 72, "found %lld\n",
+ (long long)(int)sizeof(struct obd_dqblk));
+ LASSERTF((int)offsetof(struct obd_dqblk, dqb_bhardlimit) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct obd_dqblk, dqb_bhardlimit));
+ LASSERTF((int)sizeof(((struct obd_dqblk *)0)->dqb_bhardlimit) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_dqblk *)0)->dqb_bhardlimit));
+ LASSERTF((int)offsetof(struct obd_dqblk, dqb_bsoftlimit) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct obd_dqblk, dqb_bsoftlimit));
+ LASSERTF((int)sizeof(((struct obd_dqblk *)0)->dqb_bsoftlimit) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_dqblk *)0)->dqb_bsoftlimit));
+ LASSERTF((int)offsetof(struct obd_dqblk, dqb_curspace) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct obd_dqblk, dqb_curspace));
+ LASSERTF((int)sizeof(((struct obd_dqblk *)0)->dqb_curspace) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_dqblk *)0)->dqb_curspace));
+ LASSERTF((int)offsetof(struct obd_dqblk, dqb_ihardlimit) == 24, "found %lld\n",
+ (long long)(int)offsetof(struct obd_dqblk, dqb_ihardlimit));
+ LASSERTF((int)sizeof(((struct obd_dqblk *)0)->dqb_ihardlimit) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_dqblk *)0)->dqb_ihardlimit));
+ LASSERTF((int)offsetof(struct obd_dqblk, dqb_isoftlimit) == 32, "found %lld\n",
+ (long long)(int)offsetof(struct obd_dqblk, dqb_isoftlimit));
+ LASSERTF((int)sizeof(((struct obd_dqblk *)0)->dqb_isoftlimit) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_dqblk *)0)->dqb_isoftlimit));
+ LASSERTF((int)offsetof(struct obd_dqblk, dqb_curinodes) == 40, "found %lld\n",
+ (long long)(int)offsetof(struct obd_dqblk, dqb_curinodes));
+ LASSERTF((int)sizeof(((struct obd_dqblk *)0)->dqb_curinodes) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_dqblk *)0)->dqb_curinodes));
+ LASSERTF((int)offsetof(struct obd_dqblk, dqb_btime) == 48, "found %lld\n",
+ (long long)(int)offsetof(struct obd_dqblk, dqb_btime));
+ LASSERTF((int)sizeof(((struct obd_dqblk *)0)->dqb_btime) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_dqblk *)0)->dqb_btime));
+ LASSERTF((int)offsetof(struct obd_dqblk, dqb_itime) == 56, "found %lld\n",
+ (long long)(int)offsetof(struct obd_dqblk, dqb_itime));
+ LASSERTF((int)sizeof(((struct obd_dqblk *)0)->dqb_itime) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_dqblk *)0)->dqb_itime));
+ LASSERTF((int)offsetof(struct obd_dqblk, dqb_valid) == 64, "found %lld\n",
+ (long long)(int)offsetof(struct obd_dqblk, dqb_valid));
+ LASSERTF((int)sizeof(((struct obd_dqblk *)0)->dqb_valid) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_dqblk *)0)->dqb_valid));
+ LASSERTF((int)offsetof(struct obd_dqblk, dqb_padding) == 68, "found %lld\n",
+ (long long)(int)offsetof(struct obd_dqblk, dqb_padding));
+ LASSERTF((int)sizeof(((struct obd_dqblk *)0)->dqb_padding) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct obd_dqblk *)0)->dqb_padding));
+ LASSERTF(Q_QUOTACHECK == 0x800100, "found 0x%.8x\n",
+ Q_QUOTACHECK);
+ LASSERTF(Q_INITQUOTA == 0x800101, "found 0x%.8x\n",
+ Q_INITQUOTA);
+ LASSERTF(Q_GETOINFO == 0x800102, "found 0x%.8x\n",
+ Q_GETOINFO);
+ LASSERTF(Q_GETOQUOTA == 0x800103, "found 0x%.8x\n",
+ Q_GETOQUOTA);
+ LASSERTF(Q_FINVALIDATE == 0x800104, "found 0x%.8x\n",
+ Q_FINVALIDATE);
+
+ /* Checks for struct lquota_acct_rec */
+ LASSERTF((int)sizeof(struct lquota_acct_rec) == 16, "found %lld\n",
+ (long long)(int)sizeof(struct lquota_acct_rec));
+ LASSERTF((int)offsetof(struct lquota_acct_rec, bspace) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct lquota_acct_rec, bspace));
+ LASSERTF((int)sizeof(((struct lquota_acct_rec *)0)->bspace) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct lquota_acct_rec *)0)->bspace));
+ LASSERTF((int)offsetof(struct lquota_acct_rec, ispace) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct lquota_acct_rec, ispace));
+ LASSERTF((int)sizeof(((struct lquota_acct_rec *)0)->ispace) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct lquota_acct_rec *)0)->ispace));
+
+ /* Checks for struct lquota_glb_rec */
+ LASSERTF((int)sizeof(struct lquota_glb_rec) == 32, "found %lld\n",
+ (long long)(int)sizeof(struct lquota_glb_rec));
+ LASSERTF((int)offsetof(struct lquota_glb_rec, qbr_hardlimit) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct lquota_glb_rec, qbr_hardlimit));
+ LASSERTF((int)sizeof(((struct lquota_glb_rec *)0)->qbr_hardlimit) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct lquota_glb_rec *)0)->qbr_hardlimit));
+ LASSERTF((int)offsetof(struct lquota_glb_rec, qbr_softlimit) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct lquota_glb_rec, qbr_softlimit));
+ LASSERTF((int)sizeof(((struct lquota_glb_rec *)0)->qbr_softlimit) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct lquota_glb_rec *)0)->qbr_softlimit));
+ LASSERTF((int)offsetof(struct lquota_glb_rec, qbr_time) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct lquota_glb_rec, qbr_time));
+ LASSERTF((int)sizeof(((struct lquota_glb_rec *)0)->qbr_time) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct lquota_glb_rec *)0)->qbr_time));
+ LASSERTF((int)offsetof(struct lquota_glb_rec, qbr_granted) == 24, "found %lld\n",
+ (long long)(int)offsetof(struct lquota_glb_rec, qbr_granted));
+ LASSERTF((int)sizeof(((struct lquota_glb_rec *)0)->qbr_granted) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct lquota_glb_rec *)0)->qbr_granted));
+
+ /* Checks for struct lquota_slv_rec */
+ LASSERTF((int)sizeof(struct lquota_slv_rec) == 8, "found %lld\n",
+ (long long)(int)sizeof(struct lquota_slv_rec));
+ LASSERTF((int)offsetof(struct lquota_slv_rec, qsr_granted) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct lquota_slv_rec, qsr_granted));
+ LASSERTF((int)sizeof(((struct lquota_slv_rec *)0)->qsr_granted) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct lquota_slv_rec *)0)->qsr_granted));
+
+ /* Checks for struct idx_info */
+ LASSERTF((int)sizeof(struct idx_info) == 80, "found %lld\n",
+ (long long)(int)sizeof(struct idx_info));
+ LASSERTF((int)offsetof(struct idx_info, ii_magic) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct idx_info, ii_magic));
+ LASSERTF((int)sizeof(((struct idx_info *)0)->ii_magic) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct idx_info *)0)->ii_magic));
+ LASSERTF((int)offsetof(struct idx_info, ii_flags) == 4, "found %lld\n",
+ (long long)(int)offsetof(struct idx_info, ii_flags));
+ LASSERTF((int)sizeof(((struct idx_info *)0)->ii_flags) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct idx_info *)0)->ii_flags));
+ LASSERTF((int)offsetof(struct idx_info, ii_count) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct idx_info, ii_count));
+ LASSERTF((int)sizeof(((struct idx_info *)0)->ii_count) == 2, "found %lld\n",
+ (long long)(int)sizeof(((struct idx_info *)0)->ii_count));
+ LASSERTF((int)offsetof(struct idx_info, ii_pad0) == 10, "found %lld\n",
+ (long long)(int)offsetof(struct idx_info, ii_pad0));
+ LASSERTF((int)sizeof(((struct idx_info *)0)->ii_pad0) == 2, "found %lld\n",
+ (long long)(int)sizeof(((struct idx_info *)0)->ii_pad0));
+ LASSERTF((int)offsetof(struct idx_info, ii_attrs) == 12, "found %lld\n",
+ (long long)(int)offsetof(struct idx_info, ii_attrs));
+ LASSERTF((int)sizeof(((struct idx_info *)0)->ii_attrs) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct idx_info *)0)->ii_attrs));
+ LASSERTF((int)offsetof(struct idx_info, ii_fid) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct idx_info, ii_fid));
+ LASSERTF((int)sizeof(((struct idx_info *)0)->ii_fid) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct idx_info *)0)->ii_fid));
+ LASSERTF((int)offsetof(struct idx_info, ii_version) == 32, "found %lld\n",
+ (long long)(int)offsetof(struct idx_info, ii_version));
+ LASSERTF((int)sizeof(((struct idx_info *)0)->ii_version) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct idx_info *)0)->ii_version));
+ LASSERTF((int)offsetof(struct idx_info, ii_hash_start) == 40, "found %lld\n",
+ (long long)(int)offsetof(struct idx_info, ii_hash_start));
+ LASSERTF((int)sizeof(((struct idx_info *)0)->ii_hash_start) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct idx_info *)0)->ii_hash_start));
+ LASSERTF((int)offsetof(struct idx_info, ii_hash_end) == 48, "found %lld\n",
+ (long long)(int)offsetof(struct idx_info, ii_hash_end));
+ LASSERTF((int)sizeof(((struct idx_info *)0)->ii_hash_end) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct idx_info *)0)->ii_hash_end));
+ LASSERTF((int)offsetof(struct idx_info, ii_keysize) == 56, "found %lld\n",
+ (long long)(int)offsetof(struct idx_info, ii_keysize));
+ LASSERTF((int)sizeof(((struct idx_info *)0)->ii_keysize) == 2, "found %lld\n",
+ (long long)(int)sizeof(((struct idx_info *)0)->ii_keysize));
+ LASSERTF((int)offsetof(struct idx_info, ii_recsize) == 58, "found %lld\n",
+ (long long)(int)offsetof(struct idx_info, ii_recsize));
+ LASSERTF((int)sizeof(((struct idx_info *)0)->ii_recsize) == 2, "found %lld\n",
+ (long long)(int)sizeof(((struct idx_info *)0)->ii_recsize));
+ LASSERTF((int)offsetof(struct idx_info, ii_pad1) == 60, "found %lld\n",
+ (long long)(int)offsetof(struct idx_info, ii_pad1));
+ LASSERTF((int)sizeof(((struct idx_info *)0)->ii_pad1) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct idx_info *)0)->ii_pad1));
+ LASSERTF((int)offsetof(struct idx_info, ii_pad2) == 64, "found %lld\n",
+ (long long)(int)offsetof(struct idx_info, ii_pad2));
+ LASSERTF((int)sizeof(((struct idx_info *)0)->ii_pad2) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct idx_info *)0)->ii_pad2));
+ LASSERTF((int)offsetof(struct idx_info, ii_pad3) == 72, "found %lld\n",
+ (long long)(int)offsetof(struct idx_info, ii_pad3));
+ LASSERTF((int)sizeof(((struct idx_info *)0)->ii_pad3) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct idx_info *)0)->ii_pad3));
+ CLASSERT(IDX_INFO_MAGIC == 0x3D37CC37);
+
+ /* Checks for struct lu_idxpage */
+ LASSERTF((int)sizeof(struct lu_idxpage) == 16, "found %lld\n",
+ (long long)(int)sizeof(struct lu_idxpage));
+ LASSERTF((int)offsetof(struct lu_idxpage, lip_magic) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct lu_idxpage, lip_magic));
+ LASSERTF((int)sizeof(((struct lu_idxpage *)0)->lip_magic) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct lu_idxpage *)0)->lip_magic));
+ LASSERTF((int)offsetof(struct lu_idxpage, lip_flags) == 4, "found %lld\n",
+ (long long)(int)offsetof(struct lu_idxpage, lip_flags));
+ LASSERTF((int)sizeof(((struct lu_idxpage *)0)->lip_flags) == 2, "found %lld\n",
+ (long long)(int)sizeof(((struct lu_idxpage *)0)->lip_flags));
+ LASSERTF((int)offsetof(struct lu_idxpage, lip_nr) == 6, "found %lld\n",
+ (long long)(int)offsetof(struct lu_idxpage, lip_nr));
+ LASSERTF((int)sizeof(((struct lu_idxpage *)0)->lip_nr) == 2, "found %lld\n",
+ (long long)(int)sizeof(((struct lu_idxpage *)0)->lip_nr));
+ LASSERTF((int)offsetof(struct lu_idxpage, lip_pad0) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct lu_idxpage, lip_pad0));
+ LASSERTF((int)sizeof(((struct lu_idxpage *)0)->lip_pad0) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct lu_idxpage *)0)->lip_pad0));
+ CLASSERT(LIP_MAGIC == 0x8A6D6B6C);
+ LASSERTF(LIP_HDR_SIZE == 16, "found %lld\n",
+ (long long)LIP_HDR_SIZE);
+ LASSERTF(II_FL_NOHASH == 1, "found %lld\n",
+ (long long)II_FL_NOHASH);
+ LASSERTF(II_FL_VARKEY == 2, "found %lld\n",
+ (long long)II_FL_VARKEY);
+ LASSERTF(II_FL_VARREC == 4, "found %lld\n",
+ (long long)II_FL_VARREC);
+ LASSERTF(II_FL_NONUNQ == 8, "found %lld\n",
+ (long long)II_FL_NONUNQ);
+
+ /* Checks for struct niobuf_remote */
+ LASSERTF((int)sizeof(struct niobuf_remote) == 16, "found %lld\n",
+ (long long)(int)sizeof(struct niobuf_remote));
+ LASSERTF((int)offsetof(struct niobuf_remote, offset) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct niobuf_remote, offset));
+ LASSERTF((int)sizeof(((struct niobuf_remote *)0)->offset) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct niobuf_remote *)0)->offset));
+ LASSERTF((int)offsetof(struct niobuf_remote, len) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct niobuf_remote, len));
+ LASSERTF((int)sizeof(((struct niobuf_remote *)0)->len) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct niobuf_remote *)0)->len));
+ LASSERTF((int)offsetof(struct niobuf_remote, flags) == 12, "found %lld\n",
+ (long long)(int)offsetof(struct niobuf_remote, flags));
+ LASSERTF((int)sizeof(((struct niobuf_remote *)0)->flags) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct niobuf_remote *)0)->flags));
+ LASSERTF(OBD_BRW_READ == 0x01, "found 0x%.8x\n",
+ OBD_BRW_READ);
+ LASSERTF(OBD_BRW_WRITE == 0x02, "found 0x%.8x\n",
+ OBD_BRW_WRITE);
+ LASSERTF(OBD_BRW_SYNC == 0x08, "found 0x%.8x\n",
+ OBD_BRW_SYNC);
+ LASSERTF(OBD_BRW_CHECK == 0x10, "found 0x%.8x\n",
+ OBD_BRW_CHECK);
+ LASSERTF(OBD_BRW_FROM_GRANT == 0x20, "found 0x%.8x\n",
+ OBD_BRW_FROM_GRANT);
+ LASSERTF(OBD_BRW_GRANTED == 0x40, "found 0x%.8x\n",
+ OBD_BRW_GRANTED);
+ LASSERTF(OBD_BRW_NOCACHE == 0x80, "found 0x%.8x\n",
+ OBD_BRW_NOCACHE);
+ LASSERTF(OBD_BRW_NOQUOTA == 0x100, "found 0x%.8x\n",
+ OBD_BRW_NOQUOTA);
+ LASSERTF(OBD_BRW_SRVLOCK == 0x200, "found 0x%.8x\n",
+ OBD_BRW_SRVLOCK);
+ LASSERTF(OBD_BRW_ASYNC == 0x400, "found 0x%.8x\n",
+ OBD_BRW_ASYNC);
+ LASSERTF(OBD_BRW_MEMALLOC == 0x800, "found 0x%.8x\n",
+ OBD_BRW_MEMALLOC);
+
+ /* Checks for struct ost_body */
+ LASSERTF((int)sizeof(struct ost_body) == 208, "found %lld\n",
+ (long long)(int)sizeof(struct ost_body));
+ LASSERTF((int)offsetof(struct ost_body, oa) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct ost_body, oa));
+ LASSERTF((int)sizeof(((struct ost_body *)0)->oa) == 208, "found %lld\n",
+ (long long)(int)sizeof(((struct ost_body *)0)->oa));
+
+ /* Checks for struct ll_fid */
+ LASSERTF((int)sizeof(struct ll_fid) == 16, "found %lld\n",
+ (long long)(int)sizeof(struct ll_fid));
+ LASSERTF((int)offsetof(struct ll_fid, id) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct ll_fid, id));
+ LASSERTF((int)sizeof(((struct ll_fid *)0)->id) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct ll_fid *)0)->id));
+ LASSERTF((int)offsetof(struct ll_fid, generation) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct ll_fid, generation));
+ LASSERTF((int)sizeof(((struct ll_fid *)0)->generation) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct ll_fid *)0)->generation));
+ LASSERTF((int)offsetof(struct ll_fid, f_type) == 12, "found %lld\n",
+ (long long)(int)offsetof(struct ll_fid, f_type));
+ LASSERTF((int)sizeof(((struct ll_fid *)0)->f_type) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct ll_fid *)0)->f_type));
+
+ /* Checks for struct mdt_body */
+ LASSERTF((int)sizeof(struct mdt_body) == 216, "found %lld\n",
+ (long long)(int)sizeof(struct mdt_body));
+ LASSERTF((int)offsetof(struct mdt_body, fid1) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_body, fid1));
+ LASSERTF((int)sizeof(((struct mdt_body *)0)->fid1) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_body *)0)->fid1));
+ LASSERTF((int)offsetof(struct mdt_body, fid2) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_body, fid2));
+ LASSERTF((int)sizeof(((struct mdt_body *)0)->fid2) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_body *)0)->fid2));
+ LASSERTF((int)offsetof(struct mdt_body, handle) == 32, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_body, handle));
+ LASSERTF((int)sizeof(((struct mdt_body *)0)->handle) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_body *)0)->handle));
+ LASSERTF((int)offsetof(struct mdt_body, valid) == 40, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_body, valid));
+ LASSERTF((int)sizeof(((struct mdt_body *)0)->valid) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_body *)0)->valid));
+ LASSERTF((int)offsetof(struct mdt_body, size) == 48, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_body, size));
+ LASSERTF((int)sizeof(((struct mdt_body *)0)->size) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_body *)0)->size));
+ LASSERTF((int)offsetof(struct mdt_body, mtime) == 56, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_body, mtime));
+ LASSERTF((int)sizeof(((struct mdt_body *)0)->mtime) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_body *)0)->mtime));
+ LASSERTF((int)offsetof(struct mdt_body, atime) == 64, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_body, atime));
+ LASSERTF((int)sizeof(((struct mdt_body *)0)->atime) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_body *)0)->atime));
+ LASSERTF((int)offsetof(struct mdt_body, ctime) == 72, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_body, ctime));
+ LASSERTF((int)sizeof(((struct mdt_body *)0)->ctime) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_body *)0)->ctime));
+ LASSERTF((int)offsetof(struct mdt_body, blocks) == 80, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_body, blocks));
+ LASSERTF((int)sizeof(((struct mdt_body *)0)->blocks) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_body *)0)->blocks));
+ LASSERTF((int)offsetof(struct mdt_body, t_state) == 96, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_body, t_state));
+ LASSERTF((int)sizeof(((struct mdt_body *)0)->t_state) == 8,
+ "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_body *)0)->t_state));
+ LASSERTF((int)offsetof(struct mdt_body, fsuid) == 104, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_body, fsuid));
+ LASSERTF((int)sizeof(((struct mdt_body *)0)->fsuid) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_body *)0)->fsuid));
+ LASSERTF((int)offsetof(struct mdt_body, fsgid) == 108, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_body, fsgid));
+ LASSERTF((int)sizeof(((struct mdt_body *)0)->fsgid) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_body *)0)->fsgid));
+ LASSERTF((int)offsetof(struct mdt_body, capability) == 112, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_body, capability));
+ LASSERTF((int)sizeof(((struct mdt_body *)0)->capability) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_body *)0)->capability));
+ LASSERTF((int)offsetof(struct mdt_body, mode) == 116, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_body, mode));
+ LASSERTF((int)sizeof(((struct mdt_body *)0)->mode) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_body *)0)->mode));
+ LASSERTF((int)offsetof(struct mdt_body, uid) == 120, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_body, uid));
+ LASSERTF((int)sizeof(((struct mdt_body *)0)->uid) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_body *)0)->uid));
+ LASSERTF((int)offsetof(struct mdt_body, gid) == 124, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_body, gid));
+ LASSERTF((int)sizeof(((struct mdt_body *)0)->gid) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_body *)0)->gid));
+ LASSERTF((int)offsetof(struct mdt_body, flags) == 128, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_body, flags));
+ LASSERTF((int)sizeof(((struct mdt_body *)0)->flags) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_body *)0)->flags));
+ LASSERTF((int)offsetof(struct mdt_body, rdev) == 132, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_body, rdev));
+ LASSERTF((int)sizeof(((struct mdt_body *)0)->rdev) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_body *)0)->rdev));
+ LASSERTF((int)offsetof(struct mdt_body, nlink) == 136, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_body, nlink));
+ LASSERTF((int)sizeof(((struct mdt_body *)0)->nlink) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_body *)0)->nlink));
+ LASSERTF((int)offsetof(struct mdt_body, unused2) == 140, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_body, unused2));
+ LASSERTF((int)sizeof(((struct mdt_body *)0)->unused2) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_body *)0)->unused2));
+ LASSERTF((int)offsetof(struct mdt_body, suppgid) == 144, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_body, suppgid));
+ LASSERTF((int)sizeof(((struct mdt_body *)0)->suppgid) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_body *)0)->suppgid));
+ LASSERTF((int)offsetof(struct mdt_body, eadatasize) == 148, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_body, eadatasize));
+ LASSERTF((int)sizeof(((struct mdt_body *)0)->eadatasize) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_body *)0)->eadatasize));
+ LASSERTF((int)offsetof(struct mdt_body, aclsize) == 152, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_body, aclsize));
+ LASSERTF((int)sizeof(((struct mdt_body *)0)->aclsize) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_body *)0)->aclsize));
+ LASSERTF((int)offsetof(struct mdt_body, max_mdsize) == 156, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_body, max_mdsize));
+ LASSERTF((int)sizeof(((struct mdt_body *)0)->max_mdsize) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_body *)0)->max_mdsize));
+ LASSERTF((int)offsetof(struct mdt_body, max_cookiesize) == 160, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_body, max_cookiesize));
+ LASSERTF((int)sizeof(((struct mdt_body *)0)->max_cookiesize) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_body *)0)->max_cookiesize));
+ LASSERTF((int)offsetof(struct mdt_body, uid_h) == 164, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_body, uid_h));
+ LASSERTF((int)sizeof(((struct mdt_body *)0)->uid_h) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_body *)0)->uid_h));
+ LASSERTF((int)offsetof(struct mdt_body, gid_h) == 168, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_body, gid_h));
+ LASSERTF((int)sizeof(((struct mdt_body *)0)->gid_h) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_body *)0)->gid_h));
+ LASSERTF((int)offsetof(struct mdt_body, padding_5) == 172, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_body, padding_5));
+ LASSERTF((int)sizeof(((struct mdt_body *)0)->padding_5) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_body *)0)->padding_5));
+ LASSERTF((int)offsetof(struct mdt_body, padding_6) == 176, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_body, padding_6));
+ LASSERTF((int)sizeof(((struct mdt_body *)0)->padding_6) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_body *)0)->padding_6));
+ LASSERTF((int)offsetof(struct mdt_body, padding_7) == 184, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_body, padding_7));
+ LASSERTF((int)sizeof(((struct mdt_body *)0)->padding_7) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_body *)0)->padding_7));
+ LASSERTF((int)offsetof(struct mdt_body, padding_8) == 192, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_body, padding_8));
+ LASSERTF((int)sizeof(((struct mdt_body *)0)->padding_8) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_body *)0)->padding_8));
+ LASSERTF((int)offsetof(struct mdt_body, padding_9) == 200, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_body, padding_9));
+ LASSERTF((int)sizeof(((struct mdt_body *)0)->padding_9) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_body *)0)->padding_9));
+ LASSERTF((int)offsetof(struct mdt_body, padding_10) == 208, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_body, padding_10));
+ LASSERTF((int)sizeof(((struct mdt_body *)0)->padding_10) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_body *)0)->padding_10));
+ LASSERTF(MDS_FMODE_CLOSED == 000000000000UL, "found 0%.11oUL\n",
+ MDS_FMODE_CLOSED);
+ LASSERTF(MDS_FMODE_EXEC == 000000000004UL, "found 0%.11oUL\n",
+ MDS_FMODE_EXEC);
+ LASSERTF(MDS_FMODE_EPOCH == 000001000000UL, "found 0%.11oUL\n",
+ MDS_FMODE_EPOCH);
+ LASSERTF(MDS_FMODE_TRUNC == 000002000000UL, "found 0%.11oUL\n",
+ MDS_FMODE_TRUNC);
+ LASSERTF(MDS_FMODE_SOM == 000004000000UL, "found 0%.11oUL\n",
+ MDS_FMODE_SOM);
+ LASSERTF(MDS_OPEN_CREATED == 000000000010UL, "found 0%.11oUL\n",
+ MDS_OPEN_CREATED);
+ LASSERTF(MDS_OPEN_CROSS == 000000000020UL, "found 0%.11oUL\n",
+ MDS_OPEN_CROSS);
+ LASSERTF(MDS_OPEN_CREAT == 000000000100UL, "found 0%.11oUL\n",
+ MDS_OPEN_CREAT);
+ LASSERTF(MDS_OPEN_EXCL == 000000000200UL, "found 0%.11oUL\n",
+ MDS_OPEN_EXCL);
+ LASSERTF(MDS_OPEN_TRUNC == 000000001000UL, "found 0%.11oUL\n",
+ MDS_OPEN_TRUNC);
+ LASSERTF(MDS_OPEN_APPEND == 000000002000UL, "found 0%.11oUL\n",
+ MDS_OPEN_APPEND);
+ LASSERTF(MDS_OPEN_SYNC == 000000010000UL, "found 0%.11oUL\n",
+ MDS_OPEN_SYNC);
+ LASSERTF(MDS_OPEN_DIRECTORY == 000000200000UL, "found 0%.11oUL\n",
+ MDS_OPEN_DIRECTORY);
+ LASSERTF(MDS_OPEN_BY_FID == 000040000000UL, "found 0%.11oUL\n",
+ MDS_OPEN_BY_FID);
+ LASSERTF(MDS_OPEN_DELAY_CREATE == 000100000000UL, "found 0%.11oUL\n",
+ MDS_OPEN_DELAY_CREATE);
+ LASSERTF(MDS_OPEN_OWNEROVERRIDE == 000200000000UL, "found 0%.11oUL\n",
+ MDS_OPEN_OWNEROVERRIDE);
+ LASSERTF(MDS_OPEN_JOIN_FILE == 000400000000UL, "found 0%.11oUL\n",
+ MDS_OPEN_JOIN_FILE);
+ LASSERTF(MDS_OPEN_LOCK == 004000000000UL, "found 0%.11oUL\n",
+ MDS_OPEN_LOCK);
+ LASSERTF(MDS_OPEN_HAS_EA == 010000000000UL, "found 0%.11oUL\n",
+ MDS_OPEN_HAS_EA);
+ LASSERTF(MDS_OPEN_HAS_OBJS == 020000000000UL, "found 0%.11oUL\n",
+ MDS_OPEN_HAS_OBJS);
+ LASSERTF(MDS_OPEN_NORESTORE == 00000000000100000000000ULL, "found 0%.22lloULL\n",
+ (long long)MDS_OPEN_NORESTORE);
+ LASSERTF(MDS_OPEN_NEWSTRIPE == 00000000000200000000000ULL, "found 0%.22lloULL\n",
+ (long long)MDS_OPEN_NEWSTRIPE);
+ LASSERTF(MDS_OPEN_VOLATILE == 00000000000400000000000ULL, "found 0%.22lloULL\n",
+ (long long)MDS_OPEN_VOLATILE);
+ LASSERTF(LUSTRE_SYNC_FL == 0x00000008, "found 0x%.8x\n",
+ LUSTRE_SYNC_FL);
+ LASSERTF(LUSTRE_IMMUTABLE_FL == 0x00000010, "found 0x%.8x\n",
+ LUSTRE_IMMUTABLE_FL);
+ LASSERTF(LUSTRE_APPEND_FL == 0x00000020, "found 0x%.8x\n",
+ LUSTRE_APPEND_FL);
+ LASSERTF(LUSTRE_NOATIME_FL == 0x00000080, "found 0x%.8x\n",
+ LUSTRE_NOATIME_FL);
+ LASSERTF(LUSTRE_DIRSYNC_FL == 0x00010000, "found 0x%.8x\n",
+ LUSTRE_DIRSYNC_FL);
+ LASSERTF(MDS_INODELOCK_LOOKUP == 0x000001, "found 0x%.8x\n",
+ MDS_INODELOCK_LOOKUP);
+ LASSERTF(MDS_INODELOCK_UPDATE == 0x000002, "found 0x%.8x\n",
+ MDS_INODELOCK_UPDATE);
+ LASSERTF(MDS_INODELOCK_OPEN == 0x000004, "found 0x%.8x\n",
+ MDS_INODELOCK_OPEN);
+ LASSERTF(MDS_INODELOCK_LAYOUT == 0x000008, "found 0x%.8x\n",
+ MDS_INODELOCK_LAYOUT);
+
+ /* Checks for struct mdt_ioepoch */
+ LASSERTF((int)sizeof(struct mdt_ioepoch) == 24, "found %lld\n",
+ (long long)(int)sizeof(struct mdt_ioepoch));
+ LASSERTF((int)offsetof(struct mdt_ioepoch, handle) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_ioepoch, handle));
+ LASSERTF((int)sizeof(((struct mdt_ioepoch *)0)->handle) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_ioepoch *)0)->handle));
+ LASSERTF((int)offsetof(struct mdt_ioepoch, ioepoch) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_ioepoch, ioepoch));
+ LASSERTF((int)sizeof(((struct mdt_ioepoch *)0)->ioepoch) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_ioepoch *)0)->ioepoch));
+ LASSERTF((int)offsetof(struct mdt_ioepoch, flags) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_ioepoch, flags));
+ LASSERTF((int)sizeof(((struct mdt_ioepoch *)0)->flags) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_ioepoch *)0)->flags));
+ LASSERTF((int)offsetof(struct mdt_ioepoch, padding) == 20, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_ioepoch, padding));
+ LASSERTF((int)sizeof(((struct mdt_ioepoch *)0)->padding) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_ioepoch *)0)->padding));
+
+ /* Checks for struct mdt_remote_perm */
+ LASSERTF((int)sizeof(struct mdt_remote_perm) == 32, "found %lld\n",
+ (long long)(int)sizeof(struct mdt_remote_perm));
+ LASSERTF((int)offsetof(struct mdt_remote_perm, rp_uid) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_remote_perm, rp_uid));
+ LASSERTF((int)sizeof(((struct mdt_remote_perm *)0)->rp_uid) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_remote_perm *)0)->rp_uid));
+ LASSERTF((int)offsetof(struct mdt_remote_perm, rp_gid) == 4, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_remote_perm, rp_gid));
+ LASSERTF((int)sizeof(((struct mdt_remote_perm *)0)->rp_gid) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_remote_perm *)0)->rp_gid));
+ LASSERTF((int)offsetof(struct mdt_remote_perm, rp_fsuid) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_remote_perm, rp_fsuid));
+ LASSERTF((int)sizeof(((struct mdt_remote_perm *)0)->rp_fsuid) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_remote_perm *)0)->rp_fsuid));
+ LASSERTF((int)offsetof(struct mdt_remote_perm, rp_fsgid) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_remote_perm, rp_fsgid));
+ LASSERTF((int)sizeof(((struct mdt_remote_perm *)0)->rp_fsgid) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_remote_perm *)0)->rp_fsgid));
+ LASSERTF((int)offsetof(struct mdt_remote_perm, rp_access_perm) == 24, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_remote_perm, rp_access_perm));
+ LASSERTF((int)sizeof(((struct mdt_remote_perm *)0)->rp_access_perm) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_remote_perm *)0)->rp_access_perm));
+ LASSERTF((int)offsetof(struct mdt_remote_perm, rp_padding) == 28, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_remote_perm, rp_padding));
+ LASSERTF((int)sizeof(((struct mdt_remote_perm *)0)->rp_padding) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_remote_perm *)0)->rp_padding));
+ LASSERTF(CFS_SETUID_PERM == 0x00000001UL, "found 0x%.8xUL\n",
+ (unsigned)CFS_SETUID_PERM);
+ LASSERTF(CFS_SETGID_PERM == 0x00000002UL, "found 0x%.8xUL\n",
+ (unsigned)CFS_SETGID_PERM);
+ LASSERTF(CFS_SETGRP_PERM == 0x00000004UL, "found 0x%.8xUL\n",
+ (unsigned)CFS_SETGRP_PERM);
+ LASSERTF(CFS_RMTACL_PERM == 0x00000008UL, "found 0x%.8xUL\n",
+ (unsigned)CFS_RMTACL_PERM);
+ LASSERTF(CFS_RMTOWN_PERM == 0x00000010UL, "found 0x%.8xUL\n",
+ (unsigned)CFS_RMTOWN_PERM);
+
+ /* Checks for struct mdt_rec_setattr */
+ LASSERTF((int)sizeof(struct mdt_rec_setattr) == 136, "found %lld\n",
+ (long long)(int)sizeof(struct mdt_rec_setattr));
+ LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_opcode) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_setattr, sa_opcode));
+ LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_opcode) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_opcode));
+ LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_cap) == 4, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_setattr, sa_cap));
+ LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_cap) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_cap));
+ LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_fsuid) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_setattr, sa_fsuid));
+ LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_fsuid) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_fsuid));
+ LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_fsuid_h) == 12, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_setattr, sa_fsuid_h));
+ LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_fsuid_h) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_fsuid_h));
+ LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_fsgid) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_setattr, sa_fsgid));
+ LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_fsgid) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_fsgid));
+ LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_fsgid_h) == 20, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_setattr, sa_fsgid_h));
+ LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_fsgid_h) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_fsgid_h));
+ LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_suppgid) == 24, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_setattr, sa_suppgid));
+ LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_suppgid) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_suppgid));
+ LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_suppgid_h) == 28, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_setattr, sa_suppgid_h));
+ LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_suppgid_h) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_suppgid_h));
+ LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_padding_1) == 32, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_setattr, sa_padding_1));
+ LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_padding_1) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_padding_1));
+ LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_padding_1_h) == 36, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_setattr, sa_padding_1_h));
+ LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_padding_1_h) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_padding_1_h));
+ LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_fid) == 40, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_setattr, sa_fid));
+ LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_fid) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_fid));
+ LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_valid) == 56, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_setattr, sa_valid));
+ LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_valid) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_valid));
+ LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_uid) == 64, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_setattr, sa_uid));
+ LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_uid) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_uid));
+ LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_gid) == 68, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_setattr, sa_gid));
+ LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_gid) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_gid));
+ LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_size) == 72, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_setattr, sa_size));
+ LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_size) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_size));
+ LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_blocks) == 80, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_setattr, sa_blocks));
+ LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_blocks) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_blocks));
+ LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_mtime) == 88, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_setattr, sa_mtime));
+ LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_mtime) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_mtime));
+ LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_atime) == 96, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_setattr, sa_atime));
+ LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_atime) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_atime));
+ LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_ctime) == 104, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_setattr, sa_ctime));
+ LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_ctime) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_ctime));
+ LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_attr_flags) == 112, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_setattr, sa_attr_flags));
+ LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_attr_flags) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_attr_flags));
+ LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_mode) == 116, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_setattr, sa_mode));
+ LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_mode) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_mode));
+ LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_bias) == 120, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_setattr, sa_bias));
+ LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_bias) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_bias));
+ LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_padding_3) == 124, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_setattr, sa_padding_3));
+ LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_padding_3) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_padding_3));
+ LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_padding_4) == 128, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_setattr, sa_padding_4));
+ LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_padding_4) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_padding_4));
+ LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_padding_5) == 132, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_setattr, sa_padding_5));
+ LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_padding_5) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_padding_5));
+
+ /* Checks for struct mdt_rec_create */
+ LASSERTF((int)sizeof(struct mdt_rec_create) == 136, "found %lld\n",
+ (long long)(int)sizeof(struct mdt_rec_create));
+ LASSERTF((int)offsetof(struct mdt_rec_create, cr_opcode) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_create, cr_opcode));
+ LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_opcode) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_opcode));
+ LASSERTF((int)offsetof(struct mdt_rec_create, cr_cap) == 4, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_create, cr_cap));
+ LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_cap) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_cap));
+ LASSERTF((int)offsetof(struct mdt_rec_create, cr_fsuid) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_create, cr_fsuid));
+ LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_fsuid) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_fsuid));
+ LASSERTF((int)offsetof(struct mdt_rec_create, cr_fsuid_h) == 12, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_create, cr_fsuid_h));
+ LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_fsuid_h) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_fsuid_h));
+ LASSERTF((int)offsetof(struct mdt_rec_create, cr_fsgid) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_create, cr_fsgid));
+ LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_fsgid) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_fsgid));
+ LASSERTF((int)offsetof(struct mdt_rec_create, cr_fsgid_h) == 20, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_create, cr_fsgid_h));
+ LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_fsgid_h) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_fsgid_h));
+ LASSERTF((int)offsetof(struct mdt_rec_create, cr_suppgid1) == 24, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_create, cr_suppgid1));
+ LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_suppgid1) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_suppgid1));
+ LASSERTF((int)offsetof(struct mdt_rec_create, cr_suppgid1_h) == 28, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_create, cr_suppgid1_h));
+ LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_suppgid1_h) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_suppgid1_h));
+ LASSERTF((int)offsetof(struct mdt_rec_create, cr_suppgid2) == 32, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_create, cr_suppgid2));
+ LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_suppgid2) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_suppgid2));
+ LASSERTF((int)offsetof(struct mdt_rec_create, cr_suppgid2_h) == 36, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_create, cr_suppgid2_h));
+ LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_suppgid2_h) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_suppgid2_h));
+ LASSERTF((int)offsetof(struct mdt_rec_create, cr_fid1) == 40, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_create, cr_fid1));
+ LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_fid1) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_fid1));
+ LASSERTF((int)offsetof(struct mdt_rec_create, cr_fid2) == 56, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_create, cr_fid2));
+ LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_fid2) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_fid2));
+ LASSERTF((int)offsetof(struct mdt_rec_create, cr_old_handle) == 72, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_create, cr_old_handle));
+ LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_old_handle) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_old_handle));
+ LASSERTF((int)offsetof(struct mdt_rec_create, cr_time) == 80, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_create, cr_time));
+ LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_time) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_time));
+ LASSERTF((int)offsetof(struct mdt_rec_create, cr_rdev) == 88, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_create, cr_rdev));
+ LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_rdev) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_rdev));
+ LASSERTF((int)offsetof(struct mdt_rec_create, cr_ioepoch) == 96, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_create, cr_ioepoch));
+ LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_ioepoch) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_ioepoch));
+ LASSERTF((int)offsetof(struct mdt_rec_create, cr_padding_1) == 104, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_create, cr_padding_1));
+ LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_padding_1) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_padding_1));
+ LASSERTF((int)offsetof(struct mdt_rec_create, cr_mode) == 112, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_create, cr_mode));
+ LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_mode) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_mode));
+ LASSERTF((int)offsetof(struct mdt_rec_create, cr_bias) == 116, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_create, cr_bias));
+ LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_bias) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_bias));
+ LASSERTF((int)offsetof(struct mdt_rec_create, cr_flags_l) == 120, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_create, cr_flags_l));
+ LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_flags_l) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_flags_l));
+ LASSERTF((int)offsetof(struct mdt_rec_create, cr_flags_h) == 124, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_create, cr_flags_h));
+ LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_flags_h) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_flags_h));
+ LASSERTF((int)offsetof(struct mdt_rec_create, cr_umask) == 128, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_create, cr_umask));
+ LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_umask) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_umask));
+ LASSERTF((int)offsetof(struct mdt_rec_create, cr_padding_4) == 132, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_create, cr_padding_4));
+ LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_padding_4) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_padding_4));
+
+ /* Checks for struct mdt_rec_link */
+ LASSERTF((int)sizeof(struct mdt_rec_link) == 136, "found %lld\n",
+ (long long)(int)sizeof(struct mdt_rec_link));
+ LASSERTF((int)offsetof(struct mdt_rec_link, lk_opcode) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_link, lk_opcode));
+ LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_opcode) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_opcode));
+ LASSERTF((int)offsetof(struct mdt_rec_link, lk_cap) == 4, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_link, lk_cap));
+ LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_cap) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_cap));
+ LASSERTF((int)offsetof(struct mdt_rec_link, lk_fsuid) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_link, lk_fsuid));
+ LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_fsuid) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_fsuid));
+ LASSERTF((int)offsetof(struct mdt_rec_link, lk_fsuid_h) == 12, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_link, lk_fsuid_h));
+ LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_fsuid_h) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_fsuid_h));
+ LASSERTF((int)offsetof(struct mdt_rec_link, lk_fsgid) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_link, lk_fsgid));
+ LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_fsgid) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_fsgid));
+ LASSERTF((int)offsetof(struct mdt_rec_link, lk_fsgid_h) == 20, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_link, lk_fsgid_h));
+ LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_fsgid_h) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_fsgid_h));
+ LASSERTF((int)offsetof(struct mdt_rec_link, lk_suppgid1) == 24, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_link, lk_suppgid1));
+ LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_suppgid1) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_suppgid1));
+ LASSERTF((int)offsetof(struct mdt_rec_link, lk_suppgid1_h) == 28, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_link, lk_suppgid1_h));
+ LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_suppgid1_h) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_suppgid1_h));
+ LASSERTF((int)offsetof(struct mdt_rec_link, lk_suppgid2) == 32, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_link, lk_suppgid2));
+ LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_suppgid2) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_suppgid2));
+ LASSERTF((int)offsetof(struct mdt_rec_link, lk_suppgid2_h) == 36, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_link, lk_suppgid2_h));
+ LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_suppgid2_h) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_suppgid2_h));
+ LASSERTF((int)offsetof(struct mdt_rec_link, lk_fid1) == 40, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_link, lk_fid1));
+ LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_fid1) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_fid1));
+ LASSERTF((int)offsetof(struct mdt_rec_link, lk_fid2) == 56, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_link, lk_fid2));
+ LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_fid2) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_fid2));
+ LASSERTF((int)offsetof(struct mdt_rec_link, lk_time) == 72, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_link, lk_time));
+ LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_time) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_time));
+ LASSERTF((int)offsetof(struct mdt_rec_link, lk_padding_1) == 80, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_link, lk_padding_1));
+ LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_padding_1) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_padding_1));
+ LASSERTF((int)offsetof(struct mdt_rec_link, lk_padding_2) == 88, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_link, lk_padding_2));
+ LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_padding_2) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_padding_2));
+ LASSERTF((int)offsetof(struct mdt_rec_link, lk_padding_3) == 96, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_link, lk_padding_3));
+ LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_padding_3) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_padding_3));
+ LASSERTF((int)offsetof(struct mdt_rec_link, lk_padding_4) == 104, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_link, lk_padding_4));
+ LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_padding_4) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_padding_4));
+ LASSERTF((int)offsetof(struct mdt_rec_link, lk_bias) == 112, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_link, lk_bias));
+ LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_bias) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_bias));
+ LASSERTF((int)offsetof(struct mdt_rec_link, lk_padding_5) == 116, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_link, lk_padding_5));
+ LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_padding_5) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_padding_5));
+ LASSERTF((int)offsetof(struct mdt_rec_link, lk_padding_6) == 120, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_link, lk_padding_6));
+ LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_padding_6) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_padding_6));
+ LASSERTF((int)offsetof(struct mdt_rec_link, lk_padding_7) == 124, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_link, lk_padding_7));
+ LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_padding_7) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_padding_7));
+ LASSERTF((int)offsetof(struct mdt_rec_link, lk_padding_8) == 128, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_link, lk_padding_8));
+ LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_padding_8) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_padding_8));
+ LASSERTF((int)offsetof(struct mdt_rec_link, lk_padding_9) == 132, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_link, lk_padding_9));
+ LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_padding_9) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_padding_9));
+
+ /* Checks for struct mdt_rec_unlink */
+ LASSERTF((int)sizeof(struct mdt_rec_unlink) == 136, "found %lld\n",
+ (long long)(int)sizeof(struct mdt_rec_unlink));
+ LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_opcode) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_unlink, ul_opcode));
+ LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_opcode) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_opcode));
+ LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_cap) == 4, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_unlink, ul_cap));
+ LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_cap) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_cap));
+ LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_fsuid) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_unlink, ul_fsuid));
+ LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_fsuid) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_fsuid));
+ LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_fsuid_h) == 12, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_unlink, ul_fsuid_h));
+ LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_fsuid_h) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_fsuid_h));
+ LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_fsgid) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_unlink, ul_fsgid));
+ LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_fsgid) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_fsgid));
+ LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_fsgid_h) == 20, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_unlink, ul_fsgid_h));
+ LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_fsgid_h) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_fsgid_h));
+ LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_suppgid1) == 24, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_unlink, ul_suppgid1));
+ LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_suppgid1) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_suppgid1));
+ LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_suppgid1_h) == 28, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_unlink, ul_suppgid1_h));
+ LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_suppgid1_h) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_suppgid1_h));
+ LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_suppgid2) == 32, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_unlink, ul_suppgid2));
+ LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_suppgid2) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_suppgid2));
+ LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_suppgid2_h) == 36, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_unlink, ul_suppgid2_h));
+ LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_suppgid2_h) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_suppgid2_h));
+ LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_fid1) == 40, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_unlink, ul_fid1));
+ LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_fid1) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_fid1));
+ LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_fid2) == 56, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_unlink, ul_fid2));
+ LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_fid2) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_fid2));
+ LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_time) == 72, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_unlink, ul_time));
+ LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_time) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_time));
+ LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_padding_2) == 80, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_unlink, ul_padding_2));
+ LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_padding_2) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_padding_2));
+ LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_padding_3) == 88, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_unlink, ul_padding_3));
+ LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_padding_3) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_padding_3));
+ LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_padding_4) == 96, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_unlink, ul_padding_4));
+ LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_padding_4) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_padding_4));
+ LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_padding_5) == 104, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_unlink, ul_padding_5));
+ LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_padding_5) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_padding_5));
+ LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_bias) == 112, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_unlink, ul_bias));
+ LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_bias) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_bias));
+ LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_mode) == 116, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_unlink, ul_mode));
+ LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_mode) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_mode));
+ LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_padding_6) == 120, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_unlink, ul_padding_6));
+ LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_padding_6) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_padding_6));
+ LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_padding_7) == 124, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_unlink, ul_padding_7));
+ LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_padding_7) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_padding_7));
+ LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_padding_8) == 128, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_unlink, ul_padding_8));
+ LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_padding_8) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_padding_8));
+ LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_padding_9) == 132, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_unlink, ul_padding_9));
+ LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_padding_9) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_padding_9));
+
+ /* Checks for struct mdt_rec_rename */
+ LASSERTF((int)sizeof(struct mdt_rec_rename) == 136, "found %lld\n",
+ (long long)(int)sizeof(struct mdt_rec_rename));
+ LASSERTF((int)offsetof(struct mdt_rec_rename, rn_opcode) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_rename, rn_opcode));
+ LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_opcode) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_opcode));
+ LASSERTF((int)offsetof(struct mdt_rec_rename, rn_cap) == 4, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_rename, rn_cap));
+ LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_cap) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_cap));
+ LASSERTF((int)offsetof(struct mdt_rec_rename, rn_fsuid) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_rename, rn_fsuid));
+ LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_fsuid) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_fsuid));
+ LASSERTF((int)offsetof(struct mdt_rec_rename, rn_fsuid_h) == 12, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_rename, rn_fsuid_h));
+ LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_fsuid_h) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_fsuid_h));
+ LASSERTF((int)offsetof(struct mdt_rec_rename, rn_fsgid) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_rename, rn_fsgid));
+ LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_fsgid) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_fsgid));
+ LASSERTF((int)offsetof(struct mdt_rec_rename, rn_fsgid_h) == 20, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_rename, rn_fsgid_h));
+ LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_fsgid_h) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_fsgid_h));
+ LASSERTF((int)offsetof(struct mdt_rec_rename, rn_suppgid1) == 24, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_rename, rn_suppgid1));
+ LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_suppgid1) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_suppgid1));
+ LASSERTF((int)offsetof(struct mdt_rec_rename, rn_suppgid1_h) == 28, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_rename, rn_suppgid1_h));
+ LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_suppgid1_h) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_suppgid1_h));
+ LASSERTF((int)offsetof(struct mdt_rec_rename, rn_suppgid2) == 32, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_rename, rn_suppgid2));
+ LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_suppgid2) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_suppgid2));
+ LASSERTF((int)offsetof(struct mdt_rec_rename, rn_suppgid2_h) == 36, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_rename, rn_suppgid2_h));
+ LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_suppgid2_h) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_suppgid2_h));
+ LASSERTF((int)offsetof(struct mdt_rec_rename, rn_fid1) == 40, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_rename, rn_fid1));
+ LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_fid1) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_fid1));
+ LASSERTF((int)offsetof(struct mdt_rec_rename, rn_fid2) == 56, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_rename, rn_fid2));
+ LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_fid2) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_fid2));
+ LASSERTF((int)offsetof(struct mdt_rec_rename, rn_time) == 72, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_rename, rn_time));
+ LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_time) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_time));
+ LASSERTF((int)offsetof(struct mdt_rec_rename, rn_padding_1) == 80, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_rename, rn_padding_1));
+ LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_padding_1) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_padding_1));
+ LASSERTF((int)offsetof(struct mdt_rec_rename, rn_padding_2) == 88, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_rename, rn_padding_2));
+ LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_padding_2) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_padding_2));
+ LASSERTF((int)offsetof(struct mdt_rec_rename, rn_padding_3) == 96, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_rename, rn_padding_3));
+ LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_padding_3) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_padding_3));
+ LASSERTF((int)offsetof(struct mdt_rec_rename, rn_padding_4) == 104, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_rename, rn_padding_4));
+ LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_padding_4) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_padding_4));
+ LASSERTF((int)offsetof(struct mdt_rec_rename, rn_bias) == 112, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_rename, rn_bias));
+ LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_bias) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_bias));
+ LASSERTF((int)offsetof(struct mdt_rec_rename, rn_mode) == 116, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_rename, rn_mode));
+ LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_mode) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_mode));
+ LASSERTF((int)offsetof(struct mdt_rec_rename, rn_padding_5) == 120, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_rename, rn_padding_5));
+ LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_padding_5) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_padding_5));
+ LASSERTF((int)offsetof(struct mdt_rec_rename, rn_padding_6) == 124, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_rename, rn_padding_6));
+ LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_padding_6) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_padding_6));
+ LASSERTF((int)offsetof(struct mdt_rec_rename, rn_padding_7) == 128, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_rename, rn_padding_7));
+ LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_padding_7) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_padding_7));
+ LASSERTF((int)offsetof(struct mdt_rec_rename, rn_padding_8) == 132, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_rename, rn_padding_8));
+ LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_padding_8) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_padding_8));
+
+ /* Checks for struct mdt_rec_setxattr */
+ LASSERTF((int)sizeof(struct mdt_rec_setxattr) == 136, "found %lld\n",
+ (long long)(int)sizeof(struct mdt_rec_setxattr));
+ LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_opcode) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_setxattr, sx_opcode));
+ LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_opcode) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_opcode));
+ LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_cap) == 4, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_setxattr, sx_cap));
+ LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_cap) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_cap));
+ LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_fsuid) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_setxattr, sx_fsuid));
+ LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_fsuid) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_fsuid));
+ LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_fsuid_h) == 12, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_setxattr, sx_fsuid_h));
+ LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_fsuid_h) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_fsuid_h));
+ LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_fsgid) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_setxattr, sx_fsgid));
+ LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_fsgid) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_fsgid));
+ LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_fsgid_h) == 20, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_setxattr, sx_fsgid_h));
+ LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_fsgid_h) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_fsgid_h));
+ LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_suppgid1) == 24, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_setxattr, sx_suppgid1));
+ LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_suppgid1) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_suppgid1));
+ LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_suppgid1_h) == 28, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_setxattr, sx_suppgid1_h));
+ LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_suppgid1_h) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_suppgid1_h));
+ LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_suppgid2) == 32, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_setxattr, sx_suppgid2));
+ LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_suppgid2) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_suppgid2));
+ LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_suppgid2_h) == 36, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_setxattr, sx_suppgid2_h));
+ LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_suppgid2_h) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_suppgid2_h));
+ LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_fid) == 40, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_setxattr, sx_fid));
+ LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_fid) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_fid));
+ LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_padding_1) == 56, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_setxattr, sx_padding_1));
+ LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_1) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_1));
+ LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_padding_2) == 64, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_setxattr, sx_padding_2));
+ LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_2) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_2));
+ LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_padding_3) == 68, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_setxattr, sx_padding_3));
+ LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_3) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_3));
+ LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_valid) == 72, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_setxattr, sx_valid));
+ LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_valid) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_valid));
+ LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_time) == 80, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_setxattr, sx_time));
+ LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_time) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_time));
+ LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_padding_5) == 88, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_setxattr, sx_padding_5));
+ LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_5) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_5));
+ LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_padding_6) == 96, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_setxattr, sx_padding_6));
+ LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_6) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_6));
+ LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_padding_7) == 104, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_setxattr, sx_padding_7));
+ LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_7) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_7));
+ LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_size) == 112, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_setxattr, sx_size));
+ LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_size) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_size));
+ LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_flags) == 116, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_setxattr, sx_flags));
+ LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_flags) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_flags));
+ LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_padding_8) == 120, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_setxattr, sx_padding_8));
+ LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_8) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_8));
+ LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_padding_9) == 124, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_setxattr, sx_padding_9));
+ LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_9) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_9));
+ LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_padding_10) == 128, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_setxattr, sx_padding_10));
+ LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_10) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_10));
+ LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_padding_11) == 132, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_setxattr, sx_padding_11));
+ LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_11) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_11));
+
+ /* Checks for struct mdt_rec_reint */
+ LASSERTF((int)sizeof(struct mdt_rec_reint) == 136, "found %lld\n",
+ (long long)(int)sizeof(struct mdt_rec_reint));
+ LASSERTF((int)offsetof(struct mdt_rec_reint, rr_opcode) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_reint, rr_opcode));
+ LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_opcode) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_opcode));
+ LASSERTF((int)offsetof(struct mdt_rec_reint, rr_cap) == 4, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_reint, rr_cap));
+ LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_cap) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_cap));
+ LASSERTF((int)offsetof(struct mdt_rec_reint, rr_fsuid) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_reint, rr_fsuid));
+ LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_fsuid) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_fsuid));
+ LASSERTF((int)offsetof(struct mdt_rec_reint, rr_fsuid_h) == 12, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_reint, rr_fsuid_h));
+ LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_fsuid_h) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_fsuid_h));
+ LASSERTF((int)offsetof(struct mdt_rec_reint, rr_fsgid) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_reint, rr_fsgid));
+ LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_fsgid) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_fsgid));
+ LASSERTF((int)offsetof(struct mdt_rec_reint, rr_fsgid_h) == 20, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_reint, rr_fsgid_h));
+ LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_fsgid_h) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_fsgid_h));
+ LASSERTF((int)offsetof(struct mdt_rec_reint, rr_suppgid1) == 24, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_reint, rr_suppgid1));
+ LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_suppgid1) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_suppgid1));
+ LASSERTF((int)offsetof(struct mdt_rec_reint, rr_suppgid1_h) == 28, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_reint, rr_suppgid1_h));
+ LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_suppgid1_h) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_suppgid1_h));
+ LASSERTF((int)offsetof(struct mdt_rec_reint, rr_suppgid2) == 32, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_reint, rr_suppgid2));
+ LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_suppgid2) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_suppgid2));
+ LASSERTF((int)offsetof(struct mdt_rec_reint, rr_suppgid2_h) == 36, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_reint, rr_suppgid2_h));
+ LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_suppgid2_h) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_suppgid2_h));
+ LASSERTF((int)offsetof(struct mdt_rec_reint, rr_fid1) == 40, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_reint, rr_fid1));
+ LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_fid1) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_fid1));
+ LASSERTF((int)offsetof(struct mdt_rec_reint, rr_fid2) == 56, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_reint, rr_fid2));
+ LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_fid2) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_fid2));
+ LASSERTF((int)offsetof(struct mdt_rec_reint, rr_mtime) == 72, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_reint, rr_mtime));
+ LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_mtime) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_mtime));
+ LASSERTF((int)offsetof(struct mdt_rec_reint, rr_atime) == 80, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_reint, rr_atime));
+ LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_atime) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_atime));
+ LASSERTF((int)offsetof(struct mdt_rec_reint, rr_ctime) == 88, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_reint, rr_ctime));
+ LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_ctime) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_ctime));
+ LASSERTF((int)offsetof(struct mdt_rec_reint, rr_size) == 96, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_reint, rr_size));
+ LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_size) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_size));
+ LASSERTF((int)offsetof(struct mdt_rec_reint, rr_blocks) == 104, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_reint, rr_blocks));
+ LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_blocks) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_blocks));
+ LASSERTF((int)offsetof(struct mdt_rec_reint, rr_bias) == 112, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_reint, rr_bias));
+ LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_bias) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_bias));
+ LASSERTF((int)offsetof(struct mdt_rec_reint, rr_mode) == 116, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_reint, rr_mode));
+ LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_mode) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_mode));
+ LASSERTF((int)offsetof(struct mdt_rec_reint, rr_flags) == 120, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_reint, rr_flags));
+ LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_flags) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_flags));
+ LASSERTF((int)offsetof(struct mdt_rec_reint, rr_flags_h) == 124, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_reint, rr_flags_h));
+ LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_flags_h) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_flags_h));
+ LASSERTF((int)offsetof(struct mdt_rec_reint, rr_umask) == 128, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_reint, rr_umask));
+ LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_umask) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_umask));
+ LASSERTF((int)offsetof(struct mdt_rec_reint, rr_padding_4) == 132, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_reint, rr_padding_4));
+ LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_padding_4) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_padding_4));
+
+ /* Checks for struct lmv_desc */
+ LASSERTF((int)sizeof(struct lmv_desc) == 88, "found %lld\n",
+ (long long)(int)sizeof(struct lmv_desc));
+ LASSERTF((int)offsetof(struct lmv_desc, ld_tgt_count) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct lmv_desc, ld_tgt_count));
+ LASSERTF((int)sizeof(((struct lmv_desc *)0)->ld_tgt_count) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct lmv_desc *)0)->ld_tgt_count));
+ LASSERTF((int)offsetof(struct lmv_desc, ld_active_tgt_count) == 4, "found %lld\n",
+ (long long)(int)offsetof(struct lmv_desc, ld_active_tgt_count));
+ LASSERTF((int)sizeof(((struct lmv_desc *)0)->ld_active_tgt_count) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct lmv_desc *)0)->ld_active_tgt_count));
+ LASSERTF((int)offsetof(struct lmv_desc, ld_default_stripe_count) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct lmv_desc, ld_default_stripe_count));
+ LASSERTF((int)sizeof(((struct lmv_desc *)0)->ld_default_stripe_count) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct lmv_desc *)0)->ld_default_stripe_count));
+ LASSERTF((int)offsetof(struct lmv_desc, ld_pattern) == 12, "found %lld\n",
+ (long long)(int)offsetof(struct lmv_desc, ld_pattern));
+ LASSERTF((int)sizeof(((struct lmv_desc *)0)->ld_pattern) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct lmv_desc *)0)->ld_pattern));
+ LASSERTF((int)offsetof(struct lmv_desc, ld_default_hash_size) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct lmv_desc, ld_default_hash_size));
+ LASSERTF((int)sizeof(((struct lmv_desc *)0)->ld_default_hash_size) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct lmv_desc *)0)->ld_default_hash_size));
+ LASSERTF((int)offsetof(struct lmv_desc, ld_padding_1) == 24, "found %lld\n",
+ (long long)(int)offsetof(struct lmv_desc, ld_padding_1));
+ LASSERTF((int)sizeof(((struct lmv_desc *)0)->ld_padding_1) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct lmv_desc *)0)->ld_padding_1));
+ LASSERTF((int)offsetof(struct lmv_desc, ld_padding_2) == 32, "found %lld\n",
+ (long long)(int)offsetof(struct lmv_desc, ld_padding_2));
+ LASSERTF((int)sizeof(((struct lmv_desc *)0)->ld_padding_2) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct lmv_desc *)0)->ld_padding_2));
+ LASSERTF((int)offsetof(struct lmv_desc, ld_qos_maxage) == 36, "found %lld\n",
+ (long long)(int)offsetof(struct lmv_desc, ld_qos_maxage));
+ LASSERTF((int)sizeof(((struct lmv_desc *)0)->ld_qos_maxage) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct lmv_desc *)0)->ld_qos_maxage));
+ LASSERTF((int)offsetof(struct lmv_desc, ld_padding_3) == 40, "found %lld\n",
+ (long long)(int)offsetof(struct lmv_desc, ld_padding_3));
+ LASSERTF((int)sizeof(((struct lmv_desc *)0)->ld_padding_3) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct lmv_desc *)0)->ld_padding_3));
+ LASSERTF((int)offsetof(struct lmv_desc, ld_padding_4) == 44, "found %lld\n",
+ (long long)(int)offsetof(struct lmv_desc, ld_padding_4));
+ LASSERTF((int)sizeof(((struct lmv_desc *)0)->ld_padding_4) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct lmv_desc *)0)->ld_padding_4));
+ LASSERTF((int)offsetof(struct lmv_desc, ld_uuid) == 48, "found %lld\n",
+ (long long)(int)offsetof(struct lmv_desc, ld_uuid));
+ LASSERTF((int)sizeof(((struct lmv_desc *)0)->ld_uuid) == 40, "found %lld\n",
+ (long long)(int)sizeof(((struct lmv_desc *)0)->ld_uuid));
+
+ /* Checks for struct lmv_stripe_md */
+ LASSERTF((int)sizeof(struct lmv_stripe_md) == 32, "found %lld\n",
+ (long long)(int)sizeof(struct lmv_stripe_md));
+ LASSERTF((int)offsetof(struct lmv_stripe_md, mea_magic) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct lmv_stripe_md, mea_magic));
+ LASSERTF((int)sizeof(((struct lmv_stripe_md *)0)->mea_magic) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct lmv_stripe_md *)0)->mea_magic));
+ LASSERTF((int)offsetof(struct lmv_stripe_md, mea_count) == 4, "found %lld\n",
+ (long long)(int)offsetof(struct lmv_stripe_md, mea_count));
+ LASSERTF((int)sizeof(((struct lmv_stripe_md *)0)->mea_count) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct lmv_stripe_md *)0)->mea_count));
+ LASSERTF((int)offsetof(struct lmv_stripe_md, mea_master) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct lmv_stripe_md, mea_master));
+ LASSERTF((int)sizeof(((struct lmv_stripe_md *)0)->mea_master) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct lmv_stripe_md *)0)->mea_master));
+ LASSERTF((int)offsetof(struct lmv_stripe_md, mea_padding) == 12, "found %lld\n",
+ (long long)(int)offsetof(struct lmv_stripe_md, mea_padding));
+ LASSERTF((int)sizeof(((struct lmv_stripe_md *)0)->mea_padding) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct lmv_stripe_md *)0)->mea_padding));
+ CLASSERT(LOV_MAXPOOLNAME == 16);
+ LASSERTF((int)offsetof(struct lmv_stripe_md, mea_pool_name[16]) == 32, "found %lld\n",
+ (long long)(int)offsetof(struct lmv_stripe_md, mea_pool_name[16]));
+ LASSERTF((int)sizeof(((struct lmv_stripe_md *)0)->mea_pool_name[16]) == 1, "found %lld\n",
+ (long long)(int)sizeof(((struct lmv_stripe_md *)0)->mea_pool_name[16]));
+ LASSERTF((int)offsetof(struct lmv_stripe_md, mea_ids[0]) == 32, "found %lld\n",
+ (long long)(int)offsetof(struct lmv_stripe_md, mea_ids[0]));
+ LASSERTF((int)sizeof(((struct lmv_stripe_md *)0)->mea_ids[0]) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct lmv_stripe_md *)0)->mea_ids[0]));
+
+ /* Checks for struct lov_desc */
+ LASSERTF((int)sizeof(struct lov_desc) == 88, "found %lld\n",
+ (long long)(int)sizeof(struct lov_desc));
+ LASSERTF((int)offsetof(struct lov_desc, ld_tgt_count) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct lov_desc, ld_tgt_count));
+ LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_tgt_count) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct lov_desc *)0)->ld_tgt_count));
+ LASSERTF((int)offsetof(struct lov_desc, ld_active_tgt_count) == 4, "found %lld\n",
+ (long long)(int)offsetof(struct lov_desc, ld_active_tgt_count));
+ LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_active_tgt_count) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct lov_desc *)0)->ld_active_tgt_count));
+ LASSERTF((int)offsetof(struct lov_desc, ld_default_stripe_count) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct lov_desc, ld_default_stripe_count));
+ LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_default_stripe_count) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct lov_desc *)0)->ld_default_stripe_count));
+ LASSERTF((int)offsetof(struct lov_desc, ld_pattern) == 12, "found %lld\n",
+ (long long)(int)offsetof(struct lov_desc, ld_pattern));
+ LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_pattern) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct lov_desc *)0)->ld_pattern));
+ LASSERTF((int)offsetof(struct lov_desc, ld_default_stripe_size) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct lov_desc, ld_default_stripe_size));
+ LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_default_stripe_size) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct lov_desc *)0)->ld_default_stripe_size));
+ LASSERTF((int)offsetof(struct lov_desc, ld_default_stripe_offset) == 24, "found %lld\n",
+ (long long)(int)offsetof(struct lov_desc, ld_default_stripe_offset));
+ LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_default_stripe_offset) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct lov_desc *)0)->ld_default_stripe_offset));
+ LASSERTF((int)offsetof(struct lov_desc, ld_padding_0) == 32, "found %lld\n",
+ (long long)(int)offsetof(struct lov_desc, ld_padding_0));
+ LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_padding_0) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct lov_desc *)0)->ld_padding_0));
+ LASSERTF((int)offsetof(struct lov_desc, ld_qos_maxage) == 36, "found %lld\n",
+ (long long)(int)offsetof(struct lov_desc, ld_qos_maxage));
+ LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_qos_maxage) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct lov_desc *)0)->ld_qos_maxage));
+ LASSERTF((int)offsetof(struct lov_desc, ld_padding_1) == 40, "found %lld\n",
+ (long long)(int)offsetof(struct lov_desc, ld_padding_1));
+ LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_padding_1) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct lov_desc *)0)->ld_padding_1));
+ LASSERTF((int)offsetof(struct lov_desc, ld_padding_2) == 44, "found %lld\n",
+ (long long)(int)offsetof(struct lov_desc, ld_padding_2));
+ LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_padding_2) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct lov_desc *)0)->ld_padding_2));
+ LASSERTF((int)offsetof(struct lov_desc, ld_uuid) == 48, "found %lld\n",
+ (long long)(int)offsetof(struct lov_desc, ld_uuid));
+ LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_uuid) == 40, "found %lld\n",
+ (long long)(int)sizeof(((struct lov_desc *)0)->ld_uuid));
+ CLASSERT(LOV_DESC_MAGIC == 0xB0CCDE5C);
+
+ /* Checks for struct ldlm_res_id */
+ LASSERTF((int)sizeof(struct ldlm_res_id) == 32, "found %lld\n",
+ (long long)(int)sizeof(struct ldlm_res_id));
+ CLASSERT(RES_NAME_SIZE == 4);
+ LASSERTF((int)offsetof(struct ldlm_res_id, name[4]) == 32, "found %lld\n",
+ (long long)(int)offsetof(struct ldlm_res_id, name[4]));
+ LASSERTF((int)sizeof(((struct ldlm_res_id *)0)->name[4]) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct ldlm_res_id *)0)->name[4]));
+
+ /* Checks for struct ldlm_extent */
+ LASSERTF((int)sizeof(struct ldlm_extent) == 24, "found %lld\n",
+ (long long)(int)sizeof(struct ldlm_extent));
+ LASSERTF((int)offsetof(struct ldlm_extent, start) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct ldlm_extent, start));
+ LASSERTF((int)sizeof(((struct ldlm_extent *)0)->start) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct ldlm_extent *)0)->start));
+ LASSERTF((int)offsetof(struct ldlm_extent, end) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct ldlm_extent, end));
+ LASSERTF((int)sizeof(((struct ldlm_extent *)0)->end) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct ldlm_extent *)0)->end));
+ LASSERTF((int)offsetof(struct ldlm_extent, gid) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct ldlm_extent, gid));
+ LASSERTF((int)sizeof(((struct ldlm_extent *)0)->gid) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct ldlm_extent *)0)->gid));
+
+ /* Checks for struct ldlm_inodebits */
+ LASSERTF((int)sizeof(struct ldlm_inodebits) == 8, "found %lld\n",
+ (long long)(int)sizeof(struct ldlm_inodebits));
+ LASSERTF((int)offsetof(struct ldlm_inodebits, bits) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct ldlm_inodebits, bits));
+ LASSERTF((int)sizeof(((struct ldlm_inodebits *)0)->bits) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct ldlm_inodebits *)0)->bits));
+
+ /* Checks for struct ldlm_flock_wire */
+ LASSERTF((int)sizeof(struct ldlm_flock_wire) == 32, "found %lld\n",
+ (long long)(int)sizeof(struct ldlm_flock_wire));
+ LASSERTF((int)offsetof(struct ldlm_flock_wire, lfw_start) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct ldlm_flock_wire, lfw_start));
+ LASSERTF((int)sizeof(((struct ldlm_flock_wire *)0)->lfw_start) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct ldlm_flock_wire *)0)->lfw_start));
+ LASSERTF((int)offsetof(struct ldlm_flock_wire, lfw_end) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct ldlm_flock_wire, lfw_end));
+ LASSERTF((int)sizeof(((struct ldlm_flock_wire *)0)->lfw_end) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct ldlm_flock_wire *)0)->lfw_end));
+ LASSERTF((int)offsetof(struct ldlm_flock_wire, lfw_owner) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct ldlm_flock_wire, lfw_owner));
+ LASSERTF((int)sizeof(((struct ldlm_flock_wire *)0)->lfw_owner) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct ldlm_flock_wire *)0)->lfw_owner));
+ LASSERTF((int)offsetof(struct ldlm_flock_wire, lfw_padding) == 24, "found %lld\n",
+ (long long)(int)offsetof(struct ldlm_flock_wire, lfw_padding));
+ LASSERTF((int)sizeof(((struct ldlm_flock_wire *)0)->lfw_padding) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct ldlm_flock_wire *)0)->lfw_padding));
+ LASSERTF((int)offsetof(struct ldlm_flock_wire, lfw_pid) == 28, "found %lld\n",
+ (long long)(int)offsetof(struct ldlm_flock_wire, lfw_pid));
+ LASSERTF((int)sizeof(((struct ldlm_flock_wire *)0)->lfw_pid) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct ldlm_flock_wire *)0)->lfw_pid));
+
+ /* Checks for struct ldlm_intent */
+ LASSERTF((int)sizeof(struct ldlm_intent) == 8, "found %lld\n",
+ (long long)(int)sizeof(struct ldlm_intent));
+ LASSERTF((int)offsetof(struct ldlm_intent, opc) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct ldlm_intent, opc));
+ LASSERTF((int)sizeof(((struct ldlm_intent *)0)->opc) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct ldlm_intent *)0)->opc));
+
+ /* Checks for struct ldlm_resource_desc */
+ LASSERTF((int)sizeof(struct ldlm_resource_desc) == 40, "found %lld\n",
+ (long long)(int)sizeof(struct ldlm_resource_desc));
+ LASSERTF((int)offsetof(struct ldlm_resource_desc, lr_type) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct ldlm_resource_desc, lr_type));
+ LASSERTF((int)sizeof(((struct ldlm_resource_desc *)0)->lr_type) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct ldlm_resource_desc *)0)->lr_type));
+ LASSERTF((int)offsetof(struct ldlm_resource_desc, lr_padding) == 4, "found %lld\n",
+ (long long)(int)offsetof(struct ldlm_resource_desc, lr_padding));
+ LASSERTF((int)sizeof(((struct ldlm_resource_desc *)0)->lr_padding) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct ldlm_resource_desc *)0)->lr_padding));
+ LASSERTF((int)offsetof(struct ldlm_resource_desc, lr_name) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct ldlm_resource_desc, lr_name));
+ LASSERTF((int)sizeof(((struct ldlm_resource_desc *)0)->lr_name) == 32, "found %lld\n",
+ (long long)(int)sizeof(((struct ldlm_resource_desc *)0)->lr_name));
+
+ /* Checks for struct ldlm_lock_desc */
+ LASSERTF((int)sizeof(struct ldlm_lock_desc) == 80, "found %lld\n",
+ (long long)(int)sizeof(struct ldlm_lock_desc));
+ LASSERTF((int)offsetof(struct ldlm_lock_desc, l_resource) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct ldlm_lock_desc, l_resource));
+ LASSERTF((int)sizeof(((struct ldlm_lock_desc *)0)->l_resource) == 40, "found %lld\n",
+ (long long)(int)sizeof(((struct ldlm_lock_desc *)0)->l_resource));
+ LASSERTF((int)offsetof(struct ldlm_lock_desc, l_req_mode) == 40, "found %lld\n",
+ (long long)(int)offsetof(struct ldlm_lock_desc, l_req_mode));
+ LASSERTF((int)sizeof(((struct ldlm_lock_desc *)0)->l_req_mode) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct ldlm_lock_desc *)0)->l_req_mode));
+ LASSERTF((int)offsetof(struct ldlm_lock_desc, l_granted_mode) == 44, "found %lld\n",
+ (long long)(int)offsetof(struct ldlm_lock_desc, l_granted_mode));
+ LASSERTF((int)sizeof(((struct ldlm_lock_desc *)0)->l_granted_mode) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct ldlm_lock_desc *)0)->l_granted_mode));
+ LASSERTF((int)offsetof(struct ldlm_lock_desc, l_policy_data) == 48, "found %lld\n",
+ (long long)(int)offsetof(struct ldlm_lock_desc, l_policy_data));
+ LASSERTF((int)sizeof(((struct ldlm_lock_desc *)0)->l_policy_data) == 32, "found %lld\n",
+ (long long)(int)sizeof(((struct ldlm_lock_desc *)0)->l_policy_data));
+
+ /* Checks for struct ldlm_request */
+ LASSERTF((int)sizeof(struct ldlm_request) == 104, "found %lld\n",
+ (long long)(int)sizeof(struct ldlm_request));
+ LASSERTF((int)offsetof(struct ldlm_request, lock_flags) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct ldlm_request, lock_flags));
+ LASSERTF((int)sizeof(((struct ldlm_request *)0)->lock_flags) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct ldlm_request *)0)->lock_flags));
+ LASSERTF((int)offsetof(struct ldlm_request, lock_count) == 4, "found %lld\n",
+ (long long)(int)offsetof(struct ldlm_request, lock_count));
+ LASSERTF((int)sizeof(((struct ldlm_request *)0)->lock_count) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct ldlm_request *)0)->lock_count));
+ LASSERTF((int)offsetof(struct ldlm_request, lock_desc) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct ldlm_request, lock_desc));
+ LASSERTF((int)sizeof(((struct ldlm_request *)0)->lock_desc) == 80, "found %lld\n",
+ (long long)(int)sizeof(((struct ldlm_request *)0)->lock_desc));
+ LASSERTF((int)offsetof(struct ldlm_request, lock_handle) == 88, "found %lld\n",
+ (long long)(int)offsetof(struct ldlm_request, lock_handle));
+ LASSERTF((int)sizeof(((struct ldlm_request *)0)->lock_handle) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct ldlm_request *)0)->lock_handle));
+
+ /* Checks for struct ldlm_reply */
+ LASSERTF((int)sizeof(struct ldlm_reply) == 112, "found %lld\n",
+ (long long)(int)sizeof(struct ldlm_reply));
+ LASSERTF((int)offsetof(struct ldlm_reply, lock_flags) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct ldlm_reply, lock_flags));
+ LASSERTF((int)sizeof(((struct ldlm_reply *)0)->lock_flags) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct ldlm_reply *)0)->lock_flags));
+ LASSERTF((int)offsetof(struct ldlm_reply, lock_padding) == 4, "found %lld\n",
+ (long long)(int)offsetof(struct ldlm_reply, lock_padding));
+ LASSERTF((int)sizeof(((struct ldlm_reply *)0)->lock_padding) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct ldlm_reply *)0)->lock_padding));
+ LASSERTF((int)offsetof(struct ldlm_reply, lock_desc) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct ldlm_reply, lock_desc));
+ LASSERTF((int)sizeof(((struct ldlm_reply *)0)->lock_desc) == 80, "found %lld\n",
+ (long long)(int)sizeof(((struct ldlm_reply *)0)->lock_desc));
+ LASSERTF((int)offsetof(struct ldlm_reply, lock_handle) == 88, "found %lld\n",
+ (long long)(int)offsetof(struct ldlm_reply, lock_handle));
+ LASSERTF((int)sizeof(((struct ldlm_reply *)0)->lock_handle) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct ldlm_reply *)0)->lock_handle));
+ LASSERTF((int)offsetof(struct ldlm_reply, lock_policy_res1) == 96, "found %lld\n",
+ (long long)(int)offsetof(struct ldlm_reply, lock_policy_res1));
+ LASSERTF((int)sizeof(((struct ldlm_reply *)0)->lock_policy_res1) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct ldlm_reply *)0)->lock_policy_res1));
+ LASSERTF((int)offsetof(struct ldlm_reply, lock_policy_res2) == 104, "found %lld\n",
+ (long long)(int)offsetof(struct ldlm_reply, lock_policy_res2));
+ LASSERTF((int)sizeof(((struct ldlm_reply *)0)->lock_policy_res2) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct ldlm_reply *)0)->lock_policy_res2));
+
+ /* Checks for struct ost_lvb_v1 */
+ LASSERTF((int)sizeof(struct ost_lvb_v1) == 40, "found %lld\n",
+ (long long)(int)sizeof(struct ost_lvb_v1));
+ LASSERTF((int)offsetof(struct ost_lvb_v1, lvb_size) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct ost_lvb_v1, lvb_size));
+ LASSERTF((int)sizeof(((struct ost_lvb_v1 *)0)->lvb_size) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct ost_lvb_v1 *)0)->lvb_size));
+ LASSERTF((int)offsetof(struct ost_lvb_v1, lvb_mtime) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct ost_lvb_v1, lvb_mtime));
+ LASSERTF((int)sizeof(((struct ost_lvb_v1 *)0)->lvb_mtime) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct ost_lvb_v1 *)0)->lvb_mtime));
+ LASSERTF((int)offsetof(struct ost_lvb_v1, lvb_atime) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct ost_lvb_v1, lvb_atime));
+ LASSERTF((int)sizeof(((struct ost_lvb_v1 *)0)->lvb_atime) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct ost_lvb_v1 *)0)->lvb_atime));
+ LASSERTF((int)offsetof(struct ost_lvb_v1, lvb_ctime) == 24, "found %lld\n",
+ (long long)(int)offsetof(struct ost_lvb_v1, lvb_ctime));
+ LASSERTF((int)sizeof(((struct ost_lvb_v1 *)0)->lvb_ctime) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct ost_lvb_v1 *)0)->lvb_ctime));
+ LASSERTF((int)offsetof(struct ost_lvb_v1, lvb_blocks) == 32, "found %lld\n",
+ (long long)(int)offsetof(struct ost_lvb_v1, lvb_blocks));
+ LASSERTF((int)sizeof(((struct ost_lvb_v1 *)0)->lvb_blocks) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct ost_lvb_v1 *)0)->lvb_blocks));
+
+ /* Checks for struct ost_lvb */
+ LASSERTF((int)sizeof(struct ost_lvb) == 56, "found %lld\n",
+ (long long)(int)sizeof(struct ost_lvb));
+ LASSERTF((int)offsetof(struct ost_lvb, lvb_size) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct ost_lvb, lvb_size));
+ LASSERTF((int)sizeof(((struct ost_lvb *)0)->lvb_size) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct ost_lvb *)0)->lvb_size));
+ LASSERTF((int)offsetof(struct ost_lvb, lvb_mtime) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct ost_lvb, lvb_mtime));
+ LASSERTF((int)sizeof(((struct ost_lvb *)0)->lvb_mtime) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct ost_lvb *)0)->lvb_mtime));
+ LASSERTF((int)offsetof(struct ost_lvb, lvb_atime) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct ost_lvb, lvb_atime));
+ LASSERTF((int)sizeof(((struct ost_lvb *)0)->lvb_atime) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct ost_lvb *)0)->lvb_atime));
+ LASSERTF((int)offsetof(struct ost_lvb, lvb_ctime) == 24, "found %lld\n",
+ (long long)(int)offsetof(struct ost_lvb, lvb_ctime));
+ LASSERTF((int)sizeof(((struct ost_lvb *)0)->lvb_ctime) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct ost_lvb *)0)->lvb_ctime));
+ LASSERTF((int)offsetof(struct ost_lvb, lvb_blocks) == 32, "found %lld\n",
+ (long long)(int)offsetof(struct ost_lvb, lvb_blocks));
+ LASSERTF((int)sizeof(((struct ost_lvb *)0)->lvb_blocks) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct ost_lvb *)0)->lvb_blocks));
+ LASSERTF((int)offsetof(struct ost_lvb, lvb_mtime_ns) == 40, "found %lld\n",
+ (long long)(int)offsetof(struct ost_lvb, lvb_mtime_ns));
+ LASSERTF((int)sizeof(((struct ost_lvb *)0)->lvb_mtime_ns) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct ost_lvb *)0)->lvb_mtime_ns));
+ LASSERTF((int)offsetof(struct ost_lvb, lvb_atime_ns) == 44, "found %lld\n",
+ (long long)(int)offsetof(struct ost_lvb, lvb_atime_ns));
+ LASSERTF((int)sizeof(((struct ost_lvb *)0)->lvb_atime_ns) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct ost_lvb *)0)->lvb_atime_ns));
+ LASSERTF((int)offsetof(struct ost_lvb, lvb_ctime_ns) == 48, "found %lld\n",
+ (long long)(int)offsetof(struct ost_lvb, lvb_ctime_ns));
+ LASSERTF((int)sizeof(((struct ost_lvb *)0)->lvb_ctime_ns) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct ost_lvb *)0)->lvb_ctime_ns));
+ LASSERTF((int)offsetof(struct ost_lvb, lvb_padding) == 52, "found %lld\n",
+ (long long)(int)offsetof(struct ost_lvb, lvb_padding));
+ LASSERTF((int)sizeof(((struct ost_lvb *)0)->lvb_padding) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct ost_lvb *)0)->lvb_padding));
+
+ /* Checks for struct lquota_lvb */
+ LASSERTF((int)sizeof(struct lquota_lvb) == 40, "found %lld\n",
+ (long long)(int)sizeof(struct lquota_lvb));
+ LASSERTF((int)offsetof(struct lquota_lvb, lvb_flags) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct lquota_lvb, lvb_flags));
+ LASSERTF((int)sizeof(((struct lquota_lvb *)0)->lvb_flags) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct lquota_lvb *)0)->lvb_flags));
+ LASSERTF((int)offsetof(struct lquota_lvb, lvb_id_may_rel) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct lquota_lvb, lvb_id_may_rel));
+ LASSERTF((int)sizeof(((struct lquota_lvb *)0)->lvb_id_may_rel) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct lquota_lvb *)0)->lvb_id_may_rel));
+ LASSERTF((int)offsetof(struct lquota_lvb, lvb_id_rel) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct lquota_lvb, lvb_id_rel));
+ LASSERTF((int)sizeof(((struct lquota_lvb *)0)->lvb_id_rel) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct lquota_lvb *)0)->lvb_id_rel));
+ LASSERTF((int)offsetof(struct lquota_lvb, lvb_id_qunit) == 24, "found %lld\n",
+ (long long)(int)offsetof(struct lquota_lvb, lvb_id_qunit));
+ LASSERTF((int)sizeof(((struct lquota_lvb *)0)->lvb_id_qunit) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct lquota_lvb *)0)->lvb_id_qunit));
+ LASSERTF((int)offsetof(struct lquota_lvb, lvb_pad1) == 32, "found %lld\n",
+ (long long)(int)offsetof(struct lquota_lvb, lvb_pad1));
+ LASSERTF((int)sizeof(((struct lquota_lvb *)0)->lvb_pad1) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct lquota_lvb *)0)->lvb_pad1));
+ LASSERTF(LQUOTA_FL_EDQUOT == 1, "found %lld\n",
+ (long long)LQUOTA_FL_EDQUOT);
+
+ /* Checks for struct ldlm_gl_lquota_desc */
+ LASSERTF((int)sizeof(struct ldlm_gl_lquota_desc) == 64, "found %lld\n",
+ (long long)(int)sizeof(struct ldlm_gl_lquota_desc));
+ LASSERTF((int)offsetof(struct ldlm_gl_lquota_desc, gl_id) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct ldlm_gl_lquota_desc, gl_id));
+ LASSERTF((int)sizeof(((struct ldlm_gl_lquota_desc *)0)->gl_id) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct ldlm_gl_lquota_desc *)0)->gl_id));
+ LASSERTF((int)offsetof(struct ldlm_gl_lquota_desc, gl_flags) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct ldlm_gl_lquota_desc, gl_flags));
+ LASSERTF((int)sizeof(((struct ldlm_gl_lquota_desc *)0)->gl_flags) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct ldlm_gl_lquota_desc *)0)->gl_flags));
+ LASSERTF((int)offsetof(struct ldlm_gl_lquota_desc, gl_ver) == 24, "found %lld\n",
+ (long long)(int)offsetof(struct ldlm_gl_lquota_desc, gl_ver));
+ LASSERTF((int)sizeof(((struct ldlm_gl_lquota_desc *)0)->gl_ver) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct ldlm_gl_lquota_desc *)0)->gl_ver));
+ LASSERTF((int)offsetof(struct ldlm_gl_lquota_desc, gl_hardlimit) == 32, "found %lld\n",
+ (long long)(int)offsetof(struct ldlm_gl_lquota_desc, gl_hardlimit));
+ LASSERTF((int)sizeof(((struct ldlm_gl_lquota_desc *)0)->gl_hardlimit) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct ldlm_gl_lquota_desc *)0)->gl_hardlimit));
+ LASSERTF((int)offsetof(struct ldlm_gl_lquota_desc, gl_softlimit) == 40, "found %lld\n",
+ (long long)(int)offsetof(struct ldlm_gl_lquota_desc, gl_softlimit));
+ LASSERTF((int)sizeof(((struct ldlm_gl_lquota_desc *)0)->gl_softlimit) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct ldlm_gl_lquota_desc *)0)->gl_softlimit));
+ LASSERTF((int)offsetof(struct ldlm_gl_lquota_desc, gl_time) == 48, "found %lld\n",
+ (long long)(int)offsetof(struct ldlm_gl_lquota_desc, gl_time));
+ LASSERTF((int)sizeof(((struct ldlm_gl_lquota_desc *)0)->gl_time) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct ldlm_gl_lquota_desc *)0)->gl_time));
+ LASSERTF((int)offsetof(struct ldlm_gl_lquota_desc, gl_pad2) == 56, "found %lld\n",
+ (long long)(int)offsetof(struct ldlm_gl_lquota_desc, gl_pad2));
+ LASSERTF((int)sizeof(((struct ldlm_gl_lquota_desc *)0)->gl_pad2) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct ldlm_gl_lquota_desc *)0)->gl_pad2));
+
+ /* Checks for struct mgs_send_param */
+ LASSERTF((int)sizeof(struct mgs_send_param) == 1024, "found %lld\n",
+ (long long)(int)sizeof(struct mgs_send_param));
+ CLASSERT(MGS_PARAM_MAXLEN == 1024);
+ LASSERTF((int)offsetof(struct mgs_send_param, mgs_param[1024]) == 1024, "found %lld\n",
+ (long long)(int)offsetof(struct mgs_send_param, mgs_param[1024]));
+ LASSERTF((int)sizeof(((struct mgs_send_param *)0)->mgs_param[1024]) == 1, "found %lld\n",
+ (long long)(int)sizeof(((struct mgs_send_param *)0)->mgs_param[1024]));
+
+ /* Checks for struct cfg_marker */
+ LASSERTF((int)sizeof(struct cfg_marker) == 160, "found %lld\n",
+ (long long)(int)sizeof(struct cfg_marker));
+ LASSERTF((int)offsetof(struct cfg_marker, cm_step) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct cfg_marker, cm_step));
+ LASSERTF((int)sizeof(((struct cfg_marker *)0)->cm_step) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct cfg_marker *)0)->cm_step));
+ LASSERTF((int)offsetof(struct cfg_marker, cm_flags) == 4, "found %lld\n",
+ (long long)(int)offsetof(struct cfg_marker, cm_flags));
+ LASSERTF((int)sizeof(((struct cfg_marker *)0)->cm_flags) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct cfg_marker *)0)->cm_flags));
+ LASSERTF((int)offsetof(struct cfg_marker, cm_vers) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct cfg_marker, cm_vers));
+ LASSERTF((int)sizeof(((struct cfg_marker *)0)->cm_vers) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct cfg_marker *)0)->cm_vers));
+ LASSERTF((int)offsetof(struct cfg_marker, cm_padding) == 12, "found %lld\n",
+ (long long)(int)offsetof(struct cfg_marker, cm_padding));
+ LASSERTF((int)sizeof(((struct cfg_marker *)0)->cm_padding) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct cfg_marker *)0)->cm_padding));
+ LASSERTF((int)offsetof(struct cfg_marker, cm_createtime) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct cfg_marker, cm_createtime));
+ LASSERTF((int)sizeof(((struct cfg_marker *)0)->cm_createtime) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct cfg_marker *)0)->cm_createtime));
+ LASSERTF((int)offsetof(struct cfg_marker, cm_canceltime) == 24, "found %lld\n",
+ (long long)(int)offsetof(struct cfg_marker, cm_canceltime));
+ LASSERTF((int)sizeof(((struct cfg_marker *)0)->cm_canceltime) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct cfg_marker *)0)->cm_canceltime));
+ LASSERTF((int)offsetof(struct cfg_marker, cm_tgtname) == 32, "found %lld\n",
+ (long long)(int)offsetof(struct cfg_marker, cm_tgtname));
+ LASSERTF((int)sizeof(((struct cfg_marker *)0)->cm_tgtname) == 64, "found %lld\n",
+ (long long)(int)sizeof(((struct cfg_marker *)0)->cm_tgtname));
+ LASSERTF((int)offsetof(struct cfg_marker, cm_comment) == 96, "found %lld\n",
+ (long long)(int)offsetof(struct cfg_marker, cm_comment));
+ LASSERTF((int)sizeof(((struct cfg_marker *)0)->cm_comment) == 64, "found %lld\n",
+ (long long)(int)sizeof(((struct cfg_marker *)0)->cm_comment));
+
+ /* Checks for struct llog_logid */
+ LASSERTF((int)sizeof(struct llog_logid) == 20, "found %lld\n",
+ (long long)(int)sizeof(struct llog_logid));
+ LASSERTF((int)offsetof(struct llog_logid, lgl_oi) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct llog_logid, lgl_oi));
+ LASSERTF((int)sizeof(((struct llog_logid *)0)->lgl_oi) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_logid *)0)->lgl_oi));
+ LASSERTF((int)offsetof(struct llog_logid, lgl_ogen) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct llog_logid, lgl_ogen));
+ LASSERTF((int)sizeof(((struct llog_logid *)0)->lgl_ogen) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_logid *)0)->lgl_ogen));
+ CLASSERT(OST_SZ_REC == 274730752);
+ CLASSERT(MDS_UNLINK_REC == 274801668);
+ CLASSERT(MDS_UNLINK64_REC == 275325956);
+ CLASSERT(MDS_SETATTR64_REC == 275325953);
+ CLASSERT(OBD_CFG_REC == 274857984);
+ CLASSERT(LLOG_GEN_REC == 274989056);
+ CLASSERT(CHANGELOG_REC == 275120128);
+ CLASSERT(CHANGELOG_USER_REC == 275185664);
+ CLASSERT(LLOG_HDR_MAGIC == 275010873);
+ CLASSERT(LLOG_LOGID_MAGIC == 275010875);
+
+ /* Checks for struct llog_catid */
+ LASSERTF((int)sizeof(struct llog_catid) == 32, "found %lld\n",
+ (long long)(int)sizeof(struct llog_catid));
+ LASSERTF((int)offsetof(struct llog_catid, lci_logid) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct llog_catid, lci_logid));
+ LASSERTF((int)sizeof(((struct llog_catid *)0)->lci_logid) == 20, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_catid *)0)->lci_logid));
+ LASSERTF((int)offsetof(struct llog_catid, lci_padding1) == 20, "found %lld\n",
+ (long long)(int)offsetof(struct llog_catid, lci_padding1));
+ LASSERTF((int)sizeof(((struct llog_catid *)0)->lci_padding1) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_catid *)0)->lci_padding1));
+ LASSERTF((int)offsetof(struct llog_catid, lci_padding2) == 24, "found %lld\n",
+ (long long)(int)offsetof(struct llog_catid, lci_padding2));
+ LASSERTF((int)sizeof(((struct llog_catid *)0)->lci_padding2) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_catid *)0)->lci_padding2));
+ LASSERTF((int)offsetof(struct llog_catid, lci_padding3) == 28, "found %lld\n",
+ (long long)(int)offsetof(struct llog_catid, lci_padding3));
+ LASSERTF((int)sizeof(((struct llog_catid *)0)->lci_padding3) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_catid *)0)->lci_padding3));
+
+ /* Checks for struct llog_rec_hdr */
+ LASSERTF((int)sizeof(struct llog_rec_hdr) == 16, "found %lld\n",
+ (long long)(int)sizeof(struct llog_rec_hdr));
+ LASSERTF((int)offsetof(struct llog_rec_hdr, lrh_len) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct llog_rec_hdr, lrh_len));
+ LASSERTF((int)sizeof(((struct llog_rec_hdr *)0)->lrh_len) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_rec_hdr *)0)->lrh_len));
+ LASSERTF((int)offsetof(struct llog_rec_hdr, lrh_index) == 4, "found %lld\n",
+ (long long)(int)offsetof(struct llog_rec_hdr, lrh_index));
+ LASSERTF((int)sizeof(((struct llog_rec_hdr *)0)->lrh_index) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_rec_hdr *)0)->lrh_index));
+ LASSERTF((int)offsetof(struct llog_rec_hdr, lrh_type) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct llog_rec_hdr, lrh_type));
+ LASSERTF((int)sizeof(((struct llog_rec_hdr *)0)->lrh_type) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_rec_hdr *)0)->lrh_type));
+ LASSERTF((int)offsetof(struct llog_rec_hdr, lrh_id) == 12, "found %lld\n",
+ (long long)(int)offsetof(struct llog_rec_hdr, lrh_id));
+ LASSERTF((int)sizeof(((struct llog_rec_hdr *)0)->lrh_id) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_rec_hdr *)0)->lrh_id));
+
+ /* Checks for struct llog_rec_tail */
+ LASSERTF((int)sizeof(struct llog_rec_tail) == 8, "found %lld\n",
+ (long long)(int)sizeof(struct llog_rec_tail));
+ LASSERTF((int)offsetof(struct llog_rec_tail, lrt_len) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct llog_rec_tail, lrt_len));
+ LASSERTF((int)sizeof(((struct llog_rec_tail *)0)->lrt_len) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_rec_tail *)0)->lrt_len));
+ LASSERTF((int)offsetof(struct llog_rec_tail, lrt_index) == 4, "found %lld\n",
+ (long long)(int)offsetof(struct llog_rec_tail, lrt_index));
+ LASSERTF((int)sizeof(((struct llog_rec_tail *)0)->lrt_index) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_rec_tail *)0)->lrt_index));
+
+ /* Checks for struct llog_logid_rec */
+ LASSERTF((int)sizeof(struct llog_logid_rec) == 64, "found %lld\n",
+ (long long)(int)sizeof(struct llog_logid_rec));
+ LASSERTF((int)offsetof(struct llog_logid_rec, lid_hdr) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct llog_logid_rec, lid_hdr));
+ LASSERTF((int)sizeof(((struct llog_logid_rec *)0)->lid_hdr) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_logid_rec *)0)->lid_hdr));
+ LASSERTF((int)offsetof(struct llog_logid_rec, lid_id) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct llog_logid_rec, lid_id));
+ LASSERTF((int)sizeof(((struct llog_logid_rec *)0)->lid_id) == 20, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_logid_rec *)0)->lid_id));
+ LASSERTF((int)offsetof(struct llog_logid_rec, lid_padding1) == 36, "found %lld\n",
+ (long long)(int)offsetof(struct llog_logid_rec, lid_padding1));
+ LASSERTF((int)sizeof(((struct llog_logid_rec *)0)->lid_padding1) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_logid_rec *)0)->lid_padding1));
+ LASSERTF((int)offsetof(struct llog_logid_rec, lid_padding2) == 40, "found %lld\n",
+ (long long)(int)offsetof(struct llog_logid_rec, lid_padding2));
+ LASSERTF((int)sizeof(((struct llog_logid_rec *)0)->lid_padding2) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_logid_rec *)0)->lid_padding2));
+ LASSERTF((int)offsetof(struct llog_logid_rec, lid_padding3) == 48, "found %lld\n",
+ (long long)(int)offsetof(struct llog_logid_rec, lid_padding3));
+ LASSERTF((int)sizeof(((struct llog_logid_rec *)0)->lid_padding3) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_logid_rec *)0)->lid_padding3));
+ LASSERTF((int)offsetof(struct llog_logid_rec, lid_tail) == 56, "found %lld\n",
+ (long long)(int)offsetof(struct llog_logid_rec, lid_tail));
+ LASSERTF((int)sizeof(((struct llog_logid_rec *)0)->lid_tail) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_logid_rec *)0)->lid_tail));
+
+ /* Checks for struct llog_unlink_rec */
+ LASSERTF((int)sizeof(struct llog_unlink_rec) == 40, "found %lld\n",
+ (long long)(int)sizeof(struct llog_unlink_rec));
+ LASSERTF((int)offsetof(struct llog_unlink_rec, lur_hdr) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct llog_unlink_rec, lur_hdr));
+ LASSERTF((int)sizeof(((struct llog_unlink_rec *)0)->lur_hdr) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_unlink_rec *)0)->lur_hdr));
+ LASSERTF((int)offsetof(struct llog_unlink_rec, lur_oid) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct llog_unlink_rec, lur_oid));
+ LASSERTF((int)sizeof(((struct llog_unlink_rec *)0)->lur_oid) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_unlink_rec *)0)->lur_oid));
+ LASSERTF((int)offsetof(struct llog_unlink_rec, lur_oseq) == 24, "found %lld\n",
+ (long long)(int)offsetof(struct llog_unlink_rec, lur_oseq));
+ LASSERTF((int)sizeof(((struct llog_unlink_rec *)0)->lur_oseq) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_unlink_rec *)0)->lur_oseq));
+ LASSERTF((int)offsetof(struct llog_unlink_rec, lur_count) == 28, "found %lld\n",
+ (long long)(int)offsetof(struct llog_unlink_rec, lur_count));
+ LASSERTF((int)sizeof(((struct llog_unlink_rec *)0)->lur_count) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_unlink_rec *)0)->lur_count));
+ LASSERTF((int)offsetof(struct llog_unlink_rec, lur_tail) == 32, "found %lld\n",
+ (long long)(int)offsetof(struct llog_unlink_rec, lur_tail));
+ LASSERTF((int)sizeof(((struct llog_unlink_rec *)0)->lur_tail) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_unlink_rec *)0)->lur_tail));
+ /* Checks for struct llog_unlink64_rec */
+ LASSERTF((int)sizeof(struct llog_unlink64_rec) == 64, "found %lld\n",
+ (long long)(int)sizeof(struct llog_unlink64_rec));
+ LASSERTF((int)offsetof(struct llog_unlink64_rec, lur_hdr) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct llog_unlink64_rec, lur_hdr));
+ LASSERTF((int)sizeof(((struct llog_unlink64_rec *)0)->lur_hdr) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_unlink64_rec *)0)->lur_hdr));
+ LASSERTF((int)offsetof(struct llog_unlink64_rec, lur_fid) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct llog_unlink64_rec, lur_fid));
+ LASSERTF((int)sizeof(((struct llog_unlink64_rec *)0)->lur_fid) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_unlink64_rec *)0)->lur_fid));
+ LASSERTF((int)offsetof(struct llog_unlink64_rec, lur_count) == 32, "found %lld\n",
+ (long long)(int)offsetof(struct llog_unlink64_rec, lur_count));
+ LASSERTF((int)sizeof(((struct llog_unlink64_rec *)0)->lur_count) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_unlink64_rec *)0)->lur_count));
+ LASSERTF((int)offsetof(struct llog_unlink64_rec, lur_tail) == 56, "found %lld\n",
+ (long long)(int)offsetof(struct llog_unlink64_rec, lur_tail));
+ LASSERTF((int)sizeof(((struct llog_unlink64_rec *)0)->lur_tail) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_unlink64_rec *)0)->lur_tail));
+ LASSERTF((int)offsetof(struct llog_unlink64_rec, lur_padding1) == 36, "found %lld\n",
+ (long long)(int)offsetof(struct llog_unlink64_rec, lur_padding1));
+ LASSERTF((int)sizeof(((struct llog_unlink64_rec *)0)->lur_padding1) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_unlink64_rec *)0)->lur_padding1));
+ LASSERTF((int)offsetof(struct llog_unlink64_rec, lur_padding2) == 40, "found %lld\n",
+ (long long)(int)offsetof(struct llog_unlink64_rec, lur_padding2));
+ LASSERTF((int)sizeof(((struct llog_unlink64_rec *)0)->lur_padding2) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_unlink64_rec *)0)->lur_padding2));
+ LASSERTF((int)offsetof(struct llog_unlink64_rec, lur_padding3) == 48, "found %lld\n",
+ (long long)(int)offsetof(struct llog_unlink64_rec, lur_padding3));
+ LASSERTF((int)sizeof(((struct llog_unlink64_rec *)0)->lur_padding3) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_unlink64_rec *)0)->lur_padding3));
+
+ /* Checks for struct llog_setattr64_rec */
+ LASSERTF((int)sizeof(struct llog_setattr64_rec) == 64, "found %lld\n",
+ (long long)(int)sizeof(struct llog_setattr64_rec));
+ LASSERTF((int)offsetof(struct llog_setattr64_rec, lsr_hdr) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct llog_setattr64_rec, lsr_hdr));
+ LASSERTF((int)sizeof(((struct llog_setattr64_rec *)0)->lsr_hdr) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_setattr64_rec *)0)->lsr_hdr));
+ LASSERTF((int)offsetof(struct llog_setattr64_rec, lsr_oi) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct llog_setattr64_rec, lsr_oi));
+ LASSERTF((int)sizeof(((struct llog_setattr64_rec *)0)->lsr_oi) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_setattr64_rec *)0)->lsr_oi));
+ LASSERTF((int)offsetof(struct llog_setattr64_rec, lsr_uid) == 32, "found %lld\n",
+ (long long)(int)offsetof(struct llog_setattr64_rec, lsr_uid));
+ LASSERTF((int)sizeof(((struct llog_setattr64_rec *)0)->lsr_uid) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_setattr64_rec *)0)->lsr_uid));
+ LASSERTF((int)offsetof(struct llog_setattr64_rec, lsr_uid_h) == 36, "found %lld\n",
+ (long long)(int)offsetof(struct llog_setattr64_rec, lsr_uid_h));
+ LASSERTF((int)sizeof(((struct llog_setattr64_rec *)0)->lsr_uid_h) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_setattr64_rec *)0)->lsr_uid_h));
+ LASSERTF((int)offsetof(struct llog_setattr64_rec, lsr_gid) == 40, "found %lld\n",
+ (long long)(int)offsetof(struct llog_setattr64_rec, lsr_gid));
+ LASSERTF((int)sizeof(((struct llog_setattr64_rec *)0)->lsr_gid) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_setattr64_rec *)0)->lsr_gid));
+ LASSERTF((int)offsetof(struct llog_setattr64_rec, lsr_gid_h) == 44, "found %lld\n",
+ (long long)(int)offsetof(struct llog_setattr64_rec, lsr_gid_h));
+ LASSERTF((int)sizeof(((struct llog_setattr64_rec *)0)->lsr_gid_h) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_setattr64_rec *)0)->lsr_gid_h));
+ LASSERTF((int)offsetof(struct llog_setattr64_rec, lsr_padding) == 48, "found %lld\n",
+ (long long)(int)offsetof(struct llog_setattr64_rec, lsr_padding));
+ LASSERTF((int)sizeof(((struct llog_setattr64_rec *)0)->lsr_padding) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_setattr64_rec *)0)->lsr_padding));
+ LASSERTF((int)offsetof(struct llog_setattr64_rec, lsr_tail) == 56, "found %lld\n",
+ (long long)(int)offsetof(struct llog_setattr64_rec, lsr_tail));
+ LASSERTF((int)sizeof(((struct llog_setattr64_rec *)0)->lsr_tail) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_setattr64_rec *)0)->lsr_tail));
+
+ /* Checks for struct llog_size_change_rec */
+ LASSERTF((int)sizeof(struct llog_size_change_rec) == 64, "found %lld\n",
+ (long long)(int)sizeof(struct llog_size_change_rec));
+ LASSERTF((int)offsetof(struct llog_size_change_rec, lsc_hdr) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct llog_size_change_rec, lsc_hdr));
+ LASSERTF((int)sizeof(((struct llog_size_change_rec *)0)->lsc_hdr) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_size_change_rec *)0)->lsc_hdr));
+ LASSERTF((int)offsetof(struct llog_size_change_rec, lsc_fid) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct llog_size_change_rec, lsc_fid));
+ LASSERTF((int)sizeof(((struct llog_size_change_rec *)0)->lsc_fid) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_size_change_rec *)0)->lsc_fid));
+ LASSERTF((int)offsetof(struct llog_size_change_rec, lsc_ioepoch) == 32, "found %lld\n",
+ (long long)(int)offsetof(struct llog_size_change_rec, lsc_ioepoch));
+ LASSERTF((int)sizeof(((struct llog_size_change_rec *)0)->lsc_ioepoch) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_size_change_rec *)0)->lsc_ioepoch));
+ LASSERTF((int)offsetof(struct llog_size_change_rec, lsc_padding1) == 36, "found %lld\n",
+ (long long)(int)offsetof(struct llog_size_change_rec, lsc_padding1));
+ LASSERTF((int)sizeof(((struct llog_size_change_rec *)0)->lsc_padding1) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_size_change_rec *)0)->lsc_padding1));
+ LASSERTF((int)offsetof(struct llog_size_change_rec, lsc_padding2) == 40, "found %lld\n",
+ (long long)(int)offsetof(struct llog_size_change_rec, lsc_padding2));
+ LASSERTF((int)sizeof(((struct llog_size_change_rec *)0)->lsc_padding2) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_size_change_rec *)0)->lsc_padding2));
+ LASSERTF((int)offsetof(struct llog_size_change_rec, lsc_padding3) == 48, "found %lld\n",
+ (long long)(int)offsetof(struct llog_size_change_rec, lsc_padding3));
+ LASSERTF((int)sizeof(((struct llog_size_change_rec *)0)->lsc_padding3) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_size_change_rec *)0)->lsc_padding3));
+ LASSERTF((int)offsetof(struct llog_size_change_rec, lsc_tail) == 56, "found %lld\n",
+ (long long)(int)offsetof(struct llog_size_change_rec, lsc_tail));
+ LASSERTF((int)sizeof(((struct llog_size_change_rec *)0)->lsc_tail) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_size_change_rec *)0)->lsc_tail));
+
+ /* Checks for struct changelog_rec */
+ LASSERTF((int)sizeof(struct changelog_rec) == 64, "found %lld\n",
+ (long long)(int)sizeof(struct changelog_rec));
+ LASSERTF((int)offsetof(struct changelog_rec, cr_namelen) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct changelog_rec, cr_namelen));
+ LASSERTF((int)sizeof(((struct changelog_rec *)0)->cr_namelen) == 2, "found %lld\n",
+ (long long)(int)sizeof(((struct changelog_rec *)0)->cr_namelen));
+ LASSERTF((int)offsetof(struct changelog_rec, cr_flags) == 2, "found %lld\n",
+ (long long)(int)offsetof(struct changelog_rec, cr_flags));
+ LASSERTF((int)sizeof(((struct changelog_rec *)0)->cr_flags) == 2, "found %lld\n",
+ (long long)(int)sizeof(((struct changelog_rec *)0)->cr_flags));
+ LASSERTF((int)offsetof(struct changelog_rec, cr_type) == 4, "found %lld\n",
+ (long long)(int)offsetof(struct changelog_rec, cr_type));
+ LASSERTF((int)sizeof(((struct changelog_rec *)0)->cr_type) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct changelog_rec *)0)->cr_type));
+ LASSERTF((int)offsetof(struct changelog_rec, cr_index) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct changelog_rec, cr_index));
+ LASSERTF((int)sizeof(((struct changelog_rec *)0)->cr_index) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct changelog_rec *)0)->cr_index));
+ LASSERTF((int)offsetof(struct changelog_rec, cr_prev) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct changelog_rec, cr_prev));
+ LASSERTF((int)sizeof(((struct changelog_rec *)0)->cr_prev) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct changelog_rec *)0)->cr_prev));
+ LASSERTF((int)offsetof(struct changelog_rec, cr_time) == 24, "found %lld\n",
+ (long long)(int)offsetof(struct changelog_rec, cr_time));
+ LASSERTF((int)sizeof(((struct changelog_rec *)0)->cr_time) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct changelog_rec *)0)->cr_time));
+ LASSERTF((int)offsetof(struct changelog_rec, cr_tfid) == 32, "found %lld\n",
+ (long long)(int)offsetof(struct changelog_rec, cr_tfid));
+ LASSERTF((int)sizeof(((struct changelog_rec *)0)->cr_tfid) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct changelog_rec *)0)->cr_tfid));
+ LASSERTF((int)offsetof(struct changelog_rec, cr_pfid) == 48, "found %lld\n",
+ (long long)(int)offsetof(struct changelog_rec, cr_pfid));
+ LASSERTF((int)sizeof(((struct changelog_rec *)0)->cr_pfid) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct changelog_rec *)0)->cr_pfid));
+
+ /* Checks for struct changelog_ext_rec */
+ LASSERTF((int)sizeof(struct changelog_ext_rec) == 96, "found %lld\n",
+ (long long)(int)sizeof(struct changelog_ext_rec));
+ LASSERTF((int)offsetof(struct changelog_ext_rec, cr_namelen) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct changelog_ext_rec, cr_namelen));
+ LASSERTF((int)sizeof(((struct changelog_ext_rec *)0)->cr_namelen) == 2, "found %lld\n",
+ (long long)(int)sizeof(((struct changelog_ext_rec *)0)->cr_namelen));
+ LASSERTF((int)offsetof(struct changelog_ext_rec, cr_flags) == 2, "found %lld\n",
+ (long long)(int)offsetof(struct changelog_ext_rec, cr_flags));
+ LASSERTF((int)sizeof(((struct changelog_ext_rec *)0)->cr_flags) == 2, "found %lld\n",
+ (long long)(int)sizeof(((struct changelog_ext_rec *)0)->cr_flags));
+ LASSERTF((int)offsetof(struct changelog_ext_rec, cr_type) == 4, "found %lld\n",
+ (long long)(int)offsetof(struct changelog_ext_rec, cr_type));
+ LASSERTF((int)sizeof(((struct changelog_ext_rec *)0)->cr_type) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct changelog_ext_rec *)0)->cr_type));
+ LASSERTF((int)offsetof(struct changelog_ext_rec, cr_index) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct changelog_ext_rec, cr_index));
+ LASSERTF((int)sizeof(((struct changelog_ext_rec *)0)->cr_index) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct changelog_ext_rec *)0)->cr_index));
+ LASSERTF((int)offsetof(struct changelog_ext_rec, cr_prev) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct changelog_ext_rec, cr_prev));
+ LASSERTF((int)sizeof(((struct changelog_ext_rec *)0)->cr_prev) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct changelog_ext_rec *)0)->cr_prev));
+ LASSERTF((int)offsetof(struct changelog_ext_rec, cr_time) == 24, "found %lld\n",
+ (long long)(int)offsetof(struct changelog_ext_rec, cr_time));
+ LASSERTF((int)sizeof(((struct changelog_ext_rec *)0)->cr_time) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct changelog_ext_rec *)0)->cr_time));
+ LASSERTF((int)offsetof(struct changelog_ext_rec, cr_tfid) == 32, "found %lld\n",
+ (long long)(int)offsetof(struct changelog_ext_rec, cr_tfid));
+ LASSERTF((int)sizeof(((struct changelog_ext_rec *)0)->cr_tfid) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct changelog_ext_rec *)0)->cr_tfid));
+ LASSERTF((int)offsetof(struct changelog_ext_rec, cr_pfid) == 48, "found %lld\n",
+ (long long)(int)offsetof(struct changelog_ext_rec, cr_pfid));
+ LASSERTF((int)sizeof(((struct changelog_ext_rec *)0)->cr_pfid) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct changelog_ext_rec *)0)->cr_pfid));
+ LASSERTF((int)offsetof(struct changelog_ext_rec, cr_sfid) == 64, "found %lld\n",
+ (long long)(int)offsetof(struct changelog_ext_rec, cr_sfid));
+ LASSERTF((int)sizeof(((struct changelog_ext_rec *)0)->cr_sfid) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct changelog_ext_rec *)0)->cr_sfid));
+ LASSERTF((int)offsetof(struct changelog_ext_rec, cr_spfid) == 80, "found %lld\n",
+ (long long)(int)offsetof(struct changelog_ext_rec, cr_spfid));
+ LASSERTF((int)sizeof(((struct changelog_ext_rec *)0)->cr_spfid) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct changelog_ext_rec *)0)->cr_spfid));
+
+ /* Checks for struct changelog_setinfo */
+ LASSERTF((int)sizeof(struct changelog_setinfo) == 12, "found %lld\n",
+ (long long)(int)sizeof(struct changelog_setinfo));
+ LASSERTF((int)offsetof(struct changelog_setinfo, cs_recno) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct changelog_setinfo, cs_recno));
+ LASSERTF((int)sizeof(((struct changelog_setinfo *)0)->cs_recno) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct changelog_setinfo *)0)->cs_recno));
+ LASSERTF((int)offsetof(struct changelog_setinfo, cs_id) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct changelog_setinfo, cs_id));
+ LASSERTF((int)sizeof(((struct changelog_setinfo *)0)->cs_id) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct changelog_setinfo *)0)->cs_id));
+
+ /* Checks for struct llog_changelog_rec */
+ LASSERTF((int)sizeof(struct llog_changelog_rec) == 88, "found %lld\n",
+ (long long)(int)sizeof(struct llog_changelog_rec));
+ LASSERTF((int)offsetof(struct llog_changelog_rec, cr_hdr) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct llog_changelog_rec, cr_hdr));
+ LASSERTF((int)sizeof(((struct llog_changelog_rec *)0)->cr_hdr) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_changelog_rec *)0)->cr_hdr));
+ LASSERTF((int)offsetof(struct llog_changelog_rec, cr) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct llog_changelog_rec, cr));
+ LASSERTF((int)sizeof(((struct llog_changelog_rec *)0)->cr) == 64, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_changelog_rec *)0)->cr));
+ LASSERTF((int)offsetof(struct llog_changelog_rec, cr_tail) == 80, "found %lld\n",
+ (long long)(int)offsetof(struct llog_changelog_rec, cr_tail));
+ LASSERTF((int)sizeof(((struct llog_changelog_rec *)0)->cr_tail) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_changelog_rec *)0)->cr_tail));
+
+ /* Checks for struct llog_changelog_user_rec */
+ LASSERTF((int)sizeof(struct llog_changelog_user_rec) == 40, "found %lld\n",
+ (long long)(int)sizeof(struct llog_changelog_user_rec));
+ LASSERTF((int)offsetof(struct llog_changelog_user_rec, cur_hdr) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct llog_changelog_user_rec, cur_hdr));
+ LASSERTF((int)sizeof(((struct llog_changelog_user_rec *)0)->cur_hdr) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_changelog_user_rec *)0)->cur_hdr));
+ LASSERTF((int)offsetof(struct llog_changelog_user_rec, cur_id) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct llog_changelog_user_rec, cur_id));
+ LASSERTF((int)sizeof(((struct llog_changelog_user_rec *)0)->cur_id) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_changelog_user_rec *)0)->cur_id));
+ LASSERTF((int)offsetof(struct llog_changelog_user_rec, cur_padding) == 20, "found %lld\n",
+ (long long)(int)offsetof(struct llog_changelog_user_rec, cur_padding));
+ LASSERTF((int)sizeof(((struct llog_changelog_user_rec *)0)->cur_padding) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_changelog_user_rec *)0)->cur_padding));
+ LASSERTF((int)offsetof(struct llog_changelog_user_rec, cur_endrec) == 24, "found %lld\n",
+ (long long)(int)offsetof(struct llog_changelog_user_rec, cur_endrec));
+ LASSERTF((int)sizeof(((struct llog_changelog_user_rec *)0)->cur_endrec) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_changelog_user_rec *)0)->cur_endrec));
+ LASSERTF((int)offsetof(struct llog_changelog_user_rec, cur_tail) == 32, "found %lld\n",
+ (long long)(int)offsetof(struct llog_changelog_user_rec, cur_tail));
+ LASSERTF((int)sizeof(((struct llog_changelog_user_rec *)0)->cur_tail) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_changelog_user_rec *)0)->cur_tail));
+
+ /* Checks for struct llog_gen */
+ LASSERTF((int)sizeof(struct llog_gen) == 16, "found %lld\n",
+ (long long)(int)sizeof(struct llog_gen));
+ LASSERTF((int)offsetof(struct llog_gen, mnt_cnt) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct llog_gen, mnt_cnt));
+ LASSERTF((int)sizeof(((struct llog_gen *)0)->mnt_cnt) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_gen *)0)->mnt_cnt));
+ LASSERTF((int)offsetof(struct llog_gen, conn_cnt) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct llog_gen, conn_cnt));
+ LASSERTF((int)sizeof(((struct llog_gen *)0)->conn_cnt) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_gen *)0)->conn_cnt));
+
+ /* Checks for struct llog_gen_rec */
+ LASSERTF((int)sizeof(struct llog_gen_rec) == 64, "found %lld\n",
+ (long long)(int)sizeof(struct llog_gen_rec));
+ LASSERTF((int)offsetof(struct llog_gen_rec, lgr_hdr) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct llog_gen_rec, lgr_hdr));
+ LASSERTF((int)sizeof(((struct llog_gen_rec *)0)->lgr_hdr) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_gen_rec *)0)->lgr_hdr));
+ LASSERTF((int)offsetof(struct llog_gen_rec, lgr_gen) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct llog_gen_rec, lgr_gen));
+ LASSERTF((int)sizeof(((struct llog_gen_rec *)0)->lgr_gen) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_gen_rec *)0)->lgr_gen));
+ LASSERTF((int)offsetof(struct llog_gen_rec, lgr_tail) == 56, "found %lld\n",
+ (long long)(int)offsetof(struct llog_gen_rec, lgr_tail));
+ LASSERTF((int)sizeof(((struct llog_gen_rec *)0)->lgr_tail) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_gen_rec *)0)->lgr_tail));
+
+ /* Checks for struct llog_log_hdr */
+ LASSERTF((int)sizeof(struct llog_log_hdr) == 8192, "found %lld\n",
+ (long long)(int)sizeof(struct llog_log_hdr));
+ LASSERTF((int)offsetof(struct llog_log_hdr, llh_hdr) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct llog_log_hdr, llh_hdr));
+ LASSERTF((int)sizeof(((struct llog_log_hdr *)0)->llh_hdr) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_log_hdr *)0)->llh_hdr));
+ LASSERTF((int)offsetof(struct llog_log_hdr, llh_timestamp) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct llog_log_hdr, llh_timestamp));
+ LASSERTF((int)sizeof(((struct llog_log_hdr *)0)->llh_timestamp) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_log_hdr *)0)->llh_timestamp));
+ LASSERTF((int)offsetof(struct llog_log_hdr, llh_count) == 24, "found %lld\n",
+ (long long)(int)offsetof(struct llog_log_hdr, llh_count));
+ LASSERTF((int)sizeof(((struct llog_log_hdr *)0)->llh_count) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_log_hdr *)0)->llh_count));
+ LASSERTF((int)offsetof(struct llog_log_hdr, llh_bitmap_offset) == 28, "found %lld\n",
+ (long long)(int)offsetof(struct llog_log_hdr, llh_bitmap_offset));
+ LASSERTF((int)sizeof(((struct llog_log_hdr *)0)->llh_bitmap_offset) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_log_hdr *)0)->llh_bitmap_offset));
+ LASSERTF((int)offsetof(struct llog_log_hdr, llh_size) == 32, "found %lld\n",
+ (long long)(int)offsetof(struct llog_log_hdr, llh_size));
+ LASSERTF((int)sizeof(((struct llog_log_hdr *)0)->llh_size) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_log_hdr *)0)->llh_size));
+ LASSERTF((int)offsetof(struct llog_log_hdr, llh_flags) == 36, "found %lld\n",
+ (long long)(int)offsetof(struct llog_log_hdr, llh_flags));
+ LASSERTF((int)sizeof(((struct llog_log_hdr *)0)->llh_flags) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_log_hdr *)0)->llh_flags));
+ LASSERTF((int)offsetof(struct llog_log_hdr, llh_cat_idx) == 40, "found %lld\n",
+ (long long)(int)offsetof(struct llog_log_hdr, llh_cat_idx));
+ LASSERTF((int)sizeof(((struct llog_log_hdr *)0)->llh_cat_idx) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_log_hdr *)0)->llh_cat_idx));
+ LASSERTF((int)offsetof(struct llog_log_hdr, llh_tgtuuid) == 44, "found %lld\n",
+ (long long)(int)offsetof(struct llog_log_hdr, llh_tgtuuid));
+ LASSERTF((int)sizeof(((struct llog_log_hdr *)0)->llh_tgtuuid) == 40, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_log_hdr *)0)->llh_tgtuuid));
+ LASSERTF((int)offsetof(struct llog_log_hdr, llh_reserved) == 84, "found %lld\n",
+ (long long)(int)offsetof(struct llog_log_hdr, llh_reserved));
+ LASSERTF((int)sizeof(((struct llog_log_hdr *)0)->llh_reserved) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_log_hdr *)0)->llh_reserved));
+ LASSERTF((int)offsetof(struct llog_log_hdr, llh_bitmap) == 88, "found %lld\n",
+ (long long)(int)offsetof(struct llog_log_hdr, llh_bitmap));
+ LASSERTF((int)sizeof(((struct llog_log_hdr *)0)->llh_bitmap) == 8096, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_log_hdr *)0)->llh_bitmap));
+ LASSERTF((int)offsetof(struct llog_log_hdr, llh_tail) == 8184, "found %lld\n",
+ (long long)(int)offsetof(struct llog_log_hdr, llh_tail));
+ LASSERTF((int)sizeof(((struct llog_log_hdr *)0)->llh_tail) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_log_hdr *)0)->llh_tail));
+
+ /* Checks for struct llog_cookie */
+ LASSERTF((int)sizeof(struct llog_cookie) == 32, "found %lld\n",
+ (long long)(int)sizeof(struct llog_cookie));
+ LASSERTF((int)offsetof(struct llog_cookie, lgc_lgl) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct llog_cookie, lgc_lgl));
+ LASSERTF((int)sizeof(((struct llog_cookie *)0)->lgc_lgl) == 20, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_cookie *)0)->lgc_lgl));
+ LASSERTF((int)offsetof(struct llog_cookie, lgc_subsys) == 20, "found %lld\n",
+ (long long)(int)offsetof(struct llog_cookie, lgc_subsys));
+ LASSERTF((int)sizeof(((struct llog_cookie *)0)->lgc_subsys) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_cookie *)0)->lgc_subsys));
+ LASSERTF((int)offsetof(struct llog_cookie, lgc_index) == 24, "found %lld\n",
+ (long long)(int)offsetof(struct llog_cookie, lgc_index));
+ LASSERTF((int)sizeof(((struct llog_cookie *)0)->lgc_index) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_cookie *)0)->lgc_index));
+ LASSERTF((int)offsetof(struct llog_cookie, lgc_padding) == 28, "found %lld\n",
+ (long long)(int)offsetof(struct llog_cookie, lgc_padding));
+ LASSERTF((int)sizeof(((struct llog_cookie *)0)->lgc_padding) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct llog_cookie *)0)->lgc_padding));
+
+ /* Checks for struct llogd_body */
+ LASSERTF((int)sizeof(struct llogd_body) == 48, "found %lld\n",
+ (long long)(int)sizeof(struct llogd_body));
+ LASSERTF((int)offsetof(struct llogd_body, lgd_logid) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct llogd_body, lgd_logid));
+ LASSERTF((int)sizeof(((struct llogd_body *)0)->lgd_logid) == 20, "found %lld\n",
+ (long long)(int)sizeof(((struct llogd_body *)0)->lgd_logid));
+ LASSERTF((int)offsetof(struct llogd_body, lgd_ctxt_idx) == 20, "found %lld\n",
+ (long long)(int)offsetof(struct llogd_body, lgd_ctxt_idx));
+ LASSERTF((int)sizeof(((struct llogd_body *)0)->lgd_ctxt_idx) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct llogd_body *)0)->lgd_ctxt_idx));
+ LASSERTF((int)offsetof(struct llogd_body, lgd_llh_flags) == 24, "found %lld\n",
+ (long long)(int)offsetof(struct llogd_body, lgd_llh_flags));
+ LASSERTF((int)sizeof(((struct llogd_body *)0)->lgd_llh_flags) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct llogd_body *)0)->lgd_llh_flags));
+ LASSERTF((int)offsetof(struct llogd_body, lgd_index) == 28, "found %lld\n",
+ (long long)(int)offsetof(struct llogd_body, lgd_index));
+ LASSERTF((int)sizeof(((struct llogd_body *)0)->lgd_index) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct llogd_body *)0)->lgd_index));
+ LASSERTF((int)offsetof(struct llogd_body, lgd_saved_index) == 32, "found %lld\n",
+ (long long)(int)offsetof(struct llogd_body, lgd_saved_index));
+ LASSERTF((int)sizeof(((struct llogd_body *)0)->lgd_saved_index) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct llogd_body *)0)->lgd_saved_index));
+ LASSERTF((int)offsetof(struct llogd_body, lgd_len) == 36, "found %lld\n",
+ (long long)(int)offsetof(struct llogd_body, lgd_len));
+ LASSERTF((int)sizeof(((struct llogd_body *)0)->lgd_len) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct llogd_body *)0)->lgd_len));
+ LASSERTF((int)offsetof(struct llogd_body, lgd_cur_offset) == 40, "found %lld\n",
+ (long long)(int)offsetof(struct llogd_body, lgd_cur_offset));
+ LASSERTF((int)sizeof(((struct llogd_body *)0)->lgd_cur_offset) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct llogd_body *)0)->lgd_cur_offset));
+ CLASSERT(LLOG_ORIGIN_HANDLE_CREATE == 501);
+ CLASSERT(LLOG_ORIGIN_HANDLE_NEXT_BLOCK == 502);
+ CLASSERT(LLOG_ORIGIN_HANDLE_READ_HEADER == 503);
+ CLASSERT(LLOG_ORIGIN_HANDLE_WRITE_REC == 504);
+ CLASSERT(LLOG_ORIGIN_HANDLE_CLOSE == 505);
+ CLASSERT(LLOG_ORIGIN_CONNECT == 506);
+ CLASSERT(LLOG_CATINFO == 507);
+ CLASSERT(LLOG_ORIGIN_HANDLE_PREV_BLOCK == 508);
+ CLASSERT(LLOG_ORIGIN_HANDLE_DESTROY == 509);
+ CLASSERT(LLOG_FIRST_OPC == 501);
+ CLASSERT(LLOG_LAST_OPC == 510);
+
+ /* Checks for struct llogd_conn_body */
+ LASSERTF((int)sizeof(struct llogd_conn_body) == 40, "found %lld\n",
+ (long long)(int)sizeof(struct llogd_conn_body));
+ LASSERTF((int)offsetof(struct llogd_conn_body, lgdc_gen) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct llogd_conn_body, lgdc_gen));
+ LASSERTF((int)sizeof(((struct llogd_conn_body *)0)->lgdc_gen) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct llogd_conn_body *)0)->lgdc_gen));
+ LASSERTF((int)offsetof(struct llogd_conn_body, lgdc_logid) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct llogd_conn_body, lgdc_logid));
+ LASSERTF((int)sizeof(((struct llogd_conn_body *)0)->lgdc_logid) == 20, "found %lld\n",
+ (long long)(int)sizeof(((struct llogd_conn_body *)0)->lgdc_logid));
+ LASSERTF((int)offsetof(struct llogd_conn_body, lgdc_ctxt_idx) == 36, "found %lld\n",
+ (long long)(int)offsetof(struct llogd_conn_body, lgdc_ctxt_idx));
+ LASSERTF((int)sizeof(((struct llogd_conn_body *)0)->lgdc_ctxt_idx) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct llogd_conn_body *)0)->lgdc_ctxt_idx));
+
+ /* Checks for struct ll_fiemap_info_key */
+ LASSERTF((int)sizeof(struct ll_fiemap_info_key) == 248, "found %lld\n",
+ (long long)(int)sizeof(struct ll_fiemap_info_key));
+ LASSERTF((int)offsetof(struct ll_fiemap_info_key, name[8]) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct ll_fiemap_info_key, name[8]));
+ LASSERTF((int)sizeof(((struct ll_fiemap_info_key *)0)->name[8]) == 1, "found %lld\n",
+ (long long)(int)sizeof(((struct ll_fiemap_info_key *)0)->name[8]));
+ LASSERTF((int)offsetof(struct ll_fiemap_info_key, oa) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct ll_fiemap_info_key, oa));
+ LASSERTF((int)sizeof(((struct ll_fiemap_info_key *)0)->oa) == 208, "found %lld\n",
+ (long long)(int)sizeof(((struct ll_fiemap_info_key *)0)->oa));
+ LASSERTF((int)offsetof(struct ll_fiemap_info_key, fiemap) == 216, "found %lld\n",
+ (long long)(int)offsetof(struct ll_fiemap_info_key, fiemap));
+ LASSERTF((int)sizeof(((struct ll_fiemap_info_key *)0)->fiemap) == 32, "found %lld\n",
+ (long long)(int)sizeof(((struct ll_fiemap_info_key *)0)->fiemap));
+
+ /* Checks for struct quota_body */
+ LASSERTF((int)sizeof(struct quota_body) == 112, "found %lld\n",
+ (long long)(int)sizeof(struct quota_body));
+ LASSERTF((int)offsetof(struct quota_body, qb_fid) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct quota_body, qb_fid));
+ LASSERTF((int)sizeof(((struct quota_body *)0)->qb_fid) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct quota_body *)0)->qb_fid));
+ LASSERTF((int)offsetof(struct quota_body, qb_id) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct quota_body, qb_id));
+ LASSERTF((int)sizeof(((struct quota_body *)0)->qb_id) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct quota_body *)0)->qb_id));
+ LASSERTF((int)offsetof(struct quota_body, qb_flags) == 32, "found %lld\n",
+ (long long)(int)offsetof(struct quota_body, qb_flags));
+ LASSERTF((int)sizeof(((struct quota_body *)0)->qb_flags) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct quota_body *)0)->qb_flags));
+ LASSERTF((int)offsetof(struct quota_body, qb_padding) == 36, "found %lld\n",
+ (long long)(int)offsetof(struct quota_body, qb_padding));
+ LASSERTF((int)sizeof(((struct quota_body *)0)->qb_padding) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct quota_body *)0)->qb_padding));
+ LASSERTF((int)offsetof(struct quota_body, qb_count) == 40, "found %lld\n",
+ (long long)(int)offsetof(struct quota_body, qb_count));
+ LASSERTF((int)sizeof(((struct quota_body *)0)->qb_count) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct quota_body *)0)->qb_count));
+ LASSERTF((int)offsetof(struct quota_body, qb_usage) == 48, "found %lld\n",
+ (long long)(int)offsetof(struct quota_body, qb_usage));
+ LASSERTF((int)sizeof(((struct quota_body *)0)->qb_usage) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct quota_body *)0)->qb_usage));
+ LASSERTF((int)offsetof(struct quota_body, qb_slv_ver) == 56, "found %lld\n",
+ (long long)(int)offsetof(struct quota_body, qb_slv_ver));
+ LASSERTF((int)sizeof(((struct quota_body *)0)->qb_slv_ver) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct quota_body *)0)->qb_slv_ver));
+ LASSERTF((int)offsetof(struct quota_body, qb_lockh) == 64, "found %lld\n",
+ (long long)(int)offsetof(struct quota_body, qb_lockh));
+ LASSERTF((int)sizeof(((struct quota_body *)0)->qb_lockh) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct quota_body *)0)->qb_lockh));
+ LASSERTF((int)offsetof(struct quota_body, qb_glb_lockh) == 72, "found %lld\n",
+ (long long)(int)offsetof(struct quota_body, qb_glb_lockh));
+ LASSERTF((int)sizeof(((struct quota_body *)0)->qb_glb_lockh) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct quota_body *)0)->qb_glb_lockh));
+ LASSERTF((int)offsetof(struct quota_body, qb_padding1[4]) == 112, "found %lld\n",
+ (long long)(int)offsetof(struct quota_body, qb_padding1[4]));
+ LASSERTF((int)sizeof(((struct quota_body *)0)->qb_padding1[4]) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct quota_body *)0)->qb_padding1[4]));
+
+ /* Checks for struct mgs_target_info */
+ LASSERTF((int)sizeof(struct mgs_target_info) == 4544, "found %lld\n",
+ (long long)(int)sizeof(struct mgs_target_info));
+ LASSERTF((int)offsetof(struct mgs_target_info, mti_lustre_ver) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct mgs_target_info, mti_lustre_ver));
+ LASSERTF((int)sizeof(((struct mgs_target_info *)0)->mti_lustre_ver) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mgs_target_info *)0)->mti_lustre_ver));
+ LASSERTF((int)offsetof(struct mgs_target_info, mti_stripe_index) == 4, "found %lld\n",
+ (long long)(int)offsetof(struct mgs_target_info, mti_stripe_index));
+ LASSERTF((int)sizeof(((struct mgs_target_info *)0)->mti_stripe_index) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mgs_target_info *)0)->mti_stripe_index));
+ LASSERTF((int)offsetof(struct mgs_target_info, mti_config_ver) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct mgs_target_info, mti_config_ver));
+ LASSERTF((int)sizeof(((struct mgs_target_info *)0)->mti_config_ver) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mgs_target_info *)0)->mti_config_ver));
+ LASSERTF((int)offsetof(struct mgs_target_info, mti_flags) == 12, "found %lld\n",
+ (long long)(int)offsetof(struct mgs_target_info, mti_flags));
+ LASSERTF((int)sizeof(((struct mgs_target_info *)0)->mti_flags) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mgs_target_info *)0)->mti_flags));
+ LASSERTF((int)offsetof(struct mgs_target_info, mti_nid_count) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct mgs_target_info, mti_nid_count));
+ LASSERTF((int)sizeof(((struct mgs_target_info *)0)->mti_nid_count) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mgs_target_info *)0)->mti_nid_count));
+ LASSERTF((int)offsetof(struct mgs_target_info, mti_instance) == 20, "found %lld\n",
+ (long long)(int)offsetof(struct mgs_target_info, mti_instance));
+ LASSERTF((int)sizeof(((struct mgs_target_info *)0)->mti_instance) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mgs_target_info *)0)->mti_instance));
+ LASSERTF((int)offsetof(struct mgs_target_info, mti_fsname) == 24, "found %lld\n",
+ (long long)(int)offsetof(struct mgs_target_info, mti_fsname));
+ LASSERTF((int)sizeof(((struct mgs_target_info *)0)->mti_fsname) == 64, "found %lld\n",
+ (long long)(int)sizeof(((struct mgs_target_info *)0)->mti_fsname));
+ LASSERTF((int)offsetof(struct mgs_target_info, mti_svname) == 88, "found %lld\n",
+ (long long)(int)offsetof(struct mgs_target_info, mti_svname));
+ LASSERTF((int)sizeof(((struct mgs_target_info *)0)->mti_svname) == 64, "found %lld\n",
+ (long long)(int)sizeof(((struct mgs_target_info *)0)->mti_svname));
+ LASSERTF((int)offsetof(struct mgs_target_info, mti_uuid) == 152, "found %lld\n",
+ (long long)(int)offsetof(struct mgs_target_info, mti_uuid));
+ LASSERTF((int)sizeof(((struct mgs_target_info *)0)->mti_uuid) == 40, "found %lld\n",
+ (long long)(int)sizeof(((struct mgs_target_info *)0)->mti_uuid));
+ LASSERTF((int)offsetof(struct mgs_target_info, mti_nids) == 192, "found %lld\n",
+ (long long)(int)offsetof(struct mgs_target_info, mti_nids));
+ LASSERTF((int)sizeof(((struct mgs_target_info *)0)->mti_nids) == 256, "found %lld\n",
+ (long long)(int)sizeof(((struct mgs_target_info *)0)->mti_nids));
+ LASSERTF((int)offsetof(struct mgs_target_info, mti_params) == 448, "found %lld\n",
+ (long long)(int)offsetof(struct mgs_target_info, mti_params));
+ LASSERTF((int)sizeof(((struct mgs_target_info *)0)->mti_params) == 4096, "found %lld\n",
+ (long long)(int)sizeof(((struct mgs_target_info *)0)->mti_params));
+
+ /* Checks for struct lustre_capa */
+ LASSERTF((int)sizeof(struct lustre_capa) == 120, "found %lld\n",
+ (long long)(int)sizeof(struct lustre_capa));
+ LASSERTF((int)offsetof(struct lustre_capa, lc_fid) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct lustre_capa, lc_fid));
+ LASSERTF((int)sizeof(((struct lustre_capa *)0)->lc_fid) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct lustre_capa *)0)->lc_fid));
+ LASSERTF((int)offsetof(struct lustre_capa, lc_opc) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct lustre_capa, lc_opc));
+ LASSERTF((int)sizeof(((struct lustre_capa *)0)->lc_opc) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct lustre_capa *)0)->lc_opc));
+ LASSERTF((int)offsetof(struct lustre_capa, lc_uid) == 24, "found %lld\n",
+ (long long)(int)offsetof(struct lustre_capa, lc_uid));
+ LASSERTF((int)sizeof(((struct lustre_capa *)0)->lc_uid) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct lustre_capa *)0)->lc_uid));
+ LASSERTF((int)offsetof(struct lustre_capa, lc_gid) == 32, "found %lld\n",
+ (long long)(int)offsetof(struct lustre_capa, lc_gid));
+ LASSERTF((int)sizeof(((struct lustre_capa *)0)->lc_gid) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct lustre_capa *)0)->lc_gid));
+ LASSERTF((int)offsetof(struct lustre_capa, lc_flags) == 40, "found %lld\n",
+ (long long)(int)offsetof(struct lustre_capa, lc_flags));
+ LASSERTF((int)sizeof(((struct lustre_capa *)0)->lc_flags) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct lustre_capa *)0)->lc_flags));
+ LASSERTF((int)offsetof(struct lustre_capa, lc_keyid) == 44, "found %lld\n",
+ (long long)(int)offsetof(struct lustre_capa, lc_keyid));
+ LASSERTF((int)sizeof(((struct lustre_capa *)0)->lc_keyid) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct lustre_capa *)0)->lc_keyid));
+ LASSERTF((int)offsetof(struct lustre_capa, lc_timeout) == 48, "found %lld\n",
+ (long long)(int)offsetof(struct lustre_capa, lc_timeout));
+ LASSERTF((int)sizeof(((struct lustre_capa *)0)->lc_timeout) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct lustre_capa *)0)->lc_timeout));
+ LASSERTF((int)offsetof(struct lustre_capa, lc_expiry) == 52, "found %lld\n",
+ (long long)(int)offsetof(struct lustre_capa, lc_expiry));
+ LASSERTF((int)sizeof(((struct lustre_capa *)0)->lc_expiry) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct lustre_capa *)0)->lc_expiry));
+ CLASSERT(CAPA_HMAC_MAX_LEN == 64);
+ LASSERTF((int)offsetof(struct lustre_capa, lc_hmac[64]) == 120, "found %lld\n",
+ (long long)(int)offsetof(struct lustre_capa, lc_hmac[64]));
+ LASSERTF((int)sizeof(((struct lustre_capa *)0)->lc_hmac[64]) == 1, "found %lld\n",
+ (long long)(int)sizeof(((struct lustre_capa *)0)->lc_hmac[64]));
+
+ /* Checks for struct lustre_capa_key */
+ LASSERTF((int)sizeof(struct lustre_capa_key) == 72, "found %lld\n",
+ (long long)(int)sizeof(struct lustre_capa_key));
+ LASSERTF((int)offsetof(struct lustre_capa_key, lk_seq) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct lustre_capa_key, lk_seq));
+ LASSERTF((int)sizeof(((struct lustre_capa_key *)0)->lk_seq) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct lustre_capa_key *)0)->lk_seq));
+ LASSERTF((int)offsetof(struct lustre_capa_key, lk_keyid) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct lustre_capa_key, lk_keyid));
+ LASSERTF((int)sizeof(((struct lustre_capa_key *)0)->lk_keyid) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct lustre_capa_key *)0)->lk_keyid));
+ LASSERTF((int)offsetof(struct lustre_capa_key, lk_padding) == 12, "found %lld\n",
+ (long long)(int)offsetof(struct lustre_capa_key, lk_padding));
+ LASSERTF((int)sizeof(((struct lustre_capa_key *)0)->lk_padding) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct lustre_capa_key *)0)->lk_padding));
+ CLASSERT(CAPA_HMAC_KEY_MAX_LEN == 56);
+ LASSERTF((int)offsetof(struct lustre_capa_key, lk_key[56]) == 72, "found %lld\n",
+ (long long)(int)offsetof(struct lustre_capa_key, lk_key[56]));
+ LASSERTF((int)sizeof(((struct lustre_capa_key *)0)->lk_key[56]) == 1, "found %lld\n",
+ (long long)(int)sizeof(((struct lustre_capa_key *)0)->lk_key[56]));
+
+ /* Checks for struct getinfo_fid2path */
+ LASSERTF((int)sizeof(struct getinfo_fid2path) == 32, "found %lld\n",
+ (long long)(int)sizeof(struct getinfo_fid2path));
+ LASSERTF((int)offsetof(struct getinfo_fid2path, gf_fid) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct getinfo_fid2path, gf_fid));
+ LASSERTF((int)sizeof(((struct getinfo_fid2path *)0)->gf_fid) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct getinfo_fid2path *)0)->gf_fid));
+ LASSERTF((int)offsetof(struct getinfo_fid2path, gf_recno) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct getinfo_fid2path, gf_recno));
+ LASSERTF((int)sizeof(((struct getinfo_fid2path *)0)->gf_recno) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct getinfo_fid2path *)0)->gf_recno));
+ LASSERTF((int)offsetof(struct getinfo_fid2path, gf_linkno) == 24, "found %lld\n",
+ (long long)(int)offsetof(struct getinfo_fid2path, gf_linkno));
+ LASSERTF((int)sizeof(((struct getinfo_fid2path *)0)->gf_linkno) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct getinfo_fid2path *)0)->gf_linkno));
+ LASSERTF((int)offsetof(struct getinfo_fid2path, gf_pathlen) == 28, "found %lld\n",
+ (long long)(int)offsetof(struct getinfo_fid2path, gf_pathlen));
+ LASSERTF((int)sizeof(((struct getinfo_fid2path *)0)->gf_pathlen) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct getinfo_fid2path *)0)->gf_pathlen));
+ LASSERTF((int)offsetof(struct getinfo_fid2path, gf_path[0]) == 32, "found %lld\n",
+ (long long)(int)offsetof(struct getinfo_fid2path, gf_path[0]));
+ LASSERTF((int)sizeof(((struct getinfo_fid2path *)0)->gf_path[0]) == 1, "found %lld\n",
+ (long long)(int)sizeof(((struct getinfo_fid2path *)0)->gf_path[0]));
+
+ /* Checks for struct ll_user_fiemap */
+ LASSERTF((int)sizeof(struct ll_user_fiemap) == 32, "found %lld\n",
+ (long long)(int)sizeof(struct ll_user_fiemap));
+ LASSERTF((int)offsetof(struct ll_user_fiemap, fm_start) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct ll_user_fiemap, fm_start));
+ LASSERTF((int)sizeof(((struct ll_user_fiemap *)0)->fm_start) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct ll_user_fiemap *)0)->fm_start));
+ LASSERTF((int)offsetof(struct ll_user_fiemap, fm_length) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct ll_user_fiemap, fm_length));
+ LASSERTF((int)sizeof(((struct ll_user_fiemap *)0)->fm_length) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct ll_user_fiemap *)0)->fm_length));
+ LASSERTF((int)offsetof(struct ll_user_fiemap, fm_flags) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct ll_user_fiemap, fm_flags));
+ LASSERTF((int)sizeof(((struct ll_user_fiemap *)0)->fm_flags) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct ll_user_fiemap *)0)->fm_flags));
+ LASSERTF((int)offsetof(struct ll_user_fiemap, fm_mapped_extents) == 20, "found %lld\n",
+ (long long)(int)offsetof(struct ll_user_fiemap, fm_mapped_extents));
+ LASSERTF((int)sizeof(((struct ll_user_fiemap *)0)->fm_mapped_extents) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct ll_user_fiemap *)0)->fm_mapped_extents));
+ LASSERTF((int)offsetof(struct ll_user_fiemap, fm_extent_count) == 24, "found %lld\n",
+ (long long)(int)offsetof(struct ll_user_fiemap, fm_extent_count));
+ LASSERTF((int)sizeof(((struct ll_user_fiemap *)0)->fm_extent_count) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct ll_user_fiemap *)0)->fm_extent_count));
+ LASSERTF((int)offsetof(struct ll_user_fiemap, fm_reserved) == 28, "found %lld\n",
+ (long long)(int)offsetof(struct ll_user_fiemap, fm_reserved));
+ LASSERTF((int)sizeof(((struct ll_user_fiemap *)0)->fm_reserved) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct ll_user_fiemap *)0)->fm_reserved));
+ LASSERTF((int)offsetof(struct ll_user_fiemap, fm_extents) == 32, "found %lld\n",
+ (long long)(int)offsetof(struct ll_user_fiemap, fm_extents));
+ LASSERTF((int)sizeof(((struct ll_user_fiemap *)0)->fm_extents) == 0, "found %lld\n",
+ (long long)(int)sizeof(((struct ll_user_fiemap *)0)->fm_extents));
+ CLASSERT(FIEMAP_FLAG_SYNC == 0x00000001);
+ CLASSERT(FIEMAP_FLAG_XATTR == 0x00000002);
+ CLASSERT(FIEMAP_FLAG_DEVICE_ORDER == 0x40000000);
+
+ /* Checks for struct ll_fiemap_extent */
+ LASSERTF((int)sizeof(struct ll_fiemap_extent) == 56, "found %lld\n",
+ (long long)(int)sizeof(struct ll_fiemap_extent));
+ LASSERTF((int)offsetof(struct ll_fiemap_extent, fe_logical) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct ll_fiemap_extent, fe_logical));
+ LASSERTF((int)sizeof(((struct ll_fiemap_extent *)0)->fe_logical) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct ll_fiemap_extent *)0)->fe_logical));
+ LASSERTF((int)offsetof(struct ll_fiemap_extent, fe_physical) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct ll_fiemap_extent, fe_physical));
+ LASSERTF((int)sizeof(((struct ll_fiemap_extent *)0)->fe_physical) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct ll_fiemap_extent *)0)->fe_physical));
+ LASSERTF((int)offsetof(struct ll_fiemap_extent, fe_length) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct ll_fiemap_extent, fe_length));
+ LASSERTF((int)sizeof(((struct ll_fiemap_extent *)0)->fe_length) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct ll_fiemap_extent *)0)->fe_length));
+ LASSERTF((int)offsetof(struct ll_fiemap_extent, fe_flags) == 40, "found %lld\n",
+ (long long)(int)offsetof(struct ll_fiemap_extent, fe_flags));
+ LASSERTF((int)sizeof(((struct ll_fiemap_extent *)0)->fe_flags) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct ll_fiemap_extent *)0)->fe_flags));
+ LASSERTF((int)offsetof(struct ll_fiemap_extent, fe_device) == 44, "found %lld\n",
+ (long long)(int)offsetof(struct ll_fiemap_extent, fe_device));
+ LASSERTF((int)sizeof(((struct ll_fiemap_extent *)0)->fe_device) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct ll_fiemap_extent *)0)->fe_device));
+ CLASSERT(FIEMAP_EXTENT_LAST == 0x00000001);
+ CLASSERT(FIEMAP_EXTENT_UNKNOWN == 0x00000002);
+ CLASSERT(FIEMAP_EXTENT_DELALLOC == 0x00000004);
+ CLASSERT(FIEMAP_EXTENT_ENCODED == 0x00000008);
+ CLASSERT(FIEMAP_EXTENT_DATA_ENCRYPTED == 0x00000080);
+ CLASSERT(FIEMAP_EXTENT_NOT_ALIGNED == 0x00000100);
+ CLASSERT(FIEMAP_EXTENT_DATA_INLINE == 0x00000200);
+ CLASSERT(FIEMAP_EXTENT_DATA_TAIL == 0x00000400);
+ CLASSERT(FIEMAP_EXTENT_UNWRITTEN == 0x00000800);
+ CLASSERT(FIEMAP_EXTENT_MERGED == 0x00001000);
+ CLASSERT(FIEMAP_EXTENT_NO_DIRECT == 0x40000000);
+ CLASSERT(FIEMAP_EXTENT_NET == 0x80000000);
+
+ /* Checks for type posix_acl_xattr_entry */
+ LASSERTF((int)sizeof(posix_acl_xattr_entry) == 8, "found %lld\n",
+ (long long)(int)sizeof(posix_acl_xattr_entry));
+ LASSERTF((int)offsetof(posix_acl_xattr_entry, e_tag) == 0, "found %lld\n",
+ (long long)(int)offsetof(posix_acl_xattr_entry, e_tag));
+ LASSERTF((int)sizeof(((posix_acl_xattr_entry *)0)->e_tag) == 2, "found %lld\n",
+ (long long)(int)sizeof(((posix_acl_xattr_entry *)0)->e_tag));
+ LASSERTF((int)offsetof(posix_acl_xattr_entry, e_perm) == 2, "found %lld\n",
+ (long long)(int)offsetof(posix_acl_xattr_entry, e_perm));
+ LASSERTF((int)sizeof(((posix_acl_xattr_entry *)0)->e_perm) == 2, "found %lld\n",
+ (long long)(int)sizeof(((posix_acl_xattr_entry *)0)->e_perm));
+ LASSERTF((int)offsetof(posix_acl_xattr_entry, e_id) == 4, "found %lld\n",
+ (long long)(int)offsetof(posix_acl_xattr_entry, e_id));
+ LASSERTF((int)sizeof(((posix_acl_xattr_entry *)0)->e_id) == 4, "found %lld\n",
+ (long long)(int)sizeof(((posix_acl_xattr_entry *)0)->e_id));
+
+ /* Checks for type posix_acl_xattr_header */
+ LASSERTF((int)sizeof(posix_acl_xattr_header) == 4, "found %lld\n",
+ (long long)(int)sizeof(posix_acl_xattr_header));
+ LASSERTF((int)offsetof(posix_acl_xattr_header, a_version) == 0, "found %lld\n",
+ (long long)(int)offsetof(posix_acl_xattr_header, a_version));
+ LASSERTF((int)sizeof(((posix_acl_xattr_header *)0)->a_version) == 4, "found %lld\n",
+ (long long)(int)sizeof(((posix_acl_xattr_header *)0)->a_version));
+ LASSERTF((int)offsetof(posix_acl_xattr_header, a_entries) == 4, "found %lld\n",
+ (long long)(int)offsetof(posix_acl_xattr_header, a_entries));
+ LASSERTF((int)sizeof(((posix_acl_xattr_header *)0)->a_entries) == 0, "found %lld\n",
+ (long long)(int)sizeof(((posix_acl_xattr_header *)0)->a_entries));
+
+ /* Checks for struct link_ea_header */
+ LASSERTF((int)sizeof(struct link_ea_header) == 24, "found %lld\n",
+ (long long)(int)sizeof(struct link_ea_header));
+ LASSERTF((int)offsetof(struct link_ea_header, leh_magic) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct link_ea_header, leh_magic));
+ LASSERTF((int)sizeof(((struct link_ea_header *)0)->leh_magic) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct link_ea_header *)0)->leh_magic));
+ LASSERTF((int)offsetof(struct link_ea_header, leh_reccount) == 4, "found %lld\n",
+ (long long)(int)offsetof(struct link_ea_header, leh_reccount));
+ LASSERTF((int)sizeof(((struct link_ea_header *)0)->leh_reccount) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct link_ea_header *)0)->leh_reccount));
+ LASSERTF((int)offsetof(struct link_ea_header, leh_len) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct link_ea_header, leh_len));
+ LASSERTF((int)sizeof(((struct link_ea_header *)0)->leh_len) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct link_ea_header *)0)->leh_len));
+ LASSERTF((int)offsetof(struct link_ea_header, padding1) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct link_ea_header, padding1));
+ LASSERTF((int)sizeof(((struct link_ea_header *)0)->padding1) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct link_ea_header *)0)->padding1));
+ LASSERTF((int)offsetof(struct link_ea_header, padding2) == 20, "found %lld\n",
+ (long long)(int)offsetof(struct link_ea_header, padding2));
+ LASSERTF((int)sizeof(((struct link_ea_header *)0)->padding2) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct link_ea_header *)0)->padding2));
+ CLASSERT(LINK_EA_MAGIC == 0x11EAF1DFUL);
+
+ /* Checks for struct link_ea_entry */
+ LASSERTF((int)sizeof(struct link_ea_entry) == 18, "found %lld\n",
+ (long long)(int)sizeof(struct link_ea_entry));
+ LASSERTF((int)offsetof(struct link_ea_entry, lee_reclen) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct link_ea_entry, lee_reclen));
+ LASSERTF((int)sizeof(((struct link_ea_entry *)0)->lee_reclen) == 2, "found %lld\n",
+ (long long)(int)sizeof(((struct link_ea_entry *)0)->lee_reclen));
+ LASSERTF((int)offsetof(struct link_ea_entry, lee_parent_fid) == 2, "found %lld\n",
+ (long long)(int)offsetof(struct link_ea_entry, lee_parent_fid));
+ LASSERTF((int)sizeof(((struct link_ea_entry *)0)->lee_parent_fid) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct link_ea_entry *)0)->lee_parent_fid));
+ LASSERTF((int)offsetof(struct link_ea_entry, lee_name) == 18, "found %lld\n",
+ (long long)(int)offsetof(struct link_ea_entry, lee_name));
+ LASSERTF((int)sizeof(((struct link_ea_entry *)0)->lee_name) == 0, "found %lld\n",
+ (long long)(int)sizeof(((struct link_ea_entry *)0)->lee_name));
+
+ /* Checks for struct layout_intent */
+ LASSERTF((int)sizeof(struct layout_intent) == 24, "found %lld\n",
+ (long long)(int)sizeof(struct layout_intent));
+ LASSERTF((int)offsetof(struct layout_intent, li_opc) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct layout_intent, li_opc));
+ LASSERTF((int)sizeof(((struct layout_intent *)0)->li_opc) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct layout_intent *)0)->li_opc));
+ LASSERTF((int)offsetof(struct layout_intent, li_flags) == 4, "found %lld\n",
+ (long long)(int)offsetof(struct layout_intent, li_flags));
+ LASSERTF((int)sizeof(((struct layout_intent *)0)->li_flags) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct layout_intent *)0)->li_flags));
+ LASSERTF((int)offsetof(struct layout_intent, li_start) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct layout_intent, li_start));
+ LASSERTF((int)sizeof(((struct layout_intent *)0)->li_start) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct layout_intent *)0)->li_start));
+ LASSERTF((int)offsetof(struct layout_intent, li_end) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct layout_intent, li_end));
+ LASSERTF((int)sizeof(((struct layout_intent *)0)->li_end) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct layout_intent *)0)->li_end));
+ LASSERTF(LAYOUT_INTENT_ACCESS == 0, "found %lld\n",
+ (long long)LAYOUT_INTENT_ACCESS);
+ LASSERTF(LAYOUT_INTENT_READ == 1, "found %lld\n",
+ (long long)LAYOUT_INTENT_READ);
+ LASSERTF(LAYOUT_INTENT_WRITE == 2, "found %lld\n",
+ (long long)LAYOUT_INTENT_WRITE);
+ LASSERTF(LAYOUT_INTENT_GLIMPSE == 3, "found %lld\n",
+ (long long)LAYOUT_INTENT_GLIMPSE);
+ LASSERTF(LAYOUT_INTENT_TRUNC == 4, "found %lld\n",
+ (long long)LAYOUT_INTENT_TRUNC);
+ LASSERTF(LAYOUT_INTENT_RELEASE == 5, "found %lld\n",
+ (long long)LAYOUT_INTENT_RELEASE);
+ LASSERTF(LAYOUT_INTENT_RESTORE == 6, "found %lld\n",
+ (long long)LAYOUT_INTENT_RESTORE);
+
+ /* Checks for struct hsm_action_item */
+ LASSERTF((int)sizeof(struct hsm_action_item) == 72, "found %lld\n",
+ (long long)(int)sizeof(struct hsm_action_item));
+ LASSERTF((int)offsetof(struct hsm_action_item, hai_len) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct hsm_action_item, hai_len));
+ LASSERTF((int)sizeof(((struct hsm_action_item *)0)->hai_len) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct hsm_action_item *)0)->hai_len));
+ LASSERTF((int)offsetof(struct hsm_action_item, hai_action) == 4, "found %lld\n",
+ (long long)(int)offsetof(struct hsm_action_item, hai_action));
+ LASSERTF((int)sizeof(((struct hsm_action_item *)0)->hai_action) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct hsm_action_item *)0)->hai_action));
+ LASSERTF((int)offsetof(struct hsm_action_item, hai_fid) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct hsm_action_item, hai_fid));
+ LASSERTF((int)sizeof(((struct hsm_action_item *)0)->hai_fid) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct hsm_action_item *)0)->hai_fid));
+ LASSERTF((int)offsetof(struct hsm_action_item, hai_dfid) == 24, "found %lld\n",
+ (long long)(int)offsetof(struct hsm_action_item, hai_dfid));
+ LASSERTF((int)sizeof(((struct hsm_action_item *)0)->hai_dfid) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct hsm_action_item *)0)->hai_dfid));
+ LASSERTF((int)offsetof(struct hsm_action_item, hai_extent) == 40, "found %lld\n",
+ (long long)(int)offsetof(struct hsm_action_item, hai_extent));
+ LASSERTF((int)sizeof(((struct hsm_action_item *)0)->hai_extent) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct hsm_action_item *)0)->hai_extent));
+ LASSERTF((int)offsetof(struct hsm_action_item, hai_cookie) == 56, "found %lld\n",
+ (long long)(int)offsetof(struct hsm_action_item, hai_cookie));
+ LASSERTF((int)sizeof(((struct hsm_action_item *)0)->hai_cookie) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct hsm_action_item *)0)->hai_cookie));
+ LASSERTF((int)offsetof(struct hsm_action_item, hai_gid) == 64, "found %lld\n",
+ (long long)(int)offsetof(struct hsm_action_item, hai_gid));
+ LASSERTF((int)sizeof(((struct hsm_action_item *)0)->hai_gid) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct hsm_action_item *)0)->hai_gid));
+ LASSERTF((int)offsetof(struct hsm_action_item, hai_data) == 72, "found %lld\n",
+ (long long)(int)offsetof(struct hsm_action_item, hai_data));
+ LASSERTF((int)sizeof(((struct hsm_action_item *)0)->hai_data) == 0, "found %lld\n",
+ (long long)(int)sizeof(((struct hsm_action_item *)0)->hai_data));
+
+ /* Checks for struct hsm_action_list */
+ LASSERTF((int)sizeof(struct hsm_action_list) == 32, "found %lld\n",
+ (long long)(int)sizeof(struct hsm_action_list));
+ LASSERTF((int)offsetof(struct hsm_action_list, hal_version) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct hsm_action_list, hal_version));
+ LASSERTF((int)sizeof(((struct hsm_action_list *)0)->hal_version) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct hsm_action_list *)0)->hal_version));
+ LASSERTF((int)offsetof(struct hsm_action_list, hal_count) == 4, "found %lld\n",
+ (long long)(int)offsetof(struct hsm_action_list, hal_count));
+ LASSERTF((int)sizeof(((struct hsm_action_list *)0)->hal_count) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct hsm_action_list *)0)->hal_count));
+ LASSERTF((int)offsetof(struct hsm_action_list, hal_compound_id) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct hsm_action_list, hal_compound_id));
+ LASSERTF((int)sizeof(((struct hsm_action_list *)0)->hal_compound_id) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct hsm_action_list *)0)->hal_compound_id));
+ LASSERTF((int)offsetof(struct hsm_action_list, hal_flags) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct hsm_action_list, hal_flags));
+ LASSERTF((int)sizeof(((struct hsm_action_list *)0)->hal_flags) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct hsm_action_list *)0)->hal_flags));
+ LASSERTF((int)offsetof(struct hsm_action_list, hal_archive_id) == 24, "found %lld\n",
+ (long long)(int)offsetof(struct hsm_action_list, hal_archive_id));
+ LASSERTF((int)sizeof(((struct hsm_action_list *)0)->hal_archive_id) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct hsm_action_list *)0)->hal_archive_id));
+ LASSERTF((int)offsetof(struct hsm_action_list, padding1) == 28, "found %lld\n",
+ (long long)(int)offsetof(struct hsm_action_list, padding1));
+ LASSERTF((int)sizeof(((struct hsm_action_list *)0)->padding1) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct hsm_action_list *)0)->padding1));
+ LASSERTF((int)offsetof(struct hsm_action_list, hal_fsname) == 32, "found %lld\n",
+ (long long)(int)offsetof(struct hsm_action_list, hal_fsname));
+ LASSERTF((int)sizeof(((struct hsm_action_list *)0)->hal_fsname) == 0, "found %lld\n",
+ (long long)(int)sizeof(((struct hsm_action_list *)0)->hal_fsname));
+
+ /* Checks for struct hsm_progress */
+ LASSERTF((int)sizeof(struct hsm_progress) == 48, "found %lld\n",
+ (long long)(int)sizeof(struct hsm_progress));
+ LASSERTF((int)offsetof(struct hsm_progress, hp_fid) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct hsm_progress, hp_fid));
+ LASSERTF((int)sizeof(((struct hsm_progress *)0)->hp_fid) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct hsm_progress *)0)->hp_fid));
+ LASSERTF((int)offsetof(struct hsm_progress, hp_cookie) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct hsm_progress, hp_cookie));
+ LASSERTF((int)sizeof(((struct hsm_progress *)0)->hp_cookie) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct hsm_progress *)0)->hp_cookie));
+ LASSERTF((int)offsetof(struct hsm_progress, hp_extent) == 24, "found %lld\n",
+ (long long)(int)offsetof(struct hsm_progress, hp_extent));
+ LASSERTF((int)sizeof(((struct hsm_progress *)0)->hp_extent) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct hsm_progress *)0)->hp_extent));
+ LASSERTF((int)offsetof(struct hsm_progress, hp_flags) == 40, "found %lld\n",
+ (long long)(int)offsetof(struct hsm_progress, hp_flags));
+ LASSERTF((int)sizeof(((struct hsm_progress *)0)->hp_flags) == 2, "found %lld\n",
+ (long long)(int)sizeof(((struct hsm_progress *)0)->hp_flags));
+ LASSERTF((int)offsetof(struct hsm_progress, hp_errval) == 42, "found %lld\n",
+ (long long)(int)offsetof(struct hsm_progress, hp_errval));
+ LASSERTF((int)sizeof(((struct hsm_progress *)0)->hp_errval) == 2, "found %lld\n",
+ (long long)(int)sizeof(((struct hsm_progress *)0)->hp_errval));
+ LASSERTF((int)offsetof(struct hsm_progress, padding) == 44, "found %lld\n",
+ (long long)(int)offsetof(struct hsm_progress, padding));
+ LASSERTF((int)sizeof(((struct hsm_progress *)0)->padding) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct hsm_progress *)0)->padding));
+ LASSERTF(HP_FLAG_COMPLETED == 0x01, "found 0x%.8x\n",
+ HP_FLAG_COMPLETED);
+ LASSERTF(HP_FLAG_RETRY == 0x02, "found 0x%.8x\n",
+ HP_FLAG_RETRY);
+
+ LASSERTF((int)offsetof(struct hsm_copy, hc_data_version) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct hsm_copy, hc_data_version));
+ LASSERTF((int)sizeof(((struct hsm_copy *)0)->hc_data_version) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct hsm_copy *)0)->hc_data_version));
+ LASSERTF((int)offsetof(struct hsm_copy, hc_flags) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct hsm_copy, hc_flags));
+ LASSERTF((int)sizeof(((struct hsm_copy *)0)->hc_flags) == 2, "found %lld\n",
+ (long long)(int)sizeof(((struct hsm_copy *)0)->hc_flags));
+ LASSERTF((int)offsetof(struct hsm_copy, hc_errval) == 10, "found %lld\n",
+ (long long)(int)offsetof(struct hsm_copy, hc_errval));
+ LASSERTF((int)sizeof(((struct hsm_copy *)0)->hc_errval) == 2, "found %lld\n",
+ (long long)(int)sizeof(((struct hsm_copy *)0)->hc_errval));
+ LASSERTF((int)offsetof(struct hsm_copy, padding) == 12, "found %lld\n",
+ (long long)(int)offsetof(struct hsm_copy, padding));
+ LASSERTF((int)sizeof(((struct hsm_copy *)0)->padding) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct hsm_copy *)0)->padding));
+ LASSERTF((int)offsetof(struct hsm_copy, hc_hai) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct hsm_copy, hc_hai));
+ LASSERTF((int)sizeof(((struct hsm_copy *)0)->hc_hai) == 72, "found %lld\n",
+ (long long)(int)sizeof(((struct hsm_copy *)0)->hc_hai));
+
+ /* Checks for struct hsm_progress_kernel */
+ LASSERTF((int)sizeof(struct hsm_progress_kernel) == 64, "found %lld\n",
+ (long long)(int)sizeof(struct hsm_progress_kernel));
+ LASSERTF((int)offsetof(struct hsm_progress_kernel, hpk_fid) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct hsm_progress_kernel, hpk_fid));
+ LASSERTF((int)sizeof(((struct hsm_progress_kernel *)0)->hpk_fid) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct hsm_progress_kernel *)0)->hpk_fid));
+ LASSERTF((int)offsetof(struct hsm_progress_kernel, hpk_cookie) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct hsm_progress_kernel, hpk_cookie));
+ LASSERTF((int)sizeof(((struct hsm_progress_kernel *)0)->hpk_cookie) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct hsm_progress_kernel *)0)->hpk_cookie));
+ LASSERTF((int)offsetof(struct hsm_progress_kernel, hpk_extent) == 24, "found %lld\n",
+ (long long)(int)offsetof(struct hsm_progress_kernel, hpk_extent));
+ LASSERTF((int)sizeof(((struct hsm_progress_kernel *)0)->hpk_extent) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct hsm_progress_kernel *)0)->hpk_extent));
+ LASSERTF((int)offsetof(struct hsm_progress_kernel, hpk_flags) == 40, "found %lld\n",
+ (long long)(int)offsetof(struct hsm_progress_kernel, hpk_flags));
+ LASSERTF((int)sizeof(((struct hsm_progress_kernel *)0)->hpk_flags) == 2, "found %lld\n",
+ (long long)(int)sizeof(((struct hsm_progress_kernel *)0)->hpk_flags));
+ LASSERTF((int)offsetof(struct hsm_progress_kernel, hpk_errval) == 42, "found %lld\n",
+ (long long)(int)offsetof(struct hsm_progress_kernel, hpk_errval));
+ LASSERTF((int)sizeof(((struct hsm_progress_kernel *)0)->hpk_errval) == 2, "found %lld\n",
+ (long long)(int)sizeof(((struct hsm_progress_kernel *)0)->hpk_errval));
+ LASSERTF((int)offsetof(struct hsm_progress_kernel, hpk_padding1) == 44, "found %lld\n",
+ (long long)(int)offsetof(struct hsm_progress_kernel, hpk_padding1));
+ LASSERTF((int)sizeof(((struct hsm_progress_kernel *)0)->hpk_padding1) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct hsm_progress_kernel *)0)->hpk_padding1));
+ LASSERTF((int)offsetof(struct hsm_progress_kernel, hpk_data_version) == 48, "found %lld\n",
+ (long long)(int)offsetof(struct hsm_progress_kernel, hpk_data_version));
+ LASSERTF((int)sizeof(((struct hsm_progress_kernel *)0)->hpk_data_version) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct hsm_progress_kernel *)0)->hpk_data_version));
+ LASSERTF((int)offsetof(struct hsm_progress_kernel, hpk_padding2) == 56, "found %lld\n",
+ (long long)(int)offsetof(struct hsm_progress_kernel, hpk_padding2));
+ LASSERTF((int)sizeof(((struct hsm_progress_kernel *)0)->hpk_padding2) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct hsm_progress_kernel *)0)->hpk_padding2));
+
+ /* Checks for struct hsm_user_item */
+ LASSERTF((int)sizeof(struct hsm_user_item) == 32, "found %lld\n",
+ (long long)(int)sizeof(struct hsm_user_item));
+ LASSERTF((int)offsetof(struct hsm_user_item, hui_fid) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct hsm_user_item, hui_fid));
+ LASSERTF((int)sizeof(((struct hsm_user_item *)0)->hui_fid) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct hsm_user_item *)0)->hui_fid));
+ LASSERTF((int)offsetof(struct hsm_user_item, hui_extent) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct hsm_user_item, hui_extent));
+ LASSERTF((int)sizeof(((struct hsm_user_item *)0)->hui_extent) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct hsm_user_item *)0)->hui_extent));
+
+ /* Checks for struct hsm_user_state */
+ LASSERTF((int)sizeof(struct hsm_user_state) == 32, "found %lld\n",
+ (long long)(int)sizeof(struct hsm_user_state));
+ LASSERTF((int)offsetof(struct hsm_user_state, hus_states) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct hsm_user_state, hus_states));
+ LASSERTF((int)sizeof(((struct hsm_user_state *)0)->hus_states) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct hsm_user_state *)0)->hus_states));
+ LASSERTF((int)offsetof(struct hsm_user_state, hus_archive_id) == 4, "found %lld\n",
+ (long long)(int)offsetof(struct hsm_user_state, hus_archive_id));
+ LASSERTF((int)sizeof(((struct hsm_user_state *)0)->hus_archive_id) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct hsm_user_state *)0)->hus_archive_id));
+ LASSERTF((int)offsetof(struct hsm_user_state, hus_in_progress_state) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct hsm_user_state, hus_in_progress_state));
+ LASSERTF((int)sizeof(((struct hsm_user_state *)0)->hus_in_progress_state) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct hsm_user_state *)0)->hus_in_progress_state));
+ LASSERTF((int)offsetof(struct hsm_user_state, hus_in_progress_action) == 12, "found %lld\n",
+ (long long)(int)offsetof(struct hsm_user_state, hus_in_progress_action));
+ LASSERTF((int)sizeof(((struct hsm_user_state *)0)->hus_in_progress_action) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct hsm_user_state *)0)->hus_in_progress_action));
+ LASSERTF((int)offsetof(struct hsm_user_state, hus_in_progress_location) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct hsm_user_state, hus_in_progress_location));
+ LASSERTF((int)sizeof(((struct hsm_user_state *)0)->hus_in_progress_location) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct hsm_user_state *)0)->hus_in_progress_location));
+
+ /* Checks for struct hsm_state_set */
+ LASSERTF((int)sizeof(struct hsm_state_set) == 24, "found %lld\n",
+ (long long)(int)sizeof(struct hsm_state_set));
+ LASSERTF((int)offsetof(struct hsm_state_set, hss_valid) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct hsm_state_set, hss_valid));
+ LASSERTF((int)sizeof(((struct hsm_state_set *)0)->hss_valid) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct hsm_state_set *)0)->hss_valid));
+ LASSERTF((int)offsetof(struct hsm_state_set, hss_archive_id) == 4, "found %lld\n",
+ (long long)(int)offsetof(struct hsm_state_set, hss_archive_id));
+ LASSERTF((int)sizeof(((struct hsm_state_set *)0)->hss_archive_id) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct hsm_state_set *)0)->hss_archive_id));
+ LASSERTF((int)offsetof(struct hsm_state_set, hss_setmask) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct hsm_state_set, hss_setmask));
+ LASSERTF((int)sizeof(((struct hsm_state_set *)0)->hss_setmask) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct hsm_state_set *)0)->hss_setmask));
+ LASSERTF((int)offsetof(struct hsm_state_set, hss_clearmask) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct hsm_state_set, hss_clearmask));
+ LASSERTF((int)sizeof(((struct hsm_state_set *)0)->hss_clearmask) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct hsm_state_set *)0)->hss_clearmask));
+
+ /* Checks for struct hsm_current_action */
+ LASSERTF((int)sizeof(struct hsm_current_action) == 24, "found %lld\n",
+ (long long)(int)sizeof(struct hsm_current_action));
+ LASSERTF((int)offsetof(struct hsm_current_action, hca_state) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct hsm_current_action, hca_state));
+ LASSERTF((int)sizeof(((struct hsm_current_action *)0)->hca_state) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct hsm_current_action *)0)->hca_state));
+ LASSERTF((int)offsetof(struct hsm_current_action, hca_action) == 4, "found %lld\n",
+ (long long)(int)offsetof(struct hsm_current_action, hca_action));
+ LASSERTF((int)sizeof(((struct hsm_current_action *)0)->hca_action) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct hsm_current_action *)0)->hca_action));
+ LASSERTF((int)offsetof(struct hsm_current_action, hca_location) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct hsm_current_action, hca_location));
+ LASSERTF((int)sizeof(((struct hsm_current_action *)0)->hca_location) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct hsm_current_action *)0)->hca_location));
+
+ /* Checks for struct hsm_request */
+ LASSERTF((int)sizeof(struct hsm_request) == 24, "found %lld\n",
+ (long long)(int)sizeof(struct hsm_request));
+ LASSERTF((int)offsetof(struct hsm_request, hr_action) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct hsm_request, hr_action));
+ LASSERTF((int)sizeof(((struct hsm_request *)0)->hr_action) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct hsm_request *)0)->hr_action));
+ LASSERTF((int)offsetof(struct hsm_request, hr_archive_id) == 4, "found %lld\n",
+ (long long)(int)offsetof(struct hsm_request, hr_archive_id));
+ LASSERTF((int)sizeof(((struct hsm_request *)0)->hr_archive_id) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct hsm_request *)0)->hr_archive_id));
+ LASSERTF((int)offsetof(struct hsm_request, hr_flags) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct hsm_request, hr_flags));
+ LASSERTF((int)sizeof(((struct hsm_request *)0)->hr_flags) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct hsm_request *)0)->hr_flags));
+ LASSERTF((int)offsetof(struct hsm_request, hr_itemcount) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct hsm_request, hr_itemcount));
+ LASSERTF((int)sizeof(((struct hsm_request *)0)->hr_itemcount) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct hsm_request *)0)->hr_itemcount));
+ LASSERTF((int)offsetof(struct hsm_request, hr_data_len) == 20, "found %lld\n",
+ (long long)(int)offsetof(struct hsm_request, hr_data_len));
+ LASSERTF((int)sizeof(((struct hsm_request *)0)->hr_data_len) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct hsm_request *)0)->hr_data_len));
+ LASSERTF(HSM_FORCE_ACTION == 0x00000001UL, "found 0x%.8xUL\n",
+ (unsigned)HSM_FORCE_ACTION);
+ LASSERTF(HSM_GHOST_COPY == 0x00000002UL, "found 0x%.8xUL\n",
+ (unsigned)HSM_GHOST_COPY);
+
+ /* Checks for struct hsm_user_request */
+ LASSERTF((int)sizeof(struct hsm_user_request) == 24, "found %lld\n",
+ (long long)(int)sizeof(struct hsm_user_request));
+ LASSERTF((int)offsetof(struct hsm_user_request, hur_request) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct hsm_user_request, hur_request));
+ LASSERTF((int)sizeof(((struct hsm_user_request *)0)->hur_request) == 24, "found %lld\n",
+ (long long)(int)sizeof(((struct hsm_user_request *)0)->hur_request));
+ LASSERTF((int)offsetof(struct hsm_user_request, hur_user_item) == 24, "found %lld\n",
+ (long long)(int)offsetof(struct hsm_user_request, hur_user_item));
+ LASSERTF((int)sizeof(((struct hsm_user_request *)0)->hur_user_item) == 0, "found %lld\n",
+ (long long)(int)sizeof(((struct hsm_user_request *)0)->hur_user_item));
+
+ /* Checks for struct hsm_user_import */
+ LASSERTF(sizeof(struct hsm_user_import) == 48, "found %lld\n",
+ (long long)sizeof(struct hsm_user_import));
+ LASSERTF(offsetof(struct hsm_user_import, hui_size) == 0,
+ "found %lld\n",
+ (long long)offsetof(struct hsm_user_import, hui_size));
+ LASSERTF(sizeof(((struct hsm_user_import *)0)->hui_size) == 8,
+ "found %lld\n",
+ (long long)sizeof(((struct hsm_user_import *)0)->hui_size));
+ LASSERTF(offsetof(struct hsm_user_import, hui_uid) == 32,
+ "found %lld\n",
+ (long long)offsetof(struct hsm_user_import, hui_uid));
+ LASSERTF(sizeof(((struct hsm_user_import *)0)->hui_uid) == 4,
+ "found %lld\n",
+ (long long)sizeof(((struct hsm_user_import *)0)->hui_uid));
+ LASSERTF(offsetof(struct hsm_user_import, hui_gid) == 36,
+ "found %lld\n",
+ (long long)offsetof(struct hsm_user_import, hui_gid));
+ LASSERTF(sizeof(((struct hsm_user_import *)0)->hui_gid) == 4,
+ "found %lld\n",
+ (long long)sizeof(((struct hsm_user_import *)0)->hui_gid));
+ LASSERTF(offsetof(struct hsm_user_import, hui_mode) == 40,
+ "found %lld\n",
+ (long long)offsetof(struct hsm_user_import, hui_mode));
+ LASSERTF(sizeof(((struct hsm_user_import *)0)->hui_mode) == 4,
+ "found %lld\n",
+ (long long)sizeof(((struct hsm_user_import *)0)->hui_mode));
+ LASSERTF(offsetof(struct hsm_user_import, hui_atime) == 8,
+ "found %lld\n",
+ (long long)offsetof(struct hsm_user_import, hui_atime));
+ LASSERTF(sizeof(((struct hsm_user_import *)0)->hui_atime) == 8,
+ "found %lld\n",
+ (long long)sizeof(((struct hsm_user_import *)0)->hui_atime));
+ LASSERTF(offsetof(struct hsm_user_import, hui_atime_ns) == 24,
+ "found %lld\n",
+ (long long)(int)offsetof(struct hsm_user_import, hui_atime_ns));
+ LASSERTF(sizeof(((struct hsm_user_import *)0)->hui_atime_ns) == 4,
+ "found %lld\n",
+ (long long)sizeof(((struct hsm_user_import *)0)->hui_atime_ns));
+ LASSERTF(offsetof(struct hsm_user_import, hui_mtime) == 16,
+ "found %lld\n",
+ (long long)offsetof(struct hsm_user_import, hui_mtime));
+ LASSERTF(sizeof(((struct hsm_user_import *)0)->hui_mtime) == 8,
+ "found %lld\n",
+ (long long)sizeof(((struct hsm_user_import *)0)->hui_mtime));
+ LASSERTF(offsetof(struct hsm_user_import, hui_mtime_ns) == 28,
+ "found %lld\n",
+ (long long)offsetof(struct hsm_user_import, hui_mtime_ns));
+ LASSERTF(sizeof(((struct hsm_user_import *)0)->hui_mtime_ns) == 4,
+ "found %lld\n",
+ (long long)sizeof(((struct hsm_user_import *)0)->hui_mtime_ns));
+ LASSERTF(offsetof(struct hsm_user_import, hui_archive_id) == 44,
+ "found %lld\n",
+ (long long)offsetof(struct hsm_user_import, hui_archive_id));
+ LASSERTF(sizeof(((struct hsm_user_import *)0)->hui_archive_id) == 4,
+ "found %lld\n",
+ (long long)sizeof(((struct hsm_user_import *)0)->hui_archive_id));
+
+ /* Checks for struct update_buf */
+ LASSERTF((int)sizeof(struct update_buf) == 8, "found %lld\n",
+ (long long)(int)sizeof(struct update_buf));
+ LASSERTF((int)offsetof(struct update_buf, ub_magic) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct update_buf, ub_magic));
+ LASSERTF((int)sizeof(((struct update_buf *)0)->ub_magic) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct update_buf *)0)->ub_magic));
+ LASSERTF((int)offsetof(struct update_buf, ub_count) == 4, "found %lld\n",
+ (long long)(int)offsetof(struct update_buf, ub_count));
+ LASSERTF((int)sizeof(((struct update_buf *)0)->ub_count) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct update_buf *)0)->ub_count));
+ LASSERTF((int)offsetof(struct update_buf, ub_bufs) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct update_buf, ub_bufs));
+ LASSERTF((int)sizeof(((struct update_buf *)0)->ub_bufs) == 0, "found %lld\n",
+ (long long)(int)sizeof(((struct update_buf *)0)->ub_bufs));
+
+ /* Checks for struct update_reply */
+ LASSERTF((int)sizeof(struct update_reply) == 8, "found %lld\n",
+ (long long)(int)sizeof(struct update_reply));
+ LASSERTF((int)offsetof(struct update_reply, ur_version) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct update_reply, ur_version));
+ LASSERTF((int)sizeof(((struct update_reply *)0)->ur_version) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct update_reply *)0)->ur_version));
+ LASSERTF((int)offsetof(struct update_reply, ur_count) == 4, "found %lld\n",
+ (long long)(int)offsetof(struct update_reply, ur_count));
+ LASSERTF((int)sizeof(((struct update_reply *)0)->ur_count) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct update_reply *)0)->ur_count));
+ LASSERTF((int)offsetof(struct update_reply, ur_lens) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct update_reply, ur_lens));
+ LASSERTF((int)sizeof(((struct update_reply *)0)->ur_lens) == 0, "found %lld\n",
+ (long long)(int)sizeof(((struct update_reply *)0)->ur_lens));
+
+ /* Checks for struct update */
+ LASSERTF((int)sizeof(struct update) == 56, "found %lld\n",
+ (long long)(int)sizeof(struct update));
+ LASSERTF((int)offsetof(struct update, u_type) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct update, u_type));
+ LASSERTF((int)sizeof(((struct update *)0)->u_type) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct update *)0)->u_type));
+ LASSERTF((int)offsetof(struct update, u_batchid) == 4, "found %lld\n",
+ (long long)(int)offsetof(struct update, u_batchid));
+ LASSERTF((int)sizeof(((struct update *)0)->u_batchid) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct update *)0)->u_batchid));
+ LASSERTF((int)offsetof(struct update, u_fid) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct update, u_fid));
+ LASSERTF((int)sizeof(((struct update *)0)->u_fid) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct update *)0)->u_fid));
+ LASSERTF((int)offsetof(struct update, u_lens) == 24, "found %lld\n",
+ (long long)(int)offsetof(struct update, u_lens));
+ LASSERTF((int)sizeof(((struct update *)0)->u_lens) == 32, "found %lld\n",
+ (long long)(int)sizeof(((struct update *)0)->u_lens));
+ LASSERTF((int)offsetof(struct update, u_bufs) == 56, "found %lld\n",
+ (long long)(int)offsetof(struct update, u_bufs));
+ LASSERTF((int)sizeof(((struct update *)0)->u_bufs) == 0, "found %lld\n",
+ (long long)(int)sizeof(((struct update *)0)->u_bufs));
+}