diff options
author | André Fabian Silva Delgado <emulatorman@parabola.nu> | 2015-08-05 17:04:01 -0300 |
---|---|---|
committer | André Fabian Silva Delgado <emulatorman@parabola.nu> | 2015-08-05 17:04:01 -0300 |
commit | 57f0f512b273f60d52568b8c6b77e17f5636edc0 (patch) | |
tree | 5e910f0e82173f4ef4f51111366a3f1299037a7b /drivers/staging/lustre/lustre/ptlrpc |
Initial import
Diffstat (limited to 'drivers/staging/lustre/lustre/ptlrpc')
29 files changed, 31743 insertions, 0 deletions
diff --git a/drivers/staging/lustre/lustre/ptlrpc/Makefile b/drivers/staging/lustre/lustre/ptlrpc/Makefile new file mode 100644 index 000000000..fb50cd4c6 --- /dev/null +++ b/drivers/staging/lustre/lustre/ptlrpc/Makefile @@ -0,0 +1,20 @@ +obj-$(CONFIG_LUSTRE_FS) += ptlrpc.o +LDLM := ../../lustre/ldlm/ + +ldlm_objs := $(LDLM)l_lock.o $(LDLM)ldlm_lock.o +ldlm_objs += $(LDLM)ldlm_resource.o $(LDLM)ldlm_lib.o +ldlm_objs += $(LDLM)ldlm_plain.o $(LDLM)ldlm_extent.o +ldlm_objs += $(LDLM)ldlm_request.o $(LDLM)ldlm_lockd.o +ldlm_objs += $(LDLM)ldlm_flock.o $(LDLM)ldlm_inodebits.o +ldlm_objs += $(LDLM)ldlm_pool.o +ldlm_objs += $(LDLM)interval_tree.o +ptlrpc_objs := client.o recover.o connection.o niobuf.o pack_generic.o +ptlrpc_objs += events.o ptlrpc_module.o service.o pinger.o +ptlrpc_objs += llog_net.o llog_client.o import.o ptlrpcd.o +ptlrpc_objs += pers.o lproc_ptlrpc.o wiretest.o layout.o +ptlrpc_objs += sec.o sec_bulk.o sec_gc.o sec_config.o +ptlrpc_objs += sec_null.o sec_plain.o nrs.o nrs_fifo.o + +ptlrpc-y := $(ldlm_objs) $(ptlrpc_objs) +ptlrpc-$(CONFIG_PROC_FS) += sec_lproc.o +ptlrpc-$(CONFIG_LUSTRE_TRANSLATE_ERRNOS) += errno.o diff --git a/drivers/staging/lustre/lustre/ptlrpc/client.c b/drivers/staging/lustre/lustre/ptlrpc/client.c new file mode 100644 index 000000000..0357f1d45 --- /dev/null +++ b/drivers/staging/lustre/lustre/ptlrpc/client.c @@ -0,0 +1,3149 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2011, 2012, Intel Corporation. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + */ + +/** Implementation of client-side PortalRPC interfaces */ + +#define DEBUG_SUBSYSTEM S_RPC + +#include "../include/obd_support.h" +#include "../include/obd_class.h" +#include "../include/lustre_lib.h" +#include "../include/lustre_ha.h" +#include "../include/lustre_import.h" +#include "../include/lustre_req_layout.h" + +#include "ptlrpc_internal.h" + +static int ptlrpc_send_new_req(struct ptlrpc_request *req); +static int ptlrpcd_check_work(struct ptlrpc_request *req); + +/** + * Initialize passed in client structure \a cl. + */ +void ptlrpc_init_client(int req_portal, int rep_portal, char *name, + struct ptlrpc_client *cl) +{ + cl->cli_request_portal = req_portal; + cl->cli_reply_portal = rep_portal; + cl->cli_name = name; +} +EXPORT_SYMBOL(ptlrpc_init_client); + +/** + * Return PortalRPC connection for remote uud \a uuid + */ +struct ptlrpc_connection *ptlrpc_uuid_to_connection(struct obd_uuid *uuid) +{ + struct ptlrpc_connection *c; + lnet_nid_t self; + lnet_process_id_t peer; + int err; + + /* ptlrpc_uuid_to_peer() initializes its 2nd parameter + * before accessing its values. */ + /* coverity[uninit_use_in_call] */ + err = ptlrpc_uuid_to_peer(uuid, &peer, &self); + if (err != 0) { + CNETERR("cannot find peer %s!\n", uuid->uuid); + return NULL; + } + + c = ptlrpc_connection_get(peer, self, uuid); + if (c) { + memcpy(c->c_remote_uuid.uuid, + uuid->uuid, sizeof(c->c_remote_uuid.uuid)); + } + + CDEBUG(D_INFO, "%s -> %p\n", uuid->uuid, c); + + return c; +} +EXPORT_SYMBOL(ptlrpc_uuid_to_connection); + +/** + * Allocate and initialize new bulk descriptor on the sender. + * Returns pointer to the descriptor or NULL on error. + */ +struct ptlrpc_bulk_desc *ptlrpc_new_bulk(unsigned npages, unsigned max_brw, + unsigned type, unsigned portal) +{ + struct ptlrpc_bulk_desc *desc; + int i; + + OBD_ALLOC(desc, offsetof(struct ptlrpc_bulk_desc, bd_iov[npages])); + if (!desc) + return NULL; + + spin_lock_init(&desc->bd_lock); + init_waitqueue_head(&desc->bd_waitq); + desc->bd_max_iov = npages; + desc->bd_iov_count = 0; + desc->bd_portal = portal; + desc->bd_type = type; + desc->bd_md_count = 0; + LASSERT(max_brw > 0); + desc->bd_md_max_brw = min(max_brw, PTLRPC_BULK_OPS_COUNT); + /* PTLRPC_BULK_OPS_COUNT is the compile-time transfer limit for this + * node. Negotiated ocd_brw_size will always be <= this number. */ + for (i = 0; i < PTLRPC_BULK_OPS_COUNT; i++) + LNetInvalidateHandle(&desc->bd_mds[i]); + + return desc; +} + +/** + * Prepare bulk descriptor for specified outgoing request \a req that + * can fit \a npages * pages. \a type is bulk type. \a portal is where + * the bulk to be sent. Used on client-side. + * Returns pointer to newly allocated initialized bulk descriptor or NULL on + * error. + */ +struct ptlrpc_bulk_desc *ptlrpc_prep_bulk_imp(struct ptlrpc_request *req, + unsigned npages, unsigned max_brw, + unsigned type, unsigned portal) +{ + struct obd_import *imp = req->rq_import; + struct ptlrpc_bulk_desc *desc; + + LASSERT(type == BULK_PUT_SINK || type == BULK_GET_SOURCE); + desc = ptlrpc_new_bulk(npages, max_brw, type, portal); + if (desc == NULL) + return NULL; + + desc->bd_import_generation = req->rq_import_generation; + desc->bd_import = class_import_get(imp); + desc->bd_req = req; + + desc->bd_cbid.cbid_fn = client_bulk_callback; + desc->bd_cbid.cbid_arg = desc; + + /* This makes req own desc, and free it when she frees herself */ + req->rq_bulk = desc; + + return desc; +} +EXPORT_SYMBOL(ptlrpc_prep_bulk_imp); + +/** + * Add a page \a page to the bulk descriptor \a desc. + * Data to transfer in the page starts at offset \a pageoffset and + * amount of data to transfer from the page is \a len + */ +void __ptlrpc_prep_bulk_page(struct ptlrpc_bulk_desc *desc, + struct page *page, int pageoffset, int len, int pin) +{ + LASSERT(desc->bd_iov_count < desc->bd_max_iov); + LASSERT(page != NULL); + LASSERT(pageoffset >= 0); + LASSERT(len > 0); + LASSERT(pageoffset + len <= PAGE_CACHE_SIZE); + + desc->bd_nob += len; + + if (pin) + page_cache_get(page); + + ptlrpc_add_bulk_page(desc, page, pageoffset, len); +} +EXPORT_SYMBOL(__ptlrpc_prep_bulk_page); + +/** + * Uninitialize and free bulk descriptor \a desc. + * Works on bulk descriptors both from server and client side. + */ +void __ptlrpc_free_bulk(struct ptlrpc_bulk_desc *desc, int unpin) +{ + int i; + + LASSERT(desc != NULL); + LASSERT(desc->bd_iov_count != LI_POISON); /* not freed already */ + LASSERT(desc->bd_md_count == 0); /* network hands off */ + LASSERT((desc->bd_export != NULL) ^ (desc->bd_import != NULL)); + + sptlrpc_enc_pool_put_pages(desc); + + if (desc->bd_export) + class_export_put(desc->bd_export); + else + class_import_put(desc->bd_import); + + if (unpin) { + for (i = 0; i < desc->bd_iov_count; i++) + page_cache_release(desc->bd_iov[i].kiov_page); + } + + OBD_FREE(desc, offsetof(struct ptlrpc_bulk_desc, + bd_iov[desc->bd_max_iov])); +} +EXPORT_SYMBOL(__ptlrpc_free_bulk); + +/** + * Set server timelimit for this req, i.e. how long are we willing to wait + * for reply before timing out this request. + */ +void ptlrpc_at_set_req_timeout(struct ptlrpc_request *req) +{ + __u32 serv_est; + int idx; + struct imp_at *at; + + LASSERT(req->rq_import); + + if (AT_OFF) { + /* non-AT settings */ + /** + * \a imp_server_timeout means this is reverse import and + * we send (currently only) ASTs to the client and cannot afford + * to wait too long for the reply, otherwise the other client + * (because of which we are sending this request) would + * timeout waiting for us + */ + req->rq_timeout = req->rq_import->imp_server_timeout ? + obd_timeout / 2 : obd_timeout; + } else { + at = &req->rq_import->imp_at; + idx = import_at_get_index(req->rq_import, + req->rq_request_portal); + serv_est = at_get(&at->iat_service_estimate[idx]); + req->rq_timeout = at_est2timeout(serv_est); + } + /* We could get even fancier here, using history to predict increased + loading... */ + + /* Let the server know what this RPC timeout is by putting it in the + reqmsg*/ + lustre_msg_set_timeout(req->rq_reqmsg, req->rq_timeout); +} +EXPORT_SYMBOL(ptlrpc_at_set_req_timeout); + +/* Adjust max service estimate based on server value */ +static void ptlrpc_at_adj_service(struct ptlrpc_request *req, + unsigned int serv_est) +{ + int idx; + unsigned int oldse; + struct imp_at *at; + + LASSERT(req->rq_import); + at = &req->rq_import->imp_at; + + idx = import_at_get_index(req->rq_import, req->rq_request_portal); + /* max service estimates are tracked on the server side, + so just keep minimal history here */ + oldse = at_measured(&at->iat_service_estimate[idx], serv_est); + if (oldse != 0) + CDEBUG(D_ADAPTTO, "The RPC service estimate for %s ptl %d has changed from %d to %d\n", + req->rq_import->imp_obd->obd_name, req->rq_request_portal, + oldse, at_get(&at->iat_service_estimate[idx])); +} + +/* Expected network latency per remote node (secs) */ +int ptlrpc_at_get_net_latency(struct ptlrpc_request *req) +{ + return AT_OFF ? 0 : at_get(&req->rq_import->imp_at.iat_net_latency); +} + +/* Adjust expected network latency */ +static void ptlrpc_at_adj_net_latency(struct ptlrpc_request *req, + unsigned int service_time) +{ + unsigned int nl, oldnl; + struct imp_at *at; + time_t now = get_seconds(); + + LASSERT(req->rq_import); + + if (service_time > now - req->rq_sent + 3) { + /* bz16408, however, this can also happen if early reply + * is lost and client RPC is expired and resent, early reply + * or reply of original RPC can still be fit in reply buffer + * of resent RPC, now client is measuring time from the + * resent time, but server sent back service time of original + * RPC. + */ + CDEBUG((lustre_msg_get_flags(req->rq_reqmsg) & MSG_RESENT) ? + D_ADAPTTO : D_WARNING, + "Reported service time %u > total measured time " + CFS_DURATION_T"\n", service_time, + cfs_time_sub(now, req->rq_sent)); + return; + } + + /* Network latency is total time less server processing time */ + nl = max_t(int, now - req->rq_sent - + service_time, 0) + 1; /* st rounding */ + at = &req->rq_import->imp_at; + + oldnl = at_measured(&at->iat_net_latency, nl); + if (oldnl != 0) + CDEBUG(D_ADAPTTO, "The network latency for %s (nid %s) has changed from %d to %d\n", + req->rq_import->imp_obd->obd_name, + obd_uuid2str( + &req->rq_import->imp_connection->c_remote_uuid), + oldnl, at_get(&at->iat_net_latency)); +} + +static int unpack_reply(struct ptlrpc_request *req) +{ + int rc; + + if (SPTLRPC_FLVR_POLICY(req->rq_flvr.sf_rpc) != SPTLRPC_POLICY_NULL) { + rc = ptlrpc_unpack_rep_msg(req, req->rq_replen); + if (rc) { + DEBUG_REQ(D_ERROR, req, "unpack_rep failed: %d", rc); + return -EPROTO; + } + } + + rc = lustre_unpack_rep_ptlrpc_body(req, MSG_PTLRPC_BODY_OFF); + if (rc) { + DEBUG_REQ(D_ERROR, req, "unpack ptlrpc body failed: %d", rc); + return -EPROTO; + } + return 0; +} + +/** + * Handle an early reply message, called with the rq_lock held. + * If anything goes wrong just ignore it - same as if it never happened + */ +static int ptlrpc_at_recv_early_reply(struct ptlrpc_request *req) +{ + struct ptlrpc_request *early_req; + time_t olddl; + int rc; + + req->rq_early = 0; + spin_unlock(&req->rq_lock); + + rc = sptlrpc_cli_unwrap_early_reply(req, &early_req); + if (rc) { + spin_lock(&req->rq_lock); + return rc; + } + + rc = unpack_reply(early_req); + if (rc == 0) { + /* Expecting to increase the service time estimate here */ + ptlrpc_at_adj_service(req, + lustre_msg_get_timeout(early_req->rq_repmsg)); + ptlrpc_at_adj_net_latency(req, + lustre_msg_get_service_time(early_req->rq_repmsg)); + } + + sptlrpc_cli_finish_early_reply(early_req); + + if (rc != 0) { + spin_lock(&req->rq_lock); + return rc; + } + + /* Adjust the local timeout for this req */ + ptlrpc_at_set_req_timeout(req); + + spin_lock(&req->rq_lock); + olddl = req->rq_deadline; + /* server assumes it now has rq_timeout from when it sent the + * early reply, so client should give it at least that long. */ + req->rq_deadline = get_seconds() + req->rq_timeout + + ptlrpc_at_get_net_latency(req); + + DEBUG_REQ(D_ADAPTTO, req, + "Early reply #%d, new deadline in " CFS_DURATION_T "s (" CFS_DURATION_T "s)", + req->rq_early_count, + cfs_time_sub(req->rq_deadline, get_seconds()), + cfs_time_sub(req->rq_deadline, olddl)); + + return rc; +} + +struct kmem_cache *request_cache; + +int ptlrpc_request_cache_init(void) +{ + request_cache = kmem_cache_create("ptlrpc_cache", + sizeof(struct ptlrpc_request), + 0, SLAB_HWCACHE_ALIGN, NULL); + return request_cache == NULL ? -ENOMEM : 0; +} + +void ptlrpc_request_cache_fini(void) +{ + kmem_cache_destroy(request_cache); +} + +struct ptlrpc_request *ptlrpc_request_cache_alloc(gfp_t flags) +{ + struct ptlrpc_request *req; + + OBD_SLAB_ALLOC_PTR_GFP(req, request_cache, flags); + return req; +} + +void ptlrpc_request_cache_free(struct ptlrpc_request *req) +{ + OBD_SLAB_FREE_PTR(req, request_cache); +} + +/** + * Wind down request pool \a pool. + * Frees all requests from the pool too + */ +void ptlrpc_free_rq_pool(struct ptlrpc_request_pool *pool) +{ + struct list_head *l, *tmp; + struct ptlrpc_request *req; + + LASSERT(pool != NULL); + + spin_lock(&pool->prp_lock); + list_for_each_safe(l, tmp, &pool->prp_req_list) { + req = list_entry(l, struct ptlrpc_request, rq_list); + list_del(&req->rq_list); + LASSERT(req->rq_reqbuf); + LASSERT(req->rq_reqbuf_len == pool->prp_rq_size); + OBD_FREE_LARGE(req->rq_reqbuf, pool->prp_rq_size); + ptlrpc_request_cache_free(req); + } + spin_unlock(&pool->prp_lock); + OBD_FREE(pool, sizeof(*pool)); +} +EXPORT_SYMBOL(ptlrpc_free_rq_pool); + +/** + * Allocates, initializes and adds \a num_rq requests to the pool \a pool + */ +void ptlrpc_add_rqs_to_pool(struct ptlrpc_request_pool *pool, int num_rq) +{ + int i; + int size = 1; + + while (size < pool->prp_rq_size) + size <<= 1; + + LASSERTF(list_empty(&pool->prp_req_list) || + size == pool->prp_rq_size, + "Trying to change pool size with nonempty pool from %d to %d bytes\n", + pool->prp_rq_size, size); + + spin_lock(&pool->prp_lock); + pool->prp_rq_size = size; + for (i = 0; i < num_rq; i++) { + struct ptlrpc_request *req; + struct lustre_msg *msg; + + spin_unlock(&pool->prp_lock); + req = ptlrpc_request_cache_alloc(GFP_NOFS); + if (!req) + return; + OBD_ALLOC_LARGE(msg, size); + if (!msg) { + ptlrpc_request_cache_free(req); + return; + } + req->rq_reqbuf = msg; + req->rq_reqbuf_len = size; + req->rq_pool = pool; + spin_lock(&pool->prp_lock); + list_add_tail(&req->rq_list, &pool->prp_req_list); + } + spin_unlock(&pool->prp_lock); +} +EXPORT_SYMBOL(ptlrpc_add_rqs_to_pool); + +/** + * Create and initialize new request pool with given attributes: + * \a num_rq - initial number of requests to create for the pool + * \a msgsize - maximum message size possible for requests in thid pool + * \a populate_pool - function to be called when more requests need to be added + * to the pool + * Returns pointer to newly created pool or NULL on error. + */ +struct ptlrpc_request_pool * +ptlrpc_init_rq_pool(int num_rq, int msgsize, + void (*populate_pool)(struct ptlrpc_request_pool *, int)) +{ + struct ptlrpc_request_pool *pool; + + OBD_ALLOC(pool, sizeof(struct ptlrpc_request_pool)); + if (!pool) + return NULL; + + /* Request next power of two for the allocation, because internally + kernel would do exactly this */ + + spin_lock_init(&pool->prp_lock); + INIT_LIST_HEAD(&pool->prp_req_list); + pool->prp_rq_size = msgsize + SPTLRPC_MAX_PAYLOAD; + pool->prp_populate = populate_pool; + + populate_pool(pool, num_rq); + + if (list_empty(&pool->prp_req_list)) { + /* have not allocated a single request for the pool */ + OBD_FREE(pool, sizeof(struct ptlrpc_request_pool)); + pool = NULL; + } + return pool; +} +EXPORT_SYMBOL(ptlrpc_init_rq_pool); + +/** + * Fetches one request from pool \a pool + */ +static struct ptlrpc_request * +ptlrpc_prep_req_from_pool(struct ptlrpc_request_pool *pool) +{ + struct ptlrpc_request *request; + struct lustre_msg *reqbuf; + + if (!pool) + return NULL; + + spin_lock(&pool->prp_lock); + + /* See if we have anything in a pool, and bail out if nothing, + * in writeout path, where this matters, this is safe to do, because + * nothing is lost in this case, and when some in-flight requests + * complete, this code will be called again. */ + if (unlikely(list_empty(&pool->prp_req_list))) { + spin_unlock(&pool->prp_lock); + return NULL; + } + + request = list_entry(pool->prp_req_list.next, struct ptlrpc_request, + rq_list); + list_del_init(&request->rq_list); + spin_unlock(&pool->prp_lock); + + LASSERT(request->rq_reqbuf); + LASSERT(request->rq_pool); + + reqbuf = request->rq_reqbuf; + memset(request, 0, sizeof(*request)); + request->rq_reqbuf = reqbuf; + request->rq_reqbuf_len = pool->prp_rq_size; + request->rq_pool = pool; + + return request; +} + +/** + * Returns freed \a request to pool. + */ +static void __ptlrpc_free_req_to_pool(struct ptlrpc_request *request) +{ + struct ptlrpc_request_pool *pool = request->rq_pool; + + spin_lock(&pool->prp_lock); + LASSERT(list_empty(&request->rq_list)); + LASSERT(!request->rq_receiving_reply); + list_add_tail(&request->rq_list, &pool->prp_req_list); + spin_unlock(&pool->prp_lock); +} + +static int __ptlrpc_request_bufs_pack(struct ptlrpc_request *request, + __u32 version, int opcode, + int count, __u32 *lengths, char **bufs, + struct ptlrpc_cli_ctx *ctx) +{ + struct obd_import *imp = request->rq_import; + int rc; + + if (unlikely(ctx)) + request->rq_cli_ctx = sptlrpc_cli_ctx_get(ctx); + else { + rc = sptlrpc_req_get_ctx(request); + if (rc) + goto out_free; + } + + sptlrpc_req_set_flavor(request, opcode); + + rc = lustre_pack_request(request, imp->imp_msg_magic, count, + lengths, bufs); + if (rc) { + LASSERT(!request->rq_pool); + goto out_ctx; + } + + lustre_msg_add_version(request->rq_reqmsg, version); + request->rq_send_state = LUSTRE_IMP_FULL; + request->rq_type = PTL_RPC_MSG_REQUEST; + request->rq_export = NULL; + + request->rq_req_cbid.cbid_fn = request_out_callback; + request->rq_req_cbid.cbid_arg = request; + + request->rq_reply_cbid.cbid_fn = reply_in_callback; + request->rq_reply_cbid.cbid_arg = request; + + request->rq_reply_deadline = 0; + request->rq_phase = RQ_PHASE_NEW; + request->rq_next_phase = RQ_PHASE_UNDEFINED; + + request->rq_request_portal = imp->imp_client->cli_request_portal; + request->rq_reply_portal = imp->imp_client->cli_reply_portal; + + ptlrpc_at_set_req_timeout(request); + + spin_lock_init(&request->rq_lock); + INIT_LIST_HEAD(&request->rq_list); + INIT_LIST_HEAD(&request->rq_timed_list); + INIT_LIST_HEAD(&request->rq_replay_list); + INIT_LIST_HEAD(&request->rq_ctx_chain); + INIT_LIST_HEAD(&request->rq_set_chain); + INIT_LIST_HEAD(&request->rq_history_list); + INIT_LIST_HEAD(&request->rq_exp_list); + init_waitqueue_head(&request->rq_reply_waitq); + init_waitqueue_head(&request->rq_set_waitq); + request->rq_xid = ptlrpc_next_xid(); + atomic_set(&request->rq_refcount, 1); + + lustre_msg_set_opc(request->rq_reqmsg, opcode); + + return 0; +out_ctx: + sptlrpc_cli_ctx_put(request->rq_cli_ctx, 1); +out_free: + class_import_put(imp); + return rc; +} + +int ptlrpc_request_bufs_pack(struct ptlrpc_request *request, + __u32 version, int opcode, char **bufs, + struct ptlrpc_cli_ctx *ctx) +{ + int count; + + count = req_capsule_filled_sizes(&request->rq_pill, RCL_CLIENT); + return __ptlrpc_request_bufs_pack(request, version, opcode, count, + request->rq_pill.rc_area[RCL_CLIENT], + bufs, ctx); +} +EXPORT_SYMBOL(ptlrpc_request_bufs_pack); + +/** + * Pack request buffers for network transfer, performing necessary encryption + * steps if necessary. + */ +int ptlrpc_request_pack(struct ptlrpc_request *request, + __u32 version, int opcode) +{ + int rc; + rc = ptlrpc_request_bufs_pack(request, version, opcode, NULL, NULL); + if (rc) + return rc; + + /* For some old 1.8 clients (< 1.8.7), they will LASSERT the size of + * ptlrpc_body sent from server equal to local ptlrpc_body size, so we + * have to send old ptlrpc_body to keep interoperability with these + * clients. + * + * Only three kinds of server->client RPCs so far: + * - LDLM_BL_CALLBACK + * - LDLM_CP_CALLBACK + * - LDLM_GL_CALLBACK + * + * XXX This should be removed whenever we drop the interoperability with + * the these old clients. + */ + if (opcode == LDLM_BL_CALLBACK || opcode == LDLM_CP_CALLBACK || + opcode == LDLM_GL_CALLBACK) + req_capsule_shrink(&request->rq_pill, &RMF_PTLRPC_BODY, + sizeof(struct ptlrpc_body_v2), RCL_CLIENT); + + return rc; +} +EXPORT_SYMBOL(ptlrpc_request_pack); + +/** + * Helper function to allocate new request on import \a imp + * and possibly using existing request from pool \a pool if provided. + * Returns allocated request structure with import field filled or + * NULL on error. + */ +static inline +struct ptlrpc_request *__ptlrpc_request_alloc(struct obd_import *imp, + struct ptlrpc_request_pool *pool) +{ + struct ptlrpc_request *request = NULL; + + if (pool) + request = ptlrpc_prep_req_from_pool(pool); + + if (!request) + request = ptlrpc_request_cache_alloc(GFP_NOFS); + + if (request) { + LASSERTF((unsigned long)imp > 0x1000, "%p", imp); + LASSERT(imp != LP_POISON); + LASSERTF((unsigned long)imp->imp_client > 0x1000, "%p", + imp->imp_client); + LASSERT(imp->imp_client != LP_POISON); + + request->rq_import = class_import_get(imp); + } else { + CERROR("request allocation out of memory\n"); + } + + return request; +} + +/** + * Helper function for creating a request. + * Calls __ptlrpc_request_alloc to allocate new request structure and inits + * buffer structures according to capsule template \a format. + * Returns allocated request structure pointer or NULL on error. + */ +static struct ptlrpc_request * +ptlrpc_request_alloc_internal(struct obd_import *imp, + struct ptlrpc_request_pool *pool, + const struct req_format *format) +{ + struct ptlrpc_request *request; + + request = __ptlrpc_request_alloc(imp, pool); + if (request == NULL) + return NULL; + + req_capsule_init(&request->rq_pill, request, RCL_CLIENT); + req_capsule_set(&request->rq_pill, format); + return request; +} + +/** + * Allocate new request structure for import \a imp and initialize its + * buffer structure according to capsule template \a format. + */ +struct ptlrpc_request *ptlrpc_request_alloc(struct obd_import *imp, + const struct req_format *format) +{ + return ptlrpc_request_alloc_internal(imp, NULL, format); +} +EXPORT_SYMBOL(ptlrpc_request_alloc); + +/** + * Allocate new request structure for import \a imp from pool \a pool and + * initialize its buffer structure according to capsule template \a format. + */ +struct ptlrpc_request *ptlrpc_request_alloc_pool(struct obd_import *imp, + struct ptlrpc_request_pool *pool, + const struct req_format *format) +{ + return ptlrpc_request_alloc_internal(imp, pool, format); +} +EXPORT_SYMBOL(ptlrpc_request_alloc_pool); + +/** + * For requests not from pool, free memory of the request structure. + * For requests obtained from a pool earlier, return request back to pool. + */ +void ptlrpc_request_free(struct ptlrpc_request *request) +{ + if (request->rq_pool) + __ptlrpc_free_req_to_pool(request); + else + ptlrpc_request_cache_free(request); +} +EXPORT_SYMBOL(ptlrpc_request_free); + +/** + * Allocate new request for operation \a opcode and immediately pack it for + * network transfer. + * Only used for simple requests like OBD_PING where the only important + * part of the request is operation itself. + * Returns allocated request or NULL on error. + */ +struct ptlrpc_request *ptlrpc_request_alloc_pack(struct obd_import *imp, + const struct req_format *format, + __u32 version, int opcode) +{ + struct ptlrpc_request *req = ptlrpc_request_alloc(imp, format); + int rc; + + if (req) { + rc = ptlrpc_request_pack(req, version, opcode); + if (rc) { + ptlrpc_request_free(req); + req = NULL; + } + } + return req; +} +EXPORT_SYMBOL(ptlrpc_request_alloc_pack); + +/** + * Prepare request (fetched from pool \a pool if not NULL) on import \a imp + * for operation \a opcode. Request would contain \a count buffers. + * Sizes of buffers are described in array \a lengths and buffers themselves + * are provided by a pointer \a bufs. + * Returns prepared request structure pointer or NULL on error. + */ +struct ptlrpc_request * +ptlrpc_prep_req_pool(struct obd_import *imp, + __u32 version, int opcode, + int count, __u32 *lengths, char **bufs, + struct ptlrpc_request_pool *pool) +{ + struct ptlrpc_request *request; + int rc; + + request = __ptlrpc_request_alloc(imp, pool); + if (!request) + return NULL; + + rc = __ptlrpc_request_bufs_pack(request, version, opcode, count, + lengths, bufs, NULL); + if (rc) { + ptlrpc_request_free(request); + request = NULL; + } + return request; +} +EXPORT_SYMBOL(ptlrpc_prep_req_pool); + +/** + * Same as ptlrpc_prep_req_pool, but without pool + */ +struct ptlrpc_request * +ptlrpc_prep_req(struct obd_import *imp, __u32 version, int opcode, int count, + __u32 *lengths, char **bufs) +{ + return ptlrpc_prep_req_pool(imp, version, opcode, count, lengths, bufs, + NULL); +} +EXPORT_SYMBOL(ptlrpc_prep_req); + +/** + * Allocate and initialize new request set structure. + * Returns a pointer to the newly allocated set structure or NULL on error. + */ +struct ptlrpc_request_set *ptlrpc_prep_set(void) +{ + struct ptlrpc_request_set *set; + + OBD_ALLOC(set, sizeof(*set)); + if (!set) + return NULL; + atomic_set(&set->set_refcount, 1); + INIT_LIST_HEAD(&set->set_requests); + init_waitqueue_head(&set->set_waitq); + atomic_set(&set->set_new_count, 0); + atomic_set(&set->set_remaining, 0); + spin_lock_init(&set->set_new_req_lock); + INIT_LIST_HEAD(&set->set_new_requests); + INIT_LIST_HEAD(&set->set_cblist); + set->set_max_inflight = UINT_MAX; + set->set_producer = NULL; + set->set_producer_arg = NULL; + set->set_rc = 0; + + return set; +} +EXPORT_SYMBOL(ptlrpc_prep_set); + +/** + * Allocate and initialize new request set structure with flow control + * extension. This extension allows to control the number of requests in-flight + * for the whole set. A callback function to generate requests must be provided + * and the request set will keep the number of requests sent over the wire to + * @max_inflight. + * Returns a pointer to the newly allocated set structure or NULL on error. + */ +struct ptlrpc_request_set *ptlrpc_prep_fcset(int max, set_producer_func func, + void *arg) + +{ + struct ptlrpc_request_set *set; + + set = ptlrpc_prep_set(); + if (!set) + return NULL; + + set->set_max_inflight = max; + set->set_producer = func; + set->set_producer_arg = arg; + + return set; +} +EXPORT_SYMBOL(ptlrpc_prep_fcset); + +/** + * Wind down and free request set structure previously allocated with + * ptlrpc_prep_set. + * Ensures that all requests on the set have completed and removes + * all requests from the request list in a set. + * If any unsent request happen to be on the list, pretends that they got + * an error in flight and calls their completion handler. + */ +void ptlrpc_set_destroy(struct ptlrpc_request_set *set) +{ + struct list_head *tmp; + struct list_head *next; + int expected_phase; + int n = 0; + + /* Requests on the set should either all be completed, or all be new */ + expected_phase = (atomic_read(&set->set_remaining) == 0) ? + RQ_PHASE_COMPLETE : RQ_PHASE_NEW; + list_for_each(tmp, &set->set_requests) { + struct ptlrpc_request *req = + list_entry(tmp, struct ptlrpc_request, + rq_set_chain); + + LASSERT(req->rq_phase == expected_phase); + n++; + } + + LASSERTF(atomic_read(&set->set_remaining) == 0 || + atomic_read(&set->set_remaining) == n, "%d / %d\n", + atomic_read(&set->set_remaining), n); + + list_for_each_safe(tmp, next, &set->set_requests) { + struct ptlrpc_request *req = + list_entry(tmp, struct ptlrpc_request, + rq_set_chain); + list_del_init(&req->rq_set_chain); + + LASSERT(req->rq_phase == expected_phase); + + if (req->rq_phase == RQ_PHASE_NEW) { + ptlrpc_req_interpret(NULL, req, -EBADR); + atomic_dec(&set->set_remaining); + } + + spin_lock(&req->rq_lock); + req->rq_set = NULL; + req->rq_invalid_rqset = 0; + spin_unlock(&req->rq_lock); + + ptlrpc_req_finished(req); + } + + LASSERT(atomic_read(&set->set_remaining) == 0); + + ptlrpc_reqset_put(set); +} +EXPORT_SYMBOL(ptlrpc_set_destroy); + +/** + * Add a callback function \a fn to the set. + * This function would be called when all requests on this set are completed. + * The function will be passed \a data argument. + */ +int ptlrpc_set_add_cb(struct ptlrpc_request_set *set, + set_interpreter_func fn, void *data) +{ + struct ptlrpc_set_cbdata *cbdata; + + OBD_ALLOC_PTR(cbdata); + if (cbdata == NULL) + return -ENOMEM; + + cbdata->psc_interpret = fn; + cbdata->psc_data = data; + list_add_tail(&cbdata->psc_item, &set->set_cblist); + + return 0; +} +EXPORT_SYMBOL(ptlrpc_set_add_cb); + +/** + * Add a new request to the general purpose request set. + * Assumes request reference from the caller. + */ +void ptlrpc_set_add_req(struct ptlrpc_request_set *set, + struct ptlrpc_request *req) +{ + LASSERT(list_empty(&req->rq_set_chain)); + + /* The set takes over the caller's request reference */ + list_add_tail(&req->rq_set_chain, &set->set_requests); + req->rq_set = set; + atomic_inc(&set->set_remaining); + req->rq_queued_time = cfs_time_current(); + + if (req->rq_reqmsg != NULL) + lustre_msg_set_jobid(req->rq_reqmsg, NULL); + + if (set->set_producer != NULL) + /* If the request set has a producer callback, the RPC must be + * sent straight away */ + ptlrpc_send_new_req(req); +} +EXPORT_SYMBOL(ptlrpc_set_add_req); + +/** + * Add a request to a request with dedicated server thread + * and wake the thread to make any necessary processing. + * Currently only used for ptlrpcd. + */ +void ptlrpc_set_add_new_req(struct ptlrpcd_ctl *pc, + struct ptlrpc_request *req) +{ + struct ptlrpc_request_set *set = pc->pc_set; + int count, i; + + LASSERT(req->rq_set == NULL); + LASSERT(test_bit(LIOD_STOP, &pc->pc_flags) == 0); + + spin_lock(&set->set_new_req_lock); + /* + * The set takes over the caller's request reference. + */ + req->rq_set = set; + req->rq_queued_time = cfs_time_current(); + list_add_tail(&req->rq_set_chain, &set->set_new_requests); + count = atomic_inc_return(&set->set_new_count); + spin_unlock(&set->set_new_req_lock); + + /* Only need to call wakeup once for the first entry. */ + if (count == 1) { + wake_up(&set->set_waitq); + + /* XXX: It maybe unnecessary to wakeup all the partners. But to + * guarantee the async RPC can be processed ASAP, we have + * no other better choice. It maybe fixed in future. */ + for (i = 0; i < pc->pc_npartners; i++) + wake_up(&pc->pc_partners[i]->pc_set->set_waitq); + } +} +EXPORT_SYMBOL(ptlrpc_set_add_new_req); + +/** + * Based on the current state of the import, determine if the request + * can be sent, is an error, or should be delayed. + * + * Returns true if this request should be delayed. If false, and + * *status is set, then the request can not be sent and *status is the + * error code. If false and status is 0, then request can be sent. + * + * The imp->imp_lock must be held. + */ +static int ptlrpc_import_delay_req(struct obd_import *imp, + struct ptlrpc_request *req, int *status) +{ + int delay = 0; + + LASSERT(status != NULL); + *status = 0; + + if (req->rq_ctx_init || req->rq_ctx_fini) { + /* always allow ctx init/fini rpc go through */ + } else if (imp->imp_state == LUSTRE_IMP_NEW) { + DEBUG_REQ(D_ERROR, req, "Uninitialized import."); + *status = -EIO; + } else if (imp->imp_state == LUSTRE_IMP_CLOSED) { + /* pings may safely race with umount */ + DEBUG_REQ(lustre_msg_get_opc(req->rq_reqmsg) == OBD_PING ? + D_HA : D_ERROR, req, "IMP_CLOSED "); + *status = -EIO; + } else if (ptlrpc_send_limit_expired(req)) { + /* probably doesn't need to be a D_ERROR after initial testing */ + DEBUG_REQ(D_ERROR, req, "send limit expired "); + *status = -EIO; + } else if (req->rq_send_state == LUSTRE_IMP_CONNECTING && + imp->imp_state == LUSTRE_IMP_CONNECTING) { + /* allow CONNECT even if import is invalid */ + if (atomic_read(&imp->imp_inval_count) != 0) { + DEBUG_REQ(D_ERROR, req, "invalidate in flight"); + *status = -EIO; + } + } else if (imp->imp_invalid || imp->imp_obd->obd_no_recov) { + if (!imp->imp_deactive) + DEBUG_REQ(D_NET, req, "IMP_INVALID"); + *status = -ESHUTDOWN; /* bz 12940 */ + } else if (req->rq_import_generation != imp->imp_generation) { + DEBUG_REQ(D_ERROR, req, "req wrong generation:"); + *status = -EIO; + } else if (req->rq_send_state != imp->imp_state) { + /* invalidate in progress - any requests should be drop */ + if (atomic_read(&imp->imp_inval_count) != 0) { + DEBUG_REQ(D_ERROR, req, "invalidate in flight"); + *status = -EIO; + } else if (imp->imp_dlm_fake || req->rq_no_delay) { + *status = -EWOULDBLOCK; + } else if (req->rq_allow_replay && + (imp->imp_state == LUSTRE_IMP_REPLAY || + imp->imp_state == LUSTRE_IMP_REPLAY_LOCKS || + imp->imp_state == LUSTRE_IMP_REPLAY_WAIT || + imp->imp_state == LUSTRE_IMP_RECOVER)) { + DEBUG_REQ(D_HA, req, "allow during recovery.\n"); + } else { + delay = 1; + } + } + + return delay; +} + +/** + * Decide if the error message regarding provided request \a req + * should be printed to the console or not. + * Makes it's decision on request status and other properties. + * Returns 1 to print error on the system console or 0 if not. + */ +static int ptlrpc_console_allow(struct ptlrpc_request *req) +{ + __u32 opc; + int err; + + LASSERT(req->rq_reqmsg != NULL); + opc = lustre_msg_get_opc(req->rq_reqmsg); + + /* Suppress particular reconnect errors which are to be expected. No + * errors are suppressed for the initial connection on an import */ + if ((lustre_handle_is_used(&req->rq_import->imp_remote_handle)) && + (opc == OST_CONNECT || opc == MDS_CONNECT || opc == MGS_CONNECT)) { + + /* Suppress timed out reconnect requests */ + if (req->rq_timedout) + return 0; + + /* Suppress unavailable/again reconnect requests */ + err = lustre_msg_get_status(req->rq_repmsg); + if (err == -ENODEV || err == -EAGAIN) + return 0; + } + + return 1; +} + +/** + * Check request processing status. + * Returns the status. + */ +static int ptlrpc_check_status(struct ptlrpc_request *req) +{ + int err; + + err = lustre_msg_get_status(req->rq_repmsg); + if (lustre_msg_get_type(req->rq_repmsg) == PTL_RPC_MSG_ERR) { + struct obd_import *imp = req->rq_import; + __u32 opc = lustre_msg_get_opc(req->rq_reqmsg); + if (ptlrpc_console_allow(req)) + LCONSOLE_ERROR_MSG(0x011, "%s: Communicating with %s, operation %s failed with %d.\n", + imp->imp_obd->obd_name, + libcfs_nid2str( + imp->imp_connection->c_peer.nid), + ll_opcode2str(opc), err); + return err < 0 ? err : -EINVAL; + } + + if (err < 0) { + DEBUG_REQ(D_INFO, req, "status is %d", err); + } else if (err > 0) { + /* XXX: translate this error from net to host */ + DEBUG_REQ(D_INFO, req, "status is %d", err); + } + + return err; +} + +/** + * save pre-versions of objects into request for replay. + * Versions are obtained from server reply. + * used for VBR. + */ +static void ptlrpc_save_versions(struct ptlrpc_request *req) +{ + struct lustre_msg *repmsg = req->rq_repmsg; + struct lustre_msg *reqmsg = req->rq_reqmsg; + __u64 *versions = lustre_msg_get_versions(repmsg); + + if (lustre_msg_get_flags(req->rq_reqmsg) & MSG_REPLAY) + return; + + LASSERT(versions); + lustre_msg_set_versions(reqmsg, versions); + CDEBUG(D_INFO, "Client save versions [%#llx/%#llx]\n", + versions[0], versions[1]); +} + +/** + * Callback function called when client receives RPC reply for \a req. + * Returns 0 on success or error code. + * The return value would be assigned to req->rq_status by the caller + * as request processing status. + * This function also decides if the request needs to be saved for later replay. + */ +static int after_reply(struct ptlrpc_request *req) +{ + struct obd_import *imp = req->rq_import; + struct obd_device *obd = req->rq_import->imp_obd; + int rc; + struct timeval work_start; + long timediff; + + LASSERT(obd != NULL); + /* repbuf must be unlinked */ + LASSERT(!req->rq_receiving_reply && !req->rq_reply_unlink); + + if (req->rq_reply_truncate) { + if (ptlrpc_no_resend(req)) { + DEBUG_REQ(D_ERROR, req, "reply buffer overflow, expected: %d, actual size: %d", + req->rq_nob_received, req->rq_repbuf_len); + return -EOVERFLOW; + } + + sptlrpc_cli_free_repbuf(req); + /* Pass the required reply buffer size (include + * space for early reply). + * NB: no need to roundup because alloc_repbuf + * will roundup it */ + req->rq_replen = req->rq_nob_received; + req->rq_nob_received = 0; + spin_lock(&req->rq_lock); + req->rq_resend = 1; + spin_unlock(&req->rq_lock); + return 0; + } + + /* + * NB Until this point, the whole of the incoming message, + * including buflens, status etc is in the sender's byte order. + */ + rc = sptlrpc_cli_unwrap_reply(req); + if (rc) { + DEBUG_REQ(D_ERROR, req, "unwrap reply failed (%d):", rc); + return rc; + } + + /* + * Security layer unwrap might ask resend this request. + */ + if (req->rq_resend) + return 0; + + rc = unpack_reply(req); + if (rc) + return rc; + + /* retry indefinitely on EINPROGRESS */ + if (lustre_msg_get_status(req->rq_repmsg) == -EINPROGRESS && + ptlrpc_no_resend(req) == 0 && !req->rq_no_retry_einprogress) { + time_t now = get_seconds(); + + DEBUG_REQ(D_RPCTRACE, req, "Resending request on EINPROGRESS"); + spin_lock(&req->rq_lock); + req->rq_resend = 1; + spin_unlock(&req->rq_lock); + req->rq_nr_resend++; + + /* allocate new xid to avoid reply reconstruction */ + if (!req->rq_bulk) { + /* new xid is already allocated for bulk in + * ptlrpc_check_set() */ + req->rq_xid = ptlrpc_next_xid(); + DEBUG_REQ(D_RPCTRACE, req, "Allocating new xid for resend on EINPROGRESS"); + } + + /* Readjust the timeout for current conditions */ + ptlrpc_at_set_req_timeout(req); + /* delay resend to give a chance to the server to get ready. + * The delay is increased by 1s on every resend and is capped to + * the current request timeout (i.e. obd_timeout if AT is off, + * or AT service time x 125% + 5s, see at_est2timeout) */ + if (req->rq_nr_resend > req->rq_timeout) + req->rq_sent = now + req->rq_timeout; + else + req->rq_sent = now + req->rq_nr_resend; + + return 0; + } + + do_gettimeofday(&work_start); + timediff = cfs_timeval_sub(&work_start, &req->rq_arrival_time, NULL); + if (obd->obd_svc_stats != NULL) { + lprocfs_counter_add(obd->obd_svc_stats, PTLRPC_REQWAIT_CNTR, + timediff); + ptlrpc_lprocfs_rpc_sent(req, timediff); + } + + if (lustre_msg_get_type(req->rq_repmsg) != PTL_RPC_MSG_REPLY && + lustre_msg_get_type(req->rq_repmsg) != PTL_RPC_MSG_ERR) { + DEBUG_REQ(D_ERROR, req, "invalid packet received (type=%u)", + lustre_msg_get_type(req->rq_repmsg)); + return -EPROTO; + } + + if (lustre_msg_get_opc(req->rq_reqmsg) != OBD_PING) + CFS_FAIL_TIMEOUT(OBD_FAIL_PTLRPC_PAUSE_REP, cfs_fail_val); + ptlrpc_at_adj_service(req, lustre_msg_get_timeout(req->rq_repmsg)); + ptlrpc_at_adj_net_latency(req, + lustre_msg_get_service_time(req->rq_repmsg)); + + rc = ptlrpc_check_status(req); + imp->imp_connect_error = rc; + + if (rc) { + /* + * Either we've been evicted, or the server has failed for + * some reason. Try to reconnect, and if that fails, punt to + * the upcall. + */ + if (ll_rpc_recoverable_error(rc)) { + if (req->rq_send_state != LUSTRE_IMP_FULL || + imp->imp_obd->obd_no_recov || imp->imp_dlm_fake) { + return rc; + } + ptlrpc_request_handle_notconn(req); + return rc; + } + } else { + /* + * Let's look if server sent slv. Do it only for RPC with + * rc == 0. + */ + ldlm_cli_update_pool(req); + } + + /* + * Store transno in reqmsg for replay. + */ + if (!(lustre_msg_get_flags(req->rq_reqmsg) & MSG_REPLAY)) { + req->rq_transno = lustre_msg_get_transno(req->rq_repmsg); + lustre_msg_set_transno(req->rq_reqmsg, req->rq_transno); + } + + if (imp->imp_replayable) { + spin_lock(&imp->imp_lock); + /* + * No point in adding already-committed requests to the replay + * list, we will just remove them immediately. b=9829 + */ + if (req->rq_transno != 0 && + (req->rq_transno > + lustre_msg_get_last_committed(req->rq_repmsg) || + req->rq_replay)) { + /** version recovery */ + ptlrpc_save_versions(req); + ptlrpc_retain_replayable_request(req, imp); + } else if (req->rq_commit_cb != NULL && + list_empty(&req->rq_replay_list)) { + /* NB: don't call rq_commit_cb if it's already on + * rq_replay_list, ptlrpc_free_committed() will call + * it later, see LU-3618 for details */ + spin_unlock(&imp->imp_lock); + req->rq_commit_cb(req); + spin_lock(&imp->imp_lock); + } + + /* + * Replay-enabled imports return commit-status information. + */ + if (lustre_msg_get_last_committed(req->rq_repmsg)) { + imp->imp_peer_committed_transno = + lustre_msg_get_last_committed(req->rq_repmsg); + } + + ptlrpc_free_committed(imp); + + if (!list_empty(&imp->imp_replay_list)) { + struct ptlrpc_request *last; + + last = list_entry(imp->imp_replay_list.prev, + struct ptlrpc_request, + rq_replay_list); + /* + * Requests with rq_replay stay on the list even if no + * commit is expected. + */ + if (last->rq_transno > imp->imp_peer_committed_transno) + ptlrpc_pinger_commit_expected(imp); + } + + spin_unlock(&imp->imp_lock); + } + + return rc; +} + +/** + * Helper function to send request \a req over the network for the first time + * Also adjusts request phase. + * Returns 0 on success or error code. + */ +static int ptlrpc_send_new_req(struct ptlrpc_request *req) +{ + struct obd_import *imp = req->rq_import; + int rc; + + LASSERT(req->rq_phase == RQ_PHASE_NEW); + if (req->rq_sent && (req->rq_sent > get_seconds()) && + (!req->rq_generation_set || + req->rq_import_generation == imp->imp_generation)) + return 0; + + ptlrpc_rqphase_move(req, RQ_PHASE_RPC); + + spin_lock(&imp->imp_lock); + + if (!req->rq_generation_set) + req->rq_import_generation = imp->imp_generation; + + if (ptlrpc_import_delay_req(imp, req, &rc)) { + spin_lock(&req->rq_lock); + req->rq_waiting = 1; + spin_unlock(&req->rq_lock); + + DEBUG_REQ(D_HA, req, "req from PID %d waiting for recovery: (%s != %s)", + lustre_msg_get_status(req->rq_reqmsg), + ptlrpc_import_state_name(req->rq_send_state), + ptlrpc_import_state_name(imp->imp_state)); + LASSERT(list_empty(&req->rq_list)); + list_add_tail(&req->rq_list, &imp->imp_delayed_list); + atomic_inc(&req->rq_import->imp_inflight); + spin_unlock(&imp->imp_lock); + return 0; + } + + if (rc != 0) { + spin_unlock(&imp->imp_lock); + req->rq_status = rc; + ptlrpc_rqphase_move(req, RQ_PHASE_INTERPRET); + return rc; + } + + LASSERT(list_empty(&req->rq_list)); + list_add_tail(&req->rq_list, &imp->imp_sending_list); + atomic_inc(&req->rq_import->imp_inflight); + spin_unlock(&imp->imp_lock); + + lustre_msg_set_status(req->rq_reqmsg, current_pid()); + + rc = sptlrpc_req_refresh_ctx(req, -1); + if (rc) { + if (req->rq_err) { + req->rq_status = rc; + return 1; + } + spin_lock(&req->rq_lock); + req->rq_wait_ctx = 1; + spin_unlock(&req->rq_lock); + return 0; + } + + CDEBUG(D_RPCTRACE, "Sending RPC pname:cluuid:pid:xid:nid:opc %s:%s:%d:%llu:%s:%d\n", + current_comm(), + imp->imp_obd->obd_uuid.uuid, + lustre_msg_get_status(req->rq_reqmsg), req->rq_xid, + libcfs_nid2str(imp->imp_connection->c_peer.nid), + lustre_msg_get_opc(req->rq_reqmsg)); + + rc = ptl_send_rpc(req, 0); + if (rc) { + DEBUG_REQ(D_HA, req, "send failed (%d); expect timeout", rc); + spin_lock(&req->rq_lock); + req->rq_net_err = 1; + spin_unlock(&req->rq_lock); + return rc; + } + return 0; +} + +static inline int ptlrpc_set_producer(struct ptlrpc_request_set *set) +{ + int remaining, rc; + + LASSERT(set->set_producer != NULL); + + remaining = atomic_read(&set->set_remaining); + + /* populate the ->set_requests list with requests until we + * reach the maximum number of RPCs in flight for this set */ + while (atomic_read(&set->set_remaining) < set->set_max_inflight) { + rc = set->set_producer(set, set->set_producer_arg); + if (rc == -ENOENT) { + /* no more RPC to produce */ + set->set_producer = NULL; + set->set_producer_arg = NULL; + return 0; + } + } + + return (atomic_read(&set->set_remaining) - remaining); +} + +/** + * this sends any unsent RPCs in \a set and returns 1 if all are sent + * and no more replies are expected. + * (it is possible to get less replies than requests sent e.g. due to timed out + * requests or requests that we had trouble to send out) + * + * NOTE: This function contains a potential schedule point (cond_resched()). + */ +int ptlrpc_check_set(const struct lu_env *env, struct ptlrpc_request_set *set) +{ + struct list_head *tmp, *next; + struct list_head comp_reqs; + int force_timer_recalc = 0; + + if (atomic_read(&set->set_remaining) == 0) + return 1; + + INIT_LIST_HEAD(&comp_reqs); + list_for_each_safe(tmp, next, &set->set_requests) { + struct ptlrpc_request *req = + list_entry(tmp, struct ptlrpc_request, + rq_set_chain); + struct obd_import *imp = req->rq_import; + int unregistered = 0; + int rc = 0; + + /* This schedule point is mainly for the ptlrpcd caller of this + * function. Most ptlrpc sets are not long-lived and unbounded + * in length, but at the least the set used by the ptlrpcd is. + * Since the processing time is unbounded, we need to insert an + * explicit schedule point to make the thread well-behaved. + */ + cond_resched(); + + if (req->rq_phase == RQ_PHASE_NEW && + ptlrpc_send_new_req(req)) { + force_timer_recalc = 1; + } + + /* delayed send - skip */ + if (req->rq_phase == RQ_PHASE_NEW && req->rq_sent) + continue; + + /* delayed resend - skip */ + if (req->rq_phase == RQ_PHASE_RPC && req->rq_resend && + req->rq_sent > get_seconds()) + continue; + + if (!(req->rq_phase == RQ_PHASE_RPC || + req->rq_phase == RQ_PHASE_BULK || + req->rq_phase == RQ_PHASE_INTERPRET || + req->rq_phase == RQ_PHASE_UNREGISTERING || + req->rq_phase == RQ_PHASE_COMPLETE)) { + DEBUG_REQ(D_ERROR, req, "bad phase %x", req->rq_phase); + LBUG(); + } + + if (req->rq_phase == RQ_PHASE_UNREGISTERING) { + LASSERT(req->rq_next_phase != req->rq_phase); + LASSERT(req->rq_next_phase != RQ_PHASE_UNDEFINED); + + /* + * Skip processing until reply is unlinked. We + * can't return to pool before that and we can't + * call interpret before that. We need to make + * sure that all rdma transfers finished and will + * not corrupt any data. + */ + if (ptlrpc_client_recv_or_unlink(req) || + ptlrpc_client_bulk_active(req)) + continue; + + /* + * Turn fail_loc off to prevent it from looping + * forever. + */ + if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REPL_UNLINK)) { + OBD_FAIL_CHECK_ORSET(OBD_FAIL_PTLRPC_LONG_REPL_UNLINK, + OBD_FAIL_ONCE); + } + if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_BULK_UNLINK)) { + OBD_FAIL_CHECK_ORSET(OBD_FAIL_PTLRPC_LONG_BULK_UNLINK, + OBD_FAIL_ONCE); + } + + /* + * Move to next phase if reply was successfully + * unlinked. + */ + ptlrpc_rqphase_move(req, req->rq_next_phase); + } + + if (req->rq_phase == RQ_PHASE_COMPLETE) { + list_move_tail(&req->rq_set_chain, &comp_reqs); + continue; + } + + if (req->rq_phase == RQ_PHASE_INTERPRET) + goto interpret; + + /* + * Note that this also will start async reply unlink. + */ + if (req->rq_net_err && !req->rq_timedout) { + ptlrpc_expire_one_request(req, 1); + + /* + * Check if we still need to wait for unlink. + */ + if (ptlrpc_client_recv_or_unlink(req) || + ptlrpc_client_bulk_active(req)) + continue; + /* If there is no need to resend, fail it now. */ + if (req->rq_no_resend) { + if (req->rq_status == 0) + req->rq_status = -EIO; + ptlrpc_rqphase_move(req, RQ_PHASE_INTERPRET); + goto interpret; + } else { + continue; + } + } + + if (req->rq_err) { + spin_lock(&req->rq_lock); + req->rq_replied = 0; + spin_unlock(&req->rq_lock); + if (req->rq_status == 0) + req->rq_status = -EIO; + ptlrpc_rqphase_move(req, RQ_PHASE_INTERPRET); + goto interpret; + } + + /* ptlrpc_set_wait->l_wait_event sets lwi_allow_intr + * so it sets rq_intr regardless of individual rpc + * timeouts. The synchronous IO waiting path sets + * rq_intr irrespective of whether ptlrpcd + * has seen a timeout. Our policy is to only interpret + * interrupted rpcs after they have timed out, so we + * need to enforce that here. + */ + + if (req->rq_intr && (req->rq_timedout || req->rq_waiting || + req->rq_wait_ctx)) { + req->rq_status = -EINTR; + ptlrpc_rqphase_move(req, RQ_PHASE_INTERPRET); + goto interpret; + } + + if (req->rq_phase == RQ_PHASE_RPC) { + if (req->rq_timedout || req->rq_resend || + req->rq_waiting || req->rq_wait_ctx) { + int status; + + if (!ptlrpc_unregister_reply(req, 1)) + continue; + + spin_lock(&imp->imp_lock); + if (ptlrpc_import_delay_req(imp, req, + &status)) { + /* put on delay list - only if we wait + * recovery finished - before send */ + list_del_init(&req->rq_list); + list_add_tail(&req->rq_list, + &imp-> + imp_delayed_list); + spin_unlock(&imp->imp_lock); + continue; + } + + if (status != 0) { + req->rq_status = status; + ptlrpc_rqphase_move(req, + RQ_PHASE_INTERPRET); + spin_unlock(&imp->imp_lock); + goto interpret; + } + if (ptlrpc_no_resend(req) && + !req->rq_wait_ctx) { + req->rq_status = -ENOTCONN; + ptlrpc_rqphase_move(req, + RQ_PHASE_INTERPRET); + spin_unlock(&imp->imp_lock); + goto interpret; + } + + list_del_init(&req->rq_list); + list_add_tail(&req->rq_list, + &imp->imp_sending_list); + + spin_unlock(&imp->imp_lock); + + spin_lock(&req->rq_lock); + req->rq_waiting = 0; + spin_unlock(&req->rq_lock); + + if (req->rq_timedout || req->rq_resend) { + /* This is re-sending anyways, + * let's mark req as resend. */ + spin_lock(&req->rq_lock); + req->rq_resend = 1; + spin_unlock(&req->rq_lock); + if (req->rq_bulk) { + __u64 old_xid; + + if (!ptlrpc_unregister_bulk(req, 1)) + continue; + + /* ensure previous bulk fails */ + old_xid = req->rq_xid; + req->rq_xid = ptlrpc_next_xid(); + CDEBUG(D_HA, "resend bulk old x%llu new x%llu\n", + old_xid, req->rq_xid); + } + } + /* + * rq_wait_ctx is only touched by ptlrpcd, + * so no lock is needed here. + */ + status = sptlrpc_req_refresh_ctx(req, -1); + if (status) { + if (req->rq_err) { + req->rq_status = status; + spin_lock(&req->rq_lock); + req->rq_wait_ctx = 0; + spin_unlock(&req->rq_lock); + force_timer_recalc = 1; + } else { + spin_lock(&req->rq_lock); + req->rq_wait_ctx = 1; + spin_unlock(&req->rq_lock); + } + + continue; + } else { + spin_lock(&req->rq_lock); + req->rq_wait_ctx = 0; + spin_unlock(&req->rq_lock); + } + + rc = ptl_send_rpc(req, 0); + if (rc) { + DEBUG_REQ(D_HA, req, + "send failed: rc = %d", rc); + force_timer_recalc = 1; + spin_lock(&req->rq_lock); + req->rq_net_err = 1; + spin_unlock(&req->rq_lock); + continue; + } + /* need to reset the timeout */ + force_timer_recalc = 1; + } + + spin_lock(&req->rq_lock); + + if (ptlrpc_client_early(req)) { + ptlrpc_at_recv_early_reply(req); + spin_unlock(&req->rq_lock); + continue; + } + + /* Still waiting for a reply? */ + if (ptlrpc_client_recv(req)) { + spin_unlock(&req->rq_lock); + continue; + } + + /* Did we actually receive a reply? */ + if (!ptlrpc_client_replied(req)) { + spin_unlock(&req->rq_lock); + continue; + } + + spin_unlock(&req->rq_lock); + + /* unlink from net because we are going to + * swab in-place of reply buffer */ + unregistered = ptlrpc_unregister_reply(req, 1); + if (!unregistered) + continue; + + req->rq_status = after_reply(req); + if (req->rq_resend) + continue; + + /* If there is no bulk associated with this request, + * then we're done and should let the interpreter + * process the reply. Similarly if the RPC returned + * an error, and therefore the bulk will never arrive. + */ + if (req->rq_bulk == NULL || req->rq_status < 0) { + ptlrpc_rqphase_move(req, RQ_PHASE_INTERPRET); + goto interpret; + } + + ptlrpc_rqphase_move(req, RQ_PHASE_BULK); + } + + LASSERT(req->rq_phase == RQ_PHASE_BULK); + if (ptlrpc_client_bulk_active(req)) + continue; + + if (req->rq_bulk->bd_failure) { + /* The RPC reply arrived OK, but the bulk screwed + * up! Dead weird since the server told us the RPC + * was good after getting the REPLY for her GET or + * the ACK for her PUT. */ + DEBUG_REQ(D_ERROR, req, "bulk transfer failed"); + req->rq_status = -EIO; + } + + ptlrpc_rqphase_move(req, RQ_PHASE_INTERPRET); + +interpret: + LASSERT(req->rq_phase == RQ_PHASE_INTERPRET); + + /* This moves to "unregistering" phase we need to wait for + * reply unlink. */ + if (!unregistered && !ptlrpc_unregister_reply(req, 1)) { + /* start async bulk unlink too */ + ptlrpc_unregister_bulk(req, 1); + continue; + } + + if (!ptlrpc_unregister_bulk(req, 1)) + continue; + + /* When calling interpret receiving already should be + * finished. */ + LASSERT(!req->rq_receiving_reply); + + ptlrpc_req_interpret(env, req, req->rq_status); + + if (ptlrpcd_check_work(req)) { + atomic_dec(&set->set_remaining); + continue; + } + ptlrpc_rqphase_move(req, RQ_PHASE_COMPLETE); + + CDEBUG(req->rq_reqmsg != NULL ? D_RPCTRACE : 0, + "Completed RPC pname:cluuid:pid:xid:nid:opc %s:%s:%d:%llu:%s:%d\n", + current_comm(), imp->imp_obd->obd_uuid.uuid, + lustre_msg_get_status(req->rq_reqmsg), req->rq_xid, + libcfs_nid2str(imp->imp_connection->c_peer.nid), + lustre_msg_get_opc(req->rq_reqmsg)); + + spin_lock(&imp->imp_lock); + /* Request already may be not on sending or delaying list. This + * may happen in the case of marking it erroneous for the case + * ptlrpc_import_delay_req(req, status) find it impossible to + * allow sending this rpc and returns *status != 0. */ + if (!list_empty(&req->rq_list)) { + list_del_init(&req->rq_list); + atomic_dec(&imp->imp_inflight); + } + spin_unlock(&imp->imp_lock); + + atomic_dec(&set->set_remaining); + wake_up_all(&imp->imp_recovery_waitq); + + if (set->set_producer) { + /* produce a new request if possible */ + if (ptlrpc_set_producer(set) > 0) + force_timer_recalc = 1; + + /* free the request that has just been completed + * in order not to pollute set->set_requests */ + list_del_init(&req->rq_set_chain); + spin_lock(&req->rq_lock); + req->rq_set = NULL; + req->rq_invalid_rqset = 0; + spin_unlock(&req->rq_lock); + + /* record rq_status to compute the final status later */ + if (req->rq_status != 0) + set->set_rc = req->rq_status; + ptlrpc_req_finished(req); + } else { + list_move_tail(&req->rq_set_chain, &comp_reqs); + } + } + + /* move completed request at the head of list so it's easier for + * caller to find them */ + list_splice(&comp_reqs, &set->set_requests); + + /* If we hit an error, we want to recover promptly. */ + return atomic_read(&set->set_remaining) == 0 || force_timer_recalc; +} +EXPORT_SYMBOL(ptlrpc_check_set); + +/** + * Time out request \a req. is \a async_unlink is set, that means do not wait + * until LNet actually confirms network buffer unlinking. + * Return 1 if we should give up further retrying attempts or 0 otherwise. + */ +int ptlrpc_expire_one_request(struct ptlrpc_request *req, int async_unlink) +{ + struct obd_import *imp = req->rq_import; + int rc = 0; + + spin_lock(&req->rq_lock); + req->rq_timedout = 1; + spin_unlock(&req->rq_lock); + + DEBUG_REQ(D_WARNING, req, "Request sent has %s: [sent "CFS_DURATION_T + "/real "CFS_DURATION_T"]", + req->rq_net_err ? "failed due to network error" : + ((req->rq_real_sent == 0 || + time_before((unsigned long)req->rq_real_sent, (unsigned long)req->rq_sent) || + cfs_time_aftereq(req->rq_real_sent, req->rq_deadline)) ? + "timed out for sent delay" : "timed out for slow reply"), + req->rq_sent, req->rq_real_sent); + + if (imp != NULL && obd_debug_peer_on_timeout) + LNetCtl(IOC_LIBCFS_DEBUG_PEER, &imp->imp_connection->c_peer); + + ptlrpc_unregister_reply(req, async_unlink); + ptlrpc_unregister_bulk(req, async_unlink); + + if (obd_dump_on_timeout) + libcfs_debug_dumplog(); + + if (imp == NULL) { + DEBUG_REQ(D_HA, req, "NULL import: already cleaned up?"); + return 1; + } + + atomic_inc(&imp->imp_timeouts); + + /* The DLM server doesn't want recovery run on its imports. */ + if (imp->imp_dlm_fake) + return 1; + + /* If this request is for recovery or other primordial tasks, + * then error it out here. */ + if (req->rq_ctx_init || req->rq_ctx_fini || + req->rq_send_state != LUSTRE_IMP_FULL || + imp->imp_obd->obd_no_recov) { + DEBUG_REQ(D_RPCTRACE, req, "err -110, sent_state=%s (now=%s)", + ptlrpc_import_state_name(req->rq_send_state), + ptlrpc_import_state_name(imp->imp_state)); + spin_lock(&req->rq_lock); + req->rq_status = -ETIMEDOUT; + req->rq_err = 1; + spin_unlock(&req->rq_lock); + return 1; + } + + /* if a request can't be resent we can't wait for an answer after + the timeout */ + if (ptlrpc_no_resend(req)) { + DEBUG_REQ(D_RPCTRACE, req, "TIMEOUT-NORESEND:"); + rc = 1; + } + + ptlrpc_fail_import(imp, lustre_msg_get_conn_cnt(req->rq_reqmsg)); + + return rc; +} + +/** + * Time out all uncompleted requests in request set pointed by \a data + * Callback used when waiting on sets with l_wait_event. + * Always returns 1. + */ +int ptlrpc_expired_set(void *data) +{ + struct ptlrpc_request_set *set = data; + struct list_head *tmp; + time_t now = get_seconds(); + + LASSERT(set != NULL); + + /* + * A timeout expired. See which reqs it applies to... + */ + list_for_each(tmp, &set->set_requests) { + struct ptlrpc_request *req = + list_entry(tmp, struct ptlrpc_request, + rq_set_chain); + + /* don't expire request waiting for context */ + if (req->rq_wait_ctx) + continue; + + /* Request in-flight? */ + if (!((req->rq_phase == RQ_PHASE_RPC && + !req->rq_waiting && !req->rq_resend) || + (req->rq_phase == RQ_PHASE_BULK))) + continue; + + if (req->rq_timedout || /* already dealt with */ + req->rq_deadline > now) /* not expired */ + continue; + + /* Deal with this guy. Do it asynchronously to not block + * ptlrpcd thread. */ + ptlrpc_expire_one_request(req, 1); + } + + /* + * When waiting for a whole set, we always break out of the + * sleep so we can recalculate the timeout, or enable interrupts + * if everyone's timed out. + */ + return 1; +} +EXPORT_SYMBOL(ptlrpc_expired_set); + +/** + * Sets rq_intr flag in \a req under spinlock. + */ +void ptlrpc_mark_interrupted(struct ptlrpc_request *req) +{ + spin_lock(&req->rq_lock); + req->rq_intr = 1; + spin_unlock(&req->rq_lock); +} +EXPORT_SYMBOL(ptlrpc_mark_interrupted); + +/** + * Interrupts (sets interrupted flag) all uncompleted requests in + * a set \a data. Callback for l_wait_event for interruptible waits. + */ +void ptlrpc_interrupted_set(void *data) +{ + struct ptlrpc_request_set *set = data; + struct list_head *tmp; + + LASSERT(set != NULL); + CDEBUG(D_RPCTRACE, "INTERRUPTED SET %p\n", set); + + list_for_each(tmp, &set->set_requests) { + struct ptlrpc_request *req = + list_entry(tmp, struct ptlrpc_request, + rq_set_chain); + + if (req->rq_phase != RQ_PHASE_RPC && + req->rq_phase != RQ_PHASE_UNREGISTERING) + continue; + + ptlrpc_mark_interrupted(req); + } +} +EXPORT_SYMBOL(ptlrpc_interrupted_set); + +/** + * Get the smallest timeout in the set; this does NOT set a timeout. + */ +int ptlrpc_set_next_timeout(struct ptlrpc_request_set *set) +{ + struct list_head *tmp; + time_t now = get_seconds(); + int timeout = 0; + struct ptlrpc_request *req; + int deadline; + + list_for_each(tmp, &set->set_requests) { + req = list_entry(tmp, struct ptlrpc_request, rq_set_chain); + + /* + * Request in-flight? + */ + if (!(((req->rq_phase == RQ_PHASE_RPC) && !req->rq_waiting) || + (req->rq_phase == RQ_PHASE_BULK) || + (req->rq_phase == RQ_PHASE_NEW))) + continue; + + /* + * Already timed out. + */ + if (req->rq_timedout) + continue; + + /* + * Waiting for ctx. + */ + if (req->rq_wait_ctx) + continue; + + if (req->rq_phase == RQ_PHASE_NEW) + deadline = req->rq_sent; + else if (req->rq_phase == RQ_PHASE_RPC && req->rq_resend) + deadline = req->rq_sent; + else + deadline = req->rq_sent + req->rq_timeout; + + if (deadline <= now) /* actually expired already */ + timeout = 1; /* ASAP */ + else if (timeout == 0 || timeout > deadline - now) + timeout = deadline - now; + } + return timeout; +} +EXPORT_SYMBOL(ptlrpc_set_next_timeout); + +/** + * Send all unset request from the set and then wait until all + * requests in the set complete (either get a reply, timeout, get an + * error or otherwise be interrupted). + * Returns 0 on success or error code otherwise. + */ +int ptlrpc_set_wait(struct ptlrpc_request_set *set) +{ + struct list_head *tmp; + struct ptlrpc_request *req; + struct l_wait_info lwi; + int rc, timeout; + + if (set->set_producer) + (void)ptlrpc_set_producer(set); + else + list_for_each(tmp, &set->set_requests) { + req = list_entry(tmp, struct ptlrpc_request, + rq_set_chain); + if (req->rq_phase == RQ_PHASE_NEW) + (void)ptlrpc_send_new_req(req); + } + + if (list_empty(&set->set_requests)) + return 0; + + do { + timeout = ptlrpc_set_next_timeout(set); + + /* wait until all complete, interrupted, or an in-flight + * req times out */ + CDEBUG(D_RPCTRACE, "set %p going to sleep for %d seconds\n", + set, timeout); + + if (timeout == 0 && !cfs_signal_pending()) + /* + * No requests are in-flight (ether timed out + * or delayed), so we can allow interrupts. + * We still want to block for a limited time, + * so we allow interrupts during the timeout. + */ + lwi = LWI_TIMEOUT_INTR_ALL(cfs_time_seconds(1), + ptlrpc_expired_set, + ptlrpc_interrupted_set, set); + else + /* + * At least one request is in flight, so no + * interrupts are allowed. Wait until all + * complete, or an in-flight req times out. + */ + lwi = LWI_TIMEOUT(cfs_time_seconds(timeout ? timeout : 1), + ptlrpc_expired_set, set); + + rc = l_wait_event(set->set_waitq, ptlrpc_check_set(NULL, set), &lwi); + + /* LU-769 - if we ignored the signal because it was already + * pending when we started, we need to handle it now or we risk + * it being ignored forever */ + if (rc == -ETIMEDOUT && !lwi.lwi_allow_intr && + cfs_signal_pending()) { + sigset_t blocked_sigs = + cfs_block_sigsinv(LUSTRE_FATAL_SIGS); + + /* In fact we only interrupt for the "fatal" signals + * like SIGINT or SIGKILL. We still ignore less + * important signals since ptlrpc set is not easily + * reentrant from userspace again */ + if (cfs_signal_pending()) + ptlrpc_interrupted_set(set); + cfs_restore_sigs(blocked_sigs); + } + + LASSERT(rc == 0 || rc == -EINTR || rc == -ETIMEDOUT); + + /* -EINTR => all requests have been flagged rq_intr so next + * check completes. + * -ETIMEDOUT => someone timed out. When all reqs have + * timed out, signals are enabled allowing completion with + * EINTR. + * I don't really care if we go once more round the loop in + * the error cases -eeb. */ + if (rc == 0 && atomic_read(&set->set_remaining) == 0) { + list_for_each(tmp, &set->set_requests) { + req = list_entry(tmp, struct ptlrpc_request, + rq_set_chain); + spin_lock(&req->rq_lock); + req->rq_invalid_rqset = 1; + spin_unlock(&req->rq_lock); + } + } + } while (rc != 0 || atomic_read(&set->set_remaining) != 0); + + LASSERT(atomic_read(&set->set_remaining) == 0); + + rc = set->set_rc; /* rq_status of already freed requests if any */ + list_for_each(tmp, &set->set_requests) { + req = list_entry(tmp, struct ptlrpc_request, rq_set_chain); + + LASSERT(req->rq_phase == RQ_PHASE_COMPLETE); + if (req->rq_status != 0) + rc = req->rq_status; + } + + if (set->set_interpret != NULL) { + int (*interpreter)(struct ptlrpc_request_set *set, void *, int) = + set->set_interpret; + rc = interpreter(set, set->set_arg, rc); + } else { + struct ptlrpc_set_cbdata *cbdata, *n; + int err; + + list_for_each_entry_safe(cbdata, n, + &set->set_cblist, psc_item) { + list_del_init(&cbdata->psc_item); + err = cbdata->psc_interpret(set, cbdata->psc_data, rc); + if (err && !rc) + rc = err; + OBD_FREE_PTR(cbdata); + } + } + + return rc; +} +EXPORT_SYMBOL(ptlrpc_set_wait); + +/** + * Helper function for request freeing. + * Called when request count reached zero and request needs to be freed. + * Removes request from all sorts of sending/replay lists it might be on, + * frees network buffers if any are present. + * If \a locked is set, that means caller is already holding import imp_lock + * and so we no longer need to reobtain it (for certain lists manipulations) + */ +static void __ptlrpc_free_req(struct ptlrpc_request *request, int locked) +{ + if (request == NULL) + return; + LASSERTF(!request->rq_receiving_reply, "req %p\n", request); + LASSERTF(request->rq_rqbd == NULL, "req %p\n", request);/* client-side */ + LASSERTF(list_empty(&request->rq_list), "req %p\n", request); + LASSERTF(list_empty(&request->rq_set_chain), "req %p\n", request); + LASSERTF(list_empty(&request->rq_exp_list), "req %p\n", request); + LASSERTF(!request->rq_replay, "req %p\n", request); + + req_capsule_fini(&request->rq_pill); + + /* We must take it off the imp_replay_list first. Otherwise, we'll set + * request->rq_reqmsg to NULL while osc_close is dereferencing it. */ + if (request->rq_import != NULL) { + if (!locked) + spin_lock(&request->rq_import->imp_lock); + list_del_init(&request->rq_replay_list); + if (!locked) + spin_unlock(&request->rq_import->imp_lock); + } + LASSERTF(list_empty(&request->rq_replay_list), "req %p\n", request); + + if (atomic_read(&request->rq_refcount) != 0) { + DEBUG_REQ(D_ERROR, request, + "freeing request with nonzero refcount"); + LBUG(); + } + + if (request->rq_repbuf != NULL) + sptlrpc_cli_free_repbuf(request); + if (request->rq_export != NULL) { + class_export_put(request->rq_export); + request->rq_export = NULL; + } + if (request->rq_import != NULL) { + class_import_put(request->rq_import); + request->rq_import = NULL; + } + if (request->rq_bulk != NULL) + ptlrpc_free_bulk_pin(request->rq_bulk); + + if (request->rq_reqbuf != NULL || request->rq_clrbuf != NULL) + sptlrpc_cli_free_reqbuf(request); + + if (request->rq_cli_ctx) + sptlrpc_req_put_ctx(request, !locked); + + if (request->rq_pool) + __ptlrpc_free_req_to_pool(request); + else + ptlrpc_request_cache_free(request); +} + +static int __ptlrpc_req_finished(struct ptlrpc_request *request, int locked); +/** + * Drop one request reference. Must be called with import imp_lock held. + * When reference count drops to zero, request is freed. + */ +void ptlrpc_req_finished_with_imp_lock(struct ptlrpc_request *request) +{ + assert_spin_locked(&request->rq_import->imp_lock); + (void)__ptlrpc_req_finished(request, 1); +} +EXPORT_SYMBOL(ptlrpc_req_finished_with_imp_lock); + +/** + * Helper function + * Drops one reference count for request \a request. + * \a locked set indicates that caller holds import imp_lock. + * Frees the request when reference count reaches zero. + */ +static int __ptlrpc_req_finished(struct ptlrpc_request *request, int locked) +{ + if (request == NULL) + return 1; + + if (request == LP_POISON || + request->rq_reqmsg == LP_POISON) { + CERROR("dereferencing freed request (bug 575)\n"); + LBUG(); + return 1; + } + + DEBUG_REQ(D_INFO, request, "refcount now %u", + atomic_read(&request->rq_refcount) - 1); + + if (atomic_dec_and_test(&request->rq_refcount)) { + __ptlrpc_free_req(request, locked); + return 1; + } + + return 0; +} + +/** + * Drops one reference count for a request. + */ +void ptlrpc_req_finished(struct ptlrpc_request *request) +{ + __ptlrpc_req_finished(request, 0); +} +EXPORT_SYMBOL(ptlrpc_req_finished); + +/** + * Returns xid of a \a request + */ +__u64 ptlrpc_req_xid(struct ptlrpc_request *request) +{ + return request->rq_xid; +} +EXPORT_SYMBOL(ptlrpc_req_xid); + +/** + * Disengage the client's reply buffer from the network + * NB does _NOT_ unregister any client-side bulk. + * IDEMPOTENT, but _not_ safe against concurrent callers. + * The request owner (i.e. the thread doing the I/O) must call... + * Returns 0 on success or 1 if unregistering cannot be made. + */ +int ptlrpc_unregister_reply(struct ptlrpc_request *request, int async) +{ + int rc; + wait_queue_head_t *wq; + struct l_wait_info lwi; + + /* + * Might sleep. + */ + LASSERT(!in_interrupt()); + + /* + * Let's setup deadline for reply unlink. + */ + if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REPL_UNLINK) && + async && request->rq_reply_deadline == 0) + request->rq_reply_deadline = get_seconds()+LONG_UNLINK; + + /* + * Nothing left to do. + */ + if (!ptlrpc_client_recv_or_unlink(request)) + return 1; + + LNetMDUnlink(request->rq_reply_md_h); + + /* + * Let's check it once again. + */ + if (!ptlrpc_client_recv_or_unlink(request)) + return 1; + + /* + * Move to "Unregistering" phase as reply was not unlinked yet. + */ + ptlrpc_rqphase_move(request, RQ_PHASE_UNREGISTERING); + + /* + * Do not wait for unlink to finish. + */ + if (async) + return 0; + + /* + * We have to l_wait_event() whatever the result, to give liblustre + * a chance to run reply_in_callback(), and to make sure we've + * unlinked before returning a req to the pool. + */ + if (request->rq_set != NULL) + wq = &request->rq_set->set_waitq; + else + wq = &request->rq_reply_waitq; + + for (;;) { + /* Network access will complete in finite time but the HUGE + * timeout lets us CWARN for visibility of sluggish NALs */ + lwi = LWI_TIMEOUT_INTERVAL(cfs_time_seconds(LONG_UNLINK), + cfs_time_seconds(1), NULL, NULL); + rc = l_wait_event(*wq, !ptlrpc_client_recv_or_unlink(request), + &lwi); + if (rc == 0) { + ptlrpc_rqphase_move(request, request->rq_next_phase); + return 1; + } + + LASSERT(rc == -ETIMEDOUT); + DEBUG_REQ(D_WARNING, request, + "Unexpectedly long timeout rvcng=%d unlnk=%d/%d", + request->rq_receiving_reply, + request->rq_req_unlink, request->rq_reply_unlink); + } + return 0; +} +EXPORT_SYMBOL(ptlrpc_unregister_reply); + +static void ptlrpc_free_request(struct ptlrpc_request *req) +{ + spin_lock(&req->rq_lock); + req->rq_replay = 0; + spin_unlock(&req->rq_lock); + + if (req->rq_commit_cb != NULL) + req->rq_commit_cb(req); + list_del_init(&req->rq_replay_list); + + __ptlrpc_req_finished(req, 1); +} + +/** + * the request is committed and dropped from the replay list of its import + */ +void ptlrpc_request_committed(struct ptlrpc_request *req, int force) +{ + struct obd_import *imp = req->rq_import; + + spin_lock(&imp->imp_lock); + if (list_empty(&req->rq_replay_list)) { + spin_unlock(&imp->imp_lock); + return; + } + + if (force || req->rq_transno <= imp->imp_peer_committed_transno) + ptlrpc_free_request(req); + + spin_unlock(&imp->imp_lock); +} +EXPORT_SYMBOL(ptlrpc_request_committed); + +/** + * Iterates through replay_list on import and prunes + * all requests have transno smaller than last_committed for the + * import and don't have rq_replay set. + * Since requests are sorted in transno order, stops when meeting first + * transno bigger than last_committed. + * caller must hold imp->imp_lock + */ +void ptlrpc_free_committed(struct obd_import *imp) +{ + struct ptlrpc_request *req, *saved; + struct ptlrpc_request *last_req = NULL; /* temporary fire escape */ + bool skip_committed_list = true; + + LASSERT(imp != NULL); + assert_spin_locked(&imp->imp_lock); + + if (imp->imp_peer_committed_transno == imp->imp_last_transno_checked && + imp->imp_generation == imp->imp_last_generation_checked) { + CDEBUG(D_INFO, "%s: skip recheck: last_committed %llu\n", + imp->imp_obd->obd_name, imp->imp_peer_committed_transno); + return; + } + CDEBUG(D_RPCTRACE, "%s: committing for last_committed %llu gen %d\n", + imp->imp_obd->obd_name, imp->imp_peer_committed_transno, + imp->imp_generation); + + if (imp->imp_generation != imp->imp_last_generation_checked) + skip_committed_list = false; + + imp->imp_last_transno_checked = imp->imp_peer_committed_transno; + imp->imp_last_generation_checked = imp->imp_generation; + + list_for_each_entry_safe(req, saved, &imp->imp_replay_list, + rq_replay_list) { + /* XXX ok to remove when 1357 resolved - rread 05/29/03 */ + LASSERT(req != last_req); + last_req = req; + + if (req->rq_transno == 0) { + DEBUG_REQ(D_EMERG, req, "zero transno during replay"); + LBUG(); + } + if (req->rq_import_generation < imp->imp_generation) { + DEBUG_REQ(D_RPCTRACE, req, "free request with old gen"); + goto free_req; + } + + /* not yet committed */ + if (req->rq_transno > imp->imp_peer_committed_transno) { + DEBUG_REQ(D_RPCTRACE, req, "stopping search"); + break; + } + + if (req->rq_replay) { + DEBUG_REQ(D_RPCTRACE, req, "keeping (FL_REPLAY)"); + list_move_tail(&req->rq_replay_list, + &imp->imp_committed_list); + continue; + } + + DEBUG_REQ(D_INFO, req, "commit (last_committed %llu)", + imp->imp_peer_committed_transno); +free_req: + ptlrpc_free_request(req); + } + if (skip_committed_list) + return; + + list_for_each_entry_safe(req, saved, &imp->imp_committed_list, + rq_replay_list) { + LASSERT(req->rq_transno != 0); + if (req->rq_import_generation < imp->imp_generation) { + DEBUG_REQ(D_RPCTRACE, req, "free stale open request"); + ptlrpc_free_request(req); + } + } +} + +void ptlrpc_cleanup_client(struct obd_import *imp) +{ +} +EXPORT_SYMBOL(ptlrpc_cleanup_client); + +/** + * Schedule previously sent request for resend. + * For bulk requests we assign new xid (to avoid problems with + * lost replies and therefore several transfers landing into same buffer + * from different sending attempts). + */ +void ptlrpc_resend_req(struct ptlrpc_request *req) +{ + DEBUG_REQ(D_HA, req, "going to resend"); + spin_lock(&req->rq_lock); + + /* Request got reply but linked to the import list still. + Let ptlrpc_check_set() to process it. */ + if (ptlrpc_client_replied(req)) { + spin_unlock(&req->rq_lock); + DEBUG_REQ(D_HA, req, "it has reply, so skip it"); + return; + } + + lustre_msg_set_handle(req->rq_reqmsg, &(struct lustre_handle){ 0 }); + req->rq_status = -EAGAIN; + + req->rq_resend = 1; + req->rq_net_err = 0; + req->rq_timedout = 0; + if (req->rq_bulk) { + __u64 old_xid = req->rq_xid; + + /* ensure previous bulk fails */ + req->rq_xid = ptlrpc_next_xid(); + CDEBUG(D_HA, "resend bulk old x%llu new x%llu\n", + old_xid, req->rq_xid); + } + ptlrpc_client_wake_req(req); + spin_unlock(&req->rq_lock); +} +EXPORT_SYMBOL(ptlrpc_resend_req); + +/* XXX: this function and rq_status are currently unused */ +void ptlrpc_restart_req(struct ptlrpc_request *req) +{ + DEBUG_REQ(D_HA, req, "restarting (possibly-)completed request"); + req->rq_status = -ERESTARTSYS; + + spin_lock(&req->rq_lock); + req->rq_restart = 1; + req->rq_timedout = 0; + ptlrpc_client_wake_req(req); + spin_unlock(&req->rq_lock); +} +EXPORT_SYMBOL(ptlrpc_restart_req); + +/** + * Grab additional reference on a request \a req + */ +struct ptlrpc_request *ptlrpc_request_addref(struct ptlrpc_request *req) +{ + atomic_inc(&req->rq_refcount); + return req; +} +EXPORT_SYMBOL(ptlrpc_request_addref); + +/** + * Add a request to import replay_list. + * Must be called under imp_lock + */ +void ptlrpc_retain_replayable_request(struct ptlrpc_request *req, + struct obd_import *imp) +{ + struct list_head *tmp; + + assert_spin_locked(&imp->imp_lock); + + if (req->rq_transno == 0) { + DEBUG_REQ(D_EMERG, req, "saving request with zero transno"); + LBUG(); + } + + /* clear this for new requests that were resent as well + as resent replayed requests. */ + lustre_msg_clear_flags(req->rq_reqmsg, MSG_RESENT); + + /* don't re-add requests that have been replayed */ + if (!list_empty(&req->rq_replay_list)) + return; + + lustre_msg_add_flags(req->rq_reqmsg, MSG_REPLAY); + + LASSERT(imp->imp_replayable); + /* Balanced in ptlrpc_free_committed, usually. */ + ptlrpc_request_addref(req); + list_for_each_prev(tmp, &imp->imp_replay_list) { + struct ptlrpc_request *iter = + list_entry(tmp, struct ptlrpc_request, + rq_replay_list); + + /* We may have duplicate transnos if we create and then + * open a file, or for closes retained if to match creating + * opens, so use req->rq_xid as a secondary key. + * (See bugs 684, 685, and 428.) + * XXX no longer needed, but all opens need transnos! + */ + if (iter->rq_transno > req->rq_transno) + continue; + + if (iter->rq_transno == req->rq_transno) { + LASSERT(iter->rq_xid != req->rq_xid); + if (iter->rq_xid > req->rq_xid) + continue; + } + + list_add(&req->rq_replay_list, &iter->rq_replay_list); + return; + } + + list_add(&req->rq_replay_list, &imp->imp_replay_list); +} +EXPORT_SYMBOL(ptlrpc_retain_replayable_request); + +/** + * Send request and wait until it completes. + * Returns request processing status. + */ +int ptlrpc_queue_wait(struct ptlrpc_request *req) +{ + struct ptlrpc_request_set *set; + int rc; + + LASSERT(req->rq_set == NULL); + LASSERT(!req->rq_receiving_reply); + + set = ptlrpc_prep_set(); + if (set == NULL) { + CERROR("Unable to allocate ptlrpc set."); + return -ENOMEM; + } + + /* for distributed debugging */ + lustre_msg_set_status(req->rq_reqmsg, current_pid()); + + /* add a ref for the set (see comment in ptlrpc_set_add_req) */ + ptlrpc_request_addref(req); + ptlrpc_set_add_req(set, req); + rc = ptlrpc_set_wait(set); + ptlrpc_set_destroy(set); + + return rc; +} +EXPORT_SYMBOL(ptlrpc_queue_wait); + +struct ptlrpc_replay_async_args { + int praa_old_state; + int praa_old_status; +}; + +/** + * Callback used for replayed requests reply processing. + * In case of successful reply calls registered request replay callback. + * In case of error restart replay process. + */ +static int ptlrpc_replay_interpret(const struct lu_env *env, + struct ptlrpc_request *req, + void *data, int rc) +{ + struct ptlrpc_replay_async_args *aa = data; + struct obd_import *imp = req->rq_import; + + atomic_dec(&imp->imp_replay_inflight); + + if (!ptlrpc_client_replied(req)) { + CERROR("request replay timed out, restarting recovery\n"); + rc = -ETIMEDOUT; + goto out; + } + + if (lustre_msg_get_type(req->rq_repmsg) == PTL_RPC_MSG_ERR && + (lustre_msg_get_status(req->rq_repmsg) == -ENOTCONN || + lustre_msg_get_status(req->rq_repmsg) == -ENODEV)) { + rc = lustre_msg_get_status(req->rq_repmsg); + goto out; + } + + /** VBR: check version failure */ + if (lustre_msg_get_status(req->rq_repmsg) == -EOVERFLOW) { + /** replay was failed due to version mismatch */ + DEBUG_REQ(D_WARNING, req, "Version mismatch during replay\n"); + spin_lock(&imp->imp_lock); + imp->imp_vbr_failed = 1; + imp->imp_no_lock_replay = 1; + spin_unlock(&imp->imp_lock); + lustre_msg_set_status(req->rq_repmsg, aa->praa_old_status); + } else { + /** The transno had better not change over replay. */ + LASSERTF(lustre_msg_get_transno(req->rq_reqmsg) == + lustre_msg_get_transno(req->rq_repmsg) || + lustre_msg_get_transno(req->rq_repmsg) == 0, + "%#llx/%#llx\n", + lustre_msg_get_transno(req->rq_reqmsg), + lustre_msg_get_transno(req->rq_repmsg)); + } + + spin_lock(&imp->imp_lock); + /** if replays by version then gap occur on server, no trust to locks */ + if (lustre_msg_get_flags(req->rq_repmsg) & MSG_VERSION_REPLAY) + imp->imp_no_lock_replay = 1; + imp->imp_last_replay_transno = lustre_msg_get_transno(req->rq_reqmsg); + spin_unlock(&imp->imp_lock); + LASSERT(imp->imp_last_replay_transno); + + /* transaction number shouldn't be bigger than the latest replayed */ + if (req->rq_transno > lustre_msg_get_transno(req->rq_reqmsg)) { + DEBUG_REQ(D_ERROR, req, + "Reported transno %llu is bigger than the replayed one: %llu", + req->rq_transno, + lustre_msg_get_transno(req->rq_reqmsg)); + rc = -EINVAL; + goto out; + } + + DEBUG_REQ(D_HA, req, "got rep"); + + /* let the callback do fixups, possibly including in the request */ + if (req->rq_replay_cb) + req->rq_replay_cb(req); + + if (ptlrpc_client_replied(req) && + lustre_msg_get_status(req->rq_repmsg) != aa->praa_old_status) { + DEBUG_REQ(D_ERROR, req, "status %d, old was %d", + lustre_msg_get_status(req->rq_repmsg), + aa->praa_old_status); + } else { + /* Put it back for re-replay. */ + lustre_msg_set_status(req->rq_repmsg, aa->praa_old_status); + } + + /* + * Errors while replay can set transno to 0, but + * imp_last_replay_transno shouldn't be set to 0 anyway + */ + if (req->rq_transno == 0) + CERROR("Transno is 0 during replay!\n"); + + /* continue with recovery */ + rc = ptlrpc_import_recovery_state_machine(imp); + out: + req->rq_send_state = aa->praa_old_state; + + if (rc != 0) + /* this replay failed, so restart recovery */ + ptlrpc_connect_import(imp); + + return rc; +} + +/** + * Prepares and queues request for replay. + * Adds it to ptlrpcd queue for actual sending. + * Returns 0 on success. + */ +int ptlrpc_replay_req(struct ptlrpc_request *req) +{ + struct ptlrpc_replay_async_args *aa; + + LASSERT(req->rq_import->imp_state == LUSTRE_IMP_REPLAY); + + LASSERT(sizeof(*aa) <= sizeof(req->rq_async_args)); + aa = ptlrpc_req_async_args(req); + memset(aa, 0, sizeof(*aa)); + + /* Prepare request to be resent with ptlrpcd */ + aa->praa_old_state = req->rq_send_state; + req->rq_send_state = LUSTRE_IMP_REPLAY; + req->rq_phase = RQ_PHASE_NEW; + req->rq_next_phase = RQ_PHASE_UNDEFINED; + if (req->rq_repmsg) + aa->praa_old_status = lustre_msg_get_status(req->rq_repmsg); + req->rq_status = 0; + req->rq_interpret_reply = ptlrpc_replay_interpret; + /* Readjust the timeout for current conditions */ + ptlrpc_at_set_req_timeout(req); + + /* Tell server the net_latency, so the server can calculate how long + * it should wait for next replay */ + lustre_msg_set_service_time(req->rq_reqmsg, + ptlrpc_at_get_net_latency(req)); + DEBUG_REQ(D_HA, req, "REPLAY"); + + atomic_inc(&req->rq_import->imp_replay_inflight); + ptlrpc_request_addref(req); /* ptlrpcd needs a ref */ + + ptlrpcd_add_req(req, PDL_POLICY_LOCAL, -1); + return 0; +} +EXPORT_SYMBOL(ptlrpc_replay_req); + +/** + * Aborts all in-flight request on import \a imp sending and delayed lists + */ +void ptlrpc_abort_inflight(struct obd_import *imp) +{ + struct list_head *tmp, *n; + + /* Make sure that no new requests get processed for this import. + * ptlrpc_{queue,set}_wait must (and does) hold imp_lock while testing + * this flag and then putting requests on sending_list or delayed_list. + */ + spin_lock(&imp->imp_lock); + + /* XXX locking? Maybe we should remove each request with the list + * locked? Also, how do we know if the requests on the list are + * being freed at this time? + */ + list_for_each_safe(tmp, n, &imp->imp_sending_list) { + struct ptlrpc_request *req = + list_entry(tmp, struct ptlrpc_request, rq_list); + + DEBUG_REQ(D_RPCTRACE, req, "inflight"); + + spin_lock(&req->rq_lock); + if (req->rq_import_generation < imp->imp_generation) { + req->rq_err = 1; + req->rq_status = -EIO; + ptlrpc_client_wake_req(req); + } + spin_unlock(&req->rq_lock); + } + + list_for_each_safe(tmp, n, &imp->imp_delayed_list) { + struct ptlrpc_request *req = + list_entry(tmp, struct ptlrpc_request, rq_list); + + DEBUG_REQ(D_RPCTRACE, req, "aborting waiting req"); + + spin_lock(&req->rq_lock); + if (req->rq_import_generation < imp->imp_generation) { + req->rq_err = 1; + req->rq_status = -EIO; + ptlrpc_client_wake_req(req); + } + spin_unlock(&req->rq_lock); + } + + /* Last chance to free reqs left on the replay list, but we + * will still leak reqs that haven't committed. */ + if (imp->imp_replayable) + ptlrpc_free_committed(imp); + + spin_unlock(&imp->imp_lock); +} +EXPORT_SYMBOL(ptlrpc_abort_inflight); + +/** + * Abort all uncompleted requests in request set \a set + */ +void ptlrpc_abort_set(struct ptlrpc_request_set *set) +{ + struct list_head *tmp, *pos; + + LASSERT(set != NULL); + + list_for_each_safe(pos, tmp, &set->set_requests) { + struct ptlrpc_request *req = + list_entry(pos, struct ptlrpc_request, + rq_set_chain); + + spin_lock(&req->rq_lock); + if (req->rq_phase != RQ_PHASE_RPC) { + spin_unlock(&req->rq_lock); + continue; + } + + req->rq_err = 1; + req->rq_status = -EINTR; + ptlrpc_client_wake_req(req); + spin_unlock(&req->rq_lock); + } +} + +static __u64 ptlrpc_last_xid; +static spinlock_t ptlrpc_last_xid_lock; + +/** + * Initialize the XID for the node. This is common among all requests on + * this node, and only requires the property that it is monotonically + * increasing. It does not need to be sequential. Since this is also used + * as the RDMA match bits, it is important that a single client NOT have + * the same match bits for two different in-flight requests, hence we do + * NOT want to have an XID per target or similar. + * + * To avoid an unlikely collision between match bits after a client reboot + * (which would deliver old data into the wrong RDMA buffer) initialize + * the XID based on the current time, assuming a maximum RPC rate of 1M RPC/s. + * If the time is clearly incorrect, we instead use a 62-bit random number. + * In the worst case the random number will overflow 1M RPCs per second in + * 9133 years, or permutations thereof. + */ +#define YEAR_2004 (1ULL << 30) +void ptlrpc_init_xid(void) +{ + time_t now = get_seconds(); + + spin_lock_init(&ptlrpc_last_xid_lock); + if (now < YEAR_2004) { + cfs_get_random_bytes(&ptlrpc_last_xid, sizeof(ptlrpc_last_xid)); + ptlrpc_last_xid >>= 2; + ptlrpc_last_xid |= (1ULL << 61); + } else { + ptlrpc_last_xid = (__u64)now << 20; + } + + /* Always need to be aligned to a power-of-two for multi-bulk BRW */ + CLASSERT((PTLRPC_BULK_OPS_COUNT & (PTLRPC_BULK_OPS_COUNT - 1)) == 0); + ptlrpc_last_xid &= PTLRPC_BULK_OPS_MASK; +} + +/** + * Increase xid and returns resulting new value to the caller. + * + * Multi-bulk BRW RPCs consume multiple XIDs for each bulk transfer, starting + * at the returned xid, up to xid + PTLRPC_BULK_OPS_COUNT - 1. The BRW RPC + * itself uses the last bulk xid needed, so the server can determine the + * the number of bulk transfers from the RPC XID and a bitmask. The starting + * xid must align to a power-of-two value. + * + * This is assumed to be true due to the initial ptlrpc_last_xid + * value also being initialized to a power-of-two value. LU-1431 + */ +__u64 ptlrpc_next_xid(void) +{ + __u64 next; + + spin_lock(&ptlrpc_last_xid_lock); + next = ptlrpc_last_xid + PTLRPC_BULK_OPS_COUNT; + ptlrpc_last_xid = next; + spin_unlock(&ptlrpc_last_xid_lock); + + return next; +} +EXPORT_SYMBOL(ptlrpc_next_xid); + +/** + * Get a glimpse at what next xid value might have been. + * Returns possible next xid. + */ +__u64 ptlrpc_sample_next_xid(void) +{ +#if BITS_PER_LONG == 32 + /* need to avoid possible word tearing on 32-bit systems */ + __u64 next; + + spin_lock(&ptlrpc_last_xid_lock); + next = ptlrpc_last_xid + PTLRPC_BULK_OPS_COUNT; + spin_unlock(&ptlrpc_last_xid_lock); + + return next; +#else + /* No need to lock, since returned value is racy anyways */ + return ptlrpc_last_xid + PTLRPC_BULK_OPS_COUNT; +#endif +} +EXPORT_SYMBOL(ptlrpc_sample_next_xid); + +/** + * Functions for operating ptlrpc workers. + * + * A ptlrpc work is a function which will be running inside ptlrpc context. + * The callback shouldn't sleep otherwise it will block that ptlrpcd thread. + * + * 1. after a work is created, it can be used many times, that is: + * handler = ptlrpcd_alloc_work(); + * ptlrpcd_queue_work(); + * + * queue it again when necessary: + * ptlrpcd_queue_work(); + * ptlrpcd_destroy_work(); + * 2. ptlrpcd_queue_work() can be called by multiple processes meanwhile, but + * it will only be queued once in any time. Also as its name implies, it may + * have delay before it really runs by ptlrpcd thread. + */ +struct ptlrpc_work_async_args { + int (*cb)(const struct lu_env *, void *); + void *cbdata; +}; + +static void ptlrpcd_add_work_req(struct ptlrpc_request *req) +{ + /* re-initialize the req */ + req->rq_timeout = obd_timeout; + req->rq_sent = get_seconds(); + req->rq_deadline = req->rq_sent + req->rq_timeout; + req->rq_reply_deadline = req->rq_deadline; + req->rq_phase = RQ_PHASE_INTERPRET; + req->rq_next_phase = RQ_PHASE_COMPLETE; + req->rq_xid = ptlrpc_next_xid(); + req->rq_import_generation = req->rq_import->imp_generation; + + ptlrpcd_add_req(req, PDL_POLICY_ROUND, -1); +} + +static int work_interpreter(const struct lu_env *env, + struct ptlrpc_request *req, void *data, int rc) +{ + struct ptlrpc_work_async_args *arg = data; + + LASSERT(ptlrpcd_check_work(req)); + LASSERT(arg->cb != NULL); + + rc = arg->cb(env, arg->cbdata); + + list_del_init(&req->rq_set_chain); + req->rq_set = NULL; + + if (atomic_dec_return(&req->rq_refcount) > 1) { + atomic_set(&req->rq_refcount, 2); + ptlrpcd_add_work_req(req); + } + return rc; +} + +static int worker_format; + +static int ptlrpcd_check_work(struct ptlrpc_request *req) +{ + return req->rq_pill.rc_fmt == (void *)&worker_format; +} + +/** + * Create a work for ptlrpc. + */ +void *ptlrpcd_alloc_work(struct obd_import *imp, + int (*cb)(const struct lu_env *, void *), void *cbdata) +{ + struct ptlrpc_request *req = NULL; + struct ptlrpc_work_async_args *args; + + might_sleep(); + + if (cb == NULL) + return ERR_PTR(-EINVAL); + + /* copy some code from deprecated fakereq. */ + req = ptlrpc_request_cache_alloc(GFP_NOFS); + if (req == NULL) { + CERROR("ptlrpc: run out of memory!\n"); + return ERR_PTR(-ENOMEM); + } + + req->rq_send_state = LUSTRE_IMP_FULL; + req->rq_type = PTL_RPC_MSG_REQUEST; + req->rq_import = class_import_get(imp); + req->rq_export = NULL; + req->rq_interpret_reply = work_interpreter; + /* don't want reply */ + req->rq_receiving_reply = 0; + req->rq_req_unlink = req->rq_reply_unlink = 0; + req->rq_no_delay = req->rq_no_resend = 1; + req->rq_pill.rc_fmt = (void *)&worker_format; + + spin_lock_init(&req->rq_lock); + INIT_LIST_HEAD(&req->rq_list); + INIT_LIST_HEAD(&req->rq_replay_list); + INIT_LIST_HEAD(&req->rq_set_chain); + INIT_LIST_HEAD(&req->rq_history_list); + INIT_LIST_HEAD(&req->rq_exp_list); + init_waitqueue_head(&req->rq_reply_waitq); + init_waitqueue_head(&req->rq_set_waitq); + atomic_set(&req->rq_refcount, 1); + + CLASSERT(sizeof(*args) <= sizeof(req->rq_async_args)); + args = ptlrpc_req_async_args(req); + args->cb = cb; + args->cbdata = cbdata; + + return req; +} +EXPORT_SYMBOL(ptlrpcd_alloc_work); + +void ptlrpcd_destroy_work(void *handler) +{ + struct ptlrpc_request *req = handler; + + if (req) + ptlrpc_req_finished(req); +} +EXPORT_SYMBOL(ptlrpcd_destroy_work); + +int ptlrpcd_queue_work(void *handler) +{ + struct ptlrpc_request *req = handler; + + /* + * Check if the req is already being queued. + * + * Here comes a trick: it lacks a way of checking if a req is being + * processed reliably in ptlrpc. Here I have to use refcount of req + * for this purpose. This is okay because the caller should use this + * req as opaque data. - Jinshan + */ + LASSERT(atomic_read(&req->rq_refcount) > 0); + if (atomic_inc_return(&req->rq_refcount) == 2) + ptlrpcd_add_work_req(req); + return 0; +} +EXPORT_SYMBOL(ptlrpcd_queue_work); diff --git a/drivers/staging/lustre/lustre/ptlrpc/connection.c b/drivers/staging/lustre/lustre/ptlrpc/connection.c new file mode 100644 index 000000000..7e27397ce --- /dev/null +++ b/drivers/staging/lustre/lustre/ptlrpc/connection.c @@ -0,0 +1,241 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2011, Intel Corporation. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + */ + +#define DEBUG_SUBSYSTEM S_RPC +#include "../include/obd_support.h" +#include "../include/obd_class.h" +#include "../include/lustre_net.h" + +#include "ptlrpc_internal.h" + +static struct cfs_hash *conn_hash; +static cfs_hash_ops_t conn_hash_ops; + +struct ptlrpc_connection * +ptlrpc_connection_get(lnet_process_id_t peer, lnet_nid_t self, + struct obd_uuid *uuid) +{ + struct ptlrpc_connection *conn, *conn2; + + conn = cfs_hash_lookup(conn_hash, &peer); + if (conn) + goto out; + + OBD_ALLOC_PTR(conn); + if (!conn) + return NULL; + + conn->c_peer = peer; + conn->c_self = self; + INIT_HLIST_NODE(&conn->c_hash); + atomic_set(&conn->c_refcount, 1); + if (uuid) + obd_str2uuid(&conn->c_remote_uuid, uuid->uuid); + + /* + * Add the newly created conn to the hash, on key collision we + * lost a racing addition and must destroy our newly allocated + * connection. The object which exists in the has will be + * returned and may be compared against out object. + */ + /* In the function below, .hs_keycmp resolves to + * conn_keycmp() */ + /* coverity[overrun-buffer-val] */ + conn2 = cfs_hash_findadd_unique(conn_hash, &peer, &conn->c_hash); + if (conn != conn2) { + OBD_FREE_PTR(conn); + conn = conn2; + } +out: + CDEBUG(D_INFO, "conn=%p refcount %d to %s\n", + conn, atomic_read(&conn->c_refcount), + libcfs_nid2str(conn->c_peer.nid)); + return conn; +} +EXPORT_SYMBOL(ptlrpc_connection_get); + +int ptlrpc_connection_put(struct ptlrpc_connection *conn) +{ + int rc = 0; + + if (!conn) + return rc; + + LASSERT(atomic_read(&conn->c_refcount) > 1); + + /* + * We do not remove connection from hashtable and + * do not free it even if last caller released ref, + * as we want to have it cached for the case it is + * needed again. + * + * Deallocating it and later creating new connection + * again would be wastful. This way we also avoid + * expensive locking to protect things from get/put + * race when found cached connection is freed by + * ptlrpc_connection_put(). + * + * It will be freed later in module unload time, + * when ptlrpc_connection_fini()->lh_exit->conn_exit() + * path is called. + */ + if (atomic_dec_return(&conn->c_refcount) == 1) + rc = 1; + + CDEBUG(D_INFO, "PUT conn=%p refcount %d to %s\n", + conn, atomic_read(&conn->c_refcount), + libcfs_nid2str(conn->c_peer.nid)); + + return rc; +} +EXPORT_SYMBOL(ptlrpc_connection_put); + +struct ptlrpc_connection * +ptlrpc_connection_addref(struct ptlrpc_connection *conn) +{ + atomic_inc(&conn->c_refcount); + CDEBUG(D_INFO, "conn=%p refcount %d to %s\n", + conn, atomic_read(&conn->c_refcount), + libcfs_nid2str(conn->c_peer.nid)); + + return conn; +} +EXPORT_SYMBOL(ptlrpc_connection_addref); + +int ptlrpc_connection_init(void) +{ + conn_hash = cfs_hash_create("CONN_HASH", + HASH_CONN_CUR_BITS, + HASH_CONN_MAX_BITS, + HASH_CONN_BKT_BITS, 0, + CFS_HASH_MIN_THETA, + CFS_HASH_MAX_THETA, + &conn_hash_ops, CFS_HASH_DEFAULT); + if (!conn_hash) + return -ENOMEM; + + return 0; +} +EXPORT_SYMBOL(ptlrpc_connection_init); + +void ptlrpc_connection_fini(void) +{ + cfs_hash_putref(conn_hash); +} +EXPORT_SYMBOL(ptlrpc_connection_fini); + +/* + * Hash operations for net_peer<->connection + */ +static unsigned +conn_hashfn(struct cfs_hash *hs, const void *key, unsigned mask) +{ + return cfs_hash_djb2_hash(key, sizeof(lnet_process_id_t), mask); +} + +static int +conn_keycmp(const void *key, struct hlist_node *hnode) +{ + struct ptlrpc_connection *conn; + const lnet_process_id_t *conn_key; + + LASSERT(key != NULL); + conn_key = (lnet_process_id_t *)key; + conn = hlist_entry(hnode, struct ptlrpc_connection, c_hash); + + return conn_key->nid == conn->c_peer.nid && + conn_key->pid == conn->c_peer.pid; +} + +static void * +conn_key(struct hlist_node *hnode) +{ + struct ptlrpc_connection *conn; + + conn = hlist_entry(hnode, struct ptlrpc_connection, c_hash); + return &conn->c_peer; +} + +static void * +conn_object(struct hlist_node *hnode) +{ + return hlist_entry(hnode, struct ptlrpc_connection, c_hash); +} + +static void +conn_get(struct cfs_hash *hs, struct hlist_node *hnode) +{ + struct ptlrpc_connection *conn; + + conn = hlist_entry(hnode, struct ptlrpc_connection, c_hash); + atomic_inc(&conn->c_refcount); +} + +static void +conn_put_locked(struct cfs_hash *hs, struct hlist_node *hnode) +{ + struct ptlrpc_connection *conn; + + conn = hlist_entry(hnode, struct ptlrpc_connection, c_hash); + atomic_dec(&conn->c_refcount); +} + +static void +conn_exit(struct cfs_hash *hs, struct hlist_node *hnode) +{ + struct ptlrpc_connection *conn; + + conn = hlist_entry(hnode, struct ptlrpc_connection, c_hash); + /* + * Nothing should be left. Connection user put it and + * connection also was deleted from table by this time + * so we should have 0 refs. + */ + LASSERTF(atomic_read(&conn->c_refcount) == 0, + "Busy connection with %d refs\n", + atomic_read(&conn->c_refcount)); + OBD_FREE_PTR(conn); +} + +static cfs_hash_ops_t conn_hash_ops = { + .hs_hash = conn_hashfn, + .hs_keycmp = conn_keycmp, + .hs_key = conn_key, + .hs_object = conn_object, + .hs_get = conn_get, + .hs_put_locked = conn_put_locked, + .hs_exit = conn_exit, +}; diff --git a/drivers/staging/lustre/lustre/ptlrpc/errno.c b/drivers/staging/lustre/lustre/ptlrpc/errno.c new file mode 100644 index 000000000..73f8374f1 --- /dev/null +++ b/drivers/staging/lustre/lustre/ptlrpc/errno.c @@ -0,0 +1,380 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.gnu.org/licenses/gpl-2.0.txt + * + * GPL HEADER END + */ +/* + * Copyright (C) 2011 FUJITSU LIMITED. All rights reserved. + * + * Copyright (c) 2013, Intel Corporation. + */ + +#include "../../include/linux/libcfs/libcfs.h" +#include "../include/lustre/lustre_errno.h" + +/* + * The two translation tables below must define a one-to-one mapping between + * host and network errnos. + * + * EWOULDBLOCK is equal to EAGAIN on all architectures except for parisc, which + * appears irrelevant. Thus, existing references to EWOULDBLOCK are fine. + * + * EDEADLOCK is equal to EDEADLK on x86 but not on sparc, at least. A sparc + * host has no context-free way to determine if a LUSTRE_EDEADLK represents an + * EDEADLK or an EDEADLOCK. Therefore, all existing references to EDEADLOCK + * that need to be transferred on wire have been replaced with EDEADLK. + */ +static int lustre_errno_hton_mapping[] = { + [EPERM] = LUSTRE_EPERM, + [ENOENT] = LUSTRE_ENOENT, + [ESRCH] = LUSTRE_ESRCH, + [EINTR] = LUSTRE_EINTR, + [EIO] = LUSTRE_EIO, + [ENXIO] = LUSTRE_ENXIO, + [E2BIG] = LUSTRE_E2BIG, + [ENOEXEC] = LUSTRE_ENOEXEC, + [EBADF] = LUSTRE_EBADF, + [ECHILD] = LUSTRE_ECHILD, + [EAGAIN] = LUSTRE_EAGAIN, + [ENOMEM] = LUSTRE_ENOMEM, + [EACCES] = LUSTRE_EACCES, + [EFAULT] = LUSTRE_EFAULT, + [ENOTBLK] = LUSTRE_ENOTBLK, + [EBUSY] = LUSTRE_EBUSY, + [EEXIST] = LUSTRE_EEXIST, + [EXDEV] = LUSTRE_EXDEV, + [ENODEV] = LUSTRE_ENODEV, + [ENOTDIR] = LUSTRE_ENOTDIR, + [EISDIR] = LUSTRE_EISDIR, + [EINVAL] = LUSTRE_EINVAL, + [ENFILE] = LUSTRE_ENFILE, + [EMFILE] = LUSTRE_EMFILE, + [ENOTTY] = LUSTRE_ENOTTY, + [ETXTBSY] = LUSTRE_ETXTBSY, + [EFBIG] = LUSTRE_EFBIG, + [ENOSPC] = LUSTRE_ENOSPC, + [ESPIPE] = LUSTRE_ESPIPE, + [EROFS] = LUSTRE_EROFS, + [EMLINK] = LUSTRE_EMLINK, + [EPIPE] = LUSTRE_EPIPE, + [EDOM] = LUSTRE_EDOM, + [ERANGE] = LUSTRE_ERANGE, + [EDEADLK] = LUSTRE_EDEADLK, + [ENAMETOOLONG] = LUSTRE_ENAMETOOLONG, + [ENOLCK] = LUSTRE_ENOLCK, + [ENOSYS] = LUSTRE_ENOSYS, + [ENOTEMPTY] = LUSTRE_ENOTEMPTY, + [ELOOP] = LUSTRE_ELOOP, + [ENOMSG] = LUSTRE_ENOMSG, + [EIDRM] = LUSTRE_EIDRM, + [ECHRNG] = LUSTRE_ECHRNG, + [EL2NSYNC] = LUSTRE_EL2NSYNC, + [EL3HLT] = LUSTRE_EL3HLT, + [EL3RST] = LUSTRE_EL3RST, + [ELNRNG] = LUSTRE_ELNRNG, + [EUNATCH] = LUSTRE_EUNATCH, + [ENOCSI] = LUSTRE_ENOCSI, + [EL2HLT] = LUSTRE_EL2HLT, + [EBADE] = LUSTRE_EBADE, + [EBADR] = LUSTRE_EBADR, + [EXFULL] = LUSTRE_EXFULL, + [ENOANO] = LUSTRE_ENOANO, + [EBADRQC] = LUSTRE_EBADRQC, + [EBADSLT] = LUSTRE_EBADSLT, + [EBFONT] = LUSTRE_EBFONT, + [ENOSTR] = LUSTRE_ENOSTR, + [ENODATA] = LUSTRE_ENODATA, + [ETIME] = LUSTRE_ETIME, + [ENOSR] = LUSTRE_ENOSR, + [ENONET] = LUSTRE_ENONET, + [ENOPKG] = LUSTRE_ENOPKG, + [EREMOTE] = LUSTRE_EREMOTE, + [ENOLINK] = LUSTRE_ENOLINK, + [EADV] = LUSTRE_EADV, + [ESRMNT] = LUSTRE_ESRMNT, + [ECOMM] = LUSTRE_ECOMM, + [EPROTO] = LUSTRE_EPROTO, + [EMULTIHOP] = LUSTRE_EMULTIHOP, + [EDOTDOT] = LUSTRE_EDOTDOT, + [EBADMSG] = LUSTRE_EBADMSG, + [EOVERFLOW] = LUSTRE_EOVERFLOW, + [ENOTUNIQ] = LUSTRE_ENOTUNIQ, + [EBADFD] = LUSTRE_EBADFD, + [EREMCHG] = LUSTRE_EREMCHG, + [ELIBACC] = LUSTRE_ELIBACC, + [ELIBBAD] = LUSTRE_ELIBBAD, + [ELIBSCN] = LUSTRE_ELIBSCN, + [ELIBMAX] = LUSTRE_ELIBMAX, + [ELIBEXEC] = LUSTRE_ELIBEXEC, + [EILSEQ] = LUSTRE_EILSEQ, + [ERESTART] = LUSTRE_ERESTART, + [ESTRPIPE] = LUSTRE_ESTRPIPE, + [EUSERS] = LUSTRE_EUSERS, + [ENOTSOCK] = LUSTRE_ENOTSOCK, + [EDESTADDRREQ] = LUSTRE_EDESTADDRREQ, + [EMSGSIZE] = LUSTRE_EMSGSIZE, + [EPROTOTYPE] = LUSTRE_EPROTOTYPE, + [ENOPROTOOPT] = LUSTRE_ENOPROTOOPT, + [EPROTONOSUPPORT] = LUSTRE_EPROTONOSUPPORT, + [ESOCKTNOSUPPORT] = LUSTRE_ESOCKTNOSUPPORT, + [EOPNOTSUPP] = LUSTRE_EOPNOTSUPP, + [EPFNOSUPPORT] = LUSTRE_EPFNOSUPPORT, + [EAFNOSUPPORT] = LUSTRE_EAFNOSUPPORT, + [EADDRINUSE] = LUSTRE_EADDRINUSE, + [EADDRNOTAVAIL] = LUSTRE_EADDRNOTAVAIL, + [ENETDOWN] = LUSTRE_ENETDOWN, + [ENETUNREACH] = LUSTRE_ENETUNREACH, + [ENETRESET] = LUSTRE_ENETRESET, + [ECONNABORTED] = LUSTRE_ECONNABORTED, + [ECONNRESET] = LUSTRE_ECONNRESET, + [ENOBUFS] = LUSTRE_ENOBUFS, + [EISCONN] = LUSTRE_EISCONN, + [ENOTCONN] = LUSTRE_ENOTCONN, + [ESHUTDOWN] = LUSTRE_ESHUTDOWN, + [ETOOMANYREFS] = LUSTRE_ETOOMANYREFS, + [ETIMEDOUT] = LUSTRE_ETIMEDOUT, + [ECONNREFUSED] = LUSTRE_ECONNREFUSED, + [EHOSTDOWN] = LUSTRE_EHOSTDOWN, + [EHOSTUNREACH] = LUSTRE_EHOSTUNREACH, + [EALREADY] = LUSTRE_EALREADY, + [EINPROGRESS] = LUSTRE_EINPROGRESS, + [ESTALE] = LUSTRE_ESTALE, + [EUCLEAN] = LUSTRE_EUCLEAN, + [ENOTNAM] = LUSTRE_ENOTNAM, + [ENAVAIL] = LUSTRE_ENAVAIL, + [EISNAM] = LUSTRE_EISNAM, + [EREMOTEIO] = LUSTRE_EREMOTEIO, + [EDQUOT] = LUSTRE_EDQUOT, + [ENOMEDIUM] = LUSTRE_ENOMEDIUM, + [EMEDIUMTYPE] = LUSTRE_EMEDIUMTYPE, + [ECANCELED] = LUSTRE_ECANCELED, + [ENOKEY] = LUSTRE_ENOKEY, + [EKEYEXPIRED] = LUSTRE_EKEYEXPIRED, + [EKEYREVOKED] = LUSTRE_EKEYREVOKED, + [EKEYREJECTED] = LUSTRE_EKEYREJECTED, + [EOWNERDEAD] = LUSTRE_EOWNERDEAD, + [ENOTRECOVERABLE] = LUSTRE_ENOTRECOVERABLE, + [ERESTARTSYS] = LUSTRE_ERESTARTSYS, + [ERESTARTNOINTR] = LUSTRE_ERESTARTNOINTR, + [ERESTARTNOHAND] = LUSTRE_ERESTARTNOHAND, + [ENOIOCTLCMD] = LUSTRE_ENOIOCTLCMD, + [ERESTART_RESTARTBLOCK] = LUSTRE_ERESTART_RESTARTBLOCK, + [EBADHANDLE] = LUSTRE_EBADHANDLE, + [ENOTSYNC] = LUSTRE_ENOTSYNC, + [EBADCOOKIE] = LUSTRE_EBADCOOKIE, + [ENOTSUPP] = LUSTRE_ENOTSUPP, + [ETOOSMALL] = LUSTRE_ETOOSMALL, + [ESERVERFAULT] = LUSTRE_ESERVERFAULT, + [EBADTYPE] = LUSTRE_EBADTYPE, + [EJUKEBOX] = LUSTRE_EJUKEBOX, + [EIOCBQUEUED] = LUSTRE_EIOCBQUEUED, +}; + +static int lustre_errno_ntoh_mapping[] = { + [LUSTRE_EPERM] = EPERM, + [LUSTRE_ENOENT] = ENOENT, + [LUSTRE_ESRCH] = ESRCH, + [LUSTRE_EINTR] = EINTR, + [LUSTRE_EIO] = EIO, + [LUSTRE_ENXIO] = ENXIO, + [LUSTRE_E2BIG] = E2BIG, + [LUSTRE_ENOEXEC] = ENOEXEC, + [LUSTRE_EBADF] = EBADF, + [LUSTRE_ECHILD] = ECHILD, + [LUSTRE_EAGAIN] = EAGAIN, + [LUSTRE_ENOMEM] = ENOMEM, + [LUSTRE_EACCES] = EACCES, + [LUSTRE_EFAULT] = EFAULT, + [LUSTRE_ENOTBLK] = ENOTBLK, + [LUSTRE_EBUSY] = EBUSY, + [LUSTRE_EEXIST] = EEXIST, + [LUSTRE_EXDEV] = EXDEV, + [LUSTRE_ENODEV] = ENODEV, + [LUSTRE_ENOTDIR] = ENOTDIR, + [LUSTRE_EISDIR] = EISDIR, + [LUSTRE_EINVAL] = EINVAL, + [LUSTRE_ENFILE] = ENFILE, + [LUSTRE_EMFILE] = EMFILE, + [LUSTRE_ENOTTY] = ENOTTY, + [LUSTRE_ETXTBSY] = ETXTBSY, + [LUSTRE_EFBIG] = EFBIG, + [LUSTRE_ENOSPC] = ENOSPC, + [LUSTRE_ESPIPE] = ESPIPE, + [LUSTRE_EROFS] = EROFS, + [LUSTRE_EMLINK] = EMLINK, + [LUSTRE_EPIPE] = EPIPE, + [LUSTRE_EDOM] = EDOM, + [LUSTRE_ERANGE] = ERANGE, + [LUSTRE_EDEADLK] = EDEADLK, + [LUSTRE_ENAMETOOLONG] = ENAMETOOLONG, + [LUSTRE_ENOLCK] = ENOLCK, + [LUSTRE_ENOSYS] = ENOSYS, + [LUSTRE_ENOTEMPTY] = ENOTEMPTY, + [LUSTRE_ELOOP] = ELOOP, + [LUSTRE_ENOMSG] = ENOMSG, + [LUSTRE_EIDRM] = EIDRM, + [LUSTRE_ECHRNG] = ECHRNG, + [LUSTRE_EL2NSYNC] = EL2NSYNC, + [LUSTRE_EL3HLT] = EL3HLT, + [LUSTRE_EL3RST] = EL3RST, + [LUSTRE_ELNRNG] = ELNRNG, + [LUSTRE_EUNATCH] = EUNATCH, + [LUSTRE_ENOCSI] = ENOCSI, + [LUSTRE_EL2HLT] = EL2HLT, + [LUSTRE_EBADE] = EBADE, + [LUSTRE_EBADR] = EBADR, + [LUSTRE_EXFULL] = EXFULL, + [LUSTRE_ENOANO] = ENOANO, + [LUSTRE_EBADRQC] = EBADRQC, + [LUSTRE_EBADSLT] = EBADSLT, + [LUSTRE_EBFONT] = EBFONT, + [LUSTRE_ENOSTR] = ENOSTR, + [LUSTRE_ENODATA] = ENODATA, + [LUSTRE_ETIME] = ETIME, + [LUSTRE_ENOSR] = ENOSR, + [LUSTRE_ENONET] = ENONET, + [LUSTRE_ENOPKG] = ENOPKG, + [LUSTRE_EREMOTE] = EREMOTE, + [LUSTRE_ENOLINK] = ENOLINK, + [LUSTRE_EADV] = EADV, + [LUSTRE_ESRMNT] = ESRMNT, + [LUSTRE_ECOMM] = ECOMM, + [LUSTRE_EPROTO] = EPROTO, + [LUSTRE_EMULTIHOP] = EMULTIHOP, + [LUSTRE_EDOTDOT] = EDOTDOT, + [LUSTRE_EBADMSG] = EBADMSG, + [LUSTRE_EOVERFLOW] = EOVERFLOW, + [LUSTRE_ENOTUNIQ] = ENOTUNIQ, + [LUSTRE_EBADFD] = EBADFD, + [LUSTRE_EREMCHG] = EREMCHG, + [LUSTRE_ELIBACC] = ELIBACC, + [LUSTRE_ELIBBAD] = ELIBBAD, + [LUSTRE_ELIBSCN] = ELIBSCN, + [LUSTRE_ELIBMAX] = ELIBMAX, + [LUSTRE_ELIBEXEC] = ELIBEXEC, + [LUSTRE_EILSEQ] = EILSEQ, + [LUSTRE_ERESTART] = ERESTART, + [LUSTRE_ESTRPIPE] = ESTRPIPE, + [LUSTRE_EUSERS] = EUSERS, + [LUSTRE_ENOTSOCK] = ENOTSOCK, + [LUSTRE_EDESTADDRREQ] = EDESTADDRREQ, + [LUSTRE_EMSGSIZE] = EMSGSIZE, + [LUSTRE_EPROTOTYPE] = EPROTOTYPE, + [LUSTRE_ENOPROTOOPT] = ENOPROTOOPT, + [LUSTRE_EPROTONOSUPPORT] = EPROTONOSUPPORT, + [LUSTRE_ESOCKTNOSUPPORT] = ESOCKTNOSUPPORT, + [LUSTRE_EOPNOTSUPP] = EOPNOTSUPP, + [LUSTRE_EPFNOSUPPORT] = EPFNOSUPPORT, + [LUSTRE_EAFNOSUPPORT] = EAFNOSUPPORT, + [LUSTRE_EADDRINUSE] = EADDRINUSE, + [LUSTRE_EADDRNOTAVAIL] = EADDRNOTAVAIL, + [LUSTRE_ENETDOWN] = ENETDOWN, + [LUSTRE_ENETUNREACH] = ENETUNREACH, + [LUSTRE_ENETRESET] = ENETRESET, + [LUSTRE_ECONNABORTED] = ECONNABORTED, + [LUSTRE_ECONNRESET] = ECONNRESET, + [LUSTRE_ENOBUFS] = ENOBUFS, + [LUSTRE_EISCONN] = EISCONN, + [LUSTRE_ENOTCONN] = ENOTCONN, + [LUSTRE_ESHUTDOWN] = ESHUTDOWN, + [LUSTRE_ETOOMANYREFS] = ETOOMANYREFS, + [LUSTRE_ETIMEDOUT] = ETIMEDOUT, + [LUSTRE_ECONNREFUSED] = ECONNREFUSED, + [LUSTRE_EHOSTDOWN] = EHOSTDOWN, + [LUSTRE_EHOSTUNREACH] = EHOSTUNREACH, + [LUSTRE_EALREADY] = EALREADY, + [LUSTRE_EINPROGRESS] = EINPROGRESS, + [LUSTRE_ESTALE] = ESTALE, + [LUSTRE_EUCLEAN] = EUCLEAN, + [LUSTRE_ENOTNAM] = ENOTNAM, + [LUSTRE_ENAVAIL] = ENAVAIL, + [LUSTRE_EISNAM] = EISNAM, + [LUSTRE_EREMOTEIO] = EREMOTEIO, + [LUSTRE_EDQUOT] = EDQUOT, + [LUSTRE_ENOMEDIUM] = ENOMEDIUM, + [LUSTRE_EMEDIUMTYPE] = EMEDIUMTYPE, + [LUSTRE_ECANCELED] = ECANCELED, + [LUSTRE_ENOKEY] = ENOKEY, + [LUSTRE_EKEYEXPIRED] = EKEYEXPIRED, + [LUSTRE_EKEYREVOKED] = EKEYREVOKED, + [LUSTRE_EKEYREJECTED] = EKEYREJECTED, + [LUSTRE_EOWNERDEAD] = EOWNERDEAD, + [LUSTRE_ENOTRECOVERABLE] = ENOTRECOVERABLE, + [LUSTRE_ERESTARTSYS] = ERESTARTSYS, + [LUSTRE_ERESTARTNOINTR] = ERESTARTNOINTR, + [LUSTRE_ERESTARTNOHAND] = ERESTARTNOHAND, + [LUSTRE_ENOIOCTLCMD] = ENOIOCTLCMD, + [LUSTRE_ERESTART_RESTARTBLOCK] = ERESTART_RESTARTBLOCK, + [LUSTRE_EBADHANDLE] = EBADHANDLE, + [LUSTRE_ENOTSYNC] = ENOTSYNC, + [LUSTRE_EBADCOOKIE] = EBADCOOKIE, + [LUSTRE_ENOTSUPP] = ENOTSUPP, + [LUSTRE_ETOOSMALL] = ETOOSMALL, + [LUSTRE_ESERVERFAULT] = ESERVERFAULT, + [LUSTRE_EBADTYPE] = EBADTYPE, + [LUSTRE_EJUKEBOX] = EJUKEBOX, + [LUSTRE_EIOCBQUEUED] = EIOCBQUEUED, +}; + +unsigned int lustre_errno_hton(unsigned int h) +{ + unsigned int n; + + if (h == 0) { + n = 0; + } else if (h < ARRAY_SIZE(lustre_errno_hton_mapping)) { + n = lustre_errno_hton_mapping[h]; + if (n == 0) + goto generic; + } else { +generic: + /* + * A generic errno is better than the unknown one that could + * mean anything to a different host. + */ + n = LUSTRE_EIO; + } + + return n; +} +EXPORT_SYMBOL(lustre_errno_hton); + +unsigned int lustre_errno_ntoh(unsigned int n) +{ + unsigned int h; + + if (n == 0) { + h = 0; + } else if (n < ARRAY_SIZE(lustre_errno_ntoh_mapping)) { + h = lustre_errno_ntoh_mapping[n]; + if (h == 0) + goto generic; + } else { +generic: + /* + * Similar to the situation in lustre_errno_hton(), an unknown + * network errno could coincide with anything. Hence, it is + * better to return a generic errno. + */ + h = EIO; + } + + return h; +} +EXPORT_SYMBOL(lustre_errno_ntoh); diff --git a/drivers/staging/lustre/lustre/ptlrpc/events.c b/drivers/staging/lustre/lustre/ptlrpc/events.c new file mode 100644 index 000000000..7f8644e01 --- /dev/null +++ b/drivers/staging/lustre/lustre/ptlrpc/events.c @@ -0,0 +1,585 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2012, Intel Corporation. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + */ + +#define DEBUG_SUBSYSTEM S_RPC + +#include "../../include/linux/libcfs/libcfs.h" +# ifdef __mips64__ +# include <linux/kernel.h> +# endif + +#include "../include/obd_class.h" +#include "../include/lustre_net.h" +#include "../include/lustre_sec.h" +#include "ptlrpc_internal.h" + +lnet_handle_eq_t ptlrpc_eq_h; + +/* + * Client's outgoing request callback + */ +void request_out_callback(lnet_event_t *ev) +{ + struct ptlrpc_cb_id *cbid = ev->md.user_ptr; + struct ptlrpc_request *req = cbid->cbid_arg; + + LASSERT(ev->type == LNET_EVENT_SEND || + ev->type == LNET_EVENT_UNLINK); + LASSERT(ev->unlinked); + + DEBUG_REQ(D_NET, req, "type %d, status %d", ev->type, ev->status); + + sptlrpc_request_out_callback(req); + spin_lock(&req->rq_lock); + req->rq_real_sent = get_seconds(); + if (ev->unlinked) + req->rq_req_unlink = 0; + + if (ev->type == LNET_EVENT_UNLINK || ev->status != 0) { + + /* Failed send: make it seem like the reply timed out, just + * like failing sends in client.c does currently... */ + + req->rq_net_err = 1; + ptlrpc_client_wake_req(req); + } + spin_unlock(&req->rq_lock); + + ptlrpc_req_finished(req); +} + +/* + * Client's incoming reply callback + */ +void reply_in_callback(lnet_event_t *ev) +{ + struct ptlrpc_cb_id *cbid = ev->md.user_ptr; + struct ptlrpc_request *req = cbid->cbid_arg; + + DEBUG_REQ(D_NET, req, "type %d, status %d", ev->type, ev->status); + + LASSERT(ev->type == LNET_EVENT_PUT || ev->type == LNET_EVENT_UNLINK); + LASSERT(ev->md.start == req->rq_repbuf); + LASSERT(ev->offset + ev->mlength <= req->rq_repbuf_len); + /* We've set LNET_MD_MANAGE_REMOTE for all outgoing requests + for adaptive timeouts' early reply. */ + LASSERT((ev->md.options & LNET_MD_MANAGE_REMOTE) != 0); + + spin_lock(&req->rq_lock); + + req->rq_receiving_reply = 0; + req->rq_early = 0; + if (ev->unlinked) + req->rq_reply_unlink = 0; + + if (ev->status) + goto out_wake; + + if (ev->type == LNET_EVENT_UNLINK) { + LASSERT(ev->unlinked); + DEBUG_REQ(D_NET, req, "unlink"); + goto out_wake; + } + + if (ev->mlength < ev->rlength) { + CDEBUG(D_RPCTRACE, "truncate req %p rpc %d - %d+%d\n", req, + req->rq_replen, ev->rlength, ev->offset); + req->rq_reply_truncate = 1; + req->rq_replied = 1; + req->rq_status = -EOVERFLOW; + req->rq_nob_received = ev->rlength + ev->offset; + goto out_wake; + } + + if ((ev->offset == 0) && + ((lustre_msghdr_get_flags(req->rq_reqmsg) & MSGHDR_AT_SUPPORT))) { + /* Early reply */ + DEBUG_REQ(D_ADAPTTO, req, + "Early reply received: mlen=%u offset=%d replen=%d replied=%d unlinked=%d", + ev->mlength, ev->offset, + req->rq_replen, req->rq_replied, ev->unlinked); + + req->rq_early_count++; /* number received, client side */ + + if (req->rq_replied) /* already got the real reply */ + goto out_wake; + + req->rq_early = 1; + req->rq_reply_off = ev->offset; + req->rq_nob_received = ev->mlength; + /* And we're still receiving */ + req->rq_receiving_reply = 1; + } else { + /* Real reply */ + req->rq_rep_swab_mask = 0; + req->rq_replied = 1; + /* Got reply, no resend required */ + req->rq_resend = 0; + req->rq_reply_off = ev->offset; + req->rq_nob_received = ev->mlength; + /* LNetMDUnlink can't be called under the LNET_LOCK, + so we must unlink in ptlrpc_unregister_reply */ + DEBUG_REQ(D_INFO, req, + "reply in flags=%x mlen=%u offset=%d replen=%d", + lustre_msg_get_flags(req->rq_reqmsg), + ev->mlength, ev->offset, req->rq_replen); + } + + req->rq_import->imp_last_reply_time = get_seconds(); + +out_wake: + /* NB don't unlock till after wakeup; req can disappear under us + * since we don't have our own ref */ + ptlrpc_client_wake_req(req); + spin_unlock(&req->rq_lock); +} + +/* + * Client's bulk has been written/read + */ +void client_bulk_callback(lnet_event_t *ev) +{ + struct ptlrpc_cb_id *cbid = ev->md.user_ptr; + struct ptlrpc_bulk_desc *desc = cbid->cbid_arg; + struct ptlrpc_request *req; + + LASSERT((desc->bd_type == BULK_PUT_SINK && + ev->type == LNET_EVENT_PUT) || + (desc->bd_type == BULK_GET_SOURCE && + ev->type == LNET_EVENT_GET) || + ev->type == LNET_EVENT_UNLINK); + LASSERT(ev->unlinked); + + if (CFS_FAIL_CHECK_ORSET(OBD_FAIL_PTLRPC_CLIENT_BULK_CB, CFS_FAIL_ONCE)) + ev->status = -EIO; + + if (CFS_FAIL_CHECK_ORSET(OBD_FAIL_PTLRPC_CLIENT_BULK_CB2, + CFS_FAIL_ONCE)) + ev->status = -EIO; + + CDEBUG((ev->status == 0) ? D_NET : D_ERROR, + "event type %d, status %d, desc %p\n", + ev->type, ev->status, desc); + + spin_lock(&desc->bd_lock); + req = desc->bd_req; + LASSERT(desc->bd_md_count > 0); + desc->bd_md_count--; + + if (ev->type != LNET_EVENT_UNLINK && ev->status == 0) { + desc->bd_nob_transferred += ev->mlength; + desc->bd_sender = ev->sender; + } else { + /* start reconnect and resend if network error hit */ + spin_lock(&req->rq_lock); + req->rq_net_err = 1; + spin_unlock(&req->rq_lock); + } + + if (ev->status != 0) + desc->bd_failure = 1; + + /* NB don't unlock till after wakeup; desc can disappear under us + * otherwise */ + if (desc->bd_md_count == 0) + ptlrpc_client_wake_req(desc->bd_req); + + spin_unlock(&desc->bd_lock); +} + +/* + * We will have percpt request history list for ptlrpc service in upcoming + * patches because we don't want to be serialized by current per-service + * history operations. So we require history ID can (somehow) show arriving + * order w/o grabbing global lock, and user can sort them in userspace. + * + * This is how we generate history ID for ptlrpc_request: + * ---------------------------------------------------- + * | 32 bits | 16 bits | (16 - X)bits | X bits | + * ---------------------------------------------------- + * | seconds | usec / 16 | sequence | CPT id | + * ---------------------------------------------------- + * + * it might not be precise but should be good enough. + */ + +#define REQS_CPT_BITS(svcpt) ((svcpt)->scp_service->srv_cpt_bits) + +#define REQS_SEC_SHIFT 32 +#define REQS_USEC_SHIFT 16 +#define REQS_SEQ_SHIFT(svcpt) REQS_CPT_BITS(svcpt) + +static void ptlrpc_req_add_history(struct ptlrpc_service_part *svcpt, + struct ptlrpc_request *req) +{ + __u64 sec = req->rq_arrival_time.tv_sec; + __u32 usec = req->rq_arrival_time.tv_usec >> 4; /* usec / 16 */ + __u64 new_seq; + + /* set sequence ID for request and add it to history list, + * it must be called with hold svcpt::scp_lock */ + + new_seq = (sec << REQS_SEC_SHIFT) | + (usec << REQS_USEC_SHIFT) | + (svcpt->scp_cpt < 0 ? 0 : svcpt->scp_cpt); + + if (new_seq > svcpt->scp_hist_seq) { + /* This handles the initial case of scp_hist_seq == 0 or + * we just jumped into a new time window */ + svcpt->scp_hist_seq = new_seq; + } else { + LASSERT(REQS_SEQ_SHIFT(svcpt) < REQS_USEC_SHIFT); + /* NB: increase sequence number in current usec bucket, + * however, it's possible that we used up all bits for + * sequence and jumped into the next usec bucket (future time), + * then we hope there will be less RPCs per bucket at some + * point, and sequence will catch up again */ + svcpt->scp_hist_seq += (1U << REQS_SEQ_SHIFT(svcpt)); + new_seq = svcpt->scp_hist_seq; + } + + req->rq_history_seq = new_seq; + + list_add_tail(&req->rq_history_list, &svcpt->scp_hist_reqs); +} + +/* + * Server's incoming request callback + */ +void request_in_callback(lnet_event_t *ev) +{ + struct ptlrpc_cb_id *cbid = ev->md.user_ptr; + struct ptlrpc_request_buffer_desc *rqbd = cbid->cbid_arg; + struct ptlrpc_service_part *svcpt = rqbd->rqbd_svcpt; + struct ptlrpc_service *service = svcpt->scp_service; + struct ptlrpc_request *req; + + LASSERT(ev->type == LNET_EVENT_PUT || + ev->type == LNET_EVENT_UNLINK); + LASSERT((char *)ev->md.start >= rqbd->rqbd_buffer); + LASSERT((char *)ev->md.start + ev->offset + ev->mlength <= + rqbd->rqbd_buffer + service->srv_buf_size); + + CDEBUG((ev->status == 0) ? D_NET : D_ERROR, + "event type %d, status %d, service %s\n", + ev->type, ev->status, service->srv_name); + + if (ev->unlinked) { + /* If this is the last request message to fit in the + * request buffer we can use the request object embedded in + * rqbd. Note that if we failed to allocate a request, + * we'd have to re-post the rqbd, which we can't do in this + * context. */ + req = &rqbd->rqbd_req; + memset(req, 0, sizeof(*req)); + } else { + LASSERT(ev->type == LNET_EVENT_PUT); + if (ev->status != 0) { + /* We moaned above already... */ + return; + } + req = ptlrpc_request_cache_alloc(GFP_ATOMIC); + if (req == NULL) { + CERROR("Can't allocate incoming request descriptor: Dropping %s RPC from %s\n", + service->srv_name, + libcfs_id2str(ev->initiator)); + return; + } + } + + /* NB we ABSOLUTELY RELY on req being zeroed, so pointers are NULL, + * flags are reset and scalars are zero. We only set the message + * size to non-zero if this was a successful receive. */ + req->rq_xid = ev->match_bits; + req->rq_reqbuf = ev->md.start + ev->offset; + if (ev->type == LNET_EVENT_PUT && ev->status == 0) + req->rq_reqdata_len = ev->mlength; + do_gettimeofday(&req->rq_arrival_time); + req->rq_peer = ev->initiator; + req->rq_self = ev->target.nid; + req->rq_rqbd = rqbd; + req->rq_phase = RQ_PHASE_NEW; + spin_lock_init(&req->rq_lock); + INIT_LIST_HEAD(&req->rq_timed_list); + INIT_LIST_HEAD(&req->rq_exp_list); + atomic_set(&req->rq_refcount, 1); + if (ev->type == LNET_EVENT_PUT) + CDEBUG(D_INFO, "incoming req@%p x%llu msgsize %u\n", + req, req->rq_xid, ev->mlength); + + CDEBUG(D_RPCTRACE, "peer: %s\n", libcfs_id2str(req->rq_peer)); + + spin_lock(&svcpt->scp_lock); + + ptlrpc_req_add_history(svcpt, req); + + if (ev->unlinked) { + svcpt->scp_nrqbds_posted--; + CDEBUG(D_INFO, "Buffer complete: %d buffers still posted\n", + svcpt->scp_nrqbds_posted); + + /* Normally, don't complain about 0 buffers posted; LNET won't + * drop incoming reqs since we set the portal lazy */ + if (test_req_buffer_pressure && + ev->type != LNET_EVENT_UNLINK && + svcpt->scp_nrqbds_posted == 0) + CWARN("All %s request buffers busy\n", + service->srv_name); + + /* req takes over the network's ref on rqbd */ + } else { + /* req takes a ref on rqbd */ + rqbd->rqbd_refcount++; + } + + list_add_tail(&req->rq_list, &svcpt->scp_req_incoming); + svcpt->scp_nreqs_incoming++; + + /* NB everything can disappear under us once the request + * has been queued and we unlock, so do the wake now... */ + wake_up(&svcpt->scp_waitq); + + spin_unlock(&svcpt->scp_lock); +} + +/* + * Server's outgoing reply callback + */ +void reply_out_callback(lnet_event_t *ev) +{ + struct ptlrpc_cb_id *cbid = ev->md.user_ptr; + struct ptlrpc_reply_state *rs = cbid->cbid_arg; + struct ptlrpc_service_part *svcpt = rs->rs_svcpt; + + LASSERT(ev->type == LNET_EVENT_SEND || + ev->type == LNET_EVENT_ACK || + ev->type == LNET_EVENT_UNLINK); + + if (!rs->rs_difficult) { + /* 'Easy' replies have no further processing so I drop the + * net's ref on 'rs' */ + LASSERT(ev->unlinked); + ptlrpc_rs_decref(rs); + return; + } + + LASSERT(rs->rs_on_net); + + if (ev->unlinked) { + /* Last network callback. The net's ref on 'rs' stays put + * until ptlrpc_handle_rs() is done with it */ + spin_lock(&svcpt->scp_rep_lock); + spin_lock(&rs->rs_lock); + + rs->rs_on_net = 0; + if (!rs->rs_no_ack || + rs->rs_transno <= + rs->rs_export->exp_obd->obd_last_committed) + ptlrpc_schedule_difficult_reply(rs); + + spin_unlock(&rs->rs_lock); + spin_unlock(&svcpt->scp_rep_lock); + } +} + + +static void ptlrpc_master_callback(lnet_event_t *ev) +{ + struct ptlrpc_cb_id *cbid = ev->md.user_ptr; + void (*callback)(lnet_event_t *ev) = cbid->cbid_fn; + + /* Honestly, it's best to find out early. */ + LASSERT(cbid->cbid_arg != LP_POISON); + LASSERT(callback == request_out_callback || + callback == reply_in_callback || + callback == client_bulk_callback || + callback == request_in_callback || + callback == reply_out_callback); + + callback(ev); +} + +int ptlrpc_uuid_to_peer(struct obd_uuid *uuid, + lnet_process_id_t *peer, lnet_nid_t *self) +{ + int best_dist = 0; + __u32 best_order = 0; + int count = 0; + int rc = -ENOENT; + int portals_compatibility; + int dist; + __u32 order; + lnet_nid_t dst_nid; + lnet_nid_t src_nid; + + portals_compatibility = LNetCtl(IOC_LIBCFS_PORTALS_COMPATIBILITY, NULL); + + peer->pid = LUSTRE_SRV_LNET_PID; + + /* Choose the matching UUID that's closest */ + while (lustre_uuid_to_peer(uuid->uuid, &dst_nid, count++) == 0) { + dist = LNetDist(dst_nid, &src_nid, &order); + if (dist < 0) + continue; + + if (dist == 0) { /* local! use loopback LND */ + peer->nid = *self = LNET_MKNID(LNET_MKNET(LOLND, 0), 0); + rc = 0; + break; + } + + if (rc < 0 || + dist < best_dist || + (dist == best_dist && order < best_order)) { + best_dist = dist; + best_order = order; + + if (portals_compatibility > 1) { + /* Strong portals compatibility: Zero the nid's + * NET, so if I'm reading new config logs, or + * getting configured by (new) lconf I can + * still talk to old servers. */ + dst_nid = LNET_MKNID(0, LNET_NIDADDR(dst_nid)); + src_nid = LNET_MKNID(0, LNET_NIDADDR(src_nid)); + } + peer->nid = dst_nid; + *self = src_nid; + rc = 0; + } + } + + CDEBUG(D_NET, "%s->%s\n", uuid->uuid, libcfs_id2str(*peer)); + return rc; +} + +void ptlrpc_ni_fini(void) +{ + wait_queue_head_t waitq; + struct l_wait_info lwi; + int rc; + int retries; + + /* Wait for the event queue to become idle since there may still be + * messages in flight with pending events (i.e. the fire-and-forget + * messages == client requests and "non-difficult" server + * replies */ + + for (retries = 0;; retries++) { + rc = LNetEQFree(ptlrpc_eq_h); + switch (rc) { + default: + LBUG(); + + case 0: + LNetNIFini(); + return; + + case -EBUSY: + if (retries != 0) + CWARN("Event queue still busy\n"); + + /* Wait for a bit */ + init_waitqueue_head(&waitq); + lwi = LWI_TIMEOUT(cfs_time_seconds(2), NULL, NULL); + l_wait_event(waitq, 0, &lwi); + break; + } + } + /* notreached */ +} + +lnet_pid_t ptl_get_pid(void) +{ + lnet_pid_t pid; + + pid = LUSTRE_SRV_LNET_PID; + return pid; +} + +int ptlrpc_ni_init(void) +{ + int rc; + lnet_pid_t pid; + + pid = ptl_get_pid(); + CDEBUG(D_NET, "My pid is: %x\n", pid); + + /* We're not passing any limits yet... */ + rc = LNetNIInit(pid); + if (rc < 0) { + CDEBUG(D_NET, "Can't init network interface: %d\n", rc); + return -ENOENT; + } + + /* CAVEAT EMPTOR: how we process portals events is _radically_ + * different depending on... */ + /* kernel LNet calls our master callback when there are new event, + * because we are guaranteed to get every event via callback, + * so we just set EQ size to 0 to avoid overhead of serializing + * enqueue/dequeue operations in LNet. */ + rc = LNetEQAlloc(0, ptlrpc_master_callback, &ptlrpc_eq_h); + if (rc == 0) + return 0; + + CERROR("Failed to allocate event queue: %d\n", rc); + LNetNIFini(); + + return -ENOMEM; +} + + +int ptlrpc_init_portals(void) +{ + int rc = ptlrpc_ni_init(); + + if (rc != 0) { + CERROR("network initialisation failed\n"); + return -EIO; + } + rc = ptlrpcd_addref(); + if (rc == 0) + return 0; + + CERROR("rpcd initialisation failed\n"); + ptlrpc_ni_fini(); + return rc; +} + +void ptlrpc_exit_portals(void) +{ + ptlrpcd_decref(); + ptlrpc_ni_fini(); +} diff --git a/drivers/staging/lustre/lustre/ptlrpc/import.c b/drivers/staging/lustre/lustre/ptlrpc/import.c new file mode 100644 index 000000000..d5fc689c0 --- /dev/null +++ b/drivers/staging/lustre/lustre/ptlrpc/import.c @@ -0,0 +1,1642 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2011, 2012, Intel Corporation. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + * + * lustre/ptlrpc/import.c + * + * Author: Mike Shaver <shaver@clusterfs.com> + */ + +#define DEBUG_SUBSYSTEM S_RPC + +#include "../include/obd_support.h" +#include "../include/lustre_ha.h" +#include "../include/lustre_net.h" +#include "../include/lustre_import.h" +#include "../include/lustre_export.h" +#include "../include/obd.h" +#include "../include/obd_cksum.h" +#include "../include/obd_class.h" + +#include "ptlrpc_internal.h" + +struct ptlrpc_connect_async_args { + __u64 pcaa_peer_committed; + int pcaa_initial_connect; +}; + +/** + * Updates import \a imp current state to provided \a state value + * Helper function. Must be called under imp_lock. + */ +static void __import_set_state(struct obd_import *imp, + enum lustre_imp_state state) +{ + switch (state) { + case LUSTRE_IMP_CLOSED: + case LUSTRE_IMP_NEW: + case LUSTRE_IMP_DISCON: + case LUSTRE_IMP_CONNECTING: + break; + case LUSTRE_IMP_REPLAY_WAIT: + imp->imp_replay_state = LUSTRE_IMP_REPLAY_LOCKS; + break; + default: + imp->imp_replay_state = LUSTRE_IMP_REPLAY; + } + + imp->imp_state = state; + imp->imp_state_hist[imp->imp_state_hist_idx].ish_state = state; + imp->imp_state_hist[imp->imp_state_hist_idx].ish_time = + get_seconds(); + imp->imp_state_hist_idx = (imp->imp_state_hist_idx + 1) % + IMP_STATE_HIST_LEN; +} + +/* A CLOSED import should remain so. */ +#define IMPORT_SET_STATE_NOLOCK(imp, state) \ +do { \ + if (imp->imp_state != LUSTRE_IMP_CLOSED) { \ + CDEBUG(D_HA, "%p %s: changing import state from %s to %s\n", \ + imp, obd2cli_tgt(imp->imp_obd), \ + ptlrpc_import_state_name(imp->imp_state), \ + ptlrpc_import_state_name(state)); \ + __import_set_state(imp, state); \ + } \ +} while (0) + +#define IMPORT_SET_STATE(imp, state) \ +do { \ + spin_lock(&imp->imp_lock); \ + IMPORT_SET_STATE_NOLOCK(imp, state); \ + spin_unlock(&imp->imp_lock); \ +} while (0) + + +static int ptlrpc_connect_interpret(const struct lu_env *env, + struct ptlrpc_request *request, + void *data, int rc); +int ptlrpc_import_recovery_state_machine(struct obd_import *imp); + +/* Only this function is allowed to change the import state when it is + * CLOSED. I would rather refcount the import and free it after + * disconnection like we do with exports. To do that, the client_obd + * will need to save the peer info somewhere other than in the import, + * though. */ +int ptlrpc_init_import(struct obd_import *imp) +{ + spin_lock(&imp->imp_lock); + + imp->imp_generation++; + imp->imp_state = LUSTRE_IMP_NEW; + + spin_unlock(&imp->imp_lock); + + return 0; +} +EXPORT_SYMBOL(ptlrpc_init_import); + +#define UUID_STR "_UUID" +void deuuidify(char *uuid, const char *prefix, char **uuid_start, int *uuid_len) +{ + *uuid_start = !prefix || strncmp(uuid, prefix, strlen(prefix)) + ? uuid : uuid + strlen(prefix); + + *uuid_len = strlen(*uuid_start); + + if (*uuid_len < strlen(UUID_STR)) + return; + + if (!strncmp(*uuid_start + *uuid_len - strlen(UUID_STR), + UUID_STR, strlen(UUID_STR))) + *uuid_len -= strlen(UUID_STR); +} +EXPORT_SYMBOL(deuuidify); + +/** + * Returns true if import was FULL, false if import was already not + * connected. + * @imp - import to be disconnected + * @conn_cnt - connection count (epoch) of the request that timed out + * and caused the disconnection. In some cases, multiple + * inflight requests can fail to a single target (e.g. OST + * bulk requests) and if one has already caused a reconnection + * (increasing the import->conn_cnt) the older failure should + * not also cause a reconnection. If zero it forces a reconnect. + */ +int ptlrpc_set_import_discon(struct obd_import *imp, __u32 conn_cnt) +{ + int rc = 0; + + spin_lock(&imp->imp_lock); + + if (imp->imp_state == LUSTRE_IMP_FULL && + (conn_cnt == 0 || conn_cnt == imp->imp_conn_cnt)) { + char *target_start; + int target_len; + + deuuidify(obd2cli_tgt(imp->imp_obd), NULL, + &target_start, &target_len); + + if (imp->imp_replayable) { + LCONSOLE_WARN("%s: Connection to %.*s (at %s) was lost; in progress operations using this service will wait for recovery to complete\n", + imp->imp_obd->obd_name, target_len, target_start, + libcfs_nid2str(imp->imp_connection->c_peer.nid)); + } else { + LCONSOLE_ERROR_MSG(0x166, "%s: Connection to %.*s (at %s) was lost; in progress operations using this service will fail\n", + imp->imp_obd->obd_name, + target_len, target_start, + libcfs_nid2str(imp->imp_connection->c_peer.nid)); + } + IMPORT_SET_STATE_NOLOCK(imp, LUSTRE_IMP_DISCON); + spin_unlock(&imp->imp_lock); + + if (obd_dump_on_timeout) + libcfs_debug_dumplog(); + + obd_import_event(imp->imp_obd, imp, IMP_EVENT_DISCON); + rc = 1; + } else { + spin_unlock(&imp->imp_lock); + CDEBUG(D_HA, "%s: import %p already %s (conn %u, was %u): %s\n", + imp->imp_client->cli_name, imp, + (imp->imp_state == LUSTRE_IMP_FULL && + imp->imp_conn_cnt > conn_cnt) ? + "reconnected" : "not connected", imp->imp_conn_cnt, + conn_cnt, ptlrpc_import_state_name(imp->imp_state)); + } + + return rc; +} + +/* Must be called with imp_lock held! */ +static void ptlrpc_deactivate_and_unlock_import(struct obd_import *imp) +{ + assert_spin_locked(&imp->imp_lock); + + CDEBUG(D_HA, "setting import %s INVALID\n", obd2cli_tgt(imp->imp_obd)); + imp->imp_invalid = 1; + imp->imp_generation++; + spin_unlock(&imp->imp_lock); + + ptlrpc_abort_inflight(imp); + obd_import_event(imp->imp_obd, imp, IMP_EVENT_INACTIVE); +} + +/* + * This acts as a barrier; all existing requests are rejected, and + * no new requests will be accepted until the import is valid again. + */ +void ptlrpc_deactivate_import(struct obd_import *imp) +{ + spin_lock(&imp->imp_lock); + ptlrpc_deactivate_and_unlock_import(imp); +} +EXPORT_SYMBOL(ptlrpc_deactivate_import); + +static unsigned int +ptlrpc_inflight_deadline(struct ptlrpc_request *req, time_t now) +{ + long dl; + + if (!(((req->rq_phase == RQ_PHASE_RPC) && !req->rq_waiting) || + (req->rq_phase == RQ_PHASE_BULK) || + (req->rq_phase == RQ_PHASE_NEW))) + return 0; + + if (req->rq_timedout) + return 0; + + if (req->rq_phase == RQ_PHASE_NEW) + dl = req->rq_sent; + else + dl = req->rq_deadline; + + if (dl <= now) + return 0; + + return dl - now; +} + +static unsigned int ptlrpc_inflight_timeout(struct obd_import *imp) +{ + time_t now = get_seconds(); + struct list_head *tmp, *n; + struct ptlrpc_request *req; + unsigned int timeout = 0; + + spin_lock(&imp->imp_lock); + list_for_each_safe(tmp, n, &imp->imp_sending_list) { + req = list_entry(tmp, struct ptlrpc_request, rq_list); + timeout = max(ptlrpc_inflight_deadline(req, now), timeout); + } + spin_unlock(&imp->imp_lock); + return timeout; +} + +/** + * This function will invalidate the import, if necessary, then block + * for all the RPC completions, and finally notify the obd to + * invalidate its state (ie cancel locks, clear pending requests, + * etc). + */ +void ptlrpc_invalidate_import(struct obd_import *imp) +{ + struct list_head *tmp, *n; + struct ptlrpc_request *req; + struct l_wait_info lwi; + unsigned int timeout; + int rc; + + atomic_inc(&imp->imp_inval_count); + + if (!imp->imp_invalid || imp->imp_obd->obd_no_recov) + ptlrpc_deactivate_import(imp); + + CFS_FAIL_TIMEOUT(OBD_FAIL_MGS_CONNECT_NET, 3 * cfs_fail_val / 2); + LASSERT(imp->imp_invalid); + + /* Wait forever until inflight == 0. We really can't do it another + * way because in some cases we need to wait for very long reply + * unlink. We can't do anything before that because there is really + * no guarantee that some rdma transfer is not in progress right now. */ + do { + /* Calculate max timeout for waiting on rpcs to error + * out. Use obd_timeout if calculated value is smaller + * than it. */ + if (!OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REPL_UNLINK)) { + timeout = ptlrpc_inflight_timeout(imp); + timeout += timeout / 3; + + if (timeout == 0) + timeout = obd_timeout; + } else { + /* decrease the interval to increase race condition */ + timeout = 1; + } + + CDEBUG(D_RPCTRACE, + "Sleeping %d sec for inflight to error out\n", + timeout); + + /* Wait for all requests to error out and call completion + * callbacks. Cap it at obd_timeout -- these should all + * have been locally cancelled by ptlrpc_abort_inflight. */ + lwi = LWI_TIMEOUT_INTERVAL( + cfs_timeout_cap(cfs_time_seconds(timeout)), + (timeout > 1)?cfs_time_seconds(1):cfs_time_seconds(1)/2, + NULL, NULL); + rc = l_wait_event(imp->imp_recovery_waitq, + (atomic_read(&imp->imp_inflight) == 0), + &lwi); + if (rc) { + const char *cli_tgt = obd2cli_tgt(imp->imp_obd); + + CERROR("%s: rc = %d waiting for callback (%d != 0)\n", + cli_tgt, rc, + atomic_read(&imp->imp_inflight)); + + spin_lock(&imp->imp_lock); + if (atomic_read(&imp->imp_inflight) == 0) { + int count = atomic_read(&imp->imp_unregistering); + + /* We know that "unregistering" rpcs only can + * survive in sending or delaying lists (they + * maybe waiting for long reply unlink in + * sluggish nets). Let's check this. If there + * is no inflight and unregistering != 0, this + * is bug. */ + LASSERTF(count == 0, "Some RPCs are still unregistering: %d\n", + count); + + /* Let's save one loop as soon as inflight have + * dropped to zero. No new inflights possible at + * this point. */ + rc = 0; + } else { + list_for_each_safe(tmp, n, + &imp->imp_sending_list) { + req = list_entry(tmp, + struct ptlrpc_request, + rq_list); + DEBUG_REQ(D_ERROR, req, + "still on sending list"); + } + list_for_each_safe(tmp, n, + &imp->imp_delayed_list) { + req = list_entry(tmp, + struct ptlrpc_request, + rq_list); + DEBUG_REQ(D_ERROR, req, + "still on delayed list"); + } + + CERROR("%s: RPCs in \"%s\" phase found (%d). Network is sluggish? Waiting them to error out.\n", + cli_tgt, + ptlrpc_phase2str(RQ_PHASE_UNREGISTERING), + atomic_read(&imp-> + imp_unregistering)); + } + spin_unlock(&imp->imp_lock); + } + } while (rc != 0); + + /* + * Let's additionally check that no new rpcs added to import in + * "invalidate" state. + */ + LASSERT(atomic_read(&imp->imp_inflight) == 0); + obd_import_event(imp->imp_obd, imp, IMP_EVENT_INVALIDATE); + sptlrpc_import_flush_all_ctx(imp); + + atomic_dec(&imp->imp_inval_count); + wake_up_all(&imp->imp_recovery_waitq); +} +EXPORT_SYMBOL(ptlrpc_invalidate_import); + +/* unset imp_invalid */ +void ptlrpc_activate_import(struct obd_import *imp) +{ + struct obd_device *obd = imp->imp_obd; + + spin_lock(&imp->imp_lock); + if (imp->imp_deactive != 0) { + spin_unlock(&imp->imp_lock); + return; + } + + imp->imp_invalid = 0; + spin_unlock(&imp->imp_lock); + obd_import_event(obd, imp, IMP_EVENT_ACTIVE); +} +EXPORT_SYMBOL(ptlrpc_activate_import); + +static void ptlrpc_pinger_force(struct obd_import *imp) +{ + CDEBUG(D_HA, "%s: waking up pinger s:%s\n", obd2cli_tgt(imp->imp_obd), + ptlrpc_import_state_name(imp->imp_state)); + + spin_lock(&imp->imp_lock); + imp->imp_force_verify = 1; + spin_unlock(&imp->imp_lock); + + if (imp->imp_state != LUSTRE_IMP_CONNECTING) + ptlrpc_pinger_wake_up(); +} + +void ptlrpc_fail_import(struct obd_import *imp, __u32 conn_cnt) +{ + LASSERT(!imp->imp_dlm_fake); + + if (ptlrpc_set_import_discon(imp, conn_cnt)) { + if (!imp->imp_replayable) { + CDEBUG(D_HA, "import %s@%s for %s not replayable, auto-deactivating\n", + obd2cli_tgt(imp->imp_obd), + imp->imp_connection->c_remote_uuid.uuid, + imp->imp_obd->obd_name); + ptlrpc_deactivate_import(imp); + } + + ptlrpc_pinger_force(imp); + } +} +EXPORT_SYMBOL(ptlrpc_fail_import); + +int ptlrpc_reconnect_import(struct obd_import *imp) +{ +#ifdef ENABLE_PINGER + struct l_wait_info lwi; + int secs = cfs_time_seconds(obd_timeout); + int rc; + + ptlrpc_pinger_force(imp); + + CDEBUG(D_HA, "%s: recovery started, waiting %u seconds\n", + obd2cli_tgt(imp->imp_obd), secs); + + lwi = LWI_TIMEOUT(secs, NULL, NULL); + rc = l_wait_event(imp->imp_recovery_waitq, + !ptlrpc_import_in_recovery(imp), &lwi); + CDEBUG(D_HA, "%s: recovery finished s:%s\n", obd2cli_tgt(imp->imp_obd), + ptlrpc_import_state_name(imp->imp_state)); + return rc; +#else + ptlrpc_set_import_discon(imp, 0); + /* Force a new connect attempt */ + ptlrpc_invalidate_import(imp); + /* Do a fresh connect next time by zeroing the handle */ + ptlrpc_disconnect_import(imp, 1); + /* Wait for all invalidate calls to finish */ + if (atomic_read(&imp->imp_inval_count) > 0) { + int rc; + struct l_wait_info lwi = LWI_INTR(LWI_ON_SIGNAL_NOOP, NULL); + rc = l_wait_event(imp->imp_recovery_waitq, + (atomic_read(&imp->imp_inval_count) == 0), + &lwi); + if (rc) + CERROR("Interrupted, inval=%d\n", + atomic_read(&imp->imp_inval_count)); + } + + /* Allow reconnect attempts */ + imp->imp_obd->obd_no_recov = 0; + /* Remove 'invalid' flag */ + ptlrpc_activate_import(imp); + /* Attempt a new connect */ + ptlrpc_recover_import(imp, NULL, 0); + return 0; +#endif +} +EXPORT_SYMBOL(ptlrpc_reconnect_import); + +/** + * Connection on import \a imp is changed to another one (if more than one is + * present). We typically chose connection that we have not tried to connect to + * the longest + */ +static int import_select_connection(struct obd_import *imp) +{ + struct obd_import_conn *imp_conn = NULL, *conn; + struct obd_export *dlmexp; + char *target_start; + int target_len, tried_all = 1; + + spin_lock(&imp->imp_lock); + + if (list_empty(&imp->imp_conn_list)) { + CERROR("%s: no connections available\n", + imp->imp_obd->obd_name); + spin_unlock(&imp->imp_lock); + return -EINVAL; + } + + list_for_each_entry(conn, &imp->imp_conn_list, oic_item) { + CDEBUG(D_HA, "%s: connect to NID %s last attempt %llu\n", + imp->imp_obd->obd_name, + libcfs_nid2str(conn->oic_conn->c_peer.nid), + conn->oic_last_attempt); + + /* If we have not tried this connection since + the last successful attempt, go with this one */ + if ((conn->oic_last_attempt == 0) || + cfs_time_beforeq_64(conn->oic_last_attempt, + imp->imp_last_success_conn)) { + imp_conn = conn; + tried_all = 0; + break; + } + + /* If all of the connections have already been tried + since the last successful connection; just choose the + least recently used */ + if (!imp_conn) + imp_conn = conn; + else if (cfs_time_before_64(conn->oic_last_attempt, + imp_conn->oic_last_attempt)) + imp_conn = conn; + } + + /* if not found, simply choose the current one */ + if (!imp_conn || imp->imp_force_reconnect) { + LASSERT(imp->imp_conn_current); + imp_conn = imp->imp_conn_current; + tried_all = 0; + } + LASSERT(imp_conn->oic_conn); + + /* If we've tried everything, and we're back to the beginning of the + list, increase our timeout and try again. It will be reset when + we do finally connect. (FIXME: really we should wait for all network + state associated with the last connection attempt to drain before + trying to reconnect on it.) */ + if (tried_all && (imp->imp_conn_list.next == &imp_conn->oic_item)) { + struct adaptive_timeout *at = &imp->imp_at.iat_net_latency; + if (at_get(at) < CONNECTION_SWITCH_MAX) { + at_measured(at, at_get(at) + CONNECTION_SWITCH_INC); + if (at_get(at) > CONNECTION_SWITCH_MAX) + at_reset(at, CONNECTION_SWITCH_MAX); + } + LASSERT(imp_conn->oic_last_attempt); + CDEBUG(D_HA, "%s: tried all connections, increasing latency to %ds\n", + imp->imp_obd->obd_name, at_get(at)); + } + + imp_conn->oic_last_attempt = cfs_time_current_64(); + + /* switch connection, don't mind if it's same as the current one */ + if (imp->imp_connection) + ptlrpc_connection_put(imp->imp_connection); + imp->imp_connection = ptlrpc_connection_addref(imp_conn->oic_conn); + + dlmexp = class_conn2export(&imp->imp_dlm_handle); + LASSERT(dlmexp != NULL); + if (dlmexp->exp_connection) + ptlrpc_connection_put(dlmexp->exp_connection); + dlmexp->exp_connection = ptlrpc_connection_addref(imp_conn->oic_conn); + class_export_put(dlmexp); + + if (imp->imp_conn_current != imp_conn) { + if (imp->imp_conn_current) { + deuuidify(obd2cli_tgt(imp->imp_obd), NULL, + &target_start, &target_len); + + CDEBUG(D_HA, "%s: Connection changing to %.*s (at %s)\n", + imp->imp_obd->obd_name, + target_len, target_start, + libcfs_nid2str(imp_conn->oic_conn->c_peer.nid)); + } + + imp->imp_conn_current = imp_conn; + } + + CDEBUG(D_HA, "%s: import %p using connection %s/%s\n", + imp->imp_obd->obd_name, imp, imp_conn->oic_uuid.uuid, + libcfs_nid2str(imp_conn->oic_conn->c_peer.nid)); + + spin_unlock(&imp->imp_lock); + + return 0; +} + +/* + * must be called under imp_lock + */ +static int ptlrpc_first_transno(struct obd_import *imp, __u64 *transno) +{ + struct ptlrpc_request *req; + struct list_head *tmp; + + /* The requests in committed_list always have smaller transnos than + * the requests in replay_list */ + if (!list_empty(&imp->imp_committed_list)) { + tmp = imp->imp_committed_list.next; + req = list_entry(tmp, struct ptlrpc_request, rq_replay_list); + *transno = req->rq_transno; + if (req->rq_transno == 0) { + DEBUG_REQ(D_ERROR, req, + "zero transno in committed_list"); + LBUG(); + } + return 1; + } + if (!list_empty(&imp->imp_replay_list)) { + tmp = imp->imp_replay_list.next; + req = list_entry(tmp, struct ptlrpc_request, rq_replay_list); + *transno = req->rq_transno; + if (req->rq_transno == 0) { + DEBUG_REQ(D_ERROR, req, "zero transno in replay_list"); + LBUG(); + } + return 1; + } + return 0; +} + +/** + * Attempt to (re)connect import \a imp. This includes all preparations, + * initializing CONNECT RPC request and passing it to ptlrpcd for + * actual sending. + * Returns 0 on success or error code. + */ +int ptlrpc_connect_import(struct obd_import *imp) +{ + struct obd_device *obd = imp->imp_obd; + int initial_connect = 0; + int set_transno = 0; + __u64 committed_before_reconnect = 0; + struct ptlrpc_request *request; + char *bufs[] = { NULL, + obd2cli_tgt(imp->imp_obd), + obd->obd_uuid.uuid, + (char *)&imp->imp_dlm_handle, + (char *)&imp->imp_connect_data }; + struct ptlrpc_connect_async_args *aa; + int rc; + + spin_lock(&imp->imp_lock); + if (imp->imp_state == LUSTRE_IMP_CLOSED) { + spin_unlock(&imp->imp_lock); + CERROR("can't connect to a closed import\n"); + return -EINVAL; + } else if (imp->imp_state == LUSTRE_IMP_FULL) { + spin_unlock(&imp->imp_lock); + CERROR("already connected\n"); + return 0; + } else if (imp->imp_state == LUSTRE_IMP_CONNECTING) { + spin_unlock(&imp->imp_lock); + CERROR("already connecting\n"); + return -EALREADY; + } + + IMPORT_SET_STATE_NOLOCK(imp, LUSTRE_IMP_CONNECTING); + + imp->imp_conn_cnt++; + imp->imp_resend_replay = 0; + + if (!lustre_handle_is_used(&imp->imp_remote_handle)) + initial_connect = 1; + else + committed_before_reconnect = imp->imp_peer_committed_transno; + + set_transno = ptlrpc_first_transno(imp, + &imp->imp_connect_data.ocd_transno); + spin_unlock(&imp->imp_lock); + + rc = import_select_connection(imp); + if (rc) + goto out; + + rc = sptlrpc_import_sec_adapt(imp, NULL, NULL); + if (rc) + goto out; + + /* Reset connect flags to the originally requested flags, in case + * the server is updated on-the-fly we will get the new features. */ + imp->imp_connect_data.ocd_connect_flags = imp->imp_connect_flags_orig; + /* Reset ocd_version each time so the server knows the exact versions */ + imp->imp_connect_data.ocd_version = LUSTRE_VERSION_CODE; + imp->imp_msghdr_flags &= ~MSGHDR_AT_SUPPORT; + imp->imp_msghdr_flags &= ~MSGHDR_CKSUM_INCOMPAT18; + + rc = obd_reconnect(NULL, imp->imp_obd->obd_self_export, obd, + &obd->obd_uuid, &imp->imp_connect_data, NULL); + if (rc) + goto out; + + request = ptlrpc_request_alloc(imp, &RQF_MDS_CONNECT); + if (request == NULL) { + rc = -ENOMEM; + goto out; + } + + rc = ptlrpc_request_bufs_pack(request, LUSTRE_OBD_VERSION, + imp->imp_connect_op, bufs, NULL); + if (rc) { + ptlrpc_request_free(request); + goto out; + } + + /* Report the rpc service time to the server so that it knows how long + * to wait for clients to join recovery */ + lustre_msg_set_service_time(request->rq_reqmsg, + at_timeout2est(request->rq_timeout)); + + /* The amount of time we give the server to process the connect req. + * import_select_connection will increase the net latency on + * repeated reconnect attempts to cover slow networks. + * We override/ignore the server rpc completion estimate here, + * which may be large if this is a reconnect attempt */ + request->rq_timeout = INITIAL_CONNECT_TIMEOUT; + lustre_msg_set_timeout(request->rq_reqmsg, request->rq_timeout); + + lustre_msg_add_op_flags(request->rq_reqmsg, MSG_CONNECT_NEXT_VER); + + request->rq_no_resend = request->rq_no_delay = 1; + request->rq_send_state = LUSTRE_IMP_CONNECTING; + /* Allow a slightly larger reply for future growth compatibility */ + req_capsule_set_size(&request->rq_pill, &RMF_CONNECT_DATA, RCL_SERVER, + sizeof(struct obd_connect_data)+16*sizeof(__u64)); + ptlrpc_request_set_replen(request); + request->rq_interpret_reply = ptlrpc_connect_interpret; + + CLASSERT(sizeof(*aa) <= sizeof(request->rq_async_args)); + aa = ptlrpc_req_async_args(request); + memset(aa, 0, sizeof(*aa)); + + aa->pcaa_peer_committed = committed_before_reconnect; + aa->pcaa_initial_connect = initial_connect; + + if (aa->pcaa_initial_connect) { + spin_lock(&imp->imp_lock); + imp->imp_replayable = 1; + spin_unlock(&imp->imp_lock); + lustre_msg_add_op_flags(request->rq_reqmsg, + MSG_CONNECT_INITIAL); + } + + if (set_transno) + lustre_msg_add_op_flags(request->rq_reqmsg, + MSG_CONNECT_TRANSNO); + + DEBUG_REQ(D_RPCTRACE, request, "(re)connect request (timeout %d)", + request->rq_timeout); + ptlrpcd_add_req(request, PDL_POLICY_ROUND, -1); + rc = 0; +out: + if (rc != 0) { + IMPORT_SET_STATE(imp, LUSTRE_IMP_DISCON); + } + + return rc; +} +EXPORT_SYMBOL(ptlrpc_connect_import); + +static void ptlrpc_maybe_ping_import_soon(struct obd_import *imp) +{ + int force_verify; + + spin_lock(&imp->imp_lock); + force_verify = imp->imp_force_verify != 0; + spin_unlock(&imp->imp_lock); + + if (force_verify) + ptlrpc_pinger_wake_up(); +} + +static int ptlrpc_busy_reconnect(int rc) +{ + return (rc == -EBUSY) || (rc == -EAGAIN); +} + +/** + * interpret_reply callback for connect RPCs. + * Looks into returned status of connect operation and decides + * what to do with the import - i.e enter recovery, promote it to + * full state for normal operations of disconnect it due to an error. + */ +static int ptlrpc_connect_interpret(const struct lu_env *env, + struct ptlrpc_request *request, + void *data, int rc) +{ + struct ptlrpc_connect_async_args *aa = data; + struct obd_import *imp = request->rq_import; + struct client_obd *cli = &imp->imp_obd->u.cli; + struct lustre_handle old_hdl; + __u64 old_connect_flags; + int msg_flags; + struct obd_connect_data *ocd; + struct obd_export *exp; + int ret; + + spin_lock(&imp->imp_lock); + if (imp->imp_state == LUSTRE_IMP_CLOSED) { + imp->imp_connect_tried = 1; + spin_unlock(&imp->imp_lock); + return 0; + } + + if (rc) { + /* if this reconnect to busy export - not need select new target + * for connecting*/ + imp->imp_force_reconnect = ptlrpc_busy_reconnect(rc); + spin_unlock(&imp->imp_lock); + ptlrpc_maybe_ping_import_soon(imp); + goto out; + } + spin_unlock(&imp->imp_lock); + + LASSERT(imp->imp_conn_current); + + msg_flags = lustre_msg_get_op_flags(request->rq_repmsg); + + ret = req_capsule_get_size(&request->rq_pill, &RMF_CONNECT_DATA, + RCL_SERVER); + /* server replied obd_connect_data is always bigger */ + ocd = req_capsule_server_sized_get(&request->rq_pill, + &RMF_CONNECT_DATA, ret); + + if (ocd == NULL) { + CERROR("%s: no connect data from server\n", + imp->imp_obd->obd_name); + rc = -EPROTO; + goto out; + } + + spin_lock(&imp->imp_lock); + + /* All imports are pingable */ + imp->imp_pingable = 1; + imp->imp_force_reconnect = 0; + imp->imp_force_verify = 0; + + imp->imp_connect_data = *ocd; + + CDEBUG(D_HA, "%s: connect to target with instance %u\n", + imp->imp_obd->obd_name, ocd->ocd_instance); + exp = class_conn2export(&imp->imp_dlm_handle); + + spin_unlock(&imp->imp_lock); + + /* check that server granted subset of flags we asked for. */ + if ((ocd->ocd_connect_flags & imp->imp_connect_flags_orig) != + ocd->ocd_connect_flags) { + CERROR("%s: Server didn't granted asked subset of flags: asked=%#llx grranted=%#llx\n", + imp->imp_obd->obd_name, imp->imp_connect_flags_orig, + ocd->ocd_connect_flags); + rc = -EPROTO; + goto out; + } + + if (!exp) { + /* This could happen if export is cleaned during the + connect attempt */ + CERROR("%s: missing export after connect\n", + imp->imp_obd->obd_name); + rc = -ENODEV; + goto out; + } + old_connect_flags = exp_connect_flags(exp); + exp->exp_connect_data = *ocd; + imp->imp_obd->obd_self_export->exp_connect_data = *ocd; + class_export_put(exp); + + obd_import_event(imp->imp_obd, imp, IMP_EVENT_OCD); + + if (aa->pcaa_initial_connect) { + spin_lock(&imp->imp_lock); + if (msg_flags & MSG_CONNECT_REPLAYABLE) { + imp->imp_replayable = 1; + spin_unlock(&imp->imp_lock); + CDEBUG(D_HA, "connected to replayable target: %s\n", + obd2cli_tgt(imp->imp_obd)); + } else { + imp->imp_replayable = 0; + spin_unlock(&imp->imp_lock); + } + + /* if applies, adjust the imp->imp_msg_magic here + * according to reply flags */ + + imp->imp_remote_handle = + *lustre_msg_get_handle(request->rq_repmsg); + + /* Initial connects are allowed for clients with non-random + * uuids when servers are in recovery. Simply signal the + * servers replay is complete and wait in REPLAY_WAIT. */ + if (msg_flags & MSG_CONNECT_RECOVERING) { + CDEBUG(D_HA, "connect to %s during recovery\n", + obd2cli_tgt(imp->imp_obd)); + IMPORT_SET_STATE(imp, LUSTRE_IMP_REPLAY_LOCKS); + } else { + IMPORT_SET_STATE(imp, LUSTRE_IMP_FULL); + ptlrpc_activate_import(imp); + } + + rc = 0; + goto finish; + } + + /* Determine what recovery state to move the import to. */ + if (MSG_CONNECT_RECONNECT & msg_flags) { + memset(&old_hdl, 0, sizeof(old_hdl)); + if (!memcmp(&old_hdl, lustre_msg_get_handle(request->rq_repmsg), + sizeof(old_hdl))) { + LCONSOLE_WARN("Reconnect to %s (at @%s) failed due bad handle %#llx\n", + obd2cli_tgt(imp->imp_obd), + imp->imp_connection->c_remote_uuid.uuid, + imp->imp_dlm_handle.cookie); + rc = -ENOTCONN; + goto out; + } + + if (memcmp(&imp->imp_remote_handle, + lustre_msg_get_handle(request->rq_repmsg), + sizeof(imp->imp_remote_handle))) { + int level = msg_flags & MSG_CONNECT_RECOVERING ? + D_HA : D_WARNING; + + /* Bug 16611/14775: if server handle have changed, + * that means some sort of disconnection happened. + * If the server is not in recovery, that also means it + * already erased all of our state because of previous + * eviction. If it is in recovery - we are safe to + * participate since we can reestablish all of our state + * with server again */ + if ((MSG_CONNECT_RECOVERING & msg_flags)) { + CDEBUG(level, "%s@%s changed server handle from %#llx to %#llx but is still in recovery\n", + obd2cli_tgt(imp->imp_obd), + imp->imp_connection->c_remote_uuid.uuid, + imp->imp_remote_handle.cookie, + lustre_msg_get_handle( + request->rq_repmsg)->cookie); + } else { + LCONSOLE_WARN("Evicted from %s (at %s) after server handle changed from %#llx to %#llx\n", + obd2cli_tgt(imp->imp_obd), + imp->imp_connection-> \ + c_remote_uuid.uuid, + imp->imp_remote_handle.cookie, + lustre_msg_get_handle( + request->rq_repmsg)->cookie); + } + + + imp->imp_remote_handle = + *lustre_msg_get_handle(request->rq_repmsg); + + if (!(MSG_CONNECT_RECOVERING & msg_flags)) { + IMPORT_SET_STATE(imp, LUSTRE_IMP_EVICTED); + rc = 0; + goto finish; + } + + } else { + CDEBUG(D_HA, "reconnected to %s@%s after partition\n", + obd2cli_tgt(imp->imp_obd), + imp->imp_connection->c_remote_uuid.uuid); + } + + if (imp->imp_invalid) { + CDEBUG(D_HA, "%s: reconnected but import is invalid; marking evicted\n", + imp->imp_obd->obd_name); + IMPORT_SET_STATE(imp, LUSTRE_IMP_EVICTED); + } else if (MSG_CONNECT_RECOVERING & msg_flags) { + CDEBUG(D_HA, "%s: reconnected to %s during replay\n", + imp->imp_obd->obd_name, + obd2cli_tgt(imp->imp_obd)); + + spin_lock(&imp->imp_lock); + imp->imp_resend_replay = 1; + spin_unlock(&imp->imp_lock); + + IMPORT_SET_STATE(imp, imp->imp_replay_state); + } else { + IMPORT_SET_STATE(imp, LUSTRE_IMP_RECOVER); + } + } else if ((MSG_CONNECT_RECOVERING & msg_flags) && !imp->imp_invalid) { + LASSERT(imp->imp_replayable); + imp->imp_remote_handle = + *lustre_msg_get_handle(request->rq_repmsg); + imp->imp_last_replay_transno = 0; + IMPORT_SET_STATE(imp, LUSTRE_IMP_REPLAY); + } else { + DEBUG_REQ(D_HA, request, "%s: evicting (reconnect/recover flags not set: %x)", + imp->imp_obd->obd_name, msg_flags); + imp->imp_remote_handle = + *lustre_msg_get_handle(request->rq_repmsg); + IMPORT_SET_STATE(imp, LUSTRE_IMP_EVICTED); + } + + /* Sanity checks for a reconnected import. */ + if (!(imp->imp_replayable) != !(msg_flags & MSG_CONNECT_REPLAYABLE)) { + CERROR("imp_replayable flag does not match server after reconnect. We should LBUG right here.\n"); + } + + if (lustre_msg_get_last_committed(request->rq_repmsg) > 0 && + lustre_msg_get_last_committed(request->rq_repmsg) < + aa->pcaa_peer_committed) { + CERROR("%s went back in time (transno %lld was previously committed, server now claims %lld)! See https://bugzilla.lustre.org/show_bug.cgi?id=9646\n", + obd2cli_tgt(imp->imp_obd), aa->pcaa_peer_committed, + lustre_msg_get_last_committed(request->rq_repmsg)); + } + +finish: + rc = ptlrpc_import_recovery_state_machine(imp); + if (rc != 0) { + if (rc == -ENOTCONN) { + CDEBUG(D_HA, "evicted/aborted by %s@%s during recovery; invalidating and reconnecting\n", + obd2cli_tgt(imp->imp_obd), + imp->imp_connection->c_remote_uuid.uuid); + ptlrpc_connect_import(imp); + imp->imp_connect_tried = 1; + return 0; + } + } else { + + spin_lock(&imp->imp_lock); + list_del(&imp->imp_conn_current->oic_item); + list_add(&imp->imp_conn_current->oic_item, + &imp->imp_conn_list); + imp->imp_last_success_conn = + imp->imp_conn_current->oic_last_attempt; + + spin_unlock(&imp->imp_lock); + + if ((imp->imp_connect_flags_orig & OBD_CONNECT_IBITS) && + !(ocd->ocd_connect_flags & OBD_CONNECT_IBITS)) { + LCONSOLE_WARN("%s: MDS %s does not support ibits lock, either very old or invalid: requested %llx, replied %llx\n", + imp->imp_obd->obd_name, + imp->imp_connection->c_remote_uuid.uuid, + imp->imp_connect_flags_orig, + ocd->ocd_connect_flags); + rc = -EPROTO; + goto out; + } + + if ((ocd->ocd_connect_flags & OBD_CONNECT_VERSION) && + (ocd->ocd_version > LUSTRE_VERSION_CODE + + LUSTRE_VERSION_OFFSET_WARN || + ocd->ocd_version < LUSTRE_VERSION_CODE - + LUSTRE_VERSION_OFFSET_WARN)) { + /* Sigh, some compilers do not like #ifdef in the middle + of macro arguments */ + const char *older = "older. Consider upgrading server or downgrading client" + ; + const char *newer = "newer than client version. Consider upgrading client" + ; + + LCONSOLE_WARN("Server %s version (%d.%d.%d.%d) is much %s (%s)\n", + obd2cli_tgt(imp->imp_obd), + OBD_OCD_VERSION_MAJOR(ocd->ocd_version), + OBD_OCD_VERSION_MINOR(ocd->ocd_version), + OBD_OCD_VERSION_PATCH(ocd->ocd_version), + OBD_OCD_VERSION_FIX(ocd->ocd_version), + ocd->ocd_version > LUSTRE_VERSION_CODE ? + newer : older, LUSTRE_VERSION_STRING); + } + +#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(3, 2, 50, 0) + /* Check if server has LU-1252 fix applied to not always swab + * the IR MNE entries. Do this only once per connection. This + * fixup is version-limited, because we don't want to carry the + * OBD_CONNECT_MNE_SWAB flag around forever, just so long as we + * need interop with unpatched 2.2 servers. For newer servers, + * the client will do MNE swabbing only as needed. LU-1644 */ + if (unlikely((ocd->ocd_connect_flags & OBD_CONNECT_VERSION) && + !(ocd->ocd_connect_flags & OBD_CONNECT_MNE_SWAB) && + OBD_OCD_VERSION_MAJOR(ocd->ocd_version) == 2 && + OBD_OCD_VERSION_MINOR(ocd->ocd_version) == 2 && + OBD_OCD_VERSION_PATCH(ocd->ocd_version) < 55 && + strcmp(imp->imp_obd->obd_type->typ_name, + LUSTRE_MGC_NAME) == 0)) + imp->imp_need_mne_swab = 1; + else /* clear if server was upgraded since last connect */ + imp->imp_need_mne_swab = 0; +#else +#warning "LU-1644: Remove old OBD_CONNECT_MNE_SWAB fixup and imp_need_mne_swab" +#endif + + if (ocd->ocd_connect_flags & OBD_CONNECT_CKSUM) { + /* We sent to the server ocd_cksum_types with bits set + * for algorithms we understand. The server masked off + * the checksum types it doesn't support */ + if ((ocd->ocd_cksum_types & + cksum_types_supported_client()) == 0) { + LCONSOLE_WARN("The negotiation of the checksum algorithm to use with server %s failed (%x/%x), disabling checksums\n", + obd2cli_tgt(imp->imp_obd), + ocd->ocd_cksum_types, + cksum_types_supported_client()); + cli->cl_checksum = 0; + cli->cl_supp_cksum_types = OBD_CKSUM_ADLER; + } else { + cli->cl_supp_cksum_types = ocd->ocd_cksum_types; + } + } else { + /* The server does not support OBD_CONNECT_CKSUM. + * Enforce ADLER for backward compatibility*/ + cli->cl_supp_cksum_types = OBD_CKSUM_ADLER; + } + cli->cl_cksum_type = cksum_type_select(cli->cl_supp_cksum_types); + + if (ocd->ocd_connect_flags & OBD_CONNECT_BRW_SIZE) + cli->cl_max_pages_per_rpc = + min(ocd->ocd_brw_size >> PAGE_CACHE_SHIFT, + cli->cl_max_pages_per_rpc); + else if (imp->imp_connect_op == MDS_CONNECT || + imp->imp_connect_op == MGS_CONNECT) + cli->cl_max_pages_per_rpc = 1; + + /* Reset ns_connect_flags only for initial connect. It might be + * changed in while using FS and if we reset it in reconnect + * this leads to losing user settings done before such as + * disable lru_resize, etc. */ + if (old_connect_flags != exp_connect_flags(exp) || + aa->pcaa_initial_connect) { + CDEBUG(D_HA, "%s: Resetting ns_connect_flags to server flags: %#llx\n", + imp->imp_obd->obd_name, ocd->ocd_connect_flags); + imp->imp_obd->obd_namespace->ns_connect_flags = + ocd->ocd_connect_flags; + imp->imp_obd->obd_namespace->ns_orig_connect_flags = + ocd->ocd_connect_flags; + } + + if ((ocd->ocd_connect_flags & OBD_CONNECT_AT) && + (imp->imp_msg_magic == LUSTRE_MSG_MAGIC_V2)) + /* We need a per-message support flag, because + a. we don't know if the incoming connect reply + supports AT or not (in reply_in_callback) + until we unpack it. + b. failovered server means export and flags are gone + (in ptlrpc_send_reply). + Can only be set when we know AT is supported at + both ends */ + imp->imp_msghdr_flags |= MSGHDR_AT_SUPPORT; + else + imp->imp_msghdr_flags &= ~MSGHDR_AT_SUPPORT; + + if ((ocd->ocd_connect_flags & OBD_CONNECT_FULL20) && + (imp->imp_msg_magic == LUSTRE_MSG_MAGIC_V2)) + imp->imp_msghdr_flags |= MSGHDR_CKSUM_INCOMPAT18; + else + imp->imp_msghdr_flags &= ~MSGHDR_CKSUM_INCOMPAT18; + + LASSERT((cli->cl_max_pages_per_rpc <= PTLRPC_MAX_BRW_PAGES) && + (cli->cl_max_pages_per_rpc > 0)); + } + +out: + imp->imp_connect_tried = 1; + + if (rc != 0) { + IMPORT_SET_STATE(imp, LUSTRE_IMP_DISCON); + if (rc == -EACCES) { + /* + * Give up trying to reconnect + * EACCES means client has no permission for connection + */ + imp->imp_obd->obd_no_recov = 1; + ptlrpc_deactivate_import(imp); + } + + if (rc == -EPROTO) { + struct obd_connect_data *ocd; + + /* reply message might not be ready */ + if (request->rq_repmsg == NULL) + return -EPROTO; + + ocd = req_capsule_server_get(&request->rq_pill, + &RMF_CONNECT_DATA); + if (ocd && + (ocd->ocd_connect_flags & OBD_CONNECT_VERSION) && + (ocd->ocd_version != LUSTRE_VERSION_CODE)) { + /* + * Actually servers are only supposed to refuse + * connection from liblustre clients, so we + * should never see this from VFS context + */ + LCONSOLE_ERROR_MSG(0x16a, "Server %s version (%d.%d.%d.%d) refused connection from this client with an incompatible version (%s). Client must be recompiled\n", + obd2cli_tgt(imp->imp_obd), + OBD_OCD_VERSION_MAJOR(ocd->ocd_version), + OBD_OCD_VERSION_MINOR(ocd->ocd_version), + OBD_OCD_VERSION_PATCH(ocd->ocd_version), + OBD_OCD_VERSION_FIX(ocd->ocd_version), + LUSTRE_VERSION_STRING); + ptlrpc_deactivate_import(imp); + IMPORT_SET_STATE(imp, LUSTRE_IMP_CLOSED); + } + return -EPROTO; + } + + ptlrpc_maybe_ping_import_soon(imp); + + CDEBUG(D_HA, "recovery of %s on %s failed (%d)\n", + obd2cli_tgt(imp->imp_obd), + (char *)imp->imp_connection->c_remote_uuid.uuid, rc); + } + + wake_up_all(&imp->imp_recovery_waitq); + return rc; +} + +/** + * interpret callback for "completed replay" RPCs. + * \see signal_completed_replay + */ +static int completed_replay_interpret(const struct lu_env *env, + struct ptlrpc_request *req, + void *data, int rc) +{ + atomic_dec(&req->rq_import->imp_replay_inflight); + if (req->rq_status == 0 && + !req->rq_import->imp_vbr_failed) { + ptlrpc_import_recovery_state_machine(req->rq_import); + } else { + if (req->rq_import->imp_vbr_failed) { + CDEBUG(D_WARNING, + "%s: version recovery fails, reconnecting\n", + req->rq_import->imp_obd->obd_name); + } else { + CDEBUG(D_HA, "%s: LAST_REPLAY message error: %d, reconnecting\n", + req->rq_import->imp_obd->obd_name, + req->rq_status); + } + ptlrpc_connect_import(req->rq_import); + } + + return 0; +} + +/** + * Let server know that we have no requests to replay anymore. + * Achieved by just sending a PING request + */ +static int signal_completed_replay(struct obd_import *imp) +{ + struct ptlrpc_request *req; + + if (unlikely(OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_FINISH_REPLAY))) + return 0; + + LASSERT(atomic_read(&imp->imp_replay_inflight) == 0); + atomic_inc(&imp->imp_replay_inflight); + + req = ptlrpc_request_alloc_pack(imp, &RQF_OBD_PING, LUSTRE_OBD_VERSION, + OBD_PING); + if (req == NULL) { + atomic_dec(&imp->imp_replay_inflight); + return -ENOMEM; + } + + ptlrpc_request_set_replen(req); + req->rq_send_state = LUSTRE_IMP_REPLAY_WAIT; + lustre_msg_add_flags(req->rq_reqmsg, + MSG_LOCK_REPLAY_DONE | MSG_REQ_REPLAY_DONE); + if (AT_OFF) + req->rq_timeout *= 3; + req->rq_interpret_reply = completed_replay_interpret; + + ptlrpcd_add_req(req, PDL_POLICY_ROUND, -1); + return 0; +} + +/** + * In kernel code all import invalidation happens in its own + * separate thread, so that whatever application happened to encounter + * a problem could still be killed or otherwise continue + */ +static int ptlrpc_invalidate_import_thread(void *data) +{ + struct obd_import *imp = data; + + unshare_fs_struct(); + + CDEBUG(D_HA, "thread invalidate import %s to %s@%s\n", + imp->imp_obd->obd_name, obd2cli_tgt(imp->imp_obd), + imp->imp_connection->c_remote_uuid.uuid); + + ptlrpc_invalidate_import(imp); + + if (obd_dump_on_eviction) { + CERROR("dump the log upon eviction\n"); + libcfs_debug_dumplog(); + } + + IMPORT_SET_STATE(imp, LUSTRE_IMP_RECOVER); + ptlrpc_import_recovery_state_machine(imp); + + class_import_put(imp); + return 0; +} + +/** + * This is the state machine for client-side recovery on import. + * + * Typically we have two possibly paths. If we came to server and it is not + * in recovery, we just enter IMP_EVICTED state, invalidate our import + * state and reconnect from scratch. + * If we came to server that is in recovery, we enter IMP_REPLAY import state. + * We go through our list of requests to replay and send them to server one by + * one. + * After sending all request from the list we change import state to + * IMP_REPLAY_LOCKS and re-request all the locks we believe we have from server + * and also all the locks we don't yet have and wait for server to grant us. + * After that we send a special "replay completed" request and change import + * state to IMP_REPLAY_WAIT. + * Upon receiving reply to that "replay completed" RPC we enter IMP_RECOVER + * state and resend all requests from sending list. + * After that we promote import to FULL state and send all delayed requests + * and import is fully operational after that. + * + */ +int ptlrpc_import_recovery_state_machine(struct obd_import *imp) +{ + int rc = 0; + int inflight; + char *target_start; + int target_len; + + if (imp->imp_state == LUSTRE_IMP_EVICTED) { + deuuidify(obd2cli_tgt(imp->imp_obd), NULL, + &target_start, &target_len); + /* Don't care about MGC eviction */ + if (strcmp(imp->imp_obd->obd_type->typ_name, + LUSTRE_MGC_NAME) != 0) { + LCONSOLE_ERROR_MSG(0x167, "%s: This client was evicted by %.*s; in progress operations using this service will fail.\n", + imp->imp_obd->obd_name, target_len, + target_start); + } + CDEBUG(D_HA, "evicted from %s@%s; invalidating\n", + obd2cli_tgt(imp->imp_obd), + imp->imp_connection->c_remote_uuid.uuid); + /* reset vbr_failed flag upon eviction */ + spin_lock(&imp->imp_lock); + imp->imp_vbr_failed = 0; + spin_unlock(&imp->imp_lock); + + { + struct task_struct *task; + /* bug 17802: XXX client_disconnect_export vs connect request + * race. if client will evicted at this time, we start + * invalidate thread without reference to import and import can + * be freed at same time. */ + class_import_get(imp); + task = kthread_run(ptlrpc_invalidate_import_thread, imp, + "ll_imp_inval"); + if (IS_ERR(task)) { + class_import_put(imp); + CERROR("error starting invalidate thread: %d\n", rc); + rc = PTR_ERR(task); + } else { + rc = 0; + } + return rc; + } + } + + if (imp->imp_state == LUSTRE_IMP_REPLAY) { + CDEBUG(D_HA, "replay requested by %s\n", + obd2cli_tgt(imp->imp_obd)); + rc = ptlrpc_replay_next(imp, &inflight); + if (inflight == 0 && + atomic_read(&imp->imp_replay_inflight) == 0) { + IMPORT_SET_STATE(imp, LUSTRE_IMP_REPLAY_LOCKS); + rc = ldlm_replay_locks(imp); + if (rc) + goto out; + } + rc = 0; + } + + if (imp->imp_state == LUSTRE_IMP_REPLAY_LOCKS) { + if (atomic_read(&imp->imp_replay_inflight) == 0) { + IMPORT_SET_STATE(imp, LUSTRE_IMP_REPLAY_WAIT); + rc = signal_completed_replay(imp); + if (rc) + goto out; + } + + } + + if (imp->imp_state == LUSTRE_IMP_REPLAY_WAIT) { + if (atomic_read(&imp->imp_replay_inflight) == 0) { + IMPORT_SET_STATE(imp, LUSTRE_IMP_RECOVER); + } + } + + if (imp->imp_state == LUSTRE_IMP_RECOVER) { + CDEBUG(D_HA, "reconnected to %s@%s\n", + obd2cli_tgt(imp->imp_obd), + imp->imp_connection->c_remote_uuid.uuid); + + rc = ptlrpc_resend(imp); + if (rc) + goto out; + IMPORT_SET_STATE(imp, LUSTRE_IMP_FULL); + ptlrpc_activate_import(imp); + + deuuidify(obd2cli_tgt(imp->imp_obd), NULL, + &target_start, &target_len); + LCONSOLE_INFO("%s: Connection restored to %.*s (at %s)\n", + imp->imp_obd->obd_name, + target_len, target_start, + libcfs_nid2str(imp->imp_connection->c_peer.nid)); + } + + if (imp->imp_state == LUSTRE_IMP_FULL) { + wake_up_all(&imp->imp_recovery_waitq); + ptlrpc_wake_delayed(imp); + } + +out: + return rc; +} + +int ptlrpc_disconnect_import(struct obd_import *imp, int noclose) +{ + struct ptlrpc_request *req; + int rq_opc, rc = 0; + + if (imp->imp_obd->obd_force) + goto set_state; + + switch (imp->imp_connect_op) { + case OST_CONNECT: + rq_opc = OST_DISCONNECT; + break; + case MDS_CONNECT: + rq_opc = MDS_DISCONNECT; + break; + case MGS_CONNECT: + rq_opc = MGS_DISCONNECT; + break; + default: + rc = -EINVAL; + CERROR("%s: don't know how to disconnect from %s (connect_op %d): rc = %d\n", + imp->imp_obd->obd_name, obd2cli_tgt(imp->imp_obd), + imp->imp_connect_op, rc); + return rc; + } + + if (ptlrpc_import_in_recovery(imp)) { + struct l_wait_info lwi; + long timeout; + + if (AT_OFF) { + if (imp->imp_server_timeout) + timeout = cfs_time_seconds(obd_timeout / 2); + else + timeout = cfs_time_seconds(obd_timeout); + } else { + int idx = import_at_get_index(imp, + imp->imp_client->cli_request_portal); + timeout = cfs_time_seconds( + at_get(&imp->imp_at.iat_service_estimate[idx])); + } + + lwi = LWI_TIMEOUT_INTR(cfs_timeout_cap(timeout), + back_to_sleep, LWI_ON_SIGNAL_NOOP, NULL); + rc = l_wait_event(imp->imp_recovery_waitq, + !ptlrpc_import_in_recovery(imp), &lwi); + + } + + spin_lock(&imp->imp_lock); + if (imp->imp_state != LUSTRE_IMP_FULL) + goto out; + spin_unlock(&imp->imp_lock); + + req = ptlrpc_request_alloc_pack(imp, &RQF_MDS_DISCONNECT, + LUSTRE_OBD_VERSION, rq_opc); + if (req) { + /* We are disconnecting, do not retry a failed DISCONNECT rpc if + * it fails. We can get through the above with a down server + * if the client doesn't know the server is gone yet. */ + req->rq_no_resend = 1; + + /* We want client umounts to happen quickly, no matter the + server state... */ + req->rq_timeout = min_t(int, req->rq_timeout, + INITIAL_CONNECT_TIMEOUT); + + IMPORT_SET_STATE(imp, LUSTRE_IMP_CONNECTING); + req->rq_send_state = LUSTRE_IMP_CONNECTING; + ptlrpc_request_set_replen(req); + rc = ptlrpc_queue_wait(req); + ptlrpc_req_finished(req); + } + +set_state: + spin_lock(&imp->imp_lock); +out: + if (noclose) + IMPORT_SET_STATE_NOLOCK(imp, LUSTRE_IMP_DISCON); + else + IMPORT_SET_STATE_NOLOCK(imp, LUSTRE_IMP_CLOSED); + memset(&imp->imp_remote_handle, 0, sizeof(imp->imp_remote_handle)); + spin_unlock(&imp->imp_lock); + + if (rc == -ETIMEDOUT || rc == -ENOTCONN || rc == -ESHUTDOWN) + rc = 0; + + return rc; +} +EXPORT_SYMBOL(ptlrpc_disconnect_import); + +void ptlrpc_cleanup_imp(struct obd_import *imp) +{ + spin_lock(&imp->imp_lock); + IMPORT_SET_STATE_NOLOCK(imp, LUSTRE_IMP_CLOSED); + imp->imp_generation++; + spin_unlock(&imp->imp_lock); + ptlrpc_abort_inflight(imp); +} +EXPORT_SYMBOL(ptlrpc_cleanup_imp); + +/* Adaptive Timeout utils */ +extern unsigned int at_min, at_max, at_history; + +/* Bin into timeslices using AT_BINS bins. + This gives us a max of the last binlimit*AT_BINS secs without the storage, + but still smoothing out a return to normalcy from a slow response. + (E.g. remember the maximum latency in each minute of the last 4 minutes.) */ +int at_measured(struct adaptive_timeout *at, unsigned int val) +{ + unsigned int old = at->at_current; + time_t now = get_seconds(); + time_t binlimit = max_t(time_t, at_history / AT_BINS, 1); + + LASSERT(at); + CDEBUG(D_OTHER, "add %u to %p time=%lu v=%u (%u %u %u %u)\n", + val, at, now - at->at_binstart, at->at_current, + at->at_hist[0], at->at_hist[1], at->at_hist[2], at->at_hist[3]); + + if (val == 0) + /* 0's don't count, because we never want our timeout to + drop to 0, and because 0 could mean an error */ + return 0; + + spin_lock(&at->at_lock); + + if (unlikely(at->at_binstart == 0)) { + /* Special case to remove default from history */ + at->at_current = val; + at->at_worst_ever = val; + at->at_worst_time = now; + at->at_hist[0] = val; + at->at_binstart = now; + } else if (now - at->at_binstart < binlimit) { + /* in bin 0 */ + at->at_hist[0] = max(val, at->at_hist[0]); + at->at_current = max(val, at->at_current); + } else { + int i, shift; + unsigned int maxv = val; + /* move bins over */ + shift = (now - at->at_binstart) / binlimit; + LASSERT(shift > 0); + for (i = AT_BINS - 1; i >= 0; i--) { + if (i >= shift) { + at->at_hist[i] = at->at_hist[i - shift]; + maxv = max(maxv, at->at_hist[i]); + } else { + at->at_hist[i] = 0; + } + } + at->at_hist[0] = val; + at->at_current = maxv; + at->at_binstart += shift * binlimit; + } + + if (at->at_current > at->at_worst_ever) { + at->at_worst_ever = at->at_current; + at->at_worst_time = now; + } + + if (at->at_flags & AT_FLG_NOHIST) + /* Only keep last reported val; keeping the rest of the history + for proc only */ + at->at_current = val; + + if (at_max > 0) + at->at_current = min(at->at_current, at_max); + at->at_current = max(at->at_current, at_min); + + if (at->at_current != old) + CDEBUG(D_OTHER, "AT %p change: old=%u new=%u delta=%d (val=%u) hist %u %u %u %u\n", + at, + old, at->at_current, at->at_current - old, val, + at->at_hist[0], at->at_hist[1], at->at_hist[2], + at->at_hist[3]); + + /* if we changed, report the old value */ + old = (at->at_current != old) ? old : 0; + + spin_unlock(&at->at_lock); + return old; +} + +/* Find the imp_at index for a given portal; assign if space available */ +int import_at_get_index(struct obd_import *imp, int portal) +{ + struct imp_at *at = &imp->imp_at; + int i; + + for (i = 0; i < IMP_AT_MAX_PORTALS; i++) { + if (at->iat_portal[i] == portal) + return i; + if (at->iat_portal[i] == 0) + /* unused */ + break; + } + + /* Not found in list, add it under a lock */ + spin_lock(&imp->imp_lock); + + /* Check unused under lock */ + for (; i < IMP_AT_MAX_PORTALS; i++) { + if (at->iat_portal[i] == portal) + goto out; + if (at->iat_portal[i] == 0) + /* unused */ + break; + } + + /* Not enough portals? */ + LASSERT(i < IMP_AT_MAX_PORTALS); + + at->iat_portal[i] = portal; +out: + spin_unlock(&imp->imp_lock); + return i; +} diff --git a/drivers/staging/lustre/lustre/ptlrpc/layout.c b/drivers/staging/lustre/lustre/ptlrpc/layout.c new file mode 100644 index 000000000..a42335e26 --- /dev/null +++ b/drivers/staging/lustre/lustre/ptlrpc/layout.c @@ -0,0 +1,2442 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2011, 2012, Intel Corporation. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + * + * lustre/ptlrpc/layout.c + * + * Lustre Metadata Target (mdt) request handler + * + * Author: Nikita Danilov <nikita@clusterfs.com> + */ +/* + * This file contains the "capsule/pill" abstraction layered above PTLRPC. + * + * Every struct ptlrpc_request contains a "pill", which points to a description + * of the format that the request conforms to. + */ + +#if !defined(__REQ_LAYOUT_USER__) + +#define DEBUG_SUBSYSTEM S_RPC + +#include <linux/module.h> + +/* LUSTRE_VERSION_CODE */ +#include "../include/lustre_ver.h" + +#include "../include/obd_support.h" +/* lustre_swab_mdt_body */ +#include "../include/lustre/lustre_idl.h" +/* obd2cli_tgt() (required by DEBUG_REQ()) */ +#include "../include/obd.h" + +/* __REQ_LAYOUT_USER__ */ +#endif +/* struct ptlrpc_request, lustre_msg* */ +#include "../include/lustre_req_layout.h" +#include "../include/lustre_acl.h" +#include "../include/lustre_debug.h" + +/* + * RQFs (see below) refer to two struct req_msg_field arrays describing the + * client request and server reply, respectively. + */ +/* empty set of fields... for suitable definition of emptiness. */ +static const struct req_msg_field *empty[] = { + &RMF_PTLRPC_BODY +}; + +static const struct req_msg_field *mgs_target_info_only[] = { + &RMF_PTLRPC_BODY, + &RMF_MGS_TARGET_INFO +}; + +static const struct req_msg_field *mgs_set_info[] = { + &RMF_PTLRPC_BODY, + &RMF_MGS_SEND_PARAM +}; + +static const struct req_msg_field *mgs_config_read_client[] = { + &RMF_PTLRPC_BODY, + &RMF_MGS_CONFIG_BODY +}; + +static const struct req_msg_field *mgs_config_read_server[] = { + &RMF_PTLRPC_BODY, + &RMF_MGS_CONFIG_RES +}; + +static const struct req_msg_field *log_cancel_client[] = { + &RMF_PTLRPC_BODY, + &RMF_LOGCOOKIES +}; + +static const struct req_msg_field *mdt_body_only[] = { + &RMF_PTLRPC_BODY, + &RMF_MDT_BODY +}; + +static const struct req_msg_field *mdt_body_capa[] = { + &RMF_PTLRPC_BODY, + &RMF_MDT_BODY, + &RMF_CAPA1 +}; + +static const struct req_msg_field *quotactl_only[] = { + &RMF_PTLRPC_BODY, + &RMF_OBD_QUOTACTL +}; + +static const struct req_msg_field *quota_body_only[] = { + &RMF_PTLRPC_BODY, + &RMF_QUOTA_BODY +}; + +static const struct req_msg_field *ldlm_intent_quota_client[] = { + &RMF_PTLRPC_BODY, + &RMF_DLM_REQ, + &RMF_LDLM_INTENT, + &RMF_QUOTA_BODY +}; + +static const struct req_msg_field *ldlm_intent_quota_server[] = { + &RMF_PTLRPC_BODY, + &RMF_DLM_REP, + &RMF_DLM_LVB, + &RMF_QUOTA_BODY +}; + +static const struct req_msg_field *mdt_close_client[] = { + &RMF_PTLRPC_BODY, + &RMF_MDT_EPOCH, + &RMF_REC_REINT, + &RMF_CAPA1 +}; + +static const struct req_msg_field *mdt_release_close_client[] = { + &RMF_PTLRPC_BODY, + &RMF_MDT_EPOCH, + &RMF_REC_REINT, + &RMF_CAPA1, + &RMF_CLOSE_DATA +}; + +static const struct req_msg_field *obd_statfs_server[] = { + &RMF_PTLRPC_BODY, + &RMF_OBD_STATFS +}; + +static const struct req_msg_field *seq_query_client[] = { + &RMF_PTLRPC_BODY, + &RMF_SEQ_OPC, + &RMF_SEQ_RANGE +}; + +static const struct req_msg_field *seq_query_server[] = { + &RMF_PTLRPC_BODY, + &RMF_SEQ_RANGE +}; + +static const struct req_msg_field *fld_query_client[] = { + &RMF_PTLRPC_BODY, + &RMF_FLD_OPC, + &RMF_FLD_MDFLD +}; + +static const struct req_msg_field *fld_query_server[] = { + &RMF_PTLRPC_BODY, + &RMF_FLD_MDFLD +}; + +static const struct req_msg_field *mds_getattr_name_client[] = { + &RMF_PTLRPC_BODY, + &RMF_MDT_BODY, + &RMF_CAPA1, + &RMF_NAME +}; + +static const struct req_msg_field *mds_reint_client[] = { + &RMF_PTLRPC_BODY, + &RMF_REC_REINT +}; + +static const struct req_msg_field *mds_reint_create_client[] = { + &RMF_PTLRPC_BODY, + &RMF_REC_REINT, + &RMF_CAPA1, + &RMF_NAME +}; + +static const struct req_msg_field *mds_reint_create_slave_client[] = { + &RMF_PTLRPC_BODY, + &RMF_REC_REINT, + &RMF_CAPA1, + &RMF_NAME, + &RMF_EADATA, + &RMF_DLM_REQ +}; + +static const struct req_msg_field *mds_reint_create_rmt_acl_client[] = { + &RMF_PTLRPC_BODY, + &RMF_REC_REINT, + &RMF_CAPA1, + &RMF_NAME, + &RMF_EADATA, + &RMF_DLM_REQ +}; + +static const struct req_msg_field *mds_reint_create_sym_client[] = { + &RMF_PTLRPC_BODY, + &RMF_REC_REINT, + &RMF_CAPA1, + &RMF_NAME, + &RMF_SYMTGT, + &RMF_DLM_REQ +}; + +static const struct req_msg_field *mds_reint_open_client[] = { + &RMF_PTLRPC_BODY, + &RMF_REC_REINT, + &RMF_CAPA1, + &RMF_CAPA2, + &RMF_NAME, + &RMF_EADATA +}; + +static const struct req_msg_field *mds_reint_open_server[] = { + &RMF_PTLRPC_BODY, + &RMF_MDT_BODY, + &RMF_MDT_MD, + &RMF_ACL, + &RMF_CAPA1, + &RMF_CAPA2 +}; + +static const struct req_msg_field *mds_reint_unlink_client[] = { + &RMF_PTLRPC_BODY, + &RMF_REC_REINT, + &RMF_CAPA1, + &RMF_NAME, + &RMF_DLM_REQ +}; + +static const struct req_msg_field *mds_reint_link_client[] = { + &RMF_PTLRPC_BODY, + &RMF_REC_REINT, + &RMF_CAPA1, + &RMF_CAPA2, + &RMF_NAME, + &RMF_DLM_REQ +}; + +static const struct req_msg_field *mds_reint_rename_client[] = { + &RMF_PTLRPC_BODY, + &RMF_REC_REINT, + &RMF_CAPA1, + &RMF_CAPA2, + &RMF_NAME, + &RMF_SYMTGT, + &RMF_DLM_REQ +}; + +static const struct req_msg_field *mds_last_unlink_server[] = { + &RMF_PTLRPC_BODY, + &RMF_MDT_BODY, + &RMF_MDT_MD, + &RMF_LOGCOOKIES, + &RMF_CAPA1, + &RMF_CAPA2 +}; + +static const struct req_msg_field *mds_reint_setattr_client[] = { + &RMF_PTLRPC_BODY, + &RMF_REC_REINT, + &RMF_CAPA1, + &RMF_MDT_EPOCH, + &RMF_EADATA, + &RMF_LOGCOOKIES, + &RMF_DLM_REQ +}; + +static const struct req_msg_field *mds_reint_setxattr_client[] = { + &RMF_PTLRPC_BODY, + &RMF_REC_REINT, + &RMF_CAPA1, + &RMF_NAME, + &RMF_EADATA, + &RMF_DLM_REQ +}; + +static const struct req_msg_field *mdt_swap_layouts[] = { + &RMF_PTLRPC_BODY, + &RMF_MDT_BODY, + &RMF_SWAP_LAYOUTS, + &RMF_CAPA1, + &RMF_CAPA2, + &RMF_DLM_REQ +}; + +static const struct req_msg_field *obd_connect_client[] = { + &RMF_PTLRPC_BODY, + &RMF_TGTUUID, + &RMF_CLUUID, + &RMF_CONN, + &RMF_CONNECT_DATA +}; + +static const struct req_msg_field *obd_connect_server[] = { + &RMF_PTLRPC_BODY, + &RMF_CONNECT_DATA +}; + +static const struct req_msg_field *obd_set_info_client[] = { + &RMF_PTLRPC_BODY, + &RMF_SETINFO_KEY, + &RMF_SETINFO_VAL +}; + +static const struct req_msg_field *ost_grant_shrink_client[] = { + &RMF_PTLRPC_BODY, + &RMF_SETINFO_KEY, + &RMF_OST_BODY +}; + +static const struct req_msg_field *mds_getinfo_client[] = { + &RMF_PTLRPC_BODY, + &RMF_GETINFO_KEY, + &RMF_GETINFO_VALLEN +}; + +static const struct req_msg_field *mds_getinfo_server[] = { + &RMF_PTLRPC_BODY, + &RMF_GETINFO_VAL, +}; + +static const struct req_msg_field *ldlm_enqueue_client[] = { + &RMF_PTLRPC_BODY, + &RMF_DLM_REQ +}; + +static const struct req_msg_field *ldlm_enqueue_server[] = { + &RMF_PTLRPC_BODY, + &RMF_DLM_REP +}; + +static const struct req_msg_field *ldlm_enqueue_lvb_server[] = { + &RMF_PTLRPC_BODY, + &RMF_DLM_REP, + &RMF_DLM_LVB +}; + +static const struct req_msg_field *ldlm_cp_callback_client[] = { + &RMF_PTLRPC_BODY, + &RMF_DLM_REQ, + &RMF_DLM_LVB +}; + +static const struct req_msg_field *ldlm_gl_callback_desc_client[] = { + &RMF_PTLRPC_BODY, + &RMF_DLM_REQ, + &RMF_DLM_GL_DESC +}; + +static const struct req_msg_field *ldlm_gl_callback_server[] = { + &RMF_PTLRPC_BODY, + &RMF_DLM_LVB +}; + +static const struct req_msg_field *ldlm_intent_basic_client[] = { + &RMF_PTLRPC_BODY, + &RMF_DLM_REQ, + &RMF_LDLM_INTENT, +}; + +static const struct req_msg_field *ldlm_intent_client[] = { + &RMF_PTLRPC_BODY, + &RMF_DLM_REQ, + &RMF_LDLM_INTENT, + &RMF_REC_REINT +}; + +static const struct req_msg_field *ldlm_intent_server[] = { + &RMF_PTLRPC_BODY, + &RMF_DLM_REP, + &RMF_MDT_BODY, + &RMF_MDT_MD, + &RMF_ACL +}; + +static const struct req_msg_field *ldlm_intent_layout_client[] = { + &RMF_PTLRPC_BODY, + &RMF_DLM_REQ, + &RMF_LDLM_INTENT, + &RMF_LAYOUT_INTENT, + &RMF_EADATA /* for new layout to be set up */ +}; +static const struct req_msg_field *ldlm_intent_open_server[] = { + &RMF_PTLRPC_BODY, + &RMF_DLM_REP, + &RMF_MDT_BODY, + &RMF_MDT_MD, + &RMF_ACL, + &RMF_CAPA1, + &RMF_CAPA2 +}; + +static const struct req_msg_field *ldlm_intent_getattr_client[] = { + &RMF_PTLRPC_BODY, + &RMF_DLM_REQ, + &RMF_LDLM_INTENT, + &RMF_MDT_BODY, /* coincides with mds_getattr_name_client[] */ + &RMF_CAPA1, + &RMF_NAME +}; + +static const struct req_msg_field *ldlm_intent_getattr_server[] = { + &RMF_PTLRPC_BODY, + &RMF_DLM_REP, + &RMF_MDT_BODY, + &RMF_MDT_MD, + &RMF_ACL, + &RMF_CAPA1 +}; + +static const struct req_msg_field *ldlm_intent_create_client[] = { + &RMF_PTLRPC_BODY, + &RMF_DLM_REQ, + &RMF_LDLM_INTENT, + &RMF_REC_REINT, /* coincides with mds_reint_create_client[] */ + &RMF_CAPA1, + &RMF_NAME, + &RMF_EADATA +}; + +static const struct req_msg_field *ldlm_intent_open_client[] = { + &RMF_PTLRPC_BODY, + &RMF_DLM_REQ, + &RMF_LDLM_INTENT, + &RMF_REC_REINT, /* coincides with mds_reint_open_client[] */ + &RMF_CAPA1, + &RMF_CAPA2, + &RMF_NAME, + &RMF_EADATA +}; + +static const struct req_msg_field *ldlm_intent_unlink_client[] = { + &RMF_PTLRPC_BODY, + &RMF_DLM_REQ, + &RMF_LDLM_INTENT, + &RMF_REC_REINT, /* coincides with mds_reint_unlink_client[] */ + &RMF_CAPA1, + &RMF_NAME +}; + +static const struct req_msg_field *ldlm_intent_getxattr_client[] = { + &RMF_PTLRPC_BODY, + &RMF_DLM_REQ, + &RMF_LDLM_INTENT, + &RMF_MDT_BODY, + &RMF_CAPA1, +}; + +static const struct req_msg_field *ldlm_intent_getxattr_server[] = { + &RMF_PTLRPC_BODY, + &RMF_DLM_REP, + &RMF_MDT_BODY, + &RMF_MDT_MD, + &RMF_ACL, /* for req_capsule_extend/mdt_intent_policy */ + &RMF_EADATA, + &RMF_EAVALS, + &RMF_EAVALS_LENS +}; + +static const struct req_msg_field *mds_getxattr_client[] = { + &RMF_PTLRPC_BODY, + &RMF_MDT_BODY, + &RMF_CAPA1, + &RMF_NAME, + &RMF_EADATA +}; + +static const struct req_msg_field *mds_getxattr_server[] = { + &RMF_PTLRPC_BODY, + &RMF_MDT_BODY, + &RMF_EADATA +}; + +static const struct req_msg_field *mds_getattr_server[] = { + &RMF_PTLRPC_BODY, + &RMF_MDT_BODY, + &RMF_MDT_MD, + &RMF_ACL, + &RMF_CAPA1, + &RMF_CAPA2 +}; + +static const struct req_msg_field *mds_setattr_server[] = { + &RMF_PTLRPC_BODY, + &RMF_MDT_BODY, + &RMF_MDT_MD, + &RMF_ACL, + &RMF_CAPA1, + &RMF_CAPA2 +}; + +static const struct req_msg_field *mds_update_client[] = { + &RMF_PTLRPC_BODY, + &RMF_UPDATE, +}; + +static const struct req_msg_field *mds_update_server[] = { + &RMF_PTLRPC_BODY, + &RMF_UPDATE_REPLY, +}; + +static const struct req_msg_field *llog_origin_handle_create_client[] = { + &RMF_PTLRPC_BODY, + &RMF_LLOGD_BODY, + &RMF_NAME +}; + +static const struct req_msg_field *llogd_body_only[] = { + &RMF_PTLRPC_BODY, + &RMF_LLOGD_BODY +}; + +static const struct req_msg_field *llog_log_hdr_only[] = { + &RMF_PTLRPC_BODY, + &RMF_LLOG_LOG_HDR +}; + +static const struct req_msg_field *llogd_conn_body_only[] = { + &RMF_PTLRPC_BODY, + &RMF_LLOGD_CONN_BODY +}; + +static const struct req_msg_field *llog_origin_handle_next_block_server[] = { + &RMF_PTLRPC_BODY, + &RMF_LLOGD_BODY, + &RMF_EADATA +}; + +static const struct req_msg_field *obd_idx_read_client[] = { + &RMF_PTLRPC_BODY, + &RMF_IDX_INFO +}; + +static const struct req_msg_field *obd_idx_read_server[] = { + &RMF_PTLRPC_BODY, + &RMF_IDX_INFO +}; + +static const struct req_msg_field *ost_body_only[] = { + &RMF_PTLRPC_BODY, + &RMF_OST_BODY +}; + +static const struct req_msg_field *ost_body_capa[] = { + &RMF_PTLRPC_BODY, + &RMF_OST_BODY, + &RMF_CAPA1 +}; + +static const struct req_msg_field *ost_destroy_client[] = { + &RMF_PTLRPC_BODY, + &RMF_OST_BODY, + &RMF_DLM_REQ, + &RMF_CAPA1 +}; + + +static const struct req_msg_field *ost_brw_client[] = { + &RMF_PTLRPC_BODY, + &RMF_OST_BODY, + &RMF_OBD_IOOBJ, + &RMF_NIOBUF_REMOTE, + &RMF_CAPA1 +}; + +static const struct req_msg_field *ost_brw_read_server[] = { + &RMF_PTLRPC_BODY, + &RMF_OST_BODY +}; + +static const struct req_msg_field *ost_brw_write_server[] = { + &RMF_PTLRPC_BODY, + &RMF_OST_BODY, + &RMF_RCS +}; + +static const struct req_msg_field *ost_get_info_generic_server[] = { + &RMF_PTLRPC_BODY, + &RMF_GENERIC_DATA, +}; + +static const struct req_msg_field *ost_get_info_generic_client[] = { + &RMF_PTLRPC_BODY, + &RMF_SETINFO_KEY +}; + +static const struct req_msg_field *ost_get_last_id_server[] = { + &RMF_PTLRPC_BODY, + &RMF_OBD_ID +}; + +static const struct req_msg_field *ost_get_last_fid_server[] = { + &RMF_PTLRPC_BODY, + &RMF_FID, +}; + +static const struct req_msg_field *ost_get_fiemap_client[] = { + &RMF_PTLRPC_BODY, + &RMF_FIEMAP_KEY, + &RMF_FIEMAP_VAL +}; + +static const struct req_msg_field *ost_get_fiemap_server[] = { + &RMF_PTLRPC_BODY, + &RMF_FIEMAP_VAL +}; + +static const struct req_msg_field *mdt_hsm_progress[] = { + &RMF_PTLRPC_BODY, + &RMF_MDT_BODY, + &RMF_MDS_HSM_PROGRESS, +}; + +static const struct req_msg_field *mdt_hsm_ct_register[] = { + &RMF_PTLRPC_BODY, + &RMF_MDT_BODY, + &RMF_MDS_HSM_ARCHIVE, +}; + +static const struct req_msg_field *mdt_hsm_ct_unregister[] = { + &RMF_PTLRPC_BODY, + &RMF_MDT_BODY, +}; + +static const struct req_msg_field *mdt_hsm_action_server[] = { + &RMF_PTLRPC_BODY, + &RMF_MDT_BODY, + &RMF_MDS_HSM_CURRENT_ACTION, +}; + +static const struct req_msg_field *mdt_hsm_state_get_server[] = { + &RMF_PTLRPC_BODY, + &RMF_MDT_BODY, + &RMF_HSM_USER_STATE, +}; + +static const struct req_msg_field *mdt_hsm_state_set[] = { + &RMF_PTLRPC_BODY, + &RMF_MDT_BODY, + &RMF_CAPA1, + &RMF_HSM_STATE_SET, +}; + +static const struct req_msg_field *mdt_hsm_request[] = { + &RMF_PTLRPC_BODY, + &RMF_MDT_BODY, + &RMF_MDS_HSM_REQUEST, + &RMF_MDS_HSM_USER_ITEM, + &RMF_GENERIC_DATA, +}; + +static struct req_format *req_formats[] = { + &RQF_OBD_PING, + &RQF_OBD_SET_INFO, + &RQF_OBD_IDX_READ, + &RQF_SEC_CTX, + &RQF_MGS_TARGET_REG, + &RQF_MGS_SET_INFO, + &RQF_MGS_CONFIG_READ, + &RQF_SEQ_QUERY, + &RQF_FLD_QUERY, + &RQF_MDS_CONNECT, + &RQF_MDS_DISCONNECT, + &RQF_MDS_GET_INFO, + &RQF_MDS_GETSTATUS, + &RQF_MDS_STATFS, + &RQF_MDS_GETATTR, + &RQF_MDS_GETATTR_NAME, + &RQF_MDS_GETXATTR, + &RQF_MDS_SYNC, + &RQF_MDS_CLOSE, + &RQF_MDS_RELEASE_CLOSE, + &RQF_MDS_PIN, + &RQF_MDS_UNPIN, + &RQF_MDS_READPAGE, + &RQF_MDS_WRITEPAGE, + &RQF_MDS_IS_SUBDIR, + &RQF_MDS_DONE_WRITING, + &RQF_MDS_REINT, + &RQF_MDS_REINT_CREATE, + &RQF_MDS_REINT_CREATE_RMT_ACL, + &RQF_MDS_REINT_CREATE_SLAVE, + &RQF_MDS_REINT_CREATE_SYM, + &RQF_MDS_REINT_OPEN, + &RQF_MDS_REINT_UNLINK, + &RQF_MDS_REINT_LINK, + &RQF_MDS_REINT_RENAME, + &RQF_MDS_REINT_SETATTR, + &RQF_MDS_REINT_SETXATTR, + &RQF_MDS_QUOTACHECK, + &RQF_MDS_QUOTACTL, + &RQF_MDS_HSM_PROGRESS, + &RQF_MDS_HSM_CT_REGISTER, + &RQF_MDS_HSM_CT_UNREGISTER, + &RQF_MDS_HSM_STATE_GET, + &RQF_MDS_HSM_STATE_SET, + &RQF_MDS_HSM_ACTION, + &RQF_MDS_HSM_REQUEST, + &RQF_MDS_SWAP_LAYOUTS, + &RQF_UPDATE_OBJ, + &RQF_QC_CALLBACK, + &RQF_OST_CONNECT, + &RQF_OST_DISCONNECT, + &RQF_OST_QUOTACHECK, + &RQF_OST_QUOTACTL, + &RQF_OST_GETATTR, + &RQF_OST_SETATTR, + &RQF_OST_CREATE, + &RQF_OST_PUNCH, + &RQF_OST_SYNC, + &RQF_OST_DESTROY, + &RQF_OST_BRW_READ, + &RQF_OST_BRW_WRITE, + &RQF_OST_STATFS, + &RQF_OST_SET_GRANT_INFO, + &RQF_OST_GET_INFO_GENERIC, + &RQF_OST_GET_INFO_LAST_ID, + &RQF_OST_GET_INFO_LAST_FID, + &RQF_OST_SET_INFO_LAST_FID, + &RQF_OST_GET_INFO_FIEMAP, + &RQF_LDLM_ENQUEUE, + &RQF_LDLM_ENQUEUE_LVB, + &RQF_LDLM_CONVERT, + &RQF_LDLM_CANCEL, + &RQF_LDLM_CALLBACK, + &RQF_LDLM_CP_CALLBACK, + &RQF_LDLM_BL_CALLBACK, + &RQF_LDLM_GL_CALLBACK, + &RQF_LDLM_GL_DESC_CALLBACK, + &RQF_LDLM_INTENT, + &RQF_LDLM_INTENT_BASIC, + &RQF_LDLM_INTENT_LAYOUT, + &RQF_LDLM_INTENT_GETATTR, + &RQF_LDLM_INTENT_OPEN, + &RQF_LDLM_INTENT_CREATE, + &RQF_LDLM_INTENT_UNLINK, + &RQF_LDLM_INTENT_GETXATTR, + &RQF_LDLM_INTENT_QUOTA, + &RQF_QUOTA_DQACQ, + &RQF_LOG_CANCEL, + &RQF_LLOG_ORIGIN_HANDLE_CREATE, + &RQF_LLOG_ORIGIN_HANDLE_DESTROY, + &RQF_LLOG_ORIGIN_HANDLE_NEXT_BLOCK, + &RQF_LLOG_ORIGIN_HANDLE_PREV_BLOCK, + &RQF_LLOG_ORIGIN_HANDLE_READ_HEADER, + &RQF_LLOG_ORIGIN_CONNECT, + &RQF_CONNECT, +}; + +struct req_msg_field { + const __u32 rmf_flags; + const char *rmf_name; + /** + * Field length. (-1) means "variable length". If the + * \a RMF_F_STRUCT_ARRAY flag is set the field is also variable-length, + * but the actual size must be a whole multiple of \a rmf_size. + */ + const int rmf_size; + void (*rmf_swabber)(void *); + void (*rmf_dumper)(void *); + int rmf_offset[ARRAY_SIZE(req_formats)][RCL_NR]; +}; + +enum rmf_flags { + /** + * The field is a string, must be NUL-terminated. + */ + RMF_F_STRING = 1 << 0, + /** + * The field's buffer size need not match the declared \a rmf_size. + */ + RMF_F_NO_SIZE_CHECK = 1 << 1, + /** + * The field's buffer size must be a whole multiple of the declared \a + * rmf_size and the \a rmf_swabber function must work on the declared \a + * rmf_size worth of bytes. + */ + RMF_F_STRUCT_ARRAY = 1 << 2 +}; + +struct req_capsule; + +/* + * Request fields. + */ +#define DEFINE_MSGF(name, flags, size, swabber, dumper) { \ + .rmf_name = (name), \ + .rmf_flags = (flags), \ + .rmf_size = (size), \ + .rmf_swabber = (void (*)(void *))(swabber), \ + .rmf_dumper = (void (*)(void *))(dumper) \ +} + +struct req_msg_field RMF_GENERIC_DATA = + DEFINE_MSGF("generic_data", 0, + -1, NULL, NULL); +EXPORT_SYMBOL(RMF_GENERIC_DATA); + +struct req_msg_field RMF_MGS_TARGET_INFO = + DEFINE_MSGF("mgs_target_info", 0, + sizeof(struct mgs_target_info), + lustre_swab_mgs_target_info, NULL); +EXPORT_SYMBOL(RMF_MGS_TARGET_INFO); + +struct req_msg_field RMF_MGS_SEND_PARAM = + DEFINE_MSGF("mgs_send_param", 0, + sizeof(struct mgs_send_param), + NULL, NULL); +EXPORT_SYMBOL(RMF_MGS_SEND_PARAM); + +struct req_msg_field RMF_MGS_CONFIG_BODY = + DEFINE_MSGF("mgs_config_read request", 0, + sizeof(struct mgs_config_body), + lustre_swab_mgs_config_body, NULL); +EXPORT_SYMBOL(RMF_MGS_CONFIG_BODY); + +struct req_msg_field RMF_MGS_CONFIG_RES = + DEFINE_MSGF("mgs_config_read reply ", 0, + sizeof(struct mgs_config_res), + lustre_swab_mgs_config_res, NULL); +EXPORT_SYMBOL(RMF_MGS_CONFIG_RES); + +struct req_msg_field RMF_U32 = + DEFINE_MSGF("generic u32", 0, + sizeof(__u32), lustre_swab_generic_32s, NULL); +EXPORT_SYMBOL(RMF_U32); + +struct req_msg_field RMF_SETINFO_VAL = + DEFINE_MSGF("setinfo_val", 0, -1, NULL, NULL); +EXPORT_SYMBOL(RMF_SETINFO_VAL); + +struct req_msg_field RMF_GETINFO_KEY = + DEFINE_MSGF("getinfo_key", 0, -1, NULL, NULL); +EXPORT_SYMBOL(RMF_GETINFO_KEY); + +struct req_msg_field RMF_GETINFO_VALLEN = + DEFINE_MSGF("getinfo_vallen", 0, + sizeof(__u32), lustre_swab_generic_32s, NULL); +EXPORT_SYMBOL(RMF_GETINFO_VALLEN); + +struct req_msg_field RMF_GETINFO_VAL = + DEFINE_MSGF("getinfo_val", 0, -1, NULL, NULL); +EXPORT_SYMBOL(RMF_GETINFO_VAL); + +struct req_msg_field RMF_SEQ_OPC = + DEFINE_MSGF("seq_query_opc", 0, + sizeof(__u32), lustre_swab_generic_32s, NULL); +EXPORT_SYMBOL(RMF_SEQ_OPC); + +struct req_msg_field RMF_SEQ_RANGE = + DEFINE_MSGF("seq_query_range", 0, + sizeof(struct lu_seq_range), + lustre_swab_lu_seq_range, NULL); +EXPORT_SYMBOL(RMF_SEQ_RANGE); + +struct req_msg_field RMF_FLD_OPC = + DEFINE_MSGF("fld_query_opc", 0, + sizeof(__u32), lustre_swab_generic_32s, NULL); +EXPORT_SYMBOL(RMF_FLD_OPC); + +struct req_msg_field RMF_FLD_MDFLD = + DEFINE_MSGF("fld_query_mdfld", 0, + sizeof(struct lu_seq_range), + lustre_swab_lu_seq_range, NULL); +EXPORT_SYMBOL(RMF_FLD_MDFLD); + +struct req_msg_field RMF_MDT_BODY = + DEFINE_MSGF("mdt_body", 0, + sizeof(struct mdt_body), lustre_swab_mdt_body, NULL); +EXPORT_SYMBOL(RMF_MDT_BODY); + +struct req_msg_field RMF_OBD_QUOTACTL = + DEFINE_MSGF("obd_quotactl", 0, + sizeof(struct obd_quotactl), + lustre_swab_obd_quotactl, NULL); +EXPORT_SYMBOL(RMF_OBD_QUOTACTL); + +struct req_msg_field RMF_QUOTA_BODY = + DEFINE_MSGF("quota_body", 0, + sizeof(struct quota_body), lustre_swab_quota_body, NULL); +EXPORT_SYMBOL(RMF_QUOTA_BODY); + +struct req_msg_field RMF_MDT_EPOCH = + DEFINE_MSGF("mdt_ioepoch", 0, + sizeof(struct mdt_ioepoch), lustre_swab_mdt_ioepoch, NULL); +EXPORT_SYMBOL(RMF_MDT_EPOCH); + +struct req_msg_field RMF_PTLRPC_BODY = + DEFINE_MSGF("ptlrpc_body", 0, + sizeof(struct ptlrpc_body), lustre_swab_ptlrpc_body, NULL); +EXPORT_SYMBOL(RMF_PTLRPC_BODY); + +struct req_msg_field RMF_CLOSE_DATA = + DEFINE_MSGF("data_version", 0, + sizeof(struct close_data), lustre_swab_close_data, NULL); +EXPORT_SYMBOL(RMF_CLOSE_DATA); + +struct req_msg_field RMF_OBD_STATFS = + DEFINE_MSGF("obd_statfs", 0, + sizeof(struct obd_statfs), lustre_swab_obd_statfs, NULL); +EXPORT_SYMBOL(RMF_OBD_STATFS); + +struct req_msg_field RMF_SETINFO_KEY = + DEFINE_MSGF("setinfo_key", 0, -1, NULL, NULL); +EXPORT_SYMBOL(RMF_SETINFO_KEY); + +struct req_msg_field RMF_NAME = + DEFINE_MSGF("name", RMF_F_STRING, -1, NULL, NULL); +EXPORT_SYMBOL(RMF_NAME); + +struct req_msg_field RMF_SYMTGT = + DEFINE_MSGF("symtgt", RMF_F_STRING, -1, NULL, NULL); +EXPORT_SYMBOL(RMF_SYMTGT); + +struct req_msg_field RMF_TGTUUID = + DEFINE_MSGF("tgtuuid", RMF_F_STRING, sizeof(struct obd_uuid) - 1, NULL, + NULL); +EXPORT_SYMBOL(RMF_TGTUUID); + +struct req_msg_field RMF_CLUUID = + DEFINE_MSGF("cluuid", RMF_F_STRING, sizeof(struct obd_uuid) - 1, NULL, + NULL); +EXPORT_SYMBOL(RMF_CLUUID); + +struct req_msg_field RMF_STRING = + DEFINE_MSGF("string", RMF_F_STRING, -1, NULL, NULL); +EXPORT_SYMBOL(RMF_STRING); + +struct req_msg_field RMF_LLOGD_BODY = + DEFINE_MSGF("llogd_body", 0, + sizeof(struct llogd_body), lustre_swab_llogd_body, NULL); +EXPORT_SYMBOL(RMF_LLOGD_BODY); + +struct req_msg_field RMF_LLOG_LOG_HDR = + DEFINE_MSGF("llog_log_hdr", 0, + sizeof(struct llog_log_hdr), lustre_swab_llog_hdr, NULL); +EXPORT_SYMBOL(RMF_LLOG_LOG_HDR); + +struct req_msg_field RMF_LLOGD_CONN_BODY = + DEFINE_MSGF("llogd_conn_body", 0, + sizeof(struct llogd_conn_body), + lustre_swab_llogd_conn_body, NULL); +EXPORT_SYMBOL(RMF_LLOGD_CONN_BODY); + +/* + * connection handle received in MDS_CONNECT request. + * + * No swabbing needed because struct lustre_handle contains only a 64-bit cookie + * that the client does not interpret at all. + */ +struct req_msg_field RMF_CONN = + DEFINE_MSGF("conn", 0, sizeof(struct lustre_handle), NULL, NULL); +EXPORT_SYMBOL(RMF_CONN); + +struct req_msg_field RMF_CONNECT_DATA = + DEFINE_MSGF("cdata", + RMF_F_NO_SIZE_CHECK /* we allow extra space for interop */, + sizeof(struct obd_connect_data), + lustre_swab_connect, NULL); +EXPORT_SYMBOL(RMF_CONNECT_DATA); + +struct req_msg_field RMF_DLM_REQ = + DEFINE_MSGF("dlm_req", RMF_F_NO_SIZE_CHECK /* ldlm_request_bufsize */, + sizeof(struct ldlm_request), + lustre_swab_ldlm_request, NULL); +EXPORT_SYMBOL(RMF_DLM_REQ); + +struct req_msg_field RMF_DLM_REP = + DEFINE_MSGF("dlm_rep", 0, + sizeof(struct ldlm_reply), lustre_swab_ldlm_reply, NULL); +EXPORT_SYMBOL(RMF_DLM_REP); + +struct req_msg_field RMF_LDLM_INTENT = + DEFINE_MSGF("ldlm_intent", 0, + sizeof(struct ldlm_intent), lustre_swab_ldlm_intent, NULL); +EXPORT_SYMBOL(RMF_LDLM_INTENT); + +struct req_msg_field RMF_DLM_LVB = + DEFINE_MSGF("dlm_lvb", 0, -1, NULL, NULL); +EXPORT_SYMBOL(RMF_DLM_LVB); + +struct req_msg_field RMF_DLM_GL_DESC = + DEFINE_MSGF("dlm_gl_desc", 0, sizeof(union ldlm_gl_desc), + lustre_swab_gl_desc, NULL); +EXPORT_SYMBOL(RMF_DLM_GL_DESC); + +struct req_msg_field RMF_MDT_MD = + DEFINE_MSGF("mdt_md", RMF_F_NO_SIZE_CHECK, MIN_MD_SIZE, NULL, NULL); +EXPORT_SYMBOL(RMF_MDT_MD); + +struct req_msg_field RMF_REC_REINT = + DEFINE_MSGF("rec_reint", 0, sizeof(struct mdt_rec_reint), + lustre_swab_mdt_rec_reint, NULL); +EXPORT_SYMBOL(RMF_REC_REINT); + +/* FIXME: this length should be defined as a macro */ +struct req_msg_field RMF_EADATA = DEFINE_MSGF("eadata", 0, -1, + NULL, NULL); +EXPORT_SYMBOL(RMF_EADATA); + +struct req_msg_field RMF_EAVALS = DEFINE_MSGF("eavals", 0, -1, NULL, NULL); +EXPORT_SYMBOL(RMF_EAVALS); + +struct req_msg_field RMF_ACL = + DEFINE_MSGF("acl", RMF_F_NO_SIZE_CHECK, + LUSTRE_POSIX_ACL_MAX_SIZE, NULL, NULL); +EXPORT_SYMBOL(RMF_ACL); + +/* FIXME: this should be made to use RMF_F_STRUCT_ARRAY */ +struct req_msg_field RMF_LOGCOOKIES = + DEFINE_MSGF("logcookies", RMF_F_NO_SIZE_CHECK /* multiple cookies */, + sizeof(struct llog_cookie), NULL, NULL); +EXPORT_SYMBOL(RMF_LOGCOOKIES); + +struct req_msg_field RMF_CAPA1 = + DEFINE_MSGF("capa", 0, sizeof(struct lustre_capa), + lustre_swab_lustre_capa, NULL); +EXPORT_SYMBOL(RMF_CAPA1); + +struct req_msg_field RMF_CAPA2 = + DEFINE_MSGF("capa", 0, sizeof(struct lustre_capa), + lustre_swab_lustre_capa, NULL); +EXPORT_SYMBOL(RMF_CAPA2); + +struct req_msg_field RMF_LAYOUT_INTENT = + DEFINE_MSGF("layout_intent", 0, + sizeof(struct layout_intent), lustre_swab_layout_intent, + NULL); +EXPORT_SYMBOL(RMF_LAYOUT_INTENT); + +/* + * OST request field. + */ +struct req_msg_field RMF_OST_BODY = + DEFINE_MSGF("ost_body", 0, + sizeof(struct ost_body), lustre_swab_ost_body, dump_ost_body); +EXPORT_SYMBOL(RMF_OST_BODY); + +struct req_msg_field RMF_OBD_IOOBJ = + DEFINE_MSGF("obd_ioobj", RMF_F_STRUCT_ARRAY, + sizeof(struct obd_ioobj), lustre_swab_obd_ioobj, dump_ioo); +EXPORT_SYMBOL(RMF_OBD_IOOBJ); + +struct req_msg_field RMF_NIOBUF_REMOTE = + DEFINE_MSGF("niobuf_remote", RMF_F_STRUCT_ARRAY, + sizeof(struct niobuf_remote), lustre_swab_niobuf_remote, + dump_rniobuf); +EXPORT_SYMBOL(RMF_NIOBUF_REMOTE); + +struct req_msg_field RMF_RCS = + DEFINE_MSGF("niobuf_remote", RMF_F_STRUCT_ARRAY, sizeof(__u32), + lustre_swab_generic_32s, dump_rcs); +EXPORT_SYMBOL(RMF_RCS); + +struct req_msg_field RMF_EAVALS_LENS = + DEFINE_MSGF("eavals_lens", RMF_F_STRUCT_ARRAY, sizeof(__u32), + lustre_swab_generic_32s, NULL); +EXPORT_SYMBOL(RMF_EAVALS_LENS); + +struct req_msg_field RMF_OBD_ID = + DEFINE_MSGF("u64", 0, + sizeof(u64), lustre_swab_ost_last_id, NULL); +EXPORT_SYMBOL(RMF_OBD_ID); + +struct req_msg_field RMF_FID = + DEFINE_MSGF("fid", 0, + sizeof(struct lu_fid), lustre_swab_lu_fid, NULL); +EXPORT_SYMBOL(RMF_FID); + +struct req_msg_field RMF_OST_ID = + DEFINE_MSGF("ost_id", 0, + sizeof(struct ost_id), lustre_swab_ost_id, NULL); +EXPORT_SYMBOL(RMF_OST_ID); + +struct req_msg_field RMF_FIEMAP_KEY = + DEFINE_MSGF("fiemap", 0, sizeof(struct ll_fiemap_info_key), + lustre_swab_fiemap, NULL); +EXPORT_SYMBOL(RMF_FIEMAP_KEY); + +struct req_msg_field RMF_FIEMAP_VAL = + DEFINE_MSGF("fiemap", 0, -1, lustre_swab_fiemap, NULL); +EXPORT_SYMBOL(RMF_FIEMAP_VAL); + +struct req_msg_field RMF_IDX_INFO = + DEFINE_MSGF("idx_info", 0, sizeof(struct idx_info), + lustre_swab_idx_info, NULL); +EXPORT_SYMBOL(RMF_IDX_INFO); +struct req_msg_field RMF_HSM_USER_STATE = + DEFINE_MSGF("hsm_user_state", 0, sizeof(struct hsm_user_state), + lustre_swab_hsm_user_state, NULL); +EXPORT_SYMBOL(RMF_HSM_USER_STATE); + +struct req_msg_field RMF_HSM_STATE_SET = + DEFINE_MSGF("hsm_state_set", 0, sizeof(struct hsm_state_set), + lustre_swab_hsm_state_set, NULL); +EXPORT_SYMBOL(RMF_HSM_STATE_SET); + +struct req_msg_field RMF_MDS_HSM_PROGRESS = + DEFINE_MSGF("hsm_progress", 0, sizeof(struct hsm_progress_kernel), + lustre_swab_hsm_progress_kernel, NULL); +EXPORT_SYMBOL(RMF_MDS_HSM_PROGRESS); + +struct req_msg_field RMF_MDS_HSM_CURRENT_ACTION = + DEFINE_MSGF("hsm_current_action", 0, sizeof(struct hsm_current_action), + lustre_swab_hsm_current_action, NULL); +EXPORT_SYMBOL(RMF_MDS_HSM_CURRENT_ACTION); + +struct req_msg_field RMF_MDS_HSM_USER_ITEM = + DEFINE_MSGF("hsm_user_item", RMF_F_STRUCT_ARRAY, + sizeof(struct hsm_user_item), lustre_swab_hsm_user_item, + NULL); +EXPORT_SYMBOL(RMF_MDS_HSM_USER_ITEM); + +struct req_msg_field RMF_MDS_HSM_ARCHIVE = + DEFINE_MSGF("hsm_archive", 0, + sizeof(__u32), lustre_swab_generic_32s, NULL); +EXPORT_SYMBOL(RMF_MDS_HSM_ARCHIVE); + +struct req_msg_field RMF_MDS_HSM_REQUEST = + DEFINE_MSGF("hsm_request", 0, sizeof(struct hsm_request), + lustre_swab_hsm_request, NULL); +EXPORT_SYMBOL(RMF_MDS_HSM_REQUEST); + +struct req_msg_field RMF_UPDATE = DEFINE_MSGF("update", 0, -1, + lustre_swab_update_buf, NULL); +EXPORT_SYMBOL(RMF_UPDATE); + +struct req_msg_field RMF_UPDATE_REPLY = DEFINE_MSGF("update_reply", 0, -1, + lustre_swab_update_reply_buf, + NULL); +EXPORT_SYMBOL(RMF_UPDATE_REPLY); + +struct req_msg_field RMF_SWAP_LAYOUTS = + DEFINE_MSGF("swap_layouts", 0, sizeof(struct mdc_swap_layouts), + lustre_swab_swap_layouts, NULL); +EXPORT_SYMBOL(RMF_SWAP_LAYOUTS); +/* + * Request formats. + */ + +struct req_format { + const char *rf_name; + int rf_idx; + struct { + int nr; + const struct req_msg_field **d; + } rf_fields[RCL_NR]; +}; + +#define DEFINE_REQ_FMT(name, client, client_nr, server, server_nr) { \ + .rf_name = name, \ + .rf_fields = { \ + [RCL_CLIENT] = { \ + .nr = client_nr, \ + .d = client \ + }, \ + [RCL_SERVER] = { \ + .nr = server_nr, \ + .d = server \ + } \ + } \ +} + +#define DEFINE_REQ_FMT0(name, client, server) \ +DEFINE_REQ_FMT(name, client, ARRAY_SIZE(client), server, ARRAY_SIZE(server)) + +struct req_format RQF_OBD_PING = + DEFINE_REQ_FMT0("OBD_PING", empty, empty); +EXPORT_SYMBOL(RQF_OBD_PING); + +struct req_format RQF_OBD_SET_INFO = + DEFINE_REQ_FMT0("OBD_SET_INFO", obd_set_info_client, empty); +EXPORT_SYMBOL(RQF_OBD_SET_INFO); + +/* Read index file through the network */ +struct req_format RQF_OBD_IDX_READ = + DEFINE_REQ_FMT0("OBD_IDX_READ", + obd_idx_read_client, obd_idx_read_server); +EXPORT_SYMBOL(RQF_OBD_IDX_READ); + +struct req_format RQF_SEC_CTX = + DEFINE_REQ_FMT0("SEC_CTX", empty, empty); +EXPORT_SYMBOL(RQF_SEC_CTX); + +struct req_format RQF_MGS_TARGET_REG = + DEFINE_REQ_FMT0("MGS_TARGET_REG", mgs_target_info_only, + mgs_target_info_only); +EXPORT_SYMBOL(RQF_MGS_TARGET_REG); + +struct req_format RQF_MGS_SET_INFO = + DEFINE_REQ_FMT0("MGS_SET_INFO", mgs_set_info, + mgs_set_info); +EXPORT_SYMBOL(RQF_MGS_SET_INFO); + +struct req_format RQF_MGS_CONFIG_READ = + DEFINE_REQ_FMT0("MGS_CONFIG_READ", mgs_config_read_client, + mgs_config_read_server); +EXPORT_SYMBOL(RQF_MGS_CONFIG_READ); + +struct req_format RQF_SEQ_QUERY = + DEFINE_REQ_FMT0("SEQ_QUERY", seq_query_client, seq_query_server); +EXPORT_SYMBOL(RQF_SEQ_QUERY); + +struct req_format RQF_FLD_QUERY = + DEFINE_REQ_FMT0("FLD_QUERY", fld_query_client, fld_query_server); +EXPORT_SYMBOL(RQF_FLD_QUERY); + +struct req_format RQF_LOG_CANCEL = + DEFINE_REQ_FMT0("OBD_LOG_CANCEL", log_cancel_client, empty); +EXPORT_SYMBOL(RQF_LOG_CANCEL); + +struct req_format RQF_MDS_QUOTACHECK = + DEFINE_REQ_FMT0("MDS_QUOTACHECK", quotactl_only, empty); +EXPORT_SYMBOL(RQF_MDS_QUOTACHECK); + +struct req_format RQF_OST_QUOTACHECK = + DEFINE_REQ_FMT0("OST_QUOTACHECK", quotactl_only, empty); +EXPORT_SYMBOL(RQF_OST_QUOTACHECK); + +struct req_format RQF_MDS_QUOTACTL = + DEFINE_REQ_FMT0("MDS_QUOTACTL", quotactl_only, quotactl_only); +EXPORT_SYMBOL(RQF_MDS_QUOTACTL); + +struct req_format RQF_OST_QUOTACTL = + DEFINE_REQ_FMT0("OST_QUOTACTL", quotactl_only, quotactl_only); +EXPORT_SYMBOL(RQF_OST_QUOTACTL); + +struct req_format RQF_QC_CALLBACK = + DEFINE_REQ_FMT0("QC_CALLBACK", quotactl_only, empty); +EXPORT_SYMBOL(RQF_QC_CALLBACK); + +struct req_format RQF_QUOTA_DQACQ = + DEFINE_REQ_FMT0("QUOTA_DQACQ", quota_body_only, quota_body_only); +EXPORT_SYMBOL(RQF_QUOTA_DQACQ); + +struct req_format RQF_LDLM_INTENT_QUOTA = + DEFINE_REQ_FMT0("LDLM_INTENT_QUOTA", + ldlm_intent_quota_client, + ldlm_intent_quota_server); +EXPORT_SYMBOL(RQF_LDLM_INTENT_QUOTA); + +struct req_format RQF_MDS_GETSTATUS = + DEFINE_REQ_FMT0("MDS_GETSTATUS", mdt_body_only, mdt_body_capa); +EXPORT_SYMBOL(RQF_MDS_GETSTATUS); + +struct req_format RQF_MDS_STATFS = + DEFINE_REQ_FMT0("MDS_STATFS", empty, obd_statfs_server); +EXPORT_SYMBOL(RQF_MDS_STATFS); + +struct req_format RQF_MDS_SYNC = + DEFINE_REQ_FMT0("MDS_SYNC", mdt_body_capa, mdt_body_only); +EXPORT_SYMBOL(RQF_MDS_SYNC); + +struct req_format RQF_MDS_GETATTR = + DEFINE_REQ_FMT0("MDS_GETATTR", mdt_body_capa, mds_getattr_server); +EXPORT_SYMBOL(RQF_MDS_GETATTR); + +struct req_format RQF_MDS_GETXATTR = + DEFINE_REQ_FMT0("MDS_GETXATTR", + mds_getxattr_client, mds_getxattr_server); +EXPORT_SYMBOL(RQF_MDS_GETXATTR); + +struct req_format RQF_MDS_GETATTR_NAME = + DEFINE_REQ_FMT0("MDS_GETATTR_NAME", + mds_getattr_name_client, mds_getattr_server); +EXPORT_SYMBOL(RQF_MDS_GETATTR_NAME); + +struct req_format RQF_MDS_REINT = + DEFINE_REQ_FMT0("MDS_REINT", mds_reint_client, mdt_body_only); +EXPORT_SYMBOL(RQF_MDS_REINT); + +struct req_format RQF_MDS_REINT_CREATE = + DEFINE_REQ_FMT0("MDS_REINT_CREATE", + mds_reint_create_client, mdt_body_capa); +EXPORT_SYMBOL(RQF_MDS_REINT_CREATE); + +struct req_format RQF_MDS_REINT_CREATE_RMT_ACL = + DEFINE_REQ_FMT0("MDS_REINT_CREATE_RMT_ACL", + mds_reint_create_rmt_acl_client, mdt_body_capa); +EXPORT_SYMBOL(RQF_MDS_REINT_CREATE_RMT_ACL); + +struct req_format RQF_MDS_REINT_CREATE_SLAVE = + DEFINE_REQ_FMT0("MDS_REINT_CREATE_EA", + mds_reint_create_slave_client, mdt_body_capa); +EXPORT_SYMBOL(RQF_MDS_REINT_CREATE_SLAVE); + +struct req_format RQF_MDS_REINT_CREATE_SYM = + DEFINE_REQ_FMT0("MDS_REINT_CREATE_SYM", + mds_reint_create_sym_client, mdt_body_capa); +EXPORT_SYMBOL(RQF_MDS_REINT_CREATE_SYM); + +struct req_format RQF_MDS_REINT_OPEN = + DEFINE_REQ_FMT0("MDS_REINT_OPEN", + mds_reint_open_client, mds_reint_open_server); +EXPORT_SYMBOL(RQF_MDS_REINT_OPEN); + +struct req_format RQF_MDS_REINT_UNLINK = + DEFINE_REQ_FMT0("MDS_REINT_UNLINK", mds_reint_unlink_client, + mds_last_unlink_server); +EXPORT_SYMBOL(RQF_MDS_REINT_UNLINK); + +struct req_format RQF_MDS_REINT_LINK = + DEFINE_REQ_FMT0("MDS_REINT_LINK", + mds_reint_link_client, mdt_body_only); +EXPORT_SYMBOL(RQF_MDS_REINT_LINK); + +struct req_format RQF_MDS_REINT_RENAME = + DEFINE_REQ_FMT0("MDS_REINT_RENAME", mds_reint_rename_client, + mds_last_unlink_server); +EXPORT_SYMBOL(RQF_MDS_REINT_RENAME); + +struct req_format RQF_MDS_REINT_SETATTR = + DEFINE_REQ_FMT0("MDS_REINT_SETATTR", + mds_reint_setattr_client, mds_setattr_server); +EXPORT_SYMBOL(RQF_MDS_REINT_SETATTR); + +struct req_format RQF_MDS_REINT_SETXATTR = + DEFINE_REQ_FMT0("MDS_REINT_SETXATTR", + mds_reint_setxattr_client, mdt_body_only); +EXPORT_SYMBOL(RQF_MDS_REINT_SETXATTR); + +struct req_format RQF_MDS_CONNECT = + DEFINE_REQ_FMT0("MDS_CONNECT", + obd_connect_client, obd_connect_server); +EXPORT_SYMBOL(RQF_MDS_CONNECT); + +struct req_format RQF_MDS_DISCONNECT = + DEFINE_REQ_FMT0("MDS_DISCONNECT", empty, empty); +EXPORT_SYMBOL(RQF_MDS_DISCONNECT); + +struct req_format RQF_MDS_GET_INFO = + DEFINE_REQ_FMT0("MDS_GET_INFO", mds_getinfo_client, + mds_getinfo_server); +EXPORT_SYMBOL(RQF_MDS_GET_INFO); + +struct req_format RQF_UPDATE_OBJ = + DEFINE_REQ_FMT0("OBJECT_UPDATE_OBJ", mds_update_client, + mds_update_server); +EXPORT_SYMBOL(RQF_UPDATE_OBJ); + +struct req_format RQF_LDLM_ENQUEUE = + DEFINE_REQ_FMT0("LDLM_ENQUEUE", + ldlm_enqueue_client, ldlm_enqueue_lvb_server); +EXPORT_SYMBOL(RQF_LDLM_ENQUEUE); + +struct req_format RQF_LDLM_ENQUEUE_LVB = + DEFINE_REQ_FMT0("LDLM_ENQUEUE_LVB", + ldlm_enqueue_client, ldlm_enqueue_lvb_server); +EXPORT_SYMBOL(RQF_LDLM_ENQUEUE_LVB); + +struct req_format RQF_LDLM_CONVERT = + DEFINE_REQ_FMT0("LDLM_CONVERT", + ldlm_enqueue_client, ldlm_enqueue_server); +EXPORT_SYMBOL(RQF_LDLM_CONVERT); + +struct req_format RQF_LDLM_CANCEL = + DEFINE_REQ_FMT0("LDLM_CANCEL", ldlm_enqueue_client, empty); +EXPORT_SYMBOL(RQF_LDLM_CANCEL); + +struct req_format RQF_LDLM_CALLBACK = + DEFINE_REQ_FMT0("LDLM_CALLBACK", ldlm_enqueue_client, empty); +EXPORT_SYMBOL(RQF_LDLM_CALLBACK); + +struct req_format RQF_LDLM_CP_CALLBACK = + DEFINE_REQ_FMT0("LDLM_CP_CALLBACK", ldlm_cp_callback_client, empty); +EXPORT_SYMBOL(RQF_LDLM_CP_CALLBACK); + +struct req_format RQF_LDLM_BL_CALLBACK = + DEFINE_REQ_FMT0("LDLM_BL_CALLBACK", ldlm_enqueue_client, empty); +EXPORT_SYMBOL(RQF_LDLM_BL_CALLBACK); + +struct req_format RQF_LDLM_GL_CALLBACK = + DEFINE_REQ_FMT0("LDLM_GL_CALLBACK", ldlm_enqueue_client, + ldlm_gl_callback_server); +EXPORT_SYMBOL(RQF_LDLM_GL_CALLBACK); + +struct req_format RQF_LDLM_GL_DESC_CALLBACK = + DEFINE_REQ_FMT0("LDLM_GL_CALLBACK", ldlm_gl_callback_desc_client, + ldlm_gl_callback_server); +EXPORT_SYMBOL(RQF_LDLM_GL_DESC_CALLBACK); + +struct req_format RQF_LDLM_INTENT_BASIC = + DEFINE_REQ_FMT0("LDLM_INTENT_BASIC", + ldlm_intent_basic_client, ldlm_enqueue_lvb_server); +EXPORT_SYMBOL(RQF_LDLM_INTENT_BASIC); + +struct req_format RQF_LDLM_INTENT = + DEFINE_REQ_FMT0("LDLM_INTENT", + ldlm_intent_client, ldlm_intent_server); +EXPORT_SYMBOL(RQF_LDLM_INTENT); + +struct req_format RQF_LDLM_INTENT_LAYOUT = + DEFINE_REQ_FMT0("LDLM_INTENT_LAYOUT ", + ldlm_intent_layout_client, ldlm_enqueue_lvb_server); +EXPORT_SYMBOL(RQF_LDLM_INTENT_LAYOUT); + +struct req_format RQF_LDLM_INTENT_GETATTR = + DEFINE_REQ_FMT0("LDLM_INTENT_GETATTR", + ldlm_intent_getattr_client, ldlm_intent_getattr_server); +EXPORT_SYMBOL(RQF_LDLM_INTENT_GETATTR); + +struct req_format RQF_LDLM_INTENT_OPEN = + DEFINE_REQ_FMT0("LDLM_INTENT_OPEN", + ldlm_intent_open_client, ldlm_intent_open_server); +EXPORT_SYMBOL(RQF_LDLM_INTENT_OPEN); + +struct req_format RQF_LDLM_INTENT_CREATE = + DEFINE_REQ_FMT0("LDLM_INTENT_CREATE", + ldlm_intent_create_client, ldlm_intent_getattr_server); +EXPORT_SYMBOL(RQF_LDLM_INTENT_CREATE); + +struct req_format RQF_LDLM_INTENT_UNLINK = + DEFINE_REQ_FMT0("LDLM_INTENT_UNLINK", + ldlm_intent_unlink_client, ldlm_intent_server); +EXPORT_SYMBOL(RQF_LDLM_INTENT_UNLINK); + +struct req_format RQF_LDLM_INTENT_GETXATTR = + DEFINE_REQ_FMT0("LDLM_INTENT_GETXATTR", + ldlm_intent_getxattr_client, + ldlm_intent_getxattr_server); +EXPORT_SYMBOL(RQF_LDLM_INTENT_GETXATTR); + +struct req_format RQF_MDS_CLOSE = + DEFINE_REQ_FMT0("MDS_CLOSE", + mdt_close_client, mds_last_unlink_server); +EXPORT_SYMBOL(RQF_MDS_CLOSE); + +struct req_format RQF_MDS_RELEASE_CLOSE = + DEFINE_REQ_FMT0("MDS_CLOSE", + mdt_release_close_client, mds_last_unlink_server); +EXPORT_SYMBOL(RQF_MDS_RELEASE_CLOSE); + +struct req_format RQF_MDS_PIN = + DEFINE_REQ_FMT0("MDS_PIN", + mdt_body_capa, mdt_body_only); +EXPORT_SYMBOL(RQF_MDS_PIN); + +struct req_format RQF_MDS_UNPIN = + DEFINE_REQ_FMT0("MDS_UNPIN", mdt_body_only, empty); +EXPORT_SYMBOL(RQF_MDS_UNPIN); + +struct req_format RQF_MDS_DONE_WRITING = + DEFINE_REQ_FMT0("MDS_DONE_WRITING", + mdt_close_client, mdt_body_only); +EXPORT_SYMBOL(RQF_MDS_DONE_WRITING); + +struct req_format RQF_MDS_READPAGE = + DEFINE_REQ_FMT0("MDS_READPAGE", + mdt_body_capa, mdt_body_only); +EXPORT_SYMBOL(RQF_MDS_READPAGE); + +struct req_format RQF_MDS_HSM_ACTION = + DEFINE_REQ_FMT0("MDS_HSM_ACTION", mdt_body_capa, mdt_hsm_action_server); +EXPORT_SYMBOL(RQF_MDS_HSM_ACTION); + +struct req_format RQF_MDS_HSM_PROGRESS = + DEFINE_REQ_FMT0("MDS_HSM_PROGRESS", mdt_hsm_progress, empty); +EXPORT_SYMBOL(RQF_MDS_HSM_PROGRESS); + +struct req_format RQF_MDS_HSM_CT_REGISTER = + DEFINE_REQ_FMT0("MDS_HSM_CT_REGISTER", mdt_hsm_ct_register, empty); +EXPORT_SYMBOL(RQF_MDS_HSM_CT_REGISTER); + +struct req_format RQF_MDS_HSM_CT_UNREGISTER = + DEFINE_REQ_FMT0("MDS_HSM_CT_UNREGISTER", mdt_hsm_ct_unregister, empty); +EXPORT_SYMBOL(RQF_MDS_HSM_CT_UNREGISTER); + +struct req_format RQF_MDS_HSM_STATE_GET = + DEFINE_REQ_FMT0("MDS_HSM_STATE_GET", + mdt_body_capa, mdt_hsm_state_get_server); +EXPORT_SYMBOL(RQF_MDS_HSM_STATE_GET); + +struct req_format RQF_MDS_HSM_STATE_SET = + DEFINE_REQ_FMT0("MDS_HSM_STATE_SET", mdt_hsm_state_set, empty); +EXPORT_SYMBOL(RQF_MDS_HSM_STATE_SET); + +struct req_format RQF_MDS_HSM_REQUEST = + DEFINE_REQ_FMT0("MDS_HSM_REQUEST", mdt_hsm_request, empty); +EXPORT_SYMBOL(RQF_MDS_HSM_REQUEST); + +struct req_format RQF_MDS_SWAP_LAYOUTS = + DEFINE_REQ_FMT0("MDS_SWAP_LAYOUTS", + mdt_swap_layouts, empty); +EXPORT_SYMBOL(RQF_MDS_SWAP_LAYOUTS); + +/* This is for split */ +struct req_format RQF_MDS_WRITEPAGE = + DEFINE_REQ_FMT0("MDS_WRITEPAGE", + mdt_body_capa, mdt_body_only); +EXPORT_SYMBOL(RQF_MDS_WRITEPAGE); + +struct req_format RQF_MDS_IS_SUBDIR = + DEFINE_REQ_FMT0("MDS_IS_SUBDIR", + mdt_body_only, mdt_body_only); +EXPORT_SYMBOL(RQF_MDS_IS_SUBDIR); + +struct req_format RQF_LLOG_ORIGIN_HANDLE_CREATE = + DEFINE_REQ_FMT0("LLOG_ORIGIN_HANDLE_CREATE", + llog_origin_handle_create_client, llogd_body_only); +EXPORT_SYMBOL(RQF_LLOG_ORIGIN_HANDLE_CREATE); + +struct req_format RQF_LLOG_ORIGIN_HANDLE_DESTROY = + DEFINE_REQ_FMT0("LLOG_ORIGIN_HANDLE_DESTROY", + llogd_body_only, llogd_body_only); +EXPORT_SYMBOL(RQF_LLOG_ORIGIN_HANDLE_DESTROY); + +struct req_format RQF_LLOG_ORIGIN_HANDLE_NEXT_BLOCK = + DEFINE_REQ_FMT0("LLOG_ORIGIN_HANDLE_NEXT_BLOCK", + llogd_body_only, llog_origin_handle_next_block_server); +EXPORT_SYMBOL(RQF_LLOG_ORIGIN_HANDLE_NEXT_BLOCK); + +struct req_format RQF_LLOG_ORIGIN_HANDLE_PREV_BLOCK = + DEFINE_REQ_FMT0("LLOG_ORIGIN_HANDLE_PREV_BLOCK", + llogd_body_only, llog_origin_handle_next_block_server); +EXPORT_SYMBOL(RQF_LLOG_ORIGIN_HANDLE_PREV_BLOCK); + +struct req_format RQF_LLOG_ORIGIN_HANDLE_READ_HEADER = + DEFINE_REQ_FMT0("LLOG_ORIGIN_HANDLE_READ_HEADER", + llogd_body_only, llog_log_hdr_only); +EXPORT_SYMBOL(RQF_LLOG_ORIGIN_HANDLE_READ_HEADER); + +struct req_format RQF_LLOG_ORIGIN_CONNECT = + DEFINE_REQ_FMT0("LLOG_ORIGIN_CONNECT", llogd_conn_body_only, empty); +EXPORT_SYMBOL(RQF_LLOG_ORIGIN_CONNECT); + +struct req_format RQF_CONNECT = + DEFINE_REQ_FMT0("CONNECT", obd_connect_client, obd_connect_server); +EXPORT_SYMBOL(RQF_CONNECT); + +struct req_format RQF_OST_CONNECT = + DEFINE_REQ_FMT0("OST_CONNECT", + obd_connect_client, obd_connect_server); +EXPORT_SYMBOL(RQF_OST_CONNECT); + +struct req_format RQF_OST_DISCONNECT = + DEFINE_REQ_FMT0("OST_DISCONNECT", empty, empty); +EXPORT_SYMBOL(RQF_OST_DISCONNECT); + +struct req_format RQF_OST_GETATTR = + DEFINE_REQ_FMT0("OST_GETATTR", ost_body_capa, ost_body_only); +EXPORT_SYMBOL(RQF_OST_GETATTR); + +struct req_format RQF_OST_SETATTR = + DEFINE_REQ_FMT0("OST_SETATTR", ost_body_capa, ost_body_only); +EXPORT_SYMBOL(RQF_OST_SETATTR); + +struct req_format RQF_OST_CREATE = + DEFINE_REQ_FMT0("OST_CREATE", ost_body_only, ost_body_only); +EXPORT_SYMBOL(RQF_OST_CREATE); + +struct req_format RQF_OST_PUNCH = + DEFINE_REQ_FMT0("OST_PUNCH", ost_body_capa, ost_body_only); +EXPORT_SYMBOL(RQF_OST_PUNCH); + +struct req_format RQF_OST_SYNC = + DEFINE_REQ_FMT0("OST_SYNC", ost_body_capa, ost_body_only); +EXPORT_SYMBOL(RQF_OST_SYNC); + +struct req_format RQF_OST_DESTROY = + DEFINE_REQ_FMT0("OST_DESTROY", ost_destroy_client, ost_body_only); +EXPORT_SYMBOL(RQF_OST_DESTROY); + +struct req_format RQF_OST_BRW_READ = + DEFINE_REQ_FMT0("OST_BRW_READ", ost_brw_client, ost_brw_read_server); +EXPORT_SYMBOL(RQF_OST_BRW_READ); + +struct req_format RQF_OST_BRW_WRITE = + DEFINE_REQ_FMT0("OST_BRW_WRITE", ost_brw_client, ost_brw_write_server); +EXPORT_SYMBOL(RQF_OST_BRW_WRITE); + +struct req_format RQF_OST_STATFS = + DEFINE_REQ_FMT0("OST_STATFS", empty, obd_statfs_server); +EXPORT_SYMBOL(RQF_OST_STATFS); + +struct req_format RQF_OST_SET_GRANT_INFO = + DEFINE_REQ_FMT0("OST_SET_GRANT_INFO", ost_grant_shrink_client, + ost_body_only); +EXPORT_SYMBOL(RQF_OST_SET_GRANT_INFO); + +struct req_format RQF_OST_GET_INFO_GENERIC = + DEFINE_REQ_FMT0("OST_GET_INFO", ost_get_info_generic_client, + ost_get_info_generic_server); +EXPORT_SYMBOL(RQF_OST_GET_INFO_GENERIC); + +struct req_format RQF_OST_GET_INFO_LAST_ID = + DEFINE_REQ_FMT0("OST_GET_INFO_LAST_ID", ost_get_info_generic_client, + ost_get_last_id_server); +EXPORT_SYMBOL(RQF_OST_GET_INFO_LAST_ID); + +struct req_format RQF_OST_GET_INFO_LAST_FID = + DEFINE_REQ_FMT0("OST_GET_INFO_LAST_FID", obd_set_info_client, + ost_get_last_fid_server); +EXPORT_SYMBOL(RQF_OST_GET_INFO_LAST_FID); + +struct req_format RQF_OST_SET_INFO_LAST_FID = + DEFINE_REQ_FMT0("OST_SET_INFO_LAST_FID", obd_set_info_client, + empty); +EXPORT_SYMBOL(RQF_OST_SET_INFO_LAST_FID); + +struct req_format RQF_OST_GET_INFO_FIEMAP = + DEFINE_REQ_FMT0("OST_GET_INFO_FIEMAP", ost_get_fiemap_client, + ost_get_fiemap_server); +EXPORT_SYMBOL(RQF_OST_GET_INFO_FIEMAP); + +#if !defined(__REQ_LAYOUT_USER__) + +/* Convenience macro */ +#define FMT_FIELD(fmt, i, j) (fmt)->rf_fields[(i)].d[(j)] + +/** + * Initializes the capsule abstraction by computing and setting the \a rf_idx + * field of RQFs and the \a rmf_offset field of RMFs. + */ +int req_layout_init(void) +{ + int i; + int j; + int k; + struct req_format *rf = NULL; + + for (i = 0; i < ARRAY_SIZE(req_formats); ++i) { + rf = req_formats[i]; + rf->rf_idx = i; + for (j = 0; j < RCL_NR; ++j) { + LASSERT(rf->rf_fields[j].nr <= REQ_MAX_FIELD_NR); + for (k = 0; k < rf->rf_fields[j].nr; ++k) { + struct req_msg_field *field; + + field = (typeof(field))rf->rf_fields[j].d[k]; + LASSERT(!(field->rmf_flags & RMF_F_STRUCT_ARRAY) + || field->rmf_size > 0); + LASSERT(field->rmf_offset[i][j] == 0); + /* + * k + 1 to detect unused format/field + * combinations. + */ + field->rmf_offset[i][j] = k + 1; + } + } + } + return 0; +} +EXPORT_SYMBOL(req_layout_init); + +void req_layout_fini(void) +{ +} +EXPORT_SYMBOL(req_layout_fini); + +/** + * Initializes the expected sizes of each RMF in a \a pill (\a rc_area) to -1. + * + * Actual/expected field sizes are set elsewhere in functions in this file: + * req_capsule_init(), req_capsule_server_pack(), req_capsule_set_size() and + * req_capsule_msg_size(). The \a rc_area information is used by. + * ptlrpc_request_set_replen(). + */ +void req_capsule_init_area(struct req_capsule *pill) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(pill->rc_area[RCL_CLIENT]); i++) { + pill->rc_area[RCL_CLIENT][i] = -1; + pill->rc_area[RCL_SERVER][i] = -1; + } +} +EXPORT_SYMBOL(req_capsule_init_area); + +/** + * Initialize a pill. + * + * The \a location indicates whether the caller is executing on the client side + * (RCL_CLIENT) or server side (RCL_SERVER).. + */ +void req_capsule_init(struct req_capsule *pill, + struct ptlrpc_request *req, + enum req_location location) +{ + LASSERT(location == RCL_SERVER || location == RCL_CLIENT); + + /* + * Today all capsules are embedded in ptlrpc_request structs, + * but just in case that ever isn't the case, we don't reach + * into req unless req != NULL and pill is the one embedded in + * the req. + * + * The req->rq_pill_init flag makes it safe to initialize a pill + * twice, which might happen in the OST paths as a result of the + * high-priority RPC queue getting peeked at before ost_handle() + * handles an OST RPC. + */ + if (req != NULL && pill == &req->rq_pill && req->rq_pill_init) + return; + + memset(pill, 0, sizeof(*pill)); + pill->rc_req = req; + pill->rc_loc = location; + req_capsule_init_area(pill); + + if (req != NULL && pill == &req->rq_pill) + req->rq_pill_init = 1; +} +EXPORT_SYMBOL(req_capsule_init); + +void req_capsule_fini(struct req_capsule *pill) +{ +} +EXPORT_SYMBOL(req_capsule_fini); + +static int __req_format_is_sane(const struct req_format *fmt) +{ + return + 0 <= fmt->rf_idx && fmt->rf_idx < ARRAY_SIZE(req_formats) && + req_formats[fmt->rf_idx] == fmt; +} + +static struct lustre_msg *__req_msg(const struct req_capsule *pill, + enum req_location loc) +{ + struct ptlrpc_request *req; + + req = pill->rc_req; + return loc == RCL_CLIENT ? req->rq_reqmsg : req->rq_repmsg; +} + +/** + * Set the format (\a fmt) of a \a pill; format changes are not allowed here + * (see req_capsule_extend()). + */ +void req_capsule_set(struct req_capsule *pill, const struct req_format *fmt) +{ + LASSERT(pill->rc_fmt == NULL || pill->rc_fmt == fmt); + LASSERT(__req_format_is_sane(fmt)); + + pill->rc_fmt = fmt; +} +EXPORT_SYMBOL(req_capsule_set); + +/** + * Fills in any parts of the \a rc_area of a \a pill that haven't been filled in + * yet. + + * \a rc_area is an array of REQ_MAX_FIELD_NR elements, used to store sizes of + * variable-sized fields. The field sizes come from the declared \a rmf_size + * field of a \a pill's \a rc_fmt's RMF's. + */ +int req_capsule_filled_sizes(struct req_capsule *pill, + enum req_location loc) +{ + const struct req_format *fmt = pill->rc_fmt; + int i; + + LASSERT(fmt != NULL); + + for (i = 0; i < fmt->rf_fields[loc].nr; ++i) { + if (pill->rc_area[loc][i] == -1) { + pill->rc_area[loc][i] = + fmt->rf_fields[loc].d[i]->rmf_size; + if (pill->rc_area[loc][i] == -1) { + /* + * Skip the following fields. + * + * If this LASSERT() trips then you're missing a + * call to req_capsule_set_size(). + */ + LASSERT(loc != RCL_SERVER); + break; + } + } + } + return i; +} +EXPORT_SYMBOL(req_capsule_filled_sizes); + +/** + * Capsule equivalent of lustre_pack_request() and lustre_pack_reply(). + * + * This function uses the \a pill's \a rc_area as filled in by + * req_capsule_set_size() or req_capsule_filled_sizes() (the latter is called by + * this function). + */ +int req_capsule_server_pack(struct req_capsule *pill) +{ + const struct req_format *fmt; + int count; + int rc; + + LASSERT(pill->rc_loc == RCL_SERVER); + fmt = pill->rc_fmt; + LASSERT(fmt != NULL); + + count = req_capsule_filled_sizes(pill, RCL_SERVER); + rc = lustre_pack_reply(pill->rc_req, count, + pill->rc_area[RCL_SERVER], NULL); + if (rc != 0) { + DEBUG_REQ(D_ERROR, pill->rc_req, + "Cannot pack %d fields in format `%s': ", + count, fmt->rf_name); + } + return rc; +} +EXPORT_SYMBOL(req_capsule_server_pack); + +/** + * Returns the PTLRPC request or reply (\a loc) buffer offset of a \a pill + * corresponding to the given RMF (\a field). + */ +static int __req_capsule_offset(const struct req_capsule *pill, + const struct req_msg_field *field, + enum req_location loc) +{ + int offset; + + offset = field->rmf_offset[pill->rc_fmt->rf_idx][loc]; + LASSERTF(offset > 0, "%s:%s, off=%d, loc=%d\n", + pill->rc_fmt->rf_name, + field->rmf_name, offset, loc); + offset--; + + LASSERT(0 <= offset && offset < REQ_MAX_FIELD_NR); + return offset; +} + +/** + * Helper for __req_capsule_get(); swabs value / array of values and/or dumps + * them if desired. + */ +static +void +swabber_dumper_helper(struct req_capsule *pill, + const struct req_msg_field *field, + enum req_location loc, + int offset, + void *value, int len, int dump, void (*swabber)(void *)) +{ + void *p; + int i; + int n; + int do_swab; + int inout = loc == RCL_CLIENT; + + swabber = swabber ?: field->rmf_swabber; + + if (ptlrpc_buf_need_swab(pill->rc_req, inout, offset) && + swabber != NULL && value != NULL) + do_swab = 1; + else + do_swab = 0; + + if (!field->rmf_dumper) + dump = 0; + + if (!(field->rmf_flags & RMF_F_STRUCT_ARRAY)) { + if (dump) { + CDEBUG(D_RPCTRACE, "Dump of %sfield %s follows\n", + do_swab ? "unswabbed " : "", field->rmf_name); + field->rmf_dumper(value); + } + if (!do_swab) + return; + swabber(value); + ptlrpc_buf_set_swabbed(pill->rc_req, inout, offset); + if (dump) { + CDEBUG(D_RPCTRACE, "Dump of swabbed field %s follows\n", + field->rmf_name); + field->rmf_dumper(value); + } + + return; + } + + /* + * We're swabbing an array; swabber() swabs a single array element, so + * swab every element. + */ + LASSERT((len % field->rmf_size) == 0); + for (p = value, i = 0, n = len / field->rmf_size; + i < n; + i++, p += field->rmf_size) { + if (dump) { + CDEBUG(D_RPCTRACE, "Dump of %sarray field %s, element %d follows\n", + do_swab ? "unswabbed " : "", field->rmf_name, i); + field->rmf_dumper(p); + } + if (!do_swab) + continue; + swabber(p); + if (dump) { + CDEBUG(D_RPCTRACE, "Dump of swabbed array field %s, element %d follows\n", + field->rmf_name, i); + field->rmf_dumper(value); + } + } + if (do_swab) + ptlrpc_buf_set_swabbed(pill->rc_req, inout, offset); +} + +/** + * Returns the pointer to a PTLRPC request or reply (\a loc) buffer of a \a pill + * corresponding to the given RMF (\a field). + * + * The buffer will be swabbed using the given \a swabber. If \a swabber == NULL + * then the \a rmf_swabber from the RMF will be used. Soon there will be no + * calls to __req_capsule_get() with a non-NULL \a swabber; \a swabber will then + * be removed. Fields with the \a RMF_F_STRUCT_ARRAY flag set will have each + * element of the array swabbed. + */ +static void *__req_capsule_get(struct req_capsule *pill, + const struct req_msg_field *field, + enum req_location loc, + void (*swabber)(void *), + int dump) +{ + const struct req_format *fmt; + struct lustre_msg *msg; + void *value; + int len; + int offset; + + void *(*getter)(struct lustre_msg *m, int n, int minlen); + + static const char *rcl_names[RCL_NR] = { + [RCL_CLIENT] = "client", + [RCL_SERVER] = "server" + }; + + LASSERT(pill != NULL); + LASSERT(pill != LP_POISON); + fmt = pill->rc_fmt; + LASSERT(fmt != NULL); + LASSERT(fmt != LP_POISON); + LASSERT(__req_format_is_sane(fmt)); + + offset = __req_capsule_offset(pill, field, loc); + + msg = __req_msg(pill, loc); + LASSERT(msg != NULL); + + getter = (field->rmf_flags & RMF_F_STRING) ? + (typeof(getter))lustre_msg_string : lustre_msg_buf; + + if (field->rmf_flags & RMF_F_STRUCT_ARRAY) { + /* + * We've already asserted that field->rmf_size > 0 in + * req_layout_init(). + */ + len = lustre_msg_buflen(msg, offset); + if ((len % field->rmf_size) != 0) { + CERROR("%s: array field size mismatch %d modulo %d != 0 (%d)\n", + field->rmf_name, len, field->rmf_size, loc); + return NULL; + } + } else if (pill->rc_area[loc][offset] != -1) { + len = pill->rc_area[loc][offset]; + } else { + len = max(field->rmf_size, 0); + } + value = getter(msg, offset, len); + + if (value == NULL) { + DEBUG_REQ(D_ERROR, pill->rc_req, + "Wrong buffer for field `%s' (%d of %d) in format `%s': %d vs. %d (%s)\n", + field->rmf_name, offset, lustre_msg_bufcount(msg), + fmt->rf_name, lustre_msg_buflen(msg, offset), len, + rcl_names[loc]); + } else { + swabber_dumper_helper(pill, field, loc, offset, value, len, + dump, swabber); + } + + return value; +} + +/** + * Dump a request and/or reply + */ +static void __req_capsule_dump(struct req_capsule *pill, enum req_location loc) +{ + const struct req_format *fmt; + const struct req_msg_field *field; + int len; + int i; + + fmt = pill->rc_fmt; + + DEBUG_REQ(D_RPCTRACE, pill->rc_req, "BEGIN REQ CAPSULE DUMP\n"); + for (i = 0; i < fmt->rf_fields[loc].nr; ++i) { + field = FMT_FIELD(fmt, loc, i); + if (field->rmf_dumper == NULL) { + /* + * FIXME Add a default hex dumper for fields that don't + * have a specific dumper + */ + len = req_capsule_get_size(pill, field, loc); + CDEBUG(D_RPCTRACE, "Field %s has no dumper function; field size is %d\n", + field->rmf_name, len); + } else { + /* It's the dumping side-effect that we're interested in */ + (void) __req_capsule_get(pill, field, loc, NULL, 1); + } + } + CDEBUG(D_RPCTRACE, "END REQ CAPSULE DUMP\n"); +} + +/** + * Dump a request. + */ +void req_capsule_client_dump(struct req_capsule *pill) +{ + __req_capsule_dump(pill, RCL_CLIENT); +} +EXPORT_SYMBOL(req_capsule_client_dump); + +/** + * Dump a reply + */ +void req_capsule_server_dump(struct req_capsule *pill) +{ + __req_capsule_dump(pill, RCL_SERVER); +} +EXPORT_SYMBOL(req_capsule_server_dump); + +/** + * Trivial wrapper around __req_capsule_get(), that returns the PTLRPC request + * buffer corresponding to the given RMF (\a field) of a \a pill. + */ +void *req_capsule_client_get(struct req_capsule *pill, + const struct req_msg_field *field) +{ + return __req_capsule_get(pill, field, RCL_CLIENT, NULL, 0); +} +EXPORT_SYMBOL(req_capsule_client_get); + +/** + * Same as req_capsule_client_get(), but with a \a swabber argument. + * + * Currently unused; will be removed when req_capsule_server_swab_get() is + * unused too. + */ +void *req_capsule_client_swab_get(struct req_capsule *pill, + const struct req_msg_field *field, + void *swabber) +{ + return __req_capsule_get(pill, field, RCL_CLIENT, swabber, 0); +} +EXPORT_SYMBOL(req_capsule_client_swab_get); + +/** + * Utility that combines req_capsule_set_size() and req_capsule_client_get(). + * + * First the \a pill's request \a field's size is set (\a rc_area) using + * req_capsule_set_size() with the given \a len. Then the actual buffer is + * returned. + */ +void *req_capsule_client_sized_get(struct req_capsule *pill, + const struct req_msg_field *field, + int len) +{ + req_capsule_set_size(pill, field, RCL_CLIENT, len); + return __req_capsule_get(pill, field, RCL_CLIENT, NULL, 0); +} +EXPORT_SYMBOL(req_capsule_client_sized_get); + +/** + * Trivial wrapper around __req_capsule_get(), that returns the PTLRPC reply + * buffer corresponding to the given RMF (\a field) of a \a pill. + */ +void *req_capsule_server_get(struct req_capsule *pill, + const struct req_msg_field *field) +{ + return __req_capsule_get(pill, field, RCL_SERVER, NULL, 0); +} +EXPORT_SYMBOL(req_capsule_server_get); + +/** + * Same as req_capsule_server_get(), but with a \a swabber argument. + * + * Ideally all swabbing should be done pursuant to RMF definitions, with no + * swabbing done outside this capsule abstraction. + */ +void *req_capsule_server_swab_get(struct req_capsule *pill, + const struct req_msg_field *field, + void *swabber) +{ + return __req_capsule_get(pill, field, RCL_SERVER, swabber, 0); +} +EXPORT_SYMBOL(req_capsule_server_swab_get); + +/** + * Utility that combines req_capsule_set_size() and req_capsule_server_get(). + * + * First the \a pill's request \a field's size is set (\a rc_area) using + * req_capsule_set_size() with the given \a len. Then the actual buffer is + * returned. + */ +void *req_capsule_server_sized_get(struct req_capsule *pill, + const struct req_msg_field *field, + int len) +{ + req_capsule_set_size(pill, field, RCL_SERVER, len); + return __req_capsule_get(pill, field, RCL_SERVER, NULL, 0); +} +EXPORT_SYMBOL(req_capsule_server_sized_get); + +void *req_capsule_server_sized_swab_get(struct req_capsule *pill, + const struct req_msg_field *field, + int len, void *swabber) +{ + req_capsule_set_size(pill, field, RCL_SERVER, len); + return __req_capsule_get(pill, field, RCL_SERVER, swabber, 0); +} +EXPORT_SYMBOL(req_capsule_server_sized_swab_get); + +/** + * Returns the buffer of a \a pill corresponding to the given \a field from the + * request (if the caller is executing on the server-side) or reply (if the + * caller is executing on the client-side). + * + * This function convenient for use is code that could be executed on the + * client and server alike. + */ +const void *req_capsule_other_get(struct req_capsule *pill, + const struct req_msg_field *field) +{ + return __req_capsule_get(pill, field, pill->rc_loc ^ 1, NULL, 0); +} +EXPORT_SYMBOL(req_capsule_other_get); + +/** + * Set the size of the PTLRPC request/reply (\a loc) buffer for the given \a + * field of the given \a pill. + * + * This function must be used when constructing variable sized fields of a + * request or reply. + */ +void req_capsule_set_size(struct req_capsule *pill, + const struct req_msg_field *field, + enum req_location loc, int size) +{ + LASSERT(loc == RCL_SERVER || loc == RCL_CLIENT); + + if ((size != field->rmf_size) && + (field->rmf_size != -1) && + !(field->rmf_flags & RMF_F_NO_SIZE_CHECK) && + (size > 0)) { + if ((field->rmf_flags & RMF_F_STRUCT_ARRAY) && + (size % field->rmf_size != 0)) { + CERROR("%s: array field size mismatch %d %% %d != 0 (%d)\n", + field->rmf_name, size, field->rmf_size, loc); + LBUG(); + } else if (!(field->rmf_flags & RMF_F_STRUCT_ARRAY) && + size < field->rmf_size) { + CERROR("%s: field size mismatch %d != %d (%d)\n", + field->rmf_name, size, field->rmf_size, loc); + LBUG(); + } + } + + pill->rc_area[loc][__req_capsule_offset(pill, field, loc)] = size; +} +EXPORT_SYMBOL(req_capsule_set_size); + +/** + * Return the actual PTLRPC buffer length of a request or reply (\a loc) + * for the given \a pill's given \a field. + * + * NB: this function doesn't correspond with req_capsule_set_size(), which + * actually sets the size in pill.rc_area[loc][offset], but this function + * returns the message buflen[offset], maybe we should use another name. + */ +int req_capsule_get_size(const struct req_capsule *pill, + const struct req_msg_field *field, + enum req_location loc) +{ + LASSERT(loc == RCL_SERVER || loc == RCL_CLIENT); + + return lustre_msg_buflen(__req_msg(pill, loc), + __req_capsule_offset(pill, field, loc)); +} +EXPORT_SYMBOL(req_capsule_get_size); + +/** + * Wrapper around lustre_msg_size() that returns the PTLRPC size needed for the + * given \a pill's request or reply (\a loc) given the field size recorded in + * the \a pill's rc_area. + * + * See also req_capsule_set_size(). + */ +int req_capsule_msg_size(struct req_capsule *pill, enum req_location loc) +{ + return lustre_msg_size(pill->rc_req->rq_import->imp_msg_magic, + pill->rc_fmt->rf_fields[loc].nr, + pill->rc_area[loc]); +} + +/** + * While req_capsule_msg_size() computes the size of a PTLRPC request or reply + * (\a loc) given a \a pill's \a rc_area, this function computes the size of a + * PTLRPC request or reply given only an RQF (\a fmt). + * + * This function should not be used for formats which contain variable size + * fields. + */ +int req_capsule_fmt_size(__u32 magic, const struct req_format *fmt, + enum req_location loc) +{ + int size, i = 0; + + /* + * This function should probably LASSERT() that fmt has no fields with + * RMF_F_STRUCT_ARRAY in rmf_flags, since we can't know here how many + * elements in the array there will ultimately be, but then, we could + * assume that there will be at least one element, and that's just what + * we do. + */ + size = lustre_msg_hdr_size(magic, fmt->rf_fields[loc].nr); + if (size < 0) + return size; + + for (; i < fmt->rf_fields[loc].nr; ++i) + if (fmt->rf_fields[loc].d[i]->rmf_size != -1) + size += cfs_size_round(fmt->rf_fields[loc].d[i]-> + rmf_size); + return size; +} + +/** + * Changes the format of an RPC. + * + * The pill must already have been initialized, which means that it already has + * a request format. The new format \a fmt must be an extension of the pill's + * old format. Specifically: the new format must have as many request and reply + * fields as the old one, and all fields shared by the old and new format must + * be at least as large in the new format. + * + * The new format's fields may be of different "type" than the old format, but + * only for fields that are "opaque" blobs: fields which have a) have no + * \a rmf_swabber, b) \a rmf_flags == 0 or RMF_F_NO_SIZE_CHECK, and c) \a + * rmf_size == -1 or \a rmf_flags == RMF_F_NO_SIZE_CHECK. For example, + * OBD_SET_INFO has a key field and an opaque value field that gets interpreted + * according to the key field. When the value, according to the key, contains a + * structure (or array thereof) to be swabbed, the format should be changed to + * one where the value field has \a rmf_size/rmf_flags/rmf_swabber set + * accordingly. + */ +void req_capsule_extend(struct req_capsule *pill, const struct req_format *fmt) +{ + int i; + int j; + + const struct req_format *old; + + LASSERT(pill->rc_fmt != NULL); + LASSERT(__req_format_is_sane(fmt)); + + old = pill->rc_fmt; + /* + * Sanity checking... + */ + for (i = 0; i < RCL_NR; ++i) { + LASSERT(fmt->rf_fields[i].nr >= old->rf_fields[i].nr); + for (j = 0; j < old->rf_fields[i].nr - 1; ++j) { + const struct req_msg_field *ofield = FMT_FIELD(old, i, j); + + /* "opaque" fields can be transmogrified */ + if (ofield->rmf_swabber == NULL && + (ofield->rmf_flags & ~RMF_F_NO_SIZE_CHECK) == 0 && + (ofield->rmf_size == -1 || + ofield->rmf_flags == RMF_F_NO_SIZE_CHECK)) + continue; + LASSERT(FMT_FIELD(fmt, i, j) == FMT_FIELD(old, i, j)); + } + /* + * Last field in old format can be shorter than in new. + */ + LASSERT(FMT_FIELD(fmt, i, j)->rmf_size >= + FMT_FIELD(old, i, j)->rmf_size); + } + + pill->rc_fmt = fmt; +} +EXPORT_SYMBOL(req_capsule_extend); + +/** + * This function returns a non-zero value if the given \a field is present in + * the format (\a rc_fmt) of \a pill's PTLRPC request or reply (\a loc), else it + * returns 0. + */ +int req_capsule_has_field(const struct req_capsule *pill, + const struct req_msg_field *field, + enum req_location loc) +{ + LASSERT(loc == RCL_SERVER || loc == RCL_CLIENT); + + return field->rmf_offset[pill->rc_fmt->rf_idx][loc]; +} +EXPORT_SYMBOL(req_capsule_has_field); + +/** + * Returns a non-zero value if the given \a field is present in the given \a + * pill's PTLRPC request or reply (\a loc), else it returns 0. + */ +int req_capsule_field_present(const struct req_capsule *pill, + const struct req_msg_field *field, + enum req_location loc) +{ + int offset; + + LASSERT(loc == RCL_SERVER || loc == RCL_CLIENT); + LASSERT(req_capsule_has_field(pill, field, loc)); + + offset = __req_capsule_offset(pill, field, loc); + return lustre_msg_bufcount(__req_msg(pill, loc)) > offset; +} +EXPORT_SYMBOL(req_capsule_field_present); + +/** + * This function shrinks the size of the _buffer_ of the \a pill's PTLRPC + * request or reply (\a loc). + * + * This is not the opposite of req_capsule_extend(). + */ +void req_capsule_shrink(struct req_capsule *pill, + const struct req_msg_field *field, + unsigned int newlen, + enum req_location loc) +{ + const struct req_format *fmt; + struct lustre_msg *msg; + int len; + int offset; + + fmt = pill->rc_fmt; + LASSERT(fmt != NULL); + LASSERT(__req_format_is_sane(fmt)); + LASSERT(req_capsule_has_field(pill, field, loc)); + LASSERT(req_capsule_field_present(pill, field, loc)); + + offset = __req_capsule_offset(pill, field, loc); + + msg = __req_msg(pill, loc); + len = lustre_msg_buflen(msg, offset); + LASSERTF(newlen <= len, "%s:%s, oldlen=%d, newlen=%d\n", + fmt->rf_name, field->rmf_name, len, newlen); + + if (loc == RCL_CLIENT) + pill->rc_req->rq_reqlen = lustre_shrink_msg(msg, offset, newlen, + 1); + else + pill->rc_req->rq_replen = lustre_shrink_msg(msg, offset, newlen, + 1); +} +EXPORT_SYMBOL(req_capsule_shrink); + +int req_capsule_server_grow(struct req_capsule *pill, + const struct req_msg_field *field, + unsigned int newlen) +{ + struct ptlrpc_reply_state *rs = pill->rc_req->rq_reply_state, *nrs; + char *from, *to; + int offset, len, rc; + + LASSERT(pill->rc_fmt != NULL); + LASSERT(__req_format_is_sane(pill->rc_fmt)); + LASSERT(req_capsule_has_field(pill, field, RCL_SERVER)); + LASSERT(req_capsule_field_present(pill, field, RCL_SERVER)); + + len = req_capsule_get_size(pill, field, RCL_SERVER); + offset = __req_capsule_offset(pill, field, RCL_SERVER); + if (pill->rc_req->rq_repbuf_len >= + lustre_packed_msg_size(pill->rc_req->rq_repmsg) - len + newlen) + CERROR("Inplace repack might be done\n"); + + pill->rc_req->rq_reply_state = NULL; + req_capsule_set_size(pill, field, RCL_SERVER, newlen); + rc = req_capsule_server_pack(pill); + if (rc) { + /* put old rs back, the caller will decide what to do */ + pill->rc_req->rq_reply_state = rs; + return rc; + } + nrs = pill->rc_req->rq_reply_state; + /* Now we need only buffers, copy first chunk */ + to = lustre_msg_buf(nrs->rs_msg, 0, 0); + from = lustre_msg_buf(rs->rs_msg, 0, 0); + len = (char *)lustre_msg_buf(rs->rs_msg, offset, 0) - from; + memcpy(to, from, len); + /* check if we have tail and copy it too */ + if (rs->rs_msg->lm_bufcount > offset + 1) { + to = lustre_msg_buf(nrs->rs_msg, offset + 1, 0); + from = lustre_msg_buf(rs->rs_msg, offset + 1, 0); + offset = rs->rs_msg->lm_bufcount - 1; + len = (char *)lustre_msg_buf(rs->rs_msg, offset, 0) + + cfs_size_round(rs->rs_msg->lm_buflens[offset]) - from; + memcpy(to, from, len); + } + /* drop old reply if everything is fine */ + if (rs->rs_difficult) { + /* copy rs data */ + int i; + + nrs->rs_difficult = 1; + nrs->rs_no_ack = rs->rs_no_ack; + for (i = 0; i < rs->rs_nlocks; i++) { + nrs->rs_locks[i] = rs->rs_locks[i]; + nrs->rs_modes[i] = rs->rs_modes[i]; + nrs->rs_nlocks++; + } + rs->rs_nlocks = 0; + rs->rs_difficult = 0; + rs->rs_no_ack = 0; + } + ptlrpc_rs_decref(rs); + return 0; +} +EXPORT_SYMBOL(req_capsule_server_grow); +/* __REQ_LAYOUT_USER__ */ +#endif diff --git a/drivers/staging/lustre/lustre/ptlrpc/llog_client.c b/drivers/staging/lustre/lustre/ptlrpc/llog_client.c new file mode 100644 index 000000000..e9baf5bbe --- /dev/null +++ b/drivers/staging/lustre/lustre/ptlrpc/llog_client.c @@ -0,0 +1,366 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2012, Intel Corporation. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + * + * lustre/ptlrpc/llog_client.c + * + * remote api for llog - client side + * + * Author: Andreas Dilger <adilger@clusterfs.com> + */ + +#define DEBUG_SUBSYSTEM S_LOG + +#include "../../include/linux/libcfs/libcfs.h" + +#include "../include/obd_class.h" +#include "../include/lustre_log.h" +#include "../include/lustre_net.h" +#include <linux/list.h> + +#define LLOG_CLIENT_ENTRY(ctxt, imp) do { \ + mutex_lock(&ctxt->loc_mutex); \ + if (ctxt->loc_imp) { \ + imp = class_import_get(ctxt->loc_imp); \ + } else { \ + CERROR("ctxt->loc_imp == NULL for context idx %d." \ + "Unable to complete MDS/OSS recovery," \ + "but I'll try again next time. Not fatal.\n", \ + ctxt->loc_idx); \ + imp = NULL; \ + mutex_unlock(&ctxt->loc_mutex); \ + return (-EINVAL); \ + } \ + mutex_unlock(&ctxt->loc_mutex); \ +} while (0) + +#define LLOG_CLIENT_EXIT(ctxt, imp) do { \ + mutex_lock(&ctxt->loc_mutex); \ + if (ctxt->loc_imp != imp) \ + CWARN("loc_imp has changed from %p to %p\n", \ + ctxt->loc_imp, imp); \ + class_import_put(imp); \ + mutex_unlock(&ctxt->loc_mutex); \ +} while (0) + +/* This is a callback from the llog_* functions. + * Assumes caller has already pushed us into the kernel context. */ +static int llog_client_open(const struct lu_env *env, + struct llog_handle *lgh, struct llog_logid *logid, + char *name, enum llog_open_param open_param) +{ + struct obd_import *imp; + struct llogd_body *body; + struct llog_ctxt *ctxt = lgh->lgh_ctxt; + struct ptlrpc_request *req = NULL; + int rc; + + LLOG_CLIENT_ENTRY(ctxt, imp); + + /* client cannot create llog */ + LASSERTF(open_param != LLOG_OPEN_NEW, "%#x\n", open_param); + LASSERT(lgh); + + req = ptlrpc_request_alloc(imp, &RQF_LLOG_ORIGIN_HANDLE_CREATE); + if (req == NULL) { + rc = -ENOMEM; + goto out; + } + + if (name) + req_capsule_set_size(&req->rq_pill, &RMF_NAME, RCL_CLIENT, + strlen(name) + 1); + + rc = ptlrpc_request_pack(req, LUSTRE_LOG_VERSION, + LLOG_ORIGIN_HANDLE_CREATE); + if (rc) { + ptlrpc_request_free(req); + req = NULL; + goto out; + } + ptlrpc_request_set_replen(req); + + body = req_capsule_client_get(&req->rq_pill, &RMF_LLOGD_BODY); + if (logid) + body->lgd_logid = *logid; + body->lgd_ctxt_idx = ctxt->loc_idx - 1; + + if (name) { + char *tmp; + tmp = req_capsule_client_sized_get(&req->rq_pill, &RMF_NAME, + strlen(name) + 1); + LASSERT(tmp); + strcpy(tmp, name); + } + + rc = ptlrpc_queue_wait(req); + if (rc) + goto out; + + body = req_capsule_server_get(&req->rq_pill, &RMF_LLOGD_BODY); + if (body == NULL) { + rc = -EFAULT; + goto out; + } + + lgh->lgh_id = body->lgd_logid; + lgh->lgh_ctxt = ctxt; +out: + LLOG_CLIENT_EXIT(ctxt, imp); + ptlrpc_req_finished(req); + return rc; +} + +static int llog_client_destroy(const struct lu_env *env, + struct llog_handle *loghandle) +{ + struct obd_import *imp; + struct ptlrpc_request *req = NULL; + struct llogd_body *body; + int rc; + + LLOG_CLIENT_ENTRY(loghandle->lgh_ctxt, imp); + req = ptlrpc_request_alloc_pack(imp, &RQF_LLOG_ORIGIN_HANDLE_DESTROY, + LUSTRE_LOG_VERSION, + LLOG_ORIGIN_HANDLE_DESTROY); + if (req == NULL) { + rc = -ENOMEM; + goto err_exit; + } + + body = req_capsule_client_get(&req->rq_pill, &RMF_LLOGD_BODY); + body->lgd_logid = loghandle->lgh_id; + body->lgd_llh_flags = loghandle->lgh_hdr->llh_flags; + + if (!(body->lgd_llh_flags & LLOG_F_IS_PLAIN)) + CERROR("%s: wrong llog flags %x\n", imp->imp_obd->obd_name, + body->lgd_llh_flags); + + ptlrpc_request_set_replen(req); + rc = ptlrpc_queue_wait(req); + + ptlrpc_req_finished(req); +err_exit: + LLOG_CLIENT_EXIT(loghandle->lgh_ctxt, imp); + return rc; +} + + +static int llog_client_next_block(const struct lu_env *env, + struct llog_handle *loghandle, + int *cur_idx, int next_idx, + __u64 *cur_offset, void *buf, int len) +{ + struct obd_import *imp; + struct ptlrpc_request *req = NULL; + struct llogd_body *body; + void *ptr; + int rc; + + LLOG_CLIENT_ENTRY(loghandle->lgh_ctxt, imp); + req = ptlrpc_request_alloc_pack(imp, &RQF_LLOG_ORIGIN_HANDLE_NEXT_BLOCK, + LUSTRE_LOG_VERSION, + LLOG_ORIGIN_HANDLE_NEXT_BLOCK); + if (req == NULL) { + rc = -ENOMEM; + goto err_exit; + } + + body = req_capsule_client_get(&req->rq_pill, &RMF_LLOGD_BODY); + body->lgd_logid = loghandle->lgh_id; + body->lgd_ctxt_idx = loghandle->lgh_ctxt->loc_idx - 1; + body->lgd_llh_flags = loghandle->lgh_hdr->llh_flags; + body->lgd_index = next_idx; + body->lgd_saved_index = *cur_idx; + body->lgd_len = len; + body->lgd_cur_offset = *cur_offset; + + req_capsule_set_size(&req->rq_pill, &RMF_EADATA, RCL_SERVER, len); + ptlrpc_request_set_replen(req); + rc = ptlrpc_queue_wait(req); + if (rc) + goto out; + + body = req_capsule_server_get(&req->rq_pill, &RMF_LLOGD_BODY); + if (body == NULL) { + rc = -EFAULT; + goto out; + } + + /* The log records are swabbed as they are processed */ + ptr = req_capsule_server_get(&req->rq_pill, &RMF_EADATA); + if (ptr == NULL) { + rc = -EFAULT; + goto out; + } + + *cur_idx = body->lgd_saved_index; + *cur_offset = body->lgd_cur_offset; + + memcpy(buf, ptr, len); +out: + ptlrpc_req_finished(req); +err_exit: + LLOG_CLIENT_EXIT(loghandle->lgh_ctxt, imp); + return rc; +} + +static int llog_client_prev_block(const struct lu_env *env, + struct llog_handle *loghandle, + int prev_idx, void *buf, int len) +{ + struct obd_import *imp; + struct ptlrpc_request *req = NULL; + struct llogd_body *body; + void *ptr; + int rc; + + LLOG_CLIENT_ENTRY(loghandle->lgh_ctxt, imp); + req = ptlrpc_request_alloc_pack(imp, &RQF_LLOG_ORIGIN_HANDLE_PREV_BLOCK, + LUSTRE_LOG_VERSION, + LLOG_ORIGIN_HANDLE_PREV_BLOCK); + if (req == NULL) { + rc = -ENOMEM; + goto err_exit; + } + + body = req_capsule_client_get(&req->rq_pill, &RMF_LLOGD_BODY); + body->lgd_logid = loghandle->lgh_id; + body->lgd_ctxt_idx = loghandle->lgh_ctxt->loc_idx - 1; + body->lgd_llh_flags = loghandle->lgh_hdr->llh_flags; + body->lgd_index = prev_idx; + body->lgd_len = len; + + req_capsule_set_size(&req->rq_pill, &RMF_EADATA, RCL_SERVER, len); + ptlrpc_request_set_replen(req); + + rc = ptlrpc_queue_wait(req); + if (rc) + goto out; + + body = req_capsule_server_get(&req->rq_pill, &RMF_LLOGD_BODY); + if (body == NULL) { + rc = -EFAULT; + goto out; + } + + ptr = req_capsule_server_get(&req->rq_pill, &RMF_EADATA); + if (ptr == NULL) { + rc = -EFAULT; + goto out; + } + + memcpy(buf, ptr, len); +out: + ptlrpc_req_finished(req); +err_exit: + LLOG_CLIENT_EXIT(loghandle->lgh_ctxt, imp); + return rc; +} + +static int llog_client_read_header(const struct lu_env *env, + struct llog_handle *handle) +{ + struct obd_import *imp; + struct ptlrpc_request *req = NULL; + struct llogd_body *body; + struct llog_log_hdr *hdr; + struct llog_rec_hdr *llh_hdr; + int rc; + + LLOG_CLIENT_ENTRY(handle->lgh_ctxt, imp); + req = ptlrpc_request_alloc_pack(imp, &RQF_LLOG_ORIGIN_HANDLE_READ_HEADER, + LUSTRE_LOG_VERSION, + LLOG_ORIGIN_HANDLE_READ_HEADER); + if (req == NULL) { + rc = -ENOMEM; + goto err_exit; + } + + body = req_capsule_client_get(&req->rq_pill, &RMF_LLOGD_BODY); + body->lgd_logid = handle->lgh_id; + body->lgd_ctxt_idx = handle->lgh_ctxt->loc_idx - 1; + body->lgd_llh_flags = handle->lgh_hdr->llh_flags; + + ptlrpc_request_set_replen(req); + rc = ptlrpc_queue_wait(req); + if (rc) + goto out; + + hdr = req_capsule_server_get(&req->rq_pill, &RMF_LLOG_LOG_HDR); + if (hdr == NULL) { + rc = -EFAULT; + goto out; + } + + memcpy(handle->lgh_hdr, hdr, sizeof(*hdr)); + handle->lgh_last_idx = handle->lgh_hdr->llh_tail.lrt_index; + + /* sanity checks */ + llh_hdr = &handle->lgh_hdr->llh_hdr; + if (llh_hdr->lrh_type != LLOG_HDR_MAGIC) { + CERROR("bad log header magic: %#x (expecting %#x)\n", + llh_hdr->lrh_type, LLOG_HDR_MAGIC); + rc = -EIO; + } else if (llh_hdr->lrh_len != LLOG_CHUNK_SIZE) { + CERROR("incorrectly sized log header: %#x (expecting %#x)\n", + llh_hdr->lrh_len, LLOG_CHUNK_SIZE); + CERROR("you may need to re-run lconf --write_conf.\n"); + rc = -EIO; + } +out: + ptlrpc_req_finished(req); +err_exit: + LLOG_CLIENT_EXIT(handle->lgh_ctxt, imp); + return rc; +} + +static int llog_client_close(const struct lu_env *env, + struct llog_handle *handle) +{ + /* this doesn't call LLOG_ORIGIN_HANDLE_CLOSE because + the servers all close the file at the end of every + other LLOG_ RPC. */ + return 0; +} + +struct llog_operations llog_client_ops = { + .lop_next_block = llog_client_next_block, + .lop_prev_block = llog_client_prev_block, + .lop_read_header = llog_client_read_header, + .lop_open = llog_client_open, + .lop_destroy = llog_client_destroy, + .lop_close = llog_client_close, +}; +EXPORT_SYMBOL(llog_client_ops); diff --git a/drivers/staging/lustre/lustre/ptlrpc/llog_net.c b/drivers/staging/lustre/lustre/ptlrpc/llog_net.c new file mode 100644 index 000000000..dac66f5b3 --- /dev/null +++ b/drivers/staging/lustre/lustre/ptlrpc/llog_net.c @@ -0,0 +1,72 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2012, Intel Corporation. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + * + * lustre/ptlrpc/llog_net.c + * + * OST<->MDS recovery logging infrastructure. + * + * Invariants in implementation: + * - we do not share logs among different OST<->MDS connections, so that + * if an OST or MDS fails it need only look at log(s) relevant to itself + * + * Author: Andreas Dilger <adilger@clusterfs.com> + */ + +#define DEBUG_SUBSYSTEM S_LOG + +#include "../../include/linux/libcfs/libcfs.h" + +#include "../include/obd_class.h" +#include "../include/lustre_log.h" +#include <linux/list.h> + +int llog_initiator_connect(struct llog_ctxt *ctxt) +{ + struct obd_import *new_imp; + + LASSERT(ctxt); + new_imp = ctxt->loc_obd->u.cli.cl_import; + LASSERTF(ctxt->loc_imp == NULL || ctxt->loc_imp == new_imp, + "%p - %p\n", ctxt->loc_imp, new_imp); + mutex_lock(&ctxt->loc_mutex); + if (ctxt->loc_imp != new_imp) { + if (ctxt->loc_imp) + class_import_put(ctxt->loc_imp); + ctxt->loc_imp = class_import_get(new_imp); + } + mutex_unlock(&ctxt->loc_mutex); + return 0; +} +EXPORT_SYMBOL(llog_initiator_connect); diff --git a/drivers/staging/lustre/lustre/ptlrpc/lproc_ptlrpc.c b/drivers/staging/lustre/lustre/ptlrpc/lproc_ptlrpc.c new file mode 100644 index 000000000..9533ab976 --- /dev/null +++ b/drivers/staging/lustre/lustre/ptlrpc/lproc_ptlrpc.c @@ -0,0 +1,1366 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2011, 2012, Intel Corporation. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + */ +#define DEBUG_SUBSYSTEM S_CLASS + + +#include "../include/obd_support.h" +#include "../include/obd.h" +#include "../include/lprocfs_status.h" +#include "../include/lustre/lustre_idl.h" +#include "../include/lustre_net.h" +#include "../include/obd_class.h" +#include "ptlrpc_internal.h" + + +static struct ll_rpc_opcode { + __u32 opcode; + const char *opname; +} ll_rpc_opcode_table[LUSTRE_MAX_OPCODES] = { + { OST_REPLY, "ost_reply" }, + { OST_GETATTR, "ost_getattr" }, + { OST_SETATTR, "ost_setattr" }, + { OST_READ, "ost_read" }, + { OST_WRITE, "ost_write" }, + { OST_CREATE , "ost_create" }, + { OST_DESTROY, "ost_destroy" }, + { OST_GET_INFO, "ost_get_info" }, + { OST_CONNECT, "ost_connect" }, + { OST_DISCONNECT, "ost_disconnect" }, + { OST_PUNCH, "ost_punch" }, + { OST_OPEN, "ost_open" }, + { OST_CLOSE, "ost_close" }, + { OST_STATFS, "ost_statfs" }, + { 14, NULL }, /* formerly OST_SAN_READ */ + { 15, NULL }, /* formerly OST_SAN_WRITE */ + { OST_SYNC, "ost_sync" }, + { OST_SET_INFO, "ost_set_info" }, + { OST_QUOTACHECK, "ost_quotacheck" }, + { OST_QUOTACTL, "ost_quotactl" }, + { OST_QUOTA_ADJUST_QUNIT, "ost_quota_adjust_qunit" }, + { MDS_GETATTR, "mds_getattr" }, + { MDS_GETATTR_NAME, "mds_getattr_lock" }, + { MDS_CLOSE, "mds_close" }, + { MDS_REINT, "mds_reint" }, + { MDS_READPAGE, "mds_readpage" }, + { MDS_CONNECT, "mds_connect" }, + { MDS_DISCONNECT, "mds_disconnect" }, + { MDS_GETSTATUS, "mds_getstatus" }, + { MDS_STATFS, "mds_statfs" }, + { MDS_PIN, "mds_pin" }, + { MDS_UNPIN, "mds_unpin" }, + { MDS_SYNC, "mds_sync" }, + { MDS_DONE_WRITING, "mds_done_writing" }, + { MDS_SET_INFO, "mds_set_info" }, + { MDS_QUOTACHECK, "mds_quotacheck" }, + { MDS_QUOTACTL, "mds_quotactl" }, + { MDS_GETXATTR, "mds_getxattr" }, + { MDS_SETXATTR, "mds_setxattr" }, + { MDS_WRITEPAGE, "mds_writepage" }, + { MDS_IS_SUBDIR, "mds_is_subdir" }, + { MDS_GET_INFO, "mds_get_info" }, + { MDS_HSM_STATE_GET, "mds_hsm_state_get" }, + { MDS_HSM_STATE_SET, "mds_hsm_state_set" }, + { MDS_HSM_ACTION, "mds_hsm_action" }, + { MDS_HSM_PROGRESS, "mds_hsm_progress" }, + { MDS_HSM_REQUEST, "mds_hsm_request" }, + { MDS_HSM_CT_REGISTER, "mds_hsm_ct_register" }, + { MDS_HSM_CT_UNREGISTER, "mds_hsm_ct_unregister" }, + { MDS_SWAP_LAYOUTS, "mds_swap_layouts" }, + { LDLM_ENQUEUE, "ldlm_enqueue" }, + { LDLM_CONVERT, "ldlm_convert" }, + { LDLM_CANCEL, "ldlm_cancel" }, + { LDLM_BL_CALLBACK, "ldlm_bl_callback" }, + { LDLM_CP_CALLBACK, "ldlm_cp_callback" }, + { LDLM_GL_CALLBACK, "ldlm_gl_callback" }, + { LDLM_SET_INFO, "ldlm_set_info" }, + { MGS_CONNECT, "mgs_connect" }, + { MGS_DISCONNECT, "mgs_disconnect" }, + { MGS_EXCEPTION, "mgs_exception" }, + { MGS_TARGET_REG, "mgs_target_reg" }, + { MGS_TARGET_DEL, "mgs_target_del" }, + { MGS_SET_INFO, "mgs_set_info" }, + { MGS_CONFIG_READ, "mgs_config_read" }, + { OBD_PING, "obd_ping" }, + { OBD_LOG_CANCEL, "llog_cancel" }, + { OBD_QC_CALLBACK, "obd_quota_callback" }, + { OBD_IDX_READ, "dt_index_read" }, + { LLOG_ORIGIN_HANDLE_CREATE, "llog_origin_handle_open" }, + { LLOG_ORIGIN_HANDLE_NEXT_BLOCK, "llog_origin_handle_next_block" }, + { LLOG_ORIGIN_HANDLE_READ_HEADER, "llog_origin_handle_read_header" }, + { LLOG_ORIGIN_HANDLE_WRITE_REC, "llog_origin_handle_write_rec" }, + { LLOG_ORIGIN_HANDLE_CLOSE, "llog_origin_handle_close" }, + { LLOG_ORIGIN_CONNECT, "llog_origin_connect" }, + { LLOG_CATINFO, "llog_catinfo" }, + { LLOG_ORIGIN_HANDLE_PREV_BLOCK, "llog_origin_handle_prev_block" }, + { LLOG_ORIGIN_HANDLE_DESTROY, "llog_origin_handle_destroy" }, + { QUOTA_DQACQ, "quota_acquire" }, + { QUOTA_DQREL, "quota_release" }, + { SEQ_QUERY, "seq_query" }, + { SEC_CTX_INIT, "sec_ctx_init" }, + { SEC_CTX_INIT_CONT, "sec_ctx_init_cont" }, + { SEC_CTX_FINI, "sec_ctx_fini" }, + { FLD_QUERY, "fld_query" }, + { UPDATE_OBJ, "update_obj" }, +}; + +static struct ll_eopcode { + __u32 opcode; + const char *opname; +} ll_eopcode_table[EXTRA_LAST_OPC] = { + { LDLM_GLIMPSE_ENQUEUE, "ldlm_glimpse_enqueue" }, + { LDLM_PLAIN_ENQUEUE, "ldlm_plain_enqueue" }, + { LDLM_EXTENT_ENQUEUE, "ldlm_extent_enqueue" }, + { LDLM_FLOCK_ENQUEUE, "ldlm_flock_enqueue" }, + { LDLM_IBITS_ENQUEUE, "ldlm_ibits_enqueue" }, + { MDS_REINT_SETATTR, "mds_reint_setattr" }, + { MDS_REINT_CREATE, "mds_reint_create" }, + { MDS_REINT_LINK, "mds_reint_link" }, + { MDS_REINT_UNLINK, "mds_reint_unlink" }, + { MDS_REINT_RENAME, "mds_reint_rename" }, + { MDS_REINT_OPEN, "mds_reint_open" }, + { MDS_REINT_SETXATTR, "mds_reint_setxattr" }, + { BRW_READ_BYTES, "read_bytes" }, + { BRW_WRITE_BYTES, "write_bytes" }, +}; + +const char *ll_opcode2str(__u32 opcode) +{ + /* When one of the assertions below fail, chances are that: + * 1) A new opcode was added in include/lustre/lustre_idl.h, + * but is missing from the table above. + * or 2) The opcode space was renumbered or rearranged, + * and the opcode_offset() function in + * ptlrpc_internal.h needs to be modified. + */ + __u32 offset = opcode_offset(opcode); + LASSERTF(offset < LUSTRE_MAX_OPCODES, + "offset %u >= LUSTRE_MAX_OPCODES %u\n", + offset, LUSTRE_MAX_OPCODES); + LASSERTF(ll_rpc_opcode_table[offset].opcode == opcode, + "ll_rpc_opcode_table[%u].opcode %u != opcode %u\n", + offset, ll_rpc_opcode_table[offset].opcode, opcode); + return ll_rpc_opcode_table[offset].opname; +} + +static const char *ll_eopcode2str(__u32 opcode) +{ + LASSERT(ll_eopcode_table[opcode].opcode == opcode); + return ll_eopcode_table[opcode].opname; +} + +#if defined(CONFIG_PROC_FS) +static void ptlrpc_lprocfs_register(struct proc_dir_entry *root, char *dir, + char *name, + struct proc_dir_entry **procroot_ret, + struct lprocfs_stats **stats_ret) +{ + struct proc_dir_entry *svc_procroot; + struct lprocfs_stats *svc_stats; + int i, rc; + unsigned int svc_counter_config = LPROCFS_CNTR_AVGMINMAX | + LPROCFS_CNTR_STDDEV; + + LASSERT(*procroot_ret == NULL); + LASSERT(*stats_ret == NULL); + + svc_stats = lprocfs_alloc_stats(EXTRA_MAX_OPCODES+LUSTRE_MAX_OPCODES, + 0); + if (svc_stats == NULL) + return; + + if (dir) { + svc_procroot = lprocfs_register(dir, root, NULL, NULL); + if (IS_ERR(svc_procroot)) { + lprocfs_free_stats(&svc_stats); + return; + } + } else { + svc_procroot = root; + } + + lprocfs_counter_init(svc_stats, PTLRPC_REQWAIT_CNTR, + svc_counter_config, "req_waittime", "usec"); + lprocfs_counter_init(svc_stats, PTLRPC_REQQDEPTH_CNTR, + svc_counter_config, "req_qdepth", "reqs"); + lprocfs_counter_init(svc_stats, PTLRPC_REQACTIVE_CNTR, + svc_counter_config, "req_active", "reqs"); + lprocfs_counter_init(svc_stats, PTLRPC_TIMEOUT, + svc_counter_config, "req_timeout", "sec"); + lprocfs_counter_init(svc_stats, PTLRPC_REQBUF_AVAIL_CNTR, + svc_counter_config, "reqbuf_avail", "bufs"); + for (i = 0; i < EXTRA_LAST_OPC; i++) { + char *units; + + switch (i) { + case BRW_WRITE_BYTES: + case BRW_READ_BYTES: + units = "bytes"; + break; + default: + units = "reqs"; + break; + } + lprocfs_counter_init(svc_stats, PTLRPC_LAST_CNTR + i, + svc_counter_config, + ll_eopcode2str(i), units); + } + for (i = 0; i < LUSTRE_MAX_OPCODES; i++) { + __u32 opcode = ll_rpc_opcode_table[i].opcode; + lprocfs_counter_init(svc_stats, + EXTRA_MAX_OPCODES + i, svc_counter_config, + ll_opcode2str(opcode), "usec"); + } + + rc = lprocfs_register_stats(svc_procroot, name, svc_stats); + if (rc < 0) { + if (dir) + lprocfs_remove(&svc_procroot); + lprocfs_free_stats(&svc_stats); + } else { + if (dir) + *procroot_ret = svc_procroot; + *stats_ret = svc_stats; + } +} + +static int +ptlrpc_lprocfs_req_history_len_seq_show(struct seq_file *m, void *v) +{ + struct ptlrpc_service *svc = m->private; + struct ptlrpc_service_part *svcpt; + int total = 0; + int i; + + ptlrpc_service_for_each_part(svcpt, i, svc) + total += svcpt->scp_hist_nrqbds; + + seq_printf(m, "%d\n", total); + return 0; +} +LPROC_SEQ_FOPS_RO(ptlrpc_lprocfs_req_history_len); + +static int +ptlrpc_lprocfs_req_history_max_seq_show(struct seq_file *m, void *n) +{ + struct ptlrpc_service *svc = m->private; + struct ptlrpc_service_part *svcpt; + int total = 0; + int i; + + ptlrpc_service_for_each_part(svcpt, i, svc) + total += svc->srv_hist_nrqbds_cpt_max; + + seq_printf(m, "%d\n", total); + return 0; +} + +static ssize_t +ptlrpc_lprocfs_req_history_max_seq_write(struct file *file, + const char __user *buffer, + size_t count, loff_t *off) +{ + struct ptlrpc_service *svc = ((struct seq_file *)file->private_data)->private; + int bufpages; + int val; + int rc; + + rc = lprocfs_write_helper(buffer, count, &val); + if (rc < 0) + return rc; + + if (val < 0) + return -ERANGE; + + /* This sanity check is more of an insanity check; we can still + * hose a kernel by allowing the request history to grow too + * far. */ + bufpages = (svc->srv_buf_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + if (val > totalram_pages / (2 * bufpages)) + return -ERANGE; + + spin_lock(&svc->srv_lock); + + if (val == 0) + svc->srv_hist_nrqbds_cpt_max = 0; + else + svc->srv_hist_nrqbds_cpt_max = max(1, (val / svc->srv_ncpts)); + + spin_unlock(&svc->srv_lock); + + return count; +} +LPROC_SEQ_FOPS(ptlrpc_lprocfs_req_history_max); + +static int +ptlrpc_lprocfs_threads_min_seq_show(struct seq_file *m, void *n) +{ + struct ptlrpc_service *svc = m->private; + + seq_printf(m, "%d\n", svc->srv_nthrs_cpt_init * svc->srv_ncpts); + return 0; +} + +static ssize_t +ptlrpc_lprocfs_threads_min_seq_write(struct file *file, + const char __user *buffer, + size_t count, loff_t *off) +{ + struct ptlrpc_service *svc = ((struct seq_file *)file->private_data)->private; + int val; + int rc = lprocfs_write_helper(buffer, count, &val); + + if (rc < 0) + return rc; + + if (val / svc->srv_ncpts < PTLRPC_NTHRS_INIT) + return -ERANGE; + + spin_lock(&svc->srv_lock); + if (val > svc->srv_nthrs_cpt_limit * svc->srv_ncpts) { + spin_unlock(&svc->srv_lock); + return -ERANGE; + } + + svc->srv_nthrs_cpt_init = val / svc->srv_ncpts; + + spin_unlock(&svc->srv_lock); + + return count; +} +LPROC_SEQ_FOPS(ptlrpc_lprocfs_threads_min); + +static int +ptlrpc_lprocfs_threads_started_seq_show(struct seq_file *m, void *n) +{ + struct ptlrpc_service *svc = m->private; + struct ptlrpc_service_part *svcpt; + int total = 0; + int i; + + ptlrpc_service_for_each_part(svcpt, i, svc) + total += svcpt->scp_nthrs_running; + + seq_printf(m, "%d\n", total); + return 0; +} +LPROC_SEQ_FOPS_RO(ptlrpc_lprocfs_threads_started); + +static int +ptlrpc_lprocfs_threads_max_seq_show(struct seq_file *m, void *n) +{ + struct ptlrpc_service *svc = m->private; + + seq_printf(m, "%d\n", svc->srv_nthrs_cpt_limit * svc->srv_ncpts); + return 0; +} + +static ssize_t +ptlrpc_lprocfs_threads_max_seq_write(struct file *file, + const char __user *buffer, + size_t count, loff_t *off) +{ + struct ptlrpc_service *svc = ((struct seq_file *)file->private_data)->private; + int val; + int rc = lprocfs_write_helper(buffer, count, &val); + + if (rc < 0) + return rc; + + if (val / svc->srv_ncpts < PTLRPC_NTHRS_INIT) + return -ERANGE; + + spin_lock(&svc->srv_lock); + if (val < svc->srv_nthrs_cpt_init * svc->srv_ncpts) { + spin_unlock(&svc->srv_lock); + return -ERANGE; + } + + svc->srv_nthrs_cpt_limit = val / svc->srv_ncpts; + + spin_unlock(&svc->srv_lock); + + return count; +} +LPROC_SEQ_FOPS(ptlrpc_lprocfs_threads_max); + +/** + * \addtogoup nrs + * @{ + */ +extern struct nrs_core nrs_core; + +/** + * Translates \e ptlrpc_nrs_pol_state values to human-readable strings. + * + * \param[in] state The policy state + */ +static const char *nrs_state2str(enum ptlrpc_nrs_pol_state state) +{ + switch (state) { + default: + LBUG(); + case NRS_POL_STATE_INVALID: + return "invalid"; + case NRS_POL_STATE_STOPPED: + return "stopped"; + case NRS_POL_STATE_STOPPING: + return "stopping"; + case NRS_POL_STATE_STARTING: + return "starting"; + case NRS_POL_STATE_STARTED: + return "started"; + } +} + +/** + * Obtains status information for \a policy. + * + * Information is copied in \a info. + * + * \param[in] policy The policy + * \param[out] info Holds returned status information + */ +void nrs_policy_get_info_locked(struct ptlrpc_nrs_policy *policy, + struct ptlrpc_nrs_pol_info *info) +{ + LASSERT(policy != NULL); + LASSERT(info != NULL); + assert_spin_locked(&policy->pol_nrs->nrs_lock); + + memcpy(info->pi_name, policy->pol_desc->pd_name, NRS_POL_NAME_MAX); + + info->pi_fallback = !!(policy->pol_flags & PTLRPC_NRS_FL_FALLBACK); + info->pi_state = policy->pol_state; + /** + * XXX: These are accessed without holding + * ptlrpc_service_part::scp_req_lock. + */ + info->pi_req_queued = policy->pol_req_queued; + info->pi_req_started = policy->pol_req_started; +} + +/** + * Reads and prints policy status information for all policies of a PTLRPC + * service. + */ +static int ptlrpc_lprocfs_nrs_seq_show(struct seq_file *m, void *n) +{ + struct ptlrpc_service *svc = m->private; + struct ptlrpc_service_part *svcpt; + struct ptlrpc_nrs *nrs; + struct ptlrpc_nrs_policy *policy; + struct ptlrpc_nrs_pol_info *infos; + struct ptlrpc_nrs_pol_info tmp; + unsigned num_pols; + unsigned pol_idx = 0; + bool hp = false; + int i; + int rc = 0; + + /** + * Serialize NRS core lprocfs operations with policy registration/ + * unregistration. + */ + mutex_lock(&nrs_core.nrs_mutex); + + /** + * Use the first service partition's regular NRS head in order to obtain + * the number of policies registered with NRS heads of this service. All + * service partitions will have the same number of policies. + */ + nrs = nrs_svcpt2nrs(svc->srv_parts[0], false); + + spin_lock(&nrs->nrs_lock); + num_pols = svc->srv_parts[0]->scp_nrs_reg.nrs_num_pols; + spin_unlock(&nrs->nrs_lock); + + OBD_ALLOC(infos, num_pols * sizeof(*infos)); + if (infos == NULL) { + rc = -ENOMEM; + goto out; + } +again: + + ptlrpc_service_for_each_part(svcpt, i, svc) { + nrs = nrs_svcpt2nrs(svcpt, hp); + spin_lock(&nrs->nrs_lock); + + pol_idx = 0; + + list_for_each_entry(policy, &nrs->nrs_policy_list, + pol_list) { + LASSERT(pol_idx < num_pols); + + nrs_policy_get_info_locked(policy, &tmp); + /** + * Copy values when handling the first service + * partition. + */ + if (i == 0) { + memcpy(infos[pol_idx].pi_name, tmp.pi_name, + NRS_POL_NAME_MAX); + memcpy(&infos[pol_idx].pi_state, &tmp.pi_state, + sizeof(tmp.pi_state)); + infos[pol_idx].pi_fallback = tmp.pi_fallback; + /** + * For the rest of the service partitions + * sanity-check the values we get. + */ + } else { + LASSERT(strncmp(infos[pol_idx].pi_name, + tmp.pi_name, + NRS_POL_NAME_MAX) == 0); + /** + * Not asserting ptlrpc_nrs_pol_info::pi_state, + * because it may be different between + * instances of the same policy in different + * service partitions. + */ + LASSERT(infos[pol_idx].pi_fallback == + tmp.pi_fallback); + } + + infos[pol_idx].pi_req_queued += tmp.pi_req_queued; + infos[pol_idx].pi_req_started += tmp.pi_req_started; + + pol_idx++; + } + spin_unlock(&nrs->nrs_lock); + } + + /** + * Policy status information output is in YAML format. + * For example: + * + * regular_requests: + * - name: fifo + * state: started + * fallback: yes + * queued: 0 + * active: 0 + * + * - name: crrn + * state: started + * fallback: no + * queued: 2015 + * active: 384 + * + * high_priority_requests: + * - name: fifo + * state: started + * fallback: yes + * queued: 0 + * active: 2 + * + * - name: crrn + * state: stopped + * fallback: no + * queued: 0 + * active: 0 + */ + seq_printf(m, "%s\n", + !hp ? "\nregular_requests:" : "high_priority_requests:"); + + for (pol_idx = 0; pol_idx < num_pols; pol_idx++) { + seq_printf(m, " - name: %s\n" + " state: %s\n" + " fallback: %s\n" + " queued: %-20d\n" + " active: %-20d\n\n", + infos[pol_idx].pi_name, + nrs_state2str(infos[pol_idx].pi_state), + infos[pol_idx].pi_fallback ? "yes" : "no", + (int)infos[pol_idx].pi_req_queued, + (int)infos[pol_idx].pi_req_started); + } + + if (!hp && nrs_svc_has_hp(svc)) { + memset(infos, 0, num_pols * sizeof(*infos)); + + /** + * Redo the processing for the service's HP NRS heads' policies. + */ + hp = true; + goto again; + } + +out: + if (infos) + OBD_FREE(infos, num_pols * sizeof(*infos)); + + mutex_unlock(&nrs_core.nrs_mutex); + + return rc; +} + +/** + * The longest valid command string is the maximum policy name size, plus the + * length of the " reg" substring + */ +#define LPROCFS_NRS_WR_MAX_CMD (NRS_POL_NAME_MAX + sizeof(" reg") - 1) + +/** + * Starts and stops a given policy on a PTLRPC service. + * + * Commands consist of the policy name, followed by an optional [reg|hp] token; + * if the optional token is omitted, the operation is performed on both the + * regular and high-priority (if the service has one) NRS head. + */ +static ssize_t ptlrpc_lprocfs_nrs_seq_write(struct file *file, + const char __user *buffer, + size_t count, loff_t *off) +{ + struct ptlrpc_service *svc = ((struct seq_file *)file->private_data)->private; + enum ptlrpc_nrs_queue_type queue = PTLRPC_NRS_QUEUE_BOTH; + char *cmd; + char *cmd_copy = NULL; + char *token; + int rc = 0; + + if (count >= LPROCFS_NRS_WR_MAX_CMD) { + rc = -EINVAL; + goto out; + } + + OBD_ALLOC(cmd, LPROCFS_NRS_WR_MAX_CMD); + if (cmd == NULL) { + rc = -ENOMEM; + goto out; + } + /** + * strsep() modifies its argument, so keep a copy + */ + cmd_copy = cmd; + + if (copy_from_user(cmd, buffer, count)) { + rc = -EFAULT; + goto out; + } + + cmd[count] = '\0'; + + token = strsep(&cmd, " "); + + if (strlen(token) > NRS_POL_NAME_MAX - 1) { + rc = -EINVAL; + goto out; + } + + /** + * No [reg|hp] token has been specified + */ + if (cmd == NULL) + goto default_queue; + + /** + * The second token is either NULL, or an optional [reg|hp] string + */ + if (strcmp(cmd, "reg") == 0) + queue = PTLRPC_NRS_QUEUE_REG; + else if (strcmp(cmd, "hp") == 0) + queue = PTLRPC_NRS_QUEUE_HP; + else { + rc = -EINVAL; + goto out; + } + +default_queue: + + if (queue == PTLRPC_NRS_QUEUE_HP && !nrs_svc_has_hp(svc)) { + rc = -ENODEV; + goto out; + } else if (queue == PTLRPC_NRS_QUEUE_BOTH && !nrs_svc_has_hp(svc)) + queue = PTLRPC_NRS_QUEUE_REG; + + /** + * Serialize NRS core lprocfs operations with policy registration/ + * unregistration. + */ + mutex_lock(&nrs_core.nrs_mutex); + + rc = ptlrpc_nrs_policy_control(svc, queue, token, PTLRPC_NRS_CTL_START, + false, NULL); + + mutex_unlock(&nrs_core.nrs_mutex); +out: + if (cmd_copy) + OBD_FREE(cmd_copy, LPROCFS_NRS_WR_MAX_CMD); + + return rc < 0 ? rc : count; +} +LPROC_SEQ_FOPS(ptlrpc_lprocfs_nrs); + +/** @} nrs */ + +struct ptlrpc_srh_iterator { + int srhi_idx; + __u64 srhi_seq; + struct ptlrpc_request *srhi_req; +}; + +static int +ptlrpc_lprocfs_svc_req_history_seek(struct ptlrpc_service_part *svcpt, + struct ptlrpc_srh_iterator *srhi, + __u64 seq) +{ + struct list_head *e; + struct ptlrpc_request *req; + + if (srhi->srhi_req != NULL && + srhi->srhi_seq > svcpt->scp_hist_seq_culled && + srhi->srhi_seq <= seq) { + /* If srhi_req was set previously, hasn't been culled and + * we're searching for a seq on or after it (i.e. more + * recent), search from it onwards. + * Since the service history is LRU (i.e. culled reqs will + * be near the head), we shouldn't have to do long + * re-scans */ + LASSERTF(srhi->srhi_seq == srhi->srhi_req->rq_history_seq, + "%s:%d: seek seq %llu, request seq %llu\n", + svcpt->scp_service->srv_name, svcpt->scp_cpt, + srhi->srhi_seq, srhi->srhi_req->rq_history_seq); + LASSERTF(!list_empty(&svcpt->scp_hist_reqs), + "%s:%d: seek offset %llu, request seq %llu, last culled %llu\n", + svcpt->scp_service->srv_name, svcpt->scp_cpt, + seq, srhi->srhi_seq, svcpt->scp_hist_seq_culled); + e = &srhi->srhi_req->rq_history_list; + } else { + /* search from start */ + e = svcpt->scp_hist_reqs.next; + } + + while (e != &svcpt->scp_hist_reqs) { + req = list_entry(e, struct ptlrpc_request, rq_history_list); + + if (req->rq_history_seq >= seq) { + srhi->srhi_seq = req->rq_history_seq; + srhi->srhi_req = req; + return 0; + } + e = e->next; + } + + return -ENOENT; +} + +/* + * ptlrpc history sequence is used as "position" of seq_file, in some case, + * seq_read() will increase "position" to indicate reading the next + * element, however, low bits of history sequence are reserved for CPT id + * (check the details from comments before ptlrpc_req_add_history), which + * means seq_read() might change CPT id of history sequence and never + * finish reading of requests on a CPT. To make it work, we have to shift + * CPT id to high bits and timestamp to low bits, so seq_read() will only + * increase timestamp which can correctly indicate the next position. + */ + +/* convert seq_file pos to cpt */ +#define PTLRPC_REQ_POS2CPT(svc, pos) \ + ((svc)->srv_cpt_bits == 0 ? 0 : \ + (__u64)(pos) >> (64 - (svc)->srv_cpt_bits)) + +/* make up seq_file pos from cpt */ +#define PTLRPC_REQ_CPT2POS(svc, cpt) \ + ((svc)->srv_cpt_bits == 0 ? 0 : \ + (cpt) << (64 - (svc)->srv_cpt_bits)) + +/* convert sequence to position */ +#define PTLRPC_REQ_SEQ2POS(svc, seq) \ + ((svc)->srv_cpt_bits == 0 ? (seq) : \ + ((seq) >> (svc)->srv_cpt_bits) | \ + ((seq) << (64 - (svc)->srv_cpt_bits))) + +/* convert position to sequence */ +#define PTLRPC_REQ_POS2SEQ(svc, pos) \ + ((svc)->srv_cpt_bits == 0 ? (pos) : \ + ((__u64)(pos) << (svc)->srv_cpt_bits) | \ + ((__u64)(pos) >> (64 - (svc)->srv_cpt_bits))) + +static void * +ptlrpc_lprocfs_svc_req_history_start(struct seq_file *s, loff_t *pos) +{ + struct ptlrpc_service *svc = s->private; + struct ptlrpc_service_part *svcpt; + struct ptlrpc_srh_iterator *srhi; + unsigned int cpt; + int rc; + int i; + + if (sizeof(loff_t) != sizeof(__u64)) { /* can't support */ + CWARN("Failed to read request history because size of loff_t %d can't match size of u64\n", + (int)sizeof(loff_t)); + return NULL; + } + + OBD_ALLOC(srhi, sizeof(*srhi)); + if (srhi == NULL) + return NULL; + + srhi->srhi_seq = 0; + srhi->srhi_req = NULL; + + cpt = PTLRPC_REQ_POS2CPT(svc, *pos); + + ptlrpc_service_for_each_part(svcpt, i, svc) { + if (i < cpt) /* skip */ + continue; + if (i > cpt) /* make up the lowest position for this CPT */ + *pos = PTLRPC_REQ_CPT2POS(svc, i); + + spin_lock(&svcpt->scp_lock); + rc = ptlrpc_lprocfs_svc_req_history_seek(svcpt, srhi, + PTLRPC_REQ_POS2SEQ(svc, *pos)); + spin_unlock(&svcpt->scp_lock); + if (rc == 0) { + *pos = PTLRPC_REQ_SEQ2POS(svc, srhi->srhi_seq); + srhi->srhi_idx = i; + return srhi; + } + } + + OBD_FREE(srhi, sizeof(*srhi)); + return NULL; +} + +static void +ptlrpc_lprocfs_svc_req_history_stop(struct seq_file *s, void *iter) +{ + struct ptlrpc_srh_iterator *srhi = iter; + + if (srhi != NULL) + OBD_FREE(srhi, sizeof(*srhi)); +} + +static void * +ptlrpc_lprocfs_svc_req_history_next(struct seq_file *s, + void *iter, loff_t *pos) +{ + struct ptlrpc_service *svc = s->private; + struct ptlrpc_srh_iterator *srhi = iter; + struct ptlrpc_service_part *svcpt; + __u64 seq; + int rc; + int i; + + for (i = srhi->srhi_idx; i < svc->srv_ncpts; i++) { + svcpt = svc->srv_parts[i]; + + if (i > srhi->srhi_idx) { /* reset iterator for a new CPT */ + srhi->srhi_req = NULL; + seq = srhi->srhi_seq = 0; + } else { /* the next sequence */ + seq = srhi->srhi_seq + (1 << svc->srv_cpt_bits); + } + + spin_lock(&svcpt->scp_lock); + rc = ptlrpc_lprocfs_svc_req_history_seek(svcpt, srhi, seq); + spin_unlock(&svcpt->scp_lock); + if (rc == 0) { + *pos = PTLRPC_REQ_SEQ2POS(svc, srhi->srhi_seq); + srhi->srhi_idx = i; + return srhi; + } + } + + OBD_FREE(srhi, sizeof(*srhi)); + return NULL; +} + +/* common ost/mdt so_req_printer */ +void target_print_req(void *seq_file, struct ptlrpc_request *req) +{ + /* Called holding srv_lock with irqs disabled. + * Print specific req contents and a newline. + * CAVEAT EMPTOR: check request message length before printing!!! + * You might have received any old crap so you must be just as + * careful here as the service's request parser!!! */ + struct seq_file *sf = seq_file; + + switch (req->rq_phase) { + case RQ_PHASE_NEW: + /* still awaiting a service thread's attention, or rejected + * because the generic request message didn't unpack */ + seq_printf(sf, "<not swabbed>\n"); + break; + case RQ_PHASE_INTERPRET: + /* being handled, so basic msg swabbed, and opc is valid + * but racing with mds_handle() */ + case RQ_PHASE_COMPLETE: + /* been handled by mds_handle() reply state possibly still + * volatile */ + seq_printf(sf, "opc %d\n", lustre_msg_get_opc(req->rq_reqmsg)); + break; + default: + DEBUG_REQ(D_ERROR, req, "bad phase %d", req->rq_phase); + } +} +EXPORT_SYMBOL(target_print_req); + +static int ptlrpc_lprocfs_svc_req_history_show(struct seq_file *s, void *iter) +{ + struct ptlrpc_service *svc = s->private; + struct ptlrpc_srh_iterator *srhi = iter; + struct ptlrpc_service_part *svcpt; + struct ptlrpc_request *req; + int rc; + + LASSERT(srhi->srhi_idx < svc->srv_ncpts); + + svcpt = svc->srv_parts[srhi->srhi_idx]; + + spin_lock(&svcpt->scp_lock); + + rc = ptlrpc_lprocfs_svc_req_history_seek(svcpt, srhi, srhi->srhi_seq); + + if (rc == 0) { + req = srhi->srhi_req; + + /* Print common req fields. + * CAVEAT EMPTOR: we're racing with the service handler + * here. The request could contain any old crap, so you + * must be just as careful as the service's request + * parser. Currently I only print stuff here I know is OK + * to look at coz it was set up in request_in_callback()!!! */ + seq_printf(s, "%lld:%s:%s:x%llu:%d:%s:%ld:%lds(%+lds) ", + req->rq_history_seq, libcfs_nid2str(req->rq_self), + libcfs_id2str(req->rq_peer), req->rq_xid, + req->rq_reqlen, ptlrpc_rqphase2str(req), + req->rq_arrival_time.tv_sec, + req->rq_sent - req->rq_arrival_time.tv_sec, + req->rq_sent - req->rq_deadline); + if (svc->srv_ops.so_req_printer == NULL) + seq_printf(s, "\n"); + else + svc->srv_ops.so_req_printer(s, srhi->srhi_req); + } + + spin_unlock(&svcpt->scp_lock); + return rc; +} + +static int +ptlrpc_lprocfs_svc_req_history_open(struct inode *inode, struct file *file) +{ + static struct seq_operations sops = { + .start = ptlrpc_lprocfs_svc_req_history_start, + .stop = ptlrpc_lprocfs_svc_req_history_stop, + .next = ptlrpc_lprocfs_svc_req_history_next, + .show = ptlrpc_lprocfs_svc_req_history_show, + }; + struct seq_file *seqf; + int rc; + + rc = seq_open(file, &sops); + if (rc) + return rc; + + seqf = file->private_data; + seqf->private = PDE_DATA(inode); + return 0; +} + +/* See also lprocfs_rd_timeouts */ +static int ptlrpc_lprocfs_timeouts_seq_show(struct seq_file *m, void *n) +{ + struct ptlrpc_service *svc = m->private; + struct ptlrpc_service_part *svcpt; + struct dhms ts; + time_t worstt; + unsigned int cur; + unsigned int worst; + int i; + + if (AT_OFF) { + seq_printf(m, "adaptive timeouts off, using obd_timeout %u\n", + obd_timeout); + return 0; + } + + ptlrpc_service_for_each_part(svcpt, i, svc) { + cur = at_get(&svcpt->scp_at_estimate); + worst = svcpt->scp_at_estimate.at_worst_ever; + worstt = svcpt->scp_at_estimate.at_worst_time; + s2dhms(&ts, get_seconds() - worstt); + + seq_printf(m, "%10s : cur %3u worst %3u (at %ld, " + DHMS_FMT" ago) ", "service", + cur, worst, worstt, DHMS_VARS(&ts)); + + lprocfs_at_hist_helper(m, &svcpt->scp_at_estimate); + } + + return 0; +} +LPROC_SEQ_FOPS_RO(ptlrpc_lprocfs_timeouts); + +static int ptlrpc_lprocfs_hp_ratio_seq_show(struct seq_file *m, void *v) +{ + struct ptlrpc_service *svc = m->private; + seq_printf(m, "%d", svc->srv_hpreq_ratio); + return 0; +} + +static ssize_t ptlrpc_lprocfs_hp_ratio_seq_write(struct file *file, + const char __user *buffer, + size_t count, + loff_t *off) +{ + struct ptlrpc_service *svc = ((struct seq_file *)file->private_data)->private; + int rc; + int val; + + rc = lprocfs_write_helper(buffer, count, &val); + if (rc < 0) + return rc; + + if (val < 0) + return -ERANGE; + + spin_lock(&svc->srv_lock); + svc->srv_hpreq_ratio = val; + spin_unlock(&svc->srv_lock); + + return count; +} +LPROC_SEQ_FOPS(ptlrpc_lprocfs_hp_ratio); + +void ptlrpc_lprocfs_register_service(struct proc_dir_entry *entry, + struct ptlrpc_service *svc) +{ + struct lprocfs_vars lproc_vars[] = { + {.name = "high_priority_ratio", + .fops = &ptlrpc_lprocfs_hp_ratio_fops, + .data = svc}, + {.name = "req_buffer_history_len", + .fops = &ptlrpc_lprocfs_req_history_len_fops, + .data = svc}, + {.name = "req_buffer_history_max", + .fops = &ptlrpc_lprocfs_req_history_max_fops, + .data = svc}, + {.name = "threads_min", + .fops = &ptlrpc_lprocfs_threads_min_fops, + .data = svc}, + {.name = "threads_max", + .fops = &ptlrpc_lprocfs_threads_max_fops, + .data = svc}, + {.name = "threads_started", + .fops = &ptlrpc_lprocfs_threads_started_fops, + .data = svc}, + {.name = "timeouts", + .fops = &ptlrpc_lprocfs_timeouts_fops, + .data = svc}, + {.name = "nrs_policies", + .fops = &ptlrpc_lprocfs_nrs_fops, + .data = svc}, + {NULL} + }; + static const struct file_operations req_history_fops = { + .owner = THIS_MODULE, + .open = ptlrpc_lprocfs_svc_req_history_open, + .read = seq_read, + .llseek = seq_lseek, + .release = lprocfs_seq_release, + }; + + int rc; + + ptlrpc_lprocfs_register(entry, svc->srv_name, + "stats", &svc->srv_procroot, + &svc->srv_stats); + + if (svc->srv_procroot == NULL) + return; + + lprocfs_add_vars(svc->srv_procroot, lproc_vars, NULL); + + rc = lprocfs_seq_create(svc->srv_procroot, "req_history", + 0400, &req_history_fops, svc); + if (rc) + CWARN("Error adding the req_history file\n"); +} + +void ptlrpc_lprocfs_register_obd(struct obd_device *obddev) +{ + ptlrpc_lprocfs_register(obddev->obd_proc_entry, NULL, "stats", + &obddev->obd_svc_procroot, + &obddev->obd_svc_stats); +} +EXPORT_SYMBOL(ptlrpc_lprocfs_register_obd); + +void ptlrpc_lprocfs_rpc_sent(struct ptlrpc_request *req, long amount) +{ + struct lprocfs_stats *svc_stats; + __u32 op = lustre_msg_get_opc(req->rq_reqmsg); + int opc = opcode_offset(op); + + svc_stats = req->rq_import->imp_obd->obd_svc_stats; + if (svc_stats == NULL || opc <= 0) + return; + LASSERT(opc < LUSTRE_MAX_OPCODES); + if (!(op == LDLM_ENQUEUE || op == MDS_REINT)) + lprocfs_counter_add(svc_stats, opc + EXTRA_MAX_OPCODES, amount); +} + +void ptlrpc_lprocfs_brw(struct ptlrpc_request *req, int bytes) +{ + struct lprocfs_stats *svc_stats; + int idx; + + if (!req->rq_import) + return; + svc_stats = req->rq_import->imp_obd->obd_svc_stats; + if (!svc_stats) + return; + idx = lustre_msg_get_opc(req->rq_reqmsg); + switch (idx) { + case OST_READ: + idx = BRW_READ_BYTES + PTLRPC_LAST_CNTR; + break; + case OST_WRITE: + idx = BRW_WRITE_BYTES + PTLRPC_LAST_CNTR; + break; + default: + LASSERTF(0, "unsupported opcode %u\n", idx); + break; + } + + lprocfs_counter_add(svc_stats, idx, bytes); +} + +EXPORT_SYMBOL(ptlrpc_lprocfs_brw); + +void ptlrpc_lprocfs_unregister_service(struct ptlrpc_service *svc) +{ + if (svc->srv_procroot != NULL) + lprocfs_remove(&svc->srv_procroot); + + if (svc->srv_stats) + lprocfs_free_stats(&svc->srv_stats); +} + +void ptlrpc_lprocfs_unregister_obd(struct obd_device *obd) +{ + if (obd->obd_svc_procroot) + lprocfs_remove(&obd->obd_svc_procroot); + + if (obd->obd_svc_stats) + lprocfs_free_stats(&obd->obd_svc_stats); +} +EXPORT_SYMBOL(ptlrpc_lprocfs_unregister_obd); + + +#define BUFLEN (UUID_MAX + 5) + +int lprocfs_wr_evict_client(struct file *file, const char __user *buffer, + size_t count, loff_t *off) +{ + struct obd_device *obd = ((struct seq_file *)file->private_data)->private; + char *kbuf; + char *tmpbuf; + + OBD_ALLOC(kbuf, BUFLEN); + if (kbuf == NULL) + return -ENOMEM; + + /* + * OBD_ALLOC() will zero kbuf, but we only copy BUFLEN - 1 + * bytes into kbuf, to ensure that the string is NUL-terminated. + * UUID_MAX should include a trailing NUL already. + */ + if (copy_from_user(kbuf, buffer, + min_t(unsigned long, BUFLEN - 1, count))) { + count = -EFAULT; + goto out; + } + tmpbuf = cfs_firststr(kbuf, min_t(unsigned long, BUFLEN - 1, count)); + /* Kludge code(deadlock situation): the lprocfs lock has been held + * since the client is evicted by writing client's + * uuid/nid to procfs "evict_client" entry. However, + * obd_export_evict_by_uuid() will call lprocfs_remove() to destroy + * the proc entries under the being destroyed export{}, so I have + * to drop the lock at first here. + * - jay, jxiong@clusterfs.com */ + class_incref(obd, __func__, current); + + if (strncmp(tmpbuf, "nid:", 4) == 0) + obd_export_evict_by_nid(obd, tmpbuf + 4); + else if (strncmp(tmpbuf, "uuid:", 5) == 0) + obd_export_evict_by_uuid(obd, tmpbuf + 5); + else + obd_export_evict_by_uuid(obd, tmpbuf); + + class_decref(obd, __func__, current); + +out: + OBD_FREE(kbuf, BUFLEN); + return count; +} +EXPORT_SYMBOL(lprocfs_wr_evict_client); + +#undef BUFLEN + +int lprocfs_wr_ping(struct file *file, const char __user *buffer, + size_t count, loff_t *off) +{ + struct obd_device *obd = ((struct seq_file *)file->private_data)->private; + struct ptlrpc_request *req; + int rc; + + LPROCFS_CLIMP_CHECK(obd); + req = ptlrpc_prep_ping(obd->u.cli.cl_import); + LPROCFS_CLIMP_EXIT(obd); + if (req == NULL) + return -ENOMEM; + + req->rq_send_state = LUSTRE_IMP_FULL; + + rc = ptlrpc_queue_wait(req); + + ptlrpc_req_finished(req); + if (rc >= 0) + return count; + return rc; +} +EXPORT_SYMBOL(lprocfs_wr_ping); + +/* Write the connection UUID to this file to attempt to connect to that node. + * The connection UUID is a node's primary NID. For example, + * "echo connection=192.168.0.1@tcp0::instance > .../import". + */ +int lprocfs_wr_import(struct file *file, const char __user *buffer, + size_t count, loff_t *off) +{ + struct obd_device *obd = ((struct seq_file *)file->private_data)->private; + struct obd_import *imp = obd->u.cli.cl_import; + char *kbuf = NULL; + char *uuid; + char *ptr; + int do_reconn = 1; + const char prefix[] = "connection="; + const int prefix_len = sizeof(prefix) - 1; + + if (count > PAGE_CACHE_SIZE - 1 || count <= prefix_len) + return -EINVAL; + + OBD_ALLOC(kbuf, count + 1); + if (kbuf == NULL) + return -ENOMEM; + + if (copy_from_user(kbuf, buffer, count)) { + count = -EFAULT; + goto out; + } + + kbuf[count] = 0; + + /* only support connection=uuid::instance now */ + if (strncmp(prefix, kbuf, prefix_len) != 0) { + count = -EINVAL; + goto out; + } + + uuid = kbuf + prefix_len; + ptr = strstr(uuid, "::"); + if (ptr) { + __u32 inst; + char *endptr; + + *ptr = 0; + do_reconn = 0; + ptr += strlen("::"); + inst = simple_strtol(ptr, &endptr, 10); + if (*endptr) { + CERROR("config: wrong instance # %s\n", ptr); + } else if (inst != imp->imp_connect_data.ocd_instance) { + CDEBUG(D_INFO, "IR: %s is connecting to an obsoleted target(%u/%u), reconnecting...\n", + imp->imp_obd->obd_name, + imp->imp_connect_data.ocd_instance, inst); + do_reconn = 1; + } else { + CDEBUG(D_INFO, "IR: %s has already been connecting to new target(%u)\n", + imp->imp_obd->obd_name, inst); + } + } + + if (do_reconn) + ptlrpc_recover_import(imp, uuid, 1); + +out: + OBD_FREE(kbuf, count + 1); + return count; +} +EXPORT_SYMBOL(lprocfs_wr_import); + +int lprocfs_rd_pinger_recov(struct seq_file *m, void *n) +{ + struct obd_device *obd = m->private; + struct obd_import *imp = obd->u.cli.cl_import; + + LPROCFS_CLIMP_CHECK(obd); + seq_printf(m, "%d\n", !imp->imp_no_pinger_recover); + LPROCFS_CLIMP_EXIT(obd); + + return 0; +} +EXPORT_SYMBOL(lprocfs_rd_pinger_recov); + +int lprocfs_wr_pinger_recov(struct file *file, const char __user *buffer, + size_t count, loff_t *off) +{ + struct obd_device *obd = ((struct seq_file *)file->private_data)->private; + struct client_obd *cli = &obd->u.cli; + struct obd_import *imp = cli->cl_import; + int rc, val; + + rc = lprocfs_write_helper(buffer, count, &val); + if (rc < 0) + return rc; + + if (val != 0 && val != 1) + return -ERANGE; + + LPROCFS_CLIMP_CHECK(obd); + spin_lock(&imp->imp_lock); + imp->imp_no_pinger_recover = !val; + spin_unlock(&imp->imp_lock); + LPROCFS_CLIMP_EXIT(obd); + + return count; + +} +EXPORT_SYMBOL(lprocfs_wr_pinger_recov); + +#endif /* CONFIG_PROC_FS */ diff --git a/drivers/staging/lustre/lustre/ptlrpc/niobuf.c b/drivers/staging/lustre/lustre/ptlrpc/niobuf.c new file mode 100644 index 000000000..2fa258558 --- /dev/null +++ b/drivers/staging/lustre/lustre/ptlrpc/niobuf.c @@ -0,0 +1,731 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2011, 2012, Intel Corporation. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + */ + +#define DEBUG_SUBSYSTEM S_RPC +#include "../include/obd_support.h" +#include "../include/lustre_net.h" +#include "../include/lustre_lib.h" +#include "../include/obd.h" +#include "../include/obd_class.h" +#include "ptlrpc_internal.h" + +/** + * Helper function. Sends \a len bytes from \a base at offset \a offset + * over \a conn connection to portal \a portal. + * Returns 0 on success or error code. + */ +static int ptl_send_buf(lnet_handle_md_t *mdh, void *base, int len, + lnet_ack_req_t ack, struct ptlrpc_cb_id *cbid, + struct ptlrpc_connection *conn, int portal, __u64 xid, + unsigned int offset) +{ + int rc; + lnet_md_t md; + + LASSERT(portal != 0); + LASSERT(conn != NULL); + CDEBUG(D_INFO, "conn=%p id %s\n", conn, libcfs_id2str(conn->c_peer)); + md.start = base; + md.length = len; + md.threshold = (ack == LNET_ACK_REQ) ? 2 : 1; + md.options = PTLRPC_MD_OPTIONS; + md.user_ptr = cbid; + md.eq_handle = ptlrpc_eq_h; + + if (unlikely(ack == LNET_ACK_REQ && + OBD_FAIL_CHECK_ORSET(OBD_FAIL_PTLRPC_ACK, + OBD_FAIL_ONCE))) { + /* don't ask for the ack to simulate failing client */ + ack = LNET_NOACK_REQ; + } + + rc = LNetMDBind(md, LNET_UNLINK, mdh); + if (unlikely(rc != 0)) { + CERROR("LNetMDBind failed: %d\n", rc); + LASSERT(rc == -ENOMEM); + return -ENOMEM; + } + + CDEBUG(D_NET, "Sending %d bytes to portal %d, xid %lld, offset %u\n", + len, portal, xid, offset); + + rc = LNetPut(conn->c_self, *mdh, ack, + conn->c_peer, portal, xid, offset, 0); + if (unlikely(rc != 0)) { + int rc2; + /* We're going to get an UNLINK event when I unlink below, + * which will complete just like any other failed send, so + * I fall through and return success here! */ + CERROR("LNetPut(%s, %d, %lld) failed: %d\n", + libcfs_id2str(conn->c_peer), portal, xid, rc); + rc2 = LNetMDUnlink(*mdh); + LASSERTF(rc2 == 0, "rc2 = %d\n", rc2); + } + + return 0; +} + +static void mdunlink_iterate_helper(lnet_handle_md_t *bd_mds, int count) +{ + int i; + + for (i = 0; i < count; i++) + LNetMDUnlink(bd_mds[i]); +} + + +/** + * Register bulk at the sender for later transfer. + * Returns 0 on success or error code. + */ +int ptlrpc_register_bulk(struct ptlrpc_request *req) +{ + struct ptlrpc_bulk_desc *desc = req->rq_bulk; + lnet_process_id_t peer; + int rc = 0; + int rc2; + int posted_md; + int total_md; + __u64 xid; + lnet_handle_me_t me_h; + lnet_md_t md; + + if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_BULK_GET_NET)) + return 0; + + /* NB no locking required until desc is on the network */ + LASSERT(desc->bd_nob > 0); + LASSERT(desc->bd_md_count == 0); + LASSERT(desc->bd_md_max_brw <= PTLRPC_BULK_OPS_COUNT); + LASSERT(desc->bd_iov_count <= PTLRPC_MAX_BRW_PAGES); + LASSERT(desc->bd_req != NULL); + LASSERT(desc->bd_type == BULK_PUT_SINK || + desc->bd_type == BULK_GET_SOURCE); + + /* cleanup the state of the bulk for it will be reused */ + if (req->rq_resend || req->rq_send_state == LUSTRE_IMP_REPLAY) + desc->bd_nob_transferred = 0; + else + LASSERT(desc->bd_nob_transferred == 0); + + desc->bd_failure = 0; + + peer = desc->bd_import->imp_connection->c_peer; + + LASSERT(desc->bd_cbid.cbid_fn == client_bulk_callback); + LASSERT(desc->bd_cbid.cbid_arg == desc); + + /* An XID is only used for a single request from the client. + * For retried bulk transfers, a new XID will be allocated in + * in ptlrpc_check_set() if it needs to be resent, so it is not + * using the same RDMA match bits after an error. + * + * For multi-bulk RPCs, rq_xid is the last XID needed for bulks. The + * first bulk XID is power-of-two aligned before rq_xid. LU-1431 */ + xid = req->rq_xid & ~((__u64)desc->bd_md_max_brw - 1); + LASSERTF(!(desc->bd_registered && + req->rq_send_state != LUSTRE_IMP_REPLAY) || + xid != desc->bd_last_xid, + "registered: %d rq_xid: %llu bd_last_xid: %llu\n", + desc->bd_registered, xid, desc->bd_last_xid); + + total_md = (desc->bd_iov_count + LNET_MAX_IOV - 1) / LNET_MAX_IOV; + desc->bd_registered = 1; + desc->bd_last_xid = xid; + desc->bd_md_count = total_md; + md.user_ptr = &desc->bd_cbid; + md.eq_handle = ptlrpc_eq_h; + md.threshold = 1; /* PUT or GET */ + + for (posted_md = 0; posted_md < total_md; posted_md++, xid++) { + md.options = PTLRPC_MD_OPTIONS | + ((desc->bd_type == BULK_GET_SOURCE) ? + LNET_MD_OP_GET : LNET_MD_OP_PUT); + ptlrpc_fill_bulk_md(&md, desc, posted_md); + + rc = LNetMEAttach(desc->bd_portal, peer, xid, 0, + LNET_UNLINK, LNET_INS_AFTER, &me_h); + if (rc != 0) { + CERROR("%s: LNetMEAttach failed x%llu/%d: rc = %d\n", + desc->bd_import->imp_obd->obd_name, xid, + posted_md, rc); + break; + } + + /* About to let the network at it... */ + rc = LNetMDAttach(me_h, md, LNET_UNLINK, + &desc->bd_mds[posted_md]); + if (rc != 0) { + CERROR("%s: LNetMDAttach failed x%llu/%d: rc = %d\n", + desc->bd_import->imp_obd->obd_name, xid, + posted_md, rc); + rc2 = LNetMEUnlink(me_h); + LASSERT(rc2 == 0); + break; + } + } + + if (rc != 0) { + LASSERT(rc == -ENOMEM); + spin_lock(&desc->bd_lock); + desc->bd_md_count -= total_md - posted_md; + spin_unlock(&desc->bd_lock); + LASSERT(desc->bd_md_count >= 0); + mdunlink_iterate_helper(desc->bd_mds, desc->bd_md_max_brw); + req->rq_status = -ENOMEM; + return -ENOMEM; + } + + /* Set rq_xid to matchbits of the final bulk so that server can + * infer the number of bulks that were prepared */ + req->rq_xid = --xid; + LASSERTF(desc->bd_last_xid == (req->rq_xid & PTLRPC_BULK_OPS_MASK), + "bd_last_xid = x%llu, rq_xid = x%llu\n", + desc->bd_last_xid, req->rq_xid); + + spin_lock(&desc->bd_lock); + /* Holler if peer manages to touch buffers before he knows the xid */ + if (desc->bd_md_count != total_md) + CWARN("%s: Peer %s touched %d buffers while I registered\n", + desc->bd_import->imp_obd->obd_name, libcfs_id2str(peer), + total_md - desc->bd_md_count); + spin_unlock(&desc->bd_lock); + + CDEBUG(D_NET, "Setup %u bulk %s buffers: %u pages %u bytes, xid x%#llx-%#llx, portal %u\n", + desc->bd_md_count, + desc->bd_type == BULK_GET_SOURCE ? "get-source" : "put-sink", + desc->bd_iov_count, desc->bd_nob, + desc->bd_last_xid, req->rq_xid, desc->bd_portal); + + return 0; +} +EXPORT_SYMBOL(ptlrpc_register_bulk); + +/** + * Disconnect a bulk desc from the network. Idempotent. Not + * thread-safe (i.e. only interlocks with completion callback). + * Returns 1 on success or 0 if network unregistration failed for whatever + * reason. + */ +int ptlrpc_unregister_bulk(struct ptlrpc_request *req, int async) +{ + struct ptlrpc_bulk_desc *desc = req->rq_bulk; + wait_queue_head_t *wq; + struct l_wait_info lwi; + int rc; + + LASSERT(!in_interrupt()); /* might sleep */ + + /* Let's setup deadline for reply unlink. */ + if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_BULK_UNLINK) && + async && req->rq_bulk_deadline == 0) + req->rq_bulk_deadline = get_seconds() + LONG_UNLINK; + + if (ptlrpc_client_bulk_active(req) == 0) /* completed or */ + return 1; /* never registered */ + + LASSERT(desc->bd_req == req); /* bd_req NULL until registered */ + + /* the unlink ensures the callback happens ASAP and is the last + * one. If it fails, it must be because completion just happened, + * but we must still l_wait_event() in this case to give liblustre + * a chance to run client_bulk_callback() */ + mdunlink_iterate_helper(desc->bd_mds, desc->bd_md_max_brw); + + if (ptlrpc_client_bulk_active(req) == 0) /* completed or */ + return 1; /* never registered */ + + /* Move to "Unregistering" phase as bulk was not unlinked yet. */ + ptlrpc_rqphase_move(req, RQ_PHASE_UNREGISTERING); + + /* Do not wait for unlink to finish. */ + if (async) + return 0; + + if (req->rq_set != NULL) + wq = &req->rq_set->set_waitq; + else + wq = &req->rq_reply_waitq; + + for (;;) { + /* Network access will complete in finite time but the HUGE + * timeout lets us CWARN for visibility of sluggish NALs */ + lwi = LWI_TIMEOUT_INTERVAL(cfs_time_seconds(LONG_UNLINK), + cfs_time_seconds(1), NULL, NULL); + rc = l_wait_event(*wq, !ptlrpc_client_bulk_active(req), &lwi); + if (rc == 0) { + ptlrpc_rqphase_move(req, req->rq_next_phase); + return 1; + } + + LASSERT(rc == -ETIMEDOUT); + DEBUG_REQ(D_WARNING, req, "Unexpectedly long timeout: desc %p", + desc); + } + return 0; +} +EXPORT_SYMBOL(ptlrpc_unregister_bulk); + +static void ptlrpc_at_set_reply(struct ptlrpc_request *req, int flags) +{ + struct ptlrpc_service_part *svcpt = req->rq_rqbd->rqbd_svcpt; + struct ptlrpc_service *svc = svcpt->scp_service; + int service_time = max_t(int, get_seconds() - + req->rq_arrival_time.tv_sec, 1); + + if (!(flags & PTLRPC_REPLY_EARLY) && + (req->rq_type != PTL_RPC_MSG_ERR) && + (req->rq_reqmsg != NULL) && + !(lustre_msg_get_flags(req->rq_reqmsg) & + (MSG_RESENT | MSG_REPLAY | + MSG_REQ_REPLAY_DONE | MSG_LOCK_REPLAY_DONE))) { + /* early replies, errors and recovery requests don't count + * toward our service time estimate */ + int oldse = at_measured(&svcpt->scp_at_estimate, service_time); + + if (oldse != 0) { + DEBUG_REQ(D_ADAPTTO, req, + "svc %s changed estimate from %d to %d", + svc->srv_name, oldse, + at_get(&svcpt->scp_at_estimate)); + } + } + /* Report actual service time for client latency calc */ + lustre_msg_set_service_time(req->rq_repmsg, service_time); + /* Report service time estimate for future client reqs, but report 0 + * (to be ignored by client) if it's a error reply during recovery. + * (bz15815) */ + if (req->rq_type == PTL_RPC_MSG_ERR && + (req->rq_export == NULL || req->rq_export->exp_obd->obd_recovering)) + lustre_msg_set_timeout(req->rq_repmsg, 0); + else + lustre_msg_set_timeout(req->rq_repmsg, + at_get(&svcpt->scp_at_estimate)); + + if (req->rq_reqmsg && + !(lustre_msghdr_get_flags(req->rq_reqmsg) & MSGHDR_AT_SUPPORT)) { + CDEBUG(D_ADAPTTO, "No early reply support: flags=%#x req_flags=%#x magic=%d:%x/%x len=%d\n", + flags, lustre_msg_get_flags(req->rq_reqmsg), + lustre_msg_is_v1(req->rq_reqmsg), + lustre_msg_get_magic(req->rq_reqmsg), + lustre_msg_get_magic(req->rq_repmsg), req->rq_replen); + } +} + +/** + * Send request reply from request \a req reply buffer. + * \a flags defines reply types + * Returns 0 on success or error code + */ +int ptlrpc_send_reply(struct ptlrpc_request *req, int flags) +{ + struct ptlrpc_reply_state *rs = req->rq_reply_state; + struct ptlrpc_connection *conn; + int rc; + + /* We must already have a reply buffer (only ptlrpc_error() may be + * called without one). The reply generated by sptlrpc layer (e.g. + * error notify, etc.) might have NULL rq->reqmsg; Otherwise we must + * have a request buffer which is either the actual (swabbed) incoming + * request, or a saved copy if this is a req saved in + * target_queue_final_reply(). + */ + LASSERT(req->rq_no_reply == 0); + LASSERT(req->rq_reqbuf != NULL); + LASSERT(rs != NULL); + LASSERT((flags & PTLRPC_REPLY_MAYBE_DIFFICULT) || !rs->rs_difficult); + LASSERT(req->rq_repmsg != NULL); + LASSERT(req->rq_repmsg == rs->rs_msg); + LASSERT(rs->rs_cb_id.cbid_fn == reply_out_callback); + LASSERT(rs->rs_cb_id.cbid_arg == rs); + + /* There may be no rq_export during failover */ + + if (unlikely(req->rq_export && req->rq_export->exp_obd && + req->rq_export->exp_obd->obd_fail)) { + /* Failed obd's only send ENODEV */ + req->rq_type = PTL_RPC_MSG_ERR; + req->rq_status = -ENODEV; + CDEBUG(D_HA, "sending ENODEV from failed obd %d\n", + req->rq_export->exp_obd->obd_minor); + } + + /* In order to keep interoperability with the client (< 2.3) which + * doesn't have pb_jobid in ptlrpc_body, We have to shrink the + * ptlrpc_body in reply buffer to ptlrpc_body_v2, otherwise, the + * reply buffer on client will be overflow. + * + * XXX Remove this whenever we drop the interoperability with + * such client. + */ + req->rq_replen = lustre_shrink_msg(req->rq_repmsg, 0, + sizeof(struct ptlrpc_body_v2), 1); + + if (req->rq_type != PTL_RPC_MSG_ERR) + req->rq_type = PTL_RPC_MSG_REPLY; + + lustre_msg_set_type(req->rq_repmsg, req->rq_type); + lustre_msg_set_status(req->rq_repmsg, + ptlrpc_status_hton(req->rq_status)); + lustre_msg_set_opc(req->rq_repmsg, + req->rq_reqmsg ? lustre_msg_get_opc(req->rq_reqmsg) : 0); + + target_pack_pool_reply(req); + + ptlrpc_at_set_reply(req, flags); + + if (req->rq_export == NULL || req->rq_export->exp_connection == NULL) + conn = ptlrpc_connection_get(req->rq_peer, req->rq_self, NULL); + else + conn = ptlrpc_connection_addref(req->rq_export->exp_connection); + + if (unlikely(conn == NULL)) { + CERROR("not replying on NULL connection\n"); /* bug 9635 */ + return -ENOTCONN; + } + ptlrpc_rs_addref(rs); /* +1 ref for the network */ + + rc = sptlrpc_svc_wrap_reply(req); + if (unlikely(rc)) + goto out; + + req->rq_sent = get_seconds(); + + rc = ptl_send_buf(&rs->rs_md_h, rs->rs_repbuf, rs->rs_repdata_len, + (rs->rs_difficult && !rs->rs_no_ack) ? + LNET_ACK_REQ : LNET_NOACK_REQ, + &rs->rs_cb_id, conn, + ptlrpc_req2svc(req)->srv_rep_portal, + req->rq_xid, req->rq_reply_off); +out: + if (unlikely(rc != 0)) + ptlrpc_req_drop_rs(req); + ptlrpc_connection_put(conn); + return rc; +} +EXPORT_SYMBOL(ptlrpc_send_reply); + +int ptlrpc_reply(struct ptlrpc_request *req) +{ + if (req->rq_no_reply) + return 0; + return ptlrpc_send_reply(req, 0); +} +EXPORT_SYMBOL(ptlrpc_reply); + +/** + * For request \a req send an error reply back. Create empty + * reply buffers if necessary. + */ +int ptlrpc_send_error(struct ptlrpc_request *req, int may_be_difficult) +{ + int rc; + + if (req->rq_no_reply) + return 0; + + if (!req->rq_repmsg) { + rc = lustre_pack_reply(req, 1, NULL, NULL); + if (rc) + return rc; + } + + if (req->rq_status != -ENOSPC && req->rq_status != -EACCES && + req->rq_status != -EPERM && req->rq_status != -ENOENT && + req->rq_status != -EINPROGRESS && req->rq_status != -EDQUOT) + req->rq_type = PTL_RPC_MSG_ERR; + + rc = ptlrpc_send_reply(req, may_be_difficult); + return rc; +} +EXPORT_SYMBOL(ptlrpc_send_error); + +int ptlrpc_error(struct ptlrpc_request *req) +{ + return ptlrpc_send_error(req, 0); +} +EXPORT_SYMBOL(ptlrpc_error); + +/** + * Send request \a request. + * if \a noreply is set, don't expect any reply back and don't set up + * reply buffers. + * Returns 0 on success or error code. + */ +int ptl_send_rpc(struct ptlrpc_request *request, int noreply) +{ + int rc; + int rc2; + int mpflag = 0; + struct ptlrpc_connection *connection; + lnet_handle_me_t reply_me_h; + lnet_md_t reply_md; + struct obd_device *obd = request->rq_import->imp_obd; + + if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_DROP_RPC)) + return 0; + + LASSERT(request->rq_type == PTL_RPC_MSG_REQUEST); + LASSERT(request->rq_wait_ctx == 0); + + /* If this is a re-transmit, we're required to have disengaged + * cleanly from the previous attempt */ + LASSERT(!request->rq_receiving_reply); + LASSERT(!((lustre_msg_get_flags(request->rq_reqmsg) & MSG_REPLAY) && + (request->rq_import->imp_state == LUSTRE_IMP_FULL))); + + if (unlikely(obd != NULL && obd->obd_fail)) { + CDEBUG(D_HA, "muting rpc for failed imp obd %s\n", + obd->obd_name); + /* this prevents us from waiting in ptlrpc_queue_wait */ + spin_lock(&request->rq_lock); + request->rq_err = 1; + spin_unlock(&request->rq_lock); + request->rq_status = -ENODEV; + return -ENODEV; + } + + connection = request->rq_import->imp_connection; + + lustre_msg_set_handle(request->rq_reqmsg, + &request->rq_import->imp_remote_handle); + lustre_msg_set_type(request->rq_reqmsg, PTL_RPC_MSG_REQUEST); + lustre_msg_set_conn_cnt(request->rq_reqmsg, + request->rq_import->imp_conn_cnt); + lustre_msghdr_set_flags(request->rq_reqmsg, + request->rq_import->imp_msghdr_flags); + + if (request->rq_resend) + lustre_msg_add_flags(request->rq_reqmsg, MSG_RESENT); + + if (request->rq_memalloc) + mpflag = cfs_memory_pressure_get_and_set(); + + rc = sptlrpc_cli_wrap_request(request); + if (rc) + goto out; + + /* bulk register should be done after wrap_request() */ + if (request->rq_bulk != NULL) { + rc = ptlrpc_register_bulk(request); + if (rc != 0) + goto out; + } + + if (!noreply) { + LASSERT(request->rq_replen != 0); + if (request->rq_repbuf == NULL) { + LASSERT(request->rq_repdata == NULL); + LASSERT(request->rq_repmsg == NULL); + rc = sptlrpc_cli_alloc_repbuf(request, + request->rq_replen); + if (rc) { + /* this prevents us from looping in + * ptlrpc_queue_wait */ + spin_lock(&request->rq_lock); + request->rq_err = 1; + spin_unlock(&request->rq_lock); + request->rq_status = rc; + goto cleanup_bulk; + } + } else { + request->rq_repdata = NULL; + request->rq_repmsg = NULL; + } + + rc = LNetMEAttach(request->rq_reply_portal,/*XXX FIXME bug 249*/ + connection->c_peer, request->rq_xid, 0, + LNET_UNLINK, LNET_INS_AFTER, &reply_me_h); + if (rc != 0) { + CERROR("LNetMEAttach failed: %d\n", rc); + LASSERT(rc == -ENOMEM); + rc = -ENOMEM; + goto cleanup_bulk; + } + } + + spin_lock(&request->rq_lock); + /* If the MD attach succeeds, there _will_ be a reply_in callback */ + request->rq_receiving_reply = !noreply; + request->rq_req_unlink = 1; + /* We are responsible for unlinking the reply buffer */ + request->rq_reply_unlink = !noreply; + /* Clear any flags that may be present from previous sends. */ + request->rq_replied = 0; + request->rq_err = 0; + request->rq_timedout = 0; + request->rq_net_err = 0; + request->rq_resend = 0; + request->rq_restart = 0; + request->rq_reply_truncate = 0; + spin_unlock(&request->rq_lock); + + if (!noreply) { + reply_md.start = request->rq_repbuf; + reply_md.length = request->rq_repbuf_len; + /* Allow multiple early replies */ + reply_md.threshold = LNET_MD_THRESH_INF; + /* Manage remote for early replies */ + reply_md.options = PTLRPC_MD_OPTIONS | LNET_MD_OP_PUT | + LNET_MD_MANAGE_REMOTE | + LNET_MD_TRUNCATE; /* allow to make EOVERFLOW error */; + reply_md.user_ptr = &request->rq_reply_cbid; + reply_md.eq_handle = ptlrpc_eq_h; + + /* We must see the unlink callback to unset rq_reply_unlink, + so we can't auto-unlink */ + rc = LNetMDAttach(reply_me_h, reply_md, LNET_RETAIN, + &request->rq_reply_md_h); + if (rc != 0) { + CERROR("LNetMDAttach failed: %d\n", rc); + LASSERT(rc == -ENOMEM); + spin_lock(&request->rq_lock); + /* ...but the MD attach didn't succeed... */ + request->rq_receiving_reply = 0; + spin_unlock(&request->rq_lock); + rc = -ENOMEM; + goto cleanup_me; + } + + CDEBUG(D_NET, "Setup reply buffer: %u bytes, xid %llu, portal %u\n", + request->rq_repbuf_len, request->rq_xid, + request->rq_reply_portal); + } + + /* add references on request for request_out_callback */ + ptlrpc_request_addref(request); + if (obd != NULL && obd->obd_svc_stats != NULL) + lprocfs_counter_add(obd->obd_svc_stats, PTLRPC_REQACTIVE_CNTR, + atomic_read(&request->rq_import->imp_inflight)); + + OBD_FAIL_TIMEOUT(OBD_FAIL_PTLRPC_DELAY_SEND, request->rq_timeout + 5); + + do_gettimeofday(&request->rq_arrival_time); + request->rq_sent = get_seconds(); + /* We give the server rq_timeout secs to process the req, and + add the network latency for our local timeout. */ + request->rq_deadline = request->rq_sent + request->rq_timeout + + ptlrpc_at_get_net_latency(request); + + ptlrpc_pinger_sending_on_import(request->rq_import); + + DEBUG_REQ(D_INFO, request, "send flg=%x", + lustre_msg_get_flags(request->rq_reqmsg)); + rc = ptl_send_buf(&request->rq_req_md_h, + request->rq_reqbuf, request->rq_reqdata_len, + LNET_NOACK_REQ, &request->rq_req_cbid, + connection, + request->rq_request_portal, + request->rq_xid, 0); + if (rc == 0) + goto out; + + ptlrpc_req_finished(request); + if (noreply) + goto out; + + cleanup_me: + /* MEUnlink is safe; the PUT didn't even get off the ground, and + * nobody apart from the PUT's target has the right nid+XID to + * access the reply buffer. */ + rc2 = LNetMEUnlink(reply_me_h); + LASSERT(rc2 == 0); + /* UNLINKED callback called synchronously */ + LASSERT(!request->rq_receiving_reply); + + cleanup_bulk: + /* We do sync unlink here as there was no real transfer here so + * the chance to have long unlink to sluggish net is smaller here. */ + ptlrpc_unregister_bulk(request, 0); + out: + if (request->rq_memalloc) + cfs_memory_pressure_restore(mpflag); + return rc; +} +EXPORT_SYMBOL(ptl_send_rpc); + +/** + * Register request buffer descriptor for request receiving. + */ +int ptlrpc_register_rqbd(struct ptlrpc_request_buffer_desc *rqbd) +{ + struct ptlrpc_service *service = rqbd->rqbd_svcpt->scp_service; + static lnet_process_id_t match_id = {LNET_NID_ANY, LNET_PID_ANY}; + int rc; + lnet_md_t md; + lnet_handle_me_t me_h; + + CDEBUG(D_NET, "LNetMEAttach: portal %d\n", + service->srv_req_portal); + + if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_RQBD)) + return -ENOMEM; + + /* NB: CPT affinity service should use new LNet flag LNET_INS_LOCAL, + * which means buffer can only be attached on local CPT, and LND + * threads can find it by grabbing a local lock */ + rc = LNetMEAttach(service->srv_req_portal, + match_id, 0, ~0, LNET_UNLINK, + rqbd->rqbd_svcpt->scp_cpt >= 0 ? + LNET_INS_LOCAL : LNET_INS_AFTER, &me_h); + if (rc != 0) { + CERROR("LNetMEAttach failed: %d\n", rc); + return -ENOMEM; + } + + LASSERT(rqbd->rqbd_refcount == 0); + rqbd->rqbd_refcount = 1; + + md.start = rqbd->rqbd_buffer; + md.length = service->srv_buf_size; + md.max_size = service->srv_max_req_size; + md.threshold = LNET_MD_THRESH_INF; + md.options = PTLRPC_MD_OPTIONS | LNET_MD_OP_PUT | LNET_MD_MAX_SIZE; + md.user_ptr = &rqbd->rqbd_cbid; + md.eq_handle = ptlrpc_eq_h; + + rc = LNetMDAttach(me_h, md, LNET_UNLINK, &rqbd->rqbd_md_h); + if (rc == 0) + return 0; + + CERROR("LNetMDAttach failed: %d;\n", rc); + LASSERT(rc == -ENOMEM); + rc = LNetMEUnlink(me_h); + LASSERT(rc == 0); + rqbd->rqbd_refcount = 0; + + return -ENOMEM; +} diff --git a/drivers/staging/lustre/lustre/ptlrpc/nrs.c b/drivers/staging/lustre/lustre/ptlrpc/nrs.c new file mode 100644 index 000000000..81ad74732 --- /dev/null +++ b/drivers/staging/lustre/lustre/ptlrpc/nrs.c @@ -0,0 +1,1754 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License version 2 for more details. A copy is + * included in the COPYING file that accompanied this code. + + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * GPL HEADER END + */ +/* + * Copyright (c) 2011 Intel Corporation + * + * Copyright 2012 Xyratex Technology Limited + */ +/* + * lustre/ptlrpc/nrs.c + * + * Network Request Scheduler (NRS) + * + * Allows to reorder the handling of RPCs at servers. + * + * Author: Liang Zhen <liang@whamcloud.com> + * Author: Nikitas Angelinas <nikitas_angelinas@xyratex.com> + */ +/** + * \addtogoup nrs + * @{ + */ + +#define DEBUG_SUBSYSTEM S_RPC +#include "../include/obd_support.h" +#include "../include/obd_class.h" +#include "../include/lustre_net.h" +#include "../include/lprocfs_status.h" +#include "../../include/linux/libcfs/libcfs.h" +#include "ptlrpc_internal.h" + +/* XXX: This is just for liblustre. Remove the #if defined directive when the + * "cfs_" prefix is dropped from cfs_list_head. */ +extern struct list_head ptlrpc_all_services; + +/** + * NRS core object. + */ +struct nrs_core nrs_core; + +static int nrs_policy_init(struct ptlrpc_nrs_policy *policy) +{ + return policy->pol_desc->pd_ops->op_policy_init != NULL ? + policy->pol_desc->pd_ops->op_policy_init(policy) : 0; +} + +static void nrs_policy_fini(struct ptlrpc_nrs_policy *policy) +{ + LASSERT(policy->pol_ref == 0); + LASSERT(policy->pol_req_queued == 0); + + if (policy->pol_desc->pd_ops->op_policy_fini != NULL) + policy->pol_desc->pd_ops->op_policy_fini(policy); +} + +static int nrs_policy_ctl_locked(struct ptlrpc_nrs_policy *policy, + enum ptlrpc_nrs_ctl opc, void *arg) +{ + /** + * The policy may be stopped, but the lprocfs files and + * ptlrpc_nrs_policy instances remain present until unregistration time. + * Do not perform the ctl operation if the policy is stopped, as + * policy->pol_private will be NULL in such a case. + */ + if (policy->pol_state == NRS_POL_STATE_STOPPED) + return -ENODEV; + + return policy->pol_desc->pd_ops->op_policy_ctl != NULL ? + policy->pol_desc->pd_ops->op_policy_ctl(policy, opc, arg) : + -ENOSYS; +} + +static void nrs_policy_stop0(struct ptlrpc_nrs_policy *policy) +{ + struct ptlrpc_nrs *nrs = policy->pol_nrs; + + if (policy->pol_desc->pd_ops->op_policy_stop != NULL) { + spin_unlock(&nrs->nrs_lock); + + policy->pol_desc->pd_ops->op_policy_stop(policy); + + spin_lock(&nrs->nrs_lock); + } + + LASSERT(list_empty(&policy->pol_list_queued)); + LASSERT(policy->pol_req_queued == 0 && + policy->pol_req_started == 0); + + policy->pol_private = NULL; + + policy->pol_state = NRS_POL_STATE_STOPPED; + + if (atomic_dec_and_test(&policy->pol_desc->pd_refs)) + module_put(policy->pol_desc->pd_owner); +} + +static int nrs_policy_stop_locked(struct ptlrpc_nrs_policy *policy) +{ + struct ptlrpc_nrs *nrs = policy->pol_nrs; + + if (nrs->nrs_policy_fallback == policy && !nrs->nrs_stopping) + return -EPERM; + + if (policy->pol_state == NRS_POL_STATE_STARTING) + return -EAGAIN; + + /* In progress or already stopped */ + if (policy->pol_state != NRS_POL_STATE_STARTED) + return 0; + + policy->pol_state = NRS_POL_STATE_STOPPING; + + /* Immediately make it invisible */ + if (nrs->nrs_policy_primary == policy) { + nrs->nrs_policy_primary = NULL; + + } else { + LASSERT(nrs->nrs_policy_fallback == policy); + nrs->nrs_policy_fallback = NULL; + } + + /* I have the only refcount */ + if (policy->pol_ref == 1) + nrs_policy_stop0(policy); + + return 0; +} + +/** + * Transitions the \a nrs NRS head's primary policy to + * ptlrpc_nrs_pol_state::NRS_POL_STATE_STOPPING and if the policy has no + * pending usage references, to ptlrpc_nrs_pol_state::NRS_POL_STATE_STOPPED. + * + * \param[in] nrs the NRS head to carry out this operation on + */ +static void nrs_policy_stop_primary(struct ptlrpc_nrs *nrs) +{ + struct ptlrpc_nrs_policy *tmp = nrs->nrs_policy_primary; + + if (tmp == NULL) + return; + + nrs->nrs_policy_primary = NULL; + + LASSERT(tmp->pol_state == NRS_POL_STATE_STARTED); + tmp->pol_state = NRS_POL_STATE_STOPPING; + + if (tmp->pol_ref == 0) + nrs_policy_stop0(tmp); +} + +/** + * Transitions a policy across the ptlrpc_nrs_pol_state range of values, in + * response to an lprocfs command to start a policy. + * + * If a primary policy different to the current one is specified, this function + * will transition the new policy to the + * ptlrpc_nrs_pol_state::NRS_POL_STATE_STARTING and then to + * ptlrpc_nrs_pol_state::NRS_POL_STATE_STARTED, and will then transition + * the old primary policy (if there is one) to + * ptlrpc_nrs_pol_state::NRS_POL_STATE_STOPPING, and if there are no outstanding + * references on the policy to ptlrpc_nrs_pol_stae::NRS_POL_STATE_STOPPED. + * + * If the fallback policy is specified, this is taken to indicate an instruction + * to stop the current primary policy, without substituting it with another + * primary policy, so the primary policy (if any) is transitioned to + * ptlrpc_nrs_pol_state::NRS_POL_STATE_STOPPING, and if there are no outstanding + * references on the policy to ptlrpc_nrs_pol_stae::NRS_POL_STATE_STOPPED. In + * this case, the fallback policy is only left active in the NRS head. + */ +static int nrs_policy_start_locked(struct ptlrpc_nrs_policy *policy) +{ + struct ptlrpc_nrs *nrs = policy->pol_nrs; + int rc = 0; + + /** + * Don't allow multiple starting which is too complex, and has no real + * benefit. + */ + if (nrs->nrs_policy_starting) + return -EAGAIN; + + LASSERT(policy->pol_state != NRS_POL_STATE_STARTING); + + if (policy->pol_state == NRS_POL_STATE_STOPPING) + return -EAGAIN; + + if (policy->pol_flags & PTLRPC_NRS_FL_FALLBACK) { + /** + * This is for cases in which the user sets the policy to the + * fallback policy (currently fifo for all services); i.e. the + * user is resetting the policy to the default; so we stop the + * primary policy, if any. + */ + if (policy == nrs->nrs_policy_fallback) { + nrs_policy_stop_primary(nrs); + return 0; + } + + /** + * If we reach here, we must be setting up the fallback policy + * at service startup time, and only a single policy with the + * nrs_policy_flags::PTLRPC_NRS_FL_FALLBACK flag set can + * register with NRS core. + */ + LASSERT(nrs->nrs_policy_fallback == NULL); + } else { + /** + * Shouldn't start primary policy if w/o fallback policy. + */ + if (nrs->nrs_policy_fallback == NULL) + return -EPERM; + + if (policy->pol_state == NRS_POL_STATE_STARTED) + return 0; + } + + /** + * Increase the module usage count for policies registering from other + * modules. + */ + if (atomic_inc_return(&policy->pol_desc->pd_refs) == 1 && + !try_module_get(policy->pol_desc->pd_owner)) { + atomic_dec(&policy->pol_desc->pd_refs); + CERROR("NRS: cannot get module for policy %s; is it alive?\n", + policy->pol_desc->pd_name); + return -ENODEV; + } + + /** + * Serialize policy starting across the NRS head + */ + nrs->nrs_policy_starting = 1; + + policy->pol_state = NRS_POL_STATE_STARTING; + + if (policy->pol_desc->pd_ops->op_policy_start) { + spin_unlock(&nrs->nrs_lock); + + rc = policy->pol_desc->pd_ops->op_policy_start(policy); + + spin_lock(&nrs->nrs_lock); + if (rc != 0) { + if (atomic_dec_and_test(&policy->pol_desc->pd_refs)) + module_put(policy->pol_desc->pd_owner); + + policy->pol_state = NRS_POL_STATE_STOPPED; + goto out; + } + } + + policy->pol_state = NRS_POL_STATE_STARTED; + + if (policy->pol_flags & PTLRPC_NRS_FL_FALLBACK) { + /** + * This path is only used at PTLRPC service setup time. + */ + nrs->nrs_policy_fallback = policy; + } else { + /* + * Try to stop the current primary policy if there is one. + */ + nrs_policy_stop_primary(nrs); + + /** + * And set the newly-started policy as the primary one. + */ + nrs->nrs_policy_primary = policy; + } + +out: + nrs->nrs_policy_starting = 0; + + return rc; +} + +/** + * Increases the policy's usage reference count. + */ +static inline void nrs_policy_get_locked(struct ptlrpc_nrs_policy *policy) +{ + policy->pol_ref++; +} + +/** + * Decreases the policy's usage reference count, and stops the policy in case it + * was already stopping and have no more outstanding usage references (which + * indicates it has no more queued or started requests, and can be safely + * stopped). + */ +static void nrs_policy_put_locked(struct ptlrpc_nrs_policy *policy) +{ + LASSERT(policy->pol_ref > 0); + + policy->pol_ref--; + if (unlikely(policy->pol_ref == 0 && + policy->pol_state == NRS_POL_STATE_STOPPING)) + nrs_policy_stop0(policy); +} + +static void nrs_policy_put(struct ptlrpc_nrs_policy *policy) +{ + spin_lock(&policy->pol_nrs->nrs_lock); + nrs_policy_put_locked(policy); + spin_unlock(&policy->pol_nrs->nrs_lock); +} + +/** + * Find and return a policy by name. + */ +static struct ptlrpc_nrs_policy *nrs_policy_find_locked(struct ptlrpc_nrs *nrs, + char *name) +{ + struct ptlrpc_nrs_policy *tmp; + + list_for_each_entry(tmp, &nrs->nrs_policy_list, pol_list) { + if (strncmp(tmp->pol_desc->pd_name, name, + NRS_POL_NAME_MAX) == 0) { + nrs_policy_get_locked(tmp); + return tmp; + } + } + return NULL; +} + +/** + * Release references for the resource hierarchy moving upwards towards the + * policy instance resource. + */ +static void nrs_resource_put(struct ptlrpc_nrs_resource *res) +{ + struct ptlrpc_nrs_policy *policy = res->res_policy; + + if (policy->pol_desc->pd_ops->op_res_put != NULL) { + struct ptlrpc_nrs_resource *parent; + + for (; res != NULL; res = parent) { + parent = res->res_parent; + policy->pol_desc->pd_ops->op_res_put(policy, res); + } + } +} + +/** + * Obtains references for each resource in the resource hierarchy for request + * \a nrq if it is to be handled by \a policy. + * + * \param[in] policy the policy + * \param[in] nrq the request + * \param[in] moving_req denotes whether this is a call to the function by + * ldlm_lock_reorder_req(), in order to move \a nrq to + * the high-priority NRS head; we should not sleep when + * set. + * + * \retval NULL resource hierarchy references not obtained + * \retval valid-pointer the bottom level of the resource hierarchy + * + * \see ptlrpc_nrs_pol_ops::op_res_get() + */ +static +struct ptlrpc_nrs_resource *nrs_resource_get(struct ptlrpc_nrs_policy *policy, + struct ptlrpc_nrs_request *nrq, + bool moving_req) +{ + /** + * Set to NULL to traverse the resource hierarchy from the top. + */ + struct ptlrpc_nrs_resource *res = NULL; + struct ptlrpc_nrs_resource *tmp = NULL; + int rc; + + while (1) { + rc = policy->pol_desc->pd_ops->op_res_get(policy, nrq, res, + &tmp, moving_req); + if (rc < 0) { + if (res != NULL) + nrs_resource_put(res); + return NULL; + } + + LASSERT(tmp != NULL); + tmp->res_parent = res; + tmp->res_policy = policy; + res = tmp; + tmp = NULL; + /** + * Return once we have obtained a reference to the bottom level + * of the resource hierarchy. + */ + if (rc > 0) + return res; + } +} + +/** + * Obtains resources for the resource hierarchies and policy references for + * the fallback and current primary policy (if any), that will later be used + * to handle request \a nrq. + * + * \param[in] nrs the NRS head instance that will be handling request \a nrq. + * \param[in] nrq the request that is being handled. + * \param[out] resp the array where references to the resource hierarchy are + * stored. + * \param[in] moving_req is set when obtaining resources while moving a + * request from a policy on the regular NRS head to a + * policy on the HP NRS head (via + * ldlm_lock_reorder_req()). It signifies that + * allocations to get resources should be atomic; for + * a full explanation, see comment in + * ptlrpc_nrs_pol_ops::op_res_get(). + */ +static void nrs_resource_get_safe(struct ptlrpc_nrs *nrs, + struct ptlrpc_nrs_request *nrq, + struct ptlrpc_nrs_resource **resp, + bool moving_req) +{ + struct ptlrpc_nrs_policy *primary = NULL; + struct ptlrpc_nrs_policy *fallback = NULL; + + memset(resp, 0, sizeof(resp[0]) * NRS_RES_MAX); + + /** + * Obtain policy references. + */ + spin_lock(&nrs->nrs_lock); + + fallback = nrs->nrs_policy_fallback; + nrs_policy_get_locked(fallback); + + primary = nrs->nrs_policy_primary; + if (primary != NULL) + nrs_policy_get_locked(primary); + + spin_unlock(&nrs->nrs_lock); + + /** + * Obtain resource hierarchy references. + */ + resp[NRS_RES_FALLBACK] = nrs_resource_get(fallback, nrq, moving_req); + LASSERT(resp[NRS_RES_FALLBACK] != NULL); + + if (primary != NULL) { + resp[NRS_RES_PRIMARY] = nrs_resource_get(primary, nrq, + moving_req); + /** + * A primary policy may exist which may not wish to serve a + * particular request for different reasons; release the + * reference on the policy as it will not be used for this + * request. + */ + if (resp[NRS_RES_PRIMARY] == NULL) + nrs_policy_put(primary); + } +} + +/** + * Releases references to resource hierarchies and policies, because they are no + * longer required; used when request handling has been completed, or the + * request is moving to the high priority NRS head. + * + * \param resp the resource hierarchy that is being released + * + * \see ptlrpcnrs_req_hp_move() + * \see ptlrpc_nrs_req_finalize() + */ +static void nrs_resource_put_safe(struct ptlrpc_nrs_resource **resp) +{ + struct ptlrpc_nrs_policy *pols[NRS_RES_MAX]; + struct ptlrpc_nrs *nrs = NULL; + int i; + + for (i = 0; i < NRS_RES_MAX; i++) { + if (resp[i] != NULL) { + pols[i] = resp[i]->res_policy; + nrs_resource_put(resp[i]); + resp[i] = NULL; + } else { + pols[i] = NULL; + } + } + + for (i = 0; i < NRS_RES_MAX; i++) { + if (pols[i] == NULL) + continue; + + if (nrs == NULL) { + nrs = pols[i]->pol_nrs; + spin_lock(&nrs->nrs_lock); + } + nrs_policy_put_locked(pols[i]); + } + + if (nrs != NULL) + spin_unlock(&nrs->nrs_lock); +} + +/** + * Obtains an NRS request from \a policy for handling or examination; the + * request should be removed in the 'handling' case. + * + * Calling into this function implies we already know the policy has a request + * waiting to be handled. + * + * \param[in] policy the policy from which a request + * \param[in] peek when set, signifies that we just want to examine the + * request, and not handle it, so the request is not removed + * from the policy. + * \param[in] force when set, it will force a policy to return a request if it + * has one pending + * + * \retval the NRS request to be handled + */ +static inline +struct ptlrpc_nrs_request *nrs_request_get(struct ptlrpc_nrs_policy *policy, + bool peek, bool force) +{ + struct ptlrpc_nrs_request *nrq; + + LASSERT(policy->pol_req_queued > 0); + + nrq = policy->pol_desc->pd_ops->op_req_get(policy, peek, force); + + LASSERT(ergo(nrq != NULL, nrs_request_policy(nrq) == policy)); + + return nrq; +} + +/** + * Enqueues request \a nrq for later handling, via one one the policies for + * which resources where earlier obtained via nrs_resource_get_safe(). The + * function attempts to enqueue the request first on the primary policy + * (if any), since this is the preferred choice. + * + * \param nrq the request being enqueued + * + * \see nrs_resource_get_safe() + */ +static inline void nrs_request_enqueue(struct ptlrpc_nrs_request *nrq) +{ + struct ptlrpc_nrs_policy *policy; + int rc; + int i; + + /** + * Try in descending order, because the primary policy (if any) is + * the preferred choice. + */ + for (i = NRS_RES_MAX - 1; i >= 0; i--) { + if (nrq->nr_res_ptrs[i] == NULL) + continue; + + nrq->nr_res_idx = i; + policy = nrq->nr_res_ptrs[i]->res_policy; + + rc = policy->pol_desc->pd_ops->op_req_enqueue(policy, nrq); + if (rc == 0) { + policy->pol_nrs->nrs_req_queued++; + policy->pol_req_queued++; + return; + } + } + /** + * Should never get here, as at least the primary policy's + * ptlrpc_nrs_pol_ops::op_req_enqueue() implementation should always + * succeed. + */ + LBUG(); +} + +/** + * Called when a request has been handled + * + * \param[in] nrs the request that has been handled; can be used for + * job/resource control. + * + * \see ptlrpc_nrs_req_stop_nolock() + */ +static inline void nrs_request_stop(struct ptlrpc_nrs_request *nrq) +{ + struct ptlrpc_nrs_policy *policy = nrs_request_policy(nrq); + + if (policy->pol_desc->pd_ops->op_req_stop) + policy->pol_desc->pd_ops->op_req_stop(policy, nrq); + + LASSERT(policy->pol_nrs->nrs_req_started > 0); + LASSERT(policy->pol_req_started > 0); + + policy->pol_nrs->nrs_req_started--; + policy->pol_req_started--; +} + +/** + * Handler for operations that can be carried out on policies. + * + * Handles opcodes that are common to all policy types within NRS core, and + * passes any unknown opcodes to the policy-specific control function. + * + * \param[in] nrs the NRS head this policy belongs to. + * \param[in] name the human-readable policy name; should be the same as + * ptlrpc_nrs_pol_desc::pd_name. + * \param[in] opc the opcode of the operation being carried out. + * \param[in,out] arg can be used to pass information in and out between when + * carrying an operation; usually data that is private to + * the policy at some level, or generic policy status + * information. + * + * \retval -ve error condition + * \retval 0 operation was carried out successfully + */ +static int nrs_policy_ctl(struct ptlrpc_nrs *nrs, char *name, + enum ptlrpc_nrs_ctl opc, void *arg) +{ + struct ptlrpc_nrs_policy *policy; + int rc = 0; + + spin_lock(&nrs->nrs_lock); + + policy = nrs_policy_find_locked(nrs, name); + if (policy == NULL) { + rc = -ENOENT; + goto out; + } + + switch (opc) { + /** + * Unknown opcode, pass it down to the policy-specific control + * function for handling. + */ + default: + rc = nrs_policy_ctl_locked(policy, opc, arg); + break; + + /** + * Start \e policy + */ + case PTLRPC_NRS_CTL_START: + rc = nrs_policy_start_locked(policy); + break; + } +out: + if (policy != NULL) + nrs_policy_put_locked(policy); + + spin_unlock(&nrs->nrs_lock); + + return rc; +} + +/** + * Unregisters a policy by name. + * + * \param[in] nrs the NRS head this policy belongs to. + * \param[in] name the human-readable policy name; should be the same as + * ptlrpc_nrs_pol_desc::pd_name + * + * \retval -ve error + * \retval 0 success + */ +static int nrs_policy_unregister(struct ptlrpc_nrs *nrs, char *name) +{ + struct ptlrpc_nrs_policy *policy = NULL; + + spin_lock(&nrs->nrs_lock); + + policy = nrs_policy_find_locked(nrs, name); + if (policy == NULL) { + spin_unlock(&nrs->nrs_lock); + + CERROR("Can't find NRS policy %s\n", name); + return -ENOENT; + } + + if (policy->pol_ref > 1) { + CERROR("Policy %s is busy with %d references\n", name, + (int)policy->pol_ref); + nrs_policy_put_locked(policy); + + spin_unlock(&nrs->nrs_lock); + return -EBUSY; + } + + LASSERT(policy->pol_req_queued == 0); + LASSERT(policy->pol_req_started == 0); + + if (policy->pol_state != NRS_POL_STATE_STOPPED) { + nrs_policy_stop_locked(policy); + LASSERT(policy->pol_state == NRS_POL_STATE_STOPPED); + } + + list_del(&policy->pol_list); + nrs->nrs_num_pols--; + + nrs_policy_put_locked(policy); + + spin_unlock(&nrs->nrs_lock); + + nrs_policy_fini(policy); + + LASSERT(policy->pol_private == NULL); + OBD_FREE_PTR(policy); + + return 0; +} + +/** + * Register a policy from \policy descriptor \a desc with NRS head \a nrs. + * + * \param[in] nrs the NRS head on which the policy will be registered. + * \param[in] desc the policy descriptor from which the information will be + * obtained to register the policy. + * + * \retval -ve error + * \retval 0 success + */ +static int nrs_policy_register(struct ptlrpc_nrs *nrs, + struct ptlrpc_nrs_pol_desc *desc) +{ + struct ptlrpc_nrs_policy *policy; + struct ptlrpc_nrs_policy *tmp; + struct ptlrpc_service_part *svcpt = nrs->nrs_svcpt; + int rc; + + LASSERT(svcpt != NULL); + LASSERT(desc->pd_ops != NULL); + LASSERT(desc->pd_ops->op_res_get != NULL); + LASSERT(desc->pd_ops->op_req_get != NULL); + LASSERT(desc->pd_ops->op_req_enqueue != NULL); + LASSERT(desc->pd_ops->op_req_dequeue != NULL); + LASSERT(desc->pd_compat != NULL); + + OBD_CPT_ALLOC_GFP(policy, svcpt->scp_service->srv_cptable, + svcpt->scp_cpt, sizeof(*policy), GFP_NOFS); + if (policy == NULL) + return -ENOMEM; + + policy->pol_nrs = nrs; + policy->pol_desc = desc; + policy->pol_state = NRS_POL_STATE_STOPPED; + policy->pol_flags = desc->pd_flags; + + INIT_LIST_HEAD(&policy->pol_list); + INIT_LIST_HEAD(&policy->pol_list_queued); + + rc = nrs_policy_init(policy); + if (rc != 0) { + OBD_FREE_PTR(policy); + return rc; + } + + spin_lock(&nrs->nrs_lock); + + tmp = nrs_policy_find_locked(nrs, policy->pol_desc->pd_name); + if (tmp != NULL) { + CERROR("NRS policy %s has been registered, can't register it for %s\n", + policy->pol_desc->pd_name, + svcpt->scp_service->srv_name); + nrs_policy_put_locked(tmp); + + spin_unlock(&nrs->nrs_lock); + nrs_policy_fini(policy); + OBD_FREE_PTR(policy); + + return -EEXIST; + } + + list_add_tail(&policy->pol_list, &nrs->nrs_policy_list); + nrs->nrs_num_pols++; + + if (policy->pol_flags & PTLRPC_NRS_FL_REG_START) + rc = nrs_policy_start_locked(policy); + + spin_unlock(&nrs->nrs_lock); + + if (rc != 0) + (void) nrs_policy_unregister(nrs, policy->pol_desc->pd_name); + + return rc; +} + +/** + * Enqueue request \a req using one of the policies its resources are referring + * to. + * + * \param[in] req the request to enqueue. + */ +static void ptlrpc_nrs_req_add_nolock(struct ptlrpc_request *req) +{ + struct ptlrpc_nrs_policy *policy; + + LASSERT(req->rq_nrq.nr_initialized); + LASSERT(!req->rq_nrq.nr_enqueued); + + nrs_request_enqueue(&req->rq_nrq); + req->rq_nrq.nr_enqueued = 1; + + policy = nrs_request_policy(&req->rq_nrq); + /** + * Add the policy to the NRS head's list of policies with enqueued + * requests, if it has not been added there. + */ + if (unlikely(list_empty(&policy->pol_list_queued))) + list_add_tail(&policy->pol_list_queued, + &policy->pol_nrs->nrs_policy_queued); +} + +/** + * Enqueue a request on the high priority NRS head. + * + * \param req the request to enqueue. + */ +static void ptlrpc_nrs_hpreq_add_nolock(struct ptlrpc_request *req) +{ + int opc = lustre_msg_get_opc(req->rq_reqmsg); + + spin_lock(&req->rq_lock); + req->rq_hp = 1; + ptlrpc_nrs_req_add_nolock(req); + if (opc != OBD_PING) + DEBUG_REQ(D_NET, req, "high priority req"); + spin_unlock(&req->rq_lock); +} + +/** + * Returns a boolean predicate indicating whether the policy described by + * \a desc is adequate for use with service \a svc. + * + * \param[in] svc the service + * \param[in] desc the policy descriptor + * + * \retval false the policy is not compatible with the service + * \retval true the policy is compatible with the service + */ +static inline bool nrs_policy_compatible(const struct ptlrpc_service *svc, + const struct ptlrpc_nrs_pol_desc *desc) +{ + return desc->pd_compat(svc, desc); +} + +/** + * Registers all compatible policies in nrs_core.nrs_policies, for NRS head + * \a nrs. + * + * \param[in] nrs the NRS head + * + * \retval -ve error + * \retval 0 success + * + * \pre mutex_is_locked(&nrs_core.nrs_mutex) + * + * \see ptlrpc_service_nrs_setup() + */ +static int nrs_register_policies_locked(struct ptlrpc_nrs *nrs) +{ + struct ptlrpc_nrs_pol_desc *desc; + /* for convenience */ + struct ptlrpc_service_part *svcpt = nrs->nrs_svcpt; + struct ptlrpc_service *svc = svcpt->scp_service; + int rc = -EINVAL; + + LASSERT(mutex_is_locked(&nrs_core.nrs_mutex)); + + list_for_each_entry(desc, &nrs_core.nrs_policies, pd_list) { + if (nrs_policy_compatible(svc, desc)) { + rc = nrs_policy_register(nrs, desc); + if (rc != 0) { + CERROR("Failed to register NRS policy %s for partition %d of service %s: %d\n", + desc->pd_name, svcpt->scp_cpt, + svc->srv_name, rc); + /** + * Fail registration if any of the policies' + * registration fails. + */ + break; + } + } + } + + return rc; +} + +/** + * Initializes NRS head \a nrs of service partition \a svcpt, and registers all + * compatible policies in NRS core, with the NRS head. + * + * \param[in] nrs the NRS head + * \param[in] svcpt the PTLRPC service partition to setup + * + * \retval -ve error + * \retval 0 success + * + * \pre mutex_is_locked(&nrs_core.nrs_mutex) + */ +static int nrs_svcpt_setup_locked0(struct ptlrpc_nrs *nrs, + struct ptlrpc_service_part *svcpt) +{ + enum ptlrpc_nrs_queue_type queue; + + LASSERT(mutex_is_locked(&nrs_core.nrs_mutex)); + + if (nrs == &svcpt->scp_nrs_reg) + queue = PTLRPC_NRS_QUEUE_REG; + else if (nrs == svcpt->scp_nrs_hp) + queue = PTLRPC_NRS_QUEUE_HP; + else + LBUG(); + + nrs->nrs_svcpt = svcpt; + nrs->nrs_queue_type = queue; + spin_lock_init(&nrs->nrs_lock); + INIT_LIST_HEAD(&nrs->nrs_policy_list); + INIT_LIST_HEAD(&nrs->nrs_policy_queued); + + return nrs_register_policies_locked(nrs); +} + +/** + * Allocates a regular and optionally a high-priority NRS head (if the service + * handles high-priority RPCs), and then registers all available compatible + * policies on those NRS heads. + * + * \param[in,out] svcpt the PTLRPC service partition to setup + * + * \pre mutex_is_locked(&nrs_core.nrs_mutex) + */ +static int nrs_svcpt_setup_locked(struct ptlrpc_service_part *svcpt) +{ + struct ptlrpc_nrs *nrs; + int rc; + + LASSERT(mutex_is_locked(&nrs_core.nrs_mutex)); + + /** + * Initialize the regular NRS head. + */ + nrs = nrs_svcpt2nrs(svcpt, false); + rc = nrs_svcpt_setup_locked0(nrs, svcpt); + if (rc < 0) + goto out; + + /** + * Optionally allocate a high-priority NRS head. + */ + if (svcpt->scp_service->srv_ops.so_hpreq_handler == NULL) + goto out; + + OBD_CPT_ALLOC_PTR(svcpt->scp_nrs_hp, + svcpt->scp_service->srv_cptable, + svcpt->scp_cpt); + if (svcpt->scp_nrs_hp == NULL) { + rc = -ENOMEM; + goto out; + } + + nrs = nrs_svcpt2nrs(svcpt, true); + rc = nrs_svcpt_setup_locked0(nrs, svcpt); + +out: + return rc; +} + +/** + * Unregisters all policies on all available NRS heads in a service partition; + * called at PTLRPC service unregistration time. + * + * \param[in] svcpt the PTLRPC service partition + * + * \pre mutex_is_locked(&nrs_core.nrs_mutex) + */ +static void nrs_svcpt_cleanup_locked(struct ptlrpc_service_part *svcpt) +{ + struct ptlrpc_nrs *nrs; + struct ptlrpc_nrs_policy *policy; + struct ptlrpc_nrs_policy *tmp; + int rc; + bool hp = false; + + LASSERT(mutex_is_locked(&nrs_core.nrs_mutex)); + +again: + nrs = nrs_svcpt2nrs(svcpt, hp); + nrs->nrs_stopping = 1; + + list_for_each_entry_safe(policy, tmp, &nrs->nrs_policy_list, + pol_list) { + rc = nrs_policy_unregister(nrs, policy->pol_desc->pd_name); + LASSERT(rc == 0); + } + + /** + * If the service partition has an HP NRS head, clean that up as well. + */ + if (!hp && nrs_svcpt_has_hp(svcpt)) { + hp = true; + goto again; + } + + if (hp) + OBD_FREE_PTR(nrs); +} + +/** + * Returns the descriptor for a policy as identified by by \a name. + * + * \param[in] name the policy name + * + * \retval the policy descriptor + * \retval NULL + */ +static struct ptlrpc_nrs_pol_desc *nrs_policy_find_desc_locked(const char *name) +{ + struct ptlrpc_nrs_pol_desc *tmp; + + list_for_each_entry(tmp, &nrs_core.nrs_policies, pd_list) { + if (strncmp(tmp->pd_name, name, NRS_POL_NAME_MAX) == 0) + return tmp; + } + return NULL; +} + +/** + * Removes the policy from all supported NRS heads of all partitions of all + * PTLRPC services. + * + * \param[in] desc the policy descriptor to unregister + * + * \retval -ve error + * \retval 0 successfully unregistered policy on all supported NRS heads + * + * \pre mutex_is_locked(&nrs_core.nrs_mutex) + * \pre mutex_is_locked(&ptlrpc_all_services_mutex) + */ +static int nrs_policy_unregister_locked(struct ptlrpc_nrs_pol_desc *desc) +{ + struct ptlrpc_nrs *nrs; + struct ptlrpc_service *svc; + struct ptlrpc_service_part *svcpt; + int i; + int rc = 0; + + LASSERT(mutex_is_locked(&nrs_core.nrs_mutex)); + LASSERT(mutex_is_locked(&ptlrpc_all_services_mutex)); + + list_for_each_entry(svc, &ptlrpc_all_services, srv_list) { + + if (!nrs_policy_compatible(svc, desc) || + unlikely(svc->srv_is_stopping)) + continue; + + ptlrpc_service_for_each_part(svcpt, i, svc) { + bool hp = false; + +again: + nrs = nrs_svcpt2nrs(svcpt, hp); + rc = nrs_policy_unregister(nrs, desc->pd_name); + /** + * Ignore -ENOENT as the policy may not have registered + * successfully on all service partitions. + */ + if (rc == -ENOENT) { + rc = 0; + } else if (rc != 0) { + CERROR("Failed to unregister NRS policy %s for partition %d of service %s: %d\n", + desc->pd_name, svcpt->scp_cpt, + svcpt->scp_service->srv_name, rc); + return rc; + } + + if (!hp && nrs_svc_has_hp(svc)) { + hp = true; + goto again; + } + } + + if (desc->pd_ops->op_lprocfs_fini != NULL) + desc->pd_ops->op_lprocfs_fini(svc); + } + + return rc; +} + +/** + * Registers a new policy with NRS core. + * + * The function will only succeed if policy registration with all compatible + * service partitions (if any) is successful. + * + * N.B. This function should be called either at ptlrpc module initialization + * time when registering a policy that ships with NRS core, or in a + * module's init() function for policies registering from other modules. + * + * \param[in] conf configuration information for the new policy to register + * + * \retval -ve error + * \retval 0 success + */ +int ptlrpc_nrs_policy_register(struct ptlrpc_nrs_pol_conf *conf) +{ + struct ptlrpc_service *svc; + struct ptlrpc_nrs_pol_desc *desc; + int rc = 0; + + LASSERT(conf != NULL); + LASSERT(conf->nc_ops != NULL); + LASSERT(conf->nc_compat != NULL); + LASSERT(ergo(conf->nc_compat == nrs_policy_compat_one, + conf->nc_compat_svc_name != NULL)); + LASSERT(ergo((conf->nc_flags & PTLRPC_NRS_FL_REG_EXTERN) != 0, + conf->nc_owner != NULL)); + + conf->nc_name[NRS_POL_NAME_MAX - 1] = '\0'; + + /** + * External policies are not allowed to start immediately upon + * registration, as there is a relatively higher chance that their + * registration might fail. In such a case, some policy instances may + * already have requests queued wen unregistration needs to happen as + * part o cleanup; since there is currently no way to drain requests + * from a policy unless the service is unregistering, we just disallow + * this. + */ + if ((conf->nc_flags & PTLRPC_NRS_FL_REG_EXTERN) && + (conf->nc_flags & (PTLRPC_NRS_FL_FALLBACK | + PTLRPC_NRS_FL_REG_START))) { + CERROR("NRS: failing to register policy %s. Please check policy flags; external policies cannot act as fallback policies, or be started immediately upon registration without interaction with lprocfs\n", + conf->nc_name); + return -EINVAL; + } + + mutex_lock(&nrs_core.nrs_mutex); + + if (nrs_policy_find_desc_locked(conf->nc_name) != NULL) { + CERROR("NRS: failing to register policy %s which has already been registered with NRS core!\n", + conf->nc_name); + rc = -EEXIST; + goto fail; + } + + OBD_ALLOC_PTR(desc); + if (desc == NULL) { + rc = -ENOMEM; + goto fail; + } + + strncpy(desc->pd_name, conf->nc_name, NRS_POL_NAME_MAX); + desc->pd_ops = conf->nc_ops; + desc->pd_compat = conf->nc_compat; + desc->pd_compat_svc_name = conf->nc_compat_svc_name; + if ((conf->nc_flags & PTLRPC_NRS_FL_REG_EXTERN) != 0) + desc->pd_owner = conf->nc_owner; + desc->pd_flags = conf->nc_flags; + atomic_set(&desc->pd_refs, 0); + + /** + * For policies that are held in the same module as NRS (currently + * ptlrpc), do not register the policy with all compatible services, + * as the services will not have started at this point, since we are + * calling from ptlrpc module initialization code. In such cases each + * service will register all compatible policies later, via + * ptlrpc_service_nrs_setup(). + */ + if ((conf->nc_flags & PTLRPC_NRS_FL_REG_EXTERN) == 0) + goto internal; + + /** + * Register the new policy on all compatible services + */ + mutex_lock(&ptlrpc_all_services_mutex); + + list_for_each_entry(svc, &ptlrpc_all_services, srv_list) { + struct ptlrpc_service_part *svcpt; + int i; + int rc2; + + if (!nrs_policy_compatible(svc, desc) || + unlikely(svc->srv_is_stopping)) + continue; + + ptlrpc_service_for_each_part(svcpt, i, svc) { + struct ptlrpc_nrs *nrs; + bool hp = false; +again: + nrs = nrs_svcpt2nrs(svcpt, hp); + rc = nrs_policy_register(nrs, desc); + if (rc != 0) { + CERROR("Failed to register NRS policy %s for partition %d of service %s: %d\n", + desc->pd_name, svcpt->scp_cpt, + svcpt->scp_service->srv_name, rc); + + rc2 = nrs_policy_unregister_locked(desc); + /** + * Should not fail at this point + */ + LASSERT(rc2 == 0); + mutex_unlock(&ptlrpc_all_services_mutex); + OBD_FREE_PTR(desc); + goto fail; + } + + if (!hp && nrs_svc_has_hp(svc)) { + hp = true; + goto again; + } + } + + /** + * No need to take a reference to other modules here, as we + * will be calling from the module's init() function. + */ + if (desc->pd_ops->op_lprocfs_init != NULL) { + rc = desc->pd_ops->op_lprocfs_init(svc); + if (rc != 0) { + rc2 = nrs_policy_unregister_locked(desc); + /** + * Should not fail at this point + */ + LASSERT(rc2 == 0); + mutex_unlock(&ptlrpc_all_services_mutex); + OBD_FREE_PTR(desc); + goto fail; + } + } + } + + mutex_unlock(&ptlrpc_all_services_mutex); +internal: + list_add_tail(&desc->pd_list, &nrs_core.nrs_policies); +fail: + mutex_unlock(&nrs_core.nrs_mutex); + + return rc; +} +EXPORT_SYMBOL(ptlrpc_nrs_policy_register); + +/** + * Unregisters a previously registered policy with NRS core. All instances of + * the policy on all NRS heads of all supported services are removed. + * + * N.B. This function should only be called from a module's exit() function. + * Although it can be used for policies that ship alongside NRS core, the + * function is primarily intended for policies that register externally, + * from other modules. + * + * \param[in] conf configuration information for the policy to unregister + * + * \retval -ve error + * \retval 0 success + */ +int ptlrpc_nrs_policy_unregister(struct ptlrpc_nrs_pol_conf *conf) +{ + struct ptlrpc_nrs_pol_desc *desc; + int rc; + + LASSERT(conf != NULL); + + if (conf->nc_flags & PTLRPC_NRS_FL_FALLBACK) { + CERROR("Unable to unregister a fallback policy, unless the PTLRPC service is stopping.\n"); + return -EPERM; + } + + conf->nc_name[NRS_POL_NAME_MAX - 1] = '\0'; + + mutex_lock(&nrs_core.nrs_mutex); + + desc = nrs_policy_find_desc_locked(conf->nc_name); + if (desc == NULL) { + CERROR("Failing to unregister NRS policy %s which has not been registered with NRS core!\n", + conf->nc_name); + rc = -ENOENT; + goto not_exist; + } + + mutex_lock(&ptlrpc_all_services_mutex); + + rc = nrs_policy_unregister_locked(desc); + if (rc < 0) { + if (rc == -EBUSY) + CERROR("Please first stop policy %s on all service partitions and then retry to unregister the policy.\n", + conf->nc_name); + goto fail; + } + + CDEBUG(D_INFO, "Unregistering policy %s from NRS core.\n", + conf->nc_name); + + list_del(&desc->pd_list); + OBD_FREE_PTR(desc); + +fail: + mutex_unlock(&ptlrpc_all_services_mutex); + +not_exist: + mutex_unlock(&nrs_core.nrs_mutex); + + return rc; +} +EXPORT_SYMBOL(ptlrpc_nrs_policy_unregister); + +/** + * Setup NRS heads on all service partitions of service \a svc, and register + * all compatible policies on those NRS heads. + * + * To be called from within ptl + * \param[in] svc the service to setup + * + * \retval -ve error, the calling logic should eventually call + * ptlrpc_service_nrs_cleanup() to undo any work performed + * by this function. + * + * \see ptlrpc_register_service() + * \see ptlrpc_service_nrs_cleanup() + */ +int ptlrpc_service_nrs_setup(struct ptlrpc_service *svc) +{ + struct ptlrpc_service_part *svcpt; + const struct ptlrpc_nrs_pol_desc *desc; + int i; + int rc = 0; + + mutex_lock(&nrs_core.nrs_mutex); + + /** + * Initialize NRS heads on all service CPTs. + */ + ptlrpc_service_for_each_part(svcpt, i, svc) { + rc = nrs_svcpt_setup_locked(svcpt); + if (rc != 0) + goto failed; + } + + /** + * Set up lprocfs interfaces for all supported policies for the + * service. + */ + list_for_each_entry(desc, &nrs_core.nrs_policies, pd_list) { + if (!nrs_policy_compatible(svc, desc)) + continue; + + if (desc->pd_ops->op_lprocfs_init != NULL) { + rc = desc->pd_ops->op_lprocfs_init(svc); + if (rc != 0) + goto failed; + } + } + +failed: + + mutex_unlock(&nrs_core.nrs_mutex); + + return rc; +} + +/** + * Unregisters all policies on all service partitions of service \a svc. + * + * \param[in] svc the PTLRPC service to unregister + */ +void ptlrpc_service_nrs_cleanup(struct ptlrpc_service *svc) +{ + struct ptlrpc_service_part *svcpt; + const struct ptlrpc_nrs_pol_desc *desc; + int i; + + mutex_lock(&nrs_core.nrs_mutex); + + /** + * Clean up NRS heads on all service partitions + */ + ptlrpc_service_for_each_part(svcpt, i, svc) + nrs_svcpt_cleanup_locked(svcpt); + + /** + * Clean up lprocfs interfaces for all supported policies for the + * service. + */ + list_for_each_entry(desc, &nrs_core.nrs_policies, pd_list) { + if (!nrs_policy_compatible(svc, desc)) + continue; + + if (desc->pd_ops->op_lprocfs_fini != NULL) + desc->pd_ops->op_lprocfs_fini(svc); + } + + mutex_unlock(&nrs_core.nrs_mutex); +} + +/** + * Obtains NRS head resources for request \a req. + * + * These could be either on the regular or HP NRS head of \a svcpt; resources + * taken on the regular head can later be swapped for HP head resources by + * ldlm_lock_reorder_req(). + * + * \param[in] svcpt the service partition + * \param[in] req the request + * \param[in] hp which NRS head of \a svcpt to use + */ +void ptlrpc_nrs_req_initialize(struct ptlrpc_service_part *svcpt, + struct ptlrpc_request *req, bool hp) +{ + struct ptlrpc_nrs *nrs = nrs_svcpt2nrs(svcpt, hp); + + memset(&req->rq_nrq, 0, sizeof(req->rq_nrq)); + nrs_resource_get_safe(nrs, &req->rq_nrq, req->rq_nrq.nr_res_ptrs, + false); + + /** + * It is fine to access \e nr_initialized without locking as there is + * no contention at this early stage. + */ + req->rq_nrq.nr_initialized = 1; +} + +/** + * Releases resources for a request; is called after the request has been + * handled. + * + * \param[in] req the request + * + * \see ptlrpc_server_finish_request() + */ +void ptlrpc_nrs_req_finalize(struct ptlrpc_request *req) +{ + if (req->rq_nrq.nr_initialized) { + nrs_resource_put_safe(req->rq_nrq.nr_res_ptrs); + /* no protection on bit nr_initialized because no + * contention at this late stage */ + req->rq_nrq.nr_finalized = 1; + } +} + +void ptlrpc_nrs_req_stop_nolock(struct ptlrpc_request *req) +{ + if (req->rq_nrq.nr_started) + nrs_request_stop(&req->rq_nrq); +} + +/** + * Enqueues request \a req on either the regular or high-priority NRS head + * of service partition \a svcpt. + * + * \param[in] svcpt the service partition + * \param[in] req the request to be enqueued + * \param[in] hp whether to enqueue the request on the regular or + * high-priority NRS head. + */ +void ptlrpc_nrs_req_add(struct ptlrpc_service_part *svcpt, + struct ptlrpc_request *req, bool hp) +{ + spin_lock(&svcpt->scp_req_lock); + + if (hp) + ptlrpc_nrs_hpreq_add_nolock(req); + else + ptlrpc_nrs_req_add_nolock(req); + + spin_unlock(&svcpt->scp_req_lock); +} + +static void nrs_request_removed(struct ptlrpc_nrs_policy *policy) +{ + LASSERT(policy->pol_nrs->nrs_req_queued > 0); + LASSERT(policy->pol_req_queued > 0); + + policy->pol_nrs->nrs_req_queued--; + policy->pol_req_queued--; + + /** + * If the policy has no more requests queued, remove it from + * ptlrpc_nrs::nrs_policy_queued. + */ + if (unlikely(policy->pol_req_queued == 0)) { + list_del_init(&policy->pol_list_queued); + + /** + * If there are other policies with queued requests, move the + * current policy to the end so that we can round robin over + * all policies and drain the requests. + */ + } else if (policy->pol_req_queued != policy->pol_nrs->nrs_req_queued) { + LASSERT(policy->pol_req_queued < + policy->pol_nrs->nrs_req_queued); + + list_move_tail(&policy->pol_list_queued, + &policy->pol_nrs->nrs_policy_queued); + } +} + +/** + * Obtains a request for handling from an NRS head of service partition + * \a svcpt. + * + * \param[in] svcpt the service partition + * \param[in] hp whether to obtain a request from the regular or + * high-priority NRS head. + * \param[in] peek when set, signifies that we just want to examine the + * request, and not handle it, so the request is not removed + * from the policy. + * \param[in] force when set, it will force a policy to return a request if it + * has one pending + * + * \retval the request to be handled + * \retval NULL the head has no requests to serve + */ +struct ptlrpc_request * +ptlrpc_nrs_req_get_nolock0(struct ptlrpc_service_part *svcpt, bool hp, + bool peek, bool force) +{ + struct ptlrpc_nrs *nrs = nrs_svcpt2nrs(svcpt, hp); + struct ptlrpc_nrs_policy *policy; + struct ptlrpc_nrs_request *nrq; + + /** + * Always try to drain requests from all NRS polices even if they are + * inactive, because the user can change policy status at runtime. + */ + list_for_each_entry(policy, &nrs->nrs_policy_queued, + pol_list_queued) { + nrq = nrs_request_get(policy, peek, force); + if (nrq != NULL) { + if (likely(!peek)) { + nrq->nr_started = 1; + + policy->pol_req_started++; + policy->pol_nrs->nrs_req_started++; + + nrs_request_removed(policy); + } + + return container_of(nrq, struct ptlrpc_request, rq_nrq); + } + } + + return NULL; +} + +/** + * Dequeues request \a req from the policy it has been enqueued on. + * + * \param[in] req the request + */ +void ptlrpc_nrs_req_del_nolock(struct ptlrpc_request *req) +{ + struct ptlrpc_nrs_policy *policy = nrs_request_policy(&req->rq_nrq); + + policy->pol_desc->pd_ops->op_req_dequeue(policy, &req->rq_nrq); + + req->rq_nrq.nr_enqueued = 0; + + nrs_request_removed(policy); +} + +/** + * Returns whether there are any requests currently enqueued on any of the + * policies of service partition's \a svcpt NRS head specified by \a hp. Should + * be called while holding ptlrpc_service_part::scp_req_lock to get a reliable + * result. + * + * \param[in] svcpt the service partition to enquire. + * \param[in] hp whether the regular or high-priority NRS head is to be + * enquired. + * + * \retval false the indicated NRS head has no enqueued requests. + * \retval true the indicated NRS head has some enqueued requests. + */ +bool ptlrpc_nrs_req_pending_nolock(struct ptlrpc_service_part *svcpt, bool hp) +{ + struct ptlrpc_nrs *nrs = nrs_svcpt2nrs(svcpt, hp); + + return nrs->nrs_req_queued > 0; +}; + +/** + * Moves request \a req from the regular to the high-priority NRS head. + * + * \param[in] req the request to move + */ +void ptlrpc_nrs_req_hp_move(struct ptlrpc_request *req) +{ + struct ptlrpc_service_part *svcpt = req->rq_rqbd->rqbd_svcpt; + struct ptlrpc_nrs_request *nrq = &req->rq_nrq; + struct ptlrpc_nrs_resource *res1[NRS_RES_MAX]; + struct ptlrpc_nrs_resource *res2[NRS_RES_MAX]; + + /** + * Obtain the high-priority NRS head resources. + */ + nrs_resource_get_safe(nrs_svcpt2nrs(svcpt, true), nrq, res1, true); + + spin_lock(&svcpt->scp_req_lock); + + if (!ptlrpc_nrs_req_can_move(req)) + goto out; + + ptlrpc_nrs_req_del_nolock(req); + + memcpy(res2, nrq->nr_res_ptrs, NRS_RES_MAX * sizeof(res2[0])); + memcpy(nrq->nr_res_ptrs, res1, NRS_RES_MAX * sizeof(res1[0])); + + ptlrpc_nrs_hpreq_add_nolock(req); + + memcpy(res1, res2, NRS_RES_MAX * sizeof(res1[0])); +out: + spin_unlock(&svcpt->scp_req_lock); + + /** + * Release either the regular NRS head resources if we moved the + * request, or the high-priority NRS head resources if we took a + * reference earlier in this function and ptlrpc_nrs_req_can_move() + * returned false. + */ + nrs_resource_put_safe(res1); +} + +/** + * Carries out a control operation \a opc on the policy identified by the + * human-readable \a name, on either all partitions, or only on the first + * partition of service \a svc. + * + * \param[in] svc the service the policy belongs to. + * \param[in] queue whether to carry out the command on the policy which + * belongs to the regular, high-priority, or both NRS + * heads of service partitions of \a svc. + * \param[in] name the policy to act upon, by human-readable name + * \param[in] opc the opcode of the operation to carry out + * \param[in] single when set, the operation will only be carried out on the + * NRS heads of the first service partition of \a svc. + * This is useful for some policies which e.g. share + * identical values on the same parameters of different + * service partitions; when reading these parameters via + * lprocfs, these policies may just want to obtain and + * print out the values from the first service partition. + * Storing these values centrally elsewhere then could be + * another solution for this. + * \param[in,out] arg can be used as a generic in/out buffer between control + * operations and the user environment. + * + *\retval -ve error condition + *\retval 0 operation was carried out successfully + */ +int ptlrpc_nrs_policy_control(const struct ptlrpc_service *svc, + enum ptlrpc_nrs_queue_type queue, char *name, + enum ptlrpc_nrs_ctl opc, bool single, void *arg) +{ + struct ptlrpc_service_part *svcpt; + int i; + int rc = 0; + + LASSERT(opc != PTLRPC_NRS_CTL_INVALID); + + if ((queue & PTLRPC_NRS_QUEUE_BOTH) == 0) + return -EINVAL; + + ptlrpc_service_for_each_part(svcpt, i, svc) { + if ((queue & PTLRPC_NRS_QUEUE_REG) != 0) { + rc = nrs_policy_ctl(nrs_svcpt2nrs(svcpt, false), name, + opc, arg); + if (rc != 0 || (queue == PTLRPC_NRS_QUEUE_REG && + single)) + goto out; + } + + if ((queue & PTLRPC_NRS_QUEUE_HP) != 0) { + /** + * XXX: We could optionally check for + * nrs_svc_has_hp(svc) here, and return an error if it + * is false. Right now we rely on the policies' lprocfs + * handlers that call the present function to make this + * check; if they fail to do so, they might hit the + * assertion inside nrs_svcpt2nrs() below. + */ + rc = nrs_policy_ctl(nrs_svcpt2nrs(svcpt, true), name, + opc, arg); + if (rc != 0 || single) + goto out; + } + } +out: + return rc; +} + + +/* ptlrpc/nrs_fifo.c */ +extern struct ptlrpc_nrs_pol_conf nrs_conf_fifo; + +/** + * Adds all policies that ship with the ptlrpc module, to NRS core's list of + * policies \e nrs_core.nrs_policies. + * + * \retval 0 all policies have been registered successfully + * \retval -ve error + */ +int ptlrpc_nrs_init(void) +{ + int rc; + + mutex_init(&nrs_core.nrs_mutex); + INIT_LIST_HEAD(&nrs_core.nrs_policies); + + rc = ptlrpc_nrs_policy_register(&nrs_conf_fifo); + if (rc != 0) + goto fail; + + + return rc; +fail: + /** + * Since no PTLRPC services have been started at this point, all we need + * to do for cleanup is to free the descriptors. + */ + ptlrpc_nrs_fini(); + + return rc; +} + +/** + * Removes all policy descriptors from nrs_core::nrs_policies, and frees the + * policy descriptors. + * + * Since all PTLRPC services are stopped at this point, there are no more + * instances of any policies, because each service will have stopped its policy + * instances in ptlrpc_service_nrs_cleanup(), so we just need to free the + * descriptors here. + */ +void ptlrpc_nrs_fini(void) +{ + struct ptlrpc_nrs_pol_desc *desc; + struct ptlrpc_nrs_pol_desc *tmp; + + list_for_each_entry_safe(desc, tmp, &nrs_core.nrs_policies, + pd_list) { + list_del_init(&desc->pd_list); + OBD_FREE_PTR(desc); + } +} + +/** @} nrs */ diff --git a/drivers/staging/lustre/lustre/ptlrpc/nrs_fifo.c b/drivers/staging/lustre/lustre/ptlrpc/nrs_fifo.c new file mode 100644 index 000000000..eb40c01db --- /dev/null +++ b/drivers/staging/lustre/lustre/ptlrpc/nrs_fifo.c @@ -0,0 +1,270 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License version 2 for more details. A copy is + * included in the COPYING file that accompanied this code. + + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * GPL HEADER END + */ +/* + * Copyright (c) 2011 Intel Corporation + * + * Copyright 2012 Xyratex Technology Limited + */ +/* + * lustre/ptlrpc/nrs_fifo.c + * + * Network Request Scheduler (NRS) FIFO policy + * + * Handles RPCs in a FIFO manner, as received from the network. This policy is + * a logical wrapper around previous, non-NRS functionality. It is used as the + * default and fallback policy for all types of RPCs on all PTLRPC service + * partitions, for both regular and high-priority NRS heads. Default here means + * the policy is the one enabled at PTLRPC service partition startup time, and + * fallback means the policy is used to handle RPCs that are not handled + * successfully or are not handled at all by any primary policy that may be + * enabled on a given NRS head. + * + * Author: Liang Zhen <liang@whamcloud.com> + * Author: Nikitas Angelinas <nikitas_angelinas@xyratex.com> + */ +/** + * \addtogoup nrs + * @{ + */ + +#define DEBUG_SUBSYSTEM S_RPC +#include "../include/obd_support.h" +#include "../include/obd_class.h" +#include "../../include/linux/libcfs/libcfs.h" +#include "ptlrpc_internal.h" + +/** + * \name fifo + * + * The FIFO policy is a logical wrapper around previous, non-NRS functionality. + * It schedules RPCs in the same order as they are queued from LNet. + * + * @{ + */ + +#define NRS_POL_NAME_FIFO "fifo" + +/** + * Is called before the policy transitions into + * ptlrpc_nrs_pol_state::NRS_POL_STATE_STARTED; allocates and initializes a + * policy-specific private data structure. + * + * \param[in] policy The policy to start + * + * \retval -ENOMEM OOM error + * \retval 0 success + * + * \see nrs_policy_register() + * \see nrs_policy_ctl() + */ +static int nrs_fifo_start(struct ptlrpc_nrs_policy *policy) +{ + struct nrs_fifo_head *head; + + OBD_CPT_ALLOC_PTR(head, nrs_pol2cptab(policy), nrs_pol2cptid(policy)); + if (head == NULL) + return -ENOMEM; + + INIT_LIST_HEAD(&head->fh_list); + policy->pol_private = head; + return 0; +} + +/** + * Is called before the policy transitions into + * ptlrpc_nrs_pol_state::NRS_POL_STATE_STOPPED; deallocates the policy-specific + * private data structure. + * + * \param[in] policy The policy to stop + * + * \see nrs_policy_stop0() + */ +static void nrs_fifo_stop(struct ptlrpc_nrs_policy *policy) +{ + struct nrs_fifo_head *head = policy->pol_private; + + LASSERT(head != NULL); + LASSERT(list_empty(&head->fh_list)); + + OBD_FREE_PTR(head); +} + +/** + * Is called for obtaining a FIFO policy resource. + * + * \param[in] policy The policy on which the request is being asked for + * \param[in] nrq The request for which resources are being taken + * \param[in] parent Parent resource, unused in this policy + * \param[out] resp Resources references are placed in this array + * \param[in] moving_req Signifies limited caller context; unused in this + * policy + * + * \retval 1 The FIFO policy only has a one-level resource hierarchy, as since + * it implements a simple scheduling algorithm in which request + * priority is determined on the request arrival order, it does not + * need to maintain a set of resources that would otherwise be used + * to calculate a request's priority. + * + * \see nrs_resource_get_safe() + */ +static int nrs_fifo_res_get(struct ptlrpc_nrs_policy *policy, + struct ptlrpc_nrs_request *nrq, + const struct ptlrpc_nrs_resource *parent, + struct ptlrpc_nrs_resource **resp, bool moving_req) +{ + /** + * Just return the resource embedded inside nrs_fifo_head, and end this + * resource hierarchy reference request. + */ + *resp = &((struct nrs_fifo_head *)policy->pol_private)->fh_res; + return 1; +} + +/** + * Called when getting a request from the FIFO policy for handling, or just + * peeking; removes the request from the policy when it is to be handled. + * + * \param[in] policy The policy + * \param[in] peek When set, signifies that we just want to examine the + * request, and not handle it, so the request is not removed + * from the policy. + * \param[in] force Force the policy to return a request; unused in this + * policy + * + * \retval The request to be handled; this is the next request in the FIFO + * queue + * + * \see ptlrpc_nrs_req_get_nolock() + * \see nrs_request_get() + */ +static +struct ptlrpc_nrs_request *nrs_fifo_req_get(struct ptlrpc_nrs_policy *policy, + bool peek, bool force) +{ + struct nrs_fifo_head *head = policy->pol_private; + struct ptlrpc_nrs_request *nrq; + + nrq = unlikely(list_empty(&head->fh_list)) ? NULL : + list_entry(head->fh_list.next, struct ptlrpc_nrs_request, + nr_u.fifo.fr_list); + + if (likely(!peek && nrq != NULL)) { + struct ptlrpc_request *req = container_of(nrq, + struct ptlrpc_request, + rq_nrq); + + list_del_init(&nrq->nr_u.fifo.fr_list); + + CDEBUG(D_RPCTRACE, "NRS start %s request from %s, seq: %llu\n", + policy->pol_desc->pd_name, libcfs_id2str(req->rq_peer), + nrq->nr_u.fifo.fr_sequence); + } + + return nrq; +} + +/** + * Adds request \a nrq to \a policy's list of queued requests + * + * \param[in] policy The policy + * \param[in] nrq The request to add + * + * \retval 0 success; nrs_request_enqueue() assumes this function will always + * succeed + */ +static int nrs_fifo_req_add(struct ptlrpc_nrs_policy *policy, + struct ptlrpc_nrs_request *nrq) +{ + struct nrs_fifo_head *head; + + head = container_of(nrs_request_resource(nrq), struct nrs_fifo_head, + fh_res); + /** + * Only used for debugging + */ + nrq->nr_u.fifo.fr_sequence = head->fh_sequence++; + list_add_tail(&nrq->nr_u.fifo.fr_list, &head->fh_list); + + return 0; +} + +/** + * Removes request \a nrq from \a policy's list of queued requests. + * + * \param[in] policy The policy + * \param[in] nrq The request to remove + */ +static void nrs_fifo_req_del(struct ptlrpc_nrs_policy *policy, + struct ptlrpc_nrs_request *nrq) +{ + LASSERT(!list_empty(&nrq->nr_u.fifo.fr_list)); + list_del_init(&nrq->nr_u.fifo.fr_list); +} + +/** + * Prints a debug statement right before the request \a nrq stops being + * handled. + * + * \param[in] policy The policy handling the request + * \param[in] nrq The request being handled + * + * \see ptlrpc_server_finish_request() + * \see ptlrpc_nrs_req_stop_nolock() + */ +static void nrs_fifo_req_stop(struct ptlrpc_nrs_policy *policy, + struct ptlrpc_nrs_request *nrq) +{ + struct ptlrpc_request *req = container_of(nrq, struct ptlrpc_request, + rq_nrq); + + CDEBUG(D_RPCTRACE, "NRS stop %s request from %s, seq: %llu\n", + policy->pol_desc->pd_name, libcfs_id2str(req->rq_peer), + nrq->nr_u.fifo.fr_sequence); +} + +/** + * FIFO policy operations + */ +static const struct ptlrpc_nrs_pol_ops nrs_fifo_ops = { + .op_policy_start = nrs_fifo_start, + .op_policy_stop = nrs_fifo_stop, + .op_res_get = nrs_fifo_res_get, + .op_req_get = nrs_fifo_req_get, + .op_req_enqueue = nrs_fifo_req_add, + .op_req_dequeue = nrs_fifo_req_del, + .op_req_stop = nrs_fifo_req_stop, +}; + +/** + * FIFO policy configuration + */ +struct ptlrpc_nrs_pol_conf nrs_conf_fifo = { + .nc_name = NRS_POL_NAME_FIFO, + .nc_ops = &nrs_fifo_ops, + .nc_compat = nrs_policy_compat_all, + .nc_flags = PTLRPC_NRS_FL_FALLBACK | + PTLRPC_NRS_FL_REG_START +}; + +/** @} fifo */ + +/** @} nrs */ diff --git a/drivers/staging/lustre/lustre/ptlrpc/pack_generic.c b/drivers/staging/lustre/lustre/ptlrpc/pack_generic.c new file mode 100644 index 000000000..b51af9bf3 --- /dev/null +++ b/drivers/staging/lustre/lustre/ptlrpc/pack_generic.c @@ -0,0 +1,2536 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2011, 2012, Intel Corporation. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + * + * lustre/ptlrpc/pack_generic.c + * + * (Un)packing of OST requests + * + * Author: Peter J. Braam <braam@clusterfs.com> + * Author: Phil Schwan <phil@clusterfs.com> + * Author: Eric Barton <eeb@clusterfs.com> + */ + +#define DEBUG_SUBSYSTEM S_RPC + +#include "../../include/linux/libcfs/libcfs.h" + +#include "../include/obd_support.h" +#include "../include/obd_class.h" +#include "../include/lustre_net.h" +#include "../include/obd_cksum.h" +#include "../include/lustre/ll_fiemap.h" + +static inline int lustre_msg_hdr_size_v2(int count) +{ + return cfs_size_round(offsetof(struct lustre_msg_v2, + lm_buflens[count])); +} + +int lustre_msg_hdr_size(__u32 magic, int count) +{ + switch (magic) { + case LUSTRE_MSG_MAGIC_V2: + return lustre_msg_hdr_size_v2(count); + default: + LASSERTF(0, "incorrect message magic: %08x\n", magic); + return -EINVAL; + } +} +EXPORT_SYMBOL(lustre_msg_hdr_size); + +void ptlrpc_buf_set_swabbed(struct ptlrpc_request *req, const int inout, + int index) +{ + if (inout) + lustre_set_req_swabbed(req, index); + else + lustre_set_rep_swabbed(req, index); +} +EXPORT_SYMBOL(ptlrpc_buf_set_swabbed); + +int ptlrpc_buf_need_swab(struct ptlrpc_request *req, const int inout, + int index) +{ + if (inout) + return (ptlrpc_req_need_swab(req) && + !lustre_req_swabbed(req, index)); + else + return (ptlrpc_rep_need_swab(req) && + !lustre_rep_swabbed(req, index)); +} +EXPORT_SYMBOL(ptlrpc_buf_need_swab); + +static inline int lustre_msg_check_version_v2(struct lustre_msg_v2 *msg, + __u32 version) +{ + __u32 ver = lustre_msg_get_version(msg); + return (ver & LUSTRE_VERSION_MASK) != version; +} + +int lustre_msg_check_version(struct lustre_msg *msg, __u32 version) +{ + switch (msg->lm_magic) { + case LUSTRE_MSG_MAGIC_V1: + CERROR("msg v1 not supported - please upgrade you system\n"); + return -EINVAL; + case LUSTRE_MSG_MAGIC_V2: + return lustre_msg_check_version_v2(msg, version); + default: + CERROR("incorrect message magic: %08x\n", msg->lm_magic); + return 0; + } +} +EXPORT_SYMBOL(lustre_msg_check_version); + +/* early reply size */ +int lustre_msg_early_size(void) +{ + static int size; + if (!size) { + /* Always reply old ptlrpc_body_v2 to keep interoperability + * with the old client (< 2.3) which doesn't have pb_jobid + * in the ptlrpc_body. + * + * XXX Remove this whenever we drop interoperability with such + * client. + */ + __u32 pblen = sizeof(struct ptlrpc_body_v2); + size = lustre_msg_size(LUSTRE_MSG_MAGIC_V2, 1, &pblen); + } + return size; +} +EXPORT_SYMBOL(lustre_msg_early_size); + +int lustre_msg_size_v2(int count, __u32 *lengths) +{ + int size; + int i; + + size = lustre_msg_hdr_size_v2(count); + for (i = 0; i < count; i++) + size += cfs_size_round(lengths[i]); + + return size; +} +EXPORT_SYMBOL(lustre_msg_size_v2); + +/* This returns the size of the buffer that is required to hold a lustre_msg + * with the given sub-buffer lengths. + * NOTE: this should only be used for NEW requests, and should always be + * in the form of a v2 request. If this is a connection to a v1 + * target then the first buffer will be stripped because the ptlrpc + * data is part of the lustre_msg_v1 header. b=14043 */ +int lustre_msg_size(__u32 magic, int count, __u32 *lens) +{ + __u32 size[] = { sizeof(struct ptlrpc_body) }; + + if (!lens) { + LASSERT(count == 1); + lens = size; + } + + LASSERT(count > 0); + LASSERT(lens[MSG_PTLRPC_BODY_OFF] >= sizeof(struct ptlrpc_body_v2)); + + switch (magic) { + case LUSTRE_MSG_MAGIC_V2: + return lustre_msg_size_v2(count, lens); + default: + LASSERTF(0, "incorrect message magic: %08x\n", magic); + return -EINVAL; + } +} +EXPORT_SYMBOL(lustre_msg_size); + +/* This is used to determine the size of a buffer that was already packed + * and will correctly handle the different message formats. */ +int lustre_packed_msg_size(struct lustre_msg *msg) +{ + switch (msg->lm_magic) { + case LUSTRE_MSG_MAGIC_V2: + return lustre_msg_size_v2(msg->lm_bufcount, msg->lm_buflens); + default: + CERROR("incorrect message magic: %08x\n", msg->lm_magic); + return 0; + } +} +EXPORT_SYMBOL(lustre_packed_msg_size); + +void lustre_init_msg_v2(struct lustre_msg_v2 *msg, int count, __u32 *lens, + char **bufs) +{ + char *ptr; + int i; + + msg->lm_bufcount = count; + /* XXX: lm_secflvr uninitialized here */ + msg->lm_magic = LUSTRE_MSG_MAGIC_V2; + + for (i = 0; i < count; i++) + msg->lm_buflens[i] = lens[i]; + + if (bufs == NULL) + return; + + ptr = (char *)msg + lustre_msg_hdr_size_v2(count); + for (i = 0; i < count; i++) { + char *tmp = bufs[i]; + LOGL(tmp, lens[i], ptr); + } +} +EXPORT_SYMBOL(lustre_init_msg_v2); + +static int lustre_pack_request_v2(struct ptlrpc_request *req, + int count, __u32 *lens, char **bufs) +{ + int reqlen, rc; + + reqlen = lustre_msg_size_v2(count, lens); + + rc = sptlrpc_cli_alloc_reqbuf(req, reqlen); + if (rc) + return rc; + + req->rq_reqlen = reqlen; + + lustre_init_msg_v2(req->rq_reqmsg, count, lens, bufs); + lustre_msg_add_version(req->rq_reqmsg, PTLRPC_MSG_VERSION); + return 0; +} + +int lustre_pack_request(struct ptlrpc_request *req, __u32 magic, int count, + __u32 *lens, char **bufs) +{ + __u32 size[] = { sizeof(struct ptlrpc_body) }; + + if (!lens) { + LASSERT(count == 1); + lens = size; + } + + LASSERT(count > 0); + LASSERT(lens[MSG_PTLRPC_BODY_OFF] == sizeof(struct ptlrpc_body)); + + /* only use new format, we don't need to be compatible with 1.4 */ + return lustre_pack_request_v2(req, count, lens, bufs); +} +EXPORT_SYMBOL(lustre_pack_request); + +#if RS_DEBUG +LIST_HEAD(ptlrpc_rs_debug_lru); +spinlock_t ptlrpc_rs_debug_lock; + +#define PTLRPC_RS_DEBUG_LRU_ADD(rs) \ +do { \ + spin_lock(&ptlrpc_rs_debug_lock); \ + list_add_tail(&(rs)->rs_debug_list, &ptlrpc_rs_debug_lru); \ + spin_unlock(&ptlrpc_rs_debug_lock); \ +} while (0) + +#define PTLRPC_RS_DEBUG_LRU_DEL(rs) \ +do { \ + spin_lock(&ptlrpc_rs_debug_lock); \ + list_del(&(rs)->rs_debug_list); \ + spin_unlock(&ptlrpc_rs_debug_lock); \ +} while (0) +#else +# define PTLRPC_RS_DEBUG_LRU_ADD(rs) do {} while (0) +# define PTLRPC_RS_DEBUG_LRU_DEL(rs) do {} while (0) +#endif + +struct ptlrpc_reply_state * +lustre_get_emerg_rs(struct ptlrpc_service_part *svcpt) +{ + struct ptlrpc_reply_state *rs = NULL; + + spin_lock(&svcpt->scp_rep_lock); + + /* See if we have anything in a pool, and wait if nothing */ + while (list_empty(&svcpt->scp_rep_idle)) { + struct l_wait_info lwi; + int rc; + + spin_unlock(&svcpt->scp_rep_lock); + /* If we cannot get anything for some long time, we better + * bail out instead of waiting infinitely */ + lwi = LWI_TIMEOUT(cfs_time_seconds(10), NULL, NULL); + rc = l_wait_event(svcpt->scp_rep_waitq, + !list_empty(&svcpt->scp_rep_idle), &lwi); + if (rc != 0) + goto out; + spin_lock(&svcpt->scp_rep_lock); + } + + rs = list_entry(svcpt->scp_rep_idle.next, + struct ptlrpc_reply_state, rs_list); + list_del(&rs->rs_list); + + spin_unlock(&svcpt->scp_rep_lock); + + memset(rs, 0, svcpt->scp_service->srv_max_reply_size); + rs->rs_size = svcpt->scp_service->srv_max_reply_size; + rs->rs_svcpt = svcpt; + rs->rs_prealloc = 1; +out: + return rs; +} + +void lustre_put_emerg_rs(struct ptlrpc_reply_state *rs) +{ + struct ptlrpc_service_part *svcpt = rs->rs_svcpt; + + spin_lock(&svcpt->scp_rep_lock); + list_add(&rs->rs_list, &svcpt->scp_rep_idle); + spin_unlock(&svcpt->scp_rep_lock); + wake_up(&svcpt->scp_rep_waitq); +} + +int lustre_pack_reply_v2(struct ptlrpc_request *req, int count, + __u32 *lens, char **bufs, int flags) +{ + struct ptlrpc_reply_state *rs; + int msg_len, rc; + + LASSERT(req->rq_reply_state == NULL); + + if ((flags & LPRFL_EARLY_REPLY) == 0) { + spin_lock(&req->rq_lock); + req->rq_packed_final = 1; + spin_unlock(&req->rq_lock); + } + + msg_len = lustre_msg_size_v2(count, lens); + rc = sptlrpc_svc_alloc_rs(req, msg_len); + if (rc) + return rc; + + rs = req->rq_reply_state; + atomic_set(&rs->rs_refcount, 1); /* 1 ref for rq_reply_state */ + rs->rs_cb_id.cbid_fn = reply_out_callback; + rs->rs_cb_id.cbid_arg = rs; + rs->rs_svcpt = req->rq_rqbd->rqbd_svcpt; + INIT_LIST_HEAD(&rs->rs_exp_list); + INIT_LIST_HEAD(&rs->rs_obd_list); + INIT_LIST_HEAD(&rs->rs_list); + spin_lock_init(&rs->rs_lock); + + req->rq_replen = msg_len; + req->rq_reply_state = rs; + req->rq_repmsg = rs->rs_msg; + + lustre_init_msg_v2(rs->rs_msg, count, lens, bufs); + lustre_msg_add_version(rs->rs_msg, PTLRPC_MSG_VERSION); + + PTLRPC_RS_DEBUG_LRU_ADD(rs); + + return 0; +} +EXPORT_SYMBOL(lustre_pack_reply_v2); + +int lustre_pack_reply_flags(struct ptlrpc_request *req, int count, __u32 *lens, + char **bufs, int flags) +{ + int rc = 0; + __u32 size[] = { sizeof(struct ptlrpc_body) }; + + if (!lens) { + LASSERT(count == 1); + lens = size; + } + + LASSERT(count > 0); + LASSERT(lens[MSG_PTLRPC_BODY_OFF] == sizeof(struct ptlrpc_body)); + + switch (req->rq_reqmsg->lm_magic) { + case LUSTRE_MSG_MAGIC_V2: + rc = lustre_pack_reply_v2(req, count, lens, bufs, flags); + break; + default: + LASSERTF(0, "incorrect message magic: %08x\n", + req->rq_reqmsg->lm_magic); + rc = -EINVAL; + } + if (rc != 0) + CERROR("lustre_pack_reply failed: rc=%d size=%d\n", rc, + lustre_msg_size(req->rq_reqmsg->lm_magic, count, lens)); + return rc; +} +EXPORT_SYMBOL(lustre_pack_reply_flags); + +int lustre_pack_reply(struct ptlrpc_request *req, int count, __u32 *lens, + char **bufs) +{ + return lustre_pack_reply_flags(req, count, lens, bufs, 0); +} +EXPORT_SYMBOL(lustre_pack_reply); + +void *lustre_msg_buf_v2(struct lustre_msg_v2 *m, int n, int min_size) +{ + int i, offset, buflen, bufcount; + + LASSERT(m != NULL); + LASSERT(n >= 0); + + bufcount = m->lm_bufcount; + if (unlikely(n >= bufcount)) { + CDEBUG(D_INFO, "msg %p buffer[%d] not present (count %d)\n", + m, n, bufcount); + return NULL; + } + + buflen = m->lm_buflens[n]; + if (unlikely(buflen < min_size)) { + CERROR("msg %p buffer[%d] size %d too small (required %d, opc=%d)\n", + m, n, buflen, min_size, + n == MSG_PTLRPC_BODY_OFF ? -1 : lustre_msg_get_opc(m)); + return NULL; + } + + offset = lustre_msg_hdr_size_v2(bufcount); + for (i = 0; i < n; i++) + offset += cfs_size_round(m->lm_buflens[i]); + + return (char *)m + offset; +} + +void *lustre_msg_buf(struct lustre_msg *m, int n, int min_size) +{ + switch (m->lm_magic) { + case LUSTRE_MSG_MAGIC_V2: + return lustre_msg_buf_v2(m, n, min_size); + default: + LASSERTF(0, "incorrect message magic: %08x(msg:%p)\n", m->lm_magic, m); + return NULL; + } +} +EXPORT_SYMBOL(lustre_msg_buf); + +int lustre_shrink_msg_v2(struct lustre_msg_v2 *msg, int segment, + unsigned int newlen, int move_data) +{ + char *tail = NULL, *newpos; + int tail_len = 0, n; + + LASSERT(msg); + LASSERT(msg->lm_bufcount > segment); + LASSERT(msg->lm_buflens[segment] >= newlen); + + if (msg->lm_buflens[segment] == newlen) + goto out; + + if (move_data && msg->lm_bufcount > segment + 1) { + tail = lustre_msg_buf_v2(msg, segment + 1, 0); + for (n = segment + 1; n < msg->lm_bufcount; n++) + tail_len += cfs_size_round(msg->lm_buflens[n]); + } + + msg->lm_buflens[segment] = newlen; + + if (tail && tail_len) { + newpos = lustre_msg_buf_v2(msg, segment + 1, 0); + LASSERT(newpos <= tail); + if (newpos != tail) + memmove(newpos, tail, tail_len); + } +out: + return lustre_msg_size_v2(msg->lm_bufcount, msg->lm_buflens); +} + +/* + * for @msg, shrink @segment to size @newlen. if @move_data is non-zero, + * we also move data forward from @segment + 1. + * + * if @newlen == 0, we remove the segment completely, but we still keep the + * totally bufcount the same to save possible data moving. this will leave a + * unused segment with size 0 at the tail, but that's ok. + * + * return new msg size after shrinking. + * + * CAUTION: + * + if any buffers higher than @segment has been filled in, must call shrink + * with non-zero @move_data. + * + caller should NOT keep pointers to msg buffers which higher than @segment + * after call shrink. + */ +int lustre_shrink_msg(struct lustre_msg *msg, int segment, + unsigned int newlen, int move_data) +{ + switch (msg->lm_magic) { + case LUSTRE_MSG_MAGIC_V2: + return lustre_shrink_msg_v2(msg, segment, newlen, move_data); + default: + LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic); + } +} +EXPORT_SYMBOL(lustre_shrink_msg); + +void lustre_free_reply_state(struct ptlrpc_reply_state *rs) +{ + PTLRPC_RS_DEBUG_LRU_DEL(rs); + + LASSERT(atomic_read(&rs->rs_refcount) == 0); + LASSERT(!rs->rs_difficult || rs->rs_handled); + LASSERT(!rs->rs_on_net); + LASSERT(!rs->rs_scheduled); + LASSERT(rs->rs_export == NULL); + LASSERT(rs->rs_nlocks == 0); + LASSERT(list_empty(&rs->rs_exp_list)); + LASSERT(list_empty(&rs->rs_obd_list)); + + sptlrpc_svc_free_rs(rs); +} +EXPORT_SYMBOL(lustre_free_reply_state); + +static int lustre_unpack_msg_v2(struct lustre_msg_v2 *m, int len) +{ + int swabbed, required_len, i; + + /* Now we know the sender speaks my language. */ + required_len = lustre_msg_hdr_size_v2(0); + if (len < required_len) { + /* can't even look inside the message */ + CERROR("message length %d too small for lustre_msg\n", len); + return -EINVAL; + } + + swabbed = (m->lm_magic == LUSTRE_MSG_MAGIC_V2_SWABBED); + + if (swabbed) { + __swab32s(&m->lm_magic); + __swab32s(&m->lm_bufcount); + __swab32s(&m->lm_secflvr); + __swab32s(&m->lm_repsize); + __swab32s(&m->lm_cksum); + __swab32s(&m->lm_flags); + CLASSERT(offsetof(typeof(*m), lm_padding_2) != 0); + CLASSERT(offsetof(typeof(*m), lm_padding_3) != 0); + } + + required_len = lustre_msg_hdr_size_v2(m->lm_bufcount); + if (len < required_len) { + /* didn't receive all the buffer lengths */ + CERROR("message length %d too small for %d buflens\n", + len, m->lm_bufcount); + return -EINVAL; + } + + for (i = 0; i < m->lm_bufcount; i++) { + if (swabbed) + __swab32s(&m->lm_buflens[i]); + required_len += cfs_size_round(m->lm_buflens[i]); + } + + if (len < required_len) { + CERROR("len: %d, required_len %d\n", len, required_len); + CERROR("bufcount: %d\n", m->lm_bufcount); + for (i = 0; i < m->lm_bufcount; i++) + CERROR("buffer %d length %d\n", i, m->lm_buflens[i]); + return -EINVAL; + } + + return swabbed; +} + +int __lustre_unpack_msg(struct lustre_msg *m, int len) +{ + int required_len, rc; + + /* We can provide a slightly better error log, if we check the + * message magic and version first. In the future, struct + * lustre_msg may grow, and we'd like to log a version mismatch, + * rather than a short message. + * + */ + required_len = offsetof(struct lustre_msg, lm_magic) + + sizeof(m->lm_magic); + if (len < required_len) { + /* can't even look inside the message */ + CERROR("message length %d too small for magic/version check\n", + len); + return -EINVAL; + } + + rc = lustre_unpack_msg_v2(m, len); + + return rc; +} +EXPORT_SYMBOL(__lustre_unpack_msg); + +int ptlrpc_unpack_req_msg(struct ptlrpc_request *req, int len) +{ + int rc; + rc = __lustre_unpack_msg(req->rq_reqmsg, len); + if (rc == 1) { + lustre_set_req_swabbed(req, MSG_PTLRPC_HEADER_OFF); + rc = 0; + } + return rc; +} +EXPORT_SYMBOL(ptlrpc_unpack_req_msg); + +int ptlrpc_unpack_rep_msg(struct ptlrpc_request *req, int len) +{ + int rc; + rc = __lustre_unpack_msg(req->rq_repmsg, len); + if (rc == 1) { + lustre_set_rep_swabbed(req, MSG_PTLRPC_HEADER_OFF); + rc = 0; + } + return rc; +} +EXPORT_SYMBOL(ptlrpc_unpack_rep_msg); + +static inline int lustre_unpack_ptlrpc_body_v2(struct ptlrpc_request *req, + const int inout, int offset) +{ + struct ptlrpc_body *pb; + struct lustre_msg_v2 *m = inout ? req->rq_reqmsg : req->rq_repmsg; + + pb = lustre_msg_buf_v2(m, offset, sizeof(struct ptlrpc_body_v2)); + if (!pb) { + CERROR("error unpacking ptlrpc body\n"); + return -EFAULT; + } + if (ptlrpc_buf_need_swab(req, inout, offset)) { + lustre_swab_ptlrpc_body(pb); + ptlrpc_buf_set_swabbed(req, inout, offset); + } + + if ((pb->pb_version & ~LUSTRE_VERSION_MASK) != PTLRPC_MSG_VERSION) { + CERROR("wrong lustre_msg version %08x\n", pb->pb_version); + return -EINVAL; + } + + if (!inout) + pb->pb_status = ptlrpc_status_ntoh(pb->pb_status); + + return 0; +} + +int lustre_unpack_req_ptlrpc_body(struct ptlrpc_request *req, int offset) +{ + switch (req->rq_reqmsg->lm_magic) { + case LUSTRE_MSG_MAGIC_V2: + return lustre_unpack_ptlrpc_body_v2(req, 1, offset); + default: + CERROR("bad lustre msg magic: %08x\n", + req->rq_reqmsg->lm_magic); + return -EINVAL; + } +} + +int lustre_unpack_rep_ptlrpc_body(struct ptlrpc_request *req, int offset) +{ + switch (req->rq_repmsg->lm_magic) { + case LUSTRE_MSG_MAGIC_V2: + return lustre_unpack_ptlrpc_body_v2(req, 0, offset); + default: + CERROR("bad lustre msg magic: %08x\n", + req->rq_repmsg->lm_magic); + return -EINVAL; + } +} + +static inline int lustre_msg_buflen_v2(struct lustre_msg_v2 *m, int n) +{ + if (n >= m->lm_bufcount) + return 0; + + return m->lm_buflens[n]; +} + +/** + * lustre_msg_buflen - return the length of buffer \a n in message \a m + * \param m lustre_msg (request or reply) to look at + * \param n message index (base 0) + * + * returns zero for non-existent message indices + */ +int lustre_msg_buflen(struct lustre_msg *m, int n) +{ + switch (m->lm_magic) { + case LUSTRE_MSG_MAGIC_V2: + return lustre_msg_buflen_v2(m, n); + default: + CERROR("incorrect message magic: %08x\n", m->lm_magic); + return -EINVAL; + } +} +EXPORT_SYMBOL(lustre_msg_buflen); + +static inline void +lustre_msg_set_buflen_v2(struct lustre_msg_v2 *m, int n, int len) +{ + if (n >= m->lm_bufcount) + LBUG(); + + m->lm_buflens[n] = len; +} + +void lustre_msg_set_buflen(struct lustre_msg *m, int n, int len) +{ + switch (m->lm_magic) { + case LUSTRE_MSG_MAGIC_V2: + lustre_msg_set_buflen_v2(m, n, len); + return; + default: + LASSERTF(0, "incorrect message magic: %08x\n", m->lm_magic); + } +} + +EXPORT_SYMBOL(lustre_msg_set_buflen); + +/* NB return the bufcount for lustre_msg_v2 format, so if message is packed + * in V1 format, the result is one bigger. (add struct ptlrpc_body). */ +int lustre_msg_bufcount(struct lustre_msg *m) +{ + switch (m->lm_magic) { + case LUSTRE_MSG_MAGIC_V2: + return m->lm_bufcount; + default: + CERROR("incorrect message magic: %08x\n", m->lm_magic); + return -EINVAL; + } +} +EXPORT_SYMBOL(lustre_msg_bufcount); + +char *lustre_msg_string(struct lustre_msg *m, int index, int max_len) +{ + /* max_len == 0 means the string should fill the buffer */ + char *str; + int slen, blen; + + switch (m->lm_magic) { + case LUSTRE_MSG_MAGIC_V2: + str = lustre_msg_buf_v2(m, index, 0); + blen = lustre_msg_buflen_v2(m, index); + break; + default: + LASSERTF(0, "incorrect message magic: %08x\n", m->lm_magic); + } + + if (str == NULL) { + CERROR("can't unpack string in msg %p buffer[%d]\n", m, index); + return NULL; + } + + slen = strnlen(str, blen); + + if (slen == blen) { /* not NULL terminated */ + CERROR("can't unpack non-NULL terminated string in msg %p buffer[%d] len %d\n", + m, index, blen); + return NULL; + } + + if (max_len == 0) { + if (slen != blen - 1) { + CERROR("can't unpack short string in msg %p buffer[%d] len %d: strlen %d\n", + m, index, blen, slen); + return NULL; + } + } else if (slen > max_len) { + CERROR("can't unpack oversized string in msg %p buffer[%d] len %d strlen %d: max %d expected\n", + m, index, blen, slen, max_len); + return NULL; + } + + return str; +} +EXPORT_SYMBOL(lustre_msg_string); + +/* Wrap up the normal fixed length cases */ +static inline void *__lustre_swab_buf(struct lustre_msg *msg, int index, + int min_size, void *swabber) +{ + void *ptr = NULL; + + LASSERT(msg != NULL); + switch (msg->lm_magic) { + case LUSTRE_MSG_MAGIC_V2: + ptr = lustre_msg_buf_v2(msg, index, min_size); + break; + default: + CERROR("incorrect message magic: %08x\n", msg->lm_magic); + } + + if (ptr && swabber) + ((void (*)(void *))swabber)(ptr); + + return ptr; +} + +static inline struct ptlrpc_body *lustre_msg_ptlrpc_body(struct lustre_msg *msg) +{ + return lustre_msg_buf_v2(msg, MSG_PTLRPC_BODY_OFF, + sizeof(struct ptlrpc_body_v2)); +} + +__u32 lustre_msghdr_get_flags(struct lustre_msg *msg) +{ + switch (msg->lm_magic) { + case LUSTRE_MSG_MAGIC_V1: + case LUSTRE_MSG_MAGIC_V1_SWABBED: + return 0; + case LUSTRE_MSG_MAGIC_V2: + /* already in host endian */ + return msg->lm_flags; + default: + LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic); + return 0; + } +} +EXPORT_SYMBOL(lustre_msghdr_get_flags); + +void lustre_msghdr_set_flags(struct lustre_msg *msg, __u32 flags) +{ + switch (msg->lm_magic) { + case LUSTRE_MSG_MAGIC_V1: + return; + case LUSTRE_MSG_MAGIC_V2: + msg->lm_flags = flags; + return; + default: + LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic); + } +} + +__u32 lustre_msg_get_flags(struct lustre_msg *msg) +{ + switch (msg->lm_magic) { + case LUSTRE_MSG_MAGIC_V2: { + struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg); + if (!pb) { + CERROR("invalid msg %p: no ptlrpc body!\n", msg); + return 0; + } + return pb->pb_flags; + } + default: + /* flags might be printed in debug code while message + * uninitialized */ + return 0; + } +} +EXPORT_SYMBOL(lustre_msg_get_flags); + +void lustre_msg_add_flags(struct lustre_msg *msg, int flags) +{ + switch (msg->lm_magic) { + case LUSTRE_MSG_MAGIC_V2: { + struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg); + LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg); + pb->pb_flags |= flags; + return; + } + default: + LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic); + } +} +EXPORT_SYMBOL(lustre_msg_add_flags); + +void lustre_msg_set_flags(struct lustre_msg *msg, int flags) +{ + switch (msg->lm_magic) { + case LUSTRE_MSG_MAGIC_V2: { + struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg); + LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg); + pb->pb_flags = flags; + return; + } + default: + LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic); + } +} +EXPORT_SYMBOL(lustre_msg_set_flags); + +void lustre_msg_clear_flags(struct lustre_msg *msg, int flags) +{ + switch (msg->lm_magic) { + case LUSTRE_MSG_MAGIC_V2: { + struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg); + LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg); + pb->pb_flags &= ~(MSG_GEN_FLAG_MASK & flags); + return; + } + default: + LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic); + } +} +EXPORT_SYMBOL(lustre_msg_clear_flags); + +__u32 lustre_msg_get_op_flags(struct lustre_msg *msg) +{ + switch (msg->lm_magic) { + case LUSTRE_MSG_MAGIC_V2: { + struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg); + if (!pb) { + CERROR("invalid msg %p: no ptlrpc body!\n", msg); + return 0; + } + return pb->pb_op_flags; + } + default: + return 0; + } +} +EXPORT_SYMBOL(lustre_msg_get_op_flags); + +void lustre_msg_add_op_flags(struct lustre_msg *msg, int flags) +{ + switch (msg->lm_magic) { + case LUSTRE_MSG_MAGIC_V2: { + struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg); + LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg); + pb->pb_op_flags |= flags; + return; + } + default: + LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic); + } +} +EXPORT_SYMBOL(lustre_msg_add_op_flags); + +void lustre_msg_set_op_flags(struct lustre_msg *msg, int flags) +{ + switch (msg->lm_magic) { + case LUSTRE_MSG_MAGIC_V2: { + struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg); + LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg); + pb->pb_op_flags |= flags; + return; + } + default: + LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic); + } +} +EXPORT_SYMBOL(lustre_msg_set_op_flags); + +struct lustre_handle *lustre_msg_get_handle(struct lustre_msg *msg) +{ + switch (msg->lm_magic) { + case LUSTRE_MSG_MAGIC_V2: { + struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg); + if (!pb) { + CERROR("invalid msg %p: no ptlrpc body!\n", msg); + return NULL; + } + return &pb->pb_handle; + } + default: + CERROR("incorrect message magic: %08x\n", msg->lm_magic); + return NULL; + } +} +EXPORT_SYMBOL(lustre_msg_get_handle); + +__u32 lustre_msg_get_type(struct lustre_msg *msg) +{ + switch (msg->lm_magic) { + case LUSTRE_MSG_MAGIC_V2: { + struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg); + if (!pb) { + CERROR("invalid msg %p: no ptlrpc body!\n", msg); + return PTL_RPC_MSG_ERR; + } + return pb->pb_type; + } + default: + CERROR("incorrect message magic: %08x\n", msg->lm_magic); + return PTL_RPC_MSG_ERR; + } +} +EXPORT_SYMBOL(lustre_msg_get_type); + +__u32 lustre_msg_get_version(struct lustre_msg *msg) +{ + switch (msg->lm_magic) { + case LUSTRE_MSG_MAGIC_V2: { + struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg); + if (!pb) { + CERROR("invalid msg %p: no ptlrpc body!\n", msg); + return 0; + } + return pb->pb_version; + } + default: + CERROR("incorrect message magic: %08x\n", msg->lm_magic); + return 0; + } +} +EXPORT_SYMBOL(lustre_msg_get_version); + +void lustre_msg_add_version(struct lustre_msg *msg, int version) +{ + switch (msg->lm_magic) { + case LUSTRE_MSG_MAGIC_V2: { + struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg); + LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg); + pb->pb_version |= version; + return; + } + default: + LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic); + } +} +EXPORT_SYMBOL(lustre_msg_add_version); + +__u32 lustre_msg_get_opc(struct lustre_msg *msg) +{ + switch (msg->lm_magic) { + case LUSTRE_MSG_MAGIC_V2: { + struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg); + if (!pb) { + CERROR("invalid msg %p: no ptlrpc body!\n", msg); + return 0; + } + return pb->pb_opc; + } + default: + CERROR("incorrect message magic: %08x(msg:%p)\n", msg->lm_magic, msg); + LBUG(); + return 0; + } +} +EXPORT_SYMBOL(lustre_msg_get_opc); + +__u64 lustre_msg_get_last_xid(struct lustre_msg *msg) +{ + switch (msg->lm_magic) { + case LUSTRE_MSG_MAGIC_V2: { + struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg); + if (!pb) { + CERROR("invalid msg %p: no ptlrpc body!\n", msg); + return 0; + } + return pb->pb_last_xid; + } + default: + CERROR("incorrect message magic: %08x\n", msg->lm_magic); + return 0; + } +} +EXPORT_SYMBOL(lustre_msg_get_last_xid); + +__u64 lustre_msg_get_last_committed(struct lustre_msg *msg) +{ + switch (msg->lm_magic) { + case LUSTRE_MSG_MAGIC_V2: { + struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg); + if (!pb) { + CERROR("invalid msg %p: no ptlrpc body!\n", msg); + return 0; + } + return pb->pb_last_committed; + } + default: + CERROR("incorrect message magic: %08x\n", msg->lm_magic); + return 0; + } +} +EXPORT_SYMBOL(lustre_msg_get_last_committed); + +__u64 *lustre_msg_get_versions(struct lustre_msg *msg) +{ + switch (msg->lm_magic) { + case LUSTRE_MSG_MAGIC_V1: + return NULL; + case LUSTRE_MSG_MAGIC_V2: { + struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg); + if (!pb) { + CERROR("invalid msg %p: no ptlrpc body!\n", msg); + return NULL; + } + return pb->pb_pre_versions; + } + default: + CERROR("incorrect message magic: %08x\n", msg->lm_magic); + return NULL; + } +} +EXPORT_SYMBOL(lustre_msg_get_versions); + +__u64 lustre_msg_get_transno(struct lustre_msg *msg) +{ + switch (msg->lm_magic) { + case LUSTRE_MSG_MAGIC_V2: { + struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg); + if (!pb) { + CERROR("invalid msg %p: no ptlrpc body!\n", msg); + return 0; + } + return pb->pb_transno; + } + default: + CERROR("incorrect message magic: %08x\n", msg->lm_magic); + return 0; + } +} +EXPORT_SYMBOL(lustre_msg_get_transno); + +int lustre_msg_get_status(struct lustre_msg *msg) +{ + switch (msg->lm_magic) { + case LUSTRE_MSG_MAGIC_V2: { + struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg); + if (!pb) { + CERROR("invalid msg %p: no ptlrpc body!\n", msg); + return -EINVAL; + } + return pb->pb_status; + } + default: + /* status might be printed in debug code while message + * uninitialized */ + return -EINVAL; + } +} +EXPORT_SYMBOL(lustre_msg_get_status); + +__u64 lustre_msg_get_slv(struct lustre_msg *msg) +{ + switch (msg->lm_magic) { + case LUSTRE_MSG_MAGIC_V2: { + struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg); + if (!pb) { + CERROR("invalid msg %p: no ptlrpc body!\n", msg); + return -EINVAL; + } + return pb->pb_slv; + } + default: + CERROR("invalid msg magic %08x\n", msg->lm_magic); + return -EINVAL; + } +} +EXPORT_SYMBOL(lustre_msg_get_slv); + + +void lustre_msg_set_slv(struct lustre_msg *msg, __u64 slv) +{ + switch (msg->lm_magic) { + case LUSTRE_MSG_MAGIC_V2: { + struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg); + if (!pb) { + CERROR("invalid msg %p: no ptlrpc body!\n", msg); + return; + } + pb->pb_slv = slv; + return; + } + default: + CERROR("invalid msg magic %x\n", msg->lm_magic); + return; + } +} +EXPORT_SYMBOL(lustre_msg_set_slv); + +__u32 lustre_msg_get_limit(struct lustre_msg *msg) +{ + switch (msg->lm_magic) { + case LUSTRE_MSG_MAGIC_V2: { + struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg); + if (!pb) { + CERROR("invalid msg %p: no ptlrpc body!\n", msg); + return -EINVAL; + } + return pb->pb_limit; + } + default: + CERROR("invalid msg magic %x\n", msg->lm_magic); + return -EINVAL; + } +} +EXPORT_SYMBOL(lustre_msg_get_limit); + + +void lustre_msg_set_limit(struct lustre_msg *msg, __u64 limit) +{ + switch (msg->lm_magic) { + case LUSTRE_MSG_MAGIC_V2: { + struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg); + if (!pb) { + CERROR("invalid msg %p: no ptlrpc body!\n", msg); + return; + } + pb->pb_limit = limit; + return; + } + default: + CERROR("invalid msg magic %08x\n", msg->lm_magic); + return; + } +} +EXPORT_SYMBOL(lustre_msg_set_limit); + +__u32 lustre_msg_get_conn_cnt(struct lustre_msg *msg) +{ + switch (msg->lm_magic) { + case LUSTRE_MSG_MAGIC_V2: { + struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg); + if (!pb) { + CERROR("invalid msg %p: no ptlrpc body!\n", msg); + return 0; + } + return pb->pb_conn_cnt; + } + default: + CERROR("incorrect message magic: %08x\n", msg->lm_magic); + return 0; + } +} +EXPORT_SYMBOL(lustre_msg_get_conn_cnt); + +int lustre_msg_is_v1(struct lustre_msg *msg) +{ + switch (msg->lm_magic) { + case LUSTRE_MSG_MAGIC_V1: + case LUSTRE_MSG_MAGIC_V1_SWABBED: + return 1; + default: + return 0; + } +} +EXPORT_SYMBOL(lustre_msg_is_v1); + +__u32 lustre_msg_get_magic(struct lustre_msg *msg) +{ + switch (msg->lm_magic) { + case LUSTRE_MSG_MAGIC_V2: + return msg->lm_magic; + default: + CERROR("incorrect message magic: %08x\n", msg->lm_magic); + return 0; + } +} +EXPORT_SYMBOL(lustre_msg_get_magic); + +__u32 lustre_msg_get_timeout(struct lustre_msg *msg) +{ + switch (msg->lm_magic) { + case LUSTRE_MSG_MAGIC_V1: + case LUSTRE_MSG_MAGIC_V1_SWABBED: + return 0; + case LUSTRE_MSG_MAGIC_V2: { + struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg); + if (!pb) { + CERROR("invalid msg %p: no ptlrpc body!\n", msg); + return 0; + + } + return pb->pb_timeout; + } + default: + CERROR("incorrect message magic: %08x\n", msg->lm_magic); + return 0; + } +} + +__u32 lustre_msg_get_service_time(struct lustre_msg *msg) +{ + switch (msg->lm_magic) { + case LUSTRE_MSG_MAGIC_V1: + case LUSTRE_MSG_MAGIC_V1_SWABBED: + return 0; + case LUSTRE_MSG_MAGIC_V2: { + struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg); + if (!pb) { + CERROR("invalid msg %p: no ptlrpc body!\n", msg); + return 0; + + } + return pb->pb_service_time; + } + default: + CERROR("incorrect message magic: %08x\n", msg->lm_magic); + return 0; + } +} + +char *lustre_msg_get_jobid(struct lustre_msg *msg) +{ + switch (msg->lm_magic) { + case LUSTRE_MSG_MAGIC_V1: + case LUSTRE_MSG_MAGIC_V1_SWABBED: + return NULL; + case LUSTRE_MSG_MAGIC_V2: { + struct ptlrpc_body *pb = + lustre_msg_buf_v2(msg, MSG_PTLRPC_BODY_OFF, + sizeof(struct ptlrpc_body)); + if (!pb) + return NULL; + + return pb->pb_jobid; + } + default: + CERROR("incorrect message magic: %08x\n", msg->lm_magic); + return NULL; + } +} +EXPORT_SYMBOL(lustre_msg_get_jobid); + +__u32 lustre_msg_get_cksum(struct lustre_msg *msg) +{ + switch (msg->lm_magic) { + case LUSTRE_MSG_MAGIC_V2: + return msg->lm_cksum; + default: + CERROR("incorrect message magic: %08x\n", msg->lm_magic); + return 0; + } +} + +__u32 lustre_msg_calc_cksum(struct lustre_msg *msg) +{ + switch (msg->lm_magic) { + case LUSTRE_MSG_MAGIC_V2: { + struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg); + __u32 crc; + unsigned int hsize = 4; + cfs_crypto_hash_digest(CFS_HASH_ALG_CRC32, (unsigned char *)pb, + lustre_msg_buflen(msg, MSG_PTLRPC_BODY_OFF), + NULL, 0, (unsigned char *)&crc, &hsize); + return crc; + } + default: + CERROR("incorrect message magic: %08x\n", msg->lm_magic); + return 0; + } +} + +void lustre_msg_set_handle(struct lustre_msg *msg, struct lustre_handle *handle) +{ + switch (msg->lm_magic) { + case LUSTRE_MSG_MAGIC_V2: { + struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg); + LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg); + pb->pb_handle = *handle; + return; + } + default: + LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic); + } +} +EXPORT_SYMBOL(lustre_msg_set_handle); + +void lustre_msg_set_type(struct lustre_msg *msg, __u32 type) +{ + switch (msg->lm_magic) { + case LUSTRE_MSG_MAGIC_V2: { + struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg); + LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg); + pb->pb_type = type; + return; + } + default: + LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic); + } +} +EXPORT_SYMBOL(lustre_msg_set_type); + +void lustre_msg_set_opc(struct lustre_msg *msg, __u32 opc) +{ + switch (msg->lm_magic) { + case LUSTRE_MSG_MAGIC_V2: { + struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg); + LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg); + pb->pb_opc = opc; + return; + } + default: + LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic); + } +} +EXPORT_SYMBOL(lustre_msg_set_opc); + +void lustre_msg_set_last_xid(struct lustre_msg *msg, __u64 last_xid) +{ + switch (msg->lm_magic) { + case LUSTRE_MSG_MAGIC_V2: { + struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg); + LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg); + pb->pb_last_xid = last_xid; + return; + } + default: + LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic); + } +} +EXPORT_SYMBOL(lustre_msg_set_last_xid); + +void lustre_msg_set_last_committed(struct lustre_msg *msg, __u64 last_committed) +{ + switch (msg->lm_magic) { + case LUSTRE_MSG_MAGIC_V2: { + struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg); + LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg); + pb->pb_last_committed = last_committed; + return; + } + default: + LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic); + } +} +EXPORT_SYMBOL(lustre_msg_set_last_committed); + +void lustre_msg_set_versions(struct lustre_msg *msg, __u64 *versions) +{ + switch (msg->lm_magic) { + case LUSTRE_MSG_MAGIC_V1: + return; + case LUSTRE_MSG_MAGIC_V2: { + struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg); + LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg); + pb->pb_pre_versions[0] = versions[0]; + pb->pb_pre_versions[1] = versions[1]; + pb->pb_pre_versions[2] = versions[2]; + pb->pb_pre_versions[3] = versions[3]; + return; + } + default: + LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic); + } +} +EXPORT_SYMBOL(lustre_msg_set_versions); + +void lustre_msg_set_transno(struct lustre_msg *msg, __u64 transno) +{ + switch (msg->lm_magic) { + case LUSTRE_MSG_MAGIC_V2: { + struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg); + LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg); + pb->pb_transno = transno; + return; + } + default: + LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic); + } +} +EXPORT_SYMBOL(lustre_msg_set_transno); + +void lustre_msg_set_status(struct lustre_msg *msg, __u32 status) +{ + switch (msg->lm_magic) { + case LUSTRE_MSG_MAGIC_V2: { + struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg); + LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg); + pb->pb_status = status; + return; + } + default: + LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic); + } +} +EXPORT_SYMBOL(lustre_msg_set_status); + +void lustre_msg_set_conn_cnt(struct lustre_msg *msg, __u32 conn_cnt) +{ + switch (msg->lm_magic) { + case LUSTRE_MSG_MAGIC_V2: { + struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg); + LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg); + pb->pb_conn_cnt = conn_cnt; + return; + } + default: + LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic); + } +} +EXPORT_SYMBOL(lustre_msg_set_conn_cnt); + +void lustre_msg_set_timeout(struct lustre_msg *msg, __u32 timeout) +{ + switch (msg->lm_magic) { + case LUSTRE_MSG_MAGIC_V1: + return; + case LUSTRE_MSG_MAGIC_V2: { + struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg); + LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg); + pb->pb_timeout = timeout; + return; + } + default: + LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic); + } +} + +void lustre_msg_set_service_time(struct lustre_msg *msg, __u32 service_time) +{ + switch (msg->lm_magic) { + case LUSTRE_MSG_MAGIC_V1: + return; + case LUSTRE_MSG_MAGIC_V2: { + struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg); + LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg); + pb->pb_service_time = service_time; + return; + } + default: + LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic); + } +} + +void lustre_msg_set_jobid(struct lustre_msg *msg, char *jobid) +{ + switch (msg->lm_magic) { + case LUSTRE_MSG_MAGIC_V1: + return; + case LUSTRE_MSG_MAGIC_V2: { + __u32 opc = lustre_msg_get_opc(msg); + struct ptlrpc_body *pb; + + /* Don't set jobid for ldlm ast RPCs, they've been shrunk. + * See the comment in ptlrpc_request_pack(). */ + if (!opc || opc == LDLM_BL_CALLBACK || + opc == LDLM_CP_CALLBACK || opc == LDLM_GL_CALLBACK) + return; + + pb = lustre_msg_buf_v2(msg, MSG_PTLRPC_BODY_OFF, + sizeof(struct ptlrpc_body)); + LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg); + + if (jobid != NULL) + memcpy(pb->pb_jobid, jobid, JOBSTATS_JOBID_SIZE); + else if (pb->pb_jobid[0] == '\0') + lustre_get_jobid(pb->pb_jobid); + return; + } + default: + LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic); + } +} +EXPORT_SYMBOL(lustre_msg_set_jobid); + +void lustre_msg_set_cksum(struct lustre_msg *msg, __u32 cksum) +{ + switch (msg->lm_magic) { + case LUSTRE_MSG_MAGIC_V1: + return; + case LUSTRE_MSG_MAGIC_V2: + msg->lm_cksum = cksum; + return; + default: + LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic); + } +} + + +void ptlrpc_request_set_replen(struct ptlrpc_request *req) +{ + int count = req_capsule_filled_sizes(&req->rq_pill, RCL_SERVER); + + req->rq_replen = lustre_msg_size(req->rq_reqmsg->lm_magic, count, + req->rq_pill.rc_area[RCL_SERVER]); + if (req->rq_reqmsg->lm_magic == LUSTRE_MSG_MAGIC_V2) + req->rq_reqmsg->lm_repsize = req->rq_replen; +} +EXPORT_SYMBOL(ptlrpc_request_set_replen); + +void ptlrpc_req_set_repsize(struct ptlrpc_request *req, int count, __u32 *lens) +{ + req->rq_replen = lustre_msg_size(req->rq_reqmsg->lm_magic, count, lens); + if (req->rq_reqmsg->lm_magic == LUSTRE_MSG_MAGIC_V2) + req->rq_reqmsg->lm_repsize = req->rq_replen; +} +EXPORT_SYMBOL(ptlrpc_req_set_repsize); + +/** + * Send a remote set_info_async. + * + * This may go from client to server or server to client. + */ +int do_set_info_async(struct obd_import *imp, + int opcode, int version, + u32 keylen, void *key, + u32 vallen, void *val, + struct ptlrpc_request_set *set) +{ + struct ptlrpc_request *req; + char *tmp; + int rc; + + req = ptlrpc_request_alloc(imp, &RQF_OBD_SET_INFO); + if (req == NULL) + return -ENOMEM; + + req_capsule_set_size(&req->rq_pill, &RMF_SETINFO_KEY, + RCL_CLIENT, keylen); + req_capsule_set_size(&req->rq_pill, &RMF_SETINFO_VAL, + RCL_CLIENT, vallen); + rc = ptlrpc_request_pack(req, version, opcode); + if (rc) { + ptlrpc_request_free(req); + return rc; + } + + tmp = req_capsule_client_get(&req->rq_pill, &RMF_SETINFO_KEY); + memcpy(tmp, key, keylen); + tmp = req_capsule_client_get(&req->rq_pill, &RMF_SETINFO_VAL); + memcpy(tmp, val, vallen); + + ptlrpc_request_set_replen(req); + + if (set) { + ptlrpc_set_add_req(set, req); + ptlrpc_check_set(NULL, set); + } else { + rc = ptlrpc_queue_wait(req); + ptlrpc_req_finished(req); + } + + return rc; +} +EXPORT_SYMBOL(do_set_info_async); + +/* byte flipping routines for all wire types declared in + * lustre_idl.h implemented here. + */ +void lustre_swab_ptlrpc_body(struct ptlrpc_body *b) +{ + __swab32s(&b->pb_type); + __swab32s(&b->pb_version); + __swab32s(&b->pb_opc); + __swab32s(&b->pb_status); + __swab64s(&b->pb_last_xid); + __swab64s(&b->pb_last_seen); + __swab64s(&b->pb_last_committed); + __swab64s(&b->pb_transno); + __swab32s(&b->pb_flags); + __swab32s(&b->pb_op_flags); + __swab32s(&b->pb_conn_cnt); + __swab32s(&b->pb_timeout); + __swab32s(&b->pb_service_time); + __swab32s(&b->pb_limit); + __swab64s(&b->pb_slv); + __swab64s(&b->pb_pre_versions[0]); + __swab64s(&b->pb_pre_versions[1]); + __swab64s(&b->pb_pre_versions[2]); + __swab64s(&b->pb_pre_versions[3]); + CLASSERT(offsetof(typeof(*b), pb_padding) != 0); + /* While we need to maintain compatibility between + * clients and servers without ptlrpc_body_v2 (< 2.3) + * do not swab any fields beyond pb_jobid, as we are + * using this swab function for both ptlrpc_body + * and ptlrpc_body_v2. */ + CLASSERT(offsetof(typeof(*b), pb_jobid) != 0); +} +EXPORT_SYMBOL(lustre_swab_ptlrpc_body); + +void lustre_swab_connect(struct obd_connect_data *ocd) +{ + __swab64s(&ocd->ocd_connect_flags); + __swab32s(&ocd->ocd_version); + __swab32s(&ocd->ocd_grant); + __swab64s(&ocd->ocd_ibits_known); + __swab32s(&ocd->ocd_index); + __swab32s(&ocd->ocd_brw_size); + /* ocd_blocksize and ocd_inodespace don't need to be swabbed because + * they are 8-byte values */ + __swab16s(&ocd->ocd_grant_extent); + __swab32s(&ocd->ocd_unused); + __swab64s(&ocd->ocd_transno); + __swab32s(&ocd->ocd_group); + __swab32s(&ocd->ocd_cksum_types); + __swab32s(&ocd->ocd_instance); + /* Fields after ocd_cksum_types are only accessible by the receiver + * if the corresponding flag in ocd_connect_flags is set. Accessing + * any field after ocd_maxbytes on the receiver without a valid flag + * may result in out-of-bound memory access and kernel oops. */ + if (ocd->ocd_connect_flags & OBD_CONNECT_MAX_EASIZE) + __swab32s(&ocd->ocd_max_easize); + if (ocd->ocd_connect_flags & OBD_CONNECT_MAXBYTES) + __swab64s(&ocd->ocd_maxbytes); + CLASSERT(offsetof(typeof(*ocd), padding1) != 0); + CLASSERT(offsetof(typeof(*ocd), padding2) != 0); + CLASSERT(offsetof(typeof(*ocd), padding3) != 0); + CLASSERT(offsetof(typeof(*ocd), padding4) != 0); + CLASSERT(offsetof(typeof(*ocd), padding5) != 0); + CLASSERT(offsetof(typeof(*ocd), padding6) != 0); + CLASSERT(offsetof(typeof(*ocd), padding7) != 0); + CLASSERT(offsetof(typeof(*ocd), padding8) != 0); + CLASSERT(offsetof(typeof(*ocd), padding9) != 0); + CLASSERT(offsetof(typeof(*ocd), paddingA) != 0); + CLASSERT(offsetof(typeof(*ocd), paddingB) != 0); + CLASSERT(offsetof(typeof(*ocd), paddingC) != 0); + CLASSERT(offsetof(typeof(*ocd), paddingD) != 0); + CLASSERT(offsetof(typeof(*ocd), paddingE) != 0); + CLASSERT(offsetof(typeof(*ocd), paddingF) != 0); +} + +void lustre_swab_obdo(struct obdo *o) +{ + __swab64s(&o->o_valid); + lustre_swab_ost_id(&o->o_oi); + __swab64s(&o->o_parent_seq); + __swab64s(&o->o_size); + __swab64s(&o->o_mtime); + __swab64s(&o->o_atime); + __swab64s(&o->o_ctime); + __swab64s(&o->o_blocks); + __swab64s(&o->o_grant); + __swab32s(&o->o_blksize); + __swab32s(&o->o_mode); + __swab32s(&o->o_uid); + __swab32s(&o->o_gid); + __swab32s(&o->o_flags); + __swab32s(&o->o_nlink); + __swab32s(&o->o_parent_oid); + __swab32s(&o->o_misc); + __swab64s(&o->o_ioepoch); + __swab32s(&o->o_stripe_idx); + __swab32s(&o->o_parent_ver); + /* o_handle is opaque */ + /* o_lcookie is swabbed elsewhere */ + __swab32s(&o->o_uid_h); + __swab32s(&o->o_gid_h); + __swab64s(&o->o_data_version); + CLASSERT(offsetof(typeof(*o), o_padding_4) != 0); + CLASSERT(offsetof(typeof(*o), o_padding_5) != 0); + CLASSERT(offsetof(typeof(*o), o_padding_6) != 0); + +} +EXPORT_SYMBOL(lustre_swab_obdo); + +void lustre_swab_obd_statfs(struct obd_statfs *os) +{ + __swab64s(&os->os_type); + __swab64s(&os->os_blocks); + __swab64s(&os->os_bfree); + __swab64s(&os->os_bavail); + __swab64s(&os->os_files); + __swab64s(&os->os_ffree); + /* no need to swab os_fsid */ + __swab32s(&os->os_bsize); + __swab32s(&os->os_namelen); + __swab64s(&os->os_maxbytes); + __swab32s(&os->os_state); + CLASSERT(offsetof(typeof(*os), os_fprecreated) != 0); + CLASSERT(offsetof(typeof(*os), os_spare2) != 0); + CLASSERT(offsetof(typeof(*os), os_spare3) != 0); + CLASSERT(offsetof(typeof(*os), os_spare4) != 0); + CLASSERT(offsetof(typeof(*os), os_spare5) != 0); + CLASSERT(offsetof(typeof(*os), os_spare6) != 0); + CLASSERT(offsetof(typeof(*os), os_spare7) != 0); + CLASSERT(offsetof(typeof(*os), os_spare8) != 0); + CLASSERT(offsetof(typeof(*os), os_spare9) != 0); +} +EXPORT_SYMBOL(lustre_swab_obd_statfs); + +void lustre_swab_obd_ioobj(struct obd_ioobj *ioo) +{ + lustre_swab_ost_id(&ioo->ioo_oid); + __swab32s(&ioo->ioo_max_brw); + __swab32s(&ioo->ioo_bufcnt); +} +EXPORT_SYMBOL(lustre_swab_obd_ioobj); + +void lustre_swab_niobuf_remote(struct niobuf_remote *nbr) +{ + __swab64s(&nbr->offset); + __swab32s(&nbr->len); + __swab32s(&nbr->flags); +} +EXPORT_SYMBOL(lustre_swab_niobuf_remote); + +void lustre_swab_ost_body(struct ost_body *b) +{ + lustre_swab_obdo(&b->oa); +} +EXPORT_SYMBOL(lustre_swab_ost_body); + +void lustre_swab_ost_last_id(u64 *id) +{ + __swab64s(id); +} +EXPORT_SYMBOL(lustre_swab_ost_last_id); + +void lustre_swab_generic_32s(__u32 *val) +{ + __swab32s(val); +} +EXPORT_SYMBOL(lustre_swab_generic_32s); + +void lustre_swab_gl_desc(union ldlm_gl_desc *desc) +{ + lustre_swab_lu_fid(&desc->lquota_desc.gl_id.qid_fid); + __swab64s(&desc->lquota_desc.gl_flags); + __swab64s(&desc->lquota_desc.gl_ver); + __swab64s(&desc->lquota_desc.gl_hardlimit); + __swab64s(&desc->lquota_desc.gl_softlimit); + __swab64s(&desc->lquota_desc.gl_time); + CLASSERT(offsetof(typeof(desc->lquota_desc), gl_pad2) != 0); +} + +void lustre_swab_ost_lvb_v1(struct ost_lvb_v1 *lvb) +{ + __swab64s(&lvb->lvb_size); + __swab64s(&lvb->lvb_mtime); + __swab64s(&lvb->lvb_atime); + __swab64s(&lvb->lvb_ctime); + __swab64s(&lvb->lvb_blocks); +} +EXPORT_SYMBOL(lustre_swab_ost_lvb_v1); + +void lustre_swab_ost_lvb(struct ost_lvb *lvb) +{ + __swab64s(&lvb->lvb_size); + __swab64s(&lvb->lvb_mtime); + __swab64s(&lvb->lvb_atime); + __swab64s(&lvb->lvb_ctime); + __swab64s(&lvb->lvb_blocks); + __swab32s(&lvb->lvb_mtime_ns); + __swab32s(&lvb->lvb_atime_ns); + __swab32s(&lvb->lvb_ctime_ns); + __swab32s(&lvb->lvb_padding); +} +EXPORT_SYMBOL(lustre_swab_ost_lvb); + +void lustre_swab_lquota_lvb(struct lquota_lvb *lvb) +{ + __swab64s(&lvb->lvb_flags); + __swab64s(&lvb->lvb_id_may_rel); + __swab64s(&lvb->lvb_id_rel); + __swab64s(&lvb->lvb_id_qunit); + __swab64s(&lvb->lvb_pad1); +} +EXPORT_SYMBOL(lustre_swab_lquota_lvb); + +void lustre_swab_mdt_body(struct mdt_body *b) +{ + lustre_swab_lu_fid(&b->fid1); + lustre_swab_lu_fid(&b->fid2); + /* handle is opaque */ + __swab64s(&b->valid); + __swab64s(&b->size); + __swab64s(&b->mtime); + __swab64s(&b->atime); + __swab64s(&b->ctime); + __swab64s(&b->blocks); + __swab64s(&b->ioepoch); + __swab64s(&b->t_state); + __swab32s(&b->fsuid); + __swab32s(&b->fsgid); + __swab32s(&b->capability); + __swab32s(&b->mode); + __swab32s(&b->uid); + __swab32s(&b->gid); + __swab32s(&b->flags); + __swab32s(&b->rdev); + __swab32s(&b->nlink); + CLASSERT(offsetof(typeof(*b), unused2) != 0); + __swab32s(&b->suppgid); + __swab32s(&b->eadatasize); + __swab32s(&b->aclsize); + __swab32s(&b->max_mdsize); + __swab32s(&b->max_cookiesize); + __swab32s(&b->uid_h); + __swab32s(&b->gid_h); + CLASSERT(offsetof(typeof(*b), padding_5) != 0); +} +EXPORT_SYMBOL(lustre_swab_mdt_body); + +void lustre_swab_mdt_ioepoch(struct mdt_ioepoch *b) +{ + /* handle is opaque */ + __swab64s(&b->ioepoch); + __swab32s(&b->flags); + CLASSERT(offsetof(typeof(*b), padding) != 0); +} +EXPORT_SYMBOL(lustre_swab_mdt_ioepoch); + +void lustre_swab_mgs_target_info(struct mgs_target_info *mti) +{ + int i; + __swab32s(&mti->mti_lustre_ver); + __swab32s(&mti->mti_stripe_index); + __swab32s(&mti->mti_config_ver); + __swab32s(&mti->mti_flags); + __swab32s(&mti->mti_instance); + __swab32s(&mti->mti_nid_count); + CLASSERT(sizeof(lnet_nid_t) == sizeof(__u64)); + for (i = 0; i < MTI_NIDS_MAX; i++) + __swab64s(&mti->mti_nids[i]); +} +EXPORT_SYMBOL(lustre_swab_mgs_target_info); + +void lustre_swab_mgs_nidtbl_entry(struct mgs_nidtbl_entry *entry) +{ + int i; + + __swab64s(&entry->mne_version); + __swab32s(&entry->mne_instance); + __swab32s(&entry->mne_index); + __swab32s(&entry->mne_length); + + /* mne_nid_(count|type) must be one byte size because we're gonna + * access it w/o swapping. */ + CLASSERT(sizeof(entry->mne_nid_count) == sizeof(__u8)); + CLASSERT(sizeof(entry->mne_nid_type) == sizeof(__u8)); + + /* remove this assertion if ipv6 is supported. */ + LASSERT(entry->mne_nid_type == 0); + for (i = 0; i < entry->mne_nid_count; i++) { + CLASSERT(sizeof(lnet_nid_t) == sizeof(__u64)); + __swab64s(&entry->u.nids[i]); + } +} +EXPORT_SYMBOL(lustre_swab_mgs_nidtbl_entry); + +void lustre_swab_mgs_config_body(struct mgs_config_body *body) +{ + __swab64s(&body->mcb_offset); + __swab32s(&body->mcb_units); + __swab16s(&body->mcb_type); +} +EXPORT_SYMBOL(lustre_swab_mgs_config_body); + +void lustre_swab_mgs_config_res(struct mgs_config_res *body) +{ + __swab64s(&body->mcr_offset); + __swab64s(&body->mcr_size); +} +EXPORT_SYMBOL(lustre_swab_mgs_config_res); + +static void lustre_swab_obd_dqinfo(struct obd_dqinfo *i) +{ + __swab64s(&i->dqi_bgrace); + __swab64s(&i->dqi_igrace); + __swab32s(&i->dqi_flags); + __swab32s(&i->dqi_valid); +} + +static void lustre_swab_obd_dqblk(struct obd_dqblk *b) +{ + __swab64s(&b->dqb_ihardlimit); + __swab64s(&b->dqb_isoftlimit); + __swab64s(&b->dqb_curinodes); + __swab64s(&b->dqb_bhardlimit); + __swab64s(&b->dqb_bsoftlimit); + __swab64s(&b->dqb_curspace); + __swab64s(&b->dqb_btime); + __swab64s(&b->dqb_itime); + __swab32s(&b->dqb_valid); + CLASSERT(offsetof(typeof(*b), dqb_padding) != 0); +} + +void lustre_swab_obd_quotactl(struct obd_quotactl *q) +{ + __swab32s(&q->qc_cmd); + __swab32s(&q->qc_type); + __swab32s(&q->qc_id); + __swab32s(&q->qc_stat); + lustre_swab_obd_dqinfo(&q->qc_dqinfo); + lustre_swab_obd_dqblk(&q->qc_dqblk); +} +EXPORT_SYMBOL(lustre_swab_obd_quotactl); + +void lustre_swab_mdt_remote_perm(struct mdt_remote_perm *p) +{ + __swab32s(&p->rp_uid); + __swab32s(&p->rp_gid); + __swab32s(&p->rp_fsuid); + __swab32s(&p->rp_fsuid_h); + __swab32s(&p->rp_fsgid); + __swab32s(&p->rp_fsgid_h); + __swab32s(&p->rp_access_perm); + __swab32s(&p->rp_padding); +}; +EXPORT_SYMBOL(lustre_swab_mdt_remote_perm); + +void lustre_swab_fid2path(struct getinfo_fid2path *gf) +{ + lustre_swab_lu_fid(&gf->gf_fid); + __swab64s(&gf->gf_recno); + __swab32s(&gf->gf_linkno); + __swab32s(&gf->gf_pathlen); +} +EXPORT_SYMBOL(lustre_swab_fid2path); + +void lustre_swab_fiemap_extent(struct ll_fiemap_extent *fm_extent) +{ + __swab64s(&fm_extent->fe_logical); + __swab64s(&fm_extent->fe_physical); + __swab64s(&fm_extent->fe_length); + __swab32s(&fm_extent->fe_flags); + __swab32s(&fm_extent->fe_device); +} + +void lustre_swab_fiemap(struct ll_user_fiemap *fiemap) +{ + int i; + + __swab64s(&fiemap->fm_start); + __swab64s(&fiemap->fm_length); + __swab32s(&fiemap->fm_flags); + __swab32s(&fiemap->fm_mapped_extents); + __swab32s(&fiemap->fm_extent_count); + __swab32s(&fiemap->fm_reserved); + + for (i = 0; i < fiemap->fm_mapped_extents; i++) + lustre_swab_fiemap_extent(&fiemap->fm_extents[i]); +} +EXPORT_SYMBOL(lustre_swab_fiemap); + +void lustre_swab_idx_info(struct idx_info *ii) +{ + __swab32s(&ii->ii_magic); + __swab32s(&ii->ii_flags); + __swab16s(&ii->ii_count); + __swab32s(&ii->ii_attrs); + lustre_swab_lu_fid(&ii->ii_fid); + __swab64s(&ii->ii_version); + __swab64s(&ii->ii_hash_start); + __swab64s(&ii->ii_hash_end); + __swab16s(&ii->ii_keysize); + __swab16s(&ii->ii_recsize); +} + +void lustre_swab_lip_header(struct lu_idxpage *lip) +{ + /* swab header */ + __swab32s(&lip->lip_magic); + __swab16s(&lip->lip_flags); + __swab16s(&lip->lip_nr); +} +EXPORT_SYMBOL(lustre_swab_lip_header); + +void lustre_swab_mdt_rec_reint (struct mdt_rec_reint *rr) +{ + __swab32s(&rr->rr_opcode); + __swab32s(&rr->rr_cap); + __swab32s(&rr->rr_fsuid); + /* rr_fsuid_h is unused */ + __swab32s(&rr->rr_fsgid); + /* rr_fsgid_h is unused */ + __swab32s(&rr->rr_suppgid1); + /* rr_suppgid1_h is unused */ + __swab32s(&rr->rr_suppgid2); + /* rr_suppgid2_h is unused */ + lustre_swab_lu_fid(&rr->rr_fid1); + lustre_swab_lu_fid(&rr->rr_fid2); + __swab64s(&rr->rr_mtime); + __swab64s(&rr->rr_atime); + __swab64s(&rr->rr_ctime); + __swab64s(&rr->rr_size); + __swab64s(&rr->rr_blocks); + __swab32s(&rr->rr_bias); + __swab32s(&rr->rr_mode); + __swab32s(&rr->rr_flags); + __swab32s(&rr->rr_flags_h); + __swab32s(&rr->rr_umask); + + CLASSERT(offsetof(typeof(*rr), rr_padding_4) != 0); +}; +EXPORT_SYMBOL(lustre_swab_mdt_rec_reint); + +void lustre_swab_lov_desc(struct lov_desc *ld) +{ + __swab32s(&ld->ld_tgt_count); + __swab32s(&ld->ld_active_tgt_count); + __swab32s(&ld->ld_default_stripe_count); + __swab32s(&ld->ld_pattern); + __swab64s(&ld->ld_default_stripe_size); + __swab64s(&ld->ld_default_stripe_offset); + __swab32s(&ld->ld_qos_maxage); + /* uuid endian insensitive */ +} +EXPORT_SYMBOL(lustre_swab_lov_desc); + +void lustre_swab_lmv_desc(struct lmv_desc *ld) +{ + __swab32s(&ld->ld_tgt_count); + __swab32s(&ld->ld_active_tgt_count); + __swab32s(&ld->ld_default_stripe_count); + __swab32s(&ld->ld_pattern); + __swab64s(&ld->ld_default_hash_size); + __swab32s(&ld->ld_qos_maxage); + /* uuid endian insensitive */ +} + +void lustre_swab_lmv_stripe_md(struct lmv_stripe_md *mea) +{ + __swab32s(&mea->mea_magic); + __swab32s(&mea->mea_count); + __swab32s(&mea->mea_master); + CLASSERT(offsetof(typeof(*mea), mea_padding) != 0); +} + +void lustre_swab_lmv_user_md(struct lmv_user_md *lum) +{ + int i; + + __swab32s(&lum->lum_magic); + __swab32s(&lum->lum_stripe_count); + __swab32s(&lum->lum_stripe_offset); + __swab32s(&lum->lum_hash_type); + __swab32s(&lum->lum_type); + CLASSERT(offsetof(typeof(*lum), lum_padding1) != 0); + CLASSERT(offsetof(typeof(*lum), lum_padding2) != 0); + CLASSERT(offsetof(typeof(*lum), lum_padding3) != 0); + + for (i = 0; i < lum->lum_stripe_count; i++) { + __swab32s(&lum->lum_objects[i].lum_mds); + lustre_swab_lu_fid(&lum->lum_objects[i].lum_fid); + } + +} +EXPORT_SYMBOL(lustre_swab_lmv_user_md); + +static void print_lum(struct lov_user_md *lum) +{ + CDEBUG(D_OTHER, "lov_user_md %p:\n", lum); + CDEBUG(D_OTHER, "\tlmm_magic: %#x\n", lum->lmm_magic); + CDEBUG(D_OTHER, "\tlmm_pattern: %#x\n", lum->lmm_pattern); + CDEBUG(D_OTHER, "\tlmm_object_id: %llu\n", lmm_oi_id(&lum->lmm_oi)); + CDEBUG(D_OTHER, "\tlmm_object_gr: %llu\n", lmm_oi_seq(&lum->lmm_oi)); + CDEBUG(D_OTHER, "\tlmm_stripe_size: %#x\n", lum->lmm_stripe_size); + CDEBUG(D_OTHER, "\tlmm_stripe_count: %#x\n", lum->lmm_stripe_count); + CDEBUG(D_OTHER, "\tlmm_stripe_offset/lmm_layout_gen: %#x\n", + lum->lmm_stripe_offset); +} + +static void lustre_swab_lmm_oi(struct ost_id *oi) +{ + __swab64s(&oi->oi.oi_id); + __swab64s(&oi->oi.oi_seq); +} + +static void lustre_swab_lov_user_md_common(struct lov_user_md_v1 *lum) +{ + __swab32s(&lum->lmm_magic); + __swab32s(&lum->lmm_pattern); + lustre_swab_lmm_oi(&lum->lmm_oi); + __swab32s(&lum->lmm_stripe_size); + __swab16s(&lum->lmm_stripe_count); + __swab16s(&lum->lmm_stripe_offset); + print_lum(lum); +} + +void lustre_swab_lov_user_md_v1(struct lov_user_md_v1 *lum) +{ + CDEBUG(D_IOCTL, "swabbing lov_user_md v1\n"); + lustre_swab_lov_user_md_common(lum); +} +EXPORT_SYMBOL(lustre_swab_lov_user_md_v1); + +void lustre_swab_lov_user_md_v3(struct lov_user_md_v3 *lum) +{ + CDEBUG(D_IOCTL, "swabbing lov_user_md v3\n"); + lustre_swab_lov_user_md_common((struct lov_user_md_v1 *)lum); + /* lmm_pool_name nothing to do with char */ +} +EXPORT_SYMBOL(lustre_swab_lov_user_md_v3); + +void lustre_swab_lov_mds_md(struct lov_mds_md *lmm) +{ + CDEBUG(D_IOCTL, "swabbing lov_mds_md\n"); + __swab32s(&lmm->lmm_magic); + __swab32s(&lmm->lmm_pattern); + lustre_swab_lmm_oi(&lmm->lmm_oi); + __swab32s(&lmm->lmm_stripe_size); + __swab16s(&lmm->lmm_stripe_count); + __swab16s(&lmm->lmm_layout_gen); +} +EXPORT_SYMBOL(lustre_swab_lov_mds_md); + +void lustre_swab_lov_user_md_objects(struct lov_user_ost_data *lod, + int stripe_count) +{ + int i; + + for (i = 0; i < stripe_count; i++) { + lustre_swab_ost_id(&(lod[i].l_ost_oi)); + __swab32s(&(lod[i].l_ost_gen)); + __swab32s(&(lod[i].l_ost_idx)); + } +} +EXPORT_SYMBOL(lustre_swab_lov_user_md_objects); + +void lustre_swab_ldlm_res_id(struct ldlm_res_id *id) +{ + int i; + + for (i = 0; i < RES_NAME_SIZE; i++) + __swab64s(&id->name[i]); +} +EXPORT_SYMBOL(lustre_swab_ldlm_res_id); + +void lustre_swab_ldlm_policy_data(ldlm_wire_policy_data_t *d) +{ + /* the lock data is a union and the first two fields are always an + * extent so it's ok to process an LDLM_EXTENT and LDLM_FLOCK lock + * data the same way. */ + __swab64s(&d->l_extent.start); + __swab64s(&d->l_extent.end); + __swab64s(&d->l_extent.gid); + __swab64s(&d->l_flock.lfw_owner); + __swab32s(&d->l_flock.lfw_pid); +} +EXPORT_SYMBOL(lustre_swab_ldlm_policy_data); + +void lustre_swab_ldlm_intent(struct ldlm_intent *i) +{ + __swab64s(&i->opc); +} +EXPORT_SYMBOL(lustre_swab_ldlm_intent); + +void lustre_swab_ldlm_resource_desc(struct ldlm_resource_desc *r) +{ + __swab32s(&r->lr_type); + CLASSERT(offsetof(typeof(*r), lr_padding) != 0); + lustre_swab_ldlm_res_id(&r->lr_name); +} +EXPORT_SYMBOL(lustre_swab_ldlm_resource_desc); + +void lustre_swab_ldlm_lock_desc(struct ldlm_lock_desc *l) +{ + lustre_swab_ldlm_resource_desc(&l->l_resource); + __swab32s(&l->l_req_mode); + __swab32s(&l->l_granted_mode); + lustre_swab_ldlm_policy_data(&l->l_policy_data); +} +EXPORT_SYMBOL(lustre_swab_ldlm_lock_desc); + +void lustre_swab_ldlm_request(struct ldlm_request *rq) +{ + __swab32s(&rq->lock_flags); + lustre_swab_ldlm_lock_desc(&rq->lock_desc); + __swab32s(&rq->lock_count); + /* lock_handle[] opaque */ +} +EXPORT_SYMBOL(lustre_swab_ldlm_request); + +void lustre_swab_ldlm_reply(struct ldlm_reply *r) +{ + __swab32s(&r->lock_flags); + CLASSERT(offsetof(typeof(*r), lock_padding) != 0); + lustre_swab_ldlm_lock_desc(&r->lock_desc); + /* lock_handle opaque */ + __swab64s(&r->lock_policy_res1); + __swab64s(&r->lock_policy_res2); +} +EXPORT_SYMBOL(lustre_swab_ldlm_reply); + +void lustre_swab_quota_body(struct quota_body *b) +{ + lustre_swab_lu_fid(&b->qb_fid); + lustre_swab_lu_fid((struct lu_fid *)&b->qb_id); + __swab32s(&b->qb_flags); + __swab64s(&b->qb_count); + __swab64s(&b->qb_usage); + __swab64s(&b->qb_slv_ver); +} + +/* Dump functions */ +void dump_ioo(struct obd_ioobj *ioo) +{ + CDEBUG(D_RPCTRACE, + "obd_ioobj: ioo_oid=" DOSTID ", ioo_max_brw=%#x, ioo_bufct=%d\n", + POSTID(&ioo->ioo_oid), ioo->ioo_max_brw, + ioo->ioo_bufcnt); +} +EXPORT_SYMBOL(dump_ioo); + +void dump_rniobuf(struct niobuf_remote *nb) +{ + CDEBUG(D_RPCTRACE, "niobuf_remote: offset=%llu, len=%d, flags=%x\n", + nb->offset, nb->len, nb->flags); +} +EXPORT_SYMBOL(dump_rniobuf); + +void dump_obdo(struct obdo *oa) +{ + __u32 valid = oa->o_valid; + + CDEBUG(D_RPCTRACE, "obdo: o_valid = %08x\n", valid); + if (valid & OBD_MD_FLID) + CDEBUG(D_RPCTRACE, "obdo: id = "DOSTID"\n", POSTID(&oa->o_oi)); + if (valid & OBD_MD_FLFID) + CDEBUG(D_RPCTRACE, "obdo: o_parent_seq = %#llx\n", + oa->o_parent_seq); + if (valid & OBD_MD_FLSIZE) + CDEBUG(D_RPCTRACE, "obdo: o_size = %lld\n", oa->o_size); + if (valid & OBD_MD_FLMTIME) + CDEBUG(D_RPCTRACE, "obdo: o_mtime = %lld\n", oa->o_mtime); + if (valid & OBD_MD_FLATIME) + CDEBUG(D_RPCTRACE, "obdo: o_atime = %lld\n", oa->o_atime); + if (valid & OBD_MD_FLCTIME) + CDEBUG(D_RPCTRACE, "obdo: o_ctime = %lld\n", oa->o_ctime); + if (valid & OBD_MD_FLBLOCKS) /* allocation of space */ + CDEBUG(D_RPCTRACE, "obdo: o_blocks = %lld\n", oa->o_blocks); + if (valid & OBD_MD_FLGRANT) + CDEBUG(D_RPCTRACE, "obdo: o_grant = %lld\n", oa->o_grant); + if (valid & OBD_MD_FLBLKSZ) + CDEBUG(D_RPCTRACE, "obdo: o_blksize = %d\n", oa->o_blksize); + if (valid & (OBD_MD_FLTYPE | OBD_MD_FLMODE)) + CDEBUG(D_RPCTRACE, "obdo: o_mode = %o\n", + oa->o_mode & ((valid & OBD_MD_FLTYPE ? S_IFMT : 0) | + (valid & OBD_MD_FLMODE ? ~S_IFMT : 0))); + if (valid & OBD_MD_FLUID) + CDEBUG(D_RPCTRACE, "obdo: o_uid = %u\n", oa->o_uid); + if (valid & OBD_MD_FLUID) + CDEBUG(D_RPCTRACE, "obdo: o_uid_h = %u\n", oa->o_uid_h); + if (valid & OBD_MD_FLGID) + CDEBUG(D_RPCTRACE, "obdo: o_gid = %u\n", oa->o_gid); + if (valid & OBD_MD_FLGID) + CDEBUG(D_RPCTRACE, "obdo: o_gid_h = %u\n", oa->o_gid_h); + if (valid & OBD_MD_FLFLAGS) + CDEBUG(D_RPCTRACE, "obdo: o_flags = %x\n", oa->o_flags); + if (valid & OBD_MD_FLNLINK) + CDEBUG(D_RPCTRACE, "obdo: o_nlink = %u\n", oa->o_nlink); + else if (valid & OBD_MD_FLCKSUM) + CDEBUG(D_RPCTRACE, "obdo: o_checksum (o_nlink) = %u\n", + oa->o_nlink); + if (valid & OBD_MD_FLGENER) + CDEBUG(D_RPCTRACE, "obdo: o_parent_oid = %x\n", + oa->o_parent_oid); + if (valid & OBD_MD_FLEPOCH) + CDEBUG(D_RPCTRACE, "obdo: o_ioepoch = %lld\n", + oa->o_ioepoch); + if (valid & OBD_MD_FLFID) { + CDEBUG(D_RPCTRACE, "obdo: o_stripe_idx = %u\n", + oa->o_stripe_idx); + CDEBUG(D_RPCTRACE, "obdo: o_parent_ver = %x\n", + oa->o_parent_ver); + } + if (valid & OBD_MD_FLHANDLE) + CDEBUG(D_RPCTRACE, "obdo: o_handle = %lld\n", + oa->o_handle.cookie); + if (valid & OBD_MD_FLCOOKIE) + CDEBUG(D_RPCTRACE, "obdo: o_lcookie = (llog_cookie dumping not yet implemented)\n"); +} +EXPORT_SYMBOL(dump_obdo); + +void dump_ost_body(struct ost_body *ob) +{ + dump_obdo(&ob->oa); +} +EXPORT_SYMBOL(dump_ost_body); + +void dump_rcs(__u32 *rc) +{ + CDEBUG(D_RPCTRACE, "rmf_rcs: %d\n", *rc); +} +EXPORT_SYMBOL(dump_rcs); + +static inline int req_ptlrpc_body_swabbed(struct ptlrpc_request *req) +{ + LASSERT(req->rq_reqmsg); + + switch (req->rq_reqmsg->lm_magic) { + case LUSTRE_MSG_MAGIC_V2: + return lustre_req_swabbed(req, MSG_PTLRPC_BODY_OFF); + default: + CERROR("bad lustre msg magic: %#08X\n", + req->rq_reqmsg->lm_magic); + } + return 0; +} + +static inline int rep_ptlrpc_body_swabbed(struct ptlrpc_request *req) +{ + LASSERT(req->rq_repmsg); + + switch (req->rq_repmsg->lm_magic) { + case LUSTRE_MSG_MAGIC_V2: + return lustre_rep_swabbed(req, MSG_PTLRPC_BODY_OFF); + default: + /* uninitialized yet */ + return 0; + } +} + +void _debug_req(struct ptlrpc_request *req, + struct libcfs_debug_msg_data *msgdata, + const char *fmt, ...) +{ + int req_ok = req->rq_reqmsg != NULL; + int rep_ok = req->rq_repmsg != NULL; + lnet_nid_t nid = LNET_NID_ANY; + va_list args; + + if (ptlrpc_req_need_swab(req)) { + req_ok = req_ok && req_ptlrpc_body_swabbed(req); + rep_ok = rep_ok && rep_ptlrpc_body_swabbed(req); + } + + if (req->rq_import && req->rq_import->imp_connection) + nid = req->rq_import->imp_connection->c_peer.nid; + else if (req->rq_export && req->rq_export->exp_connection) + nid = req->rq_export->exp_connection->c_peer.nid; + + va_start(args, fmt); + libcfs_debug_vmsg2(msgdata, fmt, args, + " req@%p x%llu/t%lld(%lld) o%d->%s@%s:%d/%d lens %d/%d e %d to %d dl " CFS_TIME_T " ref %d fl " REQ_FLAGS_FMT "/%x/%x rc %d/%d\n", + req, req->rq_xid, req->rq_transno, + req_ok ? lustre_msg_get_transno(req->rq_reqmsg) : 0, + req_ok ? lustre_msg_get_opc(req->rq_reqmsg) : -1, + req->rq_import ? + req->rq_import->imp_obd->obd_name : + req->rq_export ? + req->rq_export->exp_client_uuid.uuid : + "<?>", + libcfs_nid2str(nid), + req->rq_request_portal, req->rq_reply_portal, + req->rq_reqlen, req->rq_replen, + req->rq_early_count, req->rq_timedout, + req->rq_deadline, + atomic_read(&req->rq_refcount), + DEBUG_REQ_FLAGS(req), + req_ok ? lustre_msg_get_flags(req->rq_reqmsg) : -1, + rep_ok ? lustre_msg_get_flags(req->rq_repmsg) : -1, + req->rq_status, + rep_ok ? lustre_msg_get_status(req->rq_repmsg) : -1); + va_end(args); +} +EXPORT_SYMBOL(_debug_req); + +void lustre_swab_lustre_capa(struct lustre_capa *c) +{ + lustre_swab_lu_fid(&c->lc_fid); + __swab64s(&c->lc_opc); + __swab64s(&c->lc_uid); + __swab64s(&c->lc_gid); + __swab32s(&c->lc_flags); + __swab32s(&c->lc_keyid); + __swab32s(&c->lc_timeout); + __swab32s(&c->lc_expiry); +} +EXPORT_SYMBOL(lustre_swab_lustre_capa); + +void lustre_swab_lustre_capa_key(struct lustre_capa_key *k) +{ + __swab64s(&k->lk_seq); + __swab32s(&k->lk_keyid); + CLASSERT(offsetof(typeof(*k), lk_padding) != 0); +} +EXPORT_SYMBOL(lustre_swab_lustre_capa_key); + +void lustre_swab_hsm_user_state(struct hsm_user_state *state) +{ + __swab32s(&state->hus_states); + __swab32s(&state->hus_archive_id); +} +EXPORT_SYMBOL(lustre_swab_hsm_user_state); + +void lustre_swab_hsm_state_set(struct hsm_state_set *hss) +{ + __swab32s(&hss->hss_valid); + __swab64s(&hss->hss_setmask); + __swab64s(&hss->hss_clearmask); + __swab32s(&hss->hss_archive_id); +} +EXPORT_SYMBOL(lustre_swab_hsm_state_set); + +void lustre_swab_hsm_extent(struct hsm_extent *extent) +{ + __swab64s(&extent->offset); + __swab64s(&extent->length); +} + +void lustre_swab_hsm_current_action(struct hsm_current_action *action) +{ + __swab32s(&action->hca_state); + __swab32s(&action->hca_action); + lustre_swab_hsm_extent(&action->hca_location); +} +EXPORT_SYMBOL(lustre_swab_hsm_current_action); + +void lustre_swab_hsm_user_item(struct hsm_user_item *hui) +{ + lustre_swab_lu_fid(&hui->hui_fid); + lustre_swab_hsm_extent(&hui->hui_extent); +} +EXPORT_SYMBOL(lustre_swab_hsm_user_item); + +void lustre_swab_layout_intent(struct layout_intent *li) +{ + __swab32s(&li->li_opc); + __swab32s(&li->li_flags); + __swab64s(&li->li_start); + __swab64s(&li->li_end); +} +EXPORT_SYMBOL(lustre_swab_layout_intent); + +void lustre_swab_hsm_progress_kernel(struct hsm_progress_kernel *hpk) +{ + lustre_swab_lu_fid(&hpk->hpk_fid); + __swab64s(&hpk->hpk_cookie); + __swab64s(&hpk->hpk_extent.offset); + __swab64s(&hpk->hpk_extent.length); + __swab16s(&hpk->hpk_flags); + __swab16s(&hpk->hpk_errval); +} +EXPORT_SYMBOL(lustre_swab_hsm_progress_kernel); + +void lustre_swab_hsm_request(struct hsm_request *hr) +{ + __swab32s(&hr->hr_action); + __swab32s(&hr->hr_archive_id); + __swab64s(&hr->hr_flags); + __swab32s(&hr->hr_itemcount); + __swab32s(&hr->hr_data_len); +} +EXPORT_SYMBOL(lustre_swab_hsm_request); + +void lustre_swab_update_buf(struct update_buf *ub) +{ + __swab32s(&ub->ub_magic); + __swab32s(&ub->ub_count); +} +EXPORT_SYMBOL(lustre_swab_update_buf); + +void lustre_swab_update_reply_buf(struct update_reply *ur) +{ + int i; + + __swab32s(&ur->ur_version); + __swab32s(&ur->ur_count); + for (i = 0; i < ur->ur_count; i++) + __swab32s(&ur->ur_lens[i]); +} +EXPORT_SYMBOL(lustre_swab_update_reply_buf); + +void lustre_swab_swap_layouts(struct mdc_swap_layouts *msl) +{ + __swab64s(&msl->msl_flags); +} +EXPORT_SYMBOL(lustre_swab_swap_layouts); + +void lustre_swab_close_data(struct close_data *cd) +{ + lustre_swab_lu_fid(&cd->cd_fid); + __swab64s(&cd->cd_data_version); +} +EXPORT_SYMBOL(lustre_swab_close_data); diff --git a/drivers/staging/lustre/lustre/ptlrpc/pers.c b/drivers/staging/lustre/lustre/ptlrpc/pers.c new file mode 100644 index 000000000..e1334c24e --- /dev/null +++ b/drivers/staging/lustre/lustre/ptlrpc/pers.c @@ -0,0 +1,75 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + */ + +#define DEBUG_SUBSYSTEM S_RPC + +#include "../include/obd_support.h" +#include "../include/obd_class.h" +#include "../include/lustre_lib.h" +#include "../include/lustre_ha.h" +#include "../include/lustre_import.h" + +#include "ptlrpc_internal.h" + + +void ptlrpc_fill_bulk_md(lnet_md_t *md, struct ptlrpc_bulk_desc *desc, + int mdidx) +{ + CLASSERT(PTLRPC_MAX_BRW_PAGES < LI_POISON); + + LASSERT(mdidx < desc->bd_md_max_brw); + LASSERT(desc->bd_iov_count <= PTLRPC_MAX_BRW_PAGES); + LASSERT(!(md->options & (LNET_MD_IOVEC | LNET_MD_KIOV | + LNET_MD_PHYS))); + + md->options |= LNET_MD_KIOV; + md->length = max(0, desc->bd_iov_count - mdidx * LNET_MAX_IOV); + md->length = min_t(unsigned int, LNET_MAX_IOV, md->length); + if (desc->bd_enc_iov) + md->start = &desc->bd_enc_iov[mdidx * LNET_MAX_IOV]; + else + md->start = &desc->bd_iov[mdidx * LNET_MAX_IOV]; +} + +void ptlrpc_add_bulk_page(struct ptlrpc_bulk_desc *desc, struct page *page, + int pageoffset, int len) +{ + lnet_kiov_t *kiov = &desc->bd_iov[desc->bd_iov_count]; + + kiov->kiov_page = page; + kiov->kiov_offset = pageoffset; + kiov->kiov_len = len; + + desc->bd_iov_count++; +} diff --git a/drivers/staging/lustre/lustre/ptlrpc/pinger.c b/drivers/staging/lustre/lustre/ptlrpc/pinger.c new file mode 100644 index 000000000..9dbda9332 --- /dev/null +++ b/drivers/staging/lustre/lustre/ptlrpc/pinger.c @@ -0,0 +1,678 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2011, 2012, Intel Corporation. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + * + * lustre/ptlrpc/pinger.c + * + * Portal-RPC reconnection and replay operations, for use in recovery. + */ + +#define DEBUG_SUBSYSTEM S_RPC + +#include "../include/obd_support.h" +#include "../include/obd_class.h" +#include "ptlrpc_internal.h" + +static int suppress_pings; +module_param(suppress_pings, int, 0644); +MODULE_PARM_DESC(suppress_pings, "Suppress pings"); + +struct mutex pinger_mutex; +static LIST_HEAD(pinger_imports); +static struct list_head timeout_list = LIST_HEAD_INIT(timeout_list); + +int ptlrpc_pinger_suppress_pings(void) +{ + return suppress_pings; +} +EXPORT_SYMBOL(ptlrpc_pinger_suppress_pings); + +struct ptlrpc_request * +ptlrpc_prep_ping(struct obd_import *imp) +{ + struct ptlrpc_request *req; + + req = ptlrpc_request_alloc_pack(imp, &RQF_OBD_PING, + LUSTRE_OBD_VERSION, OBD_PING); + if (req) { + ptlrpc_request_set_replen(req); + req->rq_no_resend = req->rq_no_delay = 1; + } + return req; +} + +int ptlrpc_obd_ping(struct obd_device *obd) +{ + int rc; + struct ptlrpc_request *req; + + req = ptlrpc_prep_ping(obd->u.cli.cl_import); + if (req == NULL) + return -ENOMEM; + + req->rq_send_state = LUSTRE_IMP_FULL; + + rc = ptlrpc_queue_wait(req); + + ptlrpc_req_finished(req); + + return rc; +} +EXPORT_SYMBOL(ptlrpc_obd_ping); + +int ptlrpc_ping(struct obd_import *imp) +{ + struct ptlrpc_request *req; + + req = ptlrpc_prep_ping(imp); + if (req == NULL) { + CERROR("OOM trying to ping %s->%s\n", + imp->imp_obd->obd_uuid.uuid, + obd2cli_tgt(imp->imp_obd)); + return -ENOMEM; + } + + DEBUG_REQ(D_INFO, req, "pinging %s->%s", + imp->imp_obd->obd_uuid.uuid, obd2cli_tgt(imp->imp_obd)); + ptlrpcd_add_req(req, PDL_POLICY_ROUND, -1); + + return 0; +} + +void ptlrpc_update_next_ping(struct obd_import *imp, int soon) +{ + int time = soon ? PING_INTERVAL_SHORT : PING_INTERVAL; + if (imp->imp_state == LUSTRE_IMP_DISCON) { + int dtime = max_t(int, CONNECTION_SWITCH_MIN, + AT_OFF ? 0 : + at_get(&imp->imp_at.iat_net_latency)); + time = min(time, dtime); + } + imp->imp_next_ping = cfs_time_shift(time); +} + +void ptlrpc_ping_import_soon(struct obd_import *imp) +{ + imp->imp_next_ping = cfs_time_current(); +} + +static inline int imp_is_deactive(struct obd_import *imp) +{ + return (imp->imp_deactive || + OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_IMP_DEACTIVE)); +} + +static inline int ptlrpc_next_reconnect(struct obd_import *imp) +{ + if (imp->imp_server_timeout) + return cfs_time_shift(obd_timeout / 2); + else + return cfs_time_shift(obd_timeout); +} + +long pinger_check_timeout(unsigned long time) +{ + struct timeout_item *item; + unsigned long timeout = PING_INTERVAL; + + /* The timeout list is a increase order sorted list */ + mutex_lock(&pinger_mutex); + list_for_each_entry(item, &timeout_list, ti_chain) { + int ti_timeout = item->ti_timeout; + if (timeout > ti_timeout) + timeout = ti_timeout; + break; + } + mutex_unlock(&pinger_mutex); + + return cfs_time_sub(cfs_time_add(time, cfs_time_seconds(timeout)), + cfs_time_current()); +} + +static bool ir_up; + +void ptlrpc_pinger_ir_up(void) +{ + CDEBUG(D_HA, "IR up\n"); + ir_up = true; +} +EXPORT_SYMBOL(ptlrpc_pinger_ir_up); + +void ptlrpc_pinger_ir_down(void) +{ + CDEBUG(D_HA, "IR down\n"); + ir_up = false; +} +EXPORT_SYMBOL(ptlrpc_pinger_ir_down); + +static void ptlrpc_pinger_process_import(struct obd_import *imp, + unsigned long this_ping) +{ + int level; + int force; + int force_next; + int suppress; + + spin_lock(&imp->imp_lock); + + level = imp->imp_state; + force = imp->imp_force_verify; + force_next = imp->imp_force_next_verify; + /* + * This will be used below only if the import is "FULL". + */ + suppress = ir_up && OCD_HAS_FLAG(&imp->imp_connect_data, PINGLESS); + + imp->imp_force_verify = 0; + + if (cfs_time_aftereq(imp->imp_next_ping - 5 * CFS_TICK, this_ping) && + !force) { + spin_unlock(&imp->imp_lock); + return; + } + + imp->imp_force_next_verify = 0; + + spin_unlock(&imp->imp_lock); + + CDEBUG(level == LUSTRE_IMP_FULL ? D_INFO : D_HA, "%s->%s: level %s/%u force %u force_next %u deactive %u pingable %u suppress %u\n", + imp->imp_obd->obd_uuid.uuid, obd2cli_tgt(imp->imp_obd), + ptlrpc_import_state_name(level), level, force, force_next, + imp->imp_deactive, imp->imp_pingable, suppress); + + if (level == LUSTRE_IMP_DISCON && !imp_is_deactive(imp)) { + /* wait for a while before trying recovery again */ + imp->imp_next_ping = ptlrpc_next_reconnect(imp); + if (!imp->imp_no_pinger_recover) + ptlrpc_initiate_recovery(imp); + } else if (level != LUSTRE_IMP_FULL || + imp->imp_obd->obd_no_recov || + imp_is_deactive(imp)) { + CDEBUG(D_HA, "%s->%s: not pinging (in recovery or recovery disabled: %s)\n", + imp->imp_obd->obd_uuid.uuid, obd2cli_tgt(imp->imp_obd), + ptlrpc_import_state_name(level)); + if (force) { + spin_lock(&imp->imp_lock); + imp->imp_force_verify = 1; + spin_unlock(&imp->imp_lock); + } + } else if ((imp->imp_pingable && !suppress) || force_next || force) { + ptlrpc_ping(imp); + } +} + +static int ptlrpc_pinger_main(void *arg) +{ + struct ptlrpc_thread *thread = (struct ptlrpc_thread *)arg; + + /* Record that the thread is running */ + thread_set_flags(thread, SVC_RUNNING); + wake_up(&thread->t_ctl_waitq); + + /* And now, loop forever, pinging as needed. */ + while (1) { + unsigned long this_ping = cfs_time_current(); + struct l_wait_info lwi; + long time_to_next_wake; + struct timeout_item *item; + struct list_head *iter; + + mutex_lock(&pinger_mutex); + list_for_each_entry(item, &timeout_list, ti_chain) { + item->ti_cb(item, item->ti_cb_data); + } + list_for_each(iter, &pinger_imports) { + struct obd_import *imp = + list_entry(iter, struct obd_import, + imp_pinger_chain); + + ptlrpc_pinger_process_import(imp, this_ping); + /* obd_timeout might have changed */ + if (imp->imp_pingable && imp->imp_next_ping && + cfs_time_after(imp->imp_next_ping, + cfs_time_add(this_ping, + cfs_time_seconds(PING_INTERVAL)))) + ptlrpc_update_next_ping(imp, 0); + } + mutex_unlock(&pinger_mutex); + /* update memory usage info */ + obd_update_maxusage(); + + /* Wait until the next ping time, or until we're stopped. */ + time_to_next_wake = pinger_check_timeout(this_ping); + /* The ping sent by ptlrpc_send_rpc may get sent out + say .01 second after this. + ptlrpc_pinger_sending_on_import will then set the + next ping time to next_ping + .01 sec, which means + we will SKIP the next ping at next_ping, and the + ping will get sent 2 timeouts from now! Beware. */ + CDEBUG(D_INFO, "next wakeup in "CFS_DURATION_T" (" + CFS_TIME_T")\n", time_to_next_wake, + cfs_time_add(this_ping, + cfs_time_seconds(PING_INTERVAL))); + if (time_to_next_wake > 0) { + lwi = LWI_TIMEOUT(max_t(long, time_to_next_wake, + cfs_time_seconds(1)), + NULL, NULL); + l_wait_event(thread->t_ctl_waitq, + thread_is_stopping(thread) || + thread_is_event(thread), + &lwi); + if (thread_test_and_clear_flags(thread, SVC_STOPPING)) { + break; + } else { + /* woken after adding import to reset timer */ + thread_test_and_clear_flags(thread, SVC_EVENT); + } + } + } + + thread_set_flags(thread, SVC_STOPPED); + wake_up(&thread->t_ctl_waitq); + + CDEBUG(D_NET, "pinger thread exiting, process %d\n", current_pid()); + return 0; +} + +static struct ptlrpc_thread pinger_thread; + +int ptlrpc_start_pinger(void) +{ + struct l_wait_info lwi = { 0 }; + int rc; + + if (!thread_is_init(&pinger_thread) && + !thread_is_stopped(&pinger_thread)) + return -EALREADY; + + init_waitqueue_head(&pinger_thread.t_ctl_waitq); + + strcpy(pinger_thread.t_name, "ll_ping"); + + /* CLONE_VM and CLONE_FILES just avoid a needless copy, because we + * just drop the VM and FILES in cfs_daemonize_ctxt() right away. */ + rc = PTR_ERR(kthread_run(ptlrpc_pinger_main, &pinger_thread, + "%s", pinger_thread.t_name)); + if (IS_ERR_VALUE(rc)) { + CERROR("cannot start thread: %d\n", rc); + return rc; + } + l_wait_event(pinger_thread.t_ctl_waitq, + thread_is_running(&pinger_thread), &lwi); + + if (suppress_pings) + CWARN("Pings will be suppressed at the request of the administrator. The configuration shall meet the additional requirements described in the manual. (Search for the \"suppress_pings\" kernel module parameter.)\n"); + + return 0; +} + +int ptlrpc_pinger_remove_timeouts(void); + +int ptlrpc_stop_pinger(void) +{ + struct l_wait_info lwi = { 0 }; + int rc = 0; + + if (thread_is_init(&pinger_thread) || + thread_is_stopped(&pinger_thread)) + return -EALREADY; + + ptlrpc_pinger_remove_timeouts(); + thread_set_flags(&pinger_thread, SVC_STOPPING); + wake_up(&pinger_thread.t_ctl_waitq); + + l_wait_event(pinger_thread.t_ctl_waitq, + thread_is_stopped(&pinger_thread), &lwi); + + return rc; +} + +void ptlrpc_pinger_sending_on_import(struct obd_import *imp) +{ + ptlrpc_update_next_ping(imp, 0); +} +EXPORT_SYMBOL(ptlrpc_pinger_sending_on_import); + +void ptlrpc_pinger_commit_expected(struct obd_import *imp) +{ + ptlrpc_update_next_ping(imp, 1); + assert_spin_locked(&imp->imp_lock); + /* + * Avoid reading stale imp_connect_data. When not sure if pings are + * expected or not on next connection, we assume they are not and force + * one anyway to guarantee the chance of updating + * imp_peer_committed_transno. + */ + if (imp->imp_state != LUSTRE_IMP_FULL || + OCD_HAS_FLAG(&imp->imp_connect_data, PINGLESS)) + imp->imp_force_next_verify = 1; +} + +int ptlrpc_pinger_add_import(struct obd_import *imp) +{ + if (!list_empty(&imp->imp_pinger_chain)) + return -EALREADY; + + mutex_lock(&pinger_mutex); + CDEBUG(D_HA, "adding pingable import %s->%s\n", + imp->imp_obd->obd_uuid.uuid, obd2cli_tgt(imp->imp_obd)); + /* if we add to pinger we want recovery on this import */ + imp->imp_obd->obd_no_recov = 0; + ptlrpc_update_next_ping(imp, 0); + /* XXX sort, blah blah */ + list_add_tail(&imp->imp_pinger_chain, &pinger_imports); + class_import_get(imp); + + ptlrpc_pinger_wake_up(); + mutex_unlock(&pinger_mutex); + + return 0; +} +EXPORT_SYMBOL(ptlrpc_pinger_add_import); + +int ptlrpc_pinger_del_import(struct obd_import *imp) +{ + if (list_empty(&imp->imp_pinger_chain)) + return -ENOENT; + + mutex_lock(&pinger_mutex); + list_del_init(&imp->imp_pinger_chain); + CDEBUG(D_HA, "removing pingable import %s->%s\n", + imp->imp_obd->obd_uuid.uuid, obd2cli_tgt(imp->imp_obd)); + /* if we remove from pinger we don't want recovery on this import */ + imp->imp_obd->obd_no_recov = 1; + class_import_put(imp); + mutex_unlock(&pinger_mutex); + return 0; +} +EXPORT_SYMBOL(ptlrpc_pinger_del_import); + +/** + * Register a timeout callback to the pinger list, and the callback will + * be called when timeout happens. + */ +struct timeout_item *ptlrpc_new_timeout(int time, enum timeout_event event, + timeout_cb_t cb, void *data) +{ + struct timeout_item *ti; + + OBD_ALLOC_PTR(ti); + if (!ti) + return NULL; + + INIT_LIST_HEAD(&ti->ti_obd_list); + INIT_LIST_HEAD(&ti->ti_chain); + ti->ti_timeout = time; + ti->ti_event = event; + ti->ti_cb = cb; + ti->ti_cb_data = data; + + return ti; +} + +/** + * Register timeout event on the pinger thread. + * Note: the timeout list is an sorted list with increased timeout value. + */ +static struct timeout_item* +ptlrpc_pinger_register_timeout(int time, enum timeout_event event, + timeout_cb_t cb, void *data) +{ + struct timeout_item *item, *tmp; + + LASSERT(mutex_is_locked(&pinger_mutex)); + + list_for_each_entry(item, &timeout_list, ti_chain) + if (item->ti_event == event) + goto out; + + item = ptlrpc_new_timeout(time, event, cb, data); + if (item) { + list_for_each_entry_reverse(tmp, &timeout_list, ti_chain) { + if (tmp->ti_timeout < time) { + list_add(&item->ti_chain, &tmp->ti_chain); + goto out; + } + } + list_add(&item->ti_chain, &timeout_list); + } +out: + return item; +} + +/* Add a client_obd to the timeout event list, when timeout(@time) + * happens, the callback(@cb) will be called. + */ +int ptlrpc_add_timeout_client(int time, enum timeout_event event, + timeout_cb_t cb, void *data, + struct list_head *obd_list) +{ + struct timeout_item *ti; + + mutex_lock(&pinger_mutex); + ti = ptlrpc_pinger_register_timeout(time, event, cb, data); + if (!ti) { + mutex_unlock(&pinger_mutex); + return -EINVAL; + } + list_add(obd_list, &ti->ti_obd_list); + mutex_unlock(&pinger_mutex); + return 0; +} +EXPORT_SYMBOL(ptlrpc_add_timeout_client); + +int ptlrpc_del_timeout_client(struct list_head *obd_list, + enum timeout_event event) +{ + struct timeout_item *ti = NULL, *item; + + if (list_empty(obd_list)) + return 0; + mutex_lock(&pinger_mutex); + list_del_init(obd_list); + /** + * If there are no obd attached to the timeout event + * list, remove this timeout event from the pinger + */ + list_for_each_entry(item, &timeout_list, ti_chain) { + if (item->ti_event == event) { + ti = item; + break; + } + } + LASSERTF(ti != NULL, "ti is NULL !\n"); + if (list_empty(&ti->ti_obd_list)) { + list_del(&ti->ti_chain); + OBD_FREE_PTR(ti); + } + mutex_unlock(&pinger_mutex); + return 0; +} +EXPORT_SYMBOL(ptlrpc_del_timeout_client); + +int ptlrpc_pinger_remove_timeouts(void) +{ + struct timeout_item *item, *tmp; + + mutex_lock(&pinger_mutex); + list_for_each_entry_safe(item, tmp, &timeout_list, ti_chain) { + LASSERT(list_empty(&item->ti_obd_list)); + list_del(&item->ti_chain); + OBD_FREE_PTR(item); + } + mutex_unlock(&pinger_mutex); + return 0; +} + +void ptlrpc_pinger_wake_up(void) +{ + thread_add_flags(&pinger_thread, SVC_EVENT); + wake_up(&pinger_thread.t_ctl_waitq); +} + +/* Ping evictor thread */ +#define PET_READY 1 +#define PET_TERMINATE 2 + +static int pet_refcount; +static int pet_state; +static wait_queue_head_t pet_waitq; +LIST_HEAD(pet_list); +static DEFINE_SPINLOCK(pet_lock); + +int ping_evictor_wake(struct obd_export *exp) +{ + struct obd_device *obd; + + spin_lock(&pet_lock); + if (pet_state != PET_READY) { + /* eventually the new obd will call here again. */ + spin_unlock(&pet_lock); + return 1; + } + + obd = class_exp2obd(exp); + if (list_empty(&obd->obd_evict_list)) { + class_incref(obd, "evictor", obd); + list_add(&obd->obd_evict_list, &pet_list); + } + spin_unlock(&pet_lock); + + wake_up(&pet_waitq); + return 0; +} + +static int ping_evictor_main(void *arg) +{ + struct obd_device *obd; + struct obd_export *exp; + struct l_wait_info lwi = { 0 }; + time_t expire_time; + + unshare_fs_struct(); + + CDEBUG(D_HA, "Starting Ping Evictor\n"); + pet_state = PET_READY; + while (1) { + l_wait_event(pet_waitq, (!list_empty(&pet_list)) || + (pet_state == PET_TERMINATE), &lwi); + + /* loop until all obd's will be removed */ + if ((pet_state == PET_TERMINATE) && list_empty(&pet_list)) + break; + + /* we only get here if pet_exp != NULL, and the end of this + * loop is the only place which sets it NULL again, so lock + * is not strictly necessary. */ + spin_lock(&pet_lock); + obd = list_entry(pet_list.next, struct obd_device, + obd_evict_list); + spin_unlock(&pet_lock); + + expire_time = get_seconds() - PING_EVICT_TIMEOUT; + + CDEBUG(D_HA, "evicting all exports of obd %s older than %ld\n", + obd->obd_name, expire_time); + + /* Exports can't be deleted out of the list while we hold + * the obd lock (class_unlink_export), which means we can't + * lose the last ref on the export. If they've already been + * removed from the list, we won't find them here. */ + spin_lock(&obd->obd_dev_lock); + while (!list_empty(&obd->obd_exports_timed)) { + exp = list_entry(obd->obd_exports_timed.next, + struct obd_export, + exp_obd_chain_timed); + if (expire_time > exp->exp_last_request_time) { + class_export_get(exp); + spin_unlock(&obd->obd_dev_lock); + LCONSOLE_WARN("%s: haven't heard from client %s (at %s) in %ld seconds. I think it's dead, and I am evicting it. exp %p, cur %ld expire %ld last %ld\n", + obd->obd_name, + obd_uuid2str(&exp->exp_client_uuid), + obd_export_nid2str(exp), + (long)(get_seconds() - + exp->exp_last_request_time), + exp, (long)get_seconds(), + (long)expire_time, + (long)exp->exp_last_request_time); + CDEBUG(D_HA, "Last request was at %ld\n", + exp->exp_last_request_time); + class_fail_export(exp); + class_export_put(exp); + spin_lock(&obd->obd_dev_lock); + } else { + /* List is sorted, so everyone below is ok */ + break; + } + } + spin_unlock(&obd->obd_dev_lock); + + spin_lock(&pet_lock); + list_del_init(&obd->obd_evict_list); + spin_unlock(&pet_lock); + + class_decref(obd, "evictor", obd); + } + CDEBUG(D_HA, "Exiting Ping Evictor\n"); + + return 0; +} + +void ping_evictor_start(void) +{ + struct task_struct *task; + + if (++pet_refcount > 1) + return; + + init_waitqueue_head(&pet_waitq); + + task = kthread_run(ping_evictor_main, NULL, "ll_evictor"); + if (IS_ERR(task)) { + pet_refcount--; + CERROR("Cannot start ping evictor thread: %ld\n", + PTR_ERR(task)); + } +} +EXPORT_SYMBOL(ping_evictor_start); + +void ping_evictor_stop(void) +{ + if (--pet_refcount > 0) + return; + + pet_state = PET_TERMINATE; + wake_up(&pet_waitq); +} +EXPORT_SYMBOL(ping_evictor_stop); diff --git a/drivers/staging/lustre/lustre/ptlrpc/ptlrpc_internal.h b/drivers/staging/lustre/lustre/ptlrpc/ptlrpc_internal.h new file mode 100644 index 000000000..a66dc3c6d --- /dev/null +++ b/drivers/staging/lustre/lustre/ptlrpc/ptlrpc_internal.h @@ -0,0 +1,312 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2011, 2012, Intel Corporation. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + */ + +/* Intramodule declarations for ptlrpc. */ + +#ifndef PTLRPC_INTERNAL_H +#define PTLRPC_INTERNAL_H + +#include "../ldlm/ldlm_internal.h" + +struct ldlm_namespace; +struct obd_import; +struct ldlm_res_id; +struct ptlrpc_request_set; +extern int test_req_buffer_pressure; +extern struct mutex ptlrpc_all_services_mutex; + +int ptlrpc_start_thread(struct ptlrpc_service_part *svcpt, int wait); +/* ptlrpcd.c */ +int ptlrpcd_start(int index, int max, const char *name, struct ptlrpcd_ctl *pc); + +/* client.c */ +struct ptlrpc_bulk_desc *ptlrpc_new_bulk(unsigned npages, unsigned max_brw, + unsigned type, unsigned portal); +int ptlrpc_request_cache_init(void); +void ptlrpc_request_cache_fini(void); +struct ptlrpc_request *ptlrpc_request_cache_alloc(gfp_t flags); +void ptlrpc_request_cache_free(struct ptlrpc_request *req); +void ptlrpc_init_xid(void); + +/* events.c */ +int ptlrpc_init_portals(void); +void ptlrpc_exit_portals(void); + +void ptlrpc_request_handle_notconn(struct ptlrpc_request *); +void lustre_assert_wire_constants(void); +int ptlrpc_import_in_recovery(struct obd_import *imp); +int ptlrpc_set_import_discon(struct obd_import *imp, __u32 conn_cnt); +void ptlrpc_handle_failed_import(struct obd_import *imp); +int ptlrpc_replay_next(struct obd_import *imp, int *inflight); +void ptlrpc_initiate_recovery(struct obd_import *imp); + +int lustre_unpack_req_ptlrpc_body(struct ptlrpc_request *req, int offset); +int lustre_unpack_rep_ptlrpc_body(struct ptlrpc_request *req, int offset); + +#if defined(CONFIG_PROC_FS) +void ptlrpc_lprocfs_register_service(struct proc_dir_entry *proc_entry, + struct ptlrpc_service *svc); +void ptlrpc_lprocfs_unregister_service(struct ptlrpc_service *svc); +void ptlrpc_lprocfs_rpc_sent(struct ptlrpc_request *req, long amount); +void ptlrpc_lprocfs_do_request_stat(struct ptlrpc_request *req, + long q_usec, long work_usec); +#else +#define ptlrpc_lprocfs_register_service(params...) do {} while (0) +#define ptlrpc_lprocfs_unregister_service(params...) do {} while (0) +#define ptlrpc_lprocfs_rpc_sent(params...) do {} while (0) +#define ptlrpc_lprocfs_do_request_stat(params...) do {} while (0) +#endif /* CONFIG_PROC_FS */ + +/* NRS */ + +/** + * NRS core object. + * + * Holds NRS core fields. + */ +struct nrs_core { + /** + * Protects nrs_core::nrs_policies, serializes external policy + * registration/unregistration, and NRS core lprocfs operations. + */ + struct mutex nrs_mutex; + /* XXX: This is just for liblustre. Remove the #if defined directive + * when the * "cfs_" prefix is dropped from cfs_list_head. */ + /** + * List of all policy descriptors registered with NRS core; protected + * by nrs_core::nrs_mutex. + */ + struct list_head nrs_policies; + +}; + +int ptlrpc_service_nrs_setup(struct ptlrpc_service *svc); +void ptlrpc_service_nrs_cleanup(struct ptlrpc_service *svc); + +void ptlrpc_nrs_req_initialize(struct ptlrpc_service_part *svcpt, + struct ptlrpc_request *req, bool hp); +void ptlrpc_nrs_req_finalize(struct ptlrpc_request *req); +void ptlrpc_nrs_req_stop_nolock(struct ptlrpc_request *req); +void ptlrpc_nrs_req_add(struct ptlrpc_service_part *svcpt, + struct ptlrpc_request *req, bool hp); + +struct ptlrpc_request * +ptlrpc_nrs_req_get_nolock0(struct ptlrpc_service_part *svcpt, bool hp, + bool peek, bool force); + +static inline struct ptlrpc_request * +ptlrpc_nrs_req_get_nolock(struct ptlrpc_service_part *svcpt, bool hp, + bool force) +{ + return ptlrpc_nrs_req_get_nolock0(svcpt, hp, false, force); +} + +static inline struct ptlrpc_request * +ptlrpc_nrs_req_peek_nolock(struct ptlrpc_service_part *svcpt, bool hp) +{ + return ptlrpc_nrs_req_get_nolock0(svcpt, hp, true, false); +} + +void ptlrpc_nrs_req_del_nolock(struct ptlrpc_request *req); +bool ptlrpc_nrs_req_pending_nolock(struct ptlrpc_service_part *svcpt, bool hp); + +int ptlrpc_nrs_policy_control(const struct ptlrpc_service *svc, + enum ptlrpc_nrs_queue_type queue, char *name, + enum ptlrpc_nrs_ctl opc, bool single, void *arg); + +int ptlrpc_nrs_init(void); +void ptlrpc_nrs_fini(void); + +static inline bool nrs_svcpt_has_hp(const struct ptlrpc_service_part *svcpt) +{ + return svcpt->scp_nrs_hp != NULL; +} + +static inline bool nrs_svc_has_hp(const struct ptlrpc_service *svc) +{ + /** + * If the first service partition has an HP NRS head, all service + * partitions will. + */ + return nrs_svcpt_has_hp(svc->srv_parts[0]); +} + +static inline +struct ptlrpc_nrs *nrs_svcpt2nrs(struct ptlrpc_service_part *svcpt, bool hp) +{ + LASSERT(ergo(hp, nrs_svcpt_has_hp(svcpt))); + return hp ? svcpt->scp_nrs_hp : &svcpt->scp_nrs_reg; +} + +static inline int nrs_pol2cptid(const struct ptlrpc_nrs_policy *policy) +{ + return policy->pol_nrs->nrs_svcpt->scp_cpt; +} + +static inline +struct ptlrpc_service *nrs_pol2svc(struct ptlrpc_nrs_policy *policy) +{ + return policy->pol_nrs->nrs_svcpt->scp_service; +} + +static inline +struct ptlrpc_service_part *nrs_pol2svcpt(struct ptlrpc_nrs_policy *policy) +{ + return policy->pol_nrs->nrs_svcpt; +} + +static inline +struct cfs_cpt_table *nrs_pol2cptab(struct ptlrpc_nrs_policy *policy) +{ + return nrs_pol2svc(policy)->srv_cptable; +} + +static inline struct ptlrpc_nrs_resource * +nrs_request_resource(struct ptlrpc_nrs_request *nrq) +{ + LASSERT(nrq->nr_initialized); + LASSERT(!nrq->nr_finalized); + + return nrq->nr_res_ptrs[nrq->nr_res_idx]; +} + +static inline +struct ptlrpc_nrs_policy *nrs_request_policy(struct ptlrpc_nrs_request *nrq) +{ + return nrs_request_resource(nrq)->res_policy; +} + +#define NRS_LPROCFS_QUANTUM_NAME_REG "reg_quantum:" +#define NRS_LPROCFS_QUANTUM_NAME_HP "hp_quantum:" + +/** + * the maximum size of nrs_crrn_client::cc_quantum and nrs_orr_data::od_quantum. + */ +#define LPROCFS_NRS_QUANTUM_MAX 65535 + +/** + * Max valid command string is the size of the labels, plus "65535" twice, plus + * a separating space character. + */ +#define LPROCFS_NRS_WR_QUANTUM_MAX_CMD \ + sizeof(NRS_LPROCFS_QUANTUM_NAME_REG __stringify(LPROCFS_NRS_QUANTUM_MAX) " " \ + NRS_LPROCFS_QUANTUM_NAME_HP __stringify(LPROCFS_NRS_QUANTUM_MAX)) + +/* recovd_thread.c */ + +int ptlrpc_expire_one_request(struct ptlrpc_request *req, int async_unlink); + +/* pers.c */ +void ptlrpc_fill_bulk_md(lnet_md_t *md, struct ptlrpc_bulk_desc *desc, + int mdcnt); +void ptlrpc_add_bulk_page(struct ptlrpc_bulk_desc *desc, struct page *page, + int pageoffset, int len); + +/* pack_generic.c */ +struct ptlrpc_reply_state * +lustre_get_emerg_rs(struct ptlrpc_service_part *svcpt); +void lustre_put_emerg_rs(struct ptlrpc_reply_state *rs); + +/* pinger.c */ +int ptlrpc_start_pinger(void); +int ptlrpc_stop_pinger(void); +void ptlrpc_pinger_sending_on_import(struct obd_import *imp); +void ptlrpc_pinger_commit_expected(struct obd_import *imp); +void ptlrpc_pinger_wake_up(void); +void ptlrpc_ping_import_soon(struct obd_import *imp); +int ping_evictor_wake(struct obd_export *exp); + +/* sec_null.c */ +int sptlrpc_null_init(void); +void sptlrpc_null_fini(void); + +/* sec_plain.c */ +int sptlrpc_plain_init(void); +void sptlrpc_plain_fini(void); + +/* sec_bulk.c */ +int sptlrpc_enc_pool_init(void); +void sptlrpc_enc_pool_fini(void); +int sptlrpc_proc_enc_pool_seq_show(struct seq_file *m, void *v); + +/* sec_lproc.c */ +#if defined(CONFIG_PROC_FS) +int sptlrpc_lproc_init(void); +void sptlrpc_lproc_fini(void); +#else +static inline int sptlrpc_lproc_init(void) +{ return 0; } +static inline void sptlrpc_lproc_fini(void) {} +#endif + +/* sec_gc.c */ +int sptlrpc_gc_init(void); +void sptlrpc_gc_fini(void); + +/* sec_config.c */ +void sptlrpc_conf_choose_flavor(enum lustre_sec_part from, + enum lustre_sec_part to, + struct obd_uuid *target, + lnet_nid_t nid, + struct sptlrpc_flavor *sf); +int sptlrpc_conf_init(void); +void sptlrpc_conf_fini(void); + +/* sec.c */ +int sptlrpc_init(void); +void sptlrpc_fini(void); + +static inline int ll_rpc_recoverable_error(int rc) +{ + return (rc == -ENOTCONN || rc == -ENODEV); +} + +static inline int tgt_mod_init(void) +{ + return 0; +} + +static inline void tgt_mod_exit(void) +{ + return; +} + +static inline void ptlrpc_reqset_put(struct ptlrpc_request_set *set) +{ + if (atomic_dec_and_test(&set->set_refcount)) + OBD_FREE_PTR(set); +} +#endif /* PTLRPC_INTERNAL_H */ diff --git a/drivers/staging/lustre/lustre/ptlrpc/ptlrpc_module.c b/drivers/staging/lustre/lustre/ptlrpc/ptlrpc_module.c new file mode 100644 index 000000000..5268887ca --- /dev/null +++ b/drivers/staging/lustre/lustre/ptlrpc/ptlrpc_module.c @@ -0,0 +1,171 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2011, 2012, Intel Corporation. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + */ + +#define DEBUG_SUBSYSTEM S_RPC + + +#include "../include/obd_support.h" +#include "../include/obd_class.h" +#include "../include/lustre_net.h" +#include "../include/lustre_req_layout.h" + +#include "ptlrpc_internal.h" + +extern spinlock_t ptlrpc_last_xid_lock; +#if RS_DEBUG +extern spinlock_t ptlrpc_rs_debug_lock; +#endif +extern struct mutex pinger_mutex; +extern struct mutex ptlrpcd_mutex; + +__init int ptlrpc_init(void) +{ + int rc, cleanup_phase = 0; + + lustre_assert_wire_constants(); +#if RS_DEBUG + spin_lock_init(&ptlrpc_rs_debug_lock); +#endif + mutex_init(&ptlrpc_all_services_mutex); + mutex_init(&pinger_mutex); + mutex_init(&ptlrpcd_mutex); + ptlrpc_init_xid(); + + rc = req_layout_init(); + if (rc) + return rc; + + rc = ptlrpc_hr_init(); + if (rc) + return rc; + + cleanup_phase = 1; + rc = ptlrpc_request_cache_init(); + if (rc) + goto cleanup; + + cleanup_phase = 2; + rc = ptlrpc_init_portals(); + if (rc) + goto cleanup; + + cleanup_phase = 3; + + rc = ptlrpc_connection_init(); + if (rc) + goto cleanup; + + cleanup_phase = 4; + ptlrpc_put_connection_superhack = ptlrpc_connection_put; + + rc = ptlrpc_start_pinger(); + if (rc) + goto cleanup; + + cleanup_phase = 5; + rc = ldlm_init(); + if (rc) + goto cleanup; + + cleanup_phase = 6; + rc = sptlrpc_init(); + if (rc) + goto cleanup; + + cleanup_phase = 7; + rc = ptlrpc_nrs_init(); + if (rc) + goto cleanup; + + cleanup_phase = 8; + rc = tgt_mod_init(); + if (rc) + goto cleanup; + return 0; + +cleanup: + switch (cleanup_phase) { + case 8: + ptlrpc_nrs_fini(); + /* Fall through */ + case 7: + sptlrpc_fini(); + /* Fall through */ + case 6: + ldlm_exit(); + /* Fall through */ + case 5: + ptlrpc_stop_pinger(); + /* Fall through */ + case 4: + ptlrpc_connection_fini(); + /* Fall through */ + case 3: + ptlrpc_exit_portals(); + /* Fall through */ + case 2: + ptlrpc_request_cache_fini(); + /* Fall through */ + case 1: + ptlrpc_hr_fini(); + req_layout_fini(); + /* Fall through */ + default: ; + } + + return rc; +} + +static void __exit ptlrpc_exit(void) +{ + tgt_mod_exit(); + ptlrpc_nrs_fini(); + sptlrpc_fini(); + ldlm_exit(); + ptlrpc_stop_pinger(); + ptlrpc_exit_portals(); + ptlrpc_request_cache_fini(); + ptlrpc_hr_fini(); + ptlrpc_connection_fini(); +} + +MODULE_AUTHOR("Sun Microsystems, Inc. <http://www.lustre.org/>"); +MODULE_DESCRIPTION("Lustre Request Processor and Lock Management"); +MODULE_LICENSE("GPL"); +MODULE_VERSION("1.0.0"); + +module_init(ptlrpc_init); +module_exit(ptlrpc_exit); diff --git a/drivers/staging/lustre/lustre/ptlrpc/ptlrpcd.c b/drivers/staging/lustre/lustre/ptlrpc/ptlrpcd.c new file mode 100644 index 000000000..0c178ec0e --- /dev/null +++ b/drivers/staging/lustre/lustre/ptlrpc/ptlrpcd.c @@ -0,0 +1,811 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2011, 2012, Intel Corporation. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + * + * lustre/ptlrpc/ptlrpcd.c + */ + +/** \defgroup ptlrpcd PortalRPC daemon + * + * ptlrpcd is a special thread with its own set where other user might add + * requests when they don't want to wait for their completion. + * PtlRPCD will take care of sending such requests and then processing their + * replies and calling completion callbacks as necessary. + * The callbacks are called directly from ptlrpcd context. + * It is important to never significantly block (esp. on RPCs!) within such + * completion handler or a deadlock might occur where ptlrpcd enters some + * callback that attempts to send another RPC and wait for it to return, + * during which time ptlrpcd is completely blocked, so e.g. if import + * fails, recovery cannot progress because connection requests are also + * sent by ptlrpcd. + * + * @{ + */ + +#define DEBUG_SUBSYSTEM S_RPC + +#include "../../include/linux/libcfs/libcfs.h" + +#include "../include/lustre_net.h" +#include "../include/lustre_lib.h" +#include "../include/lustre_ha.h" +#include "../include/obd_class.h" /* for obd_zombie */ +#include "../include/obd_support.h" /* for OBD_FAIL_CHECK */ +#include "../include/cl_object.h" /* cl_env_{get,put}() */ +#include "../include/lprocfs_status.h" + +#include "ptlrpc_internal.h" + +struct ptlrpcd { + int pd_size; + int pd_index; + int pd_nthreads; + struct ptlrpcd_ctl pd_thread_rcv; + struct ptlrpcd_ctl pd_threads[0]; +}; + +static int max_ptlrpcds; +module_param(max_ptlrpcds, int, 0644); +MODULE_PARM_DESC(max_ptlrpcds, "Max ptlrpcd thread count to be started."); + +static int ptlrpcd_bind_policy = PDB_POLICY_PAIR; +module_param(ptlrpcd_bind_policy, int, 0644); +MODULE_PARM_DESC(ptlrpcd_bind_policy, "Ptlrpcd threads binding mode."); +static struct ptlrpcd *ptlrpcds; + +struct mutex ptlrpcd_mutex; +static int ptlrpcd_users; + +void ptlrpcd_wake(struct ptlrpc_request *req) +{ + struct ptlrpc_request_set *rq_set = req->rq_set; + + LASSERT(rq_set != NULL); + + wake_up(&rq_set->set_waitq); +} +EXPORT_SYMBOL(ptlrpcd_wake); + +static struct ptlrpcd_ctl * +ptlrpcd_select_pc(struct ptlrpc_request *req, pdl_policy_t policy, int index) +{ + int idx = 0; + + if (req != NULL && req->rq_send_state != LUSTRE_IMP_FULL) + return &ptlrpcds->pd_thread_rcv; + + switch (policy) { + case PDL_POLICY_SAME: + idx = smp_processor_id() % ptlrpcds->pd_nthreads; + break; + case PDL_POLICY_LOCAL: + /* Before CPU partition patches available, process it the same + * as "PDL_POLICY_ROUND". */ +# ifdef CFS_CPU_MODE_NUMA +# warning "fix this code to use new CPU partition APIs" +# endif + /* Fall through to PDL_POLICY_ROUND until the CPU + * CPU partition patches are available. */ + index = -1; + case PDL_POLICY_PREFERRED: + if (index >= 0 && index < num_online_cpus()) { + idx = index % ptlrpcds->pd_nthreads; + break; + } + /* Fall through to PDL_POLICY_ROUND for bad index. */ + default: + /* Fall through to PDL_POLICY_ROUND for unknown policy. */ + case PDL_POLICY_ROUND: + /* We do not care whether it is strict load balance. */ + idx = ptlrpcds->pd_index + 1; + if (idx == smp_processor_id()) + idx++; + idx %= ptlrpcds->pd_nthreads; + ptlrpcds->pd_index = idx; + break; + } + + return &ptlrpcds->pd_threads[idx]; +} + +/** + * Move all request from an existing request set to the ptlrpcd queue. + * All requests from the set must be in phase RQ_PHASE_NEW. + */ +void ptlrpcd_add_rqset(struct ptlrpc_request_set *set) +{ + struct list_head *tmp, *pos; + struct ptlrpcd_ctl *pc; + struct ptlrpc_request_set *new; + int count, i; + + pc = ptlrpcd_select_pc(NULL, PDL_POLICY_LOCAL, -1); + new = pc->pc_set; + + list_for_each_safe(pos, tmp, &set->set_requests) { + struct ptlrpc_request *req = + list_entry(pos, struct ptlrpc_request, + rq_set_chain); + + LASSERT(req->rq_phase == RQ_PHASE_NEW); + req->rq_set = new; + req->rq_queued_time = cfs_time_current(); + } + + spin_lock(&new->set_new_req_lock); + list_splice_init(&set->set_requests, &new->set_new_requests); + i = atomic_read(&set->set_remaining); + count = atomic_add_return(i, &new->set_new_count); + atomic_set(&set->set_remaining, 0); + spin_unlock(&new->set_new_req_lock); + if (count == i) { + wake_up(&new->set_waitq); + + /* XXX: It maybe unnecessary to wakeup all the partners. But to + * guarantee the async RPC can be processed ASAP, we have + * no other better choice. It maybe fixed in future. */ + for (i = 0; i < pc->pc_npartners; i++) + wake_up(&pc->pc_partners[i]->pc_set->set_waitq); + } +} +EXPORT_SYMBOL(ptlrpcd_add_rqset); + +/** + * Return transferred RPCs count. + */ +static int ptlrpcd_steal_rqset(struct ptlrpc_request_set *des, + struct ptlrpc_request_set *src) +{ + struct list_head *tmp, *pos; + struct ptlrpc_request *req; + int rc = 0; + + spin_lock(&src->set_new_req_lock); + if (likely(!list_empty(&src->set_new_requests))) { + list_for_each_safe(pos, tmp, &src->set_new_requests) { + req = list_entry(pos, struct ptlrpc_request, + rq_set_chain); + req->rq_set = des; + } + list_splice_init(&src->set_new_requests, + &des->set_requests); + rc = atomic_read(&src->set_new_count); + atomic_add(rc, &des->set_remaining); + atomic_set(&src->set_new_count, 0); + } + spin_unlock(&src->set_new_req_lock); + return rc; +} + +/** + * Requests that are added to the ptlrpcd queue are sent via + * ptlrpcd_check->ptlrpc_check_set(). + */ +void ptlrpcd_add_req(struct ptlrpc_request *req, pdl_policy_t policy, int idx) +{ + struct ptlrpcd_ctl *pc; + + if (req->rq_reqmsg) + lustre_msg_set_jobid(req->rq_reqmsg, NULL); + + spin_lock(&req->rq_lock); + if (req->rq_invalid_rqset) { + struct l_wait_info lwi = LWI_TIMEOUT(cfs_time_seconds(5), + back_to_sleep, NULL); + + req->rq_invalid_rqset = 0; + spin_unlock(&req->rq_lock); + l_wait_event(req->rq_set_waitq, (req->rq_set == NULL), &lwi); + } else if (req->rq_set) { + /* If we have a valid "rq_set", just reuse it to avoid double + * linked. */ + LASSERT(req->rq_phase == RQ_PHASE_NEW); + LASSERT(req->rq_send_state == LUSTRE_IMP_REPLAY); + + /* ptlrpc_check_set will decrease the count */ + atomic_inc(&req->rq_set->set_remaining); + spin_unlock(&req->rq_lock); + wake_up(&req->rq_set->set_waitq); + return; + } else { + spin_unlock(&req->rq_lock); + } + + pc = ptlrpcd_select_pc(req, policy, idx); + + DEBUG_REQ(D_INFO, req, "add req [%p] to pc [%s:%d]", + req, pc->pc_name, pc->pc_index); + + ptlrpc_set_add_new_req(pc, req); +} +EXPORT_SYMBOL(ptlrpcd_add_req); + +static inline void ptlrpc_reqset_get(struct ptlrpc_request_set *set) +{ + atomic_inc(&set->set_refcount); +} + +/** + * Check if there is more work to do on ptlrpcd set. + * Returns 1 if yes. + */ +static int ptlrpcd_check(struct lu_env *env, struct ptlrpcd_ctl *pc) +{ + struct list_head *tmp, *pos; + struct ptlrpc_request *req; + struct ptlrpc_request_set *set = pc->pc_set; + int rc = 0; + int rc2; + + if (atomic_read(&set->set_new_count)) { + spin_lock(&set->set_new_req_lock); + if (likely(!list_empty(&set->set_new_requests))) { + list_splice_init(&set->set_new_requests, + &set->set_requests); + atomic_add(atomic_read(&set->set_new_count), + &set->set_remaining); + atomic_set(&set->set_new_count, 0); + /* + * Need to calculate its timeout. + */ + rc = 1; + } + spin_unlock(&set->set_new_req_lock); + } + + /* We should call lu_env_refill() before handling new requests to make + * sure that env key the requests depending on really exists. + */ + rc2 = lu_env_refill(env); + if (rc2 != 0) { + /* + * XXX This is very awkward situation, because + * execution can neither continue (request + * interpreters assume that env is set up), nor repeat + * the loop (as this potentially results in a tight + * loop of -ENOMEM's). + * + * Fortunately, refill only ever does something when + * new modules are loaded, i.e., early during boot up. + */ + CERROR("Failure to refill session: %d\n", rc2); + return rc; + } + + if (atomic_read(&set->set_remaining)) + rc |= ptlrpc_check_set(env, set); + + /* NB: ptlrpc_check_set has already moved completed request at the + * head of seq::set_requests */ + list_for_each_safe(pos, tmp, &set->set_requests) { + req = list_entry(pos, struct ptlrpc_request, rq_set_chain); + if (req->rq_phase != RQ_PHASE_COMPLETE) + break; + + list_del_init(&req->rq_set_chain); + req->rq_set = NULL; + ptlrpc_req_finished(req); + } + + if (rc == 0) { + /* + * If new requests have been added, make sure to wake up. + */ + rc = atomic_read(&set->set_new_count); + + /* If we have nothing to do, check whether we can take some + * work from our partner threads. */ + if (rc == 0 && pc->pc_npartners > 0) { + struct ptlrpcd_ctl *partner; + struct ptlrpc_request_set *ps; + int first = pc->pc_cursor; + + do { + partner = pc->pc_partners[pc->pc_cursor++]; + if (pc->pc_cursor >= pc->pc_npartners) + pc->pc_cursor = 0; + if (partner == NULL) + continue; + + spin_lock(&partner->pc_lock); + ps = partner->pc_set; + if (ps == NULL) { + spin_unlock(&partner->pc_lock); + continue; + } + + ptlrpc_reqset_get(ps); + spin_unlock(&partner->pc_lock); + + if (atomic_read(&ps->set_new_count)) { + rc = ptlrpcd_steal_rqset(set, ps); + if (rc > 0) + CDEBUG(D_RPCTRACE, "transfer %d async RPCs [%d->%d]\n", + rc, partner->pc_index, + pc->pc_index); + } + ptlrpc_reqset_put(ps); + } while (rc == 0 && pc->pc_cursor != first); + } + } + + return rc; +} + +/** + * Main ptlrpcd thread. + * ptlrpc's code paths like to execute in process context, so we have this + * thread which spins on a set which contains the rpcs and sends them. + * + */ +static int ptlrpcd(void *arg) +{ + struct ptlrpcd_ctl *pc = arg; + struct ptlrpc_request_set *set = pc->pc_set; + struct lu_env env = { .le_ses = NULL }; + int rc, exit = 0; + + unshare_fs_struct(); +#if defined(CONFIG_SMP) + if (test_bit(LIOD_BIND, &pc->pc_flags)) { + int index = pc->pc_index; + + if (index >= 0 && index < num_possible_cpus()) { + while (!cpu_online(index)) { + if (++index >= num_possible_cpus()) + index = 0; + } + set_cpus_allowed_ptr(current, + cpumask_of_node(cpu_to_node(index))); + } + } +#endif + /* + * XXX So far only "client" ptlrpcd uses an environment. In + * the future, ptlrpcd thread (or a thread-set) has to given + * an argument, describing its "scope". + */ + rc = lu_context_init(&env.le_ctx, + LCT_CL_THREAD|LCT_REMEMBER|LCT_NOREF); + complete(&pc->pc_starting); + + if (rc != 0) + return rc; + + /* + * This mainloop strongly resembles ptlrpc_set_wait() except that our + * set never completes. ptlrpcd_check() calls ptlrpc_check_set() when + * there are requests in the set. New requests come in on the set's + * new_req_list and ptlrpcd_check() moves them into the set. + */ + do { + struct l_wait_info lwi; + int timeout; + + timeout = ptlrpc_set_next_timeout(set); + lwi = LWI_TIMEOUT(cfs_time_seconds(timeout ? timeout : 1), + ptlrpc_expired_set, set); + + lu_context_enter(&env.le_ctx); + l_wait_event(set->set_waitq, + ptlrpcd_check(&env, pc), &lwi); + lu_context_exit(&env.le_ctx); + + /* + * Abort inflight rpcs for forced stop case. + */ + if (test_bit(LIOD_STOP, &pc->pc_flags)) { + if (test_bit(LIOD_FORCE, &pc->pc_flags)) + ptlrpc_abort_set(set); + exit++; + } + + /* + * Let's make one more loop to make sure that ptlrpcd_check() + * copied all raced new rpcs into the set so we can kill them. + */ + } while (exit < 2); + + /* + * Wait for inflight requests to drain. + */ + if (!list_empty(&set->set_requests)) + ptlrpc_set_wait(set); + lu_context_fini(&env.le_ctx); + + complete(&pc->pc_finishing); + + return 0; +} + +/* XXX: We want multiple CPU cores to share the async RPC load. So we start many + * ptlrpcd threads. We also want to reduce the ptlrpcd overhead caused by + * data transfer cross-CPU cores. So we bind ptlrpcd thread to specified + * CPU core. But binding all ptlrpcd threads maybe cause response delay + * because of some CPU core(s) busy with other loads. + * + * For example: "ls -l", some async RPCs for statahead are assigned to + * ptlrpcd_0, and ptlrpcd_0 is bound to CPU_0, but CPU_0 may be quite busy + * with other non-ptlrpcd, like "ls -l" itself (we want to the "ls -l" + * thread, statahead thread, and ptlrpcd thread can run in parallel), under + * such case, the statahead async RPCs can not be processed in time, it is + * unexpected. If ptlrpcd_0 can be re-scheduled on other CPU core, it may + * be better. But it breaks former data transfer policy. + * + * So we shouldn't be blind for avoiding the data transfer. We make some + * compromise: divide the ptlrpcd threads pool into two parts. One part is + * for bound mode, each ptlrpcd thread in this part is bound to some CPU + * core. The other part is for free mode, all the ptlrpcd threads in the + * part can be scheduled on any CPU core. We specify some partnership + * between bound mode ptlrpcd thread(s) and free mode ptlrpcd thread(s), + * and the async RPC load within the partners are shared. + * + * It can partly avoid data transfer cross-CPU (if the bound mode ptlrpcd + * thread can be scheduled in time), and try to guarantee the async RPC + * processed ASAP (as long as the free mode ptlrpcd thread can be scheduled + * on any CPU core). + * + * As for how to specify the partnership between bound mode ptlrpcd + * thread(s) and free mode ptlrpcd thread(s), the simplest way is to use + * <free bound> pair. In future, we can specify some more complex + * partnership based on the patches for CPU partition. But before such + * patches are available, we prefer to use the simplest one. + */ +# ifdef CFS_CPU_MODE_NUMA +# warning "fix ptlrpcd_bind() to use new CPU partition APIs" +# endif +static int ptlrpcd_bind(int index, int max) +{ + struct ptlrpcd_ctl *pc; + int rc = 0; +#if defined(CONFIG_NUMA) + cpumask_t mask; +#endif + + LASSERT(index <= max - 1); + pc = &ptlrpcds->pd_threads[index]; + switch (ptlrpcd_bind_policy) { + case PDB_POLICY_NONE: + pc->pc_npartners = -1; + break; + case PDB_POLICY_FULL: + pc->pc_npartners = 0; + set_bit(LIOD_BIND, &pc->pc_flags); + break; + case PDB_POLICY_PAIR: + LASSERT(max % 2 == 0); + pc->pc_npartners = 1; + break; + case PDB_POLICY_NEIGHBOR: +#if defined(CONFIG_NUMA) + { + int i; + cpumask_copy(&mask, cpumask_of_node(cpu_to_node(index))); + for (i = max; i < num_online_cpus(); i++) + cpumask_clear_cpu(i, &mask); + pc->pc_npartners = cpumask_weight(&mask) - 1; + set_bit(LIOD_BIND, &pc->pc_flags); + } +#else + LASSERT(max >= 3); + pc->pc_npartners = 2; +#endif + break; + default: + CERROR("unknown ptlrpcd bind policy %d\n", ptlrpcd_bind_policy); + rc = -EINVAL; + } + + if (rc == 0 && pc->pc_npartners > 0) { + OBD_ALLOC(pc->pc_partners, + sizeof(struct ptlrpcd_ctl *) * pc->pc_npartners); + if (pc->pc_partners == NULL) { + pc->pc_npartners = 0; + rc = -ENOMEM; + } else { + switch (ptlrpcd_bind_policy) { + case PDB_POLICY_PAIR: + if (index & 0x1) { + set_bit(LIOD_BIND, &pc->pc_flags); + pc->pc_partners[0] = &ptlrpcds-> + pd_threads[index - 1]; + ptlrpcds->pd_threads[index - 1]. + pc_partners[0] = pc; + } + break; + case PDB_POLICY_NEIGHBOR: +#if defined(CONFIG_NUMA) + { + struct ptlrpcd_ctl *ppc; + int i, pidx; + /* partners are cores in the same NUMA node. + * setup partnership only with ptlrpcd threads + * that are already initialized + */ + for (pidx = 0, i = 0; i < index; i++) { + if (cpumask_test_cpu(i, &mask)) { + ppc = &ptlrpcds->pd_threads[i]; + pc->pc_partners[pidx++] = ppc; + ppc->pc_partners[ppc-> + pc_npartners++] = pc; + } + } + /* adjust number of partners to the number + * of partnership really setup */ + pc->pc_npartners = pidx; + } +#else + if (index & 0x1) + set_bit(LIOD_BIND, &pc->pc_flags); + if (index > 0) { + pc->pc_partners[0] = &ptlrpcds-> + pd_threads[index - 1]; + ptlrpcds->pd_threads[index - 1]. + pc_partners[1] = pc; + if (index == max - 1) { + pc->pc_partners[1] = + &ptlrpcds->pd_threads[0]; + ptlrpcds->pd_threads[0]. + pc_partners[0] = pc; + } + } +#endif + break; + } + } + } + + return rc; +} + + +int ptlrpcd_start(int index, int max, const char *name, struct ptlrpcd_ctl *pc) +{ + int rc; + + /* + * Do not allow start second thread for one pc. + */ + if (test_and_set_bit(LIOD_START, &pc->pc_flags)) { + CWARN("Starting second thread (%s) for same pc %p\n", + name, pc); + return 0; + } + + pc->pc_index = index; + init_completion(&pc->pc_starting); + init_completion(&pc->pc_finishing); + spin_lock_init(&pc->pc_lock); + strlcpy(pc->pc_name, name, sizeof(pc->pc_name)); + pc->pc_set = ptlrpc_prep_set(); + if (pc->pc_set == NULL) { + rc = -ENOMEM; + goto out; + } + + /* + * So far only "client" ptlrpcd uses an environment. In the future, + * ptlrpcd thread (or a thread-set) has to be given an argument, + * describing its "scope". + */ + rc = lu_context_init(&pc->pc_env.le_ctx, LCT_CL_THREAD|LCT_REMEMBER); + if (rc != 0) + goto out_set; + + { + struct task_struct *task; + if (index >= 0) { + rc = ptlrpcd_bind(index, max); + if (rc < 0) + goto out_env; + } + + task = kthread_run(ptlrpcd, pc, "%s", pc->pc_name); + if (IS_ERR(task)) { + rc = PTR_ERR(task); + goto out_env; + } + + wait_for_completion(&pc->pc_starting); + } + return 0; + +out_env: + lu_context_fini(&pc->pc_env.le_ctx); + +out_set: + if (pc->pc_set != NULL) { + struct ptlrpc_request_set *set = pc->pc_set; + + spin_lock(&pc->pc_lock); + pc->pc_set = NULL; + spin_unlock(&pc->pc_lock); + ptlrpc_set_destroy(set); + } + clear_bit(LIOD_BIND, &pc->pc_flags); + +out: + clear_bit(LIOD_START, &pc->pc_flags); + return rc; +} + +void ptlrpcd_stop(struct ptlrpcd_ctl *pc, int force) +{ + if (!test_bit(LIOD_START, &pc->pc_flags)) { + CWARN("Thread for pc %p was not started\n", pc); + return; + } + + set_bit(LIOD_STOP, &pc->pc_flags); + if (force) + set_bit(LIOD_FORCE, &pc->pc_flags); + wake_up(&pc->pc_set->set_waitq); +} + +void ptlrpcd_free(struct ptlrpcd_ctl *pc) +{ + struct ptlrpc_request_set *set = pc->pc_set; + + if (!test_bit(LIOD_START, &pc->pc_flags)) { + CWARN("Thread for pc %p was not started\n", pc); + goto out; + } + + wait_for_completion(&pc->pc_finishing); + lu_context_fini(&pc->pc_env.le_ctx); + + spin_lock(&pc->pc_lock); + pc->pc_set = NULL; + spin_unlock(&pc->pc_lock); + ptlrpc_set_destroy(set); + + clear_bit(LIOD_START, &pc->pc_flags); + clear_bit(LIOD_STOP, &pc->pc_flags); + clear_bit(LIOD_FORCE, &pc->pc_flags); + clear_bit(LIOD_BIND, &pc->pc_flags); + +out: + if (pc->pc_npartners > 0) { + LASSERT(pc->pc_partners != NULL); + + OBD_FREE(pc->pc_partners, + sizeof(struct ptlrpcd_ctl *) * pc->pc_npartners); + pc->pc_partners = NULL; + } + pc->pc_npartners = 0; +} + +static void ptlrpcd_fini(void) +{ + int i; + + if (ptlrpcds != NULL) { + for (i = 0; i < ptlrpcds->pd_nthreads; i++) + ptlrpcd_stop(&ptlrpcds->pd_threads[i], 0); + for (i = 0; i < ptlrpcds->pd_nthreads; i++) + ptlrpcd_free(&ptlrpcds->pd_threads[i]); + ptlrpcd_stop(&ptlrpcds->pd_thread_rcv, 0); + ptlrpcd_free(&ptlrpcds->pd_thread_rcv); + OBD_FREE(ptlrpcds, ptlrpcds->pd_size); + ptlrpcds = NULL; + } +} + +static int ptlrpcd_init(void) +{ + int nthreads = num_online_cpus(); + char name[16]; + int size, i = -1, j, rc = 0; + + if (max_ptlrpcds > 0 && max_ptlrpcds < nthreads) + nthreads = max_ptlrpcds; + if (nthreads < 2) + nthreads = 2; + if (nthreads < 3 && ptlrpcd_bind_policy == PDB_POLICY_NEIGHBOR) + ptlrpcd_bind_policy = PDB_POLICY_PAIR; + else if (nthreads % 2 != 0 && ptlrpcd_bind_policy == PDB_POLICY_PAIR) + nthreads &= ~1; /* make sure it is even */ + + size = offsetof(struct ptlrpcd, pd_threads[nthreads]); + OBD_ALLOC(ptlrpcds, size); + if (ptlrpcds == NULL) { + rc = -ENOMEM; + goto out; + } + + snprintf(name, sizeof(name), "ptlrpcd_rcv"); + set_bit(LIOD_RECOVERY, &ptlrpcds->pd_thread_rcv.pc_flags); + rc = ptlrpcd_start(-1, nthreads, name, &ptlrpcds->pd_thread_rcv); + if (rc < 0) + goto out; + + /* XXX: We start nthreads ptlrpc daemons. Each of them can process any + * non-recovery async RPC to improve overall async RPC efficiency. + * + * But there are some issues with async I/O RPCs and async non-I/O + * RPCs processed in the same set under some cases. The ptlrpcd may + * be blocked by some async I/O RPC(s), then will cause other async + * non-I/O RPC(s) can not be processed in time. + * + * Maybe we should distinguish blocked async RPCs from non-blocked + * async RPCs, and process them in different ptlrpcd sets to avoid + * unnecessary dependency. But how to distribute async RPCs load + * among all the ptlrpc daemons becomes another trouble. */ + for (i = 0; i < nthreads; i++) { + snprintf(name, sizeof(name), "ptlrpcd_%d", i); + rc = ptlrpcd_start(i, nthreads, name, &ptlrpcds->pd_threads[i]); + if (rc < 0) + goto out; + } + + ptlrpcds->pd_size = size; + ptlrpcds->pd_index = 0; + ptlrpcds->pd_nthreads = nthreads; + +out: + if (rc != 0 && ptlrpcds != NULL) { + for (j = 0; j <= i; j++) + ptlrpcd_stop(&ptlrpcds->pd_threads[j], 0); + for (j = 0; j <= i; j++) + ptlrpcd_free(&ptlrpcds->pd_threads[j]); + ptlrpcd_stop(&ptlrpcds->pd_thread_rcv, 0); + ptlrpcd_free(&ptlrpcds->pd_thread_rcv); + OBD_FREE(ptlrpcds, size); + ptlrpcds = NULL; + } + + return 0; +} + +int ptlrpcd_addref(void) +{ + int rc = 0; + + mutex_lock(&ptlrpcd_mutex); + if (++ptlrpcd_users == 1) + rc = ptlrpcd_init(); + mutex_unlock(&ptlrpcd_mutex); + return rc; +} +EXPORT_SYMBOL(ptlrpcd_addref); + +void ptlrpcd_decref(void) +{ + mutex_lock(&ptlrpcd_mutex); + if (--ptlrpcd_users == 0) + ptlrpcd_fini(); + mutex_unlock(&ptlrpcd_mutex); +} +EXPORT_SYMBOL(ptlrpcd_decref); +/** @} ptlrpcd */ diff --git a/drivers/staging/lustre/lustre/ptlrpc/recover.c b/drivers/staging/lustre/lustre/ptlrpc/recover.c new file mode 100644 index 000000000..7b1d72947 --- /dev/null +++ b/drivers/staging/lustre/lustre/ptlrpc/recover.c @@ -0,0 +1,379 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2011, 2012, Intel Corporation. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + * + * lustre/ptlrpc/recover.c + * + * Author: Mike Shaver <shaver@clusterfs.com> + */ + +#define DEBUG_SUBSYSTEM S_RPC +#include "../../include/linux/libcfs/libcfs.h" + +#include "../include/obd_support.h" +#include "../include/lustre_ha.h" +#include "../include/lustre_net.h" +#include "../include/lustre_import.h" +#include "../include/lustre_export.h" +#include "../include/obd.h" +#include "../include/obd_class.h" +#include <linux/list.h> + +#include "ptlrpc_internal.h" + +/** + * Start recovery on disconnected import. + * This is done by just attempting a connect + */ +void ptlrpc_initiate_recovery(struct obd_import *imp) +{ + CDEBUG(D_HA, "%s: starting recovery\n", obd2cli_tgt(imp->imp_obd)); + ptlrpc_connect_import(imp); +} + +/** + * Identify what request from replay list needs to be replayed next + * (based on what we have already replayed) and send it to server. + */ +int ptlrpc_replay_next(struct obd_import *imp, int *inflight) +{ + int rc = 0; + struct list_head *tmp, *pos; + struct ptlrpc_request *req = NULL; + __u64 last_transno; + + *inflight = 0; + + /* It might have committed some after we last spoke, so make sure we + * get rid of them now. + */ + spin_lock(&imp->imp_lock); + imp->imp_last_transno_checked = 0; + ptlrpc_free_committed(imp); + last_transno = imp->imp_last_replay_transno; + spin_unlock(&imp->imp_lock); + + CDEBUG(D_HA, "import %p from %s committed %llu last %llu\n", + imp, obd2cli_tgt(imp->imp_obd), + imp->imp_peer_committed_transno, last_transno); + + /* Do I need to hold a lock across this iteration? We shouldn't be + * racing with any additions to the list, because we're in recovery + * and are therefore not processing additional requests to add. Calls + * to ptlrpc_free_committed might commit requests, but nothing "newer" + * than the one we're replaying (it can't be committed until it's + * replayed, and we're doing that here). l_f_e_safe protects against + * problems with the current request being committed, in the unlikely + * event of that race. So, in conclusion, I think that it's safe to + * perform this list-walk without the imp_lock held. + * + * But, the {mdc,osc}_replay_open callbacks both iterate + * request lists, and have comments saying they assume the + * imp_lock is being held by ptlrpc_replay, but it's not. it's + * just a little race... + */ + + /* Replay all the committed open requests on committed_list first */ + if (!list_empty(&imp->imp_committed_list)) { + tmp = imp->imp_committed_list.prev; + req = list_entry(tmp, struct ptlrpc_request, + rq_replay_list); + + /* The last request on committed_list hasn't been replayed */ + if (req->rq_transno > last_transno) { + /* Since the imp_committed_list is immutable before + * all of it's requests being replayed, it's safe to + * use a cursor to accelerate the search */ + imp->imp_replay_cursor = imp->imp_replay_cursor->next; + + while (imp->imp_replay_cursor != + &imp->imp_committed_list) { + req = list_entry(imp->imp_replay_cursor, + struct ptlrpc_request, + rq_replay_list); + if (req->rq_transno > last_transno) + break; + + req = NULL; + imp->imp_replay_cursor = + imp->imp_replay_cursor->next; + } + } else { + /* All requests on committed_list have been replayed */ + imp->imp_replay_cursor = &imp->imp_committed_list; + req = NULL; + } + } + + /* All the requests in committed list have been replayed, let's replay + * the imp_replay_list */ + if (req == NULL) { + list_for_each_safe(tmp, pos, &imp->imp_replay_list) { + req = list_entry(tmp, struct ptlrpc_request, + rq_replay_list); + + if (req->rq_transno > last_transno) + break; + req = NULL; + } + } + + /* If need to resend the last sent transno (because a reconnect + * has occurred), then stop on the matching req and send it again. + * If, however, the last sent transno has been committed then we + * continue replay from the next request. */ + if (req != NULL && imp->imp_resend_replay) + lustre_msg_add_flags(req->rq_reqmsg, MSG_RESENT); + + spin_lock(&imp->imp_lock); + imp->imp_resend_replay = 0; + spin_unlock(&imp->imp_lock); + + if (req != NULL) { + rc = ptlrpc_replay_req(req); + if (rc) { + CERROR("recovery replay error %d for req %llu\n", + rc, req->rq_xid); + return rc; + } + *inflight = 1; + } + return rc; +} + +/** + * Schedule resending of request on sending_list. This is done after + * we completed replaying of requests and locks. + */ +int ptlrpc_resend(struct obd_import *imp) +{ + struct ptlrpc_request *req, *next; + + /* As long as we're in recovery, nothing should be added to the sending + * list, so we don't need to hold the lock during this iteration and + * resend process. + */ + /* Well... what if lctl recover is called twice at the same time? + */ + spin_lock(&imp->imp_lock); + if (imp->imp_state != LUSTRE_IMP_RECOVER) { + spin_unlock(&imp->imp_lock); + return -1; + } + + list_for_each_entry_safe(req, next, &imp->imp_sending_list, + rq_list) { + LASSERTF((long)req > PAGE_CACHE_SIZE && req != LP_POISON, + "req %p bad\n", req); + LASSERTF(req->rq_type != LI_POISON, "req %p freed\n", req); + if (!ptlrpc_no_resend(req)) + ptlrpc_resend_req(req); + } + spin_unlock(&imp->imp_lock); + + return 0; +} +EXPORT_SYMBOL(ptlrpc_resend); + +/** + * Go through all requests in delayed list and wake their threads + * for resending + */ +void ptlrpc_wake_delayed(struct obd_import *imp) +{ + struct list_head *tmp, *pos; + struct ptlrpc_request *req; + + spin_lock(&imp->imp_lock); + list_for_each_safe(tmp, pos, &imp->imp_delayed_list) { + req = list_entry(tmp, struct ptlrpc_request, rq_list); + + DEBUG_REQ(D_HA, req, "waking (set %p):", req->rq_set); + ptlrpc_client_wake_req(req); + } + spin_unlock(&imp->imp_lock); +} +EXPORT_SYMBOL(ptlrpc_wake_delayed); + +void ptlrpc_request_handle_notconn(struct ptlrpc_request *failed_req) +{ + struct obd_import *imp = failed_req->rq_import; + + CDEBUG(D_HA, "import %s of %s@%s abruptly disconnected: reconnecting\n", + imp->imp_obd->obd_name, obd2cli_tgt(imp->imp_obd), + imp->imp_connection->c_remote_uuid.uuid); + + if (ptlrpc_set_import_discon(imp, + lustre_msg_get_conn_cnt(failed_req->rq_reqmsg))) { + if (!imp->imp_replayable) { + CDEBUG(D_HA, "import %s@%s for %s not replayable, auto-deactivating\n", + obd2cli_tgt(imp->imp_obd), + imp->imp_connection->c_remote_uuid.uuid, + imp->imp_obd->obd_name); + ptlrpc_deactivate_import(imp); + } + /* to control recovery via lctl {disable|enable}_recovery */ + if (imp->imp_deactive == 0) + ptlrpc_connect_import(imp); + } + + /* Wait for recovery to complete and resend. If evicted, then + this request will be errored out later.*/ + spin_lock(&failed_req->rq_lock); + if (!failed_req->rq_no_resend) + failed_req->rq_resend = 1; + spin_unlock(&failed_req->rq_lock); +} + +/** + * Administratively active/deactive a client. + * This should only be called by the ioctl interface, currently + * - the lctl deactivate and activate commands + * - echo 0/1 >> /proc/osc/XXX/active + * - client umount -f (ll_umount_begin) + */ +int ptlrpc_set_import_active(struct obd_import *imp, int active) +{ + struct obd_device *obd = imp->imp_obd; + int rc = 0; + + LASSERT(obd); + + /* When deactivating, mark import invalid, and abort in-flight + * requests. */ + if (!active) { + LCONSOLE_WARN("setting import %s INACTIVE by administrator request\n", + obd2cli_tgt(imp->imp_obd)); + + /* set before invalidate to avoid messages about imp_inval + * set without imp_deactive in ptlrpc_import_delay_req */ + spin_lock(&imp->imp_lock); + imp->imp_deactive = 1; + spin_unlock(&imp->imp_lock); + + obd_import_event(imp->imp_obd, imp, IMP_EVENT_DEACTIVATE); + + ptlrpc_invalidate_import(imp); + } + + /* When activating, mark import valid, and attempt recovery */ + if (active) { + CDEBUG(D_HA, "setting import %s VALID\n", + obd2cli_tgt(imp->imp_obd)); + + spin_lock(&imp->imp_lock); + imp->imp_deactive = 0; + spin_unlock(&imp->imp_lock); + obd_import_event(imp->imp_obd, imp, IMP_EVENT_ACTIVATE); + + rc = ptlrpc_recover_import(imp, NULL, 0); + } + + return rc; +} +EXPORT_SYMBOL(ptlrpc_set_import_active); + +/* Attempt to reconnect an import */ +int ptlrpc_recover_import(struct obd_import *imp, char *new_uuid, int async) +{ + int rc = 0; + + spin_lock(&imp->imp_lock); + if (imp->imp_state == LUSTRE_IMP_NEW || imp->imp_deactive || + atomic_read(&imp->imp_inval_count)) + rc = -EINVAL; + spin_unlock(&imp->imp_lock); + if (rc) + goto out; + + /* force import to be disconnected. */ + ptlrpc_set_import_discon(imp, 0); + + if (new_uuid) { + struct obd_uuid uuid; + + /* intruct import to use new uuid */ + obd_str2uuid(&uuid, new_uuid); + rc = import_set_conn_priority(imp, &uuid); + if (rc) + goto out; + } + + /* Check if reconnect is already in progress */ + spin_lock(&imp->imp_lock); + if (imp->imp_state != LUSTRE_IMP_DISCON) { + imp->imp_force_verify = 1; + rc = -EALREADY; + } + spin_unlock(&imp->imp_lock); + if (rc) + goto out; + + rc = ptlrpc_connect_import(imp); + if (rc) + goto out; + + if (!async) { + struct l_wait_info lwi; + int secs = cfs_time_seconds(obd_timeout); + + CDEBUG(D_HA, "%s: recovery started, waiting %u seconds\n", + obd2cli_tgt(imp->imp_obd), secs); + + lwi = LWI_TIMEOUT(secs, NULL, NULL); + rc = l_wait_event(imp->imp_recovery_waitq, + !ptlrpc_import_in_recovery(imp), &lwi); + CDEBUG(D_HA, "%s: recovery finished\n", + obd2cli_tgt(imp->imp_obd)); + } + +out: + return rc; +} +EXPORT_SYMBOL(ptlrpc_recover_import); + +int ptlrpc_import_in_recovery(struct obd_import *imp) +{ + int in_recovery = 1; + + spin_lock(&imp->imp_lock); + if (imp->imp_state == LUSTRE_IMP_FULL || + imp->imp_state == LUSTRE_IMP_CLOSED || + imp->imp_state == LUSTRE_IMP_DISCON || + imp->imp_obd->obd_no_recov) + in_recovery = 0; + spin_unlock(&imp->imp_lock); + + return in_recovery; +} diff --git a/drivers/staging/lustre/lustre/ptlrpc/sec.c b/drivers/staging/lustre/lustre/ptlrpc/sec.c new file mode 100644 index 000000000..21e9dc9d5 --- /dev/null +++ b/drivers/staging/lustre/lustre/ptlrpc/sec.c @@ -0,0 +1,2459 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2011, 2012, Intel Corporation. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + * + * lustre/ptlrpc/sec.c + * + * Author: Eric Mei <ericm@clusterfs.com> + */ + +#define DEBUG_SUBSYSTEM S_SEC + +#include "../../include/linux/libcfs/libcfs.h" +#include <linux/crypto.h> +#include <linux/key.h> + +#include "../include/obd.h" +#include "../include/obd_class.h" +#include "../include/obd_support.h" +#include "../include/lustre_net.h" +#include "../include/lustre_import.h" +#include "../include/lustre_dlm.h" +#include "../include/lustre_sec.h" + +#include "ptlrpc_internal.h" + +/*********************************************** + * policy registers * + ***********************************************/ + +static rwlock_t policy_lock; +static struct ptlrpc_sec_policy *policies[SPTLRPC_POLICY_MAX] = { + NULL, +}; + +int sptlrpc_register_policy(struct ptlrpc_sec_policy *policy) +{ + __u16 number = policy->sp_policy; + + LASSERT(policy->sp_name); + LASSERT(policy->sp_cops); + LASSERT(policy->sp_sops); + + if (number >= SPTLRPC_POLICY_MAX) + return -EINVAL; + + write_lock(&policy_lock); + if (unlikely(policies[number])) { + write_unlock(&policy_lock); + return -EALREADY; + } + policies[number] = policy; + write_unlock(&policy_lock); + + CDEBUG(D_SEC, "%s: registered\n", policy->sp_name); + return 0; +} +EXPORT_SYMBOL(sptlrpc_register_policy); + +int sptlrpc_unregister_policy(struct ptlrpc_sec_policy *policy) +{ + __u16 number = policy->sp_policy; + + LASSERT(number < SPTLRPC_POLICY_MAX); + + write_lock(&policy_lock); + if (unlikely(policies[number] == NULL)) { + write_unlock(&policy_lock); + CERROR("%s: already unregistered\n", policy->sp_name); + return -EINVAL; + } + + LASSERT(policies[number] == policy); + policies[number] = NULL; + write_unlock(&policy_lock); + + CDEBUG(D_SEC, "%s: unregistered\n", policy->sp_name); + return 0; +} +EXPORT_SYMBOL(sptlrpc_unregister_policy); + +static +struct ptlrpc_sec_policy *sptlrpc_wireflavor2policy(__u32 flavor) +{ + static DEFINE_MUTEX(load_mutex); + static atomic_t loaded = ATOMIC_INIT(0); + struct ptlrpc_sec_policy *policy; + __u16 number = SPTLRPC_FLVR_POLICY(flavor); + __u16 flag = 0; + + if (number >= SPTLRPC_POLICY_MAX) + return NULL; + + while (1) { + read_lock(&policy_lock); + policy = policies[number]; + if (policy && !try_module_get(policy->sp_owner)) + policy = NULL; + if (policy == NULL) + flag = atomic_read(&loaded); + read_unlock(&policy_lock); + + if (policy != NULL || flag != 0 || + number != SPTLRPC_POLICY_GSS) + break; + + /* try to load gss module, once */ + mutex_lock(&load_mutex); + if (atomic_read(&loaded) == 0) { + if (request_module("ptlrpc_gss") == 0) + CDEBUG(D_SEC, + "module ptlrpc_gss loaded on demand\n"); + else + CERROR("Unable to load module ptlrpc_gss\n"); + + atomic_set(&loaded, 1); + } + mutex_unlock(&load_mutex); + } + + return policy; +} + +__u32 sptlrpc_name2flavor_base(const char *name) +{ + if (!strcmp(name, "null")) + return SPTLRPC_FLVR_NULL; + if (!strcmp(name, "plain")) + return SPTLRPC_FLVR_PLAIN; + if (!strcmp(name, "krb5n")) + return SPTLRPC_FLVR_KRB5N; + if (!strcmp(name, "krb5a")) + return SPTLRPC_FLVR_KRB5A; + if (!strcmp(name, "krb5i")) + return SPTLRPC_FLVR_KRB5I; + if (!strcmp(name, "krb5p")) + return SPTLRPC_FLVR_KRB5P; + + return SPTLRPC_FLVR_INVALID; +} +EXPORT_SYMBOL(sptlrpc_name2flavor_base); + +const char *sptlrpc_flavor2name_base(__u32 flvr) +{ + __u32 base = SPTLRPC_FLVR_BASE(flvr); + + if (base == SPTLRPC_FLVR_BASE(SPTLRPC_FLVR_NULL)) + return "null"; + else if (base == SPTLRPC_FLVR_BASE(SPTLRPC_FLVR_PLAIN)) + return "plain"; + else if (base == SPTLRPC_FLVR_BASE(SPTLRPC_FLVR_KRB5N)) + return "krb5n"; + else if (base == SPTLRPC_FLVR_BASE(SPTLRPC_FLVR_KRB5A)) + return "krb5a"; + else if (base == SPTLRPC_FLVR_BASE(SPTLRPC_FLVR_KRB5I)) + return "krb5i"; + else if (base == SPTLRPC_FLVR_BASE(SPTLRPC_FLVR_KRB5P)) + return "krb5p"; + + CERROR("invalid wire flavor 0x%x\n", flvr); + return "invalid"; +} +EXPORT_SYMBOL(sptlrpc_flavor2name_base); + +char *sptlrpc_flavor2name_bulk(struct sptlrpc_flavor *sf, + char *buf, int bufsize) +{ + if (SPTLRPC_FLVR_POLICY(sf->sf_rpc) == SPTLRPC_POLICY_PLAIN) + snprintf(buf, bufsize, "hash:%s", + sptlrpc_get_hash_name(sf->u_bulk.hash.hash_alg)); + else + snprintf(buf, bufsize, "%s", + sptlrpc_flavor2name_base(sf->sf_rpc)); + + buf[bufsize - 1] = '\0'; + return buf; +} +EXPORT_SYMBOL(sptlrpc_flavor2name_bulk); + +char *sptlrpc_flavor2name(struct sptlrpc_flavor *sf, char *buf, int bufsize) +{ + strlcpy(buf, sptlrpc_flavor2name_base(sf->sf_rpc), bufsize); + + /* + * currently we don't support customized bulk specification for + * flavors other than plain + */ + if (SPTLRPC_FLVR_POLICY(sf->sf_rpc) == SPTLRPC_POLICY_PLAIN) { + char bspec[16]; + + bspec[0] = '-'; + sptlrpc_flavor2name_bulk(sf, &bspec[1], sizeof(bspec) - 1); + strlcat(buf, bspec, bufsize); + } + + return buf; +} +EXPORT_SYMBOL(sptlrpc_flavor2name); + +char *sptlrpc_secflags2str(__u32 flags, char *buf, int bufsize) +{ + buf[0] = '\0'; + + if (flags & PTLRPC_SEC_FL_REVERSE) + strlcat(buf, "reverse,", bufsize); + if (flags & PTLRPC_SEC_FL_ROOTONLY) + strlcat(buf, "rootonly,", bufsize); + if (flags & PTLRPC_SEC_FL_UDESC) + strlcat(buf, "udesc,", bufsize); + if (flags & PTLRPC_SEC_FL_BULK) + strlcat(buf, "bulk,", bufsize); + if (buf[0] == '\0') + strlcat(buf, "-,", bufsize); + + return buf; +} +EXPORT_SYMBOL(sptlrpc_secflags2str); + +/************************************************** + * client context APIs * + **************************************************/ + +static +struct ptlrpc_cli_ctx *get_my_ctx(struct ptlrpc_sec *sec) +{ + struct vfs_cred vcred; + int create = 1, remove_dead = 1; + + LASSERT(sec); + LASSERT(sec->ps_policy->sp_cops->lookup_ctx); + + if (sec->ps_flvr.sf_flags & (PTLRPC_SEC_FL_REVERSE | + PTLRPC_SEC_FL_ROOTONLY)) { + vcred.vc_uid = 0; + vcred.vc_gid = 0; + if (sec->ps_flvr.sf_flags & PTLRPC_SEC_FL_REVERSE) { + create = 0; + remove_dead = 0; + } + } else { + vcred.vc_uid = from_kuid(&init_user_ns, current_uid()); + vcred.vc_gid = from_kgid(&init_user_ns, current_gid()); + } + + return sec->ps_policy->sp_cops->lookup_ctx(sec, &vcred, + create, remove_dead); +} + +struct ptlrpc_cli_ctx *sptlrpc_cli_ctx_get(struct ptlrpc_cli_ctx *ctx) +{ + atomic_inc(&ctx->cc_refcount); + return ctx; +} +EXPORT_SYMBOL(sptlrpc_cli_ctx_get); + +void sptlrpc_cli_ctx_put(struct ptlrpc_cli_ctx *ctx, int sync) +{ + struct ptlrpc_sec *sec = ctx->cc_sec; + + LASSERT(sec); + LASSERT_ATOMIC_POS(&ctx->cc_refcount); + + if (!atomic_dec_and_test(&ctx->cc_refcount)) + return; + + sec->ps_policy->sp_cops->release_ctx(sec, ctx, sync); +} +EXPORT_SYMBOL(sptlrpc_cli_ctx_put); + +/** + * Expire the client context immediately. + * + * \pre Caller must hold at least 1 reference on the \a ctx. + */ +void sptlrpc_cli_ctx_expire(struct ptlrpc_cli_ctx *ctx) +{ + LASSERT(ctx->cc_ops->force_die); + ctx->cc_ops->force_die(ctx, 0); +} +EXPORT_SYMBOL(sptlrpc_cli_ctx_expire); + +/** + * To wake up the threads who are waiting for this client context. Called + * after some status change happened on \a ctx. + */ +void sptlrpc_cli_ctx_wakeup(struct ptlrpc_cli_ctx *ctx) +{ + struct ptlrpc_request *req, *next; + + spin_lock(&ctx->cc_lock); + list_for_each_entry_safe(req, next, &ctx->cc_req_list, + rq_ctx_chain) { + list_del_init(&req->rq_ctx_chain); + ptlrpc_client_wake_req(req); + } + spin_unlock(&ctx->cc_lock); +} +EXPORT_SYMBOL(sptlrpc_cli_ctx_wakeup); + +int sptlrpc_cli_ctx_display(struct ptlrpc_cli_ctx *ctx, char *buf, int bufsize) +{ + LASSERT(ctx->cc_ops); + + if (ctx->cc_ops->display == NULL) + return 0; + + return ctx->cc_ops->display(ctx, buf, bufsize); +} + +static int import_sec_check_expire(struct obd_import *imp) +{ + int adapt = 0; + + spin_lock(&imp->imp_lock); + if (imp->imp_sec_expire && + imp->imp_sec_expire < get_seconds()) { + adapt = 1; + imp->imp_sec_expire = 0; + } + spin_unlock(&imp->imp_lock); + + if (!adapt) + return 0; + + CDEBUG(D_SEC, "found delayed sec adapt expired, do it now\n"); + return sptlrpc_import_sec_adapt(imp, NULL, NULL); +} + +static int import_sec_validate_get(struct obd_import *imp, + struct ptlrpc_sec **sec) +{ + int rc; + + if (unlikely(imp->imp_sec_expire)) { + rc = import_sec_check_expire(imp); + if (rc) + return rc; + } + + *sec = sptlrpc_import_sec_ref(imp); + if (*sec == NULL) { + CERROR("import %p (%s) with no sec\n", + imp, ptlrpc_import_state_name(imp->imp_state)); + return -EACCES; + } + + if (unlikely((*sec)->ps_dying)) { + CERROR("attempt to use dying sec %p\n", sec); + sptlrpc_sec_put(*sec); + return -EACCES; + } + + return 0; +} + +/** + * Given a \a req, find or allocate a appropriate context for it. + * \pre req->rq_cli_ctx == NULL. + * + * \retval 0 succeed, and req->rq_cli_ctx is set. + * \retval -ev error number, and req->rq_cli_ctx == NULL. + */ +int sptlrpc_req_get_ctx(struct ptlrpc_request *req) +{ + struct obd_import *imp = req->rq_import; + struct ptlrpc_sec *sec; + int rc; + + LASSERT(!req->rq_cli_ctx); + LASSERT(imp); + + rc = import_sec_validate_get(imp, &sec); + if (rc) + return rc; + + req->rq_cli_ctx = get_my_ctx(sec); + + sptlrpc_sec_put(sec); + + if (!req->rq_cli_ctx) { + CERROR("req %p: fail to get context\n", req); + return -ENOMEM; + } + + return 0; +} + +/** + * Drop the context for \a req. + * \pre req->rq_cli_ctx != NULL. + * \post req->rq_cli_ctx == NULL. + * + * If \a sync == 0, this function should return quickly without sleep; + * otherwise it might trigger and wait for the whole process of sending + * an context-destroying rpc to server. + */ +void sptlrpc_req_put_ctx(struct ptlrpc_request *req, int sync) +{ + LASSERT(req); + LASSERT(req->rq_cli_ctx); + + /* request might be asked to release earlier while still + * in the context waiting list. + */ + if (!list_empty(&req->rq_ctx_chain)) { + spin_lock(&req->rq_cli_ctx->cc_lock); + list_del_init(&req->rq_ctx_chain); + spin_unlock(&req->rq_cli_ctx->cc_lock); + } + + sptlrpc_cli_ctx_put(req->rq_cli_ctx, sync); + req->rq_cli_ctx = NULL; +} + +static +int sptlrpc_req_ctx_switch(struct ptlrpc_request *req, + struct ptlrpc_cli_ctx *oldctx, + struct ptlrpc_cli_ctx *newctx) +{ + struct sptlrpc_flavor old_flvr; + char *reqmsg = NULL; /* to workaround old gcc */ + int reqmsg_size; + int rc = 0; + + LASSERT(req->rq_reqmsg); + LASSERT(req->rq_reqlen); + LASSERT(req->rq_replen); + + CDEBUG(D_SEC, "req %p: switch ctx %p(%u->%s) -> %p(%u->%s), switch sec %p(%s) -> %p(%s)\n", + req, + oldctx, oldctx->cc_vcred.vc_uid, sec2target_str(oldctx->cc_sec), + newctx, newctx->cc_vcred.vc_uid, sec2target_str(newctx->cc_sec), + oldctx->cc_sec, oldctx->cc_sec->ps_policy->sp_name, + newctx->cc_sec, newctx->cc_sec->ps_policy->sp_name); + + /* save flavor */ + old_flvr = req->rq_flvr; + + /* save request message */ + reqmsg_size = req->rq_reqlen; + if (reqmsg_size != 0) { + OBD_ALLOC_LARGE(reqmsg, reqmsg_size); + if (reqmsg == NULL) + return -ENOMEM; + memcpy(reqmsg, req->rq_reqmsg, reqmsg_size); + } + + /* release old req/rep buf */ + req->rq_cli_ctx = oldctx; + sptlrpc_cli_free_reqbuf(req); + sptlrpc_cli_free_repbuf(req); + req->rq_cli_ctx = newctx; + + /* recalculate the flavor */ + sptlrpc_req_set_flavor(req, 0); + + /* alloc new request buffer + * we don't need to alloc reply buffer here, leave it to the + * rest procedure of ptlrpc */ + if (reqmsg_size != 0) { + rc = sptlrpc_cli_alloc_reqbuf(req, reqmsg_size); + if (!rc) { + LASSERT(req->rq_reqmsg); + memcpy(req->rq_reqmsg, reqmsg, reqmsg_size); + } else { + CWARN("failed to alloc reqbuf: %d\n", rc); + req->rq_flvr = old_flvr; + } + + OBD_FREE_LARGE(reqmsg, reqmsg_size); + } + return rc; +} + +/** + * If current context of \a req is dead somehow, e.g. we just switched flavor + * thus marked original contexts dead, we'll find a new context for it. if + * no switch is needed, \a req will end up with the same context. + * + * \note a request must have a context, to keep other parts of code happy. + * In any case of failure during the switching, we must restore the old one. + */ +int sptlrpc_req_replace_dead_ctx(struct ptlrpc_request *req) +{ + struct ptlrpc_cli_ctx *oldctx = req->rq_cli_ctx; + struct ptlrpc_cli_ctx *newctx; + int rc; + + LASSERT(oldctx); + + sptlrpc_cli_ctx_get(oldctx); + sptlrpc_req_put_ctx(req, 0); + + rc = sptlrpc_req_get_ctx(req); + if (unlikely(rc)) { + LASSERT(!req->rq_cli_ctx); + + /* restore old ctx */ + req->rq_cli_ctx = oldctx; + return rc; + } + + newctx = req->rq_cli_ctx; + LASSERT(newctx); + + if (unlikely(newctx == oldctx && + test_bit(PTLRPC_CTX_DEAD_BIT, &oldctx->cc_flags))) { + /* + * still get the old dead ctx, usually means system too busy + */ + CDEBUG(D_SEC, + "ctx (%p, fl %lx) doesn't switch, relax a little bit\n", + newctx, newctx->cc_flags); + + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(HZ); + } else { + /* + * it's possible newctx == oldctx if we're switching + * subflavor with the same sec. + */ + rc = sptlrpc_req_ctx_switch(req, oldctx, newctx); + if (rc) { + /* restore old ctx */ + sptlrpc_req_put_ctx(req, 0); + req->rq_cli_ctx = oldctx; + return rc; + } + + LASSERT(req->rq_cli_ctx == newctx); + } + + sptlrpc_cli_ctx_put(oldctx, 1); + return 0; +} +EXPORT_SYMBOL(sptlrpc_req_replace_dead_ctx); + +static +int ctx_check_refresh(struct ptlrpc_cli_ctx *ctx) +{ + if (cli_ctx_is_refreshed(ctx)) + return 1; + return 0; +} + +static +int ctx_refresh_timeout(void *data) +{ + struct ptlrpc_request *req = data; + int rc; + + /* conn_cnt is needed in expire_one_request */ + lustre_msg_set_conn_cnt(req->rq_reqmsg, req->rq_import->imp_conn_cnt); + + rc = ptlrpc_expire_one_request(req, 1); + /* if we started recovery, we should mark this ctx dead; otherwise + * in case of lgssd died nobody would retire this ctx, following + * connecting will still find the same ctx thus cause deadlock. + * there's an assumption that expire time of the request should be + * later than the context refresh expire time. + */ + if (rc == 0) + req->rq_cli_ctx->cc_ops->force_die(req->rq_cli_ctx, 0); + return rc; +} + +static +void ctx_refresh_interrupt(void *data) +{ + struct ptlrpc_request *req = data; + + spin_lock(&req->rq_lock); + req->rq_intr = 1; + spin_unlock(&req->rq_lock); +} + +static +void req_off_ctx_list(struct ptlrpc_request *req, struct ptlrpc_cli_ctx *ctx) +{ + spin_lock(&ctx->cc_lock); + if (!list_empty(&req->rq_ctx_chain)) + list_del_init(&req->rq_ctx_chain); + spin_unlock(&ctx->cc_lock); +} + +/** + * To refresh the context of \req, if it's not up-to-date. + * \param timeout + * - < 0: don't wait + * - = 0: wait until success or fatal error occur + * - > 0: timeout value (in seconds) + * + * The status of the context could be subject to be changed by other threads + * at any time. We allow this race, but once we return with 0, the caller will + * suppose it's uptodated and keep using it until the owning rpc is done. + * + * \retval 0 only if the context is uptodated. + * \retval -ev error number. + */ +int sptlrpc_req_refresh_ctx(struct ptlrpc_request *req, long timeout) +{ + struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx; + struct ptlrpc_sec *sec; + struct l_wait_info lwi; + int rc; + + LASSERT(ctx); + + if (req->rq_ctx_init || req->rq_ctx_fini) + return 0; + + /* + * during the process a request's context might change type even + * (e.g. from gss ctx to null ctx), so each loop we need to re-check + * everything + */ +again: + rc = import_sec_validate_get(req->rq_import, &sec); + if (rc) + return rc; + + if (sec->ps_flvr.sf_rpc != req->rq_flvr.sf_rpc) { + CDEBUG(D_SEC, "req %p: flavor has changed %x -> %x\n", + req, req->rq_flvr.sf_rpc, sec->ps_flvr.sf_rpc); + req_off_ctx_list(req, ctx); + sptlrpc_req_replace_dead_ctx(req); + ctx = req->rq_cli_ctx; + } + sptlrpc_sec_put(sec); + + if (cli_ctx_is_eternal(ctx)) + return 0; + + if (unlikely(test_bit(PTLRPC_CTX_NEW_BIT, &ctx->cc_flags))) { + LASSERT(ctx->cc_ops->refresh); + ctx->cc_ops->refresh(ctx); + } + LASSERT(test_bit(PTLRPC_CTX_NEW_BIT, &ctx->cc_flags) == 0); + + LASSERT(ctx->cc_ops->validate); + if (ctx->cc_ops->validate(ctx) == 0) { + req_off_ctx_list(req, ctx); + return 0; + } + + if (unlikely(test_bit(PTLRPC_CTX_ERROR_BIT, &ctx->cc_flags))) { + spin_lock(&req->rq_lock); + req->rq_err = 1; + spin_unlock(&req->rq_lock); + req_off_ctx_list(req, ctx); + return -EPERM; + } + + /* + * There's a subtle issue for resending RPCs, suppose following + * situation: + * 1. the request was sent to server. + * 2. recovery was kicked start, after finished the request was + * marked as resent. + * 3. resend the request. + * 4. old reply from server received, we accept and verify the reply. + * this has to be success, otherwise the error will be aware + * by application. + * 5. new reply from server received, dropped by LNet. + * + * Note the xid of old & new request is the same. We can't simply + * change xid for the resent request because the server replies on + * it for reply reconstruction. + * + * Commonly the original context should be uptodate because we + * have a expiry nice time; server will keep its context because + * we at least hold a ref of old context which prevent context + * destroying RPC being sent. So server still can accept the request + * and finish the RPC. But if that's not the case: + * 1. If server side context has been trimmed, a NO_CONTEXT will + * be returned, gss_cli_ctx_verify/unseal will switch to new + * context by force. + * 2. Current context never be refreshed, then we are fine: we + * never really send request with old context before. + */ + if (test_bit(PTLRPC_CTX_UPTODATE_BIT, &ctx->cc_flags) && + unlikely(req->rq_reqmsg) && + lustre_msg_get_flags(req->rq_reqmsg) & MSG_RESENT) { + req_off_ctx_list(req, ctx); + return 0; + } + + if (unlikely(test_bit(PTLRPC_CTX_DEAD_BIT, &ctx->cc_flags))) { + req_off_ctx_list(req, ctx); + /* + * don't switch ctx if import was deactivated + */ + if (req->rq_import->imp_deactive) { + spin_lock(&req->rq_lock); + req->rq_err = 1; + spin_unlock(&req->rq_lock); + return -EINTR; + } + + rc = sptlrpc_req_replace_dead_ctx(req); + if (rc) { + LASSERT(ctx == req->rq_cli_ctx); + CERROR("req %p: failed to replace dead ctx %p: %d\n", + req, ctx, rc); + spin_lock(&req->rq_lock); + req->rq_err = 1; + spin_unlock(&req->rq_lock); + return rc; + } + + ctx = req->rq_cli_ctx; + goto again; + } + + /* + * Now we're sure this context is during upcall, add myself into + * waiting list + */ + spin_lock(&ctx->cc_lock); + if (list_empty(&req->rq_ctx_chain)) + list_add(&req->rq_ctx_chain, &ctx->cc_req_list); + spin_unlock(&ctx->cc_lock); + + if (timeout < 0) + return -EWOULDBLOCK; + + /* Clear any flags that may be present from previous sends */ + LASSERT(req->rq_receiving_reply == 0); + spin_lock(&req->rq_lock); + req->rq_err = 0; + req->rq_timedout = 0; + req->rq_resend = 0; + req->rq_restart = 0; + spin_unlock(&req->rq_lock); + + lwi = LWI_TIMEOUT_INTR(timeout * HZ, ctx_refresh_timeout, + ctx_refresh_interrupt, req); + rc = l_wait_event(req->rq_reply_waitq, ctx_check_refresh(ctx), &lwi); + + /* + * following cases could lead us here: + * - successfully refreshed; + * - interrupted; + * - timedout, and we don't want recover from the failure; + * - timedout, and waked up upon recovery finished; + * - someone else mark this ctx dead by force; + * - someone invalidate the req and call ptlrpc_client_wake_req(), + * e.g. ptlrpc_abort_inflight(); + */ + if (!cli_ctx_is_refreshed(ctx)) { + /* timed out or interrupted */ + req_off_ctx_list(req, ctx); + + LASSERT(rc != 0); + return rc; + } + + goto again; +} + +/** + * Initialize flavor settings for \a req, according to \a opcode. + * + * \note this could be called in two situations: + * - new request from ptlrpc_pre_req(), with proper @opcode + * - old request which changed ctx in the middle, with @opcode == 0 + */ +void sptlrpc_req_set_flavor(struct ptlrpc_request *req, int opcode) +{ + struct ptlrpc_sec *sec; + + LASSERT(req->rq_import); + LASSERT(req->rq_cli_ctx); + LASSERT(req->rq_cli_ctx->cc_sec); + LASSERT(req->rq_bulk_read == 0 || req->rq_bulk_write == 0); + + /* special security flags according to opcode */ + switch (opcode) { + case OST_READ: + case MDS_READPAGE: + case MGS_CONFIG_READ: + case OBD_IDX_READ: + req->rq_bulk_read = 1; + break; + case OST_WRITE: + case MDS_WRITEPAGE: + req->rq_bulk_write = 1; + break; + case SEC_CTX_INIT: + req->rq_ctx_init = 1; + break; + case SEC_CTX_FINI: + req->rq_ctx_fini = 1; + break; + case 0: + /* init/fini rpc won't be resend, so can't be here */ + LASSERT(req->rq_ctx_init == 0); + LASSERT(req->rq_ctx_fini == 0); + + /* cleanup flags, which should be recalculated */ + req->rq_pack_udesc = 0; + req->rq_pack_bulk = 0; + break; + } + + sec = req->rq_cli_ctx->cc_sec; + + spin_lock(&sec->ps_lock); + req->rq_flvr = sec->ps_flvr; + spin_unlock(&sec->ps_lock); + + /* force SVC_NULL for context initiation rpc, SVC_INTG for context + * destruction rpc */ + if (unlikely(req->rq_ctx_init)) + flvr_set_svc(&req->rq_flvr.sf_rpc, SPTLRPC_SVC_NULL); + else if (unlikely(req->rq_ctx_fini)) + flvr_set_svc(&req->rq_flvr.sf_rpc, SPTLRPC_SVC_INTG); + + /* user descriptor flag, null security can't do it anyway */ + if ((sec->ps_flvr.sf_flags & PTLRPC_SEC_FL_UDESC) && + (req->rq_flvr.sf_rpc != SPTLRPC_FLVR_NULL)) + req->rq_pack_udesc = 1; + + /* bulk security flag */ + if ((req->rq_bulk_read || req->rq_bulk_write) && + sptlrpc_flavor_has_bulk(&req->rq_flvr)) + req->rq_pack_bulk = 1; +} + +void sptlrpc_request_out_callback(struct ptlrpc_request *req) +{ + if (SPTLRPC_FLVR_SVC(req->rq_flvr.sf_rpc) != SPTLRPC_SVC_PRIV) + return; + + LASSERT(req->rq_clrbuf); + if (req->rq_pool || !req->rq_reqbuf) + return; + + OBD_FREE(req->rq_reqbuf, req->rq_reqbuf_len); + req->rq_reqbuf = NULL; + req->rq_reqbuf_len = 0; +} + +/** + * Given an import \a imp, check whether current user has a valid context + * or not. We may create a new context and try to refresh it, and try + * repeatedly try in case of non-fatal errors. Return 0 means success. + */ +int sptlrpc_import_check_ctx(struct obd_import *imp) +{ + struct ptlrpc_sec *sec; + struct ptlrpc_cli_ctx *ctx; + struct ptlrpc_request *req = NULL; + int rc; + + might_sleep(); + + sec = sptlrpc_import_sec_ref(imp); + ctx = get_my_ctx(sec); + sptlrpc_sec_put(sec); + + if (!ctx) + return -ENOMEM; + + if (cli_ctx_is_eternal(ctx) || + ctx->cc_ops->validate(ctx) == 0) { + sptlrpc_cli_ctx_put(ctx, 1); + return 0; + } + + if (cli_ctx_is_error(ctx)) { + sptlrpc_cli_ctx_put(ctx, 1); + return -EACCES; + } + + req = ptlrpc_request_cache_alloc(GFP_NOFS); + if (!req) + return -ENOMEM; + + spin_lock_init(&req->rq_lock); + atomic_set(&req->rq_refcount, 10000); + INIT_LIST_HEAD(&req->rq_ctx_chain); + init_waitqueue_head(&req->rq_reply_waitq); + init_waitqueue_head(&req->rq_set_waitq); + req->rq_import = imp; + req->rq_flvr = sec->ps_flvr; + req->rq_cli_ctx = ctx; + + rc = sptlrpc_req_refresh_ctx(req, 0); + LASSERT(list_empty(&req->rq_ctx_chain)); + sptlrpc_cli_ctx_put(req->rq_cli_ctx, 1); + ptlrpc_request_cache_free(req); + + return rc; +} + +/** + * Used by ptlrpc client, to perform the pre-defined security transformation + * upon the request message of \a req. After this function called, + * req->rq_reqmsg is still accessible as clear text. + */ +int sptlrpc_cli_wrap_request(struct ptlrpc_request *req) +{ + struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx; + int rc = 0; + + LASSERT(ctx); + LASSERT(ctx->cc_sec); + LASSERT(req->rq_reqbuf || req->rq_clrbuf); + + /* we wrap bulk request here because now we can be sure + * the context is uptodate. + */ + if (req->rq_bulk) { + rc = sptlrpc_cli_wrap_bulk(req, req->rq_bulk); + if (rc) + return rc; + } + + switch (SPTLRPC_FLVR_SVC(req->rq_flvr.sf_rpc)) { + case SPTLRPC_SVC_NULL: + case SPTLRPC_SVC_AUTH: + case SPTLRPC_SVC_INTG: + LASSERT(ctx->cc_ops->sign); + rc = ctx->cc_ops->sign(ctx, req); + break; + case SPTLRPC_SVC_PRIV: + LASSERT(ctx->cc_ops->seal); + rc = ctx->cc_ops->seal(ctx, req); + break; + default: + LBUG(); + } + + if (rc == 0) { + LASSERT(req->rq_reqdata_len); + LASSERT(req->rq_reqdata_len % 8 == 0); + LASSERT(req->rq_reqdata_len <= req->rq_reqbuf_len); + } + + return rc; +} + +static int do_cli_unwrap_reply(struct ptlrpc_request *req) +{ + struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx; + int rc; + + LASSERT(ctx); + LASSERT(ctx->cc_sec); + LASSERT(req->rq_repbuf); + LASSERT(req->rq_repdata); + LASSERT(req->rq_repmsg == NULL); + + req->rq_rep_swab_mask = 0; + + rc = __lustre_unpack_msg(req->rq_repdata, req->rq_repdata_len); + switch (rc) { + case 1: + lustre_set_rep_swabbed(req, MSG_PTLRPC_HEADER_OFF); + case 0: + break; + default: + CERROR("failed unpack reply: x%llu\n", req->rq_xid); + return -EPROTO; + } + + if (req->rq_repdata_len < sizeof(struct lustre_msg)) { + CERROR("replied data length %d too small\n", + req->rq_repdata_len); + return -EPROTO; + } + + if (SPTLRPC_FLVR_POLICY(req->rq_repdata->lm_secflvr) != + SPTLRPC_FLVR_POLICY(req->rq_flvr.sf_rpc)) { + CERROR("reply policy %u doesn't match request policy %u\n", + SPTLRPC_FLVR_POLICY(req->rq_repdata->lm_secflvr), + SPTLRPC_FLVR_POLICY(req->rq_flvr.sf_rpc)); + return -EPROTO; + } + + switch (SPTLRPC_FLVR_SVC(req->rq_flvr.sf_rpc)) { + case SPTLRPC_SVC_NULL: + case SPTLRPC_SVC_AUTH: + case SPTLRPC_SVC_INTG: + LASSERT(ctx->cc_ops->verify); + rc = ctx->cc_ops->verify(ctx, req); + break; + case SPTLRPC_SVC_PRIV: + LASSERT(ctx->cc_ops->unseal); + rc = ctx->cc_ops->unseal(ctx, req); + break; + default: + LBUG(); + } + LASSERT(rc || req->rq_repmsg || req->rq_resend); + + if (SPTLRPC_FLVR_POLICY(req->rq_flvr.sf_rpc) != SPTLRPC_POLICY_NULL && + !req->rq_ctx_init) + req->rq_rep_swab_mask = 0; + return rc; +} + +/** + * Used by ptlrpc client, to perform security transformation upon the reply + * message of \a req. After return successfully, req->rq_repmsg points to + * the reply message in clear text. + * + * \pre the reply buffer should have been un-posted from LNet, so nothing is + * going to change. + */ +int sptlrpc_cli_unwrap_reply(struct ptlrpc_request *req) +{ + LASSERT(req->rq_repbuf); + LASSERT(req->rq_repdata == NULL); + LASSERT(req->rq_repmsg == NULL); + LASSERT(req->rq_reply_off + req->rq_nob_received <= req->rq_repbuf_len); + + if (req->rq_reply_off == 0 && + (lustre_msghdr_get_flags(req->rq_reqmsg) & MSGHDR_AT_SUPPORT)) { + CERROR("real reply with offset 0\n"); + return -EPROTO; + } + + if (req->rq_reply_off % 8 != 0) { + CERROR("reply at odd offset %u\n", req->rq_reply_off); + return -EPROTO; + } + + req->rq_repdata = (struct lustre_msg *) + (req->rq_repbuf + req->rq_reply_off); + req->rq_repdata_len = req->rq_nob_received; + + return do_cli_unwrap_reply(req); +} + +/** + * Used by ptlrpc client, to perform security transformation upon the early + * reply message of \a req. We expect the rq_reply_off is 0, and + * rq_nob_received is the early reply size. + * + * Because the receive buffer might be still posted, the reply data might be + * changed at any time, no matter we're holding rq_lock or not. For this reason + * we allocate a separate ptlrpc_request and reply buffer for early reply + * processing. + * + * \retval 0 success, \a req_ret is filled with a duplicated ptlrpc_request. + * Later the caller must call sptlrpc_cli_finish_early_reply() on the returned + * \a *req_ret to release it. + * \retval -ev error number, and \a req_ret will not be set. + */ +int sptlrpc_cli_unwrap_early_reply(struct ptlrpc_request *req, + struct ptlrpc_request **req_ret) +{ + struct ptlrpc_request *early_req; + char *early_buf; + int early_bufsz, early_size; + int rc; + + early_req = ptlrpc_request_cache_alloc(GFP_NOFS); + if (early_req == NULL) + return -ENOMEM; + + early_size = req->rq_nob_received; + early_bufsz = size_roundup_power2(early_size); + OBD_ALLOC_LARGE(early_buf, early_bufsz); + if (early_buf == NULL) { + rc = -ENOMEM; + goto err_req; + } + + /* sanity checkings and copy data out, do it inside spinlock */ + spin_lock(&req->rq_lock); + + if (req->rq_replied) { + spin_unlock(&req->rq_lock); + rc = -EALREADY; + goto err_buf; + } + + LASSERT(req->rq_repbuf); + LASSERT(req->rq_repdata == NULL); + LASSERT(req->rq_repmsg == NULL); + + if (req->rq_reply_off != 0) { + CERROR("early reply with offset %u\n", req->rq_reply_off); + spin_unlock(&req->rq_lock); + rc = -EPROTO; + goto err_buf; + } + + if (req->rq_nob_received != early_size) { + /* even another early arrived the size should be the same */ + CERROR("data size has changed from %u to %u\n", + early_size, req->rq_nob_received); + spin_unlock(&req->rq_lock); + rc = -EINVAL; + goto err_buf; + } + + if (req->rq_nob_received < sizeof(struct lustre_msg)) { + CERROR("early reply length %d too small\n", + req->rq_nob_received); + spin_unlock(&req->rq_lock); + rc = -EALREADY; + goto err_buf; + } + + memcpy(early_buf, req->rq_repbuf, early_size); + spin_unlock(&req->rq_lock); + + spin_lock_init(&early_req->rq_lock); + early_req->rq_cli_ctx = sptlrpc_cli_ctx_get(req->rq_cli_ctx); + early_req->rq_flvr = req->rq_flvr; + early_req->rq_repbuf = early_buf; + early_req->rq_repbuf_len = early_bufsz; + early_req->rq_repdata = (struct lustre_msg *) early_buf; + early_req->rq_repdata_len = early_size; + early_req->rq_early = 1; + early_req->rq_reqmsg = req->rq_reqmsg; + + rc = do_cli_unwrap_reply(early_req); + if (rc) { + DEBUG_REQ(D_ADAPTTO, early_req, + "error %d unwrap early reply", rc); + goto err_ctx; + } + + LASSERT(early_req->rq_repmsg); + *req_ret = early_req; + return 0; + +err_ctx: + sptlrpc_cli_ctx_put(early_req->rq_cli_ctx, 1); +err_buf: + OBD_FREE_LARGE(early_buf, early_bufsz); +err_req: + ptlrpc_request_cache_free(early_req); + return rc; +} + +/** + * Used by ptlrpc client, to release a processed early reply \a early_req. + * + * \pre \a early_req was obtained from calling sptlrpc_cli_unwrap_early_reply(). + */ +void sptlrpc_cli_finish_early_reply(struct ptlrpc_request *early_req) +{ + LASSERT(early_req->rq_repbuf); + LASSERT(early_req->rq_repdata); + LASSERT(early_req->rq_repmsg); + + sptlrpc_cli_ctx_put(early_req->rq_cli_ctx, 1); + OBD_FREE_LARGE(early_req->rq_repbuf, early_req->rq_repbuf_len); + ptlrpc_request_cache_free(early_req); +} + +/************************************************** + * sec ID * + **************************************************/ + +/* + * "fixed" sec (e.g. null) use sec_id < 0 + */ +static atomic_t sptlrpc_sec_id = ATOMIC_INIT(1); + +int sptlrpc_get_next_secid(void) +{ + return atomic_inc_return(&sptlrpc_sec_id); +} +EXPORT_SYMBOL(sptlrpc_get_next_secid); + +/************************************************** + * client side high-level security APIs * + **************************************************/ + +static int sec_cop_flush_ctx_cache(struct ptlrpc_sec *sec, uid_t uid, + int grace, int force) +{ + struct ptlrpc_sec_policy *policy = sec->ps_policy; + + LASSERT(policy->sp_cops); + LASSERT(policy->sp_cops->flush_ctx_cache); + + return policy->sp_cops->flush_ctx_cache(sec, uid, grace, force); +} + +static void sec_cop_destroy_sec(struct ptlrpc_sec *sec) +{ + struct ptlrpc_sec_policy *policy = sec->ps_policy; + + LASSERT_ATOMIC_ZERO(&sec->ps_refcount); + LASSERT_ATOMIC_ZERO(&sec->ps_nctx); + LASSERT(policy->sp_cops->destroy_sec); + + CDEBUG(D_SEC, "%s@%p: being destroyed\n", sec->ps_policy->sp_name, sec); + + policy->sp_cops->destroy_sec(sec); + sptlrpc_policy_put(policy); +} + +void sptlrpc_sec_destroy(struct ptlrpc_sec *sec) +{ + sec_cop_destroy_sec(sec); +} +EXPORT_SYMBOL(sptlrpc_sec_destroy); + +static void sptlrpc_sec_kill(struct ptlrpc_sec *sec) +{ + LASSERT_ATOMIC_POS(&sec->ps_refcount); + + if (sec->ps_policy->sp_cops->kill_sec) { + sec->ps_policy->sp_cops->kill_sec(sec); + + sec_cop_flush_ctx_cache(sec, -1, 1, 1); + } +} + +struct ptlrpc_sec *sptlrpc_sec_get(struct ptlrpc_sec *sec) +{ + if (sec) + atomic_inc(&sec->ps_refcount); + + return sec; +} +EXPORT_SYMBOL(sptlrpc_sec_get); + +void sptlrpc_sec_put(struct ptlrpc_sec *sec) +{ + if (sec) { + LASSERT_ATOMIC_POS(&sec->ps_refcount); + + if (atomic_dec_and_test(&sec->ps_refcount)) { + sptlrpc_gc_del_sec(sec); + sec_cop_destroy_sec(sec); + } + } +} +EXPORT_SYMBOL(sptlrpc_sec_put); + +/* + * policy module is responsible for taking reference of import + */ +static +struct ptlrpc_sec *sptlrpc_sec_create(struct obd_import *imp, + struct ptlrpc_svc_ctx *svc_ctx, + struct sptlrpc_flavor *sf, + enum lustre_sec_part sp) +{ + struct ptlrpc_sec_policy *policy; + struct ptlrpc_sec *sec; + char str[32]; + + if (svc_ctx) { + LASSERT(imp->imp_dlm_fake == 1); + + CDEBUG(D_SEC, "%s %s: reverse sec using flavor %s\n", + imp->imp_obd->obd_type->typ_name, + imp->imp_obd->obd_name, + sptlrpc_flavor2name(sf, str, sizeof(str))); + + policy = sptlrpc_policy_get(svc_ctx->sc_policy); + sf->sf_flags |= PTLRPC_SEC_FL_REVERSE | PTLRPC_SEC_FL_ROOTONLY; + } else { + LASSERT(imp->imp_dlm_fake == 0); + + CDEBUG(D_SEC, "%s %s: select security flavor %s\n", + imp->imp_obd->obd_type->typ_name, + imp->imp_obd->obd_name, + sptlrpc_flavor2name(sf, str, sizeof(str))); + + policy = sptlrpc_wireflavor2policy(sf->sf_rpc); + if (!policy) { + CERROR("invalid flavor 0x%x\n", sf->sf_rpc); + return NULL; + } + } + + sec = policy->sp_cops->create_sec(imp, svc_ctx, sf); + if (sec) { + atomic_inc(&sec->ps_refcount); + + sec->ps_part = sp; + + if (sec->ps_gc_interval && policy->sp_cops->gc_ctx) + sptlrpc_gc_add_sec(sec); + } else { + sptlrpc_policy_put(policy); + } + + return sec; +} + +struct ptlrpc_sec *sptlrpc_import_sec_ref(struct obd_import *imp) +{ + struct ptlrpc_sec *sec; + + spin_lock(&imp->imp_lock); + sec = sptlrpc_sec_get(imp->imp_sec); + spin_unlock(&imp->imp_lock); + + return sec; +} +EXPORT_SYMBOL(sptlrpc_import_sec_ref); + +static void sptlrpc_import_sec_install(struct obd_import *imp, + struct ptlrpc_sec *sec) +{ + struct ptlrpc_sec *old_sec; + + LASSERT_ATOMIC_POS(&sec->ps_refcount); + + spin_lock(&imp->imp_lock); + old_sec = imp->imp_sec; + imp->imp_sec = sec; + spin_unlock(&imp->imp_lock); + + if (old_sec) { + sptlrpc_sec_kill(old_sec); + + /* balance the ref taken by this import */ + sptlrpc_sec_put(old_sec); + } +} + +static inline +int flavor_equal(struct sptlrpc_flavor *sf1, struct sptlrpc_flavor *sf2) +{ + return (memcmp(sf1, sf2, sizeof(*sf1)) == 0); +} + +static inline +void flavor_copy(struct sptlrpc_flavor *dst, struct sptlrpc_flavor *src) +{ + *dst = *src; +} + +static void sptlrpc_import_sec_adapt_inplace(struct obd_import *imp, + struct ptlrpc_sec *sec, + struct sptlrpc_flavor *sf) +{ + char str1[32], str2[32]; + + if (sec->ps_flvr.sf_flags != sf->sf_flags) + CDEBUG(D_SEC, "changing sec flags: %s -> %s\n", + sptlrpc_secflags2str(sec->ps_flvr.sf_flags, + str1, sizeof(str1)), + sptlrpc_secflags2str(sf->sf_flags, + str2, sizeof(str2))); + + spin_lock(&sec->ps_lock); + flavor_copy(&sec->ps_flvr, sf); + spin_unlock(&sec->ps_lock); +} + +/** + * To get an appropriate ptlrpc_sec for the \a imp, according to the current + * configuration. Upon called, imp->imp_sec may or may not be NULL. + * + * - regular import: \a svc_ctx should be NULL and \a flvr is ignored; + * - reverse import: \a svc_ctx and \a flvr are obtained from incoming request. + */ +int sptlrpc_import_sec_adapt(struct obd_import *imp, + struct ptlrpc_svc_ctx *svc_ctx, + struct sptlrpc_flavor *flvr) +{ + struct ptlrpc_connection *conn; + struct sptlrpc_flavor sf; + struct ptlrpc_sec *sec, *newsec; + enum lustre_sec_part sp; + char str[24]; + int rc = 0; + + might_sleep(); + + if (imp == NULL) + return 0; + + conn = imp->imp_connection; + + if (svc_ctx == NULL) { + struct client_obd *cliobd = &imp->imp_obd->u.cli; + /* + * normal import, determine flavor from rule set, except + * for mgc the flavor is predetermined. + */ + if (cliobd->cl_sp_me == LUSTRE_SP_MGC) + sf = cliobd->cl_flvr_mgc; + else + sptlrpc_conf_choose_flavor(cliobd->cl_sp_me, + cliobd->cl_sp_to, + &cliobd->cl_target_uuid, + conn->c_self, &sf); + + sp = imp->imp_obd->u.cli.cl_sp_me; + } else { + /* reverse import, determine flavor from incoming request */ + sf = *flvr; + + if (sf.sf_rpc != SPTLRPC_FLVR_NULL) + sf.sf_flags = PTLRPC_SEC_FL_REVERSE | + PTLRPC_SEC_FL_ROOTONLY; + + sp = sptlrpc_target_sec_part(imp->imp_obd); + } + + sec = sptlrpc_import_sec_ref(imp); + if (sec) { + char str2[24]; + + if (flavor_equal(&sf, &sec->ps_flvr)) + goto out; + + CDEBUG(D_SEC, "import %s->%s: changing flavor %s -> %s\n", + imp->imp_obd->obd_name, + obd_uuid2str(&conn->c_remote_uuid), + sptlrpc_flavor2name(&sec->ps_flvr, str, sizeof(str)), + sptlrpc_flavor2name(&sf, str2, sizeof(str2))); + + if (SPTLRPC_FLVR_POLICY(sf.sf_rpc) == + SPTLRPC_FLVR_POLICY(sec->ps_flvr.sf_rpc) && + SPTLRPC_FLVR_MECH(sf.sf_rpc) == + SPTLRPC_FLVR_MECH(sec->ps_flvr.sf_rpc)) { + sptlrpc_import_sec_adapt_inplace(imp, sec, &sf); + goto out; + } + } else if (SPTLRPC_FLVR_BASE(sf.sf_rpc) != + SPTLRPC_FLVR_BASE(SPTLRPC_FLVR_NULL)) { + CDEBUG(D_SEC, "import %s->%s netid %x: select flavor %s\n", + imp->imp_obd->obd_name, + obd_uuid2str(&conn->c_remote_uuid), + LNET_NIDNET(conn->c_self), + sptlrpc_flavor2name(&sf, str, sizeof(str))); + } + + mutex_lock(&imp->imp_sec_mutex); + + newsec = sptlrpc_sec_create(imp, svc_ctx, &sf, sp); + if (newsec) { + sptlrpc_import_sec_install(imp, newsec); + } else { + CERROR("import %s->%s: failed to create new sec\n", + imp->imp_obd->obd_name, + obd_uuid2str(&conn->c_remote_uuid)); + rc = -EPERM; + } + + mutex_unlock(&imp->imp_sec_mutex); +out: + sptlrpc_sec_put(sec); + return rc; +} + +void sptlrpc_import_sec_put(struct obd_import *imp) +{ + if (imp->imp_sec) { + sptlrpc_sec_kill(imp->imp_sec); + + sptlrpc_sec_put(imp->imp_sec); + imp->imp_sec = NULL; + } +} + +static void import_flush_ctx_common(struct obd_import *imp, + uid_t uid, int grace, int force) +{ + struct ptlrpc_sec *sec; + + if (imp == NULL) + return; + + sec = sptlrpc_import_sec_ref(imp); + if (sec == NULL) + return; + + sec_cop_flush_ctx_cache(sec, uid, grace, force); + sptlrpc_sec_put(sec); +} + +void sptlrpc_import_flush_root_ctx(struct obd_import *imp) +{ + /* it's important to use grace mode, see explain in + * sptlrpc_req_refresh_ctx() */ + import_flush_ctx_common(imp, 0, 1, 1); +} + +void sptlrpc_import_flush_my_ctx(struct obd_import *imp) +{ + import_flush_ctx_common(imp, from_kuid(&init_user_ns, current_uid()), + 1, 1); +} +EXPORT_SYMBOL(sptlrpc_import_flush_my_ctx); + +void sptlrpc_import_flush_all_ctx(struct obd_import *imp) +{ + import_flush_ctx_common(imp, -1, 1, 1); +} +EXPORT_SYMBOL(sptlrpc_import_flush_all_ctx); + +/** + * Used by ptlrpc client to allocate request buffer of \a req. Upon return + * successfully, req->rq_reqmsg points to a buffer with size \a msgsize. + */ +int sptlrpc_cli_alloc_reqbuf(struct ptlrpc_request *req, int msgsize) +{ + struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx; + struct ptlrpc_sec_policy *policy; + int rc; + + LASSERT(ctx); + LASSERT(ctx->cc_sec); + LASSERT(ctx->cc_sec->ps_policy); + LASSERT(req->rq_reqmsg == NULL); + LASSERT_ATOMIC_POS(&ctx->cc_refcount); + + policy = ctx->cc_sec->ps_policy; + rc = policy->sp_cops->alloc_reqbuf(ctx->cc_sec, req, msgsize); + if (!rc) { + LASSERT(req->rq_reqmsg); + LASSERT(req->rq_reqbuf || req->rq_clrbuf); + + /* zeroing preallocated buffer */ + if (req->rq_pool) + memset(req->rq_reqmsg, 0, msgsize); + } + + return rc; +} + +/** + * Used by ptlrpc client to free request buffer of \a req. After this + * req->rq_reqmsg is set to NULL and should not be accessed anymore. + */ +void sptlrpc_cli_free_reqbuf(struct ptlrpc_request *req) +{ + struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx; + struct ptlrpc_sec_policy *policy; + + LASSERT(ctx); + LASSERT(ctx->cc_sec); + LASSERT(ctx->cc_sec->ps_policy); + LASSERT_ATOMIC_POS(&ctx->cc_refcount); + + if (req->rq_reqbuf == NULL && req->rq_clrbuf == NULL) + return; + + policy = ctx->cc_sec->ps_policy; + policy->sp_cops->free_reqbuf(ctx->cc_sec, req); + req->rq_reqmsg = NULL; +} + +/* + * NOTE caller must guarantee the buffer size is enough for the enlargement + */ +void _sptlrpc_enlarge_msg_inplace(struct lustre_msg *msg, + int segment, int newsize) +{ + void *src, *dst; + int oldsize, oldmsg_size, movesize; + + LASSERT(segment < msg->lm_bufcount); + LASSERT(msg->lm_buflens[segment] <= newsize); + + if (msg->lm_buflens[segment] == newsize) + return; + + /* nothing to do if we are enlarging the last segment */ + if (segment == msg->lm_bufcount - 1) { + msg->lm_buflens[segment] = newsize; + return; + } + + oldsize = msg->lm_buflens[segment]; + + src = lustre_msg_buf(msg, segment + 1, 0); + msg->lm_buflens[segment] = newsize; + dst = lustre_msg_buf(msg, segment + 1, 0); + msg->lm_buflens[segment] = oldsize; + + /* move from segment + 1 to end segment */ + LASSERT(msg->lm_magic == LUSTRE_MSG_MAGIC_V2); + oldmsg_size = lustre_msg_size_v2(msg->lm_bufcount, msg->lm_buflens); + movesize = oldmsg_size - ((unsigned long) src - (unsigned long) msg); + LASSERT(movesize >= 0); + + if (movesize) + memmove(dst, src, movesize); + + /* note we don't clear the ares where old data live, not secret */ + + /* finally set new segment size */ + msg->lm_buflens[segment] = newsize; +} +EXPORT_SYMBOL(_sptlrpc_enlarge_msg_inplace); + +/** + * Used by ptlrpc client to enlarge the \a segment of request message pointed + * by req->rq_reqmsg to size \a newsize, all previously filled-in data will be + * preserved after the enlargement. this must be called after original request + * buffer being allocated. + * + * \note after this be called, rq_reqmsg and rq_reqlen might have been changed, + * so caller should refresh its local pointers if needed. + */ +int sptlrpc_cli_enlarge_reqbuf(struct ptlrpc_request *req, + int segment, int newsize) +{ + struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx; + struct ptlrpc_sec_cops *cops; + struct lustre_msg *msg = req->rq_reqmsg; + + LASSERT(ctx); + LASSERT(msg); + LASSERT(msg->lm_bufcount > segment); + LASSERT(msg->lm_buflens[segment] <= newsize); + + if (msg->lm_buflens[segment] == newsize) + return 0; + + cops = ctx->cc_sec->ps_policy->sp_cops; + LASSERT(cops->enlarge_reqbuf); + return cops->enlarge_reqbuf(ctx->cc_sec, req, segment, newsize); +} +EXPORT_SYMBOL(sptlrpc_cli_enlarge_reqbuf); + +/** + * Used by ptlrpc client to allocate reply buffer of \a req. + * + * \note After this, req->rq_repmsg is still not accessible. + */ +int sptlrpc_cli_alloc_repbuf(struct ptlrpc_request *req, int msgsize) +{ + struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx; + struct ptlrpc_sec_policy *policy; + + LASSERT(ctx); + LASSERT(ctx->cc_sec); + LASSERT(ctx->cc_sec->ps_policy); + + if (req->rq_repbuf) + return 0; + + policy = ctx->cc_sec->ps_policy; + return policy->sp_cops->alloc_repbuf(ctx->cc_sec, req, msgsize); +} + +/** + * Used by ptlrpc client to free reply buffer of \a req. After this + * req->rq_repmsg is set to NULL and should not be accessed anymore. + */ +void sptlrpc_cli_free_repbuf(struct ptlrpc_request *req) +{ + struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx; + struct ptlrpc_sec_policy *policy; + + LASSERT(ctx); + LASSERT(ctx->cc_sec); + LASSERT(ctx->cc_sec->ps_policy); + LASSERT_ATOMIC_POS(&ctx->cc_refcount); + + if (req->rq_repbuf == NULL) + return; + LASSERT(req->rq_repbuf_len); + + policy = ctx->cc_sec->ps_policy; + policy->sp_cops->free_repbuf(ctx->cc_sec, req); + req->rq_repmsg = NULL; +} + +int sptlrpc_cli_install_rvs_ctx(struct obd_import *imp, + struct ptlrpc_cli_ctx *ctx) +{ + struct ptlrpc_sec_policy *policy = ctx->cc_sec->ps_policy; + + if (!policy->sp_cops->install_rctx) + return 0; + return policy->sp_cops->install_rctx(imp, ctx->cc_sec, ctx); +} + +int sptlrpc_svc_install_rvs_ctx(struct obd_import *imp, + struct ptlrpc_svc_ctx *ctx) +{ + struct ptlrpc_sec_policy *policy = ctx->sc_policy; + + if (!policy->sp_sops->install_rctx) + return 0; + return policy->sp_sops->install_rctx(imp, ctx); +} + +/**************************************** + * server side security * + ****************************************/ + +static int flavor_allowed(struct sptlrpc_flavor *exp, + struct ptlrpc_request *req) +{ + struct sptlrpc_flavor *flvr = &req->rq_flvr; + + if (exp->sf_rpc == SPTLRPC_FLVR_ANY || exp->sf_rpc == flvr->sf_rpc) + return 1; + + if ((req->rq_ctx_init || req->rq_ctx_fini) && + SPTLRPC_FLVR_POLICY(exp->sf_rpc) == + SPTLRPC_FLVR_POLICY(flvr->sf_rpc) && + SPTLRPC_FLVR_MECH(exp->sf_rpc) == SPTLRPC_FLVR_MECH(flvr->sf_rpc)) + return 1; + + return 0; +} + +#define EXP_FLVR_UPDATE_EXPIRE (OBD_TIMEOUT_DEFAULT + 10) + +/** + * Given an export \a exp, check whether the flavor of incoming \a req + * is allowed by the export \a exp. Main logic is about taking care of + * changing configurations. Return 0 means success. + */ +int sptlrpc_target_export_check(struct obd_export *exp, + struct ptlrpc_request *req) +{ + struct sptlrpc_flavor flavor; + + if (exp == NULL) + return 0; + + /* client side export has no imp_reverse, skip + * FIXME maybe we should check flavor this as well??? */ + if (exp->exp_imp_reverse == NULL) + return 0; + + /* don't care about ctx fini rpc */ + if (req->rq_ctx_fini) + return 0; + + spin_lock(&exp->exp_lock); + + /* if flavor just changed (exp->exp_flvr_changed != 0), we wait for + * the first req with the new flavor, then treat it as current flavor, + * adapt reverse sec according to it. + * note the first rpc with new flavor might not be with root ctx, in + * which case delay the sec_adapt by leaving exp_flvr_adapt == 1. */ + if (unlikely(exp->exp_flvr_changed) && + flavor_allowed(&exp->exp_flvr_old[1], req)) { + /* make the new flavor as "current", and old ones as + * about-to-expire */ + CDEBUG(D_SEC, "exp %p: just changed: %x->%x\n", exp, + exp->exp_flvr.sf_rpc, exp->exp_flvr_old[1].sf_rpc); + flavor = exp->exp_flvr_old[1]; + exp->exp_flvr_old[1] = exp->exp_flvr_old[0]; + exp->exp_flvr_expire[1] = exp->exp_flvr_expire[0]; + exp->exp_flvr_old[0] = exp->exp_flvr; + exp->exp_flvr_expire[0] = get_seconds() + + EXP_FLVR_UPDATE_EXPIRE; + exp->exp_flvr = flavor; + + /* flavor change finished */ + exp->exp_flvr_changed = 0; + LASSERT(exp->exp_flvr_adapt == 1); + + /* if it's gss, we only interested in root ctx init */ + if (req->rq_auth_gss && + !(req->rq_ctx_init && + (req->rq_auth_usr_root || req->rq_auth_usr_mdt || + req->rq_auth_usr_ost))) { + spin_unlock(&exp->exp_lock); + CDEBUG(D_SEC, "is good but not root(%d:%d:%d:%d:%d)\n", + req->rq_auth_gss, req->rq_ctx_init, + req->rq_auth_usr_root, req->rq_auth_usr_mdt, + req->rq_auth_usr_ost); + return 0; + } + + exp->exp_flvr_adapt = 0; + spin_unlock(&exp->exp_lock); + + return sptlrpc_import_sec_adapt(exp->exp_imp_reverse, + req->rq_svc_ctx, &flavor); + } + + /* if it equals to the current flavor, we accept it, but need to + * dealing with reverse sec/ctx */ + if (likely(flavor_allowed(&exp->exp_flvr, req))) { + /* most cases should return here, we only interested in + * gss root ctx init */ + if (!req->rq_auth_gss || !req->rq_ctx_init || + (!req->rq_auth_usr_root && !req->rq_auth_usr_mdt && + !req->rq_auth_usr_ost)) { + spin_unlock(&exp->exp_lock); + return 0; + } + + /* if flavor just changed, we should not proceed, just leave + * it and current flavor will be discovered and replaced + * shortly, and let _this_ rpc pass through */ + if (exp->exp_flvr_changed) { + LASSERT(exp->exp_flvr_adapt); + spin_unlock(&exp->exp_lock); + return 0; + } + + if (exp->exp_flvr_adapt) { + exp->exp_flvr_adapt = 0; + CDEBUG(D_SEC, "exp %p (%x|%x|%x): do delayed adapt\n", + exp, exp->exp_flvr.sf_rpc, + exp->exp_flvr_old[0].sf_rpc, + exp->exp_flvr_old[1].sf_rpc); + flavor = exp->exp_flvr; + spin_unlock(&exp->exp_lock); + + return sptlrpc_import_sec_adapt(exp->exp_imp_reverse, + req->rq_svc_ctx, + &flavor); + } else { + CDEBUG(D_SEC, "exp %p (%x|%x|%x): is current flavor, install rvs ctx\n", + exp, exp->exp_flvr.sf_rpc, + exp->exp_flvr_old[0].sf_rpc, + exp->exp_flvr_old[1].sf_rpc); + spin_unlock(&exp->exp_lock); + + return sptlrpc_svc_install_rvs_ctx(exp->exp_imp_reverse, + req->rq_svc_ctx); + } + } + + if (exp->exp_flvr_expire[0]) { + if (exp->exp_flvr_expire[0] >= get_seconds()) { + if (flavor_allowed(&exp->exp_flvr_old[0], req)) { + CDEBUG(D_SEC, "exp %p (%x|%x|%x): match the middle one (" CFS_DURATION_T ")\n", exp, + exp->exp_flvr.sf_rpc, + exp->exp_flvr_old[0].sf_rpc, + exp->exp_flvr_old[1].sf_rpc, + exp->exp_flvr_expire[0] - + get_seconds()); + spin_unlock(&exp->exp_lock); + return 0; + } + } else { + CDEBUG(D_SEC, "mark middle expired\n"); + exp->exp_flvr_expire[0] = 0; + } + CDEBUG(D_SEC, "exp %p (%x|%x|%x): %x not match middle\n", exp, + exp->exp_flvr.sf_rpc, + exp->exp_flvr_old[0].sf_rpc, exp->exp_flvr_old[1].sf_rpc, + req->rq_flvr.sf_rpc); + } + + /* now it doesn't match the current flavor, the only chance we can + * accept it is match the old flavors which is not expired. */ + if (exp->exp_flvr_changed == 0 && exp->exp_flvr_expire[1]) { + if (exp->exp_flvr_expire[1] >= get_seconds()) { + if (flavor_allowed(&exp->exp_flvr_old[1], req)) { + CDEBUG(D_SEC, "exp %p (%x|%x|%x): match the oldest one (" CFS_DURATION_T ")\n", + exp, + exp->exp_flvr.sf_rpc, + exp->exp_flvr_old[0].sf_rpc, + exp->exp_flvr_old[1].sf_rpc, + exp->exp_flvr_expire[1] - + get_seconds()); + spin_unlock(&exp->exp_lock); + return 0; + } + } else { + CDEBUG(D_SEC, "mark oldest expired\n"); + exp->exp_flvr_expire[1] = 0; + } + CDEBUG(D_SEC, "exp %p (%x|%x|%x): %x not match found\n", + exp, exp->exp_flvr.sf_rpc, + exp->exp_flvr_old[0].sf_rpc, exp->exp_flvr_old[1].sf_rpc, + req->rq_flvr.sf_rpc); + } else { + CDEBUG(D_SEC, "exp %p (%x|%x|%x): skip the last one\n", + exp, exp->exp_flvr.sf_rpc, exp->exp_flvr_old[0].sf_rpc, + exp->exp_flvr_old[1].sf_rpc); + } + + spin_unlock(&exp->exp_lock); + + CWARN("exp %p(%s): req %p (%u|%u|%u|%u|%u|%u) with unauthorized flavor %x, expect %x|%x(%+ld)|%x(%+ld)\n", + exp, exp->exp_obd->obd_name, + req, req->rq_auth_gss, req->rq_ctx_init, req->rq_ctx_fini, + req->rq_auth_usr_root, req->rq_auth_usr_mdt, req->rq_auth_usr_ost, + req->rq_flvr.sf_rpc, + exp->exp_flvr.sf_rpc, + exp->exp_flvr_old[0].sf_rpc, + exp->exp_flvr_expire[0] ? + (unsigned long) (exp->exp_flvr_expire[0] - + get_seconds()) : 0, + exp->exp_flvr_old[1].sf_rpc, + exp->exp_flvr_expire[1] ? + (unsigned long) (exp->exp_flvr_expire[1] - + get_seconds()) : 0); + return -EACCES; +} +EXPORT_SYMBOL(sptlrpc_target_export_check); + +void sptlrpc_target_update_exp_flavor(struct obd_device *obd, + struct sptlrpc_rule_set *rset) +{ + struct obd_export *exp; + struct sptlrpc_flavor new_flvr; + + LASSERT(obd); + + spin_lock(&obd->obd_dev_lock); + + list_for_each_entry(exp, &obd->obd_exports, exp_obd_chain) { + if (exp->exp_connection == NULL) + continue; + + /* note if this export had just been updated flavor + * (exp_flvr_changed == 1), this will override the + * previous one. */ + spin_lock(&exp->exp_lock); + sptlrpc_target_choose_flavor(rset, exp->exp_sp_peer, + exp->exp_connection->c_peer.nid, + &new_flvr); + if (exp->exp_flvr_changed || + !flavor_equal(&new_flvr, &exp->exp_flvr)) { + exp->exp_flvr_old[1] = new_flvr; + exp->exp_flvr_expire[1] = 0; + exp->exp_flvr_changed = 1; + exp->exp_flvr_adapt = 1; + + CDEBUG(D_SEC, "exp %p (%s): updated flavor %x->%x\n", + exp, sptlrpc_part2name(exp->exp_sp_peer), + exp->exp_flvr.sf_rpc, + exp->exp_flvr_old[1].sf_rpc); + } + spin_unlock(&exp->exp_lock); + } + + spin_unlock(&obd->obd_dev_lock); +} +EXPORT_SYMBOL(sptlrpc_target_update_exp_flavor); + +static int sptlrpc_svc_check_from(struct ptlrpc_request *req, int svc_rc) +{ + /* peer's claim is unreliable unless gss is being used */ + if (!req->rq_auth_gss || svc_rc == SECSVC_DROP) + return svc_rc; + + switch (req->rq_sp_from) { + case LUSTRE_SP_CLI: + if (req->rq_auth_usr_mdt || req->rq_auth_usr_ost) { + DEBUG_REQ(D_ERROR, req, "faked source CLI"); + svc_rc = SECSVC_DROP; + } + break; + case LUSTRE_SP_MDT: + if (!req->rq_auth_usr_mdt) { + DEBUG_REQ(D_ERROR, req, "faked source MDT"); + svc_rc = SECSVC_DROP; + } + break; + case LUSTRE_SP_OST: + if (!req->rq_auth_usr_ost) { + DEBUG_REQ(D_ERROR, req, "faked source OST"); + svc_rc = SECSVC_DROP; + } + break; + case LUSTRE_SP_MGS: + case LUSTRE_SP_MGC: + if (!req->rq_auth_usr_root && !req->rq_auth_usr_mdt && + !req->rq_auth_usr_ost) { + DEBUG_REQ(D_ERROR, req, "faked source MGC/MGS"); + svc_rc = SECSVC_DROP; + } + break; + case LUSTRE_SP_ANY: + default: + DEBUG_REQ(D_ERROR, req, "invalid source %u", req->rq_sp_from); + svc_rc = SECSVC_DROP; + } + + return svc_rc; +} + +/** + * Used by ptlrpc server, to perform transformation upon request message of + * incoming \a req. This must be the first thing to do with a incoming + * request in ptlrpc layer. + * + * \retval SECSVC_OK success, and req->rq_reqmsg point to request message in + * clear text, size is req->rq_reqlen; also req->rq_svc_ctx is set. + * \retval SECSVC_COMPLETE success, the request has been fully processed, and + * reply message has been prepared. + * \retval SECSVC_DROP failed, this request should be dropped. + */ +int sptlrpc_svc_unwrap_request(struct ptlrpc_request *req) +{ + struct ptlrpc_sec_policy *policy; + struct lustre_msg *msg = req->rq_reqbuf; + int rc; + + LASSERT(msg); + LASSERT(req->rq_reqmsg == NULL); + LASSERT(req->rq_repmsg == NULL); + LASSERT(req->rq_svc_ctx == NULL); + + req->rq_req_swab_mask = 0; + + rc = __lustre_unpack_msg(msg, req->rq_reqdata_len); + switch (rc) { + case 1: + lustre_set_req_swabbed(req, MSG_PTLRPC_HEADER_OFF); + case 0: + break; + default: + CERROR("error unpacking request from %s x%llu\n", + libcfs_id2str(req->rq_peer), req->rq_xid); + return SECSVC_DROP; + } + + req->rq_flvr.sf_rpc = WIRE_FLVR(msg->lm_secflvr); + req->rq_sp_from = LUSTRE_SP_ANY; + req->rq_auth_uid = -1; + req->rq_auth_mapped_uid = -1; + + policy = sptlrpc_wireflavor2policy(req->rq_flvr.sf_rpc); + if (!policy) { + CERROR("unsupported rpc flavor %x\n", req->rq_flvr.sf_rpc); + return SECSVC_DROP; + } + + LASSERT(policy->sp_sops->accept); + rc = policy->sp_sops->accept(req); + sptlrpc_policy_put(policy); + LASSERT(req->rq_reqmsg || rc != SECSVC_OK); + LASSERT(req->rq_svc_ctx || rc == SECSVC_DROP); + + /* + * if it's not null flavor (which means embedded packing msg), + * reset the swab mask for the coming inner msg unpacking. + */ + if (SPTLRPC_FLVR_POLICY(req->rq_flvr.sf_rpc) != SPTLRPC_POLICY_NULL) + req->rq_req_swab_mask = 0; + + /* sanity check for the request source */ + rc = sptlrpc_svc_check_from(req, rc); + return rc; +} + +/** + * Used by ptlrpc server, to allocate reply buffer for \a req. If succeed, + * req->rq_reply_state is set, and req->rq_reply_state->rs_msg point to + * a buffer of \a msglen size. + */ +int sptlrpc_svc_alloc_rs(struct ptlrpc_request *req, int msglen) +{ + struct ptlrpc_sec_policy *policy; + struct ptlrpc_reply_state *rs; + int rc; + + LASSERT(req->rq_svc_ctx); + LASSERT(req->rq_svc_ctx->sc_policy); + + policy = req->rq_svc_ctx->sc_policy; + LASSERT(policy->sp_sops->alloc_rs); + + rc = policy->sp_sops->alloc_rs(req, msglen); + if (unlikely(rc == -ENOMEM)) { + struct ptlrpc_service_part *svcpt = req->rq_rqbd->rqbd_svcpt; + if (svcpt->scp_service->srv_max_reply_size < + msglen + sizeof(struct ptlrpc_reply_state)) { + /* Just return failure if the size is too big */ + CERROR("size of message is too big (%zd), %d allowed", + msglen + sizeof(struct ptlrpc_reply_state), + svcpt->scp_service->srv_max_reply_size); + return -ENOMEM; + } + + /* failed alloc, try emergency pool */ + rs = lustre_get_emerg_rs(svcpt); + if (rs == NULL) + return -ENOMEM; + + req->rq_reply_state = rs; + rc = policy->sp_sops->alloc_rs(req, msglen); + if (rc) { + lustre_put_emerg_rs(rs); + req->rq_reply_state = NULL; + } + } + + LASSERT(rc != 0 || + (req->rq_reply_state && req->rq_reply_state->rs_msg)); + + return rc; +} + +/** + * Used by ptlrpc server, to perform transformation upon reply message. + * + * \post req->rq_reply_off is set to appropriate server-controlled reply offset. + * \post req->rq_repmsg and req->rq_reply_state->rs_msg becomes inaccessible. + */ +int sptlrpc_svc_wrap_reply(struct ptlrpc_request *req) +{ + struct ptlrpc_sec_policy *policy; + int rc; + + LASSERT(req->rq_svc_ctx); + LASSERT(req->rq_svc_ctx->sc_policy); + + policy = req->rq_svc_ctx->sc_policy; + LASSERT(policy->sp_sops->authorize); + + rc = policy->sp_sops->authorize(req); + LASSERT(rc || req->rq_reply_state->rs_repdata_len); + + return rc; +} + +/** + * Used by ptlrpc server, to free reply_state. + */ +void sptlrpc_svc_free_rs(struct ptlrpc_reply_state *rs) +{ + struct ptlrpc_sec_policy *policy; + unsigned int prealloc; + + LASSERT(rs->rs_svc_ctx); + LASSERT(rs->rs_svc_ctx->sc_policy); + + policy = rs->rs_svc_ctx->sc_policy; + LASSERT(policy->sp_sops->free_rs); + + prealloc = rs->rs_prealloc; + policy->sp_sops->free_rs(rs); + + if (prealloc) + lustre_put_emerg_rs(rs); +} + +void sptlrpc_svc_ctx_addref(struct ptlrpc_request *req) +{ + struct ptlrpc_svc_ctx *ctx = req->rq_svc_ctx; + + if (ctx != NULL) + atomic_inc(&ctx->sc_refcount); +} + +void sptlrpc_svc_ctx_decref(struct ptlrpc_request *req) +{ + struct ptlrpc_svc_ctx *ctx = req->rq_svc_ctx; + + if (ctx == NULL) + return; + + LASSERT_ATOMIC_POS(&ctx->sc_refcount); + if (atomic_dec_and_test(&ctx->sc_refcount)) { + if (ctx->sc_policy->sp_sops->free_ctx) + ctx->sc_policy->sp_sops->free_ctx(ctx); + } + req->rq_svc_ctx = NULL; +} + +void sptlrpc_svc_ctx_invalidate(struct ptlrpc_request *req) +{ + struct ptlrpc_svc_ctx *ctx = req->rq_svc_ctx; + + if (ctx == NULL) + return; + + LASSERT_ATOMIC_POS(&ctx->sc_refcount); + if (ctx->sc_policy->sp_sops->invalidate_ctx) + ctx->sc_policy->sp_sops->invalidate_ctx(ctx); +} +EXPORT_SYMBOL(sptlrpc_svc_ctx_invalidate); + +/**************************************** + * bulk security * + ****************************************/ + +/** + * Perform transformation upon bulk data pointed by \a desc. This is called + * before transforming the request message. + */ +int sptlrpc_cli_wrap_bulk(struct ptlrpc_request *req, + struct ptlrpc_bulk_desc *desc) +{ + struct ptlrpc_cli_ctx *ctx; + + LASSERT(req->rq_bulk_read || req->rq_bulk_write); + + if (!req->rq_pack_bulk) + return 0; + + ctx = req->rq_cli_ctx; + if (ctx->cc_ops->wrap_bulk) + return ctx->cc_ops->wrap_bulk(ctx, req, desc); + return 0; +} +EXPORT_SYMBOL(sptlrpc_cli_wrap_bulk); + +/** + * This is called after unwrap the reply message. + * return nob of actual plain text size received, or error code. + */ +int sptlrpc_cli_unwrap_bulk_read(struct ptlrpc_request *req, + struct ptlrpc_bulk_desc *desc, + int nob) +{ + struct ptlrpc_cli_ctx *ctx; + int rc; + + LASSERT(req->rq_bulk_read && !req->rq_bulk_write); + + if (!req->rq_pack_bulk) + return desc->bd_nob_transferred; + + ctx = req->rq_cli_ctx; + if (ctx->cc_ops->unwrap_bulk) { + rc = ctx->cc_ops->unwrap_bulk(ctx, req, desc); + if (rc < 0) + return rc; + } + return desc->bd_nob_transferred; +} +EXPORT_SYMBOL(sptlrpc_cli_unwrap_bulk_read); + +/** + * This is called after unwrap the reply message. + * return 0 for success or error code. + */ +int sptlrpc_cli_unwrap_bulk_write(struct ptlrpc_request *req, + struct ptlrpc_bulk_desc *desc) +{ + struct ptlrpc_cli_ctx *ctx; + int rc; + + LASSERT(!req->rq_bulk_read && req->rq_bulk_write); + + if (!req->rq_pack_bulk) + return 0; + + ctx = req->rq_cli_ctx; + if (ctx->cc_ops->unwrap_bulk) { + rc = ctx->cc_ops->unwrap_bulk(ctx, req, desc); + if (rc < 0) + return rc; + } + + /* + * if everything is going right, nob should equals to nob_transferred. + * in case of privacy mode, nob_transferred needs to be adjusted. + */ + if (desc->bd_nob != desc->bd_nob_transferred) { + CERROR("nob %d doesn't match transferred nob %d", + desc->bd_nob, desc->bd_nob_transferred); + return -EPROTO; + } + + return 0; +} +EXPORT_SYMBOL(sptlrpc_cli_unwrap_bulk_write); + + +/**************************************** + * user descriptor helpers * + ****************************************/ + +int sptlrpc_current_user_desc_size(void) +{ + int ngroups; + + ngroups = current_ngroups; + + if (ngroups > LUSTRE_MAX_GROUPS) + ngroups = LUSTRE_MAX_GROUPS; + return sptlrpc_user_desc_size(ngroups); +} +EXPORT_SYMBOL(sptlrpc_current_user_desc_size); + +int sptlrpc_pack_user_desc(struct lustre_msg *msg, int offset) +{ + struct ptlrpc_user_desc *pud; + + pud = lustre_msg_buf(msg, offset, 0); + + pud->pud_uid = from_kuid(&init_user_ns, current_uid()); + pud->pud_gid = from_kgid(&init_user_ns, current_gid()); + pud->pud_fsuid = from_kuid(&init_user_ns, current_fsuid()); + pud->pud_fsgid = from_kgid(&init_user_ns, current_fsgid()); + pud->pud_cap = cfs_curproc_cap_pack(); + pud->pud_ngroups = (msg->lm_buflens[offset] - sizeof(*pud)) / 4; + + task_lock(current); + if (pud->pud_ngroups > current_ngroups) + pud->pud_ngroups = current_ngroups; + memcpy(pud->pud_groups, current_cred()->group_info->blocks[0], + pud->pud_ngroups * sizeof(__u32)); + task_unlock(current); + + return 0; +} +EXPORT_SYMBOL(sptlrpc_pack_user_desc); + +int sptlrpc_unpack_user_desc(struct lustre_msg *msg, int offset, int swabbed) +{ + struct ptlrpc_user_desc *pud; + int i; + + pud = lustre_msg_buf(msg, offset, sizeof(*pud)); + if (!pud) + return -EINVAL; + + if (swabbed) { + __swab32s(&pud->pud_uid); + __swab32s(&pud->pud_gid); + __swab32s(&pud->pud_fsuid); + __swab32s(&pud->pud_fsgid); + __swab32s(&pud->pud_cap); + __swab32s(&pud->pud_ngroups); + } + + if (pud->pud_ngroups > LUSTRE_MAX_GROUPS) { + CERROR("%u groups is too large\n", pud->pud_ngroups); + return -EINVAL; + } + + if (sizeof(*pud) + pud->pud_ngroups * sizeof(__u32) > + msg->lm_buflens[offset]) { + CERROR("%u groups are claimed but bufsize only %u\n", + pud->pud_ngroups, msg->lm_buflens[offset]); + return -EINVAL; + } + + if (swabbed) { + for (i = 0; i < pud->pud_ngroups; i++) + __swab32s(&pud->pud_groups[i]); + } + + return 0; +} +EXPORT_SYMBOL(sptlrpc_unpack_user_desc); + +/**************************************** + * misc helpers * + ****************************************/ + +const char *sec2target_str(struct ptlrpc_sec *sec) +{ + if (!sec || !sec->ps_import || !sec->ps_import->imp_obd) + return "*"; + if (sec_is_reverse(sec)) + return "c"; + return obd_uuid2str(&sec->ps_import->imp_obd->u.cli.cl_target_uuid); +} +EXPORT_SYMBOL(sec2target_str); + +/* + * return true if the bulk data is protected + */ +int sptlrpc_flavor_has_bulk(struct sptlrpc_flavor *flvr) +{ + switch (SPTLRPC_FLVR_BULK_SVC(flvr->sf_rpc)) { + case SPTLRPC_BULK_SVC_INTG: + case SPTLRPC_BULK_SVC_PRIV: + return 1; + default: + return 0; + } +} +EXPORT_SYMBOL(sptlrpc_flavor_has_bulk); + +/**************************************** + * crypto API helper/alloc blkciper * + ****************************************/ + +/**************************************** + * initialize/finalize * + ****************************************/ + +int sptlrpc_init(void) +{ + int rc; + + rwlock_init(&policy_lock); + + rc = sptlrpc_gc_init(); + if (rc) + goto out; + + rc = sptlrpc_conf_init(); + if (rc) + goto out_gc; + + rc = sptlrpc_enc_pool_init(); + if (rc) + goto out_conf; + + rc = sptlrpc_null_init(); + if (rc) + goto out_pool; + + rc = sptlrpc_plain_init(); + if (rc) + goto out_null; + + rc = sptlrpc_lproc_init(); + if (rc) + goto out_plain; + + return 0; + +out_plain: + sptlrpc_plain_fini(); +out_null: + sptlrpc_null_fini(); +out_pool: + sptlrpc_enc_pool_fini(); +out_conf: + sptlrpc_conf_fini(); +out_gc: + sptlrpc_gc_fini(); +out: + return rc; +} + +void sptlrpc_fini(void) +{ + sptlrpc_lproc_fini(); + sptlrpc_plain_fini(); + sptlrpc_null_fini(); + sptlrpc_enc_pool_fini(); + sptlrpc_conf_fini(); + sptlrpc_gc_fini(); +} diff --git a/drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c b/drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c new file mode 100644 index 000000000..c05a8554d --- /dev/null +++ b/drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c @@ -0,0 +1,884 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2011, 2012, Intel Corporation. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + * + * lustre/ptlrpc/sec_bulk.c + * + * Author: Eric Mei <ericm@clusterfs.com> + */ + +#define DEBUG_SUBSYSTEM S_SEC + +#include "../../include/linux/libcfs/libcfs.h" +#include <linux/crypto.h> + +#include "../include/obd.h" +#include "../include/obd_cksum.h" +#include "../include/obd_class.h" +#include "../include/obd_support.h" +#include "../include/lustre_net.h" +#include "../include/lustre_import.h" +#include "../include/lustre_dlm.h" +#include "../include/lustre_sec.h" + +#include "ptlrpc_internal.h" + +/**************************************** + * bulk encryption page pools * + ****************************************/ + + +#define POINTERS_PER_PAGE (PAGE_CACHE_SIZE / sizeof(void *)) +#define PAGES_PER_POOL (POINTERS_PER_PAGE) + +#define IDLE_IDX_MAX (100) +#define IDLE_IDX_WEIGHT (3) + +#define CACHE_QUIESCENT_PERIOD (20) + +static struct ptlrpc_enc_page_pool { + /* + * constants + */ + unsigned long epp_max_pages; /* maximum pages can hold, const */ + unsigned int epp_max_pools; /* number of pools, const */ + + /* + * wait queue in case of not enough free pages. + */ + wait_queue_head_t epp_waitq; /* waiting threads */ + unsigned int epp_waitqlen; /* wait queue length */ + unsigned long epp_pages_short; /* # of pages wanted of in-q users */ + unsigned int epp_growing:1; /* during adding pages */ + + /* + * indicating how idle the pools are, from 0 to MAX_IDLE_IDX + * this is counted based on each time when getting pages from + * the pools, not based on time. which means in case that system + * is idled for a while but the idle_idx might still be low if no + * activities happened in the pools. + */ + unsigned long epp_idle_idx; + + /* last shrink time due to mem tight */ + long epp_last_shrink; + long epp_last_access; + + /* + * in-pool pages bookkeeping + */ + spinlock_t epp_lock; /* protect following fields */ + unsigned long epp_total_pages; /* total pages in pools */ + unsigned long epp_free_pages; /* current pages available */ + + /* + * statistics + */ + unsigned long epp_st_max_pages; /* # of pages ever reached */ + unsigned int epp_st_grows; /* # of grows */ + unsigned int epp_st_grow_fails; /* # of add pages failures */ + unsigned int epp_st_shrinks; /* # of shrinks */ + unsigned long epp_st_access; /* # of access */ + unsigned long epp_st_missings; /* # of cache missing */ + unsigned long epp_st_lowfree; /* lowest free pages reached */ + unsigned int epp_st_max_wqlen; /* highest waitqueue length */ + unsigned long epp_st_max_wait; /* in jiffies */ + /* + * pointers to pools + */ + struct page ***epp_pools; +} page_pools; + +/* + * /proc/fs/lustre/sptlrpc/encrypt_page_pools + */ +int sptlrpc_proc_enc_pool_seq_show(struct seq_file *m, void *v) +{ + spin_lock(&page_pools.epp_lock); + + seq_printf(m, + "physical pages: %lu\n" + "pages per pool: %lu\n" + "max pages: %lu\n" + "max pools: %u\n" + "total pages: %lu\n" + "total free: %lu\n" + "idle index: %lu/100\n" + "last shrink: %lds\n" + "last access: %lds\n" + "max pages reached: %lu\n" + "grows: %u\n" + "grows failure: %u\n" + "shrinks: %u\n" + "cache access: %lu\n" + "cache missing: %lu\n" + "low free mark: %lu\n" + "max waitqueue depth: %u\n" + "max wait time: " CFS_TIME_T "/%u\n", + totalram_pages, + PAGES_PER_POOL, + page_pools.epp_max_pages, + page_pools.epp_max_pools, + page_pools.epp_total_pages, + page_pools.epp_free_pages, + page_pools.epp_idle_idx, + get_seconds() - page_pools.epp_last_shrink, + get_seconds() - page_pools.epp_last_access, + page_pools.epp_st_max_pages, + page_pools.epp_st_grows, + page_pools.epp_st_grow_fails, + page_pools.epp_st_shrinks, + page_pools.epp_st_access, + page_pools.epp_st_missings, + page_pools.epp_st_lowfree, + page_pools.epp_st_max_wqlen, + page_pools.epp_st_max_wait, + HZ); + + spin_unlock(&page_pools.epp_lock); + + return 0; +} + +static void enc_pools_release_free_pages(long npages) +{ + int p_idx, g_idx; + int p_idx_max1, p_idx_max2; + + LASSERT(npages > 0); + LASSERT(npages <= page_pools.epp_free_pages); + LASSERT(page_pools.epp_free_pages <= page_pools.epp_total_pages); + + /* max pool index before the release */ + p_idx_max2 = (page_pools.epp_total_pages - 1) / PAGES_PER_POOL; + + page_pools.epp_free_pages -= npages; + page_pools.epp_total_pages -= npages; + + /* max pool index after the release */ + p_idx_max1 = page_pools.epp_total_pages == 0 ? -1 : + ((page_pools.epp_total_pages - 1) / PAGES_PER_POOL); + + p_idx = page_pools.epp_free_pages / PAGES_PER_POOL; + g_idx = page_pools.epp_free_pages % PAGES_PER_POOL; + LASSERT(page_pools.epp_pools[p_idx]); + + while (npages--) { + LASSERT(page_pools.epp_pools[p_idx]); + LASSERT(page_pools.epp_pools[p_idx][g_idx] != NULL); + + __free_page(page_pools.epp_pools[p_idx][g_idx]); + page_pools.epp_pools[p_idx][g_idx] = NULL; + + if (++g_idx == PAGES_PER_POOL) { + p_idx++; + g_idx = 0; + } + } + + /* free unused pools */ + while (p_idx_max1 < p_idx_max2) { + LASSERT(page_pools.epp_pools[p_idx_max2]); + OBD_FREE(page_pools.epp_pools[p_idx_max2], PAGE_CACHE_SIZE); + page_pools.epp_pools[p_idx_max2] = NULL; + p_idx_max2--; + } +} + +/* + * we try to keep at least PTLRPC_MAX_BRW_PAGES pages in the pool. + */ +static unsigned long enc_pools_shrink_count(struct shrinker *s, + struct shrink_control *sc) +{ + /* + * if no pool access for a long time, we consider it's fully idle. + * a little race here is fine. + */ + if (unlikely(get_seconds() - page_pools.epp_last_access > + CACHE_QUIESCENT_PERIOD)) { + spin_lock(&page_pools.epp_lock); + page_pools.epp_idle_idx = IDLE_IDX_MAX; + spin_unlock(&page_pools.epp_lock); + } + + LASSERT(page_pools.epp_idle_idx <= IDLE_IDX_MAX); + return max((int)page_pools.epp_free_pages - PTLRPC_MAX_BRW_PAGES, 0) * + (IDLE_IDX_MAX - page_pools.epp_idle_idx) / IDLE_IDX_MAX; +} + +/* + * we try to keep at least PTLRPC_MAX_BRW_PAGES pages in the pool. + */ +static unsigned long enc_pools_shrink_scan(struct shrinker *s, + struct shrink_control *sc) +{ + spin_lock(&page_pools.epp_lock); + sc->nr_to_scan = min_t(unsigned long, sc->nr_to_scan, + page_pools.epp_free_pages - PTLRPC_MAX_BRW_PAGES); + if (sc->nr_to_scan > 0) { + enc_pools_release_free_pages(sc->nr_to_scan); + CDEBUG(D_SEC, "released %ld pages, %ld left\n", + (long)sc->nr_to_scan, page_pools.epp_free_pages); + + page_pools.epp_st_shrinks++; + page_pools.epp_last_shrink = get_seconds(); + } + spin_unlock(&page_pools.epp_lock); + + /* + * if no pool access for a long time, we consider it's fully idle. + * a little race here is fine. + */ + if (unlikely(get_seconds() - page_pools.epp_last_access > + CACHE_QUIESCENT_PERIOD)) { + spin_lock(&page_pools.epp_lock); + page_pools.epp_idle_idx = IDLE_IDX_MAX; + spin_unlock(&page_pools.epp_lock); + } + + LASSERT(page_pools.epp_idle_idx <= IDLE_IDX_MAX); + return sc->nr_to_scan; +} + +static inline +int npages_to_npools(unsigned long npages) +{ + return (int) ((npages + PAGES_PER_POOL - 1) / PAGES_PER_POOL); +} + +/* + * return how many pages cleaned up. + */ +static unsigned long enc_pools_cleanup(struct page ***pools, int npools) +{ + unsigned long cleaned = 0; + int i, j; + + for (i = 0; i < npools; i++) { + if (pools[i]) { + for (j = 0; j < PAGES_PER_POOL; j++) { + if (pools[i][j]) { + __free_page(pools[i][j]); + cleaned++; + } + } + OBD_FREE(pools[i], PAGE_CACHE_SIZE); + pools[i] = NULL; + } + } + + return cleaned; +} + +/* + * merge @npools pointed by @pools which contains @npages new pages + * into current pools. + * + * we have options to avoid most memory copy with some tricks. but we choose + * the simplest way to avoid complexity. It's not frequently called. + */ +static void enc_pools_insert(struct page ***pools, int npools, int npages) +{ + int freeslot; + int op_idx, np_idx, og_idx, ng_idx; + int cur_npools, end_npools; + + LASSERT(npages > 0); + LASSERT(page_pools.epp_total_pages+npages <= page_pools.epp_max_pages); + LASSERT(npages_to_npools(npages) == npools); + LASSERT(page_pools.epp_growing); + + spin_lock(&page_pools.epp_lock); + + /* + * (1) fill all the free slots of current pools. + */ + /* free slots are those left by rent pages, and the extra ones with + * index >= total_pages, locate at the tail of last pool. */ + freeslot = page_pools.epp_total_pages % PAGES_PER_POOL; + if (freeslot != 0) + freeslot = PAGES_PER_POOL - freeslot; + freeslot += page_pools.epp_total_pages - page_pools.epp_free_pages; + + op_idx = page_pools.epp_free_pages / PAGES_PER_POOL; + og_idx = page_pools.epp_free_pages % PAGES_PER_POOL; + np_idx = npools - 1; + ng_idx = (npages - 1) % PAGES_PER_POOL; + + while (freeslot) { + LASSERT(page_pools.epp_pools[op_idx][og_idx] == NULL); + LASSERT(pools[np_idx][ng_idx] != NULL); + + page_pools.epp_pools[op_idx][og_idx] = pools[np_idx][ng_idx]; + pools[np_idx][ng_idx] = NULL; + + freeslot--; + + if (++og_idx == PAGES_PER_POOL) { + op_idx++; + og_idx = 0; + } + if (--ng_idx < 0) { + if (np_idx == 0) + break; + np_idx--; + ng_idx = PAGES_PER_POOL - 1; + } + } + + /* + * (2) add pools if needed. + */ + cur_npools = (page_pools.epp_total_pages + PAGES_PER_POOL - 1) / + PAGES_PER_POOL; + end_npools = (page_pools.epp_total_pages + npages + PAGES_PER_POOL - 1) + / PAGES_PER_POOL; + LASSERT(end_npools <= page_pools.epp_max_pools); + + np_idx = 0; + while (cur_npools < end_npools) { + LASSERT(page_pools.epp_pools[cur_npools] == NULL); + LASSERT(np_idx < npools); + LASSERT(pools[np_idx] != NULL); + + page_pools.epp_pools[cur_npools++] = pools[np_idx]; + pools[np_idx++] = NULL; + } + + page_pools.epp_total_pages += npages; + page_pools.epp_free_pages += npages; + page_pools.epp_st_lowfree = page_pools.epp_free_pages; + + if (page_pools.epp_total_pages > page_pools.epp_st_max_pages) + page_pools.epp_st_max_pages = page_pools.epp_total_pages; + + CDEBUG(D_SEC, "add %d pages to total %lu\n", npages, + page_pools.epp_total_pages); + + spin_unlock(&page_pools.epp_lock); +} + +static int enc_pools_add_pages(int npages) +{ + static DEFINE_MUTEX(add_pages_mutex); + struct page ***pools; + int npools, alloced = 0; + int i, j, rc = -ENOMEM; + + if (npages < PTLRPC_MAX_BRW_PAGES) + npages = PTLRPC_MAX_BRW_PAGES; + + mutex_lock(&add_pages_mutex); + + if (npages + page_pools.epp_total_pages > page_pools.epp_max_pages) + npages = page_pools.epp_max_pages - page_pools.epp_total_pages; + LASSERT(npages > 0); + + page_pools.epp_st_grows++; + + npools = npages_to_npools(npages); + OBD_ALLOC(pools, npools * sizeof(*pools)); + if (pools == NULL) + goto out; + + for (i = 0; i < npools; i++) { + OBD_ALLOC(pools[i], PAGE_CACHE_SIZE); + if (pools[i] == NULL) + goto out_pools; + + for (j = 0; j < PAGES_PER_POOL && alloced < npages; j++) { + pools[i][j] = alloc_page(GFP_NOFS | + __GFP_HIGHMEM); + if (pools[i][j] == NULL) + goto out_pools; + + alloced++; + } + } + LASSERT(alloced == npages); + + enc_pools_insert(pools, npools, npages); + CDEBUG(D_SEC, "added %d pages into pools\n", npages); + rc = 0; + +out_pools: + enc_pools_cleanup(pools, npools); + OBD_FREE(pools, npools * sizeof(*pools)); +out: + if (rc) { + page_pools.epp_st_grow_fails++; + CERROR("Failed to allocate %d enc pages\n", npages); + } + + mutex_unlock(&add_pages_mutex); + return rc; +} + +static inline void enc_pools_wakeup(void) +{ + assert_spin_locked(&page_pools.epp_lock); + LASSERT(page_pools.epp_waitqlen >= 0); + + if (unlikely(page_pools.epp_waitqlen)) { + LASSERT(waitqueue_active(&page_pools.epp_waitq)); + wake_up_all(&page_pools.epp_waitq); + } +} + +static int enc_pools_should_grow(int page_needed, long now) +{ + /* don't grow if someone else is growing the pools right now, + * or the pools has reached its full capacity + */ + if (page_pools.epp_growing || + page_pools.epp_total_pages == page_pools.epp_max_pages) + return 0; + + /* if total pages is not enough, we need to grow */ + if (page_pools.epp_total_pages < page_needed) + return 1; + + /* + * we wanted to return 0 here if there was a shrink just happened + * moment ago, but this may cause deadlock if both client and ost + * live on single node. + */ +#if 0 + if (now - page_pools.epp_last_shrink < 2) + return 0; +#endif + + /* + * here we perhaps need consider other factors like wait queue + * length, idle index, etc. ? + */ + + /* grow the pools in any other cases */ + return 1; +} + +/* + * we allocate the requested pages atomically. + */ +int sptlrpc_enc_pool_get_pages(struct ptlrpc_bulk_desc *desc) +{ + wait_queue_t waitlink; + unsigned long this_idle = -1; + unsigned long tick = 0; + long now; + int p_idx, g_idx; + int i; + + LASSERT(desc->bd_iov_count > 0); + LASSERT(desc->bd_iov_count <= page_pools.epp_max_pages); + + /* resent bulk, enc iov might have been allocated previously */ + if (desc->bd_enc_iov != NULL) + return 0; + + OBD_ALLOC(desc->bd_enc_iov, + desc->bd_iov_count * sizeof(*desc->bd_enc_iov)); + if (desc->bd_enc_iov == NULL) + return -ENOMEM; + + spin_lock(&page_pools.epp_lock); + + page_pools.epp_st_access++; +again: + if (unlikely(page_pools.epp_free_pages < desc->bd_iov_count)) { + if (tick == 0) + tick = cfs_time_current(); + + now = get_seconds(); + + page_pools.epp_st_missings++; + page_pools.epp_pages_short += desc->bd_iov_count; + + if (enc_pools_should_grow(desc->bd_iov_count, now)) { + page_pools.epp_growing = 1; + + spin_unlock(&page_pools.epp_lock); + enc_pools_add_pages(page_pools.epp_pages_short / 2); + spin_lock(&page_pools.epp_lock); + + page_pools.epp_growing = 0; + + enc_pools_wakeup(); + } else { + if (++page_pools.epp_waitqlen > + page_pools.epp_st_max_wqlen) + page_pools.epp_st_max_wqlen = + page_pools.epp_waitqlen; + + set_current_state(TASK_UNINTERRUPTIBLE); + init_waitqueue_entry(&waitlink, current); + add_wait_queue(&page_pools.epp_waitq, &waitlink); + + spin_unlock(&page_pools.epp_lock); + schedule(); + remove_wait_queue(&page_pools.epp_waitq, &waitlink); + LASSERT(page_pools.epp_waitqlen > 0); + spin_lock(&page_pools.epp_lock); + page_pools.epp_waitqlen--; + } + + LASSERT(page_pools.epp_pages_short >= desc->bd_iov_count); + page_pools.epp_pages_short -= desc->bd_iov_count; + + this_idle = 0; + goto again; + } + + /* record max wait time */ + if (unlikely(tick != 0)) { + tick = cfs_time_current() - tick; + if (tick > page_pools.epp_st_max_wait) + page_pools.epp_st_max_wait = tick; + } + + /* proceed with rest of allocation */ + page_pools.epp_free_pages -= desc->bd_iov_count; + + p_idx = page_pools.epp_free_pages / PAGES_PER_POOL; + g_idx = page_pools.epp_free_pages % PAGES_PER_POOL; + + for (i = 0; i < desc->bd_iov_count; i++) { + LASSERT(page_pools.epp_pools[p_idx][g_idx] != NULL); + desc->bd_enc_iov[i].kiov_page = + page_pools.epp_pools[p_idx][g_idx]; + page_pools.epp_pools[p_idx][g_idx] = NULL; + + if (++g_idx == PAGES_PER_POOL) { + p_idx++; + g_idx = 0; + } + } + + if (page_pools.epp_free_pages < page_pools.epp_st_lowfree) + page_pools.epp_st_lowfree = page_pools.epp_free_pages; + + /* + * new idle index = (old * weight + new) / (weight + 1) + */ + if (this_idle == -1) { + this_idle = page_pools.epp_free_pages * IDLE_IDX_MAX / + page_pools.epp_total_pages; + } + page_pools.epp_idle_idx = (page_pools.epp_idle_idx * IDLE_IDX_WEIGHT + + this_idle) / + (IDLE_IDX_WEIGHT + 1); + + page_pools.epp_last_access = get_seconds(); + + spin_unlock(&page_pools.epp_lock); + return 0; +} +EXPORT_SYMBOL(sptlrpc_enc_pool_get_pages); + +void sptlrpc_enc_pool_put_pages(struct ptlrpc_bulk_desc *desc) +{ + int p_idx, g_idx; + int i; + + if (desc->bd_enc_iov == NULL) + return; + + LASSERT(desc->bd_iov_count > 0); + + spin_lock(&page_pools.epp_lock); + + p_idx = page_pools.epp_free_pages / PAGES_PER_POOL; + g_idx = page_pools.epp_free_pages % PAGES_PER_POOL; + + LASSERT(page_pools.epp_free_pages + desc->bd_iov_count <= + page_pools.epp_total_pages); + LASSERT(page_pools.epp_pools[p_idx]); + + for (i = 0; i < desc->bd_iov_count; i++) { + LASSERT(desc->bd_enc_iov[i].kiov_page != NULL); + LASSERT(g_idx != 0 || page_pools.epp_pools[p_idx]); + LASSERT(page_pools.epp_pools[p_idx][g_idx] == NULL); + + page_pools.epp_pools[p_idx][g_idx] = + desc->bd_enc_iov[i].kiov_page; + + if (++g_idx == PAGES_PER_POOL) { + p_idx++; + g_idx = 0; + } + } + + page_pools.epp_free_pages += desc->bd_iov_count; + + enc_pools_wakeup(); + + spin_unlock(&page_pools.epp_lock); + + OBD_FREE(desc->bd_enc_iov, + desc->bd_iov_count * sizeof(*desc->bd_enc_iov)); + desc->bd_enc_iov = NULL; +} +EXPORT_SYMBOL(sptlrpc_enc_pool_put_pages); + +/* + * we don't do much stuff for add_user/del_user anymore, except adding some + * initial pages in add_user() if current pools are empty, rest would be + * handled by the pools's self-adaption. + */ +int sptlrpc_enc_pool_add_user(void) +{ + int need_grow = 0; + + spin_lock(&page_pools.epp_lock); + if (page_pools.epp_growing == 0 && page_pools.epp_total_pages == 0) { + page_pools.epp_growing = 1; + need_grow = 1; + } + spin_unlock(&page_pools.epp_lock); + + if (need_grow) { + enc_pools_add_pages(PTLRPC_MAX_BRW_PAGES + + PTLRPC_MAX_BRW_PAGES); + + spin_lock(&page_pools.epp_lock); + page_pools.epp_growing = 0; + enc_pools_wakeup(); + spin_unlock(&page_pools.epp_lock); + } + return 0; +} +EXPORT_SYMBOL(sptlrpc_enc_pool_add_user); + +int sptlrpc_enc_pool_del_user(void) +{ + return 0; +} +EXPORT_SYMBOL(sptlrpc_enc_pool_del_user); + +static inline void enc_pools_alloc(void) +{ + LASSERT(page_pools.epp_max_pools); + OBD_ALLOC_LARGE(page_pools.epp_pools, + page_pools.epp_max_pools * + sizeof(*page_pools.epp_pools)); +} + +static inline void enc_pools_free(void) +{ + LASSERT(page_pools.epp_max_pools); + LASSERT(page_pools.epp_pools); + + OBD_FREE_LARGE(page_pools.epp_pools, + page_pools.epp_max_pools * + sizeof(*page_pools.epp_pools)); +} + +static struct shrinker pools_shrinker = { + .count_objects = enc_pools_shrink_count, + .scan_objects = enc_pools_shrink_scan, + .seeks = DEFAULT_SEEKS, +}; + +int sptlrpc_enc_pool_init(void) +{ + /* + * maximum capacity is 1/8 of total physical memory. + * is the 1/8 a good number? + */ + page_pools.epp_max_pages = totalram_pages / 8; + page_pools.epp_max_pools = npages_to_npools(page_pools.epp_max_pages); + + init_waitqueue_head(&page_pools.epp_waitq); + page_pools.epp_waitqlen = 0; + page_pools.epp_pages_short = 0; + + page_pools.epp_growing = 0; + + page_pools.epp_idle_idx = 0; + page_pools.epp_last_shrink = get_seconds(); + page_pools.epp_last_access = get_seconds(); + + spin_lock_init(&page_pools.epp_lock); + page_pools.epp_total_pages = 0; + page_pools.epp_free_pages = 0; + + page_pools.epp_st_max_pages = 0; + page_pools.epp_st_grows = 0; + page_pools.epp_st_grow_fails = 0; + page_pools.epp_st_shrinks = 0; + page_pools.epp_st_access = 0; + page_pools.epp_st_missings = 0; + page_pools.epp_st_lowfree = 0; + page_pools.epp_st_max_wqlen = 0; + page_pools.epp_st_max_wait = 0; + + enc_pools_alloc(); + if (page_pools.epp_pools == NULL) + return -ENOMEM; + + register_shrinker(&pools_shrinker); + + return 0; +} + +void sptlrpc_enc_pool_fini(void) +{ + unsigned long cleaned, npools; + + LASSERT(page_pools.epp_pools); + LASSERT(page_pools.epp_total_pages == page_pools.epp_free_pages); + + unregister_shrinker(&pools_shrinker); + + npools = npages_to_npools(page_pools.epp_total_pages); + cleaned = enc_pools_cleanup(page_pools.epp_pools, npools); + LASSERT(cleaned == page_pools.epp_total_pages); + + enc_pools_free(); + + if (page_pools.epp_st_access > 0) { + CDEBUG(D_SEC, + "max pages %lu, grows %u, grow fails %u, shrinks %u, access %lu, missing %lu, max qlen %u, max wait " + CFS_TIME_T"/%d\n", + page_pools.epp_st_max_pages, page_pools.epp_st_grows, + page_pools.epp_st_grow_fails, + page_pools.epp_st_shrinks, page_pools.epp_st_access, + page_pools.epp_st_missings, page_pools.epp_st_max_wqlen, + page_pools.epp_st_max_wait, HZ); + } +} + + +static int cfs_hash_alg_id[] = { + [BULK_HASH_ALG_NULL] = CFS_HASH_ALG_NULL, + [BULK_HASH_ALG_ADLER32] = CFS_HASH_ALG_ADLER32, + [BULK_HASH_ALG_CRC32] = CFS_HASH_ALG_CRC32, + [BULK_HASH_ALG_MD5] = CFS_HASH_ALG_MD5, + [BULK_HASH_ALG_SHA1] = CFS_HASH_ALG_SHA1, + [BULK_HASH_ALG_SHA256] = CFS_HASH_ALG_SHA256, + [BULK_HASH_ALG_SHA384] = CFS_HASH_ALG_SHA384, + [BULK_HASH_ALG_SHA512] = CFS_HASH_ALG_SHA512, +}; +const char *sptlrpc_get_hash_name(__u8 hash_alg) +{ + return cfs_crypto_hash_name(cfs_hash_alg_id[hash_alg]); +} +EXPORT_SYMBOL(sptlrpc_get_hash_name); + +__u8 sptlrpc_get_hash_alg(const char *algname) +{ + return cfs_crypto_hash_alg(algname); +} +EXPORT_SYMBOL(sptlrpc_get_hash_alg); + +int bulk_sec_desc_unpack(struct lustre_msg *msg, int offset, int swabbed) +{ + struct ptlrpc_bulk_sec_desc *bsd; + int size = msg->lm_buflens[offset]; + + bsd = lustre_msg_buf(msg, offset, sizeof(*bsd)); + if (bsd == NULL) { + CERROR("Invalid bulk sec desc: size %d\n", size); + return -EINVAL; + } + + if (swabbed) + __swab32s(&bsd->bsd_nob); + + if (unlikely(bsd->bsd_version != 0)) { + CERROR("Unexpected version %u\n", bsd->bsd_version); + return -EPROTO; + } + + if (unlikely(bsd->bsd_type >= SPTLRPC_BULK_MAX)) { + CERROR("Invalid type %u\n", bsd->bsd_type); + return -EPROTO; + } + + /* FIXME more sanity check here */ + + if (unlikely(bsd->bsd_svc != SPTLRPC_BULK_SVC_NULL && + bsd->bsd_svc != SPTLRPC_BULK_SVC_INTG && + bsd->bsd_svc != SPTLRPC_BULK_SVC_PRIV)) { + CERROR("Invalid svc %u\n", bsd->bsd_svc); + return -EPROTO; + } + + return 0; +} +EXPORT_SYMBOL(bulk_sec_desc_unpack); + +int sptlrpc_get_bulk_checksum(struct ptlrpc_bulk_desc *desc, __u8 alg, + void *buf, int buflen) +{ + struct cfs_crypto_hash_desc *hdesc; + int hashsize; + char hashbuf[64]; + unsigned int bufsize; + int i, err; + + LASSERT(alg > BULK_HASH_ALG_NULL && alg < BULK_HASH_ALG_MAX); + LASSERT(buflen >= 4); + + hdesc = cfs_crypto_hash_init(cfs_hash_alg_id[alg], NULL, 0); + if (IS_ERR(hdesc)) { + CERROR("Unable to initialize checksum hash %s\n", + cfs_crypto_hash_name(cfs_hash_alg_id[alg])); + return PTR_ERR(hdesc); + } + + hashsize = cfs_crypto_hash_digestsize(cfs_hash_alg_id[alg]); + + for (i = 0; i < desc->bd_iov_count; i++) { + cfs_crypto_hash_update_page(hdesc, desc->bd_iov[i].kiov_page, + desc->bd_iov[i].kiov_offset & ~CFS_PAGE_MASK, + desc->bd_iov[i].kiov_len); + } + if (hashsize > buflen) { + bufsize = sizeof(hashbuf); + err = cfs_crypto_hash_final(hdesc, (unsigned char *)hashbuf, + &bufsize); + memcpy(buf, hashbuf, buflen); + } else { + bufsize = buflen; + err = cfs_crypto_hash_final(hdesc, (unsigned char *)buf, + &bufsize); + } + + if (err) + cfs_crypto_hash_final(hdesc, NULL, NULL); + return err; +} +EXPORT_SYMBOL(sptlrpc_get_bulk_checksum); diff --git a/drivers/staging/lustre/lustre/ptlrpc/sec_config.c b/drivers/staging/lustre/lustre/ptlrpc/sec_config.c new file mode 100644 index 000000000..56ba9e4e5 --- /dev/null +++ b/drivers/staging/lustre/lustre/ptlrpc/sec_config.c @@ -0,0 +1,901 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2011, 2012, Intel Corporation. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + */ + +#define DEBUG_SUBSYSTEM S_SEC + +#include "../../include/linux/libcfs/libcfs.h" +#include <linux/crypto.h> +#include <linux/key.h> + +#include "../include/obd.h" +#include "../include/obd_support.h" +#include "../include/lustre_import.h" +#include "../include/lustre_param.h" +#include "../include/lustre_sec.h" + +#include "ptlrpc_internal.h" + +const char *sptlrpc_part2name(enum lustre_sec_part part) +{ + switch (part) { + case LUSTRE_SP_CLI: + return "cli"; + case LUSTRE_SP_MDT: + return "mdt"; + case LUSTRE_SP_OST: + return "ost"; + case LUSTRE_SP_MGC: + return "mgc"; + case LUSTRE_SP_MGS: + return "mgs"; + case LUSTRE_SP_ANY: + return "any"; + default: + return "err"; + } +} +EXPORT_SYMBOL(sptlrpc_part2name); + +enum lustre_sec_part sptlrpc_target_sec_part(struct obd_device *obd) +{ + const char *type = obd->obd_type->typ_name; + + if (!strcmp(type, LUSTRE_MDT_NAME)) + return LUSTRE_SP_MDT; + if (!strcmp(type, LUSTRE_OST_NAME)) + return LUSTRE_SP_OST; + if (!strcmp(type, LUSTRE_MGS_NAME)) + return LUSTRE_SP_MGS; + + CERROR("unknown target %p(%s)\n", obd, type); + return LUSTRE_SP_ANY; +} +EXPORT_SYMBOL(sptlrpc_target_sec_part); + +/**************************************** + * user supplied flavor string parsing * + ****************************************/ + +/* + * format: <base_flavor>[-<bulk_type:alg_spec>] + */ +int sptlrpc_parse_flavor(const char *str, struct sptlrpc_flavor *flvr) +{ + char buf[32]; + char *bulk, *alg; + + memset(flvr, 0, sizeof(*flvr)); + + if (str == NULL || str[0] == '\0') { + flvr->sf_rpc = SPTLRPC_FLVR_INVALID; + return 0; + } + + strncpy(buf, str, sizeof(buf)); + buf[sizeof(buf) - 1] = '\0'; + + bulk = strchr(buf, '-'); + if (bulk) + *bulk++ = '\0'; + + flvr->sf_rpc = sptlrpc_name2flavor_base(buf); + if (flvr->sf_rpc == SPTLRPC_FLVR_INVALID) + goto err_out; + + /* + * currently only base flavor "plain" can have bulk specification. + */ + if (flvr->sf_rpc == SPTLRPC_FLVR_PLAIN) { + flvr->u_bulk.hash.hash_alg = BULK_HASH_ALG_ADLER32; + if (bulk) { + /* + * format: plain-hash:<hash_alg> + */ + alg = strchr(bulk, ':'); + if (alg == NULL) + goto err_out; + *alg++ = '\0'; + + if (strcmp(bulk, "hash")) + goto err_out; + + flvr->u_bulk.hash.hash_alg = sptlrpc_get_hash_alg(alg); + if (flvr->u_bulk.hash.hash_alg >= BULK_HASH_ALG_MAX) + goto err_out; + } + + if (flvr->u_bulk.hash.hash_alg == BULK_HASH_ALG_NULL) + flvr_set_bulk_svc(&flvr->sf_rpc, SPTLRPC_BULK_SVC_NULL); + else + flvr_set_bulk_svc(&flvr->sf_rpc, SPTLRPC_BULK_SVC_INTG); + } else { + if (bulk) + goto err_out; + } + + flvr->sf_flags = 0; + return 0; + +err_out: + CERROR("invalid flavor string: %s\n", str); + return -EINVAL; +} +EXPORT_SYMBOL(sptlrpc_parse_flavor); + +/**************************************** + * configure rules * + ****************************************/ + +static void get_default_flavor(struct sptlrpc_flavor *sf) +{ + memset(sf, 0, sizeof(*sf)); + + sf->sf_rpc = SPTLRPC_FLVR_NULL; + sf->sf_flags = 0; +} + +static void sptlrpc_rule_init(struct sptlrpc_rule *rule) +{ + rule->sr_netid = LNET_NIDNET(LNET_NID_ANY); + rule->sr_from = LUSTRE_SP_ANY; + rule->sr_to = LUSTRE_SP_ANY; + rule->sr_padding = 0; + + get_default_flavor(&rule->sr_flvr); +} + +/* + * format: network[.direction]=flavor + */ +int sptlrpc_parse_rule(char *param, struct sptlrpc_rule *rule) +{ + char *flavor, *dir; + int rc; + + sptlrpc_rule_init(rule); + + flavor = strchr(param, '='); + if (flavor == NULL) { + CERROR("invalid param, no '='\n"); + return -EINVAL; + } + *flavor++ = '\0'; + + dir = strchr(param, '.'); + if (dir) + *dir++ = '\0'; + + /* 1.1 network */ + if (strcmp(param, "default")) { + rule->sr_netid = libcfs_str2net(param); + if (rule->sr_netid == LNET_NIDNET(LNET_NID_ANY)) { + CERROR("invalid network name: %s\n", param); + return -EINVAL; + } + } + + /* 1.2 direction */ + if (dir) { + if (!strcmp(dir, "mdt2ost")) { + rule->sr_from = LUSTRE_SP_MDT; + rule->sr_to = LUSTRE_SP_OST; + } else if (!strcmp(dir, "mdt2mdt")) { + rule->sr_from = LUSTRE_SP_MDT; + rule->sr_to = LUSTRE_SP_MDT; + } else if (!strcmp(dir, "cli2ost")) { + rule->sr_from = LUSTRE_SP_CLI; + rule->sr_to = LUSTRE_SP_OST; + } else if (!strcmp(dir, "cli2mdt")) { + rule->sr_from = LUSTRE_SP_CLI; + rule->sr_to = LUSTRE_SP_MDT; + } else { + CERROR("invalid rule dir segment: %s\n", dir); + return -EINVAL; + } + } + + /* 2.1 flavor */ + rc = sptlrpc_parse_flavor(flavor, &rule->sr_flvr); + if (rc) + return -EINVAL; + + return 0; +} +EXPORT_SYMBOL(sptlrpc_parse_rule); + +void sptlrpc_rule_set_free(struct sptlrpc_rule_set *rset) +{ + LASSERT(rset->srs_nslot || + (rset->srs_nrule == 0 && rset->srs_rules == NULL)); + + if (rset->srs_nslot) { + OBD_FREE(rset->srs_rules, + rset->srs_nslot * sizeof(*rset->srs_rules)); + sptlrpc_rule_set_init(rset); + } +} +EXPORT_SYMBOL(sptlrpc_rule_set_free); + +/* + * return 0 if the rule set could accommodate one more rule. + */ +int sptlrpc_rule_set_expand(struct sptlrpc_rule_set *rset) +{ + struct sptlrpc_rule *rules; + int nslot; + + might_sleep(); + + if (rset->srs_nrule < rset->srs_nslot) + return 0; + + nslot = rset->srs_nslot + 8; + + /* better use realloc() if available */ + OBD_ALLOC(rules, nslot * sizeof(*rset->srs_rules)); + if (rules == NULL) + return -ENOMEM; + + if (rset->srs_nrule) { + LASSERT(rset->srs_nslot && rset->srs_rules); + memcpy(rules, rset->srs_rules, + rset->srs_nrule * sizeof(*rset->srs_rules)); + + OBD_FREE(rset->srs_rules, + rset->srs_nslot * sizeof(*rset->srs_rules)); + } + + rset->srs_rules = rules; + rset->srs_nslot = nslot; + return 0; +} +EXPORT_SYMBOL(sptlrpc_rule_set_expand); + +static inline int rule_spec_dir(struct sptlrpc_rule *rule) +{ + return (rule->sr_from != LUSTRE_SP_ANY || + rule->sr_to != LUSTRE_SP_ANY); +} +static inline int rule_spec_net(struct sptlrpc_rule *rule) +{ + return (rule->sr_netid != LNET_NIDNET(LNET_NID_ANY)); +} +static inline int rule_match_dir(struct sptlrpc_rule *r1, + struct sptlrpc_rule *r2) +{ + return (r1->sr_from == r2->sr_from && r1->sr_to == r2->sr_to); +} +static inline int rule_match_net(struct sptlrpc_rule *r1, + struct sptlrpc_rule *r2) +{ + return (r1->sr_netid == r2->sr_netid); +} + +/* + * merge @rule into @rset. + * the @rset slots might be expanded. + */ +int sptlrpc_rule_set_merge(struct sptlrpc_rule_set *rset, + struct sptlrpc_rule *rule) +{ + struct sptlrpc_rule *p = rset->srs_rules; + int spec_dir, spec_net; + int rc, n, match = 0; + + might_sleep(); + + spec_net = rule_spec_net(rule); + spec_dir = rule_spec_dir(rule); + + for (n = 0; n < rset->srs_nrule; n++) { + p = &rset->srs_rules[n]; + + /* test network match, if failed: + * - spec rule: skip rules which is also spec rule match, until + * we hit a wild rule, which means no more chance + * - wild rule: skip until reach the one which is also wild + * and matches + */ + if (!rule_match_net(p, rule)) { + if (spec_net) { + if (rule_spec_net(p)) + continue; + else + break; + } else { + continue; + } + } + + /* test dir match, same logic as net matching */ + if (!rule_match_dir(p, rule)) { + if (spec_dir) { + if (rule_spec_dir(p)) + continue; + else + break; + } else { + continue; + } + } + + /* find a match */ + match = 1; + break; + } + + if (match) { + LASSERT(n >= 0 && n < rset->srs_nrule); + + if (rule->sr_flvr.sf_rpc == SPTLRPC_FLVR_INVALID) { + /* remove this rule */ + if (n < rset->srs_nrule - 1) + memmove(&rset->srs_rules[n], + &rset->srs_rules[n + 1], + (rset->srs_nrule - n - 1) * + sizeof(*rule)); + rset->srs_nrule--; + } else { + /* override the rule */ + memcpy(&rset->srs_rules[n], rule, sizeof(*rule)); + } + } else { + LASSERT(n >= 0 && n <= rset->srs_nrule); + + if (rule->sr_flvr.sf_rpc != SPTLRPC_FLVR_INVALID) { + rc = sptlrpc_rule_set_expand(rset); + if (rc) + return rc; + + if (n < rset->srs_nrule) + memmove(&rset->srs_rules[n + 1], + &rset->srs_rules[n], + (rset->srs_nrule - n) * sizeof(*rule)); + memcpy(&rset->srs_rules[n], rule, sizeof(*rule)); + rset->srs_nrule++; + } else { + CDEBUG(D_CONFIG, "ignore the unmatched deletion\n"); + } + } + + return 0; +} +EXPORT_SYMBOL(sptlrpc_rule_set_merge); + +/** + * given from/to/nid, determine a matching flavor in ruleset. + * return 1 if a match found, otherwise return 0. + */ +int sptlrpc_rule_set_choose(struct sptlrpc_rule_set *rset, + enum lustre_sec_part from, + enum lustre_sec_part to, + lnet_nid_t nid, + struct sptlrpc_flavor *sf) +{ + struct sptlrpc_rule *r; + int n; + + for (n = 0; n < rset->srs_nrule; n++) { + r = &rset->srs_rules[n]; + + if (LNET_NIDNET(nid) != LNET_NIDNET(LNET_NID_ANY) && + r->sr_netid != LNET_NIDNET(LNET_NID_ANY) && + LNET_NIDNET(nid) != r->sr_netid) + continue; + + if (from != LUSTRE_SP_ANY && r->sr_from != LUSTRE_SP_ANY && + from != r->sr_from) + continue; + + if (to != LUSTRE_SP_ANY && r->sr_to != LUSTRE_SP_ANY && + to != r->sr_to) + continue; + + *sf = r->sr_flvr; + return 1; + } + + return 0; +} +EXPORT_SYMBOL(sptlrpc_rule_set_choose); + +void sptlrpc_rule_set_dump(struct sptlrpc_rule_set *rset) +{ + struct sptlrpc_rule *r; + int n; + + for (n = 0; n < rset->srs_nrule; n++) { + r = &rset->srs_rules[n]; + CDEBUG(D_SEC, "<%02d> from %x to %x, net %x, rpc %x\n", n, + r->sr_from, r->sr_to, r->sr_netid, r->sr_flvr.sf_rpc); + } +} +EXPORT_SYMBOL(sptlrpc_rule_set_dump); + +/********************************** + * sptlrpc configuration support * + **********************************/ + +struct sptlrpc_conf_tgt { + struct list_head sct_list; + char sct_name[MAX_OBD_NAME]; + struct sptlrpc_rule_set sct_rset; +}; + +struct sptlrpc_conf { + struct list_head sc_list; + char sc_fsname[MTI_NAME_MAXLEN]; + unsigned int sc_modified; /* modified during updating */ + unsigned int sc_updated:1, /* updated copy from MGS */ + sc_local:1; /* local copy from target */ + struct sptlrpc_rule_set sc_rset; /* fs general rules */ + struct list_head sc_tgts; /* target-specific rules */ +}; + +static struct mutex sptlrpc_conf_lock; +static LIST_HEAD(sptlrpc_confs); + +static inline int is_hex(char c) +{ + return ((c >= '0' && c <= '9') || + (c >= 'a' && c <= 'f')); +} + +static void target2fsname(const char *tgt, char *fsname, int buflen) +{ + const char *ptr; + int len; + + ptr = strrchr(tgt, '-'); + if (ptr) { + if ((strncmp(ptr, "-MDT", 4) != 0 && + strncmp(ptr, "-OST", 4) != 0) || + !is_hex(ptr[4]) || !is_hex(ptr[5]) || + !is_hex(ptr[6]) || !is_hex(ptr[7])) + ptr = NULL; + } + + /* if we didn't find the pattern, treat the whole string as fsname */ + if (ptr == NULL) + len = strlen(tgt); + else + len = ptr - tgt; + + len = min(len, buflen - 1); + memcpy(fsname, tgt, len); + fsname[len] = '\0'; +} + +static void sptlrpc_conf_free_rsets(struct sptlrpc_conf *conf) +{ + struct sptlrpc_conf_tgt *conf_tgt, *conf_tgt_next; + + sptlrpc_rule_set_free(&conf->sc_rset); + + list_for_each_entry_safe(conf_tgt, conf_tgt_next, + &conf->sc_tgts, sct_list) { + sptlrpc_rule_set_free(&conf_tgt->sct_rset); + list_del(&conf_tgt->sct_list); + OBD_FREE_PTR(conf_tgt); + } + LASSERT(list_empty(&conf->sc_tgts)); + + conf->sc_updated = 0; + conf->sc_local = 0; +} + +static void sptlrpc_conf_free(struct sptlrpc_conf *conf) +{ + CDEBUG(D_SEC, "free sptlrpc conf %s\n", conf->sc_fsname); + + sptlrpc_conf_free_rsets(conf); + list_del(&conf->sc_list); + OBD_FREE_PTR(conf); +} + +static +struct sptlrpc_conf_tgt *sptlrpc_conf_get_tgt(struct sptlrpc_conf *conf, + const char *name, + int create) +{ + struct sptlrpc_conf_tgt *conf_tgt; + + list_for_each_entry(conf_tgt, &conf->sc_tgts, sct_list) { + if (strcmp(conf_tgt->sct_name, name) == 0) + return conf_tgt; + } + + if (!create) + return NULL; + + OBD_ALLOC_PTR(conf_tgt); + if (conf_tgt) { + strlcpy(conf_tgt->sct_name, name, sizeof(conf_tgt->sct_name)); + sptlrpc_rule_set_init(&conf_tgt->sct_rset); + list_add(&conf_tgt->sct_list, &conf->sc_tgts); + } + + return conf_tgt; +} + +static +struct sptlrpc_conf *sptlrpc_conf_get(const char *fsname, + int create) +{ + struct sptlrpc_conf *conf; + + list_for_each_entry(conf, &sptlrpc_confs, sc_list) { + if (strcmp(conf->sc_fsname, fsname) == 0) + return conf; + } + + if (!create) + return NULL; + + OBD_ALLOC_PTR(conf); + if (conf == NULL) + return NULL; + + strcpy(conf->sc_fsname, fsname); + sptlrpc_rule_set_init(&conf->sc_rset); + INIT_LIST_HEAD(&conf->sc_tgts); + list_add(&conf->sc_list, &sptlrpc_confs); + + CDEBUG(D_SEC, "create sptlrpc conf %s\n", conf->sc_fsname); + return conf; +} + +/** + * caller must hold conf_lock already. + */ +static int sptlrpc_conf_merge_rule(struct sptlrpc_conf *conf, + const char *target, + struct sptlrpc_rule *rule) +{ + struct sptlrpc_conf_tgt *conf_tgt; + struct sptlrpc_rule_set *rule_set; + + /* fsname == target means general rules for the whole fs */ + if (strcmp(conf->sc_fsname, target) == 0) { + rule_set = &conf->sc_rset; + } else { + conf_tgt = sptlrpc_conf_get_tgt(conf, target, 1); + if (conf_tgt) { + rule_set = &conf_tgt->sct_rset; + } else { + CERROR("out of memory, can't merge rule!\n"); + return -ENOMEM; + } + } + + return sptlrpc_rule_set_merge(rule_set, rule); +} + +/** + * process one LCFG_SPTLRPC_CONF record. if \a conf is NULL, we + * find one through the target name in the record inside conf_lock; + * otherwise means caller already hold conf_lock. + */ +static int __sptlrpc_process_config(struct lustre_cfg *lcfg, + struct sptlrpc_conf *conf) +{ + char *target, *param; + char fsname[MTI_NAME_MAXLEN]; + struct sptlrpc_rule rule; + int rc; + + target = lustre_cfg_string(lcfg, 1); + if (target == NULL) { + CERROR("missing target name\n"); + return -EINVAL; + } + + param = lustre_cfg_string(lcfg, 2); + if (param == NULL) { + CERROR("missing parameter\n"); + return -EINVAL; + } + + CDEBUG(D_SEC, "processing rule: %s.%s\n", target, param); + + /* parse rule to make sure the format is correct */ + if (strncmp(param, PARAM_SRPC_FLVR, sizeof(PARAM_SRPC_FLVR) - 1) != 0) { + CERROR("Invalid sptlrpc parameter: %s\n", param); + return -EINVAL; + } + param += sizeof(PARAM_SRPC_FLVR) - 1; + + rc = sptlrpc_parse_rule(param, &rule); + if (rc) + return -EINVAL; + + if (conf == NULL) { + target2fsname(target, fsname, sizeof(fsname)); + + mutex_lock(&sptlrpc_conf_lock); + conf = sptlrpc_conf_get(fsname, 0); + if (conf == NULL) { + CERROR("can't find conf\n"); + rc = -ENOMEM; + } else { + rc = sptlrpc_conf_merge_rule(conf, target, &rule); + } + mutex_unlock(&sptlrpc_conf_lock); + } else { + LASSERT(mutex_is_locked(&sptlrpc_conf_lock)); + rc = sptlrpc_conf_merge_rule(conf, target, &rule); + } + + if (rc == 0) + conf->sc_modified++; + + return rc; +} + +int sptlrpc_process_config(struct lustre_cfg *lcfg) +{ + return __sptlrpc_process_config(lcfg, NULL); +} +EXPORT_SYMBOL(sptlrpc_process_config); + +static int logname2fsname(const char *logname, char *buf, int buflen) +{ + char *ptr; + int len; + + ptr = strrchr(logname, '-'); + if (ptr == NULL || strcmp(ptr, "-sptlrpc")) { + CERROR("%s is not a sptlrpc config log\n", logname); + return -EINVAL; + } + + len = min((int) (ptr - logname), buflen - 1); + + memcpy(buf, logname, len); + buf[len] = '\0'; + return 0; +} + +void sptlrpc_conf_log_update_begin(const char *logname) +{ + struct sptlrpc_conf *conf; + char fsname[16]; + + if (logname2fsname(logname, fsname, sizeof(fsname))) + return; + + mutex_lock(&sptlrpc_conf_lock); + + conf = sptlrpc_conf_get(fsname, 0); + if (conf) { + if (conf->sc_local) { + LASSERT(conf->sc_updated == 0); + sptlrpc_conf_free_rsets(conf); + } + conf->sc_modified = 0; + } + + mutex_unlock(&sptlrpc_conf_lock); +} +EXPORT_SYMBOL(sptlrpc_conf_log_update_begin); + +/** + * mark a config log has been updated + */ +void sptlrpc_conf_log_update_end(const char *logname) +{ + struct sptlrpc_conf *conf; + char fsname[16]; + + if (logname2fsname(logname, fsname, sizeof(fsname))) + return; + + mutex_lock(&sptlrpc_conf_lock); + + conf = sptlrpc_conf_get(fsname, 0); + if (conf) { + /* + * if original state is not updated, make sure the + * modified counter > 0 to enforce updating local copy. + */ + if (conf->sc_updated == 0) + conf->sc_modified++; + + conf->sc_updated = 1; + } + + mutex_unlock(&sptlrpc_conf_lock); +} +EXPORT_SYMBOL(sptlrpc_conf_log_update_end); + +void sptlrpc_conf_log_start(const char *logname) +{ + char fsname[16]; + + if (logname2fsname(logname, fsname, sizeof(fsname))) + return; + + mutex_lock(&sptlrpc_conf_lock); + sptlrpc_conf_get(fsname, 1); + mutex_unlock(&sptlrpc_conf_lock); +} +EXPORT_SYMBOL(sptlrpc_conf_log_start); + +void sptlrpc_conf_log_stop(const char *logname) +{ + struct sptlrpc_conf *conf; + char fsname[16]; + + if (logname2fsname(logname, fsname, sizeof(fsname))) + return; + + mutex_lock(&sptlrpc_conf_lock); + conf = sptlrpc_conf_get(fsname, 0); + if (conf) + sptlrpc_conf_free(conf); + mutex_unlock(&sptlrpc_conf_lock); +} +EXPORT_SYMBOL(sptlrpc_conf_log_stop); + +static inline void flavor_set_flags(struct sptlrpc_flavor *sf, + enum lustre_sec_part from, + enum lustre_sec_part to, + unsigned int fl_udesc) +{ + /* + * null flavor doesn't need to set any flavor, and in fact + * we'd better not do that because everybody share a single sec. + */ + if (sf->sf_rpc == SPTLRPC_FLVR_NULL) + return; + + if (from == LUSTRE_SP_MDT) { + /* MDT->MDT; MDT->OST */ + sf->sf_flags |= PTLRPC_SEC_FL_ROOTONLY; + } else if (from == LUSTRE_SP_CLI && to == LUSTRE_SP_OST) { + /* CLI->OST */ + sf->sf_flags |= PTLRPC_SEC_FL_ROOTONLY | PTLRPC_SEC_FL_BULK; + } else if (from == LUSTRE_SP_CLI && to == LUSTRE_SP_MDT) { + /* CLI->MDT */ + if (fl_udesc && sf->sf_rpc != SPTLRPC_FLVR_NULL) + sf->sf_flags |= PTLRPC_SEC_FL_UDESC; + } +} + +void sptlrpc_conf_choose_flavor(enum lustre_sec_part from, + enum lustre_sec_part to, + struct obd_uuid *target, + lnet_nid_t nid, + struct sptlrpc_flavor *sf) +{ + struct sptlrpc_conf *conf; + struct sptlrpc_conf_tgt *conf_tgt; + char name[MTI_NAME_MAXLEN]; + int len, rc = 0; + + target2fsname(target->uuid, name, sizeof(name)); + + mutex_lock(&sptlrpc_conf_lock); + + conf = sptlrpc_conf_get(name, 0); + if (conf == NULL) + goto out; + + /* convert uuid name (supposed end with _UUID) to target name */ + len = strlen(target->uuid); + LASSERT(len > 5); + memcpy(name, target->uuid, len - 5); + name[len - 5] = '\0'; + + conf_tgt = sptlrpc_conf_get_tgt(conf, name, 0); + if (conf_tgt) { + rc = sptlrpc_rule_set_choose(&conf_tgt->sct_rset, + from, to, nid, sf); + if (rc) + goto out; + } + + rc = sptlrpc_rule_set_choose(&conf->sc_rset, from, to, nid, sf); +out: + mutex_unlock(&sptlrpc_conf_lock); + + if (rc == 0) + get_default_flavor(sf); + + flavor_set_flags(sf, from, to, 1); +} + +/** + * called by target devices, determine the expected flavor from + * certain peer (from, nid). + */ +void sptlrpc_target_choose_flavor(struct sptlrpc_rule_set *rset, + enum lustre_sec_part from, + lnet_nid_t nid, + struct sptlrpc_flavor *sf) +{ + if (sptlrpc_rule_set_choose(rset, from, LUSTRE_SP_ANY, nid, sf) == 0) + get_default_flavor(sf); +} +EXPORT_SYMBOL(sptlrpc_target_choose_flavor); + +#define SEC_ADAPT_DELAY (10) + +/** + * called by client devices, notify the sptlrpc config has changed and + * do import_sec_adapt later. + */ +void sptlrpc_conf_client_adapt(struct obd_device *obd) +{ + struct obd_import *imp; + + LASSERT(strcmp(obd->obd_type->typ_name, LUSTRE_MDC_NAME) == 0 || + strcmp(obd->obd_type->typ_name, LUSTRE_OSC_NAME) == 0); + CDEBUG(D_SEC, "obd %s\n", obd->u.cli.cl_target_uuid.uuid); + + /* serialize with connect/disconnect import */ + down_read(&obd->u.cli.cl_sem); + + imp = obd->u.cli.cl_import; + if (imp) { + spin_lock(&imp->imp_lock); + if (imp->imp_sec) + imp->imp_sec_expire = get_seconds() + + SEC_ADAPT_DELAY; + spin_unlock(&imp->imp_lock); + } + + up_read(&obd->u.cli.cl_sem); +} +EXPORT_SYMBOL(sptlrpc_conf_client_adapt); + +int sptlrpc_conf_init(void) +{ + mutex_init(&sptlrpc_conf_lock); + return 0; +} + +void sptlrpc_conf_fini(void) +{ + struct sptlrpc_conf *conf, *conf_next; + + mutex_lock(&sptlrpc_conf_lock); + list_for_each_entry_safe(conf, conf_next, &sptlrpc_confs, sc_list) { + sptlrpc_conf_free(conf); + } + LASSERT(list_empty(&sptlrpc_confs)); + mutex_unlock(&sptlrpc_conf_lock); +} diff --git a/drivers/staging/lustre/lustre/ptlrpc/sec_gc.c b/drivers/staging/lustre/lustre/ptlrpc/sec_gc.c new file mode 100644 index 000000000..81de68edb --- /dev/null +++ b/drivers/staging/lustre/lustre/ptlrpc/sec_gc.c @@ -0,0 +1,252 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2011, 2012, Intel Corporation. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + * + * lustre/ptlrpc/sec_gc.c + * + * Author: Eric Mei <ericm@clusterfs.com> + */ + +#define DEBUG_SUBSYSTEM S_SEC + +#include "../../include/linux/libcfs/libcfs.h" + +#include "../include/obd_support.h" +#include "../include/obd_class.h" +#include "../include/lustre_net.h" +#include "../include/lustre_sec.h" + +#include "ptlrpc_internal.h" + +#define SEC_GC_INTERVAL (30 * 60) + + +static struct mutex sec_gc_mutex; +static LIST_HEAD(sec_gc_list); +static spinlock_t sec_gc_list_lock; + +static LIST_HEAD(sec_gc_ctx_list); +static spinlock_t sec_gc_ctx_list_lock; + +static struct ptlrpc_thread sec_gc_thread; +static atomic_t sec_gc_wait_del = ATOMIC_INIT(0); + + +void sptlrpc_gc_add_sec(struct ptlrpc_sec *sec) +{ + LASSERT(sec->ps_policy->sp_cops->gc_ctx); + LASSERT(sec->ps_gc_interval > 0); + LASSERT(list_empty(&sec->ps_gc_list)); + + sec->ps_gc_next = get_seconds() + sec->ps_gc_interval; + + spin_lock(&sec_gc_list_lock); + list_add_tail(&sec_gc_list, &sec->ps_gc_list); + spin_unlock(&sec_gc_list_lock); + + CDEBUG(D_SEC, "added sec %p(%s)\n", sec, sec->ps_policy->sp_name); +} +EXPORT_SYMBOL(sptlrpc_gc_add_sec); + +void sptlrpc_gc_del_sec(struct ptlrpc_sec *sec) +{ + if (list_empty(&sec->ps_gc_list)) + return; + + might_sleep(); + + /* signal before list_del to make iteration in gc thread safe */ + atomic_inc(&sec_gc_wait_del); + + spin_lock(&sec_gc_list_lock); + list_del_init(&sec->ps_gc_list); + spin_unlock(&sec_gc_list_lock); + + /* barrier */ + mutex_lock(&sec_gc_mutex); + mutex_unlock(&sec_gc_mutex); + + atomic_dec(&sec_gc_wait_del); + + CDEBUG(D_SEC, "del sec %p(%s)\n", sec, sec->ps_policy->sp_name); +} +EXPORT_SYMBOL(sptlrpc_gc_del_sec); + +void sptlrpc_gc_add_ctx(struct ptlrpc_cli_ctx *ctx) +{ + LASSERT(list_empty(&ctx->cc_gc_chain)); + + CDEBUG(D_SEC, "hand over ctx %p(%u->%s)\n", + ctx, ctx->cc_vcred.vc_uid, sec2target_str(ctx->cc_sec)); + spin_lock(&sec_gc_ctx_list_lock); + list_add(&ctx->cc_gc_chain, &sec_gc_ctx_list); + spin_unlock(&sec_gc_ctx_list_lock); + + thread_add_flags(&sec_gc_thread, SVC_SIGNAL); + wake_up(&sec_gc_thread.t_ctl_waitq); +} +EXPORT_SYMBOL(sptlrpc_gc_add_ctx); + +static void sec_process_ctx_list(void) +{ + struct ptlrpc_cli_ctx *ctx; + + spin_lock(&sec_gc_ctx_list_lock); + + while (!list_empty(&sec_gc_ctx_list)) { + ctx = list_entry(sec_gc_ctx_list.next, + struct ptlrpc_cli_ctx, cc_gc_chain); + list_del_init(&ctx->cc_gc_chain); + spin_unlock(&sec_gc_ctx_list_lock); + + LASSERT(ctx->cc_sec); + LASSERT(atomic_read(&ctx->cc_refcount) == 1); + CDEBUG(D_SEC, "gc pick up ctx %p(%u->%s)\n", + ctx, ctx->cc_vcred.vc_uid, sec2target_str(ctx->cc_sec)); + sptlrpc_cli_ctx_put(ctx, 1); + + spin_lock(&sec_gc_ctx_list_lock); + } + + spin_unlock(&sec_gc_ctx_list_lock); +} + +static void sec_do_gc(struct ptlrpc_sec *sec) +{ + LASSERT(sec->ps_policy->sp_cops->gc_ctx); + + if (unlikely(sec->ps_gc_next == 0)) { + CDEBUG(D_SEC, "sec %p(%s) has 0 gc time\n", + sec, sec->ps_policy->sp_name); + return; + } + + CDEBUG(D_SEC, "check on sec %p(%s)\n", sec, sec->ps_policy->sp_name); + + if (cfs_time_after(sec->ps_gc_next, get_seconds())) + return; + + sec->ps_policy->sp_cops->gc_ctx(sec); + sec->ps_gc_next = get_seconds() + sec->ps_gc_interval; +} + +static int sec_gc_main(void *arg) +{ + struct ptlrpc_thread *thread = (struct ptlrpc_thread *) arg; + struct l_wait_info lwi; + + unshare_fs_struct(); + + /* Record that the thread is running */ + thread_set_flags(thread, SVC_RUNNING); + wake_up(&thread->t_ctl_waitq); + + while (1) { + struct ptlrpc_sec *sec; + + thread_clear_flags(thread, SVC_SIGNAL); + sec_process_ctx_list(); +again: + /* go through sec list do gc. + * FIXME here we iterate through the whole list each time which + * is not optimal. we perhaps want to use balanced binary tree + * to trace each sec as order of expiry time. + * another issue here is we wakeup as fixed interval instead of + * according to each sec's expiry time */ + mutex_lock(&sec_gc_mutex); + list_for_each_entry(sec, &sec_gc_list, ps_gc_list) { + /* if someone is waiting to be deleted, let it + * proceed as soon as possible. */ + if (atomic_read(&sec_gc_wait_del)) { + CDEBUG(D_SEC, "deletion pending, start over\n"); + mutex_unlock(&sec_gc_mutex); + goto again; + } + + sec_do_gc(sec); + } + mutex_unlock(&sec_gc_mutex); + + /* check ctx list again before sleep */ + sec_process_ctx_list(); + + lwi = LWI_TIMEOUT(SEC_GC_INTERVAL * HZ, NULL, NULL); + l_wait_event(thread->t_ctl_waitq, + thread_is_stopping(thread) || + thread_is_signal(thread), + &lwi); + + if (thread_test_and_clear_flags(thread, SVC_STOPPING)) + break; + } + + thread_set_flags(thread, SVC_STOPPED); + wake_up(&thread->t_ctl_waitq); + return 0; +} + +int sptlrpc_gc_init(void) +{ + struct l_wait_info lwi = { 0 }; + struct task_struct *task; + + mutex_init(&sec_gc_mutex); + spin_lock_init(&sec_gc_list_lock); + spin_lock_init(&sec_gc_ctx_list_lock); + + /* initialize thread control */ + memset(&sec_gc_thread, 0, sizeof(sec_gc_thread)); + init_waitqueue_head(&sec_gc_thread.t_ctl_waitq); + + task = kthread_run(sec_gc_main, &sec_gc_thread, "sptlrpc_gc"); + if (IS_ERR(task)) { + CERROR("can't start gc thread: %ld\n", PTR_ERR(task)); + return PTR_ERR(task); + } + + l_wait_event(sec_gc_thread.t_ctl_waitq, + thread_is_running(&sec_gc_thread), &lwi); + return 0; +} + +void sptlrpc_gc_fini(void) +{ + struct l_wait_info lwi = { 0 }; + + thread_set_flags(&sec_gc_thread, SVC_STOPPING); + wake_up(&sec_gc_thread.t_ctl_waitq); + + l_wait_event(sec_gc_thread.t_ctl_waitq, + thread_is_stopped(&sec_gc_thread), &lwi); +} diff --git a/drivers/staging/lustre/lustre/ptlrpc/sec_lproc.c b/drivers/staging/lustre/lustre/ptlrpc/sec_lproc.c new file mode 100644 index 000000000..0d08145a6 --- /dev/null +++ b/drivers/staging/lustre/lustre/ptlrpc/sec_lproc.c @@ -0,0 +1,199 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + * + * lustre/ptlrpc/sec_lproc.c + * + * Author: Eric Mei <ericm@clusterfs.com> + */ + +#define DEBUG_SUBSYSTEM S_SEC + +#include "../../include/linux/libcfs/libcfs.h" +#include <linux/crypto.h> + +#include "../include/obd.h" +#include "../include/obd_class.h" +#include "../include/obd_support.h" +#include "../include/lustre_net.h" +#include "../include/lustre_import.h" +#include "../include/lustre_dlm.h" +#include "../include/lustre_sec.h" + +#include "ptlrpc_internal.h" + + +struct proc_dir_entry *sptlrpc_proc_root = NULL; +EXPORT_SYMBOL(sptlrpc_proc_root); + +static char *sec_flags2str(unsigned long flags, char *buf, int bufsize) +{ + buf[0] = '\0'; + + if (flags & PTLRPC_SEC_FL_REVERSE) + strlcat(buf, "reverse,", bufsize); + if (flags & PTLRPC_SEC_FL_ROOTONLY) + strlcat(buf, "rootonly,", bufsize); + if (flags & PTLRPC_SEC_FL_UDESC) + strlcat(buf, "udesc,", bufsize); + if (flags & PTLRPC_SEC_FL_BULK) + strlcat(buf, "bulk,", bufsize); + if (buf[0] == '\0') + strlcat(buf, "-,", bufsize); + + return buf; +} + +static int sptlrpc_info_lprocfs_seq_show(struct seq_file *seq, void *v) +{ + struct obd_device *dev = seq->private; + struct client_obd *cli = &dev->u.cli; + struct ptlrpc_sec *sec = NULL; + char str[32]; + + LASSERT(strcmp(dev->obd_type->typ_name, LUSTRE_OSC_NAME) == 0 || + strcmp(dev->obd_type->typ_name, LUSTRE_MDC_NAME) == 0 || + strcmp(dev->obd_type->typ_name, LUSTRE_MGC_NAME) == 0); + + if (cli->cl_import) + sec = sptlrpc_import_sec_ref(cli->cl_import); + if (sec == NULL) + goto out; + + sec_flags2str(sec->ps_flvr.sf_flags, str, sizeof(str)); + + seq_printf(seq, "rpc flavor: %s\n", + sptlrpc_flavor2name_base(sec->ps_flvr.sf_rpc)); + seq_printf(seq, "bulk flavor: %s\n", + sptlrpc_flavor2name_bulk(&sec->ps_flvr, str, sizeof(str))); + seq_printf(seq, "flags: %s\n", + sec_flags2str(sec->ps_flvr.sf_flags, str, sizeof(str))); + seq_printf(seq, "id: %d\n", sec->ps_id); + seq_printf(seq, "refcount: %d\n", + atomic_read(&sec->ps_refcount)); + seq_printf(seq, "nctx: %d\n", atomic_read(&sec->ps_nctx)); + seq_printf(seq, "gc internal %ld\n", sec->ps_gc_interval); + seq_printf(seq, "gc next %ld\n", + sec->ps_gc_interval ? + sec->ps_gc_next - get_seconds() : 0); + + sptlrpc_sec_put(sec); +out: + return 0; +} +LPROC_SEQ_FOPS_RO(sptlrpc_info_lprocfs); + +static int sptlrpc_ctxs_lprocfs_seq_show(struct seq_file *seq, void *v) +{ + struct obd_device *dev = seq->private; + struct client_obd *cli = &dev->u.cli; + struct ptlrpc_sec *sec = NULL; + + LASSERT(strcmp(dev->obd_type->typ_name, LUSTRE_OSC_NAME) == 0 || + strcmp(dev->obd_type->typ_name, LUSTRE_MDC_NAME) == 0 || + strcmp(dev->obd_type->typ_name, LUSTRE_MGC_NAME) == 0); + + if (cli->cl_import) + sec = sptlrpc_import_sec_ref(cli->cl_import); + if (sec == NULL) + goto out; + + if (sec->ps_policy->sp_cops->display) + sec->ps_policy->sp_cops->display(sec, seq); + + sptlrpc_sec_put(sec); +out: + return 0; +} +LPROC_SEQ_FOPS_RO(sptlrpc_ctxs_lprocfs); + +int sptlrpc_lprocfs_cliobd_attach(struct obd_device *dev) +{ + int rc; + + if (strcmp(dev->obd_type->typ_name, LUSTRE_OSC_NAME) != 0 && + strcmp(dev->obd_type->typ_name, LUSTRE_MDC_NAME) != 0 && + strcmp(dev->obd_type->typ_name, LUSTRE_MGC_NAME) != 0) { + CERROR("can't register lproc for obd type %s\n", + dev->obd_type->typ_name); + return -EINVAL; + } + + rc = lprocfs_obd_seq_create(dev, "srpc_info", 0444, + &sptlrpc_info_lprocfs_fops, dev); + if (rc) { + CERROR("create proc entry srpc_info for %s: %d\n", + dev->obd_name, rc); + return rc; + } + + rc = lprocfs_obd_seq_create(dev, "srpc_contexts", 0444, + &sptlrpc_ctxs_lprocfs_fops, dev); + if (rc) { + CERROR("create proc entry srpc_contexts for %s: %d\n", + dev->obd_name, rc); + return rc; + } + + return 0; +} +EXPORT_SYMBOL(sptlrpc_lprocfs_cliobd_attach); + +LPROC_SEQ_FOPS_RO(sptlrpc_proc_enc_pool); +static struct lprocfs_vars sptlrpc_lprocfs_vars[] = { + { "encrypt_page_pools", &sptlrpc_proc_enc_pool_fops }, + { NULL } +}; + +int sptlrpc_lproc_init(void) +{ + int rc; + + LASSERT(sptlrpc_proc_root == NULL); + + sptlrpc_proc_root = lprocfs_register("sptlrpc", proc_lustre_root, + sptlrpc_lprocfs_vars, NULL); + if (IS_ERR(sptlrpc_proc_root)) { + rc = PTR_ERR(sptlrpc_proc_root); + sptlrpc_proc_root = NULL; + return rc; + } + return 0; +} + +void sptlrpc_lproc_fini(void) +{ + if (sptlrpc_proc_root) { + lprocfs_remove(&sptlrpc_proc_root); + sptlrpc_proc_root = NULL; + } +} diff --git a/drivers/staging/lustre/lustre/ptlrpc/sec_null.c b/drivers/staging/lustre/lustre/ptlrpc/sec_null.c new file mode 100644 index 000000000..4e132435b --- /dev/null +++ b/drivers/staging/lustre/lustre/ptlrpc/sec_null.c @@ -0,0 +1,458 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2011, 2012, Intel Corporation. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + * + * lustre/ptlrpc/sec_null.c + * + * Author: Eric Mei <ericm@clusterfs.com> + */ + +#define DEBUG_SUBSYSTEM S_SEC + + +#include "../include/obd_support.h" +#include "../include/obd_cksum.h" +#include "../include/obd_class.h" +#include "../include/lustre_net.h" +#include "../include/lustre_sec.h" + +static struct ptlrpc_sec_policy null_policy; +static struct ptlrpc_sec null_sec; +static struct ptlrpc_cli_ctx null_cli_ctx; +static struct ptlrpc_svc_ctx null_svc_ctx; + +/* + * we can temporarily use the topmost 8-bits of lm_secflvr to identify + * the source sec part. + */ +static inline +void null_encode_sec_part(struct lustre_msg *msg, enum lustre_sec_part sp) +{ + msg->lm_secflvr |= (((__u32) sp) & 0xFF) << 24; +} + +static inline +enum lustre_sec_part null_decode_sec_part(struct lustre_msg *msg) +{ + return (msg->lm_secflvr >> 24) & 0xFF; +} + +static int null_ctx_refresh(struct ptlrpc_cli_ctx *ctx) +{ + /* should never reach here */ + LBUG(); + return 0; +} + +static +int null_ctx_sign(struct ptlrpc_cli_ctx *ctx, struct ptlrpc_request *req) +{ + req->rq_reqbuf->lm_secflvr = SPTLRPC_FLVR_NULL; + + if (!req->rq_import->imp_dlm_fake) { + struct obd_device *obd = req->rq_import->imp_obd; + null_encode_sec_part(req->rq_reqbuf, + obd->u.cli.cl_sp_me); + } + req->rq_reqdata_len = req->rq_reqlen; + return 0; +} + +static +int null_ctx_verify(struct ptlrpc_cli_ctx *ctx, struct ptlrpc_request *req) +{ + __u32 cksums, cksumc; + + LASSERT(req->rq_repdata); + + req->rq_repmsg = req->rq_repdata; + req->rq_replen = req->rq_repdata_len; + + if (req->rq_early) { + cksums = lustre_msg_get_cksum(req->rq_repdata); + cksumc = lustre_msg_calc_cksum(req->rq_repmsg); + if (cksumc != cksums) { + CDEBUG(D_SEC, + "early reply checksum mismatch: %08x != %08x\n", + cksumc, cksums); + return -EINVAL; + } + } + + return 0; +} + +static +struct ptlrpc_sec *null_create_sec(struct obd_import *imp, + struct ptlrpc_svc_ctx *svc_ctx, + struct sptlrpc_flavor *sf) +{ + LASSERT(SPTLRPC_FLVR_POLICY(sf->sf_rpc) == SPTLRPC_POLICY_NULL); + + /* general layer has take a module reference for us, because we never + * really destroy the sec, simply release the reference here. + */ + sptlrpc_policy_put(&null_policy); + return &null_sec; +} + +static +void null_destroy_sec(struct ptlrpc_sec *sec) +{ + LASSERT(sec == &null_sec); +} + +static +struct ptlrpc_cli_ctx *null_lookup_ctx(struct ptlrpc_sec *sec, + struct vfs_cred *vcred, + int create, int remove_dead) +{ + atomic_inc(&null_cli_ctx.cc_refcount); + return &null_cli_ctx; +} + +static +int null_flush_ctx_cache(struct ptlrpc_sec *sec, + uid_t uid, + int grace, int force) +{ + return 0; +} + +static +int null_alloc_reqbuf(struct ptlrpc_sec *sec, + struct ptlrpc_request *req, + int msgsize) +{ + if (!req->rq_reqbuf) { + int alloc_size = size_roundup_power2(msgsize); + + LASSERT(!req->rq_pool); + OBD_ALLOC_LARGE(req->rq_reqbuf, alloc_size); + if (!req->rq_reqbuf) + return -ENOMEM; + + req->rq_reqbuf_len = alloc_size; + } else { + LASSERT(req->rq_pool); + LASSERT(req->rq_reqbuf_len >= msgsize); + memset(req->rq_reqbuf, 0, msgsize); + } + + req->rq_reqmsg = req->rq_reqbuf; + return 0; +} + +static +void null_free_reqbuf(struct ptlrpc_sec *sec, + struct ptlrpc_request *req) +{ + if (!req->rq_pool) { + LASSERTF(req->rq_reqmsg == req->rq_reqbuf, + "req %p: reqmsg %p is not reqbuf %p in null sec\n", + req, req->rq_reqmsg, req->rq_reqbuf); + LASSERTF(req->rq_reqbuf_len >= req->rq_reqlen, + "req %p: reqlen %d should smaller than buflen %d\n", + req, req->rq_reqlen, req->rq_reqbuf_len); + + OBD_FREE_LARGE(req->rq_reqbuf, req->rq_reqbuf_len); + req->rq_reqbuf = NULL; + req->rq_reqbuf_len = 0; + } +} + +static +int null_alloc_repbuf(struct ptlrpc_sec *sec, + struct ptlrpc_request *req, + int msgsize) +{ + /* add space for early replied */ + msgsize += lustre_msg_early_size(); + + msgsize = size_roundup_power2(msgsize); + + OBD_ALLOC_LARGE(req->rq_repbuf, msgsize); + if (!req->rq_repbuf) + return -ENOMEM; + + req->rq_repbuf_len = msgsize; + return 0; +} + +static +void null_free_repbuf(struct ptlrpc_sec *sec, + struct ptlrpc_request *req) +{ + LASSERT(req->rq_repbuf); + + OBD_FREE_LARGE(req->rq_repbuf, req->rq_repbuf_len); + req->rq_repbuf = NULL; + req->rq_repbuf_len = 0; +} + +static +int null_enlarge_reqbuf(struct ptlrpc_sec *sec, + struct ptlrpc_request *req, + int segment, int newsize) +{ + struct lustre_msg *newbuf; + struct lustre_msg *oldbuf = req->rq_reqmsg; + int oldsize, newmsg_size, alloc_size; + + LASSERT(req->rq_reqbuf); + LASSERT(req->rq_reqbuf == req->rq_reqmsg); + LASSERT(req->rq_reqbuf_len >= req->rq_reqlen); + LASSERT(req->rq_reqlen == lustre_packed_msg_size(oldbuf)); + + /* compute new message size */ + oldsize = req->rq_reqbuf->lm_buflens[segment]; + req->rq_reqbuf->lm_buflens[segment] = newsize; + newmsg_size = lustre_packed_msg_size(oldbuf); + req->rq_reqbuf->lm_buflens[segment] = oldsize; + + /* request from pool should always have enough buffer */ + LASSERT(!req->rq_pool || req->rq_reqbuf_len >= newmsg_size); + + if (req->rq_reqbuf_len < newmsg_size) { + alloc_size = size_roundup_power2(newmsg_size); + + OBD_ALLOC_LARGE(newbuf, alloc_size); + if (newbuf == NULL) + return -ENOMEM; + + /* Must lock this, so that otherwise unprotected change of + * rq_reqmsg is not racing with parallel processing of + * imp_replay_list traversing threads. See LU-3333 + * This is a bandaid at best, we really need to deal with this + * in request enlarging code before unpacking that's already + * there */ + if (req->rq_import) + spin_lock(&req->rq_import->imp_lock); + memcpy(newbuf, req->rq_reqbuf, req->rq_reqlen); + + OBD_FREE_LARGE(req->rq_reqbuf, req->rq_reqbuf_len); + req->rq_reqbuf = req->rq_reqmsg = newbuf; + req->rq_reqbuf_len = alloc_size; + + if (req->rq_import) + spin_unlock(&req->rq_import->imp_lock); + } + + _sptlrpc_enlarge_msg_inplace(req->rq_reqmsg, segment, newsize); + req->rq_reqlen = newmsg_size; + + return 0; +} + +static struct ptlrpc_svc_ctx null_svc_ctx = { + .sc_refcount = ATOMIC_INIT(1), + .sc_policy = &null_policy, +}; + +static +int null_accept(struct ptlrpc_request *req) +{ + LASSERT(SPTLRPC_FLVR_POLICY(req->rq_flvr.sf_rpc) == + SPTLRPC_POLICY_NULL); + + if (req->rq_flvr.sf_rpc != SPTLRPC_FLVR_NULL) { + CERROR("Invalid rpc flavor 0x%x\n", req->rq_flvr.sf_rpc); + return SECSVC_DROP; + } + + req->rq_sp_from = null_decode_sec_part(req->rq_reqbuf); + + req->rq_reqmsg = req->rq_reqbuf; + req->rq_reqlen = req->rq_reqdata_len; + + req->rq_svc_ctx = &null_svc_ctx; + atomic_inc(&req->rq_svc_ctx->sc_refcount); + + return SECSVC_OK; +} + +static +int null_alloc_rs(struct ptlrpc_request *req, int msgsize) +{ + struct ptlrpc_reply_state *rs; + int rs_size = sizeof(*rs) + msgsize; + + LASSERT(msgsize % 8 == 0); + + rs = req->rq_reply_state; + + if (rs) { + /* pre-allocated */ + LASSERT(rs->rs_size >= rs_size); + } else { + OBD_ALLOC_LARGE(rs, rs_size); + if (rs == NULL) + return -ENOMEM; + + rs->rs_size = rs_size; + } + + rs->rs_svc_ctx = req->rq_svc_ctx; + atomic_inc(&req->rq_svc_ctx->sc_refcount); + + rs->rs_repbuf = (struct lustre_msg *) (rs + 1); + rs->rs_repbuf_len = rs_size - sizeof(*rs); + rs->rs_msg = rs->rs_repbuf; + + req->rq_reply_state = rs; + return 0; +} + +static +void null_free_rs(struct ptlrpc_reply_state *rs) +{ + LASSERT_ATOMIC_GT(&rs->rs_svc_ctx->sc_refcount, 1); + atomic_dec(&rs->rs_svc_ctx->sc_refcount); + + if (!rs->rs_prealloc) + OBD_FREE_LARGE(rs, rs->rs_size); +} + +static +int null_authorize(struct ptlrpc_request *req) +{ + struct ptlrpc_reply_state *rs = req->rq_reply_state; + + LASSERT(rs); + + rs->rs_repbuf->lm_secflvr = SPTLRPC_FLVR_NULL; + rs->rs_repdata_len = req->rq_replen; + + if (likely(req->rq_packed_final)) { + if (lustre_msghdr_get_flags(req->rq_reqmsg) & MSGHDR_AT_SUPPORT) + req->rq_reply_off = lustre_msg_early_size(); + else + req->rq_reply_off = 0; + } else { + __u32 cksum; + + cksum = lustre_msg_calc_cksum(rs->rs_repbuf); + lustre_msg_set_cksum(rs->rs_repbuf, cksum); + req->rq_reply_off = 0; + } + + return 0; +} + +static struct ptlrpc_ctx_ops null_ctx_ops = { + .refresh = null_ctx_refresh, + .sign = null_ctx_sign, + .verify = null_ctx_verify, +}; + +static struct ptlrpc_sec_cops null_sec_cops = { + .create_sec = null_create_sec, + .destroy_sec = null_destroy_sec, + .lookup_ctx = null_lookup_ctx, + .flush_ctx_cache = null_flush_ctx_cache, + .alloc_reqbuf = null_alloc_reqbuf, + .alloc_repbuf = null_alloc_repbuf, + .free_reqbuf = null_free_reqbuf, + .free_repbuf = null_free_repbuf, + .enlarge_reqbuf = null_enlarge_reqbuf, +}; + +static struct ptlrpc_sec_sops null_sec_sops = { + .accept = null_accept, + .alloc_rs = null_alloc_rs, + .authorize = null_authorize, + .free_rs = null_free_rs, +}; + +static struct ptlrpc_sec_policy null_policy = { + .sp_owner = THIS_MODULE, + .sp_name = "sec.null", + .sp_policy = SPTLRPC_POLICY_NULL, + .sp_cops = &null_sec_cops, + .sp_sops = &null_sec_sops, +}; + +static void null_init_internal(void) +{ + static HLIST_HEAD(__list); + + null_sec.ps_policy = &null_policy; + atomic_set(&null_sec.ps_refcount, 1); /* always busy */ + null_sec.ps_id = -1; + null_sec.ps_import = NULL; + null_sec.ps_flvr.sf_rpc = SPTLRPC_FLVR_NULL; + null_sec.ps_flvr.sf_flags = 0; + null_sec.ps_part = LUSTRE_SP_ANY; + null_sec.ps_dying = 0; + spin_lock_init(&null_sec.ps_lock); + atomic_set(&null_sec.ps_nctx, 1); /* for "null_cli_ctx" */ + INIT_LIST_HEAD(&null_sec.ps_gc_list); + null_sec.ps_gc_interval = 0; + null_sec.ps_gc_next = 0; + + hlist_add_head(&null_cli_ctx.cc_cache, &__list); + atomic_set(&null_cli_ctx.cc_refcount, 1); /* for hash */ + null_cli_ctx.cc_sec = &null_sec; + null_cli_ctx.cc_ops = &null_ctx_ops; + null_cli_ctx.cc_expire = 0; + null_cli_ctx.cc_flags = PTLRPC_CTX_CACHED | PTLRPC_CTX_ETERNAL | + PTLRPC_CTX_UPTODATE; + null_cli_ctx.cc_vcred.vc_uid = 0; + spin_lock_init(&null_cli_ctx.cc_lock); + INIT_LIST_HEAD(&null_cli_ctx.cc_req_list); + INIT_LIST_HEAD(&null_cli_ctx.cc_gc_chain); +} + +int sptlrpc_null_init(void) +{ + int rc; + + null_init_internal(); + + rc = sptlrpc_register_policy(&null_policy); + if (rc) + CERROR("failed to register %s: %d\n", null_policy.sp_name, rc); + + return rc; +} + +void sptlrpc_null_fini(void) +{ + int rc; + + rc = sptlrpc_unregister_policy(&null_policy); + if (rc) + CERROR("failed to unregister %s: %d\n", + null_policy.sp_name, rc); +} diff --git a/drivers/staging/lustre/lustre/ptlrpc/sec_plain.c b/drivers/staging/lustre/lustre/ptlrpc/sec_plain.c new file mode 100644 index 000000000..a79cd5301 --- /dev/null +++ b/drivers/staging/lustre/lustre/ptlrpc/sec_plain.c @@ -0,0 +1,1013 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2011, 2012, Intel Corporation. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + * + * lustre/ptlrpc/sec_plain.c + * + * Author: Eric Mei <ericm@clusterfs.com> + */ + +#define DEBUG_SUBSYSTEM S_SEC + + +#include "../include/obd_support.h" +#include "../include/obd_cksum.h" +#include "../include/obd_class.h" +#include "../include/lustre_net.h" +#include "../include/lustre_sec.h" + +struct plain_sec { + struct ptlrpc_sec pls_base; + rwlock_t pls_lock; + struct ptlrpc_cli_ctx *pls_ctx; +}; + +static inline struct plain_sec *sec2plsec(struct ptlrpc_sec *sec) +{ + return container_of(sec, struct plain_sec, pls_base); +} + +static struct ptlrpc_sec_policy plain_policy; +static struct ptlrpc_ctx_ops plain_ctx_ops; +static struct ptlrpc_svc_ctx plain_svc_ctx; + +static unsigned int plain_at_offset; + +/* + * for simplicity, plain policy rpc use fixed layout. + */ +#define PLAIN_PACK_SEGMENTS (4) + +#define PLAIN_PACK_HDR_OFF (0) +#define PLAIN_PACK_MSG_OFF (1) +#define PLAIN_PACK_USER_OFF (2) +#define PLAIN_PACK_BULK_OFF (3) + +#define PLAIN_FL_USER (0x01) +#define PLAIN_FL_BULK (0x02) + +struct plain_header { + __u8 ph_ver; /* 0 */ + __u8 ph_flags; + __u8 ph_sp; /* source */ + __u8 ph_bulk_hash_alg; /* complete flavor desc */ + __u8 ph_pad[4]; +}; + +struct plain_bulk_token { + __u8 pbt_hash[8]; +}; + +#define PLAIN_BSD_SIZE \ + (sizeof(struct ptlrpc_bulk_sec_desc) + sizeof(struct plain_bulk_token)) + +/**************************************** + * bulk checksum helpers * + ****************************************/ + +static int plain_unpack_bsd(struct lustre_msg *msg, int swabbed) +{ + struct ptlrpc_bulk_sec_desc *bsd; + + if (bulk_sec_desc_unpack(msg, PLAIN_PACK_BULK_OFF, swabbed)) + return -EPROTO; + + bsd = lustre_msg_buf(msg, PLAIN_PACK_BULK_OFF, PLAIN_BSD_SIZE); + if (bsd == NULL) { + CERROR("bulk sec desc has short size %d\n", + lustre_msg_buflen(msg, PLAIN_PACK_BULK_OFF)); + return -EPROTO; + } + + if (bsd->bsd_svc != SPTLRPC_BULK_SVC_NULL && + bsd->bsd_svc != SPTLRPC_BULK_SVC_INTG) { + CERROR("invalid bulk svc %u\n", bsd->bsd_svc); + return -EPROTO; + } + + return 0; +} + +static int plain_generate_bulk_csum(struct ptlrpc_bulk_desc *desc, + __u8 hash_alg, + struct plain_bulk_token *token) +{ + if (hash_alg == BULK_HASH_ALG_NULL) + return 0; + + memset(token->pbt_hash, 0, sizeof(token->pbt_hash)); + return sptlrpc_get_bulk_checksum(desc, hash_alg, token->pbt_hash, + sizeof(token->pbt_hash)); +} + +static int plain_verify_bulk_csum(struct ptlrpc_bulk_desc *desc, + __u8 hash_alg, + struct plain_bulk_token *tokenr) +{ + struct plain_bulk_token tokenv; + int rc; + + if (hash_alg == BULK_HASH_ALG_NULL) + return 0; + + memset(&tokenv.pbt_hash, 0, sizeof(tokenv.pbt_hash)); + rc = sptlrpc_get_bulk_checksum(desc, hash_alg, tokenv.pbt_hash, + sizeof(tokenv.pbt_hash)); + if (rc) + return rc; + + if (memcmp(tokenr->pbt_hash, tokenv.pbt_hash, sizeof(tokenr->pbt_hash))) + return -EACCES; + return 0; +} + +static void corrupt_bulk_data(struct ptlrpc_bulk_desc *desc) +{ + char *ptr; + unsigned int off, i; + + for (i = 0; i < desc->bd_iov_count; i++) { + if (desc->bd_iov[i].kiov_len == 0) + continue; + + ptr = kmap(desc->bd_iov[i].kiov_page); + off = desc->bd_iov[i].kiov_offset & ~CFS_PAGE_MASK; + ptr[off] ^= 0x1; + kunmap(desc->bd_iov[i].kiov_page); + return; + } +} + +/**************************************** + * cli_ctx apis * + ****************************************/ + +static +int plain_ctx_refresh(struct ptlrpc_cli_ctx *ctx) +{ + /* should never reach here */ + LBUG(); + return 0; +} + +static +int plain_ctx_validate(struct ptlrpc_cli_ctx *ctx) +{ + return 0; +} + +static +int plain_ctx_sign(struct ptlrpc_cli_ctx *ctx, struct ptlrpc_request *req) +{ + struct lustre_msg *msg = req->rq_reqbuf; + struct plain_header *phdr; + + msg->lm_secflvr = req->rq_flvr.sf_rpc; + + phdr = lustre_msg_buf(msg, PLAIN_PACK_HDR_OFF, 0); + phdr->ph_ver = 0; + phdr->ph_flags = 0; + phdr->ph_sp = ctx->cc_sec->ps_part; + phdr->ph_bulk_hash_alg = req->rq_flvr.u_bulk.hash.hash_alg; + + if (req->rq_pack_udesc) + phdr->ph_flags |= PLAIN_FL_USER; + if (req->rq_pack_bulk) + phdr->ph_flags |= PLAIN_FL_BULK; + + req->rq_reqdata_len = lustre_msg_size_v2(msg->lm_bufcount, + msg->lm_buflens); + return 0; +} + +static +int plain_ctx_verify(struct ptlrpc_cli_ctx *ctx, struct ptlrpc_request *req) +{ + struct lustre_msg *msg = req->rq_repdata; + struct plain_header *phdr; + __u32 cksum; + int swabbed; + + if (msg->lm_bufcount != PLAIN_PACK_SEGMENTS) { + CERROR("unexpected reply buf count %u\n", msg->lm_bufcount); + return -EPROTO; + } + + swabbed = ptlrpc_rep_need_swab(req); + + phdr = lustre_msg_buf(msg, PLAIN_PACK_HDR_OFF, sizeof(*phdr)); + if (phdr == NULL) { + CERROR("missing plain header\n"); + return -EPROTO; + } + + if (phdr->ph_ver != 0) { + CERROR("Invalid header version\n"); + return -EPROTO; + } + + /* expect no user desc in reply */ + if (phdr->ph_flags & PLAIN_FL_USER) { + CERROR("Unexpected udesc flag in reply\n"); + return -EPROTO; + } + + if (phdr->ph_bulk_hash_alg != req->rq_flvr.u_bulk.hash.hash_alg) { + CERROR("reply bulk flavor %u != %u\n", phdr->ph_bulk_hash_alg, + req->rq_flvr.u_bulk.hash.hash_alg); + return -EPROTO; + } + + if (unlikely(req->rq_early)) { + unsigned int hsize = 4; + + cfs_crypto_hash_digest(CFS_HASH_ALG_CRC32, + lustre_msg_buf(msg, PLAIN_PACK_MSG_OFF, 0), + lustre_msg_buflen(msg, PLAIN_PACK_MSG_OFF), + NULL, 0, (unsigned char *)&cksum, &hsize); + if (cksum != msg->lm_cksum) { + CDEBUG(D_SEC, + "early reply checksum mismatch: %08x != %08x\n", + cpu_to_le32(cksum), msg->lm_cksum); + return -EINVAL; + } + } else { + /* whether we sent with bulk or not, we expect the same + * in reply, except for early reply */ + if (!req->rq_early && + !equi(req->rq_pack_bulk == 1, + phdr->ph_flags & PLAIN_FL_BULK)) { + CERROR("%s bulk checksum in reply\n", + req->rq_pack_bulk ? "Missing" : "Unexpected"); + return -EPROTO; + } + + if (phdr->ph_flags & PLAIN_FL_BULK) { + if (plain_unpack_bsd(msg, swabbed)) + return -EPROTO; + } + } + + req->rq_repmsg = lustre_msg_buf(msg, PLAIN_PACK_MSG_OFF, 0); + req->rq_replen = lustre_msg_buflen(msg, PLAIN_PACK_MSG_OFF); + return 0; +} + +static +int plain_cli_wrap_bulk(struct ptlrpc_cli_ctx *ctx, + struct ptlrpc_request *req, + struct ptlrpc_bulk_desc *desc) +{ + struct ptlrpc_bulk_sec_desc *bsd; + struct plain_bulk_token *token; + int rc; + + LASSERT(req->rq_pack_bulk); + LASSERT(req->rq_reqbuf->lm_bufcount == PLAIN_PACK_SEGMENTS); + + bsd = lustre_msg_buf(req->rq_reqbuf, PLAIN_PACK_BULK_OFF, 0); + token = (struct plain_bulk_token *) bsd->bsd_data; + + bsd->bsd_version = 0; + bsd->bsd_flags = 0; + bsd->bsd_type = SPTLRPC_BULK_DEFAULT; + bsd->bsd_svc = SPTLRPC_FLVR_BULK_SVC(req->rq_flvr.sf_rpc); + + if (bsd->bsd_svc == SPTLRPC_BULK_SVC_NULL) + return 0; + + if (req->rq_bulk_read) + return 0; + + rc = plain_generate_bulk_csum(desc, req->rq_flvr.u_bulk.hash.hash_alg, + token); + if (rc) { + CERROR("bulk write: failed to compute checksum: %d\n", rc); + } else { + /* + * for sending we only compute the wrong checksum instead + * of corrupting the data so it is still correct on a redo + */ + if (OBD_FAIL_CHECK(OBD_FAIL_OSC_CHECKSUM_SEND) && + req->rq_flvr.u_bulk.hash.hash_alg != BULK_HASH_ALG_NULL) + token->pbt_hash[0] ^= 0x1; + } + + return rc; +} + +static +int plain_cli_unwrap_bulk(struct ptlrpc_cli_ctx *ctx, + struct ptlrpc_request *req, + struct ptlrpc_bulk_desc *desc) +{ + struct ptlrpc_bulk_sec_desc *bsdv; + struct plain_bulk_token *tokenv; + int rc; + int i, nob; + + LASSERT(req->rq_pack_bulk); + LASSERT(req->rq_reqbuf->lm_bufcount == PLAIN_PACK_SEGMENTS); + LASSERT(req->rq_repdata->lm_bufcount == PLAIN_PACK_SEGMENTS); + + bsdv = lustre_msg_buf(req->rq_repdata, PLAIN_PACK_BULK_OFF, 0); + tokenv = (struct plain_bulk_token *) bsdv->bsd_data; + + if (req->rq_bulk_write) { + if (bsdv->bsd_flags & BSD_FL_ERR) + return -EIO; + return 0; + } + + /* fix the actual data size */ + for (i = 0, nob = 0; i < desc->bd_iov_count; i++) { + if (desc->bd_iov[i].kiov_len + nob > desc->bd_nob_transferred) { + desc->bd_iov[i].kiov_len = + desc->bd_nob_transferred - nob; + } + nob += desc->bd_iov[i].kiov_len; + } + + rc = plain_verify_bulk_csum(desc, req->rq_flvr.u_bulk.hash.hash_alg, + tokenv); + if (rc) + CERROR("bulk read: client verify failed: %d\n", rc); + + return rc; +} + +/**************************************** + * sec apis * + ****************************************/ + +static +struct ptlrpc_cli_ctx *plain_sec_install_ctx(struct plain_sec *plsec) +{ + struct ptlrpc_cli_ctx *ctx, *ctx_new; + + OBD_ALLOC_PTR(ctx_new); + + write_lock(&plsec->pls_lock); + + ctx = plsec->pls_ctx; + if (ctx) { + atomic_inc(&ctx->cc_refcount); + + if (ctx_new) + OBD_FREE_PTR(ctx_new); + } else if (ctx_new) { + ctx = ctx_new; + + atomic_set(&ctx->cc_refcount, 1); /* for cache */ + ctx->cc_sec = &plsec->pls_base; + ctx->cc_ops = &plain_ctx_ops; + ctx->cc_expire = 0; + ctx->cc_flags = PTLRPC_CTX_CACHED | PTLRPC_CTX_UPTODATE; + ctx->cc_vcred.vc_uid = 0; + spin_lock_init(&ctx->cc_lock); + INIT_LIST_HEAD(&ctx->cc_req_list); + INIT_LIST_HEAD(&ctx->cc_gc_chain); + + plsec->pls_ctx = ctx; + atomic_inc(&plsec->pls_base.ps_nctx); + atomic_inc(&plsec->pls_base.ps_refcount); + + atomic_inc(&ctx->cc_refcount); /* for caller */ + } + + write_unlock(&plsec->pls_lock); + + return ctx; +} + +static +void plain_destroy_sec(struct ptlrpc_sec *sec) +{ + struct plain_sec *plsec = sec2plsec(sec); + + LASSERT(sec->ps_policy == &plain_policy); + LASSERT(sec->ps_import); + LASSERT(atomic_read(&sec->ps_refcount) == 0); + LASSERT(atomic_read(&sec->ps_nctx) == 0); + LASSERT(plsec->pls_ctx == NULL); + + class_import_put(sec->ps_import); + + OBD_FREE_PTR(plsec); +} + +static +void plain_kill_sec(struct ptlrpc_sec *sec) +{ + sec->ps_dying = 1; +} + +static +struct ptlrpc_sec *plain_create_sec(struct obd_import *imp, + struct ptlrpc_svc_ctx *svc_ctx, + struct sptlrpc_flavor *sf) +{ + struct plain_sec *plsec; + struct ptlrpc_sec *sec; + struct ptlrpc_cli_ctx *ctx; + + LASSERT(SPTLRPC_FLVR_POLICY(sf->sf_rpc) == SPTLRPC_POLICY_PLAIN); + + OBD_ALLOC_PTR(plsec); + if (plsec == NULL) + return NULL; + + /* + * initialize plain_sec + */ + rwlock_init(&plsec->pls_lock); + plsec->pls_ctx = NULL; + + sec = &plsec->pls_base; + sec->ps_policy = &plain_policy; + atomic_set(&sec->ps_refcount, 0); + atomic_set(&sec->ps_nctx, 0); + sec->ps_id = sptlrpc_get_next_secid(); + sec->ps_import = class_import_get(imp); + sec->ps_flvr = *sf; + spin_lock_init(&sec->ps_lock); + INIT_LIST_HEAD(&sec->ps_gc_list); + sec->ps_gc_interval = 0; + sec->ps_gc_next = 0; + + /* install ctx immediately if this is a reverse sec */ + if (svc_ctx) { + ctx = plain_sec_install_ctx(plsec); + if (ctx == NULL) { + plain_destroy_sec(sec); + return NULL; + } + sptlrpc_cli_ctx_put(ctx, 1); + } + + return sec; +} + +static +struct ptlrpc_cli_ctx *plain_lookup_ctx(struct ptlrpc_sec *sec, + struct vfs_cred *vcred, + int create, int remove_dead) +{ + struct plain_sec *plsec = sec2plsec(sec); + struct ptlrpc_cli_ctx *ctx; + + read_lock(&plsec->pls_lock); + ctx = plsec->pls_ctx; + if (ctx) + atomic_inc(&ctx->cc_refcount); + read_unlock(&plsec->pls_lock); + + if (unlikely(ctx == NULL)) + ctx = plain_sec_install_ctx(plsec); + + return ctx; +} + +static +void plain_release_ctx(struct ptlrpc_sec *sec, + struct ptlrpc_cli_ctx *ctx, int sync) +{ + LASSERT(atomic_read(&sec->ps_refcount) > 0); + LASSERT(atomic_read(&sec->ps_nctx) > 0); + LASSERT(atomic_read(&ctx->cc_refcount) == 0); + LASSERT(ctx->cc_sec == sec); + + OBD_FREE_PTR(ctx); + + atomic_dec(&sec->ps_nctx); + sptlrpc_sec_put(sec); +} + +static +int plain_flush_ctx_cache(struct ptlrpc_sec *sec, + uid_t uid, int grace, int force) +{ + struct plain_sec *plsec = sec2plsec(sec); + struct ptlrpc_cli_ctx *ctx; + + /* do nothing unless caller want to flush for 'all' */ + if (uid != -1) + return 0; + + write_lock(&plsec->pls_lock); + ctx = plsec->pls_ctx; + plsec->pls_ctx = NULL; + write_unlock(&plsec->pls_lock); + + if (ctx) + sptlrpc_cli_ctx_put(ctx, 1); + return 0; +} + +static +int plain_alloc_reqbuf(struct ptlrpc_sec *sec, + struct ptlrpc_request *req, + int msgsize) +{ + __u32 buflens[PLAIN_PACK_SEGMENTS] = { 0, }; + int alloc_len; + + buflens[PLAIN_PACK_HDR_OFF] = sizeof(struct plain_header); + buflens[PLAIN_PACK_MSG_OFF] = msgsize; + + if (req->rq_pack_udesc) + buflens[PLAIN_PACK_USER_OFF] = sptlrpc_current_user_desc_size(); + + if (req->rq_pack_bulk) { + LASSERT(req->rq_bulk_read || req->rq_bulk_write); + buflens[PLAIN_PACK_BULK_OFF] = PLAIN_BSD_SIZE; + } + + alloc_len = lustre_msg_size_v2(PLAIN_PACK_SEGMENTS, buflens); + + if (!req->rq_reqbuf) { + LASSERT(!req->rq_pool); + + alloc_len = size_roundup_power2(alloc_len); + OBD_ALLOC_LARGE(req->rq_reqbuf, alloc_len); + if (!req->rq_reqbuf) + return -ENOMEM; + + req->rq_reqbuf_len = alloc_len; + } else { + LASSERT(req->rq_pool); + LASSERT(req->rq_reqbuf_len >= alloc_len); + memset(req->rq_reqbuf, 0, alloc_len); + } + + lustre_init_msg_v2(req->rq_reqbuf, PLAIN_PACK_SEGMENTS, buflens, NULL); + req->rq_reqmsg = lustre_msg_buf(req->rq_reqbuf, PLAIN_PACK_MSG_OFF, 0); + + if (req->rq_pack_udesc) + sptlrpc_pack_user_desc(req->rq_reqbuf, PLAIN_PACK_USER_OFF); + + return 0; +} + +static +void plain_free_reqbuf(struct ptlrpc_sec *sec, + struct ptlrpc_request *req) +{ + if (!req->rq_pool) { + OBD_FREE_LARGE(req->rq_reqbuf, req->rq_reqbuf_len); + req->rq_reqbuf = NULL; + req->rq_reqbuf_len = 0; + } +} + +static +int plain_alloc_repbuf(struct ptlrpc_sec *sec, + struct ptlrpc_request *req, + int msgsize) +{ + __u32 buflens[PLAIN_PACK_SEGMENTS] = { 0, }; + int alloc_len; + + buflens[PLAIN_PACK_HDR_OFF] = sizeof(struct plain_header); + buflens[PLAIN_PACK_MSG_OFF] = msgsize; + + if (req->rq_pack_bulk) { + LASSERT(req->rq_bulk_read || req->rq_bulk_write); + buflens[PLAIN_PACK_BULK_OFF] = PLAIN_BSD_SIZE; + } + + alloc_len = lustre_msg_size_v2(PLAIN_PACK_SEGMENTS, buflens); + + /* add space for early reply */ + alloc_len += plain_at_offset; + + alloc_len = size_roundup_power2(alloc_len); + + OBD_ALLOC_LARGE(req->rq_repbuf, alloc_len); + if (!req->rq_repbuf) + return -ENOMEM; + + req->rq_repbuf_len = alloc_len; + return 0; +} + +static +void plain_free_repbuf(struct ptlrpc_sec *sec, + struct ptlrpc_request *req) +{ + OBD_FREE_LARGE(req->rq_repbuf, req->rq_repbuf_len); + req->rq_repbuf = NULL; + req->rq_repbuf_len = 0; +} + +static +int plain_enlarge_reqbuf(struct ptlrpc_sec *sec, + struct ptlrpc_request *req, + int segment, int newsize) +{ + struct lustre_msg *newbuf; + int oldsize; + int newmsg_size, newbuf_size; + + LASSERT(req->rq_reqbuf); + LASSERT(req->rq_reqbuf_len >= req->rq_reqlen); + LASSERT(lustre_msg_buf(req->rq_reqbuf, PLAIN_PACK_MSG_OFF, 0) == + req->rq_reqmsg); + + /* compute new embedded msg size. */ + oldsize = req->rq_reqmsg->lm_buflens[segment]; + req->rq_reqmsg->lm_buflens[segment] = newsize; + newmsg_size = lustre_msg_size_v2(req->rq_reqmsg->lm_bufcount, + req->rq_reqmsg->lm_buflens); + req->rq_reqmsg->lm_buflens[segment] = oldsize; + + /* compute new wrapper msg size. */ + oldsize = req->rq_reqbuf->lm_buflens[PLAIN_PACK_MSG_OFF]; + req->rq_reqbuf->lm_buflens[PLAIN_PACK_MSG_OFF] = newmsg_size; + newbuf_size = lustre_msg_size_v2(req->rq_reqbuf->lm_bufcount, + req->rq_reqbuf->lm_buflens); + req->rq_reqbuf->lm_buflens[PLAIN_PACK_MSG_OFF] = oldsize; + + /* request from pool should always have enough buffer */ + LASSERT(!req->rq_pool || req->rq_reqbuf_len >= newbuf_size); + + if (req->rq_reqbuf_len < newbuf_size) { + newbuf_size = size_roundup_power2(newbuf_size); + + OBD_ALLOC_LARGE(newbuf, newbuf_size); + if (newbuf == NULL) + return -ENOMEM; + + /* Must lock this, so that otherwise unprotected change of + * rq_reqmsg is not racing with parallel processing of + * imp_replay_list traversing threads. See LU-3333 + * This is a bandaid at best, we really need to deal with this + * in request enlarging code before unpacking that's already + * there */ + if (req->rq_import) + spin_lock(&req->rq_import->imp_lock); + + memcpy(newbuf, req->rq_reqbuf, req->rq_reqbuf_len); + + OBD_FREE_LARGE(req->rq_reqbuf, req->rq_reqbuf_len); + req->rq_reqbuf = newbuf; + req->rq_reqbuf_len = newbuf_size; + req->rq_reqmsg = lustre_msg_buf(req->rq_reqbuf, + PLAIN_PACK_MSG_OFF, 0); + + if (req->rq_import) + spin_unlock(&req->rq_import->imp_lock); + } + + _sptlrpc_enlarge_msg_inplace(req->rq_reqbuf, PLAIN_PACK_MSG_OFF, + newmsg_size); + _sptlrpc_enlarge_msg_inplace(req->rq_reqmsg, segment, newsize); + + req->rq_reqlen = newmsg_size; + return 0; +} + +/**************************************** + * service apis * + ****************************************/ + +static struct ptlrpc_svc_ctx plain_svc_ctx = { + .sc_refcount = ATOMIC_INIT(1), + .sc_policy = &plain_policy, +}; + +static +int plain_accept(struct ptlrpc_request *req) +{ + struct lustre_msg *msg = req->rq_reqbuf; + struct plain_header *phdr; + int swabbed; + + LASSERT(SPTLRPC_FLVR_POLICY(req->rq_flvr.sf_rpc) == + SPTLRPC_POLICY_PLAIN); + + if (SPTLRPC_FLVR_BASE(req->rq_flvr.sf_rpc) != + SPTLRPC_FLVR_BASE(SPTLRPC_FLVR_PLAIN) || + SPTLRPC_FLVR_BULK_TYPE(req->rq_flvr.sf_rpc) != + SPTLRPC_FLVR_BULK_TYPE(SPTLRPC_FLVR_PLAIN)) { + CERROR("Invalid rpc flavor %x\n", req->rq_flvr.sf_rpc); + return SECSVC_DROP; + } + + if (msg->lm_bufcount < PLAIN_PACK_SEGMENTS) { + CERROR("unexpected request buf count %u\n", msg->lm_bufcount); + return SECSVC_DROP; + } + + swabbed = ptlrpc_req_need_swab(req); + + phdr = lustre_msg_buf(msg, PLAIN_PACK_HDR_OFF, sizeof(*phdr)); + if (phdr == NULL) { + CERROR("missing plain header\n"); + return -EPROTO; + } + + if (phdr->ph_ver != 0) { + CERROR("Invalid header version\n"); + return -EPROTO; + } + + if (phdr->ph_bulk_hash_alg >= BULK_HASH_ALG_MAX) { + CERROR("invalid hash algorithm: %u\n", phdr->ph_bulk_hash_alg); + return -EPROTO; + } + + req->rq_sp_from = phdr->ph_sp; + req->rq_flvr.u_bulk.hash.hash_alg = phdr->ph_bulk_hash_alg; + + if (phdr->ph_flags & PLAIN_FL_USER) { + if (sptlrpc_unpack_user_desc(msg, PLAIN_PACK_USER_OFF, + swabbed)) { + CERROR("Mal-formed user descriptor\n"); + return SECSVC_DROP; + } + + req->rq_pack_udesc = 1; + req->rq_user_desc = lustre_msg_buf(msg, PLAIN_PACK_USER_OFF, 0); + } + + if (phdr->ph_flags & PLAIN_FL_BULK) { + if (plain_unpack_bsd(msg, swabbed)) + return SECSVC_DROP; + + req->rq_pack_bulk = 1; + } + + req->rq_reqmsg = lustre_msg_buf(msg, PLAIN_PACK_MSG_OFF, 0); + req->rq_reqlen = msg->lm_buflens[PLAIN_PACK_MSG_OFF]; + + req->rq_svc_ctx = &plain_svc_ctx; + atomic_inc(&req->rq_svc_ctx->sc_refcount); + + return SECSVC_OK; +} + +static +int plain_alloc_rs(struct ptlrpc_request *req, int msgsize) +{ + struct ptlrpc_reply_state *rs; + __u32 buflens[PLAIN_PACK_SEGMENTS] = { 0, }; + int rs_size = sizeof(*rs); + + LASSERT(msgsize % 8 == 0); + + buflens[PLAIN_PACK_HDR_OFF] = sizeof(struct plain_header); + buflens[PLAIN_PACK_MSG_OFF] = msgsize; + + if (req->rq_pack_bulk && (req->rq_bulk_read || req->rq_bulk_write)) + buflens[PLAIN_PACK_BULK_OFF] = PLAIN_BSD_SIZE; + + rs_size += lustre_msg_size_v2(PLAIN_PACK_SEGMENTS, buflens); + + rs = req->rq_reply_state; + + if (rs) { + /* pre-allocated */ + LASSERT(rs->rs_size >= rs_size); + } else { + OBD_ALLOC_LARGE(rs, rs_size); + if (rs == NULL) + return -ENOMEM; + + rs->rs_size = rs_size; + } + + rs->rs_svc_ctx = req->rq_svc_ctx; + atomic_inc(&req->rq_svc_ctx->sc_refcount); + rs->rs_repbuf = (struct lustre_msg *) (rs + 1); + rs->rs_repbuf_len = rs_size - sizeof(*rs); + + lustre_init_msg_v2(rs->rs_repbuf, PLAIN_PACK_SEGMENTS, buflens, NULL); + rs->rs_msg = lustre_msg_buf_v2(rs->rs_repbuf, PLAIN_PACK_MSG_OFF, 0); + + req->rq_reply_state = rs; + return 0; +} + +static +void plain_free_rs(struct ptlrpc_reply_state *rs) +{ + LASSERT(atomic_read(&rs->rs_svc_ctx->sc_refcount) > 1); + atomic_dec(&rs->rs_svc_ctx->sc_refcount); + + if (!rs->rs_prealloc) + OBD_FREE_LARGE(rs, rs->rs_size); +} + +static +int plain_authorize(struct ptlrpc_request *req) +{ + struct ptlrpc_reply_state *rs = req->rq_reply_state; + struct lustre_msg_v2 *msg = rs->rs_repbuf; + struct plain_header *phdr; + int len; + + LASSERT(rs); + LASSERT(msg); + + if (req->rq_replen != msg->lm_buflens[PLAIN_PACK_MSG_OFF]) + len = lustre_shrink_msg(msg, PLAIN_PACK_MSG_OFF, + req->rq_replen, 1); + else + len = lustre_msg_size_v2(msg->lm_bufcount, msg->lm_buflens); + + msg->lm_secflvr = req->rq_flvr.sf_rpc; + + phdr = lustre_msg_buf(msg, PLAIN_PACK_HDR_OFF, 0); + phdr->ph_ver = 0; + phdr->ph_flags = 0; + phdr->ph_bulk_hash_alg = req->rq_flvr.u_bulk.hash.hash_alg; + + if (req->rq_pack_bulk) + phdr->ph_flags |= PLAIN_FL_BULK; + + rs->rs_repdata_len = len; + + if (likely(req->rq_packed_final)) { + if (lustre_msghdr_get_flags(req->rq_reqmsg) & MSGHDR_AT_SUPPORT) + req->rq_reply_off = plain_at_offset; + else + req->rq_reply_off = 0; + } else { + unsigned int hsize = 4; + + cfs_crypto_hash_digest(CFS_HASH_ALG_CRC32, + lustre_msg_buf(msg, PLAIN_PACK_MSG_OFF, 0), + lustre_msg_buflen(msg, PLAIN_PACK_MSG_OFF), + NULL, 0, (unsigned char *)&msg->lm_cksum, &hsize); + req->rq_reply_off = 0; + } + + return 0; +} + +static +int plain_svc_unwrap_bulk(struct ptlrpc_request *req, + struct ptlrpc_bulk_desc *desc) +{ + struct ptlrpc_reply_state *rs = req->rq_reply_state; + struct ptlrpc_bulk_sec_desc *bsdr, *bsdv; + struct plain_bulk_token *tokenr; + int rc; + + LASSERT(req->rq_bulk_write); + LASSERT(req->rq_pack_bulk); + + bsdr = lustre_msg_buf(req->rq_reqbuf, PLAIN_PACK_BULK_OFF, 0); + tokenr = (struct plain_bulk_token *) bsdr->bsd_data; + bsdv = lustre_msg_buf(rs->rs_repbuf, PLAIN_PACK_BULK_OFF, 0); + + bsdv->bsd_version = 0; + bsdv->bsd_type = SPTLRPC_BULK_DEFAULT; + bsdv->bsd_svc = bsdr->bsd_svc; + bsdv->bsd_flags = 0; + + if (bsdr->bsd_svc == SPTLRPC_BULK_SVC_NULL) + return 0; + + rc = plain_verify_bulk_csum(desc, req->rq_flvr.u_bulk.hash.hash_alg, + tokenr); + if (rc) { + bsdv->bsd_flags |= BSD_FL_ERR; + CERROR("bulk write: server verify failed: %d\n", rc); + } + + return rc; +} + +static +int plain_svc_wrap_bulk(struct ptlrpc_request *req, + struct ptlrpc_bulk_desc *desc) +{ + struct ptlrpc_reply_state *rs = req->rq_reply_state; + struct ptlrpc_bulk_sec_desc *bsdr, *bsdv; + struct plain_bulk_token *tokenv; + int rc; + + LASSERT(req->rq_bulk_read); + LASSERT(req->rq_pack_bulk); + + bsdr = lustre_msg_buf(req->rq_reqbuf, PLAIN_PACK_BULK_OFF, 0); + bsdv = lustre_msg_buf(rs->rs_repbuf, PLAIN_PACK_BULK_OFF, 0); + tokenv = (struct plain_bulk_token *) bsdv->bsd_data; + + bsdv->bsd_version = 0; + bsdv->bsd_type = SPTLRPC_BULK_DEFAULT; + bsdv->bsd_svc = bsdr->bsd_svc; + bsdv->bsd_flags = 0; + + if (bsdr->bsd_svc == SPTLRPC_BULK_SVC_NULL) + return 0; + + rc = plain_generate_bulk_csum(desc, req->rq_flvr.u_bulk.hash.hash_alg, + tokenv); + if (rc) { + CERROR("bulk read: server failed to compute checksum: %d\n", + rc); + } else { + if (OBD_FAIL_CHECK(OBD_FAIL_OSC_CHECKSUM_RECEIVE)) + corrupt_bulk_data(desc); + } + + return rc; +} + +static struct ptlrpc_ctx_ops plain_ctx_ops = { + .refresh = plain_ctx_refresh, + .validate = plain_ctx_validate, + .sign = plain_ctx_sign, + .verify = plain_ctx_verify, + .wrap_bulk = plain_cli_wrap_bulk, + .unwrap_bulk = plain_cli_unwrap_bulk, +}; + +static struct ptlrpc_sec_cops plain_sec_cops = { + .create_sec = plain_create_sec, + .destroy_sec = plain_destroy_sec, + .kill_sec = plain_kill_sec, + .lookup_ctx = plain_lookup_ctx, + .release_ctx = plain_release_ctx, + .flush_ctx_cache = plain_flush_ctx_cache, + .alloc_reqbuf = plain_alloc_reqbuf, + .free_reqbuf = plain_free_reqbuf, + .alloc_repbuf = plain_alloc_repbuf, + .free_repbuf = plain_free_repbuf, + .enlarge_reqbuf = plain_enlarge_reqbuf, +}; + +static struct ptlrpc_sec_sops plain_sec_sops = { + .accept = plain_accept, + .alloc_rs = plain_alloc_rs, + .authorize = plain_authorize, + .free_rs = plain_free_rs, + .unwrap_bulk = plain_svc_unwrap_bulk, + .wrap_bulk = plain_svc_wrap_bulk, +}; + +static struct ptlrpc_sec_policy plain_policy = { + .sp_owner = THIS_MODULE, + .sp_name = "plain", + .sp_policy = SPTLRPC_POLICY_PLAIN, + .sp_cops = &plain_sec_cops, + .sp_sops = &plain_sec_sops, +}; + +int sptlrpc_plain_init(void) +{ + __u32 buflens[PLAIN_PACK_SEGMENTS] = { 0, }; + int rc; + + buflens[PLAIN_PACK_MSG_OFF] = lustre_msg_early_size(); + plain_at_offset = lustre_msg_size_v2(PLAIN_PACK_SEGMENTS, buflens); + + rc = sptlrpc_register_policy(&plain_policy); + if (rc) + CERROR("failed to register: %d\n", rc); + + return rc; +} + +void sptlrpc_plain_fini(void) +{ + int rc; + + rc = sptlrpc_unregister_policy(&plain_policy); + if (rc) + CERROR("cannot unregister: %d\n", rc); +} diff --git a/drivers/staging/lustre/lustre/ptlrpc/service.c b/drivers/staging/lustre/lustre/ptlrpc/service.c new file mode 100644 index 000000000..8e6142151 --- /dev/null +++ b/drivers/staging/lustre/lustre/ptlrpc/service.c @@ -0,0 +1,3105 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2010, 2012, Intel Corporation. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + */ + +#define DEBUG_SUBSYSTEM S_RPC +#include "../include/obd_support.h" +#include "../include/obd_class.h" +#include "../include/lustre_net.h" +#include "../include/lu_object.h" +#include "../../include/linux/lnet/types.h" +#include "ptlrpc_internal.h" + +/* The following are visible and mutable through /sys/module/ptlrpc */ +int test_req_buffer_pressure = 0; +module_param(test_req_buffer_pressure, int, 0444); +MODULE_PARM_DESC(test_req_buffer_pressure, "set non-zero to put pressure on request buffer pools"); +module_param(at_min, int, 0644); +MODULE_PARM_DESC(at_min, "Adaptive timeout minimum (sec)"); +module_param(at_max, int, 0644); +MODULE_PARM_DESC(at_max, "Adaptive timeout maximum (sec)"); +module_param(at_history, int, 0644); +MODULE_PARM_DESC(at_history, + "Adaptive timeouts remember the slowest event that took place within this period (sec)"); +module_param(at_early_margin, int, 0644); +MODULE_PARM_DESC(at_early_margin, "How soon before an RPC deadline to send an early reply"); +module_param(at_extra, int, 0644); +MODULE_PARM_DESC(at_extra, "How much extra time to give with each early reply"); + + +/* forward ref */ +static int ptlrpc_server_post_idle_rqbds(struct ptlrpc_service_part *svcpt); +static void ptlrpc_server_hpreq_fini(struct ptlrpc_request *req); +static void ptlrpc_at_remove_timed(struct ptlrpc_request *req); + +/** Holds a list of all PTLRPC services */ +LIST_HEAD(ptlrpc_all_services); +/** Used to protect the \e ptlrpc_all_services list */ +struct mutex ptlrpc_all_services_mutex; + +struct ptlrpc_request_buffer_desc * +ptlrpc_alloc_rqbd(struct ptlrpc_service_part *svcpt) +{ + struct ptlrpc_service *svc = svcpt->scp_service; + struct ptlrpc_request_buffer_desc *rqbd; + + OBD_CPT_ALLOC_PTR(rqbd, svc->srv_cptable, svcpt->scp_cpt); + if (rqbd == NULL) + return NULL; + + rqbd->rqbd_svcpt = svcpt; + rqbd->rqbd_refcount = 0; + rqbd->rqbd_cbid.cbid_fn = request_in_callback; + rqbd->rqbd_cbid.cbid_arg = rqbd; + INIT_LIST_HEAD(&rqbd->rqbd_reqs); + OBD_CPT_ALLOC_LARGE(rqbd->rqbd_buffer, svc->srv_cptable, + svcpt->scp_cpt, svc->srv_buf_size); + if (rqbd->rqbd_buffer == NULL) { + OBD_FREE_PTR(rqbd); + return NULL; + } + + spin_lock(&svcpt->scp_lock); + list_add(&rqbd->rqbd_list, &svcpt->scp_rqbd_idle); + svcpt->scp_nrqbds_total++; + spin_unlock(&svcpt->scp_lock); + + return rqbd; +} + +void +ptlrpc_free_rqbd(struct ptlrpc_request_buffer_desc *rqbd) +{ + struct ptlrpc_service_part *svcpt = rqbd->rqbd_svcpt; + + LASSERT(rqbd->rqbd_refcount == 0); + LASSERT(list_empty(&rqbd->rqbd_reqs)); + + spin_lock(&svcpt->scp_lock); + list_del(&rqbd->rqbd_list); + svcpt->scp_nrqbds_total--; + spin_unlock(&svcpt->scp_lock); + + OBD_FREE_LARGE(rqbd->rqbd_buffer, svcpt->scp_service->srv_buf_size); + OBD_FREE_PTR(rqbd); +} + +int +ptlrpc_grow_req_bufs(struct ptlrpc_service_part *svcpt, int post) +{ + struct ptlrpc_service *svc = svcpt->scp_service; + struct ptlrpc_request_buffer_desc *rqbd; + int rc = 0; + int i; + + if (svcpt->scp_rqbd_allocating) + goto try_post; + + spin_lock(&svcpt->scp_lock); + /* check again with lock */ + if (svcpt->scp_rqbd_allocating) { + /* NB: we might allow more than one thread in the future */ + LASSERT(svcpt->scp_rqbd_allocating == 1); + spin_unlock(&svcpt->scp_lock); + goto try_post; + } + + svcpt->scp_rqbd_allocating++; + spin_unlock(&svcpt->scp_lock); + + + for (i = 0; i < svc->srv_nbuf_per_group; i++) { + /* NB: another thread might have recycled enough rqbds, we + * need to make sure it wouldn't over-allocate, see LU-1212. */ + if (svcpt->scp_nrqbds_posted >= svc->srv_nbuf_per_group) + break; + + rqbd = ptlrpc_alloc_rqbd(svcpt); + + if (rqbd == NULL) { + CERROR("%s: Can't allocate request buffer\n", + svc->srv_name); + rc = -ENOMEM; + break; + } + } + + spin_lock(&svcpt->scp_lock); + + LASSERT(svcpt->scp_rqbd_allocating == 1); + svcpt->scp_rqbd_allocating--; + + spin_unlock(&svcpt->scp_lock); + + CDEBUG(D_RPCTRACE, + "%s: allocate %d new %d-byte reqbufs (%d/%d left), rc = %d\n", + svc->srv_name, i, svc->srv_buf_size, svcpt->scp_nrqbds_posted, + svcpt->scp_nrqbds_total, rc); + + try_post: + if (post && rc == 0) + rc = ptlrpc_server_post_idle_rqbds(svcpt); + + return rc; +} + +/** + * Part of Rep-Ack logic. + * Puts a lock and its mode into reply state associated to request reply. + */ +void +ptlrpc_save_lock(struct ptlrpc_request *req, + struct lustre_handle *lock, int mode, int no_ack) +{ + struct ptlrpc_reply_state *rs = req->rq_reply_state; + int idx; + + LASSERT(rs != NULL); + LASSERT(rs->rs_nlocks < RS_MAX_LOCKS); + + if (req->rq_export->exp_disconnected) { + ldlm_lock_decref(lock, mode); + } else { + idx = rs->rs_nlocks++; + rs->rs_locks[idx] = *lock; + rs->rs_modes[idx] = mode; + rs->rs_difficult = 1; + rs->rs_no_ack = !!no_ack; + } +} +EXPORT_SYMBOL(ptlrpc_save_lock); + + +struct ptlrpc_hr_partition; + +struct ptlrpc_hr_thread { + int hrt_id; /* thread ID */ + spinlock_t hrt_lock; + wait_queue_head_t hrt_waitq; + struct list_head hrt_queue; /* RS queue */ + struct ptlrpc_hr_partition *hrt_partition; +}; + +struct ptlrpc_hr_partition { + /* # of started threads */ + atomic_t hrp_nstarted; + /* # of stopped threads */ + atomic_t hrp_nstopped; + /* cpu partition id */ + int hrp_cpt; + /* round-robin rotor for choosing thread */ + int hrp_rotor; + /* total number of threads on this partition */ + int hrp_nthrs; + /* threads table */ + struct ptlrpc_hr_thread *hrp_thrs; +}; + +#define HRT_RUNNING 0 +#define HRT_STOPPING 1 + +struct ptlrpc_hr_service { + /* CPU partition table, it's just cfs_cpt_table for now */ + struct cfs_cpt_table *hr_cpt_table; + /** controller sleep waitq */ + wait_queue_head_t hr_waitq; + unsigned int hr_stopping; + /** roundrobin rotor for non-affinity service */ + unsigned int hr_rotor; + /* partition data */ + struct ptlrpc_hr_partition **hr_partitions; +}; + +struct rs_batch { + struct list_head rsb_replies; + unsigned int rsb_n_replies; + struct ptlrpc_service_part *rsb_svcpt; +}; + +/** reply handling service. */ +static struct ptlrpc_hr_service ptlrpc_hr; + +/** + * maximum number of replies scheduled in one batch + */ +#define MAX_SCHEDULED 256 + +/** + * Initialize a reply batch. + * + * \param b batch + */ +static void rs_batch_init(struct rs_batch *b) +{ + memset(b, 0, sizeof(*b)); + INIT_LIST_HEAD(&b->rsb_replies); +} + +/** + * Choose an hr thread to dispatch requests to. + */ +static struct ptlrpc_hr_thread * +ptlrpc_hr_select(struct ptlrpc_service_part *svcpt) +{ + struct ptlrpc_hr_partition *hrp; + unsigned int rotor; + + if (svcpt->scp_cpt >= 0 && + svcpt->scp_service->srv_cptable == ptlrpc_hr.hr_cpt_table) { + /* directly match partition */ + hrp = ptlrpc_hr.hr_partitions[svcpt->scp_cpt]; + + } else { + rotor = ptlrpc_hr.hr_rotor++; + rotor %= cfs_cpt_number(ptlrpc_hr.hr_cpt_table); + + hrp = ptlrpc_hr.hr_partitions[rotor]; + } + + rotor = hrp->hrp_rotor++; + return &hrp->hrp_thrs[rotor % hrp->hrp_nthrs]; +} + +/** + * Dispatch all replies accumulated in the batch to one from + * dedicated reply handling threads. + * + * \param b batch + */ +static void rs_batch_dispatch(struct rs_batch *b) +{ + if (b->rsb_n_replies != 0) { + struct ptlrpc_hr_thread *hrt; + + hrt = ptlrpc_hr_select(b->rsb_svcpt); + + spin_lock(&hrt->hrt_lock); + list_splice_init(&b->rsb_replies, &hrt->hrt_queue); + spin_unlock(&hrt->hrt_lock); + + wake_up(&hrt->hrt_waitq); + b->rsb_n_replies = 0; + } +} + +/** + * Add a reply to a batch. + * Add one reply object to a batch, schedule batched replies if overload. + * + * \param b batch + * \param rs reply + */ +static void rs_batch_add(struct rs_batch *b, struct ptlrpc_reply_state *rs) +{ + struct ptlrpc_service_part *svcpt = rs->rs_svcpt; + + if (svcpt != b->rsb_svcpt || b->rsb_n_replies >= MAX_SCHEDULED) { + if (b->rsb_svcpt != NULL) { + rs_batch_dispatch(b); + spin_unlock(&b->rsb_svcpt->scp_rep_lock); + } + spin_lock(&svcpt->scp_rep_lock); + b->rsb_svcpt = svcpt; + } + spin_lock(&rs->rs_lock); + rs->rs_scheduled_ever = 1; + if (rs->rs_scheduled == 0) { + list_move(&rs->rs_list, &b->rsb_replies); + rs->rs_scheduled = 1; + b->rsb_n_replies++; + } + rs->rs_committed = 1; + spin_unlock(&rs->rs_lock); +} + +/** + * Reply batch finalization. + * Dispatch remaining replies from the batch + * and release remaining spinlock. + * + * \param b batch + */ +static void rs_batch_fini(struct rs_batch *b) +{ + if (b->rsb_svcpt != NULL) { + rs_batch_dispatch(b); + spin_unlock(&b->rsb_svcpt->scp_rep_lock); + } +} + +#define DECLARE_RS_BATCH(b) struct rs_batch b + + +/** + * Put reply state into a queue for processing because we received + * ACK from the client + */ +void ptlrpc_dispatch_difficult_reply(struct ptlrpc_reply_state *rs) +{ + struct ptlrpc_hr_thread *hrt; + + LASSERT(list_empty(&rs->rs_list)); + + hrt = ptlrpc_hr_select(rs->rs_svcpt); + + spin_lock(&hrt->hrt_lock); + list_add_tail(&rs->rs_list, &hrt->hrt_queue); + spin_unlock(&hrt->hrt_lock); + + wake_up(&hrt->hrt_waitq); +} + +void +ptlrpc_schedule_difficult_reply(struct ptlrpc_reply_state *rs) +{ + assert_spin_locked(&rs->rs_svcpt->scp_rep_lock); + assert_spin_locked(&rs->rs_lock); + LASSERT(rs->rs_difficult); + rs->rs_scheduled_ever = 1; /* flag any notification attempt */ + + if (rs->rs_scheduled) { /* being set up or already notified */ + return; + } + + rs->rs_scheduled = 1; + list_del_init(&rs->rs_list); + ptlrpc_dispatch_difficult_reply(rs); +} +EXPORT_SYMBOL(ptlrpc_schedule_difficult_reply); + +void ptlrpc_commit_replies(struct obd_export *exp) +{ + struct ptlrpc_reply_state *rs, *nxt; + DECLARE_RS_BATCH(batch); + + rs_batch_init(&batch); + /* Find any replies that have been committed and get their service + * to attend to complete them. */ + + /* CAVEAT EMPTOR: spinlock ordering!!! */ + spin_lock(&exp->exp_uncommitted_replies_lock); + list_for_each_entry_safe(rs, nxt, &exp->exp_uncommitted_replies, + rs_obd_list) { + LASSERT(rs->rs_difficult); + /* VBR: per-export last_committed */ + LASSERT(rs->rs_export); + if (rs->rs_transno <= exp->exp_last_committed) { + list_del_init(&rs->rs_obd_list); + rs_batch_add(&batch, rs); + } + } + spin_unlock(&exp->exp_uncommitted_replies_lock); + rs_batch_fini(&batch); +} +EXPORT_SYMBOL(ptlrpc_commit_replies); + +static int +ptlrpc_server_post_idle_rqbds(struct ptlrpc_service_part *svcpt) +{ + struct ptlrpc_request_buffer_desc *rqbd; + int rc; + int posted = 0; + + for (;;) { + spin_lock(&svcpt->scp_lock); + + if (list_empty(&svcpt->scp_rqbd_idle)) { + spin_unlock(&svcpt->scp_lock); + return posted; + } + + rqbd = list_entry(svcpt->scp_rqbd_idle.next, + struct ptlrpc_request_buffer_desc, + rqbd_list); + list_del(&rqbd->rqbd_list); + + /* assume we will post successfully */ + svcpt->scp_nrqbds_posted++; + list_add(&rqbd->rqbd_list, &svcpt->scp_rqbd_posted); + + spin_unlock(&svcpt->scp_lock); + + rc = ptlrpc_register_rqbd(rqbd); + if (rc != 0) + break; + + posted = 1; + } + + spin_lock(&svcpt->scp_lock); + + svcpt->scp_nrqbds_posted--; + list_del(&rqbd->rqbd_list); + list_add_tail(&rqbd->rqbd_list, &svcpt->scp_rqbd_idle); + + /* Don't complain if no request buffers are posted right now; LNET + * won't drop requests because we set the portal lazy! */ + + spin_unlock(&svcpt->scp_lock); + + return -1; +} + +static void ptlrpc_at_timer(unsigned long castmeharder) +{ + struct ptlrpc_service_part *svcpt; + + svcpt = (struct ptlrpc_service_part *)castmeharder; + + svcpt->scp_at_check = 1; + svcpt->scp_at_checktime = cfs_time_current(); + wake_up(&svcpt->scp_waitq); +} + +static void +ptlrpc_server_nthreads_check(struct ptlrpc_service *svc, + struct ptlrpc_service_conf *conf) +{ + struct ptlrpc_service_thr_conf *tc = &conf->psc_thr; + unsigned init; + unsigned total; + unsigned nthrs; + int weight; + + /* + * Common code for estimating & validating threads number. + * CPT affinity service could have percpt thread-pool instead + * of a global thread-pool, which means user might not always + * get the threads number they give it in conf::tc_nthrs_user + * even they did set. It's because we need to validate threads + * number for each CPT to guarantee each pool will have enough + * threads to keep the service healthy. + */ + init = PTLRPC_NTHRS_INIT + (svc->srv_ops.so_hpreq_handler != NULL); + init = max_t(int, init, tc->tc_nthrs_init); + + /* NB: please see comments in lustre_lnet.h for definition + * details of these members */ + LASSERT(tc->tc_nthrs_max != 0); + + if (tc->tc_nthrs_user != 0) { + /* In case there is a reason to test a service with many + * threads, we give a less strict check here, it can + * be up to 8 * nthrs_max */ + total = min(tc->tc_nthrs_max * 8, tc->tc_nthrs_user); + nthrs = total / svc->srv_ncpts; + init = max(init, nthrs); + goto out; + } + + total = tc->tc_nthrs_max; + if (tc->tc_nthrs_base == 0) { + /* don't care about base threads number per partition, + * this is most for non-affinity service */ + nthrs = total / svc->srv_ncpts; + goto out; + } + + nthrs = tc->tc_nthrs_base; + if (svc->srv_ncpts == 1) { + int i; + + /* NB: Increase the base number if it's single partition + * and total number of cores/HTs is larger or equal to 4. + * result will always < 2 * nthrs_base */ + weight = cfs_cpt_weight(svc->srv_cptable, CFS_CPT_ANY); + for (i = 1; (weight >> (i + 1)) != 0 && /* >= 4 cores/HTs */ + (tc->tc_nthrs_base >> i) != 0; i++) + nthrs += tc->tc_nthrs_base >> i; + } + + if (tc->tc_thr_factor != 0) { + int factor = tc->tc_thr_factor; + const int fade = 4; + + /* + * User wants to increase number of threads with for + * each CPU core/HT, most likely the factor is larger then + * one thread/core because service threads are supposed to + * be blocked by lock or wait for IO. + */ + /* + * Amdahl's law says that adding processors wouldn't give + * a linear increasing of parallelism, so it's nonsense to + * have too many threads no matter how many cores/HTs + * there are. + */ + /* weight is # of HTs */ + if (cpumask_weight(topology_thread_cpumask(0)) > 1) { + /* depress thread factor for hyper-thread */ + factor = factor - (factor >> 1) + (factor >> 3); + } + + weight = cfs_cpt_weight(svc->srv_cptable, 0); + LASSERT(weight > 0); + + for (; factor > 0 && weight > 0; factor--, weight -= fade) + nthrs += min(weight, fade) * factor; + } + + if (nthrs * svc->srv_ncpts > tc->tc_nthrs_max) { + nthrs = max(tc->tc_nthrs_base, + tc->tc_nthrs_max / svc->srv_ncpts); + } + out: + nthrs = max(nthrs, tc->tc_nthrs_init); + svc->srv_nthrs_cpt_limit = nthrs; + svc->srv_nthrs_cpt_init = init; + + if (nthrs * svc->srv_ncpts > tc->tc_nthrs_max) { + CDEBUG(D_OTHER, "%s: This service may have more threads (%d) than the given soft limit (%d)\n", + svc->srv_name, nthrs * svc->srv_ncpts, + tc->tc_nthrs_max); + } +} + +/** + * Initialize percpt data for a service + */ +static int +ptlrpc_service_part_init(struct ptlrpc_service *svc, + struct ptlrpc_service_part *svcpt, int cpt) +{ + struct ptlrpc_at_array *array; + int size; + int index; + int rc; + + svcpt->scp_cpt = cpt; + INIT_LIST_HEAD(&svcpt->scp_threads); + + /* rqbd and incoming request queue */ + spin_lock_init(&svcpt->scp_lock); + INIT_LIST_HEAD(&svcpt->scp_rqbd_idle); + INIT_LIST_HEAD(&svcpt->scp_rqbd_posted); + INIT_LIST_HEAD(&svcpt->scp_req_incoming); + init_waitqueue_head(&svcpt->scp_waitq); + /* history request & rqbd list */ + INIT_LIST_HEAD(&svcpt->scp_hist_reqs); + INIT_LIST_HEAD(&svcpt->scp_hist_rqbds); + + /* active requests and hp requests */ + spin_lock_init(&svcpt->scp_req_lock); + + /* reply states */ + spin_lock_init(&svcpt->scp_rep_lock); + INIT_LIST_HEAD(&svcpt->scp_rep_active); + INIT_LIST_HEAD(&svcpt->scp_rep_idle); + init_waitqueue_head(&svcpt->scp_rep_waitq); + atomic_set(&svcpt->scp_nreps_difficult, 0); + + /* adaptive timeout */ + spin_lock_init(&svcpt->scp_at_lock); + array = &svcpt->scp_at_array; + + size = at_est2timeout(at_max); + array->paa_size = size; + array->paa_count = 0; + array->paa_deadline = -1; + + /* allocate memory for scp_at_array (ptlrpc_at_array) */ + OBD_CPT_ALLOC(array->paa_reqs_array, + svc->srv_cptable, cpt, sizeof(struct list_head) * size); + if (array->paa_reqs_array == NULL) + return -ENOMEM; + + for (index = 0; index < size; index++) + INIT_LIST_HEAD(&array->paa_reqs_array[index]); + + OBD_CPT_ALLOC(array->paa_reqs_count, + svc->srv_cptable, cpt, sizeof(__u32) * size); + if (array->paa_reqs_count == NULL) + goto failed; + + cfs_timer_init(&svcpt->scp_at_timer, ptlrpc_at_timer, svcpt); + /* At SOW, service time should be quick; 10s seems generous. If client + * timeout is less than this, we'll be sending an early reply. */ + at_init(&svcpt->scp_at_estimate, 10, 0); + + /* assign this before call ptlrpc_grow_req_bufs */ + svcpt->scp_service = svc; + /* Now allocate the request buffers, but don't post them now */ + rc = ptlrpc_grow_req_bufs(svcpt, 0); + /* We shouldn't be under memory pressure at startup, so + * fail if we can't allocate all our buffers at this time. */ + if (rc != 0) + goto failed; + + return 0; + + failed: + if (array->paa_reqs_count != NULL) { + OBD_FREE(array->paa_reqs_count, sizeof(__u32) * size); + array->paa_reqs_count = NULL; + } + + if (array->paa_reqs_array != NULL) { + OBD_FREE(array->paa_reqs_array, + sizeof(struct list_head) * array->paa_size); + array->paa_reqs_array = NULL; + } + + return -ENOMEM; +} + +/** + * Initialize service on a given portal. + * This includes starting serving threads , allocating and posting rqbds and + * so on. + */ +struct ptlrpc_service * +ptlrpc_register_service(struct ptlrpc_service_conf *conf, + struct proc_dir_entry *proc_entry) +{ + struct ptlrpc_service_cpt_conf *cconf = &conf->psc_cpt; + struct ptlrpc_service *service; + struct ptlrpc_service_part *svcpt; + struct cfs_cpt_table *cptable; + __u32 *cpts = NULL; + int ncpts; + int cpt; + int rc; + int i; + + LASSERT(conf->psc_buf.bc_nbufs > 0); + LASSERT(conf->psc_buf.bc_buf_size >= + conf->psc_buf.bc_req_max_size + SPTLRPC_MAX_PAYLOAD); + LASSERT(conf->psc_thr.tc_ctx_tags != 0); + + cptable = cconf->cc_cptable; + if (cptable == NULL) + cptable = cfs_cpt_table; + + if (!conf->psc_thr.tc_cpu_affinity) { + ncpts = 1; + } else { + ncpts = cfs_cpt_number(cptable); + if (cconf->cc_pattern != NULL) { + struct cfs_expr_list *el; + + rc = cfs_expr_list_parse(cconf->cc_pattern, + strlen(cconf->cc_pattern), + 0, ncpts - 1, &el); + if (rc != 0) { + CERROR("%s: invalid CPT pattern string: %s", + conf->psc_name, cconf->cc_pattern); + return ERR_PTR(-EINVAL); + } + + rc = cfs_expr_list_values(el, ncpts, &cpts); + cfs_expr_list_free(el); + if (rc <= 0) { + CERROR("%s: failed to parse CPT array %s: %d\n", + conf->psc_name, cconf->cc_pattern, rc); + if (cpts != NULL) + OBD_FREE(cpts, sizeof(*cpts) * ncpts); + return ERR_PTR(rc < 0 ? rc : -EINVAL); + } + ncpts = rc; + } + } + + OBD_ALLOC(service, offsetof(struct ptlrpc_service, srv_parts[ncpts])); + if (service == NULL) { + if (cpts != NULL) + OBD_FREE(cpts, sizeof(*cpts) * ncpts); + return ERR_PTR(-ENOMEM); + } + + service->srv_cptable = cptable; + service->srv_cpts = cpts; + service->srv_ncpts = ncpts; + + service->srv_cpt_bits = 0; /* it's zero already, easy to read... */ + while ((1 << service->srv_cpt_bits) < cfs_cpt_number(cptable)) + service->srv_cpt_bits++; + + /* public members */ + spin_lock_init(&service->srv_lock); + service->srv_name = conf->psc_name; + service->srv_watchdog_factor = conf->psc_watchdog_factor; + INIT_LIST_HEAD(&service->srv_list); /* for safety of cleanup */ + + /* buffer configuration */ + service->srv_nbuf_per_group = test_req_buffer_pressure ? + 1 : conf->psc_buf.bc_nbufs; + service->srv_max_req_size = conf->psc_buf.bc_req_max_size + + SPTLRPC_MAX_PAYLOAD; + service->srv_buf_size = conf->psc_buf.bc_buf_size; + service->srv_rep_portal = conf->psc_buf.bc_rep_portal; + service->srv_req_portal = conf->psc_buf.bc_req_portal; + + /* Increase max reply size to next power of two */ + service->srv_max_reply_size = 1; + while (service->srv_max_reply_size < + conf->psc_buf.bc_rep_max_size + SPTLRPC_MAX_PAYLOAD) + service->srv_max_reply_size <<= 1; + + service->srv_thread_name = conf->psc_thr.tc_thr_name; + service->srv_ctx_tags = conf->psc_thr.tc_ctx_tags; + service->srv_hpreq_ratio = PTLRPC_SVC_HP_RATIO; + service->srv_ops = conf->psc_ops; + + for (i = 0; i < ncpts; i++) { + if (!conf->psc_thr.tc_cpu_affinity) + cpt = CFS_CPT_ANY; + else + cpt = cpts != NULL ? cpts[i] : i; + + OBD_CPT_ALLOC(svcpt, cptable, cpt, sizeof(*svcpt)); + if (svcpt == NULL) { + rc = -ENOMEM; + goto failed; + } + + service->srv_parts[i] = svcpt; + rc = ptlrpc_service_part_init(service, svcpt, cpt); + if (rc != 0) + goto failed; + } + + ptlrpc_server_nthreads_check(service, conf); + + rc = LNetSetLazyPortal(service->srv_req_portal); + LASSERT(rc == 0); + + mutex_lock(&ptlrpc_all_services_mutex); + list_add(&service->srv_list, &ptlrpc_all_services); + mutex_unlock(&ptlrpc_all_services_mutex); + + if (proc_entry != NULL) + ptlrpc_lprocfs_register_service(proc_entry, service); + + rc = ptlrpc_service_nrs_setup(service); + if (rc != 0) + goto failed; + + CDEBUG(D_NET, "%s: Started, listening on portal %d\n", + service->srv_name, service->srv_req_portal); + + rc = ptlrpc_start_threads(service); + if (rc != 0) { + CERROR("Failed to start threads for service %s: %d\n", + service->srv_name, rc); + goto failed; + } + + return service; +failed: + ptlrpc_unregister_service(service); + return ERR_PTR(rc); +} +EXPORT_SYMBOL(ptlrpc_register_service); + +/** + * to actually free the request, must be called without holding svc_lock. + * note it's caller's responsibility to unlink req->rq_list. + */ +static void ptlrpc_server_free_request(struct ptlrpc_request *req) +{ + LASSERT(atomic_read(&req->rq_refcount) == 0); + LASSERT(list_empty(&req->rq_timed_list)); + + /* DEBUG_REQ() assumes the reply state of a request with a valid + * ref will not be destroyed until that reference is dropped. */ + ptlrpc_req_drop_rs(req); + + sptlrpc_svc_ctx_decref(req); + + if (req != &req->rq_rqbd->rqbd_req) { + /* NB request buffers use an embedded + * req if the incoming req unlinked the + * MD; this isn't one of them! */ + ptlrpc_request_cache_free(req); + } +} + +/** + * drop a reference count of the request. if it reaches 0, we either + * put it into history list, or free it immediately. + */ +void ptlrpc_server_drop_request(struct ptlrpc_request *req) +{ + struct ptlrpc_request_buffer_desc *rqbd = req->rq_rqbd; + struct ptlrpc_service_part *svcpt = rqbd->rqbd_svcpt; + struct ptlrpc_service *svc = svcpt->scp_service; + int refcount; + struct list_head *tmp; + struct list_head *nxt; + + if (!atomic_dec_and_test(&req->rq_refcount)) + return; + + if (req->rq_at_linked) { + spin_lock(&svcpt->scp_at_lock); + /* recheck with lock, in case it's unlinked by + * ptlrpc_at_check_timed() */ + if (likely(req->rq_at_linked)) + ptlrpc_at_remove_timed(req); + spin_unlock(&svcpt->scp_at_lock); + } + + LASSERT(list_empty(&req->rq_timed_list)); + + /* finalize request */ + if (req->rq_export) { + class_export_put(req->rq_export); + req->rq_export = NULL; + } + + spin_lock(&svcpt->scp_lock); + + list_add(&req->rq_list, &rqbd->rqbd_reqs); + + refcount = --(rqbd->rqbd_refcount); + if (refcount == 0) { + /* request buffer is now idle: add to history */ + list_del(&rqbd->rqbd_list); + + list_add_tail(&rqbd->rqbd_list, &svcpt->scp_hist_rqbds); + svcpt->scp_hist_nrqbds++; + + /* cull some history? + * I expect only about 1 or 2 rqbds need to be recycled here */ + while (svcpt->scp_hist_nrqbds > svc->srv_hist_nrqbds_cpt_max) { + rqbd = list_entry(svcpt->scp_hist_rqbds.next, + struct ptlrpc_request_buffer_desc, + rqbd_list); + + list_del(&rqbd->rqbd_list); + svcpt->scp_hist_nrqbds--; + + /* remove rqbd's reqs from svc's req history while + * I've got the service lock */ + list_for_each(tmp, &rqbd->rqbd_reqs) { + req = list_entry(tmp, struct ptlrpc_request, + rq_list); + /* Track the highest culled req seq */ + if (req->rq_history_seq > + svcpt->scp_hist_seq_culled) { + svcpt->scp_hist_seq_culled = + req->rq_history_seq; + } + list_del(&req->rq_history_list); + } + + spin_unlock(&svcpt->scp_lock); + + list_for_each_safe(tmp, nxt, &rqbd->rqbd_reqs) { + req = list_entry(rqbd->rqbd_reqs.next, + struct ptlrpc_request, + rq_list); + list_del(&req->rq_list); + ptlrpc_server_free_request(req); + } + + spin_lock(&svcpt->scp_lock); + /* + * now all reqs including the embedded req has been + * disposed, schedule request buffer for re-use. + */ + LASSERT(atomic_read(&rqbd->rqbd_req.rq_refcount) == + 0); + list_add_tail(&rqbd->rqbd_list, + &svcpt->scp_rqbd_idle); + } + + spin_unlock(&svcpt->scp_lock); + } else if (req->rq_reply_state && req->rq_reply_state->rs_prealloc) { + /* If we are low on memory, we are not interested in history */ + list_del(&req->rq_list); + list_del_init(&req->rq_history_list); + + /* Track the highest culled req seq */ + if (req->rq_history_seq > svcpt->scp_hist_seq_culled) + svcpt->scp_hist_seq_culled = req->rq_history_seq; + + spin_unlock(&svcpt->scp_lock); + + ptlrpc_server_free_request(req); + } else { + spin_unlock(&svcpt->scp_lock); + } +} + +/** Change request export and move hp request from old export to new */ +void ptlrpc_request_change_export(struct ptlrpc_request *req, + struct obd_export *export) +{ + if (req->rq_export != NULL) { + if (!list_empty(&req->rq_exp_list)) { + /* remove rq_exp_list from last export */ + spin_lock_bh(&req->rq_export->exp_rpc_lock); + list_del_init(&req->rq_exp_list); + spin_unlock_bh(&req->rq_export->exp_rpc_lock); + + /* export has one reference already, so it`s safe to + * add req to export queue here and get another + * reference for request later */ + spin_lock_bh(&export->exp_rpc_lock); + list_add(&req->rq_exp_list, &export->exp_hp_rpcs); + spin_unlock_bh(&export->exp_rpc_lock); + } + class_export_rpc_dec(req->rq_export); + class_export_put(req->rq_export); + } + + /* request takes one export refcount */ + req->rq_export = class_export_get(export); + class_export_rpc_inc(export); + + return; +} + +/** + * to finish a request: stop sending more early replies, and release + * the request. + */ +static void ptlrpc_server_finish_request(struct ptlrpc_service_part *svcpt, + struct ptlrpc_request *req) +{ + ptlrpc_server_hpreq_fini(req); + + ptlrpc_server_drop_request(req); +} + +/** + * to finish a active request: stop sending more early replies, and release + * the request. should be called after we finished handling the request. + */ +static void ptlrpc_server_finish_active_request( + struct ptlrpc_service_part *svcpt, + struct ptlrpc_request *req) +{ + spin_lock(&svcpt->scp_req_lock); + ptlrpc_nrs_req_stop_nolock(req); + svcpt->scp_nreqs_active--; + if (req->rq_hp) + svcpt->scp_nhreqs_active--; + spin_unlock(&svcpt->scp_req_lock); + + ptlrpc_nrs_req_finalize(req); + + if (req->rq_export != NULL) + class_export_rpc_dec(req->rq_export); + + ptlrpc_server_finish_request(svcpt, req); +} + +/** + * This function makes sure dead exports are evicted in a timely manner. + * This function is only called when some export receives a message (i.e., + * the network is up.) + */ +static void ptlrpc_update_export_timer(struct obd_export *exp, long extra_delay) +{ + struct obd_export *oldest_exp; + time_t oldest_time, new_time; + + LASSERT(exp); + + /* Compensate for slow machines, etc, by faking our request time + into the future. Although this can break the strict time-ordering + of the list, we can be really lazy here - we don't have to evict + at the exact right moment. Eventually, all silent exports + will make it to the top of the list. */ + + /* Do not pay attention on 1sec or smaller renewals. */ + new_time = get_seconds() + extra_delay; + if (exp->exp_last_request_time + 1 /*second */ >= new_time) + return; + + exp->exp_last_request_time = new_time; + + /* exports may get disconnected from the chain even though the + export has references, so we must keep the spin lock while + manipulating the lists */ + spin_lock(&exp->exp_obd->obd_dev_lock); + + if (list_empty(&exp->exp_obd_chain_timed)) { + /* this one is not timed */ + spin_unlock(&exp->exp_obd->obd_dev_lock); + return; + } + + list_move_tail(&exp->exp_obd_chain_timed, + &exp->exp_obd->obd_exports_timed); + + oldest_exp = list_entry(exp->exp_obd->obd_exports_timed.next, + struct obd_export, exp_obd_chain_timed); + oldest_time = oldest_exp->exp_last_request_time; + spin_unlock(&exp->exp_obd->obd_dev_lock); + + if (exp->exp_obd->obd_recovering) { + /* be nice to everyone during recovery */ + return; + } + + /* Note - racing to start/reset the obd_eviction timer is safe */ + if (exp->exp_obd->obd_eviction_timer == 0) { + /* Check if the oldest entry is expired. */ + if (get_seconds() > (oldest_time + PING_EVICT_TIMEOUT + + extra_delay)) { + /* We need a second timer, in case the net was down and + * it just came back. Since the pinger may skip every + * other PING_INTERVAL (see note in ptlrpc_pinger_main), + * we better wait for 3. */ + exp->exp_obd->obd_eviction_timer = + get_seconds() + 3 * PING_INTERVAL; + CDEBUG(D_HA, "%s: Think about evicting %s from "CFS_TIME_T"\n", + exp->exp_obd->obd_name, + obd_export_nid2str(oldest_exp), oldest_time); + } + } else { + if (get_seconds() > + (exp->exp_obd->obd_eviction_timer + extra_delay)) { + /* The evictor won't evict anyone who we've heard from + * recently, so we don't have to check before we start + * it. */ + if (!ping_evictor_wake(exp)) + exp->exp_obd->obd_eviction_timer = 0; + } + } +} + +/** + * Sanity check request \a req. + * Return 0 if all is ok, error code otherwise. + */ +static int ptlrpc_check_req(struct ptlrpc_request *req) +{ + struct obd_device *obd = req->rq_export->exp_obd; + int rc = 0; + + if (unlikely(lustre_msg_get_conn_cnt(req->rq_reqmsg) < + req->rq_export->exp_conn_cnt)) { + DEBUG_REQ(D_RPCTRACE, req, + "DROPPING req from old connection %d < %d", + lustre_msg_get_conn_cnt(req->rq_reqmsg), + req->rq_export->exp_conn_cnt); + return -EEXIST; + } + if (unlikely(obd == NULL || obd->obd_fail)) { + /* + * Failing over, don't handle any more reqs, send + * error response instead. + */ + CDEBUG(D_RPCTRACE, "Dropping req %p for failed obd %s\n", + req, (obd != NULL) ? obd->obd_name : "unknown"); + rc = -ENODEV; + } else if (lustre_msg_get_flags(req->rq_reqmsg) & + (MSG_REPLAY | MSG_REQ_REPLAY_DONE) && + !obd->obd_recovering) { + DEBUG_REQ(D_ERROR, req, + "Invalid replay without recovery"); + class_fail_export(req->rq_export); + rc = -ENODEV; + } else if (lustre_msg_get_transno(req->rq_reqmsg) != 0 && + !obd->obd_recovering) { + DEBUG_REQ(D_ERROR, req, "Invalid req with transno %llu without recovery", + lustre_msg_get_transno(req->rq_reqmsg)); + class_fail_export(req->rq_export); + rc = -ENODEV; + } + + if (unlikely(rc < 0)) { + req->rq_status = rc; + ptlrpc_error(req); + } + return rc; +} + +static void ptlrpc_at_set_timer(struct ptlrpc_service_part *svcpt) +{ + struct ptlrpc_at_array *array = &svcpt->scp_at_array; + __s32 next; + + if (array->paa_count == 0) { + cfs_timer_disarm(&svcpt->scp_at_timer); + return; + } + + /* Set timer for closest deadline */ + next = (__s32)(array->paa_deadline - get_seconds() - + at_early_margin); + if (next <= 0) { + ptlrpc_at_timer((unsigned long)svcpt); + } else { + cfs_timer_arm(&svcpt->scp_at_timer, cfs_time_shift(next)); + CDEBUG(D_INFO, "armed %s at %+ds\n", + svcpt->scp_service->srv_name, next); + } +} + +/* Add rpc to early reply check list */ +static int ptlrpc_at_add_timed(struct ptlrpc_request *req) +{ + struct ptlrpc_service_part *svcpt = req->rq_rqbd->rqbd_svcpt; + struct ptlrpc_at_array *array = &svcpt->scp_at_array; + struct ptlrpc_request *rq = NULL; + __u32 index; + + if (AT_OFF) + return 0; + + if (req->rq_no_reply) + return 0; + + if ((lustre_msghdr_get_flags(req->rq_reqmsg) & MSGHDR_AT_SUPPORT) == 0) + return -ENOSYS; + + spin_lock(&svcpt->scp_at_lock); + LASSERT(list_empty(&req->rq_timed_list)); + + index = (unsigned long)req->rq_deadline % array->paa_size; + if (array->paa_reqs_count[index] > 0) { + /* latest rpcs will have the latest deadlines in the list, + * so search backward. */ + list_for_each_entry_reverse(rq, + &array->paa_reqs_array[index], + rq_timed_list) { + if (req->rq_deadline >= rq->rq_deadline) { + list_add(&req->rq_timed_list, + &rq->rq_timed_list); + break; + } + } + } + + /* Add the request at the head of the list */ + if (list_empty(&req->rq_timed_list)) + list_add(&req->rq_timed_list, + &array->paa_reqs_array[index]); + + spin_lock(&req->rq_lock); + req->rq_at_linked = 1; + spin_unlock(&req->rq_lock); + req->rq_at_index = index; + array->paa_reqs_count[index]++; + array->paa_count++; + if (array->paa_count == 1 || array->paa_deadline > req->rq_deadline) { + array->paa_deadline = req->rq_deadline; + ptlrpc_at_set_timer(svcpt); + } + spin_unlock(&svcpt->scp_at_lock); + + return 0; +} + +static void +ptlrpc_at_remove_timed(struct ptlrpc_request *req) +{ + struct ptlrpc_at_array *array; + + array = &req->rq_rqbd->rqbd_svcpt->scp_at_array; + + /* NB: must call with hold svcpt::scp_at_lock */ + LASSERT(!list_empty(&req->rq_timed_list)); + list_del_init(&req->rq_timed_list); + + spin_lock(&req->rq_lock); + req->rq_at_linked = 0; + spin_unlock(&req->rq_lock); + + array->paa_reqs_count[req->rq_at_index]--; + array->paa_count--; +} + +static int ptlrpc_at_send_early_reply(struct ptlrpc_request *req) +{ + struct ptlrpc_service_part *svcpt = req->rq_rqbd->rqbd_svcpt; + struct ptlrpc_request *reqcopy; + struct lustre_msg *reqmsg; + long olddl = req->rq_deadline - get_seconds(); + time_t newdl; + int rc; + + /* deadline is when the client expects us to reply, margin is the + difference between clients' and servers' expectations */ + DEBUG_REQ(D_ADAPTTO, req, + "%ssending early reply (deadline %+lds, margin %+lds) for %d+%d", + AT_OFF ? "AT off - not " : "", + olddl, olddl - at_get(&svcpt->scp_at_estimate), + at_get(&svcpt->scp_at_estimate), at_extra); + + if (AT_OFF) + return 0; + + if (olddl < 0) { + DEBUG_REQ(D_WARNING, req, "Already past deadline (%+lds), not sending early reply. Consider increasing at_early_margin (%d)?", + olddl, at_early_margin); + + /* Return an error so we're not re-added to the timed list. */ + return -ETIMEDOUT; + } + + if (!(lustre_msghdr_get_flags(req->rq_reqmsg) & MSGHDR_AT_SUPPORT)) { + DEBUG_REQ(D_INFO, req, "Wanted to ask client for more time, but no AT support"); + return -ENOSYS; + } + + if (req->rq_export && + lustre_msg_get_flags(req->rq_reqmsg) & + (MSG_REPLAY | MSG_REQ_REPLAY_DONE | MSG_LOCK_REPLAY_DONE)) { + /* During recovery, we don't want to send too many early + * replies, but on the other hand we want to make sure the + * client has enough time to resend if the rpc is lost. So + * during the recovery period send at least 4 early replies, + * spacing them every at_extra if we can. at_estimate should + * always equal this fixed value during recovery. */ + at_measured(&svcpt->scp_at_estimate, min(at_extra, + req->rq_export->exp_obd->obd_recovery_timeout / 4)); + } else { + /* Fake our processing time into the future to ask the clients + * for some extra amount of time */ + at_measured(&svcpt->scp_at_estimate, at_extra + + get_seconds() - + req->rq_arrival_time.tv_sec); + + /* Check to see if we've actually increased the deadline - + * we may be past adaptive_max */ + if (req->rq_deadline >= req->rq_arrival_time.tv_sec + + at_get(&svcpt->scp_at_estimate)) { + DEBUG_REQ(D_WARNING, req, "Couldn't add any time (%ld/%ld), not sending early reply\n", + olddl, req->rq_arrival_time.tv_sec + + at_get(&svcpt->scp_at_estimate) - + get_seconds()); + return -ETIMEDOUT; + } + } + newdl = get_seconds() + at_get(&svcpt->scp_at_estimate); + + reqcopy = ptlrpc_request_cache_alloc(GFP_NOFS); + if (reqcopy == NULL) + return -ENOMEM; + OBD_ALLOC_LARGE(reqmsg, req->rq_reqlen); + if (!reqmsg) { + rc = -ENOMEM; + goto out_free; + } + + *reqcopy = *req; + reqcopy->rq_reply_state = NULL; + reqcopy->rq_rep_swab_mask = 0; + reqcopy->rq_pack_bulk = 0; + reqcopy->rq_pack_udesc = 0; + reqcopy->rq_packed_final = 0; + sptlrpc_svc_ctx_addref(reqcopy); + /* We only need the reqmsg for the magic */ + reqcopy->rq_reqmsg = reqmsg; + memcpy(reqmsg, req->rq_reqmsg, req->rq_reqlen); + + LASSERT(atomic_read(&req->rq_refcount)); + /** if it is last refcount then early reply isn't needed */ + if (atomic_read(&req->rq_refcount) == 1) { + DEBUG_REQ(D_ADAPTTO, reqcopy, "Normal reply already sent out, abort sending early reply\n"); + rc = -EINVAL; + goto out; + } + + /* Connection ref */ + reqcopy->rq_export = class_conn2export( + lustre_msg_get_handle(reqcopy->rq_reqmsg)); + if (reqcopy->rq_export == NULL) { + rc = -ENODEV; + goto out; + } + + /* RPC ref */ + class_export_rpc_inc(reqcopy->rq_export); + if (reqcopy->rq_export->exp_obd && + reqcopy->rq_export->exp_obd->obd_fail) { + rc = -ENODEV; + goto out_put; + } + + rc = lustre_pack_reply_flags(reqcopy, 1, NULL, NULL, LPRFL_EARLY_REPLY); + if (rc) + goto out_put; + + rc = ptlrpc_send_reply(reqcopy, PTLRPC_REPLY_EARLY); + + if (!rc) { + /* Adjust our own deadline to what we told the client */ + req->rq_deadline = newdl; + req->rq_early_count++; /* number sent, server side */ + } else { + DEBUG_REQ(D_ERROR, req, "Early reply send failed %d", rc); + } + + /* Free the (early) reply state from lustre_pack_reply. + (ptlrpc_send_reply takes it's own rs ref, so this is safe here) */ + ptlrpc_req_drop_rs(reqcopy); + +out_put: + class_export_rpc_dec(reqcopy->rq_export); + class_export_put(reqcopy->rq_export); +out: + sptlrpc_svc_ctx_decref(reqcopy); + OBD_FREE_LARGE(reqmsg, req->rq_reqlen); +out_free: + ptlrpc_request_cache_free(reqcopy); + return rc; +} + +/* Send early replies to everybody expiring within at_early_margin + asking for at_extra time */ +static int ptlrpc_at_check_timed(struct ptlrpc_service_part *svcpt) +{ + struct ptlrpc_at_array *array = &svcpt->scp_at_array; + struct ptlrpc_request *rq, *n; + struct list_head work_list; + __u32 index, count; + time_t deadline; + time_t now = get_seconds(); + long delay; + int first, counter = 0; + + spin_lock(&svcpt->scp_at_lock); + if (svcpt->scp_at_check == 0) { + spin_unlock(&svcpt->scp_at_lock); + return 0; + } + delay = cfs_time_sub(cfs_time_current(), svcpt->scp_at_checktime); + svcpt->scp_at_check = 0; + + if (array->paa_count == 0) { + spin_unlock(&svcpt->scp_at_lock); + return 0; + } + + /* The timer went off, but maybe the nearest rpc already completed. */ + first = array->paa_deadline - now; + if (first > at_early_margin) { + /* We've still got plenty of time. Reset the timer. */ + ptlrpc_at_set_timer(svcpt); + spin_unlock(&svcpt->scp_at_lock); + return 0; + } + + /* We're close to a timeout, and we don't know how much longer the + server will take. Send early replies to everyone expiring soon. */ + INIT_LIST_HEAD(&work_list); + deadline = -1; + index = (unsigned long)array->paa_deadline % array->paa_size; + count = array->paa_count; + while (count > 0) { + count -= array->paa_reqs_count[index]; + list_for_each_entry_safe(rq, n, + &array->paa_reqs_array[index], + rq_timed_list) { + if (rq->rq_deadline > now + at_early_margin) { + /* update the earliest deadline */ + if (deadline == -1 || + rq->rq_deadline < deadline) + deadline = rq->rq_deadline; + break; + } + + ptlrpc_at_remove_timed(rq); + /** + * ptlrpc_server_drop_request() may drop + * refcount to 0 already. Let's check this and + * don't add entry to work_list + */ + if (likely(atomic_inc_not_zero(&rq->rq_refcount))) + list_add(&rq->rq_timed_list, &work_list); + counter++; + } + + if (++index >= array->paa_size) + index = 0; + } + array->paa_deadline = deadline; + /* we have a new earliest deadline, restart the timer */ + ptlrpc_at_set_timer(svcpt); + + spin_unlock(&svcpt->scp_at_lock); + + CDEBUG(D_ADAPTTO, "timeout in %+ds, asking for %d secs on %d early replies\n", + first, at_extra, counter); + if (first < 0) { + /* We're already past request deadlines before we even get a + chance to send early replies */ + LCONSOLE_WARN("%s: This server is not able to keep up with request traffic (cpu-bound).\n", + svcpt->scp_service->srv_name); + CWARN("earlyQ=%d reqQ=%d recA=%d, svcEst=%d, delay=" CFS_DURATION_T "(jiff)\n", + counter, svcpt->scp_nreqs_incoming, + svcpt->scp_nreqs_active, + at_get(&svcpt->scp_at_estimate), delay); + } + + /* we took additional refcount so entries can't be deleted from list, no + * locking is needed */ + while (!list_empty(&work_list)) { + rq = list_entry(work_list.next, struct ptlrpc_request, + rq_timed_list); + list_del_init(&rq->rq_timed_list); + + if (ptlrpc_at_send_early_reply(rq) == 0) + ptlrpc_at_add_timed(rq); + + ptlrpc_server_drop_request(rq); + } + + return 1; /* return "did_something" for liblustre */ +} + +/** + * Put the request to the export list if the request may become + * a high priority one. + */ +static int ptlrpc_server_hpreq_init(struct ptlrpc_service_part *svcpt, + struct ptlrpc_request *req) +{ + int rc = 0; + + if (svcpt->scp_service->srv_ops.so_hpreq_handler) { + rc = svcpt->scp_service->srv_ops.so_hpreq_handler(req); + if (rc < 0) + return rc; + LASSERT(rc == 0); + } + if (req->rq_export && req->rq_ops) { + /* Perform request specific check. We should do this check + * before the request is added into exp_hp_rpcs list otherwise + * it may hit swab race at LU-1044. */ + if (req->rq_ops->hpreq_check) { + rc = req->rq_ops->hpreq_check(req); + /** + * XXX: Out of all current + * ptlrpc_hpreq_ops::hpreq_check(), only + * ldlm_cancel_hpreq_check() can return an error code; + * other functions assert in similar places, which seems + * odd. What also does not seem right is that handlers + * for those RPCs do not assert on the same checks, but + * rather handle the error cases. e.g. see + * ost_rw_hpreq_check(), and ost_brw_read(), + * ost_brw_write(). + */ + if (rc < 0) + return rc; + LASSERT(rc == 0 || rc == 1); + } + + spin_lock_bh(&req->rq_export->exp_rpc_lock); + list_add(&req->rq_exp_list, + &req->rq_export->exp_hp_rpcs); + spin_unlock_bh(&req->rq_export->exp_rpc_lock); + } + + ptlrpc_nrs_req_initialize(svcpt, req, rc); + + return rc; +} + +/** Remove the request from the export list. */ +static void ptlrpc_server_hpreq_fini(struct ptlrpc_request *req) +{ + if (req->rq_export && req->rq_ops) { + /* refresh lock timeout again so that client has more + * room to send lock cancel RPC. */ + if (req->rq_ops->hpreq_fini) + req->rq_ops->hpreq_fini(req); + + spin_lock_bh(&req->rq_export->exp_rpc_lock); + list_del_init(&req->rq_exp_list); + spin_unlock_bh(&req->rq_export->exp_rpc_lock); + } +} + +static int ptlrpc_hpreq_check(struct ptlrpc_request *req) +{ + return 1; +} + +static struct ptlrpc_hpreq_ops ptlrpc_hpreq_common = { + .hpreq_check = ptlrpc_hpreq_check, +}; + +/* Hi-Priority RPC check by RPC operation code. */ +int ptlrpc_hpreq_handler(struct ptlrpc_request *req) +{ + int opc = lustre_msg_get_opc(req->rq_reqmsg); + + /* Check for export to let only reconnects for not yet evicted + * export to become a HP rpc. */ + if ((req->rq_export != NULL) && + (opc == OBD_PING || opc == MDS_CONNECT || opc == OST_CONNECT)) + req->rq_ops = &ptlrpc_hpreq_common; + + return 0; +} +EXPORT_SYMBOL(ptlrpc_hpreq_handler); + +static int ptlrpc_server_request_add(struct ptlrpc_service_part *svcpt, + struct ptlrpc_request *req) +{ + int rc; + + rc = ptlrpc_server_hpreq_init(svcpt, req); + if (rc < 0) + return rc; + + ptlrpc_nrs_req_add(svcpt, req, !!rc); + + return 0; +} + +/** + * Allow to handle high priority request + * User can call it w/o any lock but need to hold + * ptlrpc_service_part::scp_req_lock to get reliable result + */ +static bool ptlrpc_server_allow_high(struct ptlrpc_service_part *svcpt, + bool force) +{ + int running = svcpt->scp_nthrs_running; + + if (!nrs_svcpt_has_hp(svcpt)) + return false; + + if (force) + return true; + + if (unlikely(svcpt->scp_service->srv_req_portal == MDS_REQUEST_PORTAL && + CFS_FAIL_PRECHECK(OBD_FAIL_PTLRPC_CANCEL_RESEND))) { + /* leave just 1 thread for normal RPCs */ + running = PTLRPC_NTHRS_INIT; + if (svcpt->scp_service->srv_ops.so_hpreq_handler != NULL) + running += 1; + } + + if (svcpt->scp_nreqs_active >= running - 1) + return false; + + if (svcpt->scp_nhreqs_active == 0) + return true; + + return !ptlrpc_nrs_req_pending_nolock(svcpt, false) || + svcpt->scp_hreq_count < svcpt->scp_service->srv_hpreq_ratio; +} + +static bool ptlrpc_server_high_pending(struct ptlrpc_service_part *svcpt, + bool force) +{ + return ptlrpc_server_allow_high(svcpt, force) && + ptlrpc_nrs_req_pending_nolock(svcpt, true); +} + +/** + * Only allow normal priority requests on a service that has a high-priority + * queue if forced (i.e. cleanup), if there are other high priority requests + * already being processed (i.e. those threads can service more high-priority + * requests), or if there are enough idle threads that a later thread can do + * a high priority request. + * User can call it w/o any lock but need to hold + * ptlrpc_service_part::scp_req_lock to get reliable result + */ +static bool ptlrpc_server_allow_normal(struct ptlrpc_service_part *svcpt, + bool force) +{ + int running = svcpt->scp_nthrs_running; + if (unlikely(svcpt->scp_service->srv_req_portal == MDS_REQUEST_PORTAL && + CFS_FAIL_PRECHECK(OBD_FAIL_PTLRPC_CANCEL_RESEND))) { + /* leave just 1 thread for normal RPCs */ + running = PTLRPC_NTHRS_INIT; + if (svcpt->scp_service->srv_ops.so_hpreq_handler != NULL) + running += 1; + } + + if (force || + svcpt->scp_nreqs_active < running - 2) + return true; + + if (svcpt->scp_nreqs_active >= running - 1) + return false; + + return svcpt->scp_nhreqs_active > 0 || !nrs_svcpt_has_hp(svcpt); +} + +static bool ptlrpc_server_normal_pending(struct ptlrpc_service_part *svcpt, + bool force) +{ + return ptlrpc_server_allow_normal(svcpt, force) && + ptlrpc_nrs_req_pending_nolock(svcpt, false); +} + +/** + * Returns true if there are requests available in incoming + * request queue for processing and it is allowed to fetch them. + * User can call it w/o any lock but need to hold ptlrpc_service::scp_req_lock + * to get reliable result + * \see ptlrpc_server_allow_normal + * \see ptlrpc_server_allow high + */ +static inline bool +ptlrpc_server_request_pending(struct ptlrpc_service_part *svcpt, bool force) +{ + return ptlrpc_server_high_pending(svcpt, force) || + ptlrpc_server_normal_pending(svcpt, force); +} + +/** + * Fetch a request for processing from queue of unprocessed requests. + * Favors high-priority requests. + * Returns a pointer to fetched request. + */ +static struct ptlrpc_request * +ptlrpc_server_request_get(struct ptlrpc_service_part *svcpt, bool force) +{ + struct ptlrpc_request *req = NULL; + + spin_lock(&svcpt->scp_req_lock); + + if (ptlrpc_server_high_pending(svcpt, force)) { + req = ptlrpc_nrs_req_get_nolock(svcpt, true, force); + if (req != NULL) { + svcpt->scp_hreq_count++; + goto got_request; + } + } + + if (ptlrpc_server_normal_pending(svcpt, force)) { + req = ptlrpc_nrs_req_get_nolock(svcpt, false, force); + if (req != NULL) { + svcpt->scp_hreq_count = 0; + goto got_request; + } + } + + spin_unlock(&svcpt->scp_req_lock); + return NULL; + +got_request: + svcpt->scp_nreqs_active++; + if (req->rq_hp) + svcpt->scp_nhreqs_active++; + + spin_unlock(&svcpt->scp_req_lock); + + if (likely(req->rq_export)) + class_export_rpc_inc(req->rq_export); + + return req; +} + +/** + * Handle freshly incoming reqs, add to timed early reply list, + * pass on to regular request queue. + * All incoming requests pass through here before getting into + * ptlrpc_server_handle_req later on. + */ +static int +ptlrpc_server_handle_req_in(struct ptlrpc_service_part *svcpt, + struct ptlrpc_thread *thread) +{ + struct ptlrpc_service *svc = svcpt->scp_service; + struct ptlrpc_request *req; + __u32 deadline; + int rc; + + spin_lock(&svcpt->scp_lock); + if (list_empty(&svcpt->scp_req_incoming)) { + spin_unlock(&svcpt->scp_lock); + return 0; + } + + req = list_entry(svcpt->scp_req_incoming.next, + struct ptlrpc_request, rq_list); + list_del_init(&req->rq_list); + svcpt->scp_nreqs_incoming--; + /* Consider this still a "queued" request as far as stats are + * concerned */ + spin_unlock(&svcpt->scp_lock); + + /* go through security check/transform */ + rc = sptlrpc_svc_unwrap_request(req); + switch (rc) { + case SECSVC_OK: + break; + case SECSVC_COMPLETE: + target_send_reply(req, 0, OBD_FAIL_MDS_ALL_REPLY_NET); + goto err_req; + case SECSVC_DROP: + goto err_req; + default: + LBUG(); + } + + /* + * for null-flavored rpc, msg has been unpacked by sptlrpc, although + * redo it wouldn't be harmful. + */ + if (SPTLRPC_FLVR_POLICY(req->rq_flvr.sf_rpc) != SPTLRPC_POLICY_NULL) { + rc = ptlrpc_unpack_req_msg(req, req->rq_reqlen); + if (rc != 0) { + CERROR("error unpacking request: ptl %d from %s x%llu\n", + svc->srv_req_portal, libcfs_id2str(req->rq_peer), + req->rq_xid); + goto err_req; + } + } + + rc = lustre_unpack_req_ptlrpc_body(req, MSG_PTLRPC_BODY_OFF); + if (rc) { + CERROR("error unpacking ptlrpc body: ptl %d from %s x%llu\n", + svc->srv_req_portal, libcfs_id2str(req->rq_peer), + req->rq_xid); + goto err_req; + } + + if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_DROP_REQ_OPC) && + lustre_msg_get_opc(req->rq_reqmsg) == cfs_fail_val) { + CERROR("drop incoming rpc opc %u, x%llu\n", + cfs_fail_val, req->rq_xid); + goto err_req; + } + + rc = -EINVAL; + if (lustre_msg_get_type(req->rq_reqmsg) != PTL_RPC_MSG_REQUEST) { + CERROR("wrong packet type received (type=%u) from %s\n", + lustre_msg_get_type(req->rq_reqmsg), + libcfs_id2str(req->rq_peer)); + goto err_req; + } + + switch (lustre_msg_get_opc(req->rq_reqmsg)) { + case MDS_WRITEPAGE: + case OST_WRITE: + req->rq_bulk_write = 1; + break; + case MDS_READPAGE: + case OST_READ: + case MGS_CONFIG_READ: + req->rq_bulk_read = 1; + break; + } + + CDEBUG(D_RPCTRACE, "got req x%llu\n", req->rq_xid); + + req->rq_export = class_conn2export( + lustre_msg_get_handle(req->rq_reqmsg)); + if (req->rq_export) { + rc = ptlrpc_check_req(req); + if (rc == 0) { + rc = sptlrpc_target_export_check(req->rq_export, req); + if (rc) + DEBUG_REQ(D_ERROR, req, "DROPPING req with illegal security flavor,"); + } + + if (rc) + goto err_req; + ptlrpc_update_export_timer(req->rq_export, 0); + } + + /* req_in handling should/must be fast */ + if (get_seconds() - req->rq_arrival_time.tv_sec > 5) + DEBUG_REQ(D_WARNING, req, "Slow req_in handling "CFS_DURATION_T"s", + cfs_time_sub(get_seconds(), + req->rq_arrival_time.tv_sec)); + + /* Set rpc server deadline and add it to the timed list */ + deadline = (lustre_msghdr_get_flags(req->rq_reqmsg) & + MSGHDR_AT_SUPPORT) ? + /* The max time the client expects us to take */ + lustre_msg_get_timeout(req->rq_reqmsg) : obd_timeout; + req->rq_deadline = req->rq_arrival_time.tv_sec + deadline; + if (unlikely(deadline == 0)) { + DEBUG_REQ(D_ERROR, req, "Dropping request with 0 timeout"); + goto err_req; + } + + req->rq_svc_thread = thread; + + ptlrpc_at_add_timed(req); + + /* Move it over to the request processing queue */ + rc = ptlrpc_server_request_add(svcpt, req); + if (rc) + goto err_req; + + wake_up(&svcpt->scp_waitq); + return 1; + +err_req: + ptlrpc_server_finish_request(svcpt, req); + + return 1; +} + +/** + * Main incoming request handling logic. + * Calls handler function from service to do actual processing. + */ +static int +ptlrpc_server_handle_request(struct ptlrpc_service_part *svcpt, + struct ptlrpc_thread *thread) +{ + struct ptlrpc_service *svc = svcpt->scp_service; + struct ptlrpc_request *request; + struct timeval work_start; + struct timeval work_end; + long timediff; + int rc; + int fail_opc = 0; + + request = ptlrpc_server_request_get(svcpt, false); + if (request == NULL) + return 0; + + if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_HPREQ_NOTIMEOUT)) + fail_opc = OBD_FAIL_PTLRPC_HPREQ_NOTIMEOUT; + else if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_HPREQ_TIMEOUT)) + fail_opc = OBD_FAIL_PTLRPC_HPREQ_TIMEOUT; + + if (unlikely(fail_opc)) { + if (request->rq_export && request->rq_ops) + OBD_FAIL_TIMEOUT(fail_opc, 4); + } + + ptlrpc_rqphase_move(request, RQ_PHASE_INTERPRET); + + if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_DUMP_LOG)) + libcfs_debug_dumplog(); + + do_gettimeofday(&work_start); + timediff = cfs_timeval_sub(&work_start, &request->rq_arrival_time, + NULL); + if (likely(svc->srv_stats != NULL)) { + lprocfs_counter_add(svc->srv_stats, PTLRPC_REQWAIT_CNTR, + timediff); + lprocfs_counter_add(svc->srv_stats, PTLRPC_REQQDEPTH_CNTR, + svcpt->scp_nreqs_incoming); + lprocfs_counter_add(svc->srv_stats, PTLRPC_REQACTIVE_CNTR, + svcpt->scp_nreqs_active); + lprocfs_counter_add(svc->srv_stats, PTLRPC_TIMEOUT, + at_get(&svcpt->scp_at_estimate)); + } + + rc = lu_context_init(&request->rq_session, LCT_SESSION | LCT_NOREF); + if (rc) { + CERROR("Failure to initialize session: %d\n", rc); + goto out_req; + } + request->rq_session.lc_thread = thread; + request->rq_session.lc_cookie = 0x5; + lu_context_enter(&request->rq_session); + + CDEBUG(D_NET, "got req %llu\n", request->rq_xid); + + request->rq_svc_thread = thread; + if (thread) + request->rq_svc_thread->t_env->le_ses = &request->rq_session; + + if (likely(request->rq_export)) { + if (unlikely(ptlrpc_check_req(request))) + goto put_conn; + ptlrpc_update_export_timer(request->rq_export, timediff >> 19); + } + + /* Discard requests queued for longer than the deadline. + The deadline is increased if we send an early reply. */ + if (get_seconds() > request->rq_deadline) { + DEBUG_REQ(D_ERROR, request, "Dropping timed-out request from %s: deadline " CFS_DURATION_T ":" CFS_DURATION_T "s ago\n", + libcfs_id2str(request->rq_peer), + cfs_time_sub(request->rq_deadline, + request->rq_arrival_time.tv_sec), + cfs_time_sub(get_seconds(), + request->rq_deadline)); + goto put_conn; + } + + CDEBUG(D_RPCTRACE, "Handling RPC pname:cluuid+ref:pid:xid:nid:opc %s:%s+%d:%d:x%llu:%s:%d\n", + current_comm(), + (request->rq_export ? + (char *)request->rq_export->exp_client_uuid.uuid : "0"), + (request->rq_export ? + atomic_read(&request->rq_export->exp_refcount) : -99), + lustre_msg_get_status(request->rq_reqmsg), request->rq_xid, + libcfs_id2str(request->rq_peer), + lustre_msg_get_opc(request->rq_reqmsg)); + + if (lustre_msg_get_opc(request->rq_reqmsg) != OBD_PING) + CFS_FAIL_TIMEOUT_MS(OBD_FAIL_PTLRPC_PAUSE_REQ, cfs_fail_val); + + rc = svc->srv_ops.so_req_handler(request); + + ptlrpc_rqphase_move(request, RQ_PHASE_COMPLETE); + +put_conn: + lu_context_exit(&request->rq_session); + lu_context_fini(&request->rq_session); + + if (unlikely(get_seconds() > request->rq_deadline)) { + DEBUG_REQ(D_WARNING, request, + "Request took longer than estimated (" + CFS_DURATION_T":"CFS_DURATION_T + "s); client may timeout.", + cfs_time_sub(request->rq_deadline, + request->rq_arrival_time.tv_sec), + cfs_time_sub(get_seconds(), + request->rq_deadline)); + } + + do_gettimeofday(&work_end); + timediff = cfs_timeval_sub(&work_end, &work_start, NULL); + CDEBUG(D_RPCTRACE, "Handled RPC pname:cluuid+ref:pid:xid:nid:opc %s:%s+%d:%d:x%llu:%s:%d Request processed in %ldus (%ldus total) trans %llu rc %d/%d\n", + current_comm(), + (request->rq_export ? + (char *)request->rq_export->exp_client_uuid.uuid : "0"), + (request->rq_export ? + atomic_read(&request->rq_export->exp_refcount) : -99), + lustre_msg_get_status(request->rq_reqmsg), + request->rq_xid, + libcfs_id2str(request->rq_peer), + lustre_msg_get_opc(request->rq_reqmsg), + timediff, + cfs_timeval_sub(&work_end, &request->rq_arrival_time, NULL), + (request->rq_repmsg ? + lustre_msg_get_transno(request->rq_repmsg) : + request->rq_transno), + request->rq_status, + (request->rq_repmsg ? + lustre_msg_get_status(request->rq_repmsg) : -999)); + if (likely(svc->srv_stats != NULL && request->rq_reqmsg != NULL)) { + __u32 op = lustre_msg_get_opc(request->rq_reqmsg); + int opc = opcode_offset(op); + if (opc > 0 && !(op == LDLM_ENQUEUE || op == MDS_REINT)) { + LASSERT(opc < LUSTRE_MAX_OPCODES); + lprocfs_counter_add(svc->srv_stats, + opc + EXTRA_MAX_OPCODES, + timediff); + } + } + if (unlikely(request->rq_early_count)) { + DEBUG_REQ(D_ADAPTTO, request, + "sent %d early replies before finishing in " + CFS_DURATION_T"s", + request->rq_early_count, + cfs_time_sub(work_end.tv_sec, + request->rq_arrival_time.tv_sec)); + } + +out_req: + ptlrpc_server_finish_active_request(svcpt, request); + + return 1; +} + +/** + * An internal function to process a single reply state object. + */ +static int +ptlrpc_handle_rs(struct ptlrpc_reply_state *rs) +{ + struct ptlrpc_service_part *svcpt = rs->rs_svcpt; + struct ptlrpc_service *svc = svcpt->scp_service; + struct obd_export *exp; + int nlocks; + int been_handled; + + exp = rs->rs_export; + + LASSERT(rs->rs_difficult); + LASSERT(rs->rs_scheduled); + LASSERT(list_empty(&rs->rs_list)); + + spin_lock(&exp->exp_lock); + /* Noop if removed already */ + list_del_init(&rs->rs_exp_list); + spin_unlock(&exp->exp_lock); + + /* The disk commit callback holds exp_uncommitted_replies_lock while it + * iterates over newly committed replies, removing them from + * exp_uncommitted_replies. It then drops this lock and schedules the + * replies it found for handling here. + * + * We can avoid contention for exp_uncommitted_replies_lock between the + * HRT threads and further commit callbacks by checking rs_committed + * which is set in the commit callback while it holds both + * rs_lock and exp_uncommitted_reples. + * + * If we see rs_committed clear, the commit callback _may_ not have + * handled this reply yet and we race with it to grab + * exp_uncommitted_replies_lock before removing the reply from + * exp_uncommitted_replies. Note that if we lose the race and the + * reply has already been removed, list_del_init() is a noop. + * + * If we see rs_committed set, we know the commit callback is handling, + * or has handled this reply since store reordering might allow us to + * see rs_committed set out of sequence. But since this is done + * holding rs_lock, we can be sure it has all completed once we hold + * rs_lock, which we do right next. + */ + if (!rs->rs_committed) { + spin_lock(&exp->exp_uncommitted_replies_lock); + list_del_init(&rs->rs_obd_list); + spin_unlock(&exp->exp_uncommitted_replies_lock); + } + + spin_lock(&rs->rs_lock); + + been_handled = rs->rs_handled; + rs->rs_handled = 1; + + nlocks = rs->rs_nlocks; /* atomic "steal", but */ + rs->rs_nlocks = 0; /* locks still on rs_locks! */ + + if (nlocks == 0 && !been_handled) { + /* If we see this, we should already have seen the warning + * in mds_steal_ack_locks() */ + CDEBUG(D_HA, "All locks stolen from rs %p x%lld.t%lld o%d NID %s\n", + rs, + rs->rs_xid, rs->rs_transno, rs->rs_opc, + libcfs_nid2str(exp->exp_connection->c_peer.nid)); + } + + if ((!been_handled && rs->rs_on_net) || nlocks > 0) { + spin_unlock(&rs->rs_lock); + + if (!been_handled && rs->rs_on_net) { + LNetMDUnlink(rs->rs_md_h); + /* Ignore return code; we're racing with completion */ + } + + while (nlocks-- > 0) + ldlm_lock_decref(&rs->rs_locks[nlocks], + rs->rs_modes[nlocks]); + + spin_lock(&rs->rs_lock); + } + + rs->rs_scheduled = 0; + + if (!rs->rs_on_net) { + /* Off the net */ + spin_unlock(&rs->rs_lock); + + class_export_put(exp); + rs->rs_export = NULL; + ptlrpc_rs_decref(rs); + if (atomic_dec_and_test(&svcpt->scp_nreps_difficult) && + svc->srv_is_stopping) + wake_up_all(&svcpt->scp_waitq); + return 1; + } + + /* still on the net; callback will schedule */ + spin_unlock(&rs->rs_lock); + return 1; +} + + +static void +ptlrpc_check_rqbd_pool(struct ptlrpc_service_part *svcpt) +{ + int avail = svcpt->scp_nrqbds_posted; + int low_water = test_req_buffer_pressure ? 0 : + svcpt->scp_service->srv_nbuf_per_group / 2; + + /* NB I'm not locking; just looking. */ + + /* CAVEAT EMPTOR: We might be allocating buffers here because we've + * allowed the request history to grow out of control. We could put a + * sanity check on that here and cull some history if we need the + * space. */ + + if (avail <= low_water) + ptlrpc_grow_req_bufs(svcpt, 1); + + if (svcpt->scp_service->srv_stats) { + lprocfs_counter_add(svcpt->scp_service->srv_stats, + PTLRPC_REQBUF_AVAIL_CNTR, avail); + } +} + +static int +ptlrpc_retry_rqbds(void *arg) +{ + struct ptlrpc_service_part *svcpt = (struct ptlrpc_service_part *)arg; + + svcpt->scp_rqbd_timeout = 0; + return -ETIMEDOUT; +} + +static inline int +ptlrpc_threads_enough(struct ptlrpc_service_part *svcpt) +{ + return svcpt->scp_nreqs_active < + svcpt->scp_nthrs_running - 1 - + (svcpt->scp_service->srv_ops.so_hpreq_handler != NULL); +} + +/** + * allowed to create more threads + * user can call it w/o any lock but need to hold + * ptlrpc_service_part::scp_lock to get reliable result + */ +static inline int +ptlrpc_threads_increasable(struct ptlrpc_service_part *svcpt) +{ + return svcpt->scp_nthrs_running + + svcpt->scp_nthrs_starting < + svcpt->scp_service->srv_nthrs_cpt_limit; +} + +/** + * too many requests and allowed to create more threads + */ +static inline int +ptlrpc_threads_need_create(struct ptlrpc_service_part *svcpt) +{ + return !ptlrpc_threads_enough(svcpt) && + ptlrpc_threads_increasable(svcpt); +} + +static inline int +ptlrpc_thread_stopping(struct ptlrpc_thread *thread) +{ + return thread_is_stopping(thread) || + thread->t_svcpt->scp_service->srv_is_stopping; +} + +static inline int +ptlrpc_rqbd_pending(struct ptlrpc_service_part *svcpt) +{ + return !list_empty(&svcpt->scp_rqbd_idle) && + svcpt->scp_rqbd_timeout == 0; +} + +static inline int +ptlrpc_at_check(struct ptlrpc_service_part *svcpt) +{ + return svcpt->scp_at_check; +} + +/** + * requests wait on preprocessing + * user can call it w/o any lock but need to hold + * ptlrpc_service_part::scp_lock to get reliable result + */ +static inline int +ptlrpc_server_request_incoming(struct ptlrpc_service_part *svcpt) +{ + return !list_empty(&svcpt->scp_req_incoming); +} + +static __attribute__((__noinline__)) int +ptlrpc_wait_event(struct ptlrpc_service_part *svcpt, + struct ptlrpc_thread *thread) +{ + /* Don't exit while there are replies to be handled */ + struct l_wait_info lwi = LWI_TIMEOUT(svcpt->scp_rqbd_timeout, + ptlrpc_retry_rqbds, svcpt); + + /* XXX: Add this back when libcfs watchdog is merged upstream + lc_watchdog_disable(thread->t_watchdog); + */ + + cond_resched(); + + l_wait_event_exclusive_head(svcpt->scp_waitq, + ptlrpc_thread_stopping(thread) || + ptlrpc_server_request_incoming(svcpt) || + ptlrpc_server_request_pending(svcpt, false) || + ptlrpc_rqbd_pending(svcpt) || + ptlrpc_at_check(svcpt), &lwi); + + if (ptlrpc_thread_stopping(thread)) + return -EINTR; + + /* + lc_watchdog_touch(thread->t_watchdog, + ptlrpc_server_get_timeout(svcpt)); + */ + return 0; +} + +/** + * Main thread body for service threads. + * Waits in a loop waiting for new requests to process to appear. + * Every time an incoming requests is added to its queue, a waitq + * is woken up and one of the threads will handle it. + */ +static int ptlrpc_main(void *arg) +{ + struct ptlrpc_thread *thread = (struct ptlrpc_thread *)arg; + struct ptlrpc_service_part *svcpt = thread->t_svcpt; + struct ptlrpc_service *svc = svcpt->scp_service; + struct ptlrpc_reply_state *rs; + struct group_info *ginfo = NULL; + struct lu_env *env; + int counter = 0, rc = 0; + + thread->t_pid = current_pid(); + unshare_fs_struct(); + + /* NB: we will call cfs_cpt_bind() for all threads, because we + * might want to run lustre server only on a subset of system CPUs, + * in that case ->scp_cpt is CFS_CPT_ANY */ + rc = cfs_cpt_bind(svc->srv_cptable, svcpt->scp_cpt); + if (rc != 0) { + CWARN("%s: failed to bind %s on CPT %d\n", + svc->srv_name, thread->t_name, svcpt->scp_cpt); + } + + ginfo = groups_alloc(0); + if (!ginfo) { + rc = -ENOMEM; + goto out; + } + + set_current_groups(ginfo); + put_group_info(ginfo); + + if (svc->srv_ops.so_thr_init != NULL) { + rc = svc->srv_ops.so_thr_init(thread); + if (rc) + goto out; + } + + OBD_ALLOC_PTR(env); + if (env == NULL) { + rc = -ENOMEM; + goto out_srv_fini; + } + + rc = lu_context_init(&env->le_ctx, + svc->srv_ctx_tags|LCT_REMEMBER|LCT_NOREF); + if (rc) + goto out_srv_fini; + + thread->t_env = env; + env->le_ctx.lc_thread = thread; + env->le_ctx.lc_cookie = 0x6; + + while (!list_empty(&svcpt->scp_rqbd_idle)) { + rc = ptlrpc_server_post_idle_rqbds(svcpt); + if (rc >= 0) + continue; + + CERROR("Failed to post rqbd for %s on CPT %d: %d\n", + svc->srv_name, svcpt->scp_cpt, rc); + goto out_srv_fini; + } + + /* Alloc reply state structure for this one */ + OBD_ALLOC_LARGE(rs, svc->srv_max_reply_size); + if (!rs) { + rc = -ENOMEM; + goto out_srv_fini; + } + + spin_lock(&svcpt->scp_lock); + + LASSERT(thread_is_starting(thread)); + thread_clear_flags(thread, SVC_STARTING); + + LASSERT(svcpt->scp_nthrs_starting == 1); + svcpt->scp_nthrs_starting--; + + /* SVC_STOPPING may already be set here if someone else is trying + * to stop the service while this new thread has been dynamically + * forked. We still set SVC_RUNNING to let our creator know that + * we are now running, however we will exit as soon as possible */ + thread_add_flags(thread, SVC_RUNNING); + svcpt->scp_nthrs_running++; + spin_unlock(&svcpt->scp_lock); + + /* wake up our creator in case he's still waiting. */ + wake_up(&thread->t_ctl_waitq); + + /* + thread->t_watchdog = lc_watchdog_add(ptlrpc_server_get_timeout(svcpt), + NULL, NULL); + */ + + spin_lock(&svcpt->scp_rep_lock); + list_add(&rs->rs_list, &svcpt->scp_rep_idle); + wake_up(&svcpt->scp_rep_waitq); + spin_unlock(&svcpt->scp_rep_lock); + + CDEBUG(D_NET, "service thread %d (#%d) started\n", thread->t_id, + svcpt->scp_nthrs_running); + + /* XXX maintain a list of all managed devices: insert here */ + while (!ptlrpc_thread_stopping(thread)) { + if (ptlrpc_wait_event(svcpt, thread)) + break; + + ptlrpc_check_rqbd_pool(svcpt); + + if (ptlrpc_threads_need_create(svcpt)) { + /* Ignore return code - we tried... */ + ptlrpc_start_thread(svcpt, 0); + } + + /* Process all incoming reqs before handling any */ + if (ptlrpc_server_request_incoming(svcpt)) { + lu_context_enter(&env->le_ctx); + env->le_ses = NULL; + ptlrpc_server_handle_req_in(svcpt, thread); + lu_context_exit(&env->le_ctx); + + /* but limit ourselves in case of flood */ + if (counter++ < 100) + continue; + counter = 0; + } + + if (ptlrpc_at_check(svcpt)) + ptlrpc_at_check_timed(svcpt); + + if (ptlrpc_server_request_pending(svcpt, false)) { + lu_context_enter(&env->le_ctx); + ptlrpc_server_handle_request(svcpt, thread); + lu_context_exit(&env->le_ctx); + } + + if (ptlrpc_rqbd_pending(svcpt) && + ptlrpc_server_post_idle_rqbds(svcpt) < 0) { + /* I just failed to repost request buffers. + * Wait for a timeout (unless something else + * happens) before I try again */ + svcpt->scp_rqbd_timeout = cfs_time_seconds(1) / 10; + CDEBUG(D_RPCTRACE, "Posted buffers: %d\n", + svcpt->scp_nrqbds_posted); + } + } + + /* + lc_watchdog_delete(thread->t_watchdog); + thread->t_watchdog = NULL; + */ + +out_srv_fini: + /* + * deconstruct service specific state created by ptlrpc_start_thread() + */ + if (svc->srv_ops.so_thr_done != NULL) + svc->srv_ops.so_thr_done(thread); + + if (env != NULL) { + lu_context_fini(&env->le_ctx); + OBD_FREE_PTR(env); + } +out: + CDEBUG(D_RPCTRACE, "service thread [ %p : %u ] %d exiting: rc %d\n", + thread, thread->t_pid, thread->t_id, rc); + + spin_lock(&svcpt->scp_lock); + if (thread_test_and_clear_flags(thread, SVC_STARTING)) + svcpt->scp_nthrs_starting--; + + if (thread_test_and_clear_flags(thread, SVC_RUNNING)) { + /* must know immediately */ + svcpt->scp_nthrs_running--; + } + + thread->t_id = rc; + thread_add_flags(thread, SVC_STOPPED); + + wake_up(&thread->t_ctl_waitq); + spin_unlock(&svcpt->scp_lock); + + return rc; +} + +static int hrt_dont_sleep(struct ptlrpc_hr_thread *hrt, + struct list_head *replies) +{ + int result; + + spin_lock(&hrt->hrt_lock); + + list_splice_init(&hrt->hrt_queue, replies); + result = ptlrpc_hr.hr_stopping || !list_empty(replies); + + spin_unlock(&hrt->hrt_lock); + return result; +} + +/** + * Main body of "handle reply" function. + * It processes acked reply states + */ +static int ptlrpc_hr_main(void *arg) +{ + struct ptlrpc_hr_thread *hrt = (struct ptlrpc_hr_thread *)arg; + struct ptlrpc_hr_partition *hrp = hrt->hrt_partition; + LIST_HEAD (replies); + char threadname[20]; + int rc; + + snprintf(threadname, sizeof(threadname), "ptlrpc_hr%02d_%03d", + hrp->hrp_cpt, hrt->hrt_id); + unshare_fs_struct(); + + rc = cfs_cpt_bind(ptlrpc_hr.hr_cpt_table, hrp->hrp_cpt); + if (rc != 0) { + CWARN("Failed to bind %s on CPT %d of CPT table %p: rc = %d\n", + threadname, hrp->hrp_cpt, ptlrpc_hr.hr_cpt_table, rc); + } + + atomic_inc(&hrp->hrp_nstarted); + wake_up(&ptlrpc_hr.hr_waitq); + + while (!ptlrpc_hr.hr_stopping) { + l_wait_condition(hrt->hrt_waitq, hrt_dont_sleep(hrt, &replies)); + + while (!list_empty(&replies)) { + struct ptlrpc_reply_state *rs; + + rs = list_entry(replies.prev, + struct ptlrpc_reply_state, + rs_list); + list_del_init(&rs->rs_list); + ptlrpc_handle_rs(rs); + } + } + + atomic_inc(&hrp->hrp_nstopped); + wake_up(&ptlrpc_hr.hr_waitq); + + return 0; +} + +static void ptlrpc_stop_hr_threads(void) +{ + struct ptlrpc_hr_partition *hrp; + int i; + int j; + + ptlrpc_hr.hr_stopping = 1; + + cfs_percpt_for_each(hrp, i, ptlrpc_hr.hr_partitions) { + if (hrp->hrp_thrs == NULL) + continue; /* uninitialized */ + for (j = 0; j < hrp->hrp_nthrs; j++) + wake_up_all(&hrp->hrp_thrs[j].hrt_waitq); + } + + cfs_percpt_for_each(hrp, i, ptlrpc_hr.hr_partitions) { + if (hrp->hrp_thrs == NULL) + continue; /* uninitialized */ + wait_event(ptlrpc_hr.hr_waitq, + atomic_read(&hrp->hrp_nstopped) == + atomic_read(&hrp->hrp_nstarted)); + } +} + +static int ptlrpc_start_hr_threads(void) +{ + struct ptlrpc_hr_partition *hrp; + int i; + int j; + + cfs_percpt_for_each(hrp, i, ptlrpc_hr.hr_partitions) { + int rc = 0; + + for (j = 0; j < hrp->hrp_nthrs; j++) { + struct ptlrpc_hr_thread *hrt = &hrp->hrp_thrs[j]; + rc = PTR_ERR(kthread_run(ptlrpc_hr_main, + &hrp->hrp_thrs[j], + "ptlrpc_hr%02d_%03d", + hrp->hrp_cpt, + hrt->hrt_id)); + if (IS_ERR_VALUE(rc)) + break; + } + wait_event(ptlrpc_hr.hr_waitq, + atomic_read(&hrp->hrp_nstarted) == j); + if (!IS_ERR_VALUE(rc)) + continue; + + CERROR("Reply handling thread %d:%d Failed on starting: rc = %d\n", + i, j, rc); + ptlrpc_stop_hr_threads(); + return rc; + } + return 0; +} + +static void ptlrpc_svcpt_stop_threads(struct ptlrpc_service_part *svcpt) +{ + struct l_wait_info lwi = { 0 }; + struct ptlrpc_thread *thread; + LIST_HEAD (zombie); + + CDEBUG(D_INFO, "Stopping threads for service %s\n", + svcpt->scp_service->srv_name); + + spin_lock(&svcpt->scp_lock); + /* let the thread know that we would like it to stop asap */ + list_for_each_entry(thread, &svcpt->scp_threads, t_link) { + CDEBUG(D_INFO, "Stopping thread %s #%u\n", + svcpt->scp_service->srv_thread_name, thread->t_id); + thread_add_flags(thread, SVC_STOPPING); + } + + wake_up_all(&svcpt->scp_waitq); + + while (!list_empty(&svcpt->scp_threads)) { + thread = list_entry(svcpt->scp_threads.next, + struct ptlrpc_thread, t_link); + if (thread_is_stopped(thread)) { + list_del(&thread->t_link); + list_add(&thread->t_link, &zombie); + continue; + } + spin_unlock(&svcpt->scp_lock); + + CDEBUG(D_INFO, "waiting for stopping-thread %s #%u\n", + svcpt->scp_service->srv_thread_name, thread->t_id); + l_wait_event(thread->t_ctl_waitq, + thread_is_stopped(thread), &lwi); + + spin_lock(&svcpt->scp_lock); + } + + spin_unlock(&svcpt->scp_lock); + + while (!list_empty(&zombie)) { + thread = list_entry(zombie.next, + struct ptlrpc_thread, t_link); + list_del(&thread->t_link); + OBD_FREE_PTR(thread); + } +} + +/** + * Stops all threads of a particular service \a svc + */ +void ptlrpc_stop_all_threads(struct ptlrpc_service *svc) +{ + struct ptlrpc_service_part *svcpt; + int i; + + ptlrpc_service_for_each_part(svcpt, i, svc) { + if (svcpt->scp_service != NULL) + ptlrpc_svcpt_stop_threads(svcpt); + } +} +EXPORT_SYMBOL(ptlrpc_stop_all_threads); + +int ptlrpc_start_threads(struct ptlrpc_service *svc) +{ + int rc = 0; + int i; + int j; + + /* We require 2 threads min, see note in ptlrpc_server_handle_request */ + LASSERT(svc->srv_nthrs_cpt_init >= PTLRPC_NTHRS_INIT); + + for (i = 0; i < svc->srv_ncpts; i++) { + for (j = 0; j < svc->srv_nthrs_cpt_init; j++) { + rc = ptlrpc_start_thread(svc->srv_parts[i], 1); + if (rc == 0) + continue; + + if (rc != -EMFILE) + goto failed; + /* We have enough threads, don't start more. b=15759 */ + break; + } + } + + return 0; + failed: + CERROR("cannot start %s thread #%d_%d: rc %d\n", + svc->srv_thread_name, i, j, rc); + ptlrpc_stop_all_threads(svc); + return rc; +} +EXPORT_SYMBOL(ptlrpc_start_threads); + +int ptlrpc_start_thread(struct ptlrpc_service_part *svcpt, int wait) +{ + struct l_wait_info lwi = { 0 }; + struct ptlrpc_thread *thread; + struct ptlrpc_service *svc; + int rc; + + LASSERT(svcpt != NULL); + + svc = svcpt->scp_service; + + CDEBUG(D_RPCTRACE, "%s[%d] started %d min %d max %d\n", + svc->srv_name, svcpt->scp_cpt, svcpt->scp_nthrs_running, + svc->srv_nthrs_cpt_init, svc->srv_nthrs_cpt_limit); + + again: + if (unlikely(svc->srv_is_stopping)) + return -ESRCH; + + if (!ptlrpc_threads_increasable(svcpt) || + (OBD_FAIL_CHECK(OBD_FAIL_TGT_TOOMANY_THREADS) && + svcpt->scp_nthrs_running == svc->srv_nthrs_cpt_init - 1)) + return -EMFILE; + + OBD_CPT_ALLOC_PTR(thread, svc->srv_cptable, svcpt->scp_cpt); + if (thread == NULL) + return -ENOMEM; + init_waitqueue_head(&thread->t_ctl_waitq); + + spin_lock(&svcpt->scp_lock); + if (!ptlrpc_threads_increasable(svcpt)) { + spin_unlock(&svcpt->scp_lock); + OBD_FREE_PTR(thread); + return -EMFILE; + } + + if (svcpt->scp_nthrs_starting != 0) { + /* serialize starting because some modules (obdfilter) + * might require unique and contiguous t_id */ + LASSERT(svcpt->scp_nthrs_starting == 1); + spin_unlock(&svcpt->scp_lock); + OBD_FREE_PTR(thread); + if (wait) { + CDEBUG(D_INFO, "Waiting for creating thread %s #%d\n", + svc->srv_thread_name, svcpt->scp_thr_nextid); + schedule(); + goto again; + } + + CDEBUG(D_INFO, "Creating thread %s #%d race, retry later\n", + svc->srv_thread_name, svcpt->scp_thr_nextid); + return -EAGAIN; + } + + svcpt->scp_nthrs_starting++; + thread->t_id = svcpt->scp_thr_nextid++; + thread_add_flags(thread, SVC_STARTING); + thread->t_svcpt = svcpt; + + list_add(&thread->t_link, &svcpt->scp_threads); + spin_unlock(&svcpt->scp_lock); + + if (svcpt->scp_cpt >= 0) { + snprintf(thread->t_name, sizeof(thread->t_name), "%s%02d_%03d", + svc->srv_thread_name, svcpt->scp_cpt, thread->t_id); + } else { + snprintf(thread->t_name, sizeof(thread->t_name), "%s_%04d", + svc->srv_thread_name, thread->t_id); + } + + CDEBUG(D_RPCTRACE, "starting thread '%s'\n", thread->t_name); + rc = PTR_ERR(kthread_run(ptlrpc_main, thread, "%s", thread->t_name)); + if (IS_ERR_VALUE(rc)) { + CERROR("cannot start thread '%s': rc %d\n", + thread->t_name, rc); + spin_lock(&svcpt->scp_lock); + --svcpt->scp_nthrs_starting; + if (thread_is_stopping(thread)) { + /* this ptlrpc_thread is being handled + * by ptlrpc_svcpt_stop_threads now + */ + thread_add_flags(thread, SVC_STOPPED); + wake_up(&thread->t_ctl_waitq); + spin_unlock(&svcpt->scp_lock); + } else { + list_del(&thread->t_link); + spin_unlock(&svcpt->scp_lock); + OBD_FREE_PTR(thread); + } + return rc; + } + + if (!wait) + return 0; + + l_wait_event(thread->t_ctl_waitq, + thread_is_running(thread) || thread_is_stopped(thread), + &lwi); + + rc = thread_is_stopped(thread) ? thread->t_id : 0; + return rc; +} + +int ptlrpc_hr_init(void) +{ + struct ptlrpc_hr_partition *hrp; + struct ptlrpc_hr_thread *hrt; + int rc; + int i; + int j; + int weight; + + memset(&ptlrpc_hr, 0, sizeof(ptlrpc_hr)); + ptlrpc_hr.hr_cpt_table = cfs_cpt_table; + + ptlrpc_hr.hr_partitions = cfs_percpt_alloc(ptlrpc_hr.hr_cpt_table, + sizeof(*hrp)); + if (ptlrpc_hr.hr_partitions == NULL) + return -ENOMEM; + + init_waitqueue_head(&ptlrpc_hr.hr_waitq); + + weight = cpumask_weight(topology_thread_cpumask(0)); + + cfs_percpt_for_each(hrp, i, ptlrpc_hr.hr_partitions) { + hrp->hrp_cpt = i; + + atomic_set(&hrp->hrp_nstarted, 0); + atomic_set(&hrp->hrp_nstopped, 0); + + hrp->hrp_nthrs = cfs_cpt_weight(ptlrpc_hr.hr_cpt_table, i); + hrp->hrp_nthrs /= weight; + + LASSERT(hrp->hrp_nthrs > 0); + OBD_CPT_ALLOC(hrp->hrp_thrs, ptlrpc_hr.hr_cpt_table, i, + hrp->hrp_nthrs * sizeof(*hrt)); + if (hrp->hrp_thrs == NULL) { + rc = -ENOMEM; + goto out; + } + + for (j = 0; j < hrp->hrp_nthrs; j++) { + hrt = &hrp->hrp_thrs[j]; + + hrt->hrt_id = j; + hrt->hrt_partition = hrp; + init_waitqueue_head(&hrt->hrt_waitq); + spin_lock_init(&hrt->hrt_lock); + INIT_LIST_HEAD(&hrt->hrt_queue); + } + } + + rc = ptlrpc_start_hr_threads(); +out: + if (rc != 0) + ptlrpc_hr_fini(); + return rc; +} + +void ptlrpc_hr_fini(void) +{ + struct ptlrpc_hr_partition *hrp; + int i; + + if (ptlrpc_hr.hr_partitions == NULL) + return; + + ptlrpc_stop_hr_threads(); + + cfs_percpt_for_each(hrp, i, ptlrpc_hr.hr_partitions) { + if (hrp->hrp_thrs != NULL) { + OBD_FREE(hrp->hrp_thrs, + hrp->hrp_nthrs * sizeof(hrp->hrp_thrs[0])); + } + } + + cfs_percpt_free(ptlrpc_hr.hr_partitions); + ptlrpc_hr.hr_partitions = NULL; +} + + +/** + * Wait until all already scheduled replies are processed. + */ +static void ptlrpc_wait_replies(struct ptlrpc_service_part *svcpt) +{ + while (1) { + int rc; + struct l_wait_info lwi = LWI_TIMEOUT(cfs_time_seconds(10), + NULL, NULL); + + rc = l_wait_event(svcpt->scp_waitq, + atomic_read(&svcpt->scp_nreps_difficult) == 0, &lwi); + if (rc == 0) + break; + CWARN("Unexpectedly long timeout %s %p\n", + svcpt->scp_service->srv_name, svcpt->scp_service); + } +} + +static void +ptlrpc_service_del_atimer(struct ptlrpc_service *svc) +{ + struct ptlrpc_service_part *svcpt; + int i; + + /* early disarm AT timer... */ + ptlrpc_service_for_each_part(svcpt, i, svc) { + if (svcpt->scp_service != NULL) + cfs_timer_disarm(&svcpt->scp_at_timer); + } +} + +static void +ptlrpc_service_unlink_rqbd(struct ptlrpc_service *svc) +{ + struct ptlrpc_service_part *svcpt; + struct ptlrpc_request_buffer_desc *rqbd; + struct l_wait_info lwi; + int rc; + int i; + + /* All history will be culled when the next request buffer is + * freed in ptlrpc_service_purge_all() */ + svc->srv_hist_nrqbds_cpt_max = 0; + + rc = LNetClearLazyPortal(svc->srv_req_portal); + LASSERT(rc == 0); + + ptlrpc_service_for_each_part(svcpt, i, svc) { + if (svcpt->scp_service == NULL) + break; + + /* Unlink all the request buffers. This forces a 'final' + * event with its 'unlink' flag set for each posted rqbd */ + list_for_each_entry(rqbd, &svcpt->scp_rqbd_posted, + rqbd_list) { + rc = LNetMDUnlink(rqbd->rqbd_md_h); + LASSERT(rc == 0 || rc == -ENOENT); + } + } + + ptlrpc_service_for_each_part(svcpt, i, svc) { + if (svcpt->scp_service == NULL) + break; + + /* Wait for the network to release any buffers + * it's currently filling */ + spin_lock(&svcpt->scp_lock); + while (svcpt->scp_nrqbds_posted != 0) { + spin_unlock(&svcpt->scp_lock); + /* Network access will complete in finite time but + * the HUGE timeout lets us CWARN for visibility + * of sluggish NALs */ + lwi = LWI_TIMEOUT_INTERVAL( + cfs_time_seconds(LONG_UNLINK), + cfs_time_seconds(1), NULL, NULL); + rc = l_wait_event(svcpt->scp_waitq, + svcpt->scp_nrqbds_posted == 0, &lwi); + if (rc == -ETIMEDOUT) { + CWARN("Service %s waiting for request buffers\n", + svcpt->scp_service->srv_name); + } + spin_lock(&svcpt->scp_lock); + } + spin_unlock(&svcpt->scp_lock); + } +} + +static void +ptlrpc_service_purge_all(struct ptlrpc_service *svc) +{ + struct ptlrpc_service_part *svcpt; + struct ptlrpc_request_buffer_desc *rqbd; + struct ptlrpc_request *req; + struct ptlrpc_reply_state *rs; + int i; + + ptlrpc_service_for_each_part(svcpt, i, svc) { + if (svcpt->scp_service == NULL) + break; + + spin_lock(&svcpt->scp_rep_lock); + while (!list_empty(&svcpt->scp_rep_active)) { + rs = list_entry(svcpt->scp_rep_active.next, + struct ptlrpc_reply_state, rs_list); + spin_lock(&rs->rs_lock); + ptlrpc_schedule_difficult_reply(rs); + spin_unlock(&rs->rs_lock); + } + spin_unlock(&svcpt->scp_rep_lock); + + /* purge the request queue. NB No new replies (rqbds + * all unlinked) and no service threads, so I'm the only + * thread noodling the request queue now */ + while (!list_empty(&svcpt->scp_req_incoming)) { + req = list_entry(svcpt->scp_req_incoming.next, + struct ptlrpc_request, rq_list); + + list_del(&req->rq_list); + svcpt->scp_nreqs_incoming--; + ptlrpc_server_finish_request(svcpt, req); + } + + while (ptlrpc_server_request_pending(svcpt, true)) { + req = ptlrpc_server_request_get(svcpt, true); + ptlrpc_server_finish_active_request(svcpt, req); + } + + LASSERT(list_empty(&svcpt->scp_rqbd_posted)); + LASSERT(svcpt->scp_nreqs_incoming == 0); + LASSERT(svcpt->scp_nreqs_active == 0); + /* history should have been culled by + * ptlrpc_server_finish_request */ + LASSERT(svcpt->scp_hist_nrqbds == 0); + + /* Now free all the request buffers since nothing + * references them any more... */ + + while (!list_empty(&svcpt->scp_rqbd_idle)) { + rqbd = list_entry(svcpt->scp_rqbd_idle.next, + struct ptlrpc_request_buffer_desc, + rqbd_list); + ptlrpc_free_rqbd(rqbd); + } + ptlrpc_wait_replies(svcpt); + + while (!list_empty(&svcpt->scp_rep_idle)) { + rs = list_entry(svcpt->scp_rep_idle.next, + struct ptlrpc_reply_state, + rs_list); + list_del(&rs->rs_list); + OBD_FREE_LARGE(rs, svc->srv_max_reply_size); + } + } +} + +static void +ptlrpc_service_free(struct ptlrpc_service *svc) +{ + struct ptlrpc_service_part *svcpt; + struct ptlrpc_at_array *array; + int i; + + ptlrpc_service_for_each_part(svcpt, i, svc) { + if (svcpt->scp_service == NULL) + break; + + /* In case somebody rearmed this in the meantime */ + cfs_timer_disarm(&svcpt->scp_at_timer); + array = &svcpt->scp_at_array; + + if (array->paa_reqs_array != NULL) { + OBD_FREE(array->paa_reqs_array, + sizeof(struct list_head) * array->paa_size); + array->paa_reqs_array = NULL; + } + + if (array->paa_reqs_count != NULL) { + OBD_FREE(array->paa_reqs_count, + sizeof(__u32) * array->paa_size); + array->paa_reqs_count = NULL; + } + } + + ptlrpc_service_for_each_part(svcpt, i, svc) + OBD_FREE_PTR(svcpt); + + if (svc->srv_cpts != NULL) + cfs_expr_list_values_free(svc->srv_cpts, svc->srv_ncpts); + + OBD_FREE(svc, offsetof(struct ptlrpc_service, + srv_parts[svc->srv_ncpts])); +} + +int ptlrpc_unregister_service(struct ptlrpc_service *service) +{ + CDEBUG(D_NET, "%s: tearing down\n", service->srv_name); + + service->srv_is_stopping = 1; + + mutex_lock(&ptlrpc_all_services_mutex); + list_del_init(&service->srv_list); + mutex_unlock(&ptlrpc_all_services_mutex); + + ptlrpc_service_del_atimer(service); + ptlrpc_stop_all_threads(service); + + ptlrpc_service_unlink_rqbd(service); + ptlrpc_service_purge_all(service); + ptlrpc_service_nrs_cleanup(service); + + ptlrpc_lprocfs_unregister_service(service); + + ptlrpc_service_free(service); + + return 0; +} +EXPORT_SYMBOL(ptlrpc_unregister_service); + +/** + * Returns 0 if the service is healthy. + * + * Right now, it just checks to make sure that requests aren't languishing + * in the queue. We'll use this health check to govern whether a node needs + * to be shot, so it's intentionally non-aggressive. */ +int ptlrpc_svcpt_health_check(struct ptlrpc_service_part *svcpt) +{ + struct ptlrpc_request *request = NULL; + struct timeval right_now; + long timediff; + + do_gettimeofday(&right_now); + + spin_lock(&svcpt->scp_req_lock); + /* How long has the next entry been waiting? */ + if (ptlrpc_server_high_pending(svcpt, true)) + request = ptlrpc_nrs_req_peek_nolock(svcpt, true); + else if (ptlrpc_server_normal_pending(svcpt, true)) + request = ptlrpc_nrs_req_peek_nolock(svcpt, false); + + if (request == NULL) { + spin_unlock(&svcpt->scp_req_lock); + return 0; + } + + timediff = cfs_timeval_sub(&right_now, &request->rq_arrival_time, NULL); + spin_unlock(&svcpt->scp_req_lock); + + if ((timediff / ONE_MILLION) > + (AT_OFF ? obd_timeout * 3 / 2 : at_max)) { + CERROR("%s: unhealthy - request has been waiting %lds\n", + svcpt->scp_service->srv_name, timediff / ONE_MILLION); + return -1; + } + + return 0; +} + +int +ptlrpc_service_health_check(struct ptlrpc_service *svc) +{ + struct ptlrpc_service_part *svcpt; + int i; + + if (svc == NULL) + return 0; + + ptlrpc_service_for_each_part(svcpt, i, svc) { + int rc = ptlrpc_svcpt_health_check(svcpt); + + if (rc != 0) + return rc; + } + return 0; +} +EXPORT_SYMBOL(ptlrpc_service_health_check); diff --git a/drivers/staging/lustre/lustre/ptlrpc/wiretest.c b/drivers/staging/lustre/lustre/ptlrpc/wiretest.c new file mode 100644 index 000000000..d6d92046c --- /dev/null +++ b/drivers/staging/lustre/lustre/ptlrpc/wiretest.c @@ -0,0 +1,4492 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2011, Intel Corporation. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + */ + +#define DEBUG_SUBSYSTEM S_RPC + +#include <linux/fs.h> +#include <linux/posix_acl_xattr.h> + +#include "../include/obd_support.h" +#include "../include/obd_class.h" +#include "../include/lustre_net.h" +#include "../include/lustre_disk.h" +void lustre_assert_wire_constants(void) +{ + /* Wire protocol assertions generated by 'wirecheck' + * (make -C lustre/utils newwiretest) + * running on Linux centos6-bis 2.6.32-358.0.1.el6-head + * #3 SMP Wed Apr 17 17:37:43 CEST 2013 + * with gcc version 4.4.6 20110731 (Red Hat 4.4.6-3) (GCC) + */ + + /* Constants... */ + LASSERTF(PTL_RPC_MSG_REQUEST == 4711, "found %lld\n", + (long long)PTL_RPC_MSG_REQUEST); + LASSERTF(PTL_RPC_MSG_ERR == 4712, "found %lld\n", + (long long)PTL_RPC_MSG_ERR); + LASSERTF(PTL_RPC_MSG_REPLY == 4713, "found %lld\n", + (long long)PTL_RPC_MSG_REPLY); + LASSERTF(MDS_DIR_END_OFF == 0xfffffffffffffffeULL, "found 0x%.16llxULL\n", + MDS_DIR_END_OFF); + LASSERTF(DEAD_HANDLE_MAGIC == 0xdeadbeefcafebabeULL, "found 0x%.16llxULL\n", + DEAD_HANDLE_MAGIC); + CLASSERT(MTI_NAME_MAXLEN == 64); + LASSERTF(OST_REPLY == 0, "found %lld\n", + (long long)OST_REPLY); + LASSERTF(OST_GETATTR == 1, "found %lld\n", + (long long)OST_GETATTR); + LASSERTF(OST_SETATTR == 2, "found %lld\n", + (long long)OST_SETATTR); + LASSERTF(OST_READ == 3, "found %lld\n", + (long long)OST_READ); + LASSERTF(OST_WRITE == 4, "found %lld\n", + (long long)OST_WRITE); + LASSERTF(OST_CREATE == 5, "found %lld\n", + (long long)OST_CREATE); + LASSERTF(OST_DESTROY == 6, "found %lld\n", + (long long)OST_DESTROY); + LASSERTF(OST_GET_INFO == 7, "found %lld\n", + (long long)OST_GET_INFO); + LASSERTF(OST_CONNECT == 8, "found %lld\n", + (long long)OST_CONNECT); + LASSERTF(OST_DISCONNECT == 9, "found %lld\n", + (long long)OST_DISCONNECT); + LASSERTF(OST_PUNCH == 10, "found %lld\n", + (long long)OST_PUNCH); + LASSERTF(OST_OPEN == 11, "found %lld\n", + (long long)OST_OPEN); + LASSERTF(OST_CLOSE == 12, "found %lld\n", + (long long)OST_CLOSE); + LASSERTF(OST_STATFS == 13, "found %lld\n", + (long long)OST_STATFS); + LASSERTF(OST_SYNC == 16, "found %lld\n", + (long long)OST_SYNC); + LASSERTF(OST_SET_INFO == 17, "found %lld\n", + (long long)OST_SET_INFO); + LASSERTF(OST_QUOTACHECK == 18, "found %lld\n", + (long long)OST_QUOTACHECK); + LASSERTF(OST_QUOTACTL == 19, "found %lld\n", + (long long)OST_QUOTACTL); + LASSERTF(OST_QUOTA_ADJUST_QUNIT == 20, "found %lld\n", + (long long)OST_QUOTA_ADJUST_QUNIT); + LASSERTF(OST_LAST_OPC == 21, "found %lld\n", + (long long)OST_LAST_OPC); + LASSERTF(OBD_OBJECT_EOF == 0xffffffffffffffffULL, "found 0x%.16llxULL\n", + OBD_OBJECT_EOF); + LASSERTF(OST_MIN_PRECREATE == 32, "found %lld\n", + (long long)OST_MIN_PRECREATE); + LASSERTF(OST_MAX_PRECREATE == 20000, "found %lld\n", + (long long)OST_MAX_PRECREATE); + LASSERTF(OST_LVB_ERR_INIT == 0xffbadbad80000000ULL, "found 0x%.16llxULL\n", + OST_LVB_ERR_INIT); + LASSERTF(OST_LVB_ERR_MASK == 0xffbadbad00000000ULL, "found 0x%.16llxULL\n", + OST_LVB_ERR_MASK); + LASSERTF(MDS_FIRST_OPC == 33, "found %lld\n", + (long long)MDS_FIRST_OPC); + LASSERTF(MDS_GETATTR == 33, "found %lld\n", + (long long)MDS_GETATTR); + LASSERTF(MDS_GETATTR_NAME == 34, "found %lld\n", + (long long)MDS_GETATTR_NAME); + LASSERTF(MDS_CLOSE == 35, "found %lld\n", + (long long)MDS_CLOSE); + LASSERTF(MDS_REINT == 36, "found %lld\n", + (long long)MDS_REINT); + LASSERTF(MDS_READPAGE == 37, "found %lld\n", + (long long)MDS_READPAGE); + LASSERTF(MDS_CONNECT == 38, "found %lld\n", + (long long)MDS_CONNECT); + LASSERTF(MDS_DISCONNECT == 39, "found %lld\n", + (long long)MDS_DISCONNECT); + LASSERTF(MDS_GETSTATUS == 40, "found %lld\n", + (long long)MDS_GETSTATUS); + LASSERTF(MDS_STATFS == 41, "found %lld\n", + (long long)MDS_STATFS); + LASSERTF(MDS_PIN == 42, "found %lld\n", + (long long)MDS_PIN); + LASSERTF(MDS_UNPIN == 43, "found %lld\n", + (long long)MDS_UNPIN); + LASSERTF(MDS_SYNC == 44, "found %lld\n", + (long long)MDS_SYNC); + LASSERTF(MDS_DONE_WRITING == 45, "found %lld\n", + (long long)MDS_DONE_WRITING); + LASSERTF(MDS_SET_INFO == 46, "found %lld\n", + (long long)MDS_SET_INFO); + LASSERTF(MDS_QUOTACHECK == 47, "found %lld\n", + (long long)MDS_QUOTACHECK); + LASSERTF(MDS_QUOTACTL == 48, "found %lld\n", + (long long)MDS_QUOTACTL); + LASSERTF(MDS_GETXATTR == 49, "found %lld\n", + (long long)MDS_GETXATTR); + LASSERTF(MDS_SETXATTR == 50, "found %lld\n", + (long long)MDS_SETXATTR); + LASSERTF(MDS_WRITEPAGE == 51, "found %lld\n", + (long long)MDS_WRITEPAGE); + LASSERTF(MDS_IS_SUBDIR == 52, "found %lld\n", + (long long)MDS_IS_SUBDIR); + LASSERTF(MDS_GET_INFO == 53, "found %lld\n", + (long long)MDS_GET_INFO); + LASSERTF(MDS_HSM_STATE_GET == 54, "found %lld\n", + (long long)MDS_HSM_STATE_GET); + LASSERTF(MDS_HSM_STATE_SET == 55, "found %lld\n", + (long long)MDS_HSM_STATE_SET); + LASSERTF(MDS_HSM_ACTION == 56, "found %lld\n", + (long long)MDS_HSM_ACTION); + LASSERTF(MDS_HSM_PROGRESS == 57, "found %lld\n", + (long long)MDS_HSM_PROGRESS); + LASSERTF(MDS_HSM_REQUEST == 58, "found %lld\n", + (long long)MDS_HSM_REQUEST); + LASSERTF(MDS_HSM_CT_REGISTER == 59, "found %lld\n", + (long long)MDS_HSM_CT_REGISTER); + LASSERTF(MDS_HSM_CT_UNREGISTER == 60, "found %lld\n", + (long long)MDS_HSM_CT_UNREGISTER); + LASSERTF(MDS_SWAP_LAYOUTS == 61, "found %lld\n", + (long long)MDS_SWAP_LAYOUTS); + LASSERTF(MDS_LAST_OPC == 62, "found %lld\n", + (long long)MDS_LAST_OPC); + LASSERTF(REINT_SETATTR == 1, "found %lld\n", + (long long)REINT_SETATTR); + LASSERTF(REINT_CREATE == 2, "found %lld\n", + (long long)REINT_CREATE); + LASSERTF(REINT_LINK == 3, "found %lld\n", + (long long)REINT_LINK); + LASSERTF(REINT_UNLINK == 4, "found %lld\n", + (long long)REINT_UNLINK); + LASSERTF(REINT_RENAME == 5, "found %lld\n", + (long long)REINT_RENAME); + LASSERTF(REINT_OPEN == 6, "found %lld\n", + (long long)REINT_OPEN); + LASSERTF(REINT_SETXATTR == 7, "found %lld\n", + (long long)REINT_SETXATTR); + LASSERTF(REINT_RMENTRY == 8, "found %lld\n", + (long long)REINT_RMENTRY); + LASSERTF(REINT_MAX == 9, "found %lld\n", + (long long)REINT_MAX); + LASSERTF(DISP_IT_EXECD == 0x00000001UL, "found 0x%.8xUL\n", + (unsigned)DISP_IT_EXECD); + LASSERTF(DISP_LOOKUP_EXECD == 0x00000002UL, "found 0x%.8xUL\n", + (unsigned)DISP_LOOKUP_EXECD); + LASSERTF(DISP_LOOKUP_NEG == 0x00000004UL, "found 0x%.8xUL\n", + (unsigned)DISP_LOOKUP_NEG); + LASSERTF(DISP_LOOKUP_POS == 0x00000008UL, "found 0x%.8xUL\n", + (unsigned)DISP_LOOKUP_POS); + LASSERTF(DISP_OPEN_CREATE == 0x00000010UL, "found 0x%.8xUL\n", + (unsigned)DISP_OPEN_CREATE); + LASSERTF(DISP_OPEN_OPEN == 0x00000020UL, "found 0x%.8xUL\n", + (unsigned)DISP_OPEN_OPEN); + LASSERTF(DISP_ENQ_COMPLETE == 0x00400000UL, "found 0x%.8xUL\n", + (unsigned)DISP_ENQ_COMPLETE); + LASSERTF(DISP_ENQ_OPEN_REF == 0x00800000UL, "found 0x%.8xUL\n", + (unsigned)DISP_ENQ_OPEN_REF); + LASSERTF(DISP_ENQ_CREATE_REF == 0x01000000UL, "found 0x%.8xUL\n", + (unsigned)DISP_ENQ_CREATE_REF); + LASSERTF(DISP_OPEN_LOCK == 0x02000000UL, "found 0x%.8xUL\n", + (unsigned)DISP_OPEN_LOCK); + LASSERTF(MDS_STATUS_CONN == 1, "found %lld\n", + (long long)MDS_STATUS_CONN); + LASSERTF(MDS_STATUS_LOV == 2, "found %lld\n", + (long long)MDS_STATUS_LOV); + LASSERTF(LUSTRE_BFLAG_UNCOMMITTED_WRITES == 1, "found %lld\n", + (long long)LUSTRE_BFLAG_UNCOMMITTED_WRITES); + LASSERTF(MF_SOM_CHANGE == 0x00000001UL, "found 0x%.8xUL\n", + (unsigned)MF_SOM_CHANGE); + LASSERTF(MF_EPOCH_OPEN == 0x00000002UL, "found 0x%.8xUL\n", + (unsigned)MF_EPOCH_OPEN); + LASSERTF(MF_EPOCH_CLOSE == 0x00000004UL, "found 0x%.8xUL\n", + (unsigned)MF_EPOCH_CLOSE); + LASSERTF(MF_MDC_CANCEL_FID1 == 0x00000008UL, "found 0x%.8xUL\n", + (unsigned)MF_MDC_CANCEL_FID1); + LASSERTF(MF_MDC_CANCEL_FID2 == 0x00000010UL, "found 0x%.8xUL\n", + (unsigned)MF_MDC_CANCEL_FID2); + LASSERTF(MF_MDC_CANCEL_FID3 == 0x00000020UL, "found 0x%.8xUL\n", + (unsigned)MF_MDC_CANCEL_FID3); + LASSERTF(MF_MDC_CANCEL_FID4 == 0x00000040UL, "found 0x%.8xUL\n", + (unsigned)MF_MDC_CANCEL_FID4); + LASSERTF(MF_SOM_AU == 0x00000080UL, "found 0x%.8xUL\n", + (unsigned)MF_SOM_AU); + LASSERTF(MF_GETATTR_LOCK == 0x00000100UL, "found 0x%.8xUL\n", + (unsigned)MF_GETATTR_LOCK); + LASSERTF(MDS_ATTR_MODE == 0x0000000000000001ULL, "found 0x%.16llxULL\n", + (long long)MDS_ATTR_MODE); + LASSERTF(MDS_ATTR_UID == 0x0000000000000002ULL, "found 0x%.16llxULL\n", + (long long)MDS_ATTR_UID); + LASSERTF(MDS_ATTR_GID == 0x0000000000000004ULL, "found 0x%.16llxULL\n", + (long long)MDS_ATTR_GID); + LASSERTF(MDS_ATTR_SIZE == 0x0000000000000008ULL, "found 0x%.16llxULL\n", + (long long)MDS_ATTR_SIZE); + LASSERTF(MDS_ATTR_ATIME == 0x0000000000000010ULL, "found 0x%.16llxULL\n", + (long long)MDS_ATTR_ATIME); + LASSERTF(MDS_ATTR_MTIME == 0x0000000000000020ULL, "found 0x%.16llxULL\n", + (long long)MDS_ATTR_MTIME); + LASSERTF(MDS_ATTR_CTIME == 0x0000000000000040ULL, "found 0x%.16llxULL\n", + (long long)MDS_ATTR_CTIME); + LASSERTF(MDS_ATTR_ATIME_SET == 0x0000000000000080ULL, "found 0x%.16llxULL\n", + (long long)MDS_ATTR_ATIME_SET); + LASSERTF(MDS_ATTR_MTIME_SET == 0x0000000000000100ULL, "found 0x%.16llxULL\n", + (long long)MDS_ATTR_MTIME_SET); + LASSERTF(MDS_ATTR_FORCE == 0x0000000000000200ULL, "found 0x%.16llxULL\n", + (long long)MDS_ATTR_FORCE); + LASSERTF(MDS_ATTR_ATTR_FLAG == 0x0000000000000400ULL, "found 0x%.16llxULL\n", + (long long)MDS_ATTR_ATTR_FLAG); + LASSERTF(MDS_ATTR_KILL_SUID == 0x0000000000000800ULL, "found 0x%.16llxULL\n", + (long long)MDS_ATTR_KILL_SUID); + LASSERTF(MDS_ATTR_KILL_SGID == 0x0000000000001000ULL, "found 0x%.16llxULL\n", + (long long)MDS_ATTR_KILL_SGID); + LASSERTF(MDS_ATTR_CTIME_SET == 0x0000000000002000ULL, "found 0x%.16llxULL\n", + (long long)MDS_ATTR_CTIME_SET); + LASSERTF(MDS_ATTR_FROM_OPEN == 0x0000000000004000ULL, "found 0x%.16llxULL\n", + (long long)MDS_ATTR_FROM_OPEN); + LASSERTF(MDS_ATTR_BLOCKS == 0x0000000000008000ULL, "found 0x%.16llxULL\n", + (long long)MDS_ATTR_BLOCKS); + LASSERTF(FLD_QUERY == 900, "found %lld\n", + (long long)FLD_QUERY); + LASSERTF(FLD_FIRST_OPC == 900, "found %lld\n", + (long long)FLD_FIRST_OPC); + LASSERTF(FLD_LAST_OPC == 901, "found %lld\n", + (long long)FLD_LAST_OPC); + LASSERTF(SEQ_QUERY == 700, "found %lld\n", + (long long)SEQ_QUERY); + LASSERTF(SEQ_FIRST_OPC == 700, "found %lld\n", + (long long)SEQ_FIRST_OPC); + LASSERTF(SEQ_LAST_OPC == 701, "found %lld\n", + (long long)SEQ_LAST_OPC); + LASSERTF(SEQ_ALLOC_SUPER == 0, "found %lld\n", + (long long)SEQ_ALLOC_SUPER); + LASSERTF(SEQ_ALLOC_META == 1, "found %lld\n", + (long long)SEQ_ALLOC_META); + LASSERTF(LDLM_ENQUEUE == 101, "found %lld\n", + (long long)LDLM_ENQUEUE); + LASSERTF(LDLM_CONVERT == 102, "found %lld\n", + (long long)LDLM_CONVERT); + LASSERTF(LDLM_CANCEL == 103, "found %lld\n", + (long long)LDLM_CANCEL); + LASSERTF(LDLM_BL_CALLBACK == 104, "found %lld\n", + (long long)LDLM_BL_CALLBACK); + LASSERTF(LDLM_CP_CALLBACK == 105, "found %lld\n", + (long long)LDLM_CP_CALLBACK); + LASSERTF(LDLM_GL_CALLBACK == 106, "found %lld\n", + (long long)LDLM_GL_CALLBACK); + LASSERTF(LDLM_SET_INFO == 107, "found %lld\n", + (long long)LDLM_SET_INFO); + LASSERTF(LDLM_LAST_OPC == 108, "found %lld\n", + (long long)LDLM_LAST_OPC); + LASSERTF(LCK_MINMODE == 0, "found %lld\n", + (long long)LCK_MINMODE); + LASSERTF(LCK_EX == 1, "found %lld\n", + (long long)LCK_EX); + LASSERTF(LCK_PW == 2, "found %lld\n", + (long long)LCK_PW); + LASSERTF(LCK_PR == 4, "found %lld\n", + (long long)LCK_PR); + LASSERTF(LCK_CW == 8, "found %lld\n", + (long long)LCK_CW); + LASSERTF(LCK_CR == 16, "found %lld\n", + (long long)LCK_CR); + LASSERTF(LCK_NL == 32, "found %lld\n", + (long long)LCK_NL); + LASSERTF(LCK_GROUP == 64, "found %lld\n", + (long long)LCK_GROUP); + LASSERTF(LCK_COS == 128, "found %lld\n", + (long long)LCK_COS); + LASSERTF(LCK_MAXMODE == 129, "found %lld\n", + (long long)LCK_MAXMODE); + LASSERTF(LCK_MODE_NUM == 8, "found %lld\n", + (long long)LCK_MODE_NUM); + CLASSERT(LDLM_PLAIN == 10); + CLASSERT(LDLM_EXTENT == 11); + CLASSERT(LDLM_FLOCK == 12); + CLASSERT(LDLM_IBITS == 13); + CLASSERT(LDLM_MAX_TYPE == 14); + CLASSERT(LUSTRE_RES_ID_SEQ_OFF == 0); + CLASSERT(LUSTRE_RES_ID_VER_OID_OFF == 1); + LASSERTF(UPDATE_OBJ == 1000, "found %lld\n", + (long long)UPDATE_OBJ); + LASSERTF(UPDATE_LAST_OPC == 1001, "found %lld\n", + (long long)UPDATE_LAST_OPC); + CLASSERT(LUSTRE_RES_ID_QUOTA_SEQ_OFF == 2); + CLASSERT(LUSTRE_RES_ID_QUOTA_VER_OID_OFF == 3); + CLASSERT(LUSTRE_RES_ID_HSH_OFF == 3); + CLASSERT(LQUOTA_TYPE_USR == 0); + CLASSERT(LQUOTA_TYPE_GRP == 1); + CLASSERT(LQUOTA_RES_MD == 1); + CLASSERT(LQUOTA_RES_DT == 2); + LASSERTF(OBD_PING == 400, "found %lld\n", + (long long)OBD_PING); + LASSERTF(OBD_LOG_CANCEL == 401, "found %lld\n", + (long long)OBD_LOG_CANCEL); + LASSERTF(OBD_QC_CALLBACK == 402, "found %lld\n", + (long long)OBD_QC_CALLBACK); + LASSERTF(OBD_IDX_READ == 403, "found %lld\n", + (long long)OBD_IDX_READ); + LASSERTF(OBD_LAST_OPC == 404, "found %lld\n", + (long long)OBD_LAST_OPC); + LASSERTF(QUOTA_DQACQ == 601, "found %lld\n", + (long long)QUOTA_DQACQ); + LASSERTF(QUOTA_DQREL == 602, "found %lld\n", + (long long)QUOTA_DQREL); + LASSERTF(QUOTA_LAST_OPC == 603, "found %lld\n", + (long long)QUOTA_LAST_OPC); + LASSERTF(MGS_CONNECT == 250, "found %lld\n", + (long long)MGS_CONNECT); + LASSERTF(MGS_DISCONNECT == 251, "found %lld\n", + (long long)MGS_DISCONNECT); + LASSERTF(MGS_EXCEPTION == 252, "found %lld\n", + (long long)MGS_EXCEPTION); + LASSERTF(MGS_TARGET_REG == 253, "found %lld\n", + (long long)MGS_TARGET_REG); + LASSERTF(MGS_TARGET_DEL == 254, "found %lld\n", + (long long)MGS_TARGET_DEL); + LASSERTF(MGS_SET_INFO == 255, "found %lld\n", + (long long)MGS_SET_INFO); + LASSERTF(MGS_LAST_OPC == 257, "found %lld\n", + (long long)MGS_LAST_OPC); + LASSERTF(SEC_CTX_INIT == 801, "found %lld\n", + (long long)SEC_CTX_INIT); + LASSERTF(SEC_CTX_INIT_CONT == 802, "found %lld\n", + (long long)SEC_CTX_INIT_CONT); + LASSERTF(SEC_CTX_FINI == 803, "found %lld\n", + (long long)SEC_CTX_FINI); + LASSERTF(SEC_LAST_OPC == 804, "found %lld\n", + (long long)SEC_LAST_OPC); + /* Sizes and Offsets */ + + /* Checks for struct obd_uuid */ + LASSERTF((int)sizeof(struct obd_uuid) == 40, "found %lld\n", + (long long)(int)sizeof(struct obd_uuid)); + + /* Checks for struct lu_seq_range */ + LASSERTF((int)sizeof(struct lu_seq_range) == 24, "found %lld\n", + (long long)(int)sizeof(struct lu_seq_range)); + LASSERTF((int)offsetof(struct lu_seq_range, lsr_start) == 0, "found %lld\n", + (long long)(int)offsetof(struct lu_seq_range, lsr_start)); + LASSERTF((int)sizeof(((struct lu_seq_range *)0)->lsr_start) == 8, "found %lld\n", + (long long)(int)sizeof(((struct lu_seq_range *)0)->lsr_start)); + LASSERTF((int)offsetof(struct lu_seq_range, lsr_end) == 8, "found %lld\n", + (long long)(int)offsetof(struct lu_seq_range, lsr_end)); + LASSERTF((int)sizeof(((struct lu_seq_range *)0)->lsr_end) == 8, "found %lld\n", + (long long)(int)sizeof(((struct lu_seq_range *)0)->lsr_end)); + LASSERTF((int)offsetof(struct lu_seq_range, lsr_index) == 16, "found %lld\n", + (long long)(int)offsetof(struct lu_seq_range, lsr_index)); + LASSERTF((int)sizeof(((struct lu_seq_range *)0)->lsr_index) == 4, "found %lld\n", + (long long)(int)sizeof(((struct lu_seq_range *)0)->lsr_index)); + LASSERTF((int)offsetof(struct lu_seq_range, lsr_flags) == 20, "found %lld\n", + (long long)(int)offsetof(struct lu_seq_range, lsr_flags)); + LASSERTF((int)sizeof(((struct lu_seq_range *)0)->lsr_flags) == 4, "found %lld\n", + (long long)(int)sizeof(((struct lu_seq_range *)0)->lsr_flags)); + LASSERTF(LU_SEQ_RANGE_MDT == 0, "found %lld\n", + (long long)LU_SEQ_RANGE_MDT); + LASSERTF(LU_SEQ_RANGE_OST == 1, "found %lld\n", + (long long)LU_SEQ_RANGE_OST); + + /* Checks for struct lustre_mdt_attrs */ + LASSERTF((int)sizeof(struct lustre_mdt_attrs) == 24, "found %lld\n", + (long long)(int)sizeof(struct lustre_mdt_attrs)); + LASSERTF((int)offsetof(struct lustre_mdt_attrs, lma_compat) == 0, "found %lld\n", + (long long)(int)offsetof(struct lustre_mdt_attrs, lma_compat)); + LASSERTF((int)sizeof(((struct lustre_mdt_attrs *)0)->lma_compat) == 4, "found %lld\n", + (long long)(int)sizeof(((struct lustre_mdt_attrs *)0)->lma_compat)); + LASSERTF((int)offsetof(struct lustre_mdt_attrs, lma_incompat) == 4, "found %lld\n", + (long long)(int)offsetof(struct lustre_mdt_attrs, lma_incompat)); + LASSERTF((int)sizeof(((struct lustre_mdt_attrs *)0)->lma_incompat) == 4, "found %lld\n", + (long long)(int)sizeof(((struct lustre_mdt_attrs *)0)->lma_incompat)); + LASSERTF((int)offsetof(struct lustre_mdt_attrs, lma_self_fid) == 8, "found %lld\n", + (long long)(int)offsetof(struct lustre_mdt_attrs, lma_self_fid)); + LASSERTF((int)sizeof(((struct lustre_mdt_attrs *)0)->lma_self_fid) == 16, "found %lld\n", + (long long)(int)sizeof(((struct lustre_mdt_attrs *)0)->lma_self_fid)); + LASSERTF(LMAI_RELEASED == 0x00000001UL, "found 0x%.8xUL\n", + (unsigned)LMAI_RELEASED); + LASSERTF(LMAC_HSM == 0x00000001UL, "found 0x%.8xUL\n", + (unsigned)LMAC_HSM); + LASSERTF(LMAC_SOM == 0x00000002UL, "found 0x%.8xUL\n", + (unsigned)LMAC_SOM); + LASSERTF(LMAC_NOT_IN_OI == 0x00000004UL, "found 0x%.8xUL\n", + (unsigned)LMAC_NOT_IN_OI); + LASSERTF(LMAC_FID_ON_OST == 0x00000008UL, "found 0x%.8xUL\n", + (unsigned)LMAC_FID_ON_OST); + LASSERTF(OBJ_CREATE == 1, "found %lld\n", + (long long)OBJ_CREATE); + LASSERTF(OBJ_DESTROY == 2, "found %lld\n", + (long long)OBJ_DESTROY); + LASSERTF(OBJ_REF_ADD == 3, "found %lld\n", + (long long)OBJ_REF_ADD); + LASSERTF(OBJ_REF_DEL == 4, "found %lld\n", + (long long)OBJ_REF_DEL); + LASSERTF(OBJ_ATTR_SET == 5, "found %lld\n", + (long long)OBJ_ATTR_SET); + LASSERTF(OBJ_ATTR_GET == 6, "found %lld\n", + (long long)OBJ_ATTR_GET); + LASSERTF(OBJ_XATTR_SET == 7, "found %lld\n", + (long long)OBJ_XATTR_SET); + LASSERTF(OBJ_XATTR_GET == 8, "found %lld\n", + (long long)OBJ_XATTR_GET); + LASSERTF(OBJ_INDEX_LOOKUP == 9, "found %lld\n", + (long long)OBJ_INDEX_LOOKUP); + LASSERTF(OBJ_INDEX_LOOKUP == 9, "found %lld\n", + (long long)OBJ_INDEX_LOOKUP); + LASSERTF(OBJ_INDEX_INSERT == 10, "found %lld\n", + (long long)OBJ_INDEX_INSERT); + LASSERTF(OBJ_INDEX_DELETE == 11, "found %lld\n", + (long long)OBJ_INDEX_DELETE); + + /* Checks for struct ost_id */ + LASSERTF((int)sizeof(struct ost_id) == 16, "found %lld\n", + (long long)(int)sizeof(struct ost_id)); + LASSERTF((int)offsetof(struct ost_id, oi) == 0, "found %lld\n", + (long long)(int)offsetof(struct ost_id, oi)); + LASSERTF((int)sizeof(((struct ost_id *)0)->oi) == 16, "found %lld\n", + (long long)(int)sizeof(((struct ost_id *)0)->oi)); + LASSERTF(LUSTRE_FID_INIT_OID == 1, "found %lld\n", + (long long)LUSTRE_FID_INIT_OID); + LASSERTF(FID_SEQ_OST_MDT0 == 0, "found %lld\n", + (long long)FID_SEQ_OST_MDT0); + LASSERTF(FID_SEQ_LLOG == 1, "found %lld\n", + (long long)FID_SEQ_LLOG); + LASSERTF(FID_SEQ_ECHO == 2, "found %lld\n", + (long long)FID_SEQ_ECHO); + LASSERTF(FID_SEQ_OST_MDT1 == 3, "found %lld\n", + (long long)FID_SEQ_OST_MDT1); + LASSERTF(FID_SEQ_OST_MAX == 9, "found %lld\n", + (long long)FID_SEQ_OST_MAX); + LASSERTF(FID_SEQ_RSVD == 11, "found %lld\n", + (long long)FID_SEQ_RSVD); + LASSERTF(FID_SEQ_IGIF == 12, "found %lld\n", + (long long)FID_SEQ_IGIF); + LASSERTF(FID_SEQ_IGIF_MAX == 0x00000000ffffffffULL, "found 0x%.16llxULL\n", + (long long)FID_SEQ_IGIF_MAX); + LASSERTF(FID_SEQ_IDIF == 0x0000000100000000ULL, "found 0x%.16llxULL\n", + (long long)FID_SEQ_IDIF); + LASSERTF(FID_SEQ_IDIF_MAX == 0x00000001ffffffffULL, "found 0x%.16llxULL\n", + (long long)FID_SEQ_IDIF_MAX); + LASSERTF(FID_SEQ_START == 0x0000000200000000ULL, "found 0x%.16llxULL\n", + (long long)FID_SEQ_START); + LASSERTF(FID_SEQ_LOCAL_FILE == 0x0000000200000001ULL, "found 0x%.16llxULL\n", + (long long)FID_SEQ_LOCAL_FILE); + LASSERTF(FID_SEQ_DOT_LUSTRE == 0x0000000200000002ULL, "found 0x%.16llxULL\n", + (long long)FID_SEQ_DOT_LUSTRE); + LASSERTF(FID_SEQ_SPECIAL == 0x0000000200000004ULL, "found 0x%.16llxULL\n", + (long long)FID_SEQ_SPECIAL); + LASSERTF(FID_SEQ_QUOTA == 0x0000000200000005ULL, "found 0x%.16llxULL\n", + (long long)FID_SEQ_QUOTA); + LASSERTF(FID_SEQ_QUOTA_GLB == 0x0000000200000006ULL, "found 0x%.16llxULL\n", + (long long)FID_SEQ_QUOTA_GLB); + LASSERTF(FID_SEQ_ROOT == 0x0000000200000007ULL, "found 0x%.16llxULL\n", + (long long)FID_SEQ_ROOT); + LASSERTF(FID_SEQ_NORMAL == 0x0000000200000400ULL, "found 0x%.16llxULL\n", + (long long)FID_SEQ_NORMAL); + LASSERTF(FID_SEQ_LOV_DEFAULT == 0xffffffffffffffffULL, "found 0x%.16llxULL\n", + (long long)FID_SEQ_LOV_DEFAULT); + LASSERTF(FID_OID_SPECIAL_BFL == 0x00000001UL, "found 0x%.8xUL\n", + (unsigned)FID_OID_SPECIAL_BFL); + LASSERTF(FID_OID_DOT_LUSTRE == 0x00000001UL, "found 0x%.8xUL\n", + (unsigned)FID_OID_DOT_LUSTRE); + LASSERTF(FID_OID_DOT_LUSTRE_OBF == 0x00000002UL, "found 0x%.8xUL\n", + (unsigned)FID_OID_DOT_LUSTRE_OBF); + + /* Checks for struct lu_dirent */ + LASSERTF((int)sizeof(struct lu_dirent) == 32, "found %lld\n", + (long long)(int)sizeof(struct lu_dirent)); + LASSERTF((int)offsetof(struct lu_dirent, lde_fid) == 0, "found %lld\n", + (long long)(int)offsetof(struct lu_dirent, lde_fid)); + LASSERTF((int)sizeof(((struct lu_dirent *)0)->lde_fid) == 16, "found %lld\n", + (long long)(int)sizeof(((struct lu_dirent *)0)->lde_fid)); + LASSERTF((int)offsetof(struct lu_dirent, lde_hash) == 16, "found %lld\n", + (long long)(int)offsetof(struct lu_dirent, lde_hash)); + LASSERTF((int)sizeof(((struct lu_dirent *)0)->lde_hash) == 8, "found %lld\n", + (long long)(int)sizeof(((struct lu_dirent *)0)->lde_hash)); + LASSERTF((int)offsetof(struct lu_dirent, lde_reclen) == 24, "found %lld\n", + (long long)(int)offsetof(struct lu_dirent, lde_reclen)); + LASSERTF((int)sizeof(((struct lu_dirent *)0)->lde_reclen) == 2, "found %lld\n", + (long long)(int)sizeof(((struct lu_dirent *)0)->lde_reclen)); + LASSERTF((int)offsetof(struct lu_dirent, lde_namelen) == 26, "found %lld\n", + (long long)(int)offsetof(struct lu_dirent, lde_namelen)); + LASSERTF((int)sizeof(((struct lu_dirent *)0)->lde_namelen) == 2, "found %lld\n", + (long long)(int)sizeof(((struct lu_dirent *)0)->lde_namelen)); + LASSERTF((int)offsetof(struct lu_dirent, lde_attrs) == 28, "found %lld\n", + (long long)(int)offsetof(struct lu_dirent, lde_attrs)); + LASSERTF((int)sizeof(((struct lu_dirent *)0)->lde_attrs) == 4, "found %lld\n", + (long long)(int)sizeof(((struct lu_dirent *)0)->lde_attrs)); + LASSERTF((int)offsetof(struct lu_dirent, lde_name[0]) == 32, "found %lld\n", + (long long)(int)offsetof(struct lu_dirent, lde_name[0])); + LASSERTF((int)sizeof(((struct lu_dirent *)0)->lde_name[0]) == 1, "found %lld\n", + (long long)(int)sizeof(((struct lu_dirent *)0)->lde_name[0])); + LASSERTF(LUDA_FID == 0x00000001UL, "found 0x%.8xUL\n", + (unsigned)LUDA_FID); + LASSERTF(LUDA_TYPE == 0x00000002UL, "found 0x%.8xUL\n", + (unsigned)LUDA_TYPE); + LASSERTF(LUDA_64BITHASH == 0x00000004UL, "found 0x%.8xUL\n", + (unsigned)LUDA_64BITHASH); + + /* Checks for struct luda_type */ + LASSERTF((int)sizeof(struct luda_type) == 2, "found %lld\n", + (long long)(int)sizeof(struct luda_type)); + LASSERTF((int)offsetof(struct luda_type, lt_type) == 0, "found %lld\n", + (long long)(int)offsetof(struct luda_type, lt_type)); + LASSERTF((int)sizeof(((struct luda_type *)0)->lt_type) == 2, "found %lld\n", + (long long)(int)sizeof(((struct luda_type *)0)->lt_type)); + + /* Checks for struct lu_dirpage */ + LASSERTF((int)sizeof(struct lu_dirpage) == 24, "found %lld\n", + (long long)(int)sizeof(struct lu_dirpage)); + LASSERTF((int)offsetof(struct lu_dirpage, ldp_hash_start) == 0, "found %lld\n", + (long long)(int)offsetof(struct lu_dirpage, ldp_hash_start)); + LASSERTF((int)sizeof(((struct lu_dirpage *)0)->ldp_hash_start) == 8, "found %lld\n", + (long long)(int)sizeof(((struct lu_dirpage *)0)->ldp_hash_start)); + LASSERTF((int)offsetof(struct lu_dirpage, ldp_hash_end) == 8, "found %lld\n", + (long long)(int)offsetof(struct lu_dirpage, ldp_hash_end)); + LASSERTF((int)sizeof(((struct lu_dirpage *)0)->ldp_hash_end) == 8, "found %lld\n", + (long long)(int)sizeof(((struct lu_dirpage *)0)->ldp_hash_end)); + LASSERTF((int)offsetof(struct lu_dirpage, ldp_flags) == 16, "found %lld\n", + (long long)(int)offsetof(struct lu_dirpage, ldp_flags)); + LASSERTF((int)sizeof(((struct lu_dirpage *)0)->ldp_flags) == 4, "found %lld\n", + (long long)(int)sizeof(((struct lu_dirpage *)0)->ldp_flags)); + LASSERTF((int)offsetof(struct lu_dirpage, ldp_pad0) == 20, "found %lld\n", + (long long)(int)offsetof(struct lu_dirpage, ldp_pad0)); + LASSERTF((int)sizeof(((struct lu_dirpage *)0)->ldp_pad0) == 4, "found %lld\n", + (long long)(int)sizeof(((struct lu_dirpage *)0)->ldp_pad0)); + LASSERTF((int)offsetof(struct lu_dirpage, ldp_entries[0]) == 24, "found %lld\n", + (long long)(int)offsetof(struct lu_dirpage, ldp_entries[0])); + LASSERTF((int)sizeof(((struct lu_dirpage *)0)->ldp_entries[0]) == 32, "found %lld\n", + (long long)(int)sizeof(((struct lu_dirpage *)0)->ldp_entries[0])); + LASSERTF(LDF_EMPTY == 1, "found %lld\n", + (long long)LDF_EMPTY); + LASSERTF(LDF_COLLIDE == 2, "found %lld\n", + (long long)LDF_COLLIDE); + LASSERTF(LU_PAGE_SIZE == 4096, "found %lld\n", + (long long)LU_PAGE_SIZE); + /* Checks for union lu_page */ + LASSERTF((int)sizeof(union lu_page) == 4096, "found %lld\n", + (long long)(int)sizeof(union lu_page)); + + /* Checks for struct lustre_handle */ + LASSERTF((int)sizeof(struct lustre_handle) == 8, "found %lld\n", + (long long)(int)sizeof(struct lustre_handle)); + LASSERTF((int)offsetof(struct lustre_handle, cookie) == 0, "found %lld\n", + (long long)(int)offsetof(struct lustre_handle, cookie)); + LASSERTF((int)sizeof(((struct lustre_handle *)0)->cookie) == 8, "found %lld\n", + (long long)(int)sizeof(((struct lustre_handle *)0)->cookie)); + + /* Checks for struct lustre_msg_v2 */ + LASSERTF((int)sizeof(struct lustre_msg_v2) == 32, "found %lld\n", + (long long)(int)sizeof(struct lustre_msg_v2)); + LASSERTF((int)offsetof(struct lustre_msg_v2, lm_bufcount) == 0, "found %lld\n", + (long long)(int)offsetof(struct lustre_msg_v2, lm_bufcount)); + LASSERTF((int)sizeof(((struct lustre_msg_v2 *)0)->lm_bufcount) == 4, "found %lld\n", + (long long)(int)sizeof(((struct lustre_msg_v2 *)0)->lm_bufcount)); + LASSERTF((int)offsetof(struct lustre_msg_v2, lm_secflvr) == 4, "found %lld\n", + (long long)(int)offsetof(struct lustre_msg_v2, lm_secflvr)); + LASSERTF((int)sizeof(((struct lustre_msg_v2 *)0)->lm_secflvr) == 4, "found %lld\n", + (long long)(int)sizeof(((struct lustre_msg_v2 *)0)->lm_secflvr)); + LASSERTF((int)offsetof(struct lustre_msg_v2, lm_magic) == 8, "found %lld\n", + (long long)(int)offsetof(struct lustre_msg_v2, lm_magic)); + LASSERTF((int)sizeof(((struct lustre_msg_v2 *)0)->lm_magic) == 4, "found %lld\n", + (long long)(int)sizeof(((struct lustre_msg_v2 *)0)->lm_magic)); + LASSERTF((int)offsetof(struct lustre_msg_v2, lm_repsize) == 12, "found %lld\n", + (long long)(int)offsetof(struct lustre_msg_v2, lm_repsize)); + LASSERTF((int)sizeof(((struct lustre_msg_v2 *)0)->lm_repsize) == 4, "found %lld\n", + (long long)(int)sizeof(((struct lustre_msg_v2 *)0)->lm_repsize)); + LASSERTF((int)offsetof(struct lustre_msg_v2, lm_cksum) == 16, "found %lld\n", + (long long)(int)offsetof(struct lustre_msg_v2, lm_cksum)); + LASSERTF((int)sizeof(((struct lustre_msg_v2 *)0)->lm_cksum) == 4, "found %lld\n", + (long long)(int)sizeof(((struct lustre_msg_v2 *)0)->lm_cksum)); + LASSERTF((int)offsetof(struct lustre_msg_v2, lm_flags) == 20, "found %lld\n", + (long long)(int)offsetof(struct lustre_msg_v2, lm_flags)); + LASSERTF((int)sizeof(((struct lustre_msg_v2 *)0)->lm_flags) == 4, "found %lld\n", + (long long)(int)sizeof(((struct lustre_msg_v2 *)0)->lm_flags)); + LASSERTF((int)offsetof(struct lustre_msg_v2, lm_padding_2) == 24, "found %lld\n", + (long long)(int)offsetof(struct lustre_msg_v2, lm_padding_2)); + LASSERTF((int)sizeof(((struct lustre_msg_v2 *)0)->lm_padding_2) == 4, "found %lld\n", + (long long)(int)sizeof(((struct lustre_msg_v2 *)0)->lm_padding_2)); + LASSERTF((int)offsetof(struct lustre_msg_v2, lm_padding_3) == 28, "found %lld\n", + (long long)(int)offsetof(struct lustre_msg_v2, lm_padding_3)); + LASSERTF((int)sizeof(((struct lustre_msg_v2 *)0)->lm_padding_3) == 4, "found %lld\n", + (long long)(int)sizeof(((struct lustre_msg_v2 *)0)->lm_padding_3)); + LASSERTF((int)offsetof(struct lustre_msg_v2, lm_buflens[0]) == 32, "found %lld\n", + (long long)(int)offsetof(struct lustre_msg_v2, lm_buflens[0])); + LASSERTF((int)sizeof(((struct lustre_msg_v2 *)0)->lm_buflens[0]) == 4, "found %lld\n", + (long long)(int)sizeof(((struct lustre_msg_v2 *)0)->lm_buflens[0])); + LASSERTF(LUSTRE_MSG_MAGIC_V1 == 0x0BD00BD0, "found 0x%.8x\n", + LUSTRE_MSG_MAGIC_V1); + LASSERTF(LUSTRE_MSG_MAGIC_V2 == 0x0BD00BD3, "found 0x%.8x\n", + LUSTRE_MSG_MAGIC_V2); + LASSERTF(LUSTRE_MSG_MAGIC_V1_SWABBED == 0xD00BD00B, "found 0x%.8x\n", + LUSTRE_MSG_MAGIC_V1_SWABBED); + LASSERTF(LUSTRE_MSG_MAGIC_V2_SWABBED == 0xD30BD00B, "found 0x%.8x\n", + LUSTRE_MSG_MAGIC_V2_SWABBED); + + /* Checks for struct ptlrpc_body */ + LASSERTF((int)sizeof(struct ptlrpc_body_v3) == 184, "found %lld\n", + (long long)(int)sizeof(struct ptlrpc_body_v3)); + LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_handle) == 0, "found %lld\n", + (long long)(int)offsetof(struct ptlrpc_body_v3, pb_handle)); + LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_handle) == 8, "found %lld\n", + (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_handle)); + LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_type) == 8, "found %lld\n", + (long long)(int)offsetof(struct ptlrpc_body_v3, pb_type)); + LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_type) == 4, "found %lld\n", + (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_type)); + LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_version) == 12, "found %lld\n", + (long long)(int)offsetof(struct ptlrpc_body_v3, pb_version)); + LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_version) == 4, "found %lld\n", + (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_version)); + LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_opc) == 16, "found %lld\n", + (long long)(int)offsetof(struct ptlrpc_body_v3, pb_opc)); + LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_opc) == 4, "found %lld\n", + (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_opc)); + LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_status) == 20, "found %lld\n", + (long long)(int)offsetof(struct ptlrpc_body_v3, pb_status)); + LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_status) == 4, "found %lld\n", + (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_status)); + LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_last_xid) == 24, "found %lld\n", + (long long)(int)offsetof(struct ptlrpc_body_v3, pb_last_xid)); + LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_last_xid) == 8, "found %lld\n", + (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_last_xid)); + LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_last_seen) == 32, "found %lld\n", + (long long)(int)offsetof(struct ptlrpc_body_v3, pb_last_seen)); + LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_last_seen) == 8, "found %lld\n", + (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_last_seen)); + LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_last_committed) == 40, "found %lld\n", + (long long)(int)offsetof(struct ptlrpc_body_v3, pb_last_committed)); + LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_last_committed) == 8, "found %lld\n", + (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_last_committed)); + LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_transno) == 48, "found %lld\n", + (long long)(int)offsetof(struct ptlrpc_body_v3, pb_transno)); + LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_transno) == 8, "found %lld\n", + (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_transno)); + LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_flags) == 56, "found %lld\n", + (long long)(int)offsetof(struct ptlrpc_body_v3, pb_flags)); + LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_flags) == 4, "found %lld\n", + (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_flags)); + LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_op_flags) == 60, "found %lld\n", + (long long)(int)offsetof(struct ptlrpc_body_v3, pb_op_flags)); + LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_op_flags) == 4, "found %lld\n", + (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_op_flags)); + LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_conn_cnt) == 64, "found %lld\n", + (long long)(int)offsetof(struct ptlrpc_body_v3, pb_conn_cnt)); + LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_conn_cnt) == 4, "found %lld\n", + (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_conn_cnt)); + LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_timeout) == 68, "found %lld\n", + (long long)(int)offsetof(struct ptlrpc_body_v3, pb_timeout)); + LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_timeout) == 4, "found %lld\n", + (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_timeout)); + LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_service_time) == 72, "found %lld\n", + (long long)(int)offsetof(struct ptlrpc_body_v3, pb_service_time)); + LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_service_time) == 4, "found %lld\n", + (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_service_time)); + LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_limit) == 76, "found %lld\n", + (long long)(int)offsetof(struct ptlrpc_body_v3, pb_limit)); + LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_limit) == 4, "found %lld\n", + (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_limit)); + LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_slv) == 80, "found %lld\n", + (long long)(int)offsetof(struct ptlrpc_body_v3, pb_slv)); + LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_slv) == 8, "found %lld\n", + (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_slv)); + CLASSERT(PTLRPC_NUM_VERSIONS == 4); + LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_pre_versions) == 88, "found %lld\n", + (long long)(int)offsetof(struct ptlrpc_body_v3, pb_pre_versions)); + LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_pre_versions) == 32, "found %lld\n", + (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_pre_versions)); + LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_padding) == 120, "found %lld\n", + (long long)(int)offsetof(struct ptlrpc_body_v3, pb_padding)); + LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_padding) == 32, "found %lld\n", + (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_padding)); + CLASSERT(JOBSTATS_JOBID_SIZE == 32); + LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_jobid) == 152, "found %lld\n", + (long long)(int)offsetof(struct ptlrpc_body_v3, pb_jobid)); + LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_jobid) == 32, "found %lld\n", + (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_jobid)); + LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_handle) == (int)offsetof(struct ptlrpc_body_v2, pb_handle), "%d != %d\n", + (int)offsetof(struct ptlrpc_body_v3, pb_handle), (int)offsetof(struct ptlrpc_body_v2, pb_handle)); + LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_handle) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_handle), "%d != %d\n", + (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_handle), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_handle)); + LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_type) == (int)offsetof(struct ptlrpc_body_v2, pb_type), "%d != %d\n", + (int)offsetof(struct ptlrpc_body_v3, pb_type), (int)offsetof(struct ptlrpc_body_v2, pb_type)); + LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_type) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_type), "%d != %d\n", + (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_type), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_type)); + LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_version) == (int)offsetof(struct ptlrpc_body_v2, pb_version), "%d != %d\n", + (int)offsetof(struct ptlrpc_body_v3, pb_version), (int)offsetof(struct ptlrpc_body_v2, pb_version)); + LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_version) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_version), "%d != %d\n", + (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_version), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_version)); + LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_opc) == (int)offsetof(struct ptlrpc_body_v2, pb_opc), "%d != %d\n", + (int)offsetof(struct ptlrpc_body_v3, pb_opc), (int)offsetof(struct ptlrpc_body_v2, pb_opc)); + LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_opc) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_opc), "%d != %d\n", + (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_opc), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_opc)); + LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_status) == (int)offsetof(struct ptlrpc_body_v2, pb_status), "%d != %d\n", + (int)offsetof(struct ptlrpc_body_v3, pb_status), (int)offsetof(struct ptlrpc_body_v2, pb_status)); + LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_status) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_status), "%d != %d\n", + (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_status), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_status)); + LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_last_xid) == (int)offsetof(struct ptlrpc_body_v2, pb_last_xid), "%d != %d\n", + (int)offsetof(struct ptlrpc_body_v3, pb_last_xid), (int)offsetof(struct ptlrpc_body_v2, pb_last_xid)); + LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_last_xid) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_last_xid), "%d != %d\n", + (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_last_xid), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_last_xid)); + LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_last_seen) == (int)offsetof(struct ptlrpc_body_v2, pb_last_seen), "%d != %d\n", + (int)offsetof(struct ptlrpc_body_v3, pb_last_seen), (int)offsetof(struct ptlrpc_body_v2, pb_last_seen)); + LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_last_seen) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_last_seen), "%d != %d\n", + (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_last_seen), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_last_seen)); + LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_last_committed) == (int)offsetof(struct ptlrpc_body_v2, pb_last_committed), "%d != %d\n", + (int)offsetof(struct ptlrpc_body_v3, pb_last_committed), (int)offsetof(struct ptlrpc_body_v2, pb_last_committed)); + LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_last_committed) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_last_committed), "%d != %d\n", + (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_last_committed), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_last_committed)); + LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_transno) == (int)offsetof(struct ptlrpc_body_v2, pb_transno), "%d != %d\n", + (int)offsetof(struct ptlrpc_body_v3, pb_transno), (int)offsetof(struct ptlrpc_body_v2, pb_transno)); + LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_transno) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_transno), "%d != %d\n", + (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_transno), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_transno)); + LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_flags) == (int)offsetof(struct ptlrpc_body_v2, pb_flags), "%d != %d\n", + (int)offsetof(struct ptlrpc_body_v3, pb_flags), (int)offsetof(struct ptlrpc_body_v2, pb_flags)); + LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_flags) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_flags), "%d != %d\n", + (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_flags), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_flags)); + LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_op_flags) == (int)offsetof(struct ptlrpc_body_v2, pb_op_flags), "%d != %d\n", + (int)offsetof(struct ptlrpc_body_v3, pb_op_flags), (int)offsetof(struct ptlrpc_body_v2, pb_op_flags)); + LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_op_flags) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_op_flags), "%d != %d\n", + (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_op_flags), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_op_flags)); + LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_conn_cnt) == (int)offsetof(struct ptlrpc_body_v2, pb_conn_cnt), "%d != %d\n", + (int)offsetof(struct ptlrpc_body_v3, pb_conn_cnt), (int)offsetof(struct ptlrpc_body_v2, pb_conn_cnt)); + LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_conn_cnt) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_conn_cnt), "%d != %d\n", + (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_conn_cnt), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_conn_cnt)); + LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_timeout) == (int)offsetof(struct ptlrpc_body_v2, pb_timeout), "%d != %d\n", + (int)offsetof(struct ptlrpc_body_v3, pb_timeout), (int)offsetof(struct ptlrpc_body_v2, pb_timeout)); + LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_timeout) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_timeout), "%d != %d\n", + (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_timeout), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_timeout)); + LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_service_time) == (int)offsetof(struct ptlrpc_body_v2, pb_service_time), "%d != %d\n", + (int)offsetof(struct ptlrpc_body_v3, pb_service_time), (int)offsetof(struct ptlrpc_body_v2, pb_service_time)); + LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_service_time) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_service_time), "%d != %d\n", + (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_service_time), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_service_time)); + LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_limit) == (int)offsetof(struct ptlrpc_body_v2, pb_limit), "%d != %d\n", + (int)offsetof(struct ptlrpc_body_v3, pb_limit), (int)offsetof(struct ptlrpc_body_v2, pb_limit)); + LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_limit) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_limit), "%d != %d\n", + (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_limit), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_limit)); + LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_slv) == (int)offsetof(struct ptlrpc_body_v2, pb_slv), "%d != %d\n", + (int)offsetof(struct ptlrpc_body_v3, pb_slv), (int)offsetof(struct ptlrpc_body_v2, pb_slv)); + LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_slv) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_slv), "%d != %d\n", + (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_slv), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_slv)); + LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_pre_versions) == (int)offsetof(struct ptlrpc_body_v2, pb_pre_versions), "%d != %d\n", + (int)offsetof(struct ptlrpc_body_v3, pb_pre_versions), (int)offsetof(struct ptlrpc_body_v2, pb_pre_versions)); + LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_pre_versions) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_pre_versions), "%d != %d\n", + (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_pre_versions), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_pre_versions)); + LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_padding) == (int)offsetof(struct ptlrpc_body_v2, pb_padding), "%d != %d\n", + (int)offsetof(struct ptlrpc_body_v3, pb_padding), (int)offsetof(struct ptlrpc_body_v2, pb_padding)); + LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_padding) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_padding), "%d != %d\n", + (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_padding), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_padding)); + LASSERTF(MSG_PTLRPC_BODY_OFF == 0, "found %lld\n", + (long long)MSG_PTLRPC_BODY_OFF); + LASSERTF(REQ_REC_OFF == 1, "found %lld\n", + (long long)REQ_REC_OFF); + LASSERTF(REPLY_REC_OFF == 1, "found %lld\n", + (long long)REPLY_REC_OFF); + LASSERTF(DLM_LOCKREQ_OFF == 1, "found %lld\n", + (long long)DLM_LOCKREQ_OFF); + LASSERTF(DLM_REQ_REC_OFF == 2, "found %lld\n", + (long long)DLM_REQ_REC_OFF); + LASSERTF(DLM_INTENT_IT_OFF == 2, "found %lld\n", + (long long)DLM_INTENT_IT_OFF); + LASSERTF(DLM_INTENT_REC_OFF == 3, "found %lld\n", + (long long)DLM_INTENT_REC_OFF); + LASSERTF(DLM_LOCKREPLY_OFF == 1, "found %lld\n", + (long long)DLM_LOCKREPLY_OFF); + LASSERTF(DLM_REPLY_REC_OFF == 2, "found %lld\n", + (long long)DLM_REPLY_REC_OFF); + LASSERTF(MSG_PTLRPC_HEADER_OFF == 31, "found %lld\n", + (long long)MSG_PTLRPC_HEADER_OFF); + LASSERTF(PTLRPC_MSG_VERSION == 0x00000003, "found 0x%.8x\n", + PTLRPC_MSG_VERSION); + LASSERTF(LUSTRE_VERSION_MASK == 0xffff0000, "found 0x%.8x\n", + LUSTRE_VERSION_MASK); + LASSERTF(LUSTRE_OBD_VERSION == 0x00010000, "found 0x%.8x\n", + LUSTRE_OBD_VERSION); + LASSERTF(LUSTRE_MDS_VERSION == 0x00020000, "found 0x%.8x\n", + LUSTRE_MDS_VERSION); + LASSERTF(LUSTRE_OST_VERSION == 0x00030000, "found 0x%.8x\n", + LUSTRE_OST_VERSION); + LASSERTF(LUSTRE_DLM_VERSION == 0x00040000, "found 0x%.8x\n", + LUSTRE_DLM_VERSION); + LASSERTF(LUSTRE_LOG_VERSION == 0x00050000, "found 0x%.8x\n", + LUSTRE_LOG_VERSION); + LASSERTF(LUSTRE_MGS_VERSION == 0x00060000, "found 0x%.8x\n", + LUSTRE_MGS_VERSION); + LASSERTF(MSGHDR_AT_SUPPORT == 1, "found %lld\n", + (long long)MSGHDR_AT_SUPPORT); + LASSERTF(MSGHDR_CKSUM_INCOMPAT18 == 2, "found %lld\n", + (long long)MSGHDR_CKSUM_INCOMPAT18); + LASSERTF(MSG_OP_FLAG_MASK == 0xffff0000UL, "found 0x%.8xUL\n", + (unsigned)MSG_OP_FLAG_MASK); + LASSERTF(MSG_OP_FLAG_SHIFT == 16, "found %lld\n", + (long long)MSG_OP_FLAG_SHIFT); + LASSERTF(MSG_GEN_FLAG_MASK == 0x0000ffffUL, "found 0x%.8xUL\n", + (unsigned)MSG_GEN_FLAG_MASK); + LASSERTF(MSG_LAST_REPLAY == 0x00000001UL, "found 0x%.8xUL\n", + (unsigned)MSG_LAST_REPLAY); + LASSERTF(MSG_RESENT == 0x00000002UL, "found 0x%.8xUL\n", + (unsigned)MSG_RESENT); + LASSERTF(MSG_REPLAY == 0x00000004UL, "found 0x%.8xUL\n", + (unsigned)MSG_REPLAY); + LASSERTF(MSG_DELAY_REPLAY == 0x00000010UL, "found 0x%.8xUL\n", + (unsigned)MSG_DELAY_REPLAY); + LASSERTF(MSG_VERSION_REPLAY == 0x00000020UL, "found 0x%.8xUL\n", + (unsigned)MSG_VERSION_REPLAY); + LASSERTF(MSG_REQ_REPLAY_DONE == 0x00000040UL, "found 0x%.8xUL\n", + (unsigned)MSG_REQ_REPLAY_DONE); + LASSERTF(MSG_LOCK_REPLAY_DONE == 0x00000080UL, "found 0x%.8xUL\n", + (unsigned)MSG_LOCK_REPLAY_DONE); + LASSERTF(MSG_CONNECT_RECOVERING == 0x00000001UL, "found 0x%.8xUL\n", + (unsigned)MSG_CONNECT_RECOVERING); + LASSERTF(MSG_CONNECT_RECONNECT == 0x00000002UL, "found 0x%.8xUL\n", + (unsigned)MSG_CONNECT_RECONNECT); + LASSERTF(MSG_CONNECT_REPLAYABLE == 0x00000004UL, "found 0x%.8xUL\n", + (unsigned)MSG_CONNECT_REPLAYABLE); + LASSERTF(MSG_CONNECT_LIBCLIENT == 0x00000010UL, "found 0x%.8xUL\n", + (unsigned)MSG_CONNECT_LIBCLIENT); + LASSERTF(MSG_CONNECT_INITIAL == 0x00000020UL, "found 0x%.8xUL\n", + (unsigned)MSG_CONNECT_INITIAL); + LASSERTF(MSG_CONNECT_ASYNC == 0x00000040UL, "found 0x%.8xUL\n", + (unsigned)MSG_CONNECT_ASYNC); + LASSERTF(MSG_CONNECT_NEXT_VER == 0x00000080UL, "found 0x%.8xUL\n", + (unsigned)MSG_CONNECT_NEXT_VER); + LASSERTF(MSG_CONNECT_TRANSNO == 0x00000100UL, "found 0x%.8xUL\n", + (unsigned)MSG_CONNECT_TRANSNO); + + /* Checks for struct obd_connect_data */ + LASSERTF((int)sizeof(struct obd_connect_data) == 192, "found %lld\n", + (long long)(int)sizeof(struct obd_connect_data)); + LASSERTF((int)offsetof(struct obd_connect_data, ocd_connect_flags) == 0, "found %lld\n", + (long long)(int)offsetof(struct obd_connect_data, ocd_connect_flags)); + LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_connect_flags) == 8, "found %lld\n", + (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_connect_flags)); + LASSERTF((int)offsetof(struct obd_connect_data, ocd_version) == 8, "found %lld\n", + (long long)(int)offsetof(struct obd_connect_data, ocd_version)); + LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_version) == 4, "found %lld\n", + (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_version)); + LASSERTF((int)offsetof(struct obd_connect_data, ocd_grant) == 12, "found %lld\n", + (long long)(int)offsetof(struct obd_connect_data, ocd_grant)); + LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_grant) == 4, "found %lld\n", + (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_grant)); + LASSERTF((int)offsetof(struct obd_connect_data, ocd_index) == 16, "found %lld\n", + (long long)(int)offsetof(struct obd_connect_data, ocd_index)); + LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_index) == 4, "found %lld\n", + (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_index)); + LASSERTF((int)offsetof(struct obd_connect_data, ocd_brw_size) == 20, "found %lld\n", + (long long)(int)offsetof(struct obd_connect_data, ocd_brw_size)); + LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_brw_size) == 4, "found %lld\n", + (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_brw_size)); + LASSERTF((int)offsetof(struct obd_connect_data, ocd_ibits_known) == 24, "found %lld\n", + (long long)(int)offsetof(struct obd_connect_data, ocd_ibits_known)); + LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_ibits_known) == 8, "found %lld\n", + (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_ibits_known)); + LASSERTF((int)offsetof(struct obd_connect_data, ocd_blocksize) == 32, "found %lld\n", + (long long)(int)offsetof(struct obd_connect_data, ocd_blocksize)); + LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_blocksize) == 1, "found %lld\n", + (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_blocksize)); + LASSERTF((int)offsetof(struct obd_connect_data, ocd_inodespace) == 33, "found %lld\n", + (long long)(int)offsetof(struct obd_connect_data, ocd_inodespace)); + LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_inodespace) == 1, "found %lld\n", + (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_inodespace)); + LASSERTF((int)offsetof(struct obd_connect_data, ocd_grant_extent) == 34, "found %lld\n", + (long long)(int)offsetof(struct obd_connect_data, ocd_grant_extent)); + LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_grant_extent) == 2, "found %lld\n", + (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_grant_extent)); + LASSERTF((int)offsetof(struct obd_connect_data, ocd_unused) == 36, "found %lld\n", + (long long)(int)offsetof(struct obd_connect_data, ocd_unused)); + LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_unused) == 4, "found %lld\n", + (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_unused)); + LASSERTF((int)offsetof(struct obd_connect_data, ocd_transno) == 40, "found %lld\n", + (long long)(int)offsetof(struct obd_connect_data, ocd_transno)); + LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_transno) == 8, "found %lld\n", + (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_transno)); + LASSERTF((int)offsetof(struct obd_connect_data, ocd_group) == 48, "found %lld\n", + (long long)(int)offsetof(struct obd_connect_data, ocd_group)); + LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_group) == 4, "found %lld\n", + (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_group)); + LASSERTF((int)offsetof(struct obd_connect_data, ocd_cksum_types) == 52, "found %lld\n", + (long long)(int)offsetof(struct obd_connect_data, ocd_cksum_types)); + LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_cksum_types) == 4, "found %lld\n", + (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_cksum_types)); + LASSERTF((int)offsetof(struct obd_connect_data, ocd_max_easize) == 56, "found %lld\n", + (long long)(int)offsetof(struct obd_connect_data, ocd_max_easize)); + LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_max_easize) == 4, "found %lld\n", + (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_max_easize)); + LASSERTF((int)offsetof(struct obd_connect_data, ocd_instance) == 60, "found %lld\n", + (long long)(int)offsetof(struct obd_connect_data, ocd_instance)); + LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_instance) == 4, "found %lld\n", + (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_instance)); + LASSERTF((int)offsetof(struct obd_connect_data, ocd_maxbytes) == 64, "found %lld\n", + (long long)(int)offsetof(struct obd_connect_data, ocd_maxbytes)); + LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_maxbytes) == 8, "found %lld\n", + (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_maxbytes)); + LASSERTF((int)offsetof(struct obd_connect_data, padding1) == 72, "found %lld\n", + (long long)(int)offsetof(struct obd_connect_data, padding1)); + LASSERTF((int)sizeof(((struct obd_connect_data *)0)->padding1) == 8, "found %lld\n", + (long long)(int)sizeof(((struct obd_connect_data *)0)->padding1)); + LASSERTF((int)offsetof(struct obd_connect_data, padding2) == 80, "found %lld\n", + (long long)(int)offsetof(struct obd_connect_data, padding2)); + LASSERTF((int)sizeof(((struct obd_connect_data *)0)->padding2) == 8, "found %lld\n", + (long long)(int)sizeof(((struct obd_connect_data *)0)->padding2)); + LASSERTF((int)offsetof(struct obd_connect_data, padding3) == 88, "found %lld\n", + (long long)(int)offsetof(struct obd_connect_data, padding3)); + LASSERTF((int)sizeof(((struct obd_connect_data *)0)->padding3) == 8, "found %lld\n", + (long long)(int)sizeof(((struct obd_connect_data *)0)->padding3)); + LASSERTF((int)offsetof(struct obd_connect_data, padding4) == 96, "found %lld\n", + (long long)(int)offsetof(struct obd_connect_data, padding4)); + LASSERTF((int)sizeof(((struct obd_connect_data *)0)->padding4) == 8, "found %lld\n", + (long long)(int)sizeof(((struct obd_connect_data *)0)->padding4)); + LASSERTF((int)offsetof(struct obd_connect_data, padding5) == 104, "found %lld\n", + (long long)(int)offsetof(struct obd_connect_data, padding5)); + LASSERTF((int)sizeof(((struct obd_connect_data *)0)->padding5) == 8, "found %lld\n", + (long long)(int)sizeof(((struct obd_connect_data *)0)->padding5)); + LASSERTF((int)offsetof(struct obd_connect_data, padding6) == 112, "found %lld\n", + (long long)(int)offsetof(struct obd_connect_data, padding6)); + LASSERTF((int)sizeof(((struct obd_connect_data *)0)->padding6) == 8, "found %lld\n", + (long long)(int)sizeof(((struct obd_connect_data *)0)->padding6)); + LASSERTF((int)offsetof(struct obd_connect_data, padding7) == 120, "found %lld\n", + (long long)(int)offsetof(struct obd_connect_data, padding7)); + LASSERTF((int)sizeof(((struct obd_connect_data *)0)->padding7) == 8, "found %lld\n", + (long long)(int)sizeof(((struct obd_connect_data *)0)->padding7)); + LASSERTF((int)offsetof(struct obd_connect_data, padding8) == 128, "found %lld\n", + (long long)(int)offsetof(struct obd_connect_data, padding8)); + LASSERTF((int)sizeof(((struct obd_connect_data *)0)->padding8) == 8, "found %lld\n", + (long long)(int)sizeof(((struct obd_connect_data *)0)->padding8)); + LASSERTF((int)offsetof(struct obd_connect_data, padding9) == 136, "found %lld\n", + (long long)(int)offsetof(struct obd_connect_data, padding9)); + LASSERTF((int)sizeof(((struct obd_connect_data *)0)->padding9) == 8, "found %lld\n", + (long long)(int)sizeof(((struct obd_connect_data *)0)->padding9)); + LASSERTF((int)offsetof(struct obd_connect_data, paddingA) == 144, "found %lld\n", + (long long)(int)offsetof(struct obd_connect_data, paddingA)); + LASSERTF((int)sizeof(((struct obd_connect_data *)0)->paddingA) == 8, "found %lld\n", + (long long)(int)sizeof(((struct obd_connect_data *)0)->paddingA)); + LASSERTF((int)offsetof(struct obd_connect_data, paddingB) == 152, "found %lld\n", + (long long)(int)offsetof(struct obd_connect_data, paddingB)); + LASSERTF((int)sizeof(((struct obd_connect_data *)0)->paddingB) == 8, "found %lld\n", + (long long)(int)sizeof(((struct obd_connect_data *)0)->paddingB)); + LASSERTF((int)offsetof(struct obd_connect_data, paddingC) == 160, "found %lld\n", + (long long)(int)offsetof(struct obd_connect_data, paddingC)); + LASSERTF((int)sizeof(((struct obd_connect_data *)0)->paddingC) == 8, "found %lld\n", + (long long)(int)sizeof(((struct obd_connect_data *)0)->paddingC)); + LASSERTF((int)offsetof(struct obd_connect_data, paddingD) == 168, "found %lld\n", + (long long)(int)offsetof(struct obd_connect_data, paddingD)); + LASSERTF((int)sizeof(((struct obd_connect_data *)0)->paddingD) == 8, "found %lld\n", + (long long)(int)sizeof(((struct obd_connect_data *)0)->paddingD)); + LASSERTF((int)offsetof(struct obd_connect_data, paddingE) == 176, "found %lld\n", + (long long)(int)offsetof(struct obd_connect_data, paddingE)); + LASSERTF((int)sizeof(((struct obd_connect_data *)0)->paddingE) == 8, "found %lld\n", + (long long)(int)sizeof(((struct obd_connect_data *)0)->paddingE)); + LASSERTF((int)offsetof(struct obd_connect_data, paddingF) == 184, "found %lld\n", + (long long)(int)offsetof(struct obd_connect_data, paddingF)); + LASSERTF((int)sizeof(((struct obd_connect_data *)0)->paddingF) == 8, "found %lld\n", + (long long)(int)sizeof(((struct obd_connect_data *)0)->paddingF)); + LASSERTF(OBD_CONNECT_RDONLY == 0x1ULL, "found 0x%.16llxULL\n", + OBD_CONNECT_RDONLY); + LASSERTF(OBD_CONNECT_INDEX == 0x2ULL, "found 0x%.16llxULL\n", + OBD_CONNECT_INDEX); + LASSERTF(OBD_CONNECT_MDS == 0x4ULL, "found 0x%.16llxULL\n", + OBD_CONNECT_MDS); + LASSERTF(OBD_CONNECT_GRANT == 0x8ULL, "found 0x%.16llxULL\n", + OBD_CONNECT_GRANT); + LASSERTF(OBD_CONNECT_SRVLOCK == 0x10ULL, "found 0x%.16llxULL\n", + OBD_CONNECT_SRVLOCK); + LASSERTF(OBD_CONNECT_VERSION == 0x20ULL, "found 0x%.16llxULL\n", + OBD_CONNECT_VERSION); + LASSERTF(OBD_CONNECT_REQPORTAL == 0x40ULL, "found 0x%.16llxULL\n", + OBD_CONNECT_REQPORTAL); + LASSERTF(OBD_CONNECT_ACL == 0x80ULL, "found 0x%.16llxULL\n", + OBD_CONNECT_ACL); + LASSERTF(OBD_CONNECT_XATTR == 0x100ULL, "found 0x%.16llxULL\n", + OBD_CONNECT_XATTR); + LASSERTF(OBD_CONNECT_CROW == 0x200ULL, "found 0x%.16llxULL\n", + OBD_CONNECT_CROW); + LASSERTF(OBD_CONNECT_TRUNCLOCK == 0x400ULL, "found 0x%.16llxULL\n", + OBD_CONNECT_TRUNCLOCK); + LASSERTF(OBD_CONNECT_TRANSNO == 0x800ULL, "found 0x%.16llxULL\n", + OBD_CONNECT_TRANSNO); + LASSERTF(OBD_CONNECT_IBITS == 0x1000ULL, "found 0x%.16llxULL\n", + OBD_CONNECT_IBITS); + LASSERTF(OBD_CONNECT_JOIN == 0x2000ULL, "found 0x%.16llxULL\n", + OBD_CONNECT_JOIN); + LASSERTF(OBD_CONNECT_ATTRFID == 0x4000ULL, "found 0x%.16llxULL\n", + OBD_CONNECT_ATTRFID); + LASSERTF(OBD_CONNECT_NODEVOH == 0x8000ULL, "found 0x%.16llxULL\n", + OBD_CONNECT_NODEVOH); + LASSERTF(OBD_CONNECT_RMT_CLIENT == 0x10000ULL, "found 0x%.16llxULL\n", + OBD_CONNECT_RMT_CLIENT); + LASSERTF(OBD_CONNECT_RMT_CLIENT_FORCE == 0x20000ULL, "found 0x%.16llxULL\n", + OBD_CONNECT_RMT_CLIENT_FORCE); + LASSERTF(OBD_CONNECT_BRW_SIZE == 0x40000ULL, "found 0x%.16llxULL\n", + OBD_CONNECT_BRW_SIZE); + LASSERTF(OBD_CONNECT_QUOTA64 == 0x80000ULL, "found 0x%.16llxULL\n", + OBD_CONNECT_QUOTA64); + LASSERTF(OBD_CONNECT_MDS_CAPA == 0x100000ULL, "found 0x%.16llxULL\n", + OBD_CONNECT_MDS_CAPA); + LASSERTF(OBD_CONNECT_OSS_CAPA == 0x200000ULL, "found 0x%.16llxULL\n", + OBD_CONNECT_OSS_CAPA); + LASSERTF(OBD_CONNECT_CANCELSET == 0x400000ULL, "found 0x%.16llxULL\n", + OBD_CONNECT_CANCELSET); + LASSERTF(OBD_CONNECT_SOM == 0x800000ULL, "found 0x%.16llxULL\n", + OBD_CONNECT_SOM); + LASSERTF(OBD_CONNECT_AT == 0x1000000ULL, "found 0x%.16llxULL\n", + OBD_CONNECT_AT); + LASSERTF(OBD_CONNECT_LRU_RESIZE == 0x2000000ULL, "found 0x%.16llxULL\n", + OBD_CONNECT_LRU_RESIZE); + LASSERTF(OBD_CONNECT_MDS_MDS == 0x4000000ULL, "found 0x%.16llxULL\n", + OBD_CONNECT_MDS_MDS); + LASSERTF(OBD_CONNECT_REAL == 0x8000000ULL, "found 0x%.16llxULL\n", + OBD_CONNECT_REAL); + LASSERTF(OBD_CONNECT_CHANGE_QS == 0x10000000ULL, "found 0x%.16llxULL\n", + OBD_CONNECT_CHANGE_QS); + LASSERTF(OBD_CONNECT_CKSUM == 0x20000000ULL, "found 0x%.16llxULL\n", + OBD_CONNECT_CKSUM); + LASSERTF(OBD_CONNECT_FID == 0x40000000ULL, "found 0x%.16llxULL\n", + OBD_CONNECT_FID); + LASSERTF(OBD_CONNECT_VBR == 0x80000000ULL, "found 0x%.16llxULL\n", + OBD_CONNECT_VBR); + LASSERTF(OBD_CONNECT_LOV_V3 == 0x100000000ULL, "found 0x%.16llxULL\n", + OBD_CONNECT_LOV_V3); + LASSERTF(OBD_CONNECT_GRANT_SHRINK == 0x200000000ULL, "found 0x%.16llxULL\n", + OBD_CONNECT_GRANT_SHRINK); + LASSERTF(OBD_CONNECT_SKIP_ORPHAN == 0x400000000ULL, "found 0x%.16llxULL\n", + OBD_CONNECT_SKIP_ORPHAN); + LASSERTF(OBD_CONNECT_MAX_EASIZE == 0x800000000ULL, "found 0x%.16llxULL\n", + OBD_CONNECT_MAX_EASIZE); + LASSERTF(OBD_CONNECT_FULL20 == 0x1000000000ULL, "found 0x%.16llxULL\n", + OBD_CONNECT_FULL20); + LASSERTF(OBD_CONNECT_LAYOUTLOCK == 0x2000000000ULL, "found 0x%.16llxULL\n", + OBD_CONNECT_LAYOUTLOCK); + LASSERTF(OBD_CONNECT_64BITHASH == 0x4000000000ULL, "found 0x%.16llxULL\n", + OBD_CONNECT_64BITHASH); + LASSERTF(OBD_CONNECT_MAXBYTES == 0x8000000000ULL, "found 0x%.16llxULL\n", + OBD_CONNECT_MAXBYTES); + LASSERTF(OBD_CONNECT_IMP_RECOV == 0x10000000000ULL, "found 0x%.16llxULL\n", + OBD_CONNECT_IMP_RECOV); + LASSERTF(OBD_CONNECT_JOBSTATS == 0x20000000000ULL, "found 0x%.16llxULL\n", + OBD_CONNECT_JOBSTATS); + LASSERTF(OBD_CONNECT_UMASK == 0x40000000000ULL, "found 0x%.16llxULL\n", + OBD_CONNECT_UMASK); + LASSERTF(OBD_CONNECT_EINPROGRESS == 0x80000000000ULL, "found 0x%.16llxULL\n", + OBD_CONNECT_EINPROGRESS); + LASSERTF(OBD_CONNECT_GRANT_PARAM == 0x100000000000ULL, "found 0x%.16llxULL\n", + OBD_CONNECT_GRANT_PARAM); + LASSERTF(OBD_CONNECT_FLOCK_OWNER == 0x200000000000ULL, "found 0x%.16llxULL\n", + OBD_CONNECT_FLOCK_OWNER); + LASSERTF(OBD_CONNECT_LVB_TYPE == 0x400000000000ULL, "found 0x%.16llxULL\n", + OBD_CONNECT_LVB_TYPE); + LASSERTF(OBD_CONNECT_NANOSEC_TIME == 0x800000000000ULL, "found 0x%.16llxULL\n", + OBD_CONNECT_NANOSEC_TIME); + LASSERTF(OBD_CONNECT_LIGHTWEIGHT == 0x1000000000000ULL, "found 0x%.16llxULL\n", + OBD_CONNECT_LIGHTWEIGHT); + LASSERTF(OBD_CONNECT_SHORTIO == 0x2000000000000ULL, "found 0x%.16llxULL\n", + OBD_CONNECT_SHORTIO); + LASSERTF(OBD_CONNECT_PINGLESS == 0x4000000000000ULL, "found 0x%.16llxULL\n", + OBD_CONNECT_PINGLESS); + LASSERTF(OBD_CONNECT_FLOCK_DEAD == 0x8000000000000ULL, + "found 0x%.16llxULL\n", OBD_CONNECT_FLOCK_DEAD); + LASSERTF(OBD_CKSUM_CRC32 == 0x00000001UL, "found 0x%.8xUL\n", + (unsigned)OBD_CKSUM_CRC32); + LASSERTF(OBD_CKSUM_ADLER == 0x00000002UL, "found 0x%.8xUL\n", + (unsigned)OBD_CKSUM_ADLER); + LASSERTF(OBD_CKSUM_CRC32C == 0x00000004UL, "found 0x%.8xUL\n", + (unsigned)OBD_CKSUM_CRC32C); + + /* Checks for struct obdo */ + LASSERTF((int)sizeof(struct obdo) == 208, "found %lld\n", + (long long)(int)sizeof(struct obdo)); + LASSERTF((int)offsetof(struct obdo, o_valid) == 0, "found %lld\n", + (long long)(int)offsetof(struct obdo, o_valid)); + LASSERTF((int)sizeof(((struct obdo *)0)->o_valid) == 8, "found %lld\n", + (long long)(int)sizeof(((struct obdo *)0)->o_valid)); + LASSERTF((int)offsetof(struct obdo, o_oi) == 8, "found %lld\n", + (long long)(int)offsetof(struct obdo, o_oi)); + LASSERTF((int)sizeof(((struct obdo *)0)->o_oi) == 16, "found %lld\n", + (long long)(int)sizeof(((struct obdo *)0)->o_oi)); + LASSERTF((int)offsetof(struct obdo, o_parent_seq) == 24, "found %lld\n", + (long long)(int)offsetof(struct obdo, o_parent_seq)); + LASSERTF((int)sizeof(((struct obdo *)0)->o_parent_seq) == 8, "found %lld\n", + (long long)(int)sizeof(((struct obdo *)0)->o_parent_seq)); + LASSERTF((int)offsetof(struct obdo, o_size) == 32, "found %lld\n", + (long long)(int)offsetof(struct obdo, o_size)); + LASSERTF((int)sizeof(((struct obdo *)0)->o_size) == 8, "found %lld\n", + (long long)(int)sizeof(((struct obdo *)0)->o_size)); + LASSERTF((int)offsetof(struct obdo, o_mtime) == 40, "found %lld\n", + (long long)(int)offsetof(struct obdo, o_mtime)); + LASSERTF((int)sizeof(((struct obdo *)0)->o_mtime) == 8, "found %lld\n", + (long long)(int)sizeof(((struct obdo *)0)->o_mtime)); + LASSERTF((int)offsetof(struct obdo, o_atime) == 48, "found %lld\n", + (long long)(int)offsetof(struct obdo, o_atime)); + LASSERTF((int)sizeof(((struct obdo *)0)->o_atime) == 8, "found %lld\n", + (long long)(int)sizeof(((struct obdo *)0)->o_atime)); + LASSERTF((int)offsetof(struct obdo, o_ctime) == 56, "found %lld\n", + (long long)(int)offsetof(struct obdo, o_ctime)); + LASSERTF((int)sizeof(((struct obdo *)0)->o_ctime) == 8, "found %lld\n", + (long long)(int)sizeof(((struct obdo *)0)->o_ctime)); + LASSERTF((int)offsetof(struct obdo, o_blocks) == 64, "found %lld\n", + (long long)(int)offsetof(struct obdo, o_blocks)); + LASSERTF((int)sizeof(((struct obdo *)0)->o_blocks) == 8, "found %lld\n", + (long long)(int)sizeof(((struct obdo *)0)->o_blocks)); + LASSERTF((int)offsetof(struct obdo, o_grant) == 72, "found %lld\n", + (long long)(int)offsetof(struct obdo, o_grant)); + LASSERTF((int)sizeof(((struct obdo *)0)->o_grant) == 8, "found %lld\n", + (long long)(int)sizeof(((struct obdo *)0)->o_grant)); + LASSERTF((int)offsetof(struct obdo, o_blksize) == 80, "found %lld\n", + (long long)(int)offsetof(struct obdo, o_blksize)); + LASSERTF((int)sizeof(((struct obdo *)0)->o_blksize) == 4, "found %lld\n", + (long long)(int)sizeof(((struct obdo *)0)->o_blksize)); + LASSERTF((int)offsetof(struct obdo, o_mode) == 84, "found %lld\n", + (long long)(int)offsetof(struct obdo, o_mode)); + LASSERTF((int)sizeof(((struct obdo *)0)->o_mode) == 4, "found %lld\n", + (long long)(int)sizeof(((struct obdo *)0)->o_mode)); + LASSERTF((int)offsetof(struct obdo, o_uid) == 88, "found %lld\n", + (long long)(int)offsetof(struct obdo, o_uid)); + LASSERTF((int)sizeof(((struct obdo *)0)->o_uid) == 4, "found %lld\n", + (long long)(int)sizeof(((struct obdo *)0)->o_uid)); + LASSERTF((int)offsetof(struct obdo, o_gid) == 92, "found %lld\n", + (long long)(int)offsetof(struct obdo, o_gid)); + LASSERTF((int)sizeof(((struct obdo *)0)->o_gid) == 4, "found %lld\n", + (long long)(int)sizeof(((struct obdo *)0)->o_gid)); + LASSERTF((int)offsetof(struct obdo, o_flags) == 96, "found %lld\n", + (long long)(int)offsetof(struct obdo, o_flags)); + LASSERTF((int)sizeof(((struct obdo *)0)->o_flags) == 4, "found %lld\n", + (long long)(int)sizeof(((struct obdo *)0)->o_flags)); + LASSERTF((int)offsetof(struct obdo, o_nlink) == 100, "found %lld\n", + (long long)(int)offsetof(struct obdo, o_nlink)); + LASSERTF((int)sizeof(((struct obdo *)0)->o_nlink) == 4, "found %lld\n", + (long long)(int)sizeof(((struct obdo *)0)->o_nlink)); + LASSERTF((int)offsetof(struct obdo, o_parent_oid) == 104, "found %lld\n", + (long long)(int)offsetof(struct obdo, o_parent_oid)); + LASSERTF((int)sizeof(((struct obdo *)0)->o_parent_oid) == 4, "found %lld\n", + (long long)(int)sizeof(((struct obdo *)0)->o_parent_oid)); + LASSERTF((int)offsetof(struct obdo, o_misc) == 108, "found %lld\n", + (long long)(int)offsetof(struct obdo, o_misc)); + LASSERTF((int)sizeof(((struct obdo *)0)->o_misc) == 4, "found %lld\n", + (long long)(int)sizeof(((struct obdo *)0)->o_misc)); + LASSERTF((int)offsetof(struct obdo, o_ioepoch) == 112, "found %lld\n", + (long long)(int)offsetof(struct obdo, o_ioepoch)); + LASSERTF((int)sizeof(((struct obdo *)0)->o_ioepoch) == 8, "found %lld\n", + (long long)(int)sizeof(((struct obdo *)0)->o_ioepoch)); + LASSERTF((int)offsetof(struct obdo, o_stripe_idx) == 120, "found %lld\n", + (long long)(int)offsetof(struct obdo, o_stripe_idx)); + LASSERTF((int)sizeof(((struct obdo *)0)->o_stripe_idx) == 4, "found %lld\n", + (long long)(int)sizeof(((struct obdo *)0)->o_stripe_idx)); + LASSERTF((int)offsetof(struct obdo, o_parent_ver) == 124, "found %lld\n", + (long long)(int)offsetof(struct obdo, o_parent_ver)); + LASSERTF((int)sizeof(((struct obdo *)0)->o_parent_ver) == 4, "found %lld\n", + (long long)(int)sizeof(((struct obdo *)0)->o_parent_ver)); + LASSERTF((int)offsetof(struct obdo, o_handle) == 128, "found %lld\n", + (long long)(int)offsetof(struct obdo, o_handle)); + LASSERTF((int)sizeof(((struct obdo *)0)->o_handle) == 8, "found %lld\n", + (long long)(int)sizeof(((struct obdo *)0)->o_handle)); + LASSERTF((int)offsetof(struct obdo, o_lcookie) == 136, "found %lld\n", + (long long)(int)offsetof(struct obdo, o_lcookie)); + LASSERTF((int)sizeof(((struct obdo *)0)->o_lcookie) == 32, "found %lld\n", + (long long)(int)sizeof(((struct obdo *)0)->o_lcookie)); + LASSERTF((int)offsetof(struct obdo, o_uid_h) == 168, "found %lld\n", + (long long)(int)offsetof(struct obdo, o_uid_h)); + LASSERTF((int)sizeof(((struct obdo *)0)->o_uid_h) == 4, "found %lld\n", + (long long)(int)sizeof(((struct obdo *)0)->o_uid_h)); + LASSERTF((int)offsetof(struct obdo, o_gid_h) == 172, "found %lld\n", + (long long)(int)offsetof(struct obdo, o_gid_h)); + LASSERTF((int)sizeof(((struct obdo *)0)->o_gid_h) == 4, "found %lld\n", + (long long)(int)sizeof(((struct obdo *)0)->o_gid_h)); + LASSERTF((int)offsetof(struct obdo, o_data_version) == 176, "found %lld\n", + (long long)(int)offsetof(struct obdo, o_data_version)); + LASSERTF((int)sizeof(((struct obdo *)0)->o_data_version) == 8, "found %lld\n", + (long long)(int)sizeof(((struct obdo *)0)->o_data_version)); + LASSERTF((int)offsetof(struct obdo, o_padding_4) == 184, "found %lld\n", + (long long)(int)offsetof(struct obdo, o_padding_4)); + LASSERTF((int)sizeof(((struct obdo *)0)->o_padding_4) == 8, "found %lld\n", + (long long)(int)sizeof(((struct obdo *)0)->o_padding_4)); + LASSERTF((int)offsetof(struct obdo, o_padding_5) == 192, "found %lld\n", + (long long)(int)offsetof(struct obdo, o_padding_5)); + LASSERTF((int)sizeof(((struct obdo *)0)->o_padding_5) == 8, "found %lld\n", + (long long)(int)sizeof(((struct obdo *)0)->o_padding_5)); + LASSERTF((int)offsetof(struct obdo, o_padding_6) == 200, "found %lld\n", + (long long)(int)offsetof(struct obdo, o_padding_6)); + LASSERTF((int)sizeof(((struct obdo *)0)->o_padding_6) == 8, "found %lld\n", + (long long)(int)sizeof(((struct obdo *)0)->o_padding_6)); + LASSERTF(OBD_MD_FLID == (0x00000001ULL), "found 0x%.16llxULL\n", + OBD_MD_FLID); + LASSERTF(OBD_MD_FLATIME == (0x00000002ULL), "found 0x%.16llxULL\n", + OBD_MD_FLATIME); + LASSERTF(OBD_MD_FLMTIME == (0x00000004ULL), "found 0x%.16llxULL\n", + OBD_MD_FLMTIME); + LASSERTF(OBD_MD_FLCTIME == (0x00000008ULL), "found 0x%.16llxULL\n", + OBD_MD_FLCTIME); + LASSERTF(OBD_MD_FLSIZE == (0x00000010ULL), "found 0x%.16llxULL\n", + OBD_MD_FLSIZE); + LASSERTF(OBD_MD_FLBLOCKS == (0x00000020ULL), "found 0x%.16llxULL\n", + OBD_MD_FLBLOCKS); + LASSERTF(OBD_MD_FLBLKSZ == (0x00000040ULL), "found 0x%.16llxULL\n", + OBD_MD_FLBLKSZ); + LASSERTF(OBD_MD_FLMODE == (0x00000080ULL), "found 0x%.16llxULL\n", + OBD_MD_FLMODE); + LASSERTF(OBD_MD_FLTYPE == (0x00000100ULL), "found 0x%.16llxULL\n", + OBD_MD_FLTYPE); + LASSERTF(OBD_MD_FLUID == (0x00000200ULL), "found 0x%.16llxULL\n", + OBD_MD_FLUID); + LASSERTF(OBD_MD_FLGID == (0x00000400ULL), "found 0x%.16llxULL\n", + OBD_MD_FLGID); + LASSERTF(OBD_MD_FLFLAGS == (0x00000800ULL), "found 0x%.16llxULL\n", + OBD_MD_FLFLAGS); + LASSERTF(OBD_MD_FLNLINK == (0x00002000ULL), "found 0x%.16llxULL\n", + OBD_MD_FLNLINK); + LASSERTF(OBD_MD_FLGENER == (0x00004000ULL), "found 0x%.16llxULL\n", + OBD_MD_FLGENER); + LASSERTF(OBD_MD_FLRDEV == (0x00010000ULL), "found 0x%.16llxULL\n", + OBD_MD_FLRDEV); + LASSERTF(OBD_MD_FLEASIZE == (0x00020000ULL), "found 0x%.16llxULL\n", + OBD_MD_FLEASIZE); + LASSERTF(OBD_MD_LINKNAME == (0x00040000ULL), "found 0x%.16llxULL\n", + OBD_MD_LINKNAME); + LASSERTF(OBD_MD_FLHANDLE == (0x00080000ULL), "found 0x%.16llxULL\n", + OBD_MD_FLHANDLE); + LASSERTF(OBD_MD_FLCKSUM == (0x00100000ULL), "found 0x%.16llxULL\n", + OBD_MD_FLCKSUM); + LASSERTF(OBD_MD_FLQOS == (0x00200000ULL), "found 0x%.16llxULL\n", + OBD_MD_FLQOS); + LASSERTF(OBD_MD_FLCOOKIE == (0x00800000ULL), "found 0x%.16llxULL\n", + OBD_MD_FLCOOKIE); + LASSERTF(OBD_MD_FLGROUP == (0x01000000ULL), "found 0x%.16llxULL\n", + OBD_MD_FLGROUP); + LASSERTF(OBD_MD_FLFID == (0x02000000ULL), "found 0x%.16llxULL\n", + OBD_MD_FLFID); + LASSERTF(OBD_MD_FLEPOCH == (0x04000000ULL), "found 0x%.16llxULL\n", + OBD_MD_FLEPOCH); + LASSERTF(OBD_MD_FLGRANT == (0x08000000ULL), "found 0x%.16llxULL\n", + OBD_MD_FLGRANT); + LASSERTF(OBD_MD_FLDIREA == (0x10000000ULL), "found 0x%.16llxULL\n", + OBD_MD_FLDIREA); + LASSERTF(OBD_MD_FLUSRQUOTA == (0x20000000ULL), "found 0x%.16llxULL\n", + OBD_MD_FLUSRQUOTA); + LASSERTF(OBD_MD_FLGRPQUOTA == (0x40000000ULL), "found 0x%.16llxULL\n", + OBD_MD_FLGRPQUOTA); + LASSERTF(OBD_MD_FLMODEASIZE == (0x80000000ULL), "found 0x%.16llxULL\n", + OBD_MD_FLMODEASIZE); + LASSERTF(OBD_MD_MDS == (0x0000000100000000ULL), "found 0x%.16llxULL\n", + OBD_MD_MDS); + LASSERTF(OBD_MD_REINT == (0x0000000200000000ULL), "found 0x%.16llxULL\n", + OBD_MD_REINT); + LASSERTF(OBD_MD_MEA == (0x0000000400000000ULL), "found 0x%.16llxULL\n", + OBD_MD_MEA); + LASSERTF(OBD_MD_TSTATE == (0x0000000800000000ULL), + "found 0x%.16llxULL\n", OBD_MD_TSTATE); + LASSERTF(OBD_MD_FLXATTR == (0x0000001000000000ULL), "found 0x%.16llxULL\n", + OBD_MD_FLXATTR); + LASSERTF(OBD_MD_FLXATTRLS == (0x0000002000000000ULL), "found 0x%.16llxULL\n", + OBD_MD_FLXATTRLS); + LASSERTF(OBD_MD_FLXATTRRM == (0x0000004000000000ULL), "found 0x%.16llxULL\n", + OBD_MD_FLXATTRRM); + LASSERTF(OBD_MD_FLACL == (0x0000008000000000ULL), "found 0x%.16llxULL\n", + OBD_MD_FLACL); + LASSERTF(OBD_MD_FLRMTPERM == (0x0000010000000000ULL), "found 0x%.16llxULL\n", + OBD_MD_FLRMTPERM); + LASSERTF(OBD_MD_FLMDSCAPA == (0x0000020000000000ULL), "found 0x%.16llxULL\n", + OBD_MD_FLMDSCAPA); + LASSERTF(OBD_MD_FLOSSCAPA == (0x0000040000000000ULL), "found 0x%.16llxULL\n", + OBD_MD_FLOSSCAPA); + LASSERTF(OBD_MD_FLCKSPLIT == (0x0000080000000000ULL), "found 0x%.16llxULL\n", + OBD_MD_FLCKSPLIT); + LASSERTF(OBD_MD_FLCROSSREF == (0x0000100000000000ULL), "found 0x%.16llxULL\n", + OBD_MD_FLCROSSREF); + LASSERTF(OBD_MD_FLGETATTRLOCK == (0x0000200000000000ULL), "found 0x%.16llxULL\n", + OBD_MD_FLGETATTRLOCK); + LASSERTF(OBD_MD_FLRMTLSETFACL == (0x0001000000000000ULL), "found 0x%.16llxULL\n", + OBD_MD_FLRMTLSETFACL); + LASSERTF(OBD_MD_FLRMTLGETFACL == (0x0002000000000000ULL), "found 0x%.16llxULL\n", + OBD_MD_FLRMTLGETFACL); + LASSERTF(OBD_MD_FLRMTRSETFACL == (0x0004000000000000ULL), "found 0x%.16llxULL\n", + OBD_MD_FLRMTRSETFACL); + LASSERTF(OBD_MD_FLRMTRGETFACL == (0x0008000000000000ULL), "found 0x%.16llxULL\n", + OBD_MD_FLRMTRGETFACL); + LASSERTF(OBD_MD_FLDATAVERSION == (0x0010000000000000ULL), "found 0x%.16llxULL\n", + OBD_MD_FLDATAVERSION); + CLASSERT(OBD_FL_INLINEDATA == 0x00000001); + CLASSERT(OBD_FL_OBDMDEXISTS == 0x00000002); + CLASSERT(OBD_FL_DELORPHAN == 0x00000004); + CLASSERT(OBD_FL_NORPC == 0x00000008); + CLASSERT(OBD_FL_IDONLY == 0x00000010); + CLASSERT(OBD_FL_RECREATE_OBJS == 0x00000020); + CLASSERT(OBD_FL_DEBUG_CHECK == 0x00000040); + CLASSERT(OBD_FL_NO_USRQUOTA == 0x00000100); + CLASSERT(OBD_FL_NO_GRPQUOTA == 0x00000200); + CLASSERT(OBD_FL_CREATE_CROW == 0x00000400); + CLASSERT(OBD_FL_SRVLOCK == 0x00000800); + CLASSERT(OBD_FL_CKSUM_CRC32 == 0x00001000); + CLASSERT(OBD_FL_CKSUM_ADLER == 0x00002000); + CLASSERT(OBD_FL_CKSUM_CRC32C == 0x00004000); + CLASSERT(OBD_FL_CKSUM_RSVD2 == 0x00008000); + CLASSERT(OBD_FL_CKSUM_RSVD3 == 0x00010000); + CLASSERT(OBD_FL_SHRINK_GRANT == 0x00020000); + CLASSERT(OBD_FL_MMAP == 0x00040000); + CLASSERT(OBD_FL_RECOV_RESEND == 0x00080000); + CLASSERT(OBD_FL_NOSPC_BLK == 0x00100000); + CLASSERT(OBD_FL_LOCAL_MASK == 0xf0000000); + + /* Checks for struct lov_ost_data_v1 */ + LASSERTF((int)sizeof(struct lov_ost_data_v1) == 24, "found %lld\n", + (long long)(int)sizeof(struct lov_ost_data_v1)); + LASSERTF((int)offsetof(struct lov_ost_data_v1, l_ost_oi) == 0, "found %lld\n", + (long long)(int)offsetof(struct lov_ost_data_v1, l_ost_oi)); + LASSERTF((int)sizeof(((struct lov_ost_data_v1 *)0)->l_ost_oi) == 16, "found %lld\n", + (long long)(int)sizeof(((struct lov_ost_data_v1 *)0)->l_ost_oi)); + LASSERTF((int)offsetof(struct lov_ost_data_v1, l_ost_gen) == 16, "found %lld\n", + (long long)(int)offsetof(struct lov_ost_data_v1, l_ost_gen)); + LASSERTF((int)sizeof(((struct lov_ost_data_v1 *)0)->l_ost_gen) == 4, "found %lld\n", + (long long)(int)sizeof(((struct lov_ost_data_v1 *)0)->l_ost_gen)); + LASSERTF((int)offsetof(struct lov_ost_data_v1, l_ost_idx) == 20, "found %lld\n", + (long long)(int)offsetof(struct lov_ost_data_v1, l_ost_idx)); + LASSERTF((int)sizeof(((struct lov_ost_data_v1 *)0)->l_ost_idx) == 4, "found %lld\n", + (long long)(int)sizeof(((struct lov_ost_data_v1 *)0)->l_ost_idx)); + + /* Checks for struct lov_mds_md_v1 */ + LASSERTF((int)sizeof(struct lov_mds_md_v1) == 32, "found %lld\n", + (long long)(int)sizeof(struct lov_mds_md_v1)); + LASSERTF((int)offsetof(struct lov_mds_md_v1, lmm_magic) == 0, "found %lld\n", + (long long)(int)offsetof(struct lov_mds_md_v1, lmm_magic)); + LASSERTF((int)sizeof(((struct lov_mds_md_v1 *)0)->lmm_magic) == 4, "found %lld\n", + (long long)(int)sizeof(((struct lov_mds_md_v1 *)0)->lmm_magic)); + LASSERTF((int)offsetof(struct lov_mds_md_v1, lmm_pattern) == 4, "found %lld\n", + (long long)(int)offsetof(struct lov_mds_md_v1, lmm_pattern)); + LASSERTF((int)sizeof(((struct lov_mds_md_v1 *)0)->lmm_pattern) == 4, "found %lld\n", + (long long)(int)sizeof(((struct lov_mds_md_v1 *)0)->lmm_pattern)); + LASSERTF((int)offsetof(struct lov_mds_md_v1, lmm_oi) == 8, "found %lld\n", + (long long)(int)offsetof(struct lov_mds_md_v1, lmm_oi)); + LASSERTF((int)sizeof(((struct lov_mds_md_v1 *)0)->lmm_oi) == 16, "found %lld\n", + (long long)(int)sizeof(((struct lov_mds_md_v1 *)0)->lmm_oi)); + LASSERTF((int)offsetof(struct lov_mds_md_v1, lmm_stripe_size) == 24, "found %lld\n", + (long long)(int)offsetof(struct lov_mds_md_v1, lmm_stripe_size)); + LASSERTF((int)sizeof(((struct lov_mds_md_v1 *)0)->lmm_stripe_size) == 4, "found %lld\n", + (long long)(int)sizeof(((struct lov_mds_md_v1 *)0)->lmm_stripe_size)); + LASSERTF((int)offsetof(struct lov_mds_md_v1, lmm_stripe_count) == 28, "found %lld\n", + (long long)(int)offsetof(struct lov_mds_md_v1, lmm_stripe_count)); + LASSERTF((int)sizeof(((struct lov_mds_md_v1 *)0)->lmm_stripe_count) == 2, "found %lld\n", + (long long)(int)sizeof(((struct lov_mds_md_v1 *)0)->lmm_stripe_count)); + LASSERTF((int)offsetof(struct lov_mds_md_v1, lmm_layout_gen) == 30, "found %lld\n", + (long long)(int)offsetof(struct lov_mds_md_v1, lmm_layout_gen)); + LASSERTF((int)sizeof(((struct lov_mds_md_v1 *)0)->lmm_layout_gen) == 2, "found %lld\n", + (long long)(int)sizeof(((struct lov_mds_md_v1 *)0)->lmm_layout_gen)); + LASSERTF((int)offsetof(struct lov_mds_md_v1, lmm_objects[0]) == 32, "found %lld\n", + (long long)(int)offsetof(struct lov_mds_md_v1, lmm_objects[0])); + LASSERTF((int)sizeof(((struct lov_mds_md_v1 *)0)->lmm_objects[0]) == 24, "found %lld\n", + (long long)(int)sizeof(((struct lov_mds_md_v1 *)0)->lmm_objects[0])); + CLASSERT(LOV_MAGIC_V1 == 0x0BD10BD0); + + /* Checks for struct lov_mds_md_v3 */ + LASSERTF((int)sizeof(struct lov_mds_md_v3) == 48, "found %lld\n", + (long long)(int)sizeof(struct lov_mds_md_v3)); + LASSERTF((int)offsetof(struct lov_mds_md_v3, lmm_magic) == 0, "found %lld\n", + (long long)(int)offsetof(struct lov_mds_md_v3, lmm_magic)); + LASSERTF((int)sizeof(((struct lov_mds_md_v3 *)0)->lmm_magic) == 4, "found %lld\n", + (long long)(int)sizeof(((struct lov_mds_md_v3 *)0)->lmm_magic)); + LASSERTF((int)offsetof(struct lov_mds_md_v3, lmm_pattern) == 4, "found %lld\n", + (long long)(int)offsetof(struct lov_mds_md_v3, lmm_pattern)); + LASSERTF((int)sizeof(((struct lov_mds_md_v3 *)0)->lmm_pattern) == 4, "found %lld\n", + (long long)(int)sizeof(((struct lov_mds_md_v3 *)0)->lmm_pattern)); + LASSERTF((int)offsetof(struct lov_mds_md_v3, lmm_oi) == 8, "found %lld\n", + (long long)(int)offsetof(struct lov_mds_md_v3, lmm_oi)); + LASSERTF((int)sizeof(((struct lov_mds_md_v3 *)0)->lmm_oi) == 16, "found %lld\n", + (long long)(int)sizeof(((struct lov_mds_md_v3 *)0)->lmm_oi)); + LASSERTF((int)offsetof(struct lov_mds_md_v3, lmm_stripe_size) == 24, "found %lld\n", + (long long)(int)offsetof(struct lov_mds_md_v3, lmm_stripe_size)); + LASSERTF((int)sizeof(((struct lov_mds_md_v3 *)0)->lmm_stripe_size) == 4, "found %lld\n", + (long long)(int)sizeof(((struct lov_mds_md_v3 *)0)->lmm_stripe_size)); + LASSERTF((int)offsetof(struct lov_mds_md_v3, lmm_stripe_count) == 28, "found %lld\n", + (long long)(int)offsetof(struct lov_mds_md_v3, lmm_stripe_count)); + LASSERTF((int)sizeof(((struct lov_mds_md_v3 *)0)->lmm_stripe_count) == 2, "found %lld\n", + (long long)(int)sizeof(((struct lov_mds_md_v3 *)0)->lmm_stripe_count)); + LASSERTF((int)offsetof(struct lov_mds_md_v3, lmm_layout_gen) == 30, "found %lld\n", + (long long)(int)offsetof(struct lov_mds_md_v3, lmm_layout_gen)); + LASSERTF((int)sizeof(((struct lov_mds_md_v3 *)0)->lmm_layout_gen) == 2, "found %lld\n", + (long long)(int)sizeof(((struct lov_mds_md_v3 *)0)->lmm_layout_gen)); + CLASSERT(LOV_MAXPOOLNAME == 16); + LASSERTF((int)offsetof(struct lov_mds_md_v3, lmm_pool_name[16]) == 48, "found %lld\n", + (long long)(int)offsetof(struct lov_mds_md_v3, lmm_pool_name[16])); + LASSERTF((int)sizeof(((struct lov_mds_md_v3 *)0)->lmm_pool_name[16]) == 1, "found %lld\n", + (long long)(int)sizeof(((struct lov_mds_md_v3 *)0)->lmm_pool_name[16])); + LASSERTF((int)offsetof(struct lov_mds_md_v3, lmm_objects[0]) == 48, "found %lld\n", + (long long)(int)offsetof(struct lov_mds_md_v3, lmm_objects[0])); + LASSERTF((int)sizeof(((struct lov_mds_md_v3 *)0)->lmm_objects[0]) == 24, "found %lld\n", + (long long)(int)sizeof(((struct lov_mds_md_v3 *)0)->lmm_objects[0])); + CLASSERT(LOV_MAGIC_V3 == 0x0BD30BD0); + LASSERTF(LOV_PATTERN_RAID0 == 0x00000001UL, "found 0x%.8xUL\n", + (unsigned)LOV_PATTERN_RAID0); + LASSERTF(LOV_PATTERN_RAID1 == 0x00000002UL, "found 0x%.8xUL\n", + (unsigned)LOV_PATTERN_RAID1); + LASSERTF(LOV_PATTERN_FIRST == 0x00000100UL, "found 0x%.8xUL\n", + (unsigned)LOV_PATTERN_FIRST); + LASSERTF(LOV_PATTERN_CMOBD == 0x00000200UL, "found 0x%.8xUL\n", + (unsigned)LOV_PATTERN_CMOBD); + + /* Checks for struct obd_statfs */ + LASSERTF((int)sizeof(struct obd_statfs) == 144, "found %lld\n", + (long long)(int)sizeof(struct obd_statfs)); + LASSERTF((int)offsetof(struct obd_statfs, os_type) == 0, "found %lld\n", + (long long)(int)offsetof(struct obd_statfs, os_type)); + LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_type) == 8, "found %lld\n", + (long long)(int)sizeof(((struct obd_statfs *)0)->os_type)); + LASSERTF((int)offsetof(struct obd_statfs, os_blocks) == 8, "found %lld\n", + (long long)(int)offsetof(struct obd_statfs, os_blocks)); + LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_blocks) == 8, "found %lld\n", + (long long)(int)sizeof(((struct obd_statfs *)0)->os_blocks)); + LASSERTF((int)offsetof(struct obd_statfs, os_bfree) == 16, "found %lld\n", + (long long)(int)offsetof(struct obd_statfs, os_bfree)); + LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_bfree) == 8, "found %lld\n", + (long long)(int)sizeof(((struct obd_statfs *)0)->os_bfree)); + LASSERTF((int)offsetof(struct obd_statfs, os_bavail) == 24, "found %lld\n", + (long long)(int)offsetof(struct obd_statfs, os_bavail)); + LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_bavail) == 8, "found %lld\n", + (long long)(int)sizeof(((struct obd_statfs *)0)->os_bavail)); + LASSERTF((int)offsetof(struct obd_statfs, os_ffree) == 40, "found %lld\n", + (long long)(int)offsetof(struct obd_statfs, os_ffree)); + LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_ffree) == 8, "found %lld\n", + (long long)(int)sizeof(((struct obd_statfs *)0)->os_ffree)); + LASSERTF((int)offsetof(struct obd_statfs, os_fsid) == 48, "found %lld\n", + (long long)(int)offsetof(struct obd_statfs, os_fsid)); + LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_fsid) == 40, "found %lld\n", + (long long)(int)sizeof(((struct obd_statfs *)0)->os_fsid)); + LASSERTF((int)offsetof(struct obd_statfs, os_bsize) == 88, "found %lld\n", + (long long)(int)offsetof(struct obd_statfs, os_bsize)); + LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_bsize) == 4, "found %lld\n", + (long long)(int)sizeof(((struct obd_statfs *)0)->os_bsize)); + LASSERTF((int)offsetof(struct obd_statfs, os_namelen) == 92, "found %lld\n", + (long long)(int)offsetof(struct obd_statfs, os_namelen)); + LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_namelen) == 4, "found %lld\n", + (long long)(int)sizeof(((struct obd_statfs *)0)->os_namelen)); + LASSERTF((int)offsetof(struct obd_statfs, os_state) == 104, "found %lld\n", + (long long)(int)offsetof(struct obd_statfs, os_state)); + LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_state) == 4, "found %lld\n", + (long long)(int)sizeof(((struct obd_statfs *)0)->os_state)); + LASSERTF((int)offsetof(struct obd_statfs, os_fprecreated) == 108, "found %lld\n", + (long long)(int)offsetof(struct obd_statfs, os_fprecreated)); + LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_fprecreated) == 4, "found %lld\n", + (long long)(int)sizeof(((struct obd_statfs *)0)->os_fprecreated)); + LASSERTF((int)offsetof(struct obd_statfs, os_spare2) == 112, "found %lld\n", + (long long)(int)offsetof(struct obd_statfs, os_spare2)); + LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_spare2) == 4, "found %lld\n", + (long long)(int)sizeof(((struct obd_statfs *)0)->os_spare2)); + LASSERTF((int)offsetof(struct obd_statfs, os_spare3) == 116, "found %lld\n", + (long long)(int)offsetof(struct obd_statfs, os_spare3)); + LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_spare3) == 4, "found %lld\n", + (long long)(int)sizeof(((struct obd_statfs *)0)->os_spare3)); + LASSERTF((int)offsetof(struct obd_statfs, os_spare4) == 120, "found %lld\n", + (long long)(int)offsetof(struct obd_statfs, os_spare4)); + LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_spare4) == 4, "found %lld\n", + (long long)(int)sizeof(((struct obd_statfs *)0)->os_spare4)); + LASSERTF((int)offsetof(struct obd_statfs, os_spare5) == 124, "found %lld\n", + (long long)(int)offsetof(struct obd_statfs, os_spare5)); + LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_spare5) == 4, "found %lld\n", + (long long)(int)sizeof(((struct obd_statfs *)0)->os_spare5)); + LASSERTF((int)offsetof(struct obd_statfs, os_spare6) == 128, "found %lld\n", + (long long)(int)offsetof(struct obd_statfs, os_spare6)); + LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_spare6) == 4, "found %lld\n", + (long long)(int)sizeof(((struct obd_statfs *)0)->os_spare6)); + LASSERTF((int)offsetof(struct obd_statfs, os_spare7) == 132, "found %lld\n", + (long long)(int)offsetof(struct obd_statfs, os_spare7)); + LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_spare7) == 4, "found %lld\n", + (long long)(int)sizeof(((struct obd_statfs *)0)->os_spare7)); + LASSERTF((int)offsetof(struct obd_statfs, os_spare8) == 136, "found %lld\n", + (long long)(int)offsetof(struct obd_statfs, os_spare8)); + LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_spare8) == 4, "found %lld\n", + (long long)(int)sizeof(((struct obd_statfs *)0)->os_spare8)); + LASSERTF((int)offsetof(struct obd_statfs, os_spare9) == 140, "found %lld\n", + (long long)(int)offsetof(struct obd_statfs, os_spare9)); + LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_spare9) == 4, "found %lld\n", + (long long)(int)sizeof(((struct obd_statfs *)0)->os_spare9)); + + /* Checks for struct obd_ioobj */ + LASSERTF((int)sizeof(struct obd_ioobj) == 24, "found %lld\n", + (long long)(int)sizeof(struct obd_ioobj)); + LASSERTF((int)offsetof(struct obd_ioobj, ioo_oid) == 0, "found %lld\n", + (long long)(int)offsetof(struct obd_ioobj, ioo_oid)); + LASSERTF((int)sizeof(((struct obd_ioobj *)0)->ioo_oid) == 16, "found %lld\n", + (long long)(int)sizeof(((struct obd_ioobj *)0)->ioo_oid)); + LASSERTF((int)offsetof(struct obd_ioobj, ioo_max_brw) == 16, "found %lld\n", + (long long)(int)offsetof(struct obd_ioobj, ioo_max_brw)); + LASSERTF((int)sizeof(((struct obd_ioobj *)0)->ioo_max_brw) == 4, "found %lld\n", + (long long)(int)sizeof(((struct obd_ioobj *)0)->ioo_max_brw)); + LASSERTF((int)offsetof(struct obd_ioobj, ioo_bufcnt) == 20, "found %lld\n", + (long long)(int)offsetof(struct obd_ioobj, ioo_bufcnt)); + LASSERTF((int)sizeof(((struct obd_ioobj *)0)->ioo_bufcnt) == 4, "found %lld\n", + (long long)(int)sizeof(((struct obd_ioobj *)0)->ioo_bufcnt)); + + /* Checks for union lquota_id */ + LASSERTF((int)sizeof(union lquota_id) == 16, "found %lld\n", + (long long)(int)sizeof(union lquota_id)); + + LASSERTF(QUOTABLOCK_BITS == 10, "found %lld\n", + (long long)QUOTABLOCK_BITS); + LASSERTF(QUOTABLOCK_SIZE == 1024, "found %lld\n", + (long long)QUOTABLOCK_SIZE); + + /* Checks for struct obd_quotactl */ + LASSERTF((int)sizeof(struct obd_quotactl) == 112, "found %lld\n", + (long long)(int)sizeof(struct obd_quotactl)); + LASSERTF((int)offsetof(struct obd_quotactl, qc_cmd) == 0, "found %lld\n", + (long long)(int)offsetof(struct obd_quotactl, qc_cmd)); + LASSERTF((int)sizeof(((struct obd_quotactl *)0)->qc_cmd) == 4, "found %lld\n", + (long long)(int)sizeof(((struct obd_quotactl *)0)->qc_cmd)); + LASSERTF((int)offsetof(struct obd_quotactl, qc_type) == 4, "found %lld\n", + (long long)(int)offsetof(struct obd_quotactl, qc_type)); + LASSERTF((int)sizeof(((struct obd_quotactl *)0)->qc_type) == 4, "found %lld\n", + (long long)(int)sizeof(((struct obd_quotactl *)0)->qc_type)); + LASSERTF((int)offsetof(struct obd_quotactl, qc_id) == 8, "found %lld\n", + (long long)(int)offsetof(struct obd_quotactl, qc_id)); + LASSERTF((int)sizeof(((struct obd_quotactl *)0)->qc_id) == 4, "found %lld\n", + (long long)(int)sizeof(((struct obd_quotactl *)0)->qc_id)); + LASSERTF((int)offsetof(struct obd_quotactl, qc_stat) == 12, "found %lld\n", + (long long)(int)offsetof(struct obd_quotactl, qc_stat)); + LASSERTF((int)sizeof(((struct obd_quotactl *)0)->qc_stat) == 4, "found %lld\n", + (long long)(int)sizeof(((struct obd_quotactl *)0)->qc_stat)); + LASSERTF((int)offsetof(struct obd_quotactl, qc_dqinfo) == 16, "found %lld\n", + (long long)(int)offsetof(struct obd_quotactl, qc_dqinfo)); + LASSERTF((int)sizeof(((struct obd_quotactl *)0)->qc_dqinfo) == 24, "found %lld\n", + (long long)(int)sizeof(((struct obd_quotactl *)0)->qc_dqinfo)); + LASSERTF((int)offsetof(struct obd_quotactl, qc_dqblk) == 40, "found %lld\n", + (long long)(int)offsetof(struct obd_quotactl, qc_dqblk)); + LASSERTF((int)sizeof(((struct obd_quotactl *)0)->qc_dqblk) == 72, "found %lld\n", + (long long)(int)sizeof(((struct obd_quotactl *)0)->qc_dqblk)); + + /* Checks for struct obd_dqinfo */ + LASSERTF((int)sizeof(struct obd_dqinfo) == 24, "found %lld\n", + (long long)(int)sizeof(struct obd_dqinfo)); + LASSERTF((int)offsetof(struct obd_dqinfo, dqi_bgrace) == 0, "found %lld\n", + (long long)(int)offsetof(struct obd_dqinfo, dqi_bgrace)); + LASSERTF((int)sizeof(((struct obd_dqinfo *)0)->dqi_bgrace) == 8, "found %lld\n", + (long long)(int)sizeof(((struct obd_dqinfo *)0)->dqi_bgrace)); + LASSERTF((int)offsetof(struct obd_dqinfo, dqi_igrace) == 8, "found %lld\n", + (long long)(int)offsetof(struct obd_dqinfo, dqi_igrace)); + LASSERTF((int)sizeof(((struct obd_dqinfo *)0)->dqi_igrace) == 8, "found %lld\n", + (long long)(int)sizeof(((struct obd_dqinfo *)0)->dqi_igrace)); + LASSERTF((int)offsetof(struct obd_dqinfo, dqi_flags) == 16, "found %lld\n", + (long long)(int)offsetof(struct obd_dqinfo, dqi_flags)); + LASSERTF((int)sizeof(((struct obd_dqinfo *)0)->dqi_flags) == 4, "found %lld\n", + (long long)(int)sizeof(((struct obd_dqinfo *)0)->dqi_flags)); + LASSERTF((int)offsetof(struct obd_dqinfo, dqi_valid) == 20, "found %lld\n", + (long long)(int)offsetof(struct obd_dqinfo, dqi_valid)); + LASSERTF((int)sizeof(((struct obd_dqinfo *)0)->dqi_valid) == 4, "found %lld\n", + (long long)(int)sizeof(((struct obd_dqinfo *)0)->dqi_valid)); + + /* Checks for struct obd_dqblk */ + LASSERTF((int)sizeof(struct obd_dqblk) == 72, "found %lld\n", + (long long)(int)sizeof(struct obd_dqblk)); + LASSERTF((int)offsetof(struct obd_dqblk, dqb_bhardlimit) == 0, "found %lld\n", + (long long)(int)offsetof(struct obd_dqblk, dqb_bhardlimit)); + LASSERTF((int)sizeof(((struct obd_dqblk *)0)->dqb_bhardlimit) == 8, "found %lld\n", + (long long)(int)sizeof(((struct obd_dqblk *)0)->dqb_bhardlimit)); + LASSERTF((int)offsetof(struct obd_dqblk, dqb_bsoftlimit) == 8, "found %lld\n", + (long long)(int)offsetof(struct obd_dqblk, dqb_bsoftlimit)); + LASSERTF((int)sizeof(((struct obd_dqblk *)0)->dqb_bsoftlimit) == 8, "found %lld\n", + (long long)(int)sizeof(((struct obd_dqblk *)0)->dqb_bsoftlimit)); + LASSERTF((int)offsetof(struct obd_dqblk, dqb_curspace) == 16, "found %lld\n", + (long long)(int)offsetof(struct obd_dqblk, dqb_curspace)); + LASSERTF((int)sizeof(((struct obd_dqblk *)0)->dqb_curspace) == 8, "found %lld\n", + (long long)(int)sizeof(((struct obd_dqblk *)0)->dqb_curspace)); + LASSERTF((int)offsetof(struct obd_dqblk, dqb_ihardlimit) == 24, "found %lld\n", + (long long)(int)offsetof(struct obd_dqblk, dqb_ihardlimit)); + LASSERTF((int)sizeof(((struct obd_dqblk *)0)->dqb_ihardlimit) == 8, "found %lld\n", + (long long)(int)sizeof(((struct obd_dqblk *)0)->dqb_ihardlimit)); + LASSERTF((int)offsetof(struct obd_dqblk, dqb_isoftlimit) == 32, "found %lld\n", + (long long)(int)offsetof(struct obd_dqblk, dqb_isoftlimit)); + LASSERTF((int)sizeof(((struct obd_dqblk *)0)->dqb_isoftlimit) == 8, "found %lld\n", + (long long)(int)sizeof(((struct obd_dqblk *)0)->dqb_isoftlimit)); + LASSERTF((int)offsetof(struct obd_dqblk, dqb_curinodes) == 40, "found %lld\n", + (long long)(int)offsetof(struct obd_dqblk, dqb_curinodes)); + LASSERTF((int)sizeof(((struct obd_dqblk *)0)->dqb_curinodes) == 8, "found %lld\n", + (long long)(int)sizeof(((struct obd_dqblk *)0)->dqb_curinodes)); + LASSERTF((int)offsetof(struct obd_dqblk, dqb_btime) == 48, "found %lld\n", + (long long)(int)offsetof(struct obd_dqblk, dqb_btime)); + LASSERTF((int)sizeof(((struct obd_dqblk *)0)->dqb_btime) == 8, "found %lld\n", + (long long)(int)sizeof(((struct obd_dqblk *)0)->dqb_btime)); + LASSERTF((int)offsetof(struct obd_dqblk, dqb_itime) == 56, "found %lld\n", + (long long)(int)offsetof(struct obd_dqblk, dqb_itime)); + LASSERTF((int)sizeof(((struct obd_dqblk *)0)->dqb_itime) == 8, "found %lld\n", + (long long)(int)sizeof(((struct obd_dqblk *)0)->dqb_itime)); + LASSERTF((int)offsetof(struct obd_dqblk, dqb_valid) == 64, "found %lld\n", + (long long)(int)offsetof(struct obd_dqblk, dqb_valid)); + LASSERTF((int)sizeof(((struct obd_dqblk *)0)->dqb_valid) == 4, "found %lld\n", + (long long)(int)sizeof(((struct obd_dqblk *)0)->dqb_valid)); + LASSERTF((int)offsetof(struct obd_dqblk, dqb_padding) == 68, "found %lld\n", + (long long)(int)offsetof(struct obd_dqblk, dqb_padding)); + LASSERTF((int)sizeof(((struct obd_dqblk *)0)->dqb_padding) == 4, "found %lld\n", + (long long)(int)sizeof(((struct obd_dqblk *)0)->dqb_padding)); + LASSERTF(Q_QUOTACHECK == 0x800100, "found 0x%.8x\n", + Q_QUOTACHECK); + LASSERTF(Q_INITQUOTA == 0x800101, "found 0x%.8x\n", + Q_INITQUOTA); + LASSERTF(Q_GETOINFO == 0x800102, "found 0x%.8x\n", + Q_GETOINFO); + LASSERTF(Q_GETOQUOTA == 0x800103, "found 0x%.8x\n", + Q_GETOQUOTA); + LASSERTF(Q_FINVALIDATE == 0x800104, "found 0x%.8x\n", + Q_FINVALIDATE); + + /* Checks for struct lquota_acct_rec */ + LASSERTF((int)sizeof(struct lquota_acct_rec) == 16, "found %lld\n", + (long long)(int)sizeof(struct lquota_acct_rec)); + LASSERTF((int)offsetof(struct lquota_acct_rec, bspace) == 0, "found %lld\n", + (long long)(int)offsetof(struct lquota_acct_rec, bspace)); + LASSERTF((int)sizeof(((struct lquota_acct_rec *)0)->bspace) == 8, "found %lld\n", + (long long)(int)sizeof(((struct lquota_acct_rec *)0)->bspace)); + LASSERTF((int)offsetof(struct lquota_acct_rec, ispace) == 8, "found %lld\n", + (long long)(int)offsetof(struct lquota_acct_rec, ispace)); + LASSERTF((int)sizeof(((struct lquota_acct_rec *)0)->ispace) == 8, "found %lld\n", + (long long)(int)sizeof(((struct lquota_acct_rec *)0)->ispace)); + + /* Checks for struct lquota_glb_rec */ + LASSERTF((int)sizeof(struct lquota_glb_rec) == 32, "found %lld\n", + (long long)(int)sizeof(struct lquota_glb_rec)); + LASSERTF((int)offsetof(struct lquota_glb_rec, qbr_hardlimit) == 0, "found %lld\n", + (long long)(int)offsetof(struct lquota_glb_rec, qbr_hardlimit)); + LASSERTF((int)sizeof(((struct lquota_glb_rec *)0)->qbr_hardlimit) == 8, "found %lld\n", + (long long)(int)sizeof(((struct lquota_glb_rec *)0)->qbr_hardlimit)); + LASSERTF((int)offsetof(struct lquota_glb_rec, qbr_softlimit) == 8, "found %lld\n", + (long long)(int)offsetof(struct lquota_glb_rec, qbr_softlimit)); + LASSERTF((int)sizeof(((struct lquota_glb_rec *)0)->qbr_softlimit) == 8, "found %lld\n", + (long long)(int)sizeof(((struct lquota_glb_rec *)0)->qbr_softlimit)); + LASSERTF((int)offsetof(struct lquota_glb_rec, qbr_time) == 16, "found %lld\n", + (long long)(int)offsetof(struct lquota_glb_rec, qbr_time)); + LASSERTF((int)sizeof(((struct lquota_glb_rec *)0)->qbr_time) == 8, "found %lld\n", + (long long)(int)sizeof(((struct lquota_glb_rec *)0)->qbr_time)); + LASSERTF((int)offsetof(struct lquota_glb_rec, qbr_granted) == 24, "found %lld\n", + (long long)(int)offsetof(struct lquota_glb_rec, qbr_granted)); + LASSERTF((int)sizeof(((struct lquota_glb_rec *)0)->qbr_granted) == 8, "found %lld\n", + (long long)(int)sizeof(((struct lquota_glb_rec *)0)->qbr_granted)); + + /* Checks for struct lquota_slv_rec */ + LASSERTF((int)sizeof(struct lquota_slv_rec) == 8, "found %lld\n", + (long long)(int)sizeof(struct lquota_slv_rec)); + LASSERTF((int)offsetof(struct lquota_slv_rec, qsr_granted) == 0, "found %lld\n", + (long long)(int)offsetof(struct lquota_slv_rec, qsr_granted)); + LASSERTF((int)sizeof(((struct lquota_slv_rec *)0)->qsr_granted) == 8, "found %lld\n", + (long long)(int)sizeof(((struct lquota_slv_rec *)0)->qsr_granted)); + + /* Checks for struct idx_info */ + LASSERTF((int)sizeof(struct idx_info) == 80, "found %lld\n", + (long long)(int)sizeof(struct idx_info)); + LASSERTF((int)offsetof(struct idx_info, ii_magic) == 0, "found %lld\n", + (long long)(int)offsetof(struct idx_info, ii_magic)); + LASSERTF((int)sizeof(((struct idx_info *)0)->ii_magic) == 4, "found %lld\n", + (long long)(int)sizeof(((struct idx_info *)0)->ii_magic)); + LASSERTF((int)offsetof(struct idx_info, ii_flags) == 4, "found %lld\n", + (long long)(int)offsetof(struct idx_info, ii_flags)); + LASSERTF((int)sizeof(((struct idx_info *)0)->ii_flags) == 4, "found %lld\n", + (long long)(int)sizeof(((struct idx_info *)0)->ii_flags)); + LASSERTF((int)offsetof(struct idx_info, ii_count) == 8, "found %lld\n", + (long long)(int)offsetof(struct idx_info, ii_count)); + LASSERTF((int)sizeof(((struct idx_info *)0)->ii_count) == 2, "found %lld\n", + (long long)(int)sizeof(((struct idx_info *)0)->ii_count)); + LASSERTF((int)offsetof(struct idx_info, ii_pad0) == 10, "found %lld\n", + (long long)(int)offsetof(struct idx_info, ii_pad0)); + LASSERTF((int)sizeof(((struct idx_info *)0)->ii_pad0) == 2, "found %lld\n", + (long long)(int)sizeof(((struct idx_info *)0)->ii_pad0)); + LASSERTF((int)offsetof(struct idx_info, ii_attrs) == 12, "found %lld\n", + (long long)(int)offsetof(struct idx_info, ii_attrs)); + LASSERTF((int)sizeof(((struct idx_info *)0)->ii_attrs) == 4, "found %lld\n", + (long long)(int)sizeof(((struct idx_info *)0)->ii_attrs)); + LASSERTF((int)offsetof(struct idx_info, ii_fid) == 16, "found %lld\n", + (long long)(int)offsetof(struct idx_info, ii_fid)); + LASSERTF((int)sizeof(((struct idx_info *)0)->ii_fid) == 16, "found %lld\n", + (long long)(int)sizeof(((struct idx_info *)0)->ii_fid)); + LASSERTF((int)offsetof(struct idx_info, ii_version) == 32, "found %lld\n", + (long long)(int)offsetof(struct idx_info, ii_version)); + LASSERTF((int)sizeof(((struct idx_info *)0)->ii_version) == 8, "found %lld\n", + (long long)(int)sizeof(((struct idx_info *)0)->ii_version)); + LASSERTF((int)offsetof(struct idx_info, ii_hash_start) == 40, "found %lld\n", + (long long)(int)offsetof(struct idx_info, ii_hash_start)); + LASSERTF((int)sizeof(((struct idx_info *)0)->ii_hash_start) == 8, "found %lld\n", + (long long)(int)sizeof(((struct idx_info *)0)->ii_hash_start)); + LASSERTF((int)offsetof(struct idx_info, ii_hash_end) == 48, "found %lld\n", + (long long)(int)offsetof(struct idx_info, ii_hash_end)); + LASSERTF((int)sizeof(((struct idx_info *)0)->ii_hash_end) == 8, "found %lld\n", + (long long)(int)sizeof(((struct idx_info *)0)->ii_hash_end)); + LASSERTF((int)offsetof(struct idx_info, ii_keysize) == 56, "found %lld\n", + (long long)(int)offsetof(struct idx_info, ii_keysize)); + LASSERTF((int)sizeof(((struct idx_info *)0)->ii_keysize) == 2, "found %lld\n", + (long long)(int)sizeof(((struct idx_info *)0)->ii_keysize)); + LASSERTF((int)offsetof(struct idx_info, ii_recsize) == 58, "found %lld\n", + (long long)(int)offsetof(struct idx_info, ii_recsize)); + LASSERTF((int)sizeof(((struct idx_info *)0)->ii_recsize) == 2, "found %lld\n", + (long long)(int)sizeof(((struct idx_info *)0)->ii_recsize)); + LASSERTF((int)offsetof(struct idx_info, ii_pad1) == 60, "found %lld\n", + (long long)(int)offsetof(struct idx_info, ii_pad1)); + LASSERTF((int)sizeof(((struct idx_info *)0)->ii_pad1) == 4, "found %lld\n", + (long long)(int)sizeof(((struct idx_info *)0)->ii_pad1)); + LASSERTF((int)offsetof(struct idx_info, ii_pad2) == 64, "found %lld\n", + (long long)(int)offsetof(struct idx_info, ii_pad2)); + LASSERTF((int)sizeof(((struct idx_info *)0)->ii_pad2) == 8, "found %lld\n", + (long long)(int)sizeof(((struct idx_info *)0)->ii_pad2)); + LASSERTF((int)offsetof(struct idx_info, ii_pad3) == 72, "found %lld\n", + (long long)(int)offsetof(struct idx_info, ii_pad3)); + LASSERTF((int)sizeof(((struct idx_info *)0)->ii_pad3) == 8, "found %lld\n", + (long long)(int)sizeof(((struct idx_info *)0)->ii_pad3)); + CLASSERT(IDX_INFO_MAGIC == 0x3D37CC37); + + /* Checks for struct lu_idxpage */ + LASSERTF((int)sizeof(struct lu_idxpage) == 16, "found %lld\n", + (long long)(int)sizeof(struct lu_idxpage)); + LASSERTF((int)offsetof(struct lu_idxpage, lip_magic) == 0, "found %lld\n", + (long long)(int)offsetof(struct lu_idxpage, lip_magic)); + LASSERTF((int)sizeof(((struct lu_idxpage *)0)->lip_magic) == 4, "found %lld\n", + (long long)(int)sizeof(((struct lu_idxpage *)0)->lip_magic)); + LASSERTF((int)offsetof(struct lu_idxpage, lip_flags) == 4, "found %lld\n", + (long long)(int)offsetof(struct lu_idxpage, lip_flags)); + LASSERTF((int)sizeof(((struct lu_idxpage *)0)->lip_flags) == 2, "found %lld\n", + (long long)(int)sizeof(((struct lu_idxpage *)0)->lip_flags)); + LASSERTF((int)offsetof(struct lu_idxpage, lip_nr) == 6, "found %lld\n", + (long long)(int)offsetof(struct lu_idxpage, lip_nr)); + LASSERTF((int)sizeof(((struct lu_idxpage *)0)->lip_nr) == 2, "found %lld\n", + (long long)(int)sizeof(((struct lu_idxpage *)0)->lip_nr)); + LASSERTF((int)offsetof(struct lu_idxpage, lip_pad0) == 8, "found %lld\n", + (long long)(int)offsetof(struct lu_idxpage, lip_pad0)); + LASSERTF((int)sizeof(((struct lu_idxpage *)0)->lip_pad0) == 8, "found %lld\n", + (long long)(int)sizeof(((struct lu_idxpage *)0)->lip_pad0)); + CLASSERT(LIP_MAGIC == 0x8A6D6B6C); + LASSERTF(LIP_HDR_SIZE == 16, "found %lld\n", + (long long)LIP_HDR_SIZE); + LASSERTF(II_FL_NOHASH == 1, "found %lld\n", + (long long)II_FL_NOHASH); + LASSERTF(II_FL_VARKEY == 2, "found %lld\n", + (long long)II_FL_VARKEY); + LASSERTF(II_FL_VARREC == 4, "found %lld\n", + (long long)II_FL_VARREC); + LASSERTF(II_FL_NONUNQ == 8, "found %lld\n", + (long long)II_FL_NONUNQ); + + /* Checks for struct niobuf_remote */ + LASSERTF((int)sizeof(struct niobuf_remote) == 16, "found %lld\n", + (long long)(int)sizeof(struct niobuf_remote)); + LASSERTF((int)offsetof(struct niobuf_remote, offset) == 0, "found %lld\n", + (long long)(int)offsetof(struct niobuf_remote, offset)); + LASSERTF((int)sizeof(((struct niobuf_remote *)0)->offset) == 8, "found %lld\n", + (long long)(int)sizeof(((struct niobuf_remote *)0)->offset)); + LASSERTF((int)offsetof(struct niobuf_remote, len) == 8, "found %lld\n", + (long long)(int)offsetof(struct niobuf_remote, len)); + LASSERTF((int)sizeof(((struct niobuf_remote *)0)->len) == 4, "found %lld\n", + (long long)(int)sizeof(((struct niobuf_remote *)0)->len)); + LASSERTF((int)offsetof(struct niobuf_remote, flags) == 12, "found %lld\n", + (long long)(int)offsetof(struct niobuf_remote, flags)); + LASSERTF((int)sizeof(((struct niobuf_remote *)0)->flags) == 4, "found %lld\n", + (long long)(int)sizeof(((struct niobuf_remote *)0)->flags)); + LASSERTF(OBD_BRW_READ == 0x01, "found 0x%.8x\n", + OBD_BRW_READ); + LASSERTF(OBD_BRW_WRITE == 0x02, "found 0x%.8x\n", + OBD_BRW_WRITE); + LASSERTF(OBD_BRW_SYNC == 0x08, "found 0x%.8x\n", + OBD_BRW_SYNC); + LASSERTF(OBD_BRW_CHECK == 0x10, "found 0x%.8x\n", + OBD_BRW_CHECK); + LASSERTF(OBD_BRW_FROM_GRANT == 0x20, "found 0x%.8x\n", + OBD_BRW_FROM_GRANT); + LASSERTF(OBD_BRW_GRANTED == 0x40, "found 0x%.8x\n", + OBD_BRW_GRANTED); + LASSERTF(OBD_BRW_NOCACHE == 0x80, "found 0x%.8x\n", + OBD_BRW_NOCACHE); + LASSERTF(OBD_BRW_NOQUOTA == 0x100, "found 0x%.8x\n", + OBD_BRW_NOQUOTA); + LASSERTF(OBD_BRW_SRVLOCK == 0x200, "found 0x%.8x\n", + OBD_BRW_SRVLOCK); + LASSERTF(OBD_BRW_ASYNC == 0x400, "found 0x%.8x\n", + OBD_BRW_ASYNC); + LASSERTF(OBD_BRW_MEMALLOC == 0x800, "found 0x%.8x\n", + OBD_BRW_MEMALLOC); + + /* Checks for struct ost_body */ + LASSERTF((int)sizeof(struct ost_body) == 208, "found %lld\n", + (long long)(int)sizeof(struct ost_body)); + LASSERTF((int)offsetof(struct ost_body, oa) == 0, "found %lld\n", + (long long)(int)offsetof(struct ost_body, oa)); + LASSERTF((int)sizeof(((struct ost_body *)0)->oa) == 208, "found %lld\n", + (long long)(int)sizeof(((struct ost_body *)0)->oa)); + + /* Checks for struct ll_fid */ + LASSERTF((int)sizeof(struct ll_fid) == 16, "found %lld\n", + (long long)(int)sizeof(struct ll_fid)); + LASSERTF((int)offsetof(struct ll_fid, id) == 0, "found %lld\n", + (long long)(int)offsetof(struct ll_fid, id)); + LASSERTF((int)sizeof(((struct ll_fid *)0)->id) == 8, "found %lld\n", + (long long)(int)sizeof(((struct ll_fid *)0)->id)); + LASSERTF((int)offsetof(struct ll_fid, generation) == 8, "found %lld\n", + (long long)(int)offsetof(struct ll_fid, generation)); + LASSERTF((int)sizeof(((struct ll_fid *)0)->generation) == 4, "found %lld\n", + (long long)(int)sizeof(((struct ll_fid *)0)->generation)); + LASSERTF((int)offsetof(struct ll_fid, f_type) == 12, "found %lld\n", + (long long)(int)offsetof(struct ll_fid, f_type)); + LASSERTF((int)sizeof(((struct ll_fid *)0)->f_type) == 4, "found %lld\n", + (long long)(int)sizeof(((struct ll_fid *)0)->f_type)); + + /* Checks for struct mdt_body */ + LASSERTF((int)sizeof(struct mdt_body) == 216, "found %lld\n", + (long long)(int)sizeof(struct mdt_body)); + LASSERTF((int)offsetof(struct mdt_body, fid1) == 0, "found %lld\n", + (long long)(int)offsetof(struct mdt_body, fid1)); + LASSERTF((int)sizeof(((struct mdt_body *)0)->fid1) == 16, "found %lld\n", + (long long)(int)sizeof(((struct mdt_body *)0)->fid1)); + LASSERTF((int)offsetof(struct mdt_body, fid2) == 16, "found %lld\n", + (long long)(int)offsetof(struct mdt_body, fid2)); + LASSERTF((int)sizeof(((struct mdt_body *)0)->fid2) == 16, "found %lld\n", + (long long)(int)sizeof(((struct mdt_body *)0)->fid2)); + LASSERTF((int)offsetof(struct mdt_body, handle) == 32, "found %lld\n", + (long long)(int)offsetof(struct mdt_body, handle)); + LASSERTF((int)sizeof(((struct mdt_body *)0)->handle) == 8, "found %lld\n", + (long long)(int)sizeof(((struct mdt_body *)0)->handle)); + LASSERTF((int)offsetof(struct mdt_body, valid) == 40, "found %lld\n", + (long long)(int)offsetof(struct mdt_body, valid)); + LASSERTF((int)sizeof(((struct mdt_body *)0)->valid) == 8, "found %lld\n", + (long long)(int)sizeof(((struct mdt_body *)0)->valid)); + LASSERTF((int)offsetof(struct mdt_body, size) == 48, "found %lld\n", + (long long)(int)offsetof(struct mdt_body, size)); + LASSERTF((int)sizeof(((struct mdt_body *)0)->size) == 8, "found %lld\n", + (long long)(int)sizeof(((struct mdt_body *)0)->size)); + LASSERTF((int)offsetof(struct mdt_body, mtime) == 56, "found %lld\n", + (long long)(int)offsetof(struct mdt_body, mtime)); + LASSERTF((int)sizeof(((struct mdt_body *)0)->mtime) == 8, "found %lld\n", + (long long)(int)sizeof(((struct mdt_body *)0)->mtime)); + LASSERTF((int)offsetof(struct mdt_body, atime) == 64, "found %lld\n", + (long long)(int)offsetof(struct mdt_body, atime)); + LASSERTF((int)sizeof(((struct mdt_body *)0)->atime) == 8, "found %lld\n", + (long long)(int)sizeof(((struct mdt_body *)0)->atime)); + LASSERTF((int)offsetof(struct mdt_body, ctime) == 72, "found %lld\n", + (long long)(int)offsetof(struct mdt_body, ctime)); + LASSERTF((int)sizeof(((struct mdt_body *)0)->ctime) == 8, "found %lld\n", + (long long)(int)sizeof(((struct mdt_body *)0)->ctime)); + LASSERTF((int)offsetof(struct mdt_body, blocks) == 80, "found %lld\n", + (long long)(int)offsetof(struct mdt_body, blocks)); + LASSERTF((int)sizeof(((struct mdt_body *)0)->blocks) == 8, "found %lld\n", + (long long)(int)sizeof(((struct mdt_body *)0)->blocks)); + LASSERTF((int)offsetof(struct mdt_body, t_state) == 96, "found %lld\n", + (long long)(int)offsetof(struct mdt_body, t_state)); + LASSERTF((int)sizeof(((struct mdt_body *)0)->t_state) == 8, + "found %lld\n", + (long long)(int)sizeof(((struct mdt_body *)0)->t_state)); + LASSERTF((int)offsetof(struct mdt_body, fsuid) == 104, "found %lld\n", + (long long)(int)offsetof(struct mdt_body, fsuid)); + LASSERTF((int)sizeof(((struct mdt_body *)0)->fsuid) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_body *)0)->fsuid)); + LASSERTF((int)offsetof(struct mdt_body, fsgid) == 108, "found %lld\n", + (long long)(int)offsetof(struct mdt_body, fsgid)); + LASSERTF((int)sizeof(((struct mdt_body *)0)->fsgid) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_body *)0)->fsgid)); + LASSERTF((int)offsetof(struct mdt_body, capability) == 112, "found %lld\n", + (long long)(int)offsetof(struct mdt_body, capability)); + LASSERTF((int)sizeof(((struct mdt_body *)0)->capability) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_body *)0)->capability)); + LASSERTF((int)offsetof(struct mdt_body, mode) == 116, "found %lld\n", + (long long)(int)offsetof(struct mdt_body, mode)); + LASSERTF((int)sizeof(((struct mdt_body *)0)->mode) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_body *)0)->mode)); + LASSERTF((int)offsetof(struct mdt_body, uid) == 120, "found %lld\n", + (long long)(int)offsetof(struct mdt_body, uid)); + LASSERTF((int)sizeof(((struct mdt_body *)0)->uid) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_body *)0)->uid)); + LASSERTF((int)offsetof(struct mdt_body, gid) == 124, "found %lld\n", + (long long)(int)offsetof(struct mdt_body, gid)); + LASSERTF((int)sizeof(((struct mdt_body *)0)->gid) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_body *)0)->gid)); + LASSERTF((int)offsetof(struct mdt_body, flags) == 128, "found %lld\n", + (long long)(int)offsetof(struct mdt_body, flags)); + LASSERTF((int)sizeof(((struct mdt_body *)0)->flags) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_body *)0)->flags)); + LASSERTF((int)offsetof(struct mdt_body, rdev) == 132, "found %lld\n", + (long long)(int)offsetof(struct mdt_body, rdev)); + LASSERTF((int)sizeof(((struct mdt_body *)0)->rdev) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_body *)0)->rdev)); + LASSERTF((int)offsetof(struct mdt_body, nlink) == 136, "found %lld\n", + (long long)(int)offsetof(struct mdt_body, nlink)); + LASSERTF((int)sizeof(((struct mdt_body *)0)->nlink) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_body *)0)->nlink)); + LASSERTF((int)offsetof(struct mdt_body, unused2) == 140, "found %lld\n", + (long long)(int)offsetof(struct mdt_body, unused2)); + LASSERTF((int)sizeof(((struct mdt_body *)0)->unused2) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_body *)0)->unused2)); + LASSERTF((int)offsetof(struct mdt_body, suppgid) == 144, "found %lld\n", + (long long)(int)offsetof(struct mdt_body, suppgid)); + LASSERTF((int)sizeof(((struct mdt_body *)0)->suppgid) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_body *)0)->suppgid)); + LASSERTF((int)offsetof(struct mdt_body, eadatasize) == 148, "found %lld\n", + (long long)(int)offsetof(struct mdt_body, eadatasize)); + LASSERTF((int)sizeof(((struct mdt_body *)0)->eadatasize) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_body *)0)->eadatasize)); + LASSERTF((int)offsetof(struct mdt_body, aclsize) == 152, "found %lld\n", + (long long)(int)offsetof(struct mdt_body, aclsize)); + LASSERTF((int)sizeof(((struct mdt_body *)0)->aclsize) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_body *)0)->aclsize)); + LASSERTF((int)offsetof(struct mdt_body, max_mdsize) == 156, "found %lld\n", + (long long)(int)offsetof(struct mdt_body, max_mdsize)); + LASSERTF((int)sizeof(((struct mdt_body *)0)->max_mdsize) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_body *)0)->max_mdsize)); + LASSERTF((int)offsetof(struct mdt_body, max_cookiesize) == 160, "found %lld\n", + (long long)(int)offsetof(struct mdt_body, max_cookiesize)); + LASSERTF((int)sizeof(((struct mdt_body *)0)->max_cookiesize) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_body *)0)->max_cookiesize)); + LASSERTF((int)offsetof(struct mdt_body, uid_h) == 164, "found %lld\n", + (long long)(int)offsetof(struct mdt_body, uid_h)); + LASSERTF((int)sizeof(((struct mdt_body *)0)->uid_h) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_body *)0)->uid_h)); + LASSERTF((int)offsetof(struct mdt_body, gid_h) == 168, "found %lld\n", + (long long)(int)offsetof(struct mdt_body, gid_h)); + LASSERTF((int)sizeof(((struct mdt_body *)0)->gid_h) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_body *)0)->gid_h)); + LASSERTF((int)offsetof(struct mdt_body, padding_5) == 172, "found %lld\n", + (long long)(int)offsetof(struct mdt_body, padding_5)); + LASSERTF((int)sizeof(((struct mdt_body *)0)->padding_5) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_body *)0)->padding_5)); + LASSERTF((int)offsetof(struct mdt_body, padding_6) == 176, "found %lld\n", + (long long)(int)offsetof(struct mdt_body, padding_6)); + LASSERTF((int)sizeof(((struct mdt_body *)0)->padding_6) == 8, "found %lld\n", + (long long)(int)sizeof(((struct mdt_body *)0)->padding_6)); + LASSERTF((int)offsetof(struct mdt_body, padding_7) == 184, "found %lld\n", + (long long)(int)offsetof(struct mdt_body, padding_7)); + LASSERTF((int)sizeof(((struct mdt_body *)0)->padding_7) == 8, "found %lld\n", + (long long)(int)sizeof(((struct mdt_body *)0)->padding_7)); + LASSERTF((int)offsetof(struct mdt_body, padding_8) == 192, "found %lld\n", + (long long)(int)offsetof(struct mdt_body, padding_8)); + LASSERTF((int)sizeof(((struct mdt_body *)0)->padding_8) == 8, "found %lld\n", + (long long)(int)sizeof(((struct mdt_body *)0)->padding_8)); + LASSERTF((int)offsetof(struct mdt_body, padding_9) == 200, "found %lld\n", + (long long)(int)offsetof(struct mdt_body, padding_9)); + LASSERTF((int)sizeof(((struct mdt_body *)0)->padding_9) == 8, "found %lld\n", + (long long)(int)sizeof(((struct mdt_body *)0)->padding_9)); + LASSERTF((int)offsetof(struct mdt_body, padding_10) == 208, "found %lld\n", + (long long)(int)offsetof(struct mdt_body, padding_10)); + LASSERTF((int)sizeof(((struct mdt_body *)0)->padding_10) == 8, "found %lld\n", + (long long)(int)sizeof(((struct mdt_body *)0)->padding_10)); + LASSERTF(MDS_FMODE_CLOSED == 000000000000UL, "found 0%.11oUL\n", + MDS_FMODE_CLOSED); + LASSERTF(MDS_FMODE_EXEC == 000000000004UL, "found 0%.11oUL\n", + MDS_FMODE_EXEC); + LASSERTF(MDS_FMODE_EPOCH == 000001000000UL, "found 0%.11oUL\n", + MDS_FMODE_EPOCH); + LASSERTF(MDS_FMODE_TRUNC == 000002000000UL, "found 0%.11oUL\n", + MDS_FMODE_TRUNC); + LASSERTF(MDS_FMODE_SOM == 000004000000UL, "found 0%.11oUL\n", + MDS_FMODE_SOM); + LASSERTF(MDS_OPEN_CREATED == 000000000010UL, "found 0%.11oUL\n", + MDS_OPEN_CREATED); + LASSERTF(MDS_OPEN_CROSS == 000000000020UL, "found 0%.11oUL\n", + MDS_OPEN_CROSS); + LASSERTF(MDS_OPEN_CREAT == 000000000100UL, "found 0%.11oUL\n", + MDS_OPEN_CREAT); + LASSERTF(MDS_OPEN_EXCL == 000000000200UL, "found 0%.11oUL\n", + MDS_OPEN_EXCL); + LASSERTF(MDS_OPEN_TRUNC == 000000001000UL, "found 0%.11oUL\n", + MDS_OPEN_TRUNC); + LASSERTF(MDS_OPEN_APPEND == 000000002000UL, "found 0%.11oUL\n", + MDS_OPEN_APPEND); + LASSERTF(MDS_OPEN_SYNC == 000000010000UL, "found 0%.11oUL\n", + MDS_OPEN_SYNC); + LASSERTF(MDS_OPEN_DIRECTORY == 000000200000UL, "found 0%.11oUL\n", + MDS_OPEN_DIRECTORY); + LASSERTF(MDS_OPEN_BY_FID == 000040000000UL, "found 0%.11oUL\n", + MDS_OPEN_BY_FID); + LASSERTF(MDS_OPEN_DELAY_CREATE == 000100000000UL, "found 0%.11oUL\n", + MDS_OPEN_DELAY_CREATE); + LASSERTF(MDS_OPEN_OWNEROVERRIDE == 000200000000UL, "found 0%.11oUL\n", + MDS_OPEN_OWNEROVERRIDE); + LASSERTF(MDS_OPEN_JOIN_FILE == 000400000000UL, "found 0%.11oUL\n", + MDS_OPEN_JOIN_FILE); + LASSERTF(MDS_OPEN_LOCK == 004000000000UL, "found 0%.11oUL\n", + MDS_OPEN_LOCK); + LASSERTF(MDS_OPEN_HAS_EA == 010000000000UL, "found 0%.11oUL\n", + MDS_OPEN_HAS_EA); + LASSERTF(MDS_OPEN_HAS_OBJS == 020000000000UL, "found 0%.11oUL\n", + MDS_OPEN_HAS_OBJS); + LASSERTF(MDS_OPEN_NORESTORE == 00000000000100000000000ULL, "found 0%.22lloULL\n", + (long long)MDS_OPEN_NORESTORE); + LASSERTF(MDS_OPEN_NEWSTRIPE == 00000000000200000000000ULL, "found 0%.22lloULL\n", + (long long)MDS_OPEN_NEWSTRIPE); + LASSERTF(MDS_OPEN_VOLATILE == 00000000000400000000000ULL, "found 0%.22lloULL\n", + (long long)MDS_OPEN_VOLATILE); + LASSERTF(LUSTRE_SYNC_FL == 0x00000008, "found 0x%.8x\n", + LUSTRE_SYNC_FL); + LASSERTF(LUSTRE_IMMUTABLE_FL == 0x00000010, "found 0x%.8x\n", + LUSTRE_IMMUTABLE_FL); + LASSERTF(LUSTRE_APPEND_FL == 0x00000020, "found 0x%.8x\n", + LUSTRE_APPEND_FL); + LASSERTF(LUSTRE_NOATIME_FL == 0x00000080, "found 0x%.8x\n", + LUSTRE_NOATIME_FL); + LASSERTF(LUSTRE_DIRSYNC_FL == 0x00010000, "found 0x%.8x\n", + LUSTRE_DIRSYNC_FL); + LASSERTF(MDS_INODELOCK_LOOKUP == 0x000001, "found 0x%.8x\n", + MDS_INODELOCK_LOOKUP); + LASSERTF(MDS_INODELOCK_UPDATE == 0x000002, "found 0x%.8x\n", + MDS_INODELOCK_UPDATE); + LASSERTF(MDS_INODELOCK_OPEN == 0x000004, "found 0x%.8x\n", + MDS_INODELOCK_OPEN); + LASSERTF(MDS_INODELOCK_LAYOUT == 0x000008, "found 0x%.8x\n", + MDS_INODELOCK_LAYOUT); + + /* Checks for struct mdt_ioepoch */ + LASSERTF((int)sizeof(struct mdt_ioepoch) == 24, "found %lld\n", + (long long)(int)sizeof(struct mdt_ioepoch)); + LASSERTF((int)offsetof(struct mdt_ioepoch, handle) == 0, "found %lld\n", + (long long)(int)offsetof(struct mdt_ioepoch, handle)); + LASSERTF((int)sizeof(((struct mdt_ioepoch *)0)->handle) == 8, "found %lld\n", + (long long)(int)sizeof(((struct mdt_ioepoch *)0)->handle)); + LASSERTF((int)offsetof(struct mdt_ioepoch, ioepoch) == 8, "found %lld\n", + (long long)(int)offsetof(struct mdt_ioepoch, ioepoch)); + LASSERTF((int)sizeof(((struct mdt_ioepoch *)0)->ioepoch) == 8, "found %lld\n", + (long long)(int)sizeof(((struct mdt_ioepoch *)0)->ioepoch)); + LASSERTF((int)offsetof(struct mdt_ioepoch, flags) == 16, "found %lld\n", + (long long)(int)offsetof(struct mdt_ioepoch, flags)); + LASSERTF((int)sizeof(((struct mdt_ioepoch *)0)->flags) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_ioepoch *)0)->flags)); + LASSERTF((int)offsetof(struct mdt_ioepoch, padding) == 20, "found %lld\n", + (long long)(int)offsetof(struct mdt_ioepoch, padding)); + LASSERTF((int)sizeof(((struct mdt_ioepoch *)0)->padding) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_ioepoch *)0)->padding)); + + /* Checks for struct mdt_remote_perm */ + LASSERTF((int)sizeof(struct mdt_remote_perm) == 32, "found %lld\n", + (long long)(int)sizeof(struct mdt_remote_perm)); + LASSERTF((int)offsetof(struct mdt_remote_perm, rp_uid) == 0, "found %lld\n", + (long long)(int)offsetof(struct mdt_remote_perm, rp_uid)); + LASSERTF((int)sizeof(((struct mdt_remote_perm *)0)->rp_uid) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_remote_perm *)0)->rp_uid)); + LASSERTF((int)offsetof(struct mdt_remote_perm, rp_gid) == 4, "found %lld\n", + (long long)(int)offsetof(struct mdt_remote_perm, rp_gid)); + LASSERTF((int)sizeof(((struct mdt_remote_perm *)0)->rp_gid) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_remote_perm *)0)->rp_gid)); + LASSERTF((int)offsetof(struct mdt_remote_perm, rp_fsuid) == 8, "found %lld\n", + (long long)(int)offsetof(struct mdt_remote_perm, rp_fsuid)); + LASSERTF((int)sizeof(((struct mdt_remote_perm *)0)->rp_fsuid) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_remote_perm *)0)->rp_fsuid)); + LASSERTF((int)offsetof(struct mdt_remote_perm, rp_fsgid) == 16, "found %lld\n", + (long long)(int)offsetof(struct mdt_remote_perm, rp_fsgid)); + LASSERTF((int)sizeof(((struct mdt_remote_perm *)0)->rp_fsgid) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_remote_perm *)0)->rp_fsgid)); + LASSERTF((int)offsetof(struct mdt_remote_perm, rp_access_perm) == 24, "found %lld\n", + (long long)(int)offsetof(struct mdt_remote_perm, rp_access_perm)); + LASSERTF((int)sizeof(((struct mdt_remote_perm *)0)->rp_access_perm) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_remote_perm *)0)->rp_access_perm)); + LASSERTF((int)offsetof(struct mdt_remote_perm, rp_padding) == 28, "found %lld\n", + (long long)(int)offsetof(struct mdt_remote_perm, rp_padding)); + LASSERTF((int)sizeof(((struct mdt_remote_perm *)0)->rp_padding) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_remote_perm *)0)->rp_padding)); + LASSERTF(CFS_SETUID_PERM == 0x00000001UL, "found 0x%.8xUL\n", + (unsigned)CFS_SETUID_PERM); + LASSERTF(CFS_SETGID_PERM == 0x00000002UL, "found 0x%.8xUL\n", + (unsigned)CFS_SETGID_PERM); + LASSERTF(CFS_SETGRP_PERM == 0x00000004UL, "found 0x%.8xUL\n", + (unsigned)CFS_SETGRP_PERM); + LASSERTF(CFS_RMTACL_PERM == 0x00000008UL, "found 0x%.8xUL\n", + (unsigned)CFS_RMTACL_PERM); + LASSERTF(CFS_RMTOWN_PERM == 0x00000010UL, "found 0x%.8xUL\n", + (unsigned)CFS_RMTOWN_PERM); + + /* Checks for struct mdt_rec_setattr */ + LASSERTF((int)sizeof(struct mdt_rec_setattr) == 136, "found %lld\n", + (long long)(int)sizeof(struct mdt_rec_setattr)); + LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_opcode) == 0, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_setattr, sa_opcode)); + LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_opcode) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_opcode)); + LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_cap) == 4, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_setattr, sa_cap)); + LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_cap) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_cap)); + LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_fsuid) == 8, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_setattr, sa_fsuid)); + LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_fsuid) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_fsuid)); + LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_fsuid_h) == 12, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_setattr, sa_fsuid_h)); + LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_fsuid_h) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_fsuid_h)); + LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_fsgid) == 16, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_setattr, sa_fsgid)); + LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_fsgid) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_fsgid)); + LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_fsgid_h) == 20, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_setattr, sa_fsgid_h)); + LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_fsgid_h) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_fsgid_h)); + LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_suppgid) == 24, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_setattr, sa_suppgid)); + LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_suppgid) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_suppgid)); + LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_suppgid_h) == 28, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_setattr, sa_suppgid_h)); + LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_suppgid_h) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_suppgid_h)); + LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_padding_1) == 32, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_setattr, sa_padding_1)); + LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_padding_1) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_padding_1)); + LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_padding_1_h) == 36, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_setattr, sa_padding_1_h)); + LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_padding_1_h) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_padding_1_h)); + LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_fid) == 40, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_setattr, sa_fid)); + LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_fid) == 16, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_fid)); + LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_valid) == 56, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_setattr, sa_valid)); + LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_valid) == 8, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_valid)); + LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_uid) == 64, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_setattr, sa_uid)); + LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_uid) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_uid)); + LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_gid) == 68, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_setattr, sa_gid)); + LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_gid) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_gid)); + LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_size) == 72, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_setattr, sa_size)); + LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_size) == 8, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_size)); + LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_blocks) == 80, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_setattr, sa_blocks)); + LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_blocks) == 8, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_blocks)); + LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_mtime) == 88, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_setattr, sa_mtime)); + LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_mtime) == 8, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_mtime)); + LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_atime) == 96, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_setattr, sa_atime)); + LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_atime) == 8, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_atime)); + LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_ctime) == 104, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_setattr, sa_ctime)); + LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_ctime) == 8, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_ctime)); + LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_attr_flags) == 112, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_setattr, sa_attr_flags)); + LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_attr_flags) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_attr_flags)); + LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_mode) == 116, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_setattr, sa_mode)); + LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_mode) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_mode)); + LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_bias) == 120, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_setattr, sa_bias)); + LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_bias) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_bias)); + LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_padding_3) == 124, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_setattr, sa_padding_3)); + LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_padding_3) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_padding_3)); + LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_padding_4) == 128, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_setattr, sa_padding_4)); + LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_padding_4) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_padding_4)); + LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_padding_5) == 132, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_setattr, sa_padding_5)); + LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_padding_5) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_padding_5)); + + /* Checks for struct mdt_rec_create */ + LASSERTF((int)sizeof(struct mdt_rec_create) == 136, "found %lld\n", + (long long)(int)sizeof(struct mdt_rec_create)); + LASSERTF((int)offsetof(struct mdt_rec_create, cr_opcode) == 0, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_create, cr_opcode)); + LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_opcode) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_opcode)); + LASSERTF((int)offsetof(struct mdt_rec_create, cr_cap) == 4, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_create, cr_cap)); + LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_cap) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_cap)); + LASSERTF((int)offsetof(struct mdt_rec_create, cr_fsuid) == 8, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_create, cr_fsuid)); + LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_fsuid) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_fsuid)); + LASSERTF((int)offsetof(struct mdt_rec_create, cr_fsuid_h) == 12, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_create, cr_fsuid_h)); + LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_fsuid_h) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_fsuid_h)); + LASSERTF((int)offsetof(struct mdt_rec_create, cr_fsgid) == 16, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_create, cr_fsgid)); + LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_fsgid) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_fsgid)); + LASSERTF((int)offsetof(struct mdt_rec_create, cr_fsgid_h) == 20, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_create, cr_fsgid_h)); + LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_fsgid_h) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_fsgid_h)); + LASSERTF((int)offsetof(struct mdt_rec_create, cr_suppgid1) == 24, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_create, cr_suppgid1)); + LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_suppgid1) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_suppgid1)); + LASSERTF((int)offsetof(struct mdt_rec_create, cr_suppgid1_h) == 28, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_create, cr_suppgid1_h)); + LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_suppgid1_h) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_suppgid1_h)); + LASSERTF((int)offsetof(struct mdt_rec_create, cr_suppgid2) == 32, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_create, cr_suppgid2)); + LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_suppgid2) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_suppgid2)); + LASSERTF((int)offsetof(struct mdt_rec_create, cr_suppgid2_h) == 36, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_create, cr_suppgid2_h)); + LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_suppgid2_h) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_suppgid2_h)); + LASSERTF((int)offsetof(struct mdt_rec_create, cr_fid1) == 40, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_create, cr_fid1)); + LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_fid1) == 16, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_fid1)); + LASSERTF((int)offsetof(struct mdt_rec_create, cr_fid2) == 56, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_create, cr_fid2)); + LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_fid2) == 16, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_fid2)); + LASSERTF((int)offsetof(struct mdt_rec_create, cr_old_handle) == 72, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_create, cr_old_handle)); + LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_old_handle) == 8, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_old_handle)); + LASSERTF((int)offsetof(struct mdt_rec_create, cr_time) == 80, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_create, cr_time)); + LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_time) == 8, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_time)); + LASSERTF((int)offsetof(struct mdt_rec_create, cr_rdev) == 88, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_create, cr_rdev)); + LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_rdev) == 8, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_rdev)); + LASSERTF((int)offsetof(struct mdt_rec_create, cr_ioepoch) == 96, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_create, cr_ioepoch)); + LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_ioepoch) == 8, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_ioepoch)); + LASSERTF((int)offsetof(struct mdt_rec_create, cr_padding_1) == 104, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_create, cr_padding_1)); + LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_padding_1) == 8, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_padding_1)); + LASSERTF((int)offsetof(struct mdt_rec_create, cr_mode) == 112, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_create, cr_mode)); + LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_mode) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_mode)); + LASSERTF((int)offsetof(struct mdt_rec_create, cr_bias) == 116, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_create, cr_bias)); + LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_bias) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_bias)); + LASSERTF((int)offsetof(struct mdt_rec_create, cr_flags_l) == 120, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_create, cr_flags_l)); + LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_flags_l) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_flags_l)); + LASSERTF((int)offsetof(struct mdt_rec_create, cr_flags_h) == 124, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_create, cr_flags_h)); + LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_flags_h) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_flags_h)); + LASSERTF((int)offsetof(struct mdt_rec_create, cr_umask) == 128, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_create, cr_umask)); + LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_umask) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_umask)); + LASSERTF((int)offsetof(struct mdt_rec_create, cr_padding_4) == 132, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_create, cr_padding_4)); + LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_padding_4) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_padding_4)); + + /* Checks for struct mdt_rec_link */ + LASSERTF((int)sizeof(struct mdt_rec_link) == 136, "found %lld\n", + (long long)(int)sizeof(struct mdt_rec_link)); + LASSERTF((int)offsetof(struct mdt_rec_link, lk_opcode) == 0, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_link, lk_opcode)); + LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_opcode) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_opcode)); + LASSERTF((int)offsetof(struct mdt_rec_link, lk_cap) == 4, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_link, lk_cap)); + LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_cap) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_cap)); + LASSERTF((int)offsetof(struct mdt_rec_link, lk_fsuid) == 8, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_link, lk_fsuid)); + LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_fsuid) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_fsuid)); + LASSERTF((int)offsetof(struct mdt_rec_link, lk_fsuid_h) == 12, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_link, lk_fsuid_h)); + LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_fsuid_h) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_fsuid_h)); + LASSERTF((int)offsetof(struct mdt_rec_link, lk_fsgid) == 16, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_link, lk_fsgid)); + LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_fsgid) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_fsgid)); + LASSERTF((int)offsetof(struct mdt_rec_link, lk_fsgid_h) == 20, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_link, lk_fsgid_h)); + LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_fsgid_h) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_fsgid_h)); + LASSERTF((int)offsetof(struct mdt_rec_link, lk_suppgid1) == 24, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_link, lk_suppgid1)); + LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_suppgid1) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_suppgid1)); + LASSERTF((int)offsetof(struct mdt_rec_link, lk_suppgid1_h) == 28, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_link, lk_suppgid1_h)); + LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_suppgid1_h) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_suppgid1_h)); + LASSERTF((int)offsetof(struct mdt_rec_link, lk_suppgid2) == 32, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_link, lk_suppgid2)); + LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_suppgid2) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_suppgid2)); + LASSERTF((int)offsetof(struct mdt_rec_link, lk_suppgid2_h) == 36, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_link, lk_suppgid2_h)); + LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_suppgid2_h) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_suppgid2_h)); + LASSERTF((int)offsetof(struct mdt_rec_link, lk_fid1) == 40, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_link, lk_fid1)); + LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_fid1) == 16, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_fid1)); + LASSERTF((int)offsetof(struct mdt_rec_link, lk_fid2) == 56, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_link, lk_fid2)); + LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_fid2) == 16, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_fid2)); + LASSERTF((int)offsetof(struct mdt_rec_link, lk_time) == 72, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_link, lk_time)); + LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_time) == 8, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_time)); + LASSERTF((int)offsetof(struct mdt_rec_link, lk_padding_1) == 80, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_link, lk_padding_1)); + LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_padding_1) == 8, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_padding_1)); + LASSERTF((int)offsetof(struct mdt_rec_link, lk_padding_2) == 88, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_link, lk_padding_2)); + LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_padding_2) == 8, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_padding_2)); + LASSERTF((int)offsetof(struct mdt_rec_link, lk_padding_3) == 96, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_link, lk_padding_3)); + LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_padding_3) == 8, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_padding_3)); + LASSERTF((int)offsetof(struct mdt_rec_link, lk_padding_4) == 104, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_link, lk_padding_4)); + LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_padding_4) == 8, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_padding_4)); + LASSERTF((int)offsetof(struct mdt_rec_link, lk_bias) == 112, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_link, lk_bias)); + LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_bias) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_bias)); + LASSERTF((int)offsetof(struct mdt_rec_link, lk_padding_5) == 116, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_link, lk_padding_5)); + LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_padding_5) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_padding_5)); + LASSERTF((int)offsetof(struct mdt_rec_link, lk_padding_6) == 120, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_link, lk_padding_6)); + LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_padding_6) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_padding_6)); + LASSERTF((int)offsetof(struct mdt_rec_link, lk_padding_7) == 124, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_link, lk_padding_7)); + LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_padding_7) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_padding_7)); + LASSERTF((int)offsetof(struct mdt_rec_link, lk_padding_8) == 128, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_link, lk_padding_8)); + LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_padding_8) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_padding_8)); + LASSERTF((int)offsetof(struct mdt_rec_link, lk_padding_9) == 132, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_link, lk_padding_9)); + LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_padding_9) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_padding_9)); + + /* Checks for struct mdt_rec_unlink */ + LASSERTF((int)sizeof(struct mdt_rec_unlink) == 136, "found %lld\n", + (long long)(int)sizeof(struct mdt_rec_unlink)); + LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_opcode) == 0, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_unlink, ul_opcode)); + LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_opcode) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_opcode)); + LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_cap) == 4, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_unlink, ul_cap)); + LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_cap) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_cap)); + LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_fsuid) == 8, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_unlink, ul_fsuid)); + LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_fsuid) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_fsuid)); + LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_fsuid_h) == 12, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_unlink, ul_fsuid_h)); + LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_fsuid_h) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_fsuid_h)); + LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_fsgid) == 16, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_unlink, ul_fsgid)); + LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_fsgid) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_fsgid)); + LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_fsgid_h) == 20, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_unlink, ul_fsgid_h)); + LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_fsgid_h) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_fsgid_h)); + LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_suppgid1) == 24, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_unlink, ul_suppgid1)); + LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_suppgid1) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_suppgid1)); + LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_suppgid1_h) == 28, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_unlink, ul_suppgid1_h)); + LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_suppgid1_h) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_suppgid1_h)); + LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_suppgid2) == 32, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_unlink, ul_suppgid2)); + LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_suppgid2) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_suppgid2)); + LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_suppgid2_h) == 36, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_unlink, ul_suppgid2_h)); + LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_suppgid2_h) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_suppgid2_h)); + LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_fid1) == 40, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_unlink, ul_fid1)); + LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_fid1) == 16, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_fid1)); + LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_fid2) == 56, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_unlink, ul_fid2)); + LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_fid2) == 16, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_fid2)); + LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_time) == 72, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_unlink, ul_time)); + LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_time) == 8, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_time)); + LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_padding_2) == 80, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_unlink, ul_padding_2)); + LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_padding_2) == 8, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_padding_2)); + LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_padding_3) == 88, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_unlink, ul_padding_3)); + LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_padding_3) == 8, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_padding_3)); + LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_padding_4) == 96, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_unlink, ul_padding_4)); + LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_padding_4) == 8, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_padding_4)); + LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_padding_5) == 104, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_unlink, ul_padding_5)); + LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_padding_5) == 8, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_padding_5)); + LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_bias) == 112, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_unlink, ul_bias)); + LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_bias) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_bias)); + LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_mode) == 116, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_unlink, ul_mode)); + LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_mode) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_mode)); + LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_padding_6) == 120, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_unlink, ul_padding_6)); + LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_padding_6) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_padding_6)); + LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_padding_7) == 124, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_unlink, ul_padding_7)); + LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_padding_7) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_padding_7)); + LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_padding_8) == 128, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_unlink, ul_padding_8)); + LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_padding_8) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_padding_8)); + LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_padding_9) == 132, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_unlink, ul_padding_9)); + LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_padding_9) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_padding_9)); + + /* Checks for struct mdt_rec_rename */ + LASSERTF((int)sizeof(struct mdt_rec_rename) == 136, "found %lld\n", + (long long)(int)sizeof(struct mdt_rec_rename)); + LASSERTF((int)offsetof(struct mdt_rec_rename, rn_opcode) == 0, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_rename, rn_opcode)); + LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_opcode) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_opcode)); + LASSERTF((int)offsetof(struct mdt_rec_rename, rn_cap) == 4, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_rename, rn_cap)); + LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_cap) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_cap)); + LASSERTF((int)offsetof(struct mdt_rec_rename, rn_fsuid) == 8, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_rename, rn_fsuid)); + LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_fsuid) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_fsuid)); + LASSERTF((int)offsetof(struct mdt_rec_rename, rn_fsuid_h) == 12, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_rename, rn_fsuid_h)); + LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_fsuid_h) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_fsuid_h)); + LASSERTF((int)offsetof(struct mdt_rec_rename, rn_fsgid) == 16, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_rename, rn_fsgid)); + LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_fsgid) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_fsgid)); + LASSERTF((int)offsetof(struct mdt_rec_rename, rn_fsgid_h) == 20, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_rename, rn_fsgid_h)); + LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_fsgid_h) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_fsgid_h)); + LASSERTF((int)offsetof(struct mdt_rec_rename, rn_suppgid1) == 24, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_rename, rn_suppgid1)); + LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_suppgid1) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_suppgid1)); + LASSERTF((int)offsetof(struct mdt_rec_rename, rn_suppgid1_h) == 28, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_rename, rn_suppgid1_h)); + LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_suppgid1_h) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_suppgid1_h)); + LASSERTF((int)offsetof(struct mdt_rec_rename, rn_suppgid2) == 32, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_rename, rn_suppgid2)); + LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_suppgid2) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_suppgid2)); + LASSERTF((int)offsetof(struct mdt_rec_rename, rn_suppgid2_h) == 36, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_rename, rn_suppgid2_h)); + LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_suppgid2_h) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_suppgid2_h)); + LASSERTF((int)offsetof(struct mdt_rec_rename, rn_fid1) == 40, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_rename, rn_fid1)); + LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_fid1) == 16, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_fid1)); + LASSERTF((int)offsetof(struct mdt_rec_rename, rn_fid2) == 56, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_rename, rn_fid2)); + LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_fid2) == 16, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_fid2)); + LASSERTF((int)offsetof(struct mdt_rec_rename, rn_time) == 72, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_rename, rn_time)); + LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_time) == 8, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_time)); + LASSERTF((int)offsetof(struct mdt_rec_rename, rn_padding_1) == 80, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_rename, rn_padding_1)); + LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_padding_1) == 8, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_padding_1)); + LASSERTF((int)offsetof(struct mdt_rec_rename, rn_padding_2) == 88, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_rename, rn_padding_2)); + LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_padding_2) == 8, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_padding_2)); + LASSERTF((int)offsetof(struct mdt_rec_rename, rn_padding_3) == 96, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_rename, rn_padding_3)); + LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_padding_3) == 8, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_padding_3)); + LASSERTF((int)offsetof(struct mdt_rec_rename, rn_padding_4) == 104, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_rename, rn_padding_4)); + LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_padding_4) == 8, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_padding_4)); + LASSERTF((int)offsetof(struct mdt_rec_rename, rn_bias) == 112, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_rename, rn_bias)); + LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_bias) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_bias)); + LASSERTF((int)offsetof(struct mdt_rec_rename, rn_mode) == 116, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_rename, rn_mode)); + LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_mode) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_mode)); + LASSERTF((int)offsetof(struct mdt_rec_rename, rn_padding_5) == 120, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_rename, rn_padding_5)); + LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_padding_5) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_padding_5)); + LASSERTF((int)offsetof(struct mdt_rec_rename, rn_padding_6) == 124, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_rename, rn_padding_6)); + LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_padding_6) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_padding_6)); + LASSERTF((int)offsetof(struct mdt_rec_rename, rn_padding_7) == 128, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_rename, rn_padding_7)); + LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_padding_7) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_padding_7)); + LASSERTF((int)offsetof(struct mdt_rec_rename, rn_padding_8) == 132, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_rename, rn_padding_8)); + LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_padding_8) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_padding_8)); + + /* Checks for struct mdt_rec_setxattr */ + LASSERTF((int)sizeof(struct mdt_rec_setxattr) == 136, "found %lld\n", + (long long)(int)sizeof(struct mdt_rec_setxattr)); + LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_opcode) == 0, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_setxattr, sx_opcode)); + LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_opcode) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_opcode)); + LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_cap) == 4, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_setxattr, sx_cap)); + LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_cap) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_cap)); + LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_fsuid) == 8, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_setxattr, sx_fsuid)); + LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_fsuid) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_fsuid)); + LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_fsuid_h) == 12, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_setxattr, sx_fsuid_h)); + LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_fsuid_h) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_fsuid_h)); + LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_fsgid) == 16, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_setxattr, sx_fsgid)); + LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_fsgid) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_fsgid)); + LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_fsgid_h) == 20, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_setxattr, sx_fsgid_h)); + LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_fsgid_h) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_fsgid_h)); + LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_suppgid1) == 24, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_setxattr, sx_suppgid1)); + LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_suppgid1) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_suppgid1)); + LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_suppgid1_h) == 28, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_setxattr, sx_suppgid1_h)); + LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_suppgid1_h) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_suppgid1_h)); + LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_suppgid2) == 32, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_setxattr, sx_suppgid2)); + LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_suppgid2) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_suppgid2)); + LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_suppgid2_h) == 36, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_setxattr, sx_suppgid2_h)); + LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_suppgid2_h) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_suppgid2_h)); + LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_fid) == 40, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_setxattr, sx_fid)); + LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_fid) == 16, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_fid)); + LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_padding_1) == 56, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_setxattr, sx_padding_1)); + LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_1) == 8, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_1)); + LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_padding_2) == 64, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_setxattr, sx_padding_2)); + LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_2) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_2)); + LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_padding_3) == 68, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_setxattr, sx_padding_3)); + LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_3) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_3)); + LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_valid) == 72, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_setxattr, sx_valid)); + LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_valid) == 8, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_valid)); + LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_time) == 80, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_setxattr, sx_time)); + LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_time) == 8, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_time)); + LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_padding_5) == 88, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_setxattr, sx_padding_5)); + LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_5) == 8, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_5)); + LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_padding_6) == 96, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_setxattr, sx_padding_6)); + LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_6) == 8, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_6)); + LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_padding_7) == 104, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_setxattr, sx_padding_7)); + LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_7) == 8, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_7)); + LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_size) == 112, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_setxattr, sx_size)); + LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_size) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_size)); + LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_flags) == 116, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_setxattr, sx_flags)); + LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_flags) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_flags)); + LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_padding_8) == 120, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_setxattr, sx_padding_8)); + LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_8) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_8)); + LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_padding_9) == 124, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_setxattr, sx_padding_9)); + LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_9) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_9)); + LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_padding_10) == 128, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_setxattr, sx_padding_10)); + LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_10) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_10)); + LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_padding_11) == 132, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_setxattr, sx_padding_11)); + LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_11) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_11)); + + /* Checks for struct mdt_rec_reint */ + LASSERTF((int)sizeof(struct mdt_rec_reint) == 136, "found %lld\n", + (long long)(int)sizeof(struct mdt_rec_reint)); + LASSERTF((int)offsetof(struct mdt_rec_reint, rr_opcode) == 0, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_reint, rr_opcode)); + LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_opcode) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_opcode)); + LASSERTF((int)offsetof(struct mdt_rec_reint, rr_cap) == 4, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_reint, rr_cap)); + LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_cap) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_cap)); + LASSERTF((int)offsetof(struct mdt_rec_reint, rr_fsuid) == 8, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_reint, rr_fsuid)); + LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_fsuid) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_fsuid)); + LASSERTF((int)offsetof(struct mdt_rec_reint, rr_fsuid_h) == 12, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_reint, rr_fsuid_h)); + LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_fsuid_h) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_fsuid_h)); + LASSERTF((int)offsetof(struct mdt_rec_reint, rr_fsgid) == 16, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_reint, rr_fsgid)); + LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_fsgid) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_fsgid)); + LASSERTF((int)offsetof(struct mdt_rec_reint, rr_fsgid_h) == 20, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_reint, rr_fsgid_h)); + LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_fsgid_h) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_fsgid_h)); + LASSERTF((int)offsetof(struct mdt_rec_reint, rr_suppgid1) == 24, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_reint, rr_suppgid1)); + LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_suppgid1) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_suppgid1)); + LASSERTF((int)offsetof(struct mdt_rec_reint, rr_suppgid1_h) == 28, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_reint, rr_suppgid1_h)); + LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_suppgid1_h) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_suppgid1_h)); + LASSERTF((int)offsetof(struct mdt_rec_reint, rr_suppgid2) == 32, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_reint, rr_suppgid2)); + LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_suppgid2) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_suppgid2)); + LASSERTF((int)offsetof(struct mdt_rec_reint, rr_suppgid2_h) == 36, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_reint, rr_suppgid2_h)); + LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_suppgid2_h) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_suppgid2_h)); + LASSERTF((int)offsetof(struct mdt_rec_reint, rr_fid1) == 40, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_reint, rr_fid1)); + LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_fid1) == 16, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_fid1)); + LASSERTF((int)offsetof(struct mdt_rec_reint, rr_fid2) == 56, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_reint, rr_fid2)); + LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_fid2) == 16, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_fid2)); + LASSERTF((int)offsetof(struct mdt_rec_reint, rr_mtime) == 72, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_reint, rr_mtime)); + LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_mtime) == 8, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_mtime)); + LASSERTF((int)offsetof(struct mdt_rec_reint, rr_atime) == 80, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_reint, rr_atime)); + LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_atime) == 8, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_atime)); + LASSERTF((int)offsetof(struct mdt_rec_reint, rr_ctime) == 88, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_reint, rr_ctime)); + LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_ctime) == 8, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_ctime)); + LASSERTF((int)offsetof(struct mdt_rec_reint, rr_size) == 96, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_reint, rr_size)); + LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_size) == 8, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_size)); + LASSERTF((int)offsetof(struct mdt_rec_reint, rr_blocks) == 104, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_reint, rr_blocks)); + LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_blocks) == 8, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_blocks)); + LASSERTF((int)offsetof(struct mdt_rec_reint, rr_bias) == 112, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_reint, rr_bias)); + LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_bias) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_bias)); + LASSERTF((int)offsetof(struct mdt_rec_reint, rr_mode) == 116, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_reint, rr_mode)); + LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_mode) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_mode)); + LASSERTF((int)offsetof(struct mdt_rec_reint, rr_flags) == 120, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_reint, rr_flags)); + LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_flags) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_flags)); + LASSERTF((int)offsetof(struct mdt_rec_reint, rr_flags_h) == 124, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_reint, rr_flags_h)); + LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_flags_h) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_flags_h)); + LASSERTF((int)offsetof(struct mdt_rec_reint, rr_umask) == 128, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_reint, rr_umask)); + LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_umask) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_umask)); + LASSERTF((int)offsetof(struct mdt_rec_reint, rr_padding_4) == 132, "found %lld\n", + (long long)(int)offsetof(struct mdt_rec_reint, rr_padding_4)); + LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_padding_4) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_padding_4)); + + /* Checks for struct lmv_desc */ + LASSERTF((int)sizeof(struct lmv_desc) == 88, "found %lld\n", + (long long)(int)sizeof(struct lmv_desc)); + LASSERTF((int)offsetof(struct lmv_desc, ld_tgt_count) == 0, "found %lld\n", + (long long)(int)offsetof(struct lmv_desc, ld_tgt_count)); + LASSERTF((int)sizeof(((struct lmv_desc *)0)->ld_tgt_count) == 4, "found %lld\n", + (long long)(int)sizeof(((struct lmv_desc *)0)->ld_tgt_count)); + LASSERTF((int)offsetof(struct lmv_desc, ld_active_tgt_count) == 4, "found %lld\n", + (long long)(int)offsetof(struct lmv_desc, ld_active_tgt_count)); + LASSERTF((int)sizeof(((struct lmv_desc *)0)->ld_active_tgt_count) == 4, "found %lld\n", + (long long)(int)sizeof(((struct lmv_desc *)0)->ld_active_tgt_count)); + LASSERTF((int)offsetof(struct lmv_desc, ld_default_stripe_count) == 8, "found %lld\n", + (long long)(int)offsetof(struct lmv_desc, ld_default_stripe_count)); + LASSERTF((int)sizeof(((struct lmv_desc *)0)->ld_default_stripe_count) == 4, "found %lld\n", + (long long)(int)sizeof(((struct lmv_desc *)0)->ld_default_stripe_count)); + LASSERTF((int)offsetof(struct lmv_desc, ld_pattern) == 12, "found %lld\n", + (long long)(int)offsetof(struct lmv_desc, ld_pattern)); + LASSERTF((int)sizeof(((struct lmv_desc *)0)->ld_pattern) == 4, "found %lld\n", + (long long)(int)sizeof(((struct lmv_desc *)0)->ld_pattern)); + LASSERTF((int)offsetof(struct lmv_desc, ld_default_hash_size) == 16, "found %lld\n", + (long long)(int)offsetof(struct lmv_desc, ld_default_hash_size)); + LASSERTF((int)sizeof(((struct lmv_desc *)0)->ld_default_hash_size) == 8, "found %lld\n", + (long long)(int)sizeof(((struct lmv_desc *)0)->ld_default_hash_size)); + LASSERTF((int)offsetof(struct lmv_desc, ld_padding_1) == 24, "found %lld\n", + (long long)(int)offsetof(struct lmv_desc, ld_padding_1)); + LASSERTF((int)sizeof(((struct lmv_desc *)0)->ld_padding_1) == 8, "found %lld\n", + (long long)(int)sizeof(((struct lmv_desc *)0)->ld_padding_1)); + LASSERTF((int)offsetof(struct lmv_desc, ld_padding_2) == 32, "found %lld\n", + (long long)(int)offsetof(struct lmv_desc, ld_padding_2)); + LASSERTF((int)sizeof(((struct lmv_desc *)0)->ld_padding_2) == 4, "found %lld\n", + (long long)(int)sizeof(((struct lmv_desc *)0)->ld_padding_2)); + LASSERTF((int)offsetof(struct lmv_desc, ld_qos_maxage) == 36, "found %lld\n", + (long long)(int)offsetof(struct lmv_desc, ld_qos_maxage)); + LASSERTF((int)sizeof(((struct lmv_desc *)0)->ld_qos_maxage) == 4, "found %lld\n", + (long long)(int)sizeof(((struct lmv_desc *)0)->ld_qos_maxage)); + LASSERTF((int)offsetof(struct lmv_desc, ld_padding_3) == 40, "found %lld\n", + (long long)(int)offsetof(struct lmv_desc, ld_padding_3)); + LASSERTF((int)sizeof(((struct lmv_desc *)0)->ld_padding_3) == 4, "found %lld\n", + (long long)(int)sizeof(((struct lmv_desc *)0)->ld_padding_3)); + LASSERTF((int)offsetof(struct lmv_desc, ld_padding_4) == 44, "found %lld\n", + (long long)(int)offsetof(struct lmv_desc, ld_padding_4)); + LASSERTF((int)sizeof(((struct lmv_desc *)0)->ld_padding_4) == 4, "found %lld\n", + (long long)(int)sizeof(((struct lmv_desc *)0)->ld_padding_4)); + LASSERTF((int)offsetof(struct lmv_desc, ld_uuid) == 48, "found %lld\n", + (long long)(int)offsetof(struct lmv_desc, ld_uuid)); + LASSERTF((int)sizeof(((struct lmv_desc *)0)->ld_uuid) == 40, "found %lld\n", + (long long)(int)sizeof(((struct lmv_desc *)0)->ld_uuid)); + + /* Checks for struct lmv_stripe_md */ + LASSERTF((int)sizeof(struct lmv_stripe_md) == 32, "found %lld\n", + (long long)(int)sizeof(struct lmv_stripe_md)); + LASSERTF((int)offsetof(struct lmv_stripe_md, mea_magic) == 0, "found %lld\n", + (long long)(int)offsetof(struct lmv_stripe_md, mea_magic)); + LASSERTF((int)sizeof(((struct lmv_stripe_md *)0)->mea_magic) == 4, "found %lld\n", + (long long)(int)sizeof(((struct lmv_stripe_md *)0)->mea_magic)); + LASSERTF((int)offsetof(struct lmv_stripe_md, mea_count) == 4, "found %lld\n", + (long long)(int)offsetof(struct lmv_stripe_md, mea_count)); + LASSERTF((int)sizeof(((struct lmv_stripe_md *)0)->mea_count) == 4, "found %lld\n", + (long long)(int)sizeof(((struct lmv_stripe_md *)0)->mea_count)); + LASSERTF((int)offsetof(struct lmv_stripe_md, mea_master) == 8, "found %lld\n", + (long long)(int)offsetof(struct lmv_stripe_md, mea_master)); + LASSERTF((int)sizeof(((struct lmv_stripe_md *)0)->mea_master) == 4, "found %lld\n", + (long long)(int)sizeof(((struct lmv_stripe_md *)0)->mea_master)); + LASSERTF((int)offsetof(struct lmv_stripe_md, mea_padding) == 12, "found %lld\n", + (long long)(int)offsetof(struct lmv_stripe_md, mea_padding)); + LASSERTF((int)sizeof(((struct lmv_stripe_md *)0)->mea_padding) == 4, "found %lld\n", + (long long)(int)sizeof(((struct lmv_stripe_md *)0)->mea_padding)); + CLASSERT(LOV_MAXPOOLNAME == 16); + LASSERTF((int)offsetof(struct lmv_stripe_md, mea_pool_name[16]) == 32, "found %lld\n", + (long long)(int)offsetof(struct lmv_stripe_md, mea_pool_name[16])); + LASSERTF((int)sizeof(((struct lmv_stripe_md *)0)->mea_pool_name[16]) == 1, "found %lld\n", + (long long)(int)sizeof(((struct lmv_stripe_md *)0)->mea_pool_name[16])); + LASSERTF((int)offsetof(struct lmv_stripe_md, mea_ids[0]) == 32, "found %lld\n", + (long long)(int)offsetof(struct lmv_stripe_md, mea_ids[0])); + LASSERTF((int)sizeof(((struct lmv_stripe_md *)0)->mea_ids[0]) == 16, "found %lld\n", + (long long)(int)sizeof(((struct lmv_stripe_md *)0)->mea_ids[0])); + + /* Checks for struct lov_desc */ + LASSERTF((int)sizeof(struct lov_desc) == 88, "found %lld\n", + (long long)(int)sizeof(struct lov_desc)); + LASSERTF((int)offsetof(struct lov_desc, ld_tgt_count) == 0, "found %lld\n", + (long long)(int)offsetof(struct lov_desc, ld_tgt_count)); + LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_tgt_count) == 4, "found %lld\n", + (long long)(int)sizeof(((struct lov_desc *)0)->ld_tgt_count)); + LASSERTF((int)offsetof(struct lov_desc, ld_active_tgt_count) == 4, "found %lld\n", + (long long)(int)offsetof(struct lov_desc, ld_active_tgt_count)); + LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_active_tgt_count) == 4, "found %lld\n", + (long long)(int)sizeof(((struct lov_desc *)0)->ld_active_tgt_count)); + LASSERTF((int)offsetof(struct lov_desc, ld_default_stripe_count) == 8, "found %lld\n", + (long long)(int)offsetof(struct lov_desc, ld_default_stripe_count)); + LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_default_stripe_count) == 4, "found %lld\n", + (long long)(int)sizeof(((struct lov_desc *)0)->ld_default_stripe_count)); + LASSERTF((int)offsetof(struct lov_desc, ld_pattern) == 12, "found %lld\n", + (long long)(int)offsetof(struct lov_desc, ld_pattern)); + LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_pattern) == 4, "found %lld\n", + (long long)(int)sizeof(((struct lov_desc *)0)->ld_pattern)); + LASSERTF((int)offsetof(struct lov_desc, ld_default_stripe_size) == 16, "found %lld\n", + (long long)(int)offsetof(struct lov_desc, ld_default_stripe_size)); + LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_default_stripe_size) == 8, "found %lld\n", + (long long)(int)sizeof(((struct lov_desc *)0)->ld_default_stripe_size)); + LASSERTF((int)offsetof(struct lov_desc, ld_default_stripe_offset) == 24, "found %lld\n", + (long long)(int)offsetof(struct lov_desc, ld_default_stripe_offset)); + LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_default_stripe_offset) == 8, "found %lld\n", + (long long)(int)sizeof(((struct lov_desc *)0)->ld_default_stripe_offset)); + LASSERTF((int)offsetof(struct lov_desc, ld_padding_0) == 32, "found %lld\n", + (long long)(int)offsetof(struct lov_desc, ld_padding_0)); + LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_padding_0) == 4, "found %lld\n", + (long long)(int)sizeof(((struct lov_desc *)0)->ld_padding_0)); + LASSERTF((int)offsetof(struct lov_desc, ld_qos_maxage) == 36, "found %lld\n", + (long long)(int)offsetof(struct lov_desc, ld_qos_maxage)); + LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_qos_maxage) == 4, "found %lld\n", + (long long)(int)sizeof(((struct lov_desc *)0)->ld_qos_maxage)); + LASSERTF((int)offsetof(struct lov_desc, ld_padding_1) == 40, "found %lld\n", + (long long)(int)offsetof(struct lov_desc, ld_padding_1)); + LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_padding_1) == 4, "found %lld\n", + (long long)(int)sizeof(((struct lov_desc *)0)->ld_padding_1)); + LASSERTF((int)offsetof(struct lov_desc, ld_padding_2) == 44, "found %lld\n", + (long long)(int)offsetof(struct lov_desc, ld_padding_2)); + LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_padding_2) == 4, "found %lld\n", + (long long)(int)sizeof(((struct lov_desc *)0)->ld_padding_2)); + LASSERTF((int)offsetof(struct lov_desc, ld_uuid) == 48, "found %lld\n", + (long long)(int)offsetof(struct lov_desc, ld_uuid)); + LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_uuid) == 40, "found %lld\n", + (long long)(int)sizeof(((struct lov_desc *)0)->ld_uuid)); + CLASSERT(LOV_DESC_MAGIC == 0xB0CCDE5C); + + /* Checks for struct ldlm_res_id */ + LASSERTF((int)sizeof(struct ldlm_res_id) == 32, "found %lld\n", + (long long)(int)sizeof(struct ldlm_res_id)); + CLASSERT(RES_NAME_SIZE == 4); + LASSERTF((int)offsetof(struct ldlm_res_id, name[4]) == 32, "found %lld\n", + (long long)(int)offsetof(struct ldlm_res_id, name[4])); + LASSERTF((int)sizeof(((struct ldlm_res_id *)0)->name[4]) == 8, "found %lld\n", + (long long)(int)sizeof(((struct ldlm_res_id *)0)->name[4])); + + /* Checks for struct ldlm_extent */ + LASSERTF((int)sizeof(struct ldlm_extent) == 24, "found %lld\n", + (long long)(int)sizeof(struct ldlm_extent)); + LASSERTF((int)offsetof(struct ldlm_extent, start) == 0, "found %lld\n", + (long long)(int)offsetof(struct ldlm_extent, start)); + LASSERTF((int)sizeof(((struct ldlm_extent *)0)->start) == 8, "found %lld\n", + (long long)(int)sizeof(((struct ldlm_extent *)0)->start)); + LASSERTF((int)offsetof(struct ldlm_extent, end) == 8, "found %lld\n", + (long long)(int)offsetof(struct ldlm_extent, end)); + LASSERTF((int)sizeof(((struct ldlm_extent *)0)->end) == 8, "found %lld\n", + (long long)(int)sizeof(((struct ldlm_extent *)0)->end)); + LASSERTF((int)offsetof(struct ldlm_extent, gid) == 16, "found %lld\n", + (long long)(int)offsetof(struct ldlm_extent, gid)); + LASSERTF((int)sizeof(((struct ldlm_extent *)0)->gid) == 8, "found %lld\n", + (long long)(int)sizeof(((struct ldlm_extent *)0)->gid)); + + /* Checks for struct ldlm_inodebits */ + LASSERTF((int)sizeof(struct ldlm_inodebits) == 8, "found %lld\n", + (long long)(int)sizeof(struct ldlm_inodebits)); + LASSERTF((int)offsetof(struct ldlm_inodebits, bits) == 0, "found %lld\n", + (long long)(int)offsetof(struct ldlm_inodebits, bits)); + LASSERTF((int)sizeof(((struct ldlm_inodebits *)0)->bits) == 8, "found %lld\n", + (long long)(int)sizeof(((struct ldlm_inodebits *)0)->bits)); + + /* Checks for struct ldlm_flock_wire */ + LASSERTF((int)sizeof(struct ldlm_flock_wire) == 32, "found %lld\n", + (long long)(int)sizeof(struct ldlm_flock_wire)); + LASSERTF((int)offsetof(struct ldlm_flock_wire, lfw_start) == 0, "found %lld\n", + (long long)(int)offsetof(struct ldlm_flock_wire, lfw_start)); + LASSERTF((int)sizeof(((struct ldlm_flock_wire *)0)->lfw_start) == 8, "found %lld\n", + (long long)(int)sizeof(((struct ldlm_flock_wire *)0)->lfw_start)); + LASSERTF((int)offsetof(struct ldlm_flock_wire, lfw_end) == 8, "found %lld\n", + (long long)(int)offsetof(struct ldlm_flock_wire, lfw_end)); + LASSERTF((int)sizeof(((struct ldlm_flock_wire *)0)->lfw_end) == 8, "found %lld\n", + (long long)(int)sizeof(((struct ldlm_flock_wire *)0)->lfw_end)); + LASSERTF((int)offsetof(struct ldlm_flock_wire, lfw_owner) == 16, "found %lld\n", + (long long)(int)offsetof(struct ldlm_flock_wire, lfw_owner)); + LASSERTF((int)sizeof(((struct ldlm_flock_wire *)0)->lfw_owner) == 8, "found %lld\n", + (long long)(int)sizeof(((struct ldlm_flock_wire *)0)->lfw_owner)); + LASSERTF((int)offsetof(struct ldlm_flock_wire, lfw_padding) == 24, "found %lld\n", + (long long)(int)offsetof(struct ldlm_flock_wire, lfw_padding)); + LASSERTF((int)sizeof(((struct ldlm_flock_wire *)0)->lfw_padding) == 4, "found %lld\n", + (long long)(int)sizeof(((struct ldlm_flock_wire *)0)->lfw_padding)); + LASSERTF((int)offsetof(struct ldlm_flock_wire, lfw_pid) == 28, "found %lld\n", + (long long)(int)offsetof(struct ldlm_flock_wire, lfw_pid)); + LASSERTF((int)sizeof(((struct ldlm_flock_wire *)0)->lfw_pid) == 4, "found %lld\n", + (long long)(int)sizeof(((struct ldlm_flock_wire *)0)->lfw_pid)); + + /* Checks for struct ldlm_intent */ + LASSERTF((int)sizeof(struct ldlm_intent) == 8, "found %lld\n", + (long long)(int)sizeof(struct ldlm_intent)); + LASSERTF((int)offsetof(struct ldlm_intent, opc) == 0, "found %lld\n", + (long long)(int)offsetof(struct ldlm_intent, opc)); + LASSERTF((int)sizeof(((struct ldlm_intent *)0)->opc) == 8, "found %lld\n", + (long long)(int)sizeof(((struct ldlm_intent *)0)->opc)); + + /* Checks for struct ldlm_resource_desc */ + LASSERTF((int)sizeof(struct ldlm_resource_desc) == 40, "found %lld\n", + (long long)(int)sizeof(struct ldlm_resource_desc)); + LASSERTF((int)offsetof(struct ldlm_resource_desc, lr_type) == 0, "found %lld\n", + (long long)(int)offsetof(struct ldlm_resource_desc, lr_type)); + LASSERTF((int)sizeof(((struct ldlm_resource_desc *)0)->lr_type) == 4, "found %lld\n", + (long long)(int)sizeof(((struct ldlm_resource_desc *)0)->lr_type)); + LASSERTF((int)offsetof(struct ldlm_resource_desc, lr_padding) == 4, "found %lld\n", + (long long)(int)offsetof(struct ldlm_resource_desc, lr_padding)); + LASSERTF((int)sizeof(((struct ldlm_resource_desc *)0)->lr_padding) == 4, "found %lld\n", + (long long)(int)sizeof(((struct ldlm_resource_desc *)0)->lr_padding)); + LASSERTF((int)offsetof(struct ldlm_resource_desc, lr_name) == 8, "found %lld\n", + (long long)(int)offsetof(struct ldlm_resource_desc, lr_name)); + LASSERTF((int)sizeof(((struct ldlm_resource_desc *)0)->lr_name) == 32, "found %lld\n", + (long long)(int)sizeof(((struct ldlm_resource_desc *)0)->lr_name)); + + /* Checks for struct ldlm_lock_desc */ + LASSERTF((int)sizeof(struct ldlm_lock_desc) == 80, "found %lld\n", + (long long)(int)sizeof(struct ldlm_lock_desc)); + LASSERTF((int)offsetof(struct ldlm_lock_desc, l_resource) == 0, "found %lld\n", + (long long)(int)offsetof(struct ldlm_lock_desc, l_resource)); + LASSERTF((int)sizeof(((struct ldlm_lock_desc *)0)->l_resource) == 40, "found %lld\n", + (long long)(int)sizeof(((struct ldlm_lock_desc *)0)->l_resource)); + LASSERTF((int)offsetof(struct ldlm_lock_desc, l_req_mode) == 40, "found %lld\n", + (long long)(int)offsetof(struct ldlm_lock_desc, l_req_mode)); + LASSERTF((int)sizeof(((struct ldlm_lock_desc *)0)->l_req_mode) == 4, "found %lld\n", + (long long)(int)sizeof(((struct ldlm_lock_desc *)0)->l_req_mode)); + LASSERTF((int)offsetof(struct ldlm_lock_desc, l_granted_mode) == 44, "found %lld\n", + (long long)(int)offsetof(struct ldlm_lock_desc, l_granted_mode)); + LASSERTF((int)sizeof(((struct ldlm_lock_desc *)0)->l_granted_mode) == 4, "found %lld\n", + (long long)(int)sizeof(((struct ldlm_lock_desc *)0)->l_granted_mode)); + LASSERTF((int)offsetof(struct ldlm_lock_desc, l_policy_data) == 48, "found %lld\n", + (long long)(int)offsetof(struct ldlm_lock_desc, l_policy_data)); + LASSERTF((int)sizeof(((struct ldlm_lock_desc *)0)->l_policy_data) == 32, "found %lld\n", + (long long)(int)sizeof(((struct ldlm_lock_desc *)0)->l_policy_data)); + + /* Checks for struct ldlm_request */ + LASSERTF((int)sizeof(struct ldlm_request) == 104, "found %lld\n", + (long long)(int)sizeof(struct ldlm_request)); + LASSERTF((int)offsetof(struct ldlm_request, lock_flags) == 0, "found %lld\n", + (long long)(int)offsetof(struct ldlm_request, lock_flags)); + LASSERTF((int)sizeof(((struct ldlm_request *)0)->lock_flags) == 4, "found %lld\n", + (long long)(int)sizeof(((struct ldlm_request *)0)->lock_flags)); + LASSERTF((int)offsetof(struct ldlm_request, lock_count) == 4, "found %lld\n", + (long long)(int)offsetof(struct ldlm_request, lock_count)); + LASSERTF((int)sizeof(((struct ldlm_request *)0)->lock_count) == 4, "found %lld\n", + (long long)(int)sizeof(((struct ldlm_request *)0)->lock_count)); + LASSERTF((int)offsetof(struct ldlm_request, lock_desc) == 8, "found %lld\n", + (long long)(int)offsetof(struct ldlm_request, lock_desc)); + LASSERTF((int)sizeof(((struct ldlm_request *)0)->lock_desc) == 80, "found %lld\n", + (long long)(int)sizeof(((struct ldlm_request *)0)->lock_desc)); + LASSERTF((int)offsetof(struct ldlm_request, lock_handle) == 88, "found %lld\n", + (long long)(int)offsetof(struct ldlm_request, lock_handle)); + LASSERTF((int)sizeof(((struct ldlm_request *)0)->lock_handle) == 16, "found %lld\n", + (long long)(int)sizeof(((struct ldlm_request *)0)->lock_handle)); + + /* Checks for struct ldlm_reply */ + LASSERTF((int)sizeof(struct ldlm_reply) == 112, "found %lld\n", + (long long)(int)sizeof(struct ldlm_reply)); + LASSERTF((int)offsetof(struct ldlm_reply, lock_flags) == 0, "found %lld\n", + (long long)(int)offsetof(struct ldlm_reply, lock_flags)); + LASSERTF((int)sizeof(((struct ldlm_reply *)0)->lock_flags) == 4, "found %lld\n", + (long long)(int)sizeof(((struct ldlm_reply *)0)->lock_flags)); + LASSERTF((int)offsetof(struct ldlm_reply, lock_padding) == 4, "found %lld\n", + (long long)(int)offsetof(struct ldlm_reply, lock_padding)); + LASSERTF((int)sizeof(((struct ldlm_reply *)0)->lock_padding) == 4, "found %lld\n", + (long long)(int)sizeof(((struct ldlm_reply *)0)->lock_padding)); + LASSERTF((int)offsetof(struct ldlm_reply, lock_desc) == 8, "found %lld\n", + (long long)(int)offsetof(struct ldlm_reply, lock_desc)); + LASSERTF((int)sizeof(((struct ldlm_reply *)0)->lock_desc) == 80, "found %lld\n", + (long long)(int)sizeof(((struct ldlm_reply *)0)->lock_desc)); + LASSERTF((int)offsetof(struct ldlm_reply, lock_handle) == 88, "found %lld\n", + (long long)(int)offsetof(struct ldlm_reply, lock_handle)); + LASSERTF((int)sizeof(((struct ldlm_reply *)0)->lock_handle) == 8, "found %lld\n", + (long long)(int)sizeof(((struct ldlm_reply *)0)->lock_handle)); + LASSERTF((int)offsetof(struct ldlm_reply, lock_policy_res1) == 96, "found %lld\n", + (long long)(int)offsetof(struct ldlm_reply, lock_policy_res1)); + LASSERTF((int)sizeof(((struct ldlm_reply *)0)->lock_policy_res1) == 8, "found %lld\n", + (long long)(int)sizeof(((struct ldlm_reply *)0)->lock_policy_res1)); + LASSERTF((int)offsetof(struct ldlm_reply, lock_policy_res2) == 104, "found %lld\n", + (long long)(int)offsetof(struct ldlm_reply, lock_policy_res2)); + LASSERTF((int)sizeof(((struct ldlm_reply *)0)->lock_policy_res2) == 8, "found %lld\n", + (long long)(int)sizeof(((struct ldlm_reply *)0)->lock_policy_res2)); + + /* Checks for struct ost_lvb_v1 */ + LASSERTF((int)sizeof(struct ost_lvb_v1) == 40, "found %lld\n", + (long long)(int)sizeof(struct ost_lvb_v1)); + LASSERTF((int)offsetof(struct ost_lvb_v1, lvb_size) == 0, "found %lld\n", + (long long)(int)offsetof(struct ost_lvb_v1, lvb_size)); + LASSERTF((int)sizeof(((struct ost_lvb_v1 *)0)->lvb_size) == 8, "found %lld\n", + (long long)(int)sizeof(((struct ost_lvb_v1 *)0)->lvb_size)); + LASSERTF((int)offsetof(struct ost_lvb_v1, lvb_mtime) == 8, "found %lld\n", + (long long)(int)offsetof(struct ost_lvb_v1, lvb_mtime)); + LASSERTF((int)sizeof(((struct ost_lvb_v1 *)0)->lvb_mtime) == 8, "found %lld\n", + (long long)(int)sizeof(((struct ost_lvb_v1 *)0)->lvb_mtime)); + LASSERTF((int)offsetof(struct ost_lvb_v1, lvb_atime) == 16, "found %lld\n", + (long long)(int)offsetof(struct ost_lvb_v1, lvb_atime)); + LASSERTF((int)sizeof(((struct ost_lvb_v1 *)0)->lvb_atime) == 8, "found %lld\n", + (long long)(int)sizeof(((struct ost_lvb_v1 *)0)->lvb_atime)); + LASSERTF((int)offsetof(struct ost_lvb_v1, lvb_ctime) == 24, "found %lld\n", + (long long)(int)offsetof(struct ost_lvb_v1, lvb_ctime)); + LASSERTF((int)sizeof(((struct ost_lvb_v1 *)0)->lvb_ctime) == 8, "found %lld\n", + (long long)(int)sizeof(((struct ost_lvb_v1 *)0)->lvb_ctime)); + LASSERTF((int)offsetof(struct ost_lvb_v1, lvb_blocks) == 32, "found %lld\n", + (long long)(int)offsetof(struct ost_lvb_v1, lvb_blocks)); + LASSERTF((int)sizeof(((struct ost_lvb_v1 *)0)->lvb_blocks) == 8, "found %lld\n", + (long long)(int)sizeof(((struct ost_lvb_v1 *)0)->lvb_blocks)); + + /* Checks for struct ost_lvb */ + LASSERTF((int)sizeof(struct ost_lvb) == 56, "found %lld\n", + (long long)(int)sizeof(struct ost_lvb)); + LASSERTF((int)offsetof(struct ost_lvb, lvb_size) == 0, "found %lld\n", + (long long)(int)offsetof(struct ost_lvb, lvb_size)); + LASSERTF((int)sizeof(((struct ost_lvb *)0)->lvb_size) == 8, "found %lld\n", + (long long)(int)sizeof(((struct ost_lvb *)0)->lvb_size)); + LASSERTF((int)offsetof(struct ost_lvb, lvb_mtime) == 8, "found %lld\n", + (long long)(int)offsetof(struct ost_lvb, lvb_mtime)); + LASSERTF((int)sizeof(((struct ost_lvb *)0)->lvb_mtime) == 8, "found %lld\n", + (long long)(int)sizeof(((struct ost_lvb *)0)->lvb_mtime)); + LASSERTF((int)offsetof(struct ost_lvb, lvb_atime) == 16, "found %lld\n", + (long long)(int)offsetof(struct ost_lvb, lvb_atime)); + LASSERTF((int)sizeof(((struct ost_lvb *)0)->lvb_atime) == 8, "found %lld\n", + (long long)(int)sizeof(((struct ost_lvb *)0)->lvb_atime)); + LASSERTF((int)offsetof(struct ost_lvb, lvb_ctime) == 24, "found %lld\n", + (long long)(int)offsetof(struct ost_lvb, lvb_ctime)); + LASSERTF((int)sizeof(((struct ost_lvb *)0)->lvb_ctime) == 8, "found %lld\n", + (long long)(int)sizeof(((struct ost_lvb *)0)->lvb_ctime)); + LASSERTF((int)offsetof(struct ost_lvb, lvb_blocks) == 32, "found %lld\n", + (long long)(int)offsetof(struct ost_lvb, lvb_blocks)); + LASSERTF((int)sizeof(((struct ost_lvb *)0)->lvb_blocks) == 8, "found %lld\n", + (long long)(int)sizeof(((struct ost_lvb *)0)->lvb_blocks)); + LASSERTF((int)offsetof(struct ost_lvb, lvb_mtime_ns) == 40, "found %lld\n", + (long long)(int)offsetof(struct ost_lvb, lvb_mtime_ns)); + LASSERTF((int)sizeof(((struct ost_lvb *)0)->lvb_mtime_ns) == 4, "found %lld\n", + (long long)(int)sizeof(((struct ost_lvb *)0)->lvb_mtime_ns)); + LASSERTF((int)offsetof(struct ost_lvb, lvb_atime_ns) == 44, "found %lld\n", + (long long)(int)offsetof(struct ost_lvb, lvb_atime_ns)); + LASSERTF((int)sizeof(((struct ost_lvb *)0)->lvb_atime_ns) == 4, "found %lld\n", + (long long)(int)sizeof(((struct ost_lvb *)0)->lvb_atime_ns)); + LASSERTF((int)offsetof(struct ost_lvb, lvb_ctime_ns) == 48, "found %lld\n", + (long long)(int)offsetof(struct ost_lvb, lvb_ctime_ns)); + LASSERTF((int)sizeof(((struct ost_lvb *)0)->lvb_ctime_ns) == 4, "found %lld\n", + (long long)(int)sizeof(((struct ost_lvb *)0)->lvb_ctime_ns)); + LASSERTF((int)offsetof(struct ost_lvb, lvb_padding) == 52, "found %lld\n", + (long long)(int)offsetof(struct ost_lvb, lvb_padding)); + LASSERTF((int)sizeof(((struct ost_lvb *)0)->lvb_padding) == 4, "found %lld\n", + (long long)(int)sizeof(((struct ost_lvb *)0)->lvb_padding)); + + /* Checks for struct lquota_lvb */ + LASSERTF((int)sizeof(struct lquota_lvb) == 40, "found %lld\n", + (long long)(int)sizeof(struct lquota_lvb)); + LASSERTF((int)offsetof(struct lquota_lvb, lvb_flags) == 0, "found %lld\n", + (long long)(int)offsetof(struct lquota_lvb, lvb_flags)); + LASSERTF((int)sizeof(((struct lquota_lvb *)0)->lvb_flags) == 8, "found %lld\n", + (long long)(int)sizeof(((struct lquota_lvb *)0)->lvb_flags)); + LASSERTF((int)offsetof(struct lquota_lvb, lvb_id_may_rel) == 8, "found %lld\n", + (long long)(int)offsetof(struct lquota_lvb, lvb_id_may_rel)); + LASSERTF((int)sizeof(((struct lquota_lvb *)0)->lvb_id_may_rel) == 8, "found %lld\n", + (long long)(int)sizeof(((struct lquota_lvb *)0)->lvb_id_may_rel)); + LASSERTF((int)offsetof(struct lquota_lvb, lvb_id_rel) == 16, "found %lld\n", + (long long)(int)offsetof(struct lquota_lvb, lvb_id_rel)); + LASSERTF((int)sizeof(((struct lquota_lvb *)0)->lvb_id_rel) == 8, "found %lld\n", + (long long)(int)sizeof(((struct lquota_lvb *)0)->lvb_id_rel)); + LASSERTF((int)offsetof(struct lquota_lvb, lvb_id_qunit) == 24, "found %lld\n", + (long long)(int)offsetof(struct lquota_lvb, lvb_id_qunit)); + LASSERTF((int)sizeof(((struct lquota_lvb *)0)->lvb_id_qunit) == 8, "found %lld\n", + (long long)(int)sizeof(((struct lquota_lvb *)0)->lvb_id_qunit)); + LASSERTF((int)offsetof(struct lquota_lvb, lvb_pad1) == 32, "found %lld\n", + (long long)(int)offsetof(struct lquota_lvb, lvb_pad1)); + LASSERTF((int)sizeof(((struct lquota_lvb *)0)->lvb_pad1) == 8, "found %lld\n", + (long long)(int)sizeof(((struct lquota_lvb *)0)->lvb_pad1)); + LASSERTF(LQUOTA_FL_EDQUOT == 1, "found %lld\n", + (long long)LQUOTA_FL_EDQUOT); + + /* Checks for struct ldlm_gl_lquota_desc */ + LASSERTF((int)sizeof(struct ldlm_gl_lquota_desc) == 64, "found %lld\n", + (long long)(int)sizeof(struct ldlm_gl_lquota_desc)); + LASSERTF((int)offsetof(struct ldlm_gl_lquota_desc, gl_id) == 0, "found %lld\n", + (long long)(int)offsetof(struct ldlm_gl_lquota_desc, gl_id)); + LASSERTF((int)sizeof(((struct ldlm_gl_lquota_desc *)0)->gl_id) == 16, "found %lld\n", + (long long)(int)sizeof(((struct ldlm_gl_lquota_desc *)0)->gl_id)); + LASSERTF((int)offsetof(struct ldlm_gl_lquota_desc, gl_flags) == 16, "found %lld\n", + (long long)(int)offsetof(struct ldlm_gl_lquota_desc, gl_flags)); + LASSERTF((int)sizeof(((struct ldlm_gl_lquota_desc *)0)->gl_flags) == 8, "found %lld\n", + (long long)(int)sizeof(((struct ldlm_gl_lquota_desc *)0)->gl_flags)); + LASSERTF((int)offsetof(struct ldlm_gl_lquota_desc, gl_ver) == 24, "found %lld\n", + (long long)(int)offsetof(struct ldlm_gl_lquota_desc, gl_ver)); + LASSERTF((int)sizeof(((struct ldlm_gl_lquota_desc *)0)->gl_ver) == 8, "found %lld\n", + (long long)(int)sizeof(((struct ldlm_gl_lquota_desc *)0)->gl_ver)); + LASSERTF((int)offsetof(struct ldlm_gl_lquota_desc, gl_hardlimit) == 32, "found %lld\n", + (long long)(int)offsetof(struct ldlm_gl_lquota_desc, gl_hardlimit)); + LASSERTF((int)sizeof(((struct ldlm_gl_lquota_desc *)0)->gl_hardlimit) == 8, "found %lld\n", + (long long)(int)sizeof(((struct ldlm_gl_lquota_desc *)0)->gl_hardlimit)); + LASSERTF((int)offsetof(struct ldlm_gl_lquota_desc, gl_softlimit) == 40, "found %lld\n", + (long long)(int)offsetof(struct ldlm_gl_lquota_desc, gl_softlimit)); + LASSERTF((int)sizeof(((struct ldlm_gl_lquota_desc *)0)->gl_softlimit) == 8, "found %lld\n", + (long long)(int)sizeof(((struct ldlm_gl_lquota_desc *)0)->gl_softlimit)); + LASSERTF((int)offsetof(struct ldlm_gl_lquota_desc, gl_time) == 48, "found %lld\n", + (long long)(int)offsetof(struct ldlm_gl_lquota_desc, gl_time)); + LASSERTF((int)sizeof(((struct ldlm_gl_lquota_desc *)0)->gl_time) == 8, "found %lld\n", + (long long)(int)sizeof(((struct ldlm_gl_lquota_desc *)0)->gl_time)); + LASSERTF((int)offsetof(struct ldlm_gl_lquota_desc, gl_pad2) == 56, "found %lld\n", + (long long)(int)offsetof(struct ldlm_gl_lquota_desc, gl_pad2)); + LASSERTF((int)sizeof(((struct ldlm_gl_lquota_desc *)0)->gl_pad2) == 8, "found %lld\n", + (long long)(int)sizeof(((struct ldlm_gl_lquota_desc *)0)->gl_pad2)); + + /* Checks for struct mgs_send_param */ + LASSERTF((int)sizeof(struct mgs_send_param) == 1024, "found %lld\n", + (long long)(int)sizeof(struct mgs_send_param)); + CLASSERT(MGS_PARAM_MAXLEN == 1024); + LASSERTF((int)offsetof(struct mgs_send_param, mgs_param[1024]) == 1024, "found %lld\n", + (long long)(int)offsetof(struct mgs_send_param, mgs_param[1024])); + LASSERTF((int)sizeof(((struct mgs_send_param *)0)->mgs_param[1024]) == 1, "found %lld\n", + (long long)(int)sizeof(((struct mgs_send_param *)0)->mgs_param[1024])); + + /* Checks for struct cfg_marker */ + LASSERTF((int)sizeof(struct cfg_marker) == 160, "found %lld\n", + (long long)(int)sizeof(struct cfg_marker)); + LASSERTF((int)offsetof(struct cfg_marker, cm_step) == 0, "found %lld\n", + (long long)(int)offsetof(struct cfg_marker, cm_step)); + LASSERTF((int)sizeof(((struct cfg_marker *)0)->cm_step) == 4, "found %lld\n", + (long long)(int)sizeof(((struct cfg_marker *)0)->cm_step)); + LASSERTF((int)offsetof(struct cfg_marker, cm_flags) == 4, "found %lld\n", + (long long)(int)offsetof(struct cfg_marker, cm_flags)); + LASSERTF((int)sizeof(((struct cfg_marker *)0)->cm_flags) == 4, "found %lld\n", + (long long)(int)sizeof(((struct cfg_marker *)0)->cm_flags)); + LASSERTF((int)offsetof(struct cfg_marker, cm_vers) == 8, "found %lld\n", + (long long)(int)offsetof(struct cfg_marker, cm_vers)); + LASSERTF((int)sizeof(((struct cfg_marker *)0)->cm_vers) == 4, "found %lld\n", + (long long)(int)sizeof(((struct cfg_marker *)0)->cm_vers)); + LASSERTF((int)offsetof(struct cfg_marker, cm_padding) == 12, "found %lld\n", + (long long)(int)offsetof(struct cfg_marker, cm_padding)); + LASSERTF((int)sizeof(((struct cfg_marker *)0)->cm_padding) == 4, "found %lld\n", + (long long)(int)sizeof(((struct cfg_marker *)0)->cm_padding)); + LASSERTF((int)offsetof(struct cfg_marker, cm_createtime) == 16, "found %lld\n", + (long long)(int)offsetof(struct cfg_marker, cm_createtime)); + LASSERTF((int)sizeof(((struct cfg_marker *)0)->cm_createtime) == 8, "found %lld\n", + (long long)(int)sizeof(((struct cfg_marker *)0)->cm_createtime)); + LASSERTF((int)offsetof(struct cfg_marker, cm_canceltime) == 24, "found %lld\n", + (long long)(int)offsetof(struct cfg_marker, cm_canceltime)); + LASSERTF((int)sizeof(((struct cfg_marker *)0)->cm_canceltime) == 8, "found %lld\n", + (long long)(int)sizeof(((struct cfg_marker *)0)->cm_canceltime)); + LASSERTF((int)offsetof(struct cfg_marker, cm_tgtname) == 32, "found %lld\n", + (long long)(int)offsetof(struct cfg_marker, cm_tgtname)); + LASSERTF((int)sizeof(((struct cfg_marker *)0)->cm_tgtname) == 64, "found %lld\n", + (long long)(int)sizeof(((struct cfg_marker *)0)->cm_tgtname)); + LASSERTF((int)offsetof(struct cfg_marker, cm_comment) == 96, "found %lld\n", + (long long)(int)offsetof(struct cfg_marker, cm_comment)); + LASSERTF((int)sizeof(((struct cfg_marker *)0)->cm_comment) == 64, "found %lld\n", + (long long)(int)sizeof(((struct cfg_marker *)0)->cm_comment)); + + /* Checks for struct llog_logid */ + LASSERTF((int)sizeof(struct llog_logid) == 20, "found %lld\n", + (long long)(int)sizeof(struct llog_logid)); + LASSERTF((int)offsetof(struct llog_logid, lgl_oi) == 0, "found %lld\n", + (long long)(int)offsetof(struct llog_logid, lgl_oi)); + LASSERTF((int)sizeof(((struct llog_logid *)0)->lgl_oi) == 16, "found %lld\n", + (long long)(int)sizeof(((struct llog_logid *)0)->lgl_oi)); + LASSERTF((int)offsetof(struct llog_logid, lgl_ogen) == 16, "found %lld\n", + (long long)(int)offsetof(struct llog_logid, lgl_ogen)); + LASSERTF((int)sizeof(((struct llog_logid *)0)->lgl_ogen) == 4, "found %lld\n", + (long long)(int)sizeof(((struct llog_logid *)0)->lgl_ogen)); + CLASSERT(OST_SZ_REC == 274730752); + CLASSERT(MDS_UNLINK_REC == 274801668); + CLASSERT(MDS_UNLINK64_REC == 275325956); + CLASSERT(MDS_SETATTR64_REC == 275325953); + CLASSERT(OBD_CFG_REC == 274857984); + CLASSERT(LLOG_GEN_REC == 274989056); + CLASSERT(CHANGELOG_REC == 275120128); + CLASSERT(CHANGELOG_USER_REC == 275185664); + CLASSERT(LLOG_HDR_MAGIC == 275010873); + CLASSERT(LLOG_LOGID_MAGIC == 275010875); + + /* Checks for struct llog_catid */ + LASSERTF((int)sizeof(struct llog_catid) == 32, "found %lld\n", + (long long)(int)sizeof(struct llog_catid)); + LASSERTF((int)offsetof(struct llog_catid, lci_logid) == 0, "found %lld\n", + (long long)(int)offsetof(struct llog_catid, lci_logid)); + LASSERTF((int)sizeof(((struct llog_catid *)0)->lci_logid) == 20, "found %lld\n", + (long long)(int)sizeof(((struct llog_catid *)0)->lci_logid)); + LASSERTF((int)offsetof(struct llog_catid, lci_padding1) == 20, "found %lld\n", + (long long)(int)offsetof(struct llog_catid, lci_padding1)); + LASSERTF((int)sizeof(((struct llog_catid *)0)->lci_padding1) == 4, "found %lld\n", + (long long)(int)sizeof(((struct llog_catid *)0)->lci_padding1)); + LASSERTF((int)offsetof(struct llog_catid, lci_padding2) == 24, "found %lld\n", + (long long)(int)offsetof(struct llog_catid, lci_padding2)); + LASSERTF((int)sizeof(((struct llog_catid *)0)->lci_padding2) == 4, "found %lld\n", + (long long)(int)sizeof(((struct llog_catid *)0)->lci_padding2)); + LASSERTF((int)offsetof(struct llog_catid, lci_padding3) == 28, "found %lld\n", + (long long)(int)offsetof(struct llog_catid, lci_padding3)); + LASSERTF((int)sizeof(((struct llog_catid *)0)->lci_padding3) == 4, "found %lld\n", + (long long)(int)sizeof(((struct llog_catid *)0)->lci_padding3)); + + /* Checks for struct llog_rec_hdr */ + LASSERTF((int)sizeof(struct llog_rec_hdr) == 16, "found %lld\n", + (long long)(int)sizeof(struct llog_rec_hdr)); + LASSERTF((int)offsetof(struct llog_rec_hdr, lrh_len) == 0, "found %lld\n", + (long long)(int)offsetof(struct llog_rec_hdr, lrh_len)); + LASSERTF((int)sizeof(((struct llog_rec_hdr *)0)->lrh_len) == 4, "found %lld\n", + (long long)(int)sizeof(((struct llog_rec_hdr *)0)->lrh_len)); + LASSERTF((int)offsetof(struct llog_rec_hdr, lrh_index) == 4, "found %lld\n", + (long long)(int)offsetof(struct llog_rec_hdr, lrh_index)); + LASSERTF((int)sizeof(((struct llog_rec_hdr *)0)->lrh_index) == 4, "found %lld\n", + (long long)(int)sizeof(((struct llog_rec_hdr *)0)->lrh_index)); + LASSERTF((int)offsetof(struct llog_rec_hdr, lrh_type) == 8, "found %lld\n", + (long long)(int)offsetof(struct llog_rec_hdr, lrh_type)); + LASSERTF((int)sizeof(((struct llog_rec_hdr *)0)->lrh_type) == 4, "found %lld\n", + (long long)(int)sizeof(((struct llog_rec_hdr *)0)->lrh_type)); + LASSERTF((int)offsetof(struct llog_rec_hdr, lrh_id) == 12, "found %lld\n", + (long long)(int)offsetof(struct llog_rec_hdr, lrh_id)); + LASSERTF((int)sizeof(((struct llog_rec_hdr *)0)->lrh_id) == 4, "found %lld\n", + (long long)(int)sizeof(((struct llog_rec_hdr *)0)->lrh_id)); + + /* Checks for struct llog_rec_tail */ + LASSERTF((int)sizeof(struct llog_rec_tail) == 8, "found %lld\n", + (long long)(int)sizeof(struct llog_rec_tail)); + LASSERTF((int)offsetof(struct llog_rec_tail, lrt_len) == 0, "found %lld\n", + (long long)(int)offsetof(struct llog_rec_tail, lrt_len)); + LASSERTF((int)sizeof(((struct llog_rec_tail *)0)->lrt_len) == 4, "found %lld\n", + (long long)(int)sizeof(((struct llog_rec_tail *)0)->lrt_len)); + LASSERTF((int)offsetof(struct llog_rec_tail, lrt_index) == 4, "found %lld\n", + (long long)(int)offsetof(struct llog_rec_tail, lrt_index)); + LASSERTF((int)sizeof(((struct llog_rec_tail *)0)->lrt_index) == 4, "found %lld\n", + (long long)(int)sizeof(((struct llog_rec_tail *)0)->lrt_index)); + + /* Checks for struct llog_logid_rec */ + LASSERTF((int)sizeof(struct llog_logid_rec) == 64, "found %lld\n", + (long long)(int)sizeof(struct llog_logid_rec)); + LASSERTF((int)offsetof(struct llog_logid_rec, lid_hdr) == 0, "found %lld\n", + (long long)(int)offsetof(struct llog_logid_rec, lid_hdr)); + LASSERTF((int)sizeof(((struct llog_logid_rec *)0)->lid_hdr) == 16, "found %lld\n", + (long long)(int)sizeof(((struct llog_logid_rec *)0)->lid_hdr)); + LASSERTF((int)offsetof(struct llog_logid_rec, lid_id) == 16, "found %lld\n", + (long long)(int)offsetof(struct llog_logid_rec, lid_id)); + LASSERTF((int)sizeof(((struct llog_logid_rec *)0)->lid_id) == 20, "found %lld\n", + (long long)(int)sizeof(((struct llog_logid_rec *)0)->lid_id)); + LASSERTF((int)offsetof(struct llog_logid_rec, lid_padding1) == 36, "found %lld\n", + (long long)(int)offsetof(struct llog_logid_rec, lid_padding1)); + LASSERTF((int)sizeof(((struct llog_logid_rec *)0)->lid_padding1) == 4, "found %lld\n", + (long long)(int)sizeof(((struct llog_logid_rec *)0)->lid_padding1)); + LASSERTF((int)offsetof(struct llog_logid_rec, lid_padding2) == 40, "found %lld\n", + (long long)(int)offsetof(struct llog_logid_rec, lid_padding2)); + LASSERTF((int)sizeof(((struct llog_logid_rec *)0)->lid_padding2) == 8, "found %lld\n", + (long long)(int)sizeof(((struct llog_logid_rec *)0)->lid_padding2)); + LASSERTF((int)offsetof(struct llog_logid_rec, lid_padding3) == 48, "found %lld\n", + (long long)(int)offsetof(struct llog_logid_rec, lid_padding3)); + LASSERTF((int)sizeof(((struct llog_logid_rec *)0)->lid_padding3) == 8, "found %lld\n", + (long long)(int)sizeof(((struct llog_logid_rec *)0)->lid_padding3)); + LASSERTF((int)offsetof(struct llog_logid_rec, lid_tail) == 56, "found %lld\n", + (long long)(int)offsetof(struct llog_logid_rec, lid_tail)); + LASSERTF((int)sizeof(((struct llog_logid_rec *)0)->lid_tail) == 8, "found %lld\n", + (long long)(int)sizeof(((struct llog_logid_rec *)0)->lid_tail)); + + /* Checks for struct llog_unlink_rec */ + LASSERTF((int)sizeof(struct llog_unlink_rec) == 40, "found %lld\n", + (long long)(int)sizeof(struct llog_unlink_rec)); + LASSERTF((int)offsetof(struct llog_unlink_rec, lur_hdr) == 0, "found %lld\n", + (long long)(int)offsetof(struct llog_unlink_rec, lur_hdr)); + LASSERTF((int)sizeof(((struct llog_unlink_rec *)0)->lur_hdr) == 16, "found %lld\n", + (long long)(int)sizeof(((struct llog_unlink_rec *)0)->lur_hdr)); + LASSERTF((int)offsetof(struct llog_unlink_rec, lur_oid) == 16, "found %lld\n", + (long long)(int)offsetof(struct llog_unlink_rec, lur_oid)); + LASSERTF((int)sizeof(((struct llog_unlink_rec *)0)->lur_oid) == 8, "found %lld\n", + (long long)(int)sizeof(((struct llog_unlink_rec *)0)->lur_oid)); + LASSERTF((int)offsetof(struct llog_unlink_rec, lur_oseq) == 24, "found %lld\n", + (long long)(int)offsetof(struct llog_unlink_rec, lur_oseq)); + LASSERTF((int)sizeof(((struct llog_unlink_rec *)0)->lur_oseq) == 4, "found %lld\n", + (long long)(int)sizeof(((struct llog_unlink_rec *)0)->lur_oseq)); + LASSERTF((int)offsetof(struct llog_unlink_rec, lur_count) == 28, "found %lld\n", + (long long)(int)offsetof(struct llog_unlink_rec, lur_count)); + LASSERTF((int)sizeof(((struct llog_unlink_rec *)0)->lur_count) == 4, "found %lld\n", + (long long)(int)sizeof(((struct llog_unlink_rec *)0)->lur_count)); + LASSERTF((int)offsetof(struct llog_unlink_rec, lur_tail) == 32, "found %lld\n", + (long long)(int)offsetof(struct llog_unlink_rec, lur_tail)); + LASSERTF((int)sizeof(((struct llog_unlink_rec *)0)->lur_tail) == 8, "found %lld\n", + (long long)(int)sizeof(((struct llog_unlink_rec *)0)->lur_tail)); + /* Checks for struct llog_unlink64_rec */ + LASSERTF((int)sizeof(struct llog_unlink64_rec) == 64, "found %lld\n", + (long long)(int)sizeof(struct llog_unlink64_rec)); + LASSERTF((int)offsetof(struct llog_unlink64_rec, lur_hdr) == 0, "found %lld\n", + (long long)(int)offsetof(struct llog_unlink64_rec, lur_hdr)); + LASSERTF((int)sizeof(((struct llog_unlink64_rec *)0)->lur_hdr) == 16, "found %lld\n", + (long long)(int)sizeof(((struct llog_unlink64_rec *)0)->lur_hdr)); + LASSERTF((int)offsetof(struct llog_unlink64_rec, lur_fid) == 16, "found %lld\n", + (long long)(int)offsetof(struct llog_unlink64_rec, lur_fid)); + LASSERTF((int)sizeof(((struct llog_unlink64_rec *)0)->lur_fid) == 16, "found %lld\n", + (long long)(int)sizeof(((struct llog_unlink64_rec *)0)->lur_fid)); + LASSERTF((int)offsetof(struct llog_unlink64_rec, lur_count) == 32, "found %lld\n", + (long long)(int)offsetof(struct llog_unlink64_rec, lur_count)); + LASSERTF((int)sizeof(((struct llog_unlink64_rec *)0)->lur_count) == 4, "found %lld\n", + (long long)(int)sizeof(((struct llog_unlink64_rec *)0)->lur_count)); + LASSERTF((int)offsetof(struct llog_unlink64_rec, lur_tail) == 56, "found %lld\n", + (long long)(int)offsetof(struct llog_unlink64_rec, lur_tail)); + LASSERTF((int)sizeof(((struct llog_unlink64_rec *)0)->lur_tail) == 8, "found %lld\n", + (long long)(int)sizeof(((struct llog_unlink64_rec *)0)->lur_tail)); + LASSERTF((int)offsetof(struct llog_unlink64_rec, lur_padding1) == 36, "found %lld\n", + (long long)(int)offsetof(struct llog_unlink64_rec, lur_padding1)); + LASSERTF((int)sizeof(((struct llog_unlink64_rec *)0)->lur_padding1) == 4, "found %lld\n", + (long long)(int)sizeof(((struct llog_unlink64_rec *)0)->lur_padding1)); + LASSERTF((int)offsetof(struct llog_unlink64_rec, lur_padding2) == 40, "found %lld\n", + (long long)(int)offsetof(struct llog_unlink64_rec, lur_padding2)); + LASSERTF((int)sizeof(((struct llog_unlink64_rec *)0)->lur_padding2) == 8, "found %lld\n", + (long long)(int)sizeof(((struct llog_unlink64_rec *)0)->lur_padding2)); + LASSERTF((int)offsetof(struct llog_unlink64_rec, lur_padding3) == 48, "found %lld\n", + (long long)(int)offsetof(struct llog_unlink64_rec, lur_padding3)); + LASSERTF((int)sizeof(((struct llog_unlink64_rec *)0)->lur_padding3) == 8, "found %lld\n", + (long long)(int)sizeof(((struct llog_unlink64_rec *)0)->lur_padding3)); + + /* Checks for struct llog_setattr64_rec */ + LASSERTF((int)sizeof(struct llog_setattr64_rec) == 64, "found %lld\n", + (long long)(int)sizeof(struct llog_setattr64_rec)); + LASSERTF((int)offsetof(struct llog_setattr64_rec, lsr_hdr) == 0, "found %lld\n", + (long long)(int)offsetof(struct llog_setattr64_rec, lsr_hdr)); + LASSERTF((int)sizeof(((struct llog_setattr64_rec *)0)->lsr_hdr) == 16, "found %lld\n", + (long long)(int)sizeof(((struct llog_setattr64_rec *)0)->lsr_hdr)); + LASSERTF((int)offsetof(struct llog_setattr64_rec, lsr_oi) == 16, "found %lld\n", + (long long)(int)offsetof(struct llog_setattr64_rec, lsr_oi)); + LASSERTF((int)sizeof(((struct llog_setattr64_rec *)0)->lsr_oi) == 16, "found %lld\n", + (long long)(int)sizeof(((struct llog_setattr64_rec *)0)->lsr_oi)); + LASSERTF((int)offsetof(struct llog_setattr64_rec, lsr_uid) == 32, "found %lld\n", + (long long)(int)offsetof(struct llog_setattr64_rec, lsr_uid)); + LASSERTF((int)sizeof(((struct llog_setattr64_rec *)0)->lsr_uid) == 4, "found %lld\n", + (long long)(int)sizeof(((struct llog_setattr64_rec *)0)->lsr_uid)); + LASSERTF((int)offsetof(struct llog_setattr64_rec, lsr_uid_h) == 36, "found %lld\n", + (long long)(int)offsetof(struct llog_setattr64_rec, lsr_uid_h)); + LASSERTF((int)sizeof(((struct llog_setattr64_rec *)0)->lsr_uid_h) == 4, "found %lld\n", + (long long)(int)sizeof(((struct llog_setattr64_rec *)0)->lsr_uid_h)); + LASSERTF((int)offsetof(struct llog_setattr64_rec, lsr_gid) == 40, "found %lld\n", + (long long)(int)offsetof(struct llog_setattr64_rec, lsr_gid)); + LASSERTF((int)sizeof(((struct llog_setattr64_rec *)0)->lsr_gid) == 4, "found %lld\n", + (long long)(int)sizeof(((struct llog_setattr64_rec *)0)->lsr_gid)); + LASSERTF((int)offsetof(struct llog_setattr64_rec, lsr_gid_h) == 44, "found %lld\n", + (long long)(int)offsetof(struct llog_setattr64_rec, lsr_gid_h)); + LASSERTF((int)sizeof(((struct llog_setattr64_rec *)0)->lsr_gid_h) == 4, "found %lld\n", + (long long)(int)sizeof(((struct llog_setattr64_rec *)0)->lsr_gid_h)); + LASSERTF((int)offsetof(struct llog_setattr64_rec, lsr_padding) == 48, "found %lld\n", + (long long)(int)offsetof(struct llog_setattr64_rec, lsr_padding)); + LASSERTF((int)sizeof(((struct llog_setattr64_rec *)0)->lsr_padding) == 8, "found %lld\n", + (long long)(int)sizeof(((struct llog_setattr64_rec *)0)->lsr_padding)); + LASSERTF((int)offsetof(struct llog_setattr64_rec, lsr_tail) == 56, "found %lld\n", + (long long)(int)offsetof(struct llog_setattr64_rec, lsr_tail)); + LASSERTF((int)sizeof(((struct llog_setattr64_rec *)0)->lsr_tail) == 8, "found %lld\n", + (long long)(int)sizeof(((struct llog_setattr64_rec *)0)->lsr_tail)); + + /* Checks for struct llog_size_change_rec */ + LASSERTF((int)sizeof(struct llog_size_change_rec) == 64, "found %lld\n", + (long long)(int)sizeof(struct llog_size_change_rec)); + LASSERTF((int)offsetof(struct llog_size_change_rec, lsc_hdr) == 0, "found %lld\n", + (long long)(int)offsetof(struct llog_size_change_rec, lsc_hdr)); + LASSERTF((int)sizeof(((struct llog_size_change_rec *)0)->lsc_hdr) == 16, "found %lld\n", + (long long)(int)sizeof(((struct llog_size_change_rec *)0)->lsc_hdr)); + LASSERTF((int)offsetof(struct llog_size_change_rec, lsc_fid) == 16, "found %lld\n", + (long long)(int)offsetof(struct llog_size_change_rec, lsc_fid)); + LASSERTF((int)sizeof(((struct llog_size_change_rec *)0)->lsc_fid) == 16, "found %lld\n", + (long long)(int)sizeof(((struct llog_size_change_rec *)0)->lsc_fid)); + LASSERTF((int)offsetof(struct llog_size_change_rec, lsc_ioepoch) == 32, "found %lld\n", + (long long)(int)offsetof(struct llog_size_change_rec, lsc_ioepoch)); + LASSERTF((int)sizeof(((struct llog_size_change_rec *)0)->lsc_ioepoch) == 4, "found %lld\n", + (long long)(int)sizeof(((struct llog_size_change_rec *)0)->lsc_ioepoch)); + LASSERTF((int)offsetof(struct llog_size_change_rec, lsc_padding1) == 36, "found %lld\n", + (long long)(int)offsetof(struct llog_size_change_rec, lsc_padding1)); + LASSERTF((int)sizeof(((struct llog_size_change_rec *)0)->lsc_padding1) == 4, "found %lld\n", + (long long)(int)sizeof(((struct llog_size_change_rec *)0)->lsc_padding1)); + LASSERTF((int)offsetof(struct llog_size_change_rec, lsc_padding2) == 40, "found %lld\n", + (long long)(int)offsetof(struct llog_size_change_rec, lsc_padding2)); + LASSERTF((int)sizeof(((struct llog_size_change_rec *)0)->lsc_padding2) == 8, "found %lld\n", + (long long)(int)sizeof(((struct llog_size_change_rec *)0)->lsc_padding2)); + LASSERTF((int)offsetof(struct llog_size_change_rec, lsc_padding3) == 48, "found %lld\n", + (long long)(int)offsetof(struct llog_size_change_rec, lsc_padding3)); + LASSERTF((int)sizeof(((struct llog_size_change_rec *)0)->lsc_padding3) == 8, "found %lld\n", + (long long)(int)sizeof(((struct llog_size_change_rec *)0)->lsc_padding3)); + LASSERTF((int)offsetof(struct llog_size_change_rec, lsc_tail) == 56, "found %lld\n", + (long long)(int)offsetof(struct llog_size_change_rec, lsc_tail)); + LASSERTF((int)sizeof(((struct llog_size_change_rec *)0)->lsc_tail) == 8, "found %lld\n", + (long long)(int)sizeof(((struct llog_size_change_rec *)0)->lsc_tail)); + + /* Checks for struct changelog_rec */ + LASSERTF((int)sizeof(struct changelog_rec) == 64, "found %lld\n", + (long long)(int)sizeof(struct changelog_rec)); + LASSERTF((int)offsetof(struct changelog_rec, cr_namelen) == 0, "found %lld\n", + (long long)(int)offsetof(struct changelog_rec, cr_namelen)); + LASSERTF((int)sizeof(((struct changelog_rec *)0)->cr_namelen) == 2, "found %lld\n", + (long long)(int)sizeof(((struct changelog_rec *)0)->cr_namelen)); + LASSERTF((int)offsetof(struct changelog_rec, cr_flags) == 2, "found %lld\n", + (long long)(int)offsetof(struct changelog_rec, cr_flags)); + LASSERTF((int)sizeof(((struct changelog_rec *)0)->cr_flags) == 2, "found %lld\n", + (long long)(int)sizeof(((struct changelog_rec *)0)->cr_flags)); + LASSERTF((int)offsetof(struct changelog_rec, cr_type) == 4, "found %lld\n", + (long long)(int)offsetof(struct changelog_rec, cr_type)); + LASSERTF((int)sizeof(((struct changelog_rec *)0)->cr_type) == 4, "found %lld\n", + (long long)(int)sizeof(((struct changelog_rec *)0)->cr_type)); + LASSERTF((int)offsetof(struct changelog_rec, cr_index) == 8, "found %lld\n", + (long long)(int)offsetof(struct changelog_rec, cr_index)); + LASSERTF((int)sizeof(((struct changelog_rec *)0)->cr_index) == 8, "found %lld\n", + (long long)(int)sizeof(((struct changelog_rec *)0)->cr_index)); + LASSERTF((int)offsetof(struct changelog_rec, cr_prev) == 16, "found %lld\n", + (long long)(int)offsetof(struct changelog_rec, cr_prev)); + LASSERTF((int)sizeof(((struct changelog_rec *)0)->cr_prev) == 8, "found %lld\n", + (long long)(int)sizeof(((struct changelog_rec *)0)->cr_prev)); + LASSERTF((int)offsetof(struct changelog_rec, cr_time) == 24, "found %lld\n", + (long long)(int)offsetof(struct changelog_rec, cr_time)); + LASSERTF((int)sizeof(((struct changelog_rec *)0)->cr_time) == 8, "found %lld\n", + (long long)(int)sizeof(((struct changelog_rec *)0)->cr_time)); + LASSERTF((int)offsetof(struct changelog_rec, cr_tfid) == 32, "found %lld\n", + (long long)(int)offsetof(struct changelog_rec, cr_tfid)); + LASSERTF((int)sizeof(((struct changelog_rec *)0)->cr_tfid) == 16, "found %lld\n", + (long long)(int)sizeof(((struct changelog_rec *)0)->cr_tfid)); + LASSERTF((int)offsetof(struct changelog_rec, cr_pfid) == 48, "found %lld\n", + (long long)(int)offsetof(struct changelog_rec, cr_pfid)); + LASSERTF((int)sizeof(((struct changelog_rec *)0)->cr_pfid) == 16, "found %lld\n", + (long long)(int)sizeof(((struct changelog_rec *)0)->cr_pfid)); + + /* Checks for struct changelog_ext_rec */ + LASSERTF((int)sizeof(struct changelog_ext_rec) == 96, "found %lld\n", + (long long)(int)sizeof(struct changelog_ext_rec)); + LASSERTF((int)offsetof(struct changelog_ext_rec, cr_namelen) == 0, "found %lld\n", + (long long)(int)offsetof(struct changelog_ext_rec, cr_namelen)); + LASSERTF((int)sizeof(((struct changelog_ext_rec *)0)->cr_namelen) == 2, "found %lld\n", + (long long)(int)sizeof(((struct changelog_ext_rec *)0)->cr_namelen)); + LASSERTF((int)offsetof(struct changelog_ext_rec, cr_flags) == 2, "found %lld\n", + (long long)(int)offsetof(struct changelog_ext_rec, cr_flags)); + LASSERTF((int)sizeof(((struct changelog_ext_rec *)0)->cr_flags) == 2, "found %lld\n", + (long long)(int)sizeof(((struct changelog_ext_rec *)0)->cr_flags)); + LASSERTF((int)offsetof(struct changelog_ext_rec, cr_type) == 4, "found %lld\n", + (long long)(int)offsetof(struct changelog_ext_rec, cr_type)); + LASSERTF((int)sizeof(((struct changelog_ext_rec *)0)->cr_type) == 4, "found %lld\n", + (long long)(int)sizeof(((struct changelog_ext_rec *)0)->cr_type)); + LASSERTF((int)offsetof(struct changelog_ext_rec, cr_index) == 8, "found %lld\n", + (long long)(int)offsetof(struct changelog_ext_rec, cr_index)); + LASSERTF((int)sizeof(((struct changelog_ext_rec *)0)->cr_index) == 8, "found %lld\n", + (long long)(int)sizeof(((struct changelog_ext_rec *)0)->cr_index)); + LASSERTF((int)offsetof(struct changelog_ext_rec, cr_prev) == 16, "found %lld\n", + (long long)(int)offsetof(struct changelog_ext_rec, cr_prev)); + LASSERTF((int)sizeof(((struct changelog_ext_rec *)0)->cr_prev) == 8, "found %lld\n", + (long long)(int)sizeof(((struct changelog_ext_rec *)0)->cr_prev)); + LASSERTF((int)offsetof(struct changelog_ext_rec, cr_time) == 24, "found %lld\n", + (long long)(int)offsetof(struct changelog_ext_rec, cr_time)); + LASSERTF((int)sizeof(((struct changelog_ext_rec *)0)->cr_time) == 8, "found %lld\n", + (long long)(int)sizeof(((struct changelog_ext_rec *)0)->cr_time)); + LASSERTF((int)offsetof(struct changelog_ext_rec, cr_tfid) == 32, "found %lld\n", + (long long)(int)offsetof(struct changelog_ext_rec, cr_tfid)); + LASSERTF((int)sizeof(((struct changelog_ext_rec *)0)->cr_tfid) == 16, "found %lld\n", + (long long)(int)sizeof(((struct changelog_ext_rec *)0)->cr_tfid)); + LASSERTF((int)offsetof(struct changelog_ext_rec, cr_pfid) == 48, "found %lld\n", + (long long)(int)offsetof(struct changelog_ext_rec, cr_pfid)); + LASSERTF((int)sizeof(((struct changelog_ext_rec *)0)->cr_pfid) == 16, "found %lld\n", + (long long)(int)sizeof(((struct changelog_ext_rec *)0)->cr_pfid)); + LASSERTF((int)offsetof(struct changelog_ext_rec, cr_sfid) == 64, "found %lld\n", + (long long)(int)offsetof(struct changelog_ext_rec, cr_sfid)); + LASSERTF((int)sizeof(((struct changelog_ext_rec *)0)->cr_sfid) == 16, "found %lld\n", + (long long)(int)sizeof(((struct changelog_ext_rec *)0)->cr_sfid)); + LASSERTF((int)offsetof(struct changelog_ext_rec, cr_spfid) == 80, "found %lld\n", + (long long)(int)offsetof(struct changelog_ext_rec, cr_spfid)); + LASSERTF((int)sizeof(((struct changelog_ext_rec *)0)->cr_spfid) == 16, "found %lld\n", + (long long)(int)sizeof(((struct changelog_ext_rec *)0)->cr_spfid)); + + /* Checks for struct changelog_setinfo */ + LASSERTF((int)sizeof(struct changelog_setinfo) == 12, "found %lld\n", + (long long)(int)sizeof(struct changelog_setinfo)); + LASSERTF((int)offsetof(struct changelog_setinfo, cs_recno) == 0, "found %lld\n", + (long long)(int)offsetof(struct changelog_setinfo, cs_recno)); + LASSERTF((int)sizeof(((struct changelog_setinfo *)0)->cs_recno) == 8, "found %lld\n", + (long long)(int)sizeof(((struct changelog_setinfo *)0)->cs_recno)); + LASSERTF((int)offsetof(struct changelog_setinfo, cs_id) == 8, "found %lld\n", + (long long)(int)offsetof(struct changelog_setinfo, cs_id)); + LASSERTF((int)sizeof(((struct changelog_setinfo *)0)->cs_id) == 4, "found %lld\n", + (long long)(int)sizeof(((struct changelog_setinfo *)0)->cs_id)); + + /* Checks for struct llog_changelog_rec */ + LASSERTF((int)sizeof(struct llog_changelog_rec) == 88, "found %lld\n", + (long long)(int)sizeof(struct llog_changelog_rec)); + LASSERTF((int)offsetof(struct llog_changelog_rec, cr_hdr) == 0, "found %lld\n", + (long long)(int)offsetof(struct llog_changelog_rec, cr_hdr)); + LASSERTF((int)sizeof(((struct llog_changelog_rec *)0)->cr_hdr) == 16, "found %lld\n", + (long long)(int)sizeof(((struct llog_changelog_rec *)0)->cr_hdr)); + LASSERTF((int)offsetof(struct llog_changelog_rec, cr) == 16, "found %lld\n", + (long long)(int)offsetof(struct llog_changelog_rec, cr)); + LASSERTF((int)sizeof(((struct llog_changelog_rec *)0)->cr) == 64, "found %lld\n", + (long long)(int)sizeof(((struct llog_changelog_rec *)0)->cr)); + LASSERTF((int)offsetof(struct llog_changelog_rec, cr_tail) == 80, "found %lld\n", + (long long)(int)offsetof(struct llog_changelog_rec, cr_tail)); + LASSERTF((int)sizeof(((struct llog_changelog_rec *)0)->cr_tail) == 8, "found %lld\n", + (long long)(int)sizeof(((struct llog_changelog_rec *)0)->cr_tail)); + + /* Checks for struct llog_changelog_user_rec */ + LASSERTF((int)sizeof(struct llog_changelog_user_rec) == 40, "found %lld\n", + (long long)(int)sizeof(struct llog_changelog_user_rec)); + LASSERTF((int)offsetof(struct llog_changelog_user_rec, cur_hdr) == 0, "found %lld\n", + (long long)(int)offsetof(struct llog_changelog_user_rec, cur_hdr)); + LASSERTF((int)sizeof(((struct llog_changelog_user_rec *)0)->cur_hdr) == 16, "found %lld\n", + (long long)(int)sizeof(((struct llog_changelog_user_rec *)0)->cur_hdr)); + LASSERTF((int)offsetof(struct llog_changelog_user_rec, cur_id) == 16, "found %lld\n", + (long long)(int)offsetof(struct llog_changelog_user_rec, cur_id)); + LASSERTF((int)sizeof(((struct llog_changelog_user_rec *)0)->cur_id) == 4, "found %lld\n", + (long long)(int)sizeof(((struct llog_changelog_user_rec *)0)->cur_id)); + LASSERTF((int)offsetof(struct llog_changelog_user_rec, cur_padding) == 20, "found %lld\n", + (long long)(int)offsetof(struct llog_changelog_user_rec, cur_padding)); + LASSERTF((int)sizeof(((struct llog_changelog_user_rec *)0)->cur_padding) == 4, "found %lld\n", + (long long)(int)sizeof(((struct llog_changelog_user_rec *)0)->cur_padding)); + LASSERTF((int)offsetof(struct llog_changelog_user_rec, cur_endrec) == 24, "found %lld\n", + (long long)(int)offsetof(struct llog_changelog_user_rec, cur_endrec)); + LASSERTF((int)sizeof(((struct llog_changelog_user_rec *)0)->cur_endrec) == 8, "found %lld\n", + (long long)(int)sizeof(((struct llog_changelog_user_rec *)0)->cur_endrec)); + LASSERTF((int)offsetof(struct llog_changelog_user_rec, cur_tail) == 32, "found %lld\n", + (long long)(int)offsetof(struct llog_changelog_user_rec, cur_tail)); + LASSERTF((int)sizeof(((struct llog_changelog_user_rec *)0)->cur_tail) == 8, "found %lld\n", + (long long)(int)sizeof(((struct llog_changelog_user_rec *)0)->cur_tail)); + + /* Checks for struct llog_gen */ + LASSERTF((int)sizeof(struct llog_gen) == 16, "found %lld\n", + (long long)(int)sizeof(struct llog_gen)); + LASSERTF((int)offsetof(struct llog_gen, mnt_cnt) == 0, "found %lld\n", + (long long)(int)offsetof(struct llog_gen, mnt_cnt)); + LASSERTF((int)sizeof(((struct llog_gen *)0)->mnt_cnt) == 8, "found %lld\n", + (long long)(int)sizeof(((struct llog_gen *)0)->mnt_cnt)); + LASSERTF((int)offsetof(struct llog_gen, conn_cnt) == 8, "found %lld\n", + (long long)(int)offsetof(struct llog_gen, conn_cnt)); + LASSERTF((int)sizeof(((struct llog_gen *)0)->conn_cnt) == 8, "found %lld\n", + (long long)(int)sizeof(((struct llog_gen *)0)->conn_cnt)); + + /* Checks for struct llog_gen_rec */ + LASSERTF((int)sizeof(struct llog_gen_rec) == 64, "found %lld\n", + (long long)(int)sizeof(struct llog_gen_rec)); + LASSERTF((int)offsetof(struct llog_gen_rec, lgr_hdr) == 0, "found %lld\n", + (long long)(int)offsetof(struct llog_gen_rec, lgr_hdr)); + LASSERTF((int)sizeof(((struct llog_gen_rec *)0)->lgr_hdr) == 16, "found %lld\n", + (long long)(int)sizeof(((struct llog_gen_rec *)0)->lgr_hdr)); + LASSERTF((int)offsetof(struct llog_gen_rec, lgr_gen) == 16, "found %lld\n", + (long long)(int)offsetof(struct llog_gen_rec, lgr_gen)); + LASSERTF((int)sizeof(((struct llog_gen_rec *)0)->lgr_gen) == 16, "found %lld\n", + (long long)(int)sizeof(((struct llog_gen_rec *)0)->lgr_gen)); + LASSERTF((int)offsetof(struct llog_gen_rec, lgr_tail) == 56, "found %lld\n", + (long long)(int)offsetof(struct llog_gen_rec, lgr_tail)); + LASSERTF((int)sizeof(((struct llog_gen_rec *)0)->lgr_tail) == 8, "found %lld\n", + (long long)(int)sizeof(((struct llog_gen_rec *)0)->lgr_tail)); + + /* Checks for struct llog_log_hdr */ + LASSERTF((int)sizeof(struct llog_log_hdr) == 8192, "found %lld\n", + (long long)(int)sizeof(struct llog_log_hdr)); + LASSERTF((int)offsetof(struct llog_log_hdr, llh_hdr) == 0, "found %lld\n", + (long long)(int)offsetof(struct llog_log_hdr, llh_hdr)); + LASSERTF((int)sizeof(((struct llog_log_hdr *)0)->llh_hdr) == 16, "found %lld\n", + (long long)(int)sizeof(((struct llog_log_hdr *)0)->llh_hdr)); + LASSERTF((int)offsetof(struct llog_log_hdr, llh_timestamp) == 16, "found %lld\n", + (long long)(int)offsetof(struct llog_log_hdr, llh_timestamp)); + LASSERTF((int)sizeof(((struct llog_log_hdr *)0)->llh_timestamp) == 8, "found %lld\n", + (long long)(int)sizeof(((struct llog_log_hdr *)0)->llh_timestamp)); + LASSERTF((int)offsetof(struct llog_log_hdr, llh_count) == 24, "found %lld\n", + (long long)(int)offsetof(struct llog_log_hdr, llh_count)); + LASSERTF((int)sizeof(((struct llog_log_hdr *)0)->llh_count) == 4, "found %lld\n", + (long long)(int)sizeof(((struct llog_log_hdr *)0)->llh_count)); + LASSERTF((int)offsetof(struct llog_log_hdr, llh_bitmap_offset) == 28, "found %lld\n", + (long long)(int)offsetof(struct llog_log_hdr, llh_bitmap_offset)); + LASSERTF((int)sizeof(((struct llog_log_hdr *)0)->llh_bitmap_offset) == 4, "found %lld\n", + (long long)(int)sizeof(((struct llog_log_hdr *)0)->llh_bitmap_offset)); + LASSERTF((int)offsetof(struct llog_log_hdr, llh_size) == 32, "found %lld\n", + (long long)(int)offsetof(struct llog_log_hdr, llh_size)); + LASSERTF((int)sizeof(((struct llog_log_hdr *)0)->llh_size) == 4, "found %lld\n", + (long long)(int)sizeof(((struct llog_log_hdr *)0)->llh_size)); + LASSERTF((int)offsetof(struct llog_log_hdr, llh_flags) == 36, "found %lld\n", + (long long)(int)offsetof(struct llog_log_hdr, llh_flags)); + LASSERTF((int)sizeof(((struct llog_log_hdr *)0)->llh_flags) == 4, "found %lld\n", + (long long)(int)sizeof(((struct llog_log_hdr *)0)->llh_flags)); + LASSERTF((int)offsetof(struct llog_log_hdr, llh_cat_idx) == 40, "found %lld\n", + (long long)(int)offsetof(struct llog_log_hdr, llh_cat_idx)); + LASSERTF((int)sizeof(((struct llog_log_hdr *)0)->llh_cat_idx) == 4, "found %lld\n", + (long long)(int)sizeof(((struct llog_log_hdr *)0)->llh_cat_idx)); + LASSERTF((int)offsetof(struct llog_log_hdr, llh_tgtuuid) == 44, "found %lld\n", + (long long)(int)offsetof(struct llog_log_hdr, llh_tgtuuid)); + LASSERTF((int)sizeof(((struct llog_log_hdr *)0)->llh_tgtuuid) == 40, "found %lld\n", + (long long)(int)sizeof(((struct llog_log_hdr *)0)->llh_tgtuuid)); + LASSERTF((int)offsetof(struct llog_log_hdr, llh_reserved) == 84, "found %lld\n", + (long long)(int)offsetof(struct llog_log_hdr, llh_reserved)); + LASSERTF((int)sizeof(((struct llog_log_hdr *)0)->llh_reserved) == 4, "found %lld\n", + (long long)(int)sizeof(((struct llog_log_hdr *)0)->llh_reserved)); + LASSERTF((int)offsetof(struct llog_log_hdr, llh_bitmap) == 88, "found %lld\n", + (long long)(int)offsetof(struct llog_log_hdr, llh_bitmap)); + LASSERTF((int)sizeof(((struct llog_log_hdr *)0)->llh_bitmap) == 8096, "found %lld\n", + (long long)(int)sizeof(((struct llog_log_hdr *)0)->llh_bitmap)); + LASSERTF((int)offsetof(struct llog_log_hdr, llh_tail) == 8184, "found %lld\n", + (long long)(int)offsetof(struct llog_log_hdr, llh_tail)); + LASSERTF((int)sizeof(((struct llog_log_hdr *)0)->llh_tail) == 8, "found %lld\n", + (long long)(int)sizeof(((struct llog_log_hdr *)0)->llh_tail)); + + /* Checks for struct llog_cookie */ + LASSERTF((int)sizeof(struct llog_cookie) == 32, "found %lld\n", + (long long)(int)sizeof(struct llog_cookie)); + LASSERTF((int)offsetof(struct llog_cookie, lgc_lgl) == 0, "found %lld\n", + (long long)(int)offsetof(struct llog_cookie, lgc_lgl)); + LASSERTF((int)sizeof(((struct llog_cookie *)0)->lgc_lgl) == 20, "found %lld\n", + (long long)(int)sizeof(((struct llog_cookie *)0)->lgc_lgl)); + LASSERTF((int)offsetof(struct llog_cookie, lgc_subsys) == 20, "found %lld\n", + (long long)(int)offsetof(struct llog_cookie, lgc_subsys)); + LASSERTF((int)sizeof(((struct llog_cookie *)0)->lgc_subsys) == 4, "found %lld\n", + (long long)(int)sizeof(((struct llog_cookie *)0)->lgc_subsys)); + LASSERTF((int)offsetof(struct llog_cookie, lgc_index) == 24, "found %lld\n", + (long long)(int)offsetof(struct llog_cookie, lgc_index)); + LASSERTF((int)sizeof(((struct llog_cookie *)0)->lgc_index) == 4, "found %lld\n", + (long long)(int)sizeof(((struct llog_cookie *)0)->lgc_index)); + LASSERTF((int)offsetof(struct llog_cookie, lgc_padding) == 28, "found %lld\n", + (long long)(int)offsetof(struct llog_cookie, lgc_padding)); + LASSERTF((int)sizeof(((struct llog_cookie *)0)->lgc_padding) == 4, "found %lld\n", + (long long)(int)sizeof(((struct llog_cookie *)0)->lgc_padding)); + + /* Checks for struct llogd_body */ + LASSERTF((int)sizeof(struct llogd_body) == 48, "found %lld\n", + (long long)(int)sizeof(struct llogd_body)); + LASSERTF((int)offsetof(struct llogd_body, lgd_logid) == 0, "found %lld\n", + (long long)(int)offsetof(struct llogd_body, lgd_logid)); + LASSERTF((int)sizeof(((struct llogd_body *)0)->lgd_logid) == 20, "found %lld\n", + (long long)(int)sizeof(((struct llogd_body *)0)->lgd_logid)); + LASSERTF((int)offsetof(struct llogd_body, lgd_ctxt_idx) == 20, "found %lld\n", + (long long)(int)offsetof(struct llogd_body, lgd_ctxt_idx)); + LASSERTF((int)sizeof(((struct llogd_body *)0)->lgd_ctxt_idx) == 4, "found %lld\n", + (long long)(int)sizeof(((struct llogd_body *)0)->lgd_ctxt_idx)); + LASSERTF((int)offsetof(struct llogd_body, lgd_llh_flags) == 24, "found %lld\n", + (long long)(int)offsetof(struct llogd_body, lgd_llh_flags)); + LASSERTF((int)sizeof(((struct llogd_body *)0)->lgd_llh_flags) == 4, "found %lld\n", + (long long)(int)sizeof(((struct llogd_body *)0)->lgd_llh_flags)); + LASSERTF((int)offsetof(struct llogd_body, lgd_index) == 28, "found %lld\n", + (long long)(int)offsetof(struct llogd_body, lgd_index)); + LASSERTF((int)sizeof(((struct llogd_body *)0)->lgd_index) == 4, "found %lld\n", + (long long)(int)sizeof(((struct llogd_body *)0)->lgd_index)); + LASSERTF((int)offsetof(struct llogd_body, lgd_saved_index) == 32, "found %lld\n", + (long long)(int)offsetof(struct llogd_body, lgd_saved_index)); + LASSERTF((int)sizeof(((struct llogd_body *)0)->lgd_saved_index) == 4, "found %lld\n", + (long long)(int)sizeof(((struct llogd_body *)0)->lgd_saved_index)); + LASSERTF((int)offsetof(struct llogd_body, lgd_len) == 36, "found %lld\n", + (long long)(int)offsetof(struct llogd_body, lgd_len)); + LASSERTF((int)sizeof(((struct llogd_body *)0)->lgd_len) == 4, "found %lld\n", + (long long)(int)sizeof(((struct llogd_body *)0)->lgd_len)); + LASSERTF((int)offsetof(struct llogd_body, lgd_cur_offset) == 40, "found %lld\n", + (long long)(int)offsetof(struct llogd_body, lgd_cur_offset)); + LASSERTF((int)sizeof(((struct llogd_body *)0)->lgd_cur_offset) == 8, "found %lld\n", + (long long)(int)sizeof(((struct llogd_body *)0)->lgd_cur_offset)); + CLASSERT(LLOG_ORIGIN_HANDLE_CREATE == 501); + CLASSERT(LLOG_ORIGIN_HANDLE_NEXT_BLOCK == 502); + CLASSERT(LLOG_ORIGIN_HANDLE_READ_HEADER == 503); + CLASSERT(LLOG_ORIGIN_HANDLE_WRITE_REC == 504); + CLASSERT(LLOG_ORIGIN_HANDLE_CLOSE == 505); + CLASSERT(LLOG_ORIGIN_CONNECT == 506); + CLASSERT(LLOG_CATINFO == 507); + CLASSERT(LLOG_ORIGIN_HANDLE_PREV_BLOCK == 508); + CLASSERT(LLOG_ORIGIN_HANDLE_DESTROY == 509); + CLASSERT(LLOG_FIRST_OPC == 501); + CLASSERT(LLOG_LAST_OPC == 510); + + /* Checks for struct llogd_conn_body */ + LASSERTF((int)sizeof(struct llogd_conn_body) == 40, "found %lld\n", + (long long)(int)sizeof(struct llogd_conn_body)); + LASSERTF((int)offsetof(struct llogd_conn_body, lgdc_gen) == 0, "found %lld\n", + (long long)(int)offsetof(struct llogd_conn_body, lgdc_gen)); + LASSERTF((int)sizeof(((struct llogd_conn_body *)0)->lgdc_gen) == 16, "found %lld\n", + (long long)(int)sizeof(((struct llogd_conn_body *)0)->lgdc_gen)); + LASSERTF((int)offsetof(struct llogd_conn_body, lgdc_logid) == 16, "found %lld\n", + (long long)(int)offsetof(struct llogd_conn_body, lgdc_logid)); + LASSERTF((int)sizeof(((struct llogd_conn_body *)0)->lgdc_logid) == 20, "found %lld\n", + (long long)(int)sizeof(((struct llogd_conn_body *)0)->lgdc_logid)); + LASSERTF((int)offsetof(struct llogd_conn_body, lgdc_ctxt_idx) == 36, "found %lld\n", + (long long)(int)offsetof(struct llogd_conn_body, lgdc_ctxt_idx)); + LASSERTF((int)sizeof(((struct llogd_conn_body *)0)->lgdc_ctxt_idx) == 4, "found %lld\n", + (long long)(int)sizeof(((struct llogd_conn_body *)0)->lgdc_ctxt_idx)); + + /* Checks for struct ll_fiemap_info_key */ + LASSERTF((int)sizeof(struct ll_fiemap_info_key) == 248, "found %lld\n", + (long long)(int)sizeof(struct ll_fiemap_info_key)); + LASSERTF((int)offsetof(struct ll_fiemap_info_key, name[8]) == 8, "found %lld\n", + (long long)(int)offsetof(struct ll_fiemap_info_key, name[8])); + LASSERTF((int)sizeof(((struct ll_fiemap_info_key *)0)->name[8]) == 1, "found %lld\n", + (long long)(int)sizeof(((struct ll_fiemap_info_key *)0)->name[8])); + LASSERTF((int)offsetof(struct ll_fiemap_info_key, oa) == 8, "found %lld\n", + (long long)(int)offsetof(struct ll_fiemap_info_key, oa)); + LASSERTF((int)sizeof(((struct ll_fiemap_info_key *)0)->oa) == 208, "found %lld\n", + (long long)(int)sizeof(((struct ll_fiemap_info_key *)0)->oa)); + LASSERTF((int)offsetof(struct ll_fiemap_info_key, fiemap) == 216, "found %lld\n", + (long long)(int)offsetof(struct ll_fiemap_info_key, fiemap)); + LASSERTF((int)sizeof(((struct ll_fiemap_info_key *)0)->fiemap) == 32, "found %lld\n", + (long long)(int)sizeof(((struct ll_fiemap_info_key *)0)->fiemap)); + + /* Checks for struct quota_body */ + LASSERTF((int)sizeof(struct quota_body) == 112, "found %lld\n", + (long long)(int)sizeof(struct quota_body)); + LASSERTF((int)offsetof(struct quota_body, qb_fid) == 0, "found %lld\n", + (long long)(int)offsetof(struct quota_body, qb_fid)); + LASSERTF((int)sizeof(((struct quota_body *)0)->qb_fid) == 16, "found %lld\n", + (long long)(int)sizeof(((struct quota_body *)0)->qb_fid)); + LASSERTF((int)offsetof(struct quota_body, qb_id) == 16, "found %lld\n", + (long long)(int)offsetof(struct quota_body, qb_id)); + LASSERTF((int)sizeof(((struct quota_body *)0)->qb_id) == 16, "found %lld\n", + (long long)(int)sizeof(((struct quota_body *)0)->qb_id)); + LASSERTF((int)offsetof(struct quota_body, qb_flags) == 32, "found %lld\n", + (long long)(int)offsetof(struct quota_body, qb_flags)); + LASSERTF((int)sizeof(((struct quota_body *)0)->qb_flags) == 4, "found %lld\n", + (long long)(int)sizeof(((struct quota_body *)0)->qb_flags)); + LASSERTF((int)offsetof(struct quota_body, qb_padding) == 36, "found %lld\n", + (long long)(int)offsetof(struct quota_body, qb_padding)); + LASSERTF((int)sizeof(((struct quota_body *)0)->qb_padding) == 4, "found %lld\n", + (long long)(int)sizeof(((struct quota_body *)0)->qb_padding)); + LASSERTF((int)offsetof(struct quota_body, qb_count) == 40, "found %lld\n", + (long long)(int)offsetof(struct quota_body, qb_count)); + LASSERTF((int)sizeof(((struct quota_body *)0)->qb_count) == 8, "found %lld\n", + (long long)(int)sizeof(((struct quota_body *)0)->qb_count)); + LASSERTF((int)offsetof(struct quota_body, qb_usage) == 48, "found %lld\n", + (long long)(int)offsetof(struct quota_body, qb_usage)); + LASSERTF((int)sizeof(((struct quota_body *)0)->qb_usage) == 8, "found %lld\n", + (long long)(int)sizeof(((struct quota_body *)0)->qb_usage)); + LASSERTF((int)offsetof(struct quota_body, qb_slv_ver) == 56, "found %lld\n", + (long long)(int)offsetof(struct quota_body, qb_slv_ver)); + LASSERTF((int)sizeof(((struct quota_body *)0)->qb_slv_ver) == 8, "found %lld\n", + (long long)(int)sizeof(((struct quota_body *)0)->qb_slv_ver)); + LASSERTF((int)offsetof(struct quota_body, qb_lockh) == 64, "found %lld\n", + (long long)(int)offsetof(struct quota_body, qb_lockh)); + LASSERTF((int)sizeof(((struct quota_body *)0)->qb_lockh) == 8, "found %lld\n", + (long long)(int)sizeof(((struct quota_body *)0)->qb_lockh)); + LASSERTF((int)offsetof(struct quota_body, qb_glb_lockh) == 72, "found %lld\n", + (long long)(int)offsetof(struct quota_body, qb_glb_lockh)); + LASSERTF((int)sizeof(((struct quota_body *)0)->qb_glb_lockh) == 8, "found %lld\n", + (long long)(int)sizeof(((struct quota_body *)0)->qb_glb_lockh)); + LASSERTF((int)offsetof(struct quota_body, qb_padding1[4]) == 112, "found %lld\n", + (long long)(int)offsetof(struct quota_body, qb_padding1[4])); + LASSERTF((int)sizeof(((struct quota_body *)0)->qb_padding1[4]) == 8, "found %lld\n", + (long long)(int)sizeof(((struct quota_body *)0)->qb_padding1[4])); + + /* Checks for struct mgs_target_info */ + LASSERTF((int)sizeof(struct mgs_target_info) == 4544, "found %lld\n", + (long long)(int)sizeof(struct mgs_target_info)); + LASSERTF((int)offsetof(struct mgs_target_info, mti_lustre_ver) == 0, "found %lld\n", + (long long)(int)offsetof(struct mgs_target_info, mti_lustre_ver)); + LASSERTF((int)sizeof(((struct mgs_target_info *)0)->mti_lustre_ver) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mgs_target_info *)0)->mti_lustre_ver)); + LASSERTF((int)offsetof(struct mgs_target_info, mti_stripe_index) == 4, "found %lld\n", + (long long)(int)offsetof(struct mgs_target_info, mti_stripe_index)); + LASSERTF((int)sizeof(((struct mgs_target_info *)0)->mti_stripe_index) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mgs_target_info *)0)->mti_stripe_index)); + LASSERTF((int)offsetof(struct mgs_target_info, mti_config_ver) == 8, "found %lld\n", + (long long)(int)offsetof(struct mgs_target_info, mti_config_ver)); + LASSERTF((int)sizeof(((struct mgs_target_info *)0)->mti_config_ver) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mgs_target_info *)0)->mti_config_ver)); + LASSERTF((int)offsetof(struct mgs_target_info, mti_flags) == 12, "found %lld\n", + (long long)(int)offsetof(struct mgs_target_info, mti_flags)); + LASSERTF((int)sizeof(((struct mgs_target_info *)0)->mti_flags) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mgs_target_info *)0)->mti_flags)); + LASSERTF((int)offsetof(struct mgs_target_info, mti_nid_count) == 16, "found %lld\n", + (long long)(int)offsetof(struct mgs_target_info, mti_nid_count)); + LASSERTF((int)sizeof(((struct mgs_target_info *)0)->mti_nid_count) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mgs_target_info *)0)->mti_nid_count)); + LASSERTF((int)offsetof(struct mgs_target_info, mti_instance) == 20, "found %lld\n", + (long long)(int)offsetof(struct mgs_target_info, mti_instance)); + LASSERTF((int)sizeof(((struct mgs_target_info *)0)->mti_instance) == 4, "found %lld\n", + (long long)(int)sizeof(((struct mgs_target_info *)0)->mti_instance)); + LASSERTF((int)offsetof(struct mgs_target_info, mti_fsname) == 24, "found %lld\n", + (long long)(int)offsetof(struct mgs_target_info, mti_fsname)); + LASSERTF((int)sizeof(((struct mgs_target_info *)0)->mti_fsname) == 64, "found %lld\n", + (long long)(int)sizeof(((struct mgs_target_info *)0)->mti_fsname)); + LASSERTF((int)offsetof(struct mgs_target_info, mti_svname) == 88, "found %lld\n", + (long long)(int)offsetof(struct mgs_target_info, mti_svname)); + LASSERTF((int)sizeof(((struct mgs_target_info *)0)->mti_svname) == 64, "found %lld\n", + (long long)(int)sizeof(((struct mgs_target_info *)0)->mti_svname)); + LASSERTF((int)offsetof(struct mgs_target_info, mti_uuid) == 152, "found %lld\n", + (long long)(int)offsetof(struct mgs_target_info, mti_uuid)); + LASSERTF((int)sizeof(((struct mgs_target_info *)0)->mti_uuid) == 40, "found %lld\n", + (long long)(int)sizeof(((struct mgs_target_info *)0)->mti_uuid)); + LASSERTF((int)offsetof(struct mgs_target_info, mti_nids) == 192, "found %lld\n", + (long long)(int)offsetof(struct mgs_target_info, mti_nids)); + LASSERTF((int)sizeof(((struct mgs_target_info *)0)->mti_nids) == 256, "found %lld\n", + (long long)(int)sizeof(((struct mgs_target_info *)0)->mti_nids)); + LASSERTF((int)offsetof(struct mgs_target_info, mti_params) == 448, "found %lld\n", + (long long)(int)offsetof(struct mgs_target_info, mti_params)); + LASSERTF((int)sizeof(((struct mgs_target_info *)0)->mti_params) == 4096, "found %lld\n", + (long long)(int)sizeof(((struct mgs_target_info *)0)->mti_params)); + + /* Checks for struct lustre_capa */ + LASSERTF((int)sizeof(struct lustre_capa) == 120, "found %lld\n", + (long long)(int)sizeof(struct lustre_capa)); + LASSERTF((int)offsetof(struct lustre_capa, lc_fid) == 0, "found %lld\n", + (long long)(int)offsetof(struct lustre_capa, lc_fid)); + LASSERTF((int)sizeof(((struct lustre_capa *)0)->lc_fid) == 16, "found %lld\n", + (long long)(int)sizeof(((struct lustre_capa *)0)->lc_fid)); + LASSERTF((int)offsetof(struct lustre_capa, lc_opc) == 16, "found %lld\n", + (long long)(int)offsetof(struct lustre_capa, lc_opc)); + LASSERTF((int)sizeof(((struct lustre_capa *)0)->lc_opc) == 8, "found %lld\n", + (long long)(int)sizeof(((struct lustre_capa *)0)->lc_opc)); + LASSERTF((int)offsetof(struct lustre_capa, lc_uid) == 24, "found %lld\n", + (long long)(int)offsetof(struct lustre_capa, lc_uid)); + LASSERTF((int)sizeof(((struct lustre_capa *)0)->lc_uid) == 8, "found %lld\n", + (long long)(int)sizeof(((struct lustre_capa *)0)->lc_uid)); + LASSERTF((int)offsetof(struct lustre_capa, lc_gid) == 32, "found %lld\n", + (long long)(int)offsetof(struct lustre_capa, lc_gid)); + LASSERTF((int)sizeof(((struct lustre_capa *)0)->lc_gid) == 8, "found %lld\n", + (long long)(int)sizeof(((struct lustre_capa *)0)->lc_gid)); + LASSERTF((int)offsetof(struct lustre_capa, lc_flags) == 40, "found %lld\n", + (long long)(int)offsetof(struct lustre_capa, lc_flags)); + LASSERTF((int)sizeof(((struct lustre_capa *)0)->lc_flags) == 4, "found %lld\n", + (long long)(int)sizeof(((struct lustre_capa *)0)->lc_flags)); + LASSERTF((int)offsetof(struct lustre_capa, lc_keyid) == 44, "found %lld\n", + (long long)(int)offsetof(struct lustre_capa, lc_keyid)); + LASSERTF((int)sizeof(((struct lustre_capa *)0)->lc_keyid) == 4, "found %lld\n", + (long long)(int)sizeof(((struct lustre_capa *)0)->lc_keyid)); + LASSERTF((int)offsetof(struct lustre_capa, lc_timeout) == 48, "found %lld\n", + (long long)(int)offsetof(struct lustre_capa, lc_timeout)); + LASSERTF((int)sizeof(((struct lustre_capa *)0)->lc_timeout) == 4, "found %lld\n", + (long long)(int)sizeof(((struct lustre_capa *)0)->lc_timeout)); + LASSERTF((int)offsetof(struct lustre_capa, lc_expiry) == 52, "found %lld\n", + (long long)(int)offsetof(struct lustre_capa, lc_expiry)); + LASSERTF((int)sizeof(((struct lustre_capa *)0)->lc_expiry) == 4, "found %lld\n", + (long long)(int)sizeof(((struct lustre_capa *)0)->lc_expiry)); + CLASSERT(CAPA_HMAC_MAX_LEN == 64); + LASSERTF((int)offsetof(struct lustre_capa, lc_hmac[64]) == 120, "found %lld\n", + (long long)(int)offsetof(struct lustre_capa, lc_hmac[64])); + LASSERTF((int)sizeof(((struct lustre_capa *)0)->lc_hmac[64]) == 1, "found %lld\n", + (long long)(int)sizeof(((struct lustre_capa *)0)->lc_hmac[64])); + + /* Checks for struct lustre_capa_key */ + LASSERTF((int)sizeof(struct lustre_capa_key) == 72, "found %lld\n", + (long long)(int)sizeof(struct lustre_capa_key)); + LASSERTF((int)offsetof(struct lustre_capa_key, lk_seq) == 0, "found %lld\n", + (long long)(int)offsetof(struct lustre_capa_key, lk_seq)); + LASSERTF((int)sizeof(((struct lustre_capa_key *)0)->lk_seq) == 8, "found %lld\n", + (long long)(int)sizeof(((struct lustre_capa_key *)0)->lk_seq)); + LASSERTF((int)offsetof(struct lustre_capa_key, lk_keyid) == 8, "found %lld\n", + (long long)(int)offsetof(struct lustre_capa_key, lk_keyid)); + LASSERTF((int)sizeof(((struct lustre_capa_key *)0)->lk_keyid) == 4, "found %lld\n", + (long long)(int)sizeof(((struct lustre_capa_key *)0)->lk_keyid)); + LASSERTF((int)offsetof(struct lustre_capa_key, lk_padding) == 12, "found %lld\n", + (long long)(int)offsetof(struct lustre_capa_key, lk_padding)); + LASSERTF((int)sizeof(((struct lustre_capa_key *)0)->lk_padding) == 4, "found %lld\n", + (long long)(int)sizeof(((struct lustre_capa_key *)0)->lk_padding)); + CLASSERT(CAPA_HMAC_KEY_MAX_LEN == 56); + LASSERTF((int)offsetof(struct lustre_capa_key, lk_key[56]) == 72, "found %lld\n", + (long long)(int)offsetof(struct lustre_capa_key, lk_key[56])); + LASSERTF((int)sizeof(((struct lustre_capa_key *)0)->lk_key[56]) == 1, "found %lld\n", + (long long)(int)sizeof(((struct lustre_capa_key *)0)->lk_key[56])); + + /* Checks for struct getinfo_fid2path */ + LASSERTF((int)sizeof(struct getinfo_fid2path) == 32, "found %lld\n", + (long long)(int)sizeof(struct getinfo_fid2path)); + LASSERTF((int)offsetof(struct getinfo_fid2path, gf_fid) == 0, "found %lld\n", + (long long)(int)offsetof(struct getinfo_fid2path, gf_fid)); + LASSERTF((int)sizeof(((struct getinfo_fid2path *)0)->gf_fid) == 16, "found %lld\n", + (long long)(int)sizeof(((struct getinfo_fid2path *)0)->gf_fid)); + LASSERTF((int)offsetof(struct getinfo_fid2path, gf_recno) == 16, "found %lld\n", + (long long)(int)offsetof(struct getinfo_fid2path, gf_recno)); + LASSERTF((int)sizeof(((struct getinfo_fid2path *)0)->gf_recno) == 8, "found %lld\n", + (long long)(int)sizeof(((struct getinfo_fid2path *)0)->gf_recno)); + LASSERTF((int)offsetof(struct getinfo_fid2path, gf_linkno) == 24, "found %lld\n", + (long long)(int)offsetof(struct getinfo_fid2path, gf_linkno)); + LASSERTF((int)sizeof(((struct getinfo_fid2path *)0)->gf_linkno) == 4, "found %lld\n", + (long long)(int)sizeof(((struct getinfo_fid2path *)0)->gf_linkno)); + LASSERTF((int)offsetof(struct getinfo_fid2path, gf_pathlen) == 28, "found %lld\n", + (long long)(int)offsetof(struct getinfo_fid2path, gf_pathlen)); + LASSERTF((int)sizeof(((struct getinfo_fid2path *)0)->gf_pathlen) == 4, "found %lld\n", + (long long)(int)sizeof(((struct getinfo_fid2path *)0)->gf_pathlen)); + LASSERTF((int)offsetof(struct getinfo_fid2path, gf_path[0]) == 32, "found %lld\n", + (long long)(int)offsetof(struct getinfo_fid2path, gf_path[0])); + LASSERTF((int)sizeof(((struct getinfo_fid2path *)0)->gf_path[0]) == 1, "found %lld\n", + (long long)(int)sizeof(((struct getinfo_fid2path *)0)->gf_path[0])); + + /* Checks for struct ll_user_fiemap */ + LASSERTF((int)sizeof(struct ll_user_fiemap) == 32, "found %lld\n", + (long long)(int)sizeof(struct ll_user_fiemap)); + LASSERTF((int)offsetof(struct ll_user_fiemap, fm_start) == 0, "found %lld\n", + (long long)(int)offsetof(struct ll_user_fiemap, fm_start)); + LASSERTF((int)sizeof(((struct ll_user_fiemap *)0)->fm_start) == 8, "found %lld\n", + (long long)(int)sizeof(((struct ll_user_fiemap *)0)->fm_start)); + LASSERTF((int)offsetof(struct ll_user_fiemap, fm_length) == 8, "found %lld\n", + (long long)(int)offsetof(struct ll_user_fiemap, fm_length)); + LASSERTF((int)sizeof(((struct ll_user_fiemap *)0)->fm_length) == 8, "found %lld\n", + (long long)(int)sizeof(((struct ll_user_fiemap *)0)->fm_length)); + LASSERTF((int)offsetof(struct ll_user_fiemap, fm_flags) == 16, "found %lld\n", + (long long)(int)offsetof(struct ll_user_fiemap, fm_flags)); + LASSERTF((int)sizeof(((struct ll_user_fiemap *)0)->fm_flags) == 4, "found %lld\n", + (long long)(int)sizeof(((struct ll_user_fiemap *)0)->fm_flags)); + LASSERTF((int)offsetof(struct ll_user_fiemap, fm_mapped_extents) == 20, "found %lld\n", + (long long)(int)offsetof(struct ll_user_fiemap, fm_mapped_extents)); + LASSERTF((int)sizeof(((struct ll_user_fiemap *)0)->fm_mapped_extents) == 4, "found %lld\n", + (long long)(int)sizeof(((struct ll_user_fiemap *)0)->fm_mapped_extents)); + LASSERTF((int)offsetof(struct ll_user_fiemap, fm_extent_count) == 24, "found %lld\n", + (long long)(int)offsetof(struct ll_user_fiemap, fm_extent_count)); + LASSERTF((int)sizeof(((struct ll_user_fiemap *)0)->fm_extent_count) == 4, "found %lld\n", + (long long)(int)sizeof(((struct ll_user_fiemap *)0)->fm_extent_count)); + LASSERTF((int)offsetof(struct ll_user_fiemap, fm_reserved) == 28, "found %lld\n", + (long long)(int)offsetof(struct ll_user_fiemap, fm_reserved)); + LASSERTF((int)sizeof(((struct ll_user_fiemap *)0)->fm_reserved) == 4, "found %lld\n", + (long long)(int)sizeof(((struct ll_user_fiemap *)0)->fm_reserved)); + LASSERTF((int)offsetof(struct ll_user_fiemap, fm_extents) == 32, "found %lld\n", + (long long)(int)offsetof(struct ll_user_fiemap, fm_extents)); + LASSERTF((int)sizeof(((struct ll_user_fiemap *)0)->fm_extents) == 0, "found %lld\n", + (long long)(int)sizeof(((struct ll_user_fiemap *)0)->fm_extents)); + CLASSERT(FIEMAP_FLAG_SYNC == 0x00000001); + CLASSERT(FIEMAP_FLAG_XATTR == 0x00000002); + CLASSERT(FIEMAP_FLAG_DEVICE_ORDER == 0x40000000); + + /* Checks for struct ll_fiemap_extent */ + LASSERTF((int)sizeof(struct ll_fiemap_extent) == 56, "found %lld\n", + (long long)(int)sizeof(struct ll_fiemap_extent)); + LASSERTF((int)offsetof(struct ll_fiemap_extent, fe_logical) == 0, "found %lld\n", + (long long)(int)offsetof(struct ll_fiemap_extent, fe_logical)); + LASSERTF((int)sizeof(((struct ll_fiemap_extent *)0)->fe_logical) == 8, "found %lld\n", + (long long)(int)sizeof(((struct ll_fiemap_extent *)0)->fe_logical)); + LASSERTF((int)offsetof(struct ll_fiemap_extent, fe_physical) == 8, "found %lld\n", + (long long)(int)offsetof(struct ll_fiemap_extent, fe_physical)); + LASSERTF((int)sizeof(((struct ll_fiemap_extent *)0)->fe_physical) == 8, "found %lld\n", + (long long)(int)sizeof(((struct ll_fiemap_extent *)0)->fe_physical)); + LASSERTF((int)offsetof(struct ll_fiemap_extent, fe_length) == 16, "found %lld\n", + (long long)(int)offsetof(struct ll_fiemap_extent, fe_length)); + LASSERTF((int)sizeof(((struct ll_fiemap_extent *)0)->fe_length) == 8, "found %lld\n", + (long long)(int)sizeof(((struct ll_fiemap_extent *)0)->fe_length)); + LASSERTF((int)offsetof(struct ll_fiemap_extent, fe_flags) == 40, "found %lld\n", + (long long)(int)offsetof(struct ll_fiemap_extent, fe_flags)); + LASSERTF((int)sizeof(((struct ll_fiemap_extent *)0)->fe_flags) == 4, "found %lld\n", + (long long)(int)sizeof(((struct ll_fiemap_extent *)0)->fe_flags)); + LASSERTF((int)offsetof(struct ll_fiemap_extent, fe_device) == 44, "found %lld\n", + (long long)(int)offsetof(struct ll_fiemap_extent, fe_device)); + LASSERTF((int)sizeof(((struct ll_fiemap_extent *)0)->fe_device) == 4, "found %lld\n", + (long long)(int)sizeof(((struct ll_fiemap_extent *)0)->fe_device)); + CLASSERT(FIEMAP_EXTENT_LAST == 0x00000001); + CLASSERT(FIEMAP_EXTENT_UNKNOWN == 0x00000002); + CLASSERT(FIEMAP_EXTENT_DELALLOC == 0x00000004); + CLASSERT(FIEMAP_EXTENT_ENCODED == 0x00000008); + CLASSERT(FIEMAP_EXTENT_DATA_ENCRYPTED == 0x00000080); + CLASSERT(FIEMAP_EXTENT_NOT_ALIGNED == 0x00000100); + CLASSERT(FIEMAP_EXTENT_DATA_INLINE == 0x00000200); + CLASSERT(FIEMAP_EXTENT_DATA_TAIL == 0x00000400); + CLASSERT(FIEMAP_EXTENT_UNWRITTEN == 0x00000800); + CLASSERT(FIEMAP_EXTENT_MERGED == 0x00001000); + CLASSERT(FIEMAP_EXTENT_NO_DIRECT == 0x40000000); + CLASSERT(FIEMAP_EXTENT_NET == 0x80000000); + + /* Checks for type posix_acl_xattr_entry */ + LASSERTF((int)sizeof(posix_acl_xattr_entry) == 8, "found %lld\n", + (long long)(int)sizeof(posix_acl_xattr_entry)); + LASSERTF((int)offsetof(posix_acl_xattr_entry, e_tag) == 0, "found %lld\n", + (long long)(int)offsetof(posix_acl_xattr_entry, e_tag)); + LASSERTF((int)sizeof(((posix_acl_xattr_entry *)0)->e_tag) == 2, "found %lld\n", + (long long)(int)sizeof(((posix_acl_xattr_entry *)0)->e_tag)); + LASSERTF((int)offsetof(posix_acl_xattr_entry, e_perm) == 2, "found %lld\n", + (long long)(int)offsetof(posix_acl_xattr_entry, e_perm)); + LASSERTF((int)sizeof(((posix_acl_xattr_entry *)0)->e_perm) == 2, "found %lld\n", + (long long)(int)sizeof(((posix_acl_xattr_entry *)0)->e_perm)); + LASSERTF((int)offsetof(posix_acl_xattr_entry, e_id) == 4, "found %lld\n", + (long long)(int)offsetof(posix_acl_xattr_entry, e_id)); + LASSERTF((int)sizeof(((posix_acl_xattr_entry *)0)->e_id) == 4, "found %lld\n", + (long long)(int)sizeof(((posix_acl_xattr_entry *)0)->e_id)); + + /* Checks for type posix_acl_xattr_header */ + LASSERTF((int)sizeof(posix_acl_xattr_header) == 4, "found %lld\n", + (long long)(int)sizeof(posix_acl_xattr_header)); + LASSERTF((int)offsetof(posix_acl_xattr_header, a_version) == 0, "found %lld\n", + (long long)(int)offsetof(posix_acl_xattr_header, a_version)); + LASSERTF((int)sizeof(((posix_acl_xattr_header *)0)->a_version) == 4, "found %lld\n", + (long long)(int)sizeof(((posix_acl_xattr_header *)0)->a_version)); + LASSERTF((int)offsetof(posix_acl_xattr_header, a_entries) == 4, "found %lld\n", + (long long)(int)offsetof(posix_acl_xattr_header, a_entries)); + LASSERTF((int)sizeof(((posix_acl_xattr_header *)0)->a_entries) == 0, "found %lld\n", + (long long)(int)sizeof(((posix_acl_xattr_header *)0)->a_entries)); + + /* Checks for struct link_ea_header */ + LASSERTF((int)sizeof(struct link_ea_header) == 24, "found %lld\n", + (long long)(int)sizeof(struct link_ea_header)); + LASSERTF((int)offsetof(struct link_ea_header, leh_magic) == 0, "found %lld\n", + (long long)(int)offsetof(struct link_ea_header, leh_magic)); + LASSERTF((int)sizeof(((struct link_ea_header *)0)->leh_magic) == 4, "found %lld\n", + (long long)(int)sizeof(((struct link_ea_header *)0)->leh_magic)); + LASSERTF((int)offsetof(struct link_ea_header, leh_reccount) == 4, "found %lld\n", + (long long)(int)offsetof(struct link_ea_header, leh_reccount)); + LASSERTF((int)sizeof(((struct link_ea_header *)0)->leh_reccount) == 4, "found %lld\n", + (long long)(int)sizeof(((struct link_ea_header *)0)->leh_reccount)); + LASSERTF((int)offsetof(struct link_ea_header, leh_len) == 8, "found %lld\n", + (long long)(int)offsetof(struct link_ea_header, leh_len)); + LASSERTF((int)sizeof(((struct link_ea_header *)0)->leh_len) == 8, "found %lld\n", + (long long)(int)sizeof(((struct link_ea_header *)0)->leh_len)); + LASSERTF((int)offsetof(struct link_ea_header, padding1) == 16, "found %lld\n", + (long long)(int)offsetof(struct link_ea_header, padding1)); + LASSERTF((int)sizeof(((struct link_ea_header *)0)->padding1) == 4, "found %lld\n", + (long long)(int)sizeof(((struct link_ea_header *)0)->padding1)); + LASSERTF((int)offsetof(struct link_ea_header, padding2) == 20, "found %lld\n", + (long long)(int)offsetof(struct link_ea_header, padding2)); + LASSERTF((int)sizeof(((struct link_ea_header *)0)->padding2) == 4, "found %lld\n", + (long long)(int)sizeof(((struct link_ea_header *)0)->padding2)); + CLASSERT(LINK_EA_MAGIC == 0x11EAF1DFUL); + + /* Checks for struct link_ea_entry */ + LASSERTF((int)sizeof(struct link_ea_entry) == 18, "found %lld\n", + (long long)(int)sizeof(struct link_ea_entry)); + LASSERTF((int)offsetof(struct link_ea_entry, lee_reclen) == 0, "found %lld\n", + (long long)(int)offsetof(struct link_ea_entry, lee_reclen)); + LASSERTF((int)sizeof(((struct link_ea_entry *)0)->lee_reclen) == 2, "found %lld\n", + (long long)(int)sizeof(((struct link_ea_entry *)0)->lee_reclen)); + LASSERTF((int)offsetof(struct link_ea_entry, lee_parent_fid) == 2, "found %lld\n", + (long long)(int)offsetof(struct link_ea_entry, lee_parent_fid)); + LASSERTF((int)sizeof(((struct link_ea_entry *)0)->lee_parent_fid) == 16, "found %lld\n", + (long long)(int)sizeof(((struct link_ea_entry *)0)->lee_parent_fid)); + LASSERTF((int)offsetof(struct link_ea_entry, lee_name) == 18, "found %lld\n", + (long long)(int)offsetof(struct link_ea_entry, lee_name)); + LASSERTF((int)sizeof(((struct link_ea_entry *)0)->lee_name) == 0, "found %lld\n", + (long long)(int)sizeof(((struct link_ea_entry *)0)->lee_name)); + + /* Checks for struct layout_intent */ + LASSERTF((int)sizeof(struct layout_intent) == 24, "found %lld\n", + (long long)(int)sizeof(struct layout_intent)); + LASSERTF((int)offsetof(struct layout_intent, li_opc) == 0, "found %lld\n", + (long long)(int)offsetof(struct layout_intent, li_opc)); + LASSERTF((int)sizeof(((struct layout_intent *)0)->li_opc) == 4, "found %lld\n", + (long long)(int)sizeof(((struct layout_intent *)0)->li_opc)); + LASSERTF((int)offsetof(struct layout_intent, li_flags) == 4, "found %lld\n", + (long long)(int)offsetof(struct layout_intent, li_flags)); + LASSERTF((int)sizeof(((struct layout_intent *)0)->li_flags) == 4, "found %lld\n", + (long long)(int)sizeof(((struct layout_intent *)0)->li_flags)); + LASSERTF((int)offsetof(struct layout_intent, li_start) == 8, "found %lld\n", + (long long)(int)offsetof(struct layout_intent, li_start)); + LASSERTF((int)sizeof(((struct layout_intent *)0)->li_start) == 8, "found %lld\n", + (long long)(int)sizeof(((struct layout_intent *)0)->li_start)); + LASSERTF((int)offsetof(struct layout_intent, li_end) == 16, "found %lld\n", + (long long)(int)offsetof(struct layout_intent, li_end)); + LASSERTF((int)sizeof(((struct layout_intent *)0)->li_end) == 8, "found %lld\n", + (long long)(int)sizeof(((struct layout_intent *)0)->li_end)); + LASSERTF(LAYOUT_INTENT_ACCESS == 0, "found %lld\n", + (long long)LAYOUT_INTENT_ACCESS); + LASSERTF(LAYOUT_INTENT_READ == 1, "found %lld\n", + (long long)LAYOUT_INTENT_READ); + LASSERTF(LAYOUT_INTENT_WRITE == 2, "found %lld\n", + (long long)LAYOUT_INTENT_WRITE); + LASSERTF(LAYOUT_INTENT_GLIMPSE == 3, "found %lld\n", + (long long)LAYOUT_INTENT_GLIMPSE); + LASSERTF(LAYOUT_INTENT_TRUNC == 4, "found %lld\n", + (long long)LAYOUT_INTENT_TRUNC); + LASSERTF(LAYOUT_INTENT_RELEASE == 5, "found %lld\n", + (long long)LAYOUT_INTENT_RELEASE); + LASSERTF(LAYOUT_INTENT_RESTORE == 6, "found %lld\n", + (long long)LAYOUT_INTENT_RESTORE); + + /* Checks for struct hsm_action_item */ + LASSERTF((int)sizeof(struct hsm_action_item) == 72, "found %lld\n", + (long long)(int)sizeof(struct hsm_action_item)); + LASSERTF((int)offsetof(struct hsm_action_item, hai_len) == 0, "found %lld\n", + (long long)(int)offsetof(struct hsm_action_item, hai_len)); + LASSERTF((int)sizeof(((struct hsm_action_item *)0)->hai_len) == 4, "found %lld\n", + (long long)(int)sizeof(((struct hsm_action_item *)0)->hai_len)); + LASSERTF((int)offsetof(struct hsm_action_item, hai_action) == 4, "found %lld\n", + (long long)(int)offsetof(struct hsm_action_item, hai_action)); + LASSERTF((int)sizeof(((struct hsm_action_item *)0)->hai_action) == 4, "found %lld\n", + (long long)(int)sizeof(((struct hsm_action_item *)0)->hai_action)); + LASSERTF((int)offsetof(struct hsm_action_item, hai_fid) == 8, "found %lld\n", + (long long)(int)offsetof(struct hsm_action_item, hai_fid)); + LASSERTF((int)sizeof(((struct hsm_action_item *)0)->hai_fid) == 16, "found %lld\n", + (long long)(int)sizeof(((struct hsm_action_item *)0)->hai_fid)); + LASSERTF((int)offsetof(struct hsm_action_item, hai_dfid) == 24, "found %lld\n", + (long long)(int)offsetof(struct hsm_action_item, hai_dfid)); + LASSERTF((int)sizeof(((struct hsm_action_item *)0)->hai_dfid) == 16, "found %lld\n", + (long long)(int)sizeof(((struct hsm_action_item *)0)->hai_dfid)); + LASSERTF((int)offsetof(struct hsm_action_item, hai_extent) == 40, "found %lld\n", + (long long)(int)offsetof(struct hsm_action_item, hai_extent)); + LASSERTF((int)sizeof(((struct hsm_action_item *)0)->hai_extent) == 16, "found %lld\n", + (long long)(int)sizeof(((struct hsm_action_item *)0)->hai_extent)); + LASSERTF((int)offsetof(struct hsm_action_item, hai_cookie) == 56, "found %lld\n", + (long long)(int)offsetof(struct hsm_action_item, hai_cookie)); + LASSERTF((int)sizeof(((struct hsm_action_item *)0)->hai_cookie) == 8, "found %lld\n", + (long long)(int)sizeof(((struct hsm_action_item *)0)->hai_cookie)); + LASSERTF((int)offsetof(struct hsm_action_item, hai_gid) == 64, "found %lld\n", + (long long)(int)offsetof(struct hsm_action_item, hai_gid)); + LASSERTF((int)sizeof(((struct hsm_action_item *)0)->hai_gid) == 8, "found %lld\n", + (long long)(int)sizeof(((struct hsm_action_item *)0)->hai_gid)); + LASSERTF((int)offsetof(struct hsm_action_item, hai_data) == 72, "found %lld\n", + (long long)(int)offsetof(struct hsm_action_item, hai_data)); + LASSERTF((int)sizeof(((struct hsm_action_item *)0)->hai_data) == 0, "found %lld\n", + (long long)(int)sizeof(((struct hsm_action_item *)0)->hai_data)); + + /* Checks for struct hsm_action_list */ + LASSERTF((int)sizeof(struct hsm_action_list) == 32, "found %lld\n", + (long long)(int)sizeof(struct hsm_action_list)); + LASSERTF((int)offsetof(struct hsm_action_list, hal_version) == 0, "found %lld\n", + (long long)(int)offsetof(struct hsm_action_list, hal_version)); + LASSERTF((int)sizeof(((struct hsm_action_list *)0)->hal_version) == 4, "found %lld\n", + (long long)(int)sizeof(((struct hsm_action_list *)0)->hal_version)); + LASSERTF((int)offsetof(struct hsm_action_list, hal_count) == 4, "found %lld\n", + (long long)(int)offsetof(struct hsm_action_list, hal_count)); + LASSERTF((int)sizeof(((struct hsm_action_list *)0)->hal_count) == 4, "found %lld\n", + (long long)(int)sizeof(((struct hsm_action_list *)0)->hal_count)); + LASSERTF((int)offsetof(struct hsm_action_list, hal_compound_id) == 8, "found %lld\n", + (long long)(int)offsetof(struct hsm_action_list, hal_compound_id)); + LASSERTF((int)sizeof(((struct hsm_action_list *)0)->hal_compound_id) == 8, "found %lld\n", + (long long)(int)sizeof(((struct hsm_action_list *)0)->hal_compound_id)); + LASSERTF((int)offsetof(struct hsm_action_list, hal_flags) == 16, "found %lld\n", + (long long)(int)offsetof(struct hsm_action_list, hal_flags)); + LASSERTF((int)sizeof(((struct hsm_action_list *)0)->hal_flags) == 8, "found %lld\n", + (long long)(int)sizeof(((struct hsm_action_list *)0)->hal_flags)); + LASSERTF((int)offsetof(struct hsm_action_list, hal_archive_id) == 24, "found %lld\n", + (long long)(int)offsetof(struct hsm_action_list, hal_archive_id)); + LASSERTF((int)sizeof(((struct hsm_action_list *)0)->hal_archive_id) == 4, "found %lld\n", + (long long)(int)sizeof(((struct hsm_action_list *)0)->hal_archive_id)); + LASSERTF((int)offsetof(struct hsm_action_list, padding1) == 28, "found %lld\n", + (long long)(int)offsetof(struct hsm_action_list, padding1)); + LASSERTF((int)sizeof(((struct hsm_action_list *)0)->padding1) == 4, "found %lld\n", + (long long)(int)sizeof(((struct hsm_action_list *)0)->padding1)); + LASSERTF((int)offsetof(struct hsm_action_list, hal_fsname) == 32, "found %lld\n", + (long long)(int)offsetof(struct hsm_action_list, hal_fsname)); + LASSERTF((int)sizeof(((struct hsm_action_list *)0)->hal_fsname) == 0, "found %lld\n", + (long long)(int)sizeof(((struct hsm_action_list *)0)->hal_fsname)); + + /* Checks for struct hsm_progress */ + LASSERTF((int)sizeof(struct hsm_progress) == 48, "found %lld\n", + (long long)(int)sizeof(struct hsm_progress)); + LASSERTF((int)offsetof(struct hsm_progress, hp_fid) == 0, "found %lld\n", + (long long)(int)offsetof(struct hsm_progress, hp_fid)); + LASSERTF((int)sizeof(((struct hsm_progress *)0)->hp_fid) == 16, "found %lld\n", + (long long)(int)sizeof(((struct hsm_progress *)0)->hp_fid)); + LASSERTF((int)offsetof(struct hsm_progress, hp_cookie) == 16, "found %lld\n", + (long long)(int)offsetof(struct hsm_progress, hp_cookie)); + LASSERTF((int)sizeof(((struct hsm_progress *)0)->hp_cookie) == 8, "found %lld\n", + (long long)(int)sizeof(((struct hsm_progress *)0)->hp_cookie)); + LASSERTF((int)offsetof(struct hsm_progress, hp_extent) == 24, "found %lld\n", + (long long)(int)offsetof(struct hsm_progress, hp_extent)); + LASSERTF((int)sizeof(((struct hsm_progress *)0)->hp_extent) == 16, "found %lld\n", + (long long)(int)sizeof(((struct hsm_progress *)0)->hp_extent)); + LASSERTF((int)offsetof(struct hsm_progress, hp_flags) == 40, "found %lld\n", + (long long)(int)offsetof(struct hsm_progress, hp_flags)); + LASSERTF((int)sizeof(((struct hsm_progress *)0)->hp_flags) == 2, "found %lld\n", + (long long)(int)sizeof(((struct hsm_progress *)0)->hp_flags)); + LASSERTF((int)offsetof(struct hsm_progress, hp_errval) == 42, "found %lld\n", + (long long)(int)offsetof(struct hsm_progress, hp_errval)); + LASSERTF((int)sizeof(((struct hsm_progress *)0)->hp_errval) == 2, "found %lld\n", + (long long)(int)sizeof(((struct hsm_progress *)0)->hp_errval)); + LASSERTF((int)offsetof(struct hsm_progress, padding) == 44, "found %lld\n", + (long long)(int)offsetof(struct hsm_progress, padding)); + LASSERTF((int)sizeof(((struct hsm_progress *)0)->padding) == 4, "found %lld\n", + (long long)(int)sizeof(((struct hsm_progress *)0)->padding)); + LASSERTF(HP_FLAG_COMPLETED == 0x01, "found 0x%.8x\n", + HP_FLAG_COMPLETED); + LASSERTF(HP_FLAG_RETRY == 0x02, "found 0x%.8x\n", + HP_FLAG_RETRY); + + LASSERTF((int)offsetof(struct hsm_copy, hc_data_version) == 0, "found %lld\n", + (long long)(int)offsetof(struct hsm_copy, hc_data_version)); + LASSERTF((int)sizeof(((struct hsm_copy *)0)->hc_data_version) == 8, "found %lld\n", + (long long)(int)sizeof(((struct hsm_copy *)0)->hc_data_version)); + LASSERTF((int)offsetof(struct hsm_copy, hc_flags) == 8, "found %lld\n", + (long long)(int)offsetof(struct hsm_copy, hc_flags)); + LASSERTF((int)sizeof(((struct hsm_copy *)0)->hc_flags) == 2, "found %lld\n", + (long long)(int)sizeof(((struct hsm_copy *)0)->hc_flags)); + LASSERTF((int)offsetof(struct hsm_copy, hc_errval) == 10, "found %lld\n", + (long long)(int)offsetof(struct hsm_copy, hc_errval)); + LASSERTF((int)sizeof(((struct hsm_copy *)0)->hc_errval) == 2, "found %lld\n", + (long long)(int)sizeof(((struct hsm_copy *)0)->hc_errval)); + LASSERTF((int)offsetof(struct hsm_copy, padding) == 12, "found %lld\n", + (long long)(int)offsetof(struct hsm_copy, padding)); + LASSERTF((int)sizeof(((struct hsm_copy *)0)->padding) == 4, "found %lld\n", + (long long)(int)sizeof(((struct hsm_copy *)0)->padding)); + LASSERTF((int)offsetof(struct hsm_copy, hc_hai) == 16, "found %lld\n", + (long long)(int)offsetof(struct hsm_copy, hc_hai)); + LASSERTF((int)sizeof(((struct hsm_copy *)0)->hc_hai) == 72, "found %lld\n", + (long long)(int)sizeof(((struct hsm_copy *)0)->hc_hai)); + + /* Checks for struct hsm_progress_kernel */ + LASSERTF((int)sizeof(struct hsm_progress_kernel) == 64, "found %lld\n", + (long long)(int)sizeof(struct hsm_progress_kernel)); + LASSERTF((int)offsetof(struct hsm_progress_kernel, hpk_fid) == 0, "found %lld\n", + (long long)(int)offsetof(struct hsm_progress_kernel, hpk_fid)); + LASSERTF((int)sizeof(((struct hsm_progress_kernel *)0)->hpk_fid) == 16, "found %lld\n", + (long long)(int)sizeof(((struct hsm_progress_kernel *)0)->hpk_fid)); + LASSERTF((int)offsetof(struct hsm_progress_kernel, hpk_cookie) == 16, "found %lld\n", + (long long)(int)offsetof(struct hsm_progress_kernel, hpk_cookie)); + LASSERTF((int)sizeof(((struct hsm_progress_kernel *)0)->hpk_cookie) == 8, "found %lld\n", + (long long)(int)sizeof(((struct hsm_progress_kernel *)0)->hpk_cookie)); + LASSERTF((int)offsetof(struct hsm_progress_kernel, hpk_extent) == 24, "found %lld\n", + (long long)(int)offsetof(struct hsm_progress_kernel, hpk_extent)); + LASSERTF((int)sizeof(((struct hsm_progress_kernel *)0)->hpk_extent) == 16, "found %lld\n", + (long long)(int)sizeof(((struct hsm_progress_kernel *)0)->hpk_extent)); + LASSERTF((int)offsetof(struct hsm_progress_kernel, hpk_flags) == 40, "found %lld\n", + (long long)(int)offsetof(struct hsm_progress_kernel, hpk_flags)); + LASSERTF((int)sizeof(((struct hsm_progress_kernel *)0)->hpk_flags) == 2, "found %lld\n", + (long long)(int)sizeof(((struct hsm_progress_kernel *)0)->hpk_flags)); + LASSERTF((int)offsetof(struct hsm_progress_kernel, hpk_errval) == 42, "found %lld\n", + (long long)(int)offsetof(struct hsm_progress_kernel, hpk_errval)); + LASSERTF((int)sizeof(((struct hsm_progress_kernel *)0)->hpk_errval) == 2, "found %lld\n", + (long long)(int)sizeof(((struct hsm_progress_kernel *)0)->hpk_errval)); + LASSERTF((int)offsetof(struct hsm_progress_kernel, hpk_padding1) == 44, "found %lld\n", + (long long)(int)offsetof(struct hsm_progress_kernel, hpk_padding1)); + LASSERTF((int)sizeof(((struct hsm_progress_kernel *)0)->hpk_padding1) == 4, "found %lld\n", + (long long)(int)sizeof(((struct hsm_progress_kernel *)0)->hpk_padding1)); + LASSERTF((int)offsetof(struct hsm_progress_kernel, hpk_data_version) == 48, "found %lld\n", + (long long)(int)offsetof(struct hsm_progress_kernel, hpk_data_version)); + LASSERTF((int)sizeof(((struct hsm_progress_kernel *)0)->hpk_data_version) == 8, "found %lld\n", + (long long)(int)sizeof(((struct hsm_progress_kernel *)0)->hpk_data_version)); + LASSERTF((int)offsetof(struct hsm_progress_kernel, hpk_padding2) == 56, "found %lld\n", + (long long)(int)offsetof(struct hsm_progress_kernel, hpk_padding2)); + LASSERTF((int)sizeof(((struct hsm_progress_kernel *)0)->hpk_padding2) == 8, "found %lld\n", + (long long)(int)sizeof(((struct hsm_progress_kernel *)0)->hpk_padding2)); + + /* Checks for struct hsm_user_item */ + LASSERTF((int)sizeof(struct hsm_user_item) == 32, "found %lld\n", + (long long)(int)sizeof(struct hsm_user_item)); + LASSERTF((int)offsetof(struct hsm_user_item, hui_fid) == 0, "found %lld\n", + (long long)(int)offsetof(struct hsm_user_item, hui_fid)); + LASSERTF((int)sizeof(((struct hsm_user_item *)0)->hui_fid) == 16, "found %lld\n", + (long long)(int)sizeof(((struct hsm_user_item *)0)->hui_fid)); + LASSERTF((int)offsetof(struct hsm_user_item, hui_extent) == 16, "found %lld\n", + (long long)(int)offsetof(struct hsm_user_item, hui_extent)); + LASSERTF((int)sizeof(((struct hsm_user_item *)0)->hui_extent) == 16, "found %lld\n", + (long long)(int)sizeof(((struct hsm_user_item *)0)->hui_extent)); + + /* Checks for struct hsm_user_state */ + LASSERTF((int)sizeof(struct hsm_user_state) == 32, "found %lld\n", + (long long)(int)sizeof(struct hsm_user_state)); + LASSERTF((int)offsetof(struct hsm_user_state, hus_states) == 0, "found %lld\n", + (long long)(int)offsetof(struct hsm_user_state, hus_states)); + LASSERTF((int)sizeof(((struct hsm_user_state *)0)->hus_states) == 4, "found %lld\n", + (long long)(int)sizeof(((struct hsm_user_state *)0)->hus_states)); + LASSERTF((int)offsetof(struct hsm_user_state, hus_archive_id) == 4, "found %lld\n", + (long long)(int)offsetof(struct hsm_user_state, hus_archive_id)); + LASSERTF((int)sizeof(((struct hsm_user_state *)0)->hus_archive_id) == 4, "found %lld\n", + (long long)(int)sizeof(((struct hsm_user_state *)0)->hus_archive_id)); + LASSERTF((int)offsetof(struct hsm_user_state, hus_in_progress_state) == 8, "found %lld\n", + (long long)(int)offsetof(struct hsm_user_state, hus_in_progress_state)); + LASSERTF((int)sizeof(((struct hsm_user_state *)0)->hus_in_progress_state) == 4, "found %lld\n", + (long long)(int)sizeof(((struct hsm_user_state *)0)->hus_in_progress_state)); + LASSERTF((int)offsetof(struct hsm_user_state, hus_in_progress_action) == 12, "found %lld\n", + (long long)(int)offsetof(struct hsm_user_state, hus_in_progress_action)); + LASSERTF((int)sizeof(((struct hsm_user_state *)0)->hus_in_progress_action) == 4, "found %lld\n", + (long long)(int)sizeof(((struct hsm_user_state *)0)->hus_in_progress_action)); + LASSERTF((int)offsetof(struct hsm_user_state, hus_in_progress_location) == 16, "found %lld\n", + (long long)(int)offsetof(struct hsm_user_state, hus_in_progress_location)); + LASSERTF((int)sizeof(((struct hsm_user_state *)0)->hus_in_progress_location) == 16, "found %lld\n", + (long long)(int)sizeof(((struct hsm_user_state *)0)->hus_in_progress_location)); + + /* Checks for struct hsm_state_set */ + LASSERTF((int)sizeof(struct hsm_state_set) == 24, "found %lld\n", + (long long)(int)sizeof(struct hsm_state_set)); + LASSERTF((int)offsetof(struct hsm_state_set, hss_valid) == 0, "found %lld\n", + (long long)(int)offsetof(struct hsm_state_set, hss_valid)); + LASSERTF((int)sizeof(((struct hsm_state_set *)0)->hss_valid) == 4, "found %lld\n", + (long long)(int)sizeof(((struct hsm_state_set *)0)->hss_valid)); + LASSERTF((int)offsetof(struct hsm_state_set, hss_archive_id) == 4, "found %lld\n", + (long long)(int)offsetof(struct hsm_state_set, hss_archive_id)); + LASSERTF((int)sizeof(((struct hsm_state_set *)0)->hss_archive_id) == 4, "found %lld\n", + (long long)(int)sizeof(((struct hsm_state_set *)0)->hss_archive_id)); + LASSERTF((int)offsetof(struct hsm_state_set, hss_setmask) == 8, "found %lld\n", + (long long)(int)offsetof(struct hsm_state_set, hss_setmask)); + LASSERTF((int)sizeof(((struct hsm_state_set *)0)->hss_setmask) == 8, "found %lld\n", + (long long)(int)sizeof(((struct hsm_state_set *)0)->hss_setmask)); + LASSERTF((int)offsetof(struct hsm_state_set, hss_clearmask) == 16, "found %lld\n", + (long long)(int)offsetof(struct hsm_state_set, hss_clearmask)); + LASSERTF((int)sizeof(((struct hsm_state_set *)0)->hss_clearmask) == 8, "found %lld\n", + (long long)(int)sizeof(((struct hsm_state_set *)0)->hss_clearmask)); + + /* Checks for struct hsm_current_action */ + LASSERTF((int)sizeof(struct hsm_current_action) == 24, "found %lld\n", + (long long)(int)sizeof(struct hsm_current_action)); + LASSERTF((int)offsetof(struct hsm_current_action, hca_state) == 0, "found %lld\n", + (long long)(int)offsetof(struct hsm_current_action, hca_state)); + LASSERTF((int)sizeof(((struct hsm_current_action *)0)->hca_state) == 4, "found %lld\n", + (long long)(int)sizeof(((struct hsm_current_action *)0)->hca_state)); + LASSERTF((int)offsetof(struct hsm_current_action, hca_action) == 4, "found %lld\n", + (long long)(int)offsetof(struct hsm_current_action, hca_action)); + LASSERTF((int)sizeof(((struct hsm_current_action *)0)->hca_action) == 4, "found %lld\n", + (long long)(int)sizeof(((struct hsm_current_action *)0)->hca_action)); + LASSERTF((int)offsetof(struct hsm_current_action, hca_location) == 8, "found %lld\n", + (long long)(int)offsetof(struct hsm_current_action, hca_location)); + LASSERTF((int)sizeof(((struct hsm_current_action *)0)->hca_location) == 16, "found %lld\n", + (long long)(int)sizeof(((struct hsm_current_action *)0)->hca_location)); + + /* Checks for struct hsm_request */ + LASSERTF((int)sizeof(struct hsm_request) == 24, "found %lld\n", + (long long)(int)sizeof(struct hsm_request)); + LASSERTF((int)offsetof(struct hsm_request, hr_action) == 0, "found %lld\n", + (long long)(int)offsetof(struct hsm_request, hr_action)); + LASSERTF((int)sizeof(((struct hsm_request *)0)->hr_action) == 4, "found %lld\n", + (long long)(int)sizeof(((struct hsm_request *)0)->hr_action)); + LASSERTF((int)offsetof(struct hsm_request, hr_archive_id) == 4, "found %lld\n", + (long long)(int)offsetof(struct hsm_request, hr_archive_id)); + LASSERTF((int)sizeof(((struct hsm_request *)0)->hr_archive_id) == 4, "found %lld\n", + (long long)(int)sizeof(((struct hsm_request *)0)->hr_archive_id)); + LASSERTF((int)offsetof(struct hsm_request, hr_flags) == 8, "found %lld\n", + (long long)(int)offsetof(struct hsm_request, hr_flags)); + LASSERTF((int)sizeof(((struct hsm_request *)0)->hr_flags) == 8, "found %lld\n", + (long long)(int)sizeof(((struct hsm_request *)0)->hr_flags)); + LASSERTF((int)offsetof(struct hsm_request, hr_itemcount) == 16, "found %lld\n", + (long long)(int)offsetof(struct hsm_request, hr_itemcount)); + LASSERTF((int)sizeof(((struct hsm_request *)0)->hr_itemcount) == 4, "found %lld\n", + (long long)(int)sizeof(((struct hsm_request *)0)->hr_itemcount)); + LASSERTF((int)offsetof(struct hsm_request, hr_data_len) == 20, "found %lld\n", + (long long)(int)offsetof(struct hsm_request, hr_data_len)); + LASSERTF((int)sizeof(((struct hsm_request *)0)->hr_data_len) == 4, "found %lld\n", + (long long)(int)sizeof(((struct hsm_request *)0)->hr_data_len)); + LASSERTF(HSM_FORCE_ACTION == 0x00000001UL, "found 0x%.8xUL\n", + (unsigned)HSM_FORCE_ACTION); + LASSERTF(HSM_GHOST_COPY == 0x00000002UL, "found 0x%.8xUL\n", + (unsigned)HSM_GHOST_COPY); + + /* Checks for struct hsm_user_request */ + LASSERTF((int)sizeof(struct hsm_user_request) == 24, "found %lld\n", + (long long)(int)sizeof(struct hsm_user_request)); + LASSERTF((int)offsetof(struct hsm_user_request, hur_request) == 0, "found %lld\n", + (long long)(int)offsetof(struct hsm_user_request, hur_request)); + LASSERTF((int)sizeof(((struct hsm_user_request *)0)->hur_request) == 24, "found %lld\n", + (long long)(int)sizeof(((struct hsm_user_request *)0)->hur_request)); + LASSERTF((int)offsetof(struct hsm_user_request, hur_user_item) == 24, "found %lld\n", + (long long)(int)offsetof(struct hsm_user_request, hur_user_item)); + LASSERTF((int)sizeof(((struct hsm_user_request *)0)->hur_user_item) == 0, "found %lld\n", + (long long)(int)sizeof(((struct hsm_user_request *)0)->hur_user_item)); + + /* Checks for struct hsm_user_import */ + LASSERTF(sizeof(struct hsm_user_import) == 48, "found %lld\n", + (long long)sizeof(struct hsm_user_import)); + LASSERTF(offsetof(struct hsm_user_import, hui_size) == 0, + "found %lld\n", + (long long)offsetof(struct hsm_user_import, hui_size)); + LASSERTF(sizeof(((struct hsm_user_import *)0)->hui_size) == 8, + "found %lld\n", + (long long)sizeof(((struct hsm_user_import *)0)->hui_size)); + LASSERTF(offsetof(struct hsm_user_import, hui_uid) == 32, + "found %lld\n", + (long long)offsetof(struct hsm_user_import, hui_uid)); + LASSERTF(sizeof(((struct hsm_user_import *)0)->hui_uid) == 4, + "found %lld\n", + (long long)sizeof(((struct hsm_user_import *)0)->hui_uid)); + LASSERTF(offsetof(struct hsm_user_import, hui_gid) == 36, + "found %lld\n", + (long long)offsetof(struct hsm_user_import, hui_gid)); + LASSERTF(sizeof(((struct hsm_user_import *)0)->hui_gid) == 4, + "found %lld\n", + (long long)sizeof(((struct hsm_user_import *)0)->hui_gid)); + LASSERTF(offsetof(struct hsm_user_import, hui_mode) == 40, + "found %lld\n", + (long long)offsetof(struct hsm_user_import, hui_mode)); + LASSERTF(sizeof(((struct hsm_user_import *)0)->hui_mode) == 4, + "found %lld\n", + (long long)sizeof(((struct hsm_user_import *)0)->hui_mode)); + LASSERTF(offsetof(struct hsm_user_import, hui_atime) == 8, + "found %lld\n", + (long long)offsetof(struct hsm_user_import, hui_atime)); + LASSERTF(sizeof(((struct hsm_user_import *)0)->hui_atime) == 8, + "found %lld\n", + (long long)sizeof(((struct hsm_user_import *)0)->hui_atime)); + LASSERTF(offsetof(struct hsm_user_import, hui_atime_ns) == 24, + "found %lld\n", + (long long)(int)offsetof(struct hsm_user_import, hui_atime_ns)); + LASSERTF(sizeof(((struct hsm_user_import *)0)->hui_atime_ns) == 4, + "found %lld\n", + (long long)sizeof(((struct hsm_user_import *)0)->hui_atime_ns)); + LASSERTF(offsetof(struct hsm_user_import, hui_mtime) == 16, + "found %lld\n", + (long long)offsetof(struct hsm_user_import, hui_mtime)); + LASSERTF(sizeof(((struct hsm_user_import *)0)->hui_mtime) == 8, + "found %lld\n", + (long long)sizeof(((struct hsm_user_import *)0)->hui_mtime)); + LASSERTF(offsetof(struct hsm_user_import, hui_mtime_ns) == 28, + "found %lld\n", + (long long)offsetof(struct hsm_user_import, hui_mtime_ns)); + LASSERTF(sizeof(((struct hsm_user_import *)0)->hui_mtime_ns) == 4, + "found %lld\n", + (long long)sizeof(((struct hsm_user_import *)0)->hui_mtime_ns)); + LASSERTF(offsetof(struct hsm_user_import, hui_archive_id) == 44, + "found %lld\n", + (long long)offsetof(struct hsm_user_import, hui_archive_id)); + LASSERTF(sizeof(((struct hsm_user_import *)0)->hui_archive_id) == 4, + "found %lld\n", + (long long)sizeof(((struct hsm_user_import *)0)->hui_archive_id)); + + /* Checks for struct update_buf */ + LASSERTF((int)sizeof(struct update_buf) == 8, "found %lld\n", + (long long)(int)sizeof(struct update_buf)); + LASSERTF((int)offsetof(struct update_buf, ub_magic) == 0, "found %lld\n", + (long long)(int)offsetof(struct update_buf, ub_magic)); + LASSERTF((int)sizeof(((struct update_buf *)0)->ub_magic) == 4, "found %lld\n", + (long long)(int)sizeof(((struct update_buf *)0)->ub_magic)); + LASSERTF((int)offsetof(struct update_buf, ub_count) == 4, "found %lld\n", + (long long)(int)offsetof(struct update_buf, ub_count)); + LASSERTF((int)sizeof(((struct update_buf *)0)->ub_count) == 4, "found %lld\n", + (long long)(int)sizeof(((struct update_buf *)0)->ub_count)); + LASSERTF((int)offsetof(struct update_buf, ub_bufs) == 8, "found %lld\n", + (long long)(int)offsetof(struct update_buf, ub_bufs)); + LASSERTF((int)sizeof(((struct update_buf *)0)->ub_bufs) == 0, "found %lld\n", + (long long)(int)sizeof(((struct update_buf *)0)->ub_bufs)); + + /* Checks for struct update_reply */ + LASSERTF((int)sizeof(struct update_reply) == 8, "found %lld\n", + (long long)(int)sizeof(struct update_reply)); + LASSERTF((int)offsetof(struct update_reply, ur_version) == 0, "found %lld\n", + (long long)(int)offsetof(struct update_reply, ur_version)); + LASSERTF((int)sizeof(((struct update_reply *)0)->ur_version) == 4, "found %lld\n", + (long long)(int)sizeof(((struct update_reply *)0)->ur_version)); + LASSERTF((int)offsetof(struct update_reply, ur_count) == 4, "found %lld\n", + (long long)(int)offsetof(struct update_reply, ur_count)); + LASSERTF((int)sizeof(((struct update_reply *)0)->ur_count) == 4, "found %lld\n", + (long long)(int)sizeof(((struct update_reply *)0)->ur_count)); + LASSERTF((int)offsetof(struct update_reply, ur_lens) == 8, "found %lld\n", + (long long)(int)offsetof(struct update_reply, ur_lens)); + LASSERTF((int)sizeof(((struct update_reply *)0)->ur_lens) == 0, "found %lld\n", + (long long)(int)sizeof(((struct update_reply *)0)->ur_lens)); + + /* Checks for struct update */ + LASSERTF((int)sizeof(struct update) == 56, "found %lld\n", + (long long)(int)sizeof(struct update)); + LASSERTF((int)offsetof(struct update, u_type) == 0, "found %lld\n", + (long long)(int)offsetof(struct update, u_type)); + LASSERTF((int)sizeof(((struct update *)0)->u_type) == 4, "found %lld\n", + (long long)(int)sizeof(((struct update *)0)->u_type)); + LASSERTF((int)offsetof(struct update, u_batchid) == 4, "found %lld\n", + (long long)(int)offsetof(struct update, u_batchid)); + LASSERTF((int)sizeof(((struct update *)0)->u_batchid) == 4, "found %lld\n", + (long long)(int)sizeof(((struct update *)0)->u_batchid)); + LASSERTF((int)offsetof(struct update, u_fid) == 8, "found %lld\n", + (long long)(int)offsetof(struct update, u_fid)); + LASSERTF((int)sizeof(((struct update *)0)->u_fid) == 16, "found %lld\n", + (long long)(int)sizeof(((struct update *)0)->u_fid)); + LASSERTF((int)offsetof(struct update, u_lens) == 24, "found %lld\n", + (long long)(int)offsetof(struct update, u_lens)); + LASSERTF((int)sizeof(((struct update *)0)->u_lens) == 32, "found %lld\n", + (long long)(int)sizeof(((struct update *)0)->u_lens)); + LASSERTF((int)offsetof(struct update, u_bufs) == 56, "found %lld\n", + (long long)(int)offsetof(struct update, u_bufs)); + LASSERTF((int)sizeof(((struct update *)0)->u_bufs) == 0, "found %lld\n", + (long long)(int)sizeof(((struct update *)0)->u_bufs)); +} |