diff options
Diffstat (limited to 'drivers/staging/lustre/lustre/ptlrpc/service.c')
-rw-r--r-- | drivers/staging/lustre/lustre/ptlrpc/service.c | 526 |
1 files changed, 92 insertions, 434 deletions
diff --git a/drivers/staging/lustre/lustre/ptlrpc/service.c b/drivers/staging/lustre/lustre/ptlrpc/service.c index 003344ccf..f45898f17 100644 --- a/drivers/staging/lustre/lustre/ptlrpc/service.c +++ b/drivers/staging/lustre/lustre/ptlrpc/service.c @@ -58,7 +58,6 @@ MODULE_PARM_DESC(at_early_margin, "How soon before an RPC deadline to send an ea module_param(at_extra, int, 0644); MODULE_PARM_DESC(at_extra, "How much extra time to give with each early reply"); - /* forward ref */ static int ptlrpc_server_post_idle_rqbds(struct ptlrpc_service_part *svcpt); static void ptlrpc_server_hpreq_fini(struct ptlrpc_request *req); @@ -86,8 +85,10 @@ ptlrpc_alloc_rqbd(struct ptlrpc_service_part *svcpt) rqbd->rqbd_cbid.cbid_fn = request_in_callback; rqbd->rqbd_cbid.cbid_arg = rqbd; INIT_LIST_HEAD(&rqbd->rqbd_reqs); - OBD_CPT_ALLOC_LARGE(rqbd->rqbd_buffer, svc->srv_cptable, - svcpt->scp_cpt, svc->srv_buf_size); + rqbd->rqbd_buffer = libcfs_kvzalloc_cpt(svc->srv_cptable, + svcpt->scp_cpt, + svc->srv_buf_size, + GFP_KERNEL); if (rqbd->rqbd_buffer == NULL) { kfree(rqbd); return NULL; @@ -141,7 +142,6 @@ ptlrpc_grow_req_bufs(struct ptlrpc_service_part *svcpt, int post) svcpt->scp_rqbd_allocating++; spin_unlock(&svcpt->scp_lock); - for (i = 0; i < svc->srv_nbuf_per_group; i++) { /* NB: another thread might have recycled enough rqbds, we * need to make sure it wouldn't over-allocate, see LU-1212. */ @@ -177,33 +177,6 @@ ptlrpc_grow_req_bufs(struct ptlrpc_service_part *svcpt, int post) return rc; } -/** - * Part of Rep-Ack logic. - * Puts a lock and its mode into reply state associated to request reply. - */ -void -ptlrpc_save_lock(struct ptlrpc_request *req, - struct lustre_handle *lock, int mode, int no_ack) -{ - struct ptlrpc_reply_state *rs = req->rq_reply_state; - int idx; - - LASSERT(rs != NULL); - LASSERT(rs->rs_nlocks < RS_MAX_LOCKS); - - if (req->rq_export->exp_disconnected) { - ldlm_lock_decref(lock, mode); - } else { - idx = rs->rs_nlocks++; - rs->rs_locks[idx] = *lock; - rs->rs_modes[idx] = mode; - rs->rs_difficult = 1; - rs->rs_no_ack = !!no_ack; - } -} -EXPORT_SYMBOL(ptlrpc_save_lock); - - struct ptlrpc_hr_partition; struct ptlrpc_hr_thread { @@ -244,32 +217,10 @@ struct ptlrpc_hr_service { struct ptlrpc_hr_partition **hr_partitions; }; -struct rs_batch { - struct list_head rsb_replies; - unsigned int rsb_n_replies; - struct ptlrpc_service_part *rsb_svcpt; -}; - /** reply handling service. */ static struct ptlrpc_hr_service ptlrpc_hr; /** - * maximum number of replies scheduled in one batch - */ -#define MAX_SCHEDULED 256 - -/** - * Initialize a reply batch. - * - * \param b batch - */ -static void rs_batch_init(struct rs_batch *b) -{ - memset(b, 0, sizeof(*b)); - INIT_LIST_HEAD(&b->rsb_replies); -} - -/** * Choose an hr thread to dispatch requests to. */ static struct ptlrpc_hr_thread * @@ -295,76 +246,6 @@ ptlrpc_hr_select(struct ptlrpc_service_part *svcpt) } /** - * Dispatch all replies accumulated in the batch to one from - * dedicated reply handling threads. - * - * \param b batch - */ -static void rs_batch_dispatch(struct rs_batch *b) -{ - if (b->rsb_n_replies != 0) { - struct ptlrpc_hr_thread *hrt; - - hrt = ptlrpc_hr_select(b->rsb_svcpt); - - spin_lock(&hrt->hrt_lock); - list_splice_init(&b->rsb_replies, &hrt->hrt_queue); - spin_unlock(&hrt->hrt_lock); - - wake_up(&hrt->hrt_waitq); - b->rsb_n_replies = 0; - } -} - -/** - * Add a reply to a batch. - * Add one reply object to a batch, schedule batched replies if overload. - * - * \param b batch - * \param rs reply - */ -static void rs_batch_add(struct rs_batch *b, struct ptlrpc_reply_state *rs) -{ - struct ptlrpc_service_part *svcpt = rs->rs_svcpt; - - if (svcpt != b->rsb_svcpt || b->rsb_n_replies >= MAX_SCHEDULED) { - if (b->rsb_svcpt != NULL) { - rs_batch_dispatch(b); - spin_unlock(&b->rsb_svcpt->scp_rep_lock); - } - spin_lock(&svcpt->scp_rep_lock); - b->rsb_svcpt = svcpt; - } - spin_lock(&rs->rs_lock); - rs->rs_scheduled_ever = 1; - if (rs->rs_scheduled == 0) { - list_move(&rs->rs_list, &b->rsb_replies); - rs->rs_scheduled = 1; - b->rsb_n_replies++; - } - rs->rs_committed = 1; - spin_unlock(&rs->rs_lock); -} - -/** - * Reply batch finalization. - * Dispatch remaining replies from the batch - * and release remaining spinlock. - * - * \param b batch - */ -static void rs_batch_fini(struct rs_batch *b) -{ - if (b->rsb_svcpt != NULL) { - rs_batch_dispatch(b); - spin_unlock(&b->rsb_svcpt->scp_rep_lock); - } -} - -#define DECLARE_RS_BATCH(b) struct rs_batch b - - -/** * Put reply state into a queue for processing because we received * ACK from the client */ @@ -401,32 +282,6 @@ ptlrpc_schedule_difficult_reply(struct ptlrpc_reply_state *rs) } EXPORT_SYMBOL(ptlrpc_schedule_difficult_reply); -void ptlrpc_commit_replies(struct obd_export *exp) -{ - struct ptlrpc_reply_state *rs, *nxt; - DECLARE_RS_BATCH(batch); - - rs_batch_init(&batch); - /* Find any replies that have been committed and get their service - * to attend to complete them. */ - - /* CAVEAT EMPTOR: spinlock ordering!!! */ - spin_lock(&exp->exp_uncommitted_replies_lock); - list_for_each_entry_safe(rs, nxt, &exp->exp_uncommitted_replies, - rs_obd_list) { - LASSERT(rs->rs_difficult); - /* VBR: per-export last_committed */ - LASSERT(rs->rs_export); - if (rs->rs_transno <= exp->exp_last_committed) { - list_del_init(&rs->rs_obd_list); - rs_batch_add(&batch, rs); - } - } - spin_unlock(&exp->exp_uncommitted_replies_lock); - rs_batch_fini(&batch); -} -EXPORT_SYMBOL(ptlrpc_commit_replies); - static int ptlrpc_server_post_idle_rqbds(struct ptlrpc_service_part *svcpt) { @@ -647,7 +502,9 @@ ptlrpc_service_part_init(struct ptlrpc_service *svc, if (array->paa_reqs_count == NULL) goto free_reqs_array; - cfs_timer_init(&svcpt->scp_at_timer, ptlrpc_at_timer, svcpt); + setup_timer(&svcpt->scp_at_timer, ptlrpc_at_timer, + (unsigned long)svcpt); + /* At SOW, service time should be quick; 10s seems generous. If client * timeout is less than this, we'll be sending an early reply. */ at_init(&svcpt->scp_at_estimate, 10, 0); @@ -856,7 +713,7 @@ static void ptlrpc_server_free_request(struct ptlrpc_request *req) * drop a reference count of the request. if it reaches 0, we either * put it into history list, or free it immediately. */ -void ptlrpc_server_drop_request(struct ptlrpc_request *req) +static void ptlrpc_server_drop_request(struct ptlrpc_request *req) { struct ptlrpc_request_buffer_desc *rqbd = req->rq_rqbd; struct ptlrpc_service_part *svcpt = rqbd->rqbd_svcpt; @@ -960,35 +817,6 @@ void ptlrpc_server_drop_request(struct ptlrpc_request *req) } } -/** Change request export and move hp request from old export to new */ -void ptlrpc_request_change_export(struct ptlrpc_request *req, - struct obd_export *export) -{ - if (req->rq_export != NULL) { - if (!list_empty(&req->rq_exp_list)) { - /* remove rq_exp_list from last export */ - spin_lock_bh(&req->rq_export->exp_rpc_lock); - list_del_init(&req->rq_exp_list); - spin_unlock_bh(&req->rq_export->exp_rpc_lock); - - /* export has one reference already, so it`s safe to - * add req to export queue here and get another - * reference for request later */ - spin_lock_bh(&export->exp_rpc_lock); - list_add(&req->rq_exp_list, &export->exp_hp_rpcs); - spin_unlock_bh(&export->exp_rpc_lock); - } - class_export_rpc_dec(req->rq_export); - class_export_put(req->rq_export); - } - - /* request takes one export refcount */ - req->rq_export = class_export_get(export); - class_export_rpc_inc(export); - - return; -} - /** * to finish a request: stop sending more early replies, and release * the request. @@ -1025,82 +853,6 @@ static void ptlrpc_server_finish_active_request( } /** - * This function makes sure dead exports are evicted in a timely manner. - * This function is only called when some export receives a message (i.e., - * the network is up.) - */ -static void ptlrpc_update_export_timer(struct obd_export *exp, long extra_delay) -{ - struct obd_export *oldest_exp; - time_t oldest_time, new_time; - - LASSERT(exp); - - /* Compensate for slow machines, etc, by faking our request time - into the future. Although this can break the strict time-ordering - of the list, we can be really lazy here - we don't have to evict - at the exact right moment. Eventually, all silent exports - will make it to the top of the list. */ - - /* Do not pay attention on 1sec or smaller renewals. */ - new_time = get_seconds() + extra_delay; - if (exp->exp_last_request_time + 1 /*second */ >= new_time) - return; - - exp->exp_last_request_time = new_time; - - /* exports may get disconnected from the chain even though the - export has references, so we must keep the spin lock while - manipulating the lists */ - spin_lock(&exp->exp_obd->obd_dev_lock); - - if (list_empty(&exp->exp_obd_chain_timed)) { - /* this one is not timed */ - spin_unlock(&exp->exp_obd->obd_dev_lock); - return; - } - - list_move_tail(&exp->exp_obd_chain_timed, - &exp->exp_obd->obd_exports_timed); - - oldest_exp = list_entry(exp->exp_obd->obd_exports_timed.next, - struct obd_export, exp_obd_chain_timed); - oldest_time = oldest_exp->exp_last_request_time; - spin_unlock(&exp->exp_obd->obd_dev_lock); - - if (exp->exp_obd->obd_recovering) { - /* be nice to everyone during recovery */ - return; - } - - /* Note - racing to start/reset the obd_eviction timer is safe */ - if (exp->exp_obd->obd_eviction_timer == 0) { - /* Check if the oldest entry is expired. */ - if (get_seconds() > (oldest_time + PING_EVICT_TIMEOUT + - extra_delay)) { - /* We need a second timer, in case the net was down and - * it just came back. Since the pinger may skip every - * other PING_INTERVAL (see note in ptlrpc_pinger_main), - * we better wait for 3. */ - exp->exp_obd->obd_eviction_timer = - get_seconds() + 3 * PING_INTERVAL; - CDEBUG(D_HA, "%s: Think about evicting %s from "CFS_TIME_T"\n", - exp->exp_obd->obd_name, - obd_export_nid2str(oldest_exp), oldest_time); - } - } else { - if (get_seconds() > - (exp->exp_obd->obd_eviction_timer + extra_delay)) { - /* The evictor won't evict anyone who we've heard from - * recently, so we don't have to check before we start - * it. */ - if (!ping_evictor_wake(exp)) - exp->exp_obd->obd_eviction_timer = 0; - } - } -} - -/** * Sanity check request \a req. * Return 0 if all is ok, error code otherwise. */ @@ -1126,18 +878,16 @@ static int ptlrpc_check_req(struct ptlrpc_request *req) req, (obd != NULL) ? obd->obd_name : "unknown"); rc = -ENODEV; } else if (lustre_msg_get_flags(req->rq_reqmsg) & - (MSG_REPLAY | MSG_REQ_REPLAY_DONE) && - !obd->obd_recovering) { - DEBUG_REQ(D_ERROR, req, - "Invalid replay without recovery"); - class_fail_export(req->rq_export); - rc = -ENODEV; - } else if (lustre_msg_get_transno(req->rq_reqmsg) != 0 && - !obd->obd_recovering) { - DEBUG_REQ(D_ERROR, req, "Invalid req with transno %llu without recovery", - lustre_msg_get_transno(req->rq_reqmsg)); - class_fail_export(req->rq_export); - rc = -ENODEV; + (MSG_REPLAY | MSG_REQ_REPLAY_DONE)) { + DEBUG_REQ(D_ERROR, req, "Invalid replay without recovery"); + class_fail_export(req->rq_export); + rc = -ENODEV; + } else if (lustre_msg_get_transno(req->rq_reqmsg) != 0) { + DEBUG_REQ(D_ERROR, req, + "Invalid req with transno %llu without recovery", + lustre_msg_get_transno(req->rq_reqmsg)); + class_fail_export(req->rq_export); + rc = -ENODEV; } if (unlikely(rc < 0)) { @@ -1153,17 +903,17 @@ static void ptlrpc_at_set_timer(struct ptlrpc_service_part *svcpt) __s32 next; if (array->paa_count == 0) { - cfs_timer_disarm(&svcpt->scp_at_timer); + del_timer(&svcpt->scp_at_timer); return; } /* Set timer for closest deadline */ - next = (__s32)(array->paa_deadline - get_seconds() - + next = (__s32)(array->paa_deadline - ktime_get_real_seconds() - at_early_margin); if (next <= 0) { ptlrpc_at_timer((unsigned long)svcpt); } else { - cfs_timer_arm(&svcpt->scp_at_timer, cfs_time_shift(next)); + mod_timer(&svcpt->scp_at_timer, cfs_time_shift(next)); CDEBUG(D_INFO, "armed %s at %+ds\n", svcpt->scp_service->srv_name, next); } @@ -1189,7 +939,7 @@ static int ptlrpc_at_add_timed(struct ptlrpc_request *req) spin_lock(&svcpt->scp_at_lock); LASSERT(list_empty(&req->rq_timed_list)); - index = (unsigned long)req->rq_deadline % array->paa_size; + div_u64_rem(req->rq_deadline, array->paa_size, &index); if (array->paa_reqs_count[index] > 0) { /* latest rpcs will have the latest deadlines in the list, * so search backward. */ @@ -1248,8 +998,8 @@ static int ptlrpc_at_send_early_reply(struct ptlrpc_request *req) struct ptlrpc_service_part *svcpt = req->rq_rqbd->rqbd_svcpt; struct ptlrpc_request *reqcopy; struct lustre_msg *reqmsg; - long olddl = req->rq_deadline - get_seconds(); - time_t newdl; + long olddl = req->rq_deadline - ktime_get_real_seconds(); + time64_t newdl; int rc; /* deadline is when the client expects us to reply, margin is the @@ -1276,36 +1026,22 @@ static int ptlrpc_at_send_early_reply(struct ptlrpc_request *req) return -ENOSYS; } - if (req->rq_export && - lustre_msg_get_flags(req->rq_reqmsg) & - (MSG_REPLAY | MSG_REQ_REPLAY_DONE | MSG_LOCK_REPLAY_DONE)) { - /* During recovery, we don't want to send too many early - * replies, but on the other hand we want to make sure the - * client has enough time to resend if the rpc is lost. So - * during the recovery period send at least 4 early replies, - * spacing them every at_extra if we can. at_estimate should - * always equal this fixed value during recovery. */ - at_measured(&svcpt->scp_at_estimate, min(at_extra, - req->rq_export->exp_obd->obd_recovery_timeout / 4)); - } else { - /* Fake our processing time into the future to ask the clients - * for some extra amount of time */ - at_measured(&svcpt->scp_at_estimate, at_extra + - get_seconds() - - req->rq_arrival_time.tv_sec); - - /* Check to see if we've actually increased the deadline - - * we may be past adaptive_max */ - if (req->rq_deadline >= req->rq_arrival_time.tv_sec + - at_get(&svcpt->scp_at_estimate)) { - DEBUG_REQ(D_WARNING, req, "Couldn't add any time (%ld/%ld), not sending early reply\n", - olddl, req->rq_arrival_time.tv_sec + - at_get(&svcpt->scp_at_estimate) - - get_seconds()); - return -ETIMEDOUT; - } + /* Fake our processing time into the future to ask the clients + * for some extra amount of time */ + at_measured(&svcpt->scp_at_estimate, at_extra + + ktime_get_real_seconds() - req->rq_arrival_time.tv_sec); + + /* Check to see if we've actually increased the deadline - + * we may be past adaptive_max */ + if (req->rq_deadline >= req->rq_arrival_time.tv_sec + + at_get(&svcpt->scp_at_estimate)) { + DEBUG_REQ(D_WARNING, req, "Couldn't add any time (%ld/%lld), not sending early reply\n", + olddl, req->rq_arrival_time.tv_sec + + at_get(&svcpt->scp_at_estimate) - + ktime_get_real_seconds()); + return -ETIMEDOUT; } - newdl = get_seconds() + at_get(&svcpt->scp_at_estimate); + newdl = ktime_get_real_seconds() + at_get(&svcpt->scp_at_estimate); reqcopy = ptlrpc_request_cache_alloc(GFP_NOFS); if (reqcopy == NULL) @@ -1388,8 +1124,8 @@ static int ptlrpc_at_check_timed(struct ptlrpc_service_part *svcpt) struct ptlrpc_request *rq, *n; struct list_head work_list; __u32 index, count; - time_t deadline; - time_t now = get_seconds(); + time64_t deadline; + time64_t now = ktime_get_real_seconds(); long delay; int first, counter = 0; @@ -1419,7 +1155,7 @@ static int ptlrpc_at_check_timed(struct ptlrpc_service_part *svcpt) server will take. Send early replies to everyone expiring soon. */ INIT_LIST_HEAD(&work_list); deadline = -1; - index = (unsigned long)array->paa_deadline % array->paa_size; + div_u64_rem(array->paa_deadline, array->paa_size, &index); count = array->paa_count; while (count > 0) { count -= array->paa_reqs_count[index]; @@ -1461,7 +1197,7 @@ static int ptlrpc_at_check_timed(struct ptlrpc_service_part *svcpt) chance to send early replies */ LCONSOLE_WARN("%s: This server is not able to keep up with request traffic (cpu-bound).\n", svcpt->scp_service->srv_name); - CWARN("earlyQ=%d reqQ=%d recA=%d, svcEst=%d, delay=" CFS_DURATION_T "(jiff)\n", + CWARN("earlyQ=%d reqQ=%d recA=%d, svcEst=%d, delay=%ld(jiff)\n", counter, svcpt->scp_nreqs_incoming, svcpt->scp_nreqs_active, at_get(&svcpt->scp_at_estimate), delay); @@ -1546,30 +1282,6 @@ static void ptlrpc_server_hpreq_fini(struct ptlrpc_request *req) } } -static int ptlrpc_hpreq_check(struct ptlrpc_request *req) -{ - return 1; -} - -static struct ptlrpc_hpreq_ops ptlrpc_hpreq_common = { - .hpreq_check = ptlrpc_hpreq_check, -}; - -/* Hi-Priority RPC check by RPC operation code. */ -int ptlrpc_hpreq_handler(struct ptlrpc_request *req) -{ - int opc = lustre_msg_get_opc(req->rq_reqmsg); - - /* Check for export to let only reconnects for not yet evicted - * export to become a HP rpc. */ - if ((req->rq_export != NULL) && - (opc == OBD_PING || opc == MDS_CONNECT || opc == OST_CONNECT)) - req->rq_ops = &ptlrpc_hpreq_common; - - return 0; -} -EXPORT_SYMBOL(ptlrpc_hpreq_handler); - static int ptlrpc_server_request_add(struct ptlrpc_service_part *svcpt, struct ptlrpc_request *req) { @@ -1638,6 +1350,7 @@ static bool ptlrpc_server_allow_normal(struct ptlrpc_service_part *svcpt, bool force) { int running = svcpt->scp_nthrs_running; + if (unlikely(svcpt->scp_service->srv_req_portal == MDS_REQUEST_PORTAL && CFS_FAIL_PRECHECK(OBD_FAIL_PTLRPC_CANCEL_RESEND))) { /* leave just 1 thread for normal RPCs */ @@ -1828,14 +1541,13 @@ ptlrpc_server_handle_req_in(struct ptlrpc_service_part *svcpt, if (rc) goto err_req; - ptlrpc_update_export_timer(req->rq_export, 0); } /* req_in handling should/must be fast */ - if (get_seconds() - req->rq_arrival_time.tv_sec > 5) + if (ktime_get_real_seconds() - req->rq_arrival_time.tv_sec > 5) DEBUG_REQ(D_WARNING, req, "Slow req_in handling "CFS_DURATION_T"s", - cfs_time_sub(get_seconds(), - req->rq_arrival_time.tv_sec)); + (long)(ktime_get_real_seconds() - + req->rq_arrival_time.tv_sec)); /* Set rpc server deadline and add it to the timed list */ deadline = (lustre_msghdr_get_flags(req->rq_reqmsg) & @@ -1876,9 +1588,12 @@ ptlrpc_server_handle_request(struct ptlrpc_service_part *svcpt, { struct ptlrpc_service *svc = svcpt->scp_service; struct ptlrpc_request *request; - struct timeval work_start; - struct timeval work_end; - long timediff; + struct timespec64 work_start; + struct timespec64 work_end; + struct timespec64 timediff; + struct timespec64 arrived; + unsigned long timediff_usecs; + unsigned long arrived_usecs; int rc; int fail_opc = 0; @@ -1901,12 +1616,13 @@ ptlrpc_server_handle_request(struct ptlrpc_service_part *svcpt, if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_DUMP_LOG)) libcfs_debug_dumplog(); - do_gettimeofday(&work_start); - timediff = cfs_timeval_sub(&work_start, &request->rq_arrival_time, - NULL); + ktime_get_real_ts64(&work_start); + timediff = timespec64_sub(work_start, request->rq_arrival_time); + timediff_usecs = timediff.tv_sec * USEC_PER_SEC + + timediff.tv_nsec / NSEC_PER_USEC; if (likely(svc->srv_stats != NULL)) { lprocfs_counter_add(svc->srv_stats, PTLRPC_REQWAIT_CNTR, - timediff); + timediff_usecs); lprocfs_counter_add(svc->srv_stats, PTLRPC_REQQDEPTH_CNTR, svcpt->scp_nreqs_incoming); lprocfs_counter_add(svc->srv_stats, PTLRPC_REQACTIVE_CNTR, @@ -1933,18 +1649,17 @@ ptlrpc_server_handle_request(struct ptlrpc_service_part *svcpt, if (likely(request->rq_export)) { if (unlikely(ptlrpc_check_req(request))) goto put_conn; - ptlrpc_update_export_timer(request->rq_export, timediff >> 19); } /* Discard requests queued for longer than the deadline. The deadline is increased if we send an early reply. */ - if (get_seconds() > request->rq_deadline) { + if (ktime_get_real_seconds() > request->rq_deadline) { DEBUG_REQ(D_ERROR, request, "Dropping timed-out request from %s: deadline " CFS_DURATION_T ":" CFS_DURATION_T "s ago\n", libcfs_id2str(request->rq_peer), - cfs_time_sub(request->rq_deadline, - request->rq_arrival_time.tv_sec), - cfs_time_sub(get_seconds(), - request->rq_deadline)); + (long)(request->rq_deadline - + request->rq_arrival_time.tv_sec), + (long)(ktime_get_real_seconds() - + request->rq_deadline)); goto put_conn; } @@ -1969,19 +1684,22 @@ put_conn: lu_context_exit(&request->rq_session); lu_context_fini(&request->rq_session); - if (unlikely(get_seconds() > request->rq_deadline)) { + if (unlikely(ktime_get_real_seconds() > request->rq_deadline)) { DEBUG_REQ(D_WARNING, request, - "Request took longer than estimated (" - CFS_DURATION_T":"CFS_DURATION_T - "s); client may timeout.", - cfs_time_sub(request->rq_deadline, - request->rq_arrival_time.tv_sec), - cfs_time_sub(get_seconds(), - request->rq_deadline)); - } - - do_gettimeofday(&work_end); - timediff = cfs_timeval_sub(&work_end, &work_start, NULL); + "Request took longer than estimated (%lld:%llds); " + "client may timeout.", + (s64)request->rq_deadline - + request->rq_arrival_time.tv_sec, + (s64)ktime_get_real_seconds() - request->rq_deadline); + } + + ktime_get_real_ts64(&work_end); + timediff = timespec64_sub(work_end, work_start); + timediff_usecs = timediff.tv_sec * USEC_PER_SEC + + timediff.tv_nsec / NSEC_PER_USEC; + arrived = timespec64_sub(work_end, request->rq_arrival_time); + arrived_usecs = arrived.tv_sec * USEC_PER_SEC + + arrived.tv_nsec / NSEC_PER_USEC; CDEBUG(D_RPCTRACE, "Handled RPC pname:cluuid+ref:pid:xid:nid:opc %s:%s+%d:%d:x%llu:%s:%d Request processed in %ldus (%ldus total) trans %llu rc %d/%d\n", current_comm(), (request->rq_export ? @@ -1992,8 +1710,8 @@ put_conn: request->rq_xid, libcfs_id2str(request->rq_peer), lustre_msg_get_opc(request->rq_reqmsg), - timediff, - cfs_timeval_sub(&work_end, &request->rq_arrival_time, NULL), + timediff_usecs, + arrived_usecs, (request->rq_repmsg ? lustre_msg_get_transno(request->rq_repmsg) : request->rq_transno), @@ -2003,20 +1721,20 @@ put_conn: if (likely(svc->srv_stats != NULL && request->rq_reqmsg != NULL)) { __u32 op = lustre_msg_get_opc(request->rq_reqmsg); int opc = opcode_offset(op); + if (opc > 0 && !(op == LDLM_ENQUEUE || op == MDS_REINT)) { LASSERT(opc < LUSTRE_MAX_OPCODES); lprocfs_counter_add(svc->srv_stats, opc + EXTRA_MAX_OPCODES, - timediff); + timediff_usecs); } } if (unlikely(request->rq_early_count)) { DEBUG_REQ(D_ADAPTTO, request, - "sent %d early replies before finishing in " - CFS_DURATION_T"s", + "sent %d early replies before finishing in %llds", request->rq_early_count, - cfs_time_sub(work_end.tv_sec, - request->rq_arrival_time.tv_sec)); + (s64)work_end.tv_sec - + request->rq_arrival_time.tv_sec); } out_req: @@ -2128,7 +1846,6 @@ ptlrpc_handle_rs(struct ptlrpc_reply_state *rs) return 1; } - static void ptlrpc_check_rqbd_pool(struct ptlrpc_service_part *svcpt) { @@ -2155,7 +1872,7 @@ ptlrpc_check_rqbd_pool(struct ptlrpc_service_part *svcpt) static int ptlrpc_retry_rqbds(void *arg) { - struct ptlrpc_service_part *svcpt = (struct ptlrpc_service_part *)arg; + struct ptlrpc_service_part *svcpt = arg; svcpt->scp_rqbd_timeout = 0; return -ETIMEDOUT; @@ -2262,7 +1979,7 @@ ptlrpc_wait_event(struct ptlrpc_service_part *svcpt, */ static int ptlrpc_main(void *arg) { - struct ptlrpc_thread *thread = (struct ptlrpc_thread *)arg; + struct ptlrpc_thread *thread = arg; struct ptlrpc_service_part *svcpt = thread->t_svcpt; struct ptlrpc_service *svc = svcpt->scp_service; struct ptlrpc_reply_state *rs; @@ -2464,7 +2181,7 @@ static int hrt_dont_sleep(struct ptlrpc_hr_thread *hrt, */ static int ptlrpc_hr_main(void *arg) { - struct ptlrpc_hr_thread *hrt = (struct ptlrpc_hr_thread *)arg; + struct ptlrpc_hr_thread *hrt = arg; struct ptlrpc_hr_partition *hrp = hrt->hrt_partition; LIST_HEAD (replies); char threadname[20]; @@ -2538,6 +2255,7 @@ static int ptlrpc_start_hr_threads(void) for (j = 0; j < hrp->hrp_nthrs; j++) { struct ptlrpc_hr_thread *hrt = &hrp->hrp_thrs[j]; + rc = PTR_ERR(kthread_run(ptlrpc_hr_main, &hrp->hrp_thrs[j], "ptlrpc_hr%02d_%03d", @@ -2609,7 +2327,7 @@ static void ptlrpc_svcpt_stop_threads(struct ptlrpc_service_part *svcpt) /** * Stops all threads of a particular service \a svc */ -void ptlrpc_stop_all_threads(struct ptlrpc_service *svc) +static void ptlrpc_stop_all_threads(struct ptlrpc_service *svc) { struct ptlrpc_service_part *svcpt; int i; @@ -2619,7 +2337,6 @@ void ptlrpc_stop_all_threads(struct ptlrpc_service *svc) ptlrpc_svcpt_stop_threads(svcpt); } } -EXPORT_SYMBOL(ptlrpc_stop_all_threads); int ptlrpc_start_threads(struct ptlrpc_service *svc) { @@ -2833,7 +2550,6 @@ void ptlrpc_hr_fini(void) ptlrpc_hr.hr_partitions = NULL; } - /** * Wait until all already scheduled replies are processed. */ @@ -2862,7 +2578,7 @@ ptlrpc_service_del_atimer(struct ptlrpc_service *svc) /* early disarm AT timer... */ ptlrpc_service_for_each_part(svcpt, i, svc) { if (svcpt->scp_service != NULL) - cfs_timer_disarm(&svcpt->scp_at_timer); + del_timer(&svcpt->scp_at_timer); } } @@ -3002,7 +2718,7 @@ ptlrpc_service_free(struct ptlrpc_service *svc) break; /* In case somebody rearmed this in the meantime */ - cfs_timer_disarm(&svcpt->scp_at_timer); + del_timer(&svcpt->scp_at_timer); array = &svcpt->scp_at_array; kfree(array->paa_reqs_array); @@ -3045,61 +2761,3 @@ int ptlrpc_unregister_service(struct ptlrpc_service *service) return 0; } EXPORT_SYMBOL(ptlrpc_unregister_service); - -/** - * Returns 0 if the service is healthy. - * - * Right now, it just checks to make sure that requests aren't languishing - * in the queue. We'll use this health check to govern whether a node needs - * to be shot, so it's intentionally non-aggressive. */ -static int ptlrpc_svcpt_health_check(struct ptlrpc_service_part *svcpt) -{ - struct ptlrpc_request *request = NULL; - struct timeval right_now; - long timediff; - - do_gettimeofday(&right_now); - - spin_lock(&svcpt->scp_req_lock); - /* How long has the next entry been waiting? */ - if (ptlrpc_server_high_pending(svcpt, true)) - request = ptlrpc_nrs_req_peek_nolock(svcpt, true); - else if (ptlrpc_server_normal_pending(svcpt, true)) - request = ptlrpc_nrs_req_peek_nolock(svcpt, false); - - if (request == NULL) { - spin_unlock(&svcpt->scp_req_lock); - return 0; - } - - timediff = cfs_timeval_sub(&right_now, &request->rq_arrival_time, NULL); - spin_unlock(&svcpt->scp_req_lock); - - if ((timediff / ONE_MILLION) > - (AT_OFF ? obd_timeout * 3 / 2 : at_max)) { - CERROR("%s: unhealthy - request has been waiting %lds\n", - svcpt->scp_service->srv_name, timediff / ONE_MILLION); - return -1; - } - - return 0; -} - -int -ptlrpc_service_health_check(struct ptlrpc_service *svc) -{ - struct ptlrpc_service_part *svcpt; - int i; - - if (svc == NULL) - return 0; - - ptlrpc_service_for_each_part(svcpt, i, svc) { - int rc = ptlrpc_svcpt_health_check(svcpt); - - if (rc != 0) - return rc; - } - return 0; -} -EXPORT_SYMBOL(ptlrpc_service_health_check); |